import inspect
import json
import os

from django.shortcuts import render

# RedditParser, sentiment, and cmd_subfolder are assumed to be defined at
# module level elsewhere in the project (not shown here).


def home(request):
    context = {}
    context['query'] = False
    context['search_query'] = ''

    if request.method == 'GET':
        return render(request, 'hue/home.html', context)

    if 'search_q' in request.POST:
        search_query = request.POST['search_q']
        context['search_query'] = search_query
        print(search_query)
        context['query'] = True
        # twitter.twitter_query(search_query)

        # Scrape reddit for posts matching the query.
        r = RedditParser()
        r.reddit_query(search_query, 25, 25)

        # Run sentiment analysis over the scraped data.
        path = os.path.realpath(os.path.abspath(os.path.join(
            os.path.split(inspect.getfile(inspect.currentframe()))[0], "datumbox")))
        ifile = cmd_subfolder + '/data.json'
        ofile = path + '/sentiment.csv'
        print ifile
        print ofile
        sentiment.analyze_sentiment(ifile, ofile, 0.1)

        # Run the external semantic-similarity tool over the scraped data and
        # sentiment scores, then load its JSON output for the template.
        path = os.path.realpath(os.path.abspath(os.path.join(
            os.path.split(inspect.getfile(inspect.currentframe()))[0], "semantic-similarity-master")))
        cofile = path + '/reddit_senti.json'
        os.system(path + "/similar" + ' ' + ifile + ' ' + ofile + ' ' + cofile)

        with open(cofile) as data_file:
            data = json.load(data_file)
        context['data'] = json.dumps(data)

    return render(request, 'hue/home.html', context)
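# Minimal sketch of the URLconf wiring for the home() view above. This assumes
# the view lives in hue/views.py and a Django 1.x-style urls.py; the names here
# are illustrative, not taken from the project.
from django.conf.urls import url

from . import views

urlpatterns = [
    url(r'^$', views.home, name='home'),
]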
def scrape(self):
    '''Goes through the list of links to scrape and updates the db.'''
    parser = RedditParser()
    urls = self.getUrls("reddit.com")
    for url in list(urls):
        oldNum = url.comments
        newNum = parser.getNotificationsFromUrl(url.url)
        print oldNum, newNum
        if newNum > oldNum:
            self.updateDb(url.url, newNum)
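# scrape() above assumes that self.getUrls(domain) yields records exposing
# .url and .comments, and that self.updateDb(url, count) persists the new
# comment count. A hypothetical sketch of that record shape (illustrative
# names only):
import collections

UrlRecord = collections.namedtuple('UrlRecord', ['url', 'comments'])

# e.g. self.getUrls("reddit.com") might yield
# [UrlRecord(url='https://www.reddit.com/r/learnpython/', comments=12), ...]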
import logging

# Digraph, engines, RedditParser, MAX_DEPTH, and the width() helper are
# assumed to come from module-level imports/definitions not shown here
# (a hypothetical width() is sketched after this function).


def make_graph(args, outfile):
    graph = Digraph('Subreddit connection graph starting with:\n' + ', '.join(args))
    parser = RedditParser()
    next_subs = set(subreddit.lower() for subreddit in args)
    logging.debug("Received on commandline: " + ' '.join(next_subs))
    visited = set()

    # Breadth-first walk over related subreddits, up to MAX_DEPTH levels deep.
    for current_depth in xrange(MAX_DEPTH):
        current_subs = next_subs
        visited.update(next_subs)
        if len(current_subs) == 0:
            break
        next_subs = set()
        for subreddit in current_subs:
            logging.debug("Visiting: " + subreddit)
            info = parser.get_info(subreddit)
            if info:
                links, num_subs = info
            else:
                continue
            current_node = graph.add_node(subreddit, shape="circle",
                                          width=width(num_subs), fixedsize=True,
                                          label='\n'.join([subreddit, str(num_subs)]))
            logging.debug("Received links: " + ' '.join(links))
            for link in links:
                if link not in visited:
                    next_subs.add(link)
                    new_node = graph.add_node(link)
                    current_node >> new_node

    logging.debug("Done main loop. Remaining unvisited subs:\n" + '\n'.join(next_subs))

    # Style the subreddits we didn't visit because the maximum search depth
    # was reached.
    for link in next_subs:
        info = parser.get_info(link)
        if info:
            links, num_subs = info
        else:
            continue
        logging.debug("Updating properties for remaining subreddit: %s" % link)
        graph.add_node(link, shape="circle", width=width(num_subs), fixedsize=True,
                       label='\n'.join([link, str(num_subs)]))

    graph.layout(engines.dot)
    graph.render(outfile)
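# make_graph() relies on a module-level width() helper that is not shown in
# this snippet. A hypothetical sketch: scale node width with the log of the
# subscriber count so large subreddits don't dwarf the drawing (the real
# implementation may differ).
import math


def width(num_subs):
    # Clamp to at least 1 subscriber so log10 is defined; growth is logarithmic.
    return 0.5 + 0.25 * math.log10(max(num_subs, 1))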
from urlparse import urlparse


class MasterParser():

    def __init__(self):
        self.redditParser = RedditParser()

    def parseFromUrl(self, url):
        urlparts = urlparse(url)
        if urlparts.netloc == "www.reddit.com":
            return int(self.redditParser.getNotificationsFromUrl(url))
        # TODO: Add other parsers
        raise Exception("Site is not reddit.com")
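# Example use of MasterParser; the URL is illustrative only. Any non-reddit
# URL raises an Exception until other parsers are added.
if __name__ == "__main__":
    parser = MasterParser()
    print parser.parseFromUrl("https://www.reddit.com/r/learnpython/")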
def __init__(self):
    self.redditParser = RedditParser()