Example #1
import os
import inspect
import json

from django.shortcuts import render

# RedditParser and the sentiment helper are project-specific modules that are
# not shown in this example.


def home(request):
	context = {}
	context['query'] = False
	context['search_query'] = ''

	if request.method == 'GET':
		return render(request, 'hue/home.html', context)

	if 'search_q' in request.POST:
		search_query = request.POST['search_q']
		context['search_query'] = search_query
		print(search_query)
		context['query'] = True

		# twitter.twitter_query(search_query)
		r = RedditParser()
		r.reddit_query(search_query, 25, 25)

		# Resolve the "datumbox" folder relative to this file.
		path = os.path.realpath(os.path.abspath(os.path.join(
			os.path.split(inspect.getfile(inspect.currentframe()))[0], "datumbox")))
		# cmd_subfolder is assumed to be defined elsewhere in the module.
		ifile = cmd_subfolder + '/data.json'
		ofile = path + '/sentiment.csv'

		print(ifile)
		print(ofile)

		sentiment.analyze_sentiment(ifile, ofile, 0.1)

		# Resolve the "semantic-similarity-master" folder and run the external
		# similarity tool over the scraped data.
		path = os.path.realpath(os.path.abspath(os.path.join(
			os.path.split(inspect.getfile(inspect.currentframe()))[0], "semantic-similarity-master")))
		cofile = path + '/reddit_senti.json'
		os.system(path + "/similar" + ' ' + ifile + ' ' + ofile + ' ' + cofile)
		with open(cofile) as data_file:
			data = json.load(data_file)
		context['data'] = json.dumps(data)

	return render(request, 'hue/home.html', context)
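
For context, a minimal sketch of how a view like this might be wired into a Django URLconf; the module layout, URL pattern, and Django version assumed below are illustrative and not taken from the example above.

# urls.py (hypothetical wiring for the home view; Django 1.8-3.x style URLconf)
from django.conf.urls import url

from . import views

urlpatterns = [
	url(r'^$', views.home, name='home'),
]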
Example #2
	def scrape(self):
		'''Go through the list of links to scrape and update the db.'''
		parser = RedditParser()
		urls = self.getUrls("reddit.com")
		for url in list(urls):
			oldNum = url.comments
			newNum = parser.getNotificationsFromUrl(url.url)
			print oldNum, newNum
			# Only touch the db when the count has actually increased.
			if newNum > oldNum:
				self.updateDb(url.url, newNum)
Example #3
# Digraph, engines, width(), MAX_DEPTH, and RedditParser are provided by the
# surrounding project and its graph library; they are not shown in this example.
def make_graph(args, outfile):
	graph = Digraph('Subreddit connection graph starting with:\n' +
			', '.join(args))

	parser = RedditParser()
	next_subs = set(subreddit.lower() for subreddit in args)
	logging.debug("Received on commandline: " + ' '.join(next_subs))
	visited = set()

	for current_depth in xrange(MAX_DEPTH):
		current_subs = next_subs
		visited.update(next_subs)
		if len(current_subs) == 0:
			break
		next_subs = set()

		for subreddit in current_subs:
			logging.debug("Visiting: " + subreddit)
			info = parser.get_info(subreddit)
			if info:
				links, num_subs = info
			else:
				continue
			current_node = graph.add_node(subreddit, shape="circle",
					width=width(num_subs), fixedsize=True,
					label='\n'.join([subreddit, str(num_subs)]))
			logging.debug("Received links: " + ' '.join(links))

			for link in links:
				if link not in visited:
					next_subs.add(link)
				new_node = graph.add_node(link)
				current_node >> new_node

	logging.debug("Done main loop.  Remaining unvisited subs:\n" +
			'\n'.join(next_subs))
	# Style the subreddits we didn't visit due to maximum search depth being
	# reached.
	for link in next_subs:
		info = parser.get_info(link)
		if info:
			links, num_subs = info
		else:
			continue
		logging.debug("Updating properties for remaining subreddit: %s" % link)
		graph.add_node(link, shape="circle",
				width=width(num_subs), fixedsize=True,
				label='\n'.join([link, str(num_subs)]))

	graph.layout(engines.dot)
	graph.render(outfile)
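
A minimal sketch of how make_graph might be driven from the command line; the argument order and script name below are assumptions for illustration only.

# Hypothetical driver for make_graph(); not part of the original project.
import sys
import logging

if __name__ == '__main__':
	logging.basicConfig(level=logging.DEBUG)
	# e.g. python subreddit_graph.py out.png python learnprogramming
	outfile = sys.argv[1]
	seed_subreddits = sys.argv[2:]
	make_graph(seed_subreddits, outfile)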
Example #4
from urlparse import urlparse  # urllib.parse in Python 3


class MasterParser():
	def __init__(self):
		self.redditParser = RedditParser()

	def parseFromUrl(self, url):
		urlparts = urlparse(url)
		if urlparts.netloc == "www.reddit.com":
			return int(self.redditParser.getNotificationsFromUrl(url))
		# TODO: Add other parsers

		raise Exception("Site is not reddit.com")
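
A short usage sketch for MasterParser; the thread URL below is purely illustrative.

# Hypothetical usage; prints the count RedditParser reports for a reddit thread.
master = MasterParser()
count = master.parseFromUrl("https://www.reddit.com/r/python/comments/abc123/example/")
print(count)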
Example #5
	def __init__(self):
		self.redditParser = RedditParser()