def visualize_users(quora_data):
    """Render the crawled users as a directed "who follows whom" graph.

    Each document in ``quora_data`` contributes one node (its username, as
    returned by ``_get_username``).  Edges point from the follower to the
    followed user and are only drawn between users that have a node of
    their own.

    The same relation is usually recorded twice in the data -- in the
    follower's ``'following'`` list and in the followee's ``'followers'``
    list -- so every (tail, head) pair is emitted at most once to avoid
    drawing duplicate parallel edges.

    Args:
        quora_data: iterable of user documents; ``document[username]`` must
            provide ``'following'`` and ``'followers'`` sequences.  It is
            iterated twice, so it must be re-iterable (e.g. a list).

    Side effects:
        Renders the graph to ``images/users.gv`` and opens the viewer.
    """
    # NOTE(review): Digraph is presumably graphviz.Digraph -- confirm import.
    dot = Digraph(comment='Users subgraph', engine='sfdp')

    # First pass: one node per distinct user.
    seen_users = set()
    for document in quora_data:
        username = _get_username(document)
        if username not in seen_users:
            dot.node(username, label=username)
            seen_users.add(username)

    # Second pass: connect users, remembering which edges already exist.
    seen_edges = set()

    def add_edge(tail, head):
        """Add the edge tail -> head unless it has been added before."""
        edge = (tail, head)
        if edge in seen_edges:
            return
        seen_edges.add(edge)
        dot.edge(tail, head)

    for document in quora_data:
        username = _get_username(document)
        user_stats = document[username]

        # Users this user follows: username -> following.
        for following in user_stats['following']:
            following_sanitized = _sanitize_username(following)
            if following_sanitized in seen_users:
                add_edge(username, following_sanitized)

        # Users following this user: follower -> username.
        for follower in user_stats['followers']:
            follower_sanitized = _sanitize_username(follower)
            if follower_sanitized in seen_users:
                add_edge(follower_sanitized, username)

    dot = _apply_styles(dot, styles)
    # Debugging aid: uncomment to dump the generated DOT source.
    # print dot.source
    dot.render(os.path.join('images', 'users.gv'), view=True)
def _crawl_by_user(self, user, depth): # Stopping crawling when depth exceeds maxdepth if depth > self.maxdepth: return if user in self.crawled_users or user in self.bad_users: return print 'crawling user: %s' % user user_stats = User.get_user_stats(user, followers=True, following=True) # If something went awry crawling particular user if user_stats == {}: self.bad_users.add(user) return print 'user_stats:\n', user_stats print '---------------------------------------------------' self.crawled_users[user] = user_stats # Inserting into database as we go... self.db.users.insert({user: user_stats}) for related_user in user_stats['following'] + user_stats['followers']: self._crawl_by_user(_sanitize_username(related_user), depth + 1)
def crawl_questions_and_answers(self): ## This is for downloading - uncomment if you want to download ## # questions_data = list(self.db.questions.find()) # for document in questions_data: # question = _get_question(document) # print question # question_author, answers_authors = Quora.get_authors_of_questions_and_answers(question) # question_author = _sanitize_username(question_author) # answers_authors = [_sanitize_username(author) for author in answers_authors] # stats = {'question_author' : question_author, 'answers_authors': answers_authors} # print 'question_author:', question_author # print 'answers_authors:', answers_authors # # Inserting into database: # self.db.answers.insert({question: stats}) ## This is purely for updating ## answers_data = list(self.db.answers.find()) for document in answers_data: question = _get_question(document) if document[question]['question_author'] == '': print question print document['_id'] question_author, answers_authors = Quora.get_authors_of_questions_and_answers( question) question_author = _sanitize_username(question_author) answers_authors = [ _sanitize_username(author) for author in answers_authors ] print 'question_author:', question_author print 'answers_authors:', answers_authors stats = { 'question_author': question_author, 'answers_authors': answers_authors } self.db.answers.update({'_id': document['_id']}, {"$set": { question: stats }}, upsert=False) else: question_author = document[question]['question_author'] answers_authors = document[question]['answers_authors'] question_author = _sanitize_username(question_author) answers_authors = [ _sanitize_username(author) for author in answers_authors ] stats = { 'question_author': question_author, 'answers_authors': answers_authors } self.db.answers.update({'_id': document['_id']}, {"$set": { question: stats }}, upsert=False)