Beispiel #1
0
def visualize_users(quora_data):
    """Render the follow relations between crawled users with Graphviz.

    Every document in ``quora_data`` contributes one node. An edge
    ``a -> b`` is drawn when ``a`` follows ``b`` and both users appear in
    ``quora_data``. The styled graph is rendered to ``images/users.gv``
    with ``view=True``.

    Args:
        quora_data: Iterable of user documents. Each document is read as
            ``document[username]['following']`` and
            ``document[username]['followers']``, where ``username`` is the
            value returned by ``_get_username(document)``.
    """
    # Materialise once: the data is traversed twice, and a one-shot iterable
    # (generator, database cursor) would be exhausted after the first pass.
    documents = list(quora_data)

    dot = Digraph(comment='Users subgraph', engine='sfdp')

    # First pass: one node per distinct user.
    seen_users = set()
    for document in documents:
        username = _get_username(document)
        if username not in seen_users:
            dot.node(username, label=username)
            seen_users.add(username)

    # The same relation usually shows up twice (in A's "following" and in
    # B's "followers"), so remember which edges were already drawn.
    drawn_edges = set()

    def add_edge(source, target):
        edge = (source, target)
        if edge not in drawn_edges:
            drawn_edges.add(edge)
            dot.edge(source, target)

    # Second pass: edges, restricted to users that have a node.
    for document in documents:
        username = _get_username(document)
        relations = document[username]

        for following in relations['following']:
            following_sanitized = _sanitize_username(following)
            if following_sanitized in seen_users:
                add_edge(username, following_sanitized)

        for follower in relations['followers']:
            follower_sanitized = _sanitize_username(follower)
            if follower_sanitized in seen_users:
                add_edge(follower_sanitized, username)

    dot = _apply_styles(dot, styles)
    dot.render(os.path.join('images', 'users.gv'), view=True)
Beispiel #2
0
def visualize_users(quora_data):
    """Render the follow relations between crawled users with Graphviz.

    Every document in ``quora_data`` contributes one node. An edge
    ``a -> b`` is drawn when ``a`` follows ``b`` and both users appear in
    ``quora_data``. The styled graph is rendered to ``images/users.gv``
    with ``view=True``.

    Args:
        quora_data: Iterable of user documents. Each document is read as
            ``document[username]['following']`` and
            ``document[username]['followers']``, where ``username`` is the
            value returned by ``_get_username(document)``.
    """
    # Materialise once: the data is traversed twice, and a one-shot iterable
    # (generator, database cursor) would be exhausted after the first pass.
    documents = list(quora_data)

    dot = Digraph(comment='Users subgraph', engine='sfdp')

    # First pass: one node per distinct user.
    seen_users = set()
    for document in documents:
        username = _get_username(document)
        if username not in seen_users:
            dot.node(username, label=username)
            seen_users.add(username)

    # The same relation usually shows up twice (in A's "following" and in
    # B's "followers"), so remember which edges were already drawn.
    drawn_edges = set()

    def add_edge(source, target):
        edge = (source, target)
        if edge not in drawn_edges:
            drawn_edges.add(edge)
            dot.edge(source, target)

    # Second pass: edges, restricted to users that have a node.
    for document in documents:
        username = _get_username(document)
        relations = document[username]

        for following in relations['following']:
            following_sanitized = _sanitize_username(following)
            if following_sanitized in seen_users:
                add_edge(username, following_sanitized)

        for follower in relations['followers']:
            follower_sanitized = _sanitize_username(follower)
            if follower_sanitized in seen_users:
                add_edge(follower_sanitized, username)

    dot = _apply_styles(dot, styles)
    dot.render(os.path.join('images', 'users.gv'), view=True)
Beispiel #3
0
    def _crawl_by_user(self, user, depth):
        """Crawl ``user`` and, recursively, the users related to them.

        Fetches the user's stats (with followers and following), stores them
        in ``self.crawled_users``, inserts them into ``self.db.users`` and
        then recurses into every followed user and every follower with
        ``depth + 1``. Users whose stats come back empty are recorded in
        ``self.bad_users`` and not expanded.

        Args:
            user: Identifier of the user to crawl, as accepted by
                ``User.get_user_stats``.
            depth: Depth of ``user`` in the current traversal; nothing is
                crawled once it exceeds ``self.maxdepth``.

        NOTE(review): the traversal is depth-first, so a user first reached
        at ``self.maxdepth`` is marked as crawled and its neighbours are not
        expanded, even if the same user is also reachable via a shorter path.
        """
        # Stop once the traversal is deeper than the configured limit.
        if depth > self.maxdepth:
            return

        # Skip users that were already handled, successfully or not.
        if user in self.crawled_users or user in self.bad_users:
            return

        # Single-argument print calls behave the same on Python 2 and 3.
        print('crawling user: %s' % user)

        user_stats = User.get_user_stats(user, followers=True, following=True)

        # An empty result means crawling this user failed. Testing
        # truthiness (rather than ``== {}``) also covers ``None``, which
        # would otherwise be stored and then fail on subscripting below.
        if not user_stats:
            self.bad_users.add(user)
            return

        print('user_stats:\n%s' % (user_stats,))
        print('---------------------------------------------------')

        self.crawled_users[user] = user_stats

        # Persist as we go so that an interrupted crawl keeps its results.
        self.db.users.insert({user: user_stats})

        for related_user in user_stats['following'] + user_stats['followers']:
            self._crawl_by_user(_sanitize_username(related_user), depth + 1)
Beispiel #4
0
    def crawl_questions_and_answers(self):
        """Refresh the author data stored for every document in ``db.answers``.

        For each document the question key is obtained via
        ``_get_question``. If the stored ``question_author`` is an empty
        string, the authors are fetched again through
        ``Quora.get_authors_of_questions_and_answers``; otherwise the stored
        values are reused. In both cases the usernames are passed through
        ``_sanitize_username`` and written back to the same document
        (``upsert=False``, so no new documents are created).
        """
        ## This is for downloading - uncomment if you want to download ##
        # questions_data = list(self.db.questions.find())
        # for document in questions_data:
        #     question = _get_question(document)
        #     print question
        #     question_author, answers_authors = Quora.get_authors_of_questions_and_answers(question)
        #     question_author = _sanitize_username(question_author)
        #     answers_authors = [_sanitize_username(author) for author in answers_authors]
        #     stats = {'question_author' : question_author, 'answers_authors': answers_authors}
        #     print 'question_author:', question_author
        #     print 'answers_authors:', answers_authors

        #     # Inserting into database:
        #     self.db.answers.insert({question: stats})

        ## This is purely for updating ##
        answers_data = list(self.db.answers.find())
        for document in answers_data:
            question = _get_question(document)
            stored = document[question]

            # Only documents without a recorded question author are fetched
            # again; all others are merely re-sanitised.
            refetched = stored['question_author'] == ''
            if refetched:
                # Single-argument print calls behave the same on Python 2
                # and 3.
                print(question)
                print(document['_id'])
                question_author, answers_authors = (
                    Quora.get_authors_of_questions_and_answers(question))
            else:
                question_author = stored['question_author']
                answers_authors = stored['answers_authors']

            question_author = _sanitize_username(question_author)
            answers_authors = [
                _sanitize_username(author) for author in answers_authors
            ]

            if refetched:
                print('question_author: %s' % (question_author,))
                print('answers_authors: %s' % (answers_authors,))

            stats = {
                'question_author': question_author,
                'answers_authors': answers_authors,
            }
            self.db.answers.update({'_id': document['_id']},
                                   {"$set": {
                                       question: stats
                                   }},
                                   upsert=False)