def explore_questions_by_topic(quora_data, root_question, topic):
    """Breadth-first search for the nearest question tagged with ``topic``.

    Starting at ``root_question``, follows 'related_questions' links between
    the questions contained in ``quora_data`` and prints the first (i.e.
    fewest-hops) path that ends at a question whose 'topics' contains
    ``topic``. If no such question is reachable, a "no path" message is
    printed instead of a misleading partial path.

    Args:
        quora_data: iterable of documents. Each document is indexed with the
            key returned by ``_get_question(document)``; the value found there
            must provide 'related_questions' and 'topics' entries.
        root_question: question the search starts from. It is compared
            against sanitized question keys, so it is presumably expected in
            sanitized form -- verify against callers.
        topic: value looked up with ``in`` against each visited question's
            'topics'.

    Returns:
        None. The result is reported on standard output only.

    Raises:
        KeyError: if ``root_question`` is not one of the questions in
            ``quora_data``, or a document lacks a required entry.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    # Index the documents by question. The sanitized neighbour lists live in
    # their own table so the caller's documents are left untouched.
    stats_by_question = {}
    neighbours = {}
    for document in quora_data:
        question = _get_question(document)
        stats = document[question]
        stats_by_question[question] = stats
        neighbours[question] = [
            _sanitize_question(x) for x in stats['related_questions']
        ]

    already_explored_questions = set()
    already_explored_questions.add(root_question)
    # Each queue entry is a full path; deque gives O(1) pops from the left.
    questions_queue = deque([[root_question]])
    found_path = None

    while questions_queue:
        # Take the oldest path and inspect the question it ends on.
        questions_path = questions_queue.popleft()
        question = questions_path[-1]

        topics = stats_by_question[question]['topics']
        print(topics)
        if topic in topics:
            found_path = questions_path
            break

        # Extend the path with every unvisited neighbour we have data for.
        for related_question in neighbours[question]:
            if (related_question not in already_explored_questions
                    and related_question in stats_by_question):
                already_explored_questions.add(related_question)
                questions_queue.append(questions_path + [related_question])

    if found_path is None:
        # Queue exhausted: nothing reachable carries the topic.
        print('No path leading to %s found from %r' % (topic, root_question))
    else:
        print('Path leading to %s : %r' % (topic, found_path))
def explore_questions_by_topic(quora_data, root_question, topic):
    """Breadth-first search for the nearest question tagged with ``topic``.

    Starting at ``root_question``, follows 'related_questions' links between
    the questions contained in ``quora_data`` and prints the first (i.e.
    fewest-hops) path that ends at a question whose 'topics' contains
    ``topic``. If no such question is reachable, a "no path" message is
    printed instead of a misleading partial path.

    Args:
        quora_data: iterable of documents. Each document is indexed with the
            key returned by ``_get_question(document)``; the value found there
            must provide 'related_questions' and 'topics' entries.
        root_question: question the search starts from. It is compared
            against sanitized question keys, so it is presumably expected in
            sanitized form -- verify against callers.
        topic: value looked up with ``in`` against each visited question's
            'topics'.

    Returns:
        None. The result is reported on standard output only.

    Raises:
        KeyError: if ``root_question`` is not one of the questions in
            ``quora_data``, or a document lacks a required entry.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    # Index the documents by question. The sanitized neighbour lists live in
    # their own table so the caller's documents are left untouched.
    stats_by_question = {}
    neighbours = {}
    for document in quora_data:
        question = _get_question(document)
        stats = document[question]
        stats_by_question[question] = stats
        neighbours[question] = [
            _sanitize_question(x) for x in stats['related_questions']
        ]

    already_explored_questions = set()
    already_explored_questions.add(root_question)
    # Each queue entry is a full path; deque gives O(1) pops from the left.
    questions_queue = deque([[root_question]])
    found_path = None

    while questions_queue:
        # Take the oldest path and inspect the question it ends on.
        questions_path = questions_queue.popleft()
        question = questions_path[-1]

        topics = stats_by_question[question]['topics']
        print(topics)
        if topic in topics:
            found_path = questions_path
            break

        # Extend the path with every unvisited neighbour we have data for.
        for related_question in neighbours[question]:
            if (related_question not in already_explored_questions
                    and related_question in stats_by_question):
                already_explored_questions.add(related_question)
                questions_queue.append(questions_path + [related_question])

    if found_path is None:
        # Queue exhausted: nothing reachable carries the topic.
        print('No path leading to %s found from %r' % (topic, root_question))
    else:
        print('Path leading to %s : %r' % (topic, found_path))
def _crawl_by_question(self, question, depth):
    """Crawl ``question`` and, recursively, its related questions.

    Fetches the question's stats and latest answers through ``Quora``,
    records them in ``self.crawled_questions``, inserts them into
    ``self.db.questions`` and then recurses into every related question
    with ``depth + 1``.

    Args:
        question: question to crawl; used as the key under which the stats
            are stored.
        depth: current recursion depth. Nothing is crawled once it exceeds
            ``self.maxdepth``.

    Returns:
        None.
    """
    # Stop crawling when depth exceeds maxdepth.
    if depth > self.maxdepth:
        return

    # Skip questions that were already crawled or already failed.
    if question in self.crawled_questions or question in self.bad_questions:
        return

    print('crawling question: %s' % question)
    question_stats = Quora.get_question_stats(question)

    # An empty (or otherwise falsy, e.g. None) result means the crawl of this
    # particular question went wrong; remember it so it is not retried.
    if not question_stats:
        self.bad_questions.add(question)
        return

    latest_answers = Quora.get_latest_answers(question)
    question_stats['latest_answers'] = latest_answers

    # Label and value are printed by separate single-argument calls: this
    # emits the same output as a two-item print statement whose label ends
    # in a newline, and is valid under both the print statement and the
    # print function.
    print('question_stats:')
    print(question_stats)
    print('latest_answers:')
    print(latest_answers)
    print('related_questions: ')
    print(question_stats['related_questions'])
    print('---------------------------------------------------')

    self.crawled_questions[question] = question_stats

    # Inserting into the database as we go...
    # NOTE(review): ``insert`` looks like the legacy pymongo collection API;
    # confirm the driver version before switching to ``insert_one``.
    self.db.questions.insert({question: question_stats})

    # Children would be rejected by the depth guard above anyway, so skip
    # sanitizing and recursing when we are already at the maximum depth.
    if depth >= self.maxdepth:
        return

    for related_question in question_stats['related_questions']:
        # Only consider complete questions (i.e. not truncated with '...').
        if related_question.endswith('...'):
            continue
        self._crawl_by_question(_sanitize_question(related_question), depth + 1)