def process_dbquestiontags(self, question_identifier, tag):
    """Store a ``QuestionsTags`` row linking a question to *tag*.

    The tag row is looked up first in the in-memory ``self.dbtags`` list and
    then in the ``Tags`` table; when neither has it, a new ``Tags`` row is
    added and committed.  A row obtained from the database (found or newly
    created) is appended to ``self.dbtags``.

    :param question_identifier: value stored in the link row's
        ``question_identifier`` attribute.
    :param tag: tag text, compared for equality against ``Tags.tag``.
    """
    link = QuestionsTags()
    link.question_identifier = question_identifier

    # Cheapest source first: tags this instance has already collected.
    cached = next((known for known in self.dbtags if known.tag == tag), None)
    if cached is not None:
        link.tag_id = cached.id

    if link.tag_id is None:
        logging.debug(tag + " NOT found. Adding it")
        # Not resolved from the cache: the row may still exist in the db.
        row = self.session.query(Tags).filter(Tags.tag == tag).first()
        if row is None:
            row = Tags()
            row.tag = tag
            self.session.add(row)
            # Committed before row.id is read below (presumably the id is
            # assigned by the database on insert -- confirm against schema).
            self.session.commit()
        self.dbtags.append(row)
        link.tag_id = row.id

    self.session.add(link)
    self.session.commit()
def askbot_parser(session, url):
    """Copy the content of the Askbot site at *url* into the database.

    Iterates over every question set yielded by ``Askbot(url).questions()``.
    A question that ``is_question_updated`` reports as found and updated is
    skipped.  A question that is found but not updated is removed with
    ``remove_question`` and then re-inserted from scratch.  For every stored
    question its comments, its answers, the comments of each answer and its
    tags (plus one ``QuestionsTags`` link row per tag) are stored as well.
    After each question set, every collected author/answerer id that has not
    yet been stored during this call is fetched with ``get_user`` and stored.

    Every row is committed immediately after it is added.

    :param session: session object; only its ``add``, ``commit`` and
        ``query`` methods are used here.
    :param url: passed unchanged to the ``Askbot`` constructor.
    """
    # Function-local import, resolved once per call rather than once per
    # question.
    from pysibyl.db import Tags, QuestionsTags

    # Initial parsing of general info, users and questions
    askbot = Askbot(url)
    # Ids of the users stored so far by this call.  A set keeps the
    # membership test constant-time however many users are seen.
    # NOTE(review): this is in-memory only, so users stored by an earlier run
    # are not known here -- confirm how get_user / the schema cope with that.
    stored_user_ids = set()

    for questionset in askbot.questions():
        # Author and answerer ids gathered from this question set.
        users_id = []
        for dbquestion in questionset:
            # TODO: at some point the questions() iterator should
            # provide each "question" and not a set of them
            print("Analyzing: " + dbquestion.url)

            updated, found = askbot.is_question_updated(dbquestion, session)
            if found and updated:
                # no changes needed
                print(" * NOT updating information for this question")
                continue

            if found:
                # Found but out of date (the up-to-date case was skipped
                # above).  Simplest approach: drop everything related to the
                # question (question, tags, answers and their comments) and
                # re-insert it, which avoids separate code paths for the
                # found/not-found and updated/not-updated combinations.
                print("Restarting dataset for this question")
                askbot.remove_question(dbquestion, session)

            dbquestion = askbot.get_question(dbquestion)
            # NOTE(review): answer ids below are coerced with int() but the
            # author id is stored as-is; if the two ever differ in type the
            # same user would be fetched twice -- confirm.
            users_id.append(dbquestion.author_identifier)
            session.add(dbquestion)
            session.commit()

            # Comments on the question
            for comment in askbot.question_comments(dbquestion):
                session.add(comment)
                session.commit()

            # Answers, each followed by its own comments
            for answer in askbot.answers(dbquestion):
                if answer.user_identifier is not None:
                    users_id.append(int(answer.user_identifier))
                session.add(answer)
                session.commit()
                for comment in askbot.answer_comments(answer):
                    session.add(comment)
                    session.commit()

            # Tags: askbot.tags() receives the stored tags keyed by their
            # lower-cased text.  The mapping is rebuilt for every question,
            # presumably so that tags stored for earlier questions are seen.
            alltags = {
                known.tag.lower(): known
                for known in session.query(Tags).all()
            }
            for tag in askbot.tags(alltags):
                session.add(tag)
                # Committed before tag.id is read for the link row.
                session.commit()
                questiontag = QuestionsTags()
                questiontag.question_identifier = dbquestion.id
                questiontag.tag_id = tag.id
                session.add(questiontag)
                session.commit()

        # Users referenced by this question set
        for user_id in users_id:
            if user_id in stored_user_ids:
                continue
            # User not previously inserted
            user = askbot.get_user(user_id)
            session.add(user)
            session.commit()
            stored_user_ids.add(user_id)
def askbot_parser(session, url):
    """Copy the content of the Askbot site at *url* into the database.

    Iterates over every question set yielded by ``Askbot(url).questions()``.
    A question that ``is_question_updated`` reports as found and updated is
    skipped.  A question that is found but not updated is removed with
    ``remove_question`` and then re-inserted from scratch.  For every stored
    question its comments, its answers, the comments of each answer and its
    tags (plus one ``QuestionsTags`` link row per tag) are stored as well.
    After each question set, every collected author/answerer id that has not
    yet been stored during this call is fetched with ``get_user`` and stored.

    Every row is committed immediately after it is added.

    :param session: session object; only its ``add``, ``commit`` and
        ``query`` methods are used here.
    :param url: passed unchanged to the ``Askbot`` constructor.
    """
    # Function-local import, resolved once per call rather than once per
    # question.
    from pysibyl.db import Tags, QuestionsTags

    # Initial parsing of general info, users and questions
    askbot = Askbot(url)
    # Ids of the users stored so far by this call.  A set keeps the
    # membership test constant-time however many users are seen.
    # NOTE(review): this is in-memory only, so users stored by an earlier run
    # are not known here -- confirm how get_user / the schema cope with that.
    stored_user_ids = set()

    for questionset in askbot.questions():
        # Author and answerer ids gathered from this question set.
        users_id = []
        for dbquestion in questionset:
            # TODO: at some point the questions() iterator should
            # provide each "question" and not a set of them
            print("Analyzing: " + dbquestion.url)

            updated, found = askbot.is_question_updated(dbquestion, session)
            if found and updated:
                # no changes needed
                print(" * NOT updating information for this question")
                continue

            if found:
                # Found but out of date (the up-to-date case was skipped
                # above).  Simplest approach: drop everything related to the
                # question (question, tags, answers and their comments) and
                # re-insert it, which avoids separate code paths for the
                # found/not-found and updated/not-updated combinations.
                print("Restarting dataset for this question")
                askbot.remove_question(dbquestion, session)

            dbquestion = askbot.get_question(dbquestion)
            # NOTE(review): answer ids below are coerced with int() but the
            # author id is stored as-is; if the two ever differ in type the
            # same user would be fetched twice -- confirm.
            users_id.append(dbquestion.author_identifier)
            session.add(dbquestion)
            session.commit()

            # Comments on the question
            for comment in askbot.question_comments(dbquestion):
                session.add(comment)
                session.commit()

            # Answers, each followed by its own comments
            for answer in askbot.answers(dbquestion):
                if answer.user_identifier is not None:
                    users_id.append(int(answer.user_identifier))
                session.add(answer)
                session.commit()
                for comment in askbot.answer_comments(answer):
                    session.add(comment)
                    session.commit()

            # Tags: askbot.tags() receives the stored tags keyed by their
            # lower-cased text.  The mapping is rebuilt for every question,
            # presumably so that tags stored for earlier questions are seen.
            alltags = {
                known.tag.lower(): known
                for known in session.query(Tags).all()
            }
            for tag in askbot.tags(alltags):
                session.add(tag)
                # Committed before tag.id is read for the link row.
                session.commit()
                questiontag = QuestionsTags()
                questiontag.question_identifier = dbquestion.id
                questiontag.tag_id = tag.id
                session.add(questiontag)
                session.commit()

        # Users referenced by this question set
        for user_id in users_id:
            if user_id in stored_user_ids:
                continue
            # User not previously inserted
            user = askbot.get_user(user_id)
            session.add(user)
            session.commit()
            stored_user_ids.add(user_id)