Beispiel #1
0
    def process_dbquestiontags(self, question_identifier, tag):
        """Store a ``QuestionsTags`` row linking a question to a tag.

        The tag is resolved in up to three steps: the in-memory
        ``self.dbtags`` list, then a query on the ``Tags`` table, and
        finally a newly inserted ``Tags`` row.  Whatever is fetched or
        created is appended to ``self.dbtags`` so later calls can reuse
        it.  The link row is added to ``self.session`` and committed.

        :param question_identifier: value stored in the link row's
            ``question_identifier`` attribute.
        :param tag: tag text compared against ``Tags.tag``.
        """
        link = QuestionsTags()
        link.question_identifier = question_identifier

        # Cheapest lookup first: tags this instance has already seen.
        cached = next(
            (known for known in self.dbtags if known.tag == tag),
            None,
        )
        if cached is not None:
            link.tag_id = cached.id

        # Deliberately keyed on tag_id (not on ``cached``) so that a cached
        # entry without an id still falls through to the database lookup.
        if link.tag_id is None:
            logging.debug(tag + " NOT found. Adding it")
            # Not in the cache: try the database before creating a new row.
            stored = self.session.query(Tags).filter(Tags.tag == tag).first()
            if stored is None:
                stored = Tags()
                stored.tag = tag
                self.session.add(stored)
                # Committed before ``stored.id`` is read below (presumably so
                # the database has assigned the id by then).
                self.session.commit()
            self.dbtags.append(stored)
            link.tag_id = stored.id

        self.session.add(link)
        self.session.commit()
Beispiel #2
0
    def process_dbquestiontags(self, question_identifier, tag):
        """Persist the association between a question and a tag.

        Looks the tag up in ``self.dbtags`` first, then in the ``Tags``
        table, and inserts a new ``Tags`` row when neither has it.  A tag
        fetched or created here is appended to ``self.dbtags``.  The
        resulting ``QuestionsTags`` row is added to ``self.session`` and
        committed.

        :param question_identifier: value assigned to
            ``QuestionsTags.question_identifier``.
        :param tag: tag text matched against ``Tags.tag``.
        """
        question_tag = QuestionsTags()
        question_tag.question_identifier = question_identifier

        # First match in the local cache, or None when the tag is unknown.
        matches = (entry for entry in self.dbtags if entry.tag == tag)
        hit = next(matches, None)
        if hit is not None:
            question_tag.tag_id = hit.id

        # The check is on tag_id itself, so a cache hit whose id is still
        # None is treated the same as a miss.
        if question_tag.tag_id is None:
            logging.debug(tag + " NOT found. Adding it")
            # Cache miss: consult the database, creating the row if absent.
            tag_row = self.session.query(Tags).filter(Tags.tag == tag).first()
            if tag_row is None:
                tag_row = Tags()
                tag_row.tag = tag
                self.session.add(tag_row)
                # Commit happens before ``tag_row.id`` is read further down.
                self.session.commit()
            self.dbtags.append(tag_row)
            question_tag.tag_id = tag_row.id

        self.session.add(question_tag)
        self.session.commit()
Beispiel #3
0
def askbot_parser(session, url):
    """Crawl an Askbot site and store its content through ``session``.

    For every question yielded by ``Askbot(url).questions()`` this stores,
    in order: the question, its comments, its answers (each followed by
    its own comments), its tags with one ``QuestionsTags`` link per tag,
    and the users who authored the question or its answers.  Every object
    is committed right after it is added.

    Questions that ``is_question_updated`` reports as found and up to date
    are skipped.  Questions found but outdated are removed with
    ``remove_question`` and re-inserted from scratch.

    :param session: database session offering ``add``, ``commit`` and
        ``query``.
    :param url: passed to the ``Askbot`` constructor.
    """
    # Kept as a function-scope import, but executed once per call instead
    # of once per question.
    from pysibyl.db import Tags, QuestionsTags

    askbot = Askbot(url)
    # Ids of users inserted during this run.  A set gives O(1) membership
    # tests; the previous list was scanned linearly for every candidate.
    stored_users = set()

    for questionset in askbot.questions():
        for dbquestion in questionset:
            # TODO: at some point the questions() iterator should
            # provide each "question" and not a set of them
            # Single-argument print(...) prints the same text on Python 2
            # and also compiles on Python 3.
            print("Analyzing: " + dbquestion.url)

            updated, found = askbot.is_question_updated(dbquestion, session)
            if found and updated:
                # no changes needed
                print("    * NOT updating information for this question")
                continue

            if found and not updated:
                # Simplest approach so far: drop everything related to this
                # question (question, tags, answers and their comments) and
                # re-insert it, which avoids separate found/not-found and
                # updated/not-updated code paths below.
                print("Restarting dataset for this question")
                askbot.remove_question(dbquestion, session)

            dbquestion = askbot.get_question(dbquestion)
            # User ids referenced by this question only; ids seen while
            # processing earlier questions are already in stored_users.
            # NOTE(review): answer authors are normalised with int() below
            # but the question author is not -- confirm both use one type,
            # otherwise the same user may be fetched twice.
            question_users = [dbquestion.author_identifier]
            session.add(dbquestion)
            session.commit()

            # Comments
            for comment in askbot.question_comments(dbquestion):
                session.add(comment)
                session.commit()

            # Answers
            for answer in askbot.answers(dbquestion):
                if answer.user_identifier is not None:
                    question_users.append(int(answer.user_identifier))
                session.add(answer)
                session.commit()
                # comments per answer
                for comment in askbot.answer_comments(answer):
                    session.add(comment)
                    session.commit()

            # Tags
            # Re-read on every question because the loop below may have
            # committed new tags while handling the previous question.
            alltags = {
                tag.tag.lower(): tag
                for tag in session.query(Tags).all()
            }
            for tag in askbot.tags(alltags):
                session.add(tag)
                # Committed before tag.id is read for the link row.
                session.commit()

                questiontag = QuestionsTags()
                questiontag.question_identifier = dbquestion.id
                questiontag.tag_id = tag.id

                session.add(questiontag)
                session.commit()

            # Users
            for user_id in question_users:
                if user_id in stored_users:
                    continue
                # User not previously inserted
                user = askbot.get_user(user_id)
                session.add(user)
                session.commit()
                stored_users.add(user_id)
Beispiel #4
0
def askbot_parser(session, url):
    """Parse an Askbot instance and persist what it exposes.

    Iterates over the question sets returned by ``Askbot(url).questions()``
    and, for each question that needs (re)loading, stores the question,
    its comments, its answers and their comments, its tags (plus a
    ``QuestionsTags`` link per tag) and the users that authored the
    question or any answer.  Each object is committed immediately after
    being added to ``session``.

    A question reported by ``is_question_updated`` as found and up to date
    is skipped.  One that is found but outdated is first deleted via
    ``remove_question`` and then inserted again.

    :param session: database session providing ``add``, ``commit`` and
        ``query``.
    :param url: forwarded to the ``Askbot`` constructor.
    """
    # Import once per call rather than once per processed question; it
    # stays local to the function as in the original code.
    from pysibyl.db import Tags, QuestionsTags

    askbot = Askbot(url)
    # Users already inserted by this call.  Stored in a set so the
    # "already inserted?" check does not scan a growing list.
    known_users = set()

    for questionset in askbot.questions():
        for dbquestion in questionset:
            # TODO: at some point the questions() iterator should
            # provide each "question" and not a set of them
            # print(...) with one argument behaves identically on
            # Python 2 and is valid syntax on Python 3 as well.
            print("Analyzing: " + dbquestion.url)

            updated, found = askbot.is_question_updated(dbquestion, session)
            if found and updated:
                # no changes needed
                print("    * NOT updating information for this question")
                continue

            if found and not updated:
                # Simplest approach for now: remove all data related to the
                # question (question, tags, answers, comments) and insert
                # it again, so the code below needs no special cases for
                # found/not found or updated/not updated.
                print("Restarting dataset for this question")
                askbot.remove_question(dbquestion, session)

            dbquestion = askbot.get_question(dbquestion)
            # Only the ids referenced by the current question are
            # collected; ids from earlier questions were already handled.
            # NOTE(review): the author id is stored as-is while answer
            # author ids are converted with int() -- verify the types
            # match, or one user could be requested under two keys.
            pending_users = [dbquestion.author_identifier]
            session.add(dbquestion)
            session.commit()

            # Comments
            for comment in askbot.question_comments(dbquestion):
                session.add(comment)
                session.commit()

            # Answers
            for answer in askbot.answers(dbquestion):
                if answer.user_identifier is not None:
                    pending_users.append(int(answer.user_identifier))
                session.add(answer)
                session.commit()
                # comments per answer
                for comment in askbot.answer_comments(answer):
                    session.add(comment)
                    session.commit()

            # Tags
            # Queried again for each question since tags committed for a
            # previous question must be visible here.
            alltags = {
                tag.tag.lower(): tag
                for tag in session.query(Tags).all()
            }
            for tag in askbot.tags(alltags):
                session.add(tag)
                # The commit precedes the read of tag.id just below.
                session.commit()

                questiontag = QuestionsTags()
                questiontag.question_identifier = dbquestion.id
                questiontag.tag_id = tag.id

                session.add(questiontag)
                session.commit()

            # Users
            for user_id in pending_users:
                if user_id in known_users:
                    continue
                # User not previously inserted
                user = askbot.get_user(user_id)
                session.add(user)
                session.commit()
                known_users.add(user_id)