コード例 #1
0
ファイル: Spider.py プロジェクト: Catfishly/SinaSpider
    def save_only_profile(self):
        '''
        Save only the buffered profiles (with their jobs and education) to DB.

        Calls self.transformation() and self.clear_null_data(), connects to
        the database and, for every entry of self.profile_list, merges one
        Dao.Job per item of pf['Job'], one Dao.Education per item of
        pf['Education'] and one Dao.User built from the remaining fields.
        When everything was merged, self.clear_buffer() is called.

        Each profile dict is modified in place: its 'Job' and 'Education'
        keys are deleted before the dict is handed to Dao.User.

        The database is closed even when merging or clearing the buffer
        raises; the exception is still propagated to the caller.

        :return:
        '''

        self.transformation()
        # this function must be called after self.transformation
        self.clear_null_data()

        self.database.connect()
        try:
            for pf in self.profile_list:
                for jb in pf['Job']:
                    self.database.session.merge(Dao.Job(jb))
                for edu in pf['Education']:
                    self.database.session.merge(Dao.Education(edu))

                # The nested lists were merged above as separate records, so
                # they are dropped before the dict is passed to Dao.User.
                del pf['Job']
                del pf['Education']

                self.database.session.merge(Dao.User(pf))

            self.clear_buffer()
        finally:
            # Always release the connection, including on failure.
            self.database.close()
コード例 #2
0
ファイル: Spider.py プロジェクト: Catfishly/SinaSpider
    def save(self):
        '''
        Save crawled information to DB.

        Calls self.transformation() and self.clear_null_data(), connects to
        the database and merges, in this order:

        * one Dao.Followee per entry of self.followee_list,
        * one Dao.Follower per entry of self.follower_list,
        * one Dao.Timeline per entry of self.timeline_list, after replacing
          the \\x01 control character and all line breaks in its 'text'
          field with spaces,
        * for every entry of self.profile_list: one Dao.Job per item of
          pf['Job'], one Dao.Education per item of pf['Education'] and one
          Dao.User built from the remaining fields.

        When everything was merged, self.clear_buffer() is called.

        Timeline and profile dicts are modified in place ('text' is
        rewritten; 'Job' and 'Education' keys are deleted).

        The database is closed even when a merge or clearing the buffer
        raises; the exception is still propagated to the caller.

        :return:
        '''

        self.transformation()
        # this function must be called after self.transformation
        self.clear_null_data()

        self.database.connect()
        try:
            for fee in self.followee_list:
                self.database.session.merge(Dao.Followee(fee))
            for fer in self.follower_list:
                self.database.session.merge(Dao.Follower(fer))
            for tl in self.timeline_list:
                try:
                    # \x01 (octal 001) is spelled as an escape so the
                    # literal cannot degrade into an empty string, which
                    # would insert a space between every character.
                    tl['text'] = tl['text'].replace('\x01', ' ')
                    # remove all the linefeed
                    tl['text'] = tl['text'].replace('\r', ' ').replace(
                        '\n', ' ')
                except Exception as e:
                    # Best effort: report the problem and still store the
                    # timeline unchanged. Falls back to the exception itself
                    # where it has no .message attribute.
                    print(getattr(e, 'message', e))
                self.database.session.merge(Dao.Timeline(tl))
            for pf in self.profile_list:
                for jb in pf['Job']:
                    self.database.session.merge(Dao.Job(jb))
                for edu in pf['Education']:
                    self.database.session.merge(Dao.Education(edu))

                # The nested lists were merged above as separate records, so
                # they are dropped before the dict is passed to Dao.User.
                del pf['Job']
                del pf['Education']

                self.database.session.merge(Dao.User(pf))

            self.clear_buffer()
        finally:
            # Always release the connection, including on failure.
            self.database.close()