def save_only_profile(self):
    '''
    Save only the buffered profiles (with their jobs and education) to DB.

    Runs self.transformation() and self.clear_null_data(), connects the
    database, then for every entry of self.profile_list:

    * merges each item under the 'Job' key as Dao.Job,
    * merges each item under the 'Education' key as Dao.Education,
    * removes both keys from the profile dict (in place) and merges what
      is left as Dao.User.

    On success the buffers are cleared through self.clear_buffer(). If
    anything raises while merging, the exception propagates and the buffers
    are left untouched, but the database connection is still closed.

    :return: None
    '''
    self.transformation()
    # This function must be called after self.transformation.
    self.clear_null_data()

    self.database.connect()
    try:
        for pf in self.profile_list:
            for jb in pf['Job']:
                self.database.session.merge(Dao.Job(jb))
            for edu in pf['Education']:
                self.database.session.merge(Dao.Education(edu))
            # The nested lists are stored through their own DAO classes
            # above, so they are dropped before the dict is handed to
            # Dao.User.
            del pf['Job']
            del pf['Education']
            self.database.session.merge(Dao.User(pf))
        # Only reached when every merge succeeded.
        self.clear_buffer()
    finally:
        # Always release the connection, including on the error path.
        # NOTE(review): presumably close() does not commit pending changes;
        # if it does, confirm whether a rollback is wanted here on failure.
        self.database.close()
def save(self):
    '''
    Save crawled information to DB.

    Runs self.transformation() and self.clear_null_data(), connects the
    database, then merges into self.database.session, in this order:

    * every entry of self.followee_list as Dao.Followee,
    * every entry of self.follower_list as Dao.Follower,
    * every entry of self.timeline_list as Dao.Timeline, after replacing
      the \\x01 control character and all line breaks in its 'text' field
      with spaces,
    * every entry of self.profile_list: the items under 'Job' and
      'Education' as Dao.Job / Dao.Education, then the profile itself
      (with those two keys removed in place) as Dao.User.

    On success the buffers are cleared through self.clear_buffer(). If
    anything raises while merging, the exception propagates and the buffers
    are left untouched, but the database connection is still closed.

    :return: None
    '''
    self.transformation()
    # This function must be called after self.transformation.
    self.clear_null_data()

    self.database.connect()
    try:
        for fee in self.followee_list:
            self.database.session.merge(Dao.Followee(fee))

        for fer in self.follower_list:
            self.database.session.merge(Dao.Follower(fer))

        for tl in self.timeline_list:
            try:
                # Strip the \x01 control character (written as an explicit
                # escape so the search string can never be empty or
                # invisible) and flatten every linefeed to a space.
                tl['text'] = (tl['text']
                              .replace('\x01', ' ')
                              .replace('\r', ' ')
                              .replace('\n', ' '))
            except Exception as e:
                # Best effort: an entry whose text cannot be cleaned (for
                # example a missing 'text' key) is reported and still
                # merged unchanged below.
                print(e)
            self.database.session.merge(Dao.Timeline(tl))

        for pf in self.profile_list:
            for jb in pf['Job']:
                self.database.session.merge(Dao.Job(jb))
            for edu in pf['Education']:
                self.database.session.merge(Dao.Education(edu))
            # The nested lists are stored through their own DAO classes
            # above, so they are dropped before the dict is handed to
            # Dao.User.
            del pf['Job']
            del pf['Education']
            self.database.session.merge(Dao.User(pf))

        # Only reached when every merge succeeded.
        self.clear_buffer()
    finally:
        # Always release the connection, including on the error path.
        # NOTE(review): presumably close() does not commit pending changes;
        # if it does, confirm whether a rollback is wanted here on failure.
        self.database.close()