def store_bi_follow_id(user_id, bi_follow_id_list):
    """
    Store the user's bi_follow ids into the bi_follow table.

    For each id in bi_follow_id_list a BiFollow row is added unless a row with
    the same (user_id, bi_following_id) pair is already present. If the whole
    list is processed without error, the update_bi_follow_time column of the
    user's DemoUsers row is set to the current local time and the session is
    committed.

    Failures are logged and not re-raised. KeyboardInterrupt and SystemExit
    are allowed to propagate.

    @param user_id: id of the user
    @param bi_follow_id_list: a list of bi_follow_id of the user
    """
    session = None
    try:
        session = orm.load_session()
        for bi_following_id in bi_follow_id_list:
            # only add the relationship when it is not stored yet
            bi_follow = session.query(orm.BiFollow).filter_by(
                user_id=user_id, bi_following_id=bi_following_id).first()
            if not bi_follow:
                add_bi_follow = orm.BiFollow(user_id=user_id,
                                             bi_following_id=bi_following_id)
                session.add(add_bi_follow)
            else:
                logger.info("%s <-> %s already in DB" % (user_id, bi_following_id))
    except Exception:
        logger.error("store_bi_follow_id error.. session.add? i do NOT know yet")
        logger.error('%s %s ' % (sys.exc_info()[0], sys.exc_info()[1]))
        # Discard the half-built unit of work so the pending rows cannot be
        # flushed by a later, unrelated commit on the same session.
        if session is not None:
            try:
                session.rollback()
            except exc.SQLAlchemyError as e:
                logger.error(e)
    else:
        try:
            # stamp the update_bi_follow_time column of the user table
            update_bi_follow_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            session.query(orm.DemoUsers).filter_by(user_id=user_id). \
                update({"update_bi_follow_time": update_bi_follow_time},
                       synchronize_session=False)
            session.commit()
        except exc.SQLAlchemyError as e:
            session.rollback()
            logger.error(e)
def __init__(self):
    """
    Set up the controller.

    Creates a Login instance and copies its logger, config, cj,
    cookie_dict and opener onto this object, then configures the DB logger
    and opens a DB session.
    """
    print('init controller')
    login = Login()
    self.login_instance = login
    self.logger = login.get_logger()
    self.config = login.get_config()
    self.cj = login.cj
    self.cookie_dict = login.cookie_dict
    self.cookie_str = ""
    self.opener = login.opener
    # DB logging is configured before the session is loaded
    orm.set_dblogger()
    self.session = orm.load_session()
def __init__(self):
    """
    Initialise the controller state.

    A Login object is created first; its logger, config, cj, cookie_dict
    and opener are then exposed as attributes of this instance. Finally the
    DB logger is set and a DB session is loaded.
    """
    print("init controller")
    login_helper = Login()
    self.login_instance = login_helper
    self.logger = login_helper.get_logger()
    self.config = login_helper.get_config()
    self.cj = login_helper.cj
    self.cookie_dict = login_helper.cookie_dict
    self.cookie_str = ""
    self.opener = login_helper.opener
    # set the DB logger, then load the session
    orm.set_dblogger()
    self.session = orm.load_session()
def handle_user_weibo(crawler_json):
    """
    Persist the statuses contained in a crawler result.

    Reads the 'user_id' and 'sina_weibo_json' entries of crawler_json, passes
    every status in the latter to store_status() and commits once at the end.
    A SQLAlchemyError is logged and the session is rolled back.
    """
    # The value is not used below; the lookup still raises KeyError when the
    # key is missing.
    crawler_json['user_id']
    weibo_statuses = crawler_json['sina_weibo_json']
    session = orm.load_session()
    try:
        for one_status in weibo_statuses:
            store_status(one_status, session)
        session.commit()
    except exc.SQLAlchemyError as db_error:
        logger.error(db_error)
        session.rollback()
def has_stored_user_by_uid(user_id):
    """
    Query the DemoUsers table and report whether this user is already stored.

    The session is closed on every exit path, including when the query or
    the commit raises.

    @param user_id: id of the user
    @return: True if at least one DemoUsers row has this user_id, else False
    """
    session = orm.load_session()
    try:
        count = session.query(orm.DemoUsers).filter(
            orm.DemoUsers.user_id == user_id).count()
        session.commit()
        return count != 0
    finally:
        session.close()
def store_follow_list(user_id, follow_list): """ just store the user's followings (both the following relationship and the following user into user table) into DB @param user_id: id of the user @param follow_list: a list of the followings of the user here the element in the follow_list is the user object returned by SinaWeiboAPI """ try: session = orm.load_session() for user in follow_list: demo_user = session.query(orm.DemoUsers).filter_by(user_id=user['id']).first() # now will store the following into db if not demo_user: # if not in DB, then store into DB add_user = add_orm_user(user) session.add(add_user) else: logger.info("this following %s is already in DB" % user['id']) logger.info("Update this following user %s in DB" % user['id']) # if in DB, then update the user in DB update_user(user, session) # now will store the follow relationship into db following_id = user['id'] follow = session.query(orm.Follow).filter_by(user_id=user_id, following_id=following_id).first() if not follow: # if not in DB, then store into DB add_follow = orm.Follow(user_id=user_id, following_id=following_id) session.add(add_follow) else: logger.info("%s -> %s already in DB" % (user_id, following_id)) except: error_str = 'store_follow_list %s %s' % (sys.exc_info()[0], sys.exc_info()[1]) logger.error(error_str) else: try: #=========================================================================== # will update the update_following_time column of the user table #=========================================================================== update_following_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") session.query(orm.DemoUsers).filter_by(user_id=user_id). \ update({"update_following_time": update_following_time}, synchronize_session=False) session.commit() except exc.SQLAlchemyError, e: logger.error(e) session.rollback() except:
def handle_keyword_status_ids(keyword, status_id_list):
    """
    Store the keyword and the corresponding status ids into the DB.

    Each id is handed to store_keyword_status_id() and the session is
    committed once after the loop. On a SQLAlchemyError the error is logged
    and the session is rolled back.

    @param keyword: the keyword the status ids belong to
    @param status_id_list: a list of status ids found for the keyword
    @return: True when the commit succeeded, False when a SQLAlchemyError
             caused a rollback
    """
    logger.info("okay, will handle_keyword_status_ids(keyword, status_id_list)")
    logger.info("status_id_list is %d length" % (len(status_id_list)))
    session = orm.load_session()
    result = True
    try:
        for status_id in status_id_list:
            store_keyword_status_id(keyword, status_id, session)
        session.commit()
        logger.info("successfully committed the keyword_stauts_id")
    except exc.SQLAlchemyError as e:
        logger.error(e)
        session.rollback()
        result = False
    # the success flag was tracked but never handed back to the caller
    return result
def new_store_follow_list(user_id, follow_list): """ just store the user's followings (both the following relationship and the following user into user table) into DB @param user_id: id of the user @param follow_list: a list of the followings of the user here the element in the follow_list is the user object returned by SinaWeiboAPI """ logger.info("okay now in new_store_follow_list") db_transaction = db.transaction() try: session = orm.load_session() for user in follow_list: following_id = user['id'] # store the user into DB if not db_insert_user(user, db, db_transaction): # means already in DB, then update user DB logger.info("this following %s is already in DB" % following_id) logger.info("Update this following user %s in DB" % following_id) update_user(user, session) # now will store the follow relationship into db try: db.insert('follow', user_id=user_id, following_id=following_id) except: db_transaction.rollback() logger.error("new_store_follow_list db.insert follow table error. DUPLICATE?..") logger.info("So %s -> %s already in DB" % (user_id, following_id)) except: error_str = 'new_store_follow_list %s %s' % (sys.exc_info()[0], sys.exc_info()[1]) logger.error(error_str) else: # the reason why put commit() here is just to improve the speed of insert db_transaction.commit() try: #=========================================================================== # will update the update_following_time column of the user table #=========================================================================== update_following_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") session.query(orm.DemoUsers).filter_by(user_id=user_id). \ update({"update_following_time": update_following_time}, synchronize_session=False) session.commit() except exc.SQLAlchemyError, e: logger.error(e) session.rollback() except:
def query_update_user_weibo(self, limit_num):
    """
    Query the DB for users whose update_weibo_time is not set.

    Errors raised while querying are logged (fixed message plus the error
    itself) and not re-raised; whatever ids were collected up to that point
    are returned. The session is always closed.

    @param limit_num: maximum number of DemoUsers rows to fetch
    @return: list of the user_id values of the fetched rows
    """
    user_id_list = []
    session = orm.load_session()
    query = session.query(orm.DemoUsers)
    try:
        # `== None` is intentional: on a SQLAlchemy column it produces an
        # IS NULL clause, which `is None` would not.
        user_list_db = query.filter(
            orm.DemoUsers.update_weibo_time == None).limit(limit_num)  # noqa: E711
        for user_db in user_list_db:
            user = map_rowobject_dict(user_db)
            user_id_list.append(user['user_id'])
        session.commit()
    except Exception as e:
        self.logger.error('query update_weibo_time error')
        # keep the actual cause in the log instead of discarding it
        self.logger.error(e)
    finally:
        session.close()
    return user_id_list
def query_update_keyword_status(self, limit_num):
    """
    Query the DB for keyword_status rows whose update_status_time is not set.

    Errors raised while querying are logged and not re-raised; the ids
    collected so far are returned. KeyboardInterrupt and SystemExit are
    allowed to propagate. The session is always closed.

    @param limit_num: maximum number of KeywordStatus rows to fetch
    @return: list of the status_id values of the fetched rows
    """
    statuses_id_list = []
    session = orm.load_session()
    query = session.query(orm.KeywordStatus)
    try:
        # `== None` is intentional: on a SQLAlchemy column it produces an
        # IS NULL clause, which `is None` would not.
        keyword_status_list_db = query.filter(
            orm.KeywordStatus.update_status_time == None).limit(limit_num)  # noqa: E711
        session.commit()
        for keyword_status_db in keyword_status_list_db:
            print("keyword_status_lit_db not empty, status_id: %s" % (keyword_status_db.status_id))
            statuses_id_list.append(keyword_status_db.status_id)
    except Exception:
        self.logger.error('query update_keyword_status error')
        self.logger.error('%s' % (sys.exc_info()[1]))
    finally:
        session.close()
    return statuses_id_list
def store_user(user):
    """
    Store the user object into the user table.

    If no DemoUsers row with user['id'] exists, the object returned by
    add_orm_user(user) is added (skipped when that helper returns None);
    otherwise the existing row is updated through update_user(). The session
    is committed afterwards. A SQLAlchemyError is logged and rolled back.

    @param user: user dict; its 'id' entry is used as the lookup key
    """
    # Bound before the try so the error handler never touches an unbound
    # name when load_session() itself is what failed.
    session = None
    try:
        session = orm.load_session()
        demo_user = session.query(orm.DemoUsers).filter_by(user_id=user['id']).first()
        if not demo_user:
            # not in DB yet: insert
            add_user = add_orm_user(user)
            if add_user is not None:
                session.add(add_user)
        else:
            logger.info("Update this user %s in DB" % user['id'])
            # already in DB: update the stored row
            update_user(user, session)
        session.commit()
    except exc.SQLAlchemyError as e:
        logger.error(e)
        if session is not None:
            session.rollback()
def handle_statuses_show(crawler_json):
    """
    Persist the statuses of a crawler result and stamp the matching
    keyword_status rows.

    For every entry of crawler_json['sina_weibo_json_list']: the status is
    stored when its 'exist' flag is truthy, its 'user' entry (if present) is
    stored through store_user_session(), and update_status_time of the
    KeywordStatus rows with that status id is set to the current local time.
    One commit is issued after the loop; a SQLAlchemyError is logged and
    rolled back.
    """
    fetched_statuses = crawler_json['sina_weibo_json_list']
    session = orm.load_session()
    try:
        for status in fetched_statuses:
            if status['exist']:
                store_status(status, session)
            # NOTE(review): entries whose 'exist' flag is falsy are still
            # stamped below - confirm this matches the intended nesting.
            stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            if 'user' in status:
                store_user_session(status['user'], session)
            # update the update_status_time column of the keyword_status table
            matching_rows = session.query(orm.KeywordStatus).filter_by(status_id=status['id'])
            matching_rows.update({"update_status_time": stamp}, synchronize_session=False)
        session.commit()
    except exc.SQLAlchemyError as db_error:
        logger.error(db_error)
        session.rollback()