コード例 #1
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def store_bi_follow_id(user_id, bi_follow_id_list):
    """
    Store the user's bi_follow ids into the bi_follow table.

    Every (user_id, bi_following_id) pair that is not found by the lookup
    query is added to the session. If staging finishes without an error, the
    update_bi_follow_time column of the user is set to the current local
    time and the session is committed. Staging errors and SQLAlchemy errors
    raised by the final update/commit are logged instead of being raised.

    @param user_id: id of the user
    @param bi_follow_id_list: a list of bi_follow_id of the user
    """
    session = None
    try:
        session = orm.load_session()
        for bi_following_id in bi_follow_id_list:
            # only relationships that are not stored yet are staged
            bi_follow = session.query(orm.BiFollow).filter_by(
                user_id=user_id, bi_following_id=bi_following_id).first()
            if bi_follow:
                logger.info("%s <-> %s already in DB" % (user_id, bi_following_id))
                continue
            session.add(orm.BiFollow(user_id=user_id, bi_following_id=bi_following_id))
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate
        logger.error("store_bi_follow_id error.. session.add? i do NOT know yet")
        logger.error('%s %s ' % (sys.exc_info()[0], sys.exc_info()[1]))
        # drop whatever was staged so a later commit on this session cannot
        # persist a half-processed list
        if session is not None:
            session.rollback()
        return

    try:
        #===========================================================================
        # will update the update_bi_follow_time column of the user table
        #===========================================================================
        update_bi_follow_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        session.query(orm.DemoUsers).filter_by(user_id=user_id). \
              update({"update_bi_follow_time": update_bi_follow_time}, synchronize_session=False)
        # one commit covers both the staged relationships and the timestamp
        session.commit()
    except exc.SQLAlchemyError as e:
        session.rollback()
        logger.error(e)
コード例 #2
0
 def __init__(self):
     """
     Create a Login instance and keep references to its logger, config,
     cj, cookie_dict and opener; then call orm.set_dblogger() and load an
     ORM session.
     """
     print('init controller')
     login = Login()
     self.login_instance = login
     # logger and config are obtained through the login instance's getters
     self.logger = login.get_logger()
     self.config = login.get_config()
     # cookie-related state is shared with the login instance
     self.cj = login.cj
     self.cookie_dict = login.cookie_dict
     self.cookie_str = ""
     self.opener = login.opener
     # DB side: logger first, then the session used by this controller
     orm.set_dblogger()
     self.session = orm.load_session()
コード例 #3
0
 def __init__(self):
     """
     Initialise the controller from a fresh Login instance: its logger,
     config, cj, cookie_dict and opener are stored on self, cookie_str
     starts empty, and an ORM session is loaded after orm.set_dblogger().
     """
     print("init controller")
     login = Login()
     self.login_instance = login
     self.logger = login.get_logger()
     self.config = login.get_config()
     # attributes copied straight from the login instance
     self.cj = login.cj
     self.cookie_dict = login.cookie_dict
     self.cookie_str = ""
     self.opener = login.opener
     orm.set_dblogger()
     self.session = orm.load_session()
コード例 #4
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def handle_user_weibo(crawler_json):
    """
    Persist the statuses contained in a crawler result.

    Reads crawler_json['user_id'] and crawler_json['sina_weibo_json'],
    passes every status to store_status() with one shared session and
    commits once at the end. A SQLAlchemyError is logged and the session is
    rolled back.

    @param crawler_json: the json object returned from the crawler
    """
    # looked up although unused below; kept so both keys are still read
    user_id = crawler_json['user_id']
    weibo_statuses = crawler_json['sina_weibo_json']
    session = orm.load_session()
    try:
        for weibo_status in weibo_statuses:
            store_status(weibo_status, session)
        # single commit for the whole batch
        session.commit()
    except exc.SQLAlchemyError as db_error:
        logger.error(db_error)
        session.rollback()
コード例 #5
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def has_stored_user_by_uid(user_id):
    """
    Query the DemoUsers table and decide whether this user is already stored.

    @param user_id: id of the user
    @return: True if at least one DemoUsers row has this user_id, else False
    """
    session = orm.load_session()
    try:
        count = session.query(orm.DemoUsers).filter(orm.DemoUsers.user_id == user_id).count()
        session.commit()
        return count != 0
    finally:
        # close the session even when the query or commit raises
        session.close()
コード例 #6
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def store_follow_list(user_id, follow_list):
    """
    Store the user's followings into the DB: both the followed users
    (user table) and the following relationships (follow table).

    For every element of follow_list the followed user is added when no
    DemoUsers row with its id exists, otherwise update_user() is called for
    it; then the user_id -> following_id row is added unless one is found.
    If the loop finishes without raising, update_following_time of user_id
    is set to the current local time and the session is committed.

    @param user_id: id of the user
    @param follow_list: a list of the followings of the user
                    here the element in the follow_list is the user object returned by SinaWeiboAPI
                    (each element is indexed with ['id'] below)
    """
    try:
        session = orm.load_session()
        for user in follow_list:
            # look the followed user up by its id
            demo_user = session.query(orm.DemoUsers).filter_by(user_id=user['id']).first()
            # now will store the following into db
            if not demo_user:
                # if not in DB, then store into DB
                # NOTE(review): store_user() checks add_orm_user()'s result
                # against None before session.add(); this path does not - confirm
                add_user = add_orm_user(user)
                session.add(add_user)
            else:
                logger.info("this following %s is already in DB"  % user['id'])
                logger.info("Update this following user %s in DB" % user['id'])
                # if in DB, then update the user in DB
                update_user(user, session)
            # now will store the follow relationship into db
            following_id = user['id']
            follow = session.query(orm.Follow).filter_by(user_id=user_id, following_id=following_id).first()
            if not follow:
                # if not in DB, then store into DB
                add_follow = orm.Follow(user_id=user_id, following_id=following_id)
                session.add(add_follow)
            else:
                logger.info("%s -> %s already in DB" % (user_id, following_id))
    except:
        # any error raised while staging is only logged; the else branch
        # (timestamp update + commit) is skipped in that case
        error_str = 'store_follow_list %s %s' % (sys.exc_info()[0], sys.exc_info()[1])
        logger.error(error_str)
    else:
        try:
            #===========================================================================
            # will update the update_following_time column of the user table
            #===========================================================================
            update_following_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            session.query(orm.DemoUsers).filter_by(user_id=user_id). \
                  update({"update_following_time": update_following_time}, synchronize_session=False)
            # the only commit in this function: it covers the rows added above too
            session.commit()
        except exc.SQLAlchemyError, e:
            logger.error(e)
            session.rollback()
        except:
コード例 #7
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def handle_keyword_status_ids(keyword, status_id_list):
    """
    Store the keyword and the corresponding status_ids into the DB.

    Every status_id is passed to store_keyword_status_id() together with the
    keyword and one shared session; the session is committed once at the end.
    A SQLAlchemyError is logged and the session is rolled back.

    @param keyword: the keyword the status ids belong to
    @param status_id_list: a list of status ids found for the keyword
    @return: True if the commit succeeded, False if a SQLAlchemyError
             was raised and the session was rolled back
    """
    logger.info("okay, will handle_keyword_status_ids(keyword, status_id_list)")
    logger.info("status_id_list is %d length" % (len(status_id_list)))
    session = orm.load_session()
    try:
        for status_id in status_id_list:
            store_keyword_status_id(keyword, status_id, session)
        session.commit()
    except exc.SQLAlchemyError as e:
        logger.error(e)
        session.rollback()
        return False
    logger.info("successfully committed the keyword_stauts_id")
    # report the outcome instead of dropping the success flag
    return True
コード例 #8
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def new_store_follow_list(user_id, follow_list):
    """
    Store the user's followings into the DB: both the followed users
    (user table) and the following relationships (follow table).

    Inserts go through the module-level `db` object inside one
    db.transaction(); updates of already known users and the final
    update_following_time update go through an ORM session.

    @param user_id: id of the user
    @param follow_list: a list of the followings of the user
                    here the element in the follow_list is the user object returned by SinaWeiboAPI
                    (each element is indexed with ['id'] below)
    """
    logger.info("okay now in new_store_follow_list")
    db_transaction = db.transaction()
    try:
        session = orm.load_session()
        for user in follow_list:
            following_id = user['id']
            # store the user into DB
            if not db_insert_user(user, db, db_transaction):
                # a falsy result is treated as "already in DB": update the user instead
                logger.info("this following %s is already in DB"  % following_id)
                logger.info("Update this following user %s in DB" % following_id)
                update_user(user, session)
            # now will store the follow relationship into db
            try:
                db.insert('follow', user_id=user_id, following_id=following_id)
            except:
                # any insert failure is treated as a duplicate row and the loop goes on
                # NOTE(review): this rolls back the shared transaction, which
                # presumably discards earlier inserts of this call, and the
                # commit() below then runs on the rolled-back transaction - confirm
                db_transaction.rollback()
                logger.error("new_store_follow_list db.insert follow table error. DUPLICATE?..")
                logger.info("So %s -> %s already in DB" % (user_id, following_id))
    except:
        # errors from the loop are only logged; the else branch is skipped
        error_str = 'new_store_follow_list %s %s' % (sys.exc_info()[0], sys.exc_info()[1])
        logger.error(error_str)
    else:
        # the reason why put commit() here is just to improve the speed of insert
        db_transaction.commit()
        try:
            #===========================================================================
            # will update the update_following_time column of the user table
            #===========================================================================
            update_following_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            session.query(orm.DemoUsers).filter_by(user_id=user_id). \
                  update({"update_following_time": update_following_time}, synchronize_session=False)
            # commits the ORM session (update_user changes and the timestamp)
            session.commit()
        except exc.SQLAlchemyError, e:
            logger.error(e)
            session.rollback()
        except:
コード例 #9
0
 def query_update_user_weibo(self, limit_num):
     """
     Query the DB for users whose weibo has not been updated yet.

     Selects DemoUsers rows whose update_weibo_time is NULL, limited to
     limit_num rows. Errors are logged and whatever ids were collected so
     far are returned; the session is always closed.

     @param limit_num: maximum number of rows to fetch
     @return: list of the user_id values of the selected rows
     """
     user_id_list = []
     session = orm.load_session()
     query = session.query(orm.DemoUsers)
     try:
         # "== None" is intentional: it is a SQLAlchemy column comparison
         # (rendered as IS NULL), not a Python identity test
         user_list_db = query.filter(orm.DemoUsers.update_weibo_time == None).limit(limit_num)
         for user_db in user_list_db:
             user = map_rowobject_dict(user_db)
             user_id_list.append(user['user_id'])
         session.commit()
     except Exception as e:
         # Exception (not a bare except) so KeyboardInterrupt/SystemExit propagate;
         # the detail is logged too so the failure can be diagnosed
         self.logger.error('query update_weibo_time error')
         self.logger.error('%s' % (e,))
     finally:
         session.close()
     return user_id_list
コード例 #10
0
 def query_update_keyword_status(self, limit_num):
     """
     Query the DB for keyword_status rows whose status has not been updated.

     Selects KeywordStatus rows whose update_status_time is NULL, limited to
     limit_num rows. Errors are logged and whatever ids were collected so
     far are returned; the session is always closed.

     @param limit_num: maximum number of rows to fetch
     @return: list of the status_id values of the selected rows
     """
     statuses_id_list = []
     session = orm.load_session()
     query = session.query(orm.KeywordStatus)
     try:
         # "== None" is intentional: it is a SQLAlchemy column comparison
         # (rendered as IS NULL), not a Python identity test
         keyword_status_list_db = query.filter(orm.KeywordStatus.update_status_time == None).limit(limit_num)
         for keyword_status_db in keyword_status_list_db:
             print("keyword_status_lit_db not empty, status_id: %s" % (keyword_status_db.status_id))
             statuses_id_list.append(keyword_status_db.status_id)
         # commit after the rows are read: the query only executes when iterated
         session.commit()
     except Exception:
         # Exception (not a bare except) so KeyboardInterrupt/SystemExit propagate
         self.logger.error('query update_keyword_status error')
         self.logger.error('%s' % (sys.exc_info()[1]))
     finally:
         session.close()
     return statuses_id_list
コード例 #11
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def store_user(user):
    """
    Store the user object into the user table.

    If a DemoUsers row with user['id'] already exists it is updated through
    update_user(); otherwise a new row built by add_orm_user() is added.
    The session is committed at the end; a SQLAlchemyError is logged and the
    session is rolled back.

    @param user: the user object; it is indexed with ['id'] here
    """
    # created outside the try so the except handler below can always rely
    # on `session` being bound
    session = orm.load_session()
    try:
        demo_user = session.query(orm.DemoUsers).filter_by(user_id=user['id']).first()
        if demo_user:
            logger.info("Update this user %s in DB" % user['id'])
            # if in DB, then update the user in DB
            update_user(user, session)
        else:
            # if not in DB, then store into DB
            add_user = add_orm_user(user)
            # identity test: a None result means there is nothing to add
            if add_user is not None:
                session.add(add_user)
        session.commit()
    except exc.SQLAlchemyError as e:
        logger.error(e)
        session.rollback()
コード例 #12
0
ファイル: process_db.py プロジェクト: iswangheng/czwh_crawler
def handle_statuses_show(crawler_json):
    """
    Persist the statuses of a crawler result and mark them as updated.

    For each entry of crawler_json['sina_weibo_json_list']: the status is
    stored when its 'exist' flag is truthy, its 'user' object (if present)
    is stored through store_user_session(), and update_status_time of the
    KeywordStatus rows with that status id is set to the current local
    time. Everything is committed once; a SQLAlchemyError is logged and the
    session is rolled back.

    @param crawler_json: the json object returned from the crawler
    """
    fetched_statuses = crawler_json['sina_weibo_json_list']
    session = orm.load_session()
    try:
        for status in fetched_statuses:
            if status['exist']:
                store_status(status, session)
            # timestamp written into the update_status_time column below
            stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            if 'user' in status:
                store_user_session(status['user'], session)
            keyword_rows = session.query(orm.KeywordStatus).filter_by(status_id=status['id'])
            keyword_rows.update({"update_status_time": stamp}, synchronize_session=False)
        session.commit()
    except exc.SQLAlchemyError as db_error:
        logger.error(db_error)
        session.rollback()