class PageExtractorRDS(RedisController):
    """Records a per-house status code in the "redis_pe" Redis section."""

    def __init__(self):
        # All reads and writes go through this wrapped controller.
        self.rds = RedisController(section_name="redis_pe")

    def __insert_id__(self, house_id, err_type):
        """Base method: store ``err_type`` under the key ``house_id``.

        err_type values:
        - -1: no request has been issued yet
        - 0: request succeeded, error mark removed
        - 1: detail page was requested successfully but came back empty
        - 2: page loaded incompletely, so some element data is missing
        """
        self.rds.rset(house_id, err_type)

    def select(self):
        """Return the ``rscan`` attribute of the wrapped controller."""
        return self.rds.rscan

    def insert_init(self, house_id):
        """Initial insert for a house ID (status -1, never requested)."""
        self.__insert_id__(house_id, -1)

    def insert_success_id(self, house_id):
        """Mark a house ID whose page was crawled successfully (status 0)."""
        self.__insert_id__(house_id, 0)

    def insert_empty_id(self, house_id):
        """Mark a house ID whose detail page returned empty data (status 1)."""
        self.__insert_id__(house_id, 1)

    def insert_lose_element_id(self, house_id):
        """Mark a house ID whose page lost elements while loading (status 2)."""
        self.__insert_id__(house_id, 2)
Exemple #2
0
class HouseSelectorRDS(RedisController):
    """Thin wrapper over a RedisController built for the "redis_hs" section."""

    def __init__(self):
        # All reads and writes go through this wrapped controller.
        self.rds = RedisController(section_name="redis_hs")

    def insert(self, house_id, house_info):
        """Write ``house_info`` under the key ``house_id`` via ``rset``."""
        backend = self.rds
        backend.rset(house_id, house_info)

    def select(self):
        """Return the ``rscan`` attribute of the wrapped controller."""
        backend = self.rds
        return backend.rscan
def ziroom_extra(project_name, rid, rtn_data):
    '''ziroom_extra
    Post-process one scraped Ziroom record in place and return it.

    Three independent, best-effort steps are applied. A failure in one step
    is logged at debug level and does not prevent the following steps:

    1. House code: when ``house_type`` reports more than one room, one
       ``house_id`` entry per room is written to Redis, with ids derived from
       ``rid`` and the numeric suffix of ``house_code``.
    2. Price: ``rtn_data["price"]`` is replaced by the value returned from
       ``get_price_from_png``.
    3. Payment: every field other than ``"period"`` in each entry of
       ``rtn_data["paymentlist"]`` is passed through ``get_price_from_png``.
       The list is replaced only after *all* entries converted, so a failure
       part-way through leaves the original list untouched instead of a
       silently truncated one.

    Args:
        project_name: Project identifier handed to the logger, the Redis
            controller and ``get_price_from_png``.
        rid: Numeric id of the record being processed.
        rtn_data: Mapping with the scraped fields; it is mutated in place.

    Returns:
        The same ``rtn_data`` object that was passed in.
    '''
    logger = LogBase(project_name, "ziroom_extra")
    logger.debug("Before Extra =>", data=rtn_data)

    # Extra func for house code.
    try:
        end = int(rtn_data['house_code'].split('_')[1])
        room_num = int(findall(r"([0-9])室[0-9]厅", rtn_data['house_type'])[0])
    except Exception as err:
        # Missing or unparsable fields simply mean there is nothing to expand.
        logger.debug("Skip house code extra =>", data={"error": repr(err)})
    else:
        if room_num > 1:
            rds = RedisController(
                int(conf_kv_func("ziroom.sys_config", all=True)['redis_db']),
                project_name)
            base_id = rid - end
            for idx in range(1, room_num + 1):
                room_id = base_id + idx
                rds.__update_dict_to_redis__(room_id,
                                             {"house_id": str(room_id)})

    # Extra func for price.
    # price_dict is threaded through every get_price_from_png call below, so
    # it must exist even when the price step itself fails.
    price_dict = dict()
    try:
        price, price_dict = get_price_from_png(rtn_data["price"], price_dict,
                                               project_name)
        rtn_data["price"] = price
    except Exception as err:
        logger.debug("Skip price extra =>", data={"error": repr(err)})

    # Extra func for payment.
    try:
        payment_rtn_list = list()
        for payment in rtn_data["paymentlist"]:
            payment_rtn = dict()
            for k, v in payment.items():
                if k == "period":
                    payment_rtn["period"] = v
                else:
                    payment_rtn[k], price_dict = get_price_from_png(
                        v, price_dict, project_name)

            payment_rtn_list.append(payment_rtn)
    except Exception as err:
        logger.debug("Skip payment extra =>", data={"error": repr(err)})
    else:
        # An empty source list is left as it was; otherwise swap in the fully
        # converted list in one step.
        if payment_rtn_list:
            rtn_data["paymentlist"] = payment_rtn_list

    logger.debug("After Extra =>", data=rtn_data)

    return rtn_data
 def __load__(self):
     '''__load__
     Read this crawler's configuration and set up its Redis handle.

     Sets ``self.crawler_conf`` (the full config for ``self.crawler_name``),
     ``self.rds`` (a RedisController for the configured ``redis_db`` and
     ``self.project_name``) and ``self.rds_key`` (the configured
     ``redis_key``), then logs the loaded configuration at debug level.
     '''
     conf = CrawlerConfigReader.crawler_config(self.crawler_name)
     self.crawler_conf = conf

     # Both Redis settings live under the 'sys_conf' section.
     sys_conf = conf['sys_conf']
     self.rds = RedisController(int(sys_conf['redis_db']), self.project_name)
     self.rds_key = sys_conf['redis_key']

     self.debug("Here is crawler config => ", **conf)
Exemple #5
0
class LJRedisController():
    """Queues failed house pages in Redis under 1-based string keys."""

    def __init__(self):
        self._db = LJDBController()
        self._redis = RedisController()

    def failed_page_insert(self):
        """Store the houses that failed in the spider_page step into Redis.

        Each entry of ``self._db.get_house_page_failed`` is written under its
        1-based position, converted to a string.
        """
        failed_houses = self._db.get_house_page_failed
        for position, house in enumerate(failed_houses, start=1):
            self._redis.rset(str(position), house)

    def failed_page_get(self, num=20):
        """Yield the house IDs stored in Redis in batches of ``num`` keys.

        Every batch is a list of ``(key_number, house_id)`` tuples. Values
        that equal ``"n"`` after stripping whitespace are left out
        (presumably the value ``rget`` reports for an absent key - confirm
        against RedisController), so a batch can be shorter than ``num`` or
        empty.
        """
        for start in range(0, self._redis.dbsize, num):
            batch = []
            for key_number in range(start + 1, start + num + 1):
                stored = self._redis.rget(str(key_number))
                if stored.strip() != "n":
                    batch.append((key_number, stored))
            yield batch

    def success_page_del(self, house_list):
        """Delete one finished batch of houses from Redis.

        ``house_list`` holds entries whose first element is the Redis key,
        e.g. a batch produced by ``failed_page_get``.
        """
        for entry in house_list:
            self.page_reexec_del(entry[0])

    def page_reexec_del(self, key):
        """Delete the house stored under ``key`` from Redis after a re-run."""
        self._redis.rdel(key)

    @property
    def close(self):
        """Final clean-up hook; touches ``close`` on the database controller."""
        self._db.close
 def __init__(self):
     '''Create the Redis controller for the "redis_pe" section.'''
     section = "redis_pe"
     self.rds = RedisController(section_name=section)
Exemple #7
0
 def __init__(self):
     '''Create the database controller, then the Redis controller.'''
     database = LJDBController()
     self._db = database
     # RedisController is built with no arguments here.
     cache = RedisController()
     self._redis = cache
Exemple #8
0
def truncate_redis():
    """Empty Redis for PageExtractor.

    Builds the controller for the "redis_pe" section and calls ``flushdb()``
    on its underlying connection object.
    """
    connection = RedisController(section_name="redis_pe")._redis_conn
    connection.flushdb()