コード例 #1
0
class TCQ():
    '''Wrapper around a single tc redis queue.

    The redis key is "<Config.TC_TYPE>_<_obj>", with "_<_q_type>" appended
    when a truthy _q_type is supplied.
    '''
    def __init__(self, _obj, _q_type=None):
        '''Create the redis queue / message helpers and derive the queue key.

        _obj    -- object name; second component of the queue key
        _q_type -- optional crawler type; third component of the key when truthy
        '''
        self._obj = _obj
        self._q_type = _q_type  # crawler type
        self.tc_type = Config.TC_TYPE  # queue type

        # Redis access first, then the message builder.
        self.redisQueue = RedisQueue()
        self.message = Message()

        # Assemble the key format and its arguments, then render it once.
        key_format, key_args = '%s_%s', (self.tc_type, self._obj)
        if self._q_type:
            key_format, key_args = key_format + '_%s', key_args + (self._q_type,)
        self._key = key_format % key_args

    def clearQ(self):
        '''Clear this object's redis queue.'''
        self.redisQueue.clear_q(self._key)

    def putQ(self, _msg):
        '''Write one message to this object's redis queue.'''
        self.redisQueue.put_q(self._key, _msg)

    def putlistQ(self, item_list):
        '''Convert every entry of item_list to a queue message and enqueue it.

        Each entry is prefixed with (0, _obj, _q_type) before being handed to
        Message.QueueMsg; entries for which that returns a falsy value are
        skipped.
        '''
        for entry in item_list:
            payload = (0, self._obj, self._q_type) + entry
            queue_msg = self.message.QueueMsg(self._obj, payload)
            if not queue_msg:
                continue
            self.putQ(queue_msg)
コード例 #2
0
ファイル: JHSQ.py プロジェクト: xzhoutxd/jhs_v1
class JHSQ():
    '''Wrapper around a single jhs redis queue.

    The redis key is "<Config.JHS_TYPE>_<_obj>", or
    "<Config.JHS_TYPE>_<_obj>_<_q_type>" when a truthy _q_type is given.
    '''
    def __init__(self, _obj, _q_type=None):
        '''Create the redis queue / message helpers and derive the queue key.

        _obj    -- object name; second component of the queue key
        _q_type -- optional queue type; third component of the key when truthy
        '''
        self._obj = _obj
        self._q_type = _q_type
        self.jhs_type = Config.JHS_TYPE

        # DB access (redis queue), then the message builder.
        self.redisQueue = RedisQueue()
        self.message = Message()

        # Queue key: the two-part form is used when no queue type was given.
        if not self._q_type:
            self._key = '%s_%s' % (self.jhs_type, self._obj)
        else:
            self._key = '%s_%s_%s' % (self.jhs_type, self._obj, self._q_type)

    def clearQ(self):
        '''Clear this object's redis queue.'''
        self.redisQueue.clear_q(self._key)

    def putQ(self, _msg):
        '''Write one message to this object's redis queue.'''
        self.redisQueue.put_q(self._key, _msg)

    def putlistQ(self, item_list):
        '''Convert every entry of item_list to a queue message and enqueue it.

        Each entry is prefixed with (0, _obj, jhs_type) and passed to
        Message.jhsQueueMsg; only truthy results are written to the queue.
        '''
        for fields in item_list:
            header = (0, self._obj, self.jhs_type)
            queue_msg = self.message.jhsQueueMsg(self._obj, header + fields)
            if queue_msg:
                self.putQ(queue_msg)
コード例 #3
0
ファイル: JMItemM.py プロジェクト: xzhoutxd/jm
class JMItemRedisM(MyThread):
    '''MyThread-based worker that drains a jm item redis queue.

    crawl() pops messages from the redis queue identified by ``key``, parses
    them with Item and writes the resulting rows to mysql in batches via
    MysqlAccess.insertJMGlobalitemHour.
    '''
    def __init__(self, key, q_type, thread_num=10, a_val=None):
        '''Set up db access, queue settings and router-dial helpers.

        key        -- redis queue key this worker reads from / writes to
        q_type     -- jm queue type; crawl() only processes messages when
                      this is 'main'
        thread_num -- forwarded to MyThread.__init__
        a_val      -- optional value appended to each message's "val"
        '''
        # parent construct
        MyThread.__init__(self, thread_num)
        # thread lock guarding the shared result lists (see push_back)
        self.mutex          = threading.Lock()

        self.jm_type        = Config.JM_TYPE # jm type

        # db
        self.mysqlAccess    = MysqlAccess() # mysql access
        self.redisQueue     = RedisQueue()  # redis queue
        self.mongofsAccess  = MongofsAccess() # mongodb fs access

        # jm queue type
        self.jm_queue_type  = q_type # main hour...
        self._key           = key   # redis queue key

        # appendix val
        self.a_val          = a_val

        # return items
        self.items          = []

        # dial client
        self.dial_client    = DialClient()

        # local ip
        self._ip            = Common.local_ip()

        # router tag (alternative value seen in this file: 'tpent')
        self._tag           = 'ikuai'

        # give up item, retry too many times
        self.giveup_items   = []

    def dialRouter(self, _type, _obj):
        '''Send a dial request for module "<_type>_<_obj>" to the dial client.

        Any exception raised while sending is logged and swallowed.
        '''
        try:
            _module = '%s_%s' % (_type, _obj)
            self.dial_client.send((_module, self._ip, self._tag))
        except Exception as e:
            Common.log('# To dial router exception: %s' % e)

    def clearItemQ(self):
        '''Clear the item queue.'''
        self.redisQueue.clear_q(self._key)

    def putItemQ(self, _msg):
        '''Write _msg to the redis queue, wrapped as the tuple (0, _msg).'''
        _data = (0, _msg)
        self.redisQueue.put_q(self._key, _data)

    def putItemlistQ(self, item_list):
        '''Enqueue every entry of item_list through putItemQ.'''
        for _item in item_list:
            self.putItemQ(_item)

    def push_back(self, L, v):
        '''Append v to the list L while holding the instance mutex.'''
        # "with" releases the lock even if append raises.
        with self.mutex:
            L.append(v)

    def crawlRetry(self, _key, msg):
        '''Re-queue msg on _key unless its retry budget is used up.

        msg is expected to be a mapping with "retry" and "obj" entries. The
        retry counter is incremented in place; the message is put back while
        the counter is below Config.crawl_retry (Config.item_crawl_retry for
        obj == 'globalitem'), otherwise it is only logged. Falsy messages
        are ignored.
        '''
        if not msg: return
        try:
            _retry = msg['retry'] + 1
            _obj = msg["obj"]
        except (KeyError, TypeError):
            # This method is called from crawl()'s exception handlers with
            # whatever came off the queue; a message without the expected
            # fields must not raise from there and stop the worker loop.
            Common.log('# malformed msg, can not retry:')
            Common.log(msg)
            return
        msg['retry'] = _retry
        max_time = Config.crawl_retry
        if _obj == 'globalitem':
            max_time = Config.item_crawl_retry
        if _retry < max_time:
            self.redisQueue.put_q(_key, msg)
        else:
            Common.log('# retry too many time, no get msg:')
            Common.log(msg)

    def insertGlobalItemHour(self, iteminfosql_list, f=False):
        '''Insert the batched global item rows when the batch is due.

        The batch is due when f is true or when it holds at least
        Config.item_max_arg rows. Returns True when the batch was due (even
        if it was empty and nothing was inserted) so the caller can start a
        new list; returns False otherwise.
        '''
        if f or len(iteminfosql_list) >= Config.item_max_arg:
            if iteminfosql_list:
                self.mysqlAccess.insertJMGlobalitemHour(iteminfosql_list)
            return True
        return False

    def crawl(self):
        '''Consume the redis queue and store the parsed items.

        Loops until the queue has been found empty more than M (2) times;
        the empty-poll counter is cumulative over the whole run, it is not
        reset when a message arrives. Pending rows are flushed to mysql
        every time the queue is found empty.
        '''
        _iteminfosql_list = []
        i, M = 0, 2
        n = 0
        while True:
            # Reset per iteration: if get_q itself raises, the handlers
            # below must not retry the message of the previous iteration
            # (or hit an unbound name on the very first pass).
            _data = None
            try:
                _data = self.redisQueue.get_q(self._key)

                # queue is empty
                if not _data:
                    # flush whatever is pending before waiting / exiting
                    self.insertGlobalItemHour(_iteminfosql_list, True)
                    _iteminfosql_list = []

                    i += 1
                    if i > M:
                        Common.log('# all get itemQ item num: %d' % n)
                        Common.log('# not get itemQ of key: %s' % self._key)
                        break
                    time.sleep(10)
                    continue
                n += 1
                if self.jm_queue_type != 'main':
                    # messages of other queue types are consumed but not
                    # processed
                    continue

                # item instance
                item = Item()
                _val = _data["val"]
                if self.a_val: _val = _val + self.a_val

                item.antPageGlobal(_val)
                # build the row once; collect it and add it to the batch
                iteminfoSql = item.outGlobalSql()
                self.push_back(self.items, iteminfoSql)
                _iteminfosql_list.append(iteminfoSql)
                if self.insertGlobalItemHour(_iteminfosql_list): _iteminfosql_list = []

                # NOTE: storing the crawled pages (item.outItemPage +
                # mongofsAccess.insertJMPages) is disabled here.

                # delay between items
                time.sleep(1)

            except Common.NoItemException as e:
                Common.log('# Not item exception: %s' % e)

            except Common.NoPageException as e:
                Common.log('# Not page exception: %s' % e)

            except Common.InvalidPageException as e:
                self.crawlRetry(self._key, _data)
                Common.log('# Invalid page exception: %s' % e)

            except Exception as e:
                Common.log('# Unknown exception crawl item: %s' % e)
                Common.traceback_log()

                self.crawlRetry(self._key, _data)
                if 'Read timed out' not in str(e):
                    # redial the router, then back off for a random interval
                    try:
                        self.dialRouter(4, 'item')
                    except Exception as dial_err:
                        Common.log('# DailClient Exception err: %s' % dial_err)
                        time.sleep(10)
                    time.sleep(random.uniform(10,30))