예제 #1
0
파일: JMBrand.py 프로젝트: xzhoutxd/jm
    def __init__(self, m_type):
        """Prepare the queues, the worker and the bookkeeping fields.

        Args:
            m_type: distributed-host flag, stored unchanged on the instance.
        """
        # Distributed-host flag.
        self.m_type = m_type

        # Page; nothing is loaded at construction time.
        self.site_page = None

        # Default category (presumably an (id, name, url) tuple -- confirm
        # against the queue consumer).
        default_channel = (
            1,
            '美妆',
            'http://beauty.jumei.com/?from=all_null_index_top_nav_cosmetics&lo=3481&mat=30573',
        )
        self.channel_list = [default_channel]

        # NOTE: DB access is currently disabled.
        #self.mysqlAccess   = MysqlAccess()     # mysql access

        # Channel queue, then act queue.
        self.chan_queue = JMQ('channel', 'main')
        self.act_queue = JMQ('act', 'main')

        self.work = JMWorker()

        # Crawl start time.
        self.begin_time = Common.now()
예제 #2
0
파일: JMGlobal.py 프로젝트: xzhoutxd/jm
    def __init__(self, m_type):
        """Prepare the queues, the worker and the bookkeeping fields.

        Args:
            m_type: distributed-host flag, stored unchanged on the instance.
        """
        # Distributed-host flag.
        self.m_type = m_type

        # Page; nothing is loaded at construction time.
        self.site_page = None

        # Default category (presumably an (id, name, url) tuple -- confirm
        # against the queue consumer).
        default_channel = (2, "聚美极速免税店", "http://www.jumeiglobal.com")
        self.channel_list = [default_channel]

        # NOTE: DB access is currently disabled.
        # self.mysqlAccess   = MysqlAccess()     # mysql access

        # Channel queue, then item queue.
        self.chan_queue = JMQ("channel", "global")
        self.item_queue = JMQ("globalitem", "main")

        self.work = JMWorker()

        # Crawl start time.
        self.begin_time = Common.now()
예제 #3
0
파일: JMBrand.py 프로젝트: xzhoutxd/jm
class JMBrand():
    """Driver for crawling the JM brand channel.

    Holds a channel queue, an act queue and a ``JMWorker``; ``antPage`` runs
    one crawl pass and pushes the resulting acts onto the act queue.
    """

    def __init__(self, m_type):
        """Create the queues, the worker and the crawl bookkeeping fields.

        Args:
            m_type: distributed-host flag; ``'m'`` marks the main host, which
                is the one that resets the queues in ``antPage``.
        """
        # NOTE: DB access (MysqlAccess) is currently disabled.

        # channel queue
        self.chan_queue = JMQ('channel','main')

        # act queue
        self.act_queue = JMQ('act','main')

        self.work = JMWorker()

        # Default category.
        self.channel_list = [
                (1,'美妆','http://beauty.jumei.com/?from=all_null_index_top_nav_cosmetics&lo=3481&mat=30573')
                ]

        # Page.
        self.site_page  = None

        # Crawl start time.
        self.begin_time = Common.now()

        # Distributed-host flag.
        self.m_type = m_type

    def antPage(self):
        """Run one crawl pass and enqueue the acts that were found.

        On the main host (``m_type == 'm'``) the channel queue is cleared and
        refilled with the default channels, each extended with the crawl
        start time, and the act queue is cleared.  On every host the worker
        is then asked to process the ``'channel'`` / ``'main'`` job, and the
        ``'sale'`` and ``'coming'`` entries of ``self.work.items`` are pushed
        onto the act queue.

        Any exception is logged through ``Common`` and not re-raised.
        """
        try:
            # Only the main host needs to set up the redis queues.
            if self.m_type == 'm':
                # NOTE: the MySQL channel lookup is disabled; the built-in
                # default list is always used.
                channel_list = self.channel_list
                if channel_list:
                    # Append the crawl start time to every channel tuple.
                    channel_val_list = [c + (self.begin_time,) for c in channel_list]
                    # Clear the channel redis queue, then store the channels.
                    self.chan_queue.clearQ()
                    self.chan_queue.putlistQ(channel_val_list)

                    # Clear the act redis queue.
                    self.act_queue.clearQ()
                    Common.log('# channel queue end')
                else:
                    Common.log('# not find channel...')

            # channel acts
            # NOTE: the lookup of not-yet-started act ids (MySQL) is disabled,
            # so no extra value is handed to the worker.
            self.work.process('channel', 'main', None)

            # Act data collected by the worker.
            act_val_list = []
            act_val = self.work.items
            if act_val:
                # ``in`` replaces dict.has_key(), which no longer exists on
                # Python 3; the result is the same on Python 2.
                if 'sale' in act_val:
                    Common.log('# act on sale nums: %d'%len(act_val['sale']))
                    act_val_list.extend(act_val['sale'])
                if 'coming' in act_val:
                    Common.log('# act will coming nums: %d'%len(act_val['coming']))
                    act_val_list.extend(act_val['coming'])

            # Store the acts in the redis queue.
            self.act_queue.putlistQ(act_val_list)
            Common.log('# act queue end')

            # NOTE: removal of acts/items that ended during the previous hour
            # (driven by MySQL queries) is disabled.
        except Exception as e:
            Common.log('# JMbrand antpage error: %s'%e)
            Common.traceback_log()
예제 #4
0
파일: JMGlobal.py 프로젝트: xzhoutxd/jm
class JMGlobal:
    """Driver for crawling the JM global channel.

    Holds a channel queue, an item queue and a ``JMWorker``; ``antPage`` runs
    one crawl pass and pushes the resulting items onto the item queue.
    """

    def __init__(self, m_type):
        """Create the queues, the worker and the crawl bookkeeping fields.

        Args:
            m_type: distributed-host flag; ``"m"`` marks the main host, which
                is the one that resets the queues in ``antPage``.
        """
        # NOTE: DB access (MysqlAccess) is currently disabled.

        # channel queue
        self.chan_queue = JMQ("channel", "global")

        # item queue
        self.item_queue = JMQ("globalitem", "main")

        self.work = JMWorker()

        # Default category.
        self.channel_list = [(2, "聚美极速免税店", "http://www.jumeiglobal.com")]

        # Page.
        self.site_page = None

        # Crawl start time.
        self.begin_time = Common.now()

        # Distributed-host flag.
        self.m_type = m_type

    def antPage(self):
        """Run one crawl pass and enqueue the items that were found.

        On the main host (``m_type == "m"``) the channel queue is cleared and
        refilled with the default channels, each extended with the crawl
        start time, and the item queue is cleared.  On every host the worker
        is then asked to process the ``"channel"`` / ``"global"`` job, and
        the ``"sale"`` and ``"coming"`` entries of ``self.work.items`` are
        pushed onto the item queue.

        Any exception is logged through ``Common`` and not re-raised.
        """
        try:
            # Only the main host needs to set up the redis queues.
            if self.m_type == "m":
                # NOTE: the MySQL channel lookup is disabled; the built-in
                # default list is always used.
                channel_list = self.channel_list
                if channel_list:
                    # Append the crawl start time to every channel tuple.
                    channel_val_list = [c + (self.begin_time,) for c in channel_list]
                    # Clear the channel redis queue, then store the channels.
                    self.chan_queue.clearQ()
                    self.chan_queue.putlistQ(channel_val_list)

                    # Clear the item redis queue.
                    self.item_queue.clearQ()
                    Common.log("# channel queue end")
                else:
                    Common.log("# not find channel...")

            # global items
            self.work.process("channel", "global", None)

            # Item data collected by the worker.
            item_val_list = []
            item_val = self.work.items
            if item_val:
                # ``in`` replaces dict.has_key(), which no longer exists on
                # Python 3; the result is the same on Python 2.
                if "sale" in item_val:
                    Common.log("# item on sale nums: %d" % len(item_val["sale"]))
                    item_val_list.extend(item_val["sale"])
                if "coming" in item_val:
                    Common.log("# item will coming nums: %s" % len(item_val["coming"]))
                    item_val_list.extend(item_val["coming"])

            Common.log("# item val nums: %s" % len(item_val_list))
            # Store the items in the redis queue.
            self.item_queue.putlistQ(item_val_list)
            Common.log("# item queue end")

            # NOTE: removal of items that ended during the previous hour
            # (driven by a MySQL query) is disabled.
        except Exception as e:
            Common.log("# JMGlobal antpage error: %s" % e)
            Common.traceback_log()