Esempio n. 1
0
    def __init__(self):
        """Prepare crawler helpers and empty per-channel state.

        Only the crawler objects, the current crawl time and the platform
        label get real values here; every other attribute starts empty.
        """
        # --- Crawl settings ---
        self.crawler = TCCrawler()
        self.retrycrawler = RetryCrawler()
        # Current crawl time, and the value Common.time_s derives from it.
        self.crawling_time = Common.now()
        self.crawling_time_s = Common.time_s(self.crawling_time)
        # Start time, date and hour of this crawl run (not filled in yet).
        self.crawling_begintime = ''
        self.crawling_beginDate = self.crawling_beginHour = ''

        # --- Channel information ---
        # Platform the brand group belongs to.
        self.platform = '同程-pc'
        # Channel id / URL, then channel name / type.
        self.channel_id = self.channel_url = ''
        self.channel_name = self.channel_type = ''

        # --- Raw data ---
        # HTML content of the channel page.
        self.channel_page = ''
        # Data obtained from requests made within the channel page.
        self.channel_pages = {}

        # --- Collected results ---
        # Channel items.
        self.channel_items = []
        # Channel list.
        self.channel_list = []
Esempio n. 2
0
    def __init__(self):
        """Create the crawler and dial client, then record local settings.

        Stores the local IP reported by Common.local_ip(), the router tag
        and the wait time used by this object.
        """
        # Crawl settings.
        self.crawler = TCCrawler()

        # Dial client.
        self.dial_client = DialClient()

        # Local IP address.
        self._ip = Common.local_ip()

        # Router tag.
        self._tag = 'ikuai'

        # Wait time (unit is not stated here -- presumably seconds; confirm
        # against the code that reads it).
        self.w_time = 1
Esempio n. 3
0
    def __init__(self):
        """Wire up storage, queue, crawler and messaging helpers for a worker.

        Also captures the crawl time and the begin time/date/hour via the
        Common helpers at construction time.
        """
        # TC spot type.
        self.worker_type = Config.TC_Spot

        # --- DB ---
        # Queue type.
        self.tc_type = Config.TC_TYPE
        # MySQL access.
        self.mysqlAccess = MysqlAccess()
        # Redis queue.
        self.redisQueue = RedisQueue()
        # MongoDB fs access.
        self.mongofsAccess = MongofsAccess()

        # --- Crawl settings ---
        self.crawler = TCCrawler()

        # --- Message ---
        self.message = Message()

        # --- Crawl time settings ---
        # Current crawl time.
        self.crawling_time = Common.now()
        # Begin time is taken with its own Common.now() call, so it may
        # differ from crawling_time.
        self.begin_time = Common.now()
        self.begin_date = Common.today_s()
        self.begin_hour = Common.nowhour_s()
Esempio n. 4
0
    def __init__(self):
        """Create the crawler and reset all per-item state to defaults.

        Apart from the crawler, the current crawl time and the default book
        status, every attribute starts as an empty placeholder.
        """
        # --- Item page crawl settings ---
        self.crawler = TCCrawler()
        # Current crawl time.
        self.crawling_time = Common.now()
        # Start time, date and hour of this crawl run (not filled in yet).
        self.crawling_begintime = ''
        self.crawling_beginDate = self.crawling_beginHour = ''

        # --- Channel that a single-item-type product belongs to ---
        self.channel_id = self.channel_name = ''
        self.channel_url = self.channel_type = ''
        self.item_position = 0

        # --- Item information ---
        # Item id.
        self.item_id = ''
        # Item URL.
        self.item_url = ''
        # URL of the item's display picture.
        self.item_pic_url = ''
        # Item name.
        self.item_name = ''
        # Item description.
        self.item_desc = ''
        # Whether the item is on sale -- 0: not for sale, 1: on sale.
        self.item_book_status = 1
        # Level.
        self.item_level = ''
        # Address.
        self.item_area = ''
        # Service.
        self.item_service = ''
        # Number of comments.
        self.item_comment = ''
        # Positive-review rate.
        self.item_comment_rate = ''
        # Number of positive reviews.
        self.item_comment_good = ''

        # --- Item pricing ---
        # Original price.
        self.item_oriprice = ''
        # Discounted price.
        self.item_disprice = ''
        # Discount.
        self.item_discount = ''

        # --- Tickets ---
        self.item_tickets = []

        # --- Raw data ---
        # Content of the data entry the item belongs to.
        self.item_pageData = ''
        # HTML content of the item page.
        self.item_page = ''
        # Data obtained from requests made within the item page.
        self.item_pages = {}