def __init__(self):
    """Configure the base crawler from module settings and pick the Mongo collection.

    Every module-level setting is handed to ``Crawl.__init__`` as a keyword
    argument; afterwards the ``'nguyenkim_product'`` entry of
    ``self.mongo_conn`` is stored on the instance.
    """
    Crawl.__init__(
        self,
        site_name=SITE_NAME,
        init_url=INIT_URL,
        skip_url=SKIP_URL,
        redis_crawling_urls=REDIS_CRAWLING_URLS,
        redis_crawled_urls=REDIS_CRAWLED_URLS,
        redis_product_urls=REDIS_PRODUCT_URLS,
        product_pattern=PRODUCT_PATTERN,
        process_num=PROCESS_NUM,
        use_tor=USE_TOR,
    )

    # Select the collection for this site.
    # NOTE(review): mongo_conn is not assigned in this method; presumably
    # Crawl.__init__ sets it up -- confirm against the base class.
    self.mongo_collection = self.mongo_conn['nguyenkim_product']
def __init__(self):
    """Configure the base crawler, pick the Mongo collection, compile the page regex.

    Every module-level setting is handed to ``Crawl.__init__`` as a keyword
    argument. The ``'tiki_product'`` entry of ``self.mongo_conn`` is then
    stored on the instance, together with a compiled pattern for links that
    carry a ``p=<digits>`` query token.
    """
    Crawl.__init__(
        self,
        site_name=SITE_NAME,
        init_url=INIT_URL,
        skip_url=SKIP_URL,
        redis_crawling_urls=REDIS_CRAWLING_URLS,
        redis_crawled_urls=REDIS_CRAWLED_URLS,
        redis_product_urls=REDIS_PRODUCT_URLS,
        product_pattern=PRODUCT_PATTERN,
        process_num=PROCESS_NUM,
        use_tor=USE_TOR,
    )

    # Select the collection for this site.
    # NOTE(review): mongo_conn is not assigned in this method; presumably
    # Crawl.__init__ sets it up -- confirm against the base class.
    self.mongo_collection = self.mongo_conn['tiki_product']

    # Group 1 captures the text before a '?', group 2 a 'p=<digits>' token
    # somewhere after it; DOTALL lets '.' span newlines.
    regex_flags = re.MULTILINE | re.DOTALL
    self.page_link_format = re.compile(r'(.*)\?.*(p=\d+).*', regex_flags)
def __init__(self):
    """Configure the base crawler, pick the Mongo collection, compile the page regex.

    Every module-level setting is handed to ``Crawl.__init__`` as a keyword
    argument. The ``'lazada_product'`` entry of ``self.mongo_conn`` is then
    stored on the instance, together with a compiled pattern for links that
    carry a ``page=<digits>`` query token.
    """
    Crawl.__init__(
        self,
        site_name=SITE_NAME,
        init_url=INIT_URL,
        skip_url=SKIP_URL,
        redis_crawling_urls=REDIS_CRAWLING_URLS,
        redis_crawled_urls=REDIS_CRAWLED_URLS,
        redis_product_urls=REDIS_PRODUCT_URLS,
        product_pattern=PRODUCT_PATTERN,
        process_num=PROCESS_NUM,
        use_tor=USE_TOR,
    )

    # Select the collection for this site.
    # NOTE(review): mongo_conn is not assigned in this method; presumably
    # Crawl.__init__ sets it up -- confirm against the base class.
    self.mongo_collection = self.mongo_conn['lazada_product']

    # Group 1 captures the text before a '?', group 2 a 'page=<digits>'
    # token somewhere after it; DOTALL lets '.' span newlines.
    regex_flags = re.MULTILINE | re.DOTALL
    self.page_link_format = re.compile(r"(.*)\?.*(page=\d+).*", regex_flags)
def __init__(self):
    """Initialise the base crawler and pick the Mongo collection.

    ``INIT_URL``, ``SKIP_URL`` and ``USE_TOR`` are passed positionally, in
    that order, to ``Crawl.__init__``; afterwards the
    ``'cdiscount_product'`` entry of ``self.mongo_conn`` is stored on the
    instance.
    """
    Crawl.__init__(self, INIT_URL, SKIP_URL, USE_TOR)

    # Select the collection for this site.
    # NOTE(review): mongo_conn is not assigned in this method; presumably
    # Crawl.__init__ sets it up -- confirm against the base class.
    collection_name = 'cdiscount_product'
    self.mongo_collection = self.mongo_conn[collection_name]