Beispiel #1
0
    def __init__(self, target_name, base_url, log_toggle=True, page_param='', cate_param='', cate_list=None, cate_list_path='', page_list=None, cate_delay=0, page_delay=0):
        """Store the crawl configuration and optionally start file logging.

        Args:
            target_name: Name of the crawl target; also used as the stem of
                the log file (``<target_name>.log``) when logging is on.
            base_url: Base URL stored on the instance.
            log_toggle: When truthy, configure the root logger to write
                INFO-level records to ``<target_name>.log``.
            page_param: Stored as ``self.page_param``.
            cate_param: Stored as ``self.cate_param``.
            cate_list: Category list stored as ``self.cate_list``. ``None``
                (the default) yields a fresh empty list per instance.
            cate_list_path: When non-empty, the category list is loaded via
                ``crawlib.get_list_from_fp`` and replaces ``cate_list``.
            page_list: Page list stored as ``self.page_list``. ``None``
                (the default) yields a fresh empty list per instance.
            cate_delay: Accepted for call compatibility; not stored.
            page_delay: Accepted for call compatibility; not stored.
        """
        self.target_name = target_name
        self.base_url = base_url
        self.log_toggle = log_toggle
        self.cate_param = cate_param
        self.page_param = page_param

        # A literal [] default would be one list object shared by every
        # instance; build a new list whenever the caller supplies none.
        self.cate_list = [] if cate_list is None else cate_list
        self.page_list = [] if page_list is None else page_list

        # A list file, when given, takes precedence over the argument.
        if cate_list_path:
            self.cate_list = crawlib.get_list_from_fp(cate_list_path)

        # NOTE: cate_delay / page_delay are intentionally not kept on the
        # instance; in the code visible here delays are passed per call.

        # logging
        if self.log_toggle:
            logging.basicConfig(filename=self.target_name+".log", level=logging.INFO, format='%(asctime)s %(message)s')
            logging.info("------\tMain Start\t------")
Beispiel #2
0
    def crawling_cate_list(self, cate_list=None, cate_list_path='', page_list=None, cate_delay=0, page_delay=0):
        """Crawl every category in the category list, one after another.

        The stored lists are refreshed from the arguments first: a non-empty
        ``cate_list_path`` is loaded via ``crawlib.get_list_from_fp``, then a
        non-empty ``cate_list`` argument overrides it, and a non-empty
        ``page_list`` argument replaces ``self.page_list``.

        Args:
            cate_list: Optional category list; replaces ``self.cate_list``
                when non-empty.
            cate_list_path: Optional path from which to load the category
                list.
            page_list: Optional page list; replaces ``self.page_list`` when
                non-empty.
            cate_delay: Seconds passed to ``time.sleep`` after each category.
            page_delay: Forwarded to ``self.crawling_page_list``.

        Returns:
            None. If no category list is available, a message is printed and
            nothing is crawled.
        """
        if cate_list_path:
            self.cate_list = crawlib.get_list_from_fp(cate_list_path)

        # An explicit argument wins over whatever is stored or was loaded.
        if cate_list:
            self.cate_list = cate_list

        if page_list:
            self.page_list = page_list

        # Nothing to crawl: report it and stop here instead of falling into
        # the loop (which would fail on a None category list).
        if not self.cate_list:
            print("not exist cate_list, input cate_list")
            return

        for cate in self.cate_list:
            self.crawling_page_list(cate, self.page_list, page_delay=page_delay)
            time.sleep(cate_delay)