def __init__(self, url, output_folder, method="get", params=None, headers=None):
    """Store the request settings on the instance and prepare the output folder.

    Args:
        url: Stored as ``self.url``.
        output_folder: Stored as ``self.folder`` and passed to
            ``assure_folder_exists``.
        method: Stored as ``self.method``; defaults to ``"get"``
            (presumably an HTTP verb -- confirm against the code that
            consumes it).
        params: Optional dict stored as ``self.params``. When omitted, a
            fresh empty dict is created for this instance.
        headers: Optional dict stored as ``self.headers``. When omitted, a
            fresh empty dict is created for this instance.
    """
    # A ``{}`` default in the signature is evaluated once and would be shared
    # (and mutated) across every instance; build a new dict per call instead.
    # Dicts passed in by the caller are still stored by reference, as before.
    self.params = {} if params is None else params
    self.method = method
    self.url = url
    self.folder = output_folder
    self.headers = {} if headers is None else headers
    assure_folder_exists(output_folder)
def parse(input_folder, output_folder, parser, workers=5):
    """Run ``parser_worker`` over one ``ParseTask`` per input/output file pair.

    Args:
        input_folder: Passed to ``get_input_output_pairs`` as the source
            location.
        output_folder: Passed to ``assure_folder_exists`` and to
            ``get_input_output_pairs`` as the destination location.
        parser: Handed to every ``ParseTask`` that is created.
        workers: Size argument given to ``Pool``; defaults to 5.

    Returns:
        None. The results of ``pool.map`` are discarded.
    """
    assure_folder_exists(output_folder)
    # NOTE(review): the pool is never closed or joined here. If ``Pool`` is
    # ``multiprocessing.Pool``, consider ``close()``/``join()`` after the
    # map -- confirm the concrete pool type before changing this.
    pool = Pool(workers)
    input_output_pairs = get_input_output_pairs(input_folder, output_folder)
    tasks = [
        ParseTask(input_file_name, output_file_name, parser)
        for input_file_name, output_file_name in input_output_pairs
    ]
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("Parsing %s tasks with %s workers", len(tasks), workers)
    pool.map(parser_worker, tasks)
def parse(input_folder, output_folder, parser, workers=5):
    """Build a ``ParseTask`` for each input/output pair and map them over a pool.

    Args:
        input_folder: Source location given to ``get_input_output_pairs``.
        output_folder: Destination location; passed to
            ``assure_folder_exists`` first, then to
            ``get_input_output_pairs``.
        parser: Forwarded unchanged into each ``ParseTask``.
        workers: Argument passed to ``Pool``; defaults to 5.

    Returns:
        None. Whatever ``pool.map`` returns is not propagated.
    """
    assure_folder_exists(output_folder)
    # NOTE(review): nothing closes or joins this pool. If ``Pool`` is
    # ``multiprocessing.Pool``, a ``close()``/``join()`` pair after the map
    # would release the workers -- confirm the pool type first.
    pool = Pool(workers)
    input_output_pairs = get_input_output_pairs(input_folder, output_folder)
    # The comprehension already yields a list; no extra ``list()`` is needed.
    tasks = [
        ParseTask(input_file_name, output_file_name, parser)
        for input_file_name, output_file_name in input_output_pairs
    ]
    # Pass the values as logger arguments so formatting is deferred.
    logger.info("Parsing %s tasks with %s workers", len(tasks), workers)
    pool.map(parser_worker, tasks)