def __init__(self, data_adapter_config_path, source_name, schema_file):
    '''
    Constructor.

    Creates the "cleaner" logger for the given source, builds the raw-data
    and clean-data adapters with that logger, stores the source name, and
    finally passes schema_file to self.get_schema().

    data_adapter_config_path -- forwarded unchanged to both adapters
    source_name -- forwarded to the logger and both adapters, and kept
                   on the instance as self.source_name
    schema_file -- forwarded unchanged to self.get_schema()
    '''
    # The logger must exist first: both adapters receive it.
    self.logger = Logger("cleaner", source_name)

    # Both adapters are constructed with the same positional arguments.
    adapter_args = (data_adapter_config_path, source_name, self.logger)
    self.data_raw_adapter = DataRawAdapter(*adapter_args)
    self.data_clean_adapter = DataCleanAdapter(*adapter_args)

    self.source_name = source_name

    # Kept last so every attribute above is already set when it runs.
    self.get_schema(schema_file)
def __init__(self, data_adapter_config_path, source_name,
             encode="utf-8", parse_try_limit=3):
    '''
    Constructor.

    Creates the "spider" logger for the given source, builds the document,
    raw-data and image-store adapters with that logger, and records the
    remaining settings on the instance.

    data_adapter_config_path -- forwarded unchanged to every adapter
    source_name -- forwarded to the logger and the two per-source adapters,
                   and kept on the instance as self.source_name
    encode -- stored as self.encode (defaults to "utf-8")
    parse_try_limit -- stored as self.parse_try_limit (defaults to 3)
    '''
    # The logger must exist first: every adapter receives it.
    self.logger = Logger("spider", source_name)

    # The document and raw-data adapters share the same positional arguments.
    per_source_args = (data_adapter_config_path, source_name, self.logger)
    self.doc_raw_adapter = DocRawAdapter(*per_source_args)
    self.data_raw_adapter = DataRawAdapter(*per_source_args)

    # The image store adapter is not given a source name.
    self.image_store_adapter = ImageStoreAdapter(
        data_adapter_config_path,
        self.logger,
    )

    self.source_name = source_name
    self.encode = encode
    self.parse_try_limit = parse_try_limit

    # Counter initialised to zero; nothing in this constructor updates it.
    self.exploring_times = 0