def _start(self):
    """Open the block and transaction output files and attach a CSV exporter to each."""
    # The two file/exporter pairs are independent; order is not significant.
    self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
    self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
    self.blocks_exporter = CsvItemExporter(self.blocks_output_file)
    self.transactions_exporter = CsvItemExporter(self.transactions_output_file)
def _start(self):
    """Run the parent start-up, then wire up CSV exporters for blocks and transactions."""
    super()._start()
    # Blocks: binary handle plus an exporter restricted to the configured block fields.
    self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
    self.blocks_exporter = CsvItemExporter(
        self.blocks_output_file,
        fields_to_export=self.block_fields_to_export,
    )
    # Transactions: same pattern with the transaction field list.
    self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
    self.transactions_exporter = CsvItemExporter(
        self.transactions_output_file,
        fields_to_export=self.transaction_fields_to_export,
    )
def get_item_sink(output_file):
    """Context-manager generator yielding a ``sink(item)`` callable that writes
    dict items to *output_file*.

    If the path ends with ``.csv``, items are written as CSV rows: the header is
    taken lazily from the first item's keys, and keys not present in that first
    item are ignored (``extrasaction='ignore'``). Otherwise each item is written
    as one JSON line. The file handle is closed when the context exits.
    """
    fh = get_file_handle(output_file, 'w')
    if output_file.endswith('.csv'):
        set_max_field_size_limit()
        writer = None

        def sink(item):
            nonlocal writer
            if writer is None:
                # Create the writer lazily so the header matches the first
                # item's keys.  In Python 3 iterating a dict yields its keys,
                # so six.iterkeys() is unnecessary in this py3-only codebase.
                fields = list(item)
                writer = csv.DictWriter(fh, fieldnames=fields, extrasaction='ignore')
                writer.writeheader()
            writer.writerow(item)
    else:
        def sink(item):
            fh.write(json.dumps(item) + '\n')
    try:
        yield sink
    finally:
        # Always close the handle, even if the caller's body raises.
        fh.close()
def open(self):
    """Open one binary file handle and one CSV exporter per configured item type."""
    for item_type, filename in self.filename_mapping.items():
        handle = get_file_handle(filename, binary=True)
        self.file_mapping[item_type] = handle
        # Each exporter only emits the fields configured for its item type.
        self.exporter_mapping[item_type] = CsvItemExporter(
            handle, fields_to_export=self.field_mapping[item_type])
def _start(self):
    """Create the worker executor and the CSV exporters for blocks and transactions."""
    # A bounded executor caps the pending-work queue so it cannot grow without
    # limit, and FailSafeExecutor lets us monitor in-progress futures and fail
    # fast when a worker raises.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

    self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
    self.blocks_exporter = CsvItemExporter(
        self.blocks_output_file,
        fields_to_export=self.block_fields_to_export)

    self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
    self.transactions_exporter = CsvItemExporter(
        self.transactions_output_file,
        fields_to_export=self.transaction_fields_to_export)
def _start(self):
    """Run the parent start-up, open the output file (creating any missing parent
    directories), and attach a CSV exporter limited to the configured fields."""
    super()._start()
    self.output_file = get_file_handle(
        self.output, binary=True, create_parent_dirs=True)
    self.exporter = CsvItemExporter(
        self.output_file, fields_to_export=self.fields_to_export)
def open(self):
    """For every configured item type, open its output file and build an exporter
    chosen by extension — JSON-lines for '.json' paths, CSV otherwise — plus a
    per-type atomic counter."""
    for item_type, filename in self.filename_mapping.items():
        handle = get_file_handle(filename, binary=True)
        self.file_mapping[item_type] = handle
        fields = self.field_mapping[item_type]
        # Pick the exporter class from the file extension.
        exporter_cls = (JsonLinesItemExporter
                        if str(filename).endswith('.json')
                        else CsvItemExporter)
        self.exporter_mapping[item_type] = exporter_cls(handle, fields_to_export=fields)
        self.counter_mapping[item_type] = AtomicCounter()
def get_item_iterable(input_file):
    """Context-manager generator yielding an iterable of items read from
    *input_file*: ``csv.DictReader`` rows for '.csv' paths, otherwise one
    ``json.loads`` result per line. The handle is closed when the context exits."""
    fh = get_file_handle(input_file, 'r')
    if input_file.endswith('.csv'):
        set_max_field_size_limit()
        items = csv.DictReader(fh)
    else:
        # Lazy generator: each line is parsed only when consumed.
        items = (json.loads(line) for line in fh)
    try:
        yield items
    finally:
        fh.close()
def _start(self):
    """Open the output file in binary mode and attach a CSV exporter to it."""
    handle = get_file_handle(self.output, binary=True)
    self.output_file = handle
    self.exporter = CsvItemExporter(handle)
def _start(self):
    """Run the parent start-up, then open the binary output file, creating any
    missing parent directories along the way."""
    super()._start()
    self.output_file = get_file_handle(
        self.output, binary=True, create_parent_dirs=True)