def get_item_sink(output_file):
    """Yield a ``sink(item)`` callable that appends items to ``output_file``.

    This is a single-yield generator: the file handle is opened up front,
    the sink is yielded once, and the handle is closed when the generator is
    resumed or finalized.
    NOTE(review): it is shaped to be wrapped by ``contextlib.contextmanager``;
    the decorator is not visible in this block -- confirm at the definition
    site.

    Output format is chosen from the file name:

    * ``*.csv`` -- rows are written with ``csv.DictWriter``. The header is
      taken from the keys of the *first* item passed to the sink; keys that
      appear only in later items are silently dropped
      (``extrasaction='ignore'``), and keys missing from later items are
      written as empty fields (``DictWriter`` default ``restval``).
    * anything else -- one ``json.dumps(item)`` per line.

    :param output_file: path-like string; must support ``.endswith``.
    :yields: a one-argument callable that writes a single item.
    """
    fh = get_file_handle(output_file, 'w')

    # All setup after the handle is acquired runs under try/finally, so a
    # failure in the suffix check or the CSV field-size setup cannot leak
    # the open handle.
    try:
        if output_file.endswith('.csv'):
            set_max_field_size_limit()

            # Created lazily: the column set is unknown until the first item.
            writer = None

            def sink(item):
                nonlocal writer
                if writer is None:
                    writer = csv.DictWriter(fh,
                                            fieldnames=list(item.keys()),
                                            extrasaction='ignore')
                    writer.writeheader()
                writer.writerow(item)
        else:

            def sink(item):
                fh.write(json.dumps(item) + '\n')

        yield sink
    finally:
        fh.close()
Example #2
0
    def open(self):
        """Open every configured output file and attach an exporter and counter.

        For each ``(item_type, filename)`` pair in ``self.filename_mapping``:

        * a handle is obtained via ``get_file_handle(filename, binary=True)``
          and stored in ``self.file_mapping``;
        * an exporter is built over that handle -- ``JsonLinesItemExporter``
          when the file name ends in ``.json``, ``CsvItemExporter`` otherwise
          -- restricted to the fields listed for the item type in
          ``self.field_mapping`` (``None`` when the type has no entry), and
          stored in ``self.exporter_mapping``;
        * a fresh ``AtomicCounter`` is stored in ``self.counter_mapping``.
        """
        for item_type, filename in self.filename_mapping.items():
            handle = get_file_handle(filename, binary=True)
            export_fields = self.field_mapping.get(item_type)
            self.file_mapping[item_type] = handle

            # str() so that non-string path objects can be suffix-tested too.
            is_json = str(filename).endswith('.json')
            exporter_cls = JsonLinesItemExporter if is_json else CsvItemExporter
            self.exporter_mapping[item_type] = exporter_cls(
                handle, fields_to_export=export_fields)

            self.counter_mapping[item_type] = AtomicCounter()
Example #3
0
def get_item_iterable(input_file):
    """Yield a lazy iterable of items read from ``input_file``.

    This is a single-yield generator: the file handle is opened up front,
    the iterable is yielded once, and the handle is closed when the
    generator is resumed or finalized. The yielded iterable reads from the
    handle on demand, so it must be consumed before that happens.
    NOTE(review): it is shaped to be wrapped by ``contextlib.contextmanager``;
    the decorator is not visible in this block -- confirm at the definition
    site.

    Input format is chosen from the file name:

    * ``*.csv`` -- a ``csv.DictReader`` (one dict per row, keyed by the
      header row).
    * anything else -- JSON lines: each non-blank line is parsed with
      ``json.loads``. Whitespace-only lines are skipped instead of raising
      ``json.JSONDecodeError``.

    :param input_file: path-like string; must support ``.endswith``.
    :yields: an iterable of parsed items.
    """
    fh = get_file_handle(input_file, 'r')

    # All setup after the handle is acquired runs under try/finally, so a
    # failure in the suffix check or the CSV field-size setup cannot leak
    # the open handle.
    try:
        if input_file.endswith('.csv'):
            set_max_field_size_limit()
            reader = csv.DictReader(fh)
        else:
            # Blank separator lines are not valid JSON documents; skip them.
            reader = (json.loads(line) for line in fh if line.strip())

        yield reader
    finally:
        fh.close()