Code example #1
0
def unzip(csv_in, fields, csv_out_spec, csv_out_unspec, zip_field='id'):
    """Split the columns of `csv_in` into two CSV outputs.

    The columns named in `fields` go to `csv_out_spec`; all remaining
    columns go to `csv_out_unspec`.  Every output row is prefixed with a
    shared id column (named `zip_field`) so the two files can be joined
    back together later.

    :param csv_in: iterable of rows; first row is the header
    :param fields: column names to route to `csv_out_spec`
    :param csv_out_spec: csv writer for the selected fields
    :param csv_out_unspec: csv writer for the remaining fields
    :param zip_field: name of the generated join-id column
    :raises DuplicateFieldError: if `zip_field` already exists in the input
    """
    input_csv = iter(csv_in)

    # next() builtin instead of .next(): works on both Python 2 and 3
    # (iterator.next() was removed in Python 3).
    header_row = next(input_csv)
    header = Header(header_row)

    if zip_field in header:
        raise DuplicateFieldError(zip_field)

    spec_extractors = [header.extractor(field) for field in fields]
    unspec_extractors = [
        header.extractor(field) for field in header_row if field not in fields
    ]

    def extract_to(output, extractors, row_id, row):
        # Write the join id first, then the extracted field values.
        output.writerow([str(row_id)] +
                        [extract_field(row) for extract_field in extractors])

    def unzip_row(row_id, row):
        extract_to(csv_out_spec, spec_extractors, row_id, row)
        extract_to(csv_out_unspec, unspec_extractors, row_id, row)

    # The header row uses the zip_field name itself as its id value;
    # data rows get their 0-based position from enumerate().
    unzip_row(zip_field, header_row)
    for zip_id, row in enumerate(input_csv):
        unzip_row(zip_id, row)
Code example #2
0
File: unzip.py  Project: ceumicrodata/csvtools
def unzip(csv_in, fields, csv_out_spec, csv_out_unspec, zip_field='id'):
    """Split the columns of `csv_in` into two CSV outputs.

    Columns listed in `fields` are written to `csv_out_spec`; every other
    column goes to `csv_out_unspec`.  Each output row starts with a common
    id column (`zip_field`) so the two files can later be re-joined.

    :param csv_in: iterable of rows; first row is the header
    :param fields: column names routed to `csv_out_spec`
    :param csv_out_spec: csv writer receiving the selected fields
    :param csv_out_unspec: csv writer receiving the remaining fields
    :param zip_field: name of the generated join-id column
    :raises DuplicateFieldError: if `zip_field` is already an input column
    """
    input_csv = iter(csv_in)

    # next() builtin instead of .next(): the latter is Python-2-only.
    header_row = next(input_csv)
    header = Header(header_row)

    if zip_field in header:
        raise DuplicateFieldError(zip_field)

    spec_extractors = [header.extractor(field) for field in fields]
    unspec_extractors = [
        header.extractor(field)
        for field in header_row
        if field not in fields]

    def extract_to(output, extractors, row_id, row):
        # Join id first, then the extracted values for this output.
        output.writerow(
            [str(row_id)]
            + [extract_field(row) for extract_field in extractors])

    def unzip_row(row_id, row):
        extract_to(csv_out_spec, spec_extractors, row_id, row)
        extract_to(csv_out_unspec, unspec_extractors, row_id, row)

    # Header row carries zip_field as its id; data rows get their
    # 0-based position from enumerate().
    unzip_row(zip_field, header_row)
    for zip_id, row in enumerate(input_csv):
        unzip_row(zip_id, row)
Code example #3
0
    def extract(self, reader, writer):
        """Copy `reader` to `writer`, appending a mapped entity-reference column.

        Reads the header from `reader`, extracts the entity key fields
        (self.fields_map.input_fields) from each row, maps the key through
        self.mapper, and writes every input row extended with the resulting
        reference value.  The output header is the input header plus
        self.ref_field_map.input_fields.

        :param reader: iterable of rows; first row is the header
        :param writer: csv writer receiving header + transformed rows
        """
        ireader = iter(reader)
        # next() builtin instead of .next(): works on Python 2 and 3.
        input_header = Header(next(ireader))

        extract_entity = tuple_extractor(
            input_header.extractors(self.fields_map.input_fields))

        def entity_ref(row):
            # Map the extracted entity key to its reference value.
            return self.mapper.map(extract_entity(row))

        # Pass all original columns through, then append the reference.
        transform = list_extractor(
            input_header.extractors(input_header) + [entity_ref])
        output_header = (list(input_header) +
                         list(self.ref_field_map.input_fields))

        writer.writerow(output_header)
        writer.writerows(transform(row) for row in ireader)
Code example #4
0
File: extract_map.py  Project: e3krisztian/csvtools
    def extract(self, reader, writer):
        """Copy `reader` to `writer`, appending a mapped entity-reference column.

        Extracts the entity key fields (self.fields_map.input_fields) from
        each row, maps them via self.mapper, and emits the original row
        extended with the mapped reference.  The output header is the input
        header followed by self.ref_field_map.input_fields.

        :param reader: iterable of rows; first row is the header
        :param writer: csv writer receiving header + transformed rows
        """
        ireader = iter(reader)
        # next() builtin instead of .next(): the latter is Python-2-only.
        input_header = Header(next(ireader))

        extract_entity = tuple_extractor(
            input_header.extractors(self.fields_map.input_fields))

        def entity_ref(row):
            # Resolve the extracted entity key to its reference value.
            return self.mapper.map(extract_entity(row))

        # All original columns pass through, then the reference is appended.
        transform = list_extractor(
            input_header.extractors(input_header) + [entity_ref])
        output_header = (
            list(input_header) + list(self.ref_field_map.input_fields))

        writer.writerow(output_header)
        writer.writerows(transform(row) for row in ireader)
Code example #5
0
    def __init__(self, ref_field, fields, reader, appender):
        """Initialize the mapper from an existing entity file.

        Validates that `ref_field` and `fields` match the header read from
        `reader`, builds extractors to reorder rows into entity-file column
        order, then loads all existing (ref, values) mappings.

        :param ref_field: name of the reference/id column in the entity file
        :param fields: the entity value column names
        :param reader: iterable of rows; first row is the entity-file header
        :param appender: writer used later to append new mappings
        """
        self.appender = appender
        self.max_ref = 0
        self.values_to_ref = dict()
        fields = list(fields)
        reader = iter(reader)
        # next() builtin instead of .next(): works on Python 2 and 3.
        header = Header(next(reader))

        self._check_parameters(ref_field, fields, header)

        # Extractor that reorders a (ref_field + fields) row into the
        # column order of the entity file on disk.
        param_header = Header([ref_field] + fields)
        self.to_entity_file_order = list_extractor(
            param_header.extractors(header))

        def permutated_reader():
            # Yield rows rearranged as (ref, *field values).
            transform = list_extractor([header.extractor(ref_field)] +
                                       header.extractors(fields))
            for row in reader:
                yield transform(row)

        self._read_existing_mappings(permutated_reader())
Code example #6
0
File: extract_map.py  Project: e3krisztian/csvtools
    def __init__(self, ref_field, fields, reader, appender):
        """Initialize the mapper from an existing entity file.

        Checks `ref_field` and `fields` against the header read from
        `reader`, prepares extractors for converting rows into the entity
        file's column order, and loads the existing (ref, values) mappings.

        :param ref_field: name of the reference/id column in the entity file
        :param fields: the entity value column names
        :param reader: iterable of rows; first row is the entity-file header
        :param appender: writer used later to append new mappings
        """
        self.appender = appender
        self.max_ref = 0
        self.values_to_ref = dict()
        fields = list(fields)
        reader = iter(reader)
        # next() builtin instead of .next(): the latter is Python-2-only.
        header = Header(next(reader))

        self._check_parameters(ref_field, fields, header)

        # Extractor that reorders a (ref_field + fields) row into the
        # on-disk column order of the entity file.
        param_header = Header([ref_field] + fields)
        self.to_entity_file_order = list_extractor(
            param_header.extractors(header))

        def permutated_reader():
            # Yield each data row rearranged as (ref, *field values).
            transform = list_extractor(
                [header.extractor(ref_field)] + header.extractors(fields))
            for row in reader:
                yield transform(row)

        self._read_existing_mappings(permutated_reader())