def unzip(csv_in, fields, csv_out_spec, csv_out_unspec, zip_field='id'):
    input_csv = iter(csv_in)
    header_row = next(input_csv)
    header = Header(header_row)
    if zip_field in header:
        raise DuplicateFieldError(zip_field)
    # Extractors for the requested fields and for every remaining column.
    extract_spec = list_extractor(header.extractors(fields))
    extract_unspec = list_extractor(
        header.extractor(field)
        for field in header_row
        if field not in fields)

    def extract_to(output, extract, row_id, row):
        output.writerow([str(row_id)] + extract(row))

    def unzip_row(row_id, row):
        extract_to(csv_out_spec, extract_spec, row_id, row)
        extract_to(csv_out_unspec, extract_unspec, row_id, row)

    # The header row carries the zip field's name as its id; data rows carry
    # their position in the input, so the two outputs can later be re-joined.
    unzip_row(zip_field, header_row)
    for zip_id, row in enumerate(input_csv):
        unzip_row(zip_id, row)
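
# A hedged usage sketch of unzip(): the file names and field names are
# hypothetical, and it assumes csv_in is a csv.reader-style iterable of rows
# while the two outputs are csv.writer-style objects, as the writerow() calls
# above suggest (Python 3 open() arguments used here).
def _unzip_example():
    import csv
    with open('people.csv', newline='') as src, \
            open('people_name_email.csv', 'w', newline='') as spec_out, \
            open('people_rest.csv', 'w', newline='') as unspec_out:
        unzip(csv.reader(src), ['name', 'email'],
              csv.writer(spec_out), csv.writer(unspec_out))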
def extract(self, reader, writer):
    ireader = iter(reader)
    input_header = Header(next(ireader))
    extract_entity = tuple_extractor(
        input_header.extractors(self.fields_map.input_fields))

    def entity_ref(row):
        return self.mapper.map(extract_entity(row))

    # Pass every input column through unchanged and append the mapped entity
    # reference as a new trailing column.
    transform = list_extractor(
        input_header.extractors(input_header) + [entity_ref])
    output_header = (
        list(input_header) + list(self.ref_field_map.input_fields))
    writer.writerow(output_header)
    writer.writerows(transform(row) for row in ireader)
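
# A stand-alone sketch of the shape extract() produces, with hypothetical
# field names and data and a toy mapper in place of the project's real
# fields_map / ref_field_map / mapper objects: every input column is passed
# through unchanged and one reference column is appended, derived from the
# entity fields.
def _extract_shape_example():
    input_header = ['surname', 'email', 'age']
    entity_fields = ['surname', 'email']     # stands in for fields_map.input_fields
    ref_column = 'person_ref'                # stands in for ref_field_map.input_fields

    rows = [['Smith', 'smith@example.com', '40'],
            ['Jones', 'jones@example.com', '35'],
            ['Smith', 'smith@example.com', '41']]

    refs = {}                                # toy mapper: one ref per distinct entity

    def map_entity(values):
        return refs.setdefault(values, len(refs))

    output = [input_header + [ref_column]]
    for row in rows:
        entity = tuple(row[input_header.index(f)] for f in entity_fields)
        output.append(row + [map_entity(entity)])
    return output                            # rows 1 and 3 share the same ref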
def __init__(self, ref_field, fields, reader, appender):
    self.appender = appender
    self.max_ref = 0
    self.values_to_ref = dict()
    fields = list(fields)
    reader = iter(reader)
    header = Header(next(reader))
    self._check_parameters(ref_field, fields, header)
    # Reorders rows given in (ref_field, *fields) order into the column order
    # the entity file on disk actually uses.
    param_header = Header([ref_field] + fields)
    self.to_entity_file_order = list_extractor(
        param_header.extractors(header))

    def permutated_reader():
        # Yield the existing entity rows with the reference column first,
        # then the entity fields in parameter order.
        transform = list_extractor(
            [header.extractor(ref_field)] + header.extractors(fields))
        for row in reader:
            yield transform(row)

    self._read_existing_mappings(permutated_reader())
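
# A stand-alone sketch of the column reordering permutated_reader() performs:
# the reference column first, then the entity fields in parameter order,
# regardless of how the existing entity file happens to be laid out. Plain
# index arithmetic is used here rather than the project's Header helpers, and
# the header and row data are hypothetical.
def _reorder_example():
    header = ['surname', 'id', 'email']      # entity file column order on disk
    ref_field, fields = 'id', ['surname', 'email']
    order = [header.index(ref_field)] + [header.index(f) for f in fields]
    row = ['Smith', '7', 'smith@example.com']
    return [row[i] for i in order]           # ['7', 'Smith', 'smith@example.com']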