def read(self, reader):
    """Load a value -> ref mapping from ``reader`` and store it on ``self``.

    The first row yielded by ``reader`` is treated as the header. Every
    following row is passed through a transformer built from
    ``self.field_names``; the first transformed field is parsed as the
    integer ref and the remaining fields form the value it is assigned to.

    On success ``self.values`` is replaced by the new mapping and
    ``self.next_ref`` is set to one past the highest ref read (or ``0``
    when there are no data rows).

    Args:
        reader: iterable of rows, header first (e.g. a ``csv.reader``).

    Raises:
        MissingFieldError: the header lacks ``self.ref_field_name`` or one
            of ``self.transformer.output_field_names``; the argument is
            the name of the missing field.
        DuplicateValuesError: two rows carry the same value.
        DuplicateRefsError: two rows carry the same ref.
        ValueError: a ref field cannot be parsed by ``int()``.
        StopIteration: ``reader`` yields nothing at all (no header row).
    """
    rows = iter(reader)
    # The next() builtin works on Python 2.6+ and 3 alike, unlike the
    # Python-2-only iterator method ``.next()``.
    header = next(rows)

    if self.ref_field_name not in header:
        raise MissingFieldError(self.ref_field_name)
    for field_name in self.transformer.output_field_names:
        if field_name not in header:
            # Name the field that is actually absent, not the ref field.
            raise MissingFieldError(field_name)

    # first field is ID
    # NOTE(review): relies on self.field_names listing the ref field first,
    # followed by the value fields -- confirm where field_names is defined.
    field_maps = FieldMaps()
    for input_field_name in self.field_names:
        field_maps.add(input_field_name, input_field_name)
    map_transformer = SimpleTransformer(field_maps)
    map_transformer.bind(header)

    count = 0
    values = {}
    for row in rows:
        transformed_row = map_transformer.transform(row)
        ref = int(transformed_row[0])
        value = transformed_row[1:]
        values[value] = ref
        count += 1

    # A repeated value overwrites its dict entry, so fewer keys than rows
    # means at least one value occurred more than once.
    if count != len(values):
        raise DuplicateValuesError()
    # Likewise, fewer distinct refs than rows means a ref was reused.
    if count != len(set(values.values())):
        raise DuplicateRefsError()

    self.values = values
    # max() of an empty sequence raises ValueError; with no data rows fall
    # back to 0, the same starting ref __init__ uses.
    self.next_ref = (max(values.values()) + 1) if values else 0
def bind(self, header):
    """Bind this transformer to ``header``, dropping the removed fields.

    Stores in ``self.output_field_names`` a tuple of the header fields, in
    header order, that are not in ``self.fields_to_remove``. It then builds
    a ``SimpleTransformer`` that maps each kept field to an output field of
    the same name, stores it in ``self.transformer`` and binds it to
    ``header``.
    """
    kept_names = []
    for name in header:
        if name in self.fields_to_remove:
            continue
        kept_names.append(name)
    self.output_field_names = tuple(kept_names)

    # Identity mapping: every surviving field keeps its own name.
    passthrough_maps = FieldMaps()
    for name in self.output_field_names:
        passthrough_maps.add(name, name)

    self.transformer = SimpleTransformer(passthrough_maps)
    self.transformer.bind(header)
def bind(self, header):
    """Bind to ``header``: pass kept fields through and append a ref field.

    Header fields not in ``self.fields_to_remove`` are mapped to output
    fields of the same name. One extra output field, named
    ``self.ref_field_name``, is added with no input field; it is produced
    by ``RefField(self.map)``. The resulting ``SimpleTransformer`` is
    stored in ``self.transformer`` and bound to ``header``.
    """
    # TODO: DRY: copied from RemoveFields
    # except for adding the ref field
    # (extract common stuff into ProxyTransformer?)
    surviving_fields = [
        name for name in header if name not in self.fields_to_remove
    ]

    maps = FieldMaps()
    for name in surviving_fields:
        maps.add(name, name)
    # The ref column has no source column; its extractor supplies the value.
    maps.add(
        input_field_name=None,
        output_field_name=self.ref_field_name,
        extractor_field=RefField(self.map),
    )

    self.transformer = SimpleTransformer(maps)
    self.transformer.bind(header)
def select(input_file, output_file, transform_spec):
    """Copy CSV data from ``input_file`` to ``output_file`` via a transform.

    ``transform_spec`` is parsed into a ``FieldMaps`` object, which drives
    a ``SimpleTransformer`` that processes rows from a ``csv.reader`` over
    ``input_file`` into a ``csv.writer`` over ``output_file``.
    """
    source_rows = csv.reader(input_file)
    sink = csv.writer(output_file)

    spec_maps = FieldMaps()
    spec_maps.parse_from(transform_spec)

    transformer = SimpleTransformer(spec_maps)
    transformer.process(source_rows, sink)
def __init__(self, map_field_maps, ref_field_name):
    """Create an empty map.

    Args:
        map_field_maps: field maps handed to ``SimpleTransformer`` to
            build ``self.transformer``.
        ref_field_name: name of the field that carries the ref.
    """
    self.transformer = SimpleTransformer(map_field_maps)
    self.ref_field_name = ref_field_name
    # Start with no known values; the first ref handed out is 0.
    self.next_ref = 0
    self.values = {}