def read(self, reader):
    """Load the value -> ref mapping from ``reader``.

    The first row produced by ``reader`` is treated as the header.  Each
    following row is projected onto ``self.field_names``; the first
    projected field is parsed as the integer ref and the remaining fields
    form the value that the ref stands for.

    On success ``self.values`` is replaced by the loaded mapping and
    ``self.next_ref`` is set to one past the largest ref read.

    Raises:
        MissingFieldError: the header lacks the ref field or one of the
            transformer's output fields.
        DuplicateValuesError: two rows carry the same value.
        DuplicateRefsError: two rows carry the same ref.
    """
    rows = iter(reader)
    # The next() builtin works on every iterator, unlike the
    # Python 2-only ``.next()`` method.
    header = next(rows)

    if self.ref_field_name not in header:
        raise MissingFieldError(self.ref_field_name)
    for field_name in self.transformer.output_field_names:
        if field_name not in header:
            # Report the field that is actually missing, not the ref field.
            raise MissingFieldError(field_name)

    # first field is ID
    field_maps = FieldMaps()
    for input_field_name in self.field_names:
        field_maps.add(input_field_name, input_field_name)
    map_transformer = SimpleTransformer(field_maps)
    map_transformer.bind(header)

    count = 0
    values = dict()
    for row in rows:
        transformed_row = map_transformer.transform(row)
        ref = int(transformed_row[0])
        value = transformed_row[1:]
        values[value] = ref
        count += 1

    # A repeated value overwrites its dict entry, so fewer keys than rows
    # means at least one value occurred twice.
    if count != len(values):
        raise DuplicateValuesError()
    # Likewise, fewer distinct refs than rows means a ref was reused.
    if count != len(set(values.values())):
        raise DuplicateRefsError()

    self.values = values
    # With no data rows there is no largest ref and max() would raise
    # ValueError; leave next_ref as it was in that case.
    if values:
        self.next_ref = max(values.values()) + 1
def bind(self, header):
    """Prepare a transformer that drops ``self.fields_to_remove``.

    Every name in ``header`` that is not in ``self.fields_to_remove`` is
    kept, in header order, and recorded in ``self.output_field_names``.
    ``self.transformer`` becomes a SimpleTransformer that maps each kept
    field to itself, bound to ``header``.
    """
    kept_names = []
    for name in header:
        if name not in self.fields_to_remove:
            kept_names.append(name)
    self.output_field_names = tuple(kept_names)

    identity_maps = FieldMaps()
    for name in self.output_field_names:
        identity_maps.add(name, name)

    self.transformer = SimpleTransformer(identity_maps)
    self.transformer.bind(header)
def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
    """Set up the map and the bookkeeping used later by ``bind``.

    ``map_fields_spec`` is parsed with ``FieldMaps.parse_from`` and
    ``ref_field_spec`` with ``FieldMaps.parse_field_map_string``.
    Unless ``keep_fields`` is true, the input fields of the parsed
    mapping are remembered in ``self.fields_to_remove``.
    The transformer itself is not created here (it starts as ``None``).
    """
    mapped_fields = FieldMaps()
    mapped_fields.parse_from(map_fields_spec)

    if keep_fields:
        self.fields_to_remove = set()
    else:
        self.fields_to_remove = mapped_fields.input_field_names

    # TODO: this is ugly, beautify
    ref_map = FieldMaps().parse_field_map_string(ref_field_spec)
    self.map = Map(mapped_fields, ref_map.output_field_name)
    self.ref_field_name = ref_map.input_field_name
    self.transformer = None
def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
    """Create the backing ``Map`` and note which input fields to drop.

    Args:
        map_fields_spec: field mapping spec, parsed by
            ``FieldMaps.parse_from``.
        ref_field_spec: single field map string, parsed by
            ``FieldMaps.parse_field_map_string``.
        keep_fields: when true nothing is marked for removal; otherwise
            the parsed mapping's input fields are.
    """
    parsed_maps = FieldMaps()
    parsed_maps.parse_from(map_fields_spec)

    removable = set() if keep_fields else parsed_maps.input_field_names
    self.fields_to_remove = removable

    # TODO: this is ugly, beautify
    parsed_ref = FieldMaps().parse_field_map_string(ref_field_spec)
    map_ref_name = parsed_ref.output_field_name
    self.map = Map(parsed_maps, map_ref_name)
    self.ref_field_name = parsed_ref.input_field_name

    # No transformer exists until bind() assigns one.
    self.transformer = None
def bind(self, header):
    """Prepare a transformer that keeps some fields and appends the ref.

    Header fields not listed in ``self.fields_to_remove`` are mapped to
    themselves; one extra output field named ``self.ref_field_name`` is
    added, produced by a ``RefField`` over ``self.map``.  The resulting
    SimpleTransformer is stored in ``self.transformer`` and bound to
    ``header``.
    """
    # TODO: DRY: copied from RemoveFields
    #       except for adding the ref field
    #       (extract common stuff into ProxyTransformer?)
    kept_names = []
    for name in header:
        if name not in self.fields_to_remove:
            kept_names.append(name)

    output_maps = FieldMaps()
    for name in kept_names:
        output_maps.add(name, name)

    # The ref column has no input field of its own.
    output_maps.add(
        input_field_name=None,
        output_field_name=self.ref_field_name,
        extractor_field=RefField(self.map))

    self.transformer = SimpleTransformer(output_maps)
    self.transformer.bind(header)
def bind(self, header):
    """Bind to ``header``: pass kept fields through and add the ref field.

    A field is kept when it is not in ``self.fields_to_remove``.  After
    the kept fields, an output field called ``self.ref_field_name`` is
    registered with ``RefField(self.map)`` as its extractor.
    ``self.transformer`` is replaced by the new, bound SimpleTransformer.
    """
    # TODO: DRY: copied from RemoveFields
    #       except for adding the ref field
    #       (extract common stuff into ProxyTransformer?)
    surviving = [
        column for column in header
        if column not in self.fields_to_remove]

    mapping = FieldMaps()
    for column in surviving:
        mapping.add(column, column)

    ref_extractor = RefField(self.map)
    mapping.add(
        input_field_name=None,
        output_field_name=self.ref_field_name,
        extractor_field=ref_extractor)

    self.transformer = SimpleTransformer(mapping)
    self.transformer.bind(header)
def select(input_file, output_file, transform_spec):
    """Run a CSV stream through the transformation in ``transform_spec``.

    ``input_file`` is wrapped in a ``csv.reader`` and ``output_file`` in a
    ``csv.writer``.  ``transform_spec`` is parsed by
    ``FieldMaps.parse_from`` and the resulting SimpleTransformer
    processes the reader into the writer.
    """
    source = csv.reader(input_file)
    sink = csv.writer(output_file)

    spec = FieldMaps()
    spec.parse_from(transform_spec)

    transformer = SimpleTransformer(spec)
    transformer.process(source, sink)
def simple_transformer(field_maps_string):
    """Return an ``m.SimpleTransformer`` built from ``field_maps_string``.

    The string is parsed with ``FieldMaps.parse_from``.
    """
    parsed = FieldMaps()
    parsed.parse_from(field_maps_string)
    transformer = m.SimpleTransformer(parsed)
    return transformer
def make_map(field_maps_spec, ref_field_name):
    """Return an ``m.Map`` for ``field_maps_spec`` and ``ref_field_name``.

    The spec is parsed with ``FieldMaps.parse_from`` before being handed
    to the ``m.Map`` constructor.
    """
    parsed = FieldMaps()
    parsed.parse_from(field_maps_spec)
    new_map = m.Map(parsed, ref_field_name)
    return new_map