Ejemplo n.º 1
0
    def read(self, reader):
        """Load the value -> ref mapping from an iterable of rows.

        The first row yielded by ``reader`` is treated as the header. It must
        contain ``self.ref_field_name`` and every name listed in
        ``self.transformer.output_field_names``. Each remaining row is passed
        through a ``SimpleTransformer`` built from identity mappings of
        ``self.field_names``; the first transformed field is parsed as the
        integer ref and the remaining fields form the value.

        On success ``self.values`` maps each value to its ref and
        ``self.next_ref`` is one more than the largest ref that was read.

        Raises:
            MissingFieldError: a required field is absent from the header;
                the error is constructed with the name of the missing field.
            DuplicateValuesError: two data rows share the same value.
            DuplicateRefsError: two data rows share the same ref.
            ValueError: a ref is not a valid integer, or there are no data
                rows at all (``max()`` of an empty sequence).
        """
        rows = iter(reader)

        # The next() builtin works on Python 2.6+ and Python 3, unlike the
        # Python-2-only ``rows.next()`` method.
        header = next(rows)
        if self.ref_field_name not in header:
            raise MissingFieldError(self.ref_field_name)
        for field_name in self.transformer.output_field_names:
            if field_name not in header:
                # Report the field that is actually missing, not the ref
                # field (which was already verified above).
                raise MissingFieldError(field_name)

        # first field is ID
        field_maps = FieldMaps()
        for input_field_name in self.field_names:
            field_maps.add(input_field_name, input_field_name)
        map_transformer = SimpleTransformer(field_maps)
        map_transformer.bind(header)

        count = 0
        values = dict()
        for row in rows:
            transformed_row = map_transformer.transform(row)
            ref = int(transformed_row[0])
            value = transformed_row[1:]
            values[value] = ref

            count += 1

        # A dict silently collapses repeated keys, so fewer entries than rows
        # means at least one value occurred more than once.
        if count != len(values):
            raise DuplicateValuesError()
        # Likewise, fewer distinct refs than rows means a ref was reused.
        if count != len(set(values.values())):
            raise DuplicateRefsError()

        self.values = values
        self.next_ref = max(values.values()) + 1
    def read(self, reader):
        """Load the value -> ref mapping from an iterable of rows.

        The first row yielded by ``reader`` is treated as the header. It must
        contain ``self.ref_field_name`` and every name listed in
        ``self.transformer.output_field_names``. Each remaining row is passed
        through a ``SimpleTransformer`` built from identity mappings of
        ``self.field_names``; the first transformed field is parsed as the
        integer ref and the remaining fields form the value.

        On success ``self.values`` maps each value to its ref and
        ``self.next_ref`` is one more than the largest ref that was read.

        Raises:
            MissingFieldError: a required field is absent from the header;
                the error is constructed with the name of the missing field.
            DuplicateValuesError: two data rows share the same value.
            DuplicateRefsError: two data rows share the same ref.
            ValueError: a ref is not a valid integer, or there are no data
                rows at all (``max()`` of an empty sequence).
        """
        rows = iter(reader)

        # The next() builtin works on Python 2.6+ and Python 3, unlike the
        # Python-2-only ``rows.next()`` method.
        header = next(rows)
        if self.ref_field_name not in header:
            raise MissingFieldError(self.ref_field_name)
        for field_name in self.transformer.output_field_names:
            if field_name not in header:
                # Report the field that is actually missing, not the ref
                # field (which was already verified above).
                raise MissingFieldError(field_name)

        # first field is ID
        field_maps = FieldMaps()
        for input_field_name in self.field_names:
            field_maps.add(input_field_name, input_field_name)
        map_transformer = SimpleTransformer(field_maps)
        map_transformer.bind(header)

        count = 0
        values = dict()
        for row in rows:
            transformed_row = map_transformer.transform(row)
            ref = int(transformed_row[0])
            value = transformed_row[1:]
            values[value] = ref

            count += 1

        # A dict silently collapses repeated keys, so fewer entries than rows
        # means at least one value occurred more than once.
        if count != len(values):
            raise DuplicateValuesError()
        # Likewise, fewer distinct refs than rows means a ref was reused.
        if count != len(set(values.values())):
            raise DuplicateRefsError()

        self.values = values
        self.next_ref = max(values.values()) + 1
Ejemplo n.º 3
0
    def bind(self, header):
        """Bind to ``header``, keeping every field not marked for removal.

        Stores the surviving field names (in header order) as the tuple
        ``self.output_field_names``, then builds a ``SimpleTransformer`` from
        identity mappings of those names, stores it in ``self.transformer``
        and binds it to ``header``.
        """
        kept_names = []
        for name in header:
            if name in self.fields_to_remove:
                continue
            kept_names.append(name)
        self.output_field_names = tuple(kept_names)

        # Each kept field maps onto itself: same input and output name.
        identity_maps = FieldMaps()
        for name in self.output_field_names:
            identity_maps.add(name, name)

        self.transformer = SimpleTransformer(identity_maps)
        self.transformer.bind(header)
Ejemplo n.º 4
0
    def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
        """Set up the map and the bookkeeping needed by a later ``bind``.

        ``map_fields_spec`` is parsed into a ``FieldMaps``; unless
        ``keep_fields`` is true, its input field names become
        ``self.fields_to_remove`` (otherwise that set is empty).
        ``ref_field_spec`` is parsed as a single field map: its output name
        is handed to ``Map`` and its input name is kept as
        ``self.ref_field_name``. ``self.transformer`` starts out as ``None``.
        """
        parsed_maps = FieldMaps()
        parsed_maps.parse_from(map_fields_spec)
        if keep_fields:
            self.fields_to_remove = set()
        else:
            self.fields_to_remove = parsed_maps.input_field_names

        # TODO: this is ugly, beautify
        ref_map = FieldMaps().parse_field_map_string(ref_field_spec)

        self.map = Map(parsed_maps, ref_map.output_field_name)
        self.ref_field_name = ref_map.input_field_name
        self.transformer = None
    def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
        """Parse both specs and initialise the instance state.

        Attributes set here:
          * ``fields_to_remove`` -- an empty set when ``keep_fields`` is
            true, otherwise the input field names of the parsed
            ``map_fields_spec``.
          * ``map`` -- a ``Map`` built from the parsed field maps and the
            output name of the parsed ``ref_field_spec``.
          * ``ref_field_name`` -- the input name of the parsed
            ``ref_field_spec``.
          * ``transformer`` -- ``None`` at construction time.
        """
        value_field_maps = FieldMaps()
        value_field_maps.parse_from(map_fields_spec)

        if not keep_fields:
            self.fields_to_remove = value_field_maps.input_field_names
        else:
            self.fields_to_remove = set()

        # TODO: this is ugly, beautify
        parsed_ref = FieldMaps().parse_field_map_string(ref_field_spec)
        map_ref_name = parsed_ref.output_field_name

        self.map = Map(value_field_maps, map_ref_name)
        self.ref_field_name = parsed_ref.input_field_name
        self.transformer = None
Ejemplo n.º 6
0
    def bind(self, header):
        """Bind to ``header``, passing kept fields through and adding a ref.

        Every header field not in ``self.fields_to_remove`` is mapped onto
        itself. One extra mapping is appended that has no input field, is
        output under ``self.ref_field_name`` and uses ``RefField(self.map)``
        as its extractor. The resulting ``SimpleTransformer`` is stored in
        ``self.transformer`` and bound to ``header``.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        kept_names = []
        for name in header:
            if name in self.fields_to_remove:
                continue
            kept_names.append(name)

        maps = FieldMaps()
        for name in kept_names:
            maps.add(name, name)

        # The ref field has no source column; its value comes from the
        # extractor.
        ref_extractor = RefField(self.map)
        maps.add(
            input_field_name=None,
            output_field_name=self.ref_field_name,
            extractor_field=ref_extractor)

        self.transformer = SimpleTransformer(maps)
        self.transformer.bind(header)
    def bind(self, header):
        """Create and bind the transformer for rows shaped like ``header``.

        Header fields listed in ``self.fields_to_remove`` are dropped; the
        others are mapped to themselves. A final mapping with
        ``input_field_name=None`` emits ``self.ref_field_name`` through a
        ``RefField`` wrapping ``self.map``. The transformer is assigned to
        ``self.transformer`` and then bound to ``header``.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        passthrough_names = [
            candidate for candidate in header
            if candidate not in self.fields_to_remove
        ]

        all_maps = FieldMaps()
        for passthrough_name in passthrough_names:
            all_maps.add(passthrough_name, passthrough_name)
        all_maps.add(
            input_field_name=None,
            output_field_name=self.ref_field_name,
            extractor_field=RefField(self.map),
        )

        self.transformer = SimpleTransformer(all_maps)
        self.transformer.bind(header)
def select(input_file, output_file, transform_spec):
    """Transform CSV rows from ``input_file`` into ``output_file``.

    ``transform_spec`` is parsed into a ``FieldMaps``, which configures a
    ``SimpleTransformer``; that transformer's ``process`` is run with a CSV
    reader over ``input_file`` and a CSV writer over ``output_file``.
    """
    csv_in = csv.reader(input_file)
    csv_out = csv.writer(output_file)

    spec_maps = FieldMaps()
    spec_maps.parse_from(transform_spec)

    transformer = SimpleTransformer(spec_maps)
    transformer.process(csv_in, csv_out)
Ejemplo n.º 9
0
def simple_transformer(field_maps_string):
    """Return an ``m.SimpleTransformer`` built from ``field_maps_string``.

    The string is parsed into a fresh ``FieldMaps`` via ``parse_from``.
    """
    parsed_maps = FieldMaps()
    parsed_maps.parse_from(field_maps_string)
    transformer = m.SimpleTransformer(parsed_maps)
    return transformer
Ejemplo n.º 10
0
def make_map(field_maps_spec, ref_field_name):
    """Return an ``m.Map`` for ``field_maps_spec`` and ``ref_field_name``.

    The spec is parsed into a fresh ``FieldMaps`` via ``parse_from``.
    """
    parsed_maps = FieldMaps()
    parsed_maps.parse_from(field_maps_spec)
    new_map = m.Map(parsed_maps, ref_field_name)
    return new_map