def read(self, reader):
    """Load ref/value pairs from ``reader`` into this map.

    ``reader`` is iterated row by row. The first row is the header; each
    later row is passed through a transformer built from
    ``self.field_names``, whose first output field is parsed as the integer
    ref and whose remaining fields become the key stored in ``self.values``.

    On success ``self.values`` is replaced and ``self.next_ref`` is set to
    one more than the largest ref read, or 0 if there were no data rows.

    Raises:
        StopIteration: ``reader`` yields no rows at all (no header).
        MissingFieldError: the header lacks the ref field or a map field.
        DuplicateValuesError: two rows carry the same value.
        DuplicateRefsError: two rows carry the same ref.
    """
    rows = iter(reader)

    # The next() builtin works on Python 2.6+ and 3; rows.next() is 2-only.
    header = next(rows)
    if self.ref_field_name not in header:
        raise MissingFieldError(self.ref_field_name)
    for field_name in self.transformer.output_field_names:
        if field_name not in header:
            # Report the field that is actually missing, not the ref field.
            raise MissingFieldError(field_name)

    # first field is ID
    field_maps = FieldMaps()
    for input_field_name in self.field_names:
        field_maps.add(input_field_name, input_field_name)
    map_transformer = SimpleTransformer(field_maps)
    map_transformer.bind(header)

    count = 0
    values = {}
    for row in rows:
        transformed_row = map_transformer.transform(row)
        ref = int(transformed_row[0])
        value = transformed_row[1:]
        values[value] = ref

        count += 1

    # A repeated value overwrites its dict entry, so fewer keys than rows
    # means duplicate values; fewer distinct refs means duplicate refs.
    if count != len(values):
        raise DuplicateValuesError()
    if count != len(set(values.values())):
        raise DuplicateRefsError()

    self.values = values
    # max() raises ValueError on an empty sequence; a header-only file is a
    # valid empty map whose numbering starts at 0.
    self.next_ref = max(values.values()) + 1 if values else 0
Ejemplo n.º 2
0
    def read(self, reader):
        """Replace the map contents with the pairs read from ``reader``.

        The first row yielded by ``reader`` is treated as the header. Each
        following row is run through a transformer built from
        ``self.field_names``; the first transformed field is converted with
        ``int`` and used as the ref, the rest is the value key.

        ``self.values`` and ``self.next_ref`` are only updated when the whole
        input passed the duplicate checks. ``self.next_ref`` becomes the
        largest ref plus one, or 0 when no data rows were read.

        Raises:
            StopIteration: ``reader`` is empty (there is no header row).
            MissingFieldError: a required field is absent from the header.
            DuplicateValuesError: the same value appears in more than one row.
            DuplicateRefsError: the same ref appears in more than one row.
        """
        rows = iter(reader)

        # Builtin next() instead of the Python 2-only .next() method.
        header = next(rows)
        if self.ref_field_name not in header:
            raise MissingFieldError(self.ref_field_name)
        for field_name in self.transformer.output_field_names:
            if field_name not in header:
                # Name the missing map field rather than the ref field.
                raise MissingFieldError(field_name)

        # first field is ID
        field_maps = FieldMaps()
        for input_field_name in self.field_names:
            field_maps.add(input_field_name, input_field_name)
        map_transformer = SimpleTransformer(field_maps)
        map_transformer.bind(header)

        row_count = 0
        refs_by_value = {}
        for row_count, row in enumerate(rows, 1):
            fields = map_transformer.transform(row)
            refs_by_value[fields[1:]] = int(fields[0])

        # Duplicate values collapse into one key; duplicate refs collapse in
        # the set of refs. Either way the size drops below the row count.
        if row_count != len(refs_by_value):
            raise DuplicateValuesError()
        if row_count != len(set(refs_by_value.values())):
            raise DuplicateRefsError()

        self.values = refs_by_value
        # Guard the empty case: max() of an empty sequence raises ValueError.
        if refs_by_value:
            self.next_ref = max(refs_by_value.values()) + 1
        else:
            self.next_ref = 0
Ejemplo n.º 3
0
    def bind(self, header):
        """Bind to ``header``, keeping every field not in ``fields_to_remove``.

        Stores the surviving field names, in header order, as
        ``self.output_field_names`` and builds a ``SimpleTransformer`` that
        maps each of them to itself.
        """
        kept_names = tuple(
            name for name in header if name not in self.fields_to_remove)
        self.output_field_names = kept_names

        identity_maps = FieldMaps()
        for name in kept_names:
            identity_maps.add(name, name)

        self.transformer = SimpleTransformer(identity_maps)
        self.transformer.bind(header)
Ejemplo n.º 4
0
class ExtractMap(Transformer):
    """Transformer that adds a ref field backed by a ``Map``.

    Unless ``keep_fields`` is set, the input fields named in the map field
    spec are dropped from the output. All row work is delegated to a
    ``SimpleTransformer`` that is created in ``bind``.
    """

    def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
        """Parse both specs and create the backing ``Map``.

        ``map_fields_spec`` is parsed into the field maps handed to ``Map``.
        ``ref_field_spec`` is parsed into a single field map: its output
        field name goes to ``Map`` and its input field name becomes
        ``self.ref_field_name``.
        """
        mapped_fields = FieldMaps()
        mapped_fields.parse_from(map_fields_spec)
        if keep_fields:
            self.fields_to_remove = set()
        else:
            self.fields_to_remove = mapped_fields.input_field_names

        # TODO: this is ugly, beautify
        parsed_ref = FieldMaps().parse_field_map_string(ref_field_spec)

        self.map = Map(mapped_fields, parsed_ref.output_field_name)
        self.ref_field_name = parsed_ref.input_field_name
        # Created in bind(); the delegating properties need it to be set.
        self.transformer = None

    def bind(self, header):
        """Build and bind the delegate transformer for ``header``.

        Header fields not in ``self.fields_to_remove`` pass through
        unchanged, followed by one extra output field named
        ``self.ref_field_name`` that is produced by ``RefField(self.map)``.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        kept_names = tuple(
            name for name in header if name not in self.fields_to_remove)

        output_maps = FieldMaps()
        for name in kept_names:
            output_maps.add(name, name)
        output_maps.add(
            input_field_name=None,
            output_field_name=self.ref_field_name,
            extractor_field=RefField(self.map))

        self.transformer = SimpleTransformer(output_maps)
        self.transformer.bind(header)

    def read_map(self, reader):
        """Load the backing map from ``reader``."""
        self.map.read(reader)

    def write_map(self, writer):
        """Write the backing map to ``writer``."""
        self.map.write(writer)

    @property
    def map_changed(self):
        """The backing map's ``changed`` flag."""
        return self.map.changed

    @property
    def output_field_names(self):
        """Output field names of the delegate transformer (needs ``bind``)."""
        return self.transformer.output_field_names

    @property
    def transform(self):
        """The delegate transformer's ``transform`` callable (needs ``bind``)."""
        return self.transformer.transform
class ExtractMap(Transformer):
    """Delegating transformer that appends a ``Map``-backed ref field.

    The fields consumed by the map are removed from the output unless the
    instance was created with ``keep_fields=True``.
    """

    def __init__(self, map_fields_spec, ref_field_spec, keep_fields=False):
        """Set up the backing ``Map`` from the two spec strings.

        The field maps parsed from ``map_fields_spec`` and the output field
        name parsed from ``ref_field_spec`` are passed to ``Map``; the input
        field name parsed from ``ref_field_spec`` is kept as
        ``self.ref_field_name``.
        """
        value_field_maps = FieldMaps()
        value_field_maps.parse_from(map_fields_spec)
        if not keep_fields:
            self.fields_to_remove = value_field_maps.input_field_names
        else:
            self.fields_to_remove = set()

        # TODO: this is ugly, beautify
        ref_map = FieldMaps().parse_field_map_string(ref_field_spec)

        self.map = Map(value_field_maps, ref_map.output_field_name)
        self.ref_field_name = ref_map.input_field_name
        # No delegate until bind() is called.
        self.transformer = None

    def bind(self, header):
        """Create the delegate ``SimpleTransformer`` and bind it to ``header``.

        The delegate copies each header field that is not in
        ``self.fields_to_remove`` and adds a final field called
        ``self.ref_field_name`` whose extractor is ``RefField(self.map)``.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        surviving = tuple(
            candidate for candidate in header
            if candidate not in self.fields_to_remove)

        delegate_maps = FieldMaps()
        for candidate in surviving:
            delegate_maps.add(candidate, candidate)
        delegate_maps.add(input_field_name=None,
                          output_field_name=self.ref_field_name,
                          extractor_field=RefField(self.map))

        self.transformer = SimpleTransformer(delegate_maps)
        self.transformer.bind(header)

    def read_map(self, reader):
        """Forward to ``self.map.read``."""
        self.map.read(reader)

    def write_map(self, writer):
        """Forward to ``self.map.write``."""
        self.map.write(writer)

    @property
    def map_changed(self):
        """Value of ``self.map.changed``."""
        return self.map.changed

    @property
    def output_field_names(self):
        """Field names produced by the delegate; requires a prior ``bind``."""
        return self.transformer.output_field_names

    @property
    def transform(self):
        """Row-transforming callable of the delegate; requires ``bind``."""
        return self.transformer.transform
    def bind(self, header):
        """Build the delegate transformer for ``header`` and bind it.

        Every header field that is not in ``self.fields_to_remove`` is mapped
        to itself; one more output field, named ``self.ref_field_name`` and
        extracted by ``RefField(self.map)``, is added after them.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        kept = tuple(
            name for name in header if name not in self.fields_to_remove)

        combined_maps = FieldMaps()
        for name in kept:
            combined_maps.add(name, name)
        ref_extractor = RefField(self.map)
        combined_maps.add(input_field_name=None,
                          output_field_name=self.ref_field_name,
                          extractor_field=ref_extractor)

        self.transformer = SimpleTransformer(combined_maps)
        self.transformer.bind(header)
Ejemplo n.º 7
0
class RemoveFields(Transformer):
    """Transformer that passes through all fields except ``fields_to_remove``.

    The row work is done by a ``SimpleTransformer`` created in ``bind``.
    """

    def __init__(self, fields_to_remove):
        """Remember which field names to drop; no delegate exists yet."""
        self.fields_to_remove = fields_to_remove
        self.transformer = None

    def bind(self, header):
        """Compute the surviving field names and bind a delegate to ``header``.

        ``self.output_field_names`` is set to the header fields, in order,
        that are not in ``self.fields_to_remove``.
        """
        survivors = tuple(
            name for name in header if name not in self.fields_to_remove)
        self.output_field_names = survivors

        identity_maps = FieldMaps()
        for name in survivors:
            identity_maps.add(name, name)

        self.transformer = SimpleTransformer(identity_maps)
        self.transformer.bind(header)

    @property
    def transform(self):
        """The delegate's ``transform`` callable (requires a prior ``bind``)."""
        return self.transformer.transform
Ejemplo n.º 8
0
    def bind(self, header):
        """Prepare ``self.transformer`` for rows shaped like ``header``.

        The new ``SimpleTransformer`` copies each header field that is absent
        from ``self.fields_to_remove`` and then emits an additional field
        named ``self.ref_field_name`` via ``RefField(self.map)``.
        """
        # TODO: DRY: copied from RemoveFields
        # except for adding the ref field
        # (extract common stuff into ProxyTransformer?)
        retained = tuple(
            header_field
            for header_field in header
            if header_field not in self.fields_to_remove)

        mapping = FieldMaps()
        for header_field in retained:
            mapping.add(header_field, header_field)
        mapping.add(
            input_field_name=None,
            output_field_name=self.ref_field_name,
            extractor_field=RefField(self.map))

        self.transformer = SimpleTransformer(mapping)
        self.transformer.bind(header)
Ejemplo n.º 9
0
 def __init__(self, map_field_maps, ref_field_name):
     """Create an empty map.

     ``map_field_maps`` is wrapped in a ``SimpleTransformer``;
     ``ref_field_name`` is stored as given. The map starts with no values
     and with ``next_ref`` at 0.
     """
     self.transformer = SimpleTransformer(map_field_maps)
     self.ref_field_name = ref_field_name
     self.values = {}
     self.next_ref = 0
Ejemplo n.º 10
0
class Map(object):
    """Assigns integer refs to distinct values extracted from rows.

    ``self.values`` maps each value (the output of ``self.transformer``) to
    its ref. ``self.next_ref`` is the ref the next unseen value will get.
    The map can be loaded from any iterable of rows and written to any
    object with a ``writerow`` method.
    """

    # Becomes True on an instance once translate() hands out a new ref.
    changed = False

    def __init__(self, map_field_maps, ref_field_name):
        """Create an empty map over the fields in ``map_field_maps``."""
        self.transformer = SimpleTransformer(map_field_maps)
        self.ref_field_name = ref_field_name
        self.values = {}
        self.next_ref = 0

    def read(self, reader):
        """Load ref/value pairs from ``reader``, replacing current contents.

        The first row is the header. Each later row is passed through a
        transformer built from ``self.field_names``; its first output field
        is parsed as the integer ref and the rest is the value.

        ``self.next_ref`` becomes the largest ref plus one, or 0 when there
        are no data rows.

        Raises:
            StopIteration: ``reader`` yields no rows at all (no header).
            MissingFieldError: the header lacks the ref field or a map field.
            DuplicateValuesError: two rows carry the same value.
            DuplicateRefsError: two rows carry the same ref.
        """
        rows = iter(reader)

        # The next() builtin works on Python 2.6+ and 3; rows.next() is 2-only.
        header = next(rows)
        if self.ref_field_name not in header:
            raise MissingFieldError(self.ref_field_name)
        for field_name in self.transformer.output_field_names:
            if field_name not in header:
                # Report the field that is actually missing.
                raise MissingFieldError(field_name)

        # first field is ID
        field_maps = FieldMaps()
        for input_field_name in self.field_names:
            field_maps.add(input_field_name, input_field_name)
        map_transformer = SimpleTransformer(field_maps)
        map_transformer.bind(header)

        count = 0
        values = {}
        for row in rows:
            transformed_row = map_transformer.transform(row)
            ref = int(transformed_row[0])
            value = transformed_row[1:]
            values[value] = ref

            count += 1

        # Repeated values overwrite their key and repeated refs collapse in
        # the set, so a size below the row count reveals the duplicate.
        if count != len(values):
            raise DuplicateValuesError()
        if count != len(set(values.values())):
            raise DuplicateRefsError()

        self.values = values
        # max() raises ValueError on an empty sequence; a header-only input
        # is a valid empty map.
        self.next_ref = max(values.values()) + 1 if values else 0

    def write(self, writer):
        """Write a header row, then one ``(ref, *value)`` row per entry."""
        writer.writerow(self.field_names)

        # items() exists on Python 2 and 3; iteritems() is Python 2-only.
        for value, ref in self.values.items():
            writer.writerow((ref,) + tuple(value))

    def translate(self, input_row):
        """Return the ref for ``input_row``'s value, allocating one if new."""
        key = self.transformer.transform(input_row)
        ref = self.values.setdefault(key, self.next_ref)
        # A returned ref equal to next_ref means the key was just inserted.
        if ref == self.next_ref:
            self.next_ref += 1
            self.changed = True
        return ref

    def bind(self, header):
        """Bind the value transformer to ``header``."""
        self.transformer.bind(header)

    @property
    def field_names(self):
        """Ref field name followed by the transformer's output field names."""
        return (self.ref_field_name,) + self.transformer.output_field_names
def select(input_file, output_file, transform_spec):
    """Run the transform described by ``transform_spec`` over CSV data.

    Rows are read from ``input_file`` with ``csv.reader`` and the result is
    written to ``output_file`` with ``csv.writer``, using a
    ``SimpleTransformer`` built from the parsed spec.
    """
    csv_in = csv.reader(input_file)
    csv_out = csv.writer(output_file)

    spec_maps = FieldMaps()
    spec_maps.parse_from(transform_spec)

    transformer = SimpleTransformer(spec_maps)
    transformer.process(csv_in, csv_out)
 def __init__(self, map_field_maps, ref_field_name):
     """Initialise an empty map.

     Builds a ``SimpleTransformer`` from ``map_field_maps``, keeps
     ``ref_field_name``, and starts with an empty ``values`` dict and a
     ``next_ref`` of 0.
     """
     self.transformer = SimpleTransformer(map_field_maps)
     self.ref_field_name = ref_field_name
     self.values = {}
     self.next_ref = 0
class Map(object):
    """Table of value -> integer ref, with loading and saving.

    Values are whatever ``self.transformer.transform`` returns for a row and
    are used as dictionary keys in ``self.values``. ``self.next_ref`` holds
    the ref that the next previously unseen value receives.
    """

    # Class-level default; translate() sets it to True on the instance when
    # it allocates a new ref.
    changed = False

    def __init__(self, map_field_maps, ref_field_name):
        """Start with no values; ``map_field_maps`` selects the value fields."""
        self.transformer = SimpleTransformer(map_field_maps)
        self.ref_field_name = ref_field_name
        self.values = {}
        self.next_ref = 0

    def read(self, reader):
        """Replace the map contents with the pairs read from ``reader``.

        The first row yielded by ``reader`` is the header. Every following
        row is run through a transformer built from ``self.field_names``;
        the first transformed field is converted with ``int`` and used as
        the ref, the remaining fields are the value.

        State is only updated after the duplicate checks pass.
        ``self.next_ref`` is then the largest ref plus one, or 0 when no
        data rows were read.

        Raises:
            StopIteration: ``reader`` is empty (there is no header row).
            MissingFieldError: a required field is absent from the header.
            DuplicateValuesError: a value appears in more than one row.
            DuplicateRefsError: a ref appears in more than one row.
        """
        rows = iter(reader)

        # Builtin next() instead of the Python 2-only .next() method.
        header = next(rows)
        if self.ref_field_name not in header:
            raise MissingFieldError(self.ref_field_name)
        for field_name in self.transformer.output_field_names:
            if field_name not in header:
                # Name the missing map field rather than the ref field.
                raise MissingFieldError(field_name)

        # first field is ID
        field_maps = FieldMaps()
        for input_field_name in self.field_names:
            field_maps.add(input_field_name, input_field_name)
        map_transformer = SimpleTransformer(field_maps)
        map_transformer.bind(header)

        row_count = 0
        refs_by_value = {}
        for row_count, row in enumerate(rows, 1):
            fields = map_transformer.transform(row)
            refs_by_value[fields[1:]] = int(fields[0])

        # Duplicates shrink the dict (values) or the set (refs) below the
        # number of rows read.
        if row_count != len(refs_by_value):
            raise DuplicateValuesError()
        if row_count != len(set(refs_by_value.values())):
            raise DuplicateRefsError()

        self.values = refs_by_value
        # Guard the empty case: max() of an empty sequence raises ValueError.
        if refs_by_value:
            self.next_ref = max(refs_by_value.values()) + 1
        else:
            self.next_ref = 0

    def write(self, writer):
        """Emit ``self.field_names`` and then one row per stored pair.

        Each data row is the ref followed by the fields of the value.
        """
        writer.writerow(self.field_names)

        # dict.items() is available on both Python 2 and 3, unlike
        # iteritems().
        for value, ref in self.values.items():
            writer.writerow((ref,) + tuple(value))

    def translate(self, input_row):
        """Look up (or allocate) the ref for the value in ``input_row``.

        A new value is stored under ``self.next_ref``, which is then
        incremented, and ``self.changed`` is set to True.
        """
        key = self.transformer.transform(input_row)
        ref = self.values.setdefault(key, self.next_ref)
        # setdefault only returns next_ref when it has just inserted the key.
        if ref == self.next_ref:
            self.next_ref += 1
            self.changed = True
        return ref

    def bind(self, header):
        """Bind ``self.transformer`` to ``header``."""
        self.transformer.bind(header)

    @property
    def field_names(self):
        """Tuple of the ref field name plus the transformer's output names."""
        return (self.ref_field_name,) + self.transformer.output_field_names