예제 #1
0
    def __next__(self):
        """Pull one row from the tap and wrap it as a schema/record pair.

        ``self.tap.emit()`` is called once. A falsy result is returned
        unchanged. Otherwise the row is validated, a schema is chosen
        (``self.tap_schema`` when set, else inferred by feeding the row to
        ``self.builder``), and a ``SchemaMessage`` plus a ``RecordMessage``
        are formatted with ``format_message`` and encoded.

        ``self.tap_key`` may be a single string (or bytes) or a list of
        strings; every key must be present in the emitted row.

        Returns:
            A 2-tuple ``(schema, record)`` holding the ``.encode()`` results
            of the two formatted messages, or the falsy value produced by
            ``self.tap.emit()``.

        Raises:
            ValueError: if the emitted row is not a dict, or a tap key is
                missing from the row.
            Exception: if ``self.tap_key`` is neither a string nor a list
                of strings.
        """
        row = self.tap.emit()
        if not row:
            # Nothing emitted: hand the falsy value back untouched.
            return row

        if not isinstance(row, dict):
            raise ValueError('tap.emit() must returned a dict')

        # Normalise the key to a list *before* using it, so an unsupported
        # type yields the intended error instead of an unbound local or an
        # "unhashable type" TypeError from the membership test below.
        tap_key = self.tap_key
        if isinstance(tap_key, (str, bytes)):
            key_properties = [tap_key]
        elif isinstance(tap_key, list) and all(
            isinstance(key, (str, bytes)) for key in tap_key
        ):
            key_properties = list(tap_key)
        else:
            raise Exception('tap key must be a string or list of strings')

        if any(key not in row for key in key_properties):
            raise ValueError('tap key not exist in elements from tap')

        if self.tap_schema:
            schema = self.tap_schema
        else:
            # No explicit schema: grow the inferred one with this row.
            self.builder.add_object(row)
            schema = self.builder.to_schema()

        schema_message = SchemaMessage(
            stream = self.tap_name,
            schema = schema,
            key_properties = key_properties,
            bookmark_properties = None,
        )
        encoded_schema = format_message(schema_message).encode()

        record_message = RecordMessage(
            stream = self.tap_name, record = row, time_extracted = None
        )
        encoded_record = format_message(record_message).encode()

        return (encoded_schema, encoded_record)
예제 #2
0
def transformation(rows, builder, function: Callable, tap_schema = None):
    """Apply ``function`` to every record message found in ``rows``.

    Each element of ``rows`` is parsed with ``singer.parse_message``.
    Messages that are not ``singer.RecordMessage`` instances are passed
    through as the original, unparsed line. For record messages the record
    is handed to ``function``:

    * a falsy result drops the record;
    * a 2-tuple result is read as ``(record, types)``, where ``types`` maps
      property names to type names used to override the inferred schema;
    * any other truthy result is used as the new record.

    The record is added to ``builder`` and the resulting schema is patched,
    first with matching entries from ``tap_schema['properties']`` and then
    with the ``types`` overrides (looked up through ``type_mapping``). An
    encoded schema message and an encoded record message are appended to
    the output for every kept record.

    Args:
        rows: iterable of serialized messages.
        builder: schema builder exposing ``add_object`` and ``to_schema``.
        function: callable applied to each record.
        tap_schema: optional schema whose ``'properties'`` entries replace
            the inferred ones for properties present in the record.

    Returns:
        A list containing pass-through lines and, for each kept record, the
        encoded schema message followed by the encoded record message.

    Raises:
        json.decoder.JSONDecodeError: re-raised after logging when a line
            cannot be parsed.
        ValueError: if ``function`` returns a tuple whose length is not 2,
            or a type override does not resolve to a string.
    """
    output = []

    for raw in rows:
        try:
            message = singer.parse_message(raw)
        except json.decoder.JSONDecodeError:
            logger.error('Unable to parse:\n{}'.format(raw))
            raise

        # Anything that is not a record is forwarded untouched.
        if not isinstance(message, singer.RecordMessage):
            output.append(raw)
            continue

        transformed = function(message.record)
        if not transformed:
            # A falsy result means the record is filtered out.
            continue

        overrides = None
        if isinstance(transformed, tuple):
            if len(transformed) != 2:
                raise ValueError(
                    'transformation must returned (row, dictionary) or row.'
                )
            transformed, overrides = transformed

        builder.add_object(transformed)
        schema = builder.to_schema()

        if tap_schema:
            # Prefer the tap-declared definition for known properties.
            for name, definition in tap_schema['properties'].items():
                if name in schema['properties']:
                    schema['properties'][name] = definition

        if overrides:
            for name, declared in overrides.items():
                if name not in schema['properties']:
                    continue
                resolved = type_mapping.get(declared, declared)
                if not isinstance(resolved, str):
                    raise ValueError(
                        f'value {resolved} from {name} not supported.'
                    )
                schema['properties'][name] = {'type': resolved}

        stream = message.stream
        schema_text = format_message(
            SchemaMessage(
                stream=stream,
                schema=schema,
                key_properties=None,
                bookmark_properties=None,
            )
        )
        record_text = format_message(
            RecordMessage(stream=stream, record=transformed, time_extracted=None)
        )
        output.extend((schema_text.encode(), record_text.encode()))

    return output
예제 #3
0
def transformation(rows, builder, function: Callable):
    """Apply ``function`` to every record message found in ``rows``.

    Each element of ``rows`` is parsed with ``singer.parse_message``.
    Messages that are not ``singer.RecordMessage`` instances are passed
    through as the original, unparsed line. For record messages the record
    is handed to ``function``; a falsy result drops the record, otherwise
    the result is added to ``builder`` and an encoded schema message plus
    an encoded record message are appended to the output.

    Args:
        rows: iterable of serialized messages.
        builder: schema builder exposing ``add_object`` and ``to_schema``.
        function: callable applied to each record.

    Returns:
        A list containing pass-through lines and, for each kept record, the
        encoded schema message followed by the encoded record message.

    Raises:
        json.decoder.JSONDecodeError: re-raised after logging when a line
            cannot be parsed.
    """
    output = []

    for raw in rows:
        try:
            message = singer.parse_message(raw)
        except json.decoder.JSONDecodeError:
            logger.error('Unable to parse:\n{}'.format(raw))
            raise

        # Anything that is not a record is forwarded untouched.
        if not isinstance(message, singer.RecordMessage):
            output.append(raw)
            continue

        transformed = function(message.record)
        if not transformed:
            # A falsy result means the record is filtered out.
            continue

        builder.add_object(transformed)
        stream = message.stream
        schema_text = format_message(
            SchemaMessage(
                stream=stream,
                schema=builder.to_schema(),
                key_properties=None,
                bookmark_properties=None,
            )
        )
        record_text = format_message(
            RecordMessage(stream=stream, record=transformed, time_extracted=None)
        )
        output.extend((schema_text.encode(), record_text.encode()))

    return output