def __next__(self):
    """Pull one row from the tap and wrap it as a schema/record message pair.

    The row comes from ``self.tap.emit()``. A falsy result is returned
    unchanged, without any validation or message building.

    Returns:
        The falsy value produced by ``self.tap.emit()``, or a 2-tuple
        ``(schema_message, record_message)`` holding the ``.encode()``
        result of each formatted message.

    Raises:
        ValueError: if the emitted row is not a ``dict``, or if the tap key
            (any of the keys, when ``self.tap_key`` is a list) is absent
            from the row.
        Exception: if ``self.tap_key`` is neither a string/bytes nor a list
            of strings/bytes.
    """
    row = self.tap.emit()
    if not row:
        # Nothing to wrap: hand the falsy value back exactly as emitted.
        return row

    if not isinstance(row, dict):
        raise ValueError('tap.emit() must returned a dict')

    tap_key = self.tap_key
    # A list cannot be tested with `in` against a dict (it is unhashable),
    # so composite keys are checked member by member.
    if isinstance(tap_key, list):
        key_missing = any(key not in row for key in tap_key)
    else:
        key_missing = tap_key not in row
    if key_missing:
        raise ValueError('tap key not exist in elements from tap')

    if self.tap_schema:
        schema = self.tap_schema
    else:
        # No explicit schema configured: derive one from the rows seen so far.
        self.builder.add_object(row)
        schema = self.builder.to_schema()

    # Always resolve key_properties to a list, or fail with the intended
    # message instead of an unbound-local error.
    if isinstance(tap_key, (str, bytes)):
        key_properties = [tap_key]
    elif isinstance(tap_key, list) and all(
        isinstance(key, (str, bytes)) for key in tap_key
    ):
        key_properties = list(tap_key)
    else:
        raise Exception('tap key must be a string or list of strings')

    schema_message = SchemaMessage(
        stream=self.tap_name,
        schema=schema,
        key_properties=key_properties,
        bookmark_properties=None,
    )
    schema_line = format_message(schema_message)

    record_message = RecordMessage(
        stream=self.tap_name,
        record=row,
        time_extracted=None,
    )
    record_line = format_message(record_message)

    return (schema_line.encode(), record_line.encode())
def transformation(rows, builder, function: Callable, tap_schema=None):
    """Apply ``function`` to every record message found in ``rows``.

    Each line is parsed with ``singer.parse_message``. Lines that are not
    record messages are passed through untouched. For record messages,
    ``function`` receives the record and may return:

    * a falsy value: the record is dropped from the output;
    * a row: the row is emitted;
    * a 2-tuple ``(row, types)``: the row is emitted and ``types`` (a mapping
      of field name to type) overrides the inferred schema for those fields.

    Every emitted row is preceded by a schema message built from
    ``builder.to_schema()`` after the row has been added to ``builder``.
    Fields present in both the inferred schema and
    ``tap_schema['properties']`` take the definition from ``tap_schema``.

    Args:
        rows: iterable of raw message lines.
        builder: object exposing ``add_object`` and ``to_schema``.
        function: callable applied to each record.
        tap_schema: optional schema whose ``'properties'`` override the
            inferred ones.

    Returns:
        A list holding pass-through lines and, for each kept record, the
        encoded schema message followed by the encoded record message.

    Raises:
        json.decoder.JSONDecodeError: re-raised after logging when a line
            cannot be parsed.
        ValueError: if ``function`` returns a tuple whose length is not 2, or
            if a type override does not resolve to a string.
    """
    output = []
    for raw in rows:
        try:
            message = singer.parse_message(raw)
        except json.decoder.JSONDecodeError:
            logger.error('Unable to parse:\n{}'.format(raw))
            raise

        if not isinstance(message, singer.RecordMessage):
            # Anything that is not a record is forwarded as-is.
            output.append(raw)
            continue

        transformed = function(message.record)
        if not transformed:
            # A falsy result means the record is filtered out.
            continue

        overrides = None
        if isinstance(transformed, tuple):
            if len(transformed) != 2:
                raise ValueError(
                    'transformation must returned (row, dictionary) or row.'
                )
            transformed, overrides = transformed

        builder.add_object(transformed)
        schema = builder.to_schema()

        if tap_schema:
            for field, definition in tap_schema['properties'].items():
                if field in schema['properties']:
                    schema['properties'][field] = definition

        if overrides:
            for field, declared in overrides.items():
                if field not in schema['properties']:
                    continue
                resolved = type_mapping.get(declared, declared)
                if not isinstance(resolved, str):
                    raise ValueError(
                        f'value {resolved} from {field} not supported.'
                    )
                schema['properties'][field] = {'type': resolved}

        stream = message.stream
        schema_line = format_message(
            SchemaMessage(
                stream=stream,
                schema=schema,
                key_properties=None,
                bookmark_properties=None,
            )
        )
        record_line = format_message(
            RecordMessage(stream=stream, record=transformed, time_extracted=None)
        )
        output.extend([schema_line.encode(), record_line.encode()])

    return output
def transformation(rows, builder, function: Callable):
    """Apply ``function`` to every record message found in ``rows``.

    Each line is parsed with ``singer.parse_message``. Lines that are not
    record messages are passed through untouched. For record messages, the
    record is handed to ``function``; a falsy result drops the record,
    otherwise the result is added to ``builder`` and emitted as a schema
    message (from ``builder.to_schema()``) followed by a record message.

    Args:
        rows: iterable of raw message lines.
        builder: object exposing ``add_object`` and ``to_schema``.
        function: callable applied to each record.

    Returns:
        A list holding pass-through lines and, for each kept record, the
        encoded schema message followed by the encoded record message.

    Raises:
        json.decoder.JSONDecodeError: re-raised after logging when a line
            cannot be parsed.
    """
    output = []
    for raw in rows:
        try:
            message = singer.parse_message(raw)
        except json.decoder.JSONDecodeError:
            logger.error('Unable to parse:\n{}'.format(raw))
            raise

        if not isinstance(message, singer.RecordMessage):
            # Anything that is not a record is forwarded as-is.
            output.append(raw)
            continue

        transformed = function(message.record)
        if not transformed:
            # A falsy result means the record is filtered out.
            continue

        builder.add_object(transformed)
        schema = builder.to_schema()

        stream = message.stream
        schema_line = format_message(
            SchemaMessage(
                stream=stream,
                schema=schema,
                key_properties=None,
                bookmark_properties=None,
            )
        )
        record_line = format_message(
            RecordMessage(stream=stream, record=transformed, time_extracted=None)
        )
        output.extend([schema_line.encode(), record_line.encode()])

    return output