def handle_record_message(self, stream, line_data):
    """Route one RECORD message to the buffer registered for ``stream``.

    Bumps ``self.message_counter``, marks ``stream`` in
    ``self.streams_added_to``, stores the new counter value as the stream's
    entry in ``self.stream_add_watermarks`` and then passes ``line_data`` to
    the stream's ``add_record_message``.

    :param stream: key used to look the stream up in ``self.streams``.
    :param line_data: the message handed through to ``add_record_message``.
    :raises TargetError: if ``stream`` is not present in ``self.streams``.
    """
    registered = self.streams
    if stream not in registered:
        message = 'A record for stream {} was encountered before a corresponding schema'.format(stream)
        raise TargetError(message)

    # The watermark is the counter value *after* counting this message.
    self.message_counter += 1
    watermark = self.message_counter

    self.streams_added_to.add(stream)
    self.stream_add_watermarks[stream] = watermark

    registered[stream].add_record_message(line_data)
def handle_record_message(self, stream, line_data):
    """Count a RECORD message and hand it to the buffer for ``stream``.

    The message counter, ``streams_added_to`` and the stream's watermark are
    all updated *before* the record is passed to the buffer, so a record that
    the buffer rejects is still counted.

    Buffering is best-effort: any exception raised by ``add_record_message``
    is logged and swallowed rather than propagated, so one bad record does
    not abort processing.

    :param stream: key used to look the stream up in ``self.streams``.
    :param line_data: the message handed through to ``add_record_message``.
    :raises TargetError: if ``stream`` is not present in ``self.streams``.
    """
    if stream not in self.streams:
        raise TargetError(
            'A record for stream {} was encountered before a corresponding schema'
            .format(stream))

    self.message_counter += 1
    self.streams_added_to.add(stream)
    self.stream_add_watermarks[stream] = self.message_counter

    try:
        self.streams[stream].add_record_message(line_data)
    except Exception as e:
        # Deliberately not re-raised (best-effort), but a dropped record must
        # be visible: report which stream and why at WARNING, with traceback.
        LOGGER.warning(
            'Dropping record for stream {}: {!r}'.format(stream, e),
            exc_info=True)
        # The full payload stays at DEBUG so record contents are only logged
        # when explicitly requested.
        LOGGER.debug('line processing failed: {}'.format(line_data))
def _require_message_key(line_data, key, line):
    """Raise ``TargetError`` unless ``key`` is present in the decoded message."""
    if key not in line_data:
        raise TargetError('`{}` is a required key: {}'.format(key, line))


def _line_handler(state_tracker, target, invalid_records_detect, invalid_records_threshold,
                  max_batch_rows, max_batch_size, line):
    """Decode one line of input as JSON and dispatch it on its ``type``.

    * ``SCHEMA``: validates the schema, then either registers a new
      ``BufferedSingerStream`` on ``state_tracker`` or, if the stream is
      already registered, calls ``update_schema`` on the existing one.
    * ``RECORD``: forwarded to ``state_tracker.handle_record_message``.
    * ``ACTIVATE_VERSION``: flushes the stream via ``state_tracker`` and then
      calls ``target.activate_version`` with the stream buffer and version.
    * ``STATE``: forwarded to ``state_tracker.handle_state_message``.

    :param state_tracker: object exposing ``streams``, ``register_stream``,
        ``handle_record_message``, ``flush_stream`` and
        ``handle_state_message``.
    :param target: object exposing ``activate_version``.
    :param invalid_records_detect: passed through to ``BufferedSingerStream``.
    :param invalid_records_threshold: passed through to
        ``BufferedSingerStream``.
    :param max_batch_rows: when truthy, assigned to the new buffer's
        ``max_rows``.
    :param max_batch_size: when truthy, assigned to the new buffer's
        ``max_buffer_size``.
    :param line: the raw text of one message.
    :raises json.decoder.JSONDecodeError: if ``line`` is not valid JSON (the
        line is logged first, then the error is re-raised).
    :raises TargetError: if the message is not a JSON object, lacks a
        required key, carries an invalid schema, refers to an unregistered
        stream in ``ACTIVATE_VERSION``, or has an unknown ``type``.
    """
    try:
        line_data = json.loads(line)
    except json.decoder.JSONDecodeError:
        LOGGER.error("Unable to parse JSON: {}".format(line))
        raise

    # Valid JSON need not be an object: `[]`, `5`, `"type"` or `null` would
    # otherwise fail below with an opaque TypeError instead of a TargetError.
    if not isinstance(line_data, dict):
        raise TargetError('Message must be a JSON object: {}'.format(line))

    if 'type' not in line_data:
        raise TargetError('`type` is a required key: {}'.format(line))

    message_type = line_data['type']

    if message_type == 'SCHEMA':
        _require_message_key(line_data, 'stream', line)
        stream = line_data['stream']

        _require_message_key(line_data, 'schema', line)
        schema = line_data['schema']

        schema_validation_errors = json_schema.validation_errors(schema)
        if schema_validation_errors:
            raise TargetError(
                '`schema` is an invalid JSON Schema instance: {}'.format(line),
                *schema_validation_errors)

        # `key_properties` is optional; absent means None.
        key_properties = line_data.get('key_properties')

        if stream not in state_tracker.streams:
            buffered_stream = BufferedSingerStream(
                stream,
                schema,
                key_properties,
                invalid_records_detect=invalid_records_detect,
                invalid_records_threshold=invalid_records_threshold)
            # Only override the buffer's own limits when a limit was supplied.
            if max_batch_rows:
                buffered_stream.max_rows = max_batch_rows
            if max_batch_size:
                buffered_stream.max_buffer_size = max_batch_size

            state_tracker.register_stream(stream, buffered_stream)
        else:
            state_tracker.streams[stream].update_schema(schema, key_properties)
    elif message_type == 'RECORD':
        _require_message_key(line_data, 'stream', line)

        state_tracker.handle_record_message(line_data['stream'], line_data)
    elif message_type == 'ACTIVATE_VERSION':
        _require_message_key(line_data, 'stream', line)
        _require_message_key(line_data, 'version', line)

        stream = line_data['stream']
        if stream not in state_tracker.streams:
            raise TargetError(
                'A ACTIVATE_VERSION for stream {} was encountered before a corresponding schema'
                .format(stream))

        # Grab the buffer first, flush it, and only then activate the version.
        stream_buffer = state_tracker.streams[stream]
        state_tracker.flush_stream(stream)
        target.activate_version(stream_buffer, line_data['version'])
    elif message_type == 'STATE':
        state_tracker.handle_state_message(line_data)
    else:
        raise TargetError('Unknown message type {} in message {}'.format(
            message_type,
            line))