Exemple #1
0
    def handle_record_message(self, stream, line_data):
        """Account for one RECORD message and pass it to its stream's buffer.

        The message counter is advanced by one, `stream` is added to
        `streams_added_to`, the new counter value is stored as the stream's
        entry in `stream_add_watermarks`, and `line_data` is handed to the
        stream object's `add_record_message`.

        Args:
            stream: Key identifying the stream in `self.streams`.
            line_data: The record message, forwarded unchanged.

        Raises:
            TargetError: If `stream` is not a key of `self.streams`.
        """
        registered = self.streams
        if stream not in registered:
            problem = 'A record for stream {} was encountered before a corresponding schema'.format(stream)
            raise TargetError(problem)

        # Bookkeeping first, then the actual buffering call.
        self.message_counter = self.message_counter + 1
        self.streams_added_to.add(stream)
        self.stream_add_watermarks[stream] = self.message_counter

        buffered_stream = registered[stream]
        buffered_stream.add_record_message(line_data)
Exemple #2
0
    def handle_record_message(self, stream, line_data):
        """Account for one RECORD message and try to buffer it for its stream.

        The message counter, `streams_added_to` and the stream's entry in
        `stream_add_watermarks` are updated before the record is handed to
        the stream object's `add_record_message`, so they advance even when
        buffering the record fails.

        Buffering is best-effort: any exception raised by
        `add_record_message` is logged and the record is skipped instead of
        propagating to the caller.

        Args:
            stream: Key identifying the stream in `self.streams`.
            line_data: The record message, forwarded unchanged.

        Raises:
            TargetError: If `stream` is not a key of `self.streams`.
        """
        if stream not in self.streams:
            raise TargetError(
                'A record for stream {} was encountered before a corresponding schema'
                .format(stream))

        self.message_counter += 1
        self.streams_added_to.add(stream)
        self.stream_add_watermarks[stream] = self.message_counter
        try:
            self.streams[stream].add_record_message(line_data)
        except Exception:  # best-effort: skip the record, but never silently
            # The warning carries the traceback so a dropped record is visible
            # at default log levels; the full payload stays at DEBUG level.
            LOGGER.warning(
                'line processing failed for stream %s; record skipped',
                stream, exc_info=True)
            LOGGER.debug('line processing failed: {}'.format(line_data))
def _line_handler(state_tracker, target, invalid_records_detect,
                  invalid_records_threshold, max_batch_rows, max_batch_size,
                  line):
    """Parse one JSON-encoded message line and dispatch it by its `type`.

    Handling per message type:

    * ``SCHEMA``: validates the schema, then either registers a new
      `BufferedSingerStream` with `state_tracker` or updates the schema of
      the already registered stream.
    * ``RECORD``: forwarded to `state_tracker.handle_record_message`.
    * ``ACTIVATE_VERSION``: flushes the stream through `state_tracker`, then
      calls `target.activate_version` with the stream buffer and version.
    * ``STATE``: forwarded to `state_tracker.handle_state_message`.

    Args:
        state_tracker: Object holding the registered streams (`streams`) and
            providing `register_stream`, `handle_record_message`,
            `flush_stream` and `handle_state_message`.
        target: Object providing `activate_version(stream_buffer, version)`.
        invalid_records_detect: Passed to `BufferedSingerStream` for new
            streams.
        invalid_records_threshold: Passed to `BufferedSingerStream` for new
            streams.
        max_batch_rows: When truthy, assigned to the new stream's `max_rows`.
        max_batch_size: When truthy, assigned to the new stream's
            `max_buffer_size`.
        line: The raw message text, expected to be a single JSON object.

    Raises:
        json.decoder.JSONDecodeError: If `line` is not valid JSON (logged
            first, then re-raised).
        TargetError: If the message is not a JSON object, lacks a required
            key, carries an invalid schema, references a stream with no
            schema yet (ACTIVATE_VERSION), or has an unknown `type`.
    """
    try:
        line_data = json.loads(line)
    except json.decoder.JSONDecodeError:
        LOGGER.error("Unable to parse JSON: {}".format(line))
        raise

    # json.loads also accepts arrays, strings, numbers and null. Without this
    # guard the key checks below would raise a bare TypeError or report a
    # misleading "missing key" for input that is not a message at all.
    if not isinstance(line_data, dict):
        raise TargetError('Message must be a JSON object: {}'.format(line))

    if 'type' not in line_data:
        raise TargetError('`type` is a required key: {}'.format(line))

    message_type = line_data['type']

    if message_type == 'SCHEMA':
        if 'stream' not in line_data:
            raise TargetError('`stream` is a required key: {}'.format(line))

        stream = line_data['stream']

        if 'schema' not in line_data:
            raise TargetError('`schema` is a required key: {}'.format(line))

        schema = line_data['schema']

        schema_validation_errors = json_schema.validation_errors(schema)
        if schema_validation_errors:
            raise TargetError(
                '`schema` is an invalid JSON Schema instance: {}'.format(line),
                *schema_validation_errors)

        # `key_properties` is optional; None is used when it is absent.
        key_properties = line_data.get('key_properties')

        if stream not in state_tracker.streams:
            buffered_stream = BufferedSingerStream(
                stream,
                schema,
                key_properties,
                invalid_records_detect=invalid_records_detect,
                invalid_records_threshold=invalid_records_threshold)
            # Falsy limits (None, 0) leave the stream's own values untouched.
            if max_batch_rows:
                buffered_stream.max_rows = max_batch_rows
            if max_batch_size:
                buffered_stream.max_buffer_size = max_batch_size

            state_tracker.register_stream(stream, buffered_stream)
        else:
            state_tracker.streams[stream].update_schema(schema, key_properties)
    elif message_type == 'RECORD':
        if 'stream' not in line_data:
            raise TargetError('`stream` is a required key: {}'.format(line))

        state_tracker.handle_record_message(line_data['stream'], line_data)
    elif message_type == 'ACTIVATE_VERSION':
        if 'stream' not in line_data:
            raise TargetError('`stream` is a required key: {}'.format(line))
        if 'version' not in line_data:
            raise TargetError('`version` is a required key: {}'.format(line))
        if line_data['stream'] not in state_tracker.streams:
            raise TargetError(
                'A ACTIVATE_VERSION for stream {} was encountered before a corresponding schema'
                .format(line_data['stream']))

        # Buffered records are flushed before the version is activated.
        stream_buffer = state_tracker.streams[line_data['stream']]
        state_tracker.flush_stream(line_data['stream'])
        target.activate_version(stream_buffer, line_data['version'])
    elif message_type == 'STATE':
        state_tracker.handle_state_message(line_data)
    else:
        raise TargetError('Unknown message type {} in message {}'.format(
            message_type, line))