Code example #1
Score: 0
File: utils.py — Project: hunterjackson/timeseries_etl
def to_transport_message(msg: dict, validate_schema=True):
    """
    Serialize a message dict into a JSON string suitable for placing on a Kafka topic.

    Datetime-typed fields are converted to ISO-8601 strings so the payload is
    JSON serializable; the inverse conversion is performed by
    read_transport_message.

    :param msg: message dict; non-underscore keys map to {'type': ..., 'value': ...}
    :param validate_schema: when True, validate the serialized message against
                            the Kafka message schema (raises on failure)
    :return: JSON string with '_document_type' set to 'transport'
    """
    # importing locally keeps this fix self-contained in the function
    from copy import deepcopy

    # Work on a deep copy so the caller's dict (and its nested field dicts)
    # are not mutated as a side effect of serialization.
    msg = deepcopy(msg)
    msg['_document_type'] = 'transport'
    for k, v in msg.items():
        # keys starting with '_' are metadata, not data fields
        if k.startswith('_'):
            continue

        # convert datetimes into ISO-8601 strings (JSON has no datetime type)
        if v['type'] == 'datetime':
            msg[k]['value'] = v['value'].isoformat()

    msg = json.dumps(msg)

    if validate_schema:
        # importing here to avoid import loop
        from timeseries_etl.schema_validators import validate_kafka_messsage
        validate_kafka_messsage(msg)

    return msg
Code example #2
Score: 0
    def test_kafka_message_doc_type(self):
        """
        Exercise accepted and rejected values of the '_document_type' field.
        :return:
        """
        message = deepcopy(self.base_message)

        # the base message's document type is valid as-is
        self.assertTrue(validate_kafka_messsage(message))

        # 'instruction' is the other accepted document type
        message['_document_type'] = 'instruction'
        self.assertTrue(validate_kafka_messsage(message))

        # any unrecognised value must be rejected by the schema
        message['_document_type'] = 'junk'
        with self.assertRaises(ValidationError):
            validate_kafka_messsage(message)
Code example #3
Score: 0
File: utils.py — Project: hunterjackson/timeseries_etl
def read_transport_message(msg: str, validate_schema=True):
    """
    Deserialize a JSON string taken off a Kafka topic back into a message dict.

    Datetime-typed fields (serialized as ISO-8601 strings by
    to_transport_message) are parsed back into datetime objects.

    :param msg: JSON string whose '_document_type' must be 'transport'
    :param validate_schema: when True, validate against the Kafka message schema
    :return: the deserialized message dict
    :raises TypeError: if the document is not a transport document
    """
    message = json.loads(msg)
    if message['_document_type'] != 'transport':
        raise TypeError('Not a transport document')

    if validate_schema:
        # importing here to avoid import loop
        from timeseries_etl.schema_validators import validate_kafka_messsage
        validate_kafka_messsage(message)

    # restore datetime fields from their ISO-8601 string form
    for key, field in message.items():
        if key[0] == '_':
            continue
        if field['type'] == 'datetime':
            message[key]['value'] = date_parser(field['value'], ignoretz=True)

    return message
Code example #4
Score: 0
    def test_kafka_message_fields(self):
        """Exercise required field keys and the presence of data fields."""
        message = deepcopy(self.base_message)

        # a field missing its 'value' key must not validate
        del message['field1']['value']
        with self.assertRaises(ValidationError, msg='No value in field and still passed'):
            validate_kafka_messsage(message)

        # a field missing its 'type' key must not validate
        message['field1']['value'] = 10
        del message['field1']['type']
        with self.assertRaises(ValidationError, msg='No type in field and still passed'):
            validate_kafka_messsage(message)

        # an unrecognised type name must not validate
        message['field1']['type'] = 'durpidy'
        with self.assertRaises(ValidationError, msg='Invalid type in field and still passed'):
            validate_kafka_messsage(message)

        # a message stripped of every data field must not validate
        for name in ('field1', 'field2', 'field3', 'field4', 'field5'):
            del message[name]
        with self.assertRaises(ValidationError, msg='No fields in message and still passed'):
            validate_kafka_messsage(message)
Code example #5
Score: 0
    return record


if __name__ == '__main__':
    # Script entry point: generate random records and publish them to a Kafka
    # topic, optionally validating each record against the message schema.
    import argparse

    parser = argparse.ArgumentParser(description='generate and insert random information into a Kafka Topic')
    parser.add_argument('--topic', default='raw', type=str, help='Name of topic to submit to')
    parser.add_argument('--width', default=3, type=int, help='Number of values in each record')
    parser.add_argument('--count', default=10, type=int, help='Number of records to submit')
    parser.add_argument('--validate', action='store_true', help='validate each record against schema')
    parser.add_argument('--sleepseconds', default=0, type=float, help='The seconds between each record post')

    args = parser.parse_args()

    # every record is serialized to UTF-8 encoded JSON before being sent
    producer = KafkaProducer(bootstrap_servers='kafka', value_serializer=lambda v: json.dumps(v).encode('utf-8'))

    topic = args.topic
    column_count = args.width
    row_count = args.count

    column_names = generate_column_names(column_count)  # generate column names predictably
    # lazily generate records using the column names with random integers as values
    records = (generate_random_record(column_names) for _ in range(row_count))

    for record in records:
        if args.validate:
            validate_kafka_messsage(record)  # raises error if not a valid message, here mostly for testing
        producer.send(topic, record)
        sleep(args.sleepseconds)  # throttle the post rate; 0 means no delay

    producer.flush()  # wait for messages to send, does not confirm message received
Code example #6
Score: 0
    def test_kafka_message_field_types(self):
        """Verify that each field's declared type must agree with its value."""

        # an int value declared as 'str' must fail
        message = deepcopy(self.base_message)
        message['field1']['type'] = 'str'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)

        # a str value declared as 'int' must fail
        message = deepcopy(self.base_message)
        message['field3']['type'] = 'int'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)

        # a str value declared as 'float' must fail
        message['field3']['type'] = 'float'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)

        # a date value declared as 'int' must fail
        message['field4']['type'] = 'int'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)

        # a date value declared as 'str' is acceptable
        message = deepcopy(self.base_message)
        message['field4']['type'] = 'str'
        self.assertTrue(validate_kafka_messsage(message))

        # a bool value declared as 'str' must fail
        message = deepcopy(self.base_message)
        message['field5']['type'] = 'str'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)

        # a non-parseable string in a datetime field must fail
        message = deepcopy(self.base_message)
        message['timestamp']['value'] = 'definitely not a timestamp'
        with self.assertRaises(TypeError):
            validate_kafka_messsage(message)