# Assumed imports for the snippets below; module paths follow the confluent-kafka,
# avro, pandas, and easyavro packages these examples appear to use.
import json
import logging
from io import StringIO
from pathlib import Path

import pandas as pd
from avro import schema
from confluent_kafka.avro import CachedSchemaRegistryClient
from confluent_kafka.avro import error as avro_error
from easyavro import EasyAvroProducer

log = logging.getLogger(__name__)


def update(topic, schema_config, force=False):
    """Given a topic, update (or create) its schema in the registry"""
    client = CachedSchemaRegistryClient(schema_config)

    if topic == 'all':
        schema_files = Path(__file__).parent.glob('**/*.avsc')
    else:
        schema_files = Path(__file__).parent.glob(f'**/{topic}-*.avsc')

    for schema_file in schema_files:
        with open(schema_file) as f:
            schema_str = f.read()
        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        subject = schema_dict['namespace'].replace('.', '-') + '-' + schema_dict['name']
        # force=True relaxes the subject's compatibility so otherwise-incompatible schema changes can be registered
        if force:
            client.update_compatibility('NONE', subject=subject)
        else:
            client.update_compatibility('BACKWARD', subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
        except avro_error.ClientError as error:
            log.error(f'Error adding/updating {schema_file}: {error.message}')
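
# Illustrative invocation of update(); the registry URL and topic names are
# assumptions for this sketch, not values taken from the snippet above.
update('all', 'http://localhost:8081')                 # register every *.avsc found next to this module
update('orders', 'http://localhost:8081', force=True)  # hypothetical topic; force drops compatibility to NONE first
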
def update_avro_compatibility(topic_name, compatibility_level, schema_registry_url='http://127.0.0.1:8081'):
    """Set the compatibility level for a topic's key and value subjects in the Schema Registry."""
    compatibility_levels = ["NONE", "FULL", "FORWARD", "BACKWARD"]
    if compatibility_level.upper() in compatibility_levels:
        schema_registry = CachedSchemaRegistryClient(url=schema_registry_url)
        schema_registry.update_compatibility(level=compatibility_level.upper(), subject=topic_name + "-key")
        schema_registry.update_compatibility(level=compatibility_level.upper(), subject=topic_name + "-value")
    else:
        raise Exception(
            "Compatibility level not in {}".format(compatibility_levels))
def process_csv(csv):
    """Parse a CSV of particle releases, register its Avro schema, and produce a single record to Kafka."""

    schema_dict = {
        "name": "mil.darpa.oot.particles.releases",
        "type": "record",
        "doc": "A particle release",
        "fields": [
            { "name": "id", "type": "string", "doc": "Unique particle release identifier"},
            {
                "name": "records",
                "type": {
                    "type": "array",
                    "items": {
                        "type": "record",
                        "name": "release",
                        "fields": [
                            {"name": "time",       "type": "string",   "doc": "ISO8601 Date String"},
                            {"name": "lat",        "type": "double",   "doc": "wgs84 latitude"},
                            {"name": "lon",        "type": "double",   "doc": "wgs84 longitude"},
                            {"name": "nparticles", "type": "int",      "doc": "Number of particles released"}
                        ]
                    }
                }
            }
        ]
    }
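
    # A message conforming to this schema looks roughly like (illustrative values only):
    # {
    #     "id": "example-release",
    #     "records": [
    #         {"time": "2020-01-01T00:00:00", "lat": 29.5, "lon": -87.2, "nparticles": 1000}
    #     ]
    # }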

    subject = 'mil-darpa-oot-particle-releases-value'
    client = CachedSchemaRegistryClient(url=f'http://{kafka_base}:7002')
    client.update_compatibility('NONE', subject=subject)

    avro_schema = schema.Parse(json.dumps(schema_dict))
    client.register(subject, avro_schema)

    df = pd.read_csv(
        StringIO(csv),
        header=None,
        names=['time', 'lat', 'lon', 'nparticles'],
        parse_dates=[0],
        infer_datetime_format=True
    )
    records_to_send = []
    for i, x in df.iterrows():
        x.time = x.time.isoformat()
        records_to_send.append(x.to_dict())

    if not records_to_send:
        raise ValueError("No particles to run")

    # Single (key, value) message; the key is None because the producer is configured with key_schema='nokey'
    to_send = [(
        None,
        {
            'id': 'website-run',
            'records': records_to_send
        }
    )]

    p = EasyAvroProducer(
        schema_registry_url=f'http://{kafka_base}:7002',
        kafka_brokers=[f'{kafka_base}:7001'],
        kafka_topic='mil-darpa-oot-particle-releases',
        key_schema='nokey'
    )
    p.produce(to_send)
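
# Hypothetical end-to-end call for process_csv(); the rows below are illustrative and
# assume the module-level kafka_base host points at reachable Kafka/Schema Registry services.
sample_csv = (
    "2020-01-01T00:00:00,29.5,-87.2,1000\n"
    "2020-01-01T01:00:00,29.6,-87.3,1000\n"
)
process_csv(sample_csv)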