def update(topic, schema_config, force=False):
    """Register (or update) Avro schemas with the schema registry.

    Globs ``*.avsc`` files under this module's directory — all of them when
    *topic* is ``'all'``, otherwise only files named ``{topic}-*.avsc`` — and
    registers each one under a subject derived from the schema's
    ``namespace`` (dots replaced by dashes) plus its ``name``.

    Parameters
    ----------
    topic : str
        Topic name used to select schema files, or ``'all'``.
    schema_config : dict or str
        Configuration passed straight to ``CachedSchemaRegistryClient``.
    force : bool, optional
        When True, set subject compatibility to ``'NONE'`` so breaking
        schema changes are accepted; otherwise enforce ``'BACKWARD'``.
    """
    client = CachedSchemaRegistryClient(schema_config)

    # Single glob pattern instead of two duplicated branches.
    pattern = '**/*.avsc' if topic == 'all' else f'**/{topic}-*.avsc'

    for schema_file in Path(__file__).parent.glob(pattern):
        schema_str = schema_file.read_text()
        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        # Subject convention: namespace with dots flattened to dashes,
        # then the record name, e.g. "my-ns-RecordName".
        subject = schema_dict['namespace'].replace('.', '-') + '-' + schema_dict['name']

        # 'NONE' disables registry compatibility checks so a forced update
        # can push incompatible changes through.
        client.update_compatibility('NONE' if force else 'BACKWARD', subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
        except avro_error.ClientError as error:
            log.error(f'Error adding/updating {schema_file}: {error.message}')
def update_avro_compatibility(topic_name, compatibility_level, schema_registry_url='http://127.0.0.1:8081'):
    """Set the schema-registry compatibility level for a topic.

    Applies *compatibility_level* to both the ``{topic_name}-key`` and
    ``{topic_name}-value`` subjects.

    Parameters
    ----------
    topic_name : str
        Kafka topic whose key/value subjects are updated.
    compatibility_level : str
        One of the registry's levels (case-insensitive); the
        ``*_TRANSITIVE`` variants are also accepted.
    schema_registry_url : str, optional
        Base URL of the schema registry.

    Raises
    ------
    ValueError
        If *compatibility_level* is not a recognised level. (Raised before
        any registry connection is made.)
    """
    # Full set of levels accepted by the Confluent Schema Registry config
    # API, including the transitive variants the original list omitted.
    compatibility_levels = [
        "NONE", "FULL", "FORWARD", "BACKWARD",
        "FULL_TRANSITIVE", "FORWARD_TRANSITIVE", "BACKWARD_TRANSITIVE",
    ]

    level = compatibility_level.upper()
    if level not in compatibility_levels:
        # ValueError is more precise than a bare Exception and remains
        # backward-compatible for callers catching Exception; message typo
        # ("Compatilibility") fixed.
        raise ValueError(
            "Compatibility level not in {}".format(compatibility_levels))

    schema_registry = CachedSchemaRegistryClient(url=schema_registry_url)
    schema_registry.update_compatibility(level=level, subject=topic_name + "-key")
    schema_registry.update_compatibility(level=level, subject=topic_name + "-value")
def process_csv(csv):
    """Register the particle-release Avro schema and publish CSV rows to Kafka.

    The incoming *csv* string is expected to hold four unnamed columns
    (time, lat, lon, nparticles). Every row becomes one record inside a
    single 'website-run' release message produced to the
    'mil-darpa-oot-particle-releases' topic.

    Raises
    ------
    ValueError
        If the CSV yields no rows (nothing to release).
    """
    # Avro schema describing one particle release and its record array.
    # NOTE(review): the schema name says "particles.releases" while the
    # subject below says "particle-releases" — looks intentional but verify.
    schema_dict = {
        "name": "mil.darpa.oot.particles.releases",
        "type": "record",
        "doc": "A particle release",
        "fields": [
            {"name": "id", "type": "string", "doc": "Unique particle release identifier"},
            {"name": "records", "type": {
                "type": "array",
                "items": {
                    "type": "record",
                    "name": "release",
                    "fields": [
                        {"name": "time", "type": "string", "doc": "ISO8601 Date String"},
                        {"name": "lat", "type": "double", "doc": "wgs84 latitude"},
                        {"name": "lon", "type": "double", "doc": "wgs84 longitude"},
                        {"name": "nparticles", "type": "int", "doc": "Number of particles released"},
                    ],
                },
            }},
        ],
    }

    subject = 'mil-darpa-oot-particle-releases-value'

    # Compatibility 'NONE' lets this schema replace whatever is registered.
    registry = CachedSchemaRegistryClient(url=f'http://{kafka_base}:7002')
    registry.update_compatibility('NONE', subject=subject)
    registry.register(subject, schema.Parse(json.dumps(schema_dict)))

    df = pd.read_csv(
        StringIO(csv),
        header=None,
        names=['time', 'lat', 'lon', 'nparticles'],
        parse_dates=[0],
        infer_datetime_format=True
    )

    # Serialize each row, converting the parsed timestamp to an ISO string.
    records_to_send = []
    for _, row in df.iterrows():
        rec = row.to_dict()
        rec['time'] = rec['time'].isoformat()
        records_to_send.append(rec)

    if not records_to_send:
        raise ValueError("No particles to run")

    # One keyless message wrapping every record.
    to_send = [(None, {'id': 'website-run', 'records': records_to_send})]

    producer = EasyAvroProducer(
        schema_registry_url=f'http://{kafka_base}:7002',
        kafka_brokers=[f'{kafka_base}:7001'],
        kafka_topic='mil-darpa-oot-particle-releases',
        key_schema='nokey'
    )
    producer.produce(to_send)