}, { "name": "icu_yn", "type": ["string", "null"] }, { "name": "death_yn", "type": ["string", "null"] }, { "name": "medcond_yn", "type": ["string", "null"] } ] } """ schema_registry_conf = {'url': 'http://schema-registry:8081'} schema_registry_client = SchemaRegistryClient(schema_registry_conf) avro_deserializer = AvroDeserializer(value_schema_str, schema_registry_client, dictToCase) string_deserializer = StringDeserializer('utf_8') consumer_conf = {'bootstrap.servers': 'kafka:29092', 'key.deserializer': string_deserializer, 'value.deserializer': avro_deserializer, 'group.id': "group1"} #'auto.offset.reset': "earliest"} consumer = DeserializingConsumer(consumer_conf) consumer.subscribe(['topic_test']) consumeList = [] consumer.close()
import ast
import random
import time

from cassandra.cluster import Cluster
from confluent_kafka import DeserializingConsumer, SerializingProducer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer
from confluent_kafka.serialization import StringDeserializer, StringSerializer
from influxdb import InfluxDBClient

# EnrichedEventSchema, MetricSchema, Datafield, delivery_report,
# transform_enriched_event_to_cassandra_model and
# insert_enriched_event_to_cassandra are defined elsewhere in the project.


def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_dict = ast.literal_eval(schema_enriched_event_str)
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    string_deserializer = StringDeserializer('utf_8')
    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)

    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }
    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)
    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        # random suffix so each run starts from a fresh consumer group
        'group.id': args.group + str(random.random()),
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")
    cluster.register_user_type('datascience', 'datafield', Datafield)

    # InfluxDBClient's positional parameters are (host, port, username, ...),
    # so the database name must be passed by keyword.
    client_influxdb = InfluxDBClient(host='35.181.155.182', port=8086,
                                     database="dbsaleh2")
    # client_influxdb = InfluxDBClient(url="http://35.181.155.182:8086", "mydb")

    try:
        while True:
            try:
                # SIGINT can't be handled when polling, limit timeout to 1 second.
                start = time.time()
                msg = consumer.poll(1.0)
                if msg is None:
                    continue

                evt = msg.value()
                query = """
                INSERT INTO eventenrich (
                    "eventId", "dateTimeRef", "nomenclatureEv", "canal",
                    "media", "schemaVersion", "headerVersion", "serveur",
                    "adresseIP", "idTelematique", "idPersonne",
                    "dateNaissance", "paysResidence", "paysNaissance",
                    "revenusAnnuel", "csp", "eventBC", "eventContent")
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """

                # eventBc = evt["EventBusinessContext"][0].replace("com.bnpparibas.dsibddf.event.", "")
                eventBc = evt["eventBC"].replace("com.bnpparibas.dsibddf.event.", "")
                eventContent = evt["EventBusinessContext"][1]

                transformed_event = transform_enriched_event_to_cassandra_model(
                    evt, eventBc, schema_dict, eventContent)

                insert_enriched_event_to_cassandra(transformed_event, session, query)
                elapsed_time = time.time() - start
            except Exception as e:
                print(f"Exception => {e}")
                continue

            # Metrics are recorded inside the loop so that elapsed_time refers
            # to the event that was just historized.
            query = 'SELECT * FROM metrics'
            result = client_influxdb.query(query, database="dbsaleh2")
            print(result)
            print(elapsed_time)

            metrics = [{
                "measurement": "metrics",
                "fields": {
                    "metricName": "hystorize",
                    "timeforhystorize": elapsed_time
                }
            }]
            # client_influxdb.write_points("hystorize", elapsed_time, database="dbsaleh2")
            client_influxdb.write_points(metrics, database="dbsaleh2")

            producer.produce(topic=outputtopic,
                             value={'metricName': "hystorize",
                                    'time': elapsed_time},
                             on_delivery=delivery_report)
            producer.flush()
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()
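# `delivery_report` and `insert_enriched_event_to_cassandra` are used above
# but not shown. Minimal sketches: the first follows the standard
# confluent-kafka on_delivery callback signature; the body of the second is
# an assumption based on the call site.

def delivery_report(err, msg):
    # err is a KafkaError (or None on success); msg carries the topic,
    # partition and offset of the delivered record.
    if err is not None:
        print(f"Delivery failed: {err}")
    else:
        print(f"Delivered to {msg.topic()} [{msg.partition()}] @ {msg.offset()}")


def insert_enriched_event_to_cassandra(transformed_event, session, query):
    # Assumed to be a plain parameterized execute; transformed_event must be
    # a sequence matching the 18 placeholders in the INSERT statement.
    session.execute(query, transformed_event)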
import requests
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer

# SCHEMA_REGISTRY_URL, bootstrap_servers, group, topic and imagesnap are
# assumed to be defined elsewhere in the module.


def main():
    sr_conf = {'url': SCHEMA_REGISTRY_URL}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    schema_str = """
    {
      "namespace": "io.confluent.ksql.avro_schemas",
      "name": "User",
      "type": "record",
      "fields": [
        {"name": "DATESTAMP", "type": "string"},
        {"name": "TIMESTAMP", "type": "string"},
        {"name": "MILLISEC", "type": "string"},
        {"name": "LOGLEVEL", "type": "string"},
        {"name": "REQUESTID", "type": "string"},
        {"name": "RECORDFORMATVERSION", "type": "string"},
        {"name": "SOURCEIP", "type": "string"},
        {"name": "DNSDOMAIN", "type": "string"},
        {"name": "MESSAGETYPE", "type": "string"},
        {"name": "OPERATION", "type": "string"},
        {"name": "AUTHUSER", "type": "string"},
        {"name": "AUTHDOMAIN", "type": "string"},
        {"name": "HTTPCODE", "type": "string"},
        {"name": "SOURCEBYTES", "type": "string"},
        {"name": "RESPONSEBYTES", "type": "string"},
        {"name": "ELAPSEDTIME", "type": "string"},
        {"name": "DOMAIN", "type": "string"},
        {"name": "BUCKET", "type": "string"},
        {"name": "OBJECT", "type": "string"}
      ]
    }
    """

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': group,
        'auto.offset.reset': "earliest"
    }
    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            record = msg.value()
            if record is None:
                continue

            # Parenthesized so the DOMAIN check applies to both operations;
            # without the parentheses `and` binds tighter than `or`.
            if (record['OPERATION'] in ("POST", "MULTIPART_COMPLETE")
                    and record['DOMAIN'] != "%28none%29"):
                urllistraw = ("http://" + record['DOMAIN'] + "/"
                              + record['BUCKET'] + "/" + record['OBJECT'])
                urllist = urllistraw[:-1]  # drop the trailing character
                print(urllist)

                r = requests.head(urllist)
                headercheck = r.headers
                contentimgstring = 'image/jpeg'
                if contentimgstring in headercheck.get('Content-Type', ''):
                    print("here")
                    OBJECTR = record['OBJECT']
                    OBJECT = OBJECTR[:-1]
                    imagesnap(urllist, OBJECT)
                else:
                    print("skipped")
                    continue
                print(r.headers)
            else:
                continue
        except KeyboardInterrupt:
            break

    consumer.close()
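# `imagesnap` is called above but not defined in the snippet. A minimal
# sketch, assuming it simply downloads the JPEG that the HEAD request
# identified and writes it to the working directory (the name and behaviour
# are assumptions based on the call site):

import requests


def imagesnap(url, object_key):
    # Fetch the object and persist it under a filesystem-safe name.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    filename = object_key.replace("/", "_") + ".jpg"
    with open(filename, "wb") as f:
        f.write(resp.content)
    return filename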