Exemple #1
0
     },
     {
       "name": "icu_yn",
       "type": ["string", "null"]
     },
     {
       "name": "death_yn",
       "type": ["string", "null"]
     },
     {
       "name": "medcond_yn",
       "type": ["string", "null"]
     }
   ]
 }
 """
schema_registry_conf = {'url': 'http://schema-registry:8081'}
schema_registry_client = SchemaRegistryClient(schema_registry_conf)
avro_deserializer = AvroDeserializer(value_schema_str, schema_registry_client, dictToCase)
string_deserializer = StringDeserializer('utf_8')


consumer_conf = {'bootstrap.servers': 'kafka:29092',
                 'key.deserializer': string_deserializer,
                 'value.deserializer': avro_deserializer,
                 'group.id': "group1"}
#'auto.offset.reset': "earliest"}
consumer = DeserializingConsumer(consumer_conf)
consumer.subscribe(['topic_test'])
consumeList = []
consumer.close()
Exemple #2
0
def main(args):
    topic = args.topic
    outputtopic = args.outputtopic

    schema_enriched_event_str = EnrichedEventSchema
    schema_dict = ast.literal_eval(schema_enriched_event_str)
    schema_metrics = MetricSchema

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    string_deserializer = StringDeserializer('utf_8')

    avro_serializer = AvroSerializer(schema_metrics, schema_registry_client)
    producer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.serializer': StringSerializer('utf_8'),
        'value.serializer': avro_serializer
    }

    producer = SerializingProducer(producer_conf)

    avro_deserializer = AvroDeserializer(schema_enriched_event_str,
                                         schema_registry_client)

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group + str(random.Random()),
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    cluster = Cluster([args.host])
    session = cluster.connect("datascience")

    cluster.register_user_type('datascience', 'datafield', Datafield)

    client_influxdb = InfluxDBClient('35.181.155.182', 8086, "dbsaleh2")
    # client_influxdb = InfluxDBClient(url="http://35.181.155.182:8086 , "mydb")

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            start = time.time()
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            evt = msg.value()

            query = f"""
            insert into eventenrich (
                        "eventId" ,
                        "dateTimeRef",
                        "nomenclatureEv",
                        "canal",
                        "media",
                        "schemaVersion",
                        "headerVersion",
                        "serveur",
                        "adresseIP",
                        "idTelematique",
                        "idPersonne",
                        "dateNaissance",
                        "paysResidence",
                        "paysNaissance",
                        "revenusAnnuel",
                        "csp",
                        "eventBC",
                        "eventContent"
                        )
                        VALUES (%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s, %s, %s,%s, %s)
                    """

            #eventBc = evt["EventBusinessContext"][0].replace("com.bnpparibas.dsibddf.event.","")
            eventBc = evt["eventBC"].replace("com.bnpparibas.dsibddf.event.",
                                             "")
            eventContent = evt["EventBusinessContext"][1]

            transformed_event = transform_enriched_event_to_cassandra_model(
                evt, eventBc, schema_dict, eventContent)

            insert_enriched_event_to_cassandra(transformed_event, session,
                                               query)

            elapsed_time = (time.time() - start)

        except Exception as e:
            print(f"Exception => {e}")
            continue

        query = 'SELECT * FROM metrics'
        result = client_influxdb.query(query, database="dbsaleh2")
        print(result)

        data = []

        print(elapsed_time)
        metrics = [{
            "measurement": "metrics",
            "fields": {
                "metricName": "hystorize",
                "timeforhystorize": elapsed_time
            }
        }]
        data.append(metrics)

        # client_influxdb.write_points("hystorize",elapsed_time, database="dbsaleh2")
        client_influxdb.write_points(metrics, database="dbsaleh2")
        producer.produce(topic=outputtopic,
                         value={
                             'metricName': "hystorize",
                             'time': elapsed_time
                         },
                         on_delivery=delivery_report)
        producer.flush()

    consumer.close()
Exemple #3
0
def main():

    sr_conf = {'url': SCHEMA_REGISTRY_URL}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    schema_str = """
    {
        "namespace": "io.confluent.ksql.avro_schemas",
        "name": "User",
        "type": "record",
        "fields":[
        {"name":"DATESTAMP","type":"string"},
        {"name":"TIMESTAMP","type":"string"},
        {"name":"MILLISEC","type":"string"},
        {"name":"LOGLEVEL","type":"string"},
        {"name":"REQUESTID","type":"string"},
        {"name":"RECORDFORMATVERSION","type":"string"},
        {"name":"SOURCEIP","type":"string"},
        {"name":"DNSDOMAIN","type":"string"},
        {"name":"MESSAGETYPE","type":"string"},
        {"name":"OPERATION","type":"string"},
        {"name":"AUTHUSER","type":"string"},
        {"name":"AUTHDOMAIN","type":"string"},
        {"name":"HTTPCODE","type":"string"},
        {"name":"SOURCEBYTES","type":"string"},
        {"name":"RESPONSEBYTES","type":"string"},
        {"name":"ELAPSEDTIME","type":"string"},
        {"name":"DOMAIN","type":"string"},
        {"name":"BUCKET","type":"string"},
        {"name":"OBJECT","type":"string"}
        ]
    }
    """

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            record = msg.value()
            if record is not None:
                if record['OPERATION'] == "POST" or record[
                        'OPERATION'] == "MULTIPART_COMPLETE" and record[
                            'DOMAIN'] != "%28none%29":
                    urllistraw = "http://" + record['DOMAIN'] + "/" + record[
                        'BUCKET'] + "/" + record['OBJECT']
                    urllist = urllistraw[:-1]
                    print(urllist)
                    r = requests.head(urllist)
                    headercheck = r.headers
                    contentimgstring = 'image/jpeg'
                    if contentimgstring in headercheck['Content-Type']:
                        print("here")
                        OBJECTR = record['OBJECT']
                        OBJECT = OBJECTR[:-1]
                        imagesnap(urllist, OBJECT)
                    else:
                        print("skipped")
                        continue
                    print(r.headers)
                else:
                    continue
        except KeyboardInterrupt:
            break

    consumer.close()