os.getenv('ES_HOST'),
    os.getenv('ES_PORT'),
    use_ssl=os.getenv('ES_USE_SSL', False),
    verify_certs=os.getenv('ES_VERIFY_CERTS', False),
    http_auth=(os.getenv('ES_USER'),
               os.getenv('ES_PASSWORD')) if os.getenv('ES_USER') else None,
    ca_certs=os.getenv('ES_CA_CERTS', None))

# Fetch the mapping from the ES helper and create the index with it before
# any message is consumed.
geo_point_mapping = es.define_geo_point_mapping()
es.create_index(ELASTICSEARCH_INDEX, geo_point_mapping)


def _decode_json_message(raw):
    """Decode a raw message value as UTF-8 and parse it as JSON."""
    return json.loads(raw.decode('utf8'))


# Connection settings are read from the environment: a missing security
# protocol falls back to 'PLAINTEXT', missing SSL file paths fall back to None.
_kafka_broker = "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
_consumer_options = {
    'bootstrap_servers': [_kafka_broker],
    # NOTE: auto_offset_reset='earliest' is currently disabled.
    'security_protocol': os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    'ssl_cafile': os.getenv('KAFKA_CA_FILE'),
    'ssl_certfile': os.getenv('KAFKA_CERT_FILE'),
    'ssl_keyfile': os.getenv('KAFKA_KEY_FILE'),
    'group_id': 'group_' + KAFKA_TOPIC,
    'value_deserializer': _decode_json_message,
}
kafka_consumer = KafkaConsumer(KAFKA_TOPIC, **_consumer_options)

# `c` is the running count of consumed messages; it stays 0 if none arrive.
c = 0
for c, msg in enumerate(kafka_consumer, start=1):
    print("Consumed: {} messages".format(c))
    # The producer has already shaped the data (from its DataFrame), so the
    # decoded value is inserted as-is.
    es.insert_doc(msg.value)
Exemple #2
0
# Fetch a custom geo-shape mapping for the "geometry" field from the ES helper
# and create the index with it before any message is consumed.
geo_point_mapping = es.define_custom_geo_shape_mapping("geometry")
es.create_index(ELASTICSEARCH_INDEX, geo_point_mapping)


def _decode_json_message(raw):
    """Decode a raw message value as UTF-8 and parse it as JSON."""
    return json.loads(raw.decode('utf8'))


# Connection settings are read from the environment: a missing security
# protocol falls back to 'PLAINTEXT', missing SSL file paths fall back to None.
_kafka_broker = "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
_consumer_options = {
    'bootstrap_servers': [_kafka_broker],
    # NOTE: auto_offset_reset='earliest' is currently disabled.
    'security_protocol': os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    'ssl_cafile': os.getenv('KAFKA_CA_FILE'),
    'ssl_certfile': os.getenv('KAFKA_CERT_FILE'),
    'ssl_keyfile': os.getenv('KAFKA_KEY_FILE'),
    'group_id': 'group_' + KAFKA_TOPIC,
    'value_deserializer': _decode_json_message,
}
kafka_consumer = KafkaConsumer(KAFKA_TOPIC, **_consumer_options)

# `c` counts consumed messages; `denied_docs` counts inserts that raised
# RequestError.
c = 0
denied_docs = 0
for c, msg in enumerate(kafka_consumer, start=1):
    print("Consumed: {} messages".format(c))
    # The producer has already shaped the data, so the decoded value is
    # inserted as-is.
    try:
        insert_result = es.insert_doc(msg.value)
        print(insert_result)
    except RequestError as err:
        # Log the error details and the running rejection count, then carry
        # on with the next message.
        denied_docs += 1
        logger.error(err.info)
        logger.error("Denied docs: {}".format(denied_docs))
def _decode_json_message(raw):
    """Decode a raw message value as UTF-8 and parse it as JSON."""
    return json.loads(raw.decode('utf8'))


# Connection settings are read from the environment: a missing security
# protocol falls back to 'PLAINTEXT', missing SSL file paths fall back to None.
_kafka_broker = "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
_consumer_options = {
    'bootstrap_servers': [_kafka_broker],
    # NOTE: auto_offset_reset='earliest' is currently disabled.
    'security_protocol': os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    'ssl_cafile': os.getenv('KAFKA_CA_FILE'),
    'ssl_certfile': os.getenv('KAFKA_CERT_FILE'),
    'ssl_keyfile': os.getenv('KAFKA_KEY_FILE'),
    'group_id': 'group_' + KAFKA_TOPIC,
    'value_deserializer': _decode_json_message,
}
kafka_consumer = KafkaConsumer(KAFKA_TOPIC, **_consumer_options)

for msg in kafka_consumer:
    # The producer delivers documents ready for insertion into ES.
    doc = msg.value

    # The document id is the date followed by the variable name,
    # e.g. "2010-01-01T11:30:00temp-avg", so that duplicate records are
    # avoided.
    id_ = doc['Date'] + doc['Variable']

    print("Inserting doc: {}".format(doc))
    print("with id: {}".format(id_))

    print("Status: {}".format(es.insert_doc(doc_=doc, id_=id_)))
Exemple #4
0
    os.getenv('ES_PORT'),
    use_ssl=os.getenv('ES_USE_SSL', False),
    verify_certs=os.getenv('ES_VERIFY_CERTS', False),
    http_auth=(os.getenv('ES_USER'),
               os.getenv('ES_PASSWORD')) if os.getenv('ES_USER') else None,
    ca_certs=os.getenv('ES_CA_CERTS', None))

# Fetch the mapping from the ES helper and create the index with it before
# any message is consumed.
geo_point_mapping = es.define_geo_point_mapping()
es.create_index(ELASTICSEARCH_INDEX, geo_point_mapping)


def _decode_json_message(raw):
    """Decode a raw message value as UTF-8 and parse it as JSON."""
    return json.loads(raw.decode('utf8'))


# Connection settings are read from the environment: a missing security
# protocol falls back to 'PLAINTEXT', missing SSL file paths fall back to None.
_kafka_broker = "{}:{}".format(os.getenv('KAFKA_HOST'), os.getenv('KAFKA_PORT'))
_consumer_options = {
    'bootstrap_servers': [_kafka_broker],
    # NOTE: auto_offset_reset='earliest' is currently disabled.
    'security_protocol': os.getenv('KAFKA_SECURITY_PROTOCOL', 'PLAINTEXT'),
    'ssl_cafile': os.getenv('KAFKA_CA_FILE'),
    'ssl_certfile': os.getenv('KAFKA_CERT_FILE'),
    'ssl_keyfile': os.getenv('KAFKA_KEY_FILE'),
    'group_id': 'group_' + KAFKA_TOPIC,
    'value_deserializer': _decode_json_message,
}
kafka_consumer = KafkaConsumer(KAFKA_TOPIC, **_consumer_options)

# `c` is the running count of consumed messages; it stays 0 if none arrive.
c = 0
for c, msg in enumerate(kafka_consumer, start=1):
    print("Consumed: {} messages".format(c))
    # Each decoded value goes through transform_message_for_es before it is
    # inserted.
    es.insert_doc(transform_message_for_es(msg.value))