def main():
    """CLI entry point: parse options, validate the input path, and print
    every document produced by transcribing it.

    Reads module-level defaults ``_default_scribe_type`` and
    ``_default_input_path``; delegates validation to ``check_file`` and the
    actual work to ``transcribe``.
    """
    parser = argparse.ArgumentParser(prog='scribe', usage='%(prog)s [options]')
    parser.add_argument(
        '-t', '--scribe-type',
        dest="type",
        nargs='?',
        choices=['stockpile'],
        default=_default_scribe_type,
        # Fixed: the original help string contained a stray "\ " (leftover
        # line-continuation) that rendered a literal backslash in --help.
        help='Type of data scribe would work with. Example: Stockpile, Foo.. '
             'Defaults to {}'.format(_default_scribe_type))
    parser.add_argument(
        '-ip', '--input-path',
        dest="input_path",
        nargs='?',
        default=_default_input_path,
        help='path for input-data. Defaults to {}'.format(_default_input_path))
    input_args = parser.parse_args()

    # Bind unconditionally so a falsy --input-path no longer leaves
    # input_data_path unassigned (the original raised NameError at the
    # transcribe() call in that case). Only validate a non-empty path.
    input_data_path = input_args.input_path
    if input_data_path:
        check_file(input_data_path)
    scribe_type = input_args.type

    for scribed_doc in transcribe(input_data_path, scribe_type):
        print(scribed_doc)
def _index_result(server, port, payload_file):
    """Transcribe *payload_file* and index each document into Elasticsearch.

    Ensures the "backpack-results" index exists (creating it with a dynamic
    mapping that stores longs as strings), then indexes every transcribed
    document into "<module>-metadata".

    :param server: Elasticsearch host.
    :param port: Elasticsearch port.
    :param payload_file: path handed to ``transcribe(..., 'stockpile')``.
    :return: the last ``scribe_uuid`` seen in the payload, or ``"NONE"`` if
             no document carried one.
    """
    index = "backpack-results"
    _es_connection_string = str(server) + ':' + str(port)
    es = elasticsearch.Elasticsearch([_es_connection_string],
                                     send_get_body_as='POST')
    if not es.indices.exists(index):
        es.indices.create(index=index)
        es.indices.put_mapping(index=index, doc_type="result", body={
            "dynamic_templates": [{
                "rule1": {
                    "mapping": {
                        "type": "string"
                    },
                    "match_mapping_type": "long"
                }
            }]
        })
    scribe_uuid = "NONE"
    for scribed in transcribe(payload_file, 'stockpile'):
        try:
            # Parse once instead of twice per document (the original called
            # json.loads(scribed) for 'module' and again for 'scribe_uuid').
            doc = json.loads(scribed)
            es.index(index=doc['module'] + "-metadata",
                     doc_type="result", body=scribed)
            scribe_uuid = doc['scribe_uuid']
        except Exception as e:
            # Best-effort: report the failing document and keep going.
            # Fixed missing space between the exception repr and the message;
            # also dropped the dead `indexed` flag that was never read.
            print(repr(e) + " occurred for the json document:")
            print(str(scribed))
    return scribe_uuid
def _upload_to_es(payload_file, my_uuid, timestamp, es, index):
    """Transcribe *payload_file* and index each document via the given
    Elasticsearch client.

    Each document is wrapped as ``{"uuid", "timestamp", "data"}`` and sent to
    the "<module>-metadata" index derived from the document itself.

    :param payload_file: path handed to ``transcribe(..., 'stockpile')``.
    :param my_uuid: run identifier stamped on every wrapped document.
    :param timestamp: timestamp stamped on every wrapped document.
    :param es: connected Elasticsearch client.
    :param index: unused; kept for call-site compatibility — TODO confirm
                  callers before removing.
    """
    # Removed `payload = open(payload_file, "rb").read()` from the original:
    # the file handle was never closed (resource leak) and `payload` was
    # never used.
    for scribed in transcribe(payload_file, 'stockpile'):
        try:
            scribe_module = json.loads(scribed)['module']
            _data = {
                "uuid": my_uuid,
                "timestamp": timestamp,
                "data": scribed
            }
            es.index(index=scribe_module + "-metadata", body=_data)
        except Exception as e:
            # Best-effort per-document indexing; report and continue.
            # Fixed missing space in the message and dropped the dead
            # `indexed` local that was assigned but never read.
            print(repr(e) + " occurred for the json document:")
            print(str(scribed))
def doc_stream():
    """Yield one bulk-API "create" action per transcribed document.

    Closes over ``payload_file``, ``my_uuid``, ``my_node``, ``my_pod``,
    ``timestamp`` and the ``documents`` counter from the enclosing scope.
    """
    for raw in transcribe(payload_file, 'stockpile'):
        parsed = json.loads(raw)
        target_index = "%s-metadata" % parsed["module"]
        parsed["uuid"] = my_uuid
        # Compute the dedup id now, BEFORE adding node/pod/timestamp: those
        # fields change per execution of stockpile-wrapper, and including
        # them would re-index documents that are not node-specific.
        digest = hashlib.sha256(str(parsed).encode()).hexdigest()
        parsed["node_name"] = my_node
        parsed["pod_name"] = my_pod
        parsed["timestamp"] = timestamp
        documents["total"] += 1
        yield {
            "_index": target_index,
            "_source": parsed,
            "_id": digest,
            "_op_type": "create",
        }
from transcribe.render import transcribe

# Print every document transcribed from the sample stockpile dump.
for rendered_doc in transcribe('/tmp/stockpile.json', 'stockpile'):
    print(rendered_doc)