def start():
    """Entry point: read settings, configure logging, run main(); log any failure.

    Side effects: starts and stops the Log subsystem.
    """
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception as e:
        # `as` form works on Python 2.6+ and 3.x (original comma form is py2-only)
        Log.error("Problems exist", e)
    finally:
        # Always flush/stop logging, matching the other start() in this file
        Log.stop()
def main(settings):
    """Synchronize the destination ES index with the source ES index."""
    # SYNCH WITH source ES INDEX
    src = ElasticSearch(settings.source)
    dest = get_or_create_index(settings["destination"], src)

    id_field_name = "info.started"
    esfilter = {"script": {"script": "true"}}

    pending = get_pending(src, id_field_name, esfilter)
    replicate(src, dest, pending, id_field_name, esfilter)
    Log.note("Done")
def main(settings):
    """Bulk-load id/record pairs from a local JSON dump into the destination index."""
    # SYNCH WITH source ES INDEX
    destination = get_or_create_index(settings.destination)

    content = File("C:/Users/klahnakoski/Downloads/records.json").read()
    parsed = [CNV.JSON2object(line) for line in content.split("\n") if line.strip()]
    # lines alternate: metadata (carrying index._id), then the document itself
    records = [
        {"id": parsed[i].index._id, "value": parsed[i + 1]}
        for i in range(0, len(parsed), 2)
    ]

    for g, r in Q.groupby(records, size=1000):
        destination.extend(r)
        Log.note("Added {{num}}", {"num": len(r)})
    Log.note("Done")
def get_or_create_index(destination_settings):
    """Return an ElasticSearch handle for the destination index.

    If neither the index nor an alias with the configured name exists, create
    the index from the schema file named in the settings.  If the name matches
    an alias, rewrite the settings to address the concrete index behind it.
    Raises (via Log.error) when the name matches more than one index.
    """
    # CHECK IF INDEX, OR ALIAS, EXISTS
    es = ElasticSearch(destination_settings)
    aliases = es.get_aliases()

    indexes = [
        a
        for a in aliases
        if a.alias == destination_settings.index or a.index == destination_settings.index
    ]
    if not indexes:
        schema = CNV.JSON2object(File(destination_settings.schema_filename).read())
        return ElasticSearch.create_index(destination_settings, schema)
    elif len(indexes) > 1:
        Log.error("do not know how to replicate to more than one index")
    elif indexes[0].alias is not None:  # identity test for None (was `!= None`)
        destination_settings.alias = indexes[0].alias
        destination_settings.index = indexes[0].index
    return ElasticSearch(destination_settings)
def get_last_updated(es):
    """Return the most recent `modified_ts` in the index as a datetime.

    Uses a statistical facet over documents with modified_ts >= far_back;
    returns datetime.min when no such document exists.  Errors (via Log.error)
    when the search itself fails.
    """
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {"modified_ts": {"gte": CNV.datetime2milli(far_back)}}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })
        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception as e:
        # `as` form works on Python 2.6+ and 3.x (original comma form is py2-only)
        Log.error("Can not get_last_updated from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
def get_pending(es, id_field_name, esfilter):
    """Return a Multiset of record ids (with counts) matching `esfilter`.

    Uses a terms facet on `id_field_name`; errors (via Log.error) when the
    number of distinct terms reaches the facet size limit, since results
    beyond that limit would be silently truncated.
    """
    # Single named limit: facet size and the overflow check must agree
    MAX_TERMS = 200000
    result = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": id_field_name, "size": MAX_TERMS}}}
    })

    if len(result.facets.default.terms) >= MAX_TERMS:
        Log.error("Can not handle more than 200K bugs changed")

    pending = Multiset(
        result.facets.default.terms,
        key_field="term",
        count_field="count"
    )
    Log.note("Source has {{num}} records for updating", {
        "num": len(pending)
    })
    return pending
def main(settings):
    """Bulk-load id/record pairs from a local JSON dump into the destination index."""
    # SYNCH WITH source ES INDEX
    destination = get_or_create_index(settings.destination)

    source = File("C:/Users/klahnakoski/Downloads/records.json").read()
    lines = [CNV.JSON2object(l) for l in source.split("\n") if l.strip()]
    # lines alternate: metadata (carrying index._id), then the document itself
    records = [
        {"id": lines[i].index._id, "value": lines[i + 1]}
        for i in range(0, len(lines), 2)
    ]

    for g, r in Q.groupby(records, size=1000):
        destination.extend(r)
        Log.note("Added {{num}}", {"num": len(r)})
    Log.note("Done")


def start():
    """Entry point: read settings, configure logging, run main(); always stop logging."""
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception as e:
        # `as` form works on Python 2.6+ and 3.x (original comma form is py2-only)
        Log.error("Problems exist", e)
    finally:
        Log.stop()


if __name__ == "__main__":
    start()