def main(settings):
    """Replicate pending records from the source index into the destination.

    Opens the source index described by ``settings.source``, obtains the
    destination via ``get_or_create_index`` (passing the source along), asks
    ``get_pending`` which ``info.started`` values need copying, and hands
    everything to ``replicate``.

    :param settings: configuration object exposing ``source`` (attribute
        access) and ``"destination"`` (item access).
    """
    # SYNCH WITH source ES INDEX
    key_field = "info.started"
    # NOTE(review): a script filter of "true" presumably matches every
    # document -- confirm against the ES version in use.
    match_filter = {"script": {"script": "true"}}

    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    to_copy = get_pending(source, key_field, match_filter)
    replicate(source, destination, to_copy, key_field, match_filter)

    Log.note("Done")
def main(settings):
    """Load records from a local JSON-lines dump into the destination index.

    Reads the dump file, parses every non-blank line as JSON, pairs the lines
    up two at a time (first line supplies ``index._id``, second line is the
    value), and pushes the resulting records to the destination in batches
    of 1000.

    :param settings: configuration object exposing ``destination``.
    """
    # SYNCH WITH source ES INDEX
    destination = get_or_create_index(settings.destination)

    raw_text = File("C:/Users/klahnakoski/Downloads/records.json").read()

    # Parse each non-blank line on its own; blank lines are skipped.
    parsed = []
    for text_line in raw_text.split("\n"):
        if text_line.strip():
            parsed.append(CNV.JSON2object(text_line))

    # Lines come in pairs: even positions carry the id, odd positions the
    # document body.
    # NOTE(review): presumably Elasticsearch bulk format -- confirm.
    records = []
    for pos in range(0, len(parsed), 2):
        record_id = parsed[pos].index._id
        records.append({"id": record_id, "value": parsed[pos + 1]})

    for _, batch in Q.groupby(records, size=1000):
        destination.extend(batch)
        Log.note("Added {{num}}", {"num": len(batch)})

    Log.note("Done")
def get_pending(es, id_field_name, esfilter):
    """Collect the distinct values of ``id_field_name`` that need updating.

    Runs a zero-hit search against ``es`` with a terms facet on
    ``id_field_name`` (restricted by ``esfilter``) and wraps the facet terms
    in a ``Multiset`` keyed by term with the facet count as multiplicity.

    :param es: index client exposing ``search(query)``.
    :param id_field_name: name of the field to facet on.
    :param esfilter: ES filter limiting which documents are considered.
    :return: ``Multiset`` of terms and their counts.
    """
    facet_limit = 200000

    request = {
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": id_field_name, "size": facet_limit}}}
    }
    result = es.search(request)
    terms = result.facets.default.terms

    # Hitting the facet size means the term list may have been truncated.
    # NOTE(review): Log.error presumably raises -- confirm.
    if len(terms) >= facet_limit:
        Log.error("Can not handle more than 200K bugs changed")

    pending = Multiset(terms, key_field="term", count_field="count")

    Log.note("Source has {{num}} records for updating", {"num": len(pending)})
    return pending