def main(settings):
    """
    SYNCH WITH source ES INDEX

    Read a local dump of ES bulk-style output (alternating metadata and
    document lines) and load the documents into the destination index.

    :param settings: config object; settings.destination describes the
                     index to open/create (exact schema is whatever
                     get_or_create_index expects -- confirm against caller)
    """
    destination = get_or_create_index(settings.destination)

    # NOTE(review): hard-coded local dump path -- consider moving to settings
    source = File("C:/Users/klahnakoski/Downloads/records.json").read()
    lines = [CNV.JSON2object(l) for l in source.split("\n") if l.strip()]

    # FIX: an odd line count previously surfaced as a bare IndexError on
    # lines[i + 1]; fail early with a clear message instead
    if len(lines) % 2 != 0:
        Log.error("Expecting alternating metadata/document lines, got odd count {{num}}", {"num": len(lines)})

    # pair each metadata line (carries the _id) with the document line after it
    records = [
        {"id": meta.index._id, "value": doc}
        for meta, doc in zip(lines[::2], lines[1::2])
    ]

    for g, r in Q.groupby(records, size=1000):
        destination.extend(r)
        Log.note("Added {{num}}", {"num": len(r)})
    Log.note("Done")
def replicate(source, destination, pending, id_field_name, esfilter):
    """
    COPY source RECORDS TO destination

    Pull the pending ids from source in batches of BATCH_SIZE, restrict by
    esfilter, transform each hit, and add the results to destination.

    :param source: ES index wrapper supporting .search()
    :param destination: ES index wrapper supporting .add()
    :param pending: iterable of document ids still to be copied
    :param id_field_name: name of the field holding the document id
    :param esfilter: extra ES filter ANDed with the id terms filter
    """
    for g, ids in Q.groupby(pending, max_size=BATCH_SIZE):
        with Timer("Replicate {{num}} records...", {"num": len(ids)}):
            data = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"terms": {id_field_name: set(ids)}},
                        esfilter
                    ]}
                }},
                "from": 0,
                "size": 200000,  # far above BATCH_SIZE so a whole batch fits in one page
                "sort": []
            })

            # FIX: map(lambda(x): ...) is Python 2 tuple-parameter syntax,
            # a SyntaxError under Python 3; an equivalent list comprehension
            # also preserves the Py2 map()-returns-list behavior
            d2 = [
                {"id": hit.id, "value": hit}
                for hit in (transform(h._source) for h in data.hits.hits)
            ]
            destination.add(d2)