Ejemplo n.º 1
0
def main(settings):
    """
    LOAD RECORDS FROM A LOCAL JSON-LINES DUMP INTO THE destination ES INDEX

    settings.destination IS HANDED TO get_or_create_index() TO OBTAIN THE
    TARGET INDEX.  THE DUMP IS READ FROM A HARD-CODED PATH; BLANK LINES ARE
    SKIPPED AND EVERY REMAINING LINE IS PARSED AS JSON.  PARSED LINES ARE
    CONSUMED IN PAIRS: THE FIRST OF EACH PAIR SUPPLIES index._id, THE SECOND
    IS STORED AS THE RECORD VALUE.
    NOTE(review): THIS LOOKS LIKE THE ELASTICSEARCH BULK FORMAT - CONFIRM
    """
    index = get_or_create_index(settings.destination)

    content = File("C:/Users/klahnakoski/Downloads/records.json").read()

    # PARSE EVERY NON-BLANK LINE
    parsed = []
    for text in content.split("\n"):
        if text.strip():
            parsed.append(CNV.JSON2object(text))

    # PAIR UP CONSECUTIVE LINES: (HEADER WITH index._id, RECORD BODY)
    records = []
    for pos in range(0, len(parsed), 2):
        record_id = parsed[pos].index._id
        records.append({"id": record_id, "value": parsed[pos + 1]})

    # PUSH TO THE INDEX IN BATCHES, LOGGING THE SIZE OF EACH ONE
    for _, batch in Q.groupby(records, size=1000):
        index.extend(batch)
        Log.note("Added {{num}}", {"num": len(batch)})

    Log.note("Done")
Ejemplo n.º 2
0
def replicate(source, destination, pending, id_field_name, esfilter):
    """
    COPY source RECORDS TO destination

    source        - OBJECT WITH search(query); THE RESULT IS READ VIA hits.hits
    destination   - OBJECT WITH add(records); RECEIVES A LIST OF
                    {"id": ..., "value": ...} DICTS, ONE LIST PER BATCH
    pending       - THE IDS TO COPY; SPLIT INTO BATCHES OF AT MOST BATCH_SIZE
    id_field_name - NAME OF THE FIELD MATCHED AGAINST THE IDS (terms FILTER)
    esfilter      - EXTRA FILTER, ANDed WITH THE ID FILTER

    EACH HIT'S _source IS PASSED THROUGH transform() (DEFINED ELSEWHERE); THE
    TRANSFORMED RECORD MUST EXPOSE AN id ATTRIBUTE, WHICH BECOMES THE
    DESTINATION ID.  NOTHING IS RETURNED.
    """
    for g, ids in Q.groupby(pending, max_size=BATCH_SIZE):
        with Timer("Replicate {{num}} records...", {"num": len(ids)}):
            data = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"terms": {id_field_name: set(ids)}},
                        esfilter
                    ]}
                }},
                "from": 0,
                # NOTE(review): PRESUMABLY CHOSEN TO EXCEED BATCH_SIZE SO ONE
                # PAGE HOLDS EVERY MATCH - CONFIRM
                "size": 200000,
                "sort": []
            })

            # A LIST COMPREHENSION INSTEAD OF map(lambda(x): ...):
            # PARENTHESISED lambda PARAMETERS ARE PYTHON-2-ONLY SYNTAX, AND
            # map() IS LAZY ON PYTHON 3.  THIS FORM IS VALID ON BOTH AND
            # ALWAYS HANDS destination.add() A FULLY BUILT LIST.
            transformed = (transform(hit._source) for hit in data.hits.hits)
            d2 = [{"id": record.id, "value": record} for record in transformed]
            destination.add(d2)