Ejemplo n.º 1
0
def start():
    """
    Script entry point: read the settings, start logging and run main().

    Any Exception raised along the way is handed to Log.error(); the logger
    is stopped on the way out whether or not main() succeeded.
    """
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception as e:  # "as" FORM WORKS ON PYTHON 2.6+ AND PYTHON 3
        Log.error("Problems exist", e)
    finally:
        # PAIR Log.start() WITH Log.stop() REGARDLESS OF THE OUTCOME
        Log.stop()
Ejemplo n.º 2
0
def main(settings):
    """
    Synch the destination index with the source ES index.

    Pending entries are looked up in the source through their "info.started"
    field and then passed on to replicate().

    :param settings: provides `source` (attribute) and "destination" (item)
    """
    started_field = "info.started"
    # SCRIPT FILTER WHOSE SCRIPT IS THE CONSTANT "true"
    # NOTE(review): PRESUMABLY THIS SELECTS EVERY DOCUMENT - CONFIRM
    always_true = {"script": {"script": "true"}}

    source_es = ElasticSearch(settings.source)
    destination_es = get_or_create_index(settings["destination"], source_es)

    todo = get_pending(source_es, started_field, always_true)
    replicate(source_es, destination_es, todo, started_field, always_true)
    Log.note("Done")
Ejemplo n.º 3
0
def main(settings, filename="C:/Users/klahnakoski/Downloads/records.json"):
    """
    Load a file of paired JSON lines into the destination index.

    Blank lines are ignored.  The remaining lines are read two at a time: the
    first of each pair supplies the record id (its `index._id`), the second
    becomes the record's value.  Records are added to the destination in
    groups of 1000.

    :param settings: `settings.destination` is passed to get_or_create_index()
    :param filename: file to load; defaults to the path that was previously
                     hard-coded in this function
    """
    destination = get_or_create_index(settings.destination)

    content = File(filename).read()
    lines = [CNV.JSON2object(l) for l in content.split("\n") if l.strip()]
    if len(lines) % 2:
        # AN ID LINE WITHOUT ITS VALUE LINE WOULD OTHERWISE SHOW UP AS A BARE
        # IndexError WHEN THE PAIRS ARE BUILT BELOW
        Log.error("Expecting pairs of lines in {{filename}}, but found {{num}} lines", {
            "filename": filename,
            "num": len(lines)
        })

    records = [{"id": lines[i].index._id, "value": lines[i + 1]} for i in range(0, len(lines), 2)]
    for _, batch in Q.groupby(records, size=1000):
        destination.extend(batch)
        Log.note("Added {{num}}", {"num": len(batch)})

    Log.note("Done")
Ejemplo n.º 4
0
def get_or_create_index(destination_settings):
    """
    Return an ElasticSearch object for the destination, creating the index
    from its schema file when no index or alias of that name exists.

    When exactly one match is found and it carries an alias, the `alias` and
    `index` of `destination_settings` are overwritten with the matched values
    before the ElasticSearch object is built.

    :param destination_settings: reads `index` and `schema_filename`; may have
                                 `alias` and `index` reassigned (see above)
    """
    # LOOK FOR THE REQUESTED NAME AMONG BOTH ALIASES AND INDEX NAMES
    matches = []
    for candidate in ElasticSearch(destination_settings).get_aliases():
        if candidate.alias == destination_settings.index or candidate.index == destination_settings.index:
            matches.append(candidate)

    if not matches:
        # NOTHING BY THAT NAME: BUILD THE INDEX FROM THE SCHEMA FILE
        schema_text = File(destination_settings.schema_filename).read()
        return ElasticSearch.create_index(destination_settings, CNV.JSON2object(schema_text))

    if len(matches) > 1:
        Log.error("do not know how to replicate to more than one index")
    else:
        found = matches[0]
        # NOTE(review): `!= None` IS KEPT AS WRITTEN; `is not None` MAY BEHAVE
        # DIFFERENTLY IF THESE RECORDS USE A NULL-LIKE WRAPPER - CONFIRM
        if found.alias != None:
            destination_settings.alias = found.alias
            destination_settings.index = found.index

    return ElasticSearch(destination_settings)
Ejemplo n.º 5
0
def get_last_updated(es):
    """
    Return the largest `modified_ts` found in the given index.

    Issues a search that returns no hits ("size": 0) but carries a statistical
    facet over `modified_ts`, restricted to values at or after `far_back`
    (a name defined outside this function).

    :param es: object offering search(), plus `settings.host` and
               `settings.index` (used only for the error message)
    :return: datetime.min when the facet counted nothing, otherwise the
             facet's max converted with CNV.milli2datetime()
    """
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {
                        "modified_ts": {"gte": CNV.datetime2milli(far_back)}
                    }
                }
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception as e:  # "as" FORM WORKS ON PYTHON 2.6+ AND PYTHON 3
        Log.error("Can not get_last_updated from {{host}}/{{index}}",{
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Ejemplo n.º 6
0
def get_pending(es, id_field_name, esfilter):
    """
    Gather the values of `id_field_name` among the documents matching `esfilter`.

    A terms facet (capped at 200000 terms) is requested with no hits returned;
    reaching the cap is reported through Log.error().

    :param es: object offering search()
    :param id_field_name: field the terms facet is computed over
    :param esfilter: filter placed in the filtered query
    :return: Multiset built from the facet terms ("term" as key, "count" as count)
    """
    max_terms = 200000
    query = {
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": id_field_name, "size": max_terms}}}
    }

    terms = es.search(query).facets.default.terms
    if len(terms) >= max_terms:
        # A FULL FACET MEANS SOME TERMS MAY HAVE BEEN CUT OFF
        Log.error("Can not handle more than 200K bugs changed")

    pending = Multiset(terms, key_field="term", count_field="count")
    Log.note("Source has {{num}} records for updating", {"num": len(pending)})
    return pending
Ejemplo n.º 7
0

def main(settings, filename="C:/Users/klahnakoski/Downloads/records.json"):
    """
    Load a file of paired JSON lines into the destination index.

    Blank lines are ignored.  The remaining lines are read two at a time: the
    first of each pair supplies the record id (its `index._id`), the second
    becomes the record's value.  Records are added to the destination in
    groups of 1000.

    :param settings: `settings.destination` is passed to get_or_create_index()
    :param filename: file to load; defaults to the path that was previously
                     hard-coded in this function
    """
    destination = get_or_create_index(settings.destination)

    content = File(filename).read()
    lines = [CNV.JSON2object(l) for l in content.split("\n") if l.strip()]
    if len(lines) % 2:
        # AN ID LINE WITHOUT ITS VALUE LINE WOULD OTHERWISE SHOW UP AS A BARE
        # IndexError WHEN THE PAIRS ARE BUILT BELOW
        Log.error("Expecting pairs of lines in {{filename}}, but found {{num}} lines", {
            "filename": filename,
            "num": len(lines)
        })

    records = [{"id": lines[i].index._id, "value": lines[i + 1]} for i in range(0, len(lines), 2)]
    for _, batch in Q.groupby(records, size=1000):
        destination.extend(batch)
        Log.note("Added {{num}}", {"num": len(batch)})

    Log.note("Done")


def start():
    """
    Script entry point: read the settings, start logging and run main().

    Any Exception raised along the way is handed to Log.error(); the logger
    is stopped on the way out whether or not main() succeeded.
    """
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception as e:  # "as" FORM WORKS ON PYTHON 2.6+ AND PYTHON 3
        Log.error("Problems exist", e)
    finally:
        Log.stop()


# RUN start() ONLY WHEN THIS FILE IS EXECUTED AS A SCRIPT, NOT WHEN IT IS IMPORTED
if __name__ == "__main__":
    start()