def start():
    # READ SETTINGS, START LOGGING, AND RUN THE MAIN ROUTINE
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception as e:
        Log.error("Problems exist", e)
def get_or_create_index(destination_settings):
    # CHECK IF INDEX, OR ALIAS, EXISTS
    es = ElasticSearch(destination_settings)
    aliases = es.get_aliases()

    indexes = [
        a
        for a in aliases
        if a.alias == destination_settings.index or a.index == destination_settings.index
    ]
    if not indexes:
        # DOES NOT EXIST: MAKE A NEW INDEX FROM THE SCHEMA FILE
        schema = CNV.JSON2object(File(destination_settings.schema_filename).read())
        return ElasticSearch.create_index(destination_settings, schema)
    elif len(indexes) > 1:
        Log.error("do not know how to replicate to more than one index")
    elif indexes[0].alias != None:
        # MATCHED AN ALIAS: POINT SETTINGS AT THE REAL INDEX BEHIND IT
        destination_settings.alias = indexes[0].alias
        destination_settings.index = indexes[0].index

    return ElasticSearch(destination_settings)
def get_last_updated(es):
    # RETURN THE LARGEST modified_ts IN THE INDEX (datetime.min IF EMPTY)
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {"modified_ts": {"gte": CNV.datetime2milli(far_back)}}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception as e:
        Log.error("Can not get_last_updated from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
def get_pending(es, id_field_name, esfilter):
    # RETURN A Multiset OF IDS (WITH COUNTS) MATCHING THE GIVEN FILTER
    result = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": id_field_name, "size": 200000}}}
    })

    if len(result.facets.default.terms) >= 200000:
        Log.error("Can not handle more than 200K bugs changed")

    pending = Multiset(
        result.facets.default.terms,
        key_field="term",
        count_field="count"
    )
    Log.note("Source has {{num}} records for updating", {"num": len(pending)})

    return pending
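# HYPOTHETICAL USAGE SKETCH (NOT IN THE ORIGINAL SOURCE): THE HELPERS ABOVE
# COULD BE COMBINED ROUGHLY AS FOLLOWS TO FIND WHAT STILL NEEDS COPYING.
# example_find_pending(), source_settings, AND THE id_field_name PARAMETER
# ARE ILLUSTRATIVE ASSUMPTIONS, NOT CONFIRMED PARTS OF THIS MODULE'S API.
def example_find_pending(source_settings, destination_settings, id_field_name):
    source = ElasticSearch(source_settings)                   # SOURCE CLUSTER
    destination = get_or_create_index(destination_settings)   # DESTINATION (CREATED IF MISSING)

    # FIND THE NEWEST RECORD ALREADY IN THE DESTINATION...
    last_updated = get_last_updated(destination)

    # ...THEN ASK THE SOURCE FOR EVERYTHING MODIFIED SINCE THEN
    esfilter = {"range": {"modified_ts": {"gte": CNV.datetime2milli(last_updated)}}}
    return get_pending(source, id_field_name, esfilter)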