def update_calculated_properties(): es = get_es() #todo: use some sort of ES scrolling/paginating results = es.get(DOMAIN_INDEX + "/hqdomain/_search", data={"size": 99999})['hits']['hits'] all_stats = _all_domain_stats() for r in results: dom = r["_source"]["name"] calced_props = { "cp_n_web_users": int(all_stats["web_users"][dom]), "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)), "cp_n_cc_users": int(all_stats["commcare_users"][dom]), "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)), "cp_n_cases": int(all_stats["cases"][dom]), "cp_n_forms": int(all_stats["forms"][dom]), "cp_first_form": CALC_FNS["first_form_submission"](dom, False), "cp_last_form": CALC_FNS["last_form_submission"](dom, False), "cp_is_active": CALC_FNS["active"](dom), "cp_has_app": CALC_FNS["has_app"](dom), } if calced_props['cp_first_form'] == 'No forms': del calced_props['cp_first_form'] del calced_props['cp_last_form'] es.post("%s/hqdomain/%s/_update" % (DOMAIN_INDEX, r["_id"]), data={"doc": calced_props})
def _domains_matching(key, value):
    """Return the distinct domain names whose xform docs have ``key == value``.

    A throwaway terms facet on ``domain.exact`` (up to 1000 buckets) does
    the de-duplication server-side.
    """
    facet_name = 'facets'
    match_clause = {"term": {key: value}}
    search_body = {
        "filter": match_clause,
        "facets": {
            facet_name: {
                "terms": {"field": "domain.exact", "size": 1000},
                "facet_filter": match_clause,
            }
        },
    }
    response = get_es()['xforms'].post('_search', data=search_body)
    domains = []
    for bucket in response['facets'][facet_name]['terms']:
        domains.append(bucket['term'])
    return domains
def _check_es_rev(index, doc_id, couch_rev):
    """Check whether the ES copy of ``doc_id`` in ``index`` carries ``couch_rev``.

    NOTE(review): ``status``/``message`` are computed but never returned from
    this function — this looks like a missing ``return`` (compare the other
    _check_es_rev variants in this file); confirm against callers.
    """
    es = get_es()
    # Fetch only _id/_rev for the single target doc id.
    doc_id_query = {"filter": {"ids": {"values": [doc_id]}}, "fields": ["_id", "_rev"]}
    try:
        res = es[index].get("_search", data=doc_id_query)
        status = False
        message = "Not in sync"
        if res.has_key("hits"):  # py2 idiom; 'in' is preferred
            if res["hits"].get("total", 0) == 0:
                status = False  # if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif res["hits"].has_key("hits"):
                fields = res["hits"]["hits"][0]["fields"]
                if fields["_rev"] == couch_rev:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False  # less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
            else:
                status = False
                message = "Not in sync - query failed"
    except Exception, ex:
        message = "ES Error: %s" % ex
        status = False
def _get_latest_doc_from_index(es_index, sort_field): """ Query elasticsearch index sort descending by the sort field and get the doc_id back so we can then do a rev-update check. This si because there's no direct view known ahead of time what's inside the report* index, so just get it directly from the index and do the modify check workflow. """ recent_query = { "filter": { "match_all": {} }, "sort": {sort_field: "desc"}, "size": 1 } es = get_es() try: res = es[es_index].get('_search', data=recent_query) if 'hits' in res: if 'hits' in res['hits']: result = res['hits']['hits'][0] return result['_source']['_id'] except Exception, ex: logging.error("Error querying get_latest_doc_from_index[%s]: %s" % (es_index, ex)) return None
def apps_update_calculated_properties():
    """Set the cp_is_active calculated property on every non-copy app doc."""
    es = get_es()
    # Only primary apps (docs without a copy_of pointer) are updated.
    query = {"filter": {"and": [{"missing": {"field": "copy_of"}}]}}
    app_stream = stream_es_query(q=query, es_url=ES_URLS["apps"], size=999999, chunksize=500)
    for app in app_stream:
        props = {"cp_is_active": is_app_active(app["_id"], app["_source"]["domain"])}
        es.post("%s/app/%s/_update" % (APP_INDEX, app["_id"]), data={"doc": props})
def update_calculated_properties():
    """Recompute cp_* calculated properties for every real (non-snapshot)
    Domain doc and write them back to ES via partial _update calls."""
    es = get_es()
    q = {"filter": {"and": [
        {"term": {"doc_type": "Domain"}},
        {"term": {"is_snapshot": False}}
    ]}}
    results = stream_es_query(q=q, es_url=ES_URLS["domains"], size=999999, chunksize=500, fields=["name"])
    all_stats = _all_domain_stats()
    for r in results:
        dom = r["fields"]["name"]
        calced_props = {
            "cp_n_web_users": int(all_stats["web_users"][dom]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)),
            "cp_n_cc_users": int(all_stats["commcare_users"][dom]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)),
            "cp_n_users_submitted_form": total_distinct_users([dom]),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](dom, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](dom, 60)),
            "cp_n_cases": int(all_stats["cases"][dom]),
            "cp_n_forms": int(all_stats["forms"][dom]),
            "cp_first_form": CALC_FNS["first_form_submission"](dom, False),
            "cp_last_form": CALC_FNS["last_form_submission"](dom, False),
            "cp_is_active": CALC_FNS["active"](dom),
            "cp_has_app": CALC_FNS["has_app"](dom),
            "cp_last_updated": datetime.now().strftime(DATE_FORMAT),
        }
        # 'No forms' is a sentinel meaning "no submissions yet"; drop both
        # form-date props rather than persisting the sentinel string.
        if calced_props['cp_first_form'] == 'No forms':
            del calced_props['cp_first_form']
            del calced_props['cp_last_form']
        es.post("%s/hqdomain/%s/_update" % (DOMAIN_INDEX, r["_id"]), data={"doc": calced_props})
def _get_latest_doc_from_index(es_index, sort_field): """ Query elasticsearch index sort descending by the sort field and get the doc_id back so we can then do a rev-update check. This si because there's no direct view known ahead of time what's inside the report* index, so just get it directly from the index and do the modify check workflow. """ recent_query = { "filter": { "match_all": {} }, "sort": { sort_field: "desc" }, "size": 1 } es = get_es() try: res = es[es_index].get('_search', data=recent_query) if 'hits' in res: if 'hits' in res['hits']: result = res['hits']['hits'][0] return result['_source']['_id'] except Exception, ex: logging.error("Error querying get_latest_doc_from_index[%s]: %s" % (es_index, ex)) return None
def update_calculated_properties(): es = get_es() #todo: use some sort of ES scrolling/paginating results = es.get(DOMAIN_INDEX + "/hqdomain/_search", data={"size": 99999})['hits']['hits'] all_stats = _all_domain_stats() for r in results: dom = r["_source"]["name"] calced_props = { "cp_n_web_users": int(all_stats["web_users"][dom]), "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)), "cp_n_cc_users": int(all_stats["commcare_users"][dom]), "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)), "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](dom, 120)), "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](dom, 60)), "cp_n_cases": int(all_stats["cases"][dom]), "cp_n_forms": int(all_stats["forms"][dom]), "cp_first_form": CALC_FNS["first_form_submission"](dom, False), "cp_last_form": CALC_FNS["last_form_submission"](dom, False), "cp_is_active": CALC_FNS["active"](dom), "cp_has_app": CALC_FNS["has_app"](dom), } if calced_props['cp_first_form'] == 'No forms': del calced_props['cp_first_form'] del calced_props['cp_last_form'] es.post("%s/hqdomain/%s/_update" % (DOMAIN_INDEX, r["_id"]), data={"doc": calced_props})
def handle(self, *args, **options):
    """Management-command entry point: open an ES handle and validate the
    --from_date option; exits the process on a malformed date string."""
    self.es = get_es()
    try:
        # NOTE(review): from_date is parsed for validation only here;
        # the local is otherwise unused in this visible snippet.
        from_date = datetime.strptime(options['from_date'], "%Y-%m-%d")
    except Exception, ex:
        self.printerr("need a valid date string --from_date YYYY-mm-dd: %s" % ex)
        sys.exit()
def es_histogram(histo_type, domains=None, startdate=None, enddate=None, tz_diff=None):
    """Return daily date-histogram facet entries for "forms" or "cases".

    Optionally restricted to a list of domains; ``tz_diff`` shifts the
    histogram bucket boundaries.
    """
    # Pick the date field and index URL for the requested doc type.
    date_field = {"forms": "received_on", "cases": "opened_on"}[histo_type]
    es_url = {
        "forms": XFORM_INDEX + "/xform/_search",
        "cases": CASE_INDEX + "/case/_search",
    }[histo_type]

    if domains is None:
        base_query = {"match_all": {}}
    else:
        base_query = {"in": {"domain.exact": domains}}

    histo_facet = {
        "date_histogram": {"field": date_field, "interval": "day"},
        "facet_filter": {
            "and": [{"range": {date_field: {"from": startdate, "to": enddate}}}]
        },
    }
    body = {"query": base_query, "facets": {"histo": histo_facet}, "size": 0}

    if tz_diff:
        histo_facet["date_histogram"]["time_zone"] = tz_diff
    if histo_type == "forms":
        # Keep duplicate/deleted form docs out of the counts.
        histo_facet["facet_filter"]["and"].append(
            {"not": {"in": {"doc_type": ["xformduplicate", "xformdeleted"]}}}
        )

    return get_es().get(es_url, data=body)["facets"]["histo"]["entries"]
def _domains_matching(key, value):
    """List distinct domains whose xform docs match ``key == value``.

    The terms facet on ``domain.exact`` (capped at 1000 buckets) does the
    de-duplication; the facet's name is irrelevant and thrown away.
    """
    es = get_es()
    throwaway_facet_name = 'facets'
    query = {
        "filter": {
            "term": {
                key: value
            }
        },
        "facets": {
            throwaway_facet_name: {
                "terms": {
                    "field": "domain.exact",
                    "size": 1000
                },
                # The same term filter must be repeated on the facet so the
                # buckets only count matching docs.
                "facet_filter": {
                    "term": {
                        key: value
                    }
                }
            }
        }
    }
    res = es['xforms'].post('_search', data=query)
    return [
        r['term']
        for r in res['facets'][throwaway_facet_name]['terms']
    ]
def es_query(params, facets=None, terms=None, q=None):
    """Build and run a filtered/faceted search against cc_exchange/domain.

    ``params``: attr -> value(s) turned into lowercased terms filters,
    except attrs listed in ``terms``. ``q`` (mutated in place) may carry a
    preexisting query body. Returns the raw ES response dict.
    """
    if terms is None:
        terms = []
    if q is None:
        q = {}
    q["size"] = 9999  # effectively "all" results
    q["filter"] = q.get("filter", {})
    q["filter"]["and"] = q["filter"].get("and", [])
    for attr in params:
        if attr not in terms:
            # Normalize single value vs list; term values are lowercased.
            attr_val = [params[attr].lower()] if isinstance(params[attr], basestring) else [p.lower() for p in params[attr]]
            q["filter"]["and"].append({"terms": {attr: attr_val}})

    def facet_filter(facet):
        # Apply every filter clause to the facet except the facet's own
        # terms clause (so each facet ignores its own selection).
        ff = {"facet_filter": {}}
        ff["facet_filter"]["and"] = [clause for clause in q["filter"]["and"] if facet not in clause.get("terms", [])]
        return ff if ff["facet_filter"]["and"] else {}

    if facets:
        q["facets"] = {}
        for facet in facets:
            q["facets"][facet] = {"terms": {"field": facet, "size": 9999}}
            q["facets"][facet].update(facet_filter(facet))

    # ES rejects an empty "and" filter, so drop the filter entirely.
    if not q['filter']['and']:
        del q["filter"]

    es_url = "cc_exchange/domain/_search"
    es = get_es()
    ret_data = es.get(es_url, data=q)
    return ret_data
def update_calculated_properties():
    """Recompute cp_* calculated properties for every real (non-snapshot)
    Domain doc and write them back to ES via partial _update calls."""
    es = get_es()
    q = {
        "filter": {
            "and": [{
                "term": {
                    "doc_type": "Domain"
                }
            }, {
                "term": {
                    "is_snapshot": False
                }
            }]
        }
    }
    results = stream_es_query(q=q, es_url=ES_URLS["domains"], size=999999, chunksize=500, fields=["name"])
    all_stats = _all_domain_stats()
    for r in results:
        dom = r["fields"]["name"]
        calced_props = {
            "cp_n_web_users": int(all_stats["web_users"][dom]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)),
            "cp_n_cc_users": int(all_stats["commcare_users"][dom]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)),
            "cp_n_users_submitted_form": total_distinct_users([dom]),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](dom, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](dom, 60)),
            "cp_n_cases": int(all_stats["cases"][dom]),
            "cp_n_forms": int(all_stats["forms"][dom]),
            "cp_first_form": CALC_FNS["first_form_submission"](dom, False),
            "cp_last_form": CALC_FNS["last_form_submission"](dom, False),
            "cp_is_active": CALC_FNS["active"](dom),
            "cp_has_app": CALC_FNS["has_app"](dom),
            "cp_last_updated": datetime.now().strftime(DATE_FORMAT),
        }
        # 'No forms' is a sentinel meaning "no submissions yet"; drop both
        # form-date props rather than persisting the sentinel string.
        if calced_props['cp_first_form'] == 'No forms':
            del calced_props['cp_first_form']
            del calced_props['cp_last_form']
        es.post("%s/hqdomain/%s/_update" % (DOMAIN_INDEX, r["_id"]), data={"doc": calced_props})
def _es_query(self):
    """Count distinct values of ``self.field`` over report cases or forms.

    Runs a terms facet (top 50 values) filtered by domain, submission
    type, date range and (optionally) location; returns {value: count},
    folding the facet's overflow count into an 'Other' bucket.
    """
    # Per-mode config: which index/type to hit and how field names map
    # into the mapped '#value' paths.
    es_config_case = {
        'index': 'report_cases',
        'type': 'report_case',
        'field_to_path': lambda f: '%s.#value' % f,
        'fields': {
            'date': 'server_modified_on',
            'submission_type': 'type',
        }
    }
    es_config_form = {
        'index': 'report_xforms',
        'type': 'report_xform',
        'field_to_path': lambda f: 'form.%s.#value' % f,
        'fields': {
            'date': 'received_on',
            'submission_type': 'xmlns',
        }
    }
    es_config = {
        'case': es_config_case,
        'form': es_config_form,
    }[self.mode]

    MAX_DISTINCT_VALUES = 50

    es = elastic.get_es()
    filter_criteria = [
        {"term": {"domain": self.domain}},
        {"term": {es_config['fields']['submission_type']: self.submission_type}},
        {"range": {es_config['fields']['date']: {
            "from": self.start_date,
            "to": self.end_date,
        }}},
    ]
    if self.location_id:
        filter_criteria.append({"term": {"location_id": self.location_id}})
    result = es.get('%s/_search' % es_config['index'], data={
        "query": {"match_all": {}},
        "size": 0,  # no hits; only aggregated data
        "facets": {
            "blah": {
                "terms": {
                    "field": "%s.%s" % (es_config['type'], es_config['field_to_path'](self.field)),
                    "size": MAX_DISTINCT_VALUES
                },
                "facet_filter": {
                    "and": filter_criteria
                }
            }
        },
    })
    result = result['facets']['blah']
    raw = dict((k['term'], k['count']) for k in result['terms'])
    # The facet's 'other' field counts values beyond the top
    # MAX_DISTINCT_VALUES buckets.
    if result['other']:
        raw[_('Other')] = result['other']
    return raw
def add_demo_user_to_user_index():
    """Insert (or overwrite) the synthetic demo_user doc in the user index."""
    demo_doc = {
        "_id": "demo_user",
        "username": "******",
        "doc_type": "DemoUser",
    }
    get_es().put(USER_INDEX + "/user/demo_user", data=demo_doc)
def es_query(params=None, facets=None, terms=None, q=None, es_url=None, start_at=None, size=None, dict_only=False):
    """
    Build (and optionally execute) a filtered, faceted ES query.

    Any filters you include in your query should an and filter
    todo: intelligently deal with preexisting filters

    With ``dict_only=True`` the assembled query body is returned instead
    of being sent to ES.
    """
    params = params or {}
    terms = terms or []
    if q is None:
        q = {}

    q["size"] = size or 9999
    q["from"] = start_at or 0
    q.setdefault("filter", {})
    q["filter"].setdefault("and", [])

    def _norm(value):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if value == 'T' or value is True:
            return 1
        if value == 'F' or value is False:
            return 0
        return value

    for attr, raw in params.items():
        if attr in terms:
            continue
        values = raw if isinstance(raw, list) else [raw]
        q["filter"]["and"].append({"terms": {attr: [_norm(v) for v in values]}})

    def _facet_filter(facet):
        # Reuse every filter clause except the facet's own terms clause.
        clauses = [c for c in q["filter"]["and"] if facet not in c.get("terms", [])]
        return {"facet_filter": {"and": clauses}} if clauses else {}

    if facets:
        q.setdefault("facets", {})
        for facet in facets:
            q["facets"][facet] = {"terms": {"field": facet, "size": 9999}}

    if q.get('facets'):
        for facet in q["facets"]:
            q["facets"][facet].update(_facet_filter(facet))

    # ES rejects an empty "and" filter, so drop the filter entirely.
    if not q['filter']['and']:
        del q["filter"]

    if dict_only:
        return q

    es_url = es_url or DOMAIN_INDEX + '/hqdomain/_search'
    return get_es().get(es_url, data=q)
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search
    """
    query_body = ElasticCaseQuery(domain, filters).get_query()
    res = get_es().get('hqcases/_search', data=query_body)
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    hits = res['hits']['hits']
    return [CommCareCase.wrap(hit["_source"]).get_json() for hit in hits if hit["_source"]]
def check_es_cluster_health():
    """
    The color state of the cluster health is just a simple indicator for
    how a cluster is running.
    It'll mainly be useful for finding out if shards are in good/bad state
    (red).
    There are better realtime tools for monitoring ES clusters which should
    probably be looked at. specifically paramedic or bigdesk
    """
    health = get_es().get('_cluster/health')
    return {CLUSTER_HEALTH: health['status']}
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search
    (Domain, Filters?) -> [CommCareCase]
    """
    query = ElasticCaseQuery(domain, filters)
    response = get_es().get('hqcases/_search', data=query.get_query())
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    wrapped = []
    for row in response['hits']['hits']:
        if row["_source"]:
            wrapped.append(CommCareCase.wrap(row["_source"]))
    return wrapped
def change_trigger(self, changes_dict):
    """Pillow hook: when a group doc changes, push the group's id and name
    into each member user's __group_ids/__group_names fields in the user
    index."""
    es = get_es()
    user_ids = changes_dict["doc"].get("users", [])
    q = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    for user_source in stream_es_query(es_url=ES_URLS["users"], q=q, fields=["__group_ids", "__group_names"]):
        group_ids = set(user_source.get('fields', {}).get("__group_ids", []))
        group_names = set(user_source.get('fields', {}).get("__group_names", []))
        # Only write back when the user doc is actually missing this group.
        if changes_dict["doc"]["name"] not in group_names or changes_dict["doc"]["_id"] not in group_ids:
            group_ids.add(changes_dict["doc"]["_id"])
            group_names.add(changes_dict["doc"]["name"])
            doc = {"__group_ids": list(group_ids), "__group_names": list(group_names)}
            es.post("%s/user/%s/_update" % (USER_INDEX, user_source["_id"]), data={"doc": doc})
def es_histogram(histo_type, domains=None, startdate=None, enddate=None, tz_diff=None):
    """Return daily date-histogram facet entries for "forms" or "cases".

    Optionally restricted to a list of domains; ``tz_diff`` shifts the
    histogram bucket boundaries.
    """
    # Pick the date field and index URL for the requested doc type.
    date_field = {"forms": "received_on", "cases": "opened_on"}[histo_type]
    es_url = {
        "forms": XFORM_INDEX + '/xform/_search',
        "cases": CASE_INDEX + '/case/_search'
    }[histo_type]

    q = {"query": {"match_all": {}}}

    if domains is not None:
        q["query"] = {"in": {"domain.exact": domains}}

    q.update({
        "facets": {
            "histo": {
                "date_histogram": {
                    "field": date_field,
                    "interval": "day"
                },
                "facet_filter": {
                    "and": [{
                        "range": {
                            date_field: {
                                "from": startdate,
                                "to": enddate
                            }
                        }
                    }]
                }
            }
        },
        "size": 0  # facet counts only; no hits
    })

    if tz_diff:
        q["facets"]["histo"]["date_histogram"]["time_zone"] = tz_diff

    if histo_type == "forms":
        # Keep duplicate/deleted form docs out of the counts.
        q["facets"]["histo"]["facet_filter"]["and"].append(
            {"not": {
                "in": {
                    "doc_type": ["xformduplicate", "xformdeleted"]
                }
            }})

    es = get_es()
    ret_data = es.get(es_url, data=q)

    return ret_data["facets"]["histo"]["entries"]
def handle(self, *args, **options):
    """Admin command for inspecting and repairing ES pillow indices/aliases.

    Supports --show_info, --list_pillows, --flip_all, --pillow <class> and
    the destructive --code_red wipe. (Indentation reconstructed from a
    collapsed source line — confirm block structure against VCS.)
    """
    if len(args) != 0:
        raise CommandError("This command doesn't expect arguments!")

    show_info = options['show_info']
    list_pillows = options['list_pillows']
    flip_all = options['flip_all']
    flip_single = options['pillow_class']
    code_red = options['code_red']

    es = get_es()
    pillows = get_all_pillow_instances()
    # Only pillows backed by an aliased ES index are relevant here.
    aliased_pillows = filter(lambda x: isinstance(x, AliasedElasticPillow), pillows)

    if code_red:
        # Destructive path: delete every pillow's index and checkpoint.
        # The operator must literally type "code red" to proceed.
        if raw_input('\n'.join([
            'CODE RED!!!',
            'Really delete ALL the elastic indices and pillow checkpoints?',
            'The following pillows will be affected:',
            '\n'.join([type(p).__name__ for p in aliased_pillows]),
            'This is a PERMANENT action. (Type "code red" to continue):',
            '',
        ])).lower() == 'code red':
            for pillow in aliased_pillows:
                pillow.get_es_new().indices.delete(pillow.es_index)
                print 'deleted elastic index: {}'.format(pillow.es_index)
                checkpoint_id = pillow.checkpoint.checkpoint_id
                if pillow.couch_db.doc_exist(checkpoint_id):
                    pillow.couch_db.delete_doc(checkpoint_id)
                    print 'deleted checkpoint: {}'.format(checkpoint_id)
        else:
            print 'Safety first!'
        return

    if show_info:
        get_pillow_states(aliased_pillows).dump_info()
    if list_pillows:
        print aliased_pillows
    if flip_all:
        for pillow in aliased_pillows:
            assume_alias_for_pillow(pillow)
        print simplejson.dumps(es.get('_aliases'), indent=4)
    if flip_single is not None:
        pillow_class_name = flip_single
        # Resolve the target pillow instance by its class name.
        pillow_to_use = filter(lambda x: x.__class__.__name__ == pillow_class_name, aliased_pillows)
        if len(pillow_to_use) != 1:
            print "Unknown pillow (option --pillow <name>) class string, the options are: \n\t%s" % ', '.join(
                [x.__class__.__name__ for x in aliased_pillows])
            sys.exit()
        target_pillow = pillow_to_use[0]
        assume_alias_for_pillow(target_pillow)
        print es.get('_aliases')
def results(self): """Elasticsearch Results""" # there's no point doing filters that are like owner_id:(x1 OR x2 OR ... OR x612) # so past a certain number just exclude MAX_IDS = 50 def _filter_gen(key, list): if list and len(list) < MAX_IDS: yield {"terms": {key: [item.lower() if item else "" for item in list]}} # demo user hack elif list and "demo_user" not in list: yield {"not": {"term": {key: "demo_user"}}} if self.params.search: # these are not supported/implemented on the UI side, so ignoring (dmyung) pass subterms = [self.filter] if self.filter else [] if self.case_type: subterms.append({"term": {"type": self.case_type}}) if self.status: subterms.append({"term": {"closed": (self.status == "closed")}}) user_filters = list(_filter_gen("owner_id", self.owner_ids)) + list(_filter_gen("user_id", self.owner_ids)) if user_filters: subterms.append({"or": user_filters}) and_block = {"and": subterms} if subterms else {} es_query = { "query": {"filtered": {"query": {"match": {"domain.exact": self.domain}}, "filter": and_block}}, "sort": {self.sort_key: {"order": self.sort_order}}, "from": self.params.start, "size": self.params.count, } es_results = get_es().get("hqcases/_search", data=es_query) if es_results.has_key("error"): notify_exception(None, "Error in case list elasticsearch query: %s" % es_results["error"]) return {"skip": self.params.start, "limit": self.params.count, "rows": [], "total_rows": 0} # transform the return value to something compatible with the report listing ret = { "skip": self.params.start, "limit": self.params.count, "rows": [{"doc": x["_source"]} for x in es_results["hits"]["hits"]], "total_rows": es_results["hits"]["total"], } return ret
def _domains_matching(key, value):
    """Collect the distinct domain names from xforms where ``key`` equals ``value``."""
    match = {"term": {key: value}}
    facet = "facets"  # name is arbitrary; we only read it back out
    body = {
        "filter": match,
        "facets": {
            facet: {
                "terms": {"field": "domain.exact", "size": 1000},
                "facet_filter": match,
            }
        },
    }
    hits = get_es()["xforms"].post("_search", data=body)
    return [entry["term"] for entry in hits["facets"][facet]["terms"]]
def change_trigger(self, changes_dict):
    """On an xform change, index a stub ES user doc for senders unknown to
    both couch and ES."""
    user_id, username, domain, xform_id = self.get_fields(changes_dict)
    es = get_es()
    es_path = USER_INDEX + "/user/"
    # Guard clauses preserve the original short-circuit order:
    # no id -> skip; known in couch -> skip; already in ES -> skip.
    if not user_id:
        return
    if self.user_db.doc_exist(user_id):
        return
    if es.head(es_path + user_id):
        return
    # "admin" submissions get a distinct doc_type; everything else is unknown.
    doc_type = "AdminUser" if username == "admin" else "UnknownUser"
    doc = {
        "_id": user_id,
        "domain": domain,
        "username": username,
        "first_form_found_in": xform_id,
        "doc_type": doc_type,
    }
    if domain:
        doc["domain_membership"] = {"domain": domain}
    es.put(es_path + user_id, data=doc)
def change_trigger(self, changes_dict):
    """On an xform change, index a stub ES user doc for senders unknown to
    both couch and ES."""
    user_id, username, domain, xform_id = self.get_fields(changes_dict)
    es = get_es()
    es_path = USER_INDEX + "/user/"
    # Short-circuit order matters: skip the couch lookup when there is no
    # user id, and the ES HEAD when the user is already known in couch.
    if user_id and not self.user_db.doc_exist(user_id) and not es.head(
            es_path + user_id):
        # "admin" submissions get a distinct doc_type; all else is unknown.
        doc_type = "AdminUser" if username == "admin" else "UnknownUser"
        doc = {
            "_id": user_id,
            "domain": domain,
            "username": username,
            "first_form_found_in": xform_id,
            "doc_type": doc_type,
        }
        if domain:
            doc["domain_membership"] = {"domain": domain}
        es.put(es_path + user_id, data=doc)
def es_histogram(histo_type, domains=None, startdate=None, enddate=None, tz_diff=None, interval="day"):
    """Return date-histogram facet entries for forms, cases or users.

    ``interval`` controls bucket granularity; ``tz_diff`` shifts bucket
    boundaries; ``domains`` optionally restricts the query.
    """
    # Pick the date field and index URL for the requested doc type.
    date_field = {
        "forms": "received_on",
        "cases": "opened_on",
        "users": "created_on",
    }[histo_type]
    es_url = {
        "forms": XFORM_INDEX + '/xform/_search',
        "cases": CASE_INDEX + '/case/_search',
        "users": USER_INDEX + '/user/_search'
    }[histo_type]

    q = {"query": {"match_all":{}}}

    if domains is not None:
        q["query"] = {"in" : {"domain.exact": domains}}

    q.update({
        "facets": {
            "histo": {
                "date_histogram": {
                    "field": date_field,
                    "interval": interval
                },
                "facet_filter": {
                    "and": [{
                        "range": {
                            date_field: {
                                "from": startdate,
                                "to": enddate
                            }}}]}}},
        "size": 0  # facet counts only; no hits
    })

    if tz_diff:
        q["facets"]["histo"]["date_histogram"]["time_zone"] = tz_diff

    if histo_type == "forms":
        # Exclude duplicate/deleted forms and docs missing an xmlns or a
        # submitting user id from the counts.
        q["facets"]["histo"]["facet_filter"]["and"].append({"not": {"in": {"doc_type": ["xformduplicate", "xformdeleted"]}}})
        q["facets"]["histo"]["facet_filter"]["and"].append({"not": {"missing": {"field": "xmlns"}}})
        q["facets"]["histo"]["facet_filter"]["and"].append({"not": {"missing": {"field": "form.meta.userID"}}})
    if histo_type == "users":
        # Only count mobile (CommCareUser) accounts.
        q["facets"]["histo"]["facet_filter"]["and"].append({"term": {"doc_type": "CommCareUser"}})

    es = get_es()
    ret_data = es.get(es_url, data=q)
    return ret_data["facets"]["histo"]["entries"]
def _check_es_rev(index, doc_id, couch_revs):
    """
    Specific docid and rev checker.

    index: rawes index
    doc_id: id to query in ES
    couch_revs: iterable of target couch revs; any match counts as in-sync

    NOTE(review): ``status``/``message`` are computed but never returned
    from this function — looks like a missing ``return``; confirm with
    callers.
    """
    es = get_es()
    # Fetch only _id/_rev for the single target doc id.
    doc_id_query = {
        "filter": {
            "ids": {
                "values": [doc_id]
            }
        },
        "fields": ["_id", "_rev"]
    }
    try:
        res = es[index].get('_search', data=doc_id_query)
        status = False
        message = "Not in sync"

        if res.has_key('hits'):  # py2 idiom; 'in' is preferred
            if res['hits'].get('total', 0) == 0:
                status = False
                # if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif 'hits' in res['hits']:
                fields = res['hits']['hits'][0]['fields']
                if fields['_rev'] in couch_revs:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False
                    # less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
            else:
                status = False
                message = "Not in sync - query failed"
    except Exception, ex:
        message = "ES Error: %s" % ex
        status = False
def _check_es_rev(index, doc_id, couch_revs):
    """
    Specific docid and rev checker.

    index: rawes index
    doc_id: id to query in ES
    couch_revs: iterable of target couch revs; any match counts as in-sync

    NOTE(review): ``status``/``message`` are computed but never returned —
    looks like a missing ``return``; confirm with callers. Indentation of
    the notify_error call was reconstructed from a collapsed source line —
    confirm it is meant to fire on every result (including the OK case).
    """
    es = get_es()
    # Fetch only _id/_rev for the single target doc id.
    doc_id_query = {
        "filter": {
            "ids": {"values": [doc_id]}
        },
        "fields": ["_id", "_rev"]
    }
    try:
        res = es[index].get('_search', data=doc_id_query)
        status = False
        message = "Not in sync"

        if res.has_key('hits'):  # py2 idiom; 'in' is preferred
            if res['hits'].get('total', 0) == 0:
                status = False
                # if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif 'hits' in res['hits']:
                fields = res['hits']['hits'][0]['fields']
                if fields['_rev'] in couch_revs:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False
                    # less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
            else:
                status = False
                message = "Not in sync - query failed"
        notify_error("%s: %s" % (message, str(res)))
    except Exception, ex:
        message = "ES Error: %s" % ex
        status = False
def change_trigger(self, changes_dict):
    """Pillow hook: when a group doc changes, push the group's id and name
    into each member user's __group_ids/__group_names fields in the user
    index."""
    es = get_es()
    user_ids = changes_dict["doc"].get("users", [])
    q = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    for user_source in stream_es_query(
            es_url=ES_URLS["users"], q=q,
            fields=["__group_ids", "__group_names"]):
        group_ids = set(
            user_source.get('fields', {}).get("__group_ids", []))
        group_names = set(
            user_source.get('fields', {}).get("__group_names", []))
        # Only write back when the user doc is actually missing this group.
        if changes_dict["doc"]["name"] not in group_names or changes_dict[
                "doc"]["_id"] not in group_ids:
            group_ids.add(changes_dict["doc"]["_id"])
            group_names.add(changes_dict["doc"]["name"])
            doc = {
                "__group_ids": list(group_ids),
                "__group_names": list(group_names)
            }
            es.post("%s/user/%s/_update" % (USER_INDEX, user_source["_id"]),
                    data={"doc": doc})
def _check_es_rev(index, doc_id, couch_rev):
    """Check whether the ES copy of ``doc_id`` in ``index`` carries ``couch_rev``.

    NOTE(review): ``status``/``message`` are computed but never returned —
    this looks like a missing ``return``; confirm against callers.
    """
    es = get_es()
    # Fetch only _id/_rev for the single target doc id.
    doc_id_query = {
        "filter": {
            "ids": {
                "values": [doc_id]
            }
        },
        "fields": ["_id", "_rev"]
    }
    try:
        res = es[index].get('_search', data=doc_id_query)
        status = False
        message = "Not in sync"

        if res.has_key('hits'):  # py2 idiom; 'in' is preferred
            if res['hits'].get('total', 0) == 0:
                status = False
                #if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif res['hits'].has_key('hits'):
                fields = res['hits']['hits'][0]['fields']
                if fields['_rev'] == couch_rev:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False
                    #less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
            else:
                status = False
                message = "Not in sync - query failed"
    except Exception, ex:
        message = "ES Error: %s" % ex
        status = False
def __init__(self, domain):
    """Bind this view to a domain (lowercased) and open an ES connection."""
    super(ESView, self).__init__()
    # Lowercased to match how domain names are indexed in ES.
    self.domain = domain.lower()
    self.es = get_es()
def _es_query(self):
    """Count distinct values of ``self.field`` over report cases or forms.

    Runs a terms facet (top 50 values) filtered by domain, submission
    type, date range and (optionally) location; returns {value: count},
    folding the facet's overflow count into an 'Other' bucket.
    """
    # Per-mode config: which index/type to hit and how field names map
    # into the mapped '#value' paths.
    es_config_case = {
        'index': 'report_cases',
        'type': 'report_case',
        'field_to_path': lambda f: '%s.#value' % f,
        'fields': {
            'date': 'server_modified_on',
            'submission_type': 'type',
        }
    }
    es_config_form = {
        'index': 'report_xforms',
        'type': 'report_xform',
        'field_to_path': lambda f: 'form.%s.#value' % f,
        'fields': {
            'date': 'received_on',
            'submission_type': 'xmlns',
        }
    }
    es_config = {
        'case': es_config_case,
        'form': es_config_form,
    }[self.mode]

    MAX_DISTINCT_VALUES = 50

    es = elastic.get_es()
    filter_criteria = [
        {"term": {"domain": self.domain}},
        {"term": {es_config['fields']['submission_type']: self.submission_type}},
        {"range": {es_config['fields']['date']: {
            "from": self.start_date,
            "to": self.end_date,
        }}},
    ]
    if self.location_id:
        # Bug fix: the term field was the truncated "location_", which
        # matches no mapped field; the sibling implementation filters on
        # "location_id".
        filter_criteria.append({"term": {"location_id": self.location_id}})
    result = es.get(
        '%s/_search' % es_config['index'],
        data={
            "query": {"match_all": {}},
            "size": 0,  # no hits; only aggregated data
            "facets": {
                "blah": {
                    "terms": {
                        "field": "%s.%s" % (es_config['type'], es_config['field_to_path'](self.field)),
                        "size": MAX_DISTINCT_VALUES
                    },
                    "facet_filter": {
                        "and": filter_criteria
                    }
                }
            },
        })
    result = result['facets']['blah']
    raw = dict((k['term'], k['count']) for k in result['terms'])
    # The facet's 'other' field counts values beyond the top buckets.
    if result['other']:
        raw[_('Other')] = result['other']
    return raw
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search.

    Builds a bool/must query from the filter dict (honoring the reserved
    date-range and limit keys) and returns matching cases as JSON dicts.
    """

    class ElasticCaseQuery(object):
        # this class is currently pretty customized to serve exactly
        # this API. one day it may be worth reconciling our ES interfaces
        # but today is not that day.
        # To be replaced by CaseES framework.

        # Keys consumed by the query machinery itself; every other filter
        # key becomes a lowercased term clause.
        RESERVED_KEYS = ('date_modified_start', 'date_modified_end',
                         'server_date_modified_start', 'server_date_modified_end',
                         'limit')

        def __init__(self, domain, filters):
            self.domain = domain
            self.filters = filters
            self.limit = int(filters.get('limit', 50))
            self._date_modified_start = filters.get("date_modified_start", None)
            self._date_modified_end = filters.get("date_modified_end", None)
            self._server_date_modified_start = filters.get("server_date_modified_start", None)
            self._server_date_modified_end = filters.get("server_date_modified_end", None)

        @property
        def uses_modified(self):
            return bool(self._date_modified_start or self._date_modified_end)

        @property
        def uses_server_modified(self):
            return bool(self._server_date_modified_start or self._server_date_modified_end)

        @property
        def date_modified_start(self):
            # Open-ended ranges default to the epoch / datetime.max.
            return self._date_modified_start or datetime(1970, 1, 1).strftime("%Y-%m-%d")

        @property
        def date_modified_end(self):
            return self._date_modified_end or datetime.max.strftime("%Y-%m-%d")

        @property
        def server_date_modified_start(self):
            return self._server_date_modified_start or datetime(1970, 1, 1).strftime("%Y-%m-%d")

        @property
        def server_date_modified_end(self):
            return self._server_date_modified_end or datetime.max.strftime("%Y-%m-%d")

        @property
        def scrubbed_filters(self):
            # Everything that isn't a reserved key becomes a term filter.
            return dict((k, v) for k, v in self.filters.items() if k not in self.RESERVED_KEYS)

        def _modified_params(self, key, start, end):
            return {'range': {key: {'from': start, 'to': end}}}

        @property
        def modified_params(self):
            return self._modified_params('modified_on', self.date_modified_start, self.date_modified_end)

        @property
        def server_modified_params(self):
            return self._modified_params('server_modified_on', self.server_date_modified_start, self.server_date_modified_end)

        def get_terms(self):
            yield {'term': {'domain.exact': self.domain}}
            # Bug fix: this clause was previously yielded twice, producing a
            # redundant duplicate "must" entry in the query body.
            if self.uses_modified:
                yield self.modified_params
            if self.uses_server_modified:
                yield self.server_modified_params
            for k, v in self.scrubbed_filters.items():
                yield {'term': {k: v.lower()}}

        def get_query(self):
            return {
                'query': {
                    'bool': {'must': list(self.get_terms())}
                },
                'sort': {'modified_on': {'order': 'asc'}},
                'from': 0,
                'size': self.limit,
            }

    q = ElasticCaseQuery(domain, filters)
    res = get_es().get('hqcases/_search', data=q.get_query())
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    return [CommCareCase.wrap(r["_source"]).get_json() for r in res['hits']['hits'] if r["_source"]]
def __init__(self, domain):
    """Bind to a domain (lowercased) and open an ES connection."""
    # Lowercased to match how domain names are indexed in ES.
    self.domain=domain.lower()
    self.es = get_es()
def __init__(self, domain):
    """Bind to a domain (lowercased) and open an ES connection."""
    # Lowercased to match how domain names are indexed in ES.
    self.domain = domain.lower()
    self.es = get_es()
def __init__(self, domain):
    """Bind to a domain and open an ES connection.

    NOTE(review): unlike the sibling __init__ variants, the domain is NOT
    lowercased here — confirm whether that is intentional.
    """
    self.domain=domain
    self.es = get_es()
def handle(self, *args, **options):
    """Inspect or flip ES index aliases for aliased pillows.

    --show_info prints index/alias state; --list_pillows lists candidates;
    --do_flip points the alias at the pillow's HEAD index. (Indentation
    reconstructed from a collapsed source line — confirm against VCS.)
    """
    if len(args) != 0:
        raise CommandError("This command doesn't expect arguments!")
    print ""

    show_info = options['show_info']
    list_pillows = options['list_pillows']
    do_flip = options['do_flip']

    es = get_es()
    pillows = import_pillows()
    # Only pillows backed by an aliased ES index are relevant here.
    aliased_pillows = filter(lambda x: isinstance(x, AliasedElasticPillow), pillows)

    #make tuples of (index, alias)
    #this maybe problematic if we have multiple pillows pointing to the same alias or indices
    master_aliases = dict((x.es_index, x.es_alias) for x in aliased_pillows)
    print master_aliases

    if show_info:
        system_status = es.get('_status')
        indices = system_status['indices'].keys()
        print ""
        print "\tActive indices"
        for index in indices:
            print "\t\t%s" % index
        print ""
        print "\n\tAlias Mapping Status"
        active_aliases = es.get('_aliases')

        for idx, alias_dict in active_aliases.items():
            line = ["\t\t", idx]
            is_master = False
            if idx in master_aliases:
                is_master = True
                line.append('*HEAD')
            if is_master:
                if master_aliases[idx] in alias_dict['aliases']:
                    #is master, has alias, good
                    line.append('=> %s :)' % master_aliases[idx])
                else:
                    #is not master, doesn't have alias, bad
                    line.append('=> Does not have alias yet :(')
            else:
                #not a master index
                line.append(
                    '=> [%s] Non HEAD has alias' % (' '.join(alias_dict['aliases'].keys())))
            print ' '.join(line)
        print ""
        sys.exit()

    if list_pillows:
        print aliased_pillows
        sys.exit()

    if do_flip:
        pillow_class_name = options['pillow_class']
        # Resolve the target pillow instance by its class name.
        pillow_to_use = filter(lambda x: x.__class__.__name__ == pillow_class_name, aliased_pillows)
        if len(pillow_to_use) != 1:
            print "Unknown pillow (option --pillow <name>) class string, the options are: \n\t%s" % ', '.join(
                [x.__class__.__name__ for x in aliased_pillows])
            sys.exit()

        #ok we got the pillow
        target_pillow = pillow_to_use[0]
        target_pillow.assume_alias()
        print es.get('_aliases')
def __init__(self, **kwargs):
    """Wire up the couch DB handles and ES connection this pillow uses."""
    super(UnknownUsersPillow, self).__init__(**kwargs)
    # Source of xform submissions and the registry of known users.
    self.couch_db = XFormInstance.get_db()
    self.user_db = CouchUser.get_db()
    self.es = get_es()
def es_query(params=None, facets=None, terms=None, q=None, es_url=None, start_at=None, size=None, dict_only=False):
    """Build (and optionally execute) a filtered, faceted ES query.

    ``params``: attr -> value(s) turned into terms filters (strings are
    lowercased; 'T'/'F'/bools become 1/0), except attrs listed in
    ``terms``. ``q`` (mutated in place) may carry a preexisting body.
    With ``dict_only=True`` the assembled query dict is returned instead
    of being sent to ES.
    """
    if terms is None:
        terms = []
    if q is None:
        q = {}
    if params is None:
        params = {}

    q["size"] = size or 9999
    q["from"] = start_at or 0
    q["filter"] = q.get("filter", {})
    q["filter"]["and"] = q["filter"].get("and", [])

    def convert(param):
        #todo: find a better way to handle bools, something that won't break
        # fields that may be 'T' or 'F' but not bool
        if param == 'T' or param is True:
            return 1
        elif param == 'F' or param is False:
            return 0
        # Bug fix: only lowercase values that support it; non-string params
        # (ints, dates) previously raised AttributeError on .lower().
        return param.lower() if hasattr(param, 'lower') else param

    for attr in params:
        if attr not in terms:
            attr_val = ([convert(params[attr])]
                        if not isinstance(params[attr], list)
                        else [convert(p) for p in params[attr]])
            q["filter"]["and"].append({"terms": {attr: attr_val}})

    def facet_filter(facet):
        # Apply every filter clause to the facet except its own terms clause.
        ff = {"facet_filter": {}}
        ff["facet_filter"]["and"] = [
            clause for clause in q["filter"]["and"]
            if facet not in clause.get("terms", [])
        ]
        return ff if ff["facet_filter"]["and"] else {}

    if facets:
        q["facets"] = q.get("facets", {})
        for facet in facets:
            q["facets"][facet] = {"terms": {"field": facet, "size": 9999}}

    if q.get('facets'):
        for facet in q["facets"]:
            q["facets"][facet].update(facet_filter(facet))

    # ES rejects an empty "and" filter, so drop the filter entirely.
    if not q['filter']['and']:
        del q["filter"]

    if dict_only:
        return q

    es_url = es_url or "cc_exchange/domain/_search"
    es = get_es()
    ret_data = es.get(es_url, data=q)
    return ret_data
def add_demo_user_to_user_index():
    """Insert (or overwrite) the synthetic demo_user doc in the user index."""
    es = get_es()
    es_path = USER_INDEX + "/user/demo_user"
    # Username is masked; doc_type marks it as the special demo account.
    es.put(es_path, data={"_id": "demo_user", "username": "******", "doc_type": "DemoUser"})