def forms_with_cases(domain=None, since=None, chunksize=500):
    """Stream form hits whose ``__retrieved_case_ids`` field is not missing.

    :param domain: when truthy, sent to the query as the ``domain.exact``
        parameter; otherwise no parameters are sent.
    :param since: when truthy, an object with ``strftime``; adds a range
        clause on ``received_on`` starting at that date (``%Y-%m-%d``).
    :param chunksize: forwarded unchanged to ``stream_es_query``.
    :return: whatever ``stream_es_query`` returns for the forms endpoint,
        restricted to the ``__retrieved_case_ids``, ``domain`` and
        ``received_on`` fields and sorted by ``domain.exact`` ascending.
    """
    case_ids_present = {
        "bool": {
            "must_not": {
                "missing": {
                    "field": "__retrieved_case_ids",
                    "existence": True,
                    "null_value": True,
                }
            }
        }
    }
    if since:
        start_day = since.strftime("%Y-%m-%d")
        case_ids_present["bool"]["must"] = {
            "range": {"received_on": {"from": start_day}}
        }

    # The shared form filters are appended after the case-id clause.
    clauses = [case_ids_present]
    clauses.extend(ADD_TO_ES_FILTER["forms"][:])

    query = {
        "filter": {"and": clauses},
        "sort": [{"domain.exact": {"order": "asc"}}],
    }
    query_params = {"domain.exact": domain} if domain else {}

    return stream_es_query(
        params=query_params,
        q=query,
        es_url=ES_URLS["forms"],
        fields=["__retrieved_case_ids", "domain", "received_on"],
        chunksize=chunksize,
    )
def apps_update_calculated_properties():
    """Recompute ``cp_is_active`` for every app hit lacking ``copy_of``.

    Streams the 'apps' index for documents where the ``copy_of`` field is
    missing and, for each hit, sends a partial-document update containing
    the value returned by ``is_app_active`` for that app id and domain.
    """
    client = get_es_new()
    query = {"filter": {"and": [{"missing": {"field": "copy_of"}}]}}
    hits = stream_es_query(q=query, es_index='apps', size=999999, chunksize=500)
    for hit in hits:
        app_id = hit["_id"]
        active = is_app_active(app_id, hit["_source"]["domain"])
        client.update(
            APP_INDEX,
            ES_META['apps'].type,
            app_id,
            body={"doc": {"cp_is_active": active}},
        )
def apps_update_calculated_properties():
    """Refresh the ``cp_is_active`` calculated property on app documents.

    Only hits from the 'apps' index whose ``copy_of`` field is missing are
    visited. Each one receives a partial update carrying the result of
    ``is_app_active(app_id, domain)``.
    """
    es_client = get_es_new()
    no_copy_of = {"missing": {"field": "copy_of"}}
    app_hits = stream_es_query(
        q={"filter": {"and": [no_copy_of]}},
        es_index='apps',
        size=999999,
        chunksize=500,
    )
    for app_hit in app_hits:
        update_doc = {
            "cp_is_active": is_app_active(app_hit["_id"], app_hit["_source"]["domain"]),
        }
        es_client.update(APP_INDEX, ES_META['apps'].type, app_hit["_id"], body={"doc": update_doc})
def update_calculated_properties():
    """Recompute the ``cp_*`` calculated properties for each domain hit.

    Streams hits with ``doc_type`` "Domain" and ``is_snapshot`` False from the
    domains endpoint (fetching only ``name``), builds the property dict from
    ``_all_domain_stats()``, ``CALC_FNS`` and ``total_distinct_users``, and
    posts it as a partial-document update for the hit's ``_id``.
    """
    client = get_es()
    query = {
        "filter": {
            "and": [
                {"term": {"doc_type": "Domain"}},
                {"term": {"is_snapshot": False}},
            ]
        }
    }
    hits = stream_es_query(q=query, es_url=ES_URLS["domains"], size=999999, chunksize=500, fields=["name"])
    stats = _all_domain_stats()
    for hit in hits:
        name = hit["fields"]["name"]
        # Entry order is kept so the helper calls run in the same sequence.
        props = {
            "cp_n_web_users": int(stats["web_users"][name]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](name)),
            "cp_n_cc_users": int(stats["commcare_users"][name]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](name, 120)),
            "cp_n_users_submitted_form": total_distinct_users([name]),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](name, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](name, 60)),
            "cp_n_cases": int(stats["cases"][name]),
            "cp_n_forms": int(stats["forms"][name]),
            "cp_first_form": CALC_FNS["first_form_submission"](name, False),
            "cp_last_form": CALC_FNS["last_form_submission"](name, False),
            "cp_is_active": CALC_FNS["active"](name),
            "cp_has_app": CALC_FNS["has_app"](name),
            "cp_last_updated": datetime.now().strftime(DATE_FORMAT),
        }
        # When the first-form value is the 'No forms' marker, neither form
        # date is sent in the update.
        if props['cp_first_form'] == 'No forms':
            for form_key in ('cp_first_form', 'cp_last_form'):
                del props[form_key]
        update_path = "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, hit["_id"])
        client.post(update_path, data={"doc": props})
def apps_update_calculated_properties():
    """Post a fresh ``cp_is_active`` value for each app hit lacking ``copy_of``.

    Hits are streamed from the apps endpoint; for each one the result of
    ``is_app_active(app_id, domain)`` is posted as a partial-document update
    to ``<APP_INDEX>/app/<app_id>/_update``.
    """
    client = get_es()
    query = {"filter": {"and": [{"missing": {"field": "copy_of"}}]}}
    for hit in stream_es_query(q=query, es_url=ES_URLS["apps"], size=999999, chunksize=500):
        app_id = hit["_id"]
        props = {"cp_is_active": is_app_active(app_id, hit["_source"]["domain"])}
        update_path = "%s/app/%s/_update" % (APP_INDEX, app_id)
        client.post(update_path, data={"doc": props})
def get_all_rows(self):
    """Return the rows for every case hit matching ``self.es_query``.

    Streams hits from the 'cases' index, lazily wraps each one as
    ``self.model(self, self.get_case(hit))`` and hands that iterable to
    ``self.get_cases``.

    Previously the generator was built and then discarded, so the method
    implicitly returned ``None``.
    """
    query_results = stream_es_query(q=self.es_query, es_index='cases', size=999999, chunksize=100)
    case_displays = (self.model(self, self.get_case(case)) for case in query_results)
    # NOTE(review): assumes self.get_cases accepts an iterable of display
    # objects and produces the report rows -- confirm against the class.
    return self.get_cases(case_displays)
def get_all_rows(self):
    """Return the rows for every case hit matching ``self.es_query``.

    Streams hits from the cases endpoint, lazily wraps each one as
    ``self.model(self, self.get_case(hit))`` and hands that iterable to
    ``self.get_cases``.

    Previously the generator was built and then discarded, so the method
    implicitly returned ``None``.
    """
    query_results = stream_es_query(q=self.es_query, es_url=ES_URLS["cases"], size=999999, chunksize=100)
    case_displays = (self.model(self, self.get_case(case)) for case in query_results)
    # NOTE(review): assumes self.get_cases accepts an iterable of display
    # objects and produces the report rows -- confirm against the class.
    return self.get_cases(case_displays)
def update_calculated_properties():
    """Refresh the calculated (``cp_*``) properties stored on domain docs.

    For every hit from the domains endpoint with ``doc_type`` "Domain" and
    ``is_snapshot`` False, the properties are rebuilt from
    ``_all_domain_stats()``, the ``CALC_FNS`` table and
    ``total_distinct_users`` and posted as a partial update to
    ``<DOMAIN_INDEX>/hqdomain/<_id>/_update``.
    """
    es_client = get_es()
    domain_filters = [
        {"term": {"doc_type": "Domain"}},
        {"term": {"is_snapshot": False}},
    ]
    domain_hits = stream_es_query(
        q={"filter": {"and": domain_filters}},
        es_url=ES_URLS["domains"],
        size=999999,
        chunksize=500,
        fields=["name"],
    )
    totals = _all_domain_stats()
    for domain_hit in domain_hits:
        domain_name = domain_hit["fields"]["name"]
        # Entry order is kept so the helper calls run in the same sequence.
        calculated = {
            "cp_n_web_users": int(totals["web_users"][domain_name]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](domain_name)),
            "cp_n_cc_users": int(totals["commcare_users"][domain_name]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](domain_name, 120)),
            "cp_n_users_submitted_form": total_distinct_users([domain_name]),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](domain_name, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](domain_name, 60)),
            "cp_n_cases": int(totals["cases"][domain_name]),
            "cp_n_forms": int(totals["forms"][domain_name]),
            "cp_first_form": CALC_FNS["first_form_submission"](domain_name, False),
            "cp_last_form": CALC_FNS["last_form_submission"](domain_name, False),
            "cp_is_active": CALC_FNS["active"](domain_name),
            "cp_has_app": CALC_FNS["has_app"](domain_name),
            "cp_last_updated": datetime.now().strftime(DATE_FORMAT),
        }
        no_forms = calculated['cp_first_form'] == 'No forms'
        if no_forms:
            # Both form dates are left out of the update in this case.
            del calculated['cp_first_form']
            del calculated['cp_last_form']
        es_client.post(
            "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, domain_hit["_id"]),
            data={"doc": calculated},
        )
def stream_user_sources(user_ids):
    """Yield a ``UserSource`` for each user hit whose ``_id`` is in *user_ids*.

    Only the ``__group_ids`` and ``__group_names`` fields are requested. A
    field value that is a list becomes a set of its items; any other value
    becomes a one-element set. A missing field yields an empty set.
    """
    def _to_set(value):
        # Non-list values are treated as a single entry.
        if isinstance(value, list):
            return set(value)
        return {value}

    query = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    hits = stream_es_query(es_index='users', q=query, fields=["__group_ids", "__group_names"])
    for hit in hits:
        ids = _to_set(hit.get('fields', {}).get("__group_ids", []))
        names = _to_set(hit.get('fields', {}).get("__group_names", []))
        yield UserSource(hit['_id'], ids, names)
def apps_update_calculated_properties():
    """Send an updated ``cp_is_active`` value for each app hit lacking ``copy_of``.

    Hits are streamed from the 'apps' index. For each one a document holding
    the hit's ``_id`` and the result of ``is_app_active(app_id, domain)`` is
    passed to ``send_to_elasticsearch`` with ``es_merge_update=True``.
    """
    query = {"filter": {"and": [{"missing": {"field": "copy_of"}}]}}
    for hit in stream_es_query(q=query, es_index='apps', size=999999, chunksize=500):
        app_id = hit["_id"]
        domain = hit["_source"]["domain"]
        update_doc = {
            "_id": app_id,
            "cp_is_active": is_app_active(app_id, domain),
        }
        send_to_elasticsearch('apps', update_doc, es_merge_update=True)
def change_transport(self, doc_dict):
    """Add this group's id and name to each member user's ES document.

    :param doc_dict: group document dict; reads ``users`` (list of user ids,
        defaulting to empty), ``name`` and ``_id``.

    For every user hit whose ``_id`` is in the group's ``users`` list, the
    user's ``__group_ids`` / ``__group_names`` are updated through
    ``self.es.update`` when either the group id or the group name is not
    already present.
    """
    user_ids = doc_dict.get("users", [])
    q = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    for user_source in stream_es_query(es_index='users', q=q, fields=["__group_ids", "__group_names"]):
        fields = user_source.get('fields', {})

        # A field value that is not a list is treated as a single entry:
        # calling set() directly on a string would split it into characters
        # and corrupt the stored ids/names on the next write.
        group_ids = fields.get("__group_ids", [])
        group_ids = set(group_ids if isinstance(group_ids, list) else [group_ids])
        group_names = fields.get("__group_names", [])
        group_names = set(group_names if isinstance(group_names, list) else [group_names])

        if doc_dict["name"] not in group_names or doc_dict["_id"] not in group_ids:
            group_ids.add(doc_dict["_id"])
            group_names.add(doc_dict["name"])
            doc = {"__group_ids": list(group_ids), "__group_names": list(group_names)}
            self.es.update(USER_INDEX, self.es_type, user_source["_id"], body={"doc": doc})
def change_trigger(self, changes_dict):
    """Add the changed group's id and name to each member user's ES document.

    :param changes_dict: change record; ``changes_dict["doc"]`` is the group
        document, from which ``users`` (defaulting to empty), ``name`` and
        ``_id`` are read.

    For every user hit whose ``_id`` is in the group's ``users`` list, the
    user's ``__group_ids`` / ``__group_names`` are updated with a partial
    update post when either the group id or the group name is not already
    present.
    """
    es = get_es()
    group = changes_dict["doc"]
    user_ids = group.get("users", [])
    q = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    for user_source in stream_es_query(es_url=ES_URLS["users"], q=q, fields=["__group_ids", "__group_names"]):
        fields = user_source.get('fields', {})

        # A field value that is not a list is treated as a single entry:
        # calling set() directly on a string would split it into characters
        # and corrupt the stored ids/names on the next write.
        group_ids = fields.get("__group_ids", [])
        group_ids = set(group_ids if isinstance(group_ids, list) else [group_ids])
        group_names = fields.get("__group_names", [])
        group_names = set(group_names if isinstance(group_names, list) else [group_names])

        if group["name"] not in group_names or group["_id"] not in group_ids:
            group_ids.add(group["_id"])
            group_names.add(group["name"])
            doc = {"__group_ids": list(group_ids), "__group_names": list(group_names)}
            es.post("%s/user/%s/_update" % (USER_INDEX, user_source["_id"]), data={"doc": doc})
def save_metadata_export_to_tempfile(domain, datespan=None, user_ids=None):
    """
    Saves the domain's form metadata to a file. Returns the filename.

    :param domain: sent to the query as the ``domain.exact`` parameter and
        passed to ``xmlns_to_name`` for the "type" column.
    :param datespan: when truthy, restricts hits to those whose
        ``form.meta.timeEnd`` lies between ``datespan.startdate_param`` and
        ``datespan.enddate_param`` (upper bound excluded).
    :param user_ids: when not None, restricts hits to those whose
        ``form.meta.userID`` is one of these ids.
    :return: path of the temporary file holding the export.

    The temporary file is created only once the query has been built, and it
    is removed again if writing the export fails, because the caller never
    receives the path in that case.
    """
    headers = ("domain", "instanceID", "received_on", "type", "timeStart", "timeEnd", "deviceID", "username", "userID", "xmlns", "version")

    def _key_to_val(formdata, key):
        # "type" and "version" are derived; the three top-level keys come
        # straight from the document; everything else lives in form.meta.
        if key == "type":
            return xmlns_to_name(domain, formdata.get("xmlns"), app_id=None)
        if key == "version":
            return formdata["form"].get("@version")
        if key in ["domain", "received_on", "xmlns"]:
            return formdata.get(key)
        return formdata["form"].get("meta", {}).get(key)

    def _form_data_to_row(formdata):
        return [_key_to_val(formdata, key) for key in headers]

    q = {
        "query": {"match_all": {}},
        "sort": [{"received_on" : {"order": "desc"}}],
        "filter": {"and": []},
    }
    if datespan:
        q["query"] = {
            "range": {
                "form.meta.timeEnd": {
                    "from": datespan.startdate_param,
                    "to": datespan.enddate_param,
                    "include_upper": False,
                }
            }
        }
    if user_ids is not None:
        q["filter"]["and"].append({"terms": {"form.meta.userID": user_ids}})

    results = stream_es_query(params={"domain.exact": domain}, q=q, es_url=XFORM_INDEX + '/xform/_search', size=999999)
    data = (_form_data_to_row(res["_source"]) for res in results)

    # Created as late as possible so a failure while building the query
    # cannot leave an open descriptor and a stray file behind.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as temp:
            export_raw((("forms", headers),), (("forms", data),), temp)
    except Exception:
        # The file is closed by now; drop it so failed exports don't pile up.
        os.remove(path)
        raise
    return path
def save_metadata_export_to_tempfile(domain, format, datespan=None, user_ids=None):
    """
    Saves the domain's form metadata to a file. Returns the filename.

    :param domain: sent to the query as the ``domain.exact`` parameter and
        passed to ``xmlns_to_name`` for the "type" column.
    :param format: forwarded to ``export_raw`` as its ``format`` argument.
    :param datespan: when truthy, restricts hits to those whose
        ``form.meta.timeEnd`` lies between ``datespan.startdate_param`` and
        ``datespan.enddate_param`` (upper bound excluded).
    :param user_ids: when not None, restricts hits to those whose
        ``form.meta.userID`` is one of these ids.
    :return: path of the temporary file holding the export.

    The temporary file is created only once the query has been built, and it
    is removed again if writing the export fails, because the caller never
    receives the path in that case.
    """
    headers = ("domain", "instanceID", "received_on", "type", "timeStart", "timeEnd", "deviceID", "username", "userID", "xmlns", "version")

    def _key_to_val(formdata, key):
        # "type" and "version" are derived; the three top-level keys come
        # straight from the document; everything else lives in form.meta.
        if key == "type":
            return xmlns_to_name(domain, formdata.get("xmlns"), app_id=None)
        if key == "version":
            return formdata["form"].get("@version")
        if key in ["domain", "received_on", "xmlns"]:
            return formdata.get(key)
        return formdata["form"].get("meta", {}).get(key)

    def _form_data_to_row(formdata):
        return [_key_to_val(formdata, key) for key in headers]

    q = {
        "query": {"match_all": {}},
        "sort": [{"received_on" : {"order": "desc"}}],
        "filter": {"and": []},
    }
    if datespan:
        q["query"] = {
            "range": {
                "form.meta.timeEnd": {
                    "from": datespan.startdate_param,
                    "to": datespan.enddate_param,
                    "include_upper": False,
                }
            }
        }
    if user_ids is not None:
        q["filter"]["and"].append({"terms": {"form.meta.userID": user_ids}})

    results = stream_es_query(params={"domain.exact": domain}, q=q, es_url=XFORM_INDEX + '/xform/_search', size=999999)
    data = (_form_data_to_row(res["_source"]) for res in results)

    # Created as late as possible so a failure while building the query
    # cannot leave an open descriptor and a stray file behind.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as temp:
            export_raw((("forms", headers),), (("forms", data),), temp, format=format)
    except Exception:
        # The file is closed by now; drop it so failed exports don't pile up.
        os.remove(path)
        raise
    return path
def forms_with_cases(domain=None, since=None, chunksize=500):
    """Stream form hits that carry a ``__retrieved_case_ids`` value.

    :param domain: when truthy, sent as the ``domain.exact`` query parameter.
    :param since: when truthy, an object with ``strftime``; hits are limited
        to ``received_on`` values from that date (``%Y-%m-%d``) onward.
    :param chunksize: forwarded unchanged to ``stream_es_query``.
    :return: the result of ``stream_es_query`` against the forms endpoint,
        sorted by ``domain.exact`` ascending and limited to the
        ``__retrieved_case_ids``, ``domain`` and ``received_on`` fields.
    """
    bool_clause = {
        "must_not": {
            "missing": {
                "field": "__retrieved_case_ids",
                "existence": True,
                "null_value": True,
            }
        }
    }
    if since:
        bool_clause["must"] = {
            "range": {"received_on": {"from": since.strftime("%Y-%m-%d")}}
        }

    # Case-id clause first, then the shared form filters.
    and_filters = [{"bool": bool_clause}]
    and_filters.extend(ADD_TO_ES_FILTER["forms"][:])

    es_query = {
        "filter": {"and": and_filters},
        "sort": [{"domain.exact": {"order": "asc"}}],
    }

    if domain:
        url_params = {"domain.exact": domain}
    else:
        url_params = {}

    return stream_es_query(
        params=url_params,
        q=es_query,
        es_url=ES_URLS["forms"],
        fields=["__retrieved_case_ids", "domain", "received_on"],
        chunksize=chunksize,
    )
def update_es_user_with_groups(group_doc, es_client=None):
    """Add a group's id and name to each member user's ES document.

    :param group_doc: group document dict; reads ``users`` (defaulting to
        empty), ``name`` and ``_id``.
    :param es_client: client used for the updates; when falsy, one is
        obtained from ``get_es_new()``.

    For each user hit whose ``_id`` is in the group's ``users`` list, a
    partial update of ``__group_ids`` / ``__group_names`` is sent unless both
    the group id and the group name are already present.
    """
    def _to_set(value):
        # Non-list values are treated as a single entry.
        if isinstance(value, list):
            return set(value)
        return {value}

    client = es_client if es_client else get_es_new()
    member_ids = group_doc.get("users", [])
    query = {"filter": {"and": [{"terms": {"_id": member_ids}}]}}
    hits = stream_es_query(es_index='users', q=query, fields=["__group_ids", "__group_names"])
    for hit in hits:
        ids = _to_set(hit.get('fields', {}).get("__group_ids", []))
        names = _to_set(hit.get('fields', {}).get("__group_names", []))

        if group_doc["name"] in names and group_doc["_id"] in ids:
            # Already recorded on this user; nothing to write.
            continue

        ids.add(group_doc["_id"])
        names.add(group_doc["name"])
        update_doc = {"__group_ids": list(ids), "__group_names": list(names)}
        client.update(USER_INDEX, ES_META['users'].type, hit["_id"], body={"doc": update_doc})
def change_trigger(self, changes_dict):
    """Add the changed group's id and name to each member user's ES document.

    :param changes_dict: change record; ``changes_dict["doc"]`` is the group
        document, from which ``users`` (defaulting to empty), ``name`` and
        ``_id`` are read.

    For every user hit whose ``_id`` is in the group's ``users`` list, the
    user's ``__group_ids`` / ``__group_names`` are updated with a partial
    update post when either the group id or the group name is not already
    present.
    """
    es = get_es()
    group = changes_dict["doc"]
    user_ids = group.get("users", [])
    q = {"filter": {"and": [{"terms": {"_id": user_ids}}]}}
    for user_source in stream_es_query(
            es_url=ES_URLS["users"], q=q,
            fields=["__group_ids", "__group_names"]):
        fields = user_source.get('fields', {})

        # A field value that is not a list is treated as a single entry:
        # calling set() directly on a string would split it into characters
        # and corrupt the stored ids/names on the next write.
        group_ids = fields.get("__group_ids", [])
        group_ids = set(
            group_ids if isinstance(group_ids, list) else [group_ids])
        group_names = fields.get("__group_names", [])
        group_names = set(
            group_names if isinstance(group_names, list) else [group_names])

        if group["name"] not in group_names or group["_id"] not in group_ids:
            group_ids.add(group["_id"])
            group_names.add(group["name"])
            doc = {
                "__group_ids": list(group_ids),
                "__group_names": list(group_names)
            }
            es.post("%s/user/%s/_update" % (USER_INDEX, user_source["_id"]),
                    data={"doc": doc})
def get_all_rows(self):
    """Return ``self.get_cases`` applied to every case hit for ``self.es_query``.

    Hits are streamed from the cases endpoint and each one is lazily wrapped
    as ``self.model(self, self.get_case(hit))`` before being handed over.
    """
    hits = stream_es_query(
        q=self.es_query,
        es_url=ES_URLS["cases"],
        size=999999,
        chunksize=100,
    )
    displays = (self.model(self, self.get_case(hit)) for hit in hits)
    return self.get_cases(displays)