def get_forms_for_users(domain, user_ids, start, end):
    """
    Scroll over the forms submitted by the given users in a domain.

    :param domain: domain name handed to the ``domain`` filter
    :param user_ids: user id(s) handed to the ``user_id`` filter
    :param start: lower bound (``gte``) for the submission filter
    :param end: upper bound (``lte``) for the submission filter
    :return: whatever ``scroll()`` yields for the built query; only the
        submitting user id, the case block and the xmlns are requested
        as source fields
    """
    source_fields = ['form.meta.userID', 'form.case', 'form.@xmlns']
    forms = FormES().domain(domain)
    forms = forms.submitted(gte=start, lte=end)
    forms = forms.user_id(user_ids)
    forms = forms.source(source_fields)
    return forms.scroll()
def track_periodic_data():
    """
    Sync data that is neither event nor page based with HubSpot/Kissmetrics.

    Web users with analytics enabled who logged in within the last 90 days
    are fetched in pages of 100, most recently joined first. For each user
    with a valid email, the largest form, mobile worker, export and report
    counts across the user's domains are collected, and every page of users
    is submitted as one JSON payload. Finally the number of users processed
    and the number of domains whose form count exceeds HUBSPOT_THRESHOLD
    are reported through ``update_datadog_metrics``.

    :return: None
    """
    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)
    user_query = (
        UserES()
        .web_users()
        .last_logged_in(gte=three_months_ago)
        .sort('date_joined', desc=True)
        .source(['domains', 'email', 'date_joined'])
        .analytics_enabled()
    )
    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # The per-domain aggregations are the same for every page of users, so
    # they are computed once up front instead of once per chunk.
    domains_to_forms = (
        FormES()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )
    domains_to_mobile_users = (
        UserES()
        .mobile_users()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than
    # HUBSPOT_THRESHOLD. Counting the domains here, outside the chunk loop,
    # keeps the gauge from being multiplied by the number of chunks.
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (
            user_query
            .size(chunk_size)
            .start(chunk * chunk_size)
            .run()
            .hits
        )

        # For each web user, iterate through their domains and select the
        # max number of form submissions and max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                if domain in domains_to_forms:
                    max_forms = max(max_forms, domains_to_forms[domain])
                if domain in domains_to_mobile_users:
                    max_workers = max(
                        max_workers, domains_to_mobile_users[domain])
                # Each count helper is called once per domain
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [{
                    'property': '{}max_form_submissions_in_a_domain'.format(env),
                    'value': max_forms
                }, {
                    'property': '{}max_mobile_workers_in_a_domain'.format(env),
                    'value': max_workers
                }, {
                    'property': '{}project_spaces_created_by_user'.format(env),
                    'value': project_spaces_created,
                }, {
                    'property': '{}over_300_form_submissions'.format(env),
                    'value': max_forms > HUBSPOT_THRESHOLD
                }, {
                    'property': '{}date_created'.format(env),
                    'value': date_created
                }, {
                    'property': '{}max_exports_in_a_domain'.format(env),
                    'value': max_export
                }, {
                    'property': '{}max_custom_reports_in_a_domain'.format(env),
                    'value': max_report
                }]
            }
            submit.append(user_json)

        # One submission per page of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE:
            hubspot_number_of_domains_with_forms_gt_threshold
    })
def get_300th_form_submission_received(domain):
    """
    Return the ``received_on`` time of the form found at offset 300 when
    the domain's forms are sorted by ``received_on``.

    :param domain: domain name to query
    :return: the result of ``iso_string_to_datetime`` applied to that
        form's ``received_on`` value, or ``None`` when the query has no hit
    """
    # NOTE(review): presumably ``start(300)`` is a zero-based offset, which
    # would make this the 301st form rather than the 300th - confirm intent.
    query = (
        FormES()
        .domain(domain)
        .start(300)
        .size(1)
        .sort('received_on')
        .fields(['received_on'])
    )
    hits = query.run().hits
    if hits:
        return iso_string_to_datetime(hits[0]['received_on'])
    return None
def forms_in_last(domain, days):
    """
    Returns the number of forms submitted in the last given number of days

    :param domain: domain name to query
    :param days: size of the look-back window in days (coerced with ``int``)
    :return: the ``total`` reported by the query for forms submitted at or
        after ``utcnow() - days``
    """
    cutoff = datetime.utcnow() - timedelta(days=int(days))
    # size(0): only the total is read, no documents are needed
    recent_forms = FormES().domain(domain).submitted(gte=cutoff).size(0)
    return recent_forms.run().total
def find_form_ids_updating_case(case_id):
    """
    Return the ids of the forms matched by the ``updating_cases`` filter
    for a single case.

    :param case_id: id of the case to look up
    :return: list of the ``_id`` value of every hit returned by the query
    """
    query = FormES().filter(updating_cases([case_id]))
    hits = query.run().hits
    return [form["_id"] for form in hits]
def track_periodic_data():
    """
    Sync data that is neither event nor page based with HubSpot/Kissmetrics.

    Does nothing when both KISSMETRICS_ENABLED and HUBSPOT_ENABLED are
    false. Otherwise, web users with analytics enabled who logged in within
    the last 90 days are fetched in pages of 100, most recently joined
    first. Users whose email/username is in the blocked-invitation list, or
    who belong to a blocked domain, are counted as blocked and skipped. For
    every remaining user with a valid email, the largest form, mobile
    worker, export and report counts across the user's domains are
    collected, and each page of users is submitted as one JSON payload.
    Gauges for processed users, blocked users, domains over
    HUBSPOT_THRESHOLD and the task runtime are emitted at the end.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    time_started = datetime.utcnow()

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)
    user_query = (
        UserES()
        .web_users()
        .last_logged_in(gte=three_months_ago)
        .sort('date_joined', desc=True)
        .source(['domains', 'email', 'date_joined', 'username'])
        .analytics_enabled()
    )
    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    blocked_domains = get_blocked_hubspot_domains()
    blocked_users = emails_that_accepted_invitations_to_blocked_hubspot_domains()

    # The per-domain aggregations are the same for every page of users, so
    # they are computed once up front instead of once per chunk.
    domains_to_forms = (
        FormES()
        .terms_aggregation('domain.exact', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )
    domains_to_mobile_users = (
        UserES()
        .mobile_users()
        .terms_aggregation('domain.exact', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than
    # HUBSPOT_THRESHOLD. Counting the domains here, outside the chunk loop,
    # keeps the gauge from being multiplied by the number of chunks.
    hubspot_number_of_users_processed = 0
    hubspot_number_of_users_blocked = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (
            user_query
            .size(chunk_size)
            .start(chunk * chunk_size)
            .run()
            .hits
        )

        # For each web user, iterate through their domains and select the
        # max number of form submissions and max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email') or user.get('username')
            if not _email_is_valid(email):
                continue

            if (user.get('email') in blocked_users
                    or user.get('username') in blocked_users):
                # User had accepted an invitation to a project space whose
                # Billing Account has blocked HubSpot analytics, so we
                # should not send any data about them going forward
                metrics_counter(
                    'commcare.hubspot_data.rejected.periodic_task.invitation',
                )
                hubspot_number_of_users_blocked += 1
                continue

            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0
            is_member_of_blocked_domain = False

            for domain in user['domains']:
                if domain in blocked_domains:
                    metrics_counter(
                        'commcare.hubspot_data.rejected.periodic_task.domain',
                        tags={
                            'domain': domain,
                        })
                    is_member_of_blocked_domain = True
                    break
                if domain in domains_to_forms:
                    max_forms = max(max_forms, domains_to_forms[domain])
                if domain in domains_to_mobile_users:
                    max_workers = max(
                        max_workers, domains_to_mobile_users[domain])
                # Each count helper is called once per domain
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            if is_member_of_blocked_domain:
                # user is a member of a project space whose Billing Account
                # has blocked HubSpot analytics, so we must not send any
                # data about them.
                hubspot_number_of_users_blocked += 1
                continue

            hubspot_number_of_users_processed += 1
            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [{
                    'property': '{}max_form_submissions_in_a_domain'.format(env),
                    'value': max_forms
                }, {
                    'property': '{}max_mobile_workers_in_a_domain'.format(env),
                    'value': max_workers
                }, {
                    'property': '{}project_spaces_created_by_user'.format(env),
                    'value': project_spaces_created,
                }, {
                    'property': '{}over_300_form_submissions'.format(env),
                    'value': max_forms > HUBSPOT_THRESHOLD
                }, {
                    'property': '{}date_created'.format(env),
                    'value': date_created
                }, {
                    'property': '{}max_exports_in_a_domain'.format(env),
                    'value': max_export
                }, {
                    'property': '{}max_custom_reports_in_a_domain'.format(env),
                    'value': max_report
                }]
            }
            submit.append(user_json)

        # One submission per page of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    metrics_gauge('commcare.hubspot.web_users_processed',
                  hubspot_number_of_users_processed,
                  multiprocess_mode=MPM_LIVESUM)
    metrics_gauge('commcare.hubspot.web_users_blocked',
                  hubspot_number_of_users_blocked,
                  multiprocess_mode=MPM_LIVESUM)
    metrics_gauge('commcare.hubspot.domains_with_forms_gt_threshold',
                  hubspot_number_of_domains_with_forms_gt_threshold,
                  multiprocess_mode=MPM_MAX)

    task_time = datetime.utcnow() - time_started
    # total_seconds() includes the days component that ``.seconds`` drops;
    # int() keeps the whole-second value reported for runs under a day.
    metrics_gauge('commcare.hubspot.runtimes.track_periodic_data',
                  int(task_time.total_seconds()),
                  multiprocess_mode=MPM_LIVESUM)
def track_periodic_data():
    """
    Sync data that is neither event nor page based with HubSpot/Kissmetrics.

    Looks up web users who logged in during the last 180 days, works out
    for each one with a valid email the highest form count and highest
    mobile worker count among their domains, submits all of those users as
    a single JSON payload, and then reports the user and domain totals
    through ``update_datadog_metrics``.

    :return: None
    """
    login_cutoff = date.today() - timedelta(days=180)

    # Each hit is a dict holding the requested user fields
    web_users = (
        UserES()
        .web_users()
        .last_logged_in(gte=login_cutoff)
        .fields(['domains', 'email', 'date_joined'])
        .run()
        .hits
    )
    forms_by_domain = (
        FormES()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )
    workers_by_domain = (
        UserES()
        .mobile_users()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )

    # Prefix used to keep india and www data separate
    env = get_instance_string()

    # Number of domains whose form count is above HUBSPOT_THRESHOLD
    number_of_domains_with_forms_gt_threshold = sum(
        1 for form_count in forms_by_domain.values()
        if form_count > HUBSPOT_THRESHOLD
    )

    number_of_users = 0
    submit = []
    for user in web_users:
        email = user.get('email')
        if not _email_is_valid(email):
            continue

        number_of_users += 1
        date_created = user.get('date_joined')
        user_domains = user['domains']

        # Highest per-domain counts, never lower than zero
        max_forms = max(
            [0] + [forms_by_domain[name] for name in user_domains
                   if name in forms_by_domain])
        max_workers = max(
            [0] + [workers_by_domain[name] for name in user_domains
                   if name in workers_by_domain])

        project_spaces_created = ", ".join(get_domains_created_by_user(email))

        # (property name template, value) pairs in submission order
        property_values = [
            ('{}max_form_submissions_in_a_domain', max_forms),
            ('{}max_mobile_workers_in_a_domain', max_workers),
            ('{}project_spaces_created_by_user', project_spaces_created),
            ('{}over_300_form_submissions', max_forms > HUBSPOT_THRESHOLD),
            ('{}date_created', date_created),
        ]
        submit.append({
            'email': email,
            'properties': [
                {'property': template.format(env), 'value': value}
                for template, value in property_values
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(submit))

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE:
            number_of_domains_with_forms_gt_threshold,
    })
def j2me_forms_in_last(domain, days):
    """
    Returns the number of forms submitted by j2me in the last given number of days

    :param domain: domain name to query
    :param days: size of the look-back window in days (coerced with ``int``)
    :return: the ``count()`` of the query filtered with
        ``j2me_submissions(gte=utcnow() - days)``
    """
    cutoff = datetime.utcnow() - timedelta(days=int(days))
    j2me_forms = FormES().domain(domain).j2me_submissions(gte=cutoff)
    return j2me_forms.count()
def get_base_form_es_query(start=0, size=DEFAULT_SIZE):
    """
    Build the base form query for PACT_DOMAIN.

    The default filters are removed and replaced by a domain filter plus a
    ``doc_type == 'XFormInstance'`` term filter.

    :param start: value passed to the query's ``start``
    :param size: value passed to the query's ``size``
    :return: the configured, not yet executed, query object
    """
    query = FormES().remove_default_filters()
    query = query.domain(PACT_DOMAIN)
    query = query.filter(filters.term('doc_type', 'XFormInstance'))
    return query.start(start).size(size)