Ejemplo n.º 1
0
def get_forms_for_users(domain, user_ids, start, end):
    """
    Build a form query for the given users and return its scroll result.

    The query is restricted to ``domain``, to submissions filtered with
    ``submitted(gte=start, lte=end)``, and to the given ``user_ids``. Only
    the ``form.meta.userID``, ``form.case`` and ``form.@xmlns`` source fields
    are requested.

    :return: whatever ``FormES.scroll()`` returns for the built query
    """
    source_fields = ['form.meta.userID', 'form.case', 'form.@xmlns']

    form_query = FormES().domain(domain)
    form_query = form_query.submitted(gte=start, lte=end)
    form_query = form_query.user_id(user_ids)
    form_query = form_query.source(source_fields)

    return form_query.scroll()
Ejemplo n.º 2
0
def track_periodic_data():
    """
    Sync data that is neither event nor page based with hubspot/Kissmetrics.

    Web users whose last login falls within the past 90 days (and who match
    the ``analytics_enabled`` query filter) are fetched in pages of 100. For
    every user with a valid email, the maximum form count, mobile worker
    count, export count and report count across the user's domains are
    collected and submitted via ``submit_data_to_hub_and_kiss``, one JSON
    payload per page. Finally the number of users submitted and the number
    of domains with more than ``HUBSPOT_THRESHOLD`` forms are reported
    through ``update_datadog_metrics``.

    :return: None
    """
    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES().web_users().last_logged_in(
        gte=three_months_ago).sort('date_joined', desc=True).source(
            ['domains', 'email', 'date_joined']).analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # The per-domain aggregations do not depend on the page of users being
    # processed, so compute them once instead of once per chunk.
    domains_to_forms = (FormES().terms_aggregation(
        'domain',
        'domain').size(0).run().aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES().mobile_users().terms_aggregation(
        'domain',
        'domain').size(0).run().aggregations.domain.counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD.
    # Counting the domains exactly once keeps the gauge from being multiplied
    # by the number of user chunks.
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD)

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (user_query.size(chunk_size).start(
            chunk * chunk_size).run().hits)

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                if domain in domains_to_forms:
                    max_forms = max(max_forms, domains_to_forms[domain])
                if domain in domains_to_mobile_users:
                    max_workers = max(max_workers,
                                      domains_to_mobile_users[domain])
                # Call each count helper once per domain rather than twice.
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [{
                    'property':
                    '{}max_form_submissions_in_a_domain'.format(env),
                    'value': max_forms
                }, {
                    'property':
                    '{}max_mobile_workers_in_a_domain'.format(env),
                    'value': max_workers
                }, {
                    'property':
                    '{}project_spaces_created_by_user'.format(env),
                    'value': project_spaces_created,
                }, {
                    'property': '{}over_300_form_submissions'.format(env),
                    'value': max_forms > HUBSPOT_THRESHOLD
                }, {
                    'property': '{}date_created'.format(env),
                    'value': date_created
                }, {
                    'property': '{}max_exports_in_a_domain'.format(env),
                    'value': max_export
                }, {
                    'property':
                    '{}max_custom_reports_in_a_domain'.format(env),
                    'value': max_report
                }]
            }
            submit.append(user_json)

        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE:
        hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE:
        hubspot_number_of_domains_with_forms_gt_threshold
    })
Ejemplo n.º 3
0
def get_300th_form_submission_received(domain):
    """
    Return the ``received_on`` datetime of the form found at offset 300 when
    the domain's forms are sorted by ``received_on``.

    Returns ``None`` when the query yields no hit at that offset.

    NOTE(review): ``start(300)`` skips 300 hits, so if the offset is
    zero-based this is the 301st form rather than the 300th -- confirm which
    one is intended.
    """
    hits = (
        FormES()
        .domain(domain)
        .start(300)
        .size(1)
        .sort('received_on')
        .fields(['received_on'])
        .run()
        .hits
    )
    if hits:
        return iso_string_to_datetime(hits[0]['received_on'])
    return None
Ejemplo n.º 4
0
def forms_in_last(domain, days):
    """
    Count the forms in ``domain`` submitted within the last ``days`` days.

    ``days`` is coerced with ``int()``; the cutoff is measured back from the
    current UTC time. The result is the ``total`` of a zero-size query.
    """
    cutoff = datetime.utcnow() - timedelta(days=int(days))
    recent_forms = FormES().domain(domain).submitted(gte=cutoff).size(0)
    return recent_forms.run().total
Ejemplo n.º 5
0
def find_form_ids_updating_case(case_id):
    """
    Return the ``_id`` of every hit from a form query filtered with
    ``updating_cases([case_id])``.
    """
    query = FormES()
    query = query.filter(updating_cases([case_id]))

    form_ids = []
    for hit in query.run().hits:
        form_ids.append(hit["_id"])
    return form_ids
Ejemplo n.º 6
0
def track_periodic_data():
    """
    Sync data that is neither event nor page based with hubspot/Kissmetrics.

    Does nothing when both ``KISSMETRICS_ENABLED`` and ``HUBSPOT_ENABLED``
    are falsy. Otherwise, web users whose last login falls within the past
    90 days (and who match the ``analytics_enabled`` query filter) are
    fetched in pages of 100. Users found in the blocked-invitation list, or
    belonging to a blocked domain, are counted as blocked and skipped. For
    every remaining user with a valid email (falling back to the username),
    the maximum form, mobile worker, export and report counts across the
    user's domains are submitted via ``submit_data_to_hub_and_kiss``, one
    JSON payload per page. Gauges for processed users, blocked users,
    domains above ``HUBSPOT_THRESHOLD`` and the task runtime are emitted at
    the end.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    time_started = datetime.utcnow()

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES().web_users().last_logged_in(
        gte=three_months_ago).sort('date_joined', desc=True).source(
            ['domains', 'email', 'date_joined',
             'username']).analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    hubspot_number_of_users_processed = 0
    hubspot_number_of_users_blocked = 0

    blocked_domains = get_blocked_hubspot_domains()
    blocked_users = emails_that_accepted_invitations_to_blocked_hubspot_domains(
    )

    # The per-domain aggregations do not depend on the page of users being
    # processed, so compute them once instead of once per chunk.
    domains_to_forms = (FormES().terms_aggregation(
        'domain.exact',
        'domain').size(0).run().aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES().mobile_users().terms_aggregation(
        'domain.exact',
        'domain').size(0).run().aggregations.domain.counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of domains with max_forms greater than HUBSPOT_THRESHOLD.
    # Counting exactly once keeps the gauge from being multiplied by the
    # number of user chunks.
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD)

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (user_query.size(chunk_size).start(
            chunk * chunk_size).run().hits)

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email') or user.get('username')
            if not _email_is_valid(email):
                continue

            if (user.get('email') in blocked_users
                    or user.get('username') in blocked_users):
                # User had accepted an invitation to a project space whose
                # Billing Account has blocked HubSpot analytics, so we
                # should not send any data about them going forward
                metrics_counter(
                    'commcare.hubspot_data.rejected.periodic_task.invitation',
                )
                hubspot_number_of_users_blocked += 1
                continue

            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            is_member_of_blocked_domain = False
            for domain in user['domains']:
                if domain in blocked_domains:
                    metrics_counter(
                        'commcare.hubspot_data.rejected.periodic_task.domain',
                        tags={
                            'domain': domain,
                        })
                    is_member_of_blocked_domain = True
                    break
                if domain in domains_to_forms:
                    max_forms = max(max_forms, domains_to_forms[domain])
                if domain in domains_to_mobile_users:
                    max_workers = max(max_workers,
                                      domains_to_mobile_users[domain])
                # Call each count helper once per domain rather than twice.
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            if is_member_of_blocked_domain:
                # user is a member of a project space whose Billing Account
                # has blocked HubSpot analytics, so we must not send any data
                # about them.
                hubspot_number_of_users_blocked += 1
                continue

            hubspot_number_of_users_processed += 1

            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [{
                    'property':
                    '{}max_form_submissions_in_a_domain'.format(env),
                    'value': max_forms
                }, {
                    'property':
                    '{}max_mobile_workers_in_a_domain'.format(env),
                    'value': max_workers
                }, {
                    'property':
                    '{}project_spaces_created_by_user'.format(env),
                    'value': project_spaces_created,
                }, {
                    'property': '{}over_300_form_submissions'.format(env),
                    'value': max_forms > HUBSPOT_THRESHOLD
                }, {
                    'property': '{}date_created'.format(env),
                    'value': date_created
                }, {
                    'property': '{}max_exports_in_a_domain'.format(env),
                    'value': max_export
                }, {
                    'property':
                    '{}max_custom_reports_in_a_domain'.format(env),
                    'value': max_report
                }]
            }
            submit.append(user_json)

        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    metrics_gauge('commcare.hubspot.web_users_processed',
                  hubspot_number_of_users_processed,
                  multiprocess_mode=MPM_LIVESUM)
    metrics_gauge('commcare.hubspot.web_users_blocked',
                  hubspot_number_of_users_blocked,
                  multiprocess_mode=MPM_LIVESUM)
    metrics_gauge('commcare.hubspot.domains_with_forms_gt_threshold',
                  hubspot_number_of_domains_with_forms_gt_threshold,
                  multiprocess_mode=MPM_MAX)

    task_time = datetime.utcnow() - time_started
    # timedelta.seconds discards the days component; total_seconds() reports
    # the whole elapsed duration.
    metrics_gauge('commcare.hubspot.runtimes.track_periodic_data',
                  int(task_time.total_seconds()),
                  multiprocess_mode=MPM_LIVESUM)
Ejemplo n.º 7
0
def track_periodic_data():
    """
    Sync data that is neither event nor page based with hubspot/Kissmetrics.

    Fetches web users whose last login falls within the past 180 days. For
    each one with a valid email, it submits the highest form count and
    mobile worker count among the user's domains, the domains the user
    created, and the join date, all in a single JSON payload. Afterwards it
    reports the number of users submitted and the number of domains with
    more than ``HUBSPOT_THRESHOLD`` forms through ``update_datadog_metrics``.

    :return: None
    """
    # Web users (a list of dicts) active within the last six months
    cutoff = date.today() - timedelta(days=180)
    recent_web_users = UserES().web_users().last_logged_in(gte=cutoff)
    web_users = recent_web_users.fields(
        ['domains', 'email', 'date_joined']).run().hits

    # Per-domain form and mobile worker counts
    form_result = FormES().terms_aggregation('domain', 'domain').size(0).run()
    forms_by_domain = form_result.aggregations.domain.counts_by_bucket()
    worker_result = UserES().mobile_users().terms_aggregation(
        'domain', 'domain').size(0).run()
    workers_by_domain = worker_result.aggregations.domain.counts_by_bucket()

    # Property names are prefixed so india and www data stay separate
    prefix = get_instance_string()

    domains_over_threshold = sum(
        1 for count in forms_by_domain.values() if count > HUBSPOT_THRESHOLD)

    # For each web user, pick the largest form and mobile worker counts
    # found among their domains
    payload = []
    for web_user in web_users:
        address = web_user.get('email')
        if not _email_is_valid(address):
            continue

        joined_on = web_user.get('date_joined')
        top_forms = 0
        top_workers = 0
        for name in web_user['domains']:
            if name in forms_by_domain:
                top_forms = max(top_forms, forms_by_domain[name])
            if name in workers_by_domain:
                top_workers = max(top_workers, workers_by_domain[name])

        created_spaces = ", ".join(get_domains_created_by_user(address))

        stats = (
            ('{}max_form_submissions_in_a_domain', top_forms),
            ('{}max_mobile_workers_in_a_domain', top_workers),
            ('{}project_spaces_created_by_user', created_spaces),
            ('{}over_300_form_submissions', top_forms > HUBSPOT_THRESHOLD),
            ('{}date_created', joined_on),
        )
        payload.append({
            'email': address,
            'properties': [
                {'property': template.format(prefix), 'value': value}
                for template, value in stats
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(payload))

    # Every valid user contributed exactly one payload entry
    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: len(payload),
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: domains_over_threshold,
    })
Ejemplo n.º 8
0
def j2me_forms_in_last(domain, days):
    """
    Count the forms in ``domain`` matched by ``j2me_submissions`` within the
    last ``days`` days.

    ``days`` is coerced with ``int()``; the cutoff is measured back from the
    current UTC time.
    """
    cutoff = datetime.utcnow() - timedelta(days=int(days))
    j2me_query = FormES().domain(domain).j2me_submissions(gte=cutoff)
    return j2me_query.count()
Ejemplo n.º 9
0
def get_base_form_es_query(start=0, size=DEFAULT_SIZE):
    """
    Build the base form query for ``PACT_DOMAIN``.

    The default filters are removed, the query is limited to documents whose
    ``doc_type`` term is ``XFormInstance``, and the given ``start`` / ``size``
    paging values are applied.

    :return: the configured (not yet run) ``FormES`` query
    """
    query = FormES().remove_default_filters()
    query = query.domain(PACT_DOMAIN)
    query = query.filter(filters.term('doc_type', 'XFormInstance'))
    return query.start(start).size(size)