Ejemplo n.º 1
0
def track_periodic_data():
    """
    Send periodic (non-event, non-page) usage data to hubspot/Kissmetrics.

    For each web user that logged in during the last 180 days and has an
    email, reports the highest form-submission count and the highest mobile
    worker count across the user's domains, the project spaces the user
    created, and whether the form count is above 300.

    :return: None
    """
    login_cutoff = date.today() - timedelta(days=180)

    # List of dicts, one per web user, holding that user's domains and email
    web_users = (
        UserES()
        .web_users()
        .last_logged_in(gte=login_cutoff)
        .fields(['domains', 'email'])
        .run()
        .hits
    )
    # Per-domain counts, looked up below by domain name
    form_counts = (
        FormES()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )
    worker_counts = (
        UserES()
        .mobile_users()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )

    def _prop(name, value):
        # One entry of the 'properties' list sent for a user
        return {'property': name, 'value': value}

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not email:
            # Users without an email are not reported
            continue

        # Largest count over the user's domains; stays 0 if no domain has data
        top_forms = 0
        top_workers = 0
        for domain in web_user['domains']:
            top_forms = max(top_forms, form_counts.get(domain, 0))
            top_workers = max(top_workers, worker_counts.get(domain, 0))

        created_spaces = ", ".join(get_domains_created_by_user(email))

        payload.append({
            'email': email,
            'properties': [
                _prop('max_form_submissions_in_a_domain', top_forms),
                _prop('max_mobile_workers_in_a_domain', top_workers),
                _prop('project_spaces_created_by_user', created_spaces),
                _prop('over_300_form_submissions', top_forms > 300),
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(payload))
Ejemplo n.º 2
0
def track_periodic_data():
    """
    Sync data that is neither event or page based with hubspot/Kissmetrics

    A timestamp is recorded after each of the three queries and again once
    the per-user payload list has been built. If building took 100 seconds or
    more, a soft assert carrying those timestamps is triggered before the
    data is submitted.

    :return: None
    """
    start_time = datetime.now()
    # Start by getting a list of web users mapped to their domains
    six_months_ago = date.today() - timedelta(days=180)
    users_to_domains = UserES().web_users().last_logged_in(gte=six_months_ago).fields(['domains', 'email'])\
                               .run().hits
    # users_to_domains is a list of dicts
    time_users_to_domains_query = datetime.now()
    domains_to_forms = FormES().terms_aggregation('domain', 'domain').size(0).run()\
        .aggregations.domain.counts_by_bucket()
    time_domains_to_forms_query = datetime.now()
    domains_to_mobile_users = UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()\
                                      .aggregations.domain.counts_by_bucket()
    time_domains_to_mobile_users_query = datetime.now()

    # For each web user, iterate through their domains and select the max number of form submissions and
    # max number of mobile workers
    submit = []
    for user in users_to_domains:
        email = user.get('email')
        if not email:
            # Users without an email are not reported
            continue
        max_forms = 0
        max_workers = 0

        for domain in user['domains']:
            if domain in domains_to_forms and domains_to_forms[domain] > max_forms:
                max_forms = domains_to_forms[domain]
            if domain in domains_to_mobile_users and domains_to_mobile_users[domain] > max_workers:
                max_workers = domains_to_mobile_users[domain]

        project_spaces_created = ", ".join(get_domains_created_by_user(email))

        user_json = {
            'email': email,
            'properties': [
                {
                    'property': 'max_form_submissions_in_a_domain',
                    'value': max_forms
                },
                {
                    'property': 'max_mobile_workers_in_a_domain',
                    'value': max_workers
                },
                {
                    'property': 'project_spaces_created_by_user',
                    'value': project_spaces_created,
                },
                {
                    'property': 'over_300_form_submissions',
                    'value': max_forms > 300
                }
            ]
        }
        submit.append(user_json)

    # Taken before serialisation, so json.dumps is not part of the measured time
    end_time = datetime.now()
    submit_json = json.dumps(submit)

    processing_time = end_time - start_time
    _soft_assert = soft_assert('{}@{}'.format('tsheffels', 'dimagi.com'))
    #TODO: Update this soft assert to only trigger if the timing is longer than a threshold
    # NOTE: sys.getsizeof reports the in-memory size of the str object
    # (object overhead included), not the length of the serialised payload.
    msg = 'Periodic Data Timing: start: {}, users_to_domains: {}, domains_to_forms: {}, ' \
          'domains_to_mobile_workers: {}, end: {}, size of string post to hubspot (bytes): {}'\
        .format(
            start_time,
            time_users_to_domains_query,
            time_domains_to_forms_query,
            time_domains_to_mobile_users_query,
            end_time,
            sys.getsizeof(submit_json)
        )
    # total_seconds() covers the whole duration; the .seconds attribute drops
    # the days component, so a run longer than a day could look short.
    _soft_assert(processing_time.total_seconds() < 100, msg)

    submit_data_to_hub_and_kiss(submit_json)
Ejemplo n.º 3
0
def track_periodic_data():
    """
    Send periodic (non-event, non-page) usage data to hubspot/Kissmetrics.

    Covers analytics-enabled web users that logged in during the last 180
    days and whose email passes _email_is_valid. Property names are prefixed
    with the instance string. After submitting, the number of reported users
    and the number of domains whose form count exceeds HUBSPOT_THRESHOLD are
    passed to update_datadog_metrics.

    :return: None
    """
    login_cutoff = date.today() - timedelta(days=180)

    # List of dicts, one per web user
    user_query = UserES().web_users().last_logged_in(gte=login_cutoff)
    user_query = user_query.fields(['domains', 'email', 'date_joined'])
    web_users = user_query.analytics_enabled().run().hits

    # Per-domain counts, looked up below by domain name
    form_query = FormES().terms_aggregation('domain', 'domain').size(0)
    form_counts = form_query.run().aggregations.domain.counts_by_bucket()
    worker_query = UserES().mobile_users().terms_aggregation('domain', 'domain').size(0)
    worker_counts = worker_query.run().aggregations.domain.counts_by_bucket()

    # Prefix that keeps india and www data separate
    env = get_instance_string()

    domains_over_threshold = sum(
        1 for form_total in form_counts.values()
        if form_total > HUBSPOT_THRESHOLD
    )

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not _email_is_valid(email):
            continue

        joined_on = web_user.get('date_joined')

        # Largest count over the user's domains; stays 0 if no domain has data
        top_forms = 0
        top_workers = 0
        for domain in web_user['domains']:
            top_forms = max(top_forms, form_counts.get(domain, 0))
            top_workers = max(top_workers, worker_counts.get(domain, 0))

        created_spaces = ", ".join(get_domains_created_by_user(email))

        properties = [
            {
                'property': '{}max_form_submissions_in_a_domain'.format(env),
                'value': top_forms,
            },
            {
                'property': '{}max_mobile_workers_in_a_domain'.format(env),
                'value': top_workers,
            },
            {
                'property': '{}project_spaces_created_by_user'.format(env),
                'value': created_spaces,
            },
            {
                'property': '{}over_300_form_submissions'.format(env),
                'value': top_forms > HUBSPOT_THRESHOLD,
            },
            {
                'property': '{}date_created'.format(env),
                'value': joined_on,
            },
        ]
        payload.append({'email': email, 'properties': properties})

    submit_data_to_hub_and_kiss(json.dumps(payload))

    # Every user that passed the email check produced exactly one payload entry
    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: len(payload),
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: domains_over_threshold,
    })
Ejemplo n.º 4
0
def track_periodic_data():
    """
    Send periodic (non-event, non-page) usage data to hubspot/Kissmetrics.

    For each web user that logged in during the last 180 days and has an
    email, reports the highest form-submission and mobile-worker counts
    across the user's domains, the project spaces the user created, whether
    the form count is above 300, and the user's join date. Property names are
    prefixed with the instance string.

    :return: None
    """
    login_cutoff = date.today() - timedelta(days=180)

    # List of dicts, one per web user
    web_users = (
        UserES()
        .web_users()
        .last_logged_in(gte=login_cutoff)
        .fields(['domains', 'email', 'date_joined'])
        .run()
        .hits
    )
    # Per-domain counts, looked up below by domain name
    form_counts = (
        FormES()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )
    worker_counts = (
        UserES()
        .mobile_users()
        .terms_aggregation('domain', 'domain')
        .size(0)
        .run()
        .aggregations.domain.counts_by_bucket()
    )

    # Prefix that keeps india and www data separate
    env = get_instance_string()

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not email:
            # Users without an email are not reported
            continue

        joined_on = web_user.get('date_joined')
        user_domains = web_user['domains']

        # The leading 0 is the floor used when no domain has any data
        top_forms = max([0] + [form_counts.get(name, 0) for name in user_domains])
        top_workers = max([0] + [worker_counts.get(name, 0) for name in user_domains])

        created_spaces = ", ".join(get_domains_created_by_user(email))

        properties = [
            {
                'property': '{}max_form_submissions_in_a_domain'.format(env),
                'value': top_forms,
            },
            {
                'property': '{}max_mobile_workers_in_a_domain'.format(env),
                'value': top_workers,
            },
            {
                'property': '{}project_spaces_created_by_user'.format(env),
                'value': created_spaces,
            },
            {
                'property': '{}over_300_form_submissions'.format(env),
                'value': top_forms > 300,
            },
            {
                'property': '{}date_created'.format(env),
                'value': joined_on,
            },
        ]
        payload.append({'email': email, 'properties': properties})

    submit_data_to_hub_and_kiss(json.dumps(payload))
Ejemplo n.º 5
0
def track_periodic_data():
    """
    Sync data that is neither event or page based with hubspot/Kissmetrics

    Returns immediately when both KISSMETRICS_ENABLED and HUBSPOT_ENABLED are
    false. Otherwise pages through analytics-enabled web users that logged in
    during the last 90 days, 100 at a time, and submits one batch of per-user
    properties per page. Users are skipped when their email is invalid, when
    their email or username domain is blocked, or when they belong to a
    blocked project domain. Two gauges are reported at the end: the number of
    users processed and the number of domains whose form count exceeds
    HUBSPOT_THRESHOLD.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES()
                  .web_users()
                  .last_logged_in(gte=three_months_ago)
                  .sort('date_joined', desc=True)
                  .source(['domains', 'email', 'date_joined', 'username'])
                  .analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    blocked_domains = get_blocked_hubspot_domains()
    blocked_email_domains = get_blocked_hubspot_email_domains()

    # The per-domain aggregations do not depend on the user page, so they are
    # fetched once instead of once per chunk.
    domains_to_forms = (FormES()
                        .terms_aggregation('domain.exact', 'domain')
                        .size(0)
                        .run()
                        .aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES()
                               .mobile_users()
                               .terms_aggregation('domain.exact', 'domain')
                               .size(0)
                               .run()
                               .aggregations
                               .domain
                               .counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD.
    # Counted once here: counting inside the chunk loop would add the same
    # domains again for every chunk and inflate the gauge.
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (user_query
                            .size(chunk_size)
                            .start(chunk * chunk_size)
                            .run()
                            .hits)

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            username = user.get('username')
            email = user.get('email') or username
            if not _email_is_valid(email):
                continue

            email_domain = email.split('@')[-1]
            if email_domain in blocked_email_domains:
                metrics_gauge(
                    'commcare.hubspot_data.rejected.periodic_task.email_domain',
                    1,
                    tags={
                        'email_domain': email_domain,
                    }
                )
                continue

            # A missing username is treated as "not blocked" rather than
            # raising AttributeError on None.
            username_email_domain = username.split('@')[-1] if username else None
            if username_email_domain in blocked_email_domains:
                metrics_gauge(
                    'commcare.hubspot_data.rejected.periodic_task.username',
                    1,
                    tags={
                        'username': username_email_domain,
                    }
                )
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            is_member_of_blocked_domain = False
            for domain in user['domains']:
                if domain in blocked_domains:
                    metrics_gauge(
                        'commcare.hubspot_data.rejected.periodic_task.domain',
                        1,
                        tags={
                            'domain': domain,
                        }
                    )
                    is_member_of_blocked_domain = True
                    break
                max_forms = max(max_forms, domains_to_forms.get(domain, 0))
                max_workers = max(max_workers, domains_to_mobile_users.get(domain, 0))
                # Each helper is called once per domain instead of twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            if is_member_of_blocked_domain:
                continue

            project_spaces_created = ", ".join(get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        # One submission per chunk of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    metrics_gauge('commcare.hubspot.web_users_processed', hubspot_number_of_users,
        multiprocess_mode=MPM_LIVESUM)
    metrics_gauge(
        'commcare.hubspot.domains_with_forms_gt_threshold', hubspot_number_of_domains_with_forms_gt_threshold,
        multiprocess_mode=MPM_MAX
    )
Ejemplo n.º 6
0
def track_periodic_data():
    """
    Sync data that is neither event or page based with hubspot/Kissmetrics

    Pages through analytics-enabled web users that logged in during the last
    90 days, 100 at a time, and submits one batch of per-user properties per
    page. Users whose email fails _email_is_valid are skipped. At the end the
    number of users processed and the number of domains whose form count
    exceeds HUBSPOT_THRESHOLD are passed to update_datadog_metrics.

    :return: None
    """
    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES()
                  .web_users()
                  .last_logged_in(gte=three_months_ago)
                  .sort('date_joined', desc=True)
                  .source(['domains', 'email', 'date_joined'])
                  .analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # The per-domain aggregations do not depend on the user page, so they are
    # fetched once instead of once per chunk.
    domains_to_forms = (FormES()
                        .terms_aggregation('domain', 'domain')
                        .size(0)
                        .run()
                        .aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES()
                               .mobile_users()
                               .terms_aggregation('domain', 'domain')
                               .size(0)
                               .run()
                               .aggregations
                               .domain
                               .counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD.
    # Counted once here: counting inside the chunk loop would add the same
    # domains again for every chunk and inflate the gauge.
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (user_query
                            .size(chunk_size)
                            .start(chunk * chunk_size)
                            .run()
                            .hits)

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                max_forms = max(max_forms, domains_to_forms.get(domain, 0))
                max_workers = max(max_workers, domains_to_mobile_users.get(domain, 0))
                # Each helper is called once per domain instead of twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        # One submission per chunk of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: hubspot_number_of_domains_with_forms_gt_threshold
    })
Ejemplo n.º 7
0
def track_periodic_data():
    """
    Send periodic (non-event, non-page) usage data to hubspot/Kissmetrics.

    Covers web users that logged in during the last 180 days and whose email
    passes _email_is_valid. Property names are prefixed with the instance
    string. After submitting, the number of reported users and the number of
    domains whose form count exceeds HUBSPOT_THRESHOLD are passed to
    update_datadog_metrics.

    :return: None
    """
    login_cutoff = date.today() - timedelta(days=180)

    # List of dicts, one per web user
    user_query = UserES().web_users().last_logged_in(gte=login_cutoff)
    web_users = user_query.fields(["domains", "email", "date_joined"]).run().hits

    # Per-domain counts, looked up below by domain name
    form_result = FormES().terms_aggregation("domain", "domain").size(0).run()
    form_counts = form_result.aggregations.domain.counts_by_bucket()
    worker_result = UserES().mobile_users().terms_aggregation("domain", "domain").size(0).run()
    worker_counts = worker_result.aggregations.domain.counts_by_bucket()

    # Prefix that keeps india and www data separate
    env = get_instance_string()

    users_reported = 0
    domains_over_threshold = len(
        [form_total for form_total in form_counts.values() if form_total > HUBSPOT_THRESHOLD]
    )

    payload = []
    for web_user in web_users:
        email = web_user.get("email")
        if not _email_is_valid(email):
            continue

        users_reported += 1
        joined_on = web_user.get("date_joined")

        # Largest count over the user's domains; stays 0 if no domain has data
        top_forms = 0
        top_workers = 0
        for domain in web_user["domains"]:
            domain_forms = form_counts.get(domain, 0)
            if domain_forms > top_forms:
                top_forms = domain_forms
            domain_workers = worker_counts.get(domain, 0)
            if domain_workers > top_workers:
                top_workers = domain_workers

        created_spaces = ", ".join(get_domains_created_by_user(email))

        named_values = (
            ("{}max_form_submissions_in_a_domain".format(env), top_forms),
            ("{}max_mobile_workers_in_a_domain".format(env), top_workers),
            ("{}project_spaces_created_by_user".format(env), created_spaces),
            ("{}over_300_form_submissions".format(env), top_forms > HUBSPOT_THRESHOLD),
            ("{}date_created".format(env), joined_on),
        )
        payload.append({
            "email": email,
            "properties": [
                {"property": name, "value": value} for name, value in named_values
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(payload))

    datadog_gauges = {
        DATADOG_WEB_USERS_GAUGE: users_reported,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: domains_over_threshold,
    }
    update_datadog_metrics(datadog_gauges)
Ejemplo n.º 8
0
def track_periodic_data():
    """
    Sync data that is neither event or page based with hubspot/Kissmetrics

    Returns immediately when both KISSMETRICS_ENABLED and HUBSPOT_ENABLED are
    false. Otherwise pages through analytics-enabled web users that logged in
    during the last 90 days, 100 at a time, and submits one batch of per-user
    properties per page. Users whose email fails _email_is_valid are skipped.
    At the end the number of users processed and the number of domains whose
    form count exceeds HUBSPOT_THRESHOLD are passed to update_datadog_metrics.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES()
                  .web_users()
                  .last_logged_in(gte=three_months_ago)
                  .sort('date_joined', desc=True)
                  .source(['domains', 'email', 'date_joined'])
                  .analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # The per-domain aggregations do not depend on the user page, so they are
    # fetched once instead of once per chunk.
    domains_to_forms = (FormES()
                        .terms_aggregation('domain', 'domain')
                        .size(0)
                        .run()
                        .aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES()
                               .mobile_users()
                               .terms_aggregation('domain', 'domain')
                               .size(0)
                               .run()
                               .aggregations.domain.counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD.
    # Counted once here: counting inside the chunk loop would add the same
    # domains again for every chunk and inflate the gauge.
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        # users_to_domains is a list of dicts
        users_to_domains = (user_query
                            .size(chunk_size)
                            .start(chunk * chunk_size)
                            .run()
                            .hits)

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                max_forms = max(max_forms, domains_to_forms.get(domain, 0))
                max_workers = max(max_workers, domains_to_mobile_users.get(domain, 0))
                # Each helper is called once per domain instead of twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        # One submission per chunk of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: hubspot_number_of_domains_with_forms_gt_threshold
    })