Example #1
def top_duplicates(ctx: callable, config: callable, statsd: callable,
                   logger: callable, run_id: int, conn: callable,
                   metadata_conn: callable, command: str,
                   metrics_root: callable, metrics_run_root: callable,
                   force_refresh: bool, disable_retention_check: bool,
                   disable_data_check: bool, debug_query_performance: bool,
                   month: int, year: int, output_dir: str) -> None:
    """Generate report listing IMEIs seen with more than 5 IMSIs in a given month and year.

    Arguments:
        ctx: click context object
        config: DIRBS config object
        statsd: DIRBS statsd connection object
        logger: DIRBS custom logger object
        run_id: run id of the current job
        conn: DIRBS PostgreSQL connection object
        metadata_conn: DIRBS PostgreSQL metadata connection object
        command: name of the command
        metrics_root: root object for the statsd metrics
        metrics_run_root: root object for the statsd run metrics
        force_refresh: bool to force writing/generating reports from scratch
        disable_retention_check: bool to disable data retention check
        disable_data_check: bool to disable data check
        debug_query_performance: bool to debug query performance
        month: reporting month
        year: reporting year
        output_dir: output directory path
    Returns:
        None
    """
    reports_validation_checks(disable_retention_check, year, month, logger,
                              config, conn, disable_data_check)
    metadata.add_optional_job_metadata(
        metadata_conn,
        command,
        run_id,
        refreshed_data=force_refresh,
        month=month,
        year=year,
        report_schema_version=report_schema_version,
        output_dir=os.path.abspath(str(output_dir)))
    report_metadata = []
    report_dir = make_report_directory(ctx,
                                       output_dir,
                                       run_id,
                                       conn,
                                       config,
                                       year=year,
                                       month=month)
    with utils.CodeProfiler() as cp:
        imsi_min_limit = 5
        country_name = config.region_config.name
        logger.info(
            'Generating country duplicate IMEI report (IMEIs seen with more than {0:d} IMSIs this '
            'reporting month)...'.format(imsi_min_limit))
        report_metadata.extend(
            write_country_duplicates_report(conn,
                                            config,
                                            month,
                                            year,
                                            country_name,
                                            report_dir,
                                            imsi_min_limit=imsi_min_limit))
    statsd.gauge(
        '{0}runtime.per_report.top_duplicates'.format(metrics_run_root),
        cp.duration)

    # Store per-report job metadata
    metadata.add_optional_job_metadata(metadata_conn,
                                       command,
                                       run_id,
                                       report_outputs=report_metadata)
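
Each of these report commands times its expensive section by wrapping it in utils.CodeProfiler() and pushing cp.duration to a statsd gauge named under metrics_run_root. Below is a minimal, self-contained sketch of that pattern; SimpleCodeProfiler, _PrintStatsd and the metric prefix are stand-ins invented for illustration (the real utils.CodeProfiler and DIRBS statsd client are not shown in this excerpt), and the use of seconds as the duration unit is an assumption.

# Minimal sketch of the profiling-plus-metrics pattern used above. SimpleCodeProfiler and
# _PrintStatsd are stand-ins invented for illustration; they are not the DIRBS classes.
import time


class SimpleCodeProfiler:
    """Context manager recording elapsed wall-clock time (seconds here; the unit is an assumption)."""

    def __enter__(self):
        self._start = time.monotonic()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.duration = time.monotonic() - self._start
        return False


class _PrintStatsd:
    """Stand-in statsd client that just prints gauge values."""

    def gauge(self, name, value):
        print('gauge {0} = {1:.6f}'.format(name, value))


statsd = _PrintStatsd()
metrics_run_root = 'dirbs.reports.run.123.'  # hypothetical prefix; the real value comes from DIRBS

with SimpleCodeProfiler() as cp:
    sum(i * i for i in range(1000000))  # placeholder for the report-generation work

statsd.gauge('{0}runtime.per_report.top_duplicates'.format(metrics_run_root), cp.duration)
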
Example #2
def condition_imei_overlaps(ctx: callable, config: callable, statsd: callable,
                            logger: callable, run_id: int, conn: callable,
                            metadata_conn: callable, command: str,
                            metrics_root: callable, metrics_run_root: callable,
                            force_refresh: bool, disable_retention_check: bool,
                            disable_data_check: bool,
                            debug_query_performance: bool, month: int,
                            year: int, output_dir: str) -> None:
    """Generate per-condition reports showing matched IMEIs seen on more than one MNO network.

    Arguments:
        ctx: click context object
        config: DIRBS config object
        statsd: DIRBS statsd connection object
        logger: DIRBS custom logger object
        run_id: run id of the current job
        conn: DIRBS PostgreSQL connection object
        metadata_conn: DIRBS PostgreSQL metadata connection object
        command: name of the command
        metrics_root: root object for the statsd metrics
        metrics_run_root: root object for the statsd run metrics
        force_refresh: bool to force writing/generating reports from scratch
        disable_retention_check: bool to disable data retention check
        disable_data_check: bool to disable data check
        debug_query_performance: bool to debug query performance
        month: reporting month
        year: reporting year
        output_dir: output directory path
    Returns:
        None
    """
    reports_validation_checks(disable_retention_check, year, month, logger,
                              config, conn, disable_data_check)
    metadata.add_optional_job_metadata(
        metadata_conn,
        command,
        run_id,
        refreshed_data=force_refresh,
        month=month,
        year=year,
        report_schema_version=report_schema_version,
        output_dir=os.path.abspath(str(output_dir)))
    report_dir = make_report_directory(ctx,
                                       output_dir,
                                       run_id,
                                       conn,
                                       config,
                                       year=year,
                                       month=month)
    report_metadata = []

    with utils.CodeProfiler() as cp:
        country_name = config.region_config.name
        logger.info(
            'Generating country per-condition IMEI overlap reports (classified IMEIs seen on more than '
            "one MNO's network this month)...")
        cond_names = [c.label for c in config.conditions]
        report_metadata.extend(
            write_condition_imei_overlaps(conn, config, month, year,
                                          country_name, report_dir,
                                          cond_names))
    statsd.gauge(
        '{0}runtime.per_report.condition_imei_overlaps'.format(
            metrics_run_root), cp.duration)

    # Store per-report job metadata
    metadata.add_optional_job_metadata(metadata_conn,
                                       command,
                                       run_id,
                                       report_outputs=report_metadata)
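
Examples #1 and #2 share an identical prologue: the validation checks, an initial add_optional_job_metadata call, and make_report_directory. The sketch below is a hypothetical refactoring that simply restates those calls to make the shared structure explicit; it is not part of DIRBS and relies on the same module-level names (reports_validation_checks, metadata, report_schema_version, make_report_directory) that the excerpts above already reference.

# Hypothetical refactoring sketch, not DIRBS code: the shared prologue of the two report
# commands above, expressed as a single helper. It reuses exactly the calls and arguments
# already shown and returns the report directory for later use.
def _report_prologue(ctx, config, logger, conn, metadata_conn, command, run_id,
                     force_refresh, disable_retention_check, disable_data_check,
                     month, year, output_dir):
    """Run the validation, initial metadata and directory-creation steps shared above."""
    reports_validation_checks(disable_retention_check, year, month, logger,
                              config, conn, disable_data_check)
    metadata.add_optional_job_metadata(metadata_conn,
                                       command,
                                       run_id,
                                       refreshed_data=force_refresh,
                                       month=month,
                                       year=year,
                                       report_schema_version=report_schema_version,
                                       output_dir=os.path.abspath(str(output_dir)))
    return make_report_directory(ctx, output_dir, run_id, conn, config,
                                 year=year, month=month)
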
Example #3
def standard(ctx: callable, config: callable, statsd: callable,
             logger: callable, run_id: int, conn: callable,
             metadata_conn: callable, command: str, metrics_root: callable,
             metrics_run_root: callable, force_refresh: bool,
             disable_retention_check: bool, disable_data_check: bool,
             debug_query_performance: bool, month: int, year: int,
             output_dir: str) -> None:
    """Generate standard monthly operator and country-level reports.

    Arguments:
        ctx: click context object
        config: DIRBS config object
        statsd: DIRBS statsd connection object
        logger: DIRBS custom logger object
        run_id: run id of the current job
        conn: DIRBS PostgreSQL connection object
        metadata_conn: DIRBS PostgreSQL metadata connection object
        command: name of the command
        metrics_root: root object for the statsd metrics
        metrics_run_root: root object for the statsd run metrics
        force_refresh: bool to force writing/generating reports from scratch
        disable_retention_check: bool to disable data retention check
        disable_data_check: bool to disable data check
        debug_query_performance: bool to debug query performance
        month: reporting month
        year: reporting year
        output_dir: output directory path
    Returns:
        None
    """
    # Store initial job metadata (reporting period, schema version, output directory)
    metadata.add_optional_job_metadata(
        metadata_conn,
        command,
        run_id,
        refreshed_data=force_refresh,
        month=month,
        year=year,
        report_schema_version=report_schema_version,
        output_dir=os.path.abspath(str(output_dir)))

    reports_validation_checks(disable_retention_check, year, month, logger,
                              config, conn, disable_data_check)

    # Next, generate all the report data so that report generation can happen very quickly
    data_id, class_run_id, per_tac_compliance_data = generate_monthly_report_stats(
        config, conn, month, year, statsd, metrics_run_root, run_id,
        force_refresh, debug_query_performance)
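    # per_tac_compliance_data, when not None, is used below as a mapping keyed by operator ID,
    # with the special OperatorConfig.COUNTRY_OPERATOR_NAME key holding the country-level
    # aggregate consumed by the country report.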

    # Store metadata about the report data ID and classification run ID
    metadata.add_optional_job_metadata(metadata_conn,
                                       command,
                                       run_id,
                                       data_id=data_id,
                                       classification_run_id=class_run_id)

    report_dir = make_report_directory(ctx,
                                       output_dir,
                                       run_id,
                                       conn,
                                       config,
                                       class_run_id=class_run_id,
                                       year=year,
                                       month=month,
                                       data_id=data_id)

    # First, copy all the report JS/CSS files into the output directory in
    # cachebusted form and get the cachebusted filenames
    asset_map = {}
    report_assets = ['js/report.js', 'css/report.css']

    for fn in report_assets:
        logger.info('Copying required asset "%s" to report folder', fn)
        asset = pkgutil.get_data('dirbs', fn)
        name, ext = fn.split('/')[-1].split('.')
        filename = '{0}_{1}.{2}'.format(
            name, utils.cachebusted_filename_from_contents(asset), ext)
        asset_map[fn] = filename
        with open(os.path.join(report_dir, filename), 'wb') as of:
            of.write(asset)

    js_filename = asset_map['js/report.js']
    css_filename = asset_map['css/report.css']

    # Next, generate the country level report
    report_metadata = []
    with utils.CodeProfiler() as cp:
        logger.info('Generating country report...')
        country_name = config.region_config.name
        country_per_tac_compliance_data = None
        if per_tac_compliance_data is not None:
            country_per_tac_compliance_data = per_tac_compliance_data[
                OperatorConfig.COUNTRY_OPERATOR_NAME]
        report = CountryReport(
            conn,
            data_id,
            config,
            month,
            year,
            country_name,
            has_compliance_data=country_per_tac_compliance_data is not None)
        report_metadata.extend(
            write_report(report, month, year, report_dir, country_name,
                         css_filename, js_filename,
                         country_per_tac_compliance_data))

    statsd.gauge('{0}runtime.per_report.country'.format(metrics_run_root),
                 cp.duration)
    operators = config.region_config.operators
    # Finally, generate the operator reports
    for op in operators:
        with utils.CodeProfiler() as cp:
            logger.info('Generating operator report for operator ID %s...',
                        op.id)
            operator_per_tac_compliance_data = None
            if per_tac_compliance_data is not None:
                operator_per_tac_compliance_data = per_tac_compliance_data.get(
                    op.id)
            report = OperatorReport(
                conn,
                data_id,
                config,
                month,
                year,
                op,
                has_compliance_data=operator_per_tac_compliance_data is not None)
            report_prefix = '{0}_{1}'.format(country_name, op.id)
            report_metadata.extend(
                write_report(report, month, year, report_dir, report_prefix,
                             css_filename, js_filename,
                             operator_per_tac_compliance_data))
        statsd.gauge(
            '{0}runtime.per_report.operators.{1}'.format(
                metrics_run_root, op.id), cp.duration)

    # Store per-report job metadata
    metadata.add_optional_job_metadata(metadata_conn,
                                       command,
                                       run_id,
                                       report_outputs=report_metadata)
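
Example #3 copies each JS/CSS asset into the report directory under a name derived from the asset contents via utils.cachebusted_filename_from_contents, so browsers re-fetch the file whenever its contents change. The sketch below shows one plausible content-hash implementation of that idea; the function name, hash algorithm and digest length are assumptions for illustration, not the actual DIRBS helper.

# Sketch of a content-hash cachebusting helper (assumed implementation, not DIRBS code).
import hashlib


def cachebusted_token_from_contents(contents: bytes, length: int = 8) -> str:
    """Return a short hex digest of the asset bytes, suitable for embedding in a filename."""
    return hashlib.sha256(contents).hexdigest()[:length]


asset = b'body { color: #333; }'  # stand-in for pkgutil.get_data('dirbs', 'css/report.css')
filename = '{0}_{1}.{2}'.format('report', cachebusted_token_from_contents(asset), 'css')
print(filename)  # e.g. 'report_<hash>.css'
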