Example #1
def test_tac_api(per_test_flask_app, per_test_postgres, logger, mocked_statsd,
                 tmpdir, request, mocked_config, api_version):
    """Test TAC API call works with the security role created based on abstract role."""
    dsn = per_test_postgres.dsn()
    dsn['user'] = '******'
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(
            db_config, autocommit=True) as metadata_conn:
        with get_importer(
                GSMADataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                GSMADataParams(
                    filename='sample_gsma_import_list_anonymized.txt')) as imp:
            imp.import_data()

    current_user = request.node.callspec.params['per_test_flask_app']
    if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']:
        rv = per_test_flask_app.get(
            url_for('{0}.tac_api'.format(api_version), tac='01234404'))
        assert rv.status_code == 200
        results = json.loads(rv.data.decode('utf-8'))
        assert results['gsma'] is not None
    else:
        with pytest.raises(DatabaseRoleCheckException):
            per_test_flask_app.get(
                url_for('{0}.tac_api'.format(api_version), tac='01234404'))
Example #2
def test_prune(per_test_postgres, tmpdir, logger, mocked_statsd, db_user,
               mocked_config, monkeypatch):
    """Test prune works with the poweruser security role."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(
            db_config, autocommit=True) as metadata_conn:
        with get_importer(
                OperatorDataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                OperatorDataParams(
                    filename=
                    'testData1-operator-operator4-anonymized_20161101_20161130.csv',
                    operator='1',
                    extract=False,
                    perform_leading_zero_check=False,
                    mcc_mnc_pairs=[{
                        'mcc': '111',
                        'mnc': '04'
                    }],
                    perform_unclean_checks=False,
                    perform_file_daterange_check=False)) as imp:
            imp.import_data()
            conn.commit()

    runner = CliRunner()
    monkeypatch.setattr(mocked_config.db_config, 'user', db_user)
    result = runner.invoke(dirbs_prune_cli, ['triplets'],
                           obj={'APP_CONFIG': mocked_config})
    if db_user in ['dirbs_poweruser_login']:
        assert result.exit_code == 0
    else:
        assert result.exit_code != 0
Example #3
def test_report(per_test_postgres, tmpdir, db_user, logger, mocked_statsd,
                mocked_config, monkeypatch):
    """Test catalog works with the security role created based on abstract role."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(
            db_config, autocommit=True) as metadata_conn:
        with get_importer(
                OperatorDataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                OperatorDataParams(
                    filename=
                    'testData1-operator-operator1-anonymized_20161101_20161130.csv',
                    operator='operator1',
                    perform_unclean_checks=False,
                    extract=False)) as imp:
            imp.import_data()

    runner = CliRunner()
    output_dir = str(tmpdir)
    monkeypatch.setattr(mocked_config.db_config, 'user', db_user)
    result = runner.invoke(dirbs_report_cli, [
        'standard', '--disable-retention-check', '--disable-data-check', '11',
        '2016', output_dir
    ],
                           obj={'APP_CONFIG': mocked_config})

    if db_user in ['dirbs_poweruser_login', 'dirbs_report_user']:
        assert result.exit_code == 0
    else:
        assert result.exit_code != 0
Example #4
def db_conn(postgres, mocked_config, request):
    """Fixture to inject a DB connection into a fixture. Cleans up to make sure DB is clean after each test."""
    # Create db connection
    current_db_user = mocked_config.db_config.user
    mocked_config.db_config.user = request.param
    conn = create_db_connection(mocked_config.db_config)
    yield conn
    # Close connection and create new one as the db role might have changed for security tests
    conn.close()
    mocked_config.db_config.user = current_db_user
    conn = create_db_connection(mocked_config.db_config)
    with conn.cursor() as table_cursor, conn.cursor() as truncate_cursor:
        table_cursor.execute('SELECT tablename FROM pg_tables WHERE schemaname = current_schema() '
                             'AND tablename != \'schema_metadata\' AND tablename != \'radio_access_technology_map\'')
        for tblname in table_cursor:
            truncate_cursor.execute(sql.SQL('TRUNCATE {0} CASCADE').format(sql.Identifier(tblname[0])))

        table_cursor.execute('SELECT sequence_name FROM information_schema.sequences '
                             'WHERE sequence_schema = current_schema()')
        for seqname in table_cursor:
            truncate_cursor.execute(sql.SQL('ALTER SEQUENCE {0} RESTART WITH 1').format(sql.Identifier(seqname[0])))

        table_cursor.execute('SELECT matviewname FROM pg_matviews WHERE schemaname = current_schema()')
        for matviewname in table_cursor:
            truncate_cursor.execute(sql.SQL('REFRESH MATERIALIZED VIEW CONCURRENTLY {0}')
                                    .format(sql.Identifier(matviewname[0])))
    # Commit truncations
    conn.commit()
    conn.close()
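The db_conn fixture above reads the target DB role from request.param, which implies it is driven through pytest's indirect parametrization. Below is a minimal sketch of how a test might request it; the test name and the table queried are assumptions for illustration, while the role names are taken from the other examples on this page:

import pytest

@pytest.mark.parametrize('db_conn', ['dirbs_poweruser_login', 'unknown_user'], indirect=True)
def test_role_can_read_data_catalog(db_conn):
    # db_conn is the psycopg2 connection yielded by the fixture above, opened as the parametrized user
    with db_conn.cursor() as cursor:
        cursor.execute('SELECT COUNT(*) FROM data_catalog')
        assert cursor.fetchone()[0] >= 0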
Example #5
def test_imei_api_registration_list(per_test_flask_app, per_test_postgres,
                                    logger, mocked_statsd, tmpdir, request,
                                    mocked_config, api_version):
    """Test IMEI API call after registration list import."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, \
            create_db_connection(db_config, autocommit=True) as metadata_conn:
        with get_importer(
                GSMADataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                GSMADataParams(
                    filename='gsma_dump_small_july_2016.txt')) as imp:
            imp.import_data()

        with get_importer(
                RegistrationListImporter, conn, metadata_conn, db_config,
                tmpdir, logger, mocked_statsd,
                RegistrationListParams(
                    content=
                    'APPROVED_IMEI,make,model,status,model_number,brand_name,'
                    'device_type,radio_interface,device_id\n'
                    '21260934000003,,,,,,,,1')) as imp:
            imp.import_data()

    if api_version == 'v1':
        rv = per_test_flask_app.get(
            url_for('{0}.imei_api'.format(api_version), imei='21260934000003'))
        assert rv.status_code == 200
    else:  # api version 2.0
        rv = per_test_flask_app.get(
            url_for('{0}.imei_get_api'.format(api_version),
                    imei='21260934000003'))
        assert rv.status_code == 200
Example #6
def test_imei_api_pairing_list(per_test_flask_app, per_test_postgres, logger,
                               mocked_statsd, tmpdir, request, mocked_config,
                               api_version):
    """Test IMEI API call after pairing list import."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, \
            create_db_connection(db_config, autocommit=True) as metadata_conn:
        with get_importer(
                GSMADataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                GSMADataParams(
                    filename='gsma_dump_small_july_2016.txt')) as imp:
            imp.import_data()

        with get_importer(
                PairingListImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                PairListParams(content='imei,imsi\n'
                               '811111013136460,111018001111111\n'
                               '359000000000000,111015113222222\n'
                               '357756065985824,111015113333333')) as imp:
            imp.import_data()

    if api_version == 'v1':
        rv = per_test_flask_app.get(
            url_for('{0}.imei_api'.format(api_version), imei='21260934000003'))
        assert rv.status_code == 200
    else:  # api version 2.0
        rv = per_test_flask_app.get(
            url_for('{0}.imei_get_pairings_api'.format(api_version),
                    imei='21260934000003'))
        assert rv.status_code == 200
Example #7
def test_imei_api(per_test_flask_app, per_test_postgres, logger, mocked_statsd, tmpdir, request, mocked_config,
                  api_version):
    """Test IMEI API call works with the security role created based on abstract role."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, \
            create_db_connection(db_config, autocommit=True) as metadata_conn:
        with get_importer(OperatorDataImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          OperatorDataParams(
                              filename='testData1-operator-operator1-anonymized_20161101_20161130.csv',
                              operator='operator1',
                              perform_unclean_checks=False,
                              extract=False)) as imp:
            imp.import_data()

    current_user = request.node.callspec.params['per_test_flask_app']

    if api_version == 'v1':
        if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']:
            rv = per_test_flask_app.get(url_for('{0}.imei_api'.format(api_version),
                                                imei='388260336982806', include_seen_with=1))
            assert rv.status_code == 200
            assert json.loads(rv.data.decode('utf-8'))['seen_with'] == \
                                                      [{'imsi': '11101400135251', 'msisdn': '22300825684694'},
                                                       {'imsi': '11101400135252', 'msisdn': '22300825684692'}]
            assert json.loads(rv.data.decode('utf-8'))['realtime_checks']['ever_observed_on_network'] is True

        else:
            with pytest.raises(DatabaseRoleCheckException):
                per_test_flask_app.get(url_for('{0}.imei_api'.format(api_version),
                                               imei='388260336982806', include_seen_with=1))
    else:  # api version 2.0
        if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']:
            rv = per_test_flask_app.get(url_for('{0}.imei_get_subscribers_api'.format(api_version),
                                                imei='388260336982806'))
            assert rv.status_code == 200
            data = json.loads(rv.data.decode('utf-8'))
            assert len(data['subscribers']) != 0
            assert data['subscribers'] == [
                {
                    'imsi': '11101400135251',
                    'last_seen': '2016-11-01',
                    'msisdn': '22300825684694'
                },
                {
                    'imsi': '11101400135252',
                    'last_seen': '2016-11-02',
                    'msisdn': '22300825684692'
                }]
        else:
            with pytest.raises(DatabaseRoleCheckException):
                per_test_flask_app.get(url_for('{0}.imei_get_subscribers_api'.format(api_version),
                                               imei='388260336982806'))
Example #8
def test_classify(per_test_postgres, db_user, tmpdir, logger, mocked_statsd, monkeypatch, mocked_config):
    """Test classify works with the security role created based on abstract role."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(db_config, autocommit=True) as metadata_conn:
        with get_importer(OperatorDataImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          OperatorDataParams(
                              content='date,imei,imsi,msisdn\n'
                                      '20110101,8888#888622222,123456789012345,123456789012345\n'
                                      '20110101,88888888622222,123456789012345,123456789012345\n'
                                      '20110101,8888888862222209,123456789012345,123456789012345\n'
                                      '20110101,88888862222209**,123456789012345,123456789012345',
                              extract=False,
                              perform_unclean_checks=False,
                              perform_region_checks=False,
                              perform_home_network_check=False,
                              operator='operator1')) as imp:
            imp.import_data()

        with get_importer(GSMADataImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          GSMADataParams(filename='gsma_not_found_anonymized.txt')) as imp:
            imp.import_data()

        with get_importer(RegistrationListImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          RegistrationListParams(filename='sample_registration_list.csv')) as imp:
            imp.import_data()

    # Run dirbs-classify using db args from the temp postgres instance
    runner = CliRunner()
    monkeypatch.setattr(mocked_config.db_config, 'user', db_user)
    result = runner.invoke(dirbs_classify_cli, ['--no-safety-check'], obj={'APP_CONFIG': mocked_config})

    if db_user in ['dirbs_poweruser_login', 'dirbs_classify_user']:
        assert result.exit_code == 0
    else:
        assert result.exit_code != 0
Example #9
def _update_catalog(uncataloged_files, config):
    """Write the new and modified files to the data catalog."""
    with create_db_connection(
            config.db_config) as conn, conn.cursor() as cursor:
        for f in uncataloged_files:
            cursor.execute(
                sql.SQL(
                    """INSERT INTO data_catalog AS dc(filename, file_type, modified_time,
                                                               compressed_size_bytes, is_valid_zip, is_valid_format,
                                                               md5, extra_attributes, first_seen, last_seen,
                                                               uncompressed_size_bytes, num_records)
                                           VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now(), now(), %s, %s)
                                      ON CONFLICT (filename, file_type)
                                        DO UPDATE
                                              SET modified_time = %s,
                                                  compressed_size_bytes = %s,
                                                  is_valid_zip = %s,
                                                  is_valid_format = %s,
                                                  md5 = %s,
                                                  extra_attributes = %s,
                                                  last_seen = now(),
                                                  uncompressed_size_bytes = %s,
                                                  num_records = %s"""
                ),  # noqa: Q441, Q449
                [
                    f.filename, f.file_type, f.modified_time,
                    f.compressed_size_bytes, f.is_valid_zip, f.is_valid_format,
                    f.md5,
                    json.dumps(f.extra_attributes), f.uncompressed_size_bytes,
                    f.num_records, f.modified_time, f.compressed_size_bytes,
                    f.is_valid_zip, f.is_valid_format, f.md5,
                    json.dumps(f.extra_attributes), f.uncompressed_size_bytes,
                    f.num_records
                ])
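The INSERT above binds every value twice, once for the VALUES list and once for the DO UPDATE SET clause. As a sketch of an equivalent, shorter variant (not the project's code), the update side can reference the proposed row through PostgreSQL's EXCLUDED pseudo-table so each value is bound only once:

# Sketch only: assumes the same `cursor` and catalog file object `f` as in the function above.
cursor.execute(
    """INSERT INTO data_catalog AS dc(filename, file_type, modified_time, compressed_size_bytes,
                                      is_valid_zip, is_valid_format, md5, extra_attributes,
                                      first_seen, last_seen, uncompressed_size_bytes, num_records)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now(), now(), %s, %s)
       ON CONFLICT (filename, file_type)
         DO UPDATE
               SET modified_time = EXCLUDED.modified_time,
                   compressed_size_bytes = EXCLUDED.compressed_size_bytes,
                   is_valid_zip = EXCLUDED.is_valid_zip,
                   is_valid_format = EXCLUDED.is_valid_format,
                   md5 = EXCLUDED.md5,
                   extra_attributes = EXCLUDED.extra_attributes,
                   last_seen = now(),
                   uncompressed_size_bytes = EXCLUDED.uncompressed_size_bytes,
                   num_records = EXCLUDED.num_records""",
    [f.filename, f.file_type, f.modified_time, f.compressed_size_bytes, f.is_valid_zip,
     f.is_valid_format, f.md5, json.dumps(f.extra_attributes), f.uncompressed_size_bytes,
     f.num_records])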
Example #10
def check(ctx):
    """
    Checks whether DB schema matches software DB version.

    :param ctx: current cli context obj
    """
    db_config = common.ensure_config(ctx).db_config

    logger = logging.getLogger('dirbs.db')
    logger.info('Querying DB schema version for DB %s on host %s',
                db_config.database, db_config.host)

    with utils.create_db_connection(db_config) as conn:
        version = utils.query_db_schema_version(conn)

    logger.info('Code schema version: %d', code_db_schema_version)
    if version is None:
        logger.error(
            'DB has not been clean installed. Maybe this DB pre-dates the version checking?'
        )
        logger.error('DB schema version unknown.')
        # Exit code is used to determine whether the schema has (exit code: 0) or has not (exit code: 1) been installed.
        # Non-zero exit code triggers installation of schema at entrypoint of processing container.
        sys.exit(1)
    else:
        logger.info('DB schema version: %s', str(version))
        if version < code_db_schema_version:
            logger.error('DB schema older than code.')
        elif version > code_db_schema_version:
            logger.error('DB schema newer than code.')
        else:
            logger.info('Schema versions match between code and DB.')
Example #11
def cli(ctx):
    """
    DIRBS script to initialize, configure and upgrade the PostgreSQL schema.

    :param ctx: current cli context obj
    """
    config = common.ensure_config(ctx)
    db_config = config.db_config
    logger = logging.getLogger('dirbs.db')
    subcommand = ctx.invoked_subcommand

    dirbs.logging.setup_file_logging(
        config.log_config,
        'dirbs-db_{0}_{1}'.format(subcommand,
                                  datetime.datetime.now().strftime('%Y%m%d')))

    # check subcommand should try and fail regardless of these checks.
    # install_roles subcommand installs these roles so can't do these checks
    if subcommand not in ['install_roles', 'check']:
        with utils.create_db_connection(db_config) as conn:
            try:
                utils.warn_if_db_superuser(conn)
                utils.verify_db_roles_installed(conn)
                utils.verify_db_role_for_job(conn, 'dirbs_core_power_user')
                utils.verify_db_ownership(conn)
                utils.verify_hll_schema(conn)
                if subcommand != 'install':
                    # install subcommand creates the schema, so can't check it here
                    utils.verify_core_schema(conn)
                    utils.verify_db_search_path(conn)
            except (utils.DatabaseRoleCheckException,
                    utils.DatabaseSchemaException) as ex:
                logger.error(str(ex))
                sys.exit(1)
Example #12
def get_db_connection() -> g:
    """Opens a new DB connection if there is not yet for the current app context."""
    if not hasattr(g, 'db_conn'):
        db_config = current_app.config['DIRBS_CONFIG'].db_config
        g.db_conn = create_db_connection(db_config, readonly=True, autocommit=True)

    return g.db_conn
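Nothing in this snippet closes the connection stored on g when the application context ends; that happens elsewhere in the app setup. A minimal sketch of the matching teardown hook, assuming a helper that is handed the Flask application object during setup (the helper name is hypothetical):

from flask import g

def register_db_teardown(app):
    # Hypothetical helper: close the cached read-only connection when the app context is popped
    @app.teardown_appcontext
    def close_db_connection(exception):
        conn = getattr(g, 'db_conn', None)
        if conn is not None:
            conn.close()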
Example #13
    def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date):
        """
        Method to queue jobs to calculate the IMEIs that meet this condition.

        Arguments:
            executor: instance of the python executor class, to submit back the results
            app_config: dirbs app current configuration, to extract various configs required for the job
            run_id: run id of the current classification job
            curr_date: current date of the system
        """
        with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} (
                                          imei_norm TEXT NOT NULL,
                                          virt_imei_shard SMALLINT NOT NULL
                                      )
                                      PARTITION BY RANGE (virt_imei_shard)""")
                           .format(intermediate_tbl=self.intermediate_tbl_id(run_id)))
            partition_utils.create_imei_shard_partitions(conn, tbl_name=self.intermediate_tbl_name(run_id),
                                                         unlogged=True)
            parallel_shards = partition_utils.num_physical_imei_shards(conn)

        # Done with connection -- temp tables should now be committed
        virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
        for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
            yield executor.submit(self._calc_imeis_job,
                                  app_config,
                                  run_id,
                                  curr_date,
                                  virt_imei_range_start,
                                  virt_imei_range_end)
Example #14
def test_listgen(per_test_postgres, tmpdir, logger, mocked_statsd, db_user, mocked_config, monkeypatch,
                 classification_data):
    """Test that the dirbs-listgen instance runs without an error."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(db_config, autocommit=True) as metadata_conn:
        with get_importer(OperatorDataImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          OperatorDataParams(
                              content='date,imei,imsi,msisdn\n'
                                      '20160203,811111013136460,111018001111111,223338000000\n'
                                      '20160203,359000000000000,111015113222222,223355000000\n'
                                      '20160203,357756065985824,111015113333333,223355111111',
                              cc=['22', '74'],
                              mcc_mnc_pairs=[{'mcc': '111', 'mnc': '01'}],
                              operator='operator1',
                              extract=False)) as imp:
            imp.import_data()

        with get_importer(PairingListImporter,
                          conn,
                          metadata_conn,
                          db_config,
                          tmpdir,
                          logger,
                          mocked_statsd,
                          PairListParams(
                              content='imei,imsi,msisdn\n'
                                      '811111013136460,111018001111111,234555555555550\n'
                                      '359000000000000,111015113222222,234555555555551\n'
                                      '357756065985824,111015113333333,234555555555552')) as imp:
            imp.import_data()

    # Now run listgen as requested user
    runner = CliRunner()
    monkeypatch.setattr(mocked_config.db_config, 'user', db_user)
    output_dir = str(tmpdir)
    result = runner.invoke(dirbs_listgen_cli, [output_dir], obj={'APP_CONFIG': mocked_config})
    if db_user in ['dirbs_poweruser_login', 'dirbs_listgen_user']:
        assert result.exit_code == 0
    else:
        assert result.exit_code != 0
Example #15
def _postgres_impl(mocked_config):
    """Implementation of fixture to initialise a temporary PostgreSQL instance with a clean DB schema."""
    # The locale needs to be set to C rather than en_US.UTF8 so that, in collation order,
    # uppercase comes before lowercase.
    postgresql = testing.postgresql.Postgresql(initdb_args='-U postgres -A trust --lc-collate=C.UTF-8 '
                                                           '--lc-ctype=C.UTF-8')
    dsn = postgresql.dsn()

    # Monkey-patch Postgres config to use temp postgres instance
    for setting in ['database', 'host', 'port', 'user', 'password']:
        setattr(mocked_config.db_config, setting, dsn.get(setting, None))

    # Run dirbs-db install_roles using db args from the temp postgres instance
    runner = CliRunner()
    result = runner.invoke(dirbs_db_cli, ['install_roles'], obj={'APP_CONFIG': mocked_config})
    assert result.exit_code == 0

    with create_db_connection(mocked_config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute('CREATE SCHEMA hll;')
        cursor.execute('GRANT USAGE ON SCHEMA hll TO dirbs_core_base;')
        cursor.execute('CREATE EXTENSION hll SCHEMA hll;')
        cursor.execute(sql.SQL('ALTER DATABASE {0} OWNER TO dirbs_core_power_user')
                       .format(sql.Identifier(dsn.get('database'))))

    # Run dirbs-db install using db args from the temp postgres instance
    result = runner.invoke(dirbs_db_cli, ['install'], catch_exceptions=False, obj={'APP_CONFIG': mocked_config})
    assert result.exit_code == 0

    # Create the necessary roles for security tests
    with create_db_connection(mocked_config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute('CREATE ROLE dirbs_import_operator_user IN ROLE dirbs_core_import_operator LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_gsma_user IN ROLE dirbs_core_import_gsma LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_pairing_list_user IN ROLE dirbs_core_import_pairing_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_stolen_list_user IN ROLE dirbs_core_import_stolen_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_registration_list_user '
                       'IN ROLE dirbs_core_import_registration_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_golden_list_user IN ROLE dirbs_core_import_golden_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_classify_user IN ROLE dirbs_core_classify LOGIN')
        cursor.execute('CREATE ROLE dirbs_listgen_user IN ROLE dirbs_core_listgen LOGIN')
        cursor.execute('CREATE ROLE dirbs_report_user IN ROLE dirbs_core_report LOGIN')
        cursor.execute('CREATE ROLE dirbs_api_user IN ROLE dirbs_core_api LOGIN')
        cursor.execute('CREATE ROLE dirbs_catalog_user IN ROLE dirbs_core_catalog LOGIN')
        cursor.execute('CREATE ROLE dirbs_poweruser_login IN ROLE dirbs_core_power_user LOGIN')
        cursor.execute('CREATE ROLE unknown_user LOGIN')

    yield postgresql
    postgresql.stop()
Example #16
def repartition(ctx, num_physical_shards):
    """Repartition DIRBS Core tables into a new number of physical IMEI shards."""
    logger = logging.getLogger('dirbs.db')
    config = common.ensure_config(ctx)
    with utils.create_db_connection(
            config.db_config) as conn, conn.cursor() as cursor:
        logger.info(
            'Repartitioning DB schema in DB %s on host %s into %d physical shards...',
            config.db_config.database, config.db_config.host,
            num_physical_shards)

        logger.info('Re-partitioning classification_state table...')
        partition_utils.repartition_classification_state(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned classification_state table')

        logger.info('Re-partitioning registration_list table...')
        partition_utils.repartition_registration_list(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned registration_list table')

        logger.info('Re-partitioning stolen_list table...')
        partition_utils.repartition_stolen_list(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned stolen_list table')

        logger.info('Re-partitioning pairing_list table...')
        partition_utils.repartition_pairing_list(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned pairing_list table')

        logger.info('Re-partitioning blacklist table...')
        partition_utils.repartition_blacklist(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned blacklist table')

        logger.info('Re-partitioning notifications_lists table...')
        partition_utils.repartition_notifications_lists(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned notifications_lists table')

        logger.info('Re-partitioning exceptions_lists table...')
        partition_utils.repartition_exceptions_lists(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned exceptions_lists table')

        logger.info('Re-partitioning network_imeis table...')
        partition_utils.repartition_network_imeis(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned network_imeis table')

        logger.info('Re-partitioning monthly_network_triplets tables...')
        partition_utils.repartition_monthly_network_triplets(
            conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned monthly_network_triplets tables')

        # Update schema metadata table
        cursor.execute('UPDATE schema_metadata SET phys_shards = %s',
                       [num_physical_shards])
Example #17
def metadata_db_conn(postgres, mocked_config):
    """Fixture to inject a metadata DB connection as a fixture. Only cleans up the job_metadata table."""
    # Create db connection
    conn = create_db_connection(mocked_config.db_config, autocommit=True)
    yield conn
    with conn.cursor() as cursor:
        cursor.execute('TRUNCATE job_metadata')
        cursor.execute('ALTER SEQUENCE job_metadata_run_id_seq RESTART WITH 1')
    conn.close()
Example #18
    def _upload_file_to_staging_table(self, input_filename):
        """Method to upload a single batch to the staging table."""
        conn = getattr(self._thread_local_storage, 'conn', None)
        if conn is None:
            conn = self._thread_local_storage.conn = create_db_connection(
                self._db_config)

        with open(input_filename, 'r') as f, conn, conn.cursor() as cursor:
            cursor.copy_expert(sql=self._upload_batch_to_staging_table_query(),
                               file=f)
            return cursor.rowcount
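The connection here is cached in thread-local storage because batches are uploaded from several worker threads and psycopg2 connections should not be shared between threads. A simplified, self-contained sketch of that pattern follows; the function names, the 4-worker pool and the upload_batch callable are illustrative only, not the importer's real API:

import threading
from concurrent.futures import ThreadPoolExecutor

_thread_local = threading.local()

def _get_thread_conn(db_config):
    # Lazily open one connection per worker thread and reuse it for later batches
    conn = getattr(_thread_local, 'conn', None)
    if conn is None:
        conn = _thread_local.conn = create_db_connection(db_config)
    return conn

def _upload_one(db_config, filename, upload_batch):
    # Runs inside a worker thread, so the connection is created and reused there
    return upload_batch(_get_thread_conn(db_config), filename)

def upload_batches(db_config, batch_filenames, upload_batch):
    # upload_batch(conn, filename) is an assumed callable that copies one batch file and returns a row count
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(_upload_one, db_config, name, upload_batch)
                   for name in batch_filenames]
        return sum(future.result() for future in futures)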
Example #19
    def queue_update_classification_state_jobs(self, executor, app_config,
                                                run_id, curr_date):
        """Method to queue jobs to update the classification_state table after the IMEIs have been calculated."""
        with create_db_connection(app_config.db_config) as conn:
            parallel_shards = partition_utils.num_physical_imei_shards(conn)
            virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(
                parallel_shards)
            for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
                yield executor.submit(self._update_classification_state_job,
                                      app_config, run_id, curr_date,
                                      virt_imei_range_start,
                                      virt_imei_range_end)
Example #20
def install(ctx):
    """
    Installs latest schema on clean DB instance.

    :param ctx: current cli context obj
    :return: status
    """
    logger = logging.getLogger('dirbs.db')
    config = common.ensure_config(ctx)
    db_config = config.db_config
    with utils.create_db_connection(db_config) as conn, conn.cursor() as cur:
        logger.info('Creating initial base DB schema in DB %s on host %s',
                    db_config.database, db_config.host)

        # Check if there is stuff already in there
        cur.execute("""SELECT COUNT(*)
                         FROM pg_class c
                         JOIN pg_namespace n ON n.oid = c.relnamespace
                        WHERE n.nspname = current_schema()""")
        is_clean = (cur.fetchone()[0] == 0)
        if not is_clean:
            logger.error('Can\'t install latest schema into a non-clean DB')
            logger.error(
                'Instead, use dirbs-db upgrade to upgrade the schema to the latest version'
            )
            sys.exit(1)

        # Set our role here so that new objects get created with dirbs_core_power_user as owner by default
        with utils.db_role_setter(conn, role_name='dirbs_core_power_user'):
            # First we setup the schema, search path etc.
            sql = pkgutil.get_data('dirbs', 'sql/base/on_db_creation.sql')
            cur.execute(sql)

            # Install the base schema for v19 and set current version to 19
            base_schema = 'sql/base/v19_schema.sql'
            logger.info('Restoring base v19 schema from SQL file: %s',
                        base_schema)
            sql = pkgutil.get_data('dirbs', base_schema)
            cur.execute(sql)
            utils.set_db_schema_version(conn, min_schema_version)
            logger.info(
                'Successfully created base v{0:d} schema. Scheduling dirbs-db upgrade...'
                .format(min_schema_version))

    # Then we call upgrade to complete the process
    rv = 0
    if code_db_schema_version > min_schema_version:
        rv = ctx.invoke(upgrade)
    else:
        # Can't do anything until we know the schema is the right version
        _store_job_metadata(config, 'install')

    return rv
Example #21
    def _calc_imeis_job(self, app_config, run_id, curr_date,
                        virt_imei_range_start, virt_imei_range_end):
        """Function to calculate the IMEIs that are met by this condition (single job)."""
        with create_db_connection(app_config.db_config) as conn, conn.cursor(
        ) as cursor, CodeProfiler() as cp:
            dims_sql = [
                d.sql(conn,
                      app_config,
                      virt_imei_range_start,
                      virt_imei_range_end,
                      curr_date=curr_date) for d in self.dimensions
            ]

            # Calculate the SQL for the intersection of all dimensions
            condition_sql = sql.SQL(' INTERSECT ').join(dims_sql)

            # If sticky, we need to UNION the sql with the currently selected IMEIs
            if self.config.sticky:
                condition_sql = sql.SQL("""SELECT imei_norm
                                             FROM classification_state
                                            WHERE cond_name = {cond_name}
                                              AND virt_imei_shard >= {virt_imei_range_start}
                                              AND virt_imei_shard < {virt_imei_range_end}
                                              AND end_date IS NULL
                                                  UNION ALL {cond_results_sql}
                                        """).format(
                    cond_name=sql.Literal(self.label),
                    virt_imei_range_start=sql.Literal(virt_imei_range_start),
                    virt_imei_range_end=sql.Literal(virt_imei_range_end),
                    cond_results_sql=condition_sql)

            # Make sure we only get distinct IMEIs
            condition_sql = sql.SQL(
                """SELECT imei_norm, calc_virt_imei_shard(imei_norm) AS virt_imei_shard
                                         FROM ({0}) non_distinct
                                     GROUP BY imei_norm""").format(
                    condition_sql)

            # Copy results to the temp table
            tbl_name = partition_utils.imei_shard_name(
                base_name=self.intermediate_tbl_name(run_id),
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)
            cursor.execute(
                sql.SQL(
                    """INSERT INTO {intermediate_tbl}(imei_norm, virt_imei_shard) {condition_sql}"""
                ).format(intermediate_tbl=sql.Identifier(tbl_name),
                         condition_sql=condition_sql))

            matching_imeis_count = cursor.rowcount

        return matching_imeis_count, cp.duration
Example #22
def _fetch_catalog_files(config):
    """Fetch all the cataloged files from the database."""
    with create_db_connection(
            config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute(
            'SELECT filename, file_type, modified_time, compressed_size_bytes FROM data_catalog'
        )
        cataloged_files = []
        for res in cursor:
            file_properties = CatalogAttributes(res.filename, res.file_type,
                                                res.modified_time,
                                                res.compressed_size_bytes)
            cataloged_files.append(file_properties)
        return cataloged_files
Example #23
def _store_job_metadata(config, subcommand):
    """
    Utility method to store metadata about a dirbs-db invocation in the database.

    :param config: dirbs config obj
    :param subcommand: sub-command name
    """
    logger = logging.getLogger('dirbs.db')
    with utils.create_db_connection(config.db_config, autocommit=True) as conn:
        # We can only really store successful database installs/upgrades as we can't store
        # anything in an unknown schema version. Therefore, we can store at the end of the job
        # and mark it as successfully complete immediately
        run_id = metadata.store_job_metadata(conn,
                                             'dirbs-db',
                                             logger,
                                             job_subcommand=subcommand)
        metadata.log_job_success(conn, 'dirbs-db', run_id)
Example #24
def install_roles(ctx):
    """Creates DIRBS Core PostgreSQL base roles if they don't exist."""
    logger = logging.getLogger('dirbs.db')
    config = common.ensure_config(ctx)
    db_config = copy.copy(config.db_config)
    # Allow install_roles to work even if database doesn't exist by using the postgres DB
    db_config.database = 'postgres'
    with utils.create_db_connection(db_config) as conn, conn.cursor() as cur:
        if not utils.can_db_user_create_roles(conn):
            logger.error(
                'Current PostgreSQL user does not have the CREATEROLE privilege. Please run this command '
                'as a normal user with the CREATEROLE privilege granted (preferred) or as a superuser'
            )
            sys.exit(1)

        logger.info('Creating DIRBS Core PostgreSQL roles...')
        sql = pkgutil.get_data('dirbs', 'sql/base/roles.sql')
        cur.execute(sql)
        logger.info('Created DIRBS Core PostgreSQL roles')
Example #25
    def queue_update_classification_state_jobs(self, executor, app_config, run_id, curr_date):
        """
        Method to queue jobs to update the classification_state table after the IMEIs have been calculated.

        Arguments:
            executor: job executor instance to submit back the results to the queue
            app_config: current dirbs app config object to use configuration from
            run_id: run_id of the current running classification job
            curr_date: current date of the system
        """
        with create_db_connection(app_config.db_config) as conn:
            parallel_shards = partition_utils.num_physical_imei_shards(conn)
            virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
            for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
                yield executor.submit(self._update_classification_state_job,
                                      app_config,
                                      run_id,
                                      curr_date,
                                      virt_imei_range_start,
                                      virt_imei_range_end)
Example #26
    def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date):
        """Method to queue jobs to calculate the IMEIs that are met by this condition."""
        with create_db_connection(
                app_config.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(
                sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} (
                                          imei_norm TEXT NOT NULL,
                                          virt_imei_shard SMALLINT NOT NULL
                                      )
                                      PARTITION BY RANGE (virt_imei_shard)""").
                format(intermediate_tbl=self.intermediate_tbl_id(run_id)))
            partition_utils.create_imei_shard_partitions(
                conn,
                tbl_name=self.intermediate_tbl_name(run_id),
                unlogged=True)
            parallel_shards = partition_utils.num_physical_imei_shards(conn)

        # Done with connection -- temp tables should now be committed
        virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(
            parallel_shards)
        for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
            yield executor.submit(self._calc_imeis_job, app_config, run_id,
                                  curr_date, virt_imei_range_start,
                                  virt_imei_range_end)
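Both versions of queue_calc_imeis_jobs are generators that yield Future objects instead of running the jobs directly, so a caller has to drain them to collect the per-shard results. A minimal sketch of how that could look; the thread-pool executor and the surrounding function are assumptions for illustration, not the real classification driver:

from concurrent.futures import ThreadPoolExecutor, as_completed

def run_calc_imeis(condition, app_config, run_id, curr_date):
    # condition is assumed to expose queue_calc_imeis_jobs as in the examples above;
    # each submitted job returns (matching_imeis_count, duration) as in _calc_imeis_job
    total_matching = 0
    with ThreadPoolExecutor() as executor:
        futures = list(condition.queue_calc_imeis_jobs(executor, app_config, run_id, curr_date))
        for future in as_completed(futures):
            matching_imeis_count, duration = future.result()
            total_matching += matching_imeis_count
    return total_matching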
Example #27
    def _update_classification_state_job(self, app_config, run_id, curr_date,
                                         virt_imei_range_start,
                                         virt_imei_range_end):
        """Function to update the classificate_state table with IMEIs that are met by this condition (single job)."""
        with create_db_connection(app_config.db_config) as conn, conn.cursor(
        ) as cursor, CodeProfiler() as cp:
            src_shard_name = partition_utils.imei_shard_name(
                base_name=self.intermediate_tbl_name(run_id),
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)

            # Add index on imei_norm
            indices = [
                partition_utils.IndexMetadatum(idx_cols=['imei_norm'],
                                               is_unique=True)
            ]
            partition_utils.add_indices(conn,
                                        tbl_name=src_shard_name,
                                        idx_metadata=indices)

            # Analyze table for better stats/plans
            cursor.execute(
                sql.SQL('ANALYZE {0}').format(sql.Identifier(src_shard_name)))

            # Calculate block date
            if curr_date is None:
                curr_date = datetime.date.today()

            in_amnesty_eval_period, in_amnesty_period = compute_amnesty_flags(
                app_config, curr_date)

            # If condition is blocking and is not eligible for amnesty, then compute block_date.
            # The block_date is set to NULL for amnesty_eligible condition within the eval period.
            amnesty_eligible = self.config.amnesty_eligible
            sticky_block_date = curr_date + datetime.timedelta(days=self.config.grace_period) \
                if self.config.blocking and not (amnesty_eligible and in_amnesty_eval_period) else None

            # If the condition's amnesty_eligible flag changed while in eval period, then make sure we update
            # the amnesty_granted column in the classification_state table for existing IMEIs meeting that condition.
            # These rows will be selected by the existing WHERE clause filters as the block_date would change
            # from being NULL to not-NULL or vice-versa.
            set_amnesty_granted_column = sql.SQL(', amnesty_granted = {0}').format(sql.Literal(amnesty_eligible)) \
                if in_amnesty_eval_period else sql.SQL('')

            # If in amnesty period, update the block_date for IMEIs that were previously classified
            # as amnesty eligible. This filter is to select amnesty_granted IMEIs with not-NULL block date.
            # This is to make sure if the amnesty_end_date was updated, we update the block_date too.
            amnesty_block_date_filter = sql.SQL(
                'OR cs.amnesty_granted = TRUE'
            ) if in_amnesty_period else sql.SQL('')

            dest_shard_name = partition_utils.imei_shard_name(
                base_name='classification_state',
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)

            # If a condition is blocking, insert new records into state table with not null blocking date or
            # set a not null blocking date for the existing ones having a null block_date.
            # Viceversa, if a condition is not blocking, insert new records into state table with Null block_date
            # or set a Null block_date for the existing ones having a not-null block_date.
            # Set the amnesty_granted column equal to condition's amnesty_eligible flag when in amnesty eval
            # period, otherwise always set it to False for new IMEIs meeting the condition.
            cursor.execute(
                sql.SQL("""INSERT INTO {dest_shard} AS cs(imei_norm,
                                                                     cond_name,
                                                                     run_id,
                                                                     start_date,
                                                                     end_date,
                                                                     block_date,
                                                                     amnesty_granted,
                                                                     virt_imei_shard)
                                           SELECT imei_norm,
                                                  %s,
                                                  %s,
                                                  %s,
                                                  NULL,
                                                  %s,
                                                  %s,
                                                  calc_virt_imei_shard(imei_norm)
                                             FROM {src_shard}
                                                  ON CONFLICT (imei_norm, cond_name)
                                            WHERE end_date IS NULL
                                                  DO UPDATE
                                                        SET block_date = CASE WHEN cs.amnesty_granted = TRUE
                                                                          AND NOT {in_eval_period}
                                                                         THEN {amnesty_end_date}
                                                                         ELSE {sticky_block_date}
                                                                          END
                                                            {set_amnesty_granted_column}
                                                      WHERE (cs.block_date IS NULL
                                                        AND excluded.block_date IS NOT NULL)
                                                         OR (cs.block_date IS NOT NULL
                                                        AND excluded.block_date IS NULL)
                                                            {amnesty_block_date_filter}"""
                        )  # noqa Q441
                .format(src_shard=sql.Identifier(src_shard_name),
                        dest_shard=sql.Identifier(dest_shard_name),
                        in_eval_period=sql.Literal(in_amnesty_eval_period),
                        set_amnesty_granted_column=set_amnesty_granted_column,
                        amnesty_block_date_filter=amnesty_block_date_filter,
                        amnesty_end_date=sql.Literal(
                            app_config.amnesty_config.amnesty_period_end_date),
                        sticky_block_date=sql.Literal(sticky_block_date)),
                [
                    self.label, run_id, curr_date, sticky_block_date,
                    (amnesty_eligible and in_amnesty_eval_period)
                ])

            # Get rid of records that no longer exist in the matched IMEIs list
            cursor.execute(
                sql.SQL("""UPDATE {dest_shard} dst
                                         SET end_date = %s
                                       WHERE cond_name = %s
                                         AND end_date IS NULL
                                         AND NOT EXISTS (SELECT imei_norm
                                                           FROM {src_shard}
                                                          WHERE imei_norm = dst.imei_norm)"""
                        ).format(src_shard=sql.Identifier(src_shard_name),
                                 dest_shard=sql.Identifier(dest_shard_name)),
                [curr_date, self.label])

        return cp.duration
Example #28
        def decorated(ctx, *args, **kwargs):
            _command = command or os.path.basename(sys.argv[0])
            _logger_name = logger_name or _command.replace('-', '.')
            if callable(metrics_root):
                _metrics_root = metrics_root(ctx, args, **kwargs)
            else:
                _metrics_root = metrics_root
            if _metrics_root is None:
                _metrics_root = _logger_name + '.'
                if subcommand is not None:
                    _metrics_root = _metrics_root + subcommand + '.'

            config = ensure_config(ctx)
            statsd = ensure_statsd(ctx)
            logger = logging.getLogger(_logger_name)
            metrics_run_root = None
            run_id = -1
            metadata_conn = None
            inited_file_logging = False

            try:
                # Store time so that we can track metrics for total listgen time
                st = time.time()

                # Get metadata connection in autocommit mode
                metadata_conn = utils.create_db_connection(config.db_config, autocommit=True)

                try:
                    # Verify DB schema
                    utils.verify_db_schema(metadata_conn, required_role)
                except (utils.DatabaseSchemaException, utils.DatabaseRoleCheckException) as ex:
                    logger.error(str(ex))
                    sys.exit(1)

                # Store metadata and get run_id
                run_id = metadata.store_job_metadata(metadata_conn, _command, logger, job_subcommand=subcommand)

                # Now that we have a run_id, we can setup logging
                if subcommand is not None:
                    log_filename = '{0}_{1}_run_id_{2:d}'.format(command, subcommand, run_id)
                else:
                    log_filename = '{0}_run_id_{1:d}'.format(command, run_id)
                inited_file_logging = dirbs.logging.setup_file_logging(config.log_config, log_filename)

                # Get metrics run root based on run_id
                metrics_run_root = '{0}runs.{1:d}.'.format(_metrics_root, run_id)

                # Validate that any exempted device types occur in the imported GSMA TAC DB
                utils.validate_exempted_device_types(metadata_conn, config)

                # Run the actual decorated function with injected args for config, conn, statsd, logger,
                # run_id and metadata_conn
                with utils.create_db_connection(config.db_config) as conn:
                    # Call CLI function with injected args
                    f(ctx,
                      config,
                      statsd,
                      logger,
                      run_id,
                      conn,
                      metadata_conn,
                      _command,
                      _metrics_root,
                      metrics_run_root,
                      *args,
                      **kwargs)

                # Update the last success timestamp
                statsd.gauge('{0}last_success'.format(_metrics_root), int(time.time()))
                metadata.log_job_success(metadata_conn, _command, run_id)
            except:  # noqa: E722
                # Make sure we track the last failure timestamp for any exception and re-raise
                statsd.gauge('{0}last_failure'.format(_metrics_root), int(time.time()))
                # Log metadata in job_metadata table
                if run_id != -1:
                    metadata.log_job_failure(metadata_conn, _command, run_id, logger)
                raise
            finally:
                # Make sure we init file logging with the date as a last resort so that we flush our
                # buffered log output
                if not inited_file_logging:
                    if subcommand is not None:
                        log_filename = '{0}_{1}_run_id_unknown'.format(command, subcommand)
                    else:
                        log_filename = '{0}_run_id_unknown'.format(command)
                    dirbs.logging.setup_file_logging(config.log_config, log_filename)

                # Only track StatsD metrics for run time if we at least retrieved a run id, as this
                # forms part of the key
                dt = int((time.time() - st) * 1000)
                if metrics_run_root is not None:
                    statsd.gauge('{0}runtime.total'.format(metrics_run_root), dt)

                # If there was a duration_callback set, call it here with the calculated dt
                if duration_callback is not None:
                    duration_callback(dt)

                # Cleanup metadata connection (not in with statement)
                if metadata_conn is not None:
                    try:
                        metadata_conn.close()
                    except (psycopg2.InterfaceError, psycopg2.OperationalError) as e:
                        logger.error(str(e))
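This is only the inner wrapper of a decorator factory; the factory itself (and its click integration) sits outside the snippet. A rough sketch of the enclosing shape, with the factory name and its keyword parameters inferred purely from the closure variables referenced above, so treat them as hypothetical:

import functools

def cli_wrapper(*, command=None, subcommand=None, logger_name=None, metrics_root=None,
                required_role=None, duration_callback=None):
    # Hypothetical factory signature: each keyword mirrors a closure variable used in `decorated`
    def decorator(f):
        @functools.wraps(f)
        def decorated(ctx, *args, **kwargs):
            ...  # body as shown in Example #28: verify schema, store metadata, time the run, call f
        return decorated
    return decorator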
Example #29
def upgrade(ctx):
    """
    Upgrades the current DB schema to the version supported by this code using migration scripts.

    :param ctx: current cli context obj
    """
    logger = logging.getLogger('dirbs.db')
    config = common.ensure_config(ctx)
    db_config = config.db_config
    needs_analyze = False
    with utils.create_db_connection(db_config) as conn:
        logger.info('Querying DB schema version for DB %s on host %s',
                    db_config.database, db_config.host)
        with conn.cursor() as cur:
            try:
                version = utils.query_db_schema_version(conn)
            except ProgrammingError:
                logger.warn(
                    'Could not determine current schema version. Assuming no version'
                )
                version = None

            if version is None:
                logger.error(
                    'DB currently not installed or version number could not be determined. Can\'t upgrade'
                )
                sys.exit(1)

            if version < min_schema_version:
                logger.error(
                    'Current DB schema is older than DIRBS 4.0.0. Can\'t upgrade'
                )
                sys.exit(1)

            if version > code_db_schema_version:
                logger.error('DB schema newer than code. Can\'t upgrade')
                sys.exit(1)

            if version != code_db_schema_version:
                logger.info('Upgrading DB schema from version %d to %d',
                            version, code_db_schema_version)

                # If we're upgrading, make sure we schedule a full ANALYZE outside the transaction later
                needs_analyze = True

                # Set our role here so that new objects get created with dirbs_core_power_user as owner by default
                with utils.db_role_setter(conn,
                                          role_name='dirbs_core_power_user'):
                    for old_version in range(version, code_db_schema_version):
                        new_version = old_version + 1
                        # Check if there is a special migration class, otherwise use standard SQL file
                        try:
                            module_name = 'dirbs.schema_migrators.v{0}_upgrade'.format(
                                new_version)
                            module = importlib.import_module(module_name)
                            logger.info('Running Python migration script: %s',
                                        module_name)
                            migrator = module.migrator()
                            migrator.upgrade(conn)
                        except ImportError as ex:
                            script_name = 'sql/migration_scripts/v{0:d}_upgrade.sql'.format(
                                new_version)
                            logger.info('Running SQL migration script: %s',
                                        script_name)
                            sql = pkgutil.get_data('dirbs', script_name)
                            cur.execute(sql)

                        # We commit after every version upgrade
                        utils.set_db_schema_version(conn, new_version)
                        conn.commit()

                logger.info(
                    'Successfully updated schema - DB schema version is now %d',
                    code_db_schema_version)
                # Can't do anything until we know the schema is the right version
                _store_job_metadata(config, 'upgrade')
            else:
                logger.info('DB schema is already latest version')

            # Schedule a full ANALYZE at the end of an upgrade
            if needs_analyze:
                logger.info(
                    'Running ANALYZE of entire database after upgrade...')
                cur.execute('ANALYZE')
                logger.info(
                    'Finished running ANALYZE of entire database after upgrade'
                )
Example #30
def _add_indices_parallel_single_job(db_config, *, tbl_name, idx_metadatum, if_not_exists=False):
    """Job function called by add_indices_parallel."""
    with utils.create_db_connection(db_config) as conn:
        _add_index_to_single_shard(conn, part_name=tbl_name, idx_metadatum=idx_metadatum, if_not_exists=if_not_exists)