Ejemplo n.º 1
0
 def test_is_static_positive_yaml(self):
     # The first config produced from the sample YAML path must report
     # is_static; all() is called with use_server_filter=False.
     yaml_path = self.get_path('sample_static_data_source', 'yaml')
     with override_settings(STATIC_DATA_SOURCES=[yaml_path]):
         configs = list(
             StaticDataSourceConfiguration.all(use_server_filter=False))
         first_config = configs[0]
         self.assertTrue(first_config.is_static)
Ejemplo n.º 2
0
    def handle(self, **options):
        """Report tables that the schema diff marks for removal on each engine.

        For every engine id returned by ``self._get_engine_ids`` the database
        is compared against ``metadata``. Each table whose diff type is
        ``'remove_table'`` is then queried, and its row count and latest
        ``inserted_at`` value are printed.

        Reads ``options.get('engine_id')`` and passes it to
        ``self._get_engine_ids``.
        """
        configs = list(DataSourceConfiguration.all())
        configs += list(StaticDataSourceConfiguration.all())

        requested_engine_id = options.get('engine_id')
        engine_ids = self._get_engine_ids(configs, requested_engine_id)

        # First pass: collect the removable table names for every engine.
        removable_by_engine = defaultdict(list)
        for engine_id in engine_ids:
            db_engine = connection_manager.get_engine(engine_id)
            with db_engine.begin() as conn:
                context = get_migration_context(
                    conn, include_object=_include_object)
                raw_diffs = compare_metadata(context, metadata)

            removable = []
            for diff in reformat_alembic_diffs(raw_diffs):
                if diff.type == 'remove_table':
                    removable.append(diff.table_name)
            removable_by_engine[engine_id] = removable

        # Second pass: query each removable table in its own transaction.
        count_query = 'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'
        for engine_id, tablenames in removable_by_engine.items():
            db_engine = connection_manager.get_engine(engine_id)
            for tablename in tablenames:
                with db_engine.begin() as conn:
                    try:
                        result = conn.execute(
                            count_query.format(tablename=tablename))
                    except Exception:
                        # Any failure of the query is reported with this
                        # message, whatever the underlying cause.
                        print(tablename,
                              "no inserted_at column, probably not UCR")
                        continue
                    print(tablename, result.fetchone())
Ejemplo n.º 3
0
def _get_old_new_tablenames(engine_id=None):
    """Collect ``DSConf(old, new, config)`` entries grouped by engine id.

    Static data sources are processed first, then the documents whose ids
    come from the ``userreports/active_data_sources`` view. When
    ``engine_id`` is truthy, configs on any other engine are skipped. A
    config whose old table name was already seen on the same engine is
    logged as a duplicate and skipped.

    Returns a ``defaultdict(list)`` keyed by engine id.
    """
    by_engine_id = defaultdict(list)
    seen_tables = defaultdict(set)

    def _register(config):
        # Shared per-config handling for both static and dynamic sources.
        config_engine_id = config['engine_id']
        if engine_id and config_engine_id != engine_id:
            return

        old, new = _table_names(config.domain, config.table_id)
        already_seen = seen_tables[config_engine_id]
        if old in already_seen:
            logger.warning('Duplicate table: %s - %s', config.get_id, old)
            return
        already_seen.add(old)
        by_engine_id[config_engine_id].append(DSConf(old, new, config))

    for static_config in StaticDataSourceConfiguration.all():
        _register(static_config)

    view_rows = DataSourceConfiguration.view(
        'userreports/active_data_sources',
        reduce=False,
        include_docs=False)
    data_source_ids = [row['id'] for row in view_rows]
    for raw_doc in iter_docs(DataSourceConfiguration.get_db(), data_source_ids):
        _register(DataSourceConfiguration.wrap(raw_doc))

    return by_engine_id
Ejemplo n.º 4
0
 def test_deactivate_noop(self):
     json_path = self.get_path('sample_static_data_source', 'json')
     with override_settings(STATIC_DATA_SOURCES=[json_path]):
         configs = list(StaticDataSourceConfiguration.all())
         first_config = configs[0]
         # This is a SimpleTest, so the call below should fail if it
         # actually hits the DB.
         first_config.deactivate()
Ejemplo n.º 5
0
    def handle(self, **options):
        """Print row statistics for tables the schema diff flags as removable.

        Engine ids come from ``self._get_engine_ids``, which receives all
        dynamic and static data source configs plus
        ``options.get('engine_id')``. Removable tables are gathered for every
        engine before any of them is queried.
        """
        def removable_tables(engine_id):
            # Diff the engine's database against ``metadata`` and keep only
            # the names of tables whose diff type is 'remove_table'.
            engine = connection_manager.get_engine(engine_id)
            with engine.begin() as connection:
                migration_context = get_migration_context(connection, include_object=_include_object)
                raw_diffs = compare_metadata(migration_context, metadata)
            return [
                diff.table_name
                for diff in reformat_alembic_diffs(raw_diffs)
                if diff.type == 'remove_table'
            ]

        def report(engine, tablename):
            # One transaction per table; a failing query is reported rather
            # than raised.
            with engine.begin() as connection:
                try:
                    result = connection.execute(
                        'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'.format(tablename=tablename)
                    )
                except Exception:
                    print(tablename, "no inserted_at column, probably not UCR")
                else:
                    print(tablename, result.fetchone())

        data_sources = list(DataSourceConfiguration.all()) + list(StaticDataSourceConfiguration.all())
        engine_ids = self._get_engine_ids(data_sources, options.get('engine_id'))

        tables_to_remove_by_engine = defaultdict(list)
        for engine_id in engine_ids:
            tables_to_remove_by_engine[engine_id] = removable_tables(engine_id)

        for engine_id, tablenames in tables_to_remove_by_engine.items():
            engine = connection_manager.get_engine(engine_id)
            for tablename in tablenames:
                report(engine, tablename)
Ejemplo n.º 6
0
    def handle(self, **options):
        """Print, per engine, the prefixed tables that no data source expects.

        The expected tables per engine come from
        ``self._get_tables_by_engine_id``, which receives all dynamic and
        static data source configs plus ``options.get('engine_id')``. Tables
        present in the database but not expected are reported. A table with
        zero rows is dropped when ``options['drop_empty_tables']`` is truthy.

        Options read:
            engine_id: optional; forwarded to ``_get_tables_by_engine_id``.
            drop_empty_tables: required key; read only when an empty table
                is found.
        """
        data_sources = list(DataSourceConfiguration.all())
        data_sources.extend(list(StaticDataSourceConfiguration.all()))

        tables_by_engine_id = self._get_tables_by_engine_id(
            data_sources, options.get('engine_id'))

        # First pass: for each engine, find tables in the database that are
        # not in the expected set.
        tables_to_remove_by_engine = defaultdict(list)
        for engine_id, expected_tables in tables_by_engine_id.items():
            engine = connection_manager.get_engine(engine_id)
            with engine.begin() as connection:
                # Using string formatting rather than execute with %s syntax
                # is acceptable here because the strings we're inserting are static
                # and only templated for DRYness
                results = connection.execute(f"""
                SELECT table_name
                  FROM information_schema.tables
                WHERE table_schema='public'
                  AND table_type='BASE TABLE'
                  AND (
                    table_name LIKE '{UCR_TABLE_PREFIX}%%'
                    OR
                    table_name LIKE '{LEGACY_UCR_TABLE_PREFIX}%%'
                );
                """).fetchall()
                tables_in_db = {r[0] for r in results}

            # Set difference; assumes expected_tables is a set -- TODO confirm
            # against _get_tables_by_engine_id.
            tables_to_remove_by_engine[
                engine_id] = tables_in_db - expected_tables

        # Second pass: report each unexpected table, optionally dropping
        # the empty ones.
        for engine_id, tablenames in tables_to_remove_by_engine.items():
            print("\nTables no longer referenced in database: {}:\n".format(
                engine_id))
            engine = connection_manager.get_engine(engine_id)
            if not tablenames:
                print("\t No tables to prune")
                continue

            for tablename in tablenames:
                # Each table is handled in its own transaction.
                with engine.begin() as connection:
                    try:
                        result = connection.execute(
                            f'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'
                        )
                    except Exception:
                        # Any failure of the query above is reported with
                        # this message, whatever the underlying cause.
                        print(
                            f"\t{tablename}: no inserted_at column, probably not UCR"
                        )
                    else:
                        row_count, idle_since = result.fetchone()
                        if row_count == 0:
                            print(f"\t{tablename}: {row_count} rows")
                            if options['drop_empty_tables']:
                                # The drop runs on the same connection, inside
                                # the transaction opened for this table.
                                connection.execute(f'DROP TABLE "{tablename}"')
                                print(f'\t^-- deleted {tablename}')
                        else:
                            print(
                                f"\t{tablename}: {row_count} rows, idle since {idle_since}"
                            )
Ejemplo n.º 7
0
 def test_for_table_id_conflicts(self):
     # No (table_id, domain) pair may occur on more than one static data
     # source config.
     pair_counts = Counter()
     for ds in StaticDataSourceConfiguration.all():
         pair_counts[(ds.table_id, ds.domain)] += 1

     duplicates = [pair for pair, count in pair_counts.items() if count > 1]
     details = "\n".join(
         "table_id: {}, domain: {}".format(table_id, domain)
         for table_id, domain in duplicates)
     msg = "The following data source configs have duplicate table_ids on the same domains:\n{}".format(details)
     self.assertEqual(0, len(duplicates), msg)
 def test_get_all(self):
     # The sample JSON path is expected to produce exactly two configs, for
     # the 'example' and 'dimagi' domains, both with table_id 'all_candidates'.
     json_path = self.get_path('sample_static_data_source', 'json')
     with override_settings(STATIC_DATA_SOURCES=[json_path]):
         configs = list(StaticDataSourceConfiguration.all())
         self.assertEqual(2, len(configs))
         example, dimagi = configs
         self.assertEqual('example', example.domain)
         self.assertEqual('dimagi', dimagi.domain)
         for config in configs:
             self.assertEqual('all_candidates', config.table_id)
Ejemplo n.º 9
0
def _data_sources_by_engine_id():
    """Group all static, then all dynamic, data source configs by ``engine_id``.

    Returns a ``defaultdict(list)`` mapping each engine id to its configs.
    """
    grouped = defaultdict(list)
    for config_class in (StaticDataSourceConfiguration, DataSourceConfiguration):
        for config in config_class.all():
            grouped[config['engine_id']].append(config)
    return grouped
Ejemplo n.º 10
0
def _data_sources_by_engine_id():
    """Return a ``defaultdict(list)`` of data source configs keyed by engine id.

    Static configs are added first, followed by the dynamic ones.
    """
    grouped = defaultdict(list)

    def _collect(configs):
        # File each config under the engine id stored on it.
        for config in configs:
            grouped[config['engine_id']].append(config)

    _collect(StaticDataSourceConfiguration.all())
    _collect(DataSourceConfiguration.all())
    return grouped
    def test_get_all(self):
        # Expect five configs in total: the first two are the 'example' and
        # 'dimagi' configs with table_id 'all_candidates', and the remaining
        # three belong to the 'cc1' domain.
        json_path = self.get_path('sample_static_data_source', 'json')
        with override_settings(STATIC_DATA_SOURCES=[json_path]):
            configs = list(StaticDataSourceConfiguration.all())
            self.assertEqual(2 + 3, len(configs))

            leading, trailing = configs[:2], configs[2:]
            example, dimagi = leading
            self.assertEqual('example', example.domain)
            self.assertEqual('dimagi', dimagi.domain)
            for config in leading:
                self.assertEqual('all_candidates', config.table_id)

            for config in trailing:
                self.assertEqual('cc1', config.domain)
Ejemplo n.º 12
0
    def handle(self, **options):
        """List, per engine, the prefixed tables that no data source expects.

        The expected tables per engine come from
        ``self._get_tables_by_engine_id``, which receives all dynamic and
        static data source configs plus ``options.get('engine_id')``. When
        ``options['show_counts']`` is truthy, each unexpected table is also
        queried and the result row is printed next to its name.
        """
        configs = list(DataSourceConfiguration.all())
        configs += list(StaticDataSourceConfiguration.all())

        expected_by_engine = self._get_tables_by_engine_id(
            configs, options.get('engine_id'))

        # First pass: find tables in each database that are not expected.
        unreferenced_by_engine = defaultdict(list)
        for engine_id, expected_tables in expected_by_engine.items():
            db_engine = connection_manager.get_engine(engine_id)
            with db_engine.begin() as conn:
                table_query = """
                SELECT table_name
                  FROM information_schema.tables
                WHERE table_schema='public'
                  AND table_type='BASE TABLE'
                  AND (
                    table_name LIKE '{}%%'
                    OR
                    table_name LIKE '{}%%'
                );
                """.format(UCR_TABLE_PREFIX, LEGACY_UCR_TABLE_PREFIX)
                rows = conn.execute(table_query).fetchall()
                tables_in_db = {row[0] for row in rows}

            unreferenced_by_engine[engine_id] = tables_in_db - expected_tables

        # Second pass: print the unexpected tables for each engine.
        count_query = 'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'
        for engine_id, tablenames in unreferenced_by_engine.items():
            header = "\nTables no longer referenced in database: {}:\n"
            print(header.format(engine_id))
            db_engine = connection_manager.get_engine(engine_id)
            if not tablenames:
                print("\t No tables to prune")
                continue

            for tablename in tablenames:
                if not options['show_counts']:
                    print("\t{}".format(tablename))
                    continue

                with db_engine.begin() as conn:
                    try:
                        result = conn.execute(
                            count_query.format(tablename=tablename))
                    except Exception:
                        # Any failure of the query is reported this way.
                        print(
                            "\t{}: no inserted_at column, probably not UCR"
                            .format(tablename))
                    else:
                        stats = result.fetchone()
                        print("\t{}: {}".format(tablename, stats))
Ejemplo n.º 13
0
    def test_get_all(self):
        # The sample JSON path is supplied both through the patched
        # static_ucr_data_source_paths and through STATIC_DATA_SOURCES.
        # Expect seven configs: four 'all_candidates' configs alternating
        # between the 'example' and 'dimagi' domains, then three for 'cc1'.
        sample_path = self.get_path('sample_static_data_source', 'json')
        paths_patch = patch(
            "corehq.apps.userreports.models.static_ucr_data_source_paths",
            return_value=[sample_path])
        with paths_patch:
            with override_settings(STATIC_DATA_SOURCES=[sample_path]):
                configs = list(StaticDataSourceConfiguration.all())
                self.assertEqual(4 + 3, len(configs))

                leading, trailing = configs[:4], configs[4:]
                example, dimagi, example1, dimagi1 = leading
                self.assertEqual('example', example.domain)
                self.assertEqual('example', example1.domain)
                self.assertEqual('dimagi', dimagi.domain)
                self.assertEqual('dimagi', dimagi1.domain)
                for config in leading:
                    self.assertEqual('all_candidates', config.table_id)

                for config in trailing:
                    self.assertEqual('cc1', config.domain)
Ejemplo n.º 14
0
    def handle(self, **options):
        """List, per engine, the prefixed tables that no data source expects.

        The expected tables per engine come from
        ``self._get_tables_by_engine_id``, which receives all dynamic and
        static data source configs plus ``options.get('engine_id')``. Tables
        found in the database but not expected are printed.

        Options read:
            engine_id: optional; forwarded to ``_get_tables_by_engine_id``.
            show_counts: required key; when truthy, each table is queried
                and the ``(COUNT(*), MAX(inserted_at))`` row is printed
                next to its name.
        """
        data_sources = list(DataSourceConfiguration.all())
        data_sources.extend(list(StaticDataSourceConfiguration.all()))

        tables_by_engine_id = self._get_tables_by_engine_id(data_sources, options.get('engine_id'))

        # First pass: for each engine, find tables in the database that are
        # not in the expected set.
        tables_to_remove_by_engine = defaultdict(list)
        for engine_id, expected_tables in tables_by_engine_id.items():
            engine = connection_manager.get_engine(engine_id)
            with engine.begin() as connection:
                # Only the two prefix constants are interpolated into the
                # query text.
                results = connection.execute("""
                SELECT table_name
                  FROM information_schema.tables
                WHERE table_schema='public'
                  AND table_type='BASE TABLE'
                  AND (
                    table_name LIKE '{}%%'
                    OR
                    table_name LIKE '{}%%'
                );
                """.format(UCR_TABLE_PREFIX, LEGACY_UCR_TABLE_PREFIX)).fetchall()
                tables_in_db = {r[0] for r in results}

            tables_to_remove_by_engine[engine_id] = tables_in_db - expected_tables

        # Second pass: print the unexpected tables for each engine.
        for engine_id, tablenames in tables_to_remove_by_engine.items():
            print("\nTables no longer referenced in database: {}:\n".format(engine_id))
            engine = connection_manager.get_engine(engine_id)
            if not tablenames:
                print("\t No tables to prune")
                continue

            for tablename in tablenames:
                if options['show_counts']:
                    # Each table is queried in its own transaction.
                    with engine.begin() as connection:
                        try:
                            result = connection.execute(
                                'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'.format(tablename=tablename)
                            )
                        except Exception:
                            # Any failure of the query is reported this way.
                            print("\t{}: no inserted_at column, probably not UCR".format(tablename))
                        else:
                            # Fixed: this called the misspelled ``.foramt``,
                            # which raised AttributeError on every
                            # successful count query.
                            print("\t{}: {}".format(tablename, result.fetchone()))
                else:
                    print("\t{}".format(tablename))
Ejemplo n.º 15
0
    def test_data_sources_actually_exist(self):
        # Every static report config must reference a static data source id
        # that exists on the report's own domain.
        data_source_ids_by_domain = defaultdict(set)
        for data_source in StaticDataSourceConfiguration.all():
            data_source_ids_by_domain[data_source.domain].add(data_source.get_id)

        configs_missing_data_source = []
        for report_config in StaticReportConfiguration.all():
            known_ids = data_source_ids_by_domain[report_config.domain]
            if report_config.config_id not in known_ids:
                configs_missing_data_source.append(report_config)

        missing_count = len(configs_missing_data_source)
        missing_ids = "\n".join(config.get_id for config in configs_missing_data_source)
        msg = ("There are {} report configs which reference data sources that "
               "don't exist (or which don't exist on that domain):\n{}".format(
                   missing_count, missing_ids))
        self.assertEqual(0, len(configs_missing_data_source), msg)
Ejemplo n.º 16
0
    def test_data_sources_actually_exist(self):
        # Fails when a static report config points at a data source id that
        # is not among the static data sources of the same domain.
        ids_by_domain = defaultdict(set)
        for source in StaticDataSourceConfiguration.all():
            ids_by_domain[source.domain].add(source.get_id)

        def iter_missing(report_configs):
            # Yield the report configs whose config_id is unknown on their
            # domain.
            for report_config in report_configs:
                available = ids_by_domain[report_config.domain]
                if report_config.config_id not in available:
                    yield report_config

        missing = list(iter_missing(StaticReportConfiguration.all()))

        listing = "\n".join(config.get_id for config in missing)
        msg = ("There are {} report configs which reference data sources that "
               "don't exist (or which don't exist on that domain):\n{}".format(
                   len(missing), listing))
        self.assertEqual(0, len(missing), msg)
Ejemplo n.º 17
0
 def test_is_static_positive_json(self):
     # The first config produced from the sample JSON path must report
     # is_static.
     json_path = self.get_path('sample_static_data_source', 'json')
     with override_settings(STATIC_DATA_SOURCES=[json_path]):
         configs = list(StaticDataSourceConfiguration.all())
         first_config = configs[0]
         self.assertTrue(first_config.is_static)
 def test_production_config(self):
     # Call validate() on every static data source config; the test passes
     # when none of the calls raises.
     configs = StaticDataSourceConfiguration.all()
     for config in configs:
         config.validate()
Ejemplo n.º 19
0
 def get_all_configs(self):
     """Return the result of ``StaticDataSourceConfiguration.all()``."""
     configs = StaticDataSourceConfiguration.all()
     return configs
Ejemplo n.º 20
0
 def test_production_config(self):
     # Call validate() on every static data source config, with all()
     # invoked using use_server_filter=False.
     configs = StaticDataSourceConfiguration.all(use_server_filter=False)
     for config in configs:
         config.validate()
 def test_is_static_positive(self):
     # The first config produced from the sample JSON path must report
     # is_static.
     json_path = self.get_path('sample_static_data_source', 'json')
     with override_settings(STATIC_DATA_SOURCES=[json_path]):
         configs = list(StaticDataSourceConfiguration.all())
         first_config = configs[0]
         self.assertTrue(first_config.is_static)
Ejemplo n.º 22
0
 def get_data_sources(self):
     """Return the result of ``StaticDataSourceConfiguration.all()``."""
     data_sources = StaticDataSourceConfiguration.all()
     return data_sources
Ejemplo n.º 23
0
 def test_production_config(self):
     # Call validate() on every static data source config; the test passes
     # when none of the calls raises.
     configs = StaticDataSourceConfiguration.all()
     for config in configs:
         config.validate()
Ejemplo n.º 24
0
 def get_all_data_sources(self):
     """Return the result of ``StaticDataSourceConfiguration.all()``."""
     data_sources = StaticDataSourceConfiguration.all()
     return data_sources
 def test_deactivate_noop(self):
     json_path = self.get_path('sample_static_data_source', 'json')
     with override_settings(STATIC_DATA_SOURCES=[json_path]):
         configs = list(StaticDataSourceConfiguration.all())
         first_config = configs[0]
         # This is a SimpleTest, so the call below should fail if it
         # actually hits the DB.
         first_config.deactivate()