Beispiel #1
0
def refresh_data_samples(name, num_tables=50, organization='default'):
    """Refresh table samples by data source name.

    The data source is looked up by ``name`` within the organization
    identified by the ``organization`` slug; ``num_tables`` is passed
    through unchanged to ``refresh_samples``. If no matching data source
    exists, a message is printed and the process exits with status 1.
    """
    # Imported locally because the file's import block is not visible from
    # this function and ``sys`` may not already be in scope.
    import sys

    # Keep the try body limited to the lookup: a NoResultFound raised from
    # inside refresh_samples must not be misreported as a missing data source.
    try:
        org = models.Organization.get_by_slug(organization)
        data_source = models.DataSource.query.filter(
            models.DataSource.name == name,
            models.DataSource.org == org).one()
    except NoResultFound:
        print("Couldn't find data source named: {}".format(name))
        # sys.exit rather than the bare exit() helper, which is injected by
        # the ``site`` module for interactive sessions and is not guaranteed
        # to exist (e.g. ``python -S`` or frozen executables).
        sys.exit(1)

    print("Refreshing samples for data source: {} (id={})".format(
        name, data_source.id))
    refresh_samples(data_source.id, num_tables)
Beispiel #2
0
    def test_refresh_samples_applied_to_one_data_source(self):
        """Refreshing samples for one data source leaves the other untouched.

        Two data sources get sampling enabled and their schemas refreshed,
        but samples are refreshed only for the first. The number of
        TableMetadata rows with a sample timestamp must then equal the
        number of tables in the default schema return value.
        """
        sources = [self.factory.create_data_source() for _ in range(2)]

        for source in sources:
            source.query_runner.configuration['samples'] = True

        for source in sources:
            refresh_schema(source.id)

        # Only the first data source has its samples refreshed.
        sampled_source = sources[0]
        refresh_samples(sampled_source.id, 50)

        sampled_rows = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        expected_count = len(self.default_schema_return_value)
        self.assertEqual(sampled_rows.count(), expected_count)
Beispiel #3
0
    def test_refresh_samples_refreshes(self):
        """Samples are stamped on first refresh and re-stamped on a later one.

        After the first refresh every table must carry a sample timestamp
        newer than the time the test started. The timestamps are then moved
        30 days into the past, and a second refresh must bring the timestamp
        forward again.
        """
        column_name = 'new_column'
        table_count = 5
        checkpoint = utils.utcnow()

        fake_schema = [
            {
                'name': 'table{}'.format(index),
                'columns': [column_name],
                'metadata': [{
                    'name': column_name,
                    'type': self.COLUMN_TYPE,
                }],
            }
            for index in range(table_count)
        ]

        self.patched_get_schema.return_value = fake_schema
        data_source = self.factory.data_source
        data_source.query_runner.configuration['samples'] = True

        refresh_schema(data_source.id)
        refresh_samples(data_source.id, 50)

        # All 5 tables should have been processed.
        sampled_rows = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        self.assertEqual(sampled_rows.count(), table_count)
        first_stamp = sampled_rows.first().sample_updated_at
        self.assertTrue(first_stamp > checkpoint)

        # Move the sample timestamps 30 days into the past.
        stale_stamp = utils.utcnow() - datetime.timedelta(days=30)
        sampled_rows.update({'sample_updated_at': stale_stamp})
        models.db.session.commit()

        checkpoint = utils.utcnow()
        refresh_samples(data_source.id, 50)
        resampled_rows = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        refreshed_stamp = resampled_rows.first().sample_updated_at
        self.assertTrue(refreshed_stamp > checkpoint)
Beispiel #4
0
    def test_refresh_samples_rate_limits(self):
        """Each refresh_samples call processes at most the requested tables.

        With 105 tables and a limit of 50 per call, the number of tables
        still lacking a sample timestamp must go 55 -> 5 -> 0 over three
        consecutive calls.
        """
        column_name = 'new_column'
        table_count = 105

        fake_schema = [
            {
                'name': 'table{}'.format(index),
                'columns': [column_name],
                'metadata': [{
                    'name': column_name,
                    'type': self.COLUMN_TYPE,
                }],
            }
            for index in range(table_count)
        ]

        def unsampled_count():
            # Number of tables that have not received a sample timestamp yet.
            pending = TableMetadata.query.filter(
                TableMetadata.sample_updated_at.is_(None)).all()
            return len(pending)

        self.patched_get_schema.return_value = fake_schema
        data_source = self.factory.data_source
        data_source.query_runner.configuration['samples'] = True

        refresh_schema(data_source.id)
        refresh_samples(data_source.id, 50)

        # All 105 tables exist after the schema refresh.
        self.assertEqual(TableMetadata.query.count(), table_count)

        # First call: 50 processed, 55 left.
        self.assertEqual(unsampled_count(), 55)

        # Second call: another 50 processed, 5 left.
        refresh_samples(data_source.id, 50)
        self.assertEqual(unsampled_count(), 5)

        # Third call: the remaining tables are processed.
        refresh_samples(data_source.id, 50)
        self.assertEqual(unsampled_count(), 0)