Beispiel #1
0
    def test_refresh_schema_creates_tables(self):
        """Check the rows produced by a schema refresh followed by a sample update.

        After ``refresh_schema`` and ``update_sample`` run against the factory
        data source, exactly one ``TableMetadata`` row and one
        ``ColumnMetadata`` row must exist, and their ``to_dict()`` output must
        equal the expected dictionaries.
        """
        source_id = self.factory.data_source.id
        expected_table = dict(
            id=1,
            org_id=1,
            exists=True,
            name='table',
            visible=True,
            sample_query=None,
            description=None,
            column_metadata=True,
            data_source_id=1,
            sample_updated_at=None,
        )

        refresh_schema(source_id)
        update_sample(source_id, 'table', 1, "2019-05-09T17:07:52.386910Z")

        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        # Verify the row counts before indexing into the result lists.
        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)

        self.assertEqual(tables[0].to_dict(), expected_table)
        self.assertEqual(columns[0].to_dict(), self.EXPECTED_COLUMN_METADATA)
Beispiel #2
0
    def test_refresh_schema_creates_tables(self):
        """Check the rows produced by a schema refresh followed by a sample fetch.

        After ``refresh_schema`` and ``get_table_sample_data`` run against the
        factory data source, exactly one ``TableMetadata`` row and one
        ``ColumnMetadata`` row must exist, and their ``to_dict()`` output must
        equal the expected dictionaries.
        """
        source_id = self.factory.data_source.id
        expected_table = dict(
            id=1,
            org_id=1,
            exists=True,
            name='table',
            sample_query=None,
            description=None,
            column_metadata=True,
            data_source_id=1,
        )
        # Table description handed to the sample fetcher.
        sampled_table = {"name": 'table', "columns": [self.COLUMN_NAME]}

        refresh_schema(source_id)
        get_table_sample_data(source_id, sampled_table, 1)

        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        # Verify the row counts before indexing into the result lists.
        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)

        self.assertEqual(tables[0].to_dict(), expected_table)
        self.assertEqual(columns[0].to_dict(), self.EXPECTED_COLUMN_METADATA)
    def test_refresh_schema_update_column(self):
        """A column type changed in the schema shows up after the next refresh.

        The first refresh (plus a sample update) must serialize to
        ``EXPECTED_COLUMN_METADATA``. Once the patched schema getter reports a
        different type for the first column, a second refresh must serialize
        to something else whose ``type`` is the new value.
        """
        UPDATED_COLUMN_TYPE = "varchar"
        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        sampled_at = utils.utcnow() - datetime.timedelta(days=90)
        update_sample(source_id, "table", 1, sampled_at)

        first_column = ColumnMetadata.query.all()[0]
        self.assertEqual(
            ColumnMetadataSerializer(first_column).serialize(),
            self.EXPECTED_COLUMN_METADATA,
        )

        # Work on a deep copy so the shared default schema stays untouched.
        changed_schema = copy.deepcopy(self.default_schema_return_value)
        changed_schema[0]["metadata"][0]["type"] = UPDATED_COLUMN_TYPE
        self.patched_get_schema.return_value = changed_schema

        refresh_schema(source_id)
        first_column = ColumnMetadata.query.all()[0]
        serialized = ColumnMetadataSerializer(first_column).serialize()

        self.assertNotEqual(serialized, self.EXPECTED_COLUMN_METADATA)
        self.assertEqual(serialized["type"], UPDATED_COLUMN_TYPE)
    def test_refresh_schema_delete_column(self):
        """Replacing a table's only column keeps the old row, flagged as gone.

        After the first refresh the single column row serializes with
        ``exists`` true. The patched schema getter then reports the same table
        with a different column name; after the second refresh there must be
        two column rows, exactly one of which still has ``exists`` true.
        """
        NEW_COLUMN_NAME = "new_column"
        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.all()

        self.assertTrue(
            ColumnMetadataSerializer(column_metadata[0]).serialize()["exists"])

        self.patched_get_schema.return_value = [{
            "name":
            "table",
            "columns": [NEW_COLUMN_NAME],
            "metadata": [{
                "name": NEW_COLUMN_NAME,
                "type": self.COLUMN_TYPE,
            }],
        }]

        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.all()
        self.assertEqual(len(column_metadata), 2)

        # The query carries no ORDER BY, so the position of each row in the
        # result is up to the database. Compare the sorted flags instead of
        # indexing by position so the test does not depend on row order.
        exists_flags = sorted(
            ColumnMetadataSerializer(column).serialize()["exists"]
            for column in column_metadata)
        self.assertEqual(exists_flags, [False, True])
Beispiel #5
0
    def test_recent_empty_sample_refreshs(self):
        """A column with no example receives one from ``update_sample``.

        Samples are enabled on the query runner, the schema is refreshed, and
        the first column is confirmed to have no example. ``update_sample`` is
        then called with a timestamp five days in the past, after which the
        column's example must equal ``COLUMN_EXAMPLE``.
        """
        self.factory.data_source.query_runner.configuration['samples'] = True
        refresh_schema(self.factory.data_source.id)

        # Confirm no sample exists
        column_metadata = ColumnMetadata.query.first()
        self.assertIsNone(column_metadata.example)

        LAST_UPDATE = utils.utcnow() - datetime.timedelta(days=5)
        update_sample(self.factory.data_source.id, 'table', 1,
                      LAST_UPDATE.isoformat())

        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)
Beispiel #6
0
    def test_refresh_samples_applied_to_one_data_source(self):
        """``refresh_samples`` for one data source leaves the other unsampled.

        Two data sources get samples enabled and their schemas refreshed, but
        samples are refreshed for the first one only. The number of tables
        with a non-null ``sample_updated_at`` must then equal the number of
        tables in the default schema.
        """
        sampled_source = self.factory.create_data_source()
        untouched_source = self.factory.create_data_source()
        both_sources = (sampled_source, untouched_source)

        for source in both_sources:
            source.query_runner.configuration['samples'] = True

        for source in both_sources:
            refresh_schema(source.id)
        refresh_samples(sampled_source.id, 50)

        tables_with_samples = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        self.assertEqual(
            tables_with_samples.count(),
            len(self.default_schema_return_value),
        )
Beispiel #7
0
    def test_refresh_schema_doesnt_overwrite_samples(self):
        """A later schema refresh keeps an example that was already stored.

        The first column starts without an example, gains ``COLUMN_EXAMPLE``
        through ``update_sample``, and must still hold that value after
        ``refresh_schema`` runs again.
        """
        self.factory.data_source.query_runner.configuration['samples'] = True

        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.first()
        self.assertIsNone(column_metadata.example)

        update_sample(self.factory.data_source.id, 'table', 1,
                      "2019-05-09T17:07:52.386910Z")
        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)

        # Check that a schema refresh doesn't overwrite examples
        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)
Beispiel #8
0
    def test_refresh_samples_rate_limits(self):
        """Each ``refresh_samples`` call stamps at most the requested number of tables.

        With 105 tables in the schema and ``refresh_samples`` called with 50,
        the number of tables whose ``sample_updated_at`` is still null must
        drop to 55, then 5, then 0 across three consecutive calls.
        """
        NEW_COLUMN_NAME = 'new_column'
        NUM_TABLES = 105

        schema = [{
            'name': 'table{}'.format(index),
            'columns': [NEW_COLUMN_NAME],
            'metadata': [{
                'name': NEW_COLUMN_NAME,
                'type': self.COLUMN_TYPE,
            }],
        } for index in range(NUM_TABLES)]

        def unsampled_table_count():
            # Tables that have not been stamped with a sample time yet.
            pending = TableMetadata.query.filter(
                TableMetadata.sample_updated_at.is_(None)).all()
            return len(pending)

        self.patched_get_schema.return_value = schema
        self.factory.data_source.query_runner.configuration['samples'] = True
        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        refresh_samples(source_id, 50)

        # Every table from the schema was stored.
        self.assertEqual(TableMetadata.query.count(), NUM_TABLES)

        # First call: 50 tables handled, 55 left.
        self.assertEqual(unsampled_table_count(), 55)

        # Second call: another 50 handled, 5 left.
        refresh_samples(source_id, 50)
        self.assertEqual(unsampled_table_count(), 5)

        # Third call: nothing is left unsampled.
        refresh_samples(source_id, 50)
        self.assertEqual(unsampled_table_count(), 0)
Beispiel #9
0
    def test_refresh_samples_refreshes(self):
        """``refresh_samples`` stamps tables and re-stamps them once they are old.

        Five tables are sampled; all must end up with a ``sample_updated_at``
        later than the time captured before the call. The timestamps are then
        pushed 30 days into the past, and a second ``refresh_samples`` call
        must again produce a ``sample_updated_at`` later than the time
        captured just before it.
        """
        NEW_COLUMN_NAME = 'new_column'
        NUM_TABLES = 5
        TIME_BEFORE_UPDATE = utils.utcnow()

        tables = [{
            'name': 'table{}'.format(i),
            'columns': [NEW_COLUMN_NAME],
            'metadata': [{
                'name': NEW_COLUMN_NAME,
                'type': self.COLUMN_TYPE,
            }],
        } for i in range(NUM_TABLES)]

        self.patched_get_schema.return_value = tables
        self.factory.data_source.query_runner.configuration['samples'] = True

        refresh_schema(self.factory.data_source.id)
        refresh_samples(self.factory.data_source.id, 50)

        # There's a total of 5 processed tables
        table_metadata = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        self.assertEqual(table_metadata.count(), NUM_TABLES)
        # assertGreater reports both timestamps when the comparison fails.
        self.assertGreater(
            table_metadata.first().sample_updated_at, TIME_BEFORE_UPDATE)

        # Age the stored timestamps so the next call has something to refresh.
        table_metadata.update({
            'sample_updated_at':
            utils.utcnow() - datetime.timedelta(days=30)
        })
        models.db.session.commit()

        TIME_BEFORE_UPDATE = utils.utcnow()
        refresh_samples(self.factory.data_source.id, 50)
        table_metadata_list = TableMetadata.query.filter(
            TableMetadata.sample_updated_at.isnot(None))
        self.assertGreater(
            table_metadata_list.first().sample_updated_at, TIME_BEFORE_UPDATE)
Beispiel #10
0
    def test_refresh_schema_update_column(self):
        """A column type changed in the schema shows up after the next refresh.

        The first refresh (plus a sample update) must yield a column whose
        ``to_dict()`` equals ``EXPECTED_COLUMN_METADATA``. Once the patched
        schema getter reports a different type, a second refresh must yield a
        different dictionary whose ``type`` is the new value.
        """
        UPDATED_COLUMN_TYPE = 'varchar'
        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        update_sample(source_id, 'table', 1, "2019-05-09T17:07:52.386910Z")

        columns = ColumnMetadata.query.all()
        self.assertEqual(columns[0].to_dict(), self.EXPECTED_COLUMN_METADATA)

        # Work on a deep copy so the shared default schema stays untouched.
        changed_schema = copy.deepcopy(self.default_schema_return_value)
        changed_schema[0]['metadata'][0]['type'] = UPDATED_COLUMN_TYPE
        self.patched_get_schema.return_value = changed_schema

        refresh_schema(source_id)
        columns = ColumnMetadata.query.all()
        first_column = columns[0]

        self.assertNotEqual(
            first_column.to_dict(), self.EXPECTED_COLUMN_METADATA)
        self.assertEqual(first_column.to_dict()['type'], UPDATED_COLUMN_TYPE)
    def test_refresh_schema_doesnt_overwrite_samples(self):
        """A later schema refresh keeps an example that was already stored.

        The first column starts without an example, gains ``COLUMN_EXAMPLE``
        through ``update_sample``, and must still hold that value after
        ``refresh_schema`` runs again.
        """
        self.factory.data_source.query_runner.configuration["samples"] = True

        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.first()
        self.assertIsNone(column_metadata.example)

        update_sample(
            self.factory.data_source.id,
            "table",
            1,
            utils.utcnow() - datetime.timedelta(days=90),
        )
        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)

        # Check that a schema refresh doesn't overwrite examples
        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)
Beispiel #12
0
    def test_recent_non_empty_sample_doesnt_refresh(self):
        """An existing example survives ``update_sample`` with a five-day-old timestamp.

        A first ``update_sample`` call stores ``COLUMN_EXAMPLE``. The patched
        sample getter is then made to return a different value, and
        ``update_sample`` is called again with a timestamp five days in the
        past; the stored example must stay ``COLUMN_EXAMPLE``.
        """
        data_source = self.factory.data_source
        data_source.query_runner.configuration['samples'] = True
        refresh_schema(self.factory.data_source.id)

        update_sample(self.factory.data_source.id, 'table', 1, None)

        # The first call must have stored an example.
        stored_column = ColumnMetadata.query.first()
        self.assertEqual(stored_column.example, self.COLUMN_EXAMPLE)

        replacement_sample = {self.COLUMN_NAME: "a new example"}
        self.patched_get_table_sample.return_value = replacement_sample

        five_days_ago = utils.utcnow() - datetime.timedelta(days=5)
        update_sample(self.factory.data_source.id, 'table', 1,
                      five_days_ago.isoformat())

        # The replacement value must not have been written.
        stored_column = ColumnMetadata.query.first()
        self.assertEqual(stored_column.example, self.COLUMN_EXAMPLE)
Beispiel #13
0
    def test_refresh_schema_table_with_new_metadata_updated(self):
        """The table's ``column_metadata`` flag follows the schema's ``metadata`` key.

        The flag is true after the default refresh, false after a refresh
        whose schema entry has no ``metadata`` key, and true again once the
        default schema is restored. Row counts stay at one table and one
        column throughout the first two refreshes.
        """
        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertTrue(tables[0].to_dict()['column_metadata'])

        # Schema entry without a `metadata` key: the flag should turn False.
        schema_without_metadata = [{
            'name': 'table',
            'columns': [self.COLUMN_NAME],
        }]
        self.patched_get_schema.return_value = schema_without_metadata

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertFalse(tables[0].to_dict()['column_metadata'])

        # Default schema restored: the flag should be True again.
        self.patched_get_schema.return_value = self.default_schema_return_value
        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        self.assertTrue(tables[0].to_dict()['column_metadata'])
Beispiel #14
0
    def test_refresh_schema_deleted_table_marked(self):
        """A table missing from the schema is kept but flagged as not existing.

        ``exists`` is true after the default refresh, false after a refresh
        with an empty schema, and true again once the default schema is
        restored. Row counts stay at one table and one column throughout the
        first two refreshes.
        """
        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertTrue(tables[0].to_dict()['exists'])

        # Empty schema: the table row stays but `exists` should turn False.
        self.patched_get_schema.return_value = []

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertFalse(tables[0].to_dict()['exists'])

        # Default schema restored: `exists` should be True again.
        self.patched_get_schema.return_value = self.default_schema_return_value
        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        self.assertTrue(tables[0].to_dict()['exists'])
    def test_refresh_schema_deleted_table_marked(self):
        """A table missing from the schema is kept but flagged as not existing.

        The serialized ``exists`` value is true after the default refresh,
        false after a refresh with an empty schema, and true again once the
        default schema is restored. Row counts stay at one table and one
        column throughout the first two refreshes.
        """

        def serialized_exists(table_row):
            # Serialize without favorite state and pick out the `exists` flag.
            serializer = TableMetadataSerializer(
                table_row, with_favorite_state=False)
            return serializer.serialize()["exists"]

        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertTrue(serialized_exists(tables[0]))

        # Empty schema: the table row stays but `exists` should turn False.
        self.patched_get_schema.return_value = []

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertFalse(serialized_exists(tables[0]))

        # Default schema restored: `exists` should be True again.
        self.patched_get_schema.return_value = self.default_schema_return_value
        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        self.assertTrue(serialized_exists(tables[0]))
Beispiel #16
0
    def test_refresh_schema_update_column(self):
        """A column type changed in the schema shows up after the next refresh.

        The first refresh (plus a sample fetch) must yield a column whose
        ``to_dict()`` equals ``EXPECTED_COLUMN_METADATA``. Once the patched
        schema getter reports a different type, a second refresh must yield a
        different dictionary whose ``type`` is the new value.
        """
        UPDATED_COLUMN_TYPE = 'varchar'
        source_id = self.factory.data_source.id
        # Table description handed to the sample fetcher.
        sampled_table = {"name": 'table', "columns": [self.COLUMN_NAME]}

        refresh_schema(source_id)
        get_table_sample_data(source_id, sampled_table, 1)

        columns = ColumnMetadata.query.all()
        self.assertEqual(columns[0].to_dict(), self.EXPECTED_COLUMN_METADATA)

        # Work on a deep copy so the shared default schema stays untouched.
        changed_schema = copy.deepcopy(self.default_schema_return_value)
        changed_schema[0]['metadata'][0]['type'] = UPDATED_COLUMN_TYPE
        self.patched_get_schema.return_value = changed_schema

        refresh_schema(source_id)
        columns = ColumnMetadata.query.all()
        first_column = columns[0]

        self.assertNotEqual(
            first_column.to_dict(), self.EXPECTED_COLUMN_METADATA)
        self.assertEqual(first_column.to_dict()['type'], UPDATED_COLUMN_TYPE)
Beispiel #17
0
    def test_refresh_schema_delete_column(self):
        """Replacing a table's only column keeps the old row, flagged as gone.

        After the first refresh the single column row reports ``exists`` as
        true. The patched schema getter then reports the same table with a
        different column name; after the second refresh there must be two
        column rows, exactly one of which still has ``exists`` true.
        """
        NEW_COLUMN_NAME = 'new_column'
        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.all()

        self.assertTrue(column_metadata[0].to_dict()['exists'])

        self.patched_get_schema.return_value = [{
            'name':
            'table',
            'columns': [NEW_COLUMN_NAME],
            'metadata': [{
                'name': NEW_COLUMN_NAME,
                'type': self.COLUMN_TYPE,
            }]
        }]

        refresh_schema(self.factory.data_source.id)
        column_metadata = ColumnMetadata.query.all()
        self.assertEqual(len(column_metadata), 2)

        # The query carries no ORDER BY, so the position of each row in the
        # result is up to the database. Compare the sorted flags instead of
        # indexing by position so the test does not depend on row order.
        exists_flags = sorted(
            column.to_dict()['exists'] for column in column_metadata)
        self.assertEqual(exists_flags, [False, True])
    def test_refresh_schema_creates_tables(self):
        """Check the rows produced by a schema refresh followed by a sample update.

        After ``refresh_schema`` and ``update_sample`` run against the factory
        data source, exactly one ``TableMetadata`` row and one
        ``ColumnMetadata`` row must exist, and their serialized forms must
        equal the expected dictionaries.
        """
        source_id = self.factory.data_source.id
        expected_table = dict(
            id=1,
            org_id=1,
            exists=True,
            name=u"table",
            visible=True,
            description=None,
            column_metadata=True,
            data_source_id=1,
            sample_updated_at=None,
            sample_queries={},
            columns=[self.EXPECTED_COLUMN_METADATA],
        )

        refresh_schema(source_id)
        sampled_at = utils.utcnow() - datetime.timedelta(days=90)
        update_sample(source_id, "table", 1, sampled_at)

        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        # Verify the row counts before indexing into the result lists.
        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)

        serialized_table = TableMetadataSerializer(
            tables[0], with_favorite_state=False).serialize()
        self.assertEqual(serialized_table, expected_table)

        serialized_column = ColumnMetadataSerializer(columns[0]).serialize()
        self.assertEqual(serialized_column, self.EXPECTED_COLUMN_METADATA)
    def test_refresh_schema_table_with_new_metadata_updated(self):
        """The table's ``column_metadata`` flag follows the schema's ``metadata`` key.

        The serialized flag is true after the default refresh, false after a
        refresh whose schema entry has no ``metadata`` key, and true again
        once the default schema is restored. Row counts stay at one table and
        one column throughout the first two refreshes.
        """

        def has_column_metadata(table_row):
            # Serialize without favorite state and pick out the flag.
            serializer = TableMetadataSerializer(
                table_row, with_favorite_state=False)
            return serializer.serialize()["column_metadata"]

        source_id = self.factory.data_source.id

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertTrue(has_column_metadata(tables[0]))

        # Schema entry without a `metadata` key: the flag should turn False.
        schema_without_metadata = [{
            "name": "table",
            "columns": [self.COLUMN_NAME],
        }]
        self.patched_get_schema.return_value = schema_without_metadata

        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        columns = ColumnMetadata.query.all()

        self.assertEqual(len(tables), 1)
        self.assertEqual(len(columns), 1)
        self.assertFalse(has_column_metadata(tables[0]))

        # Default schema restored: the flag should be True again.
        self.patched_get_schema.return_value = self.default_schema_return_value
        refresh_schema(source_id)
        tables = TableMetadata.query.all()
        self.assertTrue(has_column_metadata(tables[0]))