def test_refresh_schema_creates_tables(self):
    """Verify one table row and one column row exist after a schema refresh.

    After ``refresh_schema`` and ``update_sample`` run, the single
    ``TableMetadata`` and ``ColumnMetadata`` rows are compared, through
    ``to_dict()``, with the expected dictionaries.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    expected_table = {
        'id': 1,
        'org_id': 1,
        'exists': True,
        'name': 'table',
        'visible': True,
        'sample_query': None,
        'description': None,
        'column_metadata': True,
        'data_source_id': 1,
        'sample_updated_at': None,
    }
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    update_sample(source_id, 'table', 1, "2019-05-09T17:07:52.386910Z")

    tables = TableMetadata.query.all()
    columns = ColumnMetadata.query.all()

    # Check the row counts first so the index lookups below cannot mask a
    # missing or duplicated record.
    self.assertEqual(len(tables), 1)
    self.assertEqual(len(columns), 1)

    only_table = tables[0]
    self.assertEqual(only_table.to_dict(), expected_table)
    only_column = columns[0]
    self.assertEqual(only_column.to_dict(), self.EXPECTED_COLUMN_METADATA)
def test_refresh_schema_update_column(self):
    """Verify that a changed column type is picked up by a second refresh.

    The serialized column first matches ``EXPECTED_COLUMN_METADATA``; after
    the patched schema reports a different type and the schema is refreshed
    again, the serialized column no longer matches and carries the new type.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    new_type = "varchar"
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    ninety_days_ago = utils.utcnow() - datetime.timedelta(days=90)
    update_sample(source_id, "table", 1, ninety_days_ago)

    initial_column = ColumnMetadata.query.all()[0]
    self.assertEqual(
        ColumnMetadataSerializer(initial_column).serialize(),
        self.EXPECTED_COLUMN_METADATA,
    )

    # Work on a deep copy so the shared default schema fixture stays intact.
    changed_schema = copy.deepcopy(self.default_schema_return_value)
    changed_schema[0]["metadata"][0]["type"] = new_type
    self.patched_get_schema.return_value = changed_schema

    refresh_schema(source_id)

    refreshed_column = ColumnMetadata.query.all()[0]
    self.assertNotEqual(
        ColumnMetadataSerializer(refreshed_column).serialize(),
        self.EXPECTED_COLUMN_METADATA,
    )
    refreshed_type = ColumnMetadataSerializer(refreshed_column).serialize()["type"]
    self.assertEqual(refreshed_type, new_type)
def test_recent_empty_sample_refreshs(self):
    """Verify a column with no example is filled in by ``update_sample``.

    The timestamp handed to ``update_sample`` is only five days old, and the
    column still ends up with ``COLUMN_EXAMPLE``.
    """
    self.factory.data_source.query_runner.configuration['samples'] = True
    source_id = self.factory.data_source.id
    refresh_schema(source_id)

    # Before any sampling the column must not carry an example.
    column_before = ColumnMetadata.query.first()
    self.assertEqual(column_before.example, None)

    five_days_ago = utils.utcnow() - datetime.timedelta(days=5)
    update_sample(source_id, 'table', 1, five_days_ago.isoformat())

    column_after = ColumnMetadata.query.first()
    self.assertEqual(column_after.example, self.COLUMN_EXAMPLE)
def test_refresh_schema_doesnt_overwrite_samples(self):
    """Verify a later schema refresh leaves a stored column example alone.

    The example starts as ``None``, becomes ``COLUMN_EXAMPLE`` after
    ``update_sample``, and is still ``COLUMN_EXAMPLE`` after refreshing the
    schema a second time.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    self.factory.data_source.query_runner.configuration['samples'] = True
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    unsampled = ColumnMetadata.query.first()
    self.assertEqual(unsampled.example, None)

    update_sample(source_id, 'table', 1, "2019-05-09T17:07:52.386910Z")
    sampled = ColumnMetadata.query.first()
    self.assertEqual(sampled.example, self.COLUMN_EXAMPLE)

    # A further schema refresh must not wipe the example that was stored.
    refresh_schema(source_id)
    after_second_refresh = ColumnMetadata.query.first()
    self.assertEqual(after_second_refresh.example, self.COLUMN_EXAMPLE)
def test_recent_non_empty_sample_doesnt_refresh(self):
    """Verify an existing example survives an update with a recent timestamp.

    Once the column holds ``COLUMN_EXAMPLE``, the patched sample source is
    switched to return a different value. A second ``update_sample`` call
    with a five-day-old timestamp must leave the stored example unchanged.
    """
    self.factory.data_source.query_runner.configuration['samples'] = True
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    update_sample(source_id, 'table', 1, None)

    # The first sampling pass should have stored an example.
    seeded_column = ColumnMetadata.query.first()
    self.assertEqual(seeded_column.example, self.COLUMN_EXAMPLE)

    replacement_sample = {self.COLUMN_NAME: "a new example"}
    self.patched_get_table_sample.return_value = replacement_sample

    five_days_ago = utils.utcnow() - datetime.timedelta(days=5)
    update_sample(source_id, 'table', 1, five_days_ago.isoformat())

    # The stored example must not switch to the newly returned value.
    final_column = ColumnMetadata.query.first()
    self.assertEqual(final_column.example, self.COLUMN_EXAMPLE)
def test_refresh_schema_update_column(self):
    """Verify that a changed column type shows up after a second refresh.

    The column's ``to_dict()`` output first equals
    ``EXPECTED_COLUMN_METADATA``; once the patched schema reports another
    type and the schema is refreshed, the output differs and its ``type``
    entry equals the new type.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    new_type = 'varchar'
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    update_sample(source_id, 'table', 1, "2019-05-09T17:07:52.386910Z")

    initial_column = ColumnMetadata.query.all()[0]
    self.assertEqual(initial_column.to_dict(), self.EXPECTED_COLUMN_METADATA)

    # Work on a deep copy so the shared default schema fixture stays intact.
    changed_schema = copy.deepcopy(self.default_schema_return_value)
    changed_schema[0]['metadata'][0]['type'] = new_type
    self.patched_get_schema.return_value = changed_schema

    refresh_schema(source_id)

    refreshed_column = ColumnMetadata.query.all()[0]
    self.assertNotEqual(refreshed_column.to_dict(), self.EXPECTED_COLUMN_METADATA)
    refreshed_type = refreshed_column.to_dict()['type']
    self.assertEqual(refreshed_type, new_type)
def test_refresh_schema_doesnt_overwrite_samples(self):
    """Verify a later schema refresh leaves a stored column example alone.

    The example starts as ``None``, becomes ``COLUMN_EXAMPLE`` after
    ``update_sample`` is called with a timestamp 90 days in the past, and is
    still ``COLUMN_EXAMPLE`` after refreshing the schema again.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    self.factory.data_source.query_runner.configuration["samples"] = True
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    unsampled = ColumnMetadata.query.first()
    self.assertEqual(unsampled.example, None)

    ninety_days_ago = utils.utcnow() - datetime.timedelta(days=90)
    update_sample(source_id, "table", 1, ninety_days_ago)
    sampled = ColumnMetadata.query.first()
    self.assertEqual(sampled.example, self.COLUMN_EXAMPLE)

    # A further schema refresh must not wipe the example that was stored.
    refresh_schema(source_id)
    after_second_refresh = ColumnMetadata.query.first()
    self.assertEqual(after_second_refresh.example, self.COLUMN_EXAMPLE)
def test_refresh_schema_creates_tables(self):
    """Verify one table row and one column row exist after a schema refresh.

    After ``refresh_schema`` and ``update_sample`` run, the single
    ``TableMetadata`` and ``ColumnMetadata`` rows are serialized and compared
    with the expected dictionaries; the table payload embeds the expected
    column payload under ``columns``.
    """
    # NOTE(review): a second method with this exact name appears elsewhere in
    # this file; if both sit in the same class the later one wins -- confirm.
    expected_table = {
        "id": 1,
        "org_id": 1,
        "exists": True,
        "name": u"table",
        "visible": True,
        "description": None,
        "column_metadata": True,
        "data_source_id": 1,
        "sample_updated_at": None,
        "sample_queries": {},
        "columns": [self.EXPECTED_COLUMN_METADATA],
    }
    source_id = self.factory.data_source.id

    refresh_schema(source_id)
    ninety_days_ago = utils.utcnow() - datetime.timedelta(days=90)
    update_sample(source_id, "table", 1, ninety_days_ago)

    tables = TableMetadata.query.all()
    columns = ColumnMetadata.query.all()

    # Check the row counts first so the index lookups below cannot mask a
    # missing or duplicated record.
    self.assertEqual(len(tables), 1)
    self.assertEqual(len(columns), 1)

    table_serializer = TableMetadataSerializer(tables[0], with_favorite_state=False)
    self.assertEqual(table_serializer.serialize(), expected_table)

    column_serializer = ColumnMetadataSerializer(columns[0])
    self.assertEqual(column_serializer.serialize(), self.EXPECTED_COLUMN_METADATA)