def test_publish_all_datasets_metadata(self): mock_blob1 = Mock() mock_blob1.name = ( "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json" ) mock_blob1.updated = datetime(2020, 4, 3, 11, 25, 5) mock_blob2 = Mock() mock_blob2.name = ( "api/v1/tables/test/incremental_query/v1/files/2020-03-15/" "000000000001.json" ) mock_blob2.updated = datetime(2020, 4, 3, 11, 25, 5) gcs_table_metadata = [ pgm.GcsTableMetadata([mock_blob1], self.endpoint, self.sql_dir), pgm.GcsTableMetadata([mock_blob2], self.endpoint, self.sql_dir), ] mock_out = MagicMock() file_handler = MagicMock() file_handler.__enter__.return_value = mock_out smart_open.open = MagicMock(return_value=file_handler) pgm.publish_all_datasets_metadata(gcs_table_metadata, "output.txt") all_datasets = TEST_DIR / "data" / "all_datasets.json" with open(all_datasets) as f: expected_json = json.load(f) mock_out.write.assert_called_with(json.dumps(expected_json, indent=4))
def test_gcs_table_metadata_to_json(self): mock_blob = Mock() mock_blob.name = ( "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json" ) mock_blob.updated = datetime(2020, 4, 3, 11, 25, 5) files_path = "api/v1/tables/test/non_incremental_query/v1/files" last_updated_path = "api/v1/tables/test/non_incremental_query/v1/last_updated" gcs_table_metadata = pgm.GcsTableMetadata( [mock_blob], self.endpoint, self.sql_dir ) result = gcs_table_metadata.table_metadata_to_json() assert len(result.items()) == 7 assert result["description"] == "Test table for a non-incremental query" assert result["friendly_name"] == "Test table for a non-incremental query" assert result["incremental"] is False assert result["incremental_export"] is False review_link = [ "https://bugzilla.mozilla.org/show_bug.cgi?id=1999999", "https://bugzilla.mozilla.org/show_bug.cgi?id=12121212", ] assert result["review_links"] == review_link assert result["files_uri"] == self.endpoint + files_path assert result["last_updated"] == self.endpoint + last_updated_path
def test_gcs_table_metadata(self): mock_blob = Mock() mock_blob.name = ( "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json" ) mock_blob.updated = datetime(2020, 4, 3, 11, 30, 1) files_path = "api/v1/tables/test/non_incremental_query/v1/files" last_updated_path = "api/v1/tables/test/non_incremental_query/v1/last_updated" gcs_table_metadata = pgm.GcsTableMetadata( [mock_blob], self.endpoint, self.sql_dir ) assert gcs_table_metadata.blobs == [mock_blob] assert gcs_table_metadata.endpoint == self.endpoint assert gcs_table_metadata.files_path == files_path assert gcs_table_metadata.files_uri == self.endpoint + files_path assert gcs_table_metadata.dataset == "test" assert gcs_table_metadata.table == "non_incremental_query" assert gcs_table_metadata.version == "v1" assert gcs_table_metadata.metadata.is_incremental() is False assert gcs_table_metadata.metadata.is_incremental_export() is False assert gcs_table_metadata.metadata.review_bugs() == ["1999999", "12121212"] assert gcs_table_metadata.last_updated_path == last_updated_path assert gcs_table_metadata.last_updated_uri == self.endpoint + last_updated_path
def test_publish_table_metadata(self): mock_blob1 = Mock() mock_blob1.name = ( "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json" ) mock_blob1.updated = datetime(2020, 4, 3, 11, 25, 5) mock_blob2 = Mock() mock_blob2.name = ( "api/v1/tables/test/incremental_query/v1/files/2020-03-15/" "000000000001.json" ) mock_blob2.updated = datetime(2020, 4, 3, 11, 25, 5) gcs_table_metadata = [ pgm.GcsTableMetadata([mock_blob1], self.endpoint, self.sql_dir), pgm.GcsTableMetadata([mock_blob2], self.endpoint, self.sql_dir), ] mock_out = MagicMock() file_handler = MagicMock() file_handler.__enter__.return_value = mock_out smart_open.open = MagicMock(return_value=file_handler) pgm.publish_table_metadata( self.mock_storage_client, gcs_table_metadata, self.test_bucket ) metadata_file = TEST_DIR / "data" / "incremental_query_gcs_metadata.json" with open(metadata_file) as f: expected_incremental_query_json = json.load(f) metadata_file = TEST_DIR / "data" / "non_incremental_query_gcs_metadata.json" with open(metadata_file) as f: expected_non_incremental_query_json = json.load(f) mock_out.write.assert_has_calls( [ call(json.dumps(expected_non_incremental_query_json, indent=4)), call(json.dumps(expected_incremental_query_json, indent=4)), ] )
def test_gcs_files_metadata_to_json_incremental(self): files = [ ( "api/v1/tables/test/incremental_query/v1/files/2020-03-15/" "000000000000.json" ), ( "api/v1/tables/test/incremental_query/v1/files/2020-03-15/" "000000000001.json" ), ( "api/v1/tables/test/incremental_query/v1/files/2020-03-16/" "000000000000.json" ), ] blobs = [] for file in files: mock_blob = Mock() mock_blob.name = file mock_blob.updated = datetime(2020, 4, 3, 11, 25, 5) blobs.append(mock_blob) json_expected = { "2020-03-15": [ ( f"{self.endpoint}" "api/v1/tables/test/incremental_query/v1/files/" "2020-03-15/000000000000.json" ), ( f"{self.endpoint}" "api/v1/tables/test/incremental_query/v1/files/" "2020-03-15/000000000001.json" ), ], "2020-03-16": [ ( f"{self.endpoint}" "api/v1/tables/test/incremental_query/v1/files/" "2020-03-16/000000000000.json" ) ], } gcs_table_metadata = pgm.GcsTableMetadata(blobs, self.endpoint, self.sql_dir) result = gcs_table_metadata.files_metadata_to_json() assert result == json_expected
def test_gcs_files_metadata_to_json(self): mock_blob = Mock() mock_blob.name = ( "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json.gz" ) mock_blob.updated = datetime(2020, 4, 3, 11, 25, 5) json_expected = [( f"{self.endpoint}" "api/v1/tables/test/non_incremental_query/v1/files/000000000000.json.gz" )] gcs_table_metadata = pgm.GcsTableMetadata([mock_blob], self.endpoint, self.sql_dir) result = gcs_table_metadata.files_metadata_to_json() assert result == json_expected
def test_gcs_table_metadata_no_files(self): with pytest.raises(Exception): pgm.GcsTableMetadata([], self.endpoint, self.target_dir)