import importlib
import os
from pathlib import Path
from types import ModuleType

from tests._common import get_toolbox_root_dir


def test__import_namespaces():
    def check_subpackage_imports(subpackage: ModuleType, directory_path: Path) -> None:
        """Recursively asserts that all files/directories within a directory path
        are importable from the subpackage

        Args:
            subpackage: The subpackage to check imports from
            directory_path: The directory to compare with the subpackage imports
        """
        print(f"Checking all files/directories within {directory_path} are importable")

        # Collect names of packages we expect based on filenames in the directory
        modules_by_files = {
            f.replace(".py", "") for f in os.listdir(directory_path) if not f.startswith("_")
        }

        # Collect names of packages that are importable from the module
        modules_by_namespace_all = set(subpackage.__all__)
        modules_by_namespace_dir = {p for p in subpackage.__dir__() if not p.startswith("_")}

        # We use subset here to allow for imports that are not in the direct file system
        # folder, such as the project._common files, which are imported through specific
        # project types
        assert modules_by_files.issubset(modules_by_namespace_all)
        assert modules_by_files.issubset(modules_by_namespace_dir)

        # For any subpackages that are also directories, perform the same check
        for sub_module_name in modules_by_namespace_dir:
            sub_module_path = directory_path / sub_module_name
            if os.path.isdir(sub_module_path):
                sub_module = importlib.import_module(f"{subpackage.__name__}.{sub_module_name}")
                check_subpackage_imports(sub_module, sub_module_path)

    import tamr_toolbox

    toolbox_dir = get_toolbox_root_dir() / "tamr_toolbox"
    check_subpackage_imports(tamr_toolbox, toolbox_dir)
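
# For context, a minimal sketch of the __init__.py convention the test above enforces:
# each subpackage's __init__.py imports its child modules and lists them in __all__, so
# that __all__ and dir() both expose the same names the file system does. The subpackage
# and module names (example_subpackage, foo, bar) are hypothetical, for illustration only.

# tamr_toolbox/example_subpackage/__init__.py  (illustrative, not a real subpackage)
from tamr_toolbox.example_subpackage import foo  # noqa: F401
from tamr_toolbox.example_subpackage import bar  # noqa: F401

__all__ = ["foo", "bar"]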
def test_dataset_bad_encoding():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    bad_encoding_dataset_id = CONFIG["datasets"]["bad_encoding"]
    dataset = client.datasets.by_resource_id(bad_encoding_dataset_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_bad_encoding.csv")
    csv.from_dataset(dataset, filepath)
    os.remove(filepath)
def test_taxonomy_check_invalid_project_type():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    master_project_id = CONFIG["projects"]["minimal_mastering"]
    project = client.projects.by_resource_id(master_project_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")
    # Taxonomy export is only valid for categorization projects, so a mastering
    # project should raise. Delimiter arguments are omitted here so that the
    # identical-delimiter ValueError (covered by test_invalid_delimiters) cannot
    # fire before the project-type check.
    with pytest.raises(TypeError):
        csv.from_taxonomy(project, filepath)
def test_invalid_delimiters():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")
    # The CSV delimiter and the flatten delimiter must differ
    with pytest.raises(ValueError):
        csv.from_taxonomy(project, filepath, csv_delimiter=",", flatten_delimiter=",")
def test_taxonomy_existing_file():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy2.csv")
    with open(filepath, "w") as f:
        f.write("Temporary file")

    # Exporting onto an existing file without overwrite should fail
    with pytest.raises(FileExistsError):
        csv.from_taxonomy(project, filepath, overwrite=False)
    os.remove(filepath)
def test_taxonomy_export_csv():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")
    records_written = csv.from_taxonomy(project, filepath, csv_delimiter=",")

    list_written = list(io.open(filepath))
    list_ref = TAXONOMY_DATA
    assert len(list_ref) == len(list_written)
    assert len(list_ref) == records_written
    for i in range(len(list_ref)):
        assert list_ref[i] == list_written[i]
    os.remove(filepath)
def test_taxonomy_overwrite_file():
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy3.csv")
    with open(filepath, "w") as f:
        f.write("Temporary file")

    # With overwrite=True the placeholder file is replaced by the exported taxonomy
    records_written = csv.from_taxonomy(project, filepath, overwrite=True)

    list_ref = TAXONOMY_DATA
    list_written = list(io.open(filepath))
    assert records_written == len(list_ref)
    for i in range(len(list_ref)):
        assert list_ref[i] == list_written[i]
    os.remove(filepath)
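
# The tests above create and delete their temporary files by hand, which leaks the file
# if an assertion fails first. A sketch of an equivalent pattern using the standard
# library's tempfile module, which cleans up automatically on exit; the helper name is
# hypothetical, and it assumes from_taxonomy accepts a string path as above.

import tempfile
from pathlib import Path


def _export_taxonomy_to_temp_dir(project) -> int:
    """Sketch: export a project's taxonomy into a self-cleaning temp directory."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        filepath = str(Path(tmp_dir) / "taxonomy.csv")
        # The directory and its contents are removed when the block exits,
        # even if an assertion inside it fails
        return csv.from_taxonomy(project, filepath)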
"""Tests for tasks related to transformations with Tamr projects""" import pytest from tamr_toolbox.project import categorization, mastering, schema_mapping from tamr_toolbox import utils from tamr_toolbox.utils.testing import mock_api from tests._common import get_toolbox_root_dir CONFIG = utils.config.from_yaml(get_toolbox_root_dir() / "tests/mocking/resources/toolbox_test.yaml") PROJECT_ID = CONFIG["projects"]["minimal_categorization"] @mock_api() def test_input_and_unified_transformations(): client = utils.client.create(**CONFIG["toolbox_test_instance"]) project = client.projects.by_resource_id(PROJECT_ID) # Should start with no tx initial_tx = categorization.transformations.get_all(project) assert len(initial_tx.unified_scope) == 0 assert len(initial_tx.input_scope) == 0 input_tx1 = categorization.transformations.InputTransformation( transformation="SELECT *, 1 as one;") input_dataset = next(project.input_datasets().stream()) input_tx2 = categorization.transformations.InputTransformation( transformation="//comment\nSELECT *, 2 as two;", datasets=[input_dataset])
"""Tests for workflow example scripts""" from tamr_toolbox import utils from tamr_toolbox.utils.testing import mock_api from examples.scripts.workflow import run_multiple_projects from tests._common import get_toolbox_root_dir CONFIG = utils.config.from_yaml( path_to_file=get_toolbox_root_dir() / "examples/resources/conf/project.config.yaml" ) @mock_api() def test_run_multiple_projects(): run_multiple_projects.LOGGER = utils.logger.create(__name__) all_ops = run_multiple_projects.main( instance_connection_info=CONFIG["my_tamr_instance"], project_ids=[ CONFIG["projects"]["my_schema_mapping_project"], CONFIG["projects"]["my_categorization_project"], CONFIG["projects"]["my_mastering_project"], CONFIG["projects"]["my_golden_records_project"], ], ) # Check that all operations run completed successfully for op in all_ops: assert op.succeeded() # Check that the number of operations run is exactly what we expect
from pathlib import Path

from tests._common import get_toolbox_root_dir


def test__valid_toolbox_root_dir():
    path = get_toolbox_root_dir()
    assert path.exists()
    assert path.is_absolute()

    # Test that we can find this file using the toolbox root directory
    assert path / "tests" / "test__common.py" == Path(__file__)
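
# A minimal sketch of how such a helper could be implemented, assuming tests/_common.py
# sits one directory below the repository root; the name and body are illustrative, and
# the real implementation may differ.


def get_toolbox_root_dir_sketch() -> Path:
    """Return the absolute repository root, resolved relative to this file."""
    return Path(__file__).absolute().parent.parent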
from tamr_toolbox.enrichment.dictionary import TranslationDictionary
from tamr_toolbox import utils
from tamr_toolbox import enrichment
from tamr_toolbox.utils.testing import mock_api
from tests._common import get_toolbox_root_dir

from pathlib import Path
from typing import Optional
import tempfile

import pytest

CONFIG = utils.config.from_yaml(
    get_toolbox_root_dir() / "tests/mocking/resources/toolbox_test.yaml"
)
DICTIONARY_DATASET_ID = CONFIG["datasets"]["dictionary_auto_to_fr.json"]

TEST_TRANSLATION_DICTIONARY = {
    "cheddar cheese": TranslationDictionary(
        standardized_phrase="cheddar cheese",
        translated_phrase="fromage cheddar",
        detected_language="en",
        original_phrases={"cheddar cheese"},
    ),
    "ground beef": TranslationDictionary(
        standardized_phrase="ground beef",
        translated_phrase="boeuf haché",
"""Tests for Tamr Golden Records project example scripts""" from tamr_toolbox import utils from tamr_toolbox.utils.testing import mock_api from examples.scripts.project.golden_records import ( run_golden_records_simple, run_golden_records_verbose, ) from tests._common import get_toolbox_root_dir CONFIG = utils.config.from_yaml(path_to_file=get_toolbox_root_dir() / "examples/resources/conf/project.config.yaml") @mock_api() def test_run_golden_records_simple(): run_golden_records_simple.LOGGER = utils.logger.create(__name__) all_ops = run_golden_records_simple.main( instance_connection_info=CONFIG["my_tamr_instance"], golden_records_project_id=CONFIG["projects"] ["my_golden_records_project"], ) # Check that all operations run completed successfully for op in all_ops: assert op.succeeded() # Check that the number of operations run is exactly 2 assert len(all_ops) == 3
"""Tests for tasks related to creation of Email notifications""" from unittest.mock import patch from tamr_toolbox import utils import tamr_toolbox as tbox from tamr_toolbox.utils.testing import mock_api from tests._common import get_toolbox_root_dir CONFIG = utils.config.from_yaml( get_toolbox_root_dir() / "tests/mocking/resources/notifications.config.yaml" ) def test_build_message(): test_message = "This is a test email." subject_line = "Test 123" test_response = ( 'Content-Type: text/plain; charset="us-ascii"\nMIME-Version: ' + "1.0\nContent-Transfer-Encoding: 7bit\n" + f'Subject: Test 123\nFrom: {CONFIG["my_email_notification"]["sender_address"]}\n' + f'To: {CONFIG["my_email_notification"]["recipient_addresses"][0]}\n' + "\nThis is a test email." ) msg = tbox.notifications.emails._build_message( message=test_message, subject_line=subject_line, sender=CONFIG["my_email_notification"]["sender_address"], recipients=CONFIG["my_email_notification"]["recipient_addresses"],
"""Tests for related to the Tamr auxiliary service DF-connect""" import pytest from tamr_toolbox.data_io.df_connect import client from tamr_toolbox.utils.config import from_yaml from tests._common import get_toolbox_root_dir CONFIG = from_yaml(get_toolbox_root_dir() / "tests/mocking/resources/connect.config.yaml") CONFIG_HTTPS = from_yaml(get_toolbox_root_dir() / "tests/mocking/resources/connect_https.config.yaml") CONFIG_MULTI_EXPORT = from_yaml( get_toolbox_root_dir() / "tests/mocking/resources/connect_multi_export.config.yaml") @pytest.mark.parametrize( "protocol,port,base_path, expected", [ ("http", "9030", "", "http://localhost:9030/api/jdbc/ingest"), ("https", "9030", "", "https://localhost:9030/api/jdbc/ingest"), ("http", "", "", "http://localhost/api/jdbc/ingest"), ("https", "", "", "https://localhost/api/jdbc/ingest"), ("http", "", "/proxy", "http://localhost/proxy/api/jdbc/ingest"), ("https", "", "proxy", "https://localhost/proxy/api/jdbc/ingest"), ("http", "9030", "proxy", "http://localhost:9030/proxy/api/jdbc/ingest"), ("https", "9030", "proxy", "https://localhost:9030/proxy/api/jdbc/ingest"), ], ) def test_create_with_multiple_parameters(protocol: str, port: str,
"""Tests for tasks related to creation of Slack notifications""" import pytest from requests import HTTPError from slack import WebClient from slack.errors import SlackApiError from slack.web.slack_response import SlackResponse from unittest.mock import MagicMock from tamr_toolbox import utils, notifications from tamr_toolbox.utils.operation import from_resource_id, get_details from tamr_toolbox.utils.testing import mock_api from tests._common import get_toolbox_root_dir CONFIG = utils.config.from_yaml( get_toolbox_root_dir() / "tests/mocking/resources/notifications.config.yaml") def _mock_response(channel: str, text: str) -> SlackResponse: """ Simulate response for a call to chat_PostMessage() in the Slack WebClient https://python-slackclient.readthedocs.io/en/latest/basic_usage.html Args: channel: The slack channel to post to (ignored in mock response) text: The body of the message in the response Returns: A JSON SlackResponse object """ mock_client = WebClient()