예제 #1
0
def test__import_namespaces():
    """Check that every public file/directory of the package is importable by name.

    Walks the ``tamr_toolbox`` directory tree and asserts, for each directory that is also
    a subpackage, that every public entry on disk is exposed both through ``__all__`` and
    through ``dir()`` of that subpackage.
    """

    def check_subpackage_imports(subpackage: ModuleType, directory_path: Path) -> None:
        """Recursively asserts that all files/directories within a directory path are importable
        from the subpackage

        Args:
            subpackage: The subpackage to check import from
            directory_path: The directory to compare with the subpackage imports
        """
        print(f"Checking all files/directories within {directory_path} are importable")
        # Collect names of packages we expect based on filenames in the directory.
        # Only a trailing ".py" is stripped: str.replace would also remove ".py" found in
        # the middle of a name and produce a module name that does not exist.
        modules_by_files = {
            f[:-3] if f.endswith(".py") else f
            for f in os.listdir(directory_path)
            if not f.startswith("_")
        }
        # Collect names of packages that are importable from the module
        modules_by_namespace_all = set(subpackage.__all__)
        modules_by_namespace_dir = {p for p in dir(subpackage) if not p.startswith("_")}

        # We use subset here to allow for imports that are not in the direct file system folder
        # such as the project._common files which are imported through specific project types
        missing_from_all = modules_by_files - modules_by_namespace_all
        assert not missing_from_all, (
            f"{subpackage.__name__}.__all__ is missing: {sorted(missing_from_all)}"
        )
        missing_from_dir = modules_by_files - modules_by_namespace_dir
        assert not missing_from_dir, (
            f"{subpackage.__name__} does not expose: {sorted(missing_from_dir)}"
        )

        # For any subpackages that are also directories, perform the same check.
        # Sorted so that the traversal (and its printed trace) is deterministic.
        for sub_module_name in sorted(modules_by_namespace_dir):
            sub_module_path = directory_path / sub_module_name
            if sub_module_path.is_dir():
                sub_module = importlib.import_module(f"{subpackage.__name__}.{sub_module_name}")
                check_subpackage_imports(sub_module, sub_module_path)

    import tamr_toolbox

    toolbox_dir = get_toolbox_root_dir() / "tamr_toolbox"

    check_subpackage_imports(tamr_toolbox, toolbox_dir)
예제 #2
0
def test_dataset_bad_encoding():
    """Export the dataset configured as ``bad_encoding`` to CSV and expect no error.

    The exported file is temporary and is removed whether or not the export succeeds.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    bad_encoding_dataset_id = CONFIG["datasets"]["bad_encoding"]
    dataset = client.datasets.by_resource_id(bad_encoding_dataset_id)

    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_bad_encoding.csv")
    try:
        csv.from_dataset(dataset, filepath)
        # Previously only checked implicitly: os.remove failed when nothing was written
        assert os.path.exists(filepath)
    finally:
        # Do not leave the temporary file behind when the export raises
        if os.path.exists(filepath):
            os.remove(filepath)
예제 #3
0
def test_taxonomy_check_invalid_project_type():
    """Exporting a taxonomy from the ``minimal_mastering`` project must raise ``TypeError``."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    project_id = CONFIG["projects"]["minimal_mastering"]
    mastering_project = tamr.projects.by_resource_id(project_id)
    target_csv = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")
    delimiters = {"csv_delimiter": ",", "flatten_delimiter": ","}

    with pytest.raises(TypeError):
        csv.from_taxonomy(mastering_project, target_csv, **delimiters)
예제 #4
0
def test_invalid_delimiters():
    """Using the same character for both delimiters must raise ``ValueError``."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    project_id = CONFIG["projects"]["minimal_categorization"]
    categorization_project = tamr.projects.by_resource_id(project_id)
    target_csv = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")
    # Both delimiters are deliberately identical
    delimiters = {"csv_delimiter": ",", "flatten_delimiter": ","}

    with pytest.raises(ValueError):
        csv.from_taxonomy(categorization_project, target_csv, **delimiters)
예제 #5
0
def test_taxonomy_existing_file():
    """Exporting onto an existing file with ``overwrite=False`` must raise ``FileExistsError``.

    The pre-created file is removed afterwards, even when the expectation is not met.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)
    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy2.csv")

    # Pre-create the target file so that the export finds it already present
    with open(filepath, "w") as f:
        f.write("Temporary file")

    try:
        with pytest.raises(FileExistsError):
            csv.from_taxonomy(project, filepath, overwrite=False)
    finally:
        # Clean up regardless of the outcome so later runs start from a clean state
        if os.path.exists(filepath):
            os.remove(filepath)
예제 #6
0
def test_taxonomy_export_csv():
    """Export the categorization taxonomy to CSV and compare it with ``TAXONOMY_DATA``.

    Checks the returned record count and every written line against the reference data.
    The temporary file is removed even when the export or the comparison fails.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)
    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy.csv")

    try:
        records_written = csv.from_taxonomy(project, filepath, csv_delimiter=",")
        # Read everything up front and close the handle before the file is removed
        with io.open(filepath) as exported:
            list_written = list(exported)
    finally:
        if os.path.exists(filepath):
            os.remove(filepath)

    list_ref = TAXONOMY_DATA

    assert len(list_ref) == len(list_written)
    assert len(list_ref) == records_written

    # Lengths are equal at this point, so zip covers every line
    for expected, actual in zip(list_ref, list_written):
        assert expected == actual
예제 #7
0
def test_taxonomy_overwrite_file():
    """Exporting onto an existing file with ``overwrite=True`` must replace its content.

    The written lines and the returned record count are compared with ``TAXONOMY_DATA``.
    The temporary file is removed even when the export or the comparison fails.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(categorization_project_id)
    filepath = os.path.join(get_toolbox_root_dir(), "tests/data_io/temp_taxonomy3.csv")

    # Pre-create the target file so that the export has something to overwrite
    with open(filepath, "w") as f:
        f.write("Temporary file")

    try:
        records_written = csv.from_taxonomy(project, filepath, overwrite=True)
        # Read everything up front and close the handle before the file is removed
        with io.open(filepath) as exported:
            list_written = list(exported)
    finally:
        if os.path.exists(filepath):
            os.remove(filepath)

    list_ref = TAXONOMY_DATA

    assert records_written == len(list_ref)
    # Indexing (rather than zip) keeps the failure when fewer lines were written
    for i, expected in enumerate(list_ref):
        assert expected == list_written[i]
예제 #8
0
"""Tests for tasks related to transformations with Tamr projects"""
import pytest

from tamr_toolbox.project import categorization, mastering, schema_mapping
from tamr_toolbox import utils

from tamr_toolbox.utils.testing import mock_api
from tests._common import get_toolbox_root_dir

# Test configuration, loaded once at import time from the mocking resources YAML file
CONFIG = utils.config.from_yaml(get_toolbox_root_dir() /
                                "tests/mocking/resources/toolbox_test.yaml")
# Resource id of the project registered as "minimal_categorization" in CONFIG
PROJECT_ID = CONFIG["projects"]["minimal_categorization"]


@mock_api()
def test_input_and_unified_transformations():
    """Build input-scope transformations for a project that starts without any.

    Verifies that both transformation scopes are empty at the start, then constructs one
    input transformation without datasets and one bound to the first input dataset.
    """
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr.projects.by_resource_id(PROJECT_ID)

    # The project is expected to have no transformations in either scope yet
    starting_tx = categorization.transformations.get_all(categorization_project)

    assert len(starting_tx.unified_scope) == 0
    assert len(starting_tx.input_scope) == 0

    # Transformation created without an explicit dataset list
    tx_without_datasets = categorization.transformations.InputTransformation(
        transformation="SELECT *, 1 as one;"
    )
    # Transformation restricted to the first input dataset of the project
    first_input_dataset = next(categorization_project.input_datasets().stream())
    tx_for_first_dataset = categorization.transformations.InputTransformation(
        transformation="//comment\nSELECT *, 2 as two;",
        datasets=[first_input_dataset],
    )
예제 #9
0
"""Tests for workflow example scripts"""
from tamr_toolbox import utils
from tamr_toolbox.utils.testing import mock_api

from examples.scripts.workflow import run_multiple_projects
from tests._common import get_toolbox_root_dir

# Configuration of the example scripts, loaded once at import time from their YAML file
CONFIG = utils.config.from_yaml(
    path_to_file=get_toolbox_root_dir() / "examples/resources/conf/project.config.yaml"
)


@mock_api()
def test_run_multiple_projects():
    """Run the multi-project workflow example and check that every operation succeeded."""
    run_multiple_projects.LOGGER = utils.logger.create(__name__)

    # Config keys of the projects to run, in the order they are handed to the script
    project_keys = (
        "my_schema_mapping_project",
        "my_categorization_project",
        "my_mastering_project",
        "my_golden_records_project",
    )
    operations = run_multiple_projects.main(
        instance_connection_info=CONFIG["my_tamr_instance"],
        project_ids=[CONFIG["projects"][key] for key in project_keys],
    )

    # Check that all operations run completed successfully
    assert all(op.succeeded() for op in operations)

    # Check that the number of operations run is exactly what we expect
예제 #10
0
def test__valid_toolbox_root_dir():
    """The toolbox root must be an existing absolute path that contains this test file."""
    root = get_toolbox_root_dir()
    assert root.exists()
    assert root.is_absolute()
    # This very file must be found at its known location below the root directory
    expected_location = root.joinpath("tests", "test__common.py")
    assert expected_location == Path(__file__)
예제 #11
0
from tamr_toolbox.enrichment.dictionary import TranslationDictionary

from tamr_toolbox import utils
from tamr_toolbox import enrichment

from tamr_toolbox.utils.testing import mock_api
from tests._common import get_toolbox_root_dir

from pathlib import Path
from typing import Optional
import tempfile
import pytest


# Test configuration, loaded once at import time from the mocking resources YAML file
CONFIG = utils.config.from_yaml(
    get_toolbox_root_dir() / "tests/mocking/resources/toolbox_test.yaml"
)
# Value registered under the "dictionary_auto_to_fr.json" key of CONFIG["datasets"]
DICTIONARY_DATASET_ID = CONFIG["datasets"]["dictionary_auto_to_fr.json"]


# Raw export of minimal_schema_mapping_unified_dataset
TEST_TRANSLATION_DICTIONARY = {
    "cheddar cheese": TranslationDictionary(
        standardized_phrase="cheddar cheese",
        translated_phrase="fromage cheddar",
        detected_language="en",
        original_phrases={"cheddar cheese"},
    ),
    "ground beef": TranslationDictionary(
        standardized_phrase="ground beef",
        translated_phrase="boeuf haché",
예제 #12
0
"""Tests for Tamr Golden Records project example scripts"""
from tamr_toolbox import utils
from tamr_toolbox.utils.testing import mock_api

from examples.scripts.project.golden_records import (
    run_golden_records_simple,
    run_golden_records_verbose,
)
from tests._common import get_toolbox_root_dir

# Configuration of the example scripts, loaded once at import time from their YAML file
CONFIG = utils.config.from_yaml(path_to_file=get_toolbox_root_dir() /
                                "examples/resources/conf/project.config.yaml")


@mock_api()
def test_run_golden_records_simple():
    """Run the simple golden records example and check its operations.

    Every returned operation must have succeeded and exactly three must have been run.
    """
    run_golden_records_simple.LOGGER = utils.logger.create(__name__)

    operations = run_golden_records_simple.main(
        instance_connection_info=CONFIG["my_tamr_instance"],
        golden_records_project_id=CONFIG["projects"]["my_golden_records_project"],
    )

    # Check that all operations run completed successfully
    assert all(op.succeeded() for op in operations)

    # Check that the number of operations run is exactly 3
    assert len(operations) == 3
예제 #13
0
"""Tests for tasks related to creation of Email notifications"""
from unittest.mock import patch

from tamr_toolbox import utils
import tamr_toolbox as tbox

from tamr_toolbox.utils.testing import mock_api
from tests._common import get_toolbox_root_dir


# Notification test configuration, loaded once at import time from the mocking resources
CONFIG = utils.config.from_yaml(
    get_toolbox_root_dir() / "tests/mocking/resources/notifications.config.yaml"
)


def test_build_message():
    test_message = "This is a test email."
    subject_line = "Test 123"
    test_response = (
        'Content-Type: text/plain; charset="us-ascii"\nMIME-Version: '
        + "1.0\nContent-Transfer-Encoding: 7bit\n"
        + f'Subject: Test 123\nFrom: {CONFIG["my_email_notification"]["sender_address"]}\n'
        + f'To: {CONFIG["my_email_notification"]["recipient_addresses"][0]}\n'
        + "\nThis is a test email."
    )

    msg = tbox.notifications.emails._build_message(
        message=test_message,
        subject_line=subject_line,
        sender=CONFIG["my_email_notification"]["sender_address"],
        recipients=CONFIG["my_email_notification"]["recipient_addresses"],
예제 #14
0
"""Tests for related to the Tamr auxiliary service DF-connect"""
import pytest
from tamr_toolbox.data_io.df_connect import client
from tamr_toolbox.utils.config import from_yaml
from tests._common import get_toolbox_root_dir

# Three configurations loaded at import time from the mocking resources directory:
# the base file, the "https" file and the "multi_export" file
CONFIG = from_yaml(get_toolbox_root_dir() /
                   "tests/mocking/resources/connect.config.yaml")
CONFIG_HTTPS = from_yaml(get_toolbox_root_dir() /
                         "tests/mocking/resources/connect_https.config.yaml")
CONFIG_MULTI_EXPORT = from_yaml(
    get_toolbox_root_dir() /
    "tests/mocking/resources/connect_multi_export.config.yaml")


@pytest.mark.parametrize(
    "protocol,port,base_path, expected",
    [
        ("http", "9030", "", "http://localhost:9030/api/jdbc/ingest"),
        ("https", "9030", "", "https://localhost:9030/api/jdbc/ingest"),
        ("http", "", "", "http://localhost/api/jdbc/ingest"),
        ("https", "", "", "https://localhost/api/jdbc/ingest"),
        ("http", "", "/proxy", "http://localhost/proxy/api/jdbc/ingest"),
        ("https", "", "proxy", "https://localhost/proxy/api/jdbc/ingest"),
        ("http", "9030", "proxy",
         "http://localhost:9030/proxy/api/jdbc/ingest"),
        ("https", "9030", "proxy",
         "https://localhost:9030/proxy/api/jdbc/ingest"),
    ],
)
def test_create_with_multiple_parameters(protocol: str, port: str,
예제 #15
0
"""Tests for tasks related to creation of Slack notifications"""
import pytest

from requests import HTTPError
from slack import WebClient
from slack.errors import SlackApiError
from slack.web.slack_response import SlackResponse
from unittest.mock import MagicMock

from tamr_toolbox import utils, notifications
from tamr_toolbox.utils.operation import from_resource_id, get_details
from tamr_toolbox.utils.testing import mock_api
from tests._common import get_toolbox_root_dir

# Notification test configuration, loaded once at import time from the mocking resources
CONFIG = utils.config.from_yaml(
    get_toolbox_root_dir() /
    "tests/mocking/resources/notifications.config.yaml")


def _mock_response(channel: str, text: str) -> SlackResponse:
    """
    Simulate response for a call to chat_PostMessage() in the Slack WebClient
    https://python-slackclient.readthedocs.io/en/latest/basic_usage.html

    Args:
        channel: The slack channel to post to (ignored in mock response)
        text: The body of the message in the response
    Returns:
        A JSON SlackResponse object
    """
    mock_client = WebClient()