def test_scan_dataset_folders(set_dataset_config):
    """Scan the "Dataset" folder structure and check the folders reported.

    Only the dataset callback is supplied; the user, group and experiment
    callbacks are passed as None because this test does not need them.
    """
    from mydata.conf import settings
    from mydata.tasks.folders import scan_folders

    datasets = []

    def found_dataset(folder):
        datasets.append(folder)

    register_mocks = (
        mock_testfacility_user_response,
        mock_test_facility_response,
        mock_test_instrument_response,
    )

    with requests_mock.Mocker() as mocker:
        for register in register_mocks:
            register(mocker, settings.general.mytardis_url)

        # Positional callbacks: user, group, experiment, dataset.
        scan_folders(None, None, None, found_dataset)

        dataset_names = sorted(dataset.name for dataset in datasets)
        assert dataset_names == ["Birds", "Flowers"]

        total_files = sum(dataset.num_files for dataset in datasets)
        assert total_files == 5
def test_scan_email_exp_dataset_folders(set_email_exp_dataset_config):
    """Scan the "Email / Experiment / Dataset" folder structure.

    Collects the users, experiment folder names and dataset folders passed
    to the scan callbacks and checks them; no group callback is supplied.
    """
    from mydata.conf import settings
    from mydata.tasks.folders import scan_folders

    seen_users = []
    seen_exps = []
    seen_datasets = []

    def found_user(user):
        seen_users.append(user)

    def found_exp(exp_folder_name):
        seen_exps.append(exp_folder_name)

    def found_dataset(folder):
        seen_datasets.append(folder)

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_testusers_response(mocker, settings, ["testuser1", "testuser2"])
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        # Positional callbacks: user, group, experiment, dataset.
        scan_folders(found_user, None, found_exp, found_dataset)

        usernames = sorted(user.username for user in seen_users)
        assert usernames == ["testuser1", "testuser2"]

        assert sorted(seen_exps) == ["Exp1", "Exp2"]

        dataset_names = sorted(dataset.name for dataset in seen_datasets)
        assert dataset_names == ["Birds", "Flowers"]

        total_files = sum(dataset.num_files for dataset in seen_datasets)
        assert total_files == 5
def test_scan_user_mytardis_exp_dataset(set_user_mytardis_exp_dataset_config):
    """Run the scan command on the Username / "MyTardis" / Experiment / Dataset
    folder structure and compare its console output with the expected text.
    """
    from mydata.commands.scan import scan_cmd
    from mydata.conf import settings

    # NOTE(review): the line breaks and blank lines of this expected text were
    # reconstructed from a whitespace-collapsed copy of the file -- confirm
    # the exact layout against real command output.
    expected_output = "%s\n" % textwrap.dedent("""
        Scanning tests/testdata/testdata-user-mytardis-exp-dataset/ using the "Username / "MyTardis" / Experiment / Dataset" folder structure...

        Found user folder: testuser1
        Found user folder: testuser2
        Found user folder: testuser3

        Found 2 dataset folders in tests/testdata/testdata-user-mytardis-exp-dataset/

        Datasets will be collected into 2 experiments.
        """)

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_testusers_response(
            mocker, settings, ["testuser1", "testuser2", "testuser3"]
        )
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        result = CliRunner().invoke(scan_cmd, [])

    assert result.exit_code == 0
    assert result.output == expected_output
def test_scan_group_dataset_folders(set_group_dataset_config):
    """Scan the "Group / Dataset" folder structure.

    Collects the groups and dataset folders passed to the scan callbacks and
    checks them; the user and experiment callbacks are not needed and are
    passed as None.
    """
    from mydata.conf import settings
    from mydata.tasks.folders import scan_folders

    seen_groups = []
    seen_datasets = []

    def found_group(group):
        seen_groups.append(group)

    def found_dataset(folder):
        seen_datasets.append(folder)

    expected_groups = ["TestFacility-Group1", "TestFacility-Group2"]

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        for group_name in ("TestFacility-Group1", "TestFacility-Group2"):
            mock_get_group(mocker, settings.general.mytardis_url, group_name)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        # Positional callbacks: user, group, experiment, dataset.
        scan_folders(None, found_group, None, found_dataset)

        group_names = sorted(group.name for group in seen_groups)
        assert group_names == expected_groups

        dataset_names = sorted(dataset.name for dataset in seen_datasets)
        assert dataset_names == ["Birds", "Flowers"]

        total_files = sum(dataset.num_files for dataset in seen_datasets)
        assert total_files == 5
def test_scan_dataset(set_dataset_config):
    """Run the scan command on the "Dataset" folder structure and compare its
    console output with the expected text.
    """
    from mydata.commands.scan import scan_cmd
    from mydata.conf import settings

    # NOTE(review): the line breaks and blank lines of this expected text were
    # reconstructed from a whitespace-collapsed copy of the file -- confirm
    # the exact layout against real command output.
    expected_output = "%s\n" % textwrap.dedent("""
        Scanning tests/testdata/testdata-dataset/ using the "Dataset" folder structure...

        Found 2 dataset folders in tests/testdata/testdata-dataset/
        """)

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        result = CliRunner().invoke(scan_cmd, [])

    assert result.exit_code == 0
    assert result.output == expected_output
def test_scan_group_instrument(set_group_instrument_config):
    """Run the scan command on the "User Group / Instrument / Full Name /
    Dataset" folder structure and compare its console output with the
    expected text.
    """
    from mydata.commands.scan import scan_cmd
    from mydata.conf import settings

    # NOTE(review): the line breaks and blank lines of this expected text were
    # reconstructed from a whitespace-collapsed copy of the file -- confirm
    # the exact layout against real command output.
    expected_output = "%s\n" % textwrap.dedent(
        """
        Scanning tests/testdata/testdata-group-instrument/ using the "User Group / Instrument / Full Name / Dataset" folder structure...

        Found group folder: Group1
        Found group folder: Group2

        Found 8 dataset folders in tests/testdata/testdata-group-instrument/
        """
    )

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        for group_name in ("TestFacility-Group1", "TestFacility-Group2"):
            mock_get_group(mocker, settings.general.mytardis_url, group_name)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        result = CliRunner().invoke(scan_cmd, [])

    assert result.exit_code == 0
    assert result.output == expected_output
def test_filters_settings(set_user_exp_dataset_config):
    """Test ability to validate filters-related settings.

    Each scenario temporarily overrides one or more settings, runs
    ``validate_settings()`` and checks either the warning added to the
    logger output or the ``InvalidSettings`` message that is raised.

    Overrides are applied through a context manager that restores the
    previous values in a ``finally`` clause, so a failing assertion cannot
    leave the settings object modified.
    """
    from contextlib import contextmanager

    from mydata.conf import settings
    from mydata.models.settings.validation import validate_settings
    from mydata.utils.exceptions import InvalidSettings
    from mydata.logs import logger

    @contextmanager
    def override(section, **values):
        """Temporarily set attributes on a settings section.

        The previous values are captured before anything is changed and
        are written back on exit, whether or not the body raised.
        """
        saved = {name: getattr(section, name) for name in values}
        try:
            for name, value in values.items():
                setattr(section, name, value)
            yield
        finally:
            for name, value in saved.items():
                setattr(section, name, value)

    def assert_warning_logged(expected_warning):
        """Validate settings and check the warning appears in the new log text."""
        log_output = logger.get_value()
        validate_settings()
        assert expected_warning in subtract(logger.get_value(), log_output)

    def test_globs_validation(
        use_includes_file,
        use_excludes_file,
        includes_file,
        excludes_file,
        expected_warning=None,
        expected_exception_msg=None,
    ):
        """
        Test globs files settings validation / warnings
        """
        with override(
            settings.filters,
            use_includes_file=use_includes_file,
            use_excludes_file=use_excludes_file,
            includes_file=includes_file,
            excludes_file=excludes_file,
        ):
            log_output = logger.get_value()
            if expected_exception_msg:
                with pytest.raises(InvalidSettings) as excinfo:
                    validate_settings()
                assert str(excinfo.value) == expected_exception_msg
            else:
                validate_settings()
            # The warning is checked in both cases: every caller below passes
            # a warning together with an expected exception message.
            if expected_warning:
                assert expected_warning in subtract(
                    logger.get_value(), log_output)

    with requests_mock.Mocker() as mocker:
        list_api_endpoints_url = (
            "%s/api/v1/?format=json" % settings.general.mytardis_url)
        mocker.get(list_api_endpoints_url, text=MOCK_API_ENDPOINTS_RESPONSE)
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        # No overrides: the fixture's configuration alone yields this warning.
        assert_warning_logged("Files newer than 1 minute(s) are being ignored")

        with override(settings.advanced,
                      upload_invalid_user_or_group_folders=False):
            assert_warning_logged("Invalid user folders are being ignored")

        with override(settings.advanced,
                      upload_invalid_user_or_group_folders=False,
                      folder_structure="User Group / Experiment / Dataset"):
            assert_warning_logged(
                "Invalid user group folders are being ignored")

        with override(settings.filters, user_filter="filter-string"):
            assert_warning_logged("User folders are being filtered")

        with override(settings.filters, user_filter="filter-string"), \
                override(settings.advanced,
                         folder_structure="User Group / Experiment / Dataset"):
            assert_warning_logged("User group folders are being filtered")

        with override(settings.filters, experiment_filter="filter-string"):
            assert_warning_logged("Experiment folders are being filtered")

        with override(settings.filters, dataset_filter="filter-string"):
            assert_warning_logged("Dataset folders are being filtered")

        with override(settings.filters, ignore_old_datasets=True):
            assert_warning_logged("Old datasets are being ignored")

        with override(settings.filters, ignore_new_datasets=True):
            assert_warning_logged("New datasets are being ignored")

        warning = ("Only files matching patterns in includes "
                   "file will be scanned for upload.")
        message = "No includes file was specified."
        test_globs_validation(
            use_includes_file=True,
            use_excludes_file=False,
            includes_file="",
            excludes_file="",
            expected_warning=warning,
            expected_exception_msg=message,
        )

        message = "Specified includes file doesn't exist."
        test_globs_validation(
            use_includes_file=True,
            use_excludes_file=False,
            includes_file="file/does/not/exist",
            excludes_file="",
            expected_warning=warning,
            expected_exception_msg=message,
        )

        message = "Specified includes file path is not a file."
        test_globs_validation(
            use_includes_file=True,
            use_excludes_file=False,
            includes_file=".",
            excludes_file="",
            expected_warning=warning,
            expected_exception_msg=message,
        )

        warning = ("Files matching patterns in excludes "
                   "file will not be scanned for upload.")
        message = "No excludes file was specified."
        test_globs_validation(
            use_includes_file=False,
            use_excludes_file=True,
            includes_file="",
            excludes_file="",
            expected_warning=warning,
            expected_exception_msg=message,
        )

        warning = ("Files matching patterns in excludes "
                   "file will not be scanned for upload, "
                   "unless they match patterns in the includes file.")
        message = "No includes file was specified."
        test_globs_validation(
            use_includes_file=True,
            use_excludes_file=True,
            includes_file="",
            excludes_file="",
            expected_warning=warning,
            expected_exception_msg=message,
        )
def test_objectacl_exceptions(set_exp_dataset_config):
    """Test ability to handle ObjectACL-related exceptions.

    First shares an existing experiment with a user and with a group and
    expects no exception, then repeats both calls against a mocked 401
    response and expects an ``HTTPError`` carrying that status code.
    """
    # In a normal MyData run, the settings singleton would only be initialized
    # once, but when running a series of unit tests, we need to ensure that
    # settings is initialized for each test from the MYDATA_CONFIG_PATH
    # environment variable.
    from mydata.conf import settings
    from mydata.models.objectacl import ObjectACL
    from mydata.models.experiment import Experiment
    from mydata.models.folder import Folder
    from mydata.models.group import Group

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        owner = settings.general.default_owner

    dataset_folder_name = "Flowers"
    exp_folder_name = "Exp1"
    location = os.path.join(settings.general.data_directory, exp_folder_name)

    # Test sharing experiment with user, and ensure that no exception
    # is raised:
    user_folder_name = owner.username
    group_folder_name = None
    folder = Folder(dataset_folder_name, location, user_folder_name,
                    group_folder_name, owner)
    folder.experiment_title = "Existing Experiment"

    with requests_mock.Mocker() as mocker:
        get_exp_url = (
            "%s/api/v1/mydata_experiment/?format=json&title=Existing%%20Experiment"
            "&folder_structure=Experiment%%20/%%20Dataset&user_folder_name=testfacility"
        ) % settings.general.mytardis_url
        mocker.get(get_exp_url, text=EXISTING_EXP_RESPONSE)
        experiment = Experiment.get_exp_for_folder(folder)
        assert experiment.title == "Existing Experiment"

    with requests_mock.Mocker() as mocker:
        post_acl_url = "%s/api/v1/objectacl/" % settings.general.mytardis_url
        mocker.post(post_acl_url, status_code=201)
        ObjectACL.share_exp_with_user(experiment, owner)

    # Test sharing experiment with group, and ensure that no exception
    # is raised:
    with requests_mock.Mocker() as mocker:
        mock_get_group(mocker, settings.general.mytardis_url,
                       "TestFacility-Group1")
        group = Group.get_group_by_name("TestFacility-Group1")
        post_acl_url = "%s/api/v1/objectacl/" % settings.general.mytardis_url
        mocker.post(post_acl_url, status_code=201)
        ObjectACL.share_exp_with_group(experiment, group, is_owner=True)

    # Both remaining checks run with an invalid API key. The original key is
    # restored in ``finally`` so that a failed expectation cannot leave the
    # settings object holding the invalid key.
    api_key = settings.general.api_key
    settings.general.api_key = "invalid"
    try:
        # Try to create a user ObjectACL record with
        # an invalid API key, which should give 401 (Unauthorized)
        with requests_mock.Mocker() as mocker:
            post_acl_url = (
                "%s/api/v1/objectacl/" % settings.general.mytardis_url)
            mocker.post(post_acl_url, status_code=401)
            with pytest.raises(HTTPError) as excinfo:
                ObjectACL.share_exp_with_user(experiment, owner)
            assert excinfo.value.response.status_code == 401

        # Try to create a group ObjectACL record with
        # an invalid API key, which should give 401 (Unauthorized)
        with requests_mock.Mocker() as mocker:
            post_acl_url = (
                "%s/api/v1/objectacl/" % settings.general.mytardis_url)
            mocker.post(post_acl_url, status_code=401)
            with pytest.raises(HTTPError) as excinfo:
                ObjectACL.share_exp_with_group(
                    experiment, group, is_owner=True)
            assert excinfo.value.response.status_code == 401
    finally:
        settings.general.api_key = api_key
def test_experiment_exceptions(set_exp_dataset_config):
    """Test ability to handle experiment-related exceptions.

    The first half looks up experiments for folders with every combination
    of user folder name and group folder name, against mocked responses that
    are empty, contain one match, contain several matches, or fail with
    401/404.  The second half creates experiments, including during a "test
    run" and against mocked 401/404 responses.

    Temporary changes to the API key and to ``FLAGS.test_run_running`` are
    undone in ``finally`` clauses so that a failed expectation does not leave
    them modified.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    from mydata.conf import settings
    from mydata.threads.flags import FLAGS
    from mydata.models.experiment import Experiment
    from mydata.models.folder import Folder

    # Query-string fragments shared by the mocked lookup URLs.  They are
    # %-templates: "%%20" becomes "%20" once the URL is formatted.
    folder_structure = "&folder_structure=Experiment%%20/%%20Dataset"
    by_user = "&user_folder_name=testfacility"
    by_group = "&group_folder_name=Test%%20Group1"

    def lookup_url(query):
        """Return the experiment lookup URL with *query* appended."""
        template = "%s/api/v1/mydata_experiment/?format=json" + query
        return template % settings.general.mytardis_url

    def make_folder(user_folder_name, group_folder_name, title):
        """Build a Folder for the test dataset with a manually set title."""
        new_folder = Folder(dataset_folder_name, location, user_folder_name,
                            group_folder_name, owner)
        new_folder.experiment_title = title
        return new_folder

    def get_exp(for_folder, query, **mocked_response):
        """Look up the experiment for a folder against one mocked GET URL."""
        with requests_mock.Mocker() as mocker:
            mocker.get(lookup_url(query), **mocked_response)
            return Experiment.get_exp_for_folder(for_folder)

    # MyData has the concept of a "default experiment",
    # which depends on the UUID of the MyData instance:
    settings.miscellaneous.uuid = "1234567890"

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        owner = settings.general.default_owner

    dataset_folder_name = "Flowers"
    exp_folder_name = "Exp1"
    location = os.path.join(settings.general.data_directory, exp_folder_name)

    # LOOKING UP EXPERIMENTS

    # Try to look up nonexistent experiment record with
    # experiment title set manually, and with a user folder
    # name, but no group folder name:
    folder = make_folder(owner.username, None, exp_folder_name)
    existing_exp = get_exp(
        folder, "&title=Exp1" + folder_structure + by_user,
        text=EMPTY_LIST_RESPONSE)
    assert not existing_exp

    # Look up existing experiment record with
    # experiment title set manually, and with a user folder
    # name, but no group folder name:
    folder = make_folder(owner.username, None, "Existing Experiment")
    experiment = get_exp(
        folder, "&title=Existing%%20Experiment" + folder_structure + by_user,
        text=EXISTING_EXP_RESPONSE)
    assert experiment.title == "Existing Experiment"

    # Look up one of many existing experiment records with
    # experiment title set manually, and with a user folder
    # name, but no group folder name:
    mock_exp_dict = {
        "meta": {
            "limit": 20,
            "next": None,
            "offset": 0,
            "previous": None,
            "total_count": 2,
        },
        "objects": [
            {"id": 1, "title": "Existing Experiment1"},
            {"id": 2, "title": "Existing Experiment2"},
        ],
    }
    mock_exp_response = json.dumps(mock_exp_dict)
    folder = make_folder(owner.username, None,
                         "Multiple Existing Experiments")
    experiment = get_exp(
        folder,
        "&title=Multiple%%20Existing%%20Experiments"
        + folder_structure + by_user,
        text=mock_exp_response)
    # The first of the returned records is the one that is used.
    assert experiment.title == "Existing Experiment1"

    # Try to look up nonexistent experiment record with
    # experiment title set manually, and with a group folder
    # name, but no user folder name:
    folder = make_folder(None, "Test Group1", exp_folder_name)
    existing_exp = get_exp(
        folder, "&title=Exp1" + folder_structure + by_group,
        text=EMPTY_LIST_RESPONSE)
    assert not existing_exp

    # Look up existing experiment record with
    # experiment title set manually, and with a group folder
    # name, but no user folder name:
    folder = make_folder(None, "Test Group1", "Existing Experiment")
    experiment = get_exp(
        folder, "&title=Existing%%20Experiment" + folder_structure + by_group,
        text=EXISTING_EXP_RESPONSE)
    assert experiment.title == "Existing Experiment"

    # Try to look up nonexistent experiment record with
    # experiment title set manually, and with a user folder
    # name, and a group folder name:
    folder = make_folder(owner.username, "Test Group1", exp_folder_name)
    # NOTE(review): unlike the "existing" case below, this mocked URL carries
    # only the group folder name; kept exactly as it was -- confirm intended.
    existing_exp = get_exp(
        folder, "&title=Exp1" + folder_structure + by_group,
        text=EMPTY_LIST_RESPONSE)
    assert not existing_exp

    # Look up existing experiment record with
    # experiment title set manually, and with a group folder
    # name, and a user folder name:
    folder = make_folder(owner.username, "Test Group1", "Existing Experiment")
    experiment = get_exp(
        folder,
        "&title=Existing%%20Experiment" + folder_structure
        + by_user + by_group,
        text=EXISTING_EXP_RESPONSE)
    assert experiment.title == "Existing Experiment"

    # Try to look up nonexistent experiment record with
    # experiment title set manually, with neither a user folder
    # name, nor a group folder name:
    folder = make_folder(None, None, exp_folder_name)
    existing_exp = get_exp(
        folder, "&title=Exp1" + folder_structure, text=EMPTY_LIST_RESPONSE)
    assert not existing_exp

    # Look up existing experiment record with
    # experiment title set manually, and with neither a user folder
    # name, nor a group folder name:
    folder = make_folder(None, None, "Existing Experiment")
    experiment = get_exp(
        folder, "&title=Existing%%20Experiment" + folder_structure,
        text=EXISTING_EXP_RESPONSE)
    assert experiment.title == "Existing Experiment"

    # Try to look up experiment record with
    # an invalid API key, which should give 401 (Unauthorized)
    api_key = settings.general.api_key
    settings.general.api_key = "invalid"
    try:
        with pytest.raises(HTTPError) as excinfo:
            get_exp(folder, "&title=Existing%%20Experiment" + folder_structure,
                    status_code=401)
        assert excinfo.value.response.status_code == 401
    finally:
        settings.general.api_key = api_key

    # Try to look up experiment record with a missing Schema,
    # which can result in a 404 from the MyTardis API:
    folder.experiment_title = "Missing Schema"
    with pytest.raises(HTTPError) as excinfo:
        get_exp(folder, "&title=Missing%%20Schema" + folder_structure,
                status_code=404)
    assert excinfo.value.response.status_code == 404

    # Try to look up experiment record and handle a 404 of
    # unknown origin from the MyTardis API:
    folder.experiment_title = "Unknown 404"
    with pytest.raises(HTTPError) as excinfo:
        get_exp(folder, "&title=Unknown%%20404" + folder_structure,
                status_code=404)
    assert excinfo.value.response.status_code == 404

    # CREATING EXPERIMENTS

    # Try to create an experiment with a title specified manually
    # and check that the title is correct:
    FLAGS.test_run_running = False
    folder.experiment_title = exp_folder_name
    with requests_mock.Mocker() as mocker:
        post_exp_url = (
            "%s/api/v1/mydata_experiment/" % settings.general.mytardis_url)
        mocker.post(post_exp_url, text=EXP1_RESPONSE, status_code=201)
        post_objectacl_url = (
            "%s/api/v1/objectacl/" % settings.general.mytardis_url)
        mocker.post(post_objectacl_url, status_code=201)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)
        experiment = Experiment.create_exp_for_folder(folder)
    assert experiment.title == exp_folder_name

    # Try to create an experiment with a title specified manually,
    # during a test run
    FLAGS.test_run_running = True
    try:
        folder.experiment_title = exp_folder_name
        with requests_mock.Mocker() as mocker:
            mocker.get(lookup_url("&title=Exp1" + folder_structure),
                       text=EMPTY_LIST_RESPONSE)
            experiment = Experiment.get_or_create_exp_for_folder(folder)
        assert experiment is None
    finally:
        FLAGS.test_run_running = False

    # Get or create an experiment with a title specified manually,
    # which already exists during a test run
    FLAGS.test_run_running = True
    try:
        folder.experiment_title = "Existing Experiment"
        with requests_mock.Mocker() as mocker:
            mocker.get(
                lookup_url("&title=Existing%%20Experiment" + folder_structure),
                text=EXISTING_EXP_RESPONSE)
            experiment = Experiment.get_or_create_exp_for_folder(folder)
        assert experiment.title == "Existing Experiment"
        folder.experiment_title = exp_folder_name
    finally:
        FLAGS.test_run_running = False

    # Try to create an experiment record with
    # an invalid API key, which should give 401 (Unauthorized)
    api_key = settings.general.api_key
    settings.general.api_key = "invalid"
    try:
        with requests_mock.Mocker() as mocker:
            post_exp_url = (
                "%s/api/v1/mydata_experiment/") % settings.general.mytardis_url
            mocker.post(post_exp_url, status_code=401)
            mock_test_facility_response(mocker, settings.general.mytardis_url)
            mock_test_instrument_response(
                mocker, settings.general.mytardis_url)
            with pytest.raises(HTTPError) as excinfo:
                _ = Experiment.create_exp_for_folder(folder)
        assert excinfo.value.response.status_code == 401
    finally:
        settings.general.api_key = api_key

    # Now let's test experiment creation with the experiment's
    # title determined automatically (from the instrument's name
    # which becomes the default uploader name) and the user folder
    # name or group folder name):
    user_folder_name = owner.username
    group_folder_name = None
    folder = Folder(dataset_folder_name, location, user_folder_name,
                    group_folder_name, owner)

    # Test case where MyTardis API returns a 404, e.g. because a
    # requested Experiment Schema can't be found.
    folder.experiment_title = "Request 404 from Fake MyTardis Server"
    with requests_mock.Mocker() as mocker:
        post_exp_url = (
            "%s/api/v1/mydata_experiment/") % settings.general.mytardis_url
        mocker.post(post_exp_url, status_code=404)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)
        with pytest.raises(HTTPError) as excinfo:
            _ = Experiment.create_exp_for_folder(folder)
    assert excinfo.value.response.status_code == 404
def test_indexing():
    """
    Test indexing of already-uploaded data

    Runs the index command once for each of the "Birds" and "Flowers"
    folders against mocked MyTardis endpoints and compares the console
    output with the expected text for that folder.
    """
    from mydata.commands.index import index_cmd

    testdata_path = os.path.abspath(
        os.path.join(".", "tests", "testdata", "testdata-dataset"))
    env = dict(
        MYTARDIS_URL="https://www.example.com",
        MYTARDIS_USERNAME="******",
        MYTARDIS_API_KEY="mock_api_key",
        MYTARDIS_STORAGE_BOX_PATH=testdata_path,
        MYTARDIS_STORAGE_BOX_NAME="storage-box1",
        MYTARDIS_SRC_PATH=os.path.abspath(
            os.path.join(".", "tests", "testdata", "testdata-dataset")),
        MYTARDIS_EXP_ID="123",
    )
    runner = CliRunner()
    dataset_ids = dict(Birds=BIRDS_DATASET_ID, Flowers=FLOWERS_DATASET_ID)

    # NOTE(review): the line breaks and blank lines of the expected texts
    # below were reconstructed from a whitespace-collapsed copy of the file
    # -- confirm the exact layout against real command output.
    expected_output_template = Template("%s\n" % textwrap.dedent("""
        MYTARDIS_STORAGE_BOX_PATH: $cwd/tests/testdata/testdata-dataset
        Validated MyTardis settings.

        Indexing folder: $folder_name

        Created Dataset record for '$folder_name' with ID: $dataset_id

        $files
        """))
    expected_files_output_template = {
        "Birds": Template(
            textwrap.dedent("""\
                File path: $cwd/tests/testdata/testdata-dataset/Birds/1024px-Australian_Birds_@_Jurong_Bird_Park_(4374195521).jpg
                size: 116537
                mimetype: image/jpeg
                md5sum: 53c6ac03b64f61d5e0b596f70ed75a51
                Created DataFile record: /api/v1/dataset_file/123456

                File path: $cwd/tests/testdata/testdata-dataset/Birds/Black-beaked-sea-bird-close-up.jpg
                Failed to check for existing DataFile record on MyTardis. Skipping for now.

                1 of 2 files have been indexed by MyTardis.
                0 of 2 files have been verified by MyTardis.
                1 of 2 files were newly indexed in this session.
                """)),
        "Flowers": Template(
            textwrap.dedent("""\
                File path: $cwd/tests/testdata/testdata-dataset/Flowers/1024px-Colourful_flowers.JPG
                DataFile record was found, so we won't create another record for this file.

                File path: $cwd/tests/testdata/testdata-dataset/Flowers/Flowers_growing_on_the_campus_of_Cebu_City_National_Science_High_School.jpg
                DataFile record was found, so we won't create another record for this file.

                File path: $cwd/tests/testdata/testdata-dataset/Flowers/Pond_Water_Hyacinth_Flowers.jpg
                size: 341543
                mimetype: image/jpeg
                md5sum: 4eecf4d4b352c6a12100013a6ad2474a
                Created DataFile record: /api/v1/dataset_file/123456

                3 of 3 files have been indexed by MyTardis.
                2 of 3 files have been verified by MyTardis.
                1 of 3 files were newly indexed in this session.

                File records on server without any DataFileObjects:
                Dataset ID: 1001, Filename: Pond_Water_Hyacinth_Flowers.jpg
                """)),
    }

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, env["MYTARDIS_URL"])
        mock_birds_flowers_datafile_lookups(mocker)

        for folder_name in ("Birds", "Flowers"):
            folder_path = os.path.join(
                env["MYTARDIS_STORAGE_BOX_PATH"], folder_name)

            # No dataset exists yet for this folder, so the lookup is empty
            # and the POST that follows returns the created record.
            get_dataset_url = (
                "%s/api/v1/dataset/?format=json&experiments__id=123"
                "&description=%s") % (env["MYTARDIS_URL"], quote(folder_name))
            mocker.get(get_dataset_url, text=EMPTY_LIST_RESPONSE)

            post_dataset_url = "%s/api/v1/dataset/" % env["MYTARDIS_URL"]
            mock_dataset_response = created_dataset_response(
                dataset_ids[folder_name], folder_name)
            mocker.post(post_dataset_url, text=mock_dataset_response)

            post_datafile_url = (
                "%s/api/v1/dataset_file/" % env["MYTARDIS_URL"])
            mocker.post(
                post_datafile_url,
                status_code=201,
                headers=dict(Location="/api/v1/dataset_file/123456"),
            )

            result = runner.invoke(
                index_cmd, [folder_path], env=env, catch_exceptions=False)
            assert result.exit_code == 0

            files_template = expected_files_output_template[folder_name]
            expected_files_output = files_template.substitute(cwd=os.getcwd())
            expected_output = expected_output_template.substitute(
                folder_name=folder_name,
                dataset_id=dataset_ids[folder_name],
                files=expected_files_output,
                cwd=os.getcwd(),
            )
            assert result.output == expected_output
def test_validate_settings(set_exp_dataset_config):
    """Test ability to validate settings.

    The fixture's configuration is validated once and must pass.  After
    that, one setting (or pair of settings) at a time is replaced with a bad
    value and ``validate_settings`` must raise ``InvalidSettings`` carrying
    the expected message.  Every override is rolled back in a ``finally``
    block, so a failing check cannot leave the shared ``settings`` object
    modified for the checks that follow.
    """
    from mydata.conf import settings
    from mydata.models.settings.validation import validate_settings
    from mydata.utils.exceptions import InvalidSettings

    def invalid_settings_message(section_name, overrides):
        """Validate with *overrides* applied and return the error text.

        *section_name* names an attribute of ``settings`` (e.g. "general");
        *overrides* maps attribute names on that section to temporary values.
        The previous values are restored whether or not validation raises.
        """
        section = getattr(settings, section_name)
        saved = {name: getattr(section, name) for name in overrides}
        try:
            for name, value in overrides.items():
                setattr(section, name, value)
            with pytest.raises(InvalidSettings) as excinfo:
                validate_settings()
        finally:
            for name, value in saved.items():
                setattr(section, name, value)
        return str(excinfo.value)

    # (section, temporary overrides, expected message, message is a regex)
    cases = [
        ("general", {"mytardis_url": ""},
         "Please enter a valid MyTardis URL", False),
        ("general", {"data_directory": ""},
         "Please enter a valid data directory", False),
        ("general", {"data_directory": "this/folder/does/not/exist"},
         "doesn't exist", False),
        ("general", {"instrument_name": ""},
         "Please enter a valid instrument name", False),
        ("general", {"facility_name": ""},
         "Please enter a valid facility name", False),
        ("general", {"facility_name": "Invalid"},
         'Facility "Invalid" was not found in MyTardis.', False),
        ("general", {"contact_name": ""},
         "Please enter a valid contact name", False),
        ("general", {"contact_email": ""},
         "Please enter a valid contact email", False),
        ("general", {"contact_email": "invalid-email-address"},
         "Please enter a valid contact email", False),
        ("advanced",
         {"folder_structure": "Email / Dataset",
          "validate_folder_structure": True},
         "Folder name .* is not a valid email address.", True),
        ("general", {"username": ""},
         "Please enter a MyTardis username", False),
        ("general", {"api_key": ""},
         "Please enter your MyTardis API key", False),
    ]

    with requests_mock.Mocker() as mocker:
        list_api_endpoints_url = (
            "%s/api/v1/?format=json" % settings.general.mytardis_url)
        mocker.get(list_api_endpoints_url, text=MOCK_API_ENDPOINTS_RESPONSE)
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)

        # The unmodified fixture configuration must validate cleanly.
        validate_settings()

        for section_name, overrides, expected, is_regex in cases:
            message = invalid_settings_message(section_name, overrides)
            if is_regex:
                assert re.match(expected, message)
            else:
                assert expected in message
def test_upload_email_dataset_structure(set_email_dataset_config, mock_key_pair):
    """Test uploading files from within the "Email / Dataset" folder structure

    The upload command is run with ``-vv`` and "y" answered at the
    confirmation prompt; all MyTardis requests are served by requests_mock.
    The command's complete console output is compared with the expected
    text.
    """
    from mydata.commands.upload import upload_cmd
    from mydata.conf import settings

    display_names = {"testuser1": "Test User1", "testuser2": "Test User2"}
    # The user folder name must be built with a real "%s" placeholder: a
    # format string without one raises
    # "TypeError: not all arguments converted during string formatting".
    # NOTE(review): the "example.com" domain is an assumption - confirm it
    # matches the folder names under tests/testdata/testdata-email-dataset/.
    user_folder_names = {
        user: "%s@example.com" % user for user in display_names
    }

    with requests_mock.Mocker() as mocker:
        mock_uploader_creation_response(mocker, settings)
        settings.uploader.ssh_key_pair = mock_key_pair
        mock_get_urr(mocker, settings, mock_key_pair.fingerprint, approved=False)
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_test_facility_response(mocker, settings.general.mytardis_url)
        mock_test_instrument_response(mocker, settings.general.mytardis_url)
        mock_testusers_response(mocker, settings, ["testuser1", "testuser2"])
        for user in ("testuser1", "testuser2"):
            title = "Test Instrument - %s" % display_names[user]
            mock_exp_creation(mocker, settings, title, user_folder_names[user])
        mock_birds_flowers_dataset_creation(mocker, settings)
        mock_birds_flowers_datafile_lookups(mocker, api_prefix="mydata_")

        runner = CliRunner()
        result = runner.invoke(upload_cmd, ["-vv"], input="y\n")
        if result.exception:
            raise result.exception
        assert result.exit_code == 0

        # NOTE(review): confirm the blank-line layout of the expected text
        # below against the real command output.
        # NOTE(review): every other lookup in this test reads
        # settings.general.mytardis_url; confirm settings.mytardis_url is
        # an equivalent accessor.
        assert result.output == textwrap.dedent("""
            Using MyData configuration in: %(config_path)s

            Scanning tests/testdata/testdata-email-dataset/ using the "Email / Dataset" folder structure...

            Checking for approved upload method...

            Using Multipart POST upload method.

            Uploads via staging haven't yet been approved.
            Do you want to continue? [y/N]: y

            Found user folder: %(user_folder1)s
            Found user folder: %(user_folder2)s

            Found 2 dataset folders in tests/testdata/testdata-email-dataset/

            Data in Birds/ is being archived to http://127.0.0.1:9000/dataset/1002
            Data in Flowers/ is being archived to http://127.0.0.1:9000/dataset/1001

            4 of 5 files have been uploaded to MyTardis.
            2 of 5 files have been verified by MyTardis.
            1 of 5 files were found unverified without any DataFileObjects! Contact server admins!
            2 of 5 files were newly uploaded in this session.
            0 of 5 file lookups were found in the local cache.

            File records on server without any DataFileObjects:
            Dataset ID: 1001, Filename: Pond_Water_Hyacinth_Flowers.jpg

            Failed lookups:
            Black-beaked-sea-bird-close-up.jpg [500 Server Error: None for url: %(mytardis_url)s/api/v1/mydata_dataset_file/?format=json&dataset__id=1002&filename=Black-beaked-sea-bird-close-up.jpg&directory=]

            Unverified lookups:
            Pond_Water_Hyacinth_Flowers.jpg

            Not found on MyTardis server:
            1024px-Australian_Birds_@_Jurong_Bird_Park_(4374195521).jpg

            Files uploaded:
            1024px-Australian_Birds_@_Jurong_Bird_Park_(4374195521).jpg [Completed]
            Pond_Water_Hyacinth_Flowers.jpg [Completed]
            """ % dict(
                config_path=settings.config_path,
                user_folder1=user_folder_names["testuser1"],
                user_folder2=user_folder_names["testuser2"],
                mytardis_url=settings.mytardis_url,
            ))
def test_upload_username_dataset_scp(set_username_dataset_config, mock_scp_server,
                                     mock_key_pair, mock_staging_path):
    """Test ability to scan the Username / Dataset folder structure
    and upload using the SCP protocol.

    Stub folder objects are built from the files found on disk and handed to
    the shared response-mocking helper; the upload command is then run with
    ``-vv`` and its complete console output is compared with the expected
    text.
    """
    from mydata.commands.upload import upload_cmd
    from mydata.conf import settings

    upload_uploader_info(settings, mock_key_pair)

    data_dir = settings.general.data_directory
    # (dataset folder, user folder that contains it)
    dataset_owners = (
        ("Flowers", "testuser1"),
        ("Birds", "testuser2"),
        ("Dataset with spaces", "testuser1"),
        ("InvalidUserDataset1", "INVALID_USER"),
    )
    listings = {
        dataset: os.listdir(os.path.join(data_dir, user, dataset))
        for dataset, user in dataset_owners
    }

    # One stub per dataset folder, carrying only the attributes set here.
    folders = []
    for dataset_name, names in listings.items():
        stub = Mock()
        stub.dataset = Mock(id=1)
        stub.name = dataset_name
        stub.num_files = len(names)
        stub.local_files = [LocalFile(name, "", False) for name in names]
        folders.append(stub)

    exp_id = 1
    instrument_id = 1
    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, settings.general.mytardis_url)
        mock_testusers_response(mocker, settings, ["testuser1", "testuser2"])
        mock_invalid_user_response(mocker, settings)
        mock_birds_flowers_dataset_creation(mocker, settings)
        for stub in folders:
            mock_dataset_creation(
                mocker, settings, exp_id, instrument_id, stub.name)
        mock_responses_for_upload_folders(
            folders, mocker, settings, mock_staging_path, mock_scp_server)

        result = CliRunner().invoke(upload_cmd, ["-vv"])
        if result.exception:
            raise result.exception
        assert result.exit_code == 0

        # NOTE(review): confirm the blank-line layout of the expected text
        # below against the real command output.
        expected_output = textwrap.dedent("""
            Using MyData configuration in: %s

            Scanning tests/testdata/testdata-username-dataset/ using the "Username / Dataset" folder structure...

            Checking for approved upload method...

            Using SCP upload method.

            Found user folder: INVALID_USER [USER "INVALID_USER" WAS NOT FOUND ON THE MYTARDIS SERVER]
            Found user folder: testuser1
            Found user folder: testuser2

            Found 4 dataset folders in tests/testdata/testdata-username-dataset/

            Data in Birds/ is being archived to http://127.0.0.1:9000/dataset/1
            Data in Dataset with spaces/ is being archived to http://127.0.0.1:9000/dataset/1
            Data in Flowers/ is being archived to http://127.0.0.1:9000/dataset/1
            Data in InvalidUserDataset1/ is being archived to http://127.0.0.1:9000/dataset/1

            12 of 12 files have been uploaded to MyTardis.
            0 of 12 files have been verified by MyTardis.
            12 of 12 files were newly uploaded in this session.
            0 of 12 file lookups were found in the local cache.

            Not found on MyTardis server:
            1024px-Australian_Birds_@_Jurong_Bird_Park_(4374195521).jpg
            1024px-Colourful_flowers.JPG
            Black-beaked-sea-bird-close-up.jpg
            Flowers_growing_on_the_campus_of_Cebu_City_National_Science_High_School.jpg
            InvalidUserFile1.txt
            Pond_Water_Hyacinth_Flowers.jpg
            existing_unverified_full_size_file.txt
            existing_unverified_incomplete_file.txt
            existing_verified_file.txt
            hello.txt
            missing_mydata_replica_api_endpoint.txt
            zero_sized_file.txt

            Files uploaded:
            1024px-Australian_Birds_@_Jurong_Bird_Park_(4374195521).jpg [Completed]
            1024px-Colourful_flowers.JPG [Completed]
            Black-beaked-sea-bird-close-up.jpg [Completed]
            Flowers_growing_on_the_campus_of_Cebu_City_National_Science_High_School.jpg [Completed]
            InvalidUserFile1.txt [Completed]
            Pond_Water_Hyacinth_Flowers.jpg [Completed]
            existing_unverified_full_size_file.txt [Completed]
            existing_unverified_incomplete_file.txt [Completed]
            existing_verified_file.txt [Completed]
            hello.txt [Completed]
            missing_mydata_replica_api_endpoint.txt [Completed]
            zero_sized_file.txt [Completed]
            """ % settings.config_path)
        assert result.output == expected_output
def test_dataset_exceptions(set_exp_dataset_config):
    """Test ability to handle dataset-related exceptions.

    Covers four cases for ``Dataset.create_dataset_if_necessary``:
    an existing dataset record is found; no record exists during a test run
    (``None`` is returned); and an existing record is found during a test
    run.  ``FLAGS.test_run_running`` is always reset in a ``finally`` block
    so that a failure here cannot leave the flag set.
    """
    from mydata.conf import settings
    from mydata.models.dataset import Dataset
    from mydata.models.experiment import Experiment
    from mydata.models.folder import Folder
    from mydata.threads.flags import FLAGS

    base_url = settings.general.mytardis_url

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, base_url)
        owner = settings.general.default_owner

    dataset_folder_name = "Flowers"
    exp_folder_name = "Exp1"
    location = os.path.join(settings.general.data_directory, exp_folder_name)

    # Test creating dataset record and ensure that no exception
    # is raised:
    user_folder_name = owner.username
    group_folder_name = None
    folder = Folder(dataset_folder_name, location, user_folder_name,
                    group_folder_name, owner)
    folder.experimentTitle = "Existing Experiment"

    mock_exp_response = build_list_response([{
        "id": 1,
        "title": "Existing Experiment"
    }])
    with requests_mock.Mocker() as mocker:
        get_exp_url = (
            "%s/api/v1/mydata_experiment/?format=json&title="
            "&folder_structure=Experiment%%20/%%20Dataset"
            "&user_folder_name=testfacility") % base_url
        mocker.get(get_exp_url, text=mock_exp_response)
        experiment = Experiment.get_exp_for_folder(folder)
    assert experiment.title == "Existing Experiment"
    folder.experiment = experiment

    # The same lookup URL serves both "Flowers" scenarios below.
    flowers_dataset_url = (
        "%s/api/v1/dataset/?format=json&experiments__id=1"
        "&description=Flowers&instrument__id=1") % base_url

    # Existing dataset record found, outside of a test run:
    FLAGS.test_run_running = False
    mock_dataset_response = build_list_response([{
        "id": 1,
        "description": "Flowers"
    }])
    with requests_mock.Mocker() as mocker:
        mocker.get(flowers_dataset_url, text=mock_dataset_response)
        mock_test_facility_response(mocker, base_url)
        mock_test_instrument_response(mocker, base_url)
        dataset = Dataset.create_dataset_if_necessary(folder)
    assert dataset.description == dataset_folder_name

    # Simulate creating dataset record during test run
    # and ensure that no exception is raised:
    with requests_mock.Mocker() as mocker:
        mocker.get(flowers_dataset_url, text=EMPTY_LIST_RESPONSE)
        FLAGS.test_run_running = True
        try:
            dataset = Dataset.create_dataset_if_necessary(folder)
        finally:
            FLAGS.test_run_running = False
    assert dataset is None

    # Simulate retrieving existing dataset record during test run
    # and ensure that no exception is raised:
    with requests_mock.Mocker() as mocker:
        get_dataset_url = (
            "%s/api/v1/dataset/?format=json&experiments__id=1"
            "&description=Existing%%20Dataset&instrument__id=1") % base_url
        mocker.get(get_dataset_url,
                   text=existing_dataset_response(1, "Existing Dataset"))
        mock_test_facility_response(mocker, base_url)
        folder.data_view_fields["name"] = "Existing Dataset"
        FLAGS.test_run_running = True
        try:
            dataset = Dataset.create_dataset_if_necessary(folder)
        finally:
            FLAGS.test_run_running = False
    assert dataset.description == "Existing Dataset"
def test_config_generate_command():
    """Test mydata config generate

    Mock the MyTardis API responses required for settings validation

    The answers typed at the prompts and the expected console output are
    both taken from the same ``inputs`` mapping, so the two cannot drift
    apart.  ``unload_modules()`` runs in a ``finally`` block so that changes
    to the settings singleton are discarded even when the test fails.
    """
    from mydata.commands.config import config_cmd
    from mydata.conf import settings

    try:
        with tempfile.NamedTemporaryFile() as tmpfile:
            settings.config_path = tmpfile.name
            settings.set_default_config()
            with requests_mock.Mocker() as mocker:
                # Declared in the order in which the command prompts.
                # NOTE(review): the contact email is a placeholder address -
                # confirm it against the project's test data.
                inputs = dict(
                    mytardis_url="http://mytardis.example.com",
                    username="testuser1",
                    api_key="api_key1",
                    facility="Test Facility",
                    instrument="Test Instrument",
                    data_directory=os.path.join(".", "tests", "testdata",
                                                "testdata-email-dataset"),
                    contact_name="Joe Bloggs",
                    contact_email="joe.bloggs@example.com",
                )
                mock_api_endpoints_response(mocker, inputs["mytardis_url"])
                mock_testfacility_user_response(mocker, inputs["mytardis_url"])
                mock_testuser_response(mocker, settings, inputs["username"])
                mock_test_facility_response(mocker, inputs["mytardis_url"])
                mock_test_instrument_response(mocker, inputs["mytardis_url"])

                runner = CliRunner()
                stdin = "\n".join([
                    inputs["mytardis_url"],
                    inputs["username"],
                    inputs["api_key"],
                    inputs["facility"],
                    inputs["instrument"],
                    inputs["data_directory"],
                    inputs["contact_name"],
                    inputs["contact_email"],
                ])
                result = runner.invoke(config_cmd, ["generate"], input=stdin)
                if result.exception:
                    raise result.exception

                # NOTE(review): confirm the blank-line layout of the expected
                # text below against the real command output.
                assert result.output == textwrap.dedent("""
                    MyTardis URL: %(mytardis_url)s
                    MyTardis Username: %(username)s
                    MyTardis API key: %(api_key)s
                    Facility Name: %(facility)s
                    Instrument Name: %(instrument)s
                    Data Directory: %(data_directory)s
                    Contact Name: %(contact_name)s
                    Contact Email: %(contact_email)s

                    Wrote settings to: %(config_path)s
                    """ % dict(inputs, config_path=settings.config_path))
    finally:
        # We need to ensure that changes to settings singleton don't
        # propagate to subsequent tests.
        # For other tests, we're doing this in tests/fixtures.py, but this
        # test doesn't use a fixture:
        unload_modules()
def test_post_uploads(set_username_dataset_config):
    """
    Test POST uploads

    Scans the data directory with mocked MyTardis responses, then uploads
    every discovered folder with the multipart POST method.  Each lookup
    must report NOT_FOUND and each upload must report COMPLETED; twelve of
    each are expected in total.
    """
    from mydata.conf import settings
    from mydata.tasks.folders import scan_folders
    from mydata.tasks.uploads import upload_folder
    from mydata.models.lookup import LookupStatus
    from mydata.models.upload import UploadStatus, UploadMethod

    base_url = settings.general.mytardis_url
    users = []
    folders = []

    # scan_folders() callbacks; experiments and groups are not collected.
    found_user = users.append
    found_dataset = folders.append
    found_exp = None
    found_group = None

    with requests_mock.Mocker() as mocker:
        mock_testfacility_user_response(mocker, base_url)
        mock_testusers_response(mocker, settings, ["testuser1", "testuser2"])
        mock_invalid_user_response(mocker, settings)
        mock_test_facility_response(mocker, base_url)
        mock_test_instrument_response(mocker, base_url)
        scan_folders(found_user, found_group, found_exp, found_dataset)

    assert sorted(user.username for user in users) == [
        "INVALID_USER",
        "testuser1",
        "testuser2",
    ]
    assert sorted(folder.name for folder in folders) == [
        "Birds",
        "Dataset with spaces",
        "Flowers",
        "InvalidUserDataset1",
    ]
    assert sum(folder.num_files for folder in folders) == 12

    experiment_owners = [
        ("testuser1", "Test User1"),
        ("testuser2", "Test User2"),
        ("INVALID_USER", "INVALID_USER (USER NOT FOUND IN MYTARDIS)"),
    ]
    dataset_lookup_url = (
        "%s/api/v1/dataset/?format=json&experiments__id=1"
        "&description=%s&instrument__id=1")
    datafile_lookup_url = Template((
        "%s/api/v1/mydata_dataset_file/?format=json&dataset__id=1&filename=$filename&directory="
    ) % base_url)
    post_dataset_url = "%s/api/v1/dataset/" % base_url
    post_datafile_url = "%s/api/v1/mydata_dataset_file/" % base_url

    lookups = []
    uploads = []

    def on_lookup(lookup):
        # Nothing has been uploaded yet, so no file should be found.
        msg = "File: %s had unexpected lookup result: %s" % (
            lookup.filename,
            lookup.message,
        )
        assert lookup.status == LookupStatus.NOT_FOUND, msg
        lookups.append(lookup)

    def on_upload(upload):
        msg = "File: %s had unexpected upload result: %s" % (
            upload.filename,
            upload.message,
        )
        assert upload.status == UploadStatus.COMPLETED, msg
        uploads.append(upload)

    with requests_mock.Mocker() as mocker:
        mock_test_facility_response(mocker, base_url)
        mock_test_instrument_response(mocker, base_url)
        for username, display_name in experiment_owners:
            title = "Test Instrument - %s" % display_name
            mock_exp_creation(mocker, settings, title, username)

        # Every dataset lookup and every datafile lookup comes back empty.
        for folder in folders:
            mocker.get(
                dataset_lookup_url % (base_url, quote(folder.name)),
                text=EMPTY_LIST_RESPONSE)
            for index in range(folder.num_files):
                filename = os.path.basename(folder.get_datafile_path(index))
                mocker.get(
                    datafile_lookup_url.substitute(filename=quote(filename)),
                    text=EMPTY_LIST_RESPONSE)

        mocker.post(post_datafile_url, status_code=201)

        for folder in folders:
            # Re-register the dataset POST so that the response describes
            # the folder about to be uploaded.
            mocker.post(
                post_dataset_url,
                text=created_dataset_response(1, folder.name))
            upload_folder(folder, on_lookup, on_upload,
                          UploadMethod.MULTIPART_POST)

        # Ensure that all 12 files were looked up:
        assert len(lookups) == 12
        # Ensure that all 12 files were uploaded:
        assert len(uploads) == 12