def test_get_end_point():
    """get_end_point joins base url and target with single slashes and a trailing slash."""
    # (base url, target, expected end point) - covers trailing slashes being
    # absent or present on either argument.
    cases = [
        ("https://someurl", "target", "https://someurl/target/"),
        ("https://someurl/test", "target", "https://someurl/test/target/"),
        ("https://someurl/test/", "target", "https://someurl/test/target/"),
        ("https://someurl/test/", "target/", "https://someurl/test/target/"),
    ]
    for base_url, target, expected in cases:
        assert get_end_point(base_url, target) == expected
def test_get_on_end_point_unpaginated_dict_result():
    """A plain dict response is returned unchanged, and two lookups cause only one GET."""
    with patch("requests.get") as mock_get:
        payload = {"url": "mock_url_v", "version": "1", "model": "mock_url_b"}
        mock_get.return_value = MockResponse(payload)
        end_point = get_end_point(DATA_REGISTRY_URL, "target1")

        # Deliberately look the same end point up twice: the single-call
        # assertion below only holds if the second lookup issues no request.
        for _ in range(2):
            assert get_on_end_point(end_point, TOKEN) == payload

        mock_get.assert_called_once_with(end_point, headers=get_headers(TOKEN))
def test_get_on_end_point_paginated_no_count():
    """A paginated response without a "count" key yields just its "results" list."""
    with patch("requests.get") as mock_get:
        page_items = [{"url": "mock_url_v", "version": "1", "model": "mock_url_b"}]
        # Single page: "next" is None and there is no "count" entry.
        response_body = {"next": None, "results": page_items}
        mock_get.return_value = MockResponse(response_body)
        end_point = get_end_point(DATA_REGISTRY_URL, "target1")

        # Deliberately look the same end point up twice: the single-call
        # assertion below only holds if the second lookup issues no request.
        for _ in range(2):
            assert get_on_end_point(end_point, TOKEN) == page_items

        mock_get.assert_called_once_with(end_point, headers=get_headers(TOKEN))
def test_get_on_end_point():
    """List responses are returned unchanged and repeated lookups reuse the first result."""
    with patch("requests.get") as mock_get:
        first_end_point = get_end_point(DATA_REGISTRY_URL, "target1")
        first_payload = [{"url": "mock_url_v", "version": "1", "model": "mock_url_b"}]
        mock_get.return_value = MockResponse(first_payload)

        # Two lookups of the same end point must trigger exactly one GET.
        for _ in range(2):
            assert get_on_end_point(first_end_point, TOKEN) == first_payload
        mock_get.assert_called_once_with(first_end_point, headers=get_headers(TOKEN))

        # A different end point picks up the newly configured response ...
        second_payload = [{"a": 1}, {"b": 2}]
        mock_get.return_value = MockResponse(second_payload)
        second_end_point = get_end_point(DATA_REGISTRY_URL, "target2")
        assert get_on_end_point(second_end_point, TOKEN) == second_payload

        # ... while the first end point still answers with its original data,
        # even though the mock would now return the second payload.
        assert get_on_end_point(first_end_point, TOKEN) == first_payload
def upload_text_to_text_table(text: str, data_registry_url: str, token: str) -> str:
    """
    Store a piece of text in the text_file table and register where it lives.

    The text and a storage_root pointing at the text_file end point are posted
    first; a storage_location that references both (plus the SHA-1 of the text)
    is posted afterwards.

    :param text: text to upload
    :param data_registry_url: the url of the data registry
    :param token: personal access token
    :return: storage_location url reference for this text
    """
    digest = sha1(text.encode("utf-8")).hexdigest()

    text_payload = {DataRegistryField.text: text}
    root_payload = {
        DataRegistryField.root: get_end_point(data_registry_url, DataRegistryTarget.text_file),
        DataRegistryField.name: "text_file",
    }
    first_upload = {
        "post": [
            {"target": DataRegistryTarget.text_file, "data": text_payload},
            {"target": DataRegistryTarget.storage_root, "data": root_payload},
        ]
    }
    upload_from_config(first_upload, data_registry_url, token)

    # Look up the urls of the two entries that were just posted.
    text_url = get_reference(text_payload, DataRegistryTarget.text_file, data_registry_url, token)
    root_url = get_reference(root_payload, DataRegistryTarget.storage_root, data_registry_url, token)

    # The path is built from the second-to-last "/"-separated piece of the
    # text url (the last piece is empty when the url ends with a slash).
    text_id = text_url.split("/")[-2]
    location_payload = {
        DataRegistryField.storage_root: root_url,
        DataRegistryField.hash: digest,
        DataRegistryField.path: f"{text_id}/?format=text",
    }
    second_upload = {
        "post": [
            {"target": DataRegistryTarget.storage_location, "data": location_payload},
        ]
    }
    upload_from_config(second_upload, data_registry_url, token)

    return get_reference(
        location_payload, DataRegistryTarget.storage_location, data_registry_url, token
    )
def test_get_on_end_point_paginated_multiple_pages():
    """Results from every page are concatenated by following each page's "next" url."""
    with patch("requests.get") as mock_get:
        page_items = [{"url": "mock_url_v", "version": "1", "model": "mock_url_b"}]
        first_page = {"count": 2, "next": "target2", "results": page_items}
        last_page = {"count": 2, "next": None, "results": page_items}
        # Only two responses are queued, one per page.
        mock_get.side_effect = [MockResponse(first_page), MockResponse(last_page)]

        end_point = get_end_point(DATA_REGISTRY_URL, "target1")
        combined = [*page_items, *page_items]

        # Looked up twice on purpose; no third response is queued on the mock.
        for _ in range(2):
            assert get_on_end_point(end_point, TOKEN) == combined

        headers = get_headers(TOKEN)
        mock_get.assert_has_calls([
            call(end_point, headers=headers),
            call("target2", headers=headers),
        ])
def test_upload_from_config_good_version():
    """A POST entry whose version is '1.1.1' is sent to its target end point."""
    # The YAML document must keep its block layout (one key per line);
    # squeezed onto a single line it is no longer parseable YAML.
    config = yaml.safe_load(
        """
        post:
          - target: 'end_point_1'
            data:
              version: '1.1.1'
        """
    )
    with patch("requests.get") as get, patch("requests.post") as post:
        # An empty GET result: nothing is found for the data being posted.
        get.return_value = MockResponse([])
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        post.assert_called_once_with(
            get_end_point(DATA_REGISTRY_URL, "end_point_1"),
            data={"version": "1.1.1"},
            headers=get_headers(TOKEN),
        )
def test_upload_from_config_with_post_not_present():
    """Data that the GET lookup does not find is POSTed exactly once."""
    # The YAML document must keep its block layout (one key per line);
    # squeezed onto a single line it is no longer parseable YAML.
    config = yaml.safe_load(
        """
        post:
          - target: 'end_point_1'
            data:
              name: 'B'
              description: 'posted B'
        """
    )
    with patch("requests.get") as get, patch("requests.post") as post:
        # An empty GET result: nothing is found for the data being posted.
        get.return_value = MockResponse([])
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        post.assert_called_once_with(
            get_end_point(DATA_REGISTRY_URL, "end_point_1"),
            data={"name": "B", "description": "posted B"},
            headers=get_headers(TOKEN),
        )
def _validate_version(data) -> None:
    """Raise ValueError if ``data`` carries a version that is not valid SemVer."""
    if DataRegistryField.version not in data:
        return
    version = data[DataRegistryField.version]
    try:
        semver.VersionInfo.parse(version)
    except (TypeError, ValueError) as e:
        # TypeError covers non-string versions (e.g. an unquoted YAML ``1.1``
        # that was loaded as a float) so they get the same helpful message.
        raise ValueError(
            f"version must match the Semantic Versioning (SemVer) "
            f"format but was '{version}'") from e


def upload_from_config(config: Dict[str, List[YamlDict]], data_registry_url: str, token: str) -> None:
    """
    Iterates over the provided input configuration and calls PATCH or POST with the data to the
    data registry as appropriate, resolving references to other data where required.

    All "patch" entries are processed before any "post" entry. A POST is only sent when no
    existing reference is found for the data; a PATCH is only sent when one is found. An entry
    whose data has a truthy "fail_fast" value raises instead of being skipped silently.

    :param config: loaded configuration; the "patch" and "post" keys each hold a list of
        entries with a "target" and a "data" item. A missing or empty key is skipped.
    :param data_registry_url: base url of the data registry
    :param token: personal access token
    :raises ValueError: if the data has a version that is not valid SemVer, or if a
        fail_fast entry could not be applied
    """
    for method in ("PATCH", "POST"):
        post = method == "POST"
        # "or []" also tolerates a key that is present but empty (loaded as None).
        entries = config.get(method.lower()) or []
        for entry in entries:
            target = entry["target"]
            data = entry["data"]
            # NOTE(review): fail_fast is read from the data itself and is not
            # removed here before the data is resolved and sent - confirm intended.
            fail_fast = data.get("fail_fast", False)
            logger.info(f"Working on {method} for target '{target}'")
            data = resolve_references(data, data_registry_url, token)
            reference = get_reference(data, target, data_registry_url, token)

            if post:
                # POST creates new data, so only act when nothing exists yet.
                end_point = get_end_point(data_registry_url, target)
                requests_func = requests.post
                do_request = reference is None
                # A successful POST adds data, so earlier cached lookups are stale.
                clear_cache = do_request
            else:
                # PATCH modifies the existing entry addressed by its reference.
                end_point = reference
                requests_func = requests.patch
                do_request = reference is not None
                clear_cache = False

            if do_request:
                _validate_version(data)
                logger.info(f"{method} {end_point}: {data}")
                result = requests_func(end_point, data=data, headers=get_headers(token))
                result.raise_for_status()
                url = result.json().get(DataRegistryField.url)
                logger.info(
                    f"{method} successful: {result.status_code}. URL: {url}")
            elif fail_fast and post:
                raise ValueError(
                    f"fail_fast POST was attempted but data already existed at {end_point}: {data}"
                )
            elif fail_fast:
                raise ValueError(
                    f"fail_fast PATCH was attempted but no data existed at {end_point}: {data}"
                )
            else:
                logger.info(
                    f"Nothing to do for {method} for target '{target}'")

            if clear_cache:
                get_on_end_point.cache_clear()