async def test_get_links(
    client: (
        'Callable[[str, FastAPI, str, Literal["get", "post", "put", "delete", "patch"]], Awaitable[Response]]'
    ),
):
    """Test GET /links"""
    from optimade.models import LinksResponse

    response = await client("/links")
    assert response.status_code == 200, f"Request failed: {response.json()}"

    response = LinksResponse(**response.json())
    assert response

    assert response.meta.data_returned == 2
    assert response.meta.data_available == 2
    assert not response.meta.more_data_available
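# The test above relies on an async ``client`` fixture provided elsewhere in the test
# suite (and on an async test runner such as pytest-asyncio). A minimal sketch of such
# a fixture is shown below, assuming the reference server app at
# ``optimade.server.main:app`` and httpx's ASGI transport so no live server is needed;
# the real fixture also accepts an app, a base URL and an HTTP method, omitted here.
import httpx
import pytest


@pytest.fixture
def client():
    """Simplified async client fixture: GET a relative URL against the ASGI app."""

    async def _client(request: str) -> httpx.Response:
        from optimade.server.main import app  # assumed application entry point

        async with httpx.AsyncClient(
            transport=httpx.ASGITransport(app=app), base_url="http://example.org"
        ) as http_client:
            return await http_client.get(request)

    return _client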
def test_providers(self):
    """
    Validates each providers.json as a list of `Provider` objects.
    """
    links_dir = TOP_DIR / "src" / "links"
    versions = [
        v.parts[-1]
        for v in links_dir.iterdir()
        if v.is_dir() and v.parts[-1].startswith("v")
    ]

    for version in versions:
        path = pathlib.Path(f"{TOP_DIR}/src/links/{version}/providers.json").resolve()
        with open(path, "r") as f:
            json_rep = json.load(f)
        LinksResponse(**json_rep)
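# The same validation can be run on a single file outside the test suite. A minimal
# sketch, assuming only that the file follows the OPTIMADE links response format; the
# example path is illustrative, and pydantic raises a ValidationError on any mismatch.
import json
import pathlib

from optimade.models import LinksResponse


def validate_providers_file(path: pathlib.Path) -> LinksResponse:
    """Parse and validate one providers.json file as an OPTIMADE links response."""
    with open(path, "r") as handle:
        return LinksResponse(**json.load(handle))


# Example: validate_providers_file(pathlib.Path("src/links/v1/providers.json"))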
async def load_optimade_providers_databases() -> None:  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    """Load in the providers' OPTIMADE databases from Materials-Consortia

    Utilize the Materials-Consortia list of OPTIMADE providers at
    [https://providers.optimade.org](https://providers.optimade.org).

    Load in all databases with a valid base URL.
    """
    import asyncio

    import httpx
    from optimade import __api_version__
    from optimade.models import LinksResponse
    from optimade.models.links import LinkType
    from optimade.server.routers.utils import BASE_URL_PREFIXES

    from optimade_gateway.common.utils import clean_python_types, get_resource_attribute
    from optimade_gateway.models.databases import DatabaseCreate
    from optimade_gateway.queries.perform import db_get_all_resources
    from optimade_gateway.routers.utils import resource_factory

    if not CONFIG.load_optimade_providers_databases:
        LOGGER.debug(
            "Will not load databases from Materials-Consortia list of providers."
        )
        return

    if TYPE_CHECKING or bool(os.getenv("MKDOCS_BUILD", "")):  # pragma: no cover
        providers: "Union[httpx.Response, LinksResponse]"

    async with httpx.AsyncClient() as client:
        providers = await client.get(
            f"https://providers.optimade.org/v{__api_version__.split('.', maxsplit=1)[0]}"
            "/links"
        )

    if providers.is_error:
        LOGGER.warning(
            "Response from Materials-Consortia's list of OPTIMADE providers was not "
            "successful (status code != 200). No databases will therefore be added at "
            "server startup."
        )
        return

    LOGGER.info(
        "Registering Materials-Consortia list of OPTIMADE providers' databases."
    )

    providers = LinksResponse(**providers.json())

    valid_providers = []
    for provider in providers.data:
        if get_resource_attribute(provider, "id") in ("exmpl", "optimade"):
            LOGGER.info(
                "- %s (id=%r) - Skipping: Not a real provider.",
                get_resource_attribute(provider, "attributes.name", "N/A"),
                get_resource_attribute(provider, "id"),
            )
            continue

        if not get_resource_attribute(provider, "attributes.base_url"):
            LOGGER.info(
                "- %s (id=%r) - Skipping: No base URL information.",
                get_resource_attribute(provider, "attributes.name", "N/A"),
                get_resource_attribute(provider, "id"),
            )
            continue

        valid_providers.append(provider)

    # Run queries to each database using the supported major versioned base URL to get
    # a list of the provider's databases.
    # There is no need to use ThreadPoolExecutor here, since we want this to block
    # everything and then finish, before the server actually starts up.
    provider_queries = [
        asyncio.create_task(
            db_get_all_resources(
                database=provider,
                endpoint="links",
                response_model=LinksResponse,
            )
        )
        for provider in valid_providers
    ]

    for query in asyncio.as_completed(provider_queries):
        provider_databases, provider = await query

        LOGGER.info(
            "- %s (id=%r) - Processing",
            get_resource_attribute(provider, "attributes.name", "N/A"),
            get_resource_attribute(provider, "id"),
        )

        if not provider_databases:
            LOGGER.info(" - No OPTIMADE databases found.")
            continue

        provider_databases = [
            db
            for db in provider_databases
            if await clean_python_types(
                get_resource_attribute(db, "attributes.link_type", "")
            )
            == LinkType.CHILD.value
        ]

        if not provider_databases:
            LOGGER.info(" - No OPTIMADE databases found.")
            continue

        for database in provider_databases:
            if not get_resource_attribute(database, "attributes.base_url"):
                LOGGER.info(
                    " - %s (id=%r) - Skipping: No base URL information.",
                    get_resource_attribute(database, "attributes.name", "N/A"),
                    get_resource_attribute(database, "id"),
                )
                continue

            LOGGER.info(
                " - %s (id=%r) - Checking versioned base URL and /structures",
                get_resource_attribute(database, "attributes.name", "N/A"),
                get_resource_attribute(database, "id"),
            )

            async with httpx.AsyncClient() as client:
                try:
                    db_response = await client.get(
                        f"{str(get_resource_attribute(database, 'attributes.base_url')).rstrip('/')}"  # pylint: disable=line-too-long
                        f"{BASE_URL_PREFIXES['major']}/structures",
                    )
                except httpx.ReadTimeout:
                    LOGGER.info(
                        " - %s (id=%r) - Skipping: Timeout while requesting "
                        "%s/structures.",
                        get_resource_attribute(database, "attributes.name", "N/A"),
                        get_resource_attribute(database, "id"),
                        BASE_URL_PREFIXES["major"],
                    )
                    continue

            if db_response.status_code != 200:
                LOGGER.info(
                    " - %s (id=%r) - Skipping: Response from %s/structures is not "
                    "200 OK.",
                    get_resource_attribute(database, "attributes.name", "N/A"),
                    get_resource_attribute(database, "id"),
                    BASE_URL_PREFIXES["major"],
                )
                continue

            new_id = (
                f"{get_resource_attribute(provider, 'id')}"
                f"/{get_resource_attribute(database, 'id')}"
                if len(provider_databases) > 1
                else get_resource_attribute(database, "id")
            )

            registered_database, _ = await resource_factory(
                DatabaseCreate(
                    id=new_id,
                    **await clean_python_types(
                        get_resource_attribute(database, "attributes", {})
                    ),
                )
            )

            LOGGER.info(
                " - %s (id=%r) - Registered database with id=%r",
                get_resource_attribute(database, "attributes.name", "N/A"),
                get_resource_attribute(database, "id"),
                registered_database.id,
            )
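# A sketch of how the loader above could be wired into application startup, assuming a
# FastAPI app object; the actual event registration in optimade-gateway may differ.
# Running it as a startup handler blocks serving requests until all reachable provider
# databases have been registered.
from fastapi import FastAPI

app = FastAPI(title="OPTIMADE gateway (sketch)")


@app.on_event("startup")
async def register_provider_databases() -> None:
    """Register Materials-Consortia provider databases before serving requests."""
    await load_optimade_providers_databases()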
def test_index_metadb(self):
    """
    Validates that all (non-null) entries in providers.json point to an index meta-db.
    """
    # We collect all errors and report all of them at the end, see below
    problems = []

    links_dir = TOP_DIR / "src" / "links"
    versions = [
        v.parts[-1]
        for v in links_dir.iterdir()
        if v.is_dir() and v.parts[-1].startswith("v")
    ]

    for version in versions:
        path = pathlib.Path(
            f"{TOP_DIR}/src/links/{version}/providers.json"
        ).resolve()
        with open(path, "r") as f:
            json_rep = json.load(f)
        response = LinksResponse(**json_rep)

        for entry in response.data:
            entry_id = entry.id
            if entry.attributes.dict().get("base_url", None) is not None:
                # The provider has a non-null base_url
                print(f'[INFO] Checking provider "{entry_id}" ({version})')

                # I check the /info endpoint
                entry_base_url = (
                    entry.attributes.base_url.href
                    if isinstance(entry.attributes.base_url, Link)
                    else entry.attributes.base_url
                )
                info_endpoint = f"{entry_base_url}/{version}/info"
                tested_info_endpoints = [info_endpoint]
                try:
                    try:
                        response_content = query_optimade(info_endpoint)
                    except urllib.error.HTTPError as exc:
                        if (
                            apply_v0_workarounds
                            and version == "v1"
                            and exc.code == 404
                        ):
                            try:
                                # Temporary workaround for optimade-python-tools
                                # until v1 is released
                                info_endpoint = f"{entry_base_url}/v0.10/info"
                                tested_info_endpoints.append(info_endpoint)
                                response_content = query_optimade(info_endpoint)
                            except urllib.error.HTTPError as exc:
                                # Temporary workaround for nomad, which uses v0
                                # as a prefix
                                info_endpoint = f"{entry_base_url}/v0/info"
                                tested_info_endpoints.append(info_endpoint)
                                response_content = query_optimade(info_endpoint)
                        else:
                            raise
                except urllib.error.HTTPError as exc:
                    fallback_string = (
                        ""
                        if len(tested_info_endpoints) == 1
                        else f" (I tried all these URLs: {tested_info_endpoints})"
                    )
                    problems.append(
                        f'Provider "{entry_id}" {info_endpoint} endpoint is not '
                        f"reachable{fallback_string}. Error: {str(exc)}"
                    )
                    continue

                try:
                    info_response = IndexInfoResponse(**json.loads(response_content))
                except Exception as exc:
                    problems.append(
                        f'Provider "{entry_id}": {info_endpoint} endpoint has problems '
                        f"during validation.\nError message:\n{str(exc)}"
                    )
                    continue

                # If unspecified, it should be assumed to be False, according to the
                # OPTIMADE specification
                is_index = info_response.data.attributes.dict().get("is_index", False)
                if not is_index:
                    print(f"  > PROBLEM DETECTED with provider '{entry_id}'.")
                    print(response_content)
                    problems.append(
                        f'Provider "{entry_id}" is NOT providing an index '
                        f"meta-database at {info_endpoint}"
                    )
                    continue

                print(
                    f'[INFO] Provider "{entry_id}" ({version}) validated correctly '
                    f"({info_endpoint})"
                )

    # I am collecting all problems and printing them at the end so we get a full overview
    if problems:
        err_msg = "PROBLEMS DETECTED!\n\n" + "\n\n".join(problems)
        # Prepend with [ERROR] so that it gets colored in the GitHub output
        err_msg = "\n".join(f"[ERROR] {line}" for line in err_msg.splitlines())
        print(err_msg)
        raise AssertionError(err_msg)
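# ``query_optimade`` and ``apply_v0_workarounds`` are defined elsewhere in the test
# module. Below is a minimal sketch of the helper, assumed to return the raw response
# body and to let urllib.error.HTTPError propagate on HTTP errors, consistent with how
# it is called above; the actual helper may add headers, timeouts or retries.
import urllib.request


def query_optimade(endpoint: str, timeout: float = 60.0) -> bytes:
    """GET an OPTIMADE endpoint and return the raw response content."""
    with urllib.request.urlopen(endpoint, timeout=timeout) as url_response:
        return url_response.read()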
def get_index_metadb_data(base_url):
    """Return some info after inspecting the base_url of this index_metadb."""
    versions_to_test = ['v1', 'v0.10', 'v0']
    provider_data = {}

    for version in versions_to_test:
        info_endpoint = f'{base_url}/{version}/info'
        try:
            with urllib.request.urlopen(info_endpoint) as url_response:
                response_content = url_response.read()
            provider_data['info_endpoint'] = info_endpoint
            break
        except urllib.error.HTTPError as exc:
            if exc.code == 404:
                continue
            else:
                provider_data['state'] = "problem"
                provider_data['tooltip_lines'] = (
                    "Generic error while fetching the data:\n{}".format(
                        traceback.format_exc()
                    ).splitlines()
                )
                provider_data['color'] = "light-red"
                return provider_data
    else:
        # Did not break: no version found
        provider_data['state'] = "not found"
        provider_data['tooltip_lines'] = [
            "I couldn't find the index meta-database, I tried the following versions: {}".format(
                ", ".join(versions_to_test)
            )
        ]
        provider_data['color'] = "light-red"
        return provider_data

    provider_data['state'] = "found"
    provider_data['color'] = "green"
    provider_data['version'] = version
    provider_data['default_subdb'] = None

    # Let's continue, it was found
    try:
        json_response = json.loads(response_content)
        IndexInfoResponse(**json_response)
    except Exception as exc:
        # Adapt the badge info
        provider_data['state'] = "validation error"
        provider_data['color'] = "orange"
        provider_data['tooltip_lines'] = (
            "Error while validating the Index MetaDB:\n{}".format(
                traceback.format_exc()
            ).splitlines()
        )
        provider_data['version'] = version
    else:
        try:
            # For now I use this way of getting it
            provider_data['default_subdb'] = (
                json_response['data']['relationships']['default']['data']['id']
            )
        except Exception:
            # For now, whatever the error, I just ignore it
            pass

    links_endpoint = f'{base_url}/{version}/links'
    try:
        with urllib.request.urlopen(links_endpoint) as url_response:
            response_content = url_response.read()
    except urllib.error.HTTPError as exc:
        provider_data['links_state'] = "problem"
        provider_data['links_tooltip_lines'] = (
            "Generic error while fetching the /links endpoint:\n{}".format(
                traceback.format_exc()
            ).splitlines()
        )
        provider_data['links_color'] = "light-red"
        return provider_data

    provider_data['links_endpoint'] = links_endpoint
    provider_data['links_state'] = "found"
    provider_data['links_color'] = "green"

    try:
        links_json_response = json.loads(response_content)
        LinksResponse(**links_json_response)
    except Exception as exc:
        # Adapt the badge info
        provider_data['links_state'] = "validation error"
        provider_data['links_color'] = "orange"
        provider_data['links_tooltip_lines'] = (
            "Error while validating the /links endpoint of the Index MetaDB:\n{}".format(
                traceback.format_exc()
            ).splitlines()
        )
        return provider_data

    # We also filter out any non-child DB link type.
    all_linked_dbs = links_json_response['data']
    subdbs = [
        subdb for subdb in all_linked_dbs
        if subdb['attributes'].get('link_type', 'UNKNOWN') == 'child'
    ]
    print(
        f"  [{len(all_linked_dbs)} links found, of which {len(subdbs)} child sub-dbs]"
    )

    # Order putting the default first, and then the rest in alphabetical order (by key)
    # Note that False gets before True.
    provider_data['subdbs'] = sorted(
        subdbs,
        key=lambda subdb: (subdb['id'] != provider_data['default_subdb'], subdb['id'])
    )

    # Count the non-null ones
    non_null_subdbs = [
        subdb for subdb in provider_data["subdbs"]
        if subdb["attributes"]["base_url"]
    ]
    provider_data['num_non_null_subdbs'] = len(non_null_subdbs)

    provider_data["subdb_validation"] = {}
    for subdb in non_null_subdbs:
        url = subdb["attributes"]["base_url"]
        # Append the versioned prefix only if the base URL does not already end in /v1
        results = validate_childdb(url if url.endswith("/v1") else url + "/v1")

        provider_data["subdb_validation"][url] = {}
        provider_data["subdb_validation"][url]["valid"] = not results["failure_count"]
        provider_data["subdb_validation"][url]["success_count"] = results["success_count"]
        provider_data["subdb_validation"][url]["failure_count"] = results["failure_count"]
        provider_data["subdb_validation"][url]["internal_errors"] = bool(
            results["internal_failure_count"]
        )
        # Count errors apart from internal errors
        provider_data["subdb_validation"][url]["total_count"] = (
            results["success_count"] + results["failure_count"]
        )

        ratio = results["success_count"] / (
            results["success_count"] + results["failure_count"]
        )
        # Use the red/green values from the badge css
        ratio = 2 * (max(0.5, ratio) - 0.5)
        green = (77, 175, 74)
        red = (228, 26, 28)
        colour = list(green)
        for ind, channel in enumerate(colour):
            gradient = red[ind] - green[ind]
            colour[ind] += gradient * (1 - ratio)
        colour = [str(int(channel)) for channel in colour]
        provider_data["subdb_validation"][url][
            "_validator_results_colour"
        ] = f"rgb({','.join(colour)});"

    return provider_data
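# Example usage of the function above (a sketch; the base URL is a placeholder, not a
# real provider): inspect one index meta-database and print a short summary built from
# the keys populated above.
if __name__ == "__main__":
    data = get_index_metadb_data("https://example.org/optimade")
    print(
        f"state={data['state']}, version={data.get('version')}, "
        f"child sub-databases with a base URL: {data.get('num_non_null_subdbs', 0)}"
    )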