def test_translate_index():
    """Check translate_index for a known, an unknown, and an invalid index name.

    Known and unknown names resolve to their Search UUIDs; a name that does
    not correspond to any index is returned unchanged.
    """
    cases = {
        # Known index: resolved to its UUID.
        "mdf": "1a57bbe5-5272-477f-9d31-343b8258b7a5",
        # Unknown (not locally cached) index: still resolved to a UUID.
        "frdr": "9be6dd95-48f0-48bb-82aa-c6577a988775",
        # Invalid index: passed through unchanged.
        "invalid_index_not_real": "invalid_index_not_real",
    }
    for index_name, expected_uuid in cases.items():
        assert mdf_toolbox.translate_index(index_name) == expected_uuid
def ingest_to_search(name, path, idx="dlhub-test"):
    """Ingest the model metadata into Globus Search.

    Args:
        name (str): The model name. (generally the same as container
            directory name but not always) e.g. "cifar10"
        path (str): Path to the model metadata. e.g. "metadata/cifar10.json"
            If ``None``, the function returns without doing anything.
        idx (str): The Globus Index to upload search metadata to.
            Defaults=dlhub-test
    """
    # Fix: compare to None with `is`, not `==` (PEP 8; `==` can be
    # overridden by custom __eq__ implementations).
    if path is None:
        return
    dl = client.DLHub()
    uuid = dl.get_id_by_name(name)
    iden = "https://dlhub.org/api/v1/servables/{}".format(uuid)
    # Resolve a human-readable index name to its Search UUID.
    index = mdf_toolbox.translate_index(idx)

    with open(path, 'r') as f:
        ingestable = json.load(f)

    # Wrap the record as a public GMetaEntry, then as a GIngest list.
    ingestable = mdf_toolbox.format_gmeta(ingestable, acl="public",
                                          identifier=iden)
    ingestable = mdf_toolbox.format_gmeta(
        [ingestable])  # Make it a GIngest list of GMetaEntry

    ingest_client = mdf_toolbox.login(
        services=["search_ingest"])["search_ingest"]
    ingest_client.ingest(index, ingestable)
    print("Ingestion of {} to DLHub servables complete".format(name))
def test_search(capsys):
    """Exercise SearchHelper.search: empty-query error, the info flag,
    the limit parameter, reset_query, and default-index resolution."""
    # Error on no query
    with pytest.raises(ValueError):
        f = SearchHelper(INDEX, search_client=SEARCH_CLIENT)
        f.search()

    # info=False yields a bare list of result dicts.
    res2 = f.search("Al", info=False)
    assert isinstance(res2, list)
    assert isinstance(res2[0], dict)

    # info=True yields a (results, info) tuple.
    res3 = f.search("Al", info=True)
    assert isinstance(res3, tuple)
    assert isinstance(res3[0], list)
    assert isinstance(res3[0][0], dict)
    assert isinstance(res3[1], dict)

    # Check limit
    res4 = f.match_term("Al").search(limit=3)
    assert len(res4) == 3

    # Check reset_query: keeping the query should give the same result set.
    f.match_field("mdf.source_name", "ta_melting")
    res5 = f.search(reset_query=False)
    res6 = f.search()
    assert all(r in res6 for r in res5) and all(r in res5 for r in res6)

    # Check default index resolves to the expected UUID.
    f2 = SearchHelper(INDEX, search_client=SEARCH_CLIENT)
    result_info = f2.match_term("data").search(limit=1, info=True)[1]
    assert result_info["index_uuid"] == mdf_toolbox.translate_index(INDEX)
def _mapping(self):
    """Fetch the entire mapping for the specified index.

    Returns:
        dict: The full mapping for the index.
    """
    # Resolve the (possibly human-readable) index name before building the URL.
    endpoint = "/beta/index/{}/mapping".format(
        mdf_toolbox.translate_index(self.index))
    return self.__search_client.get(endpoint)["mappings"]
def __init__(self, index, **kwargs):
    """Create a SearchHelper object.

    Arguments:
        index (str): The Globus Search index to search on.

    Keyword Arguments:
        search_client (globus_sdk.SearchClient): The Globus Search client to use for
                searching. If not provided, one will be created and the user may be
                asked to log in. **Default**: ``None``.
        anonymous (bool): If ``True``, will not authenticate with Globus Auth.
                If ``False``, will require authentication (either a SearchClient
                or a user-interactive login). **Default:** ``False``.

                Caution:
                    Authentication is required to view non-public data in Search.
                    An anonymous SearchHelper will only return public results.
        app_name (str): The application name to use. Should be changed for
                subclassed clients, and left alone otherwise. Only used if
                performing login flow. **Default**: ``"SearchHelper_Client"``.
        client_id (str): The ID of a native client to use when authenticating.
                Only used if performing login flow.
                **Default**: The default SearchHelper client ID.
        q (str): A query string to initialize the SearchHelper with.
                Intended for internal use.
        advanced (bool): The initial advanced state for thie SearchHelper.
                Intended for internal use.
    """
    # Client precedence: explicit search_client > anonymous login >
    # interactive (possibly user-prompting) login.
    if kwargs.get("search_client"):
        self.__search_client = kwargs["search_client"]
    elif kwargs.get("anonymous"):
        self.__search_client = mdf_toolbox.anonymous_login(
            ["search"])["search"]
    else:
        self.__search_client = mdf_toolbox.login(
            app_name=kwargs.get("app_name", self.__app_name),
            client_id=kwargs.get("client_id", self.__client_id),
            services=["search"])["search"]

    # Get the UUID for the index if the name was provided
    self.index = mdf_toolbox.translate_index(index)

    # Start from a blank query; seed q/advanced when supplied (internal use).
    self.__query = deepcopy(BLANK_QUERY)
    if kwargs.get("q"):
        self.__query["q"] = kwargs["q"]
    if kwargs.get("advanced"):
        self.__query["advanced"] = kwargs["advanced"]
def ingest_to_search(name, path, idx):
    """Ingest the model metadata at ``path`` into a Globus Search index.

    Args:
        name (str): The model name used to look up the servable's UUID.
        path (str): Path to the model metadata JSON file.
            If ``None``, the function returns without doing anything.
        idx (str): The Globus Search index (name or UUID) to ingest into.
    """
    # Fix: compare to None with `is`, not `==` (PEP 8; `==` can be
    # overridden by custom __eq__ implementations).
    if path is None:
        return
    dl = client.DLHub()
    uuid = dl.get_id_by_name(name)
    iden = "https://dlhub.org/api/v1/servables/{}".format(uuid)
    # Resolve a human-readable index name to its Search UUID.
    index = mdf_toolbox.translate_index(idx)

    with open(path, 'r') as f:
        ingestable = json.load(f)

    # Wrap the record as a public GMetaEntry, then as a GIngest list.
    ingestable = mdf_toolbox.format_gmeta(ingestable, acl="public",
                                          identifier=iden)
    ingestable = mdf_toolbox.format_gmeta(
        [ingestable])  # Make it a GIngest list of GMetaEntry

    ingest_client = mdf_toolbox.login(
        services=["search_ingest"])["search_ingest"]
    ingest_client.ingest(index, ingestable)
    print("Ingestion of {} complete".format(name))
def search_ingest(task):
    """Ingest the servable data into a Globus Search index.

    Args:
        task (dict): the task description. Must contain ``task['dlhub']['id']``;
            may contain ``task['dlhub']['visible_to']`` (a list of principals).
    """
    logging.debug("Ingesting servable into Search.")

    idx = '847c9105-18a0-4ffb-8a71-03dd76dfcc9d'
    iden = "https://dlhub.org/servables/{}".format(task['dlhub']['id'])
    index = mdf_toolbox.translate_index(idx)

    ingestable = task
    d = [convert_dict(ingestable, str)]

    visible_to = task['dlhub'].get('visible_to', ['public'])
    # Add public so it isn't an empty list (an empty ACL would make the
    # entry invisible to everyone).
    if not visible_to:
        visible_to = ['public']

    glist = [mdf_toolbox.format_gmeta(document, visible_to, iden)
             for document in d]
    gingest = mdf_toolbox.format_gmeta(glist)

    ingest_client = mdf_toolbox.login(services=["search_ingest"],
                                      no_local_server=True,
                                      no_browser=True)["search_ingest"]
    logging.info("ingesting to search")
    logging.info(gingest)
    # Fix: ingest to the *translated* index UUID. Previously the raw `idx`
    # was passed and the computed `index` was never used; for this hard-coded
    # UUID the two coincide, but the translated value is the correct one if
    # `idx` is ever changed back to a name like "dlhub".
    ingest_client.ingest(index, gingest)
    logging.info("Ingestion of {} to DLHub servables complete".format(iden))