Example #1
0
def test_translate_index():
    """Verify translate_index against index names with fixed expected results."""
    expectations = (
        # Index names that resolve to a UUID
        ("mdf", "1a57bbe5-5272-477f-9d31-343b8258b7a5"),
        ("frdr", "9be6dd95-48f0-48bb-82aa-c6577a988775"),
        # An unrecognized name is expected to come back unchanged
        ("invalid_index_not_real", "invalid_index_not_real"),
    )
    for index_name, expected in expectations:
        assert mdf_toolbox.translate_index(index_name) == expected
Example #2
0
def ingest_to_search(name, path, idx="dlhub-test"):
    """Ingest the model metadata into Globus Search.

    Args:
        name (str): The model name (generally the same as the container directory
                    name, but not always), e.g. "cifar10".
        path (str): Path to the model metadata, e.g. "metadata/cifar10.json".
                    If ``None``, the function returns without doing anything.
        idx (str): The Globus Search index to upload the metadata to.
                   Default: "dlhub-test".
    """
    # Nothing to ingest without a metadata file; identity check is the
    # correct way to test for None.
    if path is None:
        return
    dl = client.DLHub()
    uuid = dl.get_id_by_name(name)
    iden = "https://dlhub.org/api/v1/servables/{}".format(uuid)
    index = mdf_toolbox.translate_index(idx)

    with open(path, 'r') as f:
        ingestable = json.load(f)

    # First call wraps the document with its ACL and identifier...
    ingestable = mdf_toolbox.format_gmeta(ingestable,
                                          acl="public",
                                          identifier=iden)
    # ...second call makes it a GIngest list of GMetaEntry
    ingestable = mdf_toolbox.format_gmeta([ingestable])

    ingest_client = mdf_toolbox.login(
        services=["search_ingest"])["search_ingest"]
    ingest_client.ingest(index, ingestable)
    print("Ingestion of {} to DLHub servables complete".format(name))
Example #3
0
def test_search(capsys):
    """Exercise SearchHelper.search: empty query, info flag, limit, and reset_query."""
    # Build the helper outside the raises block so that only the empty
    # search() call can satisfy the expected ValueError.
    f = SearchHelper(INDEX, search_client=SEARCH_CLIENT)

    # Error on no query
    with pytest.raises(ValueError):
        f.search()

    # Return info if requested
    res2 = f.search("Al", info=False)
    assert isinstance(res2, list)
    assert isinstance(res2[0], dict)

    res3 = f.search("Al", info=True)
    assert isinstance(res3, tuple)
    assert isinstance(res3[0], list)
    assert isinstance(res3[0][0], dict)
    assert isinstance(res3[1], dict)

    # Check limit
    res4 = f.match_term("Al").search(limit=3)
    assert len(res4) == 3

    # Check reset_query: both searches must return the same set of results
    f.match_field("mdf.source_name", "ta_melting")
    res5 = f.search(reset_query=False)
    res6 = f.search()
    assert all(r in res6 for r in res5) and all(r in res5 for r in res6)

    # Check that the index reported in the search info is INDEX's UUID
    f2 = SearchHelper(INDEX, search_client=SEARCH_CLIENT)
    info = f2.match_term("data").search(limit=1, info=True)[1]
    assert info["index_uuid"] == mdf_toolbox.translate_index(INDEX)
Example #4
0
    def _mapping(self):
        """Retrieve the complete mapping of this helper's index.

        Returns:
            dict: The full mapping for the index.
        """
        fetch = self.__search_client.get
        endpoint = "/beta/index/{}/mapping".format(
            mdf_toolbox.translate_index(self.index))
        response = fetch(endpoint)
        return response["mappings"]
Example #5
0
    def __init__(self, index, **kwargs):
        """Set up a SearchHelper for a Globus Search index.

        Arguments:
            index (str): The Globus Search index to search on.

        Keyword Arguments:
            search_client (globus_sdk.SearchClient): The Globus Search client to use
                    for searching. When omitted, a client is created and the user
                    may be asked to log in. **Default**: ``None``.
            anonymous (bool): If ``True``, skip authentication with Globus Auth.
                    If ``False``, authentication is required (either a SearchClient
                    or a user-interactive login).
                    **Default:** ``False``.

                    Caution:
                        Authentication is required to view non-public data in Search.
                        An anonymous SearchHelper will only return public results.

            app_name (str): The application name to use. Should be changed for
                    subclassed clients, and left alone otherwise.
                    Only used if performing login flow.
                    **Default**: ``"SearchHelper_Client"``.
            client_id (str): The ID of a native client to use when authenticating.
                    Only used if performing login flow.
                    **Default**: The default SearchHelper client ID.

            q (str): A query string to initialize the SearchHelper with.
                    Intended for internal use.
            advanced (bool): The initial advanced state for this SearchHelper.
                    Intended for internal use.
        """
        # Client precedence: supplied client, then anonymous, then login flow
        supplied_client = kwargs.get("search_client")
        if supplied_client:
            self.__search_client = supplied_client
        elif kwargs.get("anonymous"):
            anonymous_clients = mdf_toolbox.anonymous_login(["search"])
            self.__search_client = anonymous_clients["search"]
        else:
            authenticated_clients = mdf_toolbox.login(
                app_name=kwargs.get("app_name", self.__app_name),
                client_id=kwargs.get("client_id", self.__client_id),
                services=["search"],
            )
            self.__search_client = authenticated_clients["search"]

        # Store the index after passing it through translate_index
        # (resolves an index name to its UUID)
        self.index = mdf_toolbox.translate_index(index)

        # Start from a blank query, then apply any truthy initial values
        self.__query = deepcopy(BLANK_QUERY)
        for field in ("q", "advanced"):
            initial_value = kwargs.get(field)
            if initial_value:
                self.__query[field] = initial_value
Example #6
0
def ingest_to_search(name, path, idx):
    """Ingest a model's metadata file into a Globus Search index.

    Args:
        name (str): The model name, used to look up the servable ID.
        path (str): Path to the JSON metadata file. If ``None``, the function
                    returns without doing anything.
        idx (str): The Globus Search index to ingest into; passed through
                   ``mdf_toolbox.translate_index`` before use.
    """
    # Nothing to ingest without a metadata file; identity check is the
    # correct way to test for None.
    if path is None:
        return
    dl = client.DLHub()
    uuid = dl.get_id_by_name(name)
    iden = "https://dlhub.org/api/v1/servables/{}".format(uuid)
    index = mdf_toolbox.translate_index(idx)

    with open(path, 'r') as f:
        ingestable = json.load(f)

    # First call wraps the document with its ACL and identifier...
    ingestable = mdf_toolbox.format_gmeta(ingestable,
                                          acl="public",
                                          identifier=iden)
    # ...second call makes it a GIngest list of GMetaEntry
    ingestable = mdf_toolbox.format_gmeta([ingestable])

    ingest_client = mdf_toolbox.login(
        services=["search_ingest"])["search_ingest"]
    ingest_client.ingest(index, ingestable)
    print("Ingestion of {} complete".format(name))
Example #7
0
def search_ingest(task):
    """
    Ingest the servable data into a Globus Search index.

    Args:
        task (dict): the task description. ``task['dlhub']['id']`` is used to
            build the entry identifier; ``task['dlhub']['visible_to']`` is
            optional and falls back to ``['public']`` when missing or empty.
    """
    logging.debug("Ingesting servable into Search.")

    # The index is given directly as a UUID; the name "dlhub" was used here previously.
    idx = '847c9105-18a0-4ffb-8a71-03dd76dfcc9d'
    iden = "https://dlhub.org/servables/{}".format(task['dlhub']['id'])
    index = mdf_toolbox.translate_index(idx)

    documents = [convert_dict(task, str)]

    # Fall back to public so the visibility list is never empty
    visible_to = task['dlhub'].get('visible_to') or ['public']

    # One GMetaEntry per document, then wrap the list for ingestion
    glist = [
        mdf_toolbox.format_gmeta(document, visible_to, iden)
        for document in documents
    ]
    gingest = mdf_toolbox.format_gmeta(glist)

    ingest_client = mdf_toolbox.login(services=["search_ingest"],
                                      no_local_server=True,
                                      no_browser=True)["search_ingest"]
    logging.info("ingesting to search")
    logging.info(gingest)
    # Use the translated index so a name-based idx would also work.
    # NOTE(review): assumes translate_index returns a UUID unchanged - confirm.
    ingest_client.ingest(index, gingest)
    logging.info("Ingestion of {} to DLHub servables complete".format(iden))