Beispiel #1
0
    def search_by_authors(self,
                          authors,
                          match_all=True,
                          limit=None,
                          only_latest=True):
        """Find servables credited to the given authors.

        In DLHub the authors of a servable are not necessarily its owners: authors
        are generally the people who developed the functionality behind a servable
        (for instance, the creators of the machine learning model it wraps).

        To search by ownership instead, use :meth:`search_by_servable`.

        Args:
            authors (str or list of str): Author name(s) to look for, each written
                as "Family Name, Given Name".
            match_all (bool): ``True`` requires every listed author to appear on a
                result; ``False`` requires only one of them.
                **Default**: ``True``.
            limit (int): Maximum number of results to return.
                **Default**: ``None``, for no limit.
            only_latest (bool): ``True`` keeps only the latest version of each
                servable; ``False`` keeps every matching version.
                **Default**: ``True``.

        Returns:
            [dict]: List of servables from the desired authors
        """
        # Build the author filter first, then execute it with the requested limit
        author_query = self.query.match_authors(authors, match_all=match_all)
        matches = author_query.search(limit=limit)

        if only_latest:
            return filter_latest(matches)
        return matches
Beispiel #2
0
    def search_by_related_doi(self, doi, limit=None, only_latest=True):
        """Find the servables associated with a given publication.

        Args:
            doi (string): DOI of the related paper
            limit (int): Maximum number of results to return
            only_latest (bool): Whether to keep only the most recent version of
                each model
        Returns:
            [dict]: List of servables from the requested paper
        """
        doi_query = self.query.match_doi(doi)
        matches = doi_query.search(limit=limit)

        if only_latest:
            return filter_latest(matches)
        return matches
Beispiel #3
0
def search_cmd(owner, name, all_versions, author, domain, doi, query):
    """Search command

    Builds an advanced query from the free-text ``query`` terms plus the owner,
    servable name, author, domain and DOI filters, runs it, and prints the
    matches as a table. Unless ``all_versions`` is set, the results are passed
    through ``filter_latest`` before printing.
    """

    # Get the client
    dlhub_client = get_dlhub_client()

    # Start the query object; the query string is opened with "(" here and
    # handed to the helper as-is
    query_text = "(" + " ".join(query)
    helper = DLHubSearchHelper(dlhub_client._search_client, q=query_text, advanced=True)

    # Add the filters
    helper.match_owner(owner)
    helper.match_servable(servable_name=name)
    helper.match_authors(author)
    helper.match_domains(domain)
    helper.match_doi(doi)

    # Without any query terms or filters there is nothing to search for
    if not helper.initialized:
        click.echo('Error: No query specified. For options, call: dlhub search --help')
        click.get_current_context().exit(1)

    # Perform the query
    matches = helper.search()

    # Nothing found: say so and stop
    if len(matches) == 0:
        click.echo('No results')
        return

    # If desired, filter the entries
    if not all_versions:
        matches = filter_latest(matches)

    # Keep only a subset of each record for display
    rows = []
    for record in matches:
        # publication_date is divided by 1000 before conversion (treated as milliseconds)
        published = datetime.fromtimestamp(int(record['dlhub']['publication_date']) / 1000)
        rows.append({
            'Owner': record['dlhub']['owner'],
            'Model Name': record['dlhub']['name'],
            'Publication Date': published.strftime('%Y-%m-%d %H:%M'),
            'Type': record['servable']['type']
        })
    table = pd.DataFrame(rows)

    # Order by owner and model name, newest publication first within each
    table = table.sort_values(['Owner', 'Model Name', 'Publication Date'],
                              ascending=[True, True, False])

    click.echo(tabulate(table.values, headers=table.columns))
Beispiel #4
0
    def search_by_servable(self,
                           servable_name=None,
                           owner=None,
                           version=None,
                           only_latest=True,
                           limit=None,
                           get_info=False):
        """Search by the ownership, name, or version of a servable

        At least one of ``servable_name``, ``owner``, or ``version`` must be given.

        Args:
            servable_name (str): The name of the servable. **Default**: ``None``,
                    to match all servable names.
            owner (str): The name of the owner of the servable. **Default**: ``None``,
                    to match all owners.
            version (int): Model version, which corresponds to the date when the
                    servable was published. **Default**: ``None``, to match all versions.
            only_latest (bool): When ``True``, will only return the latest version
                    of each servable. When ``False``, will return all matching versions.
                    **Default**: ``True``.
            limit (int): The maximum number of results to return.
                    **Default:** ``None``, for no limit.
            get_info (bool): If ``False``, search will return a list of the results.
                    If ``True``, search will return a tuple containing the results list
                    and other information about the query.
                    **Default:** ``False``.

        Returns:
            If ``get_info`` is ``False``, *list*: The search results.
            If ``get_info`` is ``True``, *tuple*: The search results,
            and a dictionary of query information.

        Raises:
            ValueError: If none of ``servable_name``, ``owner``, or ``version``
                is provided.
        """
        if not (servable_name or owner or version):
            # Name the parameters as this method exposes them: the caller passes
            # ``version``, which is only forwarded as ``publication_date`` below.
            raise ValueError(
                "One of 'servable_name', 'owner', or 'version' is required."
            )

        # Perform the query; ``version`` is matched against the publication date
        results, info = (self.query.match_servable(
            servable_name=servable_name, owner=owner,
            publication_date=version).search(limit=limit, info=True))

        # Keep only the latest version of each servable, if requested
        if only_latest:
            results = filter_latest(results)

        if get_info:
            return results, info
        return results
Beispiel #5
0
    def search(self, query, advanced=False, limit=None, only_latest=True):
        """Query the DLHub servable library

        By default the query is treated as a simple plaintext search over all model
        metadata. Setting :code:`advanced=True` lets you instead provide an advanced
        query on any of the indexed fields in the DLHub model metadata, written
        according to the guide for constructing advanced queries in the
        `Globus Search documentation <https://docs.globus.org/api/search/search/#query_syntax>`_.

        Args:
             query (string): Query to be performed
             advanced (bool): Whether to perform an advanced query
             limit (int): Maximum number of entries to return
             only_latest (bool): Whether to return only the latest version of the model
        Returns:
            ([dict]): All records matching the search query
        """

        matches = self.query.search(query, advanced=advanced, limit=limit)

        # Hand back every matching version untouched when filtering is not wanted
        if not only_latest:
            return matches
        return filter_latest(matches)