def search_by_authors(self, authors, match_all=True, limit=None, only_latest=True):
    """Find servables credited to particular authors.

    In DLHub, the authors of a servable are not necessarily its owners:
    authors are generally the people who developed the functionality behind
    the servable (e.g., the creators of the machine learning model it uses).
    To search by ownership instead, see :meth:`search_by_servable`.

    Args:
        authors (str or list of str): The authors to match. Names must be in
            "Family Name, Given Name" format.
        match_all (bool): If ``True``, every listed author must appear on a
            result. If ``False``, a single matching author is enough.
            **Default**: ``True``.
        limit (int): The maximum number of results to return.
            **Default:** ``None``, for no limit.
        only_latest (bool): When ``True``, only the latest version of each
            servable is returned. When ``False``, all matching versions are
            returned. **Default**: ``True``.
    Returns:
        [dict]: List of servables from the desired authors
    """
    # Build the author filter first, then run it with the requested limit
    author_query = self.query.match_authors(authors, match_all=match_all)
    matches = author_query.search(limit=limit)

    # Reduce to the newest version of each servable only when asked to
    if only_latest:
        return filter_latest(matches)
    return matches
def search_by_related_doi(self, doi, limit=None, only_latest=True):
    """Find every servable associated with a given publication.

    Args:
        doi (string): DOI of the related paper
        limit (int): Maximum number of results to return
        only_latest (bool): Whether to return only the most recent version
            of each model
    Returns:
        [dict]: List of servables from the requested paper
    """
    # Build the DOI filter, then run it with the requested limit
    doi_query = self.query.match_doi(doi)
    matches = doi_query.search(limit=limit)

    # Reduce to the newest version of each servable only when asked to
    if only_latest:
        return filter_latest(matches)
    return matches
def search_cmd(owner, name, all_versions, author, domain, doi, query):
    """Search command

    Builds a search from the free-text ``query`` terms plus the owner, name,
    author, domain and DOI filters, then prints the matching servables as a
    table. Unless ``all_versions`` is set, the results are passed through
    ``filter_latest`` before printing.

    See above for argument details
    """

    def _table_row(record):
        # Keep only the columns shown to the user. 'publication_date' is
        # divided by 1000 before conversion, i.e. it is treated as a
        # millisecond timestamp.
        published = datetime.fromtimestamp(int(record['dlhub']['publication_date']) / 1000)
        return {
            'Owner': record['dlhub']['owner'],
            'Model Name': record['dlhub']['name'],
            'Publication Date': published.strftime('%Y-%m-%d %H:%M'),
            'Type': record['servable']['type']
        }

    # Get the client
    client = get_dlhub_client()

    # Start the query object from the free-text terms.
    # NOTE(review): the query string is opened with "(" and not closed here;
    # presumably the helper balances the parentheses itself - confirm against
    # DLHubSearchHelper before changing.
    free_text = "(" + " ".join(query)
    helper = DLHubSearchHelper(client._search_client, q=free_text, advanced=True)

    # Add the filters
    helper.match_owner(owner)
    helper.match_servable(servable_name=name)
    helper.match_authors(author)
    helper.match_domains(domain)
    helper.match_doi(doi)

    # If no query strings are given, return an error
    if not helper.initialized:
        click.echo('Error: No query specified. For options, call: dlhub search --help')
        click.get_current_context().exit(1)

    # Perform the query
    results = helper.search()

    # If no results, return nothing
    if len(results) == 0:
        click.echo('No results')
        return

    # If desired, filter the entries
    if not all_versions:
        results = filter_latest(results)

    # Get only a subset of the data and print it as a table, with the newest
    # publication first within each owner / model name
    results_df = pd.DataFrame([_table_row(r) for r in results])
    ordered_df = results_df.sort_values(
        ['Owner', 'Model Name', 'Publication Date'],
        ascending=[True, True, False],
    )
    click.echo(tabulate(ordered_df.values, headers=ordered_df.columns))
def search_by_servable(self, servable_name=None, owner=None, version=None,
                       only_latest=True, limit=None, get_info=False):
    """Search by the ownership, name, or version of a servable

    Args:
        servable_name (str): The name of the servable.
            **Default**: None, to match all servable names.
        owner (str): The name of the owner of the servable.
            **Default**: ``None``, to match all owners.
        version (int): Model version, which corresponds to the date when the
            servable was published. **Default**: ``None``, to match all versions.
        only_latest (bool): When ``True``, will only return the latest version
            of each servable. When ``False``, will return all matching versions.
            **Default**: ``True``.
        limit (int): The maximum number of results to return.
            **Default:** ``None``, for no limit.
        get_info (bool): If ``False``, search will return a list of the results.
            If ``True``, search will return a tuple containing the results list
            and other information about the query.
            **Default:** ``False``.

    Returns:
        If ``get_info`` is ``False``, *list*: The search results.
        If ``get_info`` is ``True``, *tuple*: The search results,
        and a dictionary of query information.

    Raises:
        ValueError: If none of ``servable_name``, ``owner`` or ``version``
            is provided (all are empty or ``None``).
    """
    # At least one criterion is needed. The message names the arguments as
    # they appear in this signature: the value forwarded to the query as
    # ``publication_date`` is called ``version`` here.
    if not (servable_name or owner or version):
        raise ValueError(
            "One of 'servable_name', 'owner', or 'version' is required."
        )

    # Perform the query; ``version`` is matched against the publication date
    results, info = self.query.match_servable(
        servable_name=servable_name,
        owner=owner,
        publication_date=version,
    ).search(limit=limit, info=True)

    # Filter out the latest models
    if only_latest:
        results = filter_latest(results)

    if get_info:
        return results, info
    return results
def search(self, query, advanced=False, limit=None, only_latest=True):
    """Query the DLHub servable library

    By default, the query is used as a simple plaintext search of all model
    metadata. Optionally, you can provide an advanced query on any of the
    indexed fields in the DLHub model metadata by setting
    :code:`advanced=True` and following the guide for constructing advanced
    queries found in the
    `Globus Search documentation <https://docs.globus.org/api/search/search/#query_syntax>`_.

    Args:
        query (string): Query to be performed
        advanced (bool): Whether to perform an advanced query
        limit (int): Maximum number of entries to return
        only_latest (bool): Whether to return only the latest version of the model
    Returns:
        ([dict]): All records matching the search query
    """
    # Run the raw query string through the query helper
    matches = self.query.search(query, advanced=advanced, limit=limit)

    # Reduce to the newest version of each servable only when asked to
    if only_latest:
        return filter_latest(matches)
    return matches