def batch_create(self, metadata_list, overwrite=True, **kwargs):
    """
    Create metadata for a list of guids with a single POST request.

    Args:
        metadata_list (List[Dict{"guid": "", "data": {}}]): list of metadata
            objects. Each element is expected to be a dict with a "guid"
            field and a "data" field, where "data" is another JSON blob
        overwrite (bool, optional): forwarded as the ``overwrite`` query
            parameter
        **kwargs: extra query parameters appended to the request URL

    Returns:
        The decoded JSON body of the response
    """
    url = self.admin_endpoint + "/metadata"

    # Advisory check on the first element only; the request is sent either
    # way. Any non-empty list is inspected (including a single-element one),
    # and the warning fires when *either* required key is missing.
    if metadata_list:
        first_entry = metadata_list[0]
        if (
            not isinstance(first_entry, dict)
            or "guid" not in first_entry
            or "data" not in first_entry
        ):
            logging.warning(
                "it looks like your metadata list for bulk create is malformed. "
                "the expected format is a list of dicts that have 2 keys: 'guid' "
                "and 'data', where 'guid' is a string and 'data' is another dict. "
                f"The first element doesn't match that pattern: {first_entry}"
            )

    url_with_params = append_query_params(url, overwrite=overwrite, **kwargs)
    logging.debug(f"hitting: {url_with_params}")
    logging.debug(f"data: {metadata_list}")

    response = requests.post(
        url_with_params, json=metadata_list, auth=self._auth_provider
    )
    raise_for_status(response)
    return response.json()
def list_jobs(self):
    """
    Fetch the job listing from the ``/list`` route of this endpoint.

    Returns:
        The decoded JSON body of the response
    """
    jobs_url = self.endpoint + "/list"
    resp = requests.get(jobs_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def update_blank(self, guid, rev, hashes, size, urls=None, authz=None):
    """
    Fill in hashes and size (and optionally urls / authz) of a blank index.

    Args:
        guid (string): record id
        rev (string): data revision, sent as the ``rev`` query parameter
        hashes (dict): {hash type: hash value,}
            eg ``hashes={'md5': 'ab167e49d25b488939b1ede42752458b'}``
        size (int): file size
        urls (optional): included in the request body only when truthy
        authz (optional): included in the request body only when truthy

    Returns:
        Whatever ``self.get_record`` returns for the "did" in the response
    """
    payload = {"hashes": hashes, "size": size}
    # Optional fields are left out of the body entirely when falsy.
    for field, value in (("urls", urls), ("authz", authz)):
        if value:
            payload[field] = value

    # NOTE: ``client.json_dumps`` uses the module-level name ``client``,
    # not ``self.client``.
    response = self.client._put(
        "index/blank",
        guid,
        headers={"content-type": "application/json"},
        params={"rev": rev},
        auth=self.client.auth,
        data=client.json_dumps(payload),
    )
    raise_for_status(response)
    return self.get_record(response.json()["did"])
async def async_update(self, guid, metadata, _ssl=None, **kwargs):
    """
    Asynchronously PUT metadata for a guid to the admin endpoint.

    Args:
        guid (str): guid to use
        metadata (Dict): dictionary sent as the JSON request body
        _ssl (None, optional): passed through as the ``ssl`` argument of the
            aiohttp request
        **kwargs: extra query parameters appended to the request URL

    Returns:
        The decoded JSON body of the response
    """
    async with aiohttp.ClientSession() as session:
        target = append_query_params(
            self.admin_endpoint + f"/metadata/{guid}", **kwargs
        )

        # aiohttp's built-in auth only covers basic auth, so the JWT
        # Authorization header is attached by hand.
        auth_headers = {"Authorization": self._auth_provider._get_auth_value()}

        async with session.put(
            target, json=metadata, headers=auth_headers, ssl=_ssl
        ) as resp:
            raise_for_status(resp)
            return await resp.json()
def get_output(self, job_id):
    """
    Fetch the output of a job from the ``/output`` route.

    Args:
        job_id: identifier sent as the ``UID`` query parameter

    Returns:
        The decoded JSON body of the response
    """
    output_url = self.endpoint + f"/output?UID={job_id}"
    resp = requests.get(output_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def get_status(self, job_id):
    """
    Fetch the status of a job from the ``/status`` route.

    Args:
        job_id: identifier sent as the ``UID`` query parameter

    Returns:
        The decoded JSON body of the response
    """
    status_url = self.endpoint + f"/status?UID={job_id}"
    resp = requests.get(status_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def get_stats(self):
    """
    Fetch the ``_stats`` resource through the underlying client.

    Returns:
        The decoded JSON body of the response
    """
    stats_resp = self.client._get("_stats")
    raise_for_status(stats_resp)
    return stats_resp.json()
def get_version(self):
    """
    Fetch the ``_version`` resource through the underlying client.

    Returns:
        The decoded JSON body of the response
    """
    version_resp = self.client._get("_version")
    raise_for_status(version_resp)
    return version_resp.json()
def get_url(urlstr, dest_path):
    """
    Fetch a url and write the body to dest_path, or to stdout for "-".

    NOTE(review): no retry/backoff happens in this body; if backoff is
    expected it presumably comes from a decorator -- confirm.

    Args:
        urlstr: url to GET
        dest_path: file path to write the raw response bytes to, or "-" to
            write the response text to stdout
    """
    res = requests.get(urlstr)
    raise_for_status(res)

    if dest_path != "-":
        # Raw bytes go to disk untouched.
        with open(dest_path, "wb") as out_file:
            out_file.write(res.content)
        return

    sys.stdout.write(res.text)
def get_version(self):
    """
    Fetch the ``/_version`` route and pull out its "version" field.

    Returns:
        The value under "version" in the response JSON, or None if absent
    """
    resp = requests.get(self.endpoint + "/_version", auth=self._auth_provider)
    raise_for_status(resp)
    payload = resp.json()
    return payload.get("version")
def get_index_key_paths(self):
    """
    Fetch the ``/metadata_index`` listing from the admin endpoint.

    Returns:
        The decoded JSON body of the response
    """
    index_url = self.admin_endpoint + "/metadata_index"
    resp = requests.get(index_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def create_index_key_path(self, path):
    """
    POST to ``/metadata_index/{path}`` on the admin endpoint.

    Args:
        path (str): metadata key path, interpolated into the URL as-is

    Returns:
        The decoded JSON body of the response
    """
    index_url = self.admin_endpoint + f"/metadata_index/{path}"
    resp = requests.post(index_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def delete_index_key_path(self, path):
    """
    Send a DELETE to ``/metadata_index/{path}`` on the admin endpoint.

    Args:
        path (str): metadata key path, interpolated into the URL as-is

    Returns:
        The response object itself (not its decoded JSON)
    """
    index_url = self.admin_endpoint + f"/metadata_index/{path}"
    resp = requests.delete(index_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp
def create_object(self, file_name, authz, metadata=None, aliases=None):
    """
    POST a new object description to the ``/objects`` route.

    Args:
        file_name: sent as "file_name" in the request body
        authz: sent as "authz" in the request body
        metadata (optional): sent as "metadata" (None when not given)
        aliases (optional): sent as "aliases" (None when not given)

    Returns:
        tuple: the "guid" and "upload_url" values from the response JSON
    """
    request_body = dict(
        file_name=file_name,
        authz=authz,
        metadata=metadata,
        aliases=aliases,
    )
    resp = requests.post(
        self.endpoint + "/objects", json=request_body, auth=self._auth_provider
    )
    raise_for_status(resp)

    created = resp.json()
    return created["guid"], created["upload_url"]
def download_url(self, ws, wskey):
    """
    Request ``/ws-storage/download/{ws}/{wskey}`` via the auth provider.

    Args:
        ws (string): name of the workspace
        wskey (string): key of the object in the workspace; leading "/"
            characters are stripped before building the path

    Returns:
        The decoded JSON body of the response
    """
    path = "/ws-storage/download/{}/{}".format(ws, wskey.lstrip("/"))
    resp = self._auth_provider.curl(path)
    raise_for_status(resp)
    return resp.json()
def ls(self, ws, wskey):
    """
    Request ``/ws-storage/list/{ws}/{wskey}`` via the auth provider.

    Args:
        ws (string): name of the workspace
        wskey (string): key (path) in the workspace; leading "/" characters
            are stripped before building the path

    Returns:
        The decoded JSON body of the response
    """
    path = "/ws-storage/list/{}/{}".format(ws, wskey.lstrip("/"))
    resp = self._auth_provider.curl(path)
    raise_for_status(resp)
    return resp.json()
async def async_get_output(self, job_id, _ssl=None, **kwargs):
    """
    Asynchronously fetch the output of a job from the ``/output`` route.

    Args:
        job_id: identifier sent as the ``UID`` query parameter
        _ssl (None, optional): passed through as the ``ssl`` argument of the
            aiohttp request
        **kwargs: extra query parameters appended to the request URL

    Returns:
        The response body decoded as JSON, without content-type validation
    """
    async with aiohttp.ClientSession() as session:
        target = append_query_params(
            self.endpoint + f"/output?UID={job_id}", **kwargs
        )

        # aiohttp's built-in auth only covers basic auth, so the JWT
        # Authorization header is attached by hand.
        auth_headers = {"Authorization": self._auth_provider._get_auth_value()}

        async with session.get(target, headers=auth_headers, ssl=_ssl) as resp:
            raise_for_status(resp)
            return await resp.json(content_type=None)
def rm(self, ws, wskey):
    """
    Issue a DELETE against ``/ws-storage/list/{ws}/{wskey}``.

    Args:
        ws (string): name of the workspace
        wskey (string): key of the object in the workspace; leading "/"
            characters are stripped before building the path

    Returns:
        The decoded JSON body of the response
    """
    path = "/ws-storage/list/{}/{}".format(ws, wskey.lstrip("/"))
    resp = self._auth_provider.curl(path, request="DELETE")
    raise_for_status(resp)
    return resp.json()
def query_urls(self, pattern):
    """
    Query ``/_query/urls/q`` with the pattern as the ``include`` parameter.

    Args:
        pattern (str): pattern interpolated into the query string as-is

    Returns:
        The decoded JSON body of the response
    """
    query_path = f"/_query/urls/q?include={pattern}"
    resp = self.client._get(query_path)
    raise_for_status(resp)
    return resp.json()
def create_program(self, json):
    """POST a program description to the submission API root.

    Args:
        json (object): The json of the program to create, sent as the
            request body

    Returns:
        The decoded JSON body of the response

    Examples:
        >>> Gen3Submission.create_program(json)
    """
    submission_root = "{}/api/v0/submission/".format(self._endpoint)
    resp = requests.post(submission_root, auth=self._auth_provider, json=json)
    raise_for_status(resp)
    return resp.json()
async def async_update_record(
    self,
    guid,
    file_name=None,
    urls=None,
    version=None,
    metadata=None,
    acl=None,
    authz=None,
    urls_metadata=None,
    _ssl=None,
):
    """
    Asynchronous function to update a record in indexd.

    The current record is fetched first, every argument that is not None is
    written over the matching field, and the merged record is PUT back with
    the fetched revision.

    Args:
        guid: string - record id
        file_name, urls, version, metadata, acl, authz, urls_metadata:
            optional replacement values; a field is left as fetched when its
            argument is None. Size and hashes cannot be updated here.
        _ssl (None, optional): passed through as the ``ssl`` argument of the
            aiohttp request

    Returns:
        The decoded JSON body of the response
    """
    async with aiohttp.ClientSession() as session:
        updatable_attrs = {
            "file_name": file_name,
            "urls": urls,
            "version": version,
            "metadata": metadata,
            "acl": acl,
            "authz": authz,
            "urls_metadata": urls_metadata,
        }

        # The record lookup is a method on this object, so it is called
        # through ``self``.
        record = await self.async_get_record(guid)
        revision = record.get("rev")

        record.update(
            {key: value for key, value in updatable_attrs.items() if value is not None}
        )

        # The revision is a query parameter, not part of the path.
        async with session.put(
            f"{self.client.url}/index/{guid}?rev={revision}",
            json=record,
            headers={"content-type": "application/json"},
            ssl=_ssl,
            auth=self.client.auth,
        ) as response:
            raise_for_status(response)
            return await response.json()
def delete(self, guid, **kwargs):
    """
    Send a DELETE for ``/metadata/{guid}`` to the admin endpoint.

    Args:
        guid (str): guid to use
        **kwargs: extra query parameters appended to the request URL

    Returns:
        The decoded JSON body of the response
    """
    url_with_params = append_query_params(
        self.admin_endpoint + f"/metadata/{guid}", **kwargs
    )
    logging.debug(f"hitting: {url_with_params}")

    resp = requests.delete(url_with_params, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def create_project(self, program, json):
    """PUT a project description under the given program.

    Args:
        program (str): The program to create a project on
        json (object): The json of the project to create, sent as the
            request body

    Returns:
        The decoded JSON body of the response

    Examples:
        >>> Gen3Submission.create_project("DCF", json)
    """
    program_url = "{}/api/v0/submission/{}".format(self._endpoint, program)
    resp = requests.put(program_url, auth=self._auth_provider, json=json)
    raise_for_status(resp)
    return resp.json()
def open_project(self, program, project):
    """Mark a project ``open`` by sending a PUT to its ``/open`` route.

    NOTE(review): an open project presumably accepts uploads, deletions,
    etc. -- that is service-side behavior, confirm against the API docs.

    Args:
        program: the name of the program the project is from
        project: the name of the project you want to 'open'

    Returns:
        The decoded JSON body of the response

    Example:
        >>> Gen3Submission.open_project("DCF", "CCLE")
    """
    open_url = f"{self._endpoint}/api/v0/submission/{program}/{project}/open"
    resp = requests.put(open_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def get_versions(self, guid):
    """
    Fetch ``/index/{guid}/versions`` and return its values as a list.

    Args:
        guid: string - record id

    Returns:
        list: the values of the response JSON mapping; the keys are dropped
    """
    resp = self.client._get(f"/index/{guid}/versions")
    raise_for_status(resp)
    by_key = resp.json()
    return list(by_key.values())
def is_healthy(self):
    """
    Report whether the ``/_status`` route answers with status "OK".

    Any failure while requesting, checking, or decoding the response is
    logged and reported as unhealthy rather than raised.

    Returns:
        bool: True only if the response JSON has "status" equal to "OK"
    """
    try:
        response = requests.get(
            self.endpoint + "/_status", auth=self._auth_provider
        )
        raise_for_status(response)
        # Decoding stays inside the try so a non-JSON or non-object body
        # counts as unhealthy instead of escaping as an exception.
        return response.json().get("status") == "OK"
    except Exception as exc:
        # Deliberately broad: a health probe answers False on any failure.
        logging.error(exc)
        return False
def get_projects(self, program):
    """GET the submission API entry for a program.

    Args:
        program: the name of the program you want the projects from

    Returns:
        The decoded JSON body of the response

    Example:
        >>> Gen3Submission.get_projects("DCF")
    """
    program_url = f"{self._endpoint}/api/v0/submission/{program}"
    resp = requests.get(program_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def get_project_dictionary(self, program, project):
    """GET the ``_dictionary`` route of a project.

    Args:
        program: the name of the program the project is from
        project: the name of the project you want the dictionary schema from

    Returns:
        The decoded JSON body of the response

    Example:
        >>> Gen3Submission.get_project_dictionary("DCF", "CCLE")
    """
    dictionary_url = (
        f"{self._endpoint}/api/v0/submission/{program}/{project}/_dictionary"
    )
    resp = requests.get(dictionary_url, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
def create_job(self, job_name, job_input):
    """
    POST a job request to the ``/dispatch`` route.

    Args:
        job_name (str): sent as "action" in the request body
        job_input (Dict): sent as "input" in the request body

    Returns:
        Dict: the decoded JSON body of the response
    """
    dispatch_url = self.endpoint + "/dispatch"
    job_request = {"action": job_name, "input": job_input}
    resp = requests.post(dispatch_url, json=job_request, auth=self._auth_provider)
    raise_for_status(resp)
    return resp.json()
async def async_create_job(self, job_name, job_input, _ssl=None, **kwargs):
    """
    Asynchronously POST a job request to the ``/dispatch`` route.

    Args:
        job_name (str): sent as "action" in the request body
        job_input (Dict): sent as "input" in the request body
        _ssl (None, optional): passed through as the ``ssl`` argument of the
            aiohttp request
        **kwargs: extra query parameters appended to the request URL

    Returns:
        The response body decoded as JSON, without content-type validation
    """
    async with aiohttp.ClientSession() as session:
        dispatch_url = append_query_params(self.endpoint + "/dispatch", **kwargs)

        # The body is serialized here and sent via ``data=``.
        body = json.dumps({"action": job_name, "input": job_input})

        # aiohttp's built-in auth only covers basic auth, so the JWT
        # Authorization header is attached by hand.
        auth_headers = {"Authorization": self._auth_provider._get_auth_value()}

        async with session.post(
            dispatch_url, data=body, headers=auth_headers, ssl=_ssl
        ) as resp:
            raise_for_status(resp)
            return await resp.json(content_type=None)