def test_get_workspace(self):
    """Test get_workspace()."""
    response = fapi.get_workspace(self.project, self.workspace)
    print(response.status_code, response.content)
    self.assertEqual(response.status_code, 200)
    # The payload nests the workspace attributes under the 'workspace' key.
    workspace = response.json()['workspace']
    self.assertEqual(workspace['name'], self.workspace)
    self.assertEqual(workspace['namespace'], self.project)
def get_workspace_info(workspace_namespace: str, workspace_name: str) -> dict:
    """Get workspace attributes using workspace_namespace and workspace_name.

    Args:
        workspace_namespace: Terra billing project / namespace of the workspace.
        workspace_name: name of the workspace.

    Returns:
        The 'workspace' dict of the FireCloud get_workspace response.

    Raises:
        ValueError: if the workspace could not be fetched (any non-200 status).
    """
    ws = fapi.get_workspace(workspace_namespace, workspace_name)
    # Originally only 404 was handled, so a 403/500 fell through and
    # surfaced as a KeyError on 'workspace'. Treat every non-200 as failure.
    if ws.status_code != 200:
        raise ValueError(
            f"Unable to fetch information from workspace {workspace_namespace}/{workspace_name} - {ws.json()}!"
        )
    return ws.json()["workspace"]
def get_or_create_workspace(workspace_namespace, workspace_name):
    """Fetch the named workspace, creating it when it does not yet exist.

    NOTE(review): an existing workspace returns the inner 'workspace' dict,
    while a freshly created one returns the raw creation payload — presumably
    the create endpoint already returns the workspace object directly; verify
    against callers before unifying.
    """
    response = fapi.get_workspace(workspace_namespace, workspace_name)
    if response.status_code != 404:
        # Workspace already exists (or errored in some other way).
        return response.json()['workspace']

    created = fapi.create_workspace(workspace_namespace, workspace_name)
    if created.status_code != 201:
        raise ValueError('Unable to create workspace')
    return created.json()
def test_space_search(self):
    # Look up the workspace to learn its bucket id.
    resp = fapi.get_workspace(self.project, self.workspace)
    fapi._check_response_code(resp, 200)
    info = resp.json()["workspace"]

    # Searching by that bucket id should surface the workspace name.
    hits = call_func("space_search", "-b", info['bucketName'])
    self.assertIn(info['name'], ''.join(hits))

    # A bucket id that cannot exist should match nothing.
    self.assertEqual([], call_func("space_search", "-b", '__NoTTHeRe__'))
def refresh(self):
    """Reload workspace metadata from firecloud.

    Workspace metadata is cached in the data attribute of a Workspace,
    and may become stale, requiring a refresh().
    """
    response = fapi.get_workspace(self.namespace, self.name, self.api_url)
    # Raises if the workspace no longer exists or the call fails.
    fapi._check_response_code(response, 200)
    self.data = response.json()
    return self
def __init__(self, namespace, name, api_url=fapi.PROD_API_ROOT):
    """Get an existing workspace from Firecloud by name.

    This method assumes that a workspace with the given name and namespace
    is present at the api_url given, and raises an error if it does not
    exist. To create a new workspace, use Workspace.new()

    Raises:
        FireCloudServerError: Workspace does not exist, or API call fails
    """
    self.namespace = namespace
    self.name = name
    self.api_url = api_url

    # Fetch the workspace from FireCloud; _check_response_code raises
    # when the workspace is missing or the API call fails.
    response = fapi.get_workspace(namespace, name, api_url)
    fapi._check_response_code(response, 200)
    self.data = response.json()
def upload(args):
    """Copy a local file or directory into the workspace bucket via gsutil."""
    response = fapi.get_workspace(args.namespace, args.workspace, args.api_url)
    fapi._check_response_code(response, 200)
    bucket = response.json()['workspace']['bucketName']

    destination = 'gs://' + bucket + '/'
    if args.destination is not None:
        destination += args.destination.lstrip('/')

    cmd = [
        "gsutil", "-o", "GSUtil:parallel_composite_upload_threshold=50M", "cp"
    ]
    # Directories need a recursive copy.
    if os.path.isdir(args.source):
        cmd.append("-r")
    cmd += [args.source, destination]

    # Always echo the command; only execute it when --show is not set.
    print_(' '.join(cmd))
    if not args.show:
        return subprocess.check_call(cmd)
def get_bucket_name(namespace, workspace):
    """Return the GCS bucket name backing the given workspace."""
    resp = fapi.get_workspace(namespace, workspace)
    fapi._check_response_code(resp, 200)
    return resp.json()['workspace']['bucketName']
def update_notebooks(workspace_name, workspace_project, replace_this, with_this):
    """Replace `replace_this` with `with_this` inside every .ipynb file under
    the workspace bucket's notebooks/ folder.

    Notebooks are downloaded to a local scratch folder, edited, uploaded back
    to the bucket, and the scratch folder is deleted afterwards. If the bucket
    has no notebooks/ folder, nothing is changed.

    Args:
        workspace_name: name of the Terra workspace.
        workspace_project: namespace / billing project of the workspace.
        replace_this: literal substring to search for in notebook files.
        with_this: replacement text.
    """
    print("Updating NOTEBOOKS for " + workspace_name)

    # Resolve the workspace's GCS bucket.
    r = fapi.get_workspace(workspace_project, workspace_name)
    fapi._check_response_code(r, 200)
    bucket = r.json()['workspace']['bucketName']

    # List the top level of the bucket to see whether notebooks/ exists.
    bucket_files = _gsutil_ls('gs://' + bucket + '/')
    if 'gs://' + bucket + '/notebooks/' not in bucket_files:
        print("Workspace has no notebooks folder")
        return

    # (Re)create a clean local scratch folder.
    editing_folder = "../notebookEditingFolder"
    if os.path.exists(editing_folder):
        shutil.rmtree(editing_folder)
    os.mkdir(editing_folder)

    # Download every notebook found under notebooks/.
    print("Copying files to local disk...")
    for bf in _gsutil_ls('gs://' + bucket + '/notebooks/**'):
        if ".ipynb" in bf:
            print(' copying ' + bf)
            subprocess.check_output(['gsutil', 'cp', bf, editing_folder],
                                    stderr=subprocess.PIPE)

    # Do the substitution in pure Python. The original implementation shelled
    # out to `sed -i ''`, which is BSD/macOS-only syntax (it breaks on GNU
    # sed), and built the command by string interpolation — quoting/injection
    # hazards when replace_this/with_this contain shell metacharacters.
    print("Replacing text in files...")
    for filename in os.listdir(editing_folder):
        if filename.endswith(".ipynb"):
            path = os.path.join(editing_folder, filename)
            with open(path) as f:
                contents = f.read()
            with open(path, 'w') as f:
                f.write(contents.replace(replace_this, with_this))

    # Upload the edited notebooks back into the workspace bucket.
    print("Uploading files to bucket...")
    for filename in os.listdir(editing_folder):
        if filename.startswith('.'):
            continue
        if not filename.endswith(".ipynb"):
            print(" WARNING: non notebook file, not replacing " + filename)
            continue
        print(' uploading ' + filename)
        subprocess.check_output(
            ['gsutil', 'cp', os.path.join(editing_folder, filename),
             'gs://' + bucket + "/notebooks/" + filename],
            stderr=subprocess.PIPE)
        # Remove each notebook from the scratch folder once uploaded.
        os.remove(os.path.join(editing_folder, filename))

    # Delete the scratch folder so no stale files linger for the next run.
    shutil.rmtree(editing_folder)


def _gsutil_ls(path):
    """Run `gsutil ls path` and return its stdout as a list of lines."""
    out = subprocess.check_output(['gsutil', 'ls', path],
                                  stderr=subprocess.PIPE)
    # check_output yields bytes on Py3 (str on Py2); normalize to str.
    if isinstance(out, bytes):
        out = out.decode()
    return out.split('\n')
def hard_copy(original_workspace, original_project, new_workspace, new_project,
              set_auth_domain=None):
    """Clone a workspace, mirror its bucket, and rewrite bucket references.

    Clones original_project/original_workspace to new_project/new_workspace,
    rsyncs the original bucket into the clone's bucket, then rewrites
    references to the old bucket name in attributes, entities and notebooks.

    Args:
        original_workspace / original_project: source workspace coordinates.
        new_workspace / new_project: destination workspace coordinates.
        set_auth_domain: authorization domain for the clone; when None, the
            source workspace's domain is reused.

    Returns:
        'already exists' if the destination workspace is already present,
        'copy successful' otherwise.

    Raises:
        ferrors.FireCloudServerError: on any unexpected API status code.
    """
    # try:
    # check for auth_domain info
    if set_auth_domain is None:
        # Reuse the source workspace's authorization domain.
        response = fapi.get_workspace(namespace=original_project,
                                      workspace=original_workspace)
        if response.status_code not in [200]:
            raise ferrors.FireCloudServerError(response.status_code,
                                               response.content)
        authorization_domain = response.json()['workspace']['authorizationDomain']
        # authorizationDomain is a list of group dicts; take the first group
        # name when present, otherwise the (empty) list is passed through.
        if len(authorization_domain) > 0:
            authorization_domain = authorization_domain[0]['membersGroupName']
    else:
        authorization_domain = set_auth_domain
    print(f'Setting authorization domain to {authorization_domain}')

    # clone the workspace
    response = fapi.clone_workspace(from_namespace=original_project,
                                    from_workspace=original_workspace,
                                    to_namespace=new_project,
                                    to_workspace=new_workspace,
                                    authorizationDomain=authorization_domain)
    if response.status_code in [409]:
        # 409 Conflict: the destination workspace already exists; bail out.
        print(f'\nNOTE: {new_project}/{new_workspace} already exists!')
        return 'already exists'
    elif response.status_code not in [201]:
        raise ferrors.FireCloudServerError(response.status_code,
                                           response.content)

    # get bucket info for original and new workspace
    original_bucket = fapi.get_workspace(
        original_project, original_workspace).json()['workspace']['bucketName']
    new_bucket = fapi.get_workspace(
        new_project, new_workspace).json()['workspace']['bucketName']

    # copy bucket over
    # NOTE(review): run_subprocess is defined elsewhere in this project —
    # presumably it runs the command and returns its output lines; confirm.
    bucket_files = run_subprocess(
        ['gsutil', 'ls', 'gs://' + original_bucket + '/'],
        'Error listing bucket contents')
    if len(bucket_files) > 0:
        # Mirror the whole source bucket into the clone's bucket.
        gsutil_args = [
            'gsutil', '-m', 'rsync', '-r', 'gs://' + original_bucket,
            'gs://' + new_bucket
        ]
        bucket_files = run_subprocess(
            gsutil_args, 'Error copying over original bucket to clone bucket')

    # update data references: rewrite old bucket name to the new one in
    # workspace attributes, entities, and notebook files (project helpers).
    update_attributes(new_workspace, new_project,
                      replace_this=original_bucket, with_this=new_bucket)
    update_entities(new_workspace, new_project,
                    replace_this=original_bucket, with_this=new_bucket)
    update_notebooks(new_workspace, new_project,
                     replace_this=original_bucket, with_this=new_bucket)

    # done
    print(
        f'\nFinished copying {original_project}/{original_workspace} to {new_project}/{new_workspace}.\nCheck it out at https://app.terra.bio/#workspaces/{new_project}/{new_workspace}'
    )
    return 'copy successful'
# Script: list notebook files in a workspace bucket (development scratch).
from firecloud import api as fapi
import json
import subprocess

# Workspaces tried during development:
#"featured-workspace-testing", "Terra_Quickstart_Workspace_MORGAN_manualclone" - 4 Notebooks
#fc-product-demo", "Terra_Quickstart_Workspace" - 4 Notebooks
#"gmqltobroad" , "gmql" - 0 Notbooks

# Fetch workspace metadata and resolve its backing GCS bucket.
r = fapi.get_workspace("gmqltobroad", "gmql")
fapi._check_response_code(r, 200)
workspace = r.json()
bucket = workspace['workspace']['bucketName']
bucket_prefix = 'gs://' + bucket
workspace_name = workspace['workspace']['name']

# # Now run a gsutil ls to list files present in the bucket
gsutil_args = ['gsutil', 'ls', 'gs://' + bucket + '/**']
bucket_files = subprocess.check_output(gsutil_args, stderr=subprocess.PIPE)
# Check output produces a string in Py2, Bytes in Py3, so decode if necessary
if type(bucket_files) == bytes:
    bucket_files = bucket_files.decode().split('\n')

# Collect any bucket paths that mention "notebook".
notebook_files = []
print("Testing")
#TODO: Add a use case that if the bucket doesn't have notebooks
for f in bucket_files:
    if "notebook" in f:
        notebook_files.append(f)
if len(notebook_files) == 0:
    print("Workspace has no notebooks")
def main():
    """Copy selected pipeline outputs from a source workspace bucket to a
    destination bucket, grouped by file type (star, rnaseqc, fastqc, coverage).

    Reads all configuration from parse_args(); logs and returns early if the
    source workspace lookup or its JSON payload cannot be retrieved.
    """
    args = parse_args()
    logger.info("Args:\n" + pprint.pformat(args.__dict__))
    try:
        response = api.get_workspace(args.source_namespace,
                                     args.source_workspace)
    except Exception as e:
        # Workspace lookup failed; nothing more to do.
        logger.error(e)
        return
    try:
        ws = response.json()
    except Exception as e:
        logger.error(e)
        logger.error(response)
        return
    logger.info("Workspace:\n" + pprint.pformat(ws))
    source_bucket = ws['workspace']['bucketName']
    # Tuple used to fill the "gs://<bucket>/<batch>/..." destination templates.
    bucket_and_batch_name = (args.dest_bucket, args.batch_name)

    # hg19 bams
    #copy_hg19_bams(args)

    # When no workflow ids were given, one pass with an empty id copies from
    # the bucket root.
    for workflow_id in (args.workflow_id or ['']):
        if workflow_id:
            logger.info("------------------------------------------------")
            logger.info("Processing workflow id: " + workflow_id)
        source_prefix = "gs://%s/%s" % (source_bucket, workflow_id)

        # star: aligner outputs (BAMs, logs, junctions, per-gene counts)
        if "star" in args.file_type:
            dest = "gs://%s/%s/star/" % bucket_and_batch_name
            gsutil_cp("%s**star_out/*.Aligned.sortedByCoord.out.bam" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.Aligned.sortedByCoord.out.bam.bai" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.Chimeric.out.junction.gz" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.Log.final.out" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.Log.out" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.Log.progress.out" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.ReadsPerGene.out.tab.gz" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**star_out/*.SJ.out.tab.gz" % source_prefix, dest, force=args.force)

        # rnaseqc: QC metrics and expression matrices
        if "rnaseqc" in args.file_type:
            dest = "gs://%s/%s/rnaseqc/" % bucket_and_batch_name
            gsutil_cp("%s**call-rnaseqc2/*.metrics.tsv" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**call-rnaseqc2/*.exon_reads.gct.gz" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**call-rnaseqc2/*.gene_reads.gct.gz" % source_prefix, dest, force=args.force)
            gsutil_cp("%s**call-rnaseqc2/*.gene_tpm.gct.gz" % source_prefix, dest, force=args.force)

        # fastqc: zipped QC reports
        if "fastqc" in args.file_type:
            dest = "gs://%s/%s/fastqc/zip/" % bucket_and_batch_name
            gsutil_cp("%s**_fastqc.zip" % source_prefix, dest, force=args.force)

        # coverage: bigWig tracks
        if "coverage" in args.file_type:
            dest = "gs://%s/%s/bigWig/" % bucket_and_batch_name
            gsutil_cp("%s**.bigWig" % source_prefix, dest, force=args.force)

    logger.info("Done")