Esempio n. 1
0
 def test_get_workspace(self):
     """Verify get_workspace() returns this test's workspace record."""
     resp = fapi.get_workspace(self.project, self.workspace)
     print(resp.status_code, resp.content)
     self.assertEqual(resp.status_code, 200)
     ws_info = resp.json()['workspace']
     # The returned record must identify the workspace we asked for.
     self.assertEqual(ws_info['name'], self.workspace)
     self.assertEqual(ws_info['namespace'], self.project)
def get_workspace_info(workspace_namespace: str, workspace_name: str) -> dict:
    """Fetch the workspace attribute dict for a FireCloud/Terra workspace.

    Args:
        workspace_namespace: Billing project / namespace of the workspace.
        workspace_name: Name of the workspace.

    Returns:
        The ``"workspace"`` sub-dict of the FireCloud API response.

    Raises:
        ValueError: If the workspace cannot be fetched (any non-200 status,
            including 404 for a missing workspace).
    """
    ws = fapi.get_workspace(workspace_namespace, workspace_name)
    # Previously only 404 raised; other failures (401/403/500) slipped
    # through and surfaced as an opaque KeyError on "workspace" or returned
    # an error payload. Fail loudly on any non-success status.
    if ws.status_code != 200:
        raise ValueError(
            f"Unable to fetch information from workspace {workspace_namespace}/{workspace_name} - {ws.json()}!"
        )
    return ws.json()["workspace"]
Esempio n. 3
0
def get_or_create_workspace(workspace_namespace, workspace_name):
    """Return workspace metadata, creating the workspace if it is missing.

    Args:
        workspace_namespace: Billing project / namespace of the workspace.
        workspace_name: Name of the workspace.

    Returns:
        The creation-response JSON when the workspace was just created, or
        the existing workspace's ``'workspace'`` dict otherwise.
        NOTE(review): the two branches return different shapes; preserved
        as-is — confirm callers expect this asymmetry.

    Raises:
        ValueError: If the workspace does not exist and creation fails.
    """
    ws = fapi.get_workspace(workspace_namespace, workspace_name)
    if ws.status_code == 404:
        ws = fapi.create_workspace(workspace_namespace, workspace_name)
        if ws.status_code != 201:
            # Include which workspace and what the server said, so a failed
            # create is actually diagnosable (the old message had no detail).
            raise ValueError(
                'Unable to create workspace '
                f'{workspace_namespace}/{workspace_name}: {ws.json()}'
            )
        return ws.json()
    else:
        return ws.json()['workspace']
Esempio n. 4
0
 def test_space_search(self):
     """space_search by bucket id finds the workspace; a bogus id finds nothing."""
     # Look up the workspace to learn its bucket id.
     resp = fapi.get_workspace(self.project, self.workspace)
     fapi._check_response_code(resp, 200)
     ws_info = resp.json()["workspace"]
     # Searching by that bucket id should surface the workspace name.
     output = call_func("space_search", "-b", ws_info['bucketName'])
     self.assertIn(ws_info['name'], ''.join(output))
     # A bucket id that cannot exist should match nothing at all.
     self.assertEqual([], call_func("space_search", "-b", '__NoTTHeRe__'))
Esempio n. 5
0
    def refresh(self):
        """Re-fetch this workspace's metadata from FireCloud.

        The metadata cached in ``self.data`` can go stale; calling
        refresh() replaces it with the server's current view.

        Returns:
            self, so calls can be chained.
        """
        response = fapi.get_workspace(self.namespace, self.name, self.api_url)
        fapi._check_response_code(response, 200)
        self.data = response.json()
        return self
Esempio n. 6
0
    def __init__(self, namespace, name, api_url=fapi.PROD_API_ROOT):
        """Load an existing FireCloud workspace by namespace and name.

        Assumes a workspace with the given name and namespace exists at
        ``api_url`` and raises if it does not. Use Workspace.new() to
        create a workspace instead.

        Raises:
            FireCloudServerError: Workspace does not exist, or the
                API call fails.
        """
        self.api_url = api_url
        self.namespace = namespace
        self.name = name

        # Fetch the workspace from FireCloud; anything but 200 raises.
        response = fapi.get_workspace(namespace, name, api_url)
        fapi._check_response_code(response, 200)
        self.data = response.json()
Esempio n. 7
0
def upload(args):
    """Copy a local file or directory into the workspace's GCS bucket via gsutil."""
    response = fapi.get_workspace(args.namespace, args.workspace, args.api_url)
    fapi._check_response_code(response, 200)

    bucket_name = response.json()['workspace']['bucketName']

    # Destination defaults to the bucket root; append any sub-path given.
    dest = 'gs://' + bucket_name + '/'
    if args.destination is not None:
        dest += args.destination.lstrip('/')

    gsutil_args = [
        "gsutil", "-o", "GSUtil:parallel_composite_upload_threshold=50M", "cp"
    ]
    # Directories need a recursive copy.
    if os.path.isdir(args.source):
        gsutil_args.append("-r")
    gsutil_args.extend([args.source, dest])

    # Always echo the command; only execute it when not in --show mode.
    print_(' '.join(gsutil_args))
    if not args.show:
        return subprocess.check_call(gsutil_args)
Esempio n. 8
0
def get_bucket_name(namespace, workspace):
    """Return the GCS bucket name backing the given workspace."""
    ws_response = fapi.get_workspace(namespace, workspace)
    fapi._check_response_code(ws_response, 200)
    workspace_info = ws_response.json()['workspace']
    return workspace_info['bucketName']
Esempio n. 9
0
def update_notebooks(workspace_name, workspace_project, replace_this,
                     with_this):
    """Rewrite text inside every notebook of a workspace's bucket.

    Downloads all ``.ipynb`` files from the workspace bucket's
    ``notebooks/`` folder, replaces ``replace_this`` with ``with_this``
    in each, and uploads the edited files back to the bucket.

    Args:
        workspace_name: Workspace name.
        workspace_project: Workspace namespace / billing project.
        replace_this: Literal text to replace.
        with_this: Replacement text.
    """
    print("Updating NOTEBOOKS for " + workspace_name)

    # Getting the workspace bucket
    r = fapi.get_workspace(workspace_project, workspace_name)
    fapi._check_response_code(r, 200)
    workspace = r.json()
    bucket = workspace['workspace']['bucketName']

    # check if bucket is empty
    gsutil_args = ['gsutil', 'ls', 'gs://' + bucket + '/']
    bucket_files = subprocess.check_output(gsutil_args)
    # check_output returns bytes in Py3, so decode into a list of lines
    if isinstance(bucket_files, bytes):
        bucket_files = bucket_files.decode().split('\n')

    editingFolder = "../notebookEditingFolder"

    # if the bucket isn't empty, check for notebook files and copy them
    if 'gs://' + bucket + '/notebooks/' in bucket_files:
        # Creating the Notebook Editing Folder; start from a clean slate
        if os.path.exists(editingFolder):
            shutil.rmtree(editingFolder)
        os.mkdir(editingFolder)
        # Running a gsutil ls to list files present in the bucket
        gsutil_args = ['gsutil', 'ls', 'gs://' + bucket + '/notebooks/**']
        bucket_files = subprocess.check_output(gsutil_args,
                                               stderr=subprocess.PIPE)
        if isinstance(bucket_files, bytes):
            bucket_files = bucket_files.decode().split('\n')
        # Getting all notebook files
        notebook_files = []
        print("Copying files to local disk...")
        for bf in bucket_files:
            if ".ipynb" in bf:
                notebook_files.append(bf)
                # Downloading notebook to Notebook Editing Folder
                gsutil_args = ['gsutil', 'cp', bf, editingFolder]
                print('  copying ' + bf)
                subprocess.check_output(gsutil_args, stderr=subprocess.PIPE)
        # Replace text in pure Python rather than shelling out to sed:
        # `sed -i ''` is macOS-only, and interpolating caller-supplied text
        # into an os.system() string broke on '#' and shell metacharacters
        # (and was injection-prone). Note: this replaces every occurrence;
        # the old sed expression (no /g flag) only replaced the first per
        # line, which was almost certainly unintended.
        print("Replacing text in files...")
        for filename in os.listdir(editingFolder):
            if filename.endswith(".ipynb"):
                path = os.path.join(editingFolder, filename)
                with open(path, encoding='utf-8') as fh:
                    content = fh.read()
                with open(path, 'w', encoding='utf-8') as fh:
                    fh.write(content.replace(replace_this, with_this))
        # Upload notebooks back into workspace
        print("Uploading files to bucket...")
        for filename in os.listdir(editingFolder):
            if not filename.startswith('.'):
                if not filename.endswith(".ipynb"):
                    print("  WARNING: non notebook file, not replacing " +
                          filename)
                else:
                    print('  uploading ' + filename)
                    gsutil_args = [
                        'gsutil', 'cp', editingFolder + '/' + filename,
                        'gs://' + bucket + "/notebooks/" + filename
                    ]
                    subprocess.check_output(gsutil_args,
                                            stderr=subprocess.PIPE)
                    # Remove notebook from the Notebook Editing Folder
                    os.remove(editingFolder + '/' + filename)
        # Delete the editing folder so stale files don't linger around
        shutil.rmtree(editingFolder)
    else:
        print("Workspace has no notebooks folder")
Esempio n. 10
0
def hard_copy(original_workspace,
              original_project,
              new_workspace,
              new_project,
              set_auth_domain=None):
    """Clone a workspace, copy its bucket, and rewrite data references.

    Clones ``original_project/original_workspace`` to
    ``new_project/new_workspace``, rsyncs the source bucket into the
    clone's bucket, then rewrites attributes, entities, and notebooks so
    they point at the new bucket.

    Args:
        original_workspace: Source workspace name.
        original_project: Source workspace namespace / billing project.
        new_workspace: Destination workspace name.
        new_project: Destination namespace / billing project.
        set_auth_domain: Authorization domain for the clone; when None,
            the source workspace's domain is reused.

    Returns:
        'already exists' if the destination workspace already exists
        (HTTP 409), 'copy successful' otherwise.

    Raises:
        ferrors.FireCloudServerError: On any unexpected API failure.
    """
    # Determine the authorization domain: reuse the source workspace's
    # domain unless the caller supplied one explicitly.
    if set_auth_domain is None:
        response = fapi.get_workspace(namespace=original_project,
                                      workspace=original_workspace)
        if response.status_code not in [200]:
            raise ferrors.FireCloudServerError(response.status_code,
                                               response.content)
        authorization_domain = response.json(
        )['workspace']['authorizationDomain']
        # NOTE(review): when the source has no auth domain this stays an
        # empty list and is passed to clone_workspace as-is — confirm that
        # is the intended "no domain" value for the API.
        if len(authorization_domain) > 0:
            authorization_domain = authorization_domain[0]['membersGroupName']
    else:
        authorization_domain = set_auth_domain

    print(f'Setting authorization domain to {authorization_domain}')

    # clone the workspace; 409 means the destination already exists
    response = fapi.clone_workspace(from_namespace=original_project,
                                    from_workspace=original_workspace,
                                    to_namespace=new_project,
                                    to_workspace=new_workspace,
                                    authorizationDomain=authorization_domain)
    if response.status_code in [409]:
        print(f'\nNOTE: {new_project}/{new_workspace} already exists!')
        return 'already exists'
    elif response.status_code not in [201]:
        raise ferrors.FireCloudServerError(response.status_code,
                                           response.content)

    # get bucket info for original and new workspace
    # NOTE(review): these two lookups do not check the response status
    # before calling .json() — an API error here surfaces as a KeyError.
    original_bucket = fapi.get_workspace(
        original_project, original_workspace).json()['workspace']['bucketName']
    new_bucket = fapi.get_workspace(
        new_project, new_workspace).json()['workspace']['bucketName']

    # copy the source bucket into the clone's bucket (skip when empty)
    bucket_files = run_subprocess(
        ['gsutil', 'ls', 'gs://' + original_bucket + '/'],
        'Error listing bucket contents')
    if len(bucket_files) > 0:
        gsutil_args = [
            'gsutil', '-m', 'rsync', '-r', 'gs://' + original_bucket,
            'gs://' + new_bucket
        ]
        bucket_files = run_subprocess(
            gsutil_args, 'Error copying over original bucket to clone bucket')

    # update data references so they point at the clone's bucket
    update_attributes(new_workspace,
                      new_project,
                      replace_this=original_bucket,
                      with_this=new_bucket)
    update_entities(new_workspace,
                    new_project,
                    replace_this=original_bucket,
                    with_this=new_bucket)
    update_notebooks(new_workspace,
                     new_project,
                     replace_this=original_bucket,
                     with_this=new_bucket)

    # done
    print(
        f'\nFinished copying {original_project}/{original_workspace} to {new_project}/{new_workspace}.\nCheck it out at https://app.terra.bio/#workspaces/{new_project}/{new_workspace}'
    )

    return 'copy successful'
Esempio n. 11
0
from firecloud import api as fapi
import json
import subprocess

#"featured-workspace-testing", "Terra_Quickstart_Workspace_MORGAN_manualclone" - 4 Notebooks
#"fc-product-demo", "Terra_Quickstart_Workspace" - 4 Notebooks
#"gmqltobroad", "gmql" - 0 Notebooks
# List every file in the workspace bucket and report whether it contains
# any notebook files.
r = fapi.get_workspace("gmqltobroad", "gmql")

fapi._check_response_code(r, 200)
workspace = r.json()
bucket = workspace['workspace']['bucketName']
bucket_prefix = 'gs://' + bucket
workspace_name = workspace['workspace']['name']

# Now run a gsutil ls to list files present in the bucket
gsutil_args = ['gsutil', 'ls', 'gs://' + bucket + '/**']
bucket_files = subprocess.check_output(gsutil_args, stderr=subprocess.PIPE)
# check_output returns bytes in Py3, so decode into a list of lines
if isinstance(bucket_files, bytes):
    bucket_files = bucket_files.decode().split('\n')
print("Testing")
# TODO: Add a use case that if the bucket doesn't have notebooks

# Keep only the paths that look like notebook files.
notebook_files = [f for f in bucket_files if "notebook" in f]

if not notebook_files:
    print("Workspace has no notebooks")
def main():
    """Copy selected pipeline outputs from a source workspace bucket to a
    destination bucket.

    Looks up the source workspace's bucket, then for each workflow id (or
    the bucket root when none are given) gsutil-copies the requested file
    types (star, rnaseqc, fastqc, coverage) into per-type folders under
    ``gs://<dest_bucket>/<batch_name>/``.
    """
    args = parse_args()
    logger.info("Args:\n" + pprint.pformat(args.__dict__))

    try:
        response = api.get_workspace(args.source_namespace,
                                     args.source_workspace)
    except Exception as e:
        logger.error(e)
        return

    # Fail early on an error response: an error body still parses as JSON,
    # which previously surfaced later as an opaque KeyError on 'workspace'.
    if response.status_code != 200:
        logger.error("get_workspace failed with status %s: %s",
                     response.status_code, response.content)
        return

    try:
        ws = response.json()
    except Exception as e:
        logger.error(e)
        logger.error(response)
        return

    logger.info("Workspace:\n" + pprint.pformat(ws))

    source_bucket = ws['workspace']['bucketName']
    bucket_and_batch_name = (args.dest_bucket, args.batch_name)

    # Glob suffixes to copy for each file type; iterated below instead of
    # repeating near-identical gsutil_cp calls.
    star_suffixes = [
        "star_out/*.Aligned.sortedByCoord.out.bam",
        "star_out/*.Aligned.sortedByCoord.out.bam.bai",
        "star_out/*.Chimeric.out.junction.gz",
        "star_out/*.Log.final.out",
        "star_out/*.Log.out",
        "star_out/*.Log.progress.out",
        "star_out/*.ReadsPerGene.out.tab.gz",
        "star_out/*.SJ.out.tab.gz",
    ]
    rnaseqc_suffixes = [
        "call-rnaseqc2/*.metrics.tsv",
        "call-rnaseqc2/*.exon_reads.gct.gz",
        "call-rnaseqc2/*.gene_reads.gct.gz",
        "call-rnaseqc2/*.gene_tpm.gct.gz",
    ]

    for workflow_id in (args.workflow_id or ['']):
        if workflow_id:
            logger.info("------------------------------------------------")
            logger.info("Processing workflow id: " + workflow_id)

        source_prefix = "gs://%s/%s" % (source_bucket, workflow_id)

        # star aligner outputs
        if "star" in args.file_type:
            dest = "gs://%s/%s/star/" % bucket_and_batch_name
            for suffix in star_suffixes:
                gsutil_cp("%s**%s" % (source_prefix, suffix),
                          dest,
                          force=args.force)

        # rnaseqc metrics and expression matrices
        if "rnaseqc" in args.file_type:
            dest = "gs://%s/%s/rnaseqc/" % bucket_and_batch_name
            for suffix in rnaseqc_suffixes:
                gsutil_cp("%s**%s" % (source_prefix, suffix),
                          dest,
                          force=args.force)

        # fastqc zip archives
        if "fastqc" in args.file_type:
            dest = "gs://%s/%s/fastqc/zip/" % bucket_and_batch_name
            gsutil_cp("%s**_fastqc.zip" % source_prefix,
                      dest,
                      force=args.force)

        # coverage bigWig tracks
        if "coverage" in args.file_type:
            dest = "gs://%s/%s/bigWig/" % bucket_and_batch_name
            gsutil_cp("%s**.bigWig" % source_prefix, dest, force=args.force)

    logger.info("Done")