コード例 #1
0
def drs_copy(args: argparse.Namespace):
    """
    Copy a drs:// object to a local file or a Google Storage bucket.

    The workspace name and namespace found on `args` are passed through
    `Config.resolve`, stored back onto `args`, and forwarded to `drs.copy`
    together with the DRS url and the destination.

    examples:
        tnu drs copy drs://my-drs-id /tmp/doom
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    # Keep the resolved values on the namespace, as callers may read them back.
    args.workspace = workspace
    args.workspace_namespace = namespace
    drs.copy(args.drs_url, args.dst, workspace, namespace)
コード例 #2
0
 def test_arg_propagation(self):
     """
     Check that `enable_requester_pays` receives the workspace name and namespace.

     The `terra_notebook_utils` attributes listed below are replaced with mocks,
     and the patched `http.post` returns a canned response with status 200.
     `drs.copy` (local and bucket destinations) and `drs.extract_tar_gz` are
     then each expected to call `enable_requester_pays` with WORKSPACE_NAME and
     WORKSPACE_NAMESPACE.
     """
     canned_payload = {
         'googleServiceAccount': {'data': {'project_id': "foo"}},
         'dos': {'data_object': {'urls': [{'url': 'gs://asdf/asdf'}]}},
     }
     fake_json = mock.MagicMock(return_value=canned_payload)
     fake_response = mock.MagicMock(status_code=200, json=fake_json)
     fake_post = mock.MagicMock(return_value=fake_response)

     # Targets that only need to be swapped for a default MagicMock.
     plain_patch_targets = (
         "terra_notebook_utils.drs.gs.get_client",
         "terra_notebook_utils.drs.tar_gz",
         "terra_notebook_utils.blobstore.gs.GSBlob.download",
         "terra_notebook_utils.drs.DRSCopyClient",
         "terra_notebook_utils.drs.GSBlob.open",
     )

     def assert_workspace_forwarded(patched):
         patched.assert_called_with(WORKSPACE_NAME, WORKSPACE_NAMESPACE)

     with ExitStack() as stack:
         for target in plain_patch_targets:
             stack.enter_context(mock.patch(target))
         stack.enter_context(
             mock.patch("terra_notebook_utils.drs.http", post=fake_post))
         # Entered last so it is also torn down first, like a nested `with`.
         enable_requester_pays = stack.enter_context(
             mock.patch("terra_notebook_utils.drs.enable_requester_pays"))

         with self.subTest("Copy to local"):
             with tempfile.NamedTemporaryFile() as local_file:
                 drs.copy(self.drs_url, local_file.name)
             assert_workspace_forwarded(enable_requester_pays)

         with self.subTest("Copy to bucket"):
             enable_requester_pays.reset_mock()
             drs.copy(self.drs_url, "gs://some_bucket/some_key")
             assert_workspace_forwarded(enable_requester_pays)

         with self.subTest("Extract tarball"):
             enable_requester_pays.reset_mock()
             drs.extract_tar_gz(self.drs_url)
             assert_workspace_forwarded(enable_requester_pays)
コード例 #3
0
def drs_copy(args: argparse.Namespace):
    """
    Copy drs:// object to local file or Google Storage bucket

    If 'dst' is suffixed with "/", the destination is assumed to be a folder and the file name is
    derived from the drs response and appended to 'dst'. Otherwise the destination is assumed
    to be absolute.

    examples:
        tnu drs copy drs://my-drs-id /tmp/doom  # copy to /tmp/doom
        tnu drs copy drs://my-drs-id /tmp/doom/  # copy to /tmp/doom/{file-name-from-drs-resolution}
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key/
    """
    # `args` is expected to carry `workspace`, `workspace_namespace`, `drs_url` and `dst`.
    # The resolved workspace values are written back onto `args` before being forwarded.
    args.workspace, args.workspace_namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    kwargs: Dict[str, Any] = dict(workspace_name=args.workspace, workspace_namespace=args.workspace_namespace)
    # Query the indicator setting once, so the value that is checked against None
    # is exactly the value handed to drs.copy.
    indicator_type = CLIConfig.progress_indicator_type()
    if indicator_type is not None:
        # Only pass the keyword when a value is configured; otherwise drs.copy
        # is called without `indicator_type` at all.
        kwargs['indicator_type'] = indicator_type
    drs.copy(args.drs_url, args.dst, **kwargs)
コード例 #4
0
 def test_copy(self):
     """
     Exercise `drs.copy` against a local path, a bucket key, and a bare bucket.

     Each destination runs in its own subTest so a failure in one does not
     hide the results of the others.
     """
     with self.subTest("Test copy to local location"):
         # Copy to a path that does not exist beforehand. A NamedTemporaryFile
         # is created on disk as soon as it is opened, so checking isfile() on
         # its name would succeed even if drs.copy wrote nothing.
         with tempfile.TemporaryDirectory() as dirname:
             local_path = os.path.join(dirname, "test_copy_local_object")
             drs.copy(self.drs_url, local_path)
             self.assertTrue(os.path.isfile(local_path))
     with self.subTest("Test copy to bucket location"):
         # A fresh uuid keeps the key unique for every run.
         key = f"test_oneshot_object_{uuid4()}"
         drs.copy(self.drs_url, f"gs://{WORKSPACE_BUCKET}/{key}")
         self.assertTrue(self._gs_obj_exists(key))
     with self.subTest("Test copy to bare bucket"):
         # With no key in the destination, the object is expected to land
         # under the name reported by drs.info.
         name = drs.info(self.drs_url)['name']
         drs.copy(self.drs_url, f"gs://{WORKSPACE_BUCKET}")
         self.assertTrue(self._gs_obj_exists(name))
コード例 #5
0
 def test_copy_to_local(self):
     """Copy the object at `self.jade_dev_url` onto a temporary local file."""
     with tempfile.NamedTemporaryFile() as scratch:
         destination = scratch.name
         drs.copy(self.jade_dev_url, destination)
コード例 #6
0
 def test_download(self):
     """Run `drs.copy` from `self.jade_dev_url` to the path of a temporary file."""
     with tempfile.NamedTemporaryFile() as scratch:
         target_path = scratch.name
         drs.copy(self.jade_dev_url, target_path)
コード例 #7
0
    DRS_URI_025_GB = "drs://dg.4503/3e8438ec-9a7f-4215-8c23-de2c321aeb42"  # 1000 Genomes, 24.82 GB
    DRS_URI_069_GB = "drs://dg.4503/81f2efd4-20bc-44c9-bf04-2743275d21ac"  # 1000 Genomes, 68.54 GB
    DRS_URI_100_GB = "drs://dg.4503/6ff298c4-35fc-44aa-acb2-f0b4d98e407a"  # 1000 Genomes, 100 GB
    DRS_URI_TAR_GZ = "drs://dg.4503/da8cb525-4532-4d0f-90a3-4d327817ec73"  # GENOA, 198 GB

# Cell: call drs.info on DRS_URI_370_KB (constant defined outside this excerpt).
with herzog.Cell("python"):
    """Test drs info"""
    drs.info(DRS_URI_370_KB)

# Cell: call drs.head on the same DRS_URI_370_KB object.
with herzog.Cell("python"):
    """Test drs head"""
    drs.head(DRS_URI_370_KB)

# Cell: copy two objects (DRS_URI_370_KB, DRS_URI_240_MB) with "." as the destination.
# NOTE(review): "." presumably resolves to the notebook's working directory, with the
# file name taken from the DRS record - confirm against drs.copy.
with herzog.Cell("python"):
    """Test drs copy to local disk"""
    drs.copy(DRS_URI_370_KB, ".")
    drs.copy(DRS_URI_240_MB, ".")

# Cell: copy the same two objects to bucket destinations, first via drs.copy with an
# explicit gs:// URL built from bucket_name, then via drs.copy_to_bucket with a key only.
# Every destination embeds a fresh uuid4() so keys differ between calls and runs.
# NOTE(review): bucket_name is defined outside this excerpt; copy_to_bucket presumably
# falls back to a default bucket when none is given - confirm.
with herzog.Cell("python"):
    """Test drs copy to bucket"""
    drs.copy(DRS_URI_370_KB, f"gs://{bucket_name}/test-notebook-{uuid4()}")
    drs.copy(DRS_URI_240_MB, f"gs://{bucket_name}/test-notebook-{uuid4()}")
    drs.copy_to_bucket(DRS_URI_370_KB, f"test-notebook-{uuid4()}")
    drs.copy_to_bucket(DRS_URI_240_MB, f"test-notebook-{uuid4()}")

with herzog.Cell("python"):
    """Test drs copy batch"""
    manifest = [
        dict(drs_uri=DRS_URI_370_KB,
             dst=f"gs://{bucket_name}/test-notebook-{uuid4()}"),
        dict(drs_uri=DRS_URI_370_KB, dst=f"."),
コード例 #8
0
    The TOPMed genomic data that you import from Gen3 is controlled access and imported into Terra as a Data Repository Service (DRS) URL to the controlled access bucket that holds the file. The code below allows you to share your credentials and download the file to your workspace so that you can interact with the file in a notebook.

    See which files are available in the Reference File data table
    """

# Cell: print the "pfb:file_name" column of the "reference_file" data table.
# data_table is reused by the later cell that fetches the DRS url.
with herzog.Cell("python"):
    data_table = "reference_file"
    table.print_column(data_table, "pfb:file_name")
# Cell (markdown): instruction text only.
with herzog.Cell("markdown"):
    """
    Select which VCF you would like to use in your analysis from the printed list above.
    """

# Cell: look up the DRS url for file_name in data_table, print it, then copy the object.
# file_name is a placeholder the notebook user is expected to replace.
# NOTE(review): the in-cell comment says "workspace bucket", but the destination passed
# to drs.copy is the bare file_name rather than a gs:// URL - this looks like a local
# copy; confirm and adjust the in-cell comment if so.
with herzog.Cell("python"):
    # Get a drs url from our workspace data table (make sure to put in a file name!)
    file_name = "YOUR_FILE_NAME_.tar.gz"

    # If this next step throws a key error, make sure you are not on a Spark cluster
    # See notes in the "set up your notebook" heading above
    drs_url = table.fetch_drs_url(data_table, file_name)
    print(drs_url)

    # Copy object into our workspace bucket
    drs.copy(drs_url, file_name)
# Cell: extract the tarball referenced by drs_url, passing file_name as the second argument.
# NOTE(review): the meaning of the second argument to drs.extract_tar_gz is not visible
# here - confirm whether it is a destination path or a bucket key.
with herzog.Cell("python"):
    # Extract .tar.gz to our workspace bucket
    drs.extract_tar_gz(drs_url, file_name)
# Cell: print the time elapsed since start_notebook_time (set outside this excerpt),
# formatted as a timedelta.
with herzog.Cell("python"):
    elapsed_notebook_time = time.time() - start_notebook_time
    print(timedelta(seconds=elapsed_notebook_time))