def test_fetch_drs_url(self):
    """
    Verify that `tnu_table.fetch_drs_url` returns the DRS URI stored with a file name.

    The same lookup is exercised against two row layouts: column headers
    prefixed with 'pfb:' and the unprefixed headers.
    """
    file_name = f"{uuid4()}"
    drs_uri = f"drs://{uuid4()}"
    # Each case pairs a subtest description with the row written for it.
    cases = (
        (
            "new pfb format (column headers prefixed with 'pfb:')",
            {'pfb:file_name': file_name, 'pfb:object_id': drs_uri},
        ),
        (
            "old format",
            {'file_name': file_name, 'object_id': drs_uri},
        ),
    )
    for description, row in cases:
        with self.subTest(description):
            # Every case writes to its own freshly named table.
            table_name = f"test-table-{uuid4()}"
            tnu_table.put_row(table_name, row)
            fetched = tnu_table.fetch_drs_url(table_name, file_name)
            self.assertEqual(fetched, drs_uri)
table_name = f"test-notebook-table-{uuid4()}" data = [ dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_100_GB}"), dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_240_MB}"), dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_021_MB}"), dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_069_GB}") ] assert table_name not in {table_name for table_name in table.list_tables()} try: table.put_rows(table_name, data) assert table_name in {table_name for table_name in table.list_tables()} for row in table.list_rows(table_name): assert row.attributes in data trow = table.get_row(table_name, row.name) assert trow.attributes == row.attributes drs_uri = table.fetch_drs_url(table_name, trow.attributes['file_name']) assert trow.attributes['object_id'] == drs_uri rows = [row for row in table.list_rows(table_name)] table.del_rows(table_name, [rows[0].name, rows[1].name]) assert len(data) - 2 == len( [row for row in table.list_rows(table_name)]) finally: table.delete(table_name) assert table_name not in {table_name for table_name in table.list_tables()} with herzog.Cell("python"): """Test workflows list""" for s in workflows.list_submissions(): print(json.dumps(s, indent=2)) with herzog.Cell("python"):
def fetch_drs_url(args: argparse.Namespace):
    """
    Fetch the DRS URL associated with `--file-name` in `--table`.
    """
    # Resolve the workspace pair through Config and store the resolved values
    # back on `args`, as the lookup below reads them from there.
    resolved = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace, args.workspace_namespace = resolved
    drs_url = tnu_table.fetch_drs_url(
        args.table,
        args.file_name,
        args.workspace,
        args.workspace_namespace,
    )
    # The result is reported on stdout; nothing is returned to the caller.
    print(drs_url)
The TOPMed genomic data that you import from Gen3 is controlled access and imported into Terra as a Data Repository Service (DRS) URL to the controlled access bucket that holds the file. The code below allows you to share your credentials and download the file to your workspace so that you can interact with the file in a notebook. See which files are available in the Reference File data table """ with herzog.Cell("python"): data_table = "reference_file" table.print_column(data_table, "pfb:file_name") with herzog.Cell("markdown"): """ Select which VCF you would like to use in your analysis from the printed list above. """ with herzog.Cell("python"): # Get a drs url from our workspace data table (make sure to put in a file name!) file_name = "YOUR_FILE_NAME_.tar.gz" # If this next step throws a key error, make sure you are not on a Spark cluster # See notes in the "set up your notebook" heading above drs_url = table.fetch_drs_url(data_table, file_name) print(drs_url) # Copy object into our workspace bucket drs.copy(drs_url, file_name) with herzog.Cell("python"): # Extract .tar.gz to our workspace bucket drs.extract_tar_gz(drs_url, file_name) with herzog.Cell("python"): elapsed_notebook_time = time.time() - start_notebook_time print(timedelta(seconds=elapsed_notebook_time))