コード例 #1
0
    def test_fetch_drs_url(self):
        """Verify that `tnu_table.fetch_drs_url` returns the stored DRS URI.

        A row holding a file name and a DRS URI is written to a freshly named
        table, then looked up by file name. The check runs once per column
        naming scheme: with the 'pfb:' prefix and without it.
        """
        file_name = str(uuid4())
        drs_uri = f"drs://{uuid4()}"

        # (sub-test label, file-name column, object-id column)
        column_layouts = (
            ("new pfb format (column headers prefixed with 'pfb:')",
             'pfb:file_name', 'pfb:object_id'),
            ("old format", 'file_name', 'object_id'),
        )

        for label, name_column, object_column in column_layouts:
            with self.subTest(label):
                # Each layout gets its own table so the rows never mix.
                table = f"test-table-{uuid4()}"
                row = {name_column: file_name, object_column: drs_uri}
                tnu_table.put_row(table, row)
                fetched = tnu_table.fetch_drs_url(table, file_name)
                self.assertEqual(fetched, drs_uri)
コード例 #2
0
    # Round-trip check of the `table` helpers: create rows, read them back,
    # resolve a DRS URL, delete two rows, then drop the table.
    # The uuid4 suffix gives every run its own table name.
    table_name = f"test-notebook-table-{uuid4()}"
    # One row per DRS_URI_* constant, each with a random file name.
    data = [
        dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_100_GB}"),
        dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_240_MB}"),
        dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_021_MB}"),
        dict(file_name=f"{uuid4()}", object_id=f"{DRS_URI_069_GB}")
    ]
    # Precondition: the table must not exist before rows are written.
    assert table_name not in {table_name for table_name in table.list_tables()}
    try:
        table.put_rows(table_name, data)
        assert table_name in {table_name for table_name in table.list_tables()}
        # Every listed row must carry exactly one of the attribute dicts written above.
        for row in table.list_rows(table_name):
            assert row.attributes in data
        # NOTE(review): this statement is outside the loop, so `row` is whatever
        # the loop bound last; if the listing were empty, `row` would be unbound
        # here unless it was assigned earlier in the file — confirm.
        trow = table.get_row(table_name, row.name)
        assert trow.attributes == row.attributes
        # Looking the row up by file name must yield the stored object_id.
        drs_uri = table.fetch_drs_url(table_name, trow.attributes['file_name'])
        assert trow.attributes['object_id'] == drs_uri
        # Delete the first two listed rows and expect the row count to drop by two.
        rows = [row for row in table.list_rows(table_name)]
        table.del_rows(table_name, [rows[0].name, rows[1].name])
        assert len(data) - 2 == len(
            [row for row in table.list_rows(table_name)])
    finally:
        # Always remove the table, even when an assertion above fails.
        table.delete(table_name)
    # Postcondition: the table no longer appears in the listing.
    assert table_name not in {table_name for table_name in table.list_tables()}

# Cell: print every entry yielded by `workflows.list_submissions()` as
# indented JSON, one entry after another.
with herzog.Cell("python"):
    """Test workflows list"""
    for s in workflows.list_submissions():
        print(json.dumps(s, indent=2))

with herzog.Cell("python"):
コード例 #3
0
ファイル: table.py プロジェクト: mitchac/terra-notebook-utils
def fetch_drs_url(args: argparse.Namespace):
    """
    Fetch the DRS URL associated with `--file-name` in `--table`.
    """
    # Resolve the workspace pair first and store it back on `args`, so the
    # namespace object carries the resolved values after this call.
    workspace, workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = workspace_namespace

    # Look the file up in the requested table and write the result to stdout.
    drs_url = tnu_table.fetch_drs_url(args.table, args.file_name, workspace, workspace_namespace)
    print(drs_url)
コード例 #4
0
    The TOPMed genomic data that you import from Gen3 is controlled access and imported into Terra as a Data Repository Service (DRS) URL to the controlled access bucket that holds the file. The code below allows you to share your credentials and download the file to your workspace so that you can interact with the file in a notebook.

    See which files are available in the Reference File data table
    """

# Cell: choose the data table to work with and print its "pfb:file_name"
# column. `data_table` is reused by the later cell that fetches a DRS URL.
with herzog.Cell("python"):
    data_table = "reference_file"
    table.print_column(data_table, "pfb:file_name")
with herzog.Cell("markdown"):
    """
    Select which VCF you would like to use in your analysis from the printed list above.
    """

# Cell: look up the DRS URL stored for `file_name` in `data_table`, print it,
# and pass both to `drs.copy`. `data_table` is not assigned here; it is
# expected to have been set by an earlier cell. `file_name` is a placeholder
# that the user must replace before running.
with herzog.Cell("python"):
    # Get a drs url from our workspace data table (make sure to put in a file name!)
    file_name = "YOUR_FILE_NAME_.tar.gz"

    # If this next step throws a key error, make sure you are not on a Spark cluster
    # See notes in the "set up your notebook" heading above
    drs_url = table.fetch_drs_url(data_table, file_name)
    print(drs_url)

    # Copy object into our workspace bucket
    drs.copy(drs_url, file_name)
# Cell: extract the archive behind `drs_url`. Both `drs_url` and `file_name`
# are expected to come from an earlier cell; neither is assigned here.
# NOTE(review): the second argument is the archive's own file name — confirm
# that this is what `drs.extract_tar_gz` expects in that position.
with herzog.Cell("python"):
    # Extract .tar.gz to our workspace bucket
    drs.extract_tar_gz(drs_url, file_name)
# Cell: print the time elapsed since `start_notebook_time` as a timedelta.
# `start_notebook_time` is not assigned here; presumably an earlier cell
# recorded it with `time.time()` — confirm.
with herzog.Cell("python"):
    elapsed_notebook_time = time.time() - start_notebook_time
    print(timedelta(seconds=elapsed_notebook_time))