Exemple #1
0
 def _test_cmd(self, cmd: Callable, **kwargs):
     """
     Run a CLI command function and return what it printed to stdout.

     `cmd` is called with one `argparse.Namespace` built from
     `self.common_kwargs` plus `kwargs`. The captured output is returned with
     leading and trailing whitespace stripped.
     """
     captured = io.StringIO()
     # tf.name is handed to ConfigOverride, presumably as the config file path (not visible here).
     with NamedTemporaryFile() as tf, \
             ConfigOverride(WORKSPACE_NAME, WORKSPACE_GOOGLE_PROJECT, tf.name):
         Config.write()
         # A key present in both mappings raises TypeError rather than being overridden.
         namespace = argparse.Namespace(**self.common_kwargs, **kwargs)
         with redirect_stdout(captured):
             cmd(namespace)
         return captured.getvalue().strip()
 def test_config_print(self):
     """`config_print` should write the overridden workspace settings to stdout as JSON."""
     expected = {
         "workspace": str(uuid4()),
         "workspace_namespace": str(uuid4()),
     }
     with NamedTemporaryFile() as tf, \
             ConfigOverride(expected["workspace"], expected["workspace_namespace"], tf.name):
         Config.write()
         captured = io.StringIO()
         with redirect_stdout(captured):
             terra_notebook_utils.cli.config.config_print(argparse.Namespace())
         printed = json.loads(captured.getvalue())
         self.assertEqual(printed, expected)
Exemple #3
0
def estimate_submission_cost(args: argparse.Namespace):
    """
    Estimate costs for all workflows in a submission

    Prints a header, one line per item yielded by
    `workflows.estimate_workflow_cost`, a divider, and a final line carrying
    the summed cost.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    workflows_metadata = workflows.get_all_workflows(args.submission_id, workspace, namespace)

    # Column title paired with an integer, presumably the column width (TXTReport is not visible here).
    columns = [
        ("workflow_id", 37),
        ("shard", 6),
        ("cpus", 5),
        ("memory (GB)", 12),
        ("duration (h)", 13),
        ("call cached", 12),
        ("cost", 5),
    ]
    reporter = TXTReport(columns)
    reporter.print_headers()

    running_total = 0
    for workflow_id, workflow_metadata in workflows_metadata.items():
        estimates = workflows.estimate_workflow_cost(workflow_id, workflow_metadata)
        # Shard numbering restarts at 1 for every workflow.
        for shard, estimate in enumerate(estimates, start=1):
            cost = estimate['cost']
            cpus = estimate['number_of_cpus']
            mem = estimate['memory']
            duration = estimate['duration']
            call_cached = estimate['call_cached']
            # The duration is divided by 3600 for the "duration (h)" column.
            reporter.print_line(workflow_id, shard, cpus, mem, duration / 3600, call_cached, cost)
            running_total += cost

    reporter.print_divider()
    reporter.print_line("", "", "", "", "", "", running_total)
Exemple #4
0
def get_workflow(args: argparse.Namespace):
    """
    Get information about a workflow

    The result of `workflows.get_workflow` is printed as indented JSON.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    workflow_info = workflows.get_workflow(args.submission_id, args.workflow_id, workspace, namespace)
    rendered = json.dumps(workflow_info, indent=2)
    print(rendered)
Exemple #5
0
def get_submission(args: argparse.Namespace):
    """
    Get information about a submission, including member workflows

    The result of `workflows.get_submission` is printed as indented JSON.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    submission_info = workflows.get_submission(args.submission_id, workspace, namespace)
    rendered = json.dumps(submission_info, indent=2)
    print(rendered)
def list_tables(args: argparse.Namespace):
    """
    List all tables in the workspace

    Each item yielded by `tnu_table.list_tables` is printed on its own line.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    tables = tnu_table.list_tables(workspace, namespace)
    for table_name in tables:
        print(table_name)
def list_rows(args: argparse.Namespace):
    """
    Get all rows

    Prints the `name` and `attributes` of every row of `args.table`, one row per line.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    rows = tnu_table.list_rows(args.table, workspace, namespace)
    for entry in rows:
        print(entry.name, entry.attributes)
def get_row(args: argparse.Namespace):
    """
    Get one row

    Prints the row's name followed by its attributes as JSON. Nothing is
    printed when the lookup returns None.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    found = tnu_table.get_row(args.table, args.row, workspace, namespace)
    if found is None:
        return
    print(found.name, json.dumps(found.attributes))
 def test_config_set(self):
     """The two config setters should persist their values to the config file as JSON."""
     expected = {
         "workspace": str(uuid4()),
         "workspace_namespace": str(uuid4()),
     }
     with NamedTemporaryFile() as tf, ConfigOverride(None, None, tf.name):
         Config.write()
         cli_config = terra_notebook_utils.cli.config
         cli_config.set_config_workspace(
             argparse.Namespace(workspace=expected["workspace"]))
         cli_config.set_config_workspace_namespace(
             argparse.Namespace(workspace_namespace=expected["workspace_namespace"]))
         # Read the file back from disk rather than trusting in-memory state.
         with open(tf.name) as fh:
             on_disk = json.load(fh)
         self.assertEqual(on_disk, expected)
Exemple #10
0
def list_submissions(args: argparse.Namespace):
    """
    List workflow submissions in chronological order

    Prints the id, date and status of each submission, sorted by the
    `submissionDate` value.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace

    # Gather every row before printing, so a malformed entry fails before any output.
    rows = []
    for sub in workflows.list_submissions(workspace, namespace):
        rows.append((sub['submissionId'], sub['submissionDate'], sub['status']))
    rows.sort(key=lambda row: row[1])

    for row in rows:
        print(*row)
Exemple #11
0
def drs_copy(args: argparse.Namespace):
    """
    Copy drs:// object to local file or Google Storage bucket
    examples:
        tnu drs copy drs://my-drs-id /tmp/doom
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key
    """
    # Resolve the workspace settings, then write them back onto args.
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    drs.copy(args.drs_url, args.dst, workspace, namespace)
Exemple #12
0
def samples(args: argparse.Namespace):
    """
    Output VCF samples.

    The sample list is printed as indented JSON.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    blob = _get_blob(args.path, namespace)
    # A falsy blob means the path is read as a local file instead.
    info = vcf.VCFInfo.with_blob(blob) if blob else vcf.VCFInfo.with_file(args.path)
    print(json.dumps(info.samples, indent=2))
Exemple #13
0
def head(args: argparse.Namespace):
    """
    Output VCF header.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    blob = _get_blob(args.path, namespace)
    # A falsy blob means the path is read as a local file instead.
    info = vcf.VCFInfo.with_blob(blob) if blob else vcf.VCFInfo.with_file(args.path)
    info.print_header()
Exemple #14
0
def drs_copy_batch(args: argparse.Namespace):
    """
    Copy several drs:// objects to local directory or Google Storage bucket
    examples:
        tnu drs copy drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst /tmp/doom
        tnu drs copy drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst gs://my-cool-bucket/my-cool-folder
    """
    # At least one DRS URL must be supplied.
    assert len(args.drs_urls) >= 1
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    drs.copy_batch(args.drs_urls, args.dst, workspace, namespace)
Exemple #15
0
def drs_extract_tar_gz(args: argparse.Namespace):
    """
    Extract a `tar.gz` archive resolved by DRS into a Google Storage bucket.
    example:
        tnu drs extract-tar-gz drs://my-tar-gz gs://my-dst-bucket/root

    The destination may be a bare bucket (`gs://my-dst-bucket`) or include a
    key prefix. An absent or empty prefix is passed on as None.
    """
    scheme = "gs://"
    assert args.dst_gs_url.startswith(scheme)
    # partition() never fails to unpack: a URL with no "/" after the bucket
    # yields an empty prefix instead of raising ValueError as split() did.
    bucket, _, pfx = args.dst_gs_url[len(scheme):].partition("/")
    pfx = pfx or None
    args.workspace, args.workspace_namespace = Config.resolve(
        args.workspace, args.workspace_namespace)
    drs.extract_tar_gz(args.drs_url, pfx, bucket, args.workspace,
                       args.workspace_namespace)
 def test_resolve(self):
     """
     Exercise `Config.resolve` under four configurations: nothing configured,
     both values configured, workspace only, and explicit argument overrides.
     """
     with self.subTest(
             "Should fall back to env vars if arguments are None and config file missing"
     ):
         with ConfigOverride(None, None):
             workspace, namespace = Config.resolve(None, None)
             self.assertEqual(WORKSPACE_NAME, workspace)
             self.assertEqual(WORKSPACE_GOOGLE_PROJECT, namespace)
     with self.subTest(
             "Should fall back to config if arguments are None/False"):
         with ConfigOverride(str(uuid4()), str(uuid4())):
             workspace, namespace = Config.resolve(None, None)
             self.assertEqual(Config.info['workspace'], workspace)
             self.assertEqual(Config.info['workspace_namespace'], namespace)
     with self.subTest(
             "Should attempt namespace resolve via fiss when workspace present, namespace empty"
     ):
         expected_namespace = str(uuid4())
         with mock.patch(
                 "terra_notebook_utils.workspace.get_workspace_namespace",
                 return_value=expected_namespace):
             with ConfigOverride(WORKSPACE_NAME, None):
                 # patch.object restores the module attribute on exit, so the
                 # None override cannot leak into later subtests or tests.
                 with mock.patch.object(terra_notebook_utils.cli,
                                        "WORKSPACE_GOOGLE_PROJECT",
                                        None,
                                        create=True):
                     workspace, namespace = Config.resolve(None, None)
                     self.assertEqual(Config.info['workspace'], workspace)
                     self.assertEqual(expected_namespace, namespace)
     with self.subTest(
             "Should use overrides for workspace and workspace_namespace"):
         expected_workspace = str(uuid4())
         expected_namespace = str(uuid4())
         with mock.patch(
                 "terra_notebook_utils.workspace.get_workspace_namespace",
                 return_value=expected_namespace):
             with ConfigOverride(str(uuid4()), str(uuid4())):
                 # Same scoped override as above; restored when the block exits.
                 with mock.patch.object(terra_notebook_utils.cli,
                                        "WORKSPACE_GOOGLE_PROJECT",
                                        None,
                                        create=True):
                     workspace, namespace = Config.resolve(
                         expected_workspace, expected_namespace)
                     self.assertEqual(expected_workspace, workspace)
                     self.assertEqual(expected_namespace, namespace)
Exemple #17
0
def put_row(args: argparse.Namespace):
    """
    Put a row.
    Example:
    tnu table put-row \\
    --table abbrv_merge_input \\
    --id 1 \\
    bucket=fc-9169fcd1-92ce-4d60-9d2d-d19fd326ff10 \\
    input_keys=test_vcfs/a.vcf.gz,test_vcfs/b.vcf.gz \\
    output_key=foo.vcf.gz

    Each item of `args.data` must have the form KEY=VALUE. Only the first "="
    separates key from value, so values may themselves contain "=".
    A random UUID is used as the row name when `args.row` is falsy.
    Raises ValueError when an item contains no "=".
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    attributes = dict()
    for pair in args.data:
        # partition() splits on the first "=" only; split("=") broke on
        # values such as "query=a=b" with an opaque unpacking error.
        key, sep, val = pair.partition("=")
        if not sep:
            raise ValueError(f"Expected KEY=VALUE, got {pair!r}")
        attributes[key] = val
    row = tnu_table.Row(name=args.row or f"{uuid4()}", attributes=attributes)
    tnu_table.put_row(args.table, row, args.workspace, args.workspace_namespace)
Exemple #18
0
def stats(args: argparse.Namespace):
    """
    Output VCF stats.

    Prints a JSON object with the chromosome and length taken from the VCF
    info, the number of samples, and the size of the blob or local file.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    blob = _get_blob(args.path, namespace)
    if not blob:
        # No blob: treat the path as a local file and read its size from disk.
        import os
        info = vcf.VCFInfo.with_file(args.path)
        size = os.path.getsize(os.path.abspath(args.path))
    else:
        info = vcf.VCFInfo.with_blob(blob)
        size = blob.size
    summary = {
        'first data line chromosome': info.chrom,
        'length associated with first data line chromosome': info.length,
        'number of samples': len(info.samples),
        'size': size
    }
    print(json.dumps(summary, indent=2))
Exemple #19
0
def drs_head(args: argparse.Namespace):
    """
    Print the first bytes of a drs:// object.

    Example:
        tnu drs head drs://crouching-drs-hidden-access
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    payload = drs.head(args.drs_url,
                       num_bytes=args.bytes,
                       buffer=args.buffer,
                       workspace_name=workspace,
                       workspace_namespace=namespace)
    # Outside a notebook, sys.stdout.buffer is the standard non-lossy way to write bytes.
    # Inside a notebook sys.stdout is an ipykernel.iostream.OutStream with no `buffer`:
    # https://github.com/ipython/ipykernel/blob/master/ipykernel/iostream.py#L265
    # so fall back to sys.stdout itself and rely on ipykernel's lossy unicode stream.
    try:
        sink = sys.stdout.buffer
    except AttributeError:
        sink = sys.stdout
    sink.write(payload)
def set_config_workspace_namespace(args: argparse.Namespace):
    """
    Set workspace namespace for cli commands

    Stores the value in `Config.info` and then calls `Config.write()`.
    """
    namespace = args.workspace_namespace
    Config.info["workspace_namespace"] = namespace
    Config.write()
Exemple #21
0
def fetch_drs_url(args: argparse.Namespace):
    """
    Fetch the DRS URL associated with `--file-name` in `--table`.

    The value returned by `tnu_table.fetch_drs_url` is printed to stdout.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    drs_url = tnu_table.fetch_drs_url(args.table, args.file_name, workspace, namespace)
    print(drs_url)
Exemple #22
0
def delete_row(args: argparse.Namespace):
    """
    Delete a row

    Removes `args.row` from `args.table` via `tnu_table.del_row`.
    """
    workspace, namespace = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace = workspace
    args.workspace_namespace = namespace
    tnu_table.del_row(args.table, args.row, workspace, namespace)
Exemple #23
0
def delete_table(args: argparse.Namespace):
    """
    Delete a table

    Resolves the workspace settings, then passes `args.table` to
    `tnu_table.delete`.
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    tnu_table.delete(args.table, args.workspace, args.workspace_namespace)