def _test_cmd(self, cmd: Callable, **kwargs):
    with NamedTemporaryFile() as tf:
        with ConfigOverride(WORKSPACE_NAME, WORKSPACE_GOOGLE_PROJECT, tf.name):
            Config.write()
            args = argparse.Namespace(**dict(**self.common_kwargs, **kwargs))
            out = io.StringIO()
            with redirect_stdout(out):
                cmd(args)
            return out.getvalue().strip()
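# Hypothetical usage sketch of the _test_cmd helper above (the asserted output is
# illustrative, not part of the test suite): run a CLI command function against the
# shared test kwargs and assert on its captured stdout.
#
#     def test_config_print_cmd(self):
#         output = self._test_cmd(terra_notebook_utils.cli.config.config_print)
#         self.assertIsInstance(output, str)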
def test_config_print(self):
    workspace = f"{uuid4()}"
    workspace_namespace = f"{uuid4()}"
    with NamedTemporaryFile() as tf:
        with ConfigOverride(workspace, workspace_namespace, tf.name):
            Config.write()
            args = argparse.Namespace()
            out = io.StringIO()
            with redirect_stdout(out):
                terra_notebook_utils.cli.config.config_print(args)
            data = json.loads(out.getvalue())
            self.assertEqual(data, dict(workspace=workspace, workspace_namespace=workspace_namespace))
def estimate_submission_cost(args: argparse.Namespace):
    """
    Estimate costs for all workflows in a submission
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    workflows_metadata = workflows.get_all_workflows(args.submission_id, args.workspace, args.workspace_namespace)
    reporter = TXTReport([("workflow_id", 37),
                          ("shard", 6),
                          ("cpus", 5),
                          ("memory (GB)", 12),
                          ("duration (h)", 13),
                          ("call cached", 12),
                          ("cost", 5)])
    reporter.print_headers()
    total = 0
    for workflow_id, workflow_metadata in workflows_metadata.items():
        shard = 1
        for item in workflows.estimate_workflow_cost(workflow_id, workflow_metadata):
            cost, cpus, mem, duration, call_cached = (item[k] for k in ('cost',
                                                                        'number_of_cpus',
                                                                        'memory',
                                                                        'duration',
                                                                        'call_cached'))
            reporter.print_line(workflow_id, shard, cpus, mem, duration / 3600, call_cached, cost)
            total += cost
            shard += 1
    reporter.print_divider()
    reporter.print_line("", "", "", "", "", "", total)
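# Note on the loop above: each item yielded by workflows.estimate_workflow_cost is expected
# to carry the keys 'cost', 'number_of_cpus', 'memory', 'duration' (seconds) and 'call_cached';
# the duration is converted to hours (duration / 3600) before printing, and per-shard costs
# are summed into the trailing total line of the report.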
def get_workflow(args: argparse.Namespace):
    """
    Get information about a workflow
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    wf = workflows.get_workflow(args.submission_id, args.workflow_id, args.workspace, args.workspace_namespace)
    print(json.dumps(wf, indent=2))
def get_submission(args: argparse.Namespace):
    """
    Get information about a submission, including member workflows
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    submission = workflows.get_submission(args.submission_id, args.workspace, args.workspace_namespace)
    print(json.dumps(submission, indent=2))
def list_tables(args: argparse.Namespace):
    """
    List all tables in the workspace
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    for table in tnu_table.list_tables(args.workspace, args.workspace_namespace):
        print(table)
def list_rows(args: argparse.Namespace):
    """
    List all rows in a table
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    for row in tnu_table.list_rows(args.table, args.workspace, args.workspace_namespace):
        print(row.name, row.attributes)
def get_row(args: argparse.Namespace):
    """
    Get one row
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    row = tnu_table.get_row(args.table, args.row, args.workspace, args.workspace_namespace)
    if row is not None:
        print(row.name, json.dumps(row.attributes))
def test_config_set(self):
    new_workspace = f"{uuid4()}"
    new_workspace_namespace = f"{uuid4()}"
    with NamedTemporaryFile() as tf:
        with ConfigOverride(None, None, tf.name):
            Config.write()
            args = argparse.Namespace(workspace=new_workspace)
            terra_notebook_utils.cli.config.set_config_workspace(args)
            args = argparse.Namespace(workspace_namespace=new_workspace_namespace)
            terra_notebook_utils.cli.config.set_config_workspace_namespace(args)
            with open(tf.name) as fh:
                data = json.loads(fh.read())
            self.assertEqual(data, dict(workspace=new_workspace, workspace_namespace=new_workspace_namespace))
def list_submissions(args: argparse.Namespace):
    """
    List workflow submissions in chronological order
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    listing = [(s['submissionId'], s['submissionDate'], s['status'])
               for s in workflows.list_submissions(args.workspace, args.workspace_namespace)]
    for submission_id, date, status in sorted(listing, key=lambda item: item[1]):
        print(submission_id, date, status)
def drs_copy(args: argparse.Namespace):
    """
    Copy drs:// object to local file or Google Storage bucket

    examples:
        tnu drs copy drs://my-drs-id /tmp/doom
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    drs.copy(args.drs_url, args.dst, args.workspace, args.workspace_namespace)
def samples(args: argparse.Namespace):
    """
    Output VCF samples.
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    blob = _get_blob(args.path, args.workspace_namespace)
    if blob:
        info = vcf.VCFInfo.with_blob(blob)
    else:
        info = vcf.VCFInfo.with_file(args.path)
    print(json.dumps(info.samples, indent=2))
def head(args: argparse.Namespace):
    """
    Output VCF header.
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    blob = _get_blob(args.path, args.workspace_namespace)
    if blob:
        info = vcf.VCFInfo.with_blob(blob)
    else:
        info = vcf.VCFInfo.with_file(args.path)
    info.print_header()
def drs_copy_batch(args: argparse.Namespace):
    """
    Copy several drs:// objects to local directory or Google Storage bucket

    examples:
        tnu drs copy drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst /tmp/doom
        tnu drs copy drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst gs://my-cool-bucket/my-cool-folder
    """
    assert 1 <= len(args.drs_urls)
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    drs.copy_batch(args.drs_urls, args.dst, args.workspace, args.workspace_namespace)
def drs_extract_tar_gz(args: argparse.Namespace):
    """
    Extract a `tar.gz` archive resolved by DRS into a Google Storage bucket.

    example:
        tnu drs extract-tar-gz drs://my-tar-gz gs://my-dst-bucket/root
    """
    assert args.dst_gs_url.startswith("gs://")
    bucket, pfx = args.dst_gs_url[5:].split("/", 1)
    pfx = pfx or None
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    drs.extract_tar_gz(args.drs_url, pfx, bucket, args.workspace, args.workspace_namespace)
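# Worked example of the destination parsing in drs_extract_tar_gz above:
#     "gs://my-dst-bucket/root"  ->  bucket="my-dst-bucket", pfx="root"
#     "gs://my-dst-bucket/"      ->  bucket="my-dst-bucket", pfx=None (empty prefix falls back to None)
# A destination with no "/" after the bucket name would fail the split above.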
def test_resolve(self):
    with self.subTest("Should fall back to env vars if arguments are None and config file missing"):
        with ConfigOverride(None, None):
            workspace, namespace = Config.resolve(None, None)
            self.assertEqual(WORKSPACE_NAME, workspace)
            self.assertEqual(WORKSPACE_GOOGLE_PROJECT, namespace)
    with self.subTest("Should fall back to config if arguments are None/False"):
        with ConfigOverride(str(uuid4()), str(uuid4())):
            workspace, namespace = Config.resolve(None, None)
            self.assertEqual(Config.info['workspace'], workspace)
            self.assertEqual(Config.info['workspace_namespace'], namespace)
    with self.subTest("Should attempt namespace resolve via fiss when workspace present, namespace empty"):
        expected_namespace = str(uuid4())
        with mock.patch("terra_notebook_utils.workspace.get_workspace_namespace", return_value=expected_namespace):
            with ConfigOverride(WORKSPACE_NAME, None):
                terra_notebook_utils.cli.WORKSPACE_GOOGLE_PROJECT = None
                workspace, namespace = Config.resolve(None, None)
                self.assertEqual(Config.info['workspace'], workspace)
                self.assertEqual(expected_namespace, namespace)
    with self.subTest("Should use overrides for workspace and workspace_namespace"):
        expected_workspace = str(uuid4())
        expected_namespace = str(uuid4())
        with mock.patch("terra_notebook_utils.workspace.get_workspace_namespace", return_value=expected_namespace):
            with ConfigOverride(str(uuid4()), str(uuid4())):
                terra_notebook_utils.cli.WORKSPACE_GOOGLE_PROJECT = None
                workspace, namespace = Config.resolve(expected_workspace, expected_namespace)
                self.assertEqual(expected_workspace, workspace)
                self.assertEqual(expected_namespace, namespace)
def put_row(args: argparse.Namespace):
    """
    Put a row.

    Example:
        tnu table put-row \\
            --table abbrv_merge_input \\
            --id 1 \\
            bucket=fc-9169fcd1-92ce-4d60-9d2d-d19fd326ff10 \\
            input_keys=test_vcfs/a.vcf.gz,test_vcfs/b.vcf.gz \\
            output_key=foo.vcf.gz
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    attributes = dict()
    for pair in args.data:
        # Split on the first "=" only so attribute values may themselves contain "=".
        key, val = pair.split("=", 1)
        attributes[key] = val
    row = tnu_table.Row(name=args.row or f"{uuid4()}", attributes=attributes)
    tnu_table.put_row(args.table, row, args.workspace, args.workspace_namespace)
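# Illustrative result of the key=value parsing above for the docstring example
# (values taken from the example invocation, not from a live workspace):
#     attributes == {
#         "bucket": "fc-9169fcd1-92ce-4d60-9d2d-d19fd326ff10",
#         "input_keys": "test_vcfs/a.vcf.gz,test_vcfs/b.vcf.gz",
#         "output_key": "foo.vcf.gz",
#     }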
def stats(args: argparse.Namespace):
    """
    Output VCF stats.
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    blob = _get_blob(args.path, args.workspace_namespace)
    if blob:
        info = vcf.VCFInfo.with_blob(blob)
        size = blob.size
    else:
        import os
        info = vcf.VCFInfo.with_file(args.path)
        size = os.path.getsize(os.path.abspath(args.path))
    stats = {
        'first data line chromosome': info.chrom,
        'length associated with first data line chromosome': info.length,
        'number of samples': len(info.samples),
        'size': size,
    }
    print(json.dumps(stats, indent=2))
def drs_head(args: argparse.Namespace):
    """
    Print the first bytes of a drs:// object.

    Example:
        tnu drs head drs://crouching-drs-hidden-access
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    the_bytes = drs.head(args.drs_url,
                         num_bytes=args.bytes,
                         buffer=args.buffer,
                         workspace_name=args.workspace,
                         workspace_namespace=args.workspace_namespace)
    # Outside of a Python notebook, sys.stdout.buffer is the standard non-lossy way to write bytes.
    # Inside a notebook, sys.stdout is an ipykernel.iostream.OutStream object with no `buffer`
    # attribute (https://github.com/ipython/ipykernel/blob/master/ipykernel/iostream.py#L265),
    # so we fall back to bare sys.stdout and rely on ipykernel's lossy unicode stream.
    stdout_buffer = getattr(sys.stdout, 'buffer', sys.stdout)
    stdout_buffer.write(the_bytes)
def set_config_workspace_namespace(args: argparse.Namespace):
    """
    Set workspace namespace for cli commands
    """
    Config.info["workspace_namespace"] = args.workspace_namespace
    Config.write()
def fetch_drs_url(args: argparse.Namespace):
    """
    Fetch the DRS URL associated with `--file-name` in `--table`.
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    print(tnu_table.fetch_drs_url(args.table, args.file_name, args.workspace, args.workspace_namespace))
def delete_row(args: argparse.Namespace):
    """
    Delete a row
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    tnu_table.del_row(args.table, args.row, args.workspace, args.workspace_namespace)
def delete_table(args: argparse.Namespace):
    """
    Delete a table
    """
    args.workspace, args.workspace_namespace = Config.resolve(args.workspace, args.workspace_namespace)
    tnu_table.delete(args.table, args.workspace, args.workspace_namespace)