def import_bedfiles(
    cls, technique, targets_path, baits_path, assembly, species, description=None
):
    """
    Register targets and baits BED files in the technique's storage
    directory and update its `reference_data`.

    Arguments:
        technique (str): technique slug.
        targets_path (str): path to targets bedfile.
        baits_path (str): path to baits bedfile.
        assembly (str): name of reference genome for bedfile.
        species (str): name of genome species.
        description (str): a description of the BED files.

    Returns:
        dict: updated technique instance as retrieved from API.
    """
    utils.check_admin()
    technique = api.get_instance("techniques", technique)
    targets_key = f"{assembly}_targets_bedfile"
    baits_key = f"{assembly}_baits_bedfile"

    if targets_key in technique["reference_data"]:
        raise click.UsageError(
            f"Technique '{technique['slug']}' "
            f"has registered BED files for '{assembly}':\n"
            f'\n\t{technique["reference_data"][targets_key]}'
            f'\n\t{technique["reference_data"][baits_key]}'
        )

    if not technique["storage_url"]:
        technique = update_storage_url("techniques", technique["pk"])

    api.create_instance("assemblies", name=assembly, species=species)
    beds_dir = join(technique["storage_url"], "bed_files", assembly)
    base_name = slugify(f'{technique["slug"]}.{assembly}')
    targets_dst = join(beds_dir, f"{base_name}.targets.bed")
    baits_dst = join(beds_dir, f"{base_name}.baits.bed")
    os.makedirs(beds_dir, exist_ok=True)

    for src, dst in [(targets_path, targets_dst), (baits_path, baits_dst)]:
        cls.echo_src_dst("Copying", src, dst)
        shutil.copy(src, dst)
        click.secho(f"\nProcessing {basename(dst)}...", fg="blue")
        cls.process_bedfile(dst)

    click.secho(f'\nSuccess! patching {technique["slug"]}...', fg="green")

    for i, j in [(targets_key, targets_dst), (baits_key, baits_dst)]:
        technique["reference_data"][i] = {
            "url": j + ".gz",
            "description": description,
        }

    return api.patch_instance(
        endpoint="techniques",
        instance_id=technique["pk"],
        storage_usage=utils.get_tree_size(technique["storage_url"]),
        reference_data=technique["reference_data"],
    )
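# Usage sketch for import_bedfiles (hedged): the class that owns this
# classmethod is not shown in this excerpt, so `BedImporter` below is a
# hypothetical stand-in; slugs and paths are illustrative only.
#
#   BedImporter.import_bedfiles(
#       technique="example-capture-panel",       # existing technique slug
#       targets_path="/data/panel.targets.bed",  # local targets BED file
#       baits_path="/data/panel.baits.bed",      # local baits BED file
#       assembly="GRCh37",
#       species="HUMAN",
#       description="example capture kit BED files",
#   )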
def import_data(
    cls,
    identifier,
    data_src,
    data_id,
    symlink,
    description,
    sub_dir=None,
    model="assemblies",
):
    """
    Register reference resources for a given assembly or technique.

    Arguments:
        identifier (str): name of assembly or technique.
        model (str): either `techniques` or `assemblies`.
        data_src (str): path to reference data.
        data_id (str): identifier that will be used for reference data.
        symlink (bool): symlink instead of move.
        description (str): reference data description.
        sub_dir (str): target sub dir for the resource, default is data_id.

    Returns:
        dict: updated instance as retrieved from API.
    """
    utils.check_admin()
    data_id = slugify(data_id, separator="_")
    click.echo(f'`data_id` set to: {click.style(data_id, fg="green")}')
    instance = api.get_instance(model, identifier)

    if data_id in instance["reference_data"]:
        raise click.UsageError(
            f"{instance['name']} already has reference data registered with id "
            f'"{data_id}":\n\n\t{instance["reference_data"][data_id]}'
        )

    if not instance["storage_url"]:
        instance = update_storage_url(model, instance["name"])

    data_dir = join(instance["storage_url"], sub_dir or data_id)
    data_dst = join(data_dir, basename(data_src))
    os.makedirs(data_dir, exist_ok=True)

    if symlink:
        cls.echo_src_dst("Linking", data_src, data_dst)
        cls.symlink(data_src, data_dst)
    else:
        cls.echo_src_dst("Moving", data_src, data_dst)
        cls.move(data_src, data_dst)

    click.secho(f'\nSuccess! patching {instance["name"]}...', fg="green")
    instance["reference_data"][data_id] = {
        "url": data_dst,
        "description": description,
    }

    return api.patch_instance(
        endpoint=model,
        instance_id=instance["pk"],
        storage_usage=utils.get_tree_size(instance["storage_url"]),
        reference_data=instance["reference_data"],
    )
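# Usage sketch for import_data (hedged): `ReferenceImporter` is a
# hypothetical stand-in for the owning class; ids and the resource path are
# illustrative. With symlink=True the source is linked instead of moved.
#
#   ReferenceImporter.import_data(
#       identifier="GRCh37",                 # assembly name (default model)
#       data_src="/data/refs/genome.fasta",  # path to the reference resource
#       data_id="genome fasta",              # slugified to "genome_fasta"
#       symlink=True,
#       description="example genome fasta",
#   )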
def cb_app_results_keys(ctx, param, value):
    """Print application results keys."""
    if not value or ctx.resilient_parsing:  # pragma: no cover
        return

    click.echo(
        "\n".join(
            f"{click.style(i, fg='green')}\t{j.description}"
            for i, j in sorted(api.get_instance("applications", value).results.items())
        ).expandtabs(30)
    )
    ctx.exit()
def _filters_or_identifiers(endpoint, identifiers, filters, fields=None):
    if filters and identifiers:  # pragma: no cover
        raise click.UsageError("Can't combine filters and identifiers.")

    if fields:
        filters["fields"] = fields

    filters["limit"] = 100_000
    return (
        [api.get_instance(endpoint, i, fields=fields) for i in identifiers]
        if identifiers
        else api.get_instances(endpoint, verbose=True, **filters)
    )
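# Behavior sketch for _filters_or_identifiers (hedged, illustrative calls):
# identifiers take precedence and are fetched one by one; otherwise the
# filters dict drives a bulk query capped at 100_000 results. The filter
# name below is illustrative, not verified against the API schema.
#
#   _filters_or_identifiers("experiments", ["SAMPLE_1"], {})         # by identifier
#   _filters_or_identifiers("experiments", [], {"projects__pk": 1})  # by filters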
def client(self):
    """Get client configuration from database."""
    from isabl_cli.api import get_instance

    if not self.client_id:
        click.secho(
            "Set environment variable ISABL_CLIENT_ID "
            "to your client's database primary key or slug "
            "to configure Isabl CLI directly from the API.",
            fg="yellow",
        )
        return {}

    return get_instance("clients", self.client_id)
def get_reference(identifier, data_id, resources, model):
    """Retrieve reference data from assemblies (default) or techniques."""
    instance = api.get_instance(model, identifier)

    if resources:
        click.echo(
            "\n".join(
                f"{click.style(i, fg='green')}\t{j.description}"
                for i, j in sorted(instance["reference_data"].items())
            ).expandtabs(30)
        )
    else:
        try:
            click.echo(instance["reference_data"][data_id]["url"])
        except KeyError:  # pragma: no cover
            raise click.UsageError(f"No {data_id} reference for {instance['name']}.")
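# Usage sketch for get_reference (hedged, illustrative values): with
# resources=True all registered reference_data keys are listed; otherwise
# the URL for data_id is printed. If this is wrapped as a click command in
# the full source, invoke it via the CLI instead of calling it directly.
#
#   get_reference("GRCh37", data_id="genome_fasta", resources=False, model="assemblies")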
def get_bed(technique, bed_type, assembly):
    """Get a BED file for a given Sequencing Technique."""
    instance = api.get_instance("techniques", technique)
    data_id = f"{assembly}_{bed_type}_bedfile"
    paths = {}

    for i, j in instance.reference_data.items():
        if i.endswith(f"{bed_type}_bedfile"):
            paths[i] = j["url"]

    if not paths:
        raise click.UsageError("No BED files registered yet...")
    elif len(paths) > 1 and not assembly:
        raise click.UsageError(f"Multiple BEDs for {technique}, pass --assembly")

    click.echo(paths[data_id] if assembly else list(paths.values())[0])
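# Usage sketch for get_bed (hedged, illustrative values): bed_type is either
# "targets" or "baits". The "pass --assembly" message suggests this is
# wrapped as a click command in the full source; the direct call below is a
# sketch of the underlying function only.
#
#   get_bed("example-capture-panel", bed_type="targets", assembly="GRCh37")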
def test_system_id():
    data_a = factories.ExperimentFactory()
    data_b = factories.ExperimentFactory(sample=data_a["sample"])
    instance_a = api.create_instance("experiments", **data_a)
    instance_b = api.create_instance("experiments", **data_b)
    system_ids = [instance_a["system_id"], instance_b["system_id"]]

    assert instance_a["sample"]["pk"] == instance_b["sample"]["pk"]
    assert api.get_instance("experiments", system_ids[0])["pk"] == instance_a["pk"]
    assert len(api.get_instances("experiments", system_ids)) == 2

    instance_a["sample"]["data"]["key"] = "value"
    instance_a["sample"]["notes"] = "a note"
    patched = api.patch_instance(
        "experiments", instance_a["pk"], sample=instance_a["sample"]
    )

    assert patched["sample"]["data"]["key"] == "value"
    assert patched["sample"]["notes"] == "a note"
def rerun_signals(filters):
    """Rerun failed signals."""
    for i in api.get_instances(
        "signals", pk__gt=0, data__failure_traceback__isnull=False, **filters
    ):
        click.secho(f"Rerunning signal: {i.slug}", fg="yellow")
        instance = api.get_instance(i.target_endpoint, i.target_id)

        try:
            api._run_signals(
                endpoint=i.target_endpoint,
                instance=instance,
                signals=[import_from_string(i.import_string)],
                raise_error=True,
            )
            api.delete_instance("signals", i.pk)
        except exceptions.AutomationError:
            pass
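# Usage sketch for rerun_signals (hedged): filters are applied on top of the
# failure-traceback filter, so any signals-endpoint API filter should work.
# The filter name below is illustrative, and if this function is wrapped as
# a click command in the full source, pass the filter via the CLI instead.
#
#   rerun_signals({"target_endpoint": "analyses"})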
def test_api_methods():
    endpoint = "diseases"
    diseases = [factories.DiseaseFactory() for _ in range(3)]
    created = [api.create_instance(endpoint, **i) for i in diseases]
    pk = created[0]["pk"]
    pks = [i["pk"] for i in created[:2]]
    patched = api.patch_instance(endpoint, pk, data={"one": 1})

    assert patched["data"]["one"] == 1
    assert api.get_instance(endpoint, pk)["pk"] == pk
    assert api.get_instances(endpoint, pk=pk)[0]["pk"] == pk
    assert api.get_instances_count(endpoint, pk=pk) == 1
    assert len(api.get_instances(endpoint)) == api.get_instances_count(endpoint)
    assert len(api.get_instances(endpoint, pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks[0])) == 1

    for i in created:
        assert api.delete_instance(endpoint, i["pk"]) is None

    assert api.get_token_headers()["Authorization"]
def run_web_signals(filters):
    """Rerun web signals."""
    for i in api.get_instances(
        "signals",
        import_string__in=[
            "isabl_cli.signals.resume_analysis_signal",
            "isabl_cli.signals.force_analysis_signal",
        ],
        **filters,
    ):
        click.secho(f"Running web signal: {i.slug}", fg="yellow")
        instance = api.get_instance(i.target_endpoint, i.target_id)

        try:
            api._run_signals(
                endpoint=i.target_endpoint,
                instance=instance,
                signals=[import_from_string(i.import_string)],
                raise_error=True,
            )
            api.delete_instance("signals", i.pk)
        except exceptions.AutomationError:
            pass
def merge_individual_analyses(individual, application):  # pragma: no cover
    """Merge analyses by individual."""
    individual = api.get_instance("individuals", individual)
    application = api.get_instance("applications", application)
    application = import_from_string(application["application_class"])()
    application.run_individual_merge(individual)
def merge_project_analyses(project, application):  # pragma: no cover
    """Merge analyses by project."""
    project = api.get_instance("projects", project)
    application = api.get_instance("applications", application)
    application = import_from_string(application["application_class"])()
    application.run_project_merge(project)
def patch_status(key, status):
    """Patch status of a given analysis."""
    analysis = api.get_instance("analyses", key)
    api.patch_analysis_status(analysis, status)
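# Usage sketch for patch_status (hedged): test_commands below invokes this as
# a click command with --key/--status flags, so a test-runner invocation is
# the safest sketch; the key value is illustrative and `commands` is assumed
# to be the module exposing the command object.
#
#   from click.testing import CliRunner
#   CliRunner().invoke(commands.patch_status, ["--key", "100", "--status", "STAGED"])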
def test_local_data_import(tmpdir):
    dirs = [tmpdir.strpath]
    projects = [api.create_instance("projects", **factories.ProjectFactory())]
    experiments = [factories.ExperimentFactory(projects=projects) for i in range(4)]
    experiments = [api.create_instance("experiments", **i) for i in experiments]
    keys = [i["pk"] for i in experiments]
    importer = data.LocalDataImporter()
    _, summary = importer.import_data(directories=dirs, pk__in=keys)
    obtained = len(summary.rsplit("no files matched"))
    assert obtained == 4 + 1

    # test can't determine type of fastq
    with pytest.raises(click.UsageError) as error:
        path_1 = tmpdir.join(f'{experiments[0]["system_id"]}.fastq')
        path_1.write("foo")
        importer.import_data(directories=dirs, pk__in=keys)

    path_1.remove()
    assert "cant determine fastq type from" in str(error.value)

    # test imports fastq
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}_R1_foo.fastq')
    path_2 = tmpdir.join(f'{experiments[0]["system_id"]}_R2_foo.fastq')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs, pk__in=keys, commit=True)
    assert "samples matched: 1" in summary
    assert api.Experiment(experiments[0].pk).get_fastq()

    # test can exclude formats
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}.bam')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs, pk__in=keys, dtypes=["BAM"])
    assert "FASTQ_R1" not in str(summary)
    assert "BAM" in str(summary)

    # test can import multiple formats
    _, summary = importer.import_data(directories=dirs, pk__in=keys, commit=True)
    assert "FASTQ_R1" in str(summary)
    assert "BAM" in str(summary)

    # test raise error if duplicated ids
    with pytest.raises(click.UsageError) as error:
        api.patch_instance("experiments", experiments[2]["pk"], identifier="dup_id")
        api.patch_instance("experiments", experiments[3]["pk"], identifier="dup_id")
        importer.import_data(
            key=lambda x: x["identifier"], directories=dirs, pk__in=keys
        )

    assert "same identifier for" in str(error.value)

    # test summary
    path_1 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram1_.cram')
    path_2 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram2_.cram')
    path_3 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam1_.bam')
    path_4 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam2_.bam')
    path_1.write("foo")
    path_2.write("foo")
    path_3.write("foo")
    path_4.write("foo")
    imported, summary = importer.import_data(
        directories=dirs, commit=True, symlink=True, pk__in=keys
    )
    project = api.get_instance("projects", projects[0]["pk"])

    assert project["storage_url"]
    assert imported[0]["storage_usage"] > 0
    assert imported[0]["raw_data"]
    assert imported[1]["raw_data"]
    assert "experiments" in imported[1]["storage_url"]
    assert len(os.listdir(os.path.join(imported[1]["storage_url"], "data"))) == 2
    assert "samples matched: 2" in summary
    assert "samples skipped: 2" in summary

    # test import data from command line and files_data functionality
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}_2.fastq')
    path_1.write("foo")
    path_2.write("foo")
    api.patch_instance("experiments", experiments[1]["pk"], raw_data=None)
    file_data = tmpdir.join("file_data.yaml")

    with open(file_data.strpath, "w") as f:
        yaml.dump(
            {
                os.path.basename(path_1.strpath): {"PU": "TEST_PU"},
                os.path.basename(path_2.strpath): {"PU": "TEST_PU"},
            },
            f,
            default_flow_style=False,
        )

    command = data.LocalDataImporter.as_cli_command()
    runner = CliRunner()
    args = [
        "-di",
        tmpdir.strpath,
        "-id",
        "system_id",
        "-fi",
        "pk__in",
        keys,
        "--files-data",
        file_data.strpath,
        "--commit",
    ]
    result = runner.invoke(command, args, catch_exceptions=False)
    assert "samples matched: 1" in result.output

    experiments[1] = api.get_instance("experiments", experiments[1]["pk"])
    assert experiments[1]["raw_data"][0]["file_data"]["PU"] == "TEST_PU"
    assert experiments[1]["raw_data"][1]["file_data"]["PU"] == "TEST_PU"

    # test import using invalid identifier
    args = ["-di", tmpdir.strpath, "-id", "sample", "-fi", "pk__in", keys]
    result = runner.invoke(command, args)
    assert "invalid type for identifier" in result.output
def test_commands(tmpdir):
    analysis = api.create_instance(
        "analyses",
        project_level_analysis=factories.ProjectFactory(),
        storage_url=tmpdir.strpath,
        status="FINISHED",
        **factories.AnalysisFactory(ran_by=None),
    )
    path = tmpdir.join("test.path")
    path.write("not empty")
    runner = CliRunner()

    args = ["-fi", "pk", analysis["pk"]]
    runner.invoke(commands.process_finished, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])
    assert analysis["status"] == "SUCCEEDED"
    assert analysis["storage_usage"]

    args = ["--key", analysis["pk"], "--status", "STAGED"]
    runner.invoke(commands.patch_status, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])
    assert analysis["status"] == "STAGED"

    args = [
        "analyses",
        "-fi",
        "pk",
        analysis["pk"],
        "-f",
        "pk",
        "-f",
        "application.name",
        "-f",
        "application",
        "-f",
        "carlos",
        "-f",
        "invalid.nested_attr",
    ]
    result = runner.invoke(commands.get_metadata, args, catch_exceptions=False)
    assert analysis["application"]["name"] in result.output
    assert "application.name" in result.output
    assert "INVALID KEY (carlos)" in result.output
    assert "INVALID KEY (nested_attr)" in result.output
    result = runner.invoke(
        commands.get_metadata, args + ["--json"], catch_exceptions=False
    )

    args = ["analyses", "-fi", "pk", analysis["pk"], "--pattern", "*.path"]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    assert "test.path" in result.output

    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output

    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_count, args, catch_exceptions=False)
    assert "1" in result.output

    args = ["-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_outdirs, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    result = runner.invoke(
        commands.get_outdirs, args + ["--pattern", "*.path"], catch_exceptions=False
    )
    assert "test.path" in result.output

    # use two experiments to increase coverage with project_results=
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiment = factories.ExperimentFactory(projects=[project])
    experiment["sample"]["individual"]["species"] = "HUMAN"
    experiment_b = factories.ExperimentFactory(projects=[project])
    experiment_b["sample"] = experiment["sample"]
    analysis = utils.assert_run(
        application=MockApplication(),
        tuples=[
            ([api.create_instance("experiments", **experiment)], []),
            ([api.create_instance("experiments", **experiment_b)], []),
        ],
        commit=True,
        project_results=["project_result_key"],
    )[0]

    args = ["--app-results", analysis.application.pk]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "command_script" in result.output

    args = ["-fi", "pk", analysis.pk, "-r", "command_script"]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "head_job.sh" in result.output

    args = ["-fi", "pk", analysis.pk, "--force"]
    result = runner.invoke(commands.patch_results, args, catch_exceptions=False)
    assert "Retrieving 1 from analyses API endpoint" in result.output