Example #1
0
    def import_bedfiles(
        cls, technique, targets_path, baits_path, assembly, species, description=None
    ):
        """
        Register targets and baits BED files in a technique's storage directory.

        Both files are copied to `<storage_url>/bed_files/<assembly>/`, passed
        to `cls.process_bedfile`, and recorded in the technique's
        `reference_data` under the keys `<assembly>_targets_bedfile` and
        `<assembly>_baits_bedfile`.

        Arguments:
            technique (str): technique identifier (e.g. slug).
            targets_path (str): path to targets bedfile.
            baits_path (str): path to baits bedfile.
            assembly (str): name of reference genome for bedfile.
            species (str): name of genome species.
            description (str): a description of the BED files.

        Returns:
            dict: updated technique instance as returned by `api.patch_instance`.

        Raises:
            click.UsageError: if a targets or a baits BED file is already
                registered for `assembly`.
        """
        utils.check_admin()
        technique = api.get_instance("techniques", technique)
        targets_key = f"{assembly}_targets_bedfile"
        baits_key = f"{assembly}_baits_bedfile"

        # Check BOTH keys: looking only at the targets key would let an existing
        # baits entry be overwritten, and indexing a missing key while building
        # the error message would raise KeyError instead of the usage error.
        registered = [
            technique["reference_data"][key]
            for key in (targets_key, baits_key)
            if key in technique["reference_data"]
        ]

        if registered:
            raise click.UsageError(
                f"Technique '{technique['slug']}' "
                f"has registered BED files for '{assembly}':\n"
                + "".join(f"\n\t{entry}" for entry in registered)
            )

        if not technique["storage_url"]:
            technique = update_storage_url("techniques", technique["pk"])

        api.create_instance("assemblies", name=assembly, species=species)
        beds_dir = join(technique["storage_url"], "bed_files", assembly)
        base_name = slugify(f'{technique["slug"]}.{assembly}')
        targets_dst = join(beds_dir, f"{base_name}.targets.bed")
        baits_dst = join(beds_dir, f"{base_name}.baits.bed")
        os.makedirs(beds_dir, exist_ok=True)

        for src, dst in [(targets_path, targets_dst), (baits_path, baits_dst)]:
            cls.echo_src_dst("Copying", src, dst)
            shutil.copy(src, dst)
            click.secho(f"\nProcessing {basename(dst)}...", fg="blue")
            cls.process_bedfile(dst)

        click.secho(f'\nSuccess! patching {technique["slug"]}...', fg="green")

        # The registered url carries a `.gz` suffix; presumably
        # `cls.process_bedfile` produces the compressed file (confirm there).
        for key, dst in [(targets_key, targets_dst), (baits_key, baits_dst)]:
            technique["reference_data"][key] = {
                "url": dst + ".gz",
                "description": description,
            }

        return api.patch_instance(
            endpoint="techniques",
            instance_id=technique["pk"],
            storage_usage=utils.get_tree_size(technique["storage_url"]),
            reference_data=technique["reference_data"],
        )
Example #2
0
    def import_data(
        cls,
        identifier,
        data_src,
        data_id,
        symlink,
        description,
        sub_dir=None,
        model="assemblies",
    ):
        """
        Attach a reference resource to an assembly or a technique.

        The resource is moved (or symlinked) into the instance's storage
        directory and recorded in its `reference_data` under the slugified
        `data_id`.

        Arguments:
            identifier (str): name of assembly or technique.
            data_src (str): path to reference data.
            data_id (str): identifier for the reference data; it is slugified
                with `_` as separator before use.
            symlink (bool): when truthy, symlink the data instead of moving it.
            description (str): reference data description.
            sub_dir (str): target sub dir for the resource, default is data_id.
            model (str): either `techniques` or `assemblies`.

        Returns:
            dict: updated instance as returned by `api.patch_instance`.

        Raises:
            click.UsageError: if `data_id` is already registered.
        """
        utils.check_admin()
        data_id = slugify(data_id, separator="_")
        click.echo(f'`data_id` set to: {click.style(data_id, fg="green")}')
        instance = api.get_instance(model, identifier)
        already_registered = instance["reference_data"]

        if data_id in already_registered:
            raise click.UsageError(
                f"{instance['name']} has already reference data registered with id "
                f'"{data_id}":\n\n\t{already_registered[data_id]}'
            )

        if not instance["storage_url"]:
            instance = update_storage_url(model, instance["name"])

        data_dir = join(instance["storage_url"], sub_dir or data_id)
        data_dst = join(data_dir, basename(data_src))
        os.makedirs(data_dir, exist_ok=True)

        # pick the label and the transfer routine in one place
        if symlink:
            label, transfer = "Linking", cls.symlink
        else:
            label, transfer = "Moving", cls.move

        cls.echo_src_dst(label, data_src, data_dst)
        transfer(data_src, data_dst)

        click.secho(f'\nSuccess! patching {instance["name"]}...', fg="green")
        instance["reference_data"][data_id] = {
            "url": data_dst,
            "description": description,
        }

        return api.patch_instance(
            endpoint=model,
            instance_id=instance["pk"],
            storage_usage=utils.get_tree_size(instance["storage_url"]),
            reference_data=instance["reference_data"],
        )
Example #3
0
def cb_app_results_keys(ctx, param, value):
    """
    Click callback that prints an application's result keys and exits.

    Does nothing when no value was passed or during resilient parsing.
    Otherwise each result key is printed (in sorted order) next to its
    description, with tabs expanded to 30 columns, and the context exits.
    """
    if not value or ctx.resilient_parsing:  # pragma: no cover
        return

    results = api.get_instance("applications", value).results
    rows = []

    for key, result in sorted(results.items()):
        rows.append(f"{click.style(key, fg='green')}\t{result.description}")

    click.echo("\n".join(rows).expandtabs(30))
    ctx.exit()
Example #4
0
def _filters_or_identifiers(endpoint, identifiers, filters, fields=None):
    """
    Retrieve instances either by explicit identifiers or by API filters.

    Arguments:
        endpoint (str): API endpoint to query.
        identifiers (list): identifiers to fetch one by one; takes
            precedence when provided.
        filters (dict): query filters used when no identifiers are given;
            may be empty or None. It is never modified.
        fields (object): optional fields restriction forwarded to the API.

    Returns:
        list: instances retrieved one by one when identifiers are given,
            otherwise whatever `api.get_instances` returns.

    Raises:
        click.UsageError: if both filters and identifiers are provided.
    """
    if filters and identifiers:  # pragma: no cover
        raise click.UsageError("Can't combine filters and identifiers.")

    if identifiers:
        return [api.get_instance(endpoint, i, fields=fields) for i in identifiers]

    # Work on a copy so the caller's dict is left untouched, and tolerate
    # `filters=None`.
    query = dict(filters or {})

    if fields:
        query["fields"] = fields
        query["limit"] = 100_000

    return api.get_instances(endpoint, verbose=True, **query)
    def client(self):
        """
        Return the client configuration stored in the database.

        When `self.client_id` is not set, a hint about the
        ISABL_CLIENT_ID environment variable is printed and an empty dict
        is returned instead.
        """
        from isabl_cli.api import get_instance

        if self.client_id:
            return get_instance("clients", self.client_id)

        hint = (
            "Set environment variable ISABL_CLIENT_ID "
            "to your databased client primary key or slug "
            "to configure Isabl CLI directly from the API."
        )
        click.secho(hint, fg="yellow")
        return {}
Example #6
0
def get_reference(identifier, data_id, resources, model):
    """
    Print reference data registered for an assembly or technique.

    With `resources` truthy, every registered data id is listed (sorted)
    next to its description. Otherwise the url registered under `data_id`
    is printed.

    Raises:
        click.UsageError: if nothing is registered under `data_id`.
    """
    instance = api.get_instance(model, identifier)

    if resources:
        listing = [
            f"{click.style(key, fg='green')}\t{entry.description}"
            for key, entry in sorted(instance["reference_data"].items())
        ]
        click.echo("\n".join(listing).expandtabs(30))
        return

    try:
        click.echo(instance["reference_data"][data_id]["url"])
    except KeyError:  # pragma: no cover
        message = f"No {data_id} reference for {instance['name']}."
        raise click.UsageError(message)
Example #7
0
def get_bed(technique, bed_type, assembly):
    """
    Print the path of a BED file registered for a given Sequencing Technique.

    Arguments:
        technique (str): technique identifier.
        bed_type (str): BED flavour used to build the reference data key
            `<assembly>_<bed_type>_bedfile`.
        assembly (str): assembly name; may be empty when the technique has a
            single BED file of this type.

    Raises:
        click.UsageError: if no BED file of this type is registered, if
            several are registered and no assembly was given, or if none is
            registered for the requested assembly.
    """
    instance = api.get_instance("techniques", technique)
    data_id = f"{assembly}_{bed_type}_bedfile"
    suffix = f"{bed_type}_bedfile"
    paths = {
        key: value["url"]
        for key, value in instance.reference_data.items()
        if key.endswith(suffix)
    }

    if not paths:
        raise click.UsageError("No BED files registered yet...")

    if not assembly:
        if len(paths) > 1:
            raise click.UsageError(
                f"Multiple BEDs for {technique}, pass --assembly")

        click.echo(next(iter(paths.values())))
        return

    # An unknown assembly used to surface as a bare KeyError traceback.
    if data_id not in paths:
        raise click.UsageError(
            f"No {bed_type} BED file registered for assembly '{assembly}' "
            f"in {technique}, available: {', '.join(sorted(paths))}")

    click.echo(paths[data_id])
Example #8
0
def test_system_id():
    """Check lookups by system id and patching of a nested sample."""
    first_data = factories.ExperimentFactory()
    second_data = factories.ExperimentFactory(sample=first_data["sample"])
    first = api.create_instance("experiments", **first_data)
    second = api.create_instance("experiments", **second_data)
    system_ids = [item["system_id"] for item in (first, second)]

    # both experiments were created from the same sample payload
    assert first["sample"]["pk"] == second["sample"]["pk"]

    fetched = api.get_instance("experiments", system_ids[0])
    assert fetched["pk"] == first["pk"]
    assert len(api.get_instances("experiments", system_ids)) == 2

    sample = first["sample"]
    sample["data"]["key"] = "value"
    sample["notes"] = "a note"
    patched = api.patch_instance("experiments", first["pk"], sample=sample)
    assert patched["sample"]["data"]["key"] == "value"
    assert patched["sample"]["notes"] == "a note"
Example #9
0
def rerun_signals(filters):
    """
    Rerun signals that have a recorded failure traceback.

    A signal record is deleted only after its rerun completes without
    raising `exceptions.AutomationError`; otherwise it is left in place.
    """
    failed_signals = api.get_instances(
        "signals",
        pk__gt=0,
        data__failure_traceback__isnull=False,
        **filters,
    )

    for signal in failed_signals:
        click.secho(f"Rerunning signal: {signal.slug}", fg="yellow")
        target = api.get_instance(signal.target_endpoint, signal.target_id)

        try:
            callback = import_from_string(signal.import_string)
            api._run_signals(
                endpoint=signal.target_endpoint,
                instance=target,
                signals=[callback],
                raise_error=True,
            )
            api.delete_instance("signals", signal.pk)
        except exceptions.AutomationError:
            # best effort: keep the signal record and move on to the next one
            continue
Example #10
0
def test_api_methods():
    """Round-trip create, patch, get, count and delete through the API helpers."""
    endpoint = "diseases"
    payloads = [factories.DiseaseFactory() for _ in range(3)]
    created = [api.create_instance(endpoint, **payload) for payload in payloads]
    first_pk = created[0]["pk"]
    first_two = [item["pk"] for item in created[:2]]
    patched = api.patch_instance(endpoint, first_pk, data={"one": 1})

    assert patched["data"]["one"] == 1
    assert api.get_instance(endpoint, first_pk)["pk"] == first_pk
    assert api.get_instances(endpoint, pk=first_pk)[0]["pk"] == first_pk
    assert api.get_instances_count(endpoint, pk=first_pk) == 1

    # the unfiltered listing must agree with the unfiltered count
    everything = api.get_instances(endpoint)
    assert len(everything) == api.get_instances_count(endpoint)

    # identifiers alone, then identifiers combined with filters
    assert len(api.get_instances(endpoint, first_two)) == 2
    assert len(api.get_instances(endpoint, first_two, pk__in=first_two)) == 2
    assert len(api.get_instances(endpoint, first_two, pk__in=first_two[0])) == 1

    for item in created:
        assert api.delete_instance(endpoint, item["pk"]) is None

    assert api.get_token_headers()["Authorization"]
Example #11
0
def run_web_signals(filters):
    """
    Run pending web signals (resume and force analysis).

    A signal record is deleted only after it runs without raising
    `exceptions.AutomationError`; otherwise it is left in place.
    """
    web_import_strings = [
        "isabl_cli.signals.resume_analysis_signal",
        "isabl_cli.signals.force_analysis_signal",
    ]
    pending = api.get_instances(
        "signals", import_string__in=web_import_strings, **filters
    )

    for signal in pending:
        click.secho(f"Running web signal: {signal.slug}", fg="yellow")
        target = api.get_instance(signal.target_endpoint, signal.target_id)

        try:
            callback = import_from_string(signal.import_string)
            api._run_signals(
                endpoint=signal.target_endpoint,
                instance=target,
                signals=[callback],
                raise_error=True,
            )
            api.delete_instance("signals", signal.pk)
        except exceptions.AutomationError:
            # best effort: keep the signal record and move on to the next one
            continue
Example #12
0
def merge_individual_analyses(individual, application):  # pragma: no cover
    """Run the application's individual-level merge for one individual."""
    individual_instance = api.get_instance("individuals", individual)
    application_instance = api.get_instance("applications", application)

    # the application record stores the import path of its implementing class
    application_class = import_from_string(application_instance["application_class"])
    application_class().run_individual_merge(individual_instance)
Example #13
0
def merge_project_analyses(project, application):  # pragma: no cover
    """Run the application's project-level merge for one project."""
    project_instance = api.get_instance("projects", project)
    application_instance = api.get_instance("applications", application)

    # the application record stores the import path of its implementing class
    application_class = import_from_string(application_instance["application_class"])
    application_class().run_project_merge(project_instance)
Example #14
0
def patch_status(key, status):
    """Fetch the analysis identified by `key` and patch its status."""
    api.patch_analysis_status(api.get_instance("analyses", key), status)
Example #15
0
def test_local_data_import(tmpdir):
    """
    Exercise `data.LocalDataImporter` against files written into `tmpdir`.

    Four experiments sharing one project are created, then files named after
    their system ids are written step by step to cover: no matches, an
    ambiguous fastq name, paired fastq import, dtype filtering, duplicated
    identifiers, the import summary, and the CLI command with `--files-data`.
    Later steps rely on files and API state left behind by earlier ones.
    """
    dirs = [tmpdir.strpath]
    projects = [api.create_instance("projects", **factories.ProjectFactory())]
    experiments = [
        factories.ExperimentFactory(projects=projects) for i in range(4)
    ]
    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]
    keys = [i["pk"] for i in experiments]

    importer = data.LocalDataImporter()
    _, summary = importer.import_data(directories=dirs, pk__in=keys)
    # splitting on the marker yields occurrences + 1 pieces, so this expects
    # one "no files matched" entry per experiment
    obtained = len(summary.rsplit("no files matched"))
    assert obtained == 4 + 1

    # test can't determine type of fastq
    with pytest.raises(click.UsageError) as error:
        path_1 = tmpdir.join(f'{experiments[0]["system_id"]}.fastq')
        path_1.write("foo")
        importer.import_data(directories=dirs, pk__in=keys)

    # drop the ambiguous file, presumably so it does not affect later imports
    path_1.remove()
    assert "cant determine fastq type from" in str(error.value)

    # test imports fastq
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}_R1_foo.fastq')
    path_2 = tmpdir.join(f'{experiments[0]["system_id"]}_R2_foo.fastq')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "samples matched: 1" in summary
    assert api.Experiment(experiments[0].pk).get_fastq()

    # test can exclude formats
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}.bam')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      dtypes=["BAM"])
    assert "FASTQ_R1" not in str(summary)
    assert "BAM" in str(summary)

    # test can import multiple formats
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "FASTQ_R1" in str(summary)
    assert "BAM" in str(summary)

    # test raise error if duplicated ids
    with pytest.raises(click.UsageError) as error:
        api.patch_instance("experiments",
                           experiments[2]["pk"],
                           identifier="dup_id")
        api.patch_instance("experiments",
                           experiments[3]["pk"],
                           identifier="dup_id")
        importer.import_data(key=lambda x: x["identifier"],
                             directories=dirs,
                             pk__in=keys)

    assert "same identifier for" in str(error.value)

    # test summary
    path_1 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram1_.cram')
    path_2 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram2_.cram')
    path_3 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam1_.bam')
    path_4 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam2_.bam')
    path_1.write("foo")
    path_2.write("foo")
    path_3.write("foo")
    path_4.write("foo")
    imported, summary = importer.import_data(directories=dirs,
                                             commit=True,
                                             symlink=True,
                                             pk__in=keys)

    project = api.get_instance("projects", projects[0]["pk"])
    assert project["storage_url"]
    assert imported[0]["storage_usage"] > 0
    assert imported[0]["raw_data"]
    assert imported[1]["raw_data"]
    assert "experiments" in imported[1]["storage_url"]
    assert len(os.listdir(os.path.join(imported[1]["storage_url"],
                                       "data"))) == 2
    assert "samples matched: 2" in summary
    assert "samples skipped: 2" in summary

    # test import data from command line and files_data functionality
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}_2.fastq')
    path_1.write("foo")
    path_2.write("foo")
    # clear raw_data, presumably so this experiment can be imported again
    api.patch_instance("experiments", experiments[1]["pk"], raw_data=None)
    file_data = tmpdir.join("file_data.yaml")

    # per-file metadata keyed by file basename, consumed via --files-data
    with open(file_data.strpath, "w") as f:
        yaml.dump(
            {
                os.path.basename(path_1.strpath): {
                    "PU": "TEST_PU"
                },
                os.path.basename(path_2.strpath): {
                    "PU": "TEST_PU"
                },
            },
            f,
            default_flow_style=False,
        )

    command = data.LocalDataImporter.as_cli_command()
    runner = CliRunner()
    args = [
        "-di",
        tmpdir.strpath,
        "-id",
        "system_id",
        "-fi",
        "pk__in",
        keys,
        "--files-data",
        file_data.strpath,
        "--commit",
    ]

    result = runner.invoke(command, args, catch_exceptions=False)
    assert "samples matched: 1" in result.output
    experiments[1] = api.get_instance("experiments", experiments[1]["pk"])
    assert experiments[1]["raw_data"][0]["file_data"]["PU"] == "TEST_PU"
    assert experiments[1]["raw_data"][1]["file_data"]["PU"] == "TEST_PU"

    # test import using invalid identifier
    args = ["-di", tmpdir.strpath, "-id", "sample", "-fi", "pk__in", keys]
    result = runner.invoke(command, args)
    assert "invalid type for identifier" in result.output
def test_commands(tmpdir):
    """
    Smoke-test several CLI commands through click's `CliRunner`.

    Covers `process_finished`, `patch_status`, `get_metadata`, `get_paths`,
    `get_count`, `get_outdirs`, `get_results` and `patch_results`, first
    against a project level analysis stored in `tmpdir` and then against an
    analysis produced by running `MockApplication`.
    """
    analysis = api.create_instance(
        "analyses",
        project_level_analysis=factories.ProjectFactory(),
        storage_url=tmpdir.strpath,
        status="FINISHED",
        **factories.AnalysisFactory(ran_by=None),
    )

    # a non-empty file in the analysis directory, matched later by "*.path"
    path = tmpdir.join("test.path")
    path.write("not empty")

    runner = CliRunner()
    args = ["-fi", "pk", analysis["pk"]]
    runner.invoke(commands.process_finished, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])

    # the FINISHED analysis is expected to end up SUCCEEDED with usage set
    assert analysis["status"] == "SUCCEEDED"
    assert analysis["storage_usage"]

    args = ["--key", analysis["pk"], "--status", "STAGED"]
    runner.invoke(commands.patch_status, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])
    assert analysis["status"] == "STAGED"

    # mix valid, nested and invalid field names in the same request
    args = [
        "analyses",
        "-fi",
        "pk",
        analysis["pk"],
        "-f",
        "pk",
        "-f",
        "application.name",
        "-f",
        "application",
        "-f",
        "carlos",
        "-f",
        "invalid.nested_attr",
    ]

    result = runner.invoke(commands.get_metadata, args, catch_exceptions=False)
    assert analysis["application"]["name"] in result.output
    assert "application.name" in result.output
    assert "INVALID KEY (carlos)" in result.output
    assert "INVALID KEY (nested_attr)" in result.output
    # the --json variant is only invoked; its output is not asserted
    result = runner.invoke(commands.get_metadata,
                           args + ["--json"],
                           catch_exceptions=False)

    args = ["analyses", "-fi", "pk", analysis["pk"], "--pattern", "*.path"]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    assert "test.path" in result.output

    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output

    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_count, args, catch_exceptions=False)
    assert "1" in result.output

    args = ["-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_outdirs, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    result = runner.invoke(commands.get_outdirs,
                           args + ["--pattern", "*.path"],
                           catch_exceptions=False)
    assert "test.path" in result.output

    # use two experiments to increase coverage with project_results=
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiment = factories.ExperimentFactory(projects=[project])
    experiment["sample"]["individual"]["species"] = "HUMAN"
    experiment_b = factories.ExperimentFactory(projects=[project])
    experiment_b["sample"] = experiment["sample"]
    analysis = utils.assert_run(
        application=MockApplication(),
        tuples=[
            ([api.create_instance("experiments", **experiment)], []),
            ([api.create_instance("experiments", **experiment_b)], []),
        ],
        commit=True,
        project_results=["project_result_key"],
    )[0]

    args = ["--app-results", analysis.application.pk]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "command_script" in result.output
    args = ["-fi", "pk", analysis.pk, "-r", "command_script"]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "head_job.sh" in result.output

    args = ["-fi", "pk", analysis.pk, "--force"]
    result = runner.invoke(commands.patch_results,
                           args,
                           catch_exceptions=False)
    assert "Retrieving 1 from analyses API endpoint" in result.output