Beispiel #1
0
def test_assert_run():
    """Check that ``utils.assert_run`` returns a truthy value for a mock run."""
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiment_kwargs = factories.ExperimentFactory(projects=[project])
    experiment_kwargs["sample"]["individual"]["species"] = "HUMAN"

    # Instantiate the application before registering the experiment, which is
    # the order in which the pieces are built for the run.
    mock_app = MockApplication()
    experiment = api.create_instance("experiments", **experiment_kwargs)

    # A single (targets, references) tuple with one target and no references.
    run_tuples = [([experiment], [])]
    outcome = utils.assert_run(
        application=mock_app,
        tuples=run_tuples,
        commit=True,
    )
    assert outcome
Beispiel #2
0
def test_unique_analysis_per_individual_app(tmpdir):
    """Run the per-individual applications over four experiments of one sample.

    Each run is expected to yield exactly one analysis that exposes
    ``analysis_result_key`` and targets every experiment passed in.
    """

    def _run_and_check(app, run_tuples, expected_targets):
        # Run the application and verify the shape shared by all scenarios.
        ran, _, __ = app.run(run_tuples, commit=True)
        assert len(ran) == 1
        analysis = ran[0][0]
        assert "analysis_result_key" in analysis["results"]
        assert len(analysis.targets) == expected_targets
        return analysis

    individual = factories.IndividualFactory(species="HUMAN")
    sample = factories.SampleFactory(individual=individual)
    project = api.create_instance("projects", **factories.ProjectFactory())

    # Build all four payloads first, then register them with the API.
    payloads = []
    for index in range(4):
        payloads.append(
            factories.ExperimentFactory(
                identifier=str(index),
                sample=sample,
                projects=[project],
            )
        )
    experiments = []
    for payload in payloads:
        experiments.append(api.create_instance("experiments", **payload))

    all_targets = [(experiments, [])]

    first = _run_and_check(UniquePerIndividualApplication(), all_targets, 4)
    expected_key = experiments[0].sample.individual.system_id
    assert first["results"].analysis_result_key == expected_key

    _run_and_check(UniquePerIndividualProtectResultsFalse(), all_targets, 4)

    # test application_protect_results false
    _run_and_check(
        UniquePerIndividualProtectResultsFalse(),
        [(experiments[:2], [])],
        2,
    )

    # test application_protect_results reduce add more samples - dont remove this test
    _run_and_check(
        UniquePerIndividualProtectResultsFalse(),
        [(experiments, [])],
        4,
    )

    # Finally, invoke the application through its CLI command.
    cli_runner = CliRunner()
    cli_command = UniquePerIndividualApplication.as_cli_command()
    system_id_filter = ["-fi", "system_id", experiments[0].system_id]
    cli_result = cli_runner.invoke(
        cli_command, system_id_filter, catch_exceptions=False
    )
    assert experiments[0].system_id in cli_result.output
Beispiel #3
0
def test_local_data_import(tmpdir):
    """Exercise ``data.LocalDataImporter`` through its API and its CLI command.

    Scenarios run in sequence against the same ``tmpdir`` and the same four
    experiments, so later steps depend on files and patches left by earlier
    ones. Do not reorder the sections.
    """
    dirs = [tmpdir.strpath]
    projects = [api.create_instance("projects", **factories.ProjectFactory())]
    experiments = [
        factories.ExperimentFactory(projects=projects) for _ in range(4)
    ]
    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]
    keys = [i["pk"] for i in experiments]

    # with an empty directory, every experiment is reported as unmatched
    importer = data.LocalDataImporter()
    _, summary = importer.import_data(directories=dirs, pk__in=keys)
    assert summary.count("no files matched") == 4

    # test can't determine type of fastq
    # Setup stays outside `pytest.raises` so only the import call itself can
    # satisfy the expected exception.
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}.fastq')
    path_1.write("foo")
    with pytest.raises(click.UsageError) as error:
        importer.import_data(directories=dirs, pk__in=keys)

    path_1.remove()
    assert "cant determine fastq type from" in str(error.value)

    # test imports fastq
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}_R1_foo.fastq')
    path_2 = tmpdir.join(f'{experiments[0]["system_id"]}_R2_foo.fastq')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "samples matched: 1" in summary
    assert api.Experiment(experiments[0].pk).get_fastq()

    # test can exclude formats
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}.bam')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      dtypes=["BAM"])
    assert "FASTQ_R1" not in str(summary)
    assert "BAM" in str(summary)

    # test can import multiple formats
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "FASTQ_R1" in str(summary)
    assert "BAM" in str(summary)

    # test raise error if duplicated ids
    # Patch both experiments first; only the import is expected to raise.
    api.patch_instance("experiments",
                       experiments[2]["pk"],
                       identifier="dup_id")
    api.patch_instance("experiments",
                       experiments[3]["pk"],
                       identifier="dup_id")
    with pytest.raises(click.UsageError) as error:
        importer.import_data(key=lambda x: x["identifier"],
                             directories=dirs,
                             pk__in=keys)

    assert "same identifier for" in str(error.value)

    # test summary
    path_1 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram1_.cram')
    path_2 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram2_.cram')
    path_3 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam1_.bam')
    path_4 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam2_.bam')
    path_1.write("foo")
    path_2.write("foo")
    path_3.write("foo")
    path_4.write("foo")
    imported, summary = importer.import_data(directories=dirs,
                                             commit=True,
                                             symlink=True,
                                             pk__in=keys)

    project = api.get_instance("projects", projects[0]["pk"])
    assert project["storage_url"]
    assert imported[0]["storage_usage"] > 0
    assert imported[0]["raw_data"]
    assert imported[1]["raw_data"]
    assert "experiments" in imported[1]["storage_url"]
    assert len(os.listdir(os.path.join(imported[1]["storage_url"],
                                       "data"))) == 2
    assert "samples matched: 2" in summary
    assert "samples skipped: 2" in summary

    # test import data from command line and files_data functionality
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}_2.fastq')
    path_1.write("foo")
    path_2.write("foo")
    # clear raw_data for this experiment before importing through the CLI
    api.patch_instance("experiments", experiments[1]["pk"], raw_data=None)
    file_data = tmpdir.join("file_data.yaml")

    # map each fastq basename to the extra metadata expected on the raw data
    with open(file_data.strpath, "w") as f:
        yaml.dump(
            {
                os.path.basename(path_1.strpath): {
                    "PU": "TEST_PU"
                },
                os.path.basename(path_2.strpath): {
                    "PU": "TEST_PU"
                },
            },
            f,
            default_flow_style=False,
        )

    command = data.LocalDataImporter.as_cli_command()
    runner = CliRunner()
    # NOTE(review): `keys` is passed as a list object rather than a string;
    # left as-is because it depends on how the command parses filters.
    args = [
        "-di",
        tmpdir.strpath,
        "-id",
        "system_id",
        "-fi",
        "pk__in",
        keys,
        "--files-data",
        file_data.strpath,
        "--commit",
    ]

    result = runner.invoke(command, args, catch_exceptions=False)
    assert "samples matched: 1" in result.output
    experiments[1] = api.get_instance("experiments", experiments[1]["pk"])
    assert experiments[1]["raw_data"][0]["file_data"]["PU"] == "TEST_PU"
    assert experiments[1]["raw_data"][1]["file_data"]["PU"] == "TEST_PU"

    # test import using invalid identifier
    # exceptions are not re-raised here; the message is read from the output
    args = ["-di", tmpdir.strpath, "-id", "sample", "-fi", "pk__in", keys]
    result = runner.invoke(command, args)
    assert "invalid type for identifier" in result.output
def test_commands(tmpdir):
    """Drive the CLI commands against a finished analysis and a mock run."""
    runner = CliRunner()

    def _invoke(cli_command, cli_args):
        # Every invocation in this test lets exceptions propagate.
        return runner.invoke(cli_command, cli_args, catch_exceptions=False)

    analysis = api.create_instance(
        "analyses",
        project_level_analysis=factories.ProjectFactory(),
        storage_url=tmpdir.strpath,
        status="FINISHED",
        **factories.AnalysisFactory(ran_by=None),
    )
    analysis_pk = analysis["pk"]

    # Give the analysis directory some content.
    marker = tmpdir.join("test.path")
    marker.write("not empty")

    pk_filter = ["-fi", "pk", analysis_pk]
    analyses_filter = ["analyses", *pk_filter]

    # process_finished: FINISHED -> SUCCEEDED, with storage usage recorded.
    _invoke(commands.process_finished, [*pk_filter])
    analysis = api.get_instance("analyses", analysis_pk)
    assert analysis["status"] == "SUCCEEDED"
    assert analysis["storage_usage"]

    # patch_status
    _invoke(commands.patch_status, ["--key", analysis_pk, "--status", "STAGED"])
    analysis = api.get_instance("analyses", analysis_pk)
    assert analysis["status"] == "STAGED"

    # get_metadata, including invalid top-level and nested fields
    metadata_args = [*analyses_filter]
    for field in (
        "pk",
        "application.name",
        "application",
        "carlos",
        "invalid.nested_attr",
    ):
        metadata_args += ["-f", field]

    metadata = _invoke(commands.get_metadata, metadata_args)
    output = metadata.output
    assert analysis["application"]["name"] in output
    assert "application.name" in output
    assert "INVALID KEY (carlos)" in output
    assert "INVALID KEY (nested_attr)" in output
    # The JSON variant is only invoked; its output is not inspected.
    _invoke(commands.get_metadata, metadata_args + ["--json"])

    # get_paths with and without a pattern
    with_pattern = _invoke(
        commands.get_paths, analyses_filter + ["--pattern", "*.path"]
    )
    assert tmpdir.strpath in with_pattern.output
    assert "test.path" in with_pattern.output

    without_pattern = _invoke(commands.get_paths, [*analyses_filter])
    assert tmpdir.strpath in without_pattern.output

    # get_count
    count = _invoke(commands.get_count, [*analyses_filter])
    assert "1" in count.output

    # get_outdirs with and without a pattern
    outdirs = _invoke(commands.get_outdirs, [*pk_filter])
    assert tmpdir.strpath in outdirs.output
    outdirs = _invoke(commands.get_outdirs, pk_filter + ["--pattern", "*.path"])
    assert "test.path" in outdirs.output

    # use two experiments to increase coverage with project_results=
    project = api.create_instance("projects", **factories.ProjectFactory())
    first_payload = factories.ExperimentFactory(projects=[project])
    first_payload["sample"]["individual"]["species"] = "HUMAN"
    second_payload = factories.ExperimentFactory(projects=[project])
    second_payload["sample"] = first_payload["sample"]

    mock_app = MockApplication()
    first_experiment = api.create_instance("experiments", **first_payload)
    second_experiment = api.create_instance("experiments", **second_payload)
    ran = utils.assert_run(
        application=mock_app,
        tuples=[([first_experiment], []), ([second_experiment], [])],
        commit=True,
        project_results=["project_result_key"],
    )
    ran_analysis = ran[0]

    # get_results: list available results, then fetch one of them
    listing = _invoke(
        commands.get_results, ["--app-results", ran_analysis.application.pk]
    )
    assert "command_script" in listing.output
    fetched = _invoke(
        commands.get_results,
        ["-fi", "pk", ran_analysis.pk, "-r", "command_script"],
    )
    assert "head_job.sh" in fetched.output

    # patch_results
    patched = _invoke(
        commands.patch_results, ["-fi", "pk", ran_analysis.pk, "--force"]
    )
    assert "Retrieving 1 from analyses API endpoint" in patched.output
Beispiel #5
0
def test_engine(tmpdir):
    """Run the engine checks while isolating the global state they mutate.

    The checks override ``_DEFAULTS["DEFAULT_LINUX_GROUP"]`` and flip the
    ``cli_allow_force`` / ``cli_allow_restart`` class attributes on
    ``MockApplication``. Both are saved here and restored afterwards, even on
    failure, so the changes do not leak into tests that run later.
    """
    missing = object()
    group_key = "DEFAULT_LINUX_GROUP"
    flag_names = ("cli_allow_force", "cli_allow_restart")

    try:
        saved_group = _DEFAULTS[group_key]
    except KeyError:
        saved_group = missing

    # Only record attributes set directly on MockApplication, so inherited
    # values are restored by deleting the override rather than copying it.
    saved_flags = {
        name: vars(MockApplication).get(name, missing) for name in flag_names
    }

    # NOTE(review): presumably a group name that does not exist on the host,
    # used to exercise the failure path when setting the group - confirm.
    _DEFAULTS[group_key] = "not_a_group"

    try:
        _run_engine_checks(tmpdir)
    finally:
        if saved_group is missing:
            del _DEFAULTS[group_key]
        else:
            _DEFAULTS[group_key] = saved_group

        for name, value in saved_flags.items():
            if value is not missing:
                setattr(MockApplication, name, value)
            elif name in vars(MockApplication):
                delattr(MockApplication, name)


def _run_engine_checks(tmpdir):
    """Run the engine assertions; state cleanup is done by ``test_engine``."""
    individual = factories.IndividualFactory(species="HUMAN")
    sample = factories.SampleFactory(individual=individual)
    project = api.create_instance("projects", **factories.ProjectFactory())

    experiments = [
        factories.ExperimentFactory(identifier=str(i),
                                    sample=sample,
                                    projects=[project]) for i in range(4)
    ]

    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]
    # one (targets, references) tuple per experiment
    tuples = [([i], []) for i in experiments]

    command = MockApplication.as_cli_command()
    application = MockApplication()
    ran_analyses, _, __ = application.run(tuples, commit=True)
    target = api.Experiment(ran_analyses[1][0].targets[0].pk)

    assert "analysis_result_key" in ran_analyses[1][0]["results"].keys()
    assert "analysis_result_key" in ran_analyses[2][0]["results"].keys()
    assert f'analysis: {ran_analyses[1][0]["pk"]}' in application.get_job_name(
        ran_analyses[1][0])

    bam = join(tmpdir, "fake.bam")
    application.update_experiment_bam_file(experiments[0], bam,
                                           ran_analyses[0][0].pk)
    assert bam in application.get_bams([experiments[0]])

    # get coverage for when there is no need to update the bam again
    assert application.update_experiment_bam_file(experiments[0], bam,
                                                  ran_analyses[0][0].pk)

    # only experiments[0] had a bam registered above
    with pytest.raises(exceptions.ValidationError) as error:
        application.validate_bams(experiments)

    assert (f"{experiments[1].system_id} has no registered bam for "
            f"{application.assembly.name}" in str(error.value))

    with pytest.raises(exceptions.ValidationError) as error:
        application.validate_bedfiles(experiments)

    assert f"{experiments[0].system_id} has no registered bedfile" in str(
        error.value)

    # test that get results work as expected
    assert application.get_results(
        result_key="analysis_result_key",
        experiment=target,
        application_key=application.primary_key,
    ) == [(1, ran_analyses[1][0].pk)]

    # check assertion error is raised when an invalid result is searched for
    with pytest.raises(AssertionError) as error:
        application.get_results(
            result_key="invalid_result_key",
            experiment=target,
            application_key=application.primary_key,
        )

    assert "Result 'invalid_result_key' not found for analysis" in str(
        error.value)

    # test options
    runner = CliRunner()
    result = runner.invoke(command, ["--help"], catch_exceptions=False)
    assert "This is a test application" in result.output
    assert "--commit" in result.output
    assert "--force" in result.output
    assert "--quiet" in result.output
    assert "--restart" in result.output
    assert "--url" in result.output

    runner = CliRunner()
    result = runner.invoke(command, ["--url"], catch_exceptions=False)
    assert "http://www.fake-test-app.org" in result.output

    # test get experiments from default cli options
    runner = CliRunner()
    result = runner.invoke(command,
                           ["-fi", "system_id", experiments[0].system_id],
                           catch_exceptions=False)
    assert experiments[0].system_id in result.output

    # check project level results
    pks = ",".join(str(i["pk"]) for i in experiments)
    args = ["-fi", "pk__in", pks]
    result = runner.invoke(command, args, catch_exceptions=False)
    analysis = application.get_project_level_auto_merge_analysis(project)
    merged = join(analysis["storage_url"], "test.merge")

    assert analysis[
        "status"] == "SUCCEEDED", f"Project Analysis failed {analysis}"
    assert "FAILED" in result.output
    assert "SUCCEEDED" in result.output
    assert "SKIPPED 3" in result.output
    assert "INVALID 1" in result.output
    assert isfile(merged)
    assert "project_result_key" in analysis["results"]

    with open(merged) as f:
        assert f.read().strip() == "2"

    # check individual level results
    analysis = application.get_individual_level_auto_merge_analysis(individual)
    merged = join(analysis["storage_url"], "test.merge")
    assert analysis[
        "status"] == "SUCCEEDED", f"Individual Analysis failed {analysis}"
    assert isfile(merged)
    assert "individual_result_key" in analysis["results"]

    with open(merged) as f:
        assert f.read().strip() == "2"

    # check passing command line args
    # exceptions are not re-raised below; messages are read from the output
    args = ["-fi", "pk__in", pks, "--commit", "--force"]
    result = runner.invoke(command, args)
    assert "--commit not required when using --force" in result.output

    args = ["-fi", "pk__in", pks, "--restart", "--force"]
    result = runner.invoke(command, args)
    assert "cant use --force and --restart together" in result.output

    args = ["-fi", "pk__in", pks, "--force"]
    result = runner.invoke(command, args)
    assert "trashing:" in result.output

    args = ["-fi", "pk__in", pks, "--restart", "--quiet"]
    result = runner.invoke(command, args)
    assert "FAILED" not in result.output

    with open(join(ran_analyses[0][0].storage_url, "head_job.log")) as f:
        assert "successfully restarted" in f.read()

    # with both flags disabled, a freshly built command must not list them
    MockApplication.cli_allow_force = False
    MockApplication.cli_allow_restart = False
    result = runner.invoke(MockApplication.as_cli_command(), ["--help"],
                           catch_exceptions=False)
    assert "--force" not in result.output
    assert "--restart" not in result.output