def test_failed_signal():
    """Check failed signals are recorded, re-run, updated, and cleaned up."""
    analysis = api.create_instance("analyses", **factories.AnalysisFactory())
    query = dict(
        target_endpoint="analyses", endpoint="signals", target_id=analysis.pk
    )

    # a signal that succeeds leaves no signal instance behind
    api._run_signals("analyses", analysis, [besuhof_signal])
    assert not api.get_instances(**query)

    # a failing signal creates exactly one instance carrying the traceback
    analysis = api.patch_instance("analyses", analysis.pk, notes="please fail")
    api._run_signals("analyses", analysis, [besuhof_signal])
    signals = api.get_instances(**query)
    assert len(signals) == 1
    assert _FAILED_SIGNAL_MESSAGE in signals[0].data["failure_traceback"]

    # rerunning the signal updates the stored traceback in place
    runner = CliRunner()
    args = f"-fi target_endpoint analyses -fi target_id {analysis.pk}".split()
    api.patch_instance("analyses", analysis.pk, notes="fail with different msg")
    runner.invoke(commands.rerun_signals, args, catch_exceptions=False)
    signals = api.get_instances(**query)
    assert len(signals) == 1
    assert "but with a different msg..." in signals[0].data["failure_traceback"]

    # once the signal no longer fails, its failure record is deleted
    api.patch_instance("analyses", analysis.pk, notes="")
    runner.invoke(commands.rerun_signals, args, catch_exceptions=False)
    assert not api.get_instances(**query)
Example #2
0
def process_finished(filters):
    """Process and update finished analyses."""
    utils.check_admin()

    # restrict the query to analyses in FINISHED state
    filters["status"] = "FINISHED"

    for analysis in api.get_instances("analyses", verbose=True, **filters):
        # re-check the status on the retrieved instance before patching
        if analysis["status"] == "FINISHED":
            api.patch_analysis_status(analysis, "SUCCEEDED")
Example #3
0
def _filters_or_identifiers(endpoint, identifiers, filters, fields=None):
    """Fetch instances by explicit identifiers or by API filters, not both."""
    if filters and identifiers:  # pragma: no cover
        raise click.UsageError("Can't combine filters and identifiers.")

    if fields:
        # limit the payload to the requested fields; raise the page size so
        # a single filtered query returns everything
        filters["fields"] = fields
        filters["limit"] = 100_000

    if identifiers:
        return [api.get_instance(endpoint, i, fields=fields) for i in identifiers]

    return api.get_instances(endpoint, verbose=True, **filters)
Example #4
0
def assert_run(
    application,
    tuples,
    commit,
    results=None,
    project_results=None,
    assert_valid=True,
    assert_skipped=False,
    assert_invalid=False,
):
    """Run application, check results, and return analyses."""
    ran = []
    valid, skipped, invalid = application.run(tuples, commit=commit)
    expected = results or []

    if assert_valid:
        assert valid, "No valid RAN analyses."

        for analysis, status in valid:
            ran.append(analysis)

            if not commit:
                # without commit, analyses are only staged, not executed
                assert status == application._staged_message
                continue

            for key in expected:
                assert analysis["results"].get(key) is not None, (
                    f"Result {key} is missing in: " +
                    json.dumps(analysis["results"], sort_keys=True, indent=4))

    if assert_skipped:
        assert skipped, "No SKIPPED analyses."

    if assert_invalid:
        assert invalid, "No INVALID analyses."

    if project_results:
        # first experiment of the first tuple determines the project
        project = tuples[0][0][0].projects[0].pk
        analyses = api.get_instances(
            endpoint="analyses", project_level_analysis=project, limit=1
        )

        if analyses:
            assert analyses[0]["status"] == "SUCCEEDED", (
                f"Project level analysis {analyses[0].pk} has not SUCCEEDED status."
            )

            for key in project_results:
                assert analyses[0]["results"].get(key) is not None, (
                    f"Result {key} is missing in: " + json.dumps(
                        analyses[0]["results"], sort_keys=True, indent=4))

    return ran
Example #5
0
def test_api_methods():
    """Exercise the basic CRUD helpers of the api module."""
    endpoint = "diseases"
    payloads = [factories.DiseaseFactory() for _ in range(3)]
    instances = [api.create_instance(endpoint, **payload) for payload in payloads]
    first_pk = instances[0]["pk"]
    first_two = [instance["pk"] for instance in instances[:2]]

    # patching merges the provided data into the instance
    patched = api.patch_instance(endpoint, first_pk, data={"one": 1})
    assert patched["data"]["one"] == 1

    # retrieval by pk, by filter, and counting all agree
    assert api.get_instance(endpoint, first_pk)["pk"] == first_pk
    assert api.get_instances(endpoint, pk=first_pk)[0]["pk"] == first_pk
    assert api.get_instances_count(endpoint, pk=first_pk) == 1
    assert len(api.get_instances(endpoint)) == api.get_instances_count(endpoint)
    assert len(api.get_instances(endpoint, first_two)) == 2
    assert len(api.get_instances(endpoint, first_two, pk__in=first_two)) == 2
    assert len(api.get_instances(endpoint, first_two, pk__in=first_two[0])) == 1

    for instance in instances:
        assert api.delete_instance(endpoint, instance["pk"]) is None

    assert api.get_token_headers()["Authorization"]
Example #6
0
def test_get_instances():
    """Check retrieval helpers resolve identifiers to the right instances."""
    technique = api.create_instance("techniques", **factories.TechniqueFactory())
    retrieved = api.get_instances("techniques", [technique.name])
    assert retrieved[0].pk == technique.pk

    experiment = api.create_instance("experiments", **factories.ExperimentFactory())
    individual = experiment.sample.individual
    project = experiment.projects[0]

    # dedicated getters should agree with the created objects
    assert api.get_experiments([experiment.pk])[0].pk == experiment.pk
    assert api.get_projects([project.pk])[0].pk == project.pk
    assert api.get_tree(individual.pk).pk == individual.pk
    assert api.get_trees([individual.pk])[0].pk == individual.pk
Example #7
0
def validate_pairs(pairs):
    """Get experiments for pairs.

    Arguments:
        pairs (list): list of (target, reference) experiment identifiers.

    Raises:
        exceptions.ValidationError: if a target or reference experiment
            can't be found.

    Returns:
        list: of ([target experiment], [reference experiment]) tuples.
    """
    if not pairs:
        return []

    ids = {i for pair in pairs for i in pair}
    experiments = {i["system_id"]: i for i in get_instances("experiments", ids)}
    ret = []

    for target, reference in pairs:
        # experiments are keyed by system_id (a string), so membership must
        # be tested against the same str() key used for the lookup below —
        # previously a non-string identifier present as its str key would
        # incorrectly raise ValidationError
        if str(target) not in experiments:
            raise exceptions.ValidationError(f"Experiment {target} not found.")
        if str(reference) not in experiments:
            raise exceptions.ValidationError(f"Experiment {reference} not found.")
        ret.append(([experiments[str(target)]], [experiments[str(reference)]]))

    return ret
Example #8
0
def test_patch_analyses_status():
    """Verify bulk status patching across multiple analyses."""
    application = factories.ApplicationFactory()
    payloads = [
        factories.AnalysisFactory(application=application) for _ in range(2)
    ]
    analyses = [api.create_instance("analyses", **payload) for payload in payloads]
    assert all(analysis["status"] == "CREATED" for analysis in analyses)

    # patch all analyses in one call and confirm via a fresh retrieval
    pks = [analysis["pk"] for analysis in analyses]
    api.patch_analyses_status(analyses, "STAGED")
    assert all(
        analysis["status"] == "STAGED"
        for analysis in api.get_instances("analyses", pks)
    )

    for analysis in analyses:
        api.delete_instance("analyses", analysis["pk"])

    api.delete_instance("applications", analyses[0]["application"]["pk"])
Example #9
0
def test_system_id():
    """Check system_id based retrieval and nested sample patching."""
    data_a = factories.ExperimentFactory()
    data_b = factories.ExperimentFactory(sample=data_a["sample"])
    experiment_a = api.create_instance("experiments", **data_a)
    experiment_b = api.create_instance("experiments", **data_b)
    system_ids = [experiment_a["system_id"], experiment_b["system_id"]]

    # both experiments were created against the same sample
    assert experiment_a["sample"]["pk"] == experiment_b["sample"]["pk"]

    # instances can be fetched by system_id just like by pk
    assert api.get_instance("experiments", system_ids[0])["pk"] == experiment_a["pk"]
    assert len(api.get_instances("experiments", system_ids)) == 2

    # patching a nested sample propagates its data and notes
    experiment_a["sample"]["data"]["key"] = "value"
    experiment_a["sample"]["notes"] = "a note"
    patched = api.patch_instance(
        "experiments", experiment_a["pk"], sample=experiment_a["sample"]
    )
    assert patched["sample"]["data"]["key"] == "value"
    assert patched["sample"]["notes"] == "a note"
Example #10
0
def patch_results(filters, force):
    """Update the results field of many analyses."""
    utils.check_admin()
    skipped = []
    analyses = api.get_instances("analyses", verbose=True, **filters)

    with click.progressbar(analyses, label="Patching analyses...") as bar:
        for analysis in bar:
            # keep existing results untouched unless --force was passed
            if not force and analysis.results:  # pragma: no cover
                skipped.append(analysis)
                continue

            results = api._get_analysis_results(analysis, raise_error=False)
            api.patch_instance("analyses", analysis.pk, results=results)

    if skipped:  # pragma: no cover
        click.echo(
            f"{len(skipped)} analyses had results, use --force to update...")
Example #11
0
def rerun_signals(filters):
    """Rerun failed signals."""
    failed_signals = api.get_instances(
        "signals",
        pk__gt=0,
        data__failure_traceback__isnull=False,
        **filters,
    )

    for signal in failed_signals:
        click.secho(f"Rerunning signal: {signal.slug}", fg="yellow")
        target = api.get_instance(signal.target_endpoint, signal.target_id)

        try:
            api._run_signals(
                endpoint=signal.target_endpoint,
                instance=target,
                signals=[import_from_string(signal.import_string)],
                raise_error=True,
            )
            # signal ran cleanly this time, drop its failure record
            api.delete_instance("signals", signal.pk)
        except exceptions.AutomationError:
            # still failing: the record keeps the updated traceback
            pass
Example #12
0
def run_web_signals(filters):
    """Rerun web signals."""
    web_signal_imports = [
        "isabl_cli.signals.resume_analysis_signal",
        "isabl_cli.signals.force_analysis_signal",
    ]

    for signal in api.get_instances(
            "signals", import_string__in=web_signal_imports, **filters):
        click.secho(f"Running web signal: {signal.slug}", fg="yellow")
        target = api.get_instance(signal.target_endpoint, signal.target_id)

        try:
            api._run_signals(
                endpoint=signal.target_endpoint,
                instance=target,
                signals=[import_from_string(signal.import_string)],
                raise_error=True,
            )
            # signal completed, the pending record is no longer needed
            api.delete_instance("signals", signal.pk)
        except exceptions.AutomationError:
            # leave the record in place so it can be retried later
            pass
Example #13
0
    def import_data(
        self,
        directories,
        symlink=False,
        commit=False,
        key=lambda x: x["system_id"],
        files_data=None,
        dtypes=None,
        **filters,
    ):
        """
        Import raw data for multiple experiments.

        Experiments's `storage_url`, `storage_usage`, `raw_data` are
        updated.

        Arguments:
            directories (list): list of directories to be recursively explored.
            symlink (bool): if True symlink instead of moving.
            commit (bool): if True perform import operation.
            key (function): given a experiment dict returns id to match.
            filters (dict): key value pairs to use as API query params.
            dtypes (list): data types that should be matched (e.g. BAM, PNG. etc.).
            files_data (dict): keys are files basenames and values are
                dicts with extra annotations such as PL, LB, or any other,
                see also annotate_file_data.

        Raises:
            click.UsageError: if `key` returns the same identifier for multiple
                experiments. If a experiment matches both fastq and bam files.
                if cant determine read 1 or read 2 from matched fastq files.

        Returns:
            tuple: list of experiments for which data has been matched and a
                summary of the operation.
        """
        utils.check_admin()
        files_data = files_data or {}
        experiments_matched = []
        # cache maps "primary_key_<pk>" -> {"using_id", "instance", "files"}
        cache = defaultdict(dict)
        patterns = []
        # identifiers maps identifier -> system_id, used to reject duplicates
        identifiers = {}
        dtypes = set(dtypes or [])

        # validate files_data: each annotation value must itself be a dict
        for i, j in files_data.items():
            if not isinstance(j, dict):  # pragma: no cover
                raise click.UsageError(f"Invalid file data, expected dict {i}: {j}")

        # get experiments and load cache dictionary
        for i in api.get_instances("experiments", verbose=True, **filters):
            index = f"primary_key_{i['pk']}"
            # default summary label; overwritten below when the experiment
            # is actually eligible for matching
            using_id = f"{i['system_id']} (Skipped, identifier is NULL)"
            identifier = key(i)

            if identifier in identifiers:  # duplicated identifiers not valid
                raise click.UsageError(
                    f"Can't use same identifier for {i['system_id']} "
                    f"and {identifiers[identifier]}: {identifier}"
                )

            if i["raw_data"] or i["bam_files"]:
                # experiments that already have data are never re-imported
                using_id = f"{i['system_id']} (Skipped, experiment has raw data)"
            elif identifier:
                # eligible: register a named regex group for this experiment
                identifiers[identifier] = i["system_id"]
                patterns.append(self.get_regex_pattern(index, identifier))
                using_id = f"{i['system_id']} (using {identifier})"

            cache[index]["using_id"] = using_id
            cache[index]["instance"] = i
            cache[index]["files"] = []

        if patterns:
            # see http://stackoverflow.com/questions/8888567 for pattern
            # a single alternation regex matches any experiment in one pass
            pattern = re.compile("|".join(patterns))
            data_storage_dir = system_settings.BASE_STORAGE_DIRECTORY
            label = f"Exploring directories..."

            # explore dirs
            for directory in set(directories):
                with click.progressbar(os.walk(directory), label=label) as bar:
                    for root, _, files in bar:
                        # never re-import files already inside managed storage
                        if not root.startswith(data_storage_dir):
                            for i in files:
                                if len(patterns) > 500:  # pragma: no cover
                                    # matching is slow with huge alternations,
                                    # so echo progress per file
                                    click.echo(
                                        f"Matching {i} against "
                                        f"{len(patterns)} experiments..."
                                    )

                                path = join(root, i)
                                match = self.match_path(path, pattern)

                                # keep the match only if its data type is
                                # wanted (empty dtypes means accept all)
                                if match and (not dtypes or match["dtype"] in dtypes):
                                    cache[match.pop("index")]["files"].append(match)

            # process files if needed
            label = "Processing..."
            # iterate experiments in a stable pk order
            bar = sorted(cache.values(), key=lambda x: x["instance"]["pk"])
            with click.progressbar(bar, label=label) as bar:
                for i in bar:
                    if commit and i["files"]:
                        # commit mode: actually move/symlink the matched files
                        experiments_matched.append(
                            self.import_files(
                                instance=i["instance"],
                                files=i["files"],
                                symlink=symlink,
                                files_data=files_data,
                            )
                        )
                    elif i["files"]:  # pragma: no cover
                        # dry run: only report the experiment as matched
                        experiments_matched.append(i["instance"])

        return experiments_matched, self.get_summary(cache)