def test_failed_signal():
    analysis = api.create_instance("analyses", **factories.AnalysisFactory())
    get_kwargs = dict(
        target_endpoint="analyses", endpoint="signals", target_id=analysis.pk
    )

    # check signals work and nothing is created
    api._run_signals("analyses", analysis, [besuhof_signal])
    assert len(api.get_instances(**get_kwargs)) == 0

    # check signals failed
    analysis = api.patch_instance("analyses", analysis.pk, notes="please fail")
    api._run_signals("analyses", analysis, [besuhof_signal])
    instances = api.get_instances(**get_kwargs)
    assert len(instances) == 1
    assert _FAILED_SIGNAL_MESSAGE in instances[0].data["failure_traceback"]

    # assert that error traceback is updated
    runner = CliRunner()
    args = f"-fi target_endpoint analyses -fi target_id {analysis.pk}".split()
    api.patch_instance("analyses", analysis.pk, notes="fail with different msg")
    runner.invoke(commands.rerun_signals, args, catch_exceptions=False)
    instances = api.get_instances(**get_kwargs)
    assert len(instances) == 1
    assert "but with a different msg..." in instances[0].data["failure_traceback"]

    # assert that signal is deleted after no failure is detected
    api.patch_instance("analyses", analysis.pk, notes="")
    runner.invoke(commands.rerun_signals, args, catch_exceptions=False)
    assert len(api.get_instances(**get_kwargs)) == 0
def process_finished(filters):
    """Process and update finished analyses."""
    utils.check_admin()
    filters.update(status="FINISHED")

    for i in api.get_instances("analyses", verbose=True, **filters):
        if i["status"] == "FINISHED":
            api.patch_analysis_status(i, "SUCCEEDED")
def _filters_or_identifiers(endpoint, identifiers, filters, fields=None):
    if filters and identifiers:  # pragma: no cover
        raise click.UsageError("Can't combine filters and identifiers.")

    if fields:
        filters["fields"] = fields

    filters["limit"] = 100_000
    return (
        [api.get_instance(endpoint, i, fields=fields) for i in identifiers]
        if identifiers
        else api.get_instances(endpoint, verbose=True, **filters)
    )
def assert_run(
    application,
    tuples,
    commit,
    results=None,
    project_results=None,
    assert_valid=True,
    assert_skipped=False,
    assert_invalid=False,
):
    """Run application, check results, and return analyses."""
    ret = []
    results = results or []
    valid, skipped, invalid = application.run(tuples, commit=commit)

    if assert_valid:
        assert valid, "No valid RAN analyses."

    for i, status in valid:
        ret.append(i)

        if not commit:
            assert status == application._staged_message
            continue

        for j in results:
            assert i["results"].get(j) is not None, (
                f"Result {j} is missing in: "
                + json.dumps(i["results"], sort_keys=True, indent=4)
            )

    if assert_skipped:
        assert skipped, "No SKIPPED analyses."

    if assert_invalid:
        assert invalid, "No INVALID analyses."

    if project_results:
        project = tuples[0][0][0].projects[0].pk
        analyses = api.get_instances(
            endpoint="analyses", project_level_analysis=project, limit=1
        )

        if analyses:
            assert (
                analyses[0]["status"] == "SUCCEEDED"
            ), f"Project level analysis {analyses[0].pk} does not have SUCCEEDED status."

            for j in project_results:
                assert analyses[0]["results"].get(j) is not None, (
                    f"Result {j} is missing in: "
                    + json.dumps(analyses[0]["results"], sort_keys=True, indent=4)
                )

    return ret
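# NOTE: a hypothetical usage sketch for assert_run, not part of the test suite;
# the single-experiment tuple and the "alignment" result key are made-up examples.
def _example_assert_run(application, experiment):  # pragma: no cover
    # each tuple is ([target_experiments], [reference_experiments]); after a
    # committed run, every key in `results` must be present in each analysis
    analyses = assert_run(
        application=application,
        tuples=[([experiment], [])],
        commit=True,
        results=["alignment"],
    )
    assert analyses, "expected at least one valid analysis"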
def test_api_methods():
    endpoint = "diseases"
    diseases = [factories.DiseaseFactory() for _ in range(3)]
    created = [api.create_instance(endpoint, **i) for i in diseases]
    pk = created[0]["pk"]
    pks = [i["pk"] for i in created[:2]]
    patched = api.patch_instance(endpoint, pk, data={"one": 1})

    assert patched["data"]["one"] == 1
    assert api.get_instance(endpoint, pk)["pk"] == pk
    assert api.get_instances(endpoint, pk=pk)[0]["pk"] == pk
    assert api.get_instances_count(endpoint, pk=pk) == 1
    assert len(api.get_instances(endpoint)) == api.get_instances_count(endpoint)
    assert len(api.get_instances(endpoint, pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks[0])) == 1

    for i in created:
        assert api.delete_instance(endpoint, i["pk"]) is None

    assert api.get_token_headers()["Authorization"]
def test_get_instances():
    technique = api.create_instance("techniques", **factories.TechniqueFactory())
    assert api.get_instances("techniques", [technique.name])[0].pk == technique.pk

    experiment = api.create_instance("experiments", **factories.ExperimentFactory())
    individual = experiment.sample.individual
    project = experiment.projects[0]

    assert api.get_experiments([experiment.pk])[0].pk == experiment.pk
    assert api.get_projects([project.pk])[0].pk == project.pk
    assert api.get_tree(individual.pk).pk == individual.pk
    assert api.get_trees([individual.pk])[0].pk == individual.pk
def validate_pairs(pairs):
    """Get experiments for pairs."""
    if not pairs:
        return []

    ids = {i for pair in pairs for i in pair}
    experiments = {i["system_id"]: i for i in get_instances("experiments", ids)}
    ret = []

    for target, reference in pairs:
        # normalize to str so lookups match the system_id keys
        if str(target) not in experiments:
            raise exceptions.ValidationError(f"Experiment {target} not found.")

        if str(reference) not in experiments:
            raise exceptions.ValidationError(f"Experiment {reference} not found.")

        ret.append(([experiments[str(target)]], [experiments[str(reference)]]))

    return ret
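# NOTE: a hypothetical usage sketch for validate_pairs; the system ids are
# made-up placeholders. Each (target, reference) pair is resolved to a
# ([target_experiment], [reference_experiment]) tuple, and a ValidationError
# is raised if either experiment can't be retrieved.
def _example_validate_pairs():  # pragma: no cover
    pairs = [("SAMPLE_1_T", "SAMPLE_1_N"), ("SAMPLE_2_T", "SAMPLE_2_N")]

    for targets, references in validate_pairs(pairs):
        assert len(targets) == 1 and len(references) == 1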
def test_patch_analyses_status():
    application = factories.ApplicationFactory()
    analyses = [factories.AnalysisFactory(application=application) for _ in range(2)]
    created = [api.create_instance("analyses", **i) for i in analyses]
    assert all(i["status"] == "CREATED" for i in created)

    pks = [i["pk"] for i in created]
    api.patch_analyses_status(created, "STAGED")
    retrieved = api.get_instances("analyses", pks)
    assert all(i["status"] == "STAGED" for i in retrieved)

    for i in created:
        api.delete_instance("analyses", i["pk"])

    api.delete_instance("applications", created[0]["application"]["pk"])
def test_system_id():
    data_a = factories.ExperimentFactory()
    data_b = factories.ExperimentFactory(sample=data_a["sample"])
    instance_a = api.create_instance("experiments", **data_a)
    instance_b = api.create_instance("experiments", **data_b)
    system_ids = [instance_a["system_id"], instance_b["system_id"]]

    assert instance_a["sample"]["pk"] == instance_b["sample"]["pk"]
    assert api.get_instance("experiments", system_ids[0])["pk"] == instance_a["pk"]
    assert len(api.get_instances("experiments", system_ids)) == 2

    instance_a["sample"]["data"]["key"] = "value"
    instance_a["sample"]["notes"] = "a note"
    patched = api.patch_instance(
        "experiments", instance_a["pk"], sample=instance_a["sample"]
    )
    assert patched["sample"]["data"]["key"] == "value"
    assert patched["sample"]["notes"] == "a note"
def patch_results(filters, force):
    """Update the results field of many analyses."""
    utils.check_admin()
    skipped = []

    with click.progressbar(
        api.get_instances("analyses", verbose=True, **filters),
        label="Patching analyses...",
    ) as bar:
        for i in bar:
            if force or not i.results:
                results = api._get_analysis_results(i, raise_error=False)
                api.patch_instance("analyses", i.pk, results=results)
            else:  # pragma: no cover
                skipped.append(i)

    if skipped:  # pragma: no cover
        click.echo(f"{len(skipped)} analyses had results, use --force to update...")
def rerun_signals(filters):
    """Rerun failed signals."""
    for i in api.get_instances(
        "signals", pk__gt=0, data__failure_traceback__isnull=False, **filters
    ):
        click.secho(f"Rerunning signal: {i.slug}", fg="yellow")
        instance = api.get_instance(i.target_endpoint, i.target_id)

        try:
            api._run_signals(
                endpoint=i.target_endpoint,
                instance=instance,
                signals=[import_from_string(i.import_string)],
                raise_error=True,
            )
            api.delete_instance("signals", i.pk)
        except exceptions.AutomationError:
            pass
def run_web_signals(filters):
    """Rerun web signals."""
    for i in api.get_instances(
        "signals",
        import_string__in=[
            "isabl_cli.signals.resume_analysis_signal",
            "isabl_cli.signals.force_analysis_signal",
        ],
        **filters,
    ):
        click.secho(f"Running web signal: {i.slug}", fg="yellow")
        instance = api.get_instance(i.target_endpoint, i.target_id)

        try:
            api._run_signals(
                endpoint=i.target_endpoint,
                instance=instance,
                signals=[import_from_string(i.import_string)],
                raise_error=True,
            )
            api.delete_instance("signals", i.pk)
        except exceptions.AutomationError:
            pass
def import_data(
    self,
    directories,
    symlink=False,
    commit=False,
    key=lambda x: x["system_id"],
    files_data=None,
    dtypes=None,
    **filters,
):
    """
    Import raw data for multiple experiments.

    Each experiment's `storage_url`, `storage_usage`, and `raw_data` are updated.

    Arguments:
        directories (list): list of directories to be recursively explored.
        symlink (bool): if True, symlink instead of moving.
        commit (bool): if True, perform the import operation.
        key (function): given an experiment dict, returns the identifier to match.
        files_data (dict): keys are file basenames and values are dicts with
            extra annotations such as PL, LB, or any other, see also
            annotate_file_data.
        dtypes (list): data types that should be matched (e.g. BAM, PNG, etc.).
        filters (dict): key value pairs to use as API query params.

    Raises:
        click.UsageError: if `key` returns the same identifier for multiple
            experiments, if an experiment matches both fastq and bam files, or
            if read 1 or read 2 can't be determined from matched fastq files.

    Returns:
        tuple: list of experiments for which data has been matched and a
            summary of the operation.
    """
    utils.check_admin()
    files_data = files_data or {}
    experiments_matched = []
    cache = defaultdict(dict)
    patterns = []
    identifiers = {}
    dtypes = set(dtypes or [])

    # validate files_data
    for i, j in files_data.items():
        if not isinstance(j, dict):  # pragma: no cover
            raise click.UsageError(f"Invalid file data, expected dict {i}: {j}")

    # get experiments and load cache dictionary
    for i in api.get_instances("experiments", verbose=True, **filters):
        index = f"primary_key_{i['pk']}"
        using_id = f"{i['system_id']} (Skipped, identifier is NULL)"
        identifier = key(i)

        if identifier in identifiers:  # duplicated identifiers not valid
            raise click.UsageError(
                f"Can't use same identifier for {i['system_id']} "
                f"and {identifiers[identifier]}: {identifier}"
            )

        if i["raw_data"] or i["bam_files"]:
            using_id = f"{i['system_id']} (Skipped, experiment has raw data)"
        elif identifier:
            identifiers[identifier] = i["system_id"]
            patterns.append(self.get_regex_pattern(index, identifier))
            using_id = f"{i['system_id']} (using {identifier})"

        cache[index]["using_id"] = using_id
        cache[index]["instance"] = i
        cache[index]["files"] = []

    if patterns:
        # see http://stackoverflow.com/questions/8888567 for pattern
        pattern = re.compile("|".join(patterns))
        data_storage_dir = system_settings.BASE_STORAGE_DIRECTORY
        label = "Exploring directories..."

        # explore dirs
        for directory in set(directories):
            with click.progressbar(os.walk(directory), label=label) as bar:
                for root, _, files in bar:
                    if not root.startswith(data_storage_dir):
                        for i in files:
                            if len(patterns) > 500:  # pragma: no cover
                                click.echo(
                                    f"Matching {i} against "
                                    f"{len(patterns)} experiments..."
                                )

                            path = join(root, i)
                            match = self.match_path(path, pattern)

                            if match and (not dtypes or match["dtype"] in dtypes):
                                cache[match.pop("index")]["files"].append(match)

    # process files if needed
    label = "Processing..."
    bar = sorted(cache.values(), key=lambda x: x["instance"]["pk"])

    with click.progressbar(bar, label=label) as bar:
        for i in bar:
            if commit and i["files"]:
                experiments_matched.append(
                    self.import_files(
                        instance=i["instance"],
                        files=i["files"],
                        symlink=symlink,
                        files_data=files_data,
                    )
                )
            elif i["files"]:  # pragma: no cover
                experiments_matched.append(i["instance"])

    return experiments_matched, self.get_summary(cache)
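# NOTE: a hypothetical usage sketch for import_data; the directory, the custom
# `key`, the experiment data field it reads, and the `projects` query param are
# all made-up examples. With commit=False the call is a dry run that only
# reports what would be matched; a second call with commit=True imports.
def _example_import_data(importer):  # pragma: no cover
    matched, summary = importer.import_data(
        directories=["/mnt/incoming/run_1"],
        key=lambda experiment: experiment["identifier"],
        dtypes=["BAM"],
        symlink=True,
        commit=False,  # dry run: match files but don't move or symlink them
        projects=100,
    )
    click.echo(summary)
    return matched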