def _process_base_pliface(prj, **kwargs):
    """
    Create the submission conductors for a project, keyed by the
    module-level pipeline interface data's protocol mapping.

    :param looper.Project prj: project for which submission conductors
        are to be created
    :return Mapping[str, looper.conductor.SubmissionConductor],
        Mapping[str, list[str]]: mapping from pipeline key to submission
        conductor, and mapping from protocol name to collection of keys
        for pipelines for that protocol
    """
    # Iterating a dict yields its keys, so set(d) == set(d.keys()).
    protocols = set(PLIFACE_DATA[PROTOMAP_KEY])
    return process_protocols(prj, protocols, **kwargs)
def test_convergent_protocol_mapping_keys(tmpdir):
    """ Similarly-named protocols do not result in multiple pipelines. """
    # Several spellings of the same protocols, each mapping to one of
    # exactly two pipelines; convergent names must not multiply pipelines.
    protomap = OrderedDict([
        ("WGBS", WGBS_PIPE), ("wgbs", WGBS_PIPE),
        ("ATAC-SEQ", ATAC_PIPE), ("ATACseq", ATAC_PIPE),
        ("ATAC-seq", ATAC_PIPE)])
    records = [("sample" + str(i), p) for i, p in enumerate(protomap)]
    outdir = tmpdir.strpath
    sep, ext = "\t", ".tsv"

    # Write the sample annotations sheet.
    anns_path = os.path.join(outdir, "anns" + ext)
    records = [SAMPLE_METADATA_HEADER] + records
    with open(anns_path, 'w') as f:
        # Join with "\n", not os.linesep: in text mode Python translates
        # "\n" to the platform separator, so os.linesep would produce
        # "\r\r\n" line endings on Windows.
        f.write("\n".join(sep.join(r) for r in records))

    # Write the pipeline interface config.
    pliface_data = {PROTOMAP_KEY: dict(protomap), "pipelines": PIPE_SPECS}
    pliface_filepath = os.path.join(outdir, "pipes.yaml")
    with open(pliface_filepath, 'w') as f:
        yaml.dump(pliface_data, f)

    # Write the project config pointing at the above artifacts.
    metadata = {
        OUTDIR_KEY: outdir,
        SAMPLE_ANNOTATIONS_KEY: anns_path,
        "pipeline_interfaces": pliface_filepath
    }
    _touch_pipe_files(tmpdir.strpath, pliface_data)
    prjdat = {METADATA_KEY: metadata}
    pcfg = tmpdir.join("prj.yaml").strpath
    with open(pcfg, 'w') as f:
        yaml.dump(prjdat, f)

    prj = Project(pcfg)
    conductors, pipe_keys = process_protocols(prj, set(protomap.keys()))
    # Conductors collection is keyed on pipeline, not protocol
    assert set(conductors.keys()) == set(protomap.values())
    # Collection of pipeline keys by protocol, not pipeline
    assert len(pipe_keys) == len(protomap)
    multi_pipes = [(p, ks) for p, ks in pipe_keys.items() if len(ks) > 1]
    assert [] == multi_pipes, "{} protocol(s) mapped to multiple pipelines: {}".\
        format(len(multi_pipes), multi_pipes)
def test_single_sample_auto_conductor_new_sample_scripts(
        prj, automatic, max_cmds):
    """ Validate base/ideal case of submission conduction w.r.t. scripts. """
    samples = prj.samples
    conductors, pipe_keys = process_protocols(
        prj, {s.protocol for s in samples})
    subdir = prj.submission_folder
    # Nothing submitted yet, so the submission folder starts empty.
    assert _count_files(subdir) == 0
    for s in samples:
        keys = pipe_keys[s.protocol]
        # Each protocol here should resolve to exactly one pipeline.
        assert len(keys) == 1, \
            "Multiple pipelines for sample {}: {}".format(s.name, keys)
        conductors[keys[0]].add_sample(s)
        # Adding the sample should have produced its submission script.
        expected_fragment = s.name + ".sub"
        contents = os.listdir(subdir)
        matches = [f for f in contents if expected_fragment in f]
        assert len(matches) == 1, \
            "No filename containing {} in {}; contents: {}".\
            format(expected_fragment, subdir, contents)
def test_ignoring_flags(prj, flag_name, flagged_sample_names, validate):
    """ Script creation is automatic, and submission is counted. """
    # Precondition: no flag files exist before the test sets any up.
    preexisting = _collect_flags(prj)
    assert {} == preexisting, "Preexisting flag(s): {}".format(preexisting)

    # Select the samples that should be flagged and verify the selection.
    flagged_samples = [
        s for s in prj.samples if s.name in flagged_sample_names]
    assert len(flagged_sample_names) == len(flagged_samples), \
        "Expected {expn} flagged samples ({exp}) but found {obsn} ({obs})".format(
            expn=len(flagged_sample_names),
            exp=", ".join(flagged_sample_names),
            obsn=len(flagged_samples),
            obs=", ".join(s.name for s in flagged_samples))

    # Create a flag file per flagged sample and confirm each exists.
    flag_files_made = [_mkflag(s, prj, flag_name) for s in flagged_samples]
    assert all(os.path.isfile(f) for f in flag_files_made), \
        "Missing setup flag file(s): {}".format(
            ", ".join([f for f in flag_files_made if not os.path.isfile(f)]))
    preexisting = _collect_flags(prj)
    assert len(flagged_sample_names) == len(preexisting)
    assert set(flag_files_made) == set(itertools.chain(*preexisting.values()))

    # Build conductors that are configured to ignore flags.
    conductors, pipe_keys = process_protocols(
        prj, set(PLIFACE_DATA[PROTOMAP_KEY].keys()), ignore_flags=True)
    assert all(map(lambda c: c.ignore_flags, conductors.values())), \
        "Failed to establish precondition, that flags are to be ignored"

    # Despite the flags, every sample should be accepted for submission.
    for s in prj.samples:
        pks = pipe_keys[s.protocol]
        assert 1 == len(pks), \
            "Need exactly one pipeline key but got {} for protocol {}: {}".\
            format(len(pks), s.protocol, pks)
        conductors[pks[0]].add_sample(s)

    validate(prj, conductors.values())
def test_sample_yaml_outputs_inclusion(prj, outs_by_pipe):
    """ Each submitted sample's YAML reflects its pipeline's outputs. """
    import mock
    import looper

    protocols = {s.protocol for s in prj.samples}
    print("PROTOCOLS: {}".format(protocols))
    conductors, pipe_keys = process_protocols(prj, protocols)
    assert len(PIPEKEYS) > 0    # Pretest
    assert len(PIPEKEYS) == len(conductors)    # As many pipelines as conductors

    # Partition samples by whether they map to exactly one pipeline key.
    multi_pipe_samples, sample_key_pairs = [], []
    for sample in prj.samples:
        keys = pipe_keys[sample.protocol]
        if len(keys) == 1:
            sample_key_pairs.append((sample, keys[0]))
        else:
            multi_pipe_samples.append((sample, keys))
    if multi_pipe_samples:
        raise Exception(
            "Samples with non-1 number of pipeline keys: {}".format(
                multi_pipe_samples))

    # Submit each sample, bypassing usability and argstring checks, and
    # record where its YAML file should land.
    sample_yaml_pairs = []
    with mock.patch.object(looper.conductor, "_use_sample", return_value=True), \
            mock.patch.object(looper.conductor, "_check_argstring"):
        for sample, key in sample_key_pairs:
            path = os.path.join(
                prj.submission_folder, sample.generate_filename())
            assert not os.path.exists(path)
            conductors[key].add_sample(sample)
            sample_yaml_pairs.append((sample, path))

    missing = [(s.name, f) for s, f in sample_yaml_pairs
               if not os.path.isfile(f)]
    if missing:
        print("Project outdir contents: {}".format(os.listdir(prj.output_dir)))
        print("Submission folder contents: {}".format(
            os.listdir(prj.submission_folder)))
        raise Exception("Samples missing YAML file: {}".format(missing))

    # Compare each YAML's outputs section against the expectation for
    # the sample's pipeline.
    bads = []
    for (sample, yaml_path), (_, key) in zip(sample_yaml_pairs,
                                             sample_key_pairs):
        expected = outs_by_pipe[key]
        with open(yaml_path, 'r') as fh:
            observed_data = yaml.load(fh, yaml.SafeLoader)
        if not expected:
            if OUTKEY in observed_data:
                bads.append((sample, "Unexpectedly found outputs in YAML"))
            continue
        try:
            observed = observed_data[OUTKEY]
        except KeyError:
            bads.append((sample, "Missing outputs key"))
        else:
            if observed != expected:
                bads.append((sample, expected, observed))
    if bads:
        pytest.fail("Unmet expectations: {}".format(bads))