Beispiel #1
0
def _process_base_pliface(prj, **kwargs):
    """
    Build submission conductors for a project from the module's pipeline
    interface data.

    The protocols passed along are the keys of the protocol mapping section
    of the module-level pipeline interface data; any keyword arguments are
    forwarded unchanged to the protocol processing call.

    :param looper.Project prj: project for which submission conductors are
        to be created
    :param kwargs: additional keyword arguments passed through to
        process_protocols
    :return Mapping[str, looper.conductor.SubmissionConductor], Mapping[str, list[str]]:
        mapping from pipeline key to submission conductor, and mapping from
        protocol name to collection of keys for pipelines for that protocol
    """
    protocol_mapping = PLIFACE_DATA[PROTOMAP_KEY]
    protocols = set(protocol_mapping.keys())
    return process_protocols(prj, protocols, **kwargs)
Beispiel #2
0
def test_convergent_protocol_mapping_keys(tmpdir):
    """
    Similarly-named protocols do not result in multiple pipelines.

    Several spellings of each protocol name are mapped to the same pipeline;
    a sample annotations sheet with one sample per spelling, a pipeline
    interface file, and a project config file are written to the temp
    folder, and a Project is built from the config. The protocols are then
    processed, and the resulting conductors must be keyed by pipeline while
    each protocol maps to exactly one pipeline key.

    :param py.path.local tmpdir: temporary folder in which the project's
        files are created
    """

    protomap = OrderedDict([("WGBS", WGBS_PIPE), ("wgbs", WGBS_PIPE),
                            ("ATAC-SEQ", ATAC_PIPE), ("ATACseq", ATAC_PIPE),
                            ("ATAC-seq", ATAC_PIPE)])
    # One sample per protocol spelling; iterating the mapping yields its keys.
    records = [("sample" + str(i), p) for i, p in enumerate(protomap)]

    outdir = tmpdir.strpath

    sep, ext = "\t", ".tsv"
    anns_path = os.path.join(outdir, "anns" + ext)
    records = [SAMPLE_METADATA_HEADER] + records
    with open(anns_path, 'w') as f:
        # The file is open in text mode, which already translates "\n" into
        # the platform's line ending; joining with os.linesep would produce
        # doubled carriage returns on Windows.
        f.write("\n".join(sep.join(r) for r in records))

    pliface_data = {PROTOMAP_KEY: dict(protomap), "pipelines": PIPE_SPECS}
    pliface_filepath = os.path.join(outdir, "pipes.yaml")

    with open(pliface_filepath, 'w') as f:
        yaml.dump(pliface_data, f)

    metadata = {
        OUTDIR_KEY: outdir,
        SAMPLE_ANNOTATIONS_KEY: anns_path,
        "pipeline_interfaces": pliface_filepath
    }

    _touch_pipe_files(tmpdir.strpath, pliface_data)

    prjdat = {METADATA_KEY: metadata}
    pcfg = tmpdir.join("prj.yaml").strpath
    with open(pcfg, 'w') as f:
        yaml.dump(prjdat, f)
    prj = Project(pcfg)

    conductors, pipe_keys = process_protocols(prj, set(protomap.keys()))

    # Conductors collection is keyed on pipeline, not protocol
    assert set(conductors.keys()) == set(protomap.values())
    # Collection of pipeline keys by protocol, not pipeline
    assert len(pipe_keys) == len(protomap)
    multi_pipes = [(p, ks) for p, ks in pipe_keys.items() if len(ks) > 1]
    assert [] == multi_pipes, "{} protocol(s) mapped to multiple pipelines: {}".\
        format(len(multi_pipes), multi_pipes)
Beispiel #3
0
def test_single_sample_auto_conductor_new_sample_scripts(
        prj, automatic, max_cmds):
    """
    Each sample added to its conductor is matched by one submission script.

    The submission folder must start out empty; after each sample is added
    to the single conductor for its protocol, exactly one filename in the
    submission folder must contain the sample's name followed by ".sub".
    """
    all_samples = prj.samples
    protocols = {sample.protocol for sample in all_samples}
    conductors, pipe_keys = process_protocols(prj, protocols)
    subdir = prj.submission_folder
    assert 0 == _count_files(subdir)
    for sample in all_samples:
        keys = pipe_keys[sample.protocol]
        assert 1 == len(keys), \
            "Multiple pipelines for sample {}: {}".format(sample.name, keys)
        conductor = conductors[keys[0]]
        conductor.add_sample(sample)
        sub_fn_suffix = sample.name + ".sub"
        # Re-list the folder each time, since adding a sample may create files.
        contents = os.listdir(subdir)
        matches = [fn for fn in contents if sub_fn_suffix in fn]
        assert 1 == len(matches), \
            "No filename containing {} in {}; contents: {}".\
            format(sub_fn_suffix, subdir, contents)
Beispiel #4
0
def test_ignoring_flags(prj, flag_name, flagged_sample_names, validate):
    """
    Add all samples to flag-ignoring conductors once flag files exist.

    Flag files are created for the named samples (after confirming none
    exist beforehand), conductors are built with ignore_flags=True, every
    project sample is added to the conductor for its protocol, and the
    supplied validation callable is finally run on the project and the
    conductors.
    """
    preexisting = _collect_flags(prj)
    print("collected")
    assert {} == preexisting, "Preexisting flag(s): {}".format(preexisting)
    flagged_samples = [
        sample for sample in prj.samples
        if sample.name in flagged_sample_names]
    print("flagged: {}".format(flagged_sample_names))
    assert len(flagged_sample_names) == len(flagged_samples), \
        "Expected {expn} flagged samples ({exp}) but found {obsn} ({obs})".format(
            expn=len(flagged_sample_names),
            exp=", ".join(flagged_sample_names), obsn=len(flagged_samples),
            obs=", ".join(sample.name for sample in flagged_samples))
    flag_files_made = [
        _mkflag(sample, prj, flag_name) for sample in flagged_samples]
    print("flag_files_made: {}".format(flag_files_made))
    assert all(os.path.isfile(path) for path in flag_files_made), \
        "Missing setup flag file(s): {}".format(
            ", ".join([path for path in flag_files_made
                       if not os.path.isfile(path)]))
    # Collect again, now that the flag files should be in place.
    preexisting = _collect_flags(prj)
    print("preexisting: {}".format(preexisting))
    assert len(flagged_sample_names) == len(preexisting)
    collected_flag_files = set(
        itertools.chain.from_iterable(preexisting.values()))
    assert set(flag_files_made) == collected_flag_files
    protocols = set(PLIFACE_DATA[PROTOMAP_KEY].keys())
    conductors, pipe_keys = process_protocols(
        prj, protocols, ignore_flags=True)
    print("processed")
    assert all(cond.ignore_flags for cond in conductors.values()), \
        "Failed to establish precondition, that flags are to be ignored"
    print("asserted")
    for sample in prj.samples:
        keys = pipe_keys[sample.protocol]
        assert 1 == len(keys), \
            "Need exactly one pipeline key but got {} for protocol {}: {}".\
            format(len(keys), sample.protocol, keys)
        print("adding: {}".format(sample.name))
        cond = conductors[keys[0]]
        print("cond: {}".format(cond))
        cond.add_sample(sample)
        print("added: {}".format(sample.name))
    print("Validating...")
    validate(prj, conductors.values())
Beispiel #5
0
def test_sample_yaml_outputs_inclusion(prj, outs_by_pipe):
    """
    Sample YAML files contain the outputs expected for the sample's pipeline.

    Each sample is added to the conductor for its single pipeline (with the
    conductor module's sample-use and argstring checks patched out), after
    which a YAML file must exist for it in the submission folder. The
    outputs section of that file must equal the expected outputs for the
    pipeline, or be absent when no outputs are expected.
    """
    import mock
    import looper

    protocols = {sample.protocol for sample in prj.samples}
    print("PROTOCOLS: {}".format(protocols))

    conductors, pipe_keys = process_protocols(prj, protocols)

    assert len(PIPEKEYS) > 0  # Pretest
    assert len(PIPEKEYS) == len(conductors)  # As many pipelines as conductors

    # Split samples by whether the protocol maps to exactly one pipeline key.
    sample_key_pairs, multi_pipe_samples = [], []
    for sample in prj.samples:
        keys = pipe_keys[sample.protocol]
        if len(keys) != 1:
            multi_pipe_samples.append((sample, keys))
        else:
            sample_key_pairs.append((sample, keys[0]))
    if multi_pipe_samples:
        raise Exception(
            "Samples with non-1 number of pipeline keys: {}".format(
                multi_pipe_samples))

    sample_conductor_pairs = [
        (sample, conductors[key]) for sample, key in sample_key_pairs]
    sample_yaml_pairs = []
    with mock.patch.object(looper.conductor, "_use_sample", return_value=True), \
         mock.patch.object(looper.conductor, "_check_argstring"):
        for sample, conductor in sample_conductor_pairs:
            yaml_file = os.path.join(
                prj.submission_folder, sample.generate_filename())
            # The file must not exist until the sample has been added.
            assert not os.path.exists(yaml_file)
            conductor.add_sample(sample)
            sample_yaml_pairs.append((sample, yaml_file))

    missing = [(sample.name, path) for sample, path in sample_yaml_pairs
               if not os.path.isfile(path)]
    if missing:
        print("Project outdir contents: {}".format(os.listdir(prj.output_dir)))
        print("Submission folder contents: {}".format(
            os.listdir(prj.submission_folder)))
        raise Exception("Samples missing YAML file: {}".format(missing))

    # Expected outputs, in the same order as the sample/YAML pairs.
    expected_outputs = [outs_by_pipe[key] for _, key in sample_key_pairs]
    bads = []
    for (sample, yaml_path), expected in zip(sample_yaml_pairs,
                                             expected_outputs):
        with open(yaml_path, 'r') as f:
            observed_data = yaml.load(f, yaml.SafeLoader)
        if not expected:
            if OUTKEY in observed_data:
                bads.append((sample, "Unexpectedly found outputs in YAML"))
            continue
        try:
            observed = observed_data[OUTKEY]
        except KeyError:
            bads.append((sample, "Missing outputs key"))
            continue
        if observed != expected:
            bads.append((sample, expected, observed))
    if bads:
        pytest.fail("Unmet expectations: {}".format(bads))