コード例 #1
0
 def open_command(*repl_args):
     """Build an ``OPEN`` invocation from the enclosing command spec and run it.

     A shallow copy of ``_command`` is made so that ``pre_hook`` (when set) can
     adjust it without touching the shared original. The argument list starts
     with ``OPEN``, optionally followed by ``-a <app>``, then each entry of the
     spec's ``"args"``.

     An entry containing exactly one ``%s`` placeholder is filled in: with the
     quoted replacement arguments when exactly one non-empty replacement was
     supplied, otherwise with the quoted result of ``mac.pbpaste()``. Entries
     that start with ``~`` are passed through ``helpers.expand_path``.

     :param repl_args: Candidate replacement values; falsy ones are discarded.
     """
     spec = copy.copy(_command)
     if pre_hook:
         # might want to pass repl_args to the pre_hook
         pre_hook(spec)

     argv = [OPEN]
     if "app" in spec:
         argv += ["-a", spec["app"]]

     replacements = [value for value in repl_args if value]

     for template in spec.get("args", []):
         if template.count("%s") == 1:
             if len(replacements) == 1:
                 # NOTE(review): the whole one-element list is handed to
                 # helpers.quote, not its single element -- confirm that is
                 # what quote expects.
                 filler = helpers.quote(replacements)
             else:
                 filler = helpers.quote(mac.pbpaste())
             template = template % filler
         if template.startswith("~"):
             template = helpers.expand_path(template)
         argv.append(template)

     helpers.run(argv)
コード例 #2
0
def get_epilog_full_command(prog_spec,
                            readsfile,
                            sitesfile,
                            outdir,
                            min_rlen,
                            min_qual,
                            strand_method,
                            rrbs_fill,
                            context,
                            epis=True,
                            halt=None,
                            strand_specific=False,
                            no_epi_stats=False,
                            process_logfile=None):
    """
    Validate inputs and assemble the main epilog processing command.

    All precondition failures are collected first and reported together in
    a single exception, rather than stopping at the first one found.

    :param ProgSpec prog_spec: Bundle of JAR, mem alloc, and number of cores;
        supplies the command prefix and the ``--cores`` value.
    :param str readsfile: Path to sorted, aligned BAM with reads to analyze.
    :param str sitesfile: Path to gzipped, tabix-indexed file with methyl
        sites.
    :param str outdir: Folder in which the single-site output (and the
        epiallele output, when requested) is placed.
    :param int min_rlen: Minimum number of aligned bases for a read to be
        used; must be interpretable as a nonnegative integer.
    :param int min_qual: Minimum base call quality at a site and neighbor
        site(s) for it to be used; must be interpretable as a nonnegative
        integer.
    :param str strand_method: Name of strategy to determine read orientation;
        'tag' or 'flag'
    :param int rrbs_fill: Number of bases at read end to ignore due to RRBS
        "fill-in"
    :param str context: Methylation context (sense strand); one of 'C' or
        'CG'
    :param bool epis: Produce epiallele results in addition to the
        single-site output
    :param str halt: Name of processing stage after which to halt; if
        omitted, no ``--through`` option is added
    :param bool strand_specific: Indicate no strand merger is desired.
    :param bool no_epi_stats: Skip epiallele diversity/heterogeneity
        statistics
    :param str process_logfile: Path to file for epiallele processing
        performance statistics
    :raise EpilogPretestError: if one of the necessary preconditions to run
        epilog is violated
    :return (str, EpilogTarget): Command for main epilog processing, and a
        pypiper "target" (path to calls file, or that and epiallele path if
        applicable)
    """

    import os

    def _is_nonnegative_int(candidate):
        # True only if the value converts to an int that is >= 0.
        try:
            return int(candidate) >= 0
        except (TypeError, ValueError):
            return False

    # Numeric thresholds that must be nonnegative integers.
    problems = [
        "Did not get nonnegative value -- {} = {}".format(label, value)
        for label, value in {"Length": min_rlen, "Quality": min_qual}.items()
        if not _is_nonnegative_int(value)
    ]

    readsfile = expand_path(readsfile)
    sitesfile = expand_path(sitesfile)

    # Both input files must already exist.
    for file_type, file_path in (("reads", readsfile), ("sites", sitesfile)):
        if not os.path.isfile(file_path):
            problems.append(
                "Missing {} file: {}".format(file_type, file_path))

    valid_contexts = ["C", "CG"]
    if context not in valid_contexts:
        problems.append("Invalid context ({}); choose one: {}".format(
            context, ", ".join(valid_contexts)))

    if problems:
        raise EpilogPretestError(problems)

    single_sites_file = os.path.join(outdir, "all_calls.txt")

    # Required portion of the command: program prefix, valued options, then
    # the two positional inputs.
    cmd_parts = ["{}".format(prog_spec.get_command_base())]
    valued_options = (
        ("--minBaseQuality", min_qual),
        ("--minReadLength", min_rlen),
        ("--context", context),
        ("--rrbsFill", rrbs_fill),
        ("--cores", prog_spec.cores),
        ("--strandMethod", strand_method),
        ("-O", single_sites_file),
    )
    for flag, setting in valued_options:
        cmd_parts.append("{} {}".format(flag, setting))
    for positional in (readsfile, sitesfile):
        cmd_parts.append("{}".format(positional))

    # Optional portion, in the order the options are appended to the command.
    if epis:
        epis_file = os.path.join(outdir, "all_epialleles.txt")
        cmd_parts.append("--outputEpialleles {}".format(epis_file))
    else:
        epis_file = None
    if process_logfile:
        cmd_parts.append("--processLogfile {}".format(process_logfile))
    if halt:
        cmd_parts.append("--through {}".format(halt))
    if strand_specific:
        cmd_parts.append("--strandSpecific")
    if no_epi_stats:
        cmd_parts.append("--noEpiStats")

    cmd = " ".join(cmd_parts)

    # TODO: this is not the encouraged route, but while/if it is provided,
    # consider the downstream file(s) as targets.
    # TODO: beware of the effect on the "main-only" function that calls into
    # this; its targets are the main files.
    target = EpilogTarget(single_sites_file=single_sites_file,
                          epis_file=epis_file)
    return cmd, target
コード例 #3
0
        ]

    def test_columns(self):
        """The first entry returned by ``columns()`` holds the expected values."""
        expected = ['A', 'B', 'B', 'C', 'C', 'C']
        first_column = self.data.columns()[0]
        assert first_column == expected

    def test_dicts(self):
        """The first entry returned by ``dicts()`` equals the expected record."""
        expected_record = {
            'patient_mrn': 'A',
            'name': 'Blood Pressure',
            'value': '120',
            'order_time': '09/03/17 10:30AM',
            'taken_time': '09/03/17 11:30AM'
        }
        first_record = self.data.dicts()[0]
        assert first_record == expected_record


# Location(s) of the YAML description fixtures, derived from this file's path
# and a glob pattern (presumably resolved relative to this file's directory --
# confirm against expand_path).
description_paths = expand_path(__file__, 'fixtures/descriptions/*.yml')
# Module-level Curator built once at import time and used by the tests below;
# `engine` comes from outside this excerpt.
curator = Curator(engine, description_paths)


class TestCurator():
    """Tests that run the module-level ``curator`` over dirty fixture data."""

    def setup_method(self):
        """Load fresh fixture data onto the instance before each test."""
        fixtures = (
            ('labs_dirty', 'labs_dirty.csv'),
            ('patients_dirty', 'patients_dirty.csv'),
            ('patients_missing_first', 'patients_dirty_missing_key.json'),
        )
        for attribute, fixture_name in fixtures:
            setattr(self, attribute, FixtureData(fixture_name))

    def test_transform_dicts(self):
        """Each transformed lab record equals its counterpart in ``labs_clean``."""
        transformed = curator.transform_dicts('lab', self.labs_dirty.dicts())
        # zip stops at the shorter input, so only paired records are compared.
        for actual, expected in zip(transformed, labs_clean):
            assert actual == expected
コード例 #4
0
def test_expand_path_simple_glob():
    """A single-level glob yields the files whose contents are 1, 2 and 3."""
    read_values = []
    for path in helpers.expand_path(__file__, 'fixtures/glob/*.txt'):
        # Close each fixture file explicitly instead of leaving the handle
        # open until garbage collection.
        with open(path) as fixture:
            read_values.append(fixture.read())
    # Compare as sets: the order in which paths are yielded is not asserted.
    assert set(read_values) == {'1\n', '2\n', '3\n'}
コード例 #5
0
def test_expand_path_with_single_path():
    """``expand_paths`` with one pattern gives the same result as ``expand_path``."""
    pattern = 'fixtures/glob/*.txt'
    expected = helpers.expand_path(__file__, pattern)
    actual = helpers.expand_paths(__file__, pattern)
    assert actual == expected
コード例 #6
0
def test_expand_path_nested_glob():
    """A ``**`` glob yields the files whose contents are A, B, C and D."""
    read_values = []
    for path in helpers.expand_path(__file__, 'fixtures/glob/**/*.txt'):
        # Close each fixture file explicitly instead of leaving the handle
        # open until garbage collection.
        with open(path) as fixture:
            read_values.append(fixture.read())
    # Compare as sets: the order in which paths are yielded is not asserted.
    assert set(read_values) == {'A\n', 'B\n', 'C\n', 'D\n'}