def open_command(*repl_args):
    """Build and execute an ``open`` invocation from the configured command.

    Copies the module-level ``_command`` spec, runs the optional ``pre_hook``
    on it, then assembles the argument vector: ``-a <app>`` when an app is
    configured, followed by each configured arg with a single ``%s``
    placeholder filled in (quoted) and leading ``~`` expanded.

    :param repl_args: optional replacement values; falsy values are dropped.
        When exactly one remains it fills the ``%s`` placeholder, otherwise
        the clipboard contents (``mac.pbpaste()``) are used.
    """
    command = copy.copy(_command)
    if pre_hook:
        # might want to pass repl_args to the pre_hook
        pre_hook(command)

    to_run = [OPEN]
    if "app" in command:
        to_run.extend(["-a", command["app"]])

    # Discard empty/None replacement values before deciding how to substitute.
    repl_args = [arg for arg in repl_args if arg]

    for arg in command.get("args", []):
        if arg.count("%s") == 1:
            if len(repl_args) == 1:
                # FIX: quote the single replacement *value* (a str), matching
                # the pbpaste branch below; the original passed the whole
                # one-element list to helpers.quote.
                arg = arg % helpers.quote(repl_args[0])
            else:
                arg = arg % helpers.quote(mac.pbpaste())
        if arg.startswith("~"):
            arg = helpers.expand_path(arg)
        to_run.append(arg)

    helpers.run(to_run)
def get_epilog_full_command(prog_spec, readsfile, sitesfile, outdir, min_rlen, min_qual, strand_method, rrbs_fill, context, epis=True, halt=None, strand_specific=False, no_epi_stats=False, process_logfile=None):
    """Build the full epilog processing command and its output target(s).

    :param ProgSpec prog_spec: bundle of JAR, memory allocation, and core count
    :param str readsfile: path to sorted, aligned BAM with reads to analyze
    :param str sitesfile: path to gzipped, tabix-indexed methyl-sites file
    :param str outdir: output folder for single-site (and epiallele) results
    :param int min_rlen: minimum aligned length for a read to be used
    :param int min_qual: minimum base call quality at a site and neighbor site(s)
    :param str strand_method: read-orientation strategy; 'tag' or 'flag'
    :param int rrbs_fill: number of read-end bases to ignore (RRBS "fill-in")
    :param str context: methylation context on the sense strand, e.g. 'CG'
    :param bool epis: also produce epiallele output, not just single-site calls
    :param str halt: stage after which to halt; if omitted, run everything
    :param bool strand_specific: indicate no strand merger is desired
    :param bool no_epi_stats: skip epiallele diversity/heterogeneity statistics
    :param str process_logfile: file for processing performance statistics
    :raise EpilogPretestError: if a precondition to run epilog is violated
    :return (str, EpilogTarget): main epilog command, and a pypiper "target"
        (calls-file path, plus epiallele path when applicable)
    """
    import os

    valid_contexts = ["C", "CG"]
    issues = []

    # Both numeric thresholds must be castable to a nonnegative int.
    for name, val in {"Length": min_rlen, "Quality": min_qual}.items():
        try:
            ok = int(val) >= 0
        except (TypeError, ValueError):
            ok = False
        if not ok:
            issues.append("Did not get nonnegative value -- {} = {}".format(
                name, val))

    readsfile = expand_path(readsfile)
    sitesfile = expand_path(sitesfile)
    for filetype, filepath in zip(["reads", "sites"], [readsfile, sitesfile]):
        if not os.path.isfile(filepath):
            issues.append("Missing {} file: {}".format(filetype, filepath))

    if context not in valid_contexts:
        issues.append("Invalid context ({}); choose one: {}".format(
            context, ", ".join(valid_contexts)))

    if issues:
        raise EpilogPretestError(issues)

    single_sites_file = os.path.join(outdir, "all_calls.txt")
    cmd = "{b} --minBaseQuality {q} --minReadLength {rl} --context {ctx} --rrbsFill {base_fill} --cores {cores} --strandMethod {sm} -O {o} {r} {s}".format(
        b=prog_spec.get_command_base(), q=min_qual, rl=min_rlen,
        ctx=context, base_fill=rrbs_fill, cores=prog_spec.cores,
        sm=strand_method, o=single_sites_file, r=readsfile, s=sitesfile)

    # Epiallele output is optional; its path doubles as the target component.
    epis_file = os.path.join(outdir, "all_epialleles.txt") if epis else None
    if epis_file:
        cmd += " --outputEpialleles {}".format(epis_file)
    if process_logfile:
        cmd += " --processLogfile {}".format(process_logfile)
    if halt:
        cmd += " --through {}".format(halt)
    if strand_specific:
        cmd += " --strandSpecific"
    if no_epi_stats:
        cmd += " --noEpiStats"

    # TODO: though this is not going to be the encouraged route, while/if it's
    # to be provided, consider the downstream file(s) as targets.
    # TODO: beware, though, of the effect on the "main-only" function that
    # calls into this. Its targets are the main files.
    return cmd, EpilogTarget(single_sites_file=single_sites_file, epis_file=epis_file)
# NOTE(review): this line is a whitespace-collapsed fragment. It opens with a
# stray "]" closing a list literal begun outside this view, followed by test
# methods (test_columns, test_dicts) belonging to a class whose header is also
# not visible, then module-level fixture setup (description_paths, curator) and
# class TestCurator, whose test_transform_dicts references a module-level
# labs_clean defined elsewhere. Because the original indentation/structure
# cannot be reconstructed with confidence, the line is left byte-identical;
# reformat once the complete file is available.
] def test_columns(self): assert self.data.columns()[0] == ['A', 'B', 'B', 'C', 'C', 'C'] def test_dicts(self): assert self.data.dicts()[0] == { 'patient_mrn': 'A', 'name': 'Blood Pressure', 'value': '120', 'order_time': '09/03/17 10:30AM', 'taken_time': '09/03/17 11:30AM' } description_paths = expand_path(__file__, 'fixtures/descriptions/*.yml') curator = Curator(engine, description_paths) class TestCurator(): def setup_method(self): self.labs_dirty = FixtureData('labs_dirty.csv') self.patients_dirty = FixtureData('patients_dirty.csv') self.patients_missing_first = FixtureData( 'patients_dirty_missing_key.json') def test_transform_dicts(self): results = curator.transform_dicts('lab', self.labs_dirty.dicts()) for result, clean in zip(results, labs_clean): assert result == clean
def test_expand_path_simple_glob():
    """A flat glob over fixtures/glob/*.txt reads the three digit files."""
    read_values = []
    for path in helpers.expand_path(__file__, 'fixtures/glob/*.txt'):
        # Use a context manager so each fixture file is closed promptly;
        # the original `open(path).read()` leaked the file handle.
        with open(path) as fh:
            read_values.append(fh.read())
    assert set(read_values) == set(['1\n', '2\n', '3\n'])
def test_expand_path_with_single_path():
    """expand_paths must agree with expand_path for a single glob pattern."""
    pattern = 'fixtures/glob/*.txt'
    result_plural = helpers.expand_paths(__file__, pattern)
    result_singular = helpers.expand_path(__file__, pattern)
    assert result_plural == result_singular
def test_expand_path_nested_glob():
    """A recursive glob over fixtures/glob/**/*.txt reads the four letter files."""
    read_values = []
    for path in helpers.expand_path(__file__, 'fixtures/glob/**/*.txt'):
        # Use a context manager so each fixture file is closed promptly;
        # the original `open(path).read()` leaked the file handle.
        with open(path) as fh:
            read_values.append(fh.read())
    assert set(read_values) == set(['A\n', 'B\n', 'C\n', 'D\n'])