def run_rtc(rtc):
    """Run the task described by ``rtc`` and return an exit code.

    The first input file is read as a FOFN; a report is built from the
    number of entries it lists. The FOFN is then copied unchanged to the
    first output file and the report is written to the second output file.

    :param rtc: object exposing ``task.input_files`` and
        ``task.output_files`` (presumably a resolved tool contract --
        confirm against the caller).
    :return: always ``0``.
    """
    source_fofn = rtc.task.input_files[0]

    # The report only needs the number of entries listed in the FOFN.
    entry_count = len(list(readFofn(source_fofn)))
    summary = fofn_to_report(entry_count)

    # Output 0 is a verbatim copy of the input FOFN; output 1 is the report.
    shutil.copy(source_fofn, rtc.task.output_files[0])
    write_report_and_log(summary, rtc.task.output_files[1])

    log.info("Completed running {i}".format(i=rtc.task))
    return 0
def test_read_ccs_from_multiple_smrtcells_write_one_bam(self):
    """
    test_read_ccs_from_multiple_smrtcells_write_one_bam:
    Take the ccs bam files listed in ccsbam.fofn (three movies), pick
    five zmws per movie, and check the records written to a single
    output bam via _verify_write_compare_ccs.
    """
    movies = [
        "m140802_021814_42161_c110036822550000001823106706241500_s1_p0",
        "m140802_043938_42161_c110036822550000001823106706241501_s1_p0",
        "m140802_070303_42161_c110036822550000001823106706241502_s1_p0",
    ]
    # Hole numbers selected from each movie, in the same order as `movies`.
    holes_per_movie = [
        [54434, 54440, 54493, 80328, 163395],
        [1784, 7201, 40789, 79704, 152904],
        [705, 4838, 40197, 84197, 126136],
    ]
    zmws = [
        "%s/%d" % (movie, hole)
        for movie, holes in zip(movies, holes_per_movie)
        for hole in holes
    ]

    ccsbamfns = list(readFofn(op.join(self.bigDataDir, "ccsbam.fofn")))
    outbamfn = op.join(self.outDir, "test_writebam_ccs_3.bam")

    expected_num_ccs_reads = sum((
        13821, 13457, 17016,
        13705, 13581, 14900,
        13477, 12238, 11318,
    ))  # 123513

    _verify_write_compare_ccs(
        self,
        ccsbamfns,
        zmws,
        outbamfn,
        expected_movies=movies,
        expected_len=expected_num_ccs_reads,
    )
def test_read_ccs_from_multiple_smrtcells_write_one_bam(self):
    """
    test_read_ccs_from_multiple_smrtcells_write_one_bam:
    The ccs bam files named in ccsbam.fofn cover three movies; records of
    five zmws from each movie are selected and written to one output bam,
    which _verify_write_compare_ccs then checks.
    """
    input_fofn = op.join(self.bigDataDir, "ccsbam.fofn")
    ccsbamfns = list(readFofn(input_fofn))
    outbamfn = op.join(self.outDir, "test_writebam_ccs_3.bam")

    # (movie name, hole numbers selected from that movie)
    selection = (
        ("m140802_021814_42161_c110036822550000001823106706241500_s1_p0",
         (54434, 54440, 54493, 80328, 163395)),
        ("m140802_043938_42161_c110036822550000001823106706241501_s1_p0",
         (1784, 7201, 40789, 79704, 152904)),
        ("m140802_070303_42161_c110036822550000001823106706241502_s1_p0",
         (705, 4838, 40197, 84197, 126136)),
    )
    movies = [movie for movie, _ in selection]

    zmws = []
    for movie, holes in selection:
        for hole in holes:
            zmws.append("%s/%d" % (movie, hole))

    read_counts = [13821, 13457, 17016,
                   13705, 13581, 14900,
                   13477, 12238, 11318]
    expected_num_ccs_reads = sum(read_counts)  # 123513

    _verify_write_compare_ccs(self, ccsbamfns, zmws, outbamfn,
                              expected_movies=movies,
                              expected_len=expected_num_ccs_reads)
def fromSmrtPortalPath(jobPath):
    """Build a Fixture from the files belonging to a job directory.

    The directory holding the report files is derived by applying
    ``updir`` to every path listed in ``<jobPath>/input.fofn``; all entries
    must resolve to the same directory.

    :param jobPath: path of the job directory; it must contain
        ``input.fofn`` and is searched for ``data/*.cmp.h5``.
    :return: a ``Fixture`` built from the located trc/pls/bas/aln file
        names (each is whatever ``findOneOrNone`` returns -- presumably
        ``None`` when nothing matches).
    :raises ValueError: if the fofn entries resolve to more than one
        directory (multi-movie job) or the fofn lists no files at all.
    """
    # reckon the reports path from the input fofn ...
    reportsPaths = set(
        updir(path) for path in readFofn(op.join(jobPath, "input.fofn")))
    if len(reportsPaths) > 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("No support for multi-movie jobs yet")
    if not reportsPaths:
        # Previously surfaced as an opaque IndexError on the lookup below.
        raise ValueError("No input files listed in input.fofn")
    reportsPath = next(iter(reportsPaths))

    # Prefer the single-file flavour, fall back to the multi-part one.
    basFname = (findOneOrNone("*.bas.h5", reportsPath) or
                findOneOrNone("*.bax.h5", reportsPath))
    plsFname = (findOneOrNone("*.pls.h5", reportsPath) or
                findOneOrNone("*.plx.h5", reportsPath))
    # Trace files are looked up one directory above the reports path.
    trcFname = findOneOrNone("*.trc.h5", updir(reportsPath))
    alnFname = findOneOrNone("*.cmp.h5", op.join(jobPath, "data"))
    return Fixture(trcFname=trcFname,
                   plsFname=plsFname,
                   basFname=basFname,
                   alnFname=alnFname)
def parse_input_file(i_file):
    """Parse input file, get input bam or bax.h5 file.

    :param i_file: path ending in ``.bam``, ``.bax.h5`` or ``.fofn``.
        A ``.fofn`` must list exactly one file and that file must end in
        ``.bax.h5``.
    :return: tuple ``(bam_fn, bax_fn)``; exactly one element is set and
        the other is ``None``.
    :raises ValueError: if the extension is unsupported, or the fofn does
        not contain exactly one bax.h5 file.
    """
    bam_fn, bax_fn = None, None
    if i_file.endswith(".bam"):
        bam_fn = i_file
    elif i_file.endswith(".fofn"):
        fns = list(readFofn(i_file))
        if len(fns) != 1 or not fns[0].endswith(".bax.h5"):
            # The fofn name is interpolated into the message; the original
            # left the "%s" placeholder unformatted.
            raise ValueError(
                "%s fofn should contain exactly one bax.h5 file." % i_file)
        bax_fn = fns[0]
    elif i_file.endswith(".bax.h5"):
        bax_fn = i_file
    else:
        raise ValueError("Unsupported file format %s" % i_file)
    return bam_fn, bax_fn
def parse_input_file(i_file):
    """Parse input file, get input bam or bax.h5 file.

    Accepted inputs are a ``.bam`` file, a ``.bax.h5`` file, or a
    ``.fofn`` that lists exactly one ``.bax.h5`` file.

    :param i_file: input file path.
    :return: ``(bam_fn, bax_fn)`` where the element matching the input
        kind is filled in and the other one is ``None``.
    :raises ValueError: for an unsupported extension, or for a fofn that
        does not list exactly one bax.h5 file.
    """
    if i_file.endswith(".bam"):
        return i_file, None

    if i_file.endswith(".fofn"):
        fns = list(readFofn(i_file))
        exactly_one_bax = len(fns) == 1 and fns[0].endswith(".bax.h5")
        if not exactly_one_bax:
            # Supply the format argument so the message names the fofn
            # instead of showing a raw "%s".
            raise ValueError(
                "%s fofn should contain exactly one bax.h5 file." % i_file)
        return None, fns[0]

    if i_file.endswith(".bax.h5"):
        return None, i_file

    raise ValueError("Unsupported file format %s" % i_file)
def fromSmrtPortalPath(jobPath):
    """Assemble a Fixture from the files found for a job directory.

    Every path listed in ``<jobPath>/input.fofn`` is mapped through
    ``updir`` to obtain the reports directory; the job is only supported
    when all entries map to one and the same directory.

    :param jobPath: job directory containing ``input.fofn``; its ``data``
        subdirectory is searched for ``*.cmp.h5``.
    :return: ``Fixture`` constructed from the trc/pls/bas/aln file names
        returned by ``findOneOrNone`` (presumably ``None`` for a missing
        file -- confirm against that helper).
    :raises ValueError: when the fofn entries map to several directories
        (multi-movie job), or when the fofn lists no files.
    """
    # reckon the reports path from the input fofn ...
    inputFofn = op.join(jobPath, "input.fofn")
    reportsPaths = set(updir(path) for path in readFofn(inputFofn))
    if len(reportsPaths) > 1:
        # raise X("...") parses on Python 2 and 3; `raise X, "..."` is
        # Python-2-only syntax.
        raise ValueError("No support for multi-movie jobs yet")
    if not reportsPaths:
        # An empty fofn used to fail with an IndexError on the lookup.
        raise ValueError("No input files listed in input.fofn")
    reportsPath = list(reportsPaths)[0]

    # For bas/pls, try the first pattern and fall back to the second.
    basFname = findOneOrNone("*.bas.h5", reportsPath) or findOneOrNone(
        "*.bax.h5", reportsPath)
    plsFname = findOneOrNone("*.pls.h5", reportsPath) or findOneOrNone(
        "*.plx.h5", reportsPath)
    # The trace file is searched one level above the reports directory.
    trcFname = findOneOrNone("*.trc.h5", updir(reportsPath))
    alnFname = findOneOrNone("*.cmp.h5", op.join(jobPath, "data"))
    return Fixture(
        trcFname=trcFname,
        plsFname=plsFname,
        basFname=basFname,
        alnFname=alnFname)
def __init__(self, fname, *rest):
    """Open every part named by ``fname`` with ``PART_READER_CLASS``.

    :param fname: path to a ``.fofn`` file or to an ``.h5`` file. An h5
        file containing a ``MultiPart`` entry contributes the names under
        ``/MultiPart/Parts``, joined onto the h5 file's directory; any
        other h5 file is treated as its own single part.
    :param rest: extra positional arguments forwarded unchanged to each
        ``PART_READER_CLASS`` constructor call.
    :raises ValueError: if ``fname`` ends in neither ``.h5`` nor ``.fofn``.
    """
    # Can read any of the following:
    #  ~ fofn
    #  ~ h5, multipart
    #  ~ h5, single part
    self.filename = op.abspath(op.expanduser(fname))
    if fname.endswith(".h5"):
        directory = op.dirname(self.filename)
        # Part names are collected while the h5 file is still open.
        with h5py.File(self.filename, "r") as f:
            if f.get("MultiPart"):
                partFilenames = [op.join(directory, fn)
                                 for fn in f["/MultiPart/Parts"]]
            else:
                partFilenames = [self.filename]
    elif self.filename.endswith(".fofn"):
        partFilenames = list(readFofn(self.filename))
    else:
        # Call form of raise: the `raise X, "msg"` statement form is a
        # syntax error on Python 3.
        raise ValueError("Invalid file name for multipart reader")
    self._parts = [self.PART_READER_CLASS(pfn, *rest)
                   for pfn in partFilenames]
def test_simple():
    """readFofn over a two-line in-memory fofn yields both paths in order."""
    handle = StringIO("/a/b\n/c/d")
    parsed = list(readFofn(handle))
    expected = ["/a/b", "/c/d"]
    assert_array_equal(expected, parsed)
def test_absolutifying():
    """Every path readFofn yields for each fofn in data.getFofns() is absolute."""
    # Lazily flatten: each fofn is read only when its entries are checked.
    listed_paths = (
        entry
        for fofn_path in data.getFofns()
        for entry in readFofn(fofn_path)
    )
    for entry in listed_paths:
        assert_true(isabs(entry))
def test_empty_lines():
    """Whitespace-only lines in a fofn are expected to be skipped by readFofn."""
    handle = StringIO("/a/b\n \n/c/d\n ")
    parsed = list(readFofn(handle))
    expected = ["/a/b", "/c/d"]
    assert_array_equal(expected, parsed)
def test_simple():
    """readFofn over a two-line in-memory fofn yields both paths in order."""
    handle = StringIO("/a/b\n/c/d")
    parsed = list(readFofn(handle))
    expected = ["/a/b", "/c/d"]
    assert expected == parsed
def test_empty_lines():
    """Whitespace-only lines in a fofn are expected to be skipped by readFofn."""
    handle = StringIO(u"/a/b\n \n/c/d\n ")
    parsed = list(readFofn(handle))
    expected = ["/a/b", "/c/d"]
    assert expected == parsed