def test_integration(self):
    """
    Run the make_trimmed_dataset task module on a datastore plus an input
    ConsensusReadSet and check the name, tags and record count of the
    resulting trimmed.consensusreadset.xml.
    """
    source_xml = pbtestdata.get_file("ccs-barcoded")
    # NamedTemporaryFile is used here only as a generator of unique paths.
    datastore_json = tempfile.NamedTemporaryFile(
        suffix=".datastore.json").name
    lima_xml = tempfile.NamedTemporaryFile(
        suffix=".consensusreadset.xml").name
    filtered_xml = tempfile.NamedTemporaryFile(
        suffix=".consensusreadset.xml").name

    # Write the same dataset twice: once as the "filtered" input and once
    # under a different name as the file referenced by the datastore.
    with ConsensusReadSet(source_xml) as template:
        template.name = "My Data (filtered)"
        template.tags = "ccs,filtered"
        template.write(filtered_xml)
        template.name = "lima out"
        template.write(lima_xml)

    lima_entry = DataStoreFile(uuid.uuid4(),
                               "lima",
                               FileTypes.DS_CCS.file_type_id,
                               lima_xml)
    DataStore([lima_entry]).write_json(datastore_json)

    self._check_call([
        "python3", "-m", "pbcoretools.tasks.make_trimmed_dataset",
        datastore_json, filtered_xml
    ])

    # The task writes its output to a fixed file name.
    with ConsensusReadSet("trimmed.consensusreadset.xml",
                          trustCounts=True) as trimmed:
        assert trimmed.numRecords > 0
        assert trimmed.name == "My Data (trimmed)"
        assert trimmed.tags == "ccs"
Example #2
0
def run_args(args):
    """
    Build the "trimmed" ConsensusReadSet from the files listed in a datastore.

    The output dataset is constructed from every file path in the datastore
    at ``args.datastore``, has its tags sanitized, takes its name from the
    input dataset at ``args.ccs_in`` (with any " (filtered)" removed and
    " (trimmed)" appended), has its subdatasets cleared, and is written to
    ``trimmed.consensusreadset.xml``.

    Both datasets are opened as context managers so they are closed once the
    output has been written, instead of being left open.

    :param args: parsed arguments providing ``datastore`` and ``ccs_in`` paths
    :return: 0 on success
    """
    dstore = DataStore.load_from_json(os.path.realpath(args.datastore))
    # Only the name of the input dataset is needed, so close it right away.
    with ConsensusReadSet(args.ccs_in, trustCounts=True) as ds_in:
        base_name = ds_in.name.replace(" (filtered)", "")
    with ConsensusReadSet(*(f.path for f in dstore.files.values()),
                          trustCounts=True) as ds_out:
        sanitize_dataset_tags(ds_out, remove_hidden=True)
        ds_out.name = base_name + " (trimmed)"
        ds_out.subdatasets = []
        ds_out.write("trimmed.consensusreadset.xml")
    return 0
 def test_ccs_barcodes_table_asymmetric(self):
     """
     Check the first column of the second report table for the asymmetric
     barcodes dataset.
     """
     dataset_xml = op.join(ROOT_DATA_DIR, "ccs", "asym_barcodes",
                           "ccs.consensusreadset.xml")
     report = to_report(ConsensusReadSet(dataset_xml), tempfile.mkdtemp())
     expected_labels = [
         'F5--R5', 'F8--R8', 'F20--R20', 'F29--R29', 'F30--R30'
     ]
     self.assertEqual(report.tables[1].columns[0].values, expected_labels)
 def test_ccs_mulitple_movies_single_bam(self):
     """
     Smoke test: generating the report must not raise when a single BAM
     file contains reads from multiple movies.
     """
     dataset = ConsensusReadSet(self.CCS_BAM)
     output_dir = tempfile.mkdtemp()
     # Only successful completion matters; the report itself is not checked.
     to_report(dataset, output_dir)
 def test_ccs_barcodes_table(self):
     """
     Check the contents of the second report table for the "ccs-barcoded"
     test dataset: the first four columns exactly, the fifth to 4 places.
     """
     dataset = ConsensusReadSet(pbtestdata.get_file("ccs-barcoded"))
     report = to_report(dataset, tempfile.mkdtemp())
     expected_leading = [
         ["lbc1--lbc1", "lbc3--lbc3"],
         [1, 1],
         [1958, 1954],
         [1958, 1954],
     ]
     observed_leading = [
         col.values for col in report.tables[1].columns[0:4]
     ]
     self.assertEqual(observed_leading, expected_leading)
     for row, expected in enumerate((0.9724, 0.9926)):
         self.assertAlmostEqual(report.tables[1].columns[4].values[row],
                                expected,
                                places=4)
    def test_get_bio_sample_name(self):
        """
        Check get_bio_sample_name() on single datasets, on a merged dataset,
        and on ConsensusReadSets.

        Each comparison is asserted; a bare ``==`` expression is evaluated
        and discarded, so it can never make the test fail.
        """
        filename = pbtestdata.get_file("subreads-sequel")
        ds1 = SubreadSet(filename)
        assert get_bio_sample_name(ds1) == "Narwhale"

        filename = pbtestdata.get_file("subreads-biosample-2")
        ds2 = SubreadSet(filename)
        assert get_bio_sample_name(ds2) == "UnnamedSample"

        # Merging the two datasets above is expected to report "Multiple".
        ds3 = ds1 + ds2
        assert get_bio_sample_name(ds3) == "Multiple"

        filename = pbtestdata.get_file("rsii-ccs-multi-cell")
        ds4 = ConsensusReadSet(filename)
        assert get_bio_sample_name(ds4) == "Multiple"

        filename = pbtestdata.get_file("ccs-sequel")
        ds5 = ConsensusReadSet(filename)
        assert get_bio_sample_name(ds5) == "NarwhalCcs"
Example #7
0
def run_dev_ccs_report(rtc):
    """
    Write a small JSON report with the record count and total length of the
    ConsensusReadSet given as the task's first input file.

    :param rtc: resolved tool contract; ``rtc.task.input_files[0]`` is the
                dataset to read and ``rtc.task.output_files[0]`` is the
                report path to write
    :return: 0 on success
    """
    from pbcore.io import ConsensusReadSet

    with ConsensusReadSet(rtc.task.input_files[0]) as dataset:
        # Refresh the counts before reading numRecords / totalLength.
        dataset.updateCounts()
        summary = Report(
            "ccs_report",
            title="ConsensusReadSet XML Report",
            attributes=[
                Attribute("number_of_records", value=dataset.numRecords),
                Attribute("total_length", value=dataset.totalLength),
            ])
        summary.write_json(rtc.task.output_files[0])
    return 0
Example #8
0
 def add_bash5(self, filename):
     """
     Register a read file in ``self.bas_files``, keyed by movie name.

     Handled inputs, selected by file name suffix:
       * ``.bax.h5`` and numbered ``.N.ccs.h5`` / ``.N.subreads.bam`` /
         ``.N.ccs.bam`` (N in 1..3): the movie is mapped to a
         ``smrt_wrapper`` unless it is already registered.
       * a single ``.ccs.h5`` or ``.bas.h5``: the movie is mapped to a
         defaultdict that returns ``filename`` for every key.
       * ``.consensusreadset.xml``: every movie found in the read group
         tables of the dataset is mapped to a ``dataset_wrapper``.

     Raises NotImplementedError for an un-numbered ``subreads.bam`` and
     IOError for any other file name.
     """
     leaf = os.path.basename(filename)

     if filename.endswith('.bax.h5'):
         movie = leaf[:-9]
         if movie not in self.bas_files:
             self.bas_files[movie] = smrt_wrapper(filename[:-9],
                                                  suffix='.bax.h5')
         return

     # (numbered suffixes, number of trailing characters to strip)
     chunked_formats = (
         (('.1.ccs.h5', '.2.ccs.h5', '.3.ccs.h5'), 9),
         (('.1.subreads.bam', '.2.subreads.bam', '.3.subreads.bam'), 15),
         (('.1.ccs.bam', '.2.ccs.bam', '.3.ccs.bam'), 10),
     )
     for suffixes, width in chunked_formats:
         if filename.endswith(suffixes):
             movie = leaf[:-width]
             if movie not in self.bas_files:
                 self.bas_files[movie] = smrt_wrapper(filename[:-width])
             return

     # Numbered .ccs.h5 files were handled above, so what remains is a
     # single .ccs.h5 (post 150k runs), treated the same as .bas.h5.
     if filename.endswith(('.ccs.h5', '.bas.h5')):
         self.bas_files[leaf[:-7]] = defaultdict(lambda: filename)
         return

     if filename.endswith('subreads.bam'):
         raise NotImplementedError(
             "%s add_bash5 *.subreads.bam not implemented." %
             (self.__class__.__name__))

     if filename.endswith(".consensusreadset.xml"):
         dataset = ConsensusReadSet(filename)
         for reader in dataset.resourceReaders():
             for read_group in reader.readGroupTable:
                 self.bas_files[read_group.MovieName] = \
                     dataset_wrapper(filename)
         return

     raise IOError("Unsupported file format: %s" % filename)