Example #1
 def test_ccsread_build(self):
     ds1 = ConsensusReadSet(data.getXml(2), strict=False, skipMissing=True)
     self.assertEqual(type(ds1).__name__, 'ConsensusReadSet')
     self.assertEqual(type(ds1._metadata).__name__, 'SubreadSetMetadata')
     ds2 = ConsensusReadSet(data.getXml(2), strict=False, skipMissing=True)
     self.assertEqual(type(ds2).__name__, 'ConsensusReadSet')
     self.assertEqual(type(ds2._metadata).__name__, 'SubreadSetMetadata')
Example #2
 def test_ccsread_build(self):
     ds1 = ConsensusReadSet(data.getXml(2), strict=False, skipMissing=True)
     assert type(ds1).__name__ == 'ConsensusReadSet'
     assert type(ds1._metadata).__name__ == 'SubreadSetMetadata'
     ds2 = ConsensusReadSet(data.getXml(2), strict=False, skipMissing=True)
     assert type(ds2).__name__ == 'ConsensusReadSet'
     assert type(ds2._metadata).__name__ == 'SubreadSetMetadata'
 def test_ccsset_from_bam(self):
     # DONE bug 28698
     ds1 = ConsensusReadSet(upstreamData.getCCSBAM(), strict=False)
     fn = tempfile.NamedTemporaryFile(suffix=".consensusreadset.xml").name
     log.debug(fn)
     ds1.write(fn, validate=False)
     ds1.write(fn)
 def test_integration(self):
     ccs_barcoded = pbtestdata.get_file("ccs-barcoded")
     datastore = tempfile.NamedTemporaryFile(suffix=".datastore.json").name
     lima_out = tempfile.NamedTemporaryFile(
         suffix=".consensusreadset.xml").name
     ccs_in = tempfile.NamedTemporaryFile(
         suffix=".consensusreadset.xml").name
     with ConsensusReadSet(ccs_barcoded) as ccs_tmp:
         ccs_tmp.name = "My Data (filtered)"
         ccs_tmp.tags = "ccs,filtered"
         ccs_tmp.write(ccs_in)
         ccs_tmp.name = "lima out"
         ccs_tmp.write(lima_out)
     ds = DataStore([
         DataStoreFile(uuid.uuid4(), "lima", FileTypes.DS_CCS.file_type_id,
                       lima_out)
     ])
     ds.write_json(datastore)
     args = [
         "python3", "-m", "pbcoretools.tasks.make_trimmed_dataset",
         datastore, ccs_in
     ]
     self._check_call(args)
     with ConsensusReadSet("trimmed.consensusreadset.xml",
                           trustCounts=True) as ccs_out:
         assert ccs_out.numRecords > 0
         assert ccs_out.name == "My Data (trimmed)"
         assert ccs_out.tags == "ccs"
Example #5
def resolved_tool_contract_runner(rtc):
    """Given resolved tool contract, run"""
    p = ChunkTasksPickle.read(rtc.task.input_files[0])
    assert all([isinstance(task, PartialChunkTask) for task in p])
    dummy_sentinel_file = rtc.task.input_files[1]
    ccs_file = rtc.task.input_files[2]
    nproc = rtc.task.nproc
    tmp_dir = rtc.task.tmpdir_resources[0].path \
            if len(rtc.task.tmpdir_resources) > 0 else None

    log.info("Looking for QVs in CCS input...")
    with ConsensusReadSet(ccs_file) as ds:
        for bam in ds.resourceReaders():
            qvs = bam.pulseFeaturesAvailable()
            if qvs != set(['SubstitutionQV', 'InsertionQV', 'DeletionQV']):
                log.warning(
                    "Missing QV fields from %s, will use default probabilities",
                    bam.filename)
                ccs_file = None
                break

    with open(rtc.task.output_files[0], 'w') as writer:
        for task in p:
            log.info("Running ice_partial on cluster bin %s, nfl chunk %s/%s",
                     str(task.cluster_bin_index), str(task.nfl_index),
                     str(task.n_nfl_chunks))
            task_runner(task=task,
                        ccs_file=ccs_file,
                        nproc=nproc,
                        tmp_dir=tmp_dir)
            writer.write(
                "ice_partial of cluster bin %s, nfl chunk %s/%s in %s is DONE: %s\n"
                % (task.cluster_bin_index, task.nfl_index, task.n_nfl_chunks,
                   task.cluster_out_dir, task.nfl_pickle))
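The QV check in the runner above follows a reusable pattern. Below is a minimal standalone sketch of that check, assuming an illustrative dataset path rather than a file from the original source:

# Hedged sketch of the QV-availability check used above; the dataset
# path is a placeholder.
from pbcore.io import ConsensusReadSet

REQUIRED_QVS = set(['SubstitutionQV', 'InsertionQV', 'DeletionQV'])
with ConsensusReadSet("ccs.consensusreadset.xml") as ds:
    have_qvs = all(bam.pulseFeaturesAvailable() == REQUIRED_QVS
                   for bam in ds.resourceReaders())
print("QV fields available:", have_qvs)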
Example #6
def resolved_tool_contract_runner(rtc):
    ccs_set = rtc.task.input_files[2]
    # FIXME we have to ignore the new CCS output for now because it doesn't
    # contain the necessary QV fields; however, since the old behavior appears
    # to be to use this always (independent of --use_finer_qv), it will still
    # accommodate the older CCS files we use for testing
    log.info("Looking for QVs in CCS input...")
    with ConsensusReadSet(ccs_set) as ds:
        for bam in ds.resourceReaders():
            qvs = bam.pulseFeaturesAvailable()
            if qvs != set(['SubstitutionQV', 'InsertionQV', 'DeletionQV']):
                log.warning(
                    "Missing QV fields from %s, will use default probabilities",
                    bam.filename)
                ccs_set = None
                break
    tmp_dir = rtc.task.tmpdir_resources[0].path \
            if len(rtc.task.tmpdir_resources) > 0 else None
    print("my tmp_dir is %s" % tmp_dir)
    return IcePartialOne(input_fasta=rtc.task.input_files[0],
                         ref_fasta=rtc.task.input_files[1],
                         out_pickle=rtc.task.output_files[0],
                         ccs_fofn=ccs_set,
                         blasr_nproc=rtc.task.nproc,
                         tmp_dir=tmp_dir).run()
Example #7
def update_consensus_reads(ccs_in,
                           subreads_in,
                           ccs_out,
                           use_run_design_uuid=False):
    ds_subreads = SubreadSet(subreads_in, skipCounts=True)
    with ConsensusReadSet(ccs_in) as ds:
        ds.name = ds_subreads.name + " (CCS)"
        run_design_uuid = None
        if use_run_design_uuid:
            uuids = set()
            for collection in ds.metadata.collections:
                if collection.consensusReadSetRef is not None:
                    uuids.add(collection.consensusReadSetRef.uuid)
            if len(uuids) == 1:
                run_design_uuid = list(uuids)[0]
            elif len(uuids) == 0:
                log.warning("No pre-defined ConsensusReadSetRef UUID found")
            else:
                log.warning("Multiple ConsensusReadSetRef UUIDs found")
        for collection in ds.metadata.collections:
            if len(collection.wellSample.bioSamples) == 0:
                for collection2 in ds_subreads.metadata.collections:
                    for bio_sample in collection2.wellSample.bioSamples:
                        collection.bioSamples.append(bio_sample)
        ds.updateCounts()
        if run_design_uuid is not None:
            ds.uuid = run_design_uuid
        else:
            ds.newUuid()
        sanitize_dataset_tags(ds, remove_hidden=True)
        ds.write(ccs_out)
    return 0
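A minimal invocation of update_consensus_reads might look like the sketch below; all three dataset paths are placeholders, not files from the original source.

# Hedged usage sketch of the function above (placeholder paths).
update_consensus_reads("movie.consensusreadset.xml",
                       "movie.subreadset.xml",
                       "movie.updated.consensusreadset.xml",
                       use_run_design_uuid=True)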
 def test_ccs_barcodes_table_asymmetric(self):
     CCS_DS = op.join(ROOT_DATA_DIR, "ccs", "asym_barcodes",
                      "ccs.consensusreadset.xml")
     ds = ConsensusReadSet(CCS_DS)
     r = to_report(ds, tempfile.mkdtemp())
     self.assertEqual(r.tables[1].columns[0].values,
                      ['F5--R5', 'F8--R8', 'F20--R20', 'F29--R29', 'F30--R30'])
 def test_ccs_multiple_movies_single_bam(self):
     """
     Check that the report doesn't crash when a single BAM file contains
     reads from multiple movies
     """
     ds = ConsensusReadSet(self.CCS_BAM)
     r = to_report(ds, tempfile.mkdtemp())
Example #10
 def test_ccs_bam_np_is_at_least_npasses(self):
     """
     Check that the number of passes of each read in the ConsensusReadSet
     output is at least equal to the minimum specified in the resolved
     tool contract.
     """
     nchecked = nskipped = 0
     for rtc in self.resolved_tool_contracts:
         if rtc.task.task_id == "pbccs.tasks.ccs":
             min_passes = rtc.task.options["pbccs.task_options.min_passes"]
             with ConsensusReadSet(rtc.task.output_files[0]) as ccs:
                 for bam in ccs.resourceReaders():
                     if len(bam) > NRECORDS_MAX_ITER:
                         nskipped += 1
                     else:
                         for rec in bam:
                             np = rec.peer.opt("np")
                             self.assertTrue(
                                 np >= min_passes,
                                 "{r} has np {n} < {e}".format(
                                     r=rec.qName, n=np, e=min_passes))
                         nchecked += 1
     if nchecked == 0:
         if nskipped == 0:
             raise unittest.SkipTest("No CCS BAM files found")
         else:
             raise unittest.SkipTest(
                 "File size over limit - 'np' not checked")
Example #11
def run_report(input_file, report_json, output_dir):
    log.info("Running {f} v{v}.".format(f=os.path.basename(__file__),
                                        v=__version__))
    ds = ConsensusReadSet(input_file)
    report = to_report(ds, output_dir)
    log.info(pformat(report.to_dict()))
    report.write_json(report_json)
    return 0
Example #12
 def add_bash5(self, filename):
     """Add a bas.h5/ccs.h5/ccs.bam to cacher."""
     basename = os.path.basename(filename)
     if filename.endswith('.bax.h5'):
         movie = basename[:-9]
         if movie not in self.bas_files:
             self.bas_files[movie] = smrt_wrapper(filename[:-9],
                                                  suffix='.bax.h5')
     elif filename.endswith('.1.ccs.h5') or \
             filename.endswith('.2.ccs.h5') or \
             filename.endswith('.3.ccs.h5'):
         movie = basename[:-9]
         if movie not in self.bas_files:
             self.bas_files[movie] = smrt_wrapper(filename[:-9])
     elif filename.endswith('.ccs.h5'):
         # a single .ccs.h5 (post 150k runs), treat the same as .bas.h5
         movie = basename[:-7]
         self.bas_files[movie] = defaultdict(lambda: filename)
     elif filename.endswith('.1.subreads.bam') or \
          filename.endswith('.2.subreads.bam') or \
          filename.endswith('.3.subreads.bam'):
         movie = basename[:-15]
         if movie not in self.bas_files:
             self.bas_files[movie] = smrt_wrapper(filename[:-15])
     elif filename.endswith('subreads.bam'):
         raise NotImplementedError(
             "%s add_bash5 *.subreads.bam not implemented." %
             (self.__class__.__name__))
     elif filename.endswith('.1.ccs.bam') or \
          filename.endswith('.2.ccs.bam') or \
          filename.endswith('.3.ccs.bam'):
         movie = basename[:-10]
         if movie not in self.bas_files:
             self.bas_files[movie] = smrt_wrapper(filename[:-10])
     elif filename.endswith('.bas.h5'):
         movie = basename[:-7]
         self.bas_files[movie] = defaultdict(lambda: filename)
     elif filename.endswith(".consensusreadset.xml"):
         ds = ConsensusReadSet(filename)
         for rr in ds.resourceReaders():
             for rg in rr.readGroupTable:
                 self.bas_files[rg.MovieName] = dataset_wrapper(filename)
     else:
         raise IOError("Unsupported file format: %s" % filename)
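The numeric slices above (e.g. basename[:-10]) encode the length of each recognized suffix. A hedged illustration with an invented filename makes that relationship explicit:

# len('.1.ccs.bam') == 10, matching the basename[:-10] slice above;
# the filename is invented for the example.
import os

def movie_name(filename):
    basename = os.path.basename(filename)
    for suffix in ('.1.ccs.bam', '.2.ccs.bam', '.3.ccs.bam'):
        if basename.endswith(suffix):
            return basename[:-len(suffix)]
    raise IOError("Unsupported file format: %s" % filename)

assert movie_name("m54006_160504_020705.1.ccs.bam") == "m54006_160504_020705"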
 def test_ccs_barcodes_table(self):
     CCS_DS = pbtestdata.get_file("ccs-barcoded")
     ds = ConsensusReadSet(CCS_DS)
     r = to_report(ds, tempfile.mkdtemp())
     self.assertEqual([c.values for c in r.tables[1].columns[0:4]],
                      [["lbc1--lbc1", "lbc3--lbc3"], [1, 1], [1958, 1954], [1958, 1954]])
     self.assertAlmostEqual(r.tables[1].columns[4].values[0], 0.9724,
                            places=4)
     self.assertAlmostEqual(r.tables[1].columns[4].values[1], 0.9926,
                            places=4)
Example #14
    def test_get_bio_sample_name(self):
        filename = pbtestdata.get_file("subreads-sequel")
        ds1 = SubreadSet(filename)
        assert get_bio_sample_name(ds1) == "Narwhale"

        filename = pbtestdata.get_file("subreads-biosample-2")
        ds2 = SubreadSet(filename)
        assert get_bio_sample_name(ds2) == "UnnamedSample"

        ds3 = ds1 + ds2
        assert get_bio_sample_name(ds3) == "Multiple"

        filename = pbtestdata.get_file("rsii-ccs-multi-cell")
        ds4 = ConsensusReadSet(filename)
        assert get_bio_sample_name(ds4) == "Multiple"

        filename = pbtestdata.get_file("ccs-sequel")
        ds5 = ConsensusReadSet(filename)
        assert get_bio_sample_name(ds5) == "NarwhalCcs"
Example #16
 def test_read_ccs_multiple_movies_one_bam(self):
     """
     Test for sane BamCollection.__getitem__() behavior when a .bam file
     contains multiple read groups.
     """
     dataset_xml = op.join(self.ioDataDir,
                           "ccs_multi_movie.consensusreadset.xml")
     bc = BamCollection(dataset_xml)
     with ConsensusReadSet(dataset_xml) as ds:
         for read in ds:
             self.assertEqual(bc[read.qName].readName, read.qName)
Example #17
    def test_qname_ccs(self):
        fn = ('/pbi/dept/secondary/siv/testdata/ccs-unittest/'
              'tiny/little.ccs.bam')
        sset = ConsensusReadSet(fn)

        self.assertEqual(len(sset), 14)
        size = 4
        qn = [r.qName for r in sset[:size]]
        good_qn = [('=', qn)]
        sset.filters.addRequirement(qname=good_qn)
        self.assertEqual(size, sum(1 for _ in sset))
        self.assertEqual(size, len(sset))
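The tuples passed to addRequirement are (operator, value) pairs. A hedged sketch of the same filter API against the rq (read quality) pbi column follows; the dataset path and the 0.99 threshold are illustrative only:

# Hedged sketch of DataSet filters on another pbi column; the path and
# the quality threshold are placeholders.
from pbcore.io import ConsensusReadSet

ds = ConsensusReadSet("movie.consensusreadset.xml")
ds.filters.addRequirement(rq=[('>=', 0.99)])
ds.updateCounts()
print("records passing filter:", ds.numRecords)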
Example #18
def run_args(args):
    dstore = DataStore.load_from_json(os.path.realpath(args.datastore))
    ds_in = ConsensusReadSet(args.ccs_in, trustCounts=True)
    ds_out = ConsensusReadSet(*([f.path for f in dstore.files.values()]),
                              trustCounts=True)
    sanitize_dataset_tags(ds_out, remove_hidden=True)
    ds_out.name = ds_in.name.replace(" (filtered)", "") + " (trimmed)"
    ds_out.subdatasets = []
    ds_out.write("trimmed.consensusreadset.xml")
    return 0
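run_args expects parsed command-line arguments; a hedged way to drive it directly is with an argparse.Namespace, where both paths below are placeholders:

# Hedged usage sketch; argparse.Namespace stands in for the parsed CLI
# arguments, and both input paths are placeholders.
import argparse

run_args(argparse.Namespace(datastore="lima.datastore.json",
                            ccs_in="input.consensusreadset.xml"))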
Example #20
def run_dev_ccs_report(rtc):
    from pbcore.io import ConsensusReadSet
    with ConsensusReadSet(rtc.task.input_files[0]) as ds:
        ds.updateCounts()
        attr = [
            Attribute("number_of_records", value=ds.numRecords),
            Attribute("total_length", value=ds.totalLength)
        ]
        report = Report("ccs_report",
                        title="ConsensusReadSet XML Report",
                        attributes=attr)
        report.write_json(rtc.task.output_files[0])
    return 0
Example #21
def read_isoseq3_refine_flnc(flnc_ccs):
    """
    Read the header and index of the full-length non-chimeric (FLNC) CCS reads
    output by the 'isoseq3 refine' command, and return a dictionary mapping CCS
    read IDs to sample names.
    """
    flnc_read_samples = {}
    ds = ConsensusReadSet(flnc_ccs, strict=True)
    for rg in ds.readGroupTable:
        sel = ds.index.qId == rg.ID
        zmws = ds.index.holeNumber[sel]
        for zmw in zmws:
            qname = "{m}/{z}/ccs".format(m=rg.MovieName, z=zmw)
            assert qname not in flnc_read_samples, qname
            flnc_read_samples[qname] = rg.SampleName
    return flnc_read_samples
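A minimal usage sketch of the function above; the FLNC dataset path is a placeholder:

# Hedged usage sketch: print a few of the CCS-read-ID -> sample-name
# mappings built by read_isoseq3_refine_flnc (placeholder path).
samples = read_isoseq3_refine_flnc("flnc.consensusreadset.xml")
for qname, sample in sorted(samples.items())[:5]:
    print(qname, sample)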
Example #22
def _run_args(args):
    ds = ConsensusReadSet(args.ccsxml, strict=True)
    orig_uuid = ds.uuid
    ds.consolidate("reads.bam", useTmp=False)
    bam_res = ds.externalResources[0]
    if args.zmws_json:
        bam_res._setSubResByMetaType("PacBio.FileTypes.json", args.zmws_json)
    if args.report_ccs_processing:
        bam_res._setSubResByMetaType("PacBio.FileTypes.JsonReport",
                                     args.report_ccs_processing)
    ds.uuid = orig_uuid
    ds.write("final.consensusreadset.xml")
    with open("reads.fofn", "wt") as fofn:
        fofn.write(os.path.abspath("reads.bam"))
    return 0
Example #23
 def test_validity_ccs_accuracy(self):
     """
     check that ccs accuracy is greater than a preset threshold.  this can
     be specified in the 'ccs' section of test_values.json, otherwise the
     permissive default value of 0.90 will be used.
     """
     with ConsensusReadSet(self.final_ccs_file) as ds:
         values_sum = n_values = 0
         for rr in ds.resourceReaders():
             values_sum += rr.readQual.sum()
             n_values += len(rr)
         # XXX see BamAlignment.readScore docstring for explanation
         readScore = values_sum / n_values
         vmin = MIN_CCS_MEAN_ACCURACY
         if "min_ccs_mean_accuracy" in self.expected_values:
             vmin = self.expected_values["min_ccs_mean_accuracy"]
         self.assertGreater(readScore, vmin)
Example #24
def run_args(args):
    datastore_out = op.abspath(args.datastore_out)
    base_dir = op.dirname(datastore_out)
    datastore_files = []
    with ConsensusReadSet(args.dataset_file, strict=True) as ds:
        bam_file_name, file_prefix = get_prefix_and_bam_file_name(
            ds, is_barcoded=False)
        if args.mode == "fasta":
            datastore_files.extend(to_fastx_files(
                FileTypes.FASTA, ds, args.dataset_file,
                Constants.FASTA_FILE_IDS, base_dir, file_prefix,
                args.min_rq, no_zip=args.no_zip))
        elif args.mode == "fastq":
            datastore_files.extend(to_fastx_files(
                FileTypes.FASTQ, ds, args.dataset_file,
                Constants.FASTQ_FILE_IDS, base_dir, file_prefix,
                args.min_rq, no_zip=args.no_zip))
        elif args.mode == "consolidate":
            if bam_file_name is None:
                datastore_files.append(
                    consolidate_bam(base_dir, file_prefix, ds,
                                    min_rq=args.min_rq))
    DataStore(datastore_files).write_json(datastore_out)
    return 0
Example #25
 def setUpClass(cls):
     super(TestAccuracy, cls).setUpClass()
     ref_fasta = cls.test_values["ccs"].get("reference", None)
     cls.ref_csv = cls.test_values["ccs"].get("ccscheck_out", None)
     if cls.ref_csv is None:
         raise unittest.SkipTest("No CSV file defined")
     ref_dir = op.dirname(ref_fasta)
     cls.run_dir = tempfile.mkdtemp()
     tmp_ref_fasta = op.join(cls.run_dir, op.basename(ref_fasta))
     shutil.copyfile(ref_fasta, tmp_ref_fasta)
     cls.ref_fasta = tmp_ref_fasta
     pysam.faidx(tmp_ref_fasta)
     cls.final_ccs_file = None
     for file_id, file_info in cls.datastore.get_file_dict().items():
         if file_info.is_chunked:
             continue
         if file_info.file_type_id == FileTypes.DS_CCS.file_type_id:
             cls.final_ccs_file = file_info.path
             break
     cls.ccs_ds = ConsensusReadSet(cls.final_ccs_file)
Example #26
 def test_ccs_barcoding_propagation(self):
     """
     Test that any BarcodeSet defined as an external resource of the
     subreads BAM file(s) in the input SubreadSet is also an external
     resource of the output ConsensusReadSet.
     """
     if self.is_barcoded:
         with ConsensusReadSet(self.final_ccs_file) as ccs:
             self.assertTrue(ccs.isBarcoded)
             for ext_res_out in ccs.externalResources:
                 self.assertEqual(self.barcode_set, ext_res_out.barcodes)
             if "barcodes" in self.expected_values:
                 barcodes = set()
                 for bam in ccs.resourceReaders():
                     bc_eq = bam.pbi.bcForward == bam.pbi.bcReverse
                     self.assertTrue(bc_eq.all())
                     barcodes.update(set(list(bam.pbi.bcForward)))
                 self.assertEqual(sorted(list(barcodes)),
                                  self.expected_values["barcodes"])
     else:
         raise unittest.SkipTest("SubreadSet was not barcoded, skipping")
Example #27
def run_ccs_bam_fastq_exports(ccs_dataset_file, base_dir, is_barcoded=False,
                              min_rq=Constants.HIFI_RQ, no_zip=False):
    """
    Take a ConsensusReadSet and write BAM/FASTQ files to the output
    directory.  If this is a demultiplexed dataset, it is assumed to have
    a single BAM file within a dataset that is already imported in SMRT Link.
    Note that this function runs the exports serially, and is therefore no
    longer used in this specific task, but rather in the barcoded version that
    runs in parallel.
    """
    datastore_files = []
    with ConsensusReadSet(ccs_dataset_file, strict=True) as ds:
        bam_file_name, file_prefix = get_prefix_and_bam_file_name(
            ds, is_barcoded)
        if bam_file_name is None:
            datastore_files.append(consolidate_bam(base_dir, file_prefix, ds, min_rq))
        fasta_file_ids = [Constants.FASTA_ID, Constants.FASTA2_ID]
        fastq_file_ids = [Constants.FASTQ_ID, Constants.FASTQ2_ID]
        datastore_files.extend(
            to_fastx_files(FileTypes.FASTA,
                           ds,
                           ccs_dataset_file,
                           fasta_file_ids,
                           base_dir,
                           file_prefix,
                           min_rq=min_rq,
                           no_zip=no_zip))
        datastore_files.extend(
            to_fastx_files(FileTypes.FASTQ,
                           ds,
                           ccs_dataset_file,
                           fastq_file_ids,
                           base_dir,
                           file_prefix,
                           min_rq=min_rq,
                           no_zip=no_zip))
    return datastore_files
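A hedged sketch of calling the export helper above; the dataset path and output directory are placeholders, and DataStoreFile.path is used the same way as in Example #18:

# Hedged usage sketch (placeholder paths).
files = run_ccs_bam_fastq_exports("movie.consensusreadset.xml",
                                  "/tmp/ccs_exports",
                                  is_barcoded=False)
for f in files:
    print(f.path)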
Example #28
 def test_consensus_read_set_ref(self):
     import pbtestdata
     ds = ConsensusReadSet(pbtestdata.get_file("ccs-sequel"), strict=True)
     uuid = ds.metadata.collections[0].consensusReadSetRef.uuid
     assert uuid == "5416f525-d3c7-496b-ba8c-18d7ec1b4499"
Example #29
 def test_ccs_bam_index(self):
     """
     Test that the output includes .pbi index file(s).
     """
     with ConsensusReadSet(self.final_ccs_file) as ds:
         ds.assertIndexed()
Example #30
 def setUpClass(cls):
     ds = ConsensusReadSet(pbcore.data.getCCSBAM(), strict=True)
     ds.write(cls.INPUT_FILES[0])
 def setUpClass(cls):
     super(TestToolContract, cls).setUpClass()
     ds = ConsensusReadSet(pbcore.data.getCCSBAM(), strict=True)
     ds.write(cls.INPUT_FILES[0])
Example #33
 def run_after(self, rtc, output_dir):
     with ConsensusReadSet(rtc.task.output_files[0]) as ds_out:
         zmws = set(ds_out.resourceReaders()[0].holeNumber)
         logging.info("ZMWs = {z}".format(z=zmws))
         for z in zmws:
             self.assertTrue(self.zmw_range[0] < z < self.zmw_range[1])
Example #34
def is_ccs_demultiplexed(input_file):
    log.info("Checking {} is lima-demultiplexed or not.".format(input_file))
    # keep memory to an absolute minimum
    ds = ConsensusReadSet(input_file, skipCounts=True)
    pbi_headers = [PbiHeaderOnly(er.pbi) for er in ds.externalResources]
    return any([pbi.hasBarcodeInfo for pbi in pbi_headers])
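A minimal usage sketch of the check above, with a placeholder path:

# Hedged usage sketch (placeholder path).
if is_ccs_demultiplexed("movie.consensusreadset.xml"):
    print("dataset is already demultiplexed by lima")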
 def setUpData(cls):
     cls.bam_file_name = pbcore.data.getCCSBAM()
     cls.xml_file_name = tempfile.NamedTemporaryFile(
         suffix=".consensusreadset.xml").name
     ds = ConsensusReadSet(cls.bam_file_name)
     ds.write(cls.xml_file_name)