Beispiel #1
0
def ice_fa2fq(in_fa, ccs_fofn, out_fq):
    """Convert an input FASTA file to an output FASTQ file,
    reading QVs from the input ccs.h5, ccs.bam or ccs FOFN.

    For every record of `in_fa`, the "QualityValue" QVs are looked up by the
    first space-delimited token of the record name and written, together
    with the sequence, to `out_fq`.

    Args:
        in_fa: input FASTA, opened with ContigSetReaderWrapper.
        ccs_fofn: CCS file or FOFN, resolved to a list of files with
            get_files_from_file_or_fofn.
        out_fq: output FASTQ, opened with FastqWriter.

    Raises:
        IOError: if the format guessed for the CCS files is neither H5 nor
            BAM, or if a read cannot be located among the H5 inputs.
        ValueError: if a record's sequence and its QVs differ in length.
    """
    ccs_fns = get_files_from_file_or_fofn(ccs_fofn)
    fmt = guess_file_format(ccs_fns)
    is_h5 = fmt == FILE_FORMATS.H5

    # bas.h5 path --> opened BasH5Reader; stays empty for BAM input.
    bas_handlers = {}
    if is_h5:
        qver = basQVcacher()
        for ccs_fn in ccs_fns:
            qver.add_bash5(ccs_fn)
    elif fmt == FILE_FORMATS.BAM:
        qver = BamCollection(*ccs_fns)
    else:
        raise IOError("ice_fa2fq does not support input %s." % ccs_fofn)

    # From here on fmt is known to be H5 or BAM, so the per-record code only
    # has to distinguish those two cases.
    try:
        with ContigSetReaderWrapper(in_fa) as reader, \
                FastqWriter(out_fq) as writer:
            for r in reader:
                # Lazy logging args: no formatting cost unless DEBUG is on.
                logging.debug("Getting QVs for %s ...", r.name)
                seqid = r.name.split(' ')[0]
                parsed_read_name = _Parsed_Read_Name(seqid)
                if is_h5:
                    try:
                        bas_file = \
                            qver.bas_files[parsed_read_name.movie][seqid]
                        if bas_file not in bas_handlers:
                            # Open each bas.h5 once and reuse the handler.
                            bas_handlers[bas_file] = BasH5Reader(bas_file)
                    except KeyError:
                        raise IOError("Could not read {s} from {f}.".format(
                            s=seqid, f=ccs_fofn))
                    qvs = get_qv_from_bas_handler(
                        bas_handler=bas_handlers[bas_file],
                        parsed_read_name=parsed_read_name,
                        qv_name="QualityValue")
                else:
                    qvs = get_qvs_from_bam(reader=qver,
                                           parsed_read_name=parsed_read_name,
                                           qv_name="QualityValue")

                if len(r.sequence) != len(qvs):
                    raise ValueError(
                        "Sequence and QVs of {r} should be the same!".format(
                            r=r.name))
                writer.writeRecord(r.name, r.sequence[:], qvs)
    finally:
        # Release the QV sources even when a record fails above; otherwise
        # the opened handlers would leak on IOError/ValueError.
        # items() (not iteritems()) so this runs on Python 2 and 3 alike.
        for bas_file, bas_handler in bas_handlers.items():
            logging.debug("Closing %s ...", bas_file)
            bas_handler.close()
        if not is_h5:
            qver.close()
Beispiel #2
0
 def test_bam(self):
     """Precache QVs for all read ids, then check the number of
     InsertionQV entries and the first DeletionQV value of READ_ID.
     """
     # NOTE(review): CCS_BAM is loaded through add_bash5 -- presumably the
     # cacher accepts this input; confirm against basQVcacher.
     cacher = basQVcacher()
     cacher.add_bash5(CCS_BAM)

     read_ids = list(_get_read_ids())
     cacher.precache(read_ids)

     insertion_qvs = [cacher.get(rid, "InsertionQV") for rid in read_ids]
     deletion_qv = cacher.get(READ_ID, "DeletionQV")

     self.assertEqual("%.5f" % deletion_qv[0], "0.01995")
     self.assertEqual(len(insertion_qvs), 251)
Beispiel #3
0
 def test_bam(self):
     """Fetch InsertionQV for every read id and DeletionQV for READ_ID
     from a precached basQVcacher, then verify the expected values.
     """
     # NOTE(review): the BAM fixture goes in via add_bash5 -- presumably
     # supported by basQVcacher; confirm.
     qv_cache = basQVcacher()
     qv_cache.add_bash5(CCS_BAM)

     all_ids = list(_get_read_ids())
     qv_cache.precache(all_ids)

     ins_qvs = [qv_cache.get(one_id, "InsertionQV") for one_id in all_ids]
     del_qv = qv_cache.get(READ_ID, "DeletionQV")

     # First DeletionQV value, compared at five decimal places.
     self.assertEqual("%.5f" % del_qv[0], "0.01995")
     self.assertEqual(len(ins_qvs), 251)
Beispiel #4
0
def ice_fa2fq(in_fa, ccs_fofn, out_fq):
    """Convert an input FASTA file to an output FASTQ file,
    reading QVs from the input ccs.h5, ccs.bam or ccs FOFN.

    For every record of `in_fa`, the "QualityValue" QVs are looked up by the
    first space-delimited token of the record name and written, together
    with the sequence, to `out_fq`.

    Args:
        in_fa: input FASTA, opened with ContigSetReaderWrapper.
        ccs_fofn: CCS file or FOFN, resolved to a list of files with
            get_files_from_file_or_fofn.
        out_fq: output FASTQ, opened with FastqWriter.

    Raises:
        IOError: if the format guessed for the CCS files is neither H5 nor
            BAM, or if a read cannot be located among the H5 inputs.
        ValueError: if a record's sequence and its QVs differ in length.
    """
    ccs_fns = get_files_from_file_or_fofn(ccs_fofn)
    fmt = guess_file_format(ccs_fns)
    is_h5 = fmt == FILE_FORMATS.H5

    # bas.h5 path --> opened BasH5Reader; stays empty for BAM input.
    bas_handlers = {}
    if is_h5:
        qver = basQVcacher()
        for ccs_fn in ccs_fns:
            qver.add_bash5(ccs_fn)
    elif fmt == FILE_FORMATS.BAM:
        qver = BamCollection(*ccs_fns)
    else:
        raise IOError("ice_fa2fq does not support input %s." %
                      ccs_fofn)

    # From here on fmt is known to be H5 or BAM, so the per-record code only
    # has to distinguish those two cases.
    try:
        with ContigSetReaderWrapper(in_fa) as reader, \
                FastqWriter(out_fq) as writer:
            for r in reader:
                # Lazy logging args: no formatting cost unless DEBUG is on.
                logging.debug("Getting QVs for %s ...", r.name)
                seqid = r.name.split(' ')[0]
                parsed_read_name = _Parsed_Read_Name(seqid)
                if is_h5:
                    try:
                        bas_file = \
                            qver.bas_files[parsed_read_name.movie][seqid]
                        if bas_file not in bas_handlers:
                            # Open each bas.h5 once and reuse the handler.
                            bas_handlers[bas_file] = BasH5Reader(bas_file)
                    except KeyError:
                        raise IOError("Could not read {s} from {f}.".
                                      format(s=seqid, f=ccs_fofn))
                    qvs = get_qv_from_bas_handler(
                        bas_handler=bas_handlers[bas_file],
                        parsed_read_name=parsed_read_name,
                        qv_name="QualityValue")
                else:
                    qvs = get_qvs_from_bam(reader=qver,
                                           parsed_read_name=parsed_read_name,
                                           qv_name="QualityValue")

                if len(r.sequence) != len(qvs):
                    raise ValueError(
                        "Sequence and QVs of {r} should be the same!".
                        format(r=r.name))
                writer.writeRecord(r.name, r.sequence[:], qvs)
    finally:
        # Release the QV sources even when a record fails above; otherwise
        # the opened handlers would leak on IOError/ValueError.
        # items() (not iteritems()) so this runs on Python 2 and 3 alike.
        for bas_file, bas_handler in bas_handlers.items():
            logging.debug("Closing %s ...", bas_file)
            bas_handler.close()
        if not is_h5:
            qver.close()