コード例 #1
0
 def test_alt_index_in_middle(self):
     """Parse E3MFGYR02_alt_index_in_middle.sff and verify it with check_same."""
     sff_path = "Roche/E3MFGYR02_alt_index_in_middle.sff"
     with open(sff_path, "rb") as stream:
         parsed_records = list(SffIterator(stream))
     self.check_same(parsed_records)
コード例 #2
0
 def test_index_at_start(self):
     """Parse E3MFGYR02_index_at_start.sff and verify it with check_same."""
     sff_path = "Roche/E3MFGYR02_index_at_start.sff"
     with open(sff_path, "rb") as stream:
         parsed_records = list(SffIterator(stream))
     self.check_same(parsed_records)
コード例 #3
0
 def fileiter(handle):
     """Walk every record SffIterator yields from handle, reading each id."""
     for sff_record in SffIterator(handle):
         # Reading the id is the only per-record work; the value is discarded.
         _ = sff_record.id
コード例 #4
0
    # Ugly code to make test files...
    # The fake index block is built once, as bytes (the SFF files here are
    # handled in binary mode), and NUL-padded to a multiple of eight bytes.
    index = b".diy1.00This is a fake index block (DIY = Do It Yourself), which is allowed under the SFF standard.\0"
    padding = len(index) % 8
    if padding:
        padding = 8 - padding
    index += b"\0" * padding
    assert len(index) % 8 == 0

    # Ugly bit of code to make a fake index at start
    with open("Roche/E3MFGYR02_random_10_reads.sff", "rb") as handle:
        records = list(SffIterator(handle))
    # The output must be opened in binary mode ("wb"): text mode cannot take
    # the bytes index and may translate newline bytes on some platforms.
    with open("Roche/E3MFGYR02_alt_index_at_start.sff", "wb") as out_handle:
        w = SffWriter(out_handle, index=False, xml=None)
        # Fake the header...
        w._number_of_reads = len(records)
        w._index_start = 0
        w._index_length = 0
        w._key_sequence = records[0].annotations["flow_key"]
        w._flow_chars = records[0].annotations["flow_chars"]
        w._number_of_flows_per_read = len(w._flow_chars)
        # First pass: write a header with no index info, just to learn where
        # the header ends (that offset is where the index will start).
        w.write_header()
        w._index_start = out_handle.tell()
        w._index_length = len(index)
        # Second pass: rewind and overwrite the header in place.
        out_handle.seek(0)
        w.write_header()  # this time with index info
        w.handle.write(index)
コード例 #5
0
 def test_trim(self):
     """Re-read self.filename with trim=True and match self.sff by count and id."""
     with open(self.filename, "rb") as stream:
         trimmed = list(SffIterator(stream, trim=True))
     untrimmed = self.sff
     self.assertEqual(len(untrimmed), len(trimmed))
     # Records are compared pairwise, in file order.
     for full_record, trimmed_record in zip(untrimmed, trimmed):
         self.assertEqual(full_record.id, trimmed_record.id)
コード例 #6
0
# When run as a script, execute the tests with verbose (level 2) output.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

# Disabled helper code kept for reference: it regenerates a test SFF file.
if False:
    # Ugly code to make test files...
    # The fake index block is built once, as bytes (the SFF files here are
    # handled in binary mode), and NUL-padded to a multiple of eight bytes.
    index = b".diy1.00This is a fake index block (DIY = Do It Yourself), which is allowed under the SFF standard.\0"
    padding = len(index) % 8
    if padding:
        padding = 8 - padding
    index += b"\0" * padding
    assert len(index) % 8 == 0

    # Ugly bit of code to make a fake index at start
    # Read the source records inside a with-block so the input handle is
    # closed instead of leaked.
    with open("Roche/E3MFGYR02_random_10_reads.sff", "rb") as in_handle:
        records = list(SffIterator(in_handle))
    # Binary mode ("wb") is required for the bytes written below. The handle
    # is deliberately left open here because the writer keeps using it.
    out_handle = open("Roche/E3MFGYR02_alt_index_at_start.sff", "wb")
    w = SffWriter(out_handle, index=False, xml=None)
    # Fake the header...
    w._number_of_reads = len(records)
    w._index_start = 0
    w._index_length = 0
    w._key_sequence = records[0].annotations["flow_key"]
    w._flow_chars = records[0].annotations["flow_chars"]
    w._number_of_flows_per_read = len(w._flow_chars)
    w.write_header()
コード例 #7
0
                elif keep_negatives:
                    if len(seq) >= min_len:
                        negs += 1
                        yield record
                    else:
                        short_neg += 1

    # Open the SFF input in binary mode and try to read its Roche XML
    # manifest; a ValueError from the reader is treated as "no manifest".
    in_handle = open(in_file, "rb")
    try:
        manifest = ReadRocheXmlManifest(in_handle)
    except ValueError:
        manifest = None
    # Rewind so SffIterator parses from the beginning of the file.
    in_handle.seek(0)
    out_handle = open(out_file, "wb")
    # The manifest (or None) is handed on to the writer.
    writer = SffWriter(out_handle, xml=manifest)
    # Feed the parsed records through process() and write whatever it yields.
    writer.write_file(process(SffIterator(in_handle)))
    # NOTE(review): neither handle is closed within this span - confirm they
    # are closed later in the script.
    # End of SFF code
elif seq_format.lower().startswith("fastq"):
    # FASTQ input: records are read with fastqReader and written with
    # fastqWriter, both wrapping plain text-mode file handles.
    in_handle = open(in_file, "rU")
    out_handle = open(out_file, "w")
    reader = fastqReader(in_handle)
    writer = fastqWriter(out_handle)
    if forward:
        for record in reader:
            # The primer search runs on an upper-cased copy of the sequence.
            seq = record.sequence.upper()
            result = primer.search(seq)
            if result:
                # Forward primer, take everything after it
                cut = result.end()
                # The stored sequence becomes the upper-cased, clipped copy.
                record.sequence = seq[cut:]
                if len(record.sequence) >= min_len:
コード例 #8
0
    except ImportError:
        #Prior to Biopython 1.56 this was a private function
        from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
    # The with-blocks guarantee the input and output files are closed even
    # if reading the manifest or writing the records raises part-way.
    with open(in_file, "rb") as in_handle:  # must be binary mode!
        try:
            manifest = ReadRocheXmlManifest(in_handle)
        except ValueError:
            # The reader rejected the file's manifest; carry on without one.
            manifest = None
        # This makes two passes through the SFF file, which isn't so
        # efficient, but this makes the code simple.
        pos_count = neg_count = 0
        if out_positive_file is not None:
            # First pass: keep the records whose cleaned name is in ids.
            with open(out_positive_file, "wb") as out_handle:
                writer = SffWriter(out_handle, xml=manifest)
                in_handle.seek(0)  # start again after getting manifest
                pos_count = writer.write_file(rec for rec in SffIterator(in_handle)
                                              if clean_name(rec.id) in ids)
        if out_negative_file is not None:
            # Second pass: keep the records whose cleaned name is NOT in ids.
            with open(out_negative_file, "wb") as out_handle:
                writer = SffWriter(out_handle, xml=manifest)
                in_handle.seek(0)  # start again
                neg_count = writer.write_file(rec for rec in SffIterator(in_handle)
                                              if clean_name(rec.id) not in ids)
    # And we're done
    # At the time of writing, Galaxy doesn't show SFF file read counts,
    # so it is useful to put them in stdout and thus shown in job info.
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("%i with and %i without specified IDs" % (pos_count, neg_count))
elif seq_format.lower() == "fasta":
コード例 #9
0
    try:
        from Bio.SeqIO.SffIO import ReadRocheXmlManifest
    except ImportError:
        # Prior to Biopython 1.56 this was a private function
        from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest

    # The with-blocks guarantee both files are closed even if reading the
    # manifest or writing the renamed records raises part-way.
    with open(in_file, "rb") as in_handle:  # must be binary mode!
        try:
            manifest = ReadRocheXmlManifest(in_handle)
        except ValueError:
            # The reader rejected the file's manifest; carry on without one.
            manifest = None
        with open(out_file, "wb") as out_handle:
            writer = SffWriter(out_handle, xml=manifest)
            in_handle.seek(0)  # start again after getting manifest
            # count holds whatever write_file returns and stays bound after
            # the with-blocks for any later use.
            count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename))
else:
    # Use Galaxy for FASTA, QUAL or FASTQ
    # Each branch imports the matching Galaxy reader/writer pair, wraps the
    # input and output files with them, and sets `marker`.
    # NOTE(review): marker is presumably the record-title prefix character -
    # confirm where it is used.
    # NOTE(review): the file objects passed to the readers/writers are not
    # closed in this span - confirm they are closed later.
    if seq_format.lower() in ["fasta", "csfasta"] or seq_format.lower().startswith("qual"):
        from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
        reader = fastaReader(open(in_file, "rU"))
        writer = fastaWriter(open(out_file, "w"))
        marker = ">"
    elif seq_format.lower().startswith("fastq"):
        from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
        reader = fastqReader(open(in_file, "rU"))
        writer = fastqWriter(open(out_file, "w"))
        marker = "@"
    else: