class TestReference:
    """Tests for a Reference whose name and sequence are both 'gattaca'."""

    @classmethod
    def setup_class(cls):
        """Create the Reference shared by the tests in this class."""
        # A classmethod receives the class itself, so the first parameter is
        # named ``cls`` (PEP 8) rather than ``self``.
        cls.reference = Reference('gattaca', 'gattaca')

    def test_sub_seq(self):
        """sub_seq(1, 5) on 'gattaca' is expected to return 'attac'."""
        assert self.reference.sub_seq(1, 5) == 'attac'
def test_from_bam(self):
    """Parse 'tests/data/test1.bam' against the 'hxb2_pol' reference.

    The resulting collection is expected to hold exactly 6308 mapped reads.
    """
    # Adjacent string literals are joined by the parser into one sequence.
    hxb2_pol_sequence = (
        'CCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAG'
        'TAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG'
    )
    hxb2_pol = Reference('hxb2_pol', hxb2_pol_sequence)

    collection = parse_mapped_reads_from_bam(hxb2_pol, 'tests/data/test1.bam')

    assert len(collection.mapped_reads) == 6308
def test_pairwise_alignment_to_differences():
    """Check the differences reported for a query with one '-' character.

    The padded query matches the padded reference everywhere except index
    10, where it carries '-'; with a reference start of 0 the expected
    result is ``{10: '-'}``.
    """
    sequence = 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG'
    # Constructed here but not passed to the function under test.
    reference = Reference('test', sequence)
    padded_reference = sequence
    padded_query = 'AGCTTAGCTA-GCTACCTATATCTTGGTCTTGGCCG'
    start = 0

    found = pairwise_alignment_to_differences(
        padded_reference, padded_query, start)

    assert found == {10: '-'}
def test_sam_alignment_to_padded_alignment():
    """Check the three padded strings produced for a hand-built alignment.

    The segment starts at reference position 0 and its query is one base
    shorter than the reference; the expected padded query has '-' at
    index 10 and the expected match line has a space there.
    """
    segment = AlignedSegment()
    segment.reference_start = 0
    segment.query_sequence = 'AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG'
    # NOTE(review): presumably (operation, length) pairs -- confirm against
    # the AlignedSegment documentation.
    segment.cigartuples = ((0, 10), (2, 1), (0, 25))

    reference = Reference('test', 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG')

    padded = sam_alignment_to_padded_alignment(segment, reference)
    padded_reference, match_line, padded_query = padded

    assert padded_reference == 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG'
    assert match_line == '|||||||||| |||||||||||||||||||||||||'
    assert padded_query == 'AGCTTAGCTA-GCTACCTATATCTTGGTCTTGGCCG'
def setup_class(self):
    """Build the reference and a collection of 100 mapped reads.

    Reads 0-4 are created with the differences ``{10: '-'}``; reads 5-99
    are created with no differences.  Each read is stored under the key
    ``'read<i>_1'``.
    """
    self.reference = Reference('test', 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG')
    self.mapped_reads_obj = MappedReadCollection(self.reference)

    for index in range(100):
        read_id = 'read%i' % index
        # A fresh dict per read, so no two reads share one object.
        differences = {10: '-'} if index < 5 else {}
        key = '%s_%d' % (read_id, 1)
        self.mapped_reads_obj.mapped_reads[key] = MappedRead(
            read_id, 0, 34, differences, 0, 35, '+')
def parse_references_from_fasta(fasta):
    """Parse Reference objects from a fasta file.

    Each Reference is named after the first run of non-whitespace
    characters in its record header.

    >>> rs = parse_references_from_fasta('tests/data/ref1.fasta')
    >>> print(len(rs))
    1
    >>> print(rs[0].seq)
    AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT

    Args:
        fasta: path of the fasta file to read.

    Returns:
        A tuple of Reference objects, in the order the parser yields them.

    Raises:
        AttributeError: if a header contains no non-whitespace characters
            (the regex search then returns None).
    """
    # The context manager closes the file even if parsing raises; the tuple
    # is fully built before the ``with`` block exits.
    with open(fasta) as handle:
        return tuple(
            Reference(re.search(r"(\S+)", header).group(0), seq)
            for header, seq in Bio.SeqIO.FastaIO.SimpleFastaParser(handle)
        )
def setup_class(self):
    """Store a Reference whose name and sequence are both 'gattaca'."""
    name = sequence = 'gattaca'
    self.reference = Reference(name, sequence)