def testMinCount(self):
    # With min_count=3, the single count-3 entry is expected on its own row
    # (error "cysteines"), while the two count-2 entries are expected as one
    # combined row with count 4 and error "count < 3".
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    queries_with_counts = [
        ("TGTACAAGA-------------------------------------------------"
         "--------------------------------------------------", 3),
        ("TGTACAGGG-------------------------------------------------"
         "--------------------------------------------------", 2),
        ("TGTACAGAA-------------------------------------------------"
         "--------------------------------------------------", 2),
    ]
    counts = [((reference, query), count)
              for query, count in queries_with_counts]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,3,,,,CTR,,cysteines,\n"
        "2,4,,,,,,count < 3,\n"
    )
    expected_summary_csv = (
        "mapped,valid,X4calls,X4pct,final,validpct\n"
        "7,0,0,,,0.00\n"
    )

    write_rows(self.pssm,
               counts,
               self.g2p_csv,
               self.g2p_summary_csv,
               min_count=3)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testSummaryX4(self):
    # Two scored sequences: the count-2 one is expected to be called X4 and
    # the count-1 one R5, giving a summary of 2 X4 calls out of 3 (66.67%)
    # with a final call of X4.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    x4_query = ("TGTATGAGACCCAACAACAATACAAGAAAAAGTATACATAT------AGGACCAGGGA"
                "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGT")
    r5_query = ("TGTACAAGACCCAACAACAATACAAGAAAAAGTATACATAT------AGGACCAGGGA"
                "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT")
    counts = [((reference, x4_query), 2),
              ((reference, r5_query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,2,0.454349,2.6,X4,CMRPNNNTRKSIHIGPGRAFYATGEIIGDIRRAHC,"
        "CMRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RRAHC,,\n"
        "2,1,0.067754,42.3,R5,CTRPNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
        "CTRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,\n"
    )
    expected_summary_csv = (
        "mapped,valid,X4calls,X4pct,final,validpct\n"
        "3,3,2,66.67,X4,100.00\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testSummaryX4(self):
    # One X4-called sequence (count 2) plus one R5-called sequence (count 1):
    # the summary is expected to report 3 mapped, 3 valid, 2 X4 calls
    # (66.67%) and a final call of X4.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    queries_with_counts = [
        ("TGTATGAGACCCAACAACAATACAAGAAAAAGTATACATAT------AGGACCAGGGA"
         "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACGAGCACATTGT", 2),
        ("TGTACAAGACCCAACAACAATACAAGAAAAAGTATACATAT------AGGACCAGGGA"
         "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT", 1),
    ]
    counts = [((reference, query), count)
              for query, count in queries_with_counts]
    header = "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
    x4_row = ("1,2,0.454349,2.6,X4,CMRPNNNTRKSIHIGPGRAFYATGEIIGDIRRAHC,"
              "CMRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RRAHC,,\n")
    r5_row = ("2,1,0.067754,42.3,R5,CTRPNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
              "CTRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,\n")
    expected_g2p_csv = header + x4_row + r5_row
    expected_summary_csv = ("mapped,valid,X4calls,X4pct,final,validpct\n"
                            "3,3,2,66.67,X4,100.00\n")

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testSummaryValidPercentageThresholdFailed(self):
    # 300 of 400 reads are valid (75.00%). With min_valid=300 and
    # min_valid_percent=75.1 the summary is expected to leave the final
    # call blank.
    gapped_reference = (
        "TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
        "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    valid_query = (
        "TGTACAAGACCCAACAACAATACAAGAAAAA------GTATACATATAGGACCAGGGA"
        "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT")
    plain_reference = (
        "TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
        "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    truncated_query = (
        "TGTACAAGA-------------------------------------------------"
        "--------------------------------------------------")
    counts = [((gapped_reference, valid_query), 300),
              ((plain_reference, truncated_query), 100)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,300,0.067754,42.3,R5,CTRPNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
        "CTRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,\n"
        "2,100,,,,CTR,,cysteines,\n"
    )
    expected_summary_csv = (
        "mapped,valid,X4calls,X4pct,final,validpct\n"
        "400,300,0,0.00,,75.00\n"
    )

    write_rows(self.pssm,
               counts,
               self.g2p_csv,
               self.g2p_summary_csv,
               min_valid=300,
               min_valid_percent=75.1)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testMinCount(self):
    # min_count=3: the count-3 entry keeps its own row, and the two count-2
    # entries are expected to be reported together as count 4 with the
    # error "count < 3". The summary expects 7 mapped and 0 valid.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query_at_threshold = (
        "TGTACAAGA-------------------------------------------------"
        "--------------------------------------------------")
    first_rare_query = (
        "TGTACAGGG-------------------------------------------------"
        "--------------------------------------------------")
    second_rare_query = (
        "TGTACAGAA-------------------------------------------------"
        "--------------------------------------------------")
    counts = [((reference, query_at_threshold), 3),
              ((reference, first_rare_query), 2),
              ((reference, second_rare_query), 2)]
    expected_g2p_csv = "".join([
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n",
        "1,3,,,,CTR,,cysteines,\n",
        "2,4,,,,,,count < 3,\n",
    ])
    expected_summary_csv = "".join([
        "mapped,valid,X4calls,X4pct,final,validpct\n",
        "7,0,0,,,0.00\n",
    ])

    write_rows(self.pssm,
               counts,
               self.g2p_csv,
               self.g2p_summary_csv,
               min_count=3)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testSummaryValidPercentageThresholdFailed(self):
    # The valid percentage (300 / 400 = 75.00) falls just short of
    # min_valid_percent=75.1, so the expected summary has an empty final
    # call even though min_valid=300 is met.
    valid_pair = (
        "TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
        "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT",
        "TGTACAAGACCCAACAACAATACAAGAAAAA------GTATACATATAGGACCAGGGA"
        "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT",
    )
    invalid_pair = (
        "TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
        "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT",
        "TGTACAAGA-------------------------------------------------"
        "--------------------------------------------------",
    )
    counts = [(valid_pair, 300), (invalid_pair, 100)]
    header = "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
    valid_row = ("1,300,0.067754,42.3,R5,CTRPNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
                 "CTRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,\n")
    invalid_row = "2,100,,,,CTR,,cysteines,\n"
    expected_g2p_csv = header + valid_row + invalid_row
    expected_summary_csv = ("mapped,valid,X4calls,X4pct,final,validpct\n"
                            "400,300,0,0.00,,75.00\n")

    write_rows(self.pssm,
               counts,
               self.g2p_csv,
               self.g2p_summary_csv,
               min_valid=300,
               min_valid_percent=75.1)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())
def testSimple(self):
    # Smallest case: a single pair with count 1 and no summary file passed.
    # The expected row shows seq "CTR" with the "cysteines" error.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGA-------------------------------------------------"
             "--------------------------------------------------")
    counts = [((reference, query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,CTR,,cysteines,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testLengthTooShort(self):
    # The query contains a run of gap characters in its middle; the expected
    # row carries the translated sequence and the "length" error.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAA-------"
             "--------GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGTGT")
    counts = [((reference, query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,CGGGGGGGGGGGGGGGKGGGGGGGGGGGGGC,,length,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testStopCodon(self):
    # The query's second codon is TAG; the expected row shows seq "C*C"
    # with the "stop codons" error.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTTAGTGT-------------------------------------------------"
             "--------------------------------------------------")
    counts = [((reference, query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,C*C,,stop codons,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testLowQuality(self):
    # Five of the query's nine leading bases are N; the expected row has an
    # empty seq column and the "low quality" error.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TNTNNNGGN-------------------------------------------------"
             "--------------------------------------------------")
    counts = [((reference, query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,,,low quality,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testAmbiguousAtTwoPositions(self):
    """ Ambiguous bases in codons 9 and 18 - the sequence is rejected. """
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGACCCAACAACAATACAAGNAAAAG------TATACATATAGGACCAGGGA"
             "GNGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT")
    counts = [((reference, query), 1)]
    # Both affected codons are expected as X in the seq column.
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,CTRPNNNTXKSIHIGPGXAFYATGEIIGDIRQAHC,,> 2 ambiguous,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testAmbiguousMixtureThreeChoices(self):
    """ An N at position 14 leaves codon 5 (TNA) as L, S, or * - rejected. """
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGACCCTNAAACTGT-------------------------------------"
             "--------------------------------------------------")
    counts = [((reference, query), 1)]
    # Codon 5 is expected as X, with the "> 2 ambiguous" error.
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,CTRPXNC,,> 2 ambiguous,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testAmbiguousMixture(self):
    """ An N at position 9 leaves codon 3 (AGN) as S or R - still scored. """
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGNCCCAACAACAATACAAGAAAAAG------TATACATATAGGACCAGGGA"
             "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT")
    counts = [((reference, query), 1)]
    # The two-way mixture is expected as [RS] in both seq and aligned, with
    # "ambiguous" in the comment column rather than an error.
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,0.066305,43.0,R5,CT[RS]PNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
        "CT[RS]PN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,ambiguous\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testSynonymMixture(self):
    """ An N at position 12 still leaves codon 4 (CCN) as P - no ambiguity. """
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTT---GTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGACCNAACAACAATACAAGAAAAAG------TATACATATAGGACCAGGGA"
             "GAGCATTTTATGCAACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT")
    counts = [((reference, query), 1)]
    # The expected row has empty error and comment columns.
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,0.067754,42.3,R5,CTRPNNNTRKSIHIGPGRAFYATGEIIGDIRQAHC,"
        "CTRPN-NNT--RKSIHI---GPGR---AFYAT----GEIIGDI--RQAHC,,\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
def testSummaryFailed(self):
    # A single read that fails with the "cysteines" error: the expected
    # summary reports 1 mapped, 0 valid, and blank X4pct and final columns.
    reference = ("TGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGA"
                 "GAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGT")
    query = ("TGTACAAGA-------------------------------------------------"
             "--------------------------------------------------")
    counts = [((reference, query), 1)]
    expected_g2p_csv = (
        "rank,count,g2p,fpr,call,seq,aligned,error,comment\n"
        "1,1,,,,CTR,,cysteines,\n"
    )
    expected_summary_csv = (
        "mapped,valid,X4calls,X4pct,final,validpct\n"
        "1,0,0,,,0.00\n"
    )

    write_rows(self.pssm, counts, self.g2p_csv, self.g2p_summary_csv)

    self.assertEqual(expected_g2p_csv, self.g2p_csv.getvalue())
    self.assertEqual(expected_summary_csv, self.g2p_summary_csv.getvalue())