def testEmptyReads(self):
    """
    With no reads at all, heterogeneousSites must return empty results:
    two empty dictionaries and an empty list of site indexes.
    """
    expected = ({}, {}, [])
    result = heterogeneousSites(Reads(), 0, 1)
    self.assertEqual(expected, result)
def testHomogeneousReads(self):
    """
    When all reads have the same sequence, heterogeneousSites must
    return empty results (two empty dictionaries and an empty list).
    """
    first = Read('id', 'ACCG')
    second = Read('id2', 'ACCG')
    result = heterogeneousSites(Reads([first, second]), len(first), 1)
    self.assertEqual(({}, {}, []), result)
def testOneRead(self):
    """
    A single read cannot be heterogeneous: heterogeneousSites must
    return empty results (two empty dictionaries and an empty list).
    """
    onlyRead = Read('id', 'ACCG')
    result = heterogeneousSites(Reads([onlyRead]), len(onlyRead), 1)
    self.assertEqual(({}, {}, []), result)
def testGaps(self):
    """
    Reads that differ only where one of them has a gap ('-') must give
    empty results: gaps do not count towards heterogeneity.
    """
    ungapped = Read('id', 'ACCG')
    gapped = Read('id2', '-CC-')
    result = heterogeneousSites(Reads([ungapped, gapped]), len(ungapped), 1)
    self.assertEqual(({}, {}, []), result)
def testHeterogeneousReadsFractionLow(self):
    """
    If reads differ at a site but that site is still more homogeneous
    than the homogeneity cutoff fraction (here two of three reads agree
    and the cutoff is 0.6), heterogeneousSites must return empty results.
    """
    first = Read('id', 'ACCG')
    others = [Read('id2', 'ACCC'), Read('id3', 'ACCC')]
    reads = Reads([first] + others)
    result = heterogeneousSites(reads, len(first), 0.6)
    self.assertEqual(({}, {}, []), result)
def testHeterogeneousReadsOneDifference(self):
    """
    Two reads that differ at a single site must produce exactly one
    entry: the per-base counts, the per-base read ids, and the index of
    that site.
    """
    first = Read('id', 'ACCG')
    second = Read('id2', 'ACCC')
    expectedCounts = {3: {'G': 1, 'C': 1}}
    expectedIds = {3: {'G': ['id'], 'C': ['id2']}}
    result = heterogeneousSites(Reads([first, second]), len(first), 1)
    self.assertEqual((expectedCounts, expectedIds, [3]), result)
def testHeterogeneousReadsFractionHigh(self):
    """
    If reads differ at a site and that site is less homogeneous than the
    homogeneity cutoff fraction (here two of three reads agree and the
    cutoff is 0.7), the site must be reported with its counts and ids.
    """
    first = Read('id', 'ACCG')
    others = [Read('id2', 'ACCC'), Read('id3', 'ACCC')]
    reads = Reads([first] + others)
    expectedCounts = {3: {'C': 2, 'G': 1}}
    expectedIds = {3: {'G': ['id'], 'C': ['id2', 'id3']}}
    result = heterogeneousSites(reads, len(first), 0.7)
    self.assertEqual((expectedCounts, expectedIds, [3]), result)
def testHeterogeneousReadsTwoDifferences(self):
    """
    Two reads that differ at two sites must produce two entries, one
    per differing site, in the counts, the ids and the index list.
    """
    first = Read('id', 'ACCG')
    second = Read('id2', 'TCCC')
    expectedCounts = {
        0: {'A': 1, 'T': 1},
        3: {'G': 1, 'C': 1},
    }
    expectedIds = {
        0: {'A': ['id'], 'T': ['id2']},
        3: {'C': ['id2'], 'G': ['id']},
    }
    result = heterogeneousSites(Reads([first, second]), len(first), 1)
    self.assertEqual((expectedCounts, expectedIds, [0, 3]), result)
def testHeterogeneousReadsFractionLowWithOneDifference(self):
    """
    Four reads split evenly between two bases at the first site (two 'A'
    and two 'T') and identical everywhere else must, with a homogeneity
    cutoff fraction of 0.6, yield exactly one entry, for that first site.
    """
    first = Read('id', 'ACCG')
    reads = Reads([
        first,
        Read('id2', 'TCCG'),
        Read('id3', 'TCCG'),
        Read('id4', 'ACCG'),
    ])
    expectedCounts = {0: {'A': 2, 'T': 2}}
    expectedIds = {0: {'A': ['id', 'id4'], 'T': ['id2', 'id3']}}
    result = heterogeneousSites(reads, len(first), 0.6)
    self.assertEqual((expectedCounts, expectedIds, [0]), result)
# Read the command-line options and load every read into memory (the reads
# are needed more than once below, so they are materialized as a list).
args = parser.parse_args()
homogenFraction = args.homogenFraction
bestSNP = args.bestSNP  # Not used in the statements below.
reads = list(parseFASTACommandLineOptions(args))

# The homogeneity cutoff must lie in the half-open interval (0, 1].
if not (0 < homogenFraction <= 1):
    raise ValueError('--homogenFraction needs to be between 0 and 1.')

# All reads must have one common length (this also rejects an empty input,
# since then there are zero distinct lengths rather than one).
readLengths = set(len(read) for read in reads)
if len(readLengths) != 1:
    raise ValueError('Not all read lengths are the same.....')
length = len(reads[0])

count, ids, indexes = heterogeneousSites(reads, length, homogenFraction)

# Number of decimal digits in the read length, used to right-align the
# 1-based site numbers printed below.
width = int(log10(length)) + 1

for index in indexes:
    siteCounts = count[index]
    siteIds = ids[index]
    print('A SNP is detected at index %*d.' % (width, index + 1))
    # The slice drops a fixed 9-character prefix and 2-character suffix from
    # the string form of the per-site counts (presumably 'Counter({' and
    # '})' -- TODO confirm against heterogeneousSites).
    print('Detected bases: %s' % (str(siteCounts)[9:-2]))
    if 'A' in siteCounts:
        print('A:')
        for readId in siteIds['A']:
            print(readId)
    if 'C' in siteCounts:
        print('C:')
        for readId in siteIds['C']:
            print(readId)