def test_get_counts(self):
    """get_counts: should work with all parameters

    Covers three call shapes: the default call, split_fp=True, and the
    sequence-aware call (sequences + min_dist) for which a non-zero TN
    is expected.
    """
    # NOTE: seq2 is constructed but never passed to get_counts, and this
    # first seq is rebound before it is used.
    seq = RnaSequence('UCAG-NAUGU')
    seq2 = RnaSequence('UAAG-CACGC')
    ref_pairs = Pairs([(1, 8), (2, 7)])
    pred_pairs = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

    # Default call: the FP sub-categories are expected to stay at zero.
    expected = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0,
    }
    observed = get_counts(ref_pairs, pred_pairs)
    self.assertEqual(observed, expected)

    # split_fp=True: the three false positives fall into one category each.
    expected = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    observed = get_counts(ref_pairs, pred_pairs, split_fp=True)
    self.assertEqual(observed, expected)

    # With a sequence and min_dist supplied, TN is expected to be filled in.
    seq = RnaSequence('UCAG-NACGU')
    expected = {
        'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    observed = get_counts(ref_pairs, pred_pairs, split_fp=True,
                          sequences=[seq], min_dist=2)
    self.assertEqual(observed, expected)

    # check against compare_ct.pm
    # (self.true / self.predicted are fixtures defined outside this method)
    expected = {
        'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
        'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2,
    }
    seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
    observed = get_counts(self.true, self.predicted, split_fp=True,
                          sequences=[seq], min_dist=4)
    self.assertEqual(observed, expected)
def get_counts(ref, predicted, split_fp=True, sequences=None, min_dist=1,
               accepted=ACCEPTED_STD):
    """Return TP, TN, FPcont, FPconf FPcomp, FN counts

    Thin wrapper around cogent.struct.pairs_util.get_counts that validates
    its arguments and, when sequences are given, computes the number of
    true negatives itself so that `accepted` can be taken into account.

    ref: BasePairs object, the reference structure
    predicted: BasePairs object, the predicted structure
    split_fp: bool, set flags whether to split false positives into
        different categories or not
    sequences: list or tuple of sequences of RNA (strings), or None
    min_dist: int, minimum distance between two bases which make up a pair
    accepted: dict, pairs of letters denoting allowed base pairs

    Returns the counts mapping produced by the cogent function; its 'TN'
    entry is overwritten here when `sequences` is non-empty.

    Raises AssertionError when an argument has an unexpected type.
    """
    assert isinstance(ref, BasePairs), 'ref is not BasePairs instance'
    assert isinstance(predicted, BasePairs), \
        'predicted is not BasePairs instance'
    assert isinstance(split_fp, bool), 'split_fp is not bool'
    # isinstance/None check instead of comparing type() against
    # types.NoneType: accepts list/tuple subclasses and does not depend on
    # the `types` module exposing NoneType.
    assert sequences is None or isinstance(sequences, (list, tuple)), \
        'sequences is not tuple or list'
    assert isinstance(min_dist, int), 'min_dist is not integer'
    assert isinstance(accepted, dict), 'accepted is not dict'

    # Imported under an alias so the delegate does not shadow this
    # function's own name inside its body.
    from cogent.struct.pairs_util import get_counts as cogent_get_counts

    # sequences is deliberately withheld from the delegate; TN is derived
    # below from get_all_pairs, which honours `accepted`.
    counts = cogent_get_counts(ref=ref, predicted=predicted,
                               split_fp=split_fp, sequences=None,
                               min_dist=min_dist)
    if sequences:
        num_possible_pairs = get_all_pairs(sequences, min_dist,
                                           accepted=accepted)
        counts['TN'] = (num_possible_pairs - counts['TP']
                        - counts['FP_INCONS'] - counts['FP_CONTRA'])
    return counts
def mcc(ref, predicted, seqs, min_dist=1, accepted=ACCEPTED_STD):
    """Return the Matthews correlation coefficient

    ref: Pairs object -> reference structure (true structure)
    predicted: Pairs object -> predicted structure
    seqs: list of sequences, necessary to compute the number of true
        negatives. See documentation of extract_seqs function for
        accepted formats.
    min_dist: minimum distance required between two members of a
        base pair. Needed to calculate the number of true negatives.
    accepted: dict, pairs of letters denoting allowed base pairs

    Returns 1.0 when both structures are empty and 0.0 when only the
    prediction is empty; otherwise the value of mcc_formula on the counts.

    Raises ValueError when no sequence is provided for a non-empty
    prediction, or when the computed coefficient is outside [-1, 1].
    """
    check_structures(ref, predicted)

    if not predicted:
        # Nothing predicted: a perfect score only if there was nothing
        # to predict in the first place.
        return 0.0 if ref else 1.0
    if not seqs:
        # Call-form raise: valid on both Python 2 and 3, and consistent
        # with the range-check raise below.
        raise ValueError('No sequence provided!')

    sequences = extract_seqs(seqs)
    counts = get_counts(ref, predicted, sequences=sequences, split_fp=True,
                        min_dist=min_dist, accepted=accepted)
    result = mcc_formula(counts)
    # Kept as two separate comparisons on purpose: a NaN result passes
    # through unchanged, exactly as before.
    if result < -1 or result > 1:
        raise ValueError("mcc not in range <-1, 1>: %.2f"%(result))
    return result
def test_get_counts_pseudo(self):
    """get_counts: should work when pseudo in ref -> classification off"""
    # pairs that would normally be compatible, are now contradicting
    reference = Pairs([(0, 8), (1, 7), (4, 10)])
    prediction = Pairs([(0, 8), (3, 6), (4, 10)])
    sequence = 'GACUGUGUCAU'

    # TN is written as 13 - 2 - 1 to mirror how it is derived
    # (13 minus the TP count of 2 and the single contradicting FP).
    expected = {
        'TP': 2, 'TN': 13 - 2 - 1, 'FN': 1, 'FP': 1,
        'FP_INCONS': 0, 'FP_CONTRA': 1, 'FP_COMP': 0,
    }
    observed = get_counts(reference, prediction, split_fp=True,
                          sequences=[sequence], min_dist=4)
    self.assertEqual(observed, expected)
def test_get_counts_pseudo(self):
    """get_counts: should work when pseudo in ref -> classification off"""
    # pairs that would normally be compatible, are now contradicting
    true_pairs = Pairs([(0, 8), (1, 7), (4, 10)])
    guessed_pairs = Pairs([(0, 8), (3, 6), (4, 10)])
    rna = 'GACUGUGUCAU'

    # The TN value is kept as the expression 13 - 2 - 1 rather than a
    # bare 10 so its derivation stays visible.
    wanted = {
        'TP': 2,
        'TN': 13 - 2 - 1,
        'FN': 1,
        'FP': 1,
        'FP_INCONS': 0,
        'FP_CONTRA': 1,
        'FP_COMP': 0,
    }
    got = get_counts(true_pairs, guessed_pairs, split_fp=True,
                     sequences=[rna], min_dist=4)
    self.assertEqual(got, wanted)
def test_get_counts(self):
    """get_counts: should work with all parameters

    Checks the default call, the split_fp=True call, and two calls that
    also pass sequences and min_dist.
    """
    # NOTE: neither this seq nor seq2 is handed to get_counts; seq is
    # reassigned further down before its first use.
    seq = RnaSequence('UCAG-NAUGU')
    seq2 = RnaSequence('UAAG-CACGC')
    true_pairs = Pairs([(1, 8), (2, 7)])
    guessed_pairs = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

    # Plain call: FP sub-categories are expected to remain zero.
    wanted = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0,
    }
    self.assertEqual(get_counts(true_pairs, guessed_pairs), wanted)

    # Splitting the false positives: one per category is expected.
    wanted = {
        'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    self.assertEqual(
        get_counts(true_pairs, guessed_pairs, split_fp=True), wanted)

    # Supplying a sequence and min_dist: TN is expected to be populated.
    seq = RnaSequence('UCAG-NACGU')
    wanted = {
        'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
        'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1,
    }
    self.assertEqual(
        get_counts(true_pairs, guessed_pairs, split_fp=True,
                   sequences=[seq], min_dist=2),
        wanted)

    # check against compare_ct.pm
    # (self.true and self.predicted come from outside this method)
    wanted = {
        'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
        'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2,
    }
    seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
    self.assertEqual(
        get_counts(self.true, self.predicted, split_fp=True,
                   sequences=[seq], min_dist=4),
        wanted)
def sensitivity(ref, predicted, accepted=ACCEPTED_STD):
    """Return sensitivity of the predicted structure

    ref: Pairs object -> reference structure (true structure)
    predicted: Pairs object -> predicted structure
    accepted: dict, pairs of letters denoting allowed base pairs

    Formula: sensitivity = tp/(tp + fn)
    tp = True positives
    fn = False negatives

    Returns 1.0 when both structures are empty and 0.0 when only the
    prediction is empty; otherwise the value of sensitivity_formula
    applied to the counts.
    """
    check_structures(ref, predicted)

    if not predicted:
        # Empty prediction: perfect only if the reference is empty too.
        return 0.0 if ref else 1.0

    pair_counts = get_counts(ref, predicted, accepted=accepted)
    return sensitivity_formula(pair_counts)