Exemple #1
0
 def test_get_counts(self):
     """get_counts: should work with all parameters

     Exercises get_counts with default arguments, with split_fp=True,
     with a sequence plus min_dist, and finally on the fixture structures
     self.true / self.predicted.
     """
     p = Pairs([(1, 8), (2, 7)])
     p2 = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

     # Default arguments: the three FP sub-categories are expected to be 0.
     exp = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
            'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0}
     self.assertEqual(get_counts(p, p2), exp)

     # split_fp=True: the three false positives are expected to fall one
     # into each sub-category.
     exp = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
            'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     self.assertEqual(get_counts(p, p2, split_fp=True), exp)

     # Supplying a sequence and min_dist yields a non-zero TN count.
     seq = RnaSequence('UCAG-NACGU')
     exp = {'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
            'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     self.assertEqual(
         get_counts(p, p2, split_fp=True, sequences=[seq], min_dist=2),
         exp)

     # check against compare_ct.pm
     exp = {'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
            'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2}
     seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
     self.assertEqual(
         get_counts(self.true, self.predicted, split_fp=True,
                    sequences=[seq], min_dist=4),
         exp)
Exemple #2
0
def get_counts(ref, predicted, split_fp=True, sequences=None, min_dist=1,
               accepted=ACCEPTED_STD):
    """Return TP, TN, FPcont, FPconf FPcomp, FN counts

        ref: BasePairs object
        predicted: BasePairs object
        split_fp: bool, set flags whether to split false positives into
            different categories or not
        sequences: list (or tuple) of sequences of RNA (strings), or None
        min_dist: minimum distance between two bases which make up a pair
        accepted: dict, pairs of letters denoting allowed base pairs

        Returns the counts mapping produced by
        cogent.struct.pairs_util.get_counts. When sequences is non-empty,
        its 'TN' entry is replaced by
        get_all_pairs(...) - TP - FP_INCONS - FP_CONTRA.

        Raises AssertionError when an argument has an unexpected type.
    """
    assert isinstance(ref, BasePairs), 'ref is not BasePairs instance'
    assert isinstance(predicted, BasePairs), 'predicted is not BasePairs instance'
    assert isinstance(split_fp, bool), 'split_fp is not bool'
    # type(None) is used instead of types.NoneType, which is not available
    # on every Python version; isinstance also admits list/tuple subclasses.
    assert isinstance(sequences, (list, tuple, type(None))),\
        'sequences is not tuple or list'
    assert isinstance(min_dist, int), 'min_dist is not integer'
    assert isinstance(accepted, dict), 'accepted is not dict'

    # Imported under an alias so the library function does not shadow this
    # wrapper's own name inside its body.
    from cogent.struct.pairs_util import get_counts as _cogent_get_counts
    # sequences is deliberately withheld here: TN is computed below using
    # the caller-supplied `accepted` pairs.
    counts = _cogent_get_counts(ref=ref, predicted=predicted,
                                split_fp=split_fp, sequences=None,
                                min_dist=min_dist)

    if sequences:
        # NOTE(review): get_all_pairs is defined elsewhere; it is used here
        # as the number of possible pairs -- confirm against its definition.
        num_possible_pairs = get_all_pairs(sequences, min_dist,
                                           accepted=accepted)
        counts['TN'] = (num_possible_pairs - counts['TP']
                        - counts['FP_INCONS'] - counts['FP_CONTRA'])

    return counts
Exemple #3
0
def mcc(ref, predicted, seqs, min_dist=1, accepted=ACCEPTED_STD):
    """Return the Matthews correlation coefficient

    ref: Pairs object -> reference structure (true structure)
    predicted: Pairs object -> predicted structure
    seqs: list of sequences, necessary to compute the number of true
        negatives. See documentation of extract_seqs function for
        accepted formats.
    min_dist: minimum distance required between two members of a base pair.
        Needed to calculate the number of true negatives.
    accepted: dict, pairs of letters denoting allowed base pairs

    Returns 1.0 when both structures are empty and 0.0 when only the
    predicted structure is empty; otherwise the value of mcc_formula
    applied to the counts of the two structures.

    Raises ValueError when predicted is non-empty but seqs is empty, or
    when the computed coefficient falls outside the range [-1, 1].
    """
    check_structures(ref, predicted)
    if not ref and not predicted:
        return 1.0
    elif not predicted:
        return 0.0
    elif not seqs:
        # Call-form raise: valid on both Python 2 and Python 3.
        raise ValueError('No sequence provided!')

    sequences = extract_seqs(seqs)
    counts = get_counts(ref, predicted, sequences=sequences, split_fp=True,
                        min_dist=min_dist, accepted=accepted)

    result = mcc_formula(counts)
    # Sanity check: a correlation coefficient must lie within [-1, 1].
    if result < -1 or result > 1:
        raise ValueError("mcc not in range <-1, 1>: %.2f"%(result))
    return result
Exemple #4
0
 def test_get_counts_pseudo(self):
     """get_counts: should work when pseudo in ref -> classification off

     The predicted pair (3, 6) is absent from the reference and would
     normally be compatible; here it is expected to be counted as
     contradicting.
     """
     reference = Pairs([(0, 8), (1, 7), (4, 10)])
     prediction = Pairs([(0, 8), (3, 6), (4, 10)])
     rna = 'GACUGUGUCAU'
     # TN is kept as an expression; presumably 13 possible pairs minus
     # TP (2) and FP_CONTRA (1) -- confirm against get_counts.
     expected = dict(
         TP=2, TN=13 - 2 - 1, FN=1, FP=1,
         FP_INCONS=0, FP_CONTRA=1, FP_COMP=0)
     observed = get_counts(
         reference, prediction, split_fp=True,
         sequences=[rna], min_dist=4)
     self.assertEqual(observed, expected)
Exemple #5
0
 def test_get_counts_pseudo(self):
     """get_counts: should work when pseudo in ref -> classification off

     The predicted pair (3, 6) is not part of the reference and would
     normally be compatible; this test expects it to be classified as a
     contradicting false positive.
     """
     true_pairs = Pairs([(0, 8), (1, 7), (4, 10)])
     predicted_pairs = Pairs([(0, 8), (3, 6), (4, 10)])
     sequence = 'GACUGUGUCAU'
     # The TN arithmetic is left unevaluated; presumably 13 possible pairs
     # minus TP (2) and FP_CONTRA (1) -- confirm against get_counts.
     expected_counts = {
         'TP': 2,
         'TN': 13 - 2 - 1,
         'FN': 1,
         'FP': 1,
         'FP_INCONS': 0,
         'FP_CONTRA': 1,
         'FP_COMP': 0,
     }
     actual_counts = get_counts(
         true_pairs, predicted_pairs, split_fp=True,
         sequences=[sequence], min_dist=4)
     self.assertEqual(actual_counts, expected_counts)
Exemple #6
0
 def test_get_counts(self):
     """get_counts: should work with all parameters

     Covers four calls: default arguments, split_fp=True, a sequence
     together with min_dist, and the fixture structures self.true /
     self.predicted.
     """
     p = Pairs([(1, 8), (2, 7)])
     p2 = Pairs([(1, 8), (2, 6), (3, 6), (4, 9)])

     # With default arguments all FP sub-category counts are expected
     # to be 0.
     exp = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
            'FP_INCONS': 0, 'FP_CONTRA': 0, 'FP_COMP': 0}
     self.assertEqual(get_counts(p, p2), exp)

     # With split_fp=True each of the three false positives is expected
     # to land in a different sub-category.
     exp = {'TP': 1, 'TN': 0, 'FN': 1, 'FP': 3,
            'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     self.assertEqual(get_counts(p, p2, split_fp=True), exp)

     # A sequence plus min_dist is expected to produce a non-zero TN.
     seq = RnaSequence('UCAG-NACGU')
     exp = {'TP': 1, 'TN': 7, 'FN': 1, 'FP': 3,
            'FP_INCONS': 1, 'FP_CONTRA': 1, 'FP_COMP': 1}
     self.assertEqual(
         get_counts(p, p2, split_fp=True, sequences=[seq], min_dist=2),
         exp)

     # check against compare_ct.pm
     exp = {'TP': 4, 'TN': 266, 'FN': 6, 'FP': 6,
            'FP_INCONS': 2, 'FP_CONTRA': 2, 'FP_COMP': 2}
     seq = 'agguugaaggggauccgauccacuccccggcuggucaaccu'.upper()
     self.assertEqual(
         get_counts(self.true, self.predicted, split_fp=True,
                    sequences=[seq], min_dist=4),
         exp)
Exemple #7
0
def sensitivity(ref, predicted, accepted=ACCEPTED_STD):
    """Compute the sensitivity of a predicted structure

    ref: Pairs object -> reference structure (true structure)
    predicted: Pairs object -> predicted structure
    accepted: dict, pairs of letters denoting allowed base pairs

    Formula: sensitivity = tp/(tp + fn)
    tp = True positives
    fn = False negatives

    Shortcuts: 1.0 is returned when both structures are empty and 0.0
    when only the predicted structure is empty.
    """
    check_structures(ref, predicted)

    ref_is_empty = not ref
    if not predicted:
        # Nothing was predicted: perfect only if nothing was expected.
        if ref_is_empty:
            return 1.0
        return 0.0

    return sensitivity_formula(
        get_counts(ref, predicted, accepted=accepted))