Beispiel #1
0
def test_pssm_pseudo_counts():
    """Print and plot the scoring of PSSM 'M00975' for several pseudo-counts.

    For each pseudo-count value in turn the PSSM cache is cleared, the
    pseudo-count parameter is set, the PSSM is fetched again and the test
    sequence is scored against it. The likelihoods reported by
    ``biopsy.get_pssm_likelihoods_for_score`` for that score are printed as
    one row and plotted. The same sequence is then scanned with
    ``biopsy.score_pssm_on_sequence`` (cumulative distributions switched on)
    and the resulting hits are printed.

    NOTE: ``use_p_value``, ``pseudo_counts`` and ``use_cumulative_dists`` are
    set on the ``biopsy.PssmParameters`` singleton and are not restored when
    the function returns.
    """
    sascha_acc = 'M00975'
    # Sequences tried earlier, kept for reference:
    #   'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    #   'ttgttgcga'
    sascha_seq = 'ttgttgcaa'

    # One label per printed field: the pseudo-count, then the seven values of
    # the row format used in the loop below.
    print('Pseudo-count Binding,Background,Cumulative binding,'
          'Cumulative background,p(binding),cumulative p(binding),'
          'p-value p(binding)')
    biopsy.PssmParameters.singleton().use_p_value = True
    for pc in [0.0, 0.25, 0.5, 1.0, 2.0]:
        # Force a cache load and then clear the cache before changing the
        # pseudo-count (presumably so the PSSM fetched below is rebuilt with
        # the new setting rather than served from the cache).
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(p.pssm, sascha_seq)
        (
            bind,
            back,
            cum_bind,
            cum_back,
            odds_ratio,
            cum_odds_ratio,
            p_bind,
            cum_p_bind,
            p_value_p_bind,
        ) = biopsy.get_pssm_likelihoods_for_score(p, score)
        # The two odds ratios are unpacked only to consume the nine-value
        # result; they are not part of the printed row.
        row = '%f,%f,%f,%f,%f,%f,%f' % (
            bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind)
        # Single-argument print calls behave the same on Python 2 and 3.
        print('%s %s' % (pc, row))
        biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score)

        print('Trying with cumulative distributions')
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print(hits)
        print('')
Beispiel #2
0
def test_pssm_score():
    """Scan a fixed sequence with the TRANSFAC matrix 'V$DEAF1_01'.

    First every start offset at which the PSSM still fits is scored by hand:
    the score is turned into an odds ratio using two distributions obtained
    from ``pssm_info.get_dist`` and then into p(binding); offsets whose
    p(binding) exceeds 0.05 are printed as ``offset score p_binding``.
    Afterwards ``biopsy.score_pssm_on_sequence`` is run over the whole
    sequence with the same 0.05 value, and the number of hits and the value
    it returns are printed.
    """
    # Alternative matrix name: 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'

    # Loop invariants: fetch the two distributions and the window count once
    # instead of on every iteration.
    # NOTE(review): the flag pairs presumably select the binding and the
    # background distribution respectively -- confirm against biopsy.
    first_dist = pssm_info.get_dist(True, False)
    second_dist = pssm_info.get_dist(False, False)
    num_offsets = len(seq) - len(pssm_info.pssm) + 1

    for i in range(num_offsets):
        # The whole remaining suffix is handed to score_pssm, exactly as
        # before; how much of it is consumed is up to score_pssm.
        s = biopsy.score_pssm(pssm_info.pssm, seq[i:])
        p_binding = biopsy.get_p_binding(
            biopsy.get_odds_ratio(s, first_dist, second_dist))
        if p_binding > 0.05:
            # Single-argument print calls behave the same on Python 2 and 3.
            print('%s %s %s' % (i, s, p_binding))

    result = biopsy.HitVec()
    sequence_p_binding = biopsy.score_pssm_on_sequence(
        pssm_acc, seq, 0.05, result)
    print('Got %s hits from %s bases' % (len(result), len(seq)))
    print(sequence_p_binding)
Beispiel #3
0
def test_pssm_score():
    """Compare per-offset scoring with a whole-sequence scan for 'V$DEAF1_01'.

    The TRANSFAC name is resolved to a PSSM accession and the PSSM is loaded.
    Each start offset at which the PSSM still fits into the fixed test
    sequence is scored; the score is converted to an odds ratio (using two
    distributions from ``pssm_info.get_dist``) and then to p(binding), and
    every offset with p(binding) above 0.05 is printed together with its
    score and p(binding). Finally ``biopsy.score_pssm_on_sequence`` is called
    on the full sequence with 0.05, and the hit count and its return value
    are printed.
    """
    # Alternative matrix name: 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'

    pssm = pssm_info.pssm
    # These do not depend on the offset, so they are looked up once rather
    # than inside the loop.
    # NOTE(review): (True, False) / (False, False) presumably pick the
    # binding and background distributions -- confirm against biopsy.
    dist_true = pssm_info.get_dist(True, False)
    dist_false = pssm_info.get_dist(False, False)
    last_offset = len(seq) - len(pssm)

    for i in range(last_offset + 1):
        # score_pssm receives the remaining suffix starting at offset i, as
        # in the original code.
        s = biopsy.score_pssm(pssm, seq[i:])
        odds = biopsy.get_odds_ratio(s, dist_true, dist_false)
        p_binding = biopsy.get_p_binding(odds)
        if p_binding > 0.05:
            # Single-argument print calls behave the same on Python 2 and 3.
            print('%s %s %s' % (i, s, p_binding))

    result = biopsy.HitVec()
    scan_p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result)
    print('Got %s hits from %s bases' % (len(result), len(seq)))
    print(scan_p_binding)
Beispiel #4
0
def test_pssm_pseudo_counts():
    """Report how PSSM 'M00975' scores one site as the pseudo-count varies.

    The pseudo-counts 0.0, 0.25, 0.5, 1.0 and 2.0 are tried in order. For
    each one the PSSM cache is cleared, the pseudo-count parameter is set,
    the PSSM is fetched again and the test sequence is scored. The values
    returned by ``biopsy.get_pssm_likelihoods_for_score`` for that score are
    printed as a row and plotted, then the sequence is scanned with
    ``biopsy.score_pssm_on_sequence`` with cumulative distributions enabled
    and the hits are printed.

    NOTE: the ``biopsy.PssmParameters`` singleton is left modified
    (``use_p_value``, ``pseudo_counts``, ``use_cumulative_dists``) after the
    function returns.
    """
    sascha_acc = 'M00975'
    # Sequences tried earlier, kept for reference:
    #   'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    #   'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    pseudo_counts = [0.0, 0.25, 0.5, 1.0, 2.0]

    # Header with one label per printed field: the pseudo-count followed by
    # the seven comma-separated values of each row.
    header = (
        'Pseudo-count Binding,Background,Cumulative binding,'
        'Cumulative background,p(binding),cumulative p(binding),'
        'p-value p(binding)'
    )
    print(header)
    biopsy.PssmParameters.singleton().use_p_value = True
    for pc in pseudo_counts:
        # Force a cache load, then drop the cache before the pseudo-count is
        # changed (presumably so the next get_pssm rebuilds the PSSM with the
        # new setting).
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(p.pssm, sascha_seq)
        # Nine values come back; the two odds ratios are unpacked but are
        # not printed.
        (bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind,
         cum_p_bind,
         p_value_p_bind) = biopsy.get_pssm_likelihoods_for_score(p, score)
        values = (bind, back, cum_bind, cum_back, p_bind, cum_p_bind,
                  p_value_p_bind)
        # Single-argument print calls behave the same on Python 2 and 3.
        print('%s %s' % (pc, '%f,%f,%f,%f,%f,%f,%f' % values))
        biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score)

        print('Trying with cumulative distributions')
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print(hits)
        print('')