コード例 #1
0
ファイル: biopsy_test.py プロジェクト: JohnReid/biopsy
def test_transfac_pssms():
    transfac_pssms = biopsy.get_transfac_pssm_accessions( biopsy.get_default_transfac_pssm_filter() )
    for p in transfac_pssms:
        print p, biopsy.get_transfac_pssm_name( p )
    print 'Have', len( transfac_pssms ), 'transfac pssms'
    for acc in [ 'R19099', 'M00418' ]:
        print acc, biopsy.get_transfac_pssm_name( acc )
        biopsy.get_pssm( acc )
        print 'Under pssm'
        for under_pssm in biopsy.get_pssm( acc ).get_dist( True, False ):
            print under_pssm
        print 'Under background'
        for under_background in biopsy.get_pssm( acc ).get_dist( False, False ):
            print under_background
コード例 #2
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_transfac_pssms():
    transfac_pssms = biopsy.get_transfac_pssm_accessions(
        biopsy.get_default_transfac_pssm_filter())
    for p in transfac_pssms:
        print p, biopsy.get_transfac_pssm_name(p)
    print 'Have', len(transfac_pssms), 'transfac pssms'
    for acc in ['R19099', 'M00418']:
        print acc, biopsy.get_transfac_pssm_name(acc)
        biopsy.get_pssm(acc)
        print 'Under pssm'
        for under_pssm in biopsy.get_pssm(acc).get_dist(True, False):
            print under_pssm
        print 'Under background'
        for under_background in biopsy.get_pssm(acc).get_dist(False, False):
            print under_background
コード例 #3
0
ファイル: __init__.py プロジェクト: pombredanne/biopsy
def write_minimal_meme_matrix(out, acc):
    """
    Print the PSSM with accession ``acc`` to ``out`` as a minimal MEME motif.

    The minimal MEME format for a motif looks something like::

        MOTIF crp
        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
        0.000000  0.176471  0.000000  0.823529
        0.000000  0.058824  0.647059  0.294118
        0.000000  0.058824  0.000000  0.941176
        0.176471  0.000000  0.764706  0.058824
        0.823529  0.058824  0.000000  0.117647
        0.294118  0.176471  0.176471  0.352941
        0.294118  0.352941  0.235294  0.117647
        0.117647  0.235294  0.352941  0.294118
        0.529412  0.000000  0.176471  0.294118
        0.058824  0.235294  0.588235  0.117647
        0.176471  0.235294  0.294118  0.294118
        0.000000  0.058824  0.117647  0.823529
        0.058824  0.882353  0.000000  0.058824
        0.764706  0.000000  0.176471  0.058824
        0.058824  0.882353  0.000000  0.058824
        0.823529  0.058824  0.058824  0.058824
        0.176471  0.411765  0.058824  0.352941
        0.411765  0.000000  0.000000  0.588235
        0.352941  0.058824  0.000000  0.588235

    Here the MOTIF line carries the PSSM's name followed by ``acc``, ``w`` is
    the number of entries in the PSSM's ``dists``, ``nsites`` is its
    ``sites`` attribute and the E-value is always written as 0.  Each matrix
    row holds ``get_freq(b)`` for ``b`` in 0..3 with six decimals.

    :param out: Destination of the ``print >>`` statement.
    :param acc: Accession handed to ``biopsy.get_pssm`` and
        ``biopsy.get_pssm_name``.
    """
    pssm_info = biopsy.get_pssm(acc)
    template = (
        "MOTIF %s %s\n"
        "letter-probability matrix: alength= 4 w= %d nsites= %d E= %e\n"
        "%s\n")
    # Gather the pieces in the order the template consumes them.
    motif_name = biopsy.get_pssm_name(acc)
    width = len(pssm_info.dists)
    num_sites = pssm_info.sites
    rows = []
    for column in pssm_info.dists:
        freqs = ["%.6f" % column.get_freq(base) for base in xrange(4)]
        rows.append('  '.join(freqs))
    matrix_text = "\n".join(rows)
    print >> out, template % (
        motif_name, acc, width, num_sites, 0., matrix_text)
コード例 #4
0
ファイル: __init__.py プロジェクト: pombredanne/biopsy
def look_for_matrices(names):
    for name in names:
        print name
        for matrix, factor in find_matrices(name):
            print matrix.acc, matrix.name, factor.acc, factor.name
            logo(dist_for_pssm(biopsy.get_pssm(str(matrix.acc))),
                 '%s-%s' % (name, matrix.acc), 'logos')
コード例 #5
0
ファイル: __init__.py プロジェクト: JohnReid/biopsy
def write_minimal_meme_matrix(out, acc):
    """
    Print the PSSM with accession ``acc`` to ``out`` as a minimal MEME motif.

    The minimal MEME format for a motif looks something like::

        MOTIF crp
        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
        0.000000  0.176471  0.000000  0.823529
        0.000000  0.058824  0.647059  0.294118
        0.000000  0.058824  0.000000  0.941176
        0.176471  0.000000  0.764706  0.058824
        0.823529  0.058824  0.000000  0.117647
        0.294118  0.176471  0.176471  0.352941
        0.294118  0.352941  0.235294  0.117647
        0.117647  0.235294  0.352941  0.294118
        0.529412  0.000000  0.176471  0.294118
        0.058824  0.235294  0.588235  0.117647
        0.176471  0.235294  0.294118  0.294118
        0.000000  0.058824  0.117647  0.823529
        0.058824  0.882353  0.000000  0.058824
        0.764706  0.000000  0.176471  0.058824
        0.058824  0.882353  0.000000  0.058824
        0.823529  0.058824  0.058824  0.058824
        0.176471  0.411765  0.058824  0.352941
        0.411765  0.000000  0.000000  0.588235
        0.352941  0.058824  0.000000  0.588235

    The MOTIF line is written as the PSSM's name followed by ``acc``; the
    width is ``len(pssm_info.dists)``, ``nsites`` is ``pssm_info.sites`` and
    the E-value is a constant 0.  One row is written per entry of ``dists``,
    holding ``get_freq(b)`` for ``b`` in 0..3 to six decimal places.

    :param out: Destination of the ``print >>`` statement.
    :param acc: Accession handed to ``biopsy.get_pssm`` and
        ``biopsy.get_pssm_name``.
    """
    pssm_info = biopsy.get_pssm(acc)
    layout = "MOTIF %s %s\n" "letter-probability matrix: alength= 4 w= %d nsites= %d E= %e\n" "%s\n"
    # Values are collected in the same order the layout consumes them.
    name = biopsy.get_pssm_name(acc)
    width = len(pssm_info.dists)
    sites = pssm_info.sites
    matrix_lines = [
        "  ".join(["%.6f" % position.get_freq(letter) for letter in xrange(4)])
        for position in pssm_info.dists
    ]
    print >> out, layout % (name, acc, width, sites, 0.0, "\n".join(matrix_lines))
コード例 #6
0
ファイル: biopsy_test.py プロジェクト: JohnReid/biopsy
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True;
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [ 0.0, 0.25, 0.5, 1.0, 2.0 ]:
        # force cache load
        biopsy.get_pssm( sascha_acc )
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm( sascha_acc )
        score = biopsy.score_pssm( p.pssm, sascha_seq )
        (
                bind,
                back,
                cum_bind,
                cum_back,
                odds_ratio,
                cum_odds_ratio,
                p_bind,
                cum_p_bind,
                p_value_p_bind
        ) = biopsy.get_pssm_likelihoods_for_score( p, score )
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods( p, sascha_acc + ': ' + str( pc ), score )
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True;
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        print hits
        print
コード例 #7
0
ファイル: biopsy_test.py プロジェクト: JohnReid/biopsy
def test_likelihoods_indices():
    p = biopsy.get_pssm( 'M00975' )
    dist = p.get_dist( True, False )
    for s in range( len(dist) ):
        score = float(s)/float(len(dist) - 1)
        idx = biopsy.get_likelihood_index( len(dist), score )
        print idx, score
    for score in [ 0.98, 0.99, 1.0 ]:
        print score, biopsy.get_likelihood_index( len(dist), score )
コード例 #8
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_likelihoods_indices():
    p = biopsy.get_pssm('M00975')
    dist = p.get_dist(True, False)
    for s in range(len(dist)):
        score = float(s) / float(len(dist) - 1)
        idx = biopsy.get_likelihood_index(len(dist), score)
        print idx, score
    for score in [0.98, 0.99, 1.0]:
        print score, biopsy.get_likelihood_index(len(dist), score)
コード例 #9
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [0.0, 0.25, 0.5, 1.0, 2.0]:
        # force cache load
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(p.pssm, sascha_seq)
        (bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind,
         cum_p_bind,
         p_value_p_bind) = biopsy.get_pssm_likelihoods_for_score(p, score)
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score)
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print hits
        print
コード例 #10
0
ファイル: biopsy_test.py プロジェクト: pombredanne/biopsy
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range(len(seq) - len(pssm_info.pssm) + 1):
        s = biopsy.score_pssm(pssm_info.pssm, seq[i:])
        p_binding = biopsy.get_p_binding(
            biopsy.get_odds_ratio(s, pssm_info.get_dist(True, False),
                                  pssm_info.get_dist(False, False)))
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result)
    print 'Got', len(result), 'hits from', len(seq), 'bases'
    print p_binding
コード例 #11
0
ファイル: biopsy_test.py プロジェクト: JohnReid/biopsy
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession( 'V$DEAF1_01' );
    pssm_info = biopsy.get_pssm( pssm_acc )
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range( len( seq ) - len( pssm_info.pssm )  + 1 ):
        s = biopsy.score_pssm( pssm_info.pssm, seq[i:] )
        p_binding = biopsy.get_p_binding(
                biopsy.get_odds_ratio(
                        s,
                        pssm_info.get_dist( True, False ),
                        pssm_info.get_dist( False, False ) ) )
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence( pssm_acc, seq, 0.05, result )
    print 'Got', len( result ), 'hits from', len( seq ), 'bases'
    print p_binding
コード例 #12
0
ファイル: __init__.py プロジェクト: JohnReid/biopsy
def look_for_matrices(names):
    for name in names:
        print name
        for matrix, factor in find_matrices(name):
            print matrix.acc, matrix.name, factor.acc, factor.name
            logo(dist_for_pssm(biopsy.get_pssm(str(matrix.acc))), "%s-%s" % (name, matrix.acc), "logos")
コード例 #13
0
ファイル: create_logos.py プロジェクト: JohnReid/STEME
def logo_for_pssm_name(pssm_name):
    """
    Fetch the PSSM that ``biopsy.get_pssm`` returns for ``pssm_name`` and
    pass it, together with ``pssm_name``, to ``logo_for_pssm``.
    """
    # biopsy is imported inside the function, so it is only loaded when the
    # function is actually called.
    import biopsy
    pssm = biopsy.get_pssm(pssm_name)
    logo_for_pssm(pssm, pssm_name)