Ejemplo n.º 1
0
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True;
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [ 0.0, 0.25, 0.5, 1.0, 2.0 ]:
        # force cache load
        biopsy.get_pssm( sascha_acc )
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm( sascha_acc )
        score = biopsy.score_pssm( p.pssm, sascha_seq )
        (
                bind,
                back,
                cum_bind,
                cum_back,
                odds_ratio,
                cum_odds_ratio,
                p_bind,
                cum_p_bind,
                p_value_p_bind
        ) = biopsy.get_pssm_likelihoods_for_score( p, score )
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods( p, sascha_acc + ': ' + str( pc ), score )
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True;
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        print hits
        print
Ejemplo n.º 2
0
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession( 'V$DEAF1_01' );
    pssm_info = biopsy.get_pssm( pssm_acc )
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range( len( seq ) - len( pssm_info.pssm )  + 1 ):
        s = biopsy.score_pssm( pssm_info.pssm, seq[i:] )
        p_binding = biopsy.get_p_binding(
                biopsy.get_odds_ratio(
                        s,
                        pssm_info.get_dist( True, False ),
                        pssm_info.get_dist( False, False ) ) )
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence( pssm_acc, seq, 0.05, result )
    print 'Got', len( result ), 'hits from', len( seq ), 'bases'
    print p_binding
Ejemplo n.º 3
0
            for species in remo.get_sequence_ids():
                yield remo.get_sequence_for(species, True)


def histogram(acc, score_counts):
    """Save a bar chart of ``score_counts`` to ``graphs/<acc>.png``.

    The current figure is cleared first. One bar is drawn per index in
    ``xrange(num_buckets)`` and each bar's height is the count raised to the
    power 0.25.
    """
    import pylab
    import numpy

    pylab.clf()
    positions = xrange(num_buckets)
    heights = numpy.power(score_counts, 0.25)
    pylab.bar(positions, heights)
    target = 'graphs/%s.png' % acc
    pylab.savefig(target)


try:
    remome
except NameError:
    remome_file = 'c:/data/remos/100/100.filtered'
    print 'Loading remome: %s' % remome_file
    remome = biopsy.Remome.load(remome_file)

score_counts = {}
for acc in itertools.islice(pssm_accs(), num_pssms):
    score_counts[acc] = numpy.zeros(num_buckets, numpy.uint8)
    for seq in itertools.islice(sequences_from_remome(remome), num_sequences):
        hits = biopsy.HitVec()
        p_bind = biopsy.score_pssm_on_sequence(pssm_name=acc,
                                               threshold=0.0,
                                               sequence=seq,
                                               result=hits)
        for h in hits:
            score_counts[acc][int(h.p_binding * num_buckets)] += 1
    histogram(acc, score_counts[acc])
Ejemplo n.º 4
0
    for aligned in remome.get_aligned_sequences():
        for remo in remome.get_remos_for(aligned):
            for species in remo.get_sequence_ids():
                yield remo.get_sequence_for(species, True)


def histogram(acc, score_counts):
    """Write a bar chart of ``score_counts`` to ``graphs/<acc>.png``.

    Clears the current figure, draws one bar per index in
    ``xrange(num_buckets)`` with height equal to the count raised to the
    power 0.25, and saves the figure.
    """
    import pylab
    import numpy

    pylab.clf()
    bucket_positions = xrange(num_buckets)
    bar_heights = numpy.power(score_counts, 0.25)
    pylab.bar(bucket_positions, bar_heights)
    out_path = "graphs/%s.png" % acc
    pylab.savefig(out_path)


try:
    remome
except NameError:
    remome_file = "c:/data/remos/100/100.filtered"
    print "Loading remome: %s" % remome_file
    remome = biopsy.Remome.load(remome_file)

score_counts = {}
for acc in itertools.islice(pssm_accs(), num_pssms):
    score_counts[acc] = numpy.zeros(num_buckets, numpy.uint8)
    for seq in itertools.islice(sequences_from_remome(remome), num_sequences):
        hits = biopsy.HitVec()
        p_bind = biopsy.score_pssm_on_sequence(pssm_name=acc, threshold=0.0, sequence=seq, result=hits)
        for h in hits:
            score_counts[acc][int(h.p_binding * num_buckets)] += 1
    histogram(acc, score_counts[acc])