def test_pssm_pseudo_counts():
    """Print and plot PSSM likelihoods for one sequence under several pseudo-counts.

    For each pseudo-count value the function calls ``biopsy.clear_pssm_cache()``,
    stores the value in the shared ``PssmParameters`` object, fetches the PSSM
    for accession ``M00975`` again, scores a short sequence against it and
    prints and plots the likelihood figures returned by
    ``biopsy.get_pssm_likelihoods_for_score``.  It then scans the same sequence
    with ``use_cumulative_dists`` switched on and prints the hits.

    Written for Python 2: every ``print(...)`` below wraps exactly one
    expression, so it is still the ordinary print statement.  Do not add a
    second argument inside the parentheses (that would print a tuple).
    """
    # Never read below; construction kept exactly as found.
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # Other probe sequences that have been tried here:
    #   'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    #   'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )

    # NOTE(review): this header names six columns, while each row printed in
    # the loop carries the pseudo-count plus seven values - confirm which one
    # is stale.
    print('Binding,Background,odds,p(binding),cumulative p(binding),Sequence')
    biopsy.PssmParameters.singleton().use_p_value = True
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;

    for pseudo_count in (0.0, 0.25, 0.5, 1.0, 2.0):
        # force cache load
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pseudo_count

        pssm_info = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(pssm_info.pssm, sascha_seq)
        likelihoods = biopsy.get_pssm_likelihoods_for_score(pssm_info, score)
        (bind, back, cum_bind, cum_back,
         odds_ratio, cum_odds_ratio,
         p_bind, cum_p_bind, p_value_p_bind) = likelihoods

        # The two odds ratios are unpacked but deliberately not printed.
        row = '%f,%f,%f,%f,%f,%f,%f' % (
            bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind
        )
        # Same text as "print pc," followed by "print row".
        print('%s %s' % (pseudo_count, row))
        biopsy.plot_likelihoods(
            pssm_info, '%s: %s' % (sascha_acc, pseudo_count), score
        )

        # NOTE(review): the original indentation was lost; the scan below is
        # assumed to run once per pseudo-count (inside the loop) - confirm.

        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits

        print('Trying with cumulative distributions')
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print(hits)
        print('')
def test_pssm_score():
    """Compare window-by-window PSSM scoring with ``score_pssm_on_sequence``.

    Looks up the PSSM for TRANSFAC name ``V$DEAF1_01``, scores it at every
    start offset of a fixed DNA sequence that leaves room for the whole
    matrix, and prints offset, score and binding probability whenever the
    probability exceeds 0.05.  Afterwards the whole sequence is handed to
    ``biopsy.score_pssm_on_sequence`` with the same 0.05 threshold and the
    number of hits and the returned value are printed.

    Written for Python 2: every ``print(...)`` below wraps exactly one
    expression, so it is still the ordinary print statement.
    """
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'

    # Last start offset at which the whole matrix still fits in the sequence.
    last_start = len(seq) - len(pssm_info.pssm)
    for i in range(last_start + 1):
        s = biopsy.score_pssm(pssm_info.pssm, seq[i:])
        odds = biopsy.get_odds_ratio(
            s,
            pssm_info.get_dist(True, False),
            pssm_info.get_dist(False, False)
        )
        p_binding = biopsy.get_p_binding(odds)
        if p_binding > 0.05:
            # Same text as "print i, s, p_binding".
            print('%s %s %s' % (i, s, p_binding))

    # Now let the library scan the whole sequence in one call.
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result)
    print('Got %s hits from %s bases' % (len(result), len(seq)))
    print(p_binding)
for species in remo.get_sequence_ids(): yield remo.get_sequence_for(species, True) def histogram(acc, score_counts): import pylab, numpy pylab.clf() pylab.bar(xrange(num_buckets), numpy.power(score_counts, 0.25)) pylab.savefig('graphs/%s.png' % acc) try: remome except NameError: remome_file = 'c:/data/remos/100/100.filtered' print 'Loading remome: %s' % remome_file remome = biopsy.Remome.load(remome_file) score_counts = {} for acc in itertools.islice(pssm_accs(), num_pssms): score_counts[acc] = numpy.zeros(num_buckets, numpy.uint8) for seq in itertools.islice(sequences_from_remome(remome), num_sequences): hits = biopsy.HitVec() p_bind = biopsy.score_pssm_on_sequence(pssm_name=acc, threshold=0.0, sequence=seq, result=hits) for h in hits: score_counts[acc][int(h.p_binding * num_buckets)] += 1 histogram(acc, score_counts[acc])
for aligned in remome.get_aligned_sequences(): for remo in remome.get_remos_for(aligned): for species in remo.get_sequence_ids(): yield remo.get_sequence_for(species, True) def histogram(acc, score_counts): import pylab, numpy pylab.clf() pylab.bar(xrange(num_buckets), numpy.power(score_counts, 0.25)) pylab.savefig("graphs/%s.png" % acc) try: remome except NameError: remome_file = "c:/data/remos/100/100.filtered" print "Loading remome: %s" % remome_file remome = biopsy.Remome.load(remome_file) score_counts = {} for acc in itertools.islice(pssm_accs(), num_pssms): score_counts[acc] = numpy.zeros(num_buckets, numpy.uint8) for seq in itertools.islice(sequences_from_remome(remome), num_sequences): hits = biopsy.HitVec() p_bind = biopsy.score_pssm_on_sequence(pssm_name=acc, threshold=0.0, sequence=seq, result=hits) for h in hits: score_counts[acc][int(h.p_binding * num_buckets)] += 1 histogram(acc, score_counts[acc])