Example #1
0
#
# Read the sequences and derive the zero-order base frequencies
#
# `fasta` is encoded with stdin's encoding (falling back to 'ascii' when stdin
# reports none) before it is handed to stempy.
# NOTE(review): presumably `fasta` is the name of a FASTA file - confirm
# against the caller; `fasta`, `options` and `W` are defined outside this
# fragment.
num_bases, seqs, ids, index = stempy.read_sequences(fasta.encode(sys.stdin.encoding or 'ascii'), options)
occs = stempy.occurrences_from_index(index)
# Only the first four entries of `occs` feed the zero-order frequencies.
# NOTE(review): presumably these are the counts of the four bases - confirm.
freqs = stempy.ZeroOrderFrequencies(list(occs[:4]))
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
data = stempy.Data(index, max_W=W)



#
# Initialise the background
#
# The two factory functions are looked up by name from the configured
# background model order, so stempy must expose one of each for every value of
# options.bg_model_order that is used (getattr raises AttributeError otherwise).
markov_model_create_fn = getattr(stempy, 'create_markov_model_order_from_index_%d' % options.bg_model_order)
bg_model_create_fn = getattr(stempy, 'create_bg_model_from_Markov_model_%d' % options.bg_model_order)
# The factory returns a pair; only the first element (the Markov model) is kept.
mm, _ = markov_model_create_fn(data.index, options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
# Two alternative background models are built here; which one the model uses
# is chosen below when `bg` is assigned.
base_LL_bg_model = stempy.create_bg_model_from_base_likelihoods(W, data, lls, freqs_with_pseudo_counts)
Markov_bg_model = bg_model_create_fn(W, data, mm, freqs_with_pseudo_counts)



#
# Create the model
#
# Select the background model: the Markov-model based one is active, the
# base-likelihood one is left as a commented-out alternative. Within this
# fragment `base_LL_bg_model` is therefore built but not used.
# bg = base_LL_bg_model
bg = Markov_bg_model
# The binding-site model starts from the PSSM returned by
# initialise_uniform_pssm for width W and the configured alphabet size.
bs = stempy.PssmBindingSiteModel(stempy.initialise_uniform_pssm(W, options.alphabet_size))
model = stempy.Model(data, bs, bg, _lambda=options.lambda_)
# Slice-assign `log_pwm` (defined outside this fragment) over the PSSM's log
# probabilities, then ask the binding-site model to recalculate.
# NOTE(review): this only has an effect if values() returns a writable view
# onto the model's storage rather than a copy - confirm. Presumably
# recalculate() refreshes state derived from the log probabilities - confirm.
model.bs.pssm.log_probs.values()[:] = log_pwm
model.bs.recalculate()

Example #2
0
#
# Order-3 Markov model on a short sequence: compare the model's per-base
# probabilities with values counted directly from the sequence
#
seq = "ACGTACACAC"
data = create_data(seq)
logging.info('Creating Markov model.')
mm, freqs = stempy.create_markov_model_order_3(data, 1.)
logging.info('Calculating likelihoods.')
lls = mm.calculate_likelihoods(data)
# Materialise as a list: the values are iterated for logging and then indexed
# below, which the iterator returned by map() on Python 3 would not support.
base_probs = [exp(ll) for ll in base_lls(lls[0])]
logging.info(', '.join('%.5f' % p for p in base_probs))
# Each expected value has the form
#     (count of context followed by base + 1) / (count of context + 4)
# where the context grows from nothing up to three preceding bases.
# NOTE(review): presumably the +1 / +4 come from the pseudo-count of 1. passed
# above, spread over the four bases - confirm.
# NOTE(review): the first check compares the result of W_mer_log_likelihood
# with a raw probability rather than its logarithm (the later checks use
# exp()-ed values) - confirm this is intended.
assert feq(stempy.W_mer_log_likelihood(lls[0], 0, 1), (seq.count('A')+1.)/(len(seq)+4.))
assert feq(base_probs[1], (seq.count('AC')+1.)/(seq.count('A')+4.))
assert feq(base_probs[2], (seq.count('ACG')+1.)/(seq.count('AC')-1.+4.)) # -1. because last 'AC' has no following character
assert feq(base_probs[3], (seq.count('ACGT')+1.)/(seq.count('ACG')+4.))
assert feq(base_probs[4], (seq.count('CGTA')+1.)/(seq.count('CGT')+4.))

# check the bg model from likelihoods
# (only checks that construction succeeds; bg_model is not inspected in this fragment)
bg_model = stempy.create_bg_model_from_base_likelihoods(4, data, lls, freqs)

#
# Zero-order frequencies returned alongside the Markov model
#
# NOTE(review): the expected values equal 3/8, 1/8, 1/8, 3/8, which is what
# counting "AAAC" together with its reverse complement "GTTT" gives without
# pseudo-counts - presumably that is how `freqs` is computed; confirm.
seq = "AAAC"
data = create_data(seq)
mm, freqs = stempy.create_markov_model_order_3(data, 1.)
assert feq(freqs.freq(0), .375)
assert feq(freqs.freq(1), .125)
assert feq(freqs.freq(2), .125)
assert feq(freqs.freq(3), .375)

#
# Sequence containing a run of 'N' characters
#
# NOTE(review): presumably exercises handling of unknown bases - confirm.
seq = "AAACNNNNNNNTCTCTATACGCAGTACGG"
data = create_data(seq)
mm, freqs = stempy.create_markov_model_order_3(data, 1.)
lls = mm.calculate_likelihoods(data)
# Parenthesised single argument: prints the same text whether `print` is the
# Python 2 statement or the Python 3 function.
print(', '.join(map(str, lls)))
for i, (x, y) in enumerate(pairs(lls[0])):