Example #1
0
#
# Copyright John Reid 2012, 2013
#

"""
Test background model.
"""

from setup_environment import init_test_env, logging
init_test_env(__file__, level=logging.INFO)

import os, sys, stempy, numpy


#
# Test parameters
#
# Fixed inputs for this test script.
Z_threshold = 0.73306
W = 11  # presumably the motif width (W-mer length) -- confirm against log_pwm
# The FASTA file is resolved relative to this script's own directory.
fasta = os.path.join(
    os.path.dirname(__file__),
    'fasta',
    'random-seqs-10-100.fasta',
)
log_pwm = numpy.array([
    [-0.2186892 , -4.02535169, -1.82812711, -4.02535169],
    [-0.05505978, -4.02535169, -4.02535169, -4.02535169],
    [-1.46040233, -0.65805586, -1.46040233, -4.02535169],
    [-4.02535169, -0.05505978, -4.02535169, -4.02535169],
    [-4.02535169, -4.02535169, -4.02535169, -0.05505978],
    [-1.82812711, -2.41591378, -0.98082925, -0.98082925],
    [-0.52884413, -4.02535169, -1.19213835, -2.41591378],
    [-1.82812711, -4.02535169, -0.41443378, -1.82812711],
    [-1.82812711, -1.82812711, -0.41443378, -4.02535169],
    [-0.41443378, -1.19213835, -4.02535169, -4.02535169],
#
# Copyright John Reid 2011, 2012, 2013
#

"""
Compare the efficiency of finding instances with that of finding the best W-mers.
"""

from setup_environment import init_test_env, logging
init_test_env(__file__)


import stempy
from cookbook.timer import Timer

# The seed's length fixes the motif width used below.
seed = 'ATAAAA'
# Input sequences; the alternative data set is kept commented out for switching.
fasta = '/home/john/Data/MO-MK-EB/unique_MK.fasta.masked'
#fasta = '/home/john/Data/MO-MK-EB/MO_MK_EB_shared.fasta.masked'

# Start from the default options and pin both width bounds to the seed length.
options = stempy.get_default_options()
W = len(seed)
options.min_w = W
options.max_w = W

# Read the sequences from the FASTA file.
(num_bases, seqs, ids, index) = stempy.read_sequences(fasta, options)

# Build the data object inside the Timer context labelled 'build data'.
with Timer(msg='build data'):
    data = stempy.Data(index, max_W=options.max_w)

# Get the background: the call returns a pair, bound here to mm and freqs.
(mm, freqs) = stempy.create_markov_model_order_from_index_4(
    data.index,
    options.back_dist_prior,
)