コード例 #1
0
from setup_environment import init_test_env, logging
# NOTE(review): this runs before `import stempy` — presumably it prepares the
# environment that import relies on; confirm before reordering.
init_test_env(__file__)


import stempy
from cookbook.timer import Timer

# The seed word; its length is used for W, options.min_w and options.max_w.
seed = 'ATAAAA'
# Input sequence file. The commented-out line is an alternative data set.
fasta = '/home/john/Data/MO-MK-EB/unique_MK.fasta.masked'
#fasta = '/home/john/Data/MO-MK-EB/MO_MK_EB_shared.fasta.masked'

# Start from the library defaults and pin both width bounds to len(seed).
options = stempy.get_default_options()
W = len(seed)
options.min_w = W
options.max_w = W

# Read the sequences; only `index` is used below.
num_bases, seqs, ids, index = stempy.read_sequences(fasta, options)

# Build the data object inside the Timer context manager.
with Timer(msg='build data'):
    data = stempy.Data(index, max_W=options.max_w)

# Background: an order-4 Markov model (per the factory's name) built from the
# data's index, its per-base likelihoods, and frequencies with pseudo-counts.
mm, freqs = stempy.create_markov_model_order_from_index_4(
    data.index,
    options.back_dist_prior,
)
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
bg_model = stempy.create_bg_model_from_base_likelihoods(
    W,
    data,
    lls,
    freqs_with_pseudo_counts,
)

# Binding-site model: start from a uniform PSSM of width W, then seed it.
uniform_pssm = stempy.initialise_uniform_pssm(W, options.alphabet_size)
bs_model = stempy.PssmBindingSiteModel(uniform_pssm)
bs_model.seed(seed)
コード例 #2
0
ファイル: test_bg.py プロジェクト: JohnReid/STEME



#
# Set up options
#
# Take the library defaults and send output to output/test-bg.
options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-bg')



#
# Load the sequences
#
# `fasta` and `W` are defined outside this fragment.
# The path is encoded with stdin's encoding, falling back to 'ascii' when
# stdin has none, before being passed to read_sequences.
num_bases, seqs, ids, index = stempy.read_sequences(fasta.encode(sys.stdin.encoding or 'ascii'), options)
# Zero-order frequencies are built from the first four occurrence counts
# (presumably the single-base counts — TODO confirm against stempy).
occs = stempy.occurrences_from_index(index)
freqs = stempy.ZeroOrderFrequencies(list(occs[:4]))
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
data = stempy.Data(index, max_W=W)



#
# Initialise the background
#
# Select the stempy factory functions whose names end in the configured
# background model order (options.bg_model_order).
markov_model_create_fn = getattr(stempy, 'create_markov_model_order_from_index_%d' % options.bg_model_order)
# NOTE(review): bg_model_create_fn is looked up here but not called in the
# visible part of the script — presumably used further down.
bg_model_create_fn = getattr(stempy, 'create_bg_model_from_Markov_model_%d' % options.bg_model_order)
# The second value returned by the Markov model factory is discarded.
mm, _ = markov_model_create_fn(data.index, options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
# Background model built from the Markov model's likelihoods.
base_LL_bg_model = stempy.create_bg_model_from_base_likelihoods(W, data, lls, freqs_with_pseudo_counts)
コード例 #3
0
 def test_data_subsequence(self):
     """Check that ``Data.subsequence(2, 3, 6)`` yields "AGAGCG" for T00759-small.fa."""
     # Load the small FASTA fixture; only the index (fourth value) is needed.
     fasta_path = os.path.normpath(get_fasta_file("T00759-small.fa"))
     _, _, _, seq_index = stempy.read_sequences(fasta_path, self.options)
     seq_data = stempy.Data(seq_index)

     # NOTE(review): the arguments look like (sequence, offset, length) given
     # the 6-character expected string — confirm against stempy.Data.
     expected = "AGAGCG"
     assert expected == seq_data.subsequence(2, 3, 6), (
         "AGAGCG != %s" % seq_data.subsequence(2, 3, 6)
     )