# Script: load one masked FASTA data set, build a background model from
# base likelihoods and seed a uniform PSSM binding-site model with a fixed
# word.

from setup_environment import init_test_env, logging

# Called before `import stempy`, exactly as in the original statement order.
# NOTE(review): presumably this prepares the environment stempy is imported
# from - confirm before moving the import above this call.
init_test_env(__file__)

import stempy
from cookbook.timer import Timer

# The word used to seed the binding-site model; its length fixes the motif
# width below.
seed = 'ATAAAA'

# Input sequences. The second path is an alternative data set kept for
# manual switching.
fasta = '/home/john/Data/MO-MK-EB/unique_MK.fasta.masked'
#fasta = '/home/john/Data/MO-MK-EB/MO_MK_EB_shared.fasta.masked'

# Pin both the minimum and maximum width options to the seed length.
options = stempy.get_default_options()
W = len(seed)
options.min_w = W
options.max_w = W

# Read the sequences; only `index` is used further down in this section.
num_bases, seqs, ids, index = stempy.read_sequences(fasta, options)

# Build the data object inside a Timer labelled 'build data'.
with Timer(msg='build data'):
    data = stempy.Data(index, max_W=options.max_w)

# Background: the order-4 Markov model factory returns the model together
# with frequencies; pseudo-counts are added to the frequencies using the
# same prior that was passed to the factory.
mm, freqs = stempy.create_markov_model_order_from_index_4(
    data.index,
    options.back_dist_prior,
)
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
bg_model = stempy.create_bg_model_from_base_likelihoods(
    W,
    data,
    lls,
    freqs_with_pseudo_counts,
)

# Binding-site model: start from a uniform PSSM of width W, then seed it.
bs_model = stempy.PssmBindingSiteModel(
    stempy.initialise_uniform_pssm(W, options.alphabet_size)
)
bs_model.seed(seed)
# ---------------------------------------------------------------------------
# Options: start from stempy's defaults and send output to output/test-bg.
# ---------------------------------------------------------------------------
options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-bg')

# ---------------------------------------------------------------------------
# Sequences: the FASTA path is encoded with stdin's encoding (or 'ascii'
# when stdin reports none) before it is handed to stempy.
# `fasta` and `W` are defined outside this section.
# ---------------------------------------------------------------------------
fasta_encoding = sys.stdin.encoding or 'ascii'
num_bases, seqs, ids, index = stempy.read_sequences(
    fasta.encode(fasta_encoding),
    options,
)

# Zero-order frequencies are built from the first four occurrence counts
# (presumably one per base - TODO confirm), then smoothed with the
# background prior.
occs = stempy.occurrences_from_index(index)
freqs = stempy.ZeroOrderFrequencies(list(occs[:4]))
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
data = stempy.Data(index, max_W=W)

# ---------------------------------------------------------------------------
# Background: the factory functions are looked up on the stempy module by
# name, with the configured background model order as the numeric suffix.
# ---------------------------------------------------------------------------
bg_order = options.bg_model_order
markov_model_create_fn = getattr(
    stempy,
    'create_markov_model_order_from_index_%d' % bg_order,
)
bg_model_create_fn = getattr(
    stempy,
    'create_bg_model_from_Markov_model_%d' % bg_order,
)

# Only the Markov model is kept; the second value returned by the factory
# is discarded.
mm, _ = markov_model_create_fn(data.index, options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
base_LL_bg_model = stempy.create_bg_model_from_base_likelihoods(
    W,
    data,
    lls,
    freqs_with_pseudo_counts,
)
def test_data_subsequence(self):
    # Check that Data.subsequence returns the expected word for the small
    # T00759 FASTA fixture.
    expected_word = "AGAGCG"
    # Arguments passed to Data.subsequence.
    # NOTE(review): looks like (sequence, offset, length) - confirm against
    # the stempy API.
    subsequence_args = (2, 3, 6)

    # Load the fixture; only the index is needed to build the Data object.
    fixture_path = os.path.normpath(get_fasta_file("T00759-small.fa"))
    _, _, _, seq_index = stempy.read_sequences(fixture_path, self.options)
    data = stempy.Data(seq_index)

    # The failure message re-queries the data object, so it is only
    # evaluated when the comparison fails.
    assert expected_word == data.subsequence(*subsequence_args), (
        "AGAGCG != %s" % data.subsequence(*subsequence_args)
    )