#
# Copyright John Reid 2010, 2011, 2012, 2013
#

"""
Test read sequences.
"""

# The test environment is initialised before stempy is imported.
from setup_environment import init_test_env, fasta_dir
init_test_env(__file__)

import stempy
import os

# Start from the default options and set the cache_index flag.
options = stempy.get_default_options()
options.cache_index = True

# check reading in correct amount of data
fasta_file = os.path.join(fasta_dir(), 'find-starts-test.fa')
num_bases, seqs, ids, index = stempy.read_sequences(fasta_file, options)

# Expect 22 bases in total, spread over exactly two sequences.
assert num_bases == 22
assert len(seqs) == 2

# Both sequence identifiers must be present, and nothing else.
assert '75' in ids
assert '76' in ids
assert 2 == len(ids)
def get_fasta_file(filename):
    """Return `filename` joined onto the directory returned by fasta_dir()."""
    fasta_root = fasta_dir()
    return os.path.join(fasta_root, filename)
"""
Test running EM from a single seeded start.
"""

#
# Trickery to find update path to import stempy from
#
from setup_environment import init_test_env, fasta_dir
init_test_env(__file__)

import stempy
import os
from cookbook.named_tuple import namedtuple

# Lightweight record holding the fields passed to _run_em_from_start below.
Start = namedtuple('Start', 'seed num_sites score model best_w_mers')

options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-em')

# The motif width is taken from the length of the seed.
seed = 'CACTTT'
W = len(seed)

# read the sequences and build STEME object from index
fasta = os.path.join(fasta_dir(), 'em-1-test.fa')
algorithm = stempy.Algorithm(options)
algorithm._initialise(fasta)
motif_finder = algorithm.create_motif_finder()

# Create a width-W model from the input and seed its binding-site part.
model = algorithm.create_model_of_input(W)
model.bs.seed(seed, True)

# Run EM from this one start; the test passes if the call completes.
start = Start(
    seed=seed,
    num_sites=10,
    score=0.,
    model=model,
    best_w_mers=stempy.InstanceVec(),
)
motif_finder._run_em_from_start(start)
"""
Test STEME gets the number of sites correct.
"""

# The test environment is initialised (at INFO log level) before stempy is
# imported.
from setup_environment import init_test_env, logging, fasta_dir
init_test_env(__file__, level=logging.INFO)

import stempy
import os
from stempy.planted_sites import parse_meme_output_for_sites

#
# Set up the options
#
site = 'AAGGTTCCTTGGAATT'
W = len(site)
fasta_file = os.path.join(fasta_dir(), 'random-seqs-4-sites.fasta')

options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-num-sites')
options.bg_model_order = 0
# Pin both width bounds to the length of the site so only that width is tried.
options.min_w = options.max_w = W
options.min_num_sites = 2
options.max_num_sites = 10
options.meme_like_output = 'test-num-sites.txt'

# Path built from the output directory and the MEME-like output file name.
meme_output = os.path.join(options.output_dir, options.meme_like_output)

#
# Run the STEME algorithm
#
algorithm = stempy.Algorithm(options)
algorithm(fasta_file)
init_test_env(__file__, level=logging.INFO)

import stempy
import os
from stempy.planted_sites import parse_meme_output_for_sites
# from infpy.roc import RocCalculator
# from optparse import OptionParser

# rocs = dict()
# meme_rocs = dict()

#
# Set up the options
#
fasta_file = os.path.join(fasta_dir(), "random-seqs-two-motifs.fasta")

options = stempy.get_default_options()
options.output_dir = os.path.join("output", "test-2-motifs")
# Motif widths from 8 to 10, with two motifs requested.
options.min_w = 8
options.max_w = 10
options.num_motifs = 2
options.meme_like_output = "two-motif-test-meme.txt"

# Path built from the output directory and the MEME-like output file name.
meme_output = os.path.join(options.output_dir, options.meme_like_output)

#
# Run the STEME algorithm
#
algorithm = stempy.Algorithm(options)
algorithm(fasta_file)
('random-seqs-05-100' , .40, .89), ('random-seqs-with-Ns-05-100', .60, .90), ('random-seqs-05-100' , .40, .89), # cannot achieve (.6,.9) stats when finding starts up-front ('random-seqs-10-100' , .60, .91), ('random-seqs-with-Ns-10-100', .20, .98), # lower specificity with Ns ('random-seqs-30-200' , .46, .99), ('random-seqs-with-Ns-30-200', .46, .99), ] rocs = dict() meme_rocs = dict() for data_set, min_sensitivity, min_specificity in data_sets: # # Set up the options # fasta_file = os.path.join(fasta_dir(), '%s.fasta' % data_set) options = stempy.get_default_options() options.min_w = 6 options.max_w = 11 options.output_dir = os.path.join('output', 'test-steme-accuracy', data_set) meme_output = os.path.join(options.output_dir, options.meme_like_output) # # Run the STEME algorithm # algorithm = stempy.Algorithm(options) algorithm(fasta_file) # # Analyse output #
# from setup_environment import init_test_env, fasta_dir init_test_env(__file__) import stempy, os from cookbook.named_tuple import namedtuple Start = namedtuple('Start', 'seed num_sites score model best_w_mers') options = stempy.get_default_options() options.output_dir = os.path.join('output', 'test-em-2') seed = 'AAATTT' W = len(seed) # read the sequences and build STEME object from index fasta = os.path.join(fasta_dir(), 'T00759-tiny.fa') algorithm = stempy.Algorithm(options) algorithm._initialise(fasta) motif_finder = algorithm.create_motif_finder() model = algorithm.create_model_of_input(W) model.bs.seed(seed, True) start = Start(seed=seed, num_sites=10, score=0., model=model, best_w_mers=stempy.InstanceVec()) em_result = motif_finder._run_em_from_start(start, return_EM=True) # because we have symmetrical seed, should have same values for positive and negative strand Zs that exist for n in xrange(algorithm.input_sequences.data.N): assert ( not em_result.EM.get_Z(0).first and not em_result.EM.get_Z(0).second