#
# Copyright John Reid 2010, 2011, 2012, 2013
#

"""
Test read sequences.
"""

from setup_environment import init_test_env, fasta_dir
init_test_env(__file__)

import stempy, os

# Start from stempy's default options and set the cache_index flag.
options = stempy.get_default_options()
options.cache_index = True

# Read the test FASTA file and check that the correct amount of data came
# back: 22 bases in 2 sequences, identified as '75' and '76'.
fasta_file = os.path.join(fasta_dir(), 'find-starts-test.fa')
read_result = stempy.read_sequences(fasta_file, options)
num_bases, seqs, ids, index = read_result
assert num_bases == 22
assert len(seqs) == 2
for expected_id in ('75', '76'):
    assert expected_id in ids
assert len(ids) == 2

def get_fasta_file(filename):
    """
    Return the path to ``filename`` inside the directory given by ``fasta_dir()``.
    """
    directory = fasta_dir()
    return os.path.join(directory, filename)
# Example #3
# 0
"""
Test read sequences.
"""

#
# Trickery to find update path to import stempy from
#
from setup_environment import init_test_env, fasta_dir
init_test_env(__file__)

import stempy, os
from cookbook.named_tuple import namedtuple

# Record describing an EM starting point; an instance of it is handed to
# motif_finder._run_em_from_start() at the end of this script.
Start = namedtuple('Start', 'seed num_sites score model best_w_mers')

# The seed word fixes the width W used for the model below.
seed = 'CACTTT'
W = len(seed)

options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-em')

# Read the sequences and build the STEME objects from the index.
fasta = os.path.join(fasta_dir(), 'em-1-test.fa')
algorithm = stempy.Algorithm(options)
algorithm._initialise(fasta)
motif_finder = algorithm.create_motif_finder()

# Create a model of width W, seed it with the seed word and run EM from
# a start built around that model.
# NOTE(review): the meaning of the second argument to seed() is not
# visible here -- confirm against the stempy API.
model = algorithm.create_model_of_input(W)
model.bs.seed(seed, True)
start = Start(
    seed=seed,
    num_sites=10,
    score=0.,
    model=model,
    best_w_mers=stempy.InstanceVec(),
)
motif_finder._run_em_from_start(start)

# Example #4
# 0
"""
Test STEME gets the number of sites correct.
"""

from setup_environment import init_test_env, logging, fasta_dir
init_test_env(__file__, level=logging.INFO)


import stempy, os
from stempy.planted_sites import parse_meme_output_for_sites

#
# Set up the options
#
# The site whose length fixes the motif width W used below.
site = 'AAGGTTCCTTGGAATT'
W = len(site)
fasta_file = os.path.join(fasta_dir(), 'random-seqs-4-sites.fasta')

# Default options, with the motif width pinned to W (min and max equal)
# and the number of sites bounded to the range 2..10.
options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-num-sites')
options.bg_model_order = 0
options.min_w = W
options.max_w = W
options.min_num_sites = 2
options.max_num_sites = 10
options.meme_like_output = 'test-num-sites.txt'

# Path formed from the output directory and the MEME-like output file name.
meme_output = os.path.join(options.output_dir, options.meme_like_output)


#
# Run the STEME algorithm on the FASTA file
#
algorithm = stempy.Algorithm(options)
algorithm(fasta_file)
init_test_env(__file__, level=logging.INFO)

import stempy, os
from stempy.planted_sites import parse_meme_output_for_sites

# from infpy.roc import RocCalculator
# from optparse import OptionParser

# rocs = dict()
# meme_rocs = dict()

#
# Set up the options
#
fasta_file = os.path.join(fasta_dir(), "random-seqs-two-motifs.fasta")

# Default options, overridden one attribute at a time (in this order):
# output directory, width range 8..10, two motifs, MEME-like output name.
options = stempy.get_default_options()
for option_name, option_value in (
    ("output_dir", os.path.join("output", "test-2-motifs")),
    ("min_w", 8),
    ("max_w", 10),
    ("num_motifs", 2),
    ("meme_like_output", "two-motif-test-meme.txt"),
):
    setattr(options, option_name, option_value)

# Path formed from the output directory and the MEME-like output file name.
meme_output = os.path.join(options.output_dir, options.meme_like_output)


#
# Run the STEME algorithm on the FASTA file
#
algorithm = stempy.Algorithm(options)
algorithm(fasta_file)
        ('random-seqs-05-100'        , .40, .89),
        ('random-seqs-with-Ns-05-100', .60, .90),
        ('random-seqs-05-100'        , .40, .89), # cannot achieve (.6,.9) stats when finding starts up-front
        ('random-seqs-10-100'        , .60, .91),
        ('random-seqs-with-Ns-10-100', .20, .98), # lower specificity with Ns
        ('random-seqs-30-200'        , .46, .99),
        ('random-seqs-with-Ns-30-200', .46, .99),
    ]

rocs = {}
meme_rocs = {}
for data_set, min_sensitivity, min_specificity in data_sets:
    # Each data set is read from '<data_set>.fasta' in the FASTA directory.
    fasta_file = os.path.join(fasta_dir(), '%s.fasta' % data_set)

    #
    # Set up the options: defaults, a per-data-set output directory and
    # motif widths restricted to 6..11
    #
    options = stempy.get_default_options()
    options.output_dir = os.path.join('output', 'test-steme-accuracy', data_set)
    options.min_w = 6
    options.max_w = 11
    meme_output = os.path.join(options.output_dir, options.meme_like_output)

    #
    # Run the STEME algorithm on this data set
    #
    algorithm = stempy.Algorithm(options)
    algorithm(fasta_file)

    #
    # Analyse output
    #    
# Example #7
# 0
#
from setup_environment import init_test_env, fasta_dir
init_test_env(__file__)

import stempy, os
from cookbook.named_tuple import namedtuple

# Record describing an EM starting point; an instance of it is handed to
# motif_finder._run_em_from_start() at the end of this block.
Start = namedtuple('Start', 'seed num_sites score model best_w_mers')

# The seed word fixes the width W used for the model below.
seed = 'AAATTT'
W = len(seed)

options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-em-2')

# Read the sequences and build the STEME objects from the index.
fasta = os.path.join(fasta_dir(), 'T00759-tiny.fa')
algorithm = stempy.Algorithm(options)
algorithm._initialise(fasta)
motif_finder = algorithm.create_motif_finder()

# Create a model of width W, seed it with the seed word and run EM from
# a start built around that model. return_EM=True is passed so that the
# result exposes the EM object (read as em_result.EM further down).
# NOTE(review): the meaning of the second argument to seed() is not
# visible here -- confirm against the stempy API.
model = algorithm.create_model_of_input(W)
model.bs.seed(seed, True)
start = Start(
    seed=seed,
    num_sites=10,
    score=0.,
    model=model,
    best_w_mers=stempy.InstanceVec(),
)
em_result = motif_finder._run_em_from_start(start, return_EM=True)

# because we have symmetrical seed, should have same values for positive and negative strand Zs that exist
for n in xrange(algorithm.input_sequences.data.N):
    assert (
        not em_result.EM.get_Z(0).first
        and
        not em_result.EM.get_Z(0).second