if 1 < len(sys.argv): fasta_file = sys.argv[1] else: fasta_file = os.path.normpath(get_fasta_file('T00759-small.fa')) algorithm = stempy.Algorithm(options) algorithm.initialise(fasta_file.encode(sys.stdin.encoding or 'ascii')) algorithm._initialise_p_value_tables() data = algorithm.data # # Create model, set pseudo-counts and seed # logging.info('Creating model') bg = algorithm._get_bg_model(W) bs = stempy.PssmBindingSiteModel(stempy.initialise_uniform_pssm(W, algorithm.options.alphabet_size)) model = stempy.Model(data, bs, bg, _lambda=algorithm.options.lambda_) model.bs.seed_pseudo_counts = options.starts_seed_pseudo_counts logging.info('Seeding model with %s', seed) if W != len(seed): raise ValueError('Seed must be same length as motif.') model.bs.seed(seed, True) model.set_lambda_for_sites(data.num_sequences) for BestWMerFinder in [ stempy.FindBestWMersMultiIndex, # stempy.FindBestWMersSet, stempy.FindBestWMersSortedVec ]: #
# load the sequences
num_bases, seqs, ids, index = stempy.read_sequences(fasta, options)

# create the data object (timed)
with Timer(msg='build data'):
    data = stempy.Data(index, max_W=options.max_w)

# get the background: a Markov model and frequencies built from the index,
# per-base likelihoods over the data, then the background model for width W
mm, freqs = stempy.create_markov_model_order_from_index_4(
    data.index,
    options.back_dist_prior,
)
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
bg_model = stempy.create_bg_model_from_base_likelihoods(
    W,
    data,
    lls,
    freqs_with_pseudo_counts,
)

# binding site model: uniform PSSM of width W, then seeded
uniform_pssm = stempy.initialise_uniform_pssm(W, options.alphabet_size)
bs_model = stempy.PssmBindingSiteModel(uniform_pssm)
bs_model.seed(seed)

# whole model
model = stempy.Model(data, bs_model, bg_model, _lambda=0.0)

# Find every instance whose Z exceeds the threshold and report how many.
# NOTE(review): the original indentation was lost; the extent of this
# with-block (whether the logging call sits inside it) is assumed -- confirm.
Z_threshold = 0.3
with Timer(msg='find instances with Z>%f' % Z_threshold):
    instance_finder = stempy.FindInstances(data, model, Z_threshold)
    instance_finder()
    logging.info('Found %d instances', len(instance_finder.instances))

# Build the finder for the best W-mers (timed).
num_W_mers_to_find = 10000
with Timer(msg='find %d best W-mers' % num_W_mers_to_find):
    w_mer_finder = stempy.create_best_w_mer_finder(data, model, num_W_mers_to_find)