exp_sites_per_sequence = float(options.exp_sites_per_sequence)
"Expected number of sites in each sequence."

seed = int(options.seed)
"Seed for random numbers."

# Report the run parameters up front. Each print takes one parenthesized,
# pre-formatted string, so the output matches the bare print statement form.
print("Going to generate %d sequences of average length %d" % (N, L))
print(
    "The binding sites are of length %d plus an optional gap with probability %f"
    % (K, p_gap)
)
print("Expect to find %f binding sites per sequence" % exp_sites_per_sequence)
print("Seeding the random number generator with %d" % seed)

# Seed both RNGs (the hmm library's and numpy's) with the same value.
hmm.seed_rng(seed)
numpy.random.seed(seed)

# Builder for the gapped PSSMs: zero-order, 4 symbols, gap placed at K / 2
# (NOTE(review): integer division under Python 2 only if K is an int - confirm).
builder = single_gap.SingleGappedPssmBuilder(
    K=K,
    gap_position=K / 2,
    markov_order=0,
    M=4,
)

# Emission distributions are drawn once per Dirichlet prior strength.
dirichlet_prior_strengths = [0.01, 0.1, 1.0]


def _draw_emission(strength):
    """Call hmm.dirichlet_draw on a length-builder.M vector filled with strength."""
    return hmm.dirichlet_draw(numpy.ones(builder.M) * strength)


# One array of builder.K draws per strength; all of these draws happen before
# any gap draw, so the sequence of RNG calls is the same as before.
emissions = [
    numpy.array([_draw_emission(strength) for k in xrange(builder.K)])
    for strength in dirichlet_prior_strengths
]

# A single extra draw per strength for the gap emission.
gap_emissions = [
    _draw_emission(strength) for strength in dirichlet_prior_strengths
]
exp_sites_per_seq = 1.0 for fragment in all_fragments: # create directory for results directory = os.path.join(root_dir, '%s' % fragment) if not os.access(directory, os.R_OK): os.makedirs(directory) # set up logging logger = logging.getLogger('find_in_fragments.%s' % fragment) logger.addHandler(logging.FileHandler(os.path.join(directory,'log.txt'), 'w')) logger.setLevel(logging.INFO) logger.info('**************** %s *****************' % fragment) hmm.seed_rng(1) random.seed(1) # get the sequences seqs = seqs_for_fragment(fragment) num_bases = hmm.pssm.num_bases(seqs) num_known_bases = hmm.pssm.num_known_bases(seqs) logger.info('%d/%d (%d%%) known bases in %d sequences' % (num_known_bases, num_bases, (100*num_known_bases/num_bases), len(seqs))) p_binding_site = exp_sites_per_seq*len(seqs)/float(num_bases) # one binding site per sequence on average logger.info('%.2f expected sites per sequence gives p(binding site)=%f' % (exp_sites_per_seq, p_binding_site)) def per_fragment_background_model(order, N): return hmm.as_state_model(global_background_model_cache().load((order, N, fragment))) traits = hmm.pssm.GappedPssmTraits( K=K, p_binding_site=100*p_binding_site,