Exemple #1
0
# Read the remaining run settings from the parsed options object.
exp_sites_per_sequence = float(options.exp_sites_per_sequence)
"Expected number of sites in each sequence."

seed = int(options.seed)
"Seed for random numbers."


# Report the configuration of this run before doing any work.
print("Going to generate %d sequences of average length %d" % (N, L))
print("The binding sites are of length %d plus an optional gap with probability %f" % (K, p_gap))
print("Expect to find %f binding sites per sequence" % exp_sites_per_sequence)
print("Seeding the random number generator with %d" % seed)


# Both the hmm library and numpy are seeded with the same value.
hmm.seed_rng(seed)
numpy.random.seed(seed)


# Builder for the gapped PSSMs; the gap position is K / 2.
builder = single_gap.SingleGappedPssmBuilder(
    K=K,
    gap_position=K / 2,
    markov_order=0,
    M=4
)


# One set of emission distributions per Dirichlet prior strength. For each
# strength, builder.K draws are made, each with a parameter vector of length
# builder.M whose entries all equal the strength.
# NOTE: the draw order (every per-position draw first, gap draws afterwards)
# is kept deliberately so the sequence of random draws is unchanged.
dirichlet_prior_strengths = [0.01, 0.1, 1.0]

emissions = []
for strength in dirichlet_prior_strengths:
    position_draws = []
    for k in xrange(builder.K):
        position_draws.append(hmm.dirichlet_draw(numpy.ones(builder.M) * strength))
    emissions.append(numpy.array(position_draws))

# A single extra draw per strength, used for the gap emissions.
gap_emissions = []
for strength in dirichlet_prior_strengths:
    gap_emissions.append(hmm.dirichlet_draw(numpy.ones(builder.M) * strength))
Exemple #2
0
# Average number of binding sites expected per sequence; the loop below
# converts this into a per-base binding-site probability for each fragment.
exp_sites_per_seq = 1.0

for fragment in all_fragments:

    # create directory for results
    directory = os.path.join(root_dir, '%s' % fragment)
    if not os.access(directory, os.R_OK):
        os.makedirs(directory)

    # set up logging
    logger = logging.getLogger('find_in_fragments.%s' % fragment)
    logger.addHandler(logging.FileHandler(os.path.join(directory,'log.txt'), 'w'))
    logger.setLevel(logging.INFO)
    logger.info('**************** %s *****************' % fragment)

    hmm.seed_rng(1)
    random.seed(1)

    # get the sequences
    seqs = seqs_for_fragment(fragment)
    num_bases = hmm.pssm.num_bases(seqs)
    num_known_bases = hmm.pssm.num_known_bases(seqs)
    logger.info('%d/%d (%d%%) known bases in %d sequences' % (num_known_bases, num_bases, (100*num_known_bases/num_bases), len(seqs)))

    p_binding_site = exp_sites_per_seq*len(seqs)/float(num_bases) # one binding site per sequence on average
    logger.info('%.2f expected sites per sequence gives p(binding site)=%f' % (exp_sites_per_seq, p_binding_site))
    def per_fragment_background_model(order, N):
        """Load the cached background model for the current fragment.

        The cache entry is looked up with the key ``(order, N, fragment)``,
        where ``fragment`` is taken from the enclosing loop, and the loaded
        object is passed through ``hmm.as_state_model`` before being returned.
        """
        cache = global_background_model_cache()
        cached_model = cache.load((order, N, fragment))
        return hmm.as_state_model(cached_model)
    traits = hmm.pssm.GappedPssmTraits(
            K=K,
            p_binding_site=100*p_binding_site,