예제 #1
0
def calc_pwm_from_simulations(mo, chem_affinity, n_sims=10000):
    """Estimate a position weight matrix from randomly sampled sequences.

    Samples ``n_sims`` random sequences, scores each one against ``mo``,
    converts the scores to occupancies with ``calc_occ``, and accumulates
    per-position base counts weighted by those occupancies.

    Args:
        mo: Binding model. Only ``mo.motif_len`` is read directly here; the
            object itself is handed to ``score_binding_sites``.
        chem_affinity: Passed through as the first argument of ``calc_occ``.
        n_sims: Number of random sequences to sample.

    Returns:
        A ``(4, mo.motif_len)`` float array of occupancy-weighted base
        counts in which every column has been divided by its own sum. Rows
        are indexed through ``base_map``.
    """
    # Each sampled sequence is 4 bases longer than the motif; two bases are
    # stripped from each end again when counting below. The original note
    # says this padding accounts for the shape features.
    seqs = FixedLengthDNASequences(
        sample_random_seqs(n_sims, 4 + mo.motif_len))
    # Column 2 of the forward-strand scores, negated.
    # NOTE(review): presumably that column holds a binding energy -- confirm.
    affinities = -seqs.score_binding_sites(mo, 'FWD')[:, 2]
    occs = calc_occ(chem_affinity, affinities)
    # Rescale so that the highest-occupancy sequence has weight 1.
    occs /= occs.max()
    # NOTE(review): the previous comment promised a pseudo count of one, but
    # the counts start at zero. Zeros are kept so results are unchanged; a
    # column whose weighted counts sum to zero would therefore produce NaN
    # in the final division.
    cnts = np.zeros((4, mo.motif_len), dtype=float)
    for seq, occ in izip(seqs, occs):
        # Drop the two flanking bases on each side before counting.
        for i, base in enumerate(seq.seq[2:-2]):
            cnts[base_map[base], i] += occ
    # Normalize each position (column) so its base frequencies sum to 1.
    return cnts / cnts.sum(0)
예제 #2
0
 def calc_bnd_frac(affinities, chem_pot):
     """Return the weighted sum of the occupancies for ``affinities``.

     ``weights`` is not a parameter; it is resolved from the surrounding
     scope. Per the original note, the weights default to uniform, in which
     case this sum acts as a mean.
     NOTE(review): that default is set outside this view -- confirm.
     """
     occupancies = calc_occ(chem_pot, affinities)
     weighted_occupancies = weights * occupancies
     return weighted_occupancies.sum()