Example 1
def macina(parse, target, filelst, infl, supl):
    pointsdict = {}
    for path in filelst:
        data = parse(path)
        points = extract_point(data, target)
        points = filter(lambda x: x > infl, points)
        for i, p in enumerate(points):
            l = pointsdict.get(i, [])
            l.append(p)
            pointsdict[i] = l

    stats = []

    if parse == parse_netperf:
        starts = pointsdict[0]
        ends = pointsdict[1]

        length = list(e - s for e, s in zip(ends, starts))
        print "netperf hole lengths:", length
        avg = utils.average(length)
        var = utils.variance(length)
        q1, median, q3 = utils.quartiles(length)

        stats.append((length, (avg, var, min(length), q1, median, q3, max(length))))
    else:
        for points in pointsdict.itervalues():
            print "mesh points:", points
            avg = utils.average(points)
            var = utils.variance(points)
            q1, median, q3 = utils.quartiles(points)

            # append stats for every group, not just the last one iterated
            stats.append((points, (avg, var, min(points), q1, median, q3, max(points))))
    return stats
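The utils helpers used here and throughout these examples (average, variance, quartiles) are not shown on the page. A minimal sketch of their assumed semantics, with variance as the unbiased (ddof=1) sample variance so it matches the assertion in Example 22:

def average(xs):
    # arithmetic mean
    return sum(xs) / float(len(xs))

def variance(xs):
    # unbiased sample variance, matching x.var(ddof=1)
    m = average(xs)
    return sum((x - m) ** 2 for x in xs) / float(len(xs) - 1)

def quartiles(xs):
    # nearest-rank quartiles; the real helper may interpolate instead
    ys = sorted(xs)
    n = len(ys)
    return ys[n // 4], ys[n // 2], ys[(3 * n) // 4]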
Example 2
def total_variance(code, bd, i):
    """find the total variance of the ith position, conditional on the
    i-1th position.
    """
    if i == 0:
        aa = bd[0]
        return variance([code[aa, x, y] for x in nucs for y in nucs])
    else:
        aa = bd[i - 1]
        first_term = mean(
            variance([code[aa, x, y] for y in nucs]) for x in nucs)
        second_term = variance(
            [mean(code[aa, x, y] for y in nucs) for x in nucs])
        print first_term, second_term
        return first_term + second_term
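total_variance implements the law of total variance, Var(Y) = E[Var(Y|X)] + Var(E[Y|X]), with X the conditioning nucleotide. A self-contained numeric check of that identity on random data (the code dict, nucs, and helpers below are stand-ins, not the module's own; the identity is exact for the population variance, ddof=0):

import random

nucs = "ACGT"
aa = "K"
code = {(aa, x, y): random.gauss(0, 1) for x in nucs for y in nucs}

def mean(xs):
    xs = list(xs)
    return sum(xs) / float(len(xs))

def variance(xs):
    # population variance (ddof=0), for which the identity is exact
    xs = list(xs)
    m = mean(xs)
    return sum((x - m) ** 2 for x in xs) / float(len(xs))

total = variance([code[aa, x, y] for x in nucs for y in nucs])
within = mean([variance([code[aa, x, y] for y in nucs]) for x in nucs])
between = variance([mean([code[aa, x, y] for y in nucs]) for x in nucs])
assert abs(total - (within + between)) < 1e-9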
Example 3
def predicted_vs_actual_Zb(code, bd):
    L = len(bd) + 1
    kmer_scores = [score_site(code, bd, kmer) for kmer in make_kmers(L)]
    pred_mu = sum(
        [mean([code[aa, n1, n2] for (n1, n2) in nuc_pairs]) for aa in bd])
    pred_sigma_sq = sum(
        [variance([code[aa, n1, n2] for (n1, n2) in nuc_pairs]) for aa in bd])
    pred_mean = exp(pred_mu + pred_sigma_sq / 2.0)  # lognormal mean: exp(mu + sigma_sq/2); pred_sigma_sq is already a variance
    obs_mu = mean(kmer_scores)
    obs_sigma_sq = variance(kmer_scores)
    print "mu:", pred_mu, obs_mu, (obs_mu -
                                   pred_mu) / obs_mu  # should be very low
    print "sigma_sq:", pred_sigma_sq, obs_sigma_sq, (
        obs_sigma_sq - pred_sigma_sq) / obs_sigma_sq  # should be very low
    Zb_obs = sum(exp(-kmer_score) for kmer_score in kmer_scores)
    Zb_pred = (4**L) * exp(-pred_mu + pred_sigma_sq / 2.0)
    print Zb_pred, Zb_obs
    print (Zb_obs - Zb_pred) / Zb_obs  # relative error; should be very low
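Both predictions rest on the lognormal moment formula: if X ~ N(mu, sigma_sq) then E[exp(-X)] = exp(-mu + sigma_sq/2), which gives Zb_pred = 4**L * exp(-pred_mu + pred_sigma_sq/2) when the 4**L kmer scores are treated as i.i.d. normal draws. A quick standalone Monte Carlo check of the formula:

import random
from math import exp

mu, sigma = 1.0, 0.5
n = 200000
draws = [random.gauss(mu, sigma) for _ in range(n)]
mc = sum(exp(-x) for x in draws) / float(n)
print(mc, exp(-mu + sigma ** 2 / 2.0))  # should agree to ~2 decimal places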
Example 4
def print_statistics(data, label):
    avg = utils.average(data)
    var = utils.variance(data)
    minp = min(data)
    q1st, median, q3rd = utils.quartiles(data)
    maxp = max(data)

    print("%s: avg=%.3f, var=%.3f, min=%.3f, 1stq=%.3f, median=%.3f, 3rdq=%.3f, max=%.3f"
          % (label, avg, var, minp, q1st, median, q3rd, maxp))
Example 5
def occs(code,bd,sites):
    site_energies = [score_site(code,bd,site) for site in sites]
    #background = [score_site(code,bd,random_site(L)) for i in range(G)]
    mu = sum([mean([code[aa,b] for b in "ACGT"]) for aa in bd])
    sigma = sqrt(sum([variance([code[aa,b] for b in "ACGT"]) for aa in bd]))
    fg = sum(exp(-ep) for ep in site_energies)
    #test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu,sigma)*G
    #print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg/(fg+bg)
Example 6
def occs(code, bd, sites):
    site_energies = [score_site(code, bd, site) for site in sites]
    #background = [score_site(code,bd,random_site(L)) for i in range(G)]
    mu = sum([mean([code[aa, b] for b in "ACGT"]) for aa in bd])
    sigma = sqrt(sum([variance([code[aa, b] for b in "ACGT"]) for aa in bd]))
    fg = sum(exp(-ep) for ep in site_energies)
    #test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    #print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)
Example 7
    def __init__(self, groups_num, group_size, input_size, group_labels=None, activation_function=relu):
        self.groups_num = groups_num
        self.group_size = group_size
        self.input_size = input_size
        self.group_labels = group_labels if group_labels else 2 ** np.arange(groups_num)
        self.activation_function = activation_function

        self.W_in = theano.shared(np.random.normal(loc=0.0, scale=variance(input_size),
                                                   size=(input_size, groups_num, group_size)).astype(floatX))
        # name="{}_W_in".format(groups_num))

        # Weights for recurrent connection within the group

        self.W_self = np.random.normal(loc=0.0, scale=0.01,
                                       size=(groups_num * group_size, groups_num, group_size)).astype(floatX)
        self.W_self_nullifier = np.zeros(self.W_self.shape, dtype=floatX)
        for dx in xrange(groups_num * group_size):
            for g in xrange(groups_num):
                if g >= (dx // group_size):
                    self.W_self[dx][g] = 0.
                else:
                    self.W_self_nullifier[dx, g] = 1.
                    spng = rng.permutation(group_size)
                    self.W_self[dx][g][spng[15:]] = 0.

        self.W_self = theano.shared(self.W_self,
                                    name="{}_W_self".format(groups_num))
        # self.W_self = theano.shared(np.random.normal(loc=0.0, scale=0.01,
        # size=(groups_num * group_size, groups_num, group_size)).astype(
        # floatX),
        #     name="{}_W_self".format(groups_num))
        #


        self.biases = theano.shared(
            np.zeros((groups_num, group_size), dtype=floatX))

        self.initial_activation = theano.shared(np.random.normal(loc=0.0, scale=variance(groups_num * group_size),
                                                                 size=groups_num * group_size).astype(floatX),
                                                name='init_activation')

        self.params = [self.W_self, self.W_in, self.biases, self.initial_activation]
        self.timestep = theano.shared(1)
Example 8
def test_fw_method2(mu, sigma, N, trials=10000):
    xs = [
        sum(exp(random.gauss(mu, sigma)) for i in range(N))
        for j in xrange(trials)
    ]
    M, V = mean(xs), variance(xs)
    print "obs M,V,log(V/(M**2)):", M, V, log(V / (M**2))
    ys = map(log, xs)
    m_obs, s_obs = mean(ys), sd(ys)
    m, s = fw_method(mu, sigma, N)
    print "pred:", m, s
    print "obs:", m_obs, s_obs
def exercicio1():
    utils.print_header(1)
    x, y, labels = load_iris(os.path.join(constants.DATA_DIR, constants.FILENAME_IRIS_DATABASE))
    a, d = x.shape  # N samples, d attributes

    print('a)')
    for i in range(d):
        print('\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(i, utils.mean(x[:, i]), utils.variance(x[:, i])))

    print('b)')
    for i in range(labels.shape[0]):
        print('\tClass {}: {}'.format(i, labels[i]))
        for j in range(d):
            print('\t\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
                j, utils.mean(x[(y == i)[:, 0], j]), utils.variance(x[(y == i)[:, 0], j]))
            )

    print('c)')
    print('\tThe histograms will be displayed')
    f, ax = plt.subplots(1, d, sharex=False, sharey=True)
    for j in range(d):
        # show title only in the top
        ax[j].set_title('Attribute {}'.format(j))
        hist_bins = np.linspace(x[:, j].min(), x[:, j].max(), num=16)
        ax[j].hist(np.vstack([
            x[(y == i)[:, 0], j]
            for i in range(labels.shape[0])
        ]).T, bins=hist_bins, linewidth=0, color=['r', 'b', 'g'])
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-c.pdf')
    plt.legend(labels, loc='upper center', bbox_to_anchor=(0.5, 0.07), ncol=3, bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)
    f.set_figheight(3)
    f.set_figwidth(8)
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('d)')
    print('\tA plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    # format the plot to mimic Slide 21 of Aula 3
    x_pca[:, 1] *= -1
    a = plt.scatter(x_pca[np.where(y == 0)[0], 1], x_pca[np.where(y == 0)[0], 0], c='r', marker='^', lw=0, s=100)
    b = plt.scatter(x_pca[np.where(y == 1)[0], 1], x_pca[np.where(y == 1)[0], 0], c='b', marker='o', lw=0, s=100)
    c = plt.scatter(x_pca[np.where(y == 2)[0], 1], x_pca[np.where(y == 2)[0], 0], c='g', marker='s', lw=0, s=100)
    plt.xlim([-1.5, 1.5])
    plt.ylim([-4, 4])
    plt.legend((a, b, c), tuple(labels), loc='upper left', fontsize=10)
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-d.pdf')
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))
Example 10
    def __init__(self, shape, input_shape, activation_function=softmax):
        self.shape = shape
        self.input_shape = input_shape
        self.W_in = theano.shared(np.random.normal(loc=0.0, scale=variance(input_shape),
                                                   size=(input_shape, shape)).astype(floatX),
                                  name="output_W_in")
        self.biases = theano.shared(
            np.zeros(shape, dtype=floatX))  # np.random.normal(loc=0.0, scale=variance(input_shape),
        # size=shape).astype(floatX),
        # name="output_biases")

        self.params = [self.W_in, self.biases]
        self.activation_function = activation_function
Example 11
def occs(code, bd, sites):
    site_energies = [score_site(code, bd, site) for site in sites]
    #print "test background"
    #background = np.matrix([score_site(code,bd,random_site(L)) for i in trange(G)])
    #print "finish test background"
    mu = sum([mean([code[aa, b1, b2] for (b1, b2) in nuc_pairs]) for aa in bd])
    sigma = sqrt(
        sum([
            variance([code[aa, b1, b2] for (b1, b2) in nuc_pairs]) for aa in bd
        ]))  # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    #test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    #print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)
Example 12
def test_Zb_approx(trials=10, G=5 * 10**6, L=10):
    predicted_Zb = exp(L * sigma**2 / 2.0 + log(G))  # a priori prediction
    matrix = [[random.gauss(0, sigma) for j in range(4)] for i in range(L)]
    score_mu = sum(mean(row) for row in matrix)
    score_sigma_sq = sum(variance(row, correct=False) for row in matrix)
    predicted_Zb2 = exp(score_mu + score_sigma_sq / 2 +
                        log(G))  # prediction given matrix
    Zbs = []
    for trial in trange(trials):
        eps = [sum(random.choice(row) for row in matrix) for i in range(G)]
        Zb = sum(exp(-ep) for ep in eps)
        Zbs.append(Zb)
    print "Predicted: %1.3e +/- %1.3e" % (predicted_Zb,
                                          sqrt(var_Zb(sigma, L, G)))
    print "Predicted2: %1.3e" % (predicted_Zb2)
    print "Actual: %1.3e +/- %1.3e" % (mean(Zbs), sd(Zbs))
Example 13
def mu_summary_stat_experiment():
    """Can we correlate copy number with a summary statistic?"""
    trials = 100
    ep_mu = -2
    ep_sigma = 5
    G = 100
    ts = []
    copies = []
    eps = [random.gauss(ep_mu, ep_sigma) for i in range(G)]
    mus = interpolate(-10, 10, 1000)
    eta = mean(eps)
    gamma = 1.0 / variance(eps)
    print gamma
    plt.plot(*pl(lambda mu: mean_occ(eps, mu), mus))
    plt.plot(*pl(lambda mu: G * fd(eta, mu, beta=gamma), mus))
    plt.plot(*pl(lambda x: G / 2.0, mus))
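fd and mean_occ are not shown on this page; given that mean_occ(eps, mu) is plotted against G * fd(eta, mu, beta=gamma), fd is presumably a Fermi-Dirac occupancy. A sketch under that assumption (the mean_occ semantics are likewise guessed):

from math import exp

def fd(ep, mu, beta=1.0):
    # Fermi-Dirac occupancy of a site with energy ep at chemical potential mu
    return 1.0 / (1.0 + exp(beta * (ep - mu)))

def mean_occ(eps, mu, beta=1.0):
    # expected total occupancy, summed over all site energies (assumed)
    return sum(fd(ep, mu, beta) for ep in eps)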
Example 14
def sample_uniform_energy(matrix):
    mu = sum(map(mean, matrix))
    sigma = sqrt(sum(map(lambda x:variance(x,correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    M_min = 1/norm.pdf(ep_min, mu, sigma)
    M_max = 1/norm.pdf(ep_max, mu, sigma)
    M = max(M_min, M_max)
    trials = 0
    while True:
        trials += 1
        if trials % 10000 == 0:
            print trials
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1/(M*norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site
Example 15
def sample_uniform_energy(matrix):
    mu = sum(map(mean, matrix))
    sigma = sqrt(sum(map(lambda x: variance(x, correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    M_min = 1 / norm.pdf(ep_min, mu, sigma)
    M_max = 1 / norm.pdf(ep_max, mu, sigma)
    M = max(M_min, M_max)
    trials = 0
    while True:
        trials += 1
        if trials % 10000 == 0:
            print trials
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1 / (M * norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site
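Examples 14 and 15 are the same rejection sampler: sites proposed uniformly have approximately N(mu, sigma**2) energies, and accepting a site with probability 1/(M * pdf(ep)) flattens that distribution so accepted energies come out roughly uniform between ep_min and ep_max. A standalone toy demonstration of the same flattening trick on plain Gaussian draws:

import random
from math import sqrt, pi, exp

def norm_pdf(x, mu, sigma):
    return exp(-(x - mu) ** 2 / (2.0 * sigma ** 2)) / (sigma * sqrt(2 * pi))

mu, sigma = 0.0, 1.0
lo, hi = -2.0, 2.0
# M makes the acceptance probability 1/(M * pdf) at most 1 on [lo, hi]
M = 1.0 / min(norm_pdf(lo, mu, sigma), norm_pdf(hi, mu, sigma))
accepted = []
while len(accepted) < 10000:
    x = random.gauss(mu, sigma)
    if lo <= x <= hi and random.random() < 1.0 / (M * norm_pdf(x, mu, sigma)):
        accepted.append(x)
# a histogram of accepted should now be roughly flat on [lo, hi]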
Example 16
def plot_results(vhdl_values, numpy_values, axes_data, name):
    error = []
    for index in range(len(vhdl_values)):
        error.append(relative_error(numpy_values[index], vhdl_values[index]))
    error_mean = mean(error)
    variance_ = variance(error, error_mean)
    print('Error mean {} Variance {}'.format(error_mean, variance_))
    fig, axes = plt.subplots(1, 2)
    axes[0].plot(axes_data[:len(vhdl_values)], vhdl_values)
    axes[0].set_title(name)
    axes[0].set_ylabel('Angle')
    axes[0].set_xlabel('Angle')
    axes[1].plot(axes_data[:len(vhdl_values)], error, '--*')
    axes[1].set_title('Relative Error')
    axes[1].set_ylabel('Error (%)')
    axes[1].set_xlabel('Angle')
    axes[1].set_ylim(-0.8, 1.5)
    plt.show()
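relative_error and the two-argument variance(error, error_mean) are project helpers not shown here. Plausible sketches, assuming the error is expressed in percent (to match the 'Error (%)' axis label) and that variance reuses the precomputed mean:

def relative_error(expected, actual):
    # percent deviation of actual from expected (assumed convention)
    return (actual - expected) / expected * 100.0

def variance(data, data_mean):
    # population variance around a precomputed mean
    return sum((x - data_mean) ** 2 for x in data) / float(len(data))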
Example 17
def log_ZS_sophisticated((matrix, mu, Ne)):
    L = len(matrix)
    nu = Ne - 1
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    dfde = lambda ep: -nu*exp(ep-mu)/(1+exp(ep-mu)) - (ep-mat_mu)/mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except Exception:
        print (matrix, mu, Ne)
        raise
    kappa = -nu*(exp(mu-mode)/(1+exp(mu-mode))**2) - 1/mat_sigma**2
    sigma_approx = sqrt(-1/kappa)
    integrand = lambda ep:dnorm(ep, mat_mu, mat_sigma) * (1+exp(ep-mu))**-nu
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
Example 18
def log_ZS_sophisticated((matrix, mu, Ne)):
    L = len(matrix)
    nu = Ne - 1
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    dfde = lambda ep: -nu * exp(ep - mu) / (1 + exp(ep - mu)) - (
        ep - mat_mu) / mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except Exception:
        print(matrix, mu, Ne)
        raise
    kappa = -nu * (exp(mu - mode) / (1 + exp(mu - mode))**2) - 1 / mat_sigma**2
    sigma_approx = sqrt(-1 / kappa)
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu)
                                                           )**-nu
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
Example 19
def experimentCrossValidate(dataModule, times):
    PI = dataModule.protectedIndex
    PV = dataModule.protectedValue
    originalTrain, originalTest = dataModule.load()
    allData = originalTrain + originalTest

    variances = [[], [], []]  #error, bias, ubif
    mins = [float('inf'), float('inf'), float('inf')]
    maxes = [-float('inf'), -float('inf'), -float('inf')]
    avgs = [0, 0, 0]

    for time in range(times):
        random.shuffle(allData)
        train = allData[:len(originalTrain)]
        test = allData[len(originalTrain):]
        output = statistics(train, test, PI, PV)

        print("\tavg, min, max, variance")
        print("error: %r" % (output[0], ))
        print("bias: %r" % (output[1], ))
        print("ubif: %r" % (output[2], ))

        for i in range(len(output)):
            avgs[i] += (output[i][0] - avgs[i]) / (time + 1)
            mins[i] = min(mins[i], output[i][1])
            maxes[i] = max(maxes[i], output[i][2])
            variances[i].append(
                output[i][0])  # was too lazy to implement online alg
            # warning: this doesn't take into account the variance of each split

    for i in range(len(variances)):
        variances[i] = variance(variances[i])

    print("AGGREGATE STATISTICS:")
    print("\tavg, min, max, variance")
    print("error: %r" % ((avgs[0], mins[0], maxes[0], variances[0]), ))
    print("bias: %r" % ((avgs[1], mins[1], maxes[1], variances[1]), ))
    print("ubif: %r" % ((avgs[2], mins[2], maxes[2], variances[2]), ))
Example 20
def anicam(parse, filelst, infl, supl):
    points = []
    gpoints = []
    for path in filelst:
        offset = None
        tmp = []
        data = parse(path)
        for t, v in data:
            tmp.append((t, v))
            if infl <= t <= supl:
                points.append(v)
            if offset is None and v == 0 and t >= supl:
                offset = 40 - t

        if offset is None:
            raise ValueError("no zero value found after supl")
        for t, v in tmp:
            gpoints.append((t + offset, v))

    avg = utils.average(points)
    var = utils.variance(points)
    q1, median, q3 = utils.quartiles(points)
    
    return gpoints, (avg, var, min(points), q1, median, q3, max(points))
Example 21
def experimentCrossValidate(dataModule, times):
   PI = dataModule.protectedIndex
   PV = dataModule.protectedValue
   originalTrain, originalTest = dataModule.load()
   allData = originalTrain + originalTest
   
   variances = [[], [], []] #error, bias, ubif
   mins = [float('inf'), float('inf'), float('inf')]
   maxes = [-float('inf'), -float('inf'), -float('inf')]
   avgs = [0, 0, 0]
   
   for time in range(times):
     random.shuffle(allData)
     train = allData[:len(originalTrain)]
     test = allData[len(originalTrain):]
     output = statistics(train, test, PI, PV)
     
     print("\tavg, min, max, variance")
     print("error: %r" % (output[0],))
     print("bias: %r" % (output[1],))
     print("ubif: %r" % (output[2],))
     
     for i in range(len(output)):
       avgs[i] += (output[i][0] - avgs[i]) / (time + 1)
       mins[i] = min(mins[i], output[i][1])
       maxes[i] = max(maxes[i], output[i][2])
       variances[i].append(output[i][0]) # was too lazy to implement online alg
       # warning: this doesn't take into account the variance of each split
   
   for i in range(len(variances)):
     variances[i] = variance(variances[i])

   print("AGGREGATE STATISTICS:")
   print("\tavg, min, max, variance")
   print("error: %r" % ((avgs[0], mins[0], maxes[0], variances[0]),))
   print("bias: %r" % ((avgs[1], mins[1], maxes[1], variances[1]),))
   print("ubif: %r" % ((avgs[2], mins[2], maxes[2], variances[2]),))
Example 22
def test_variance():
    assert utils.variance(x) == (x.var(ddof=1))
Example 23
def log_ZS_naive((matrix, mu, Ne), trials=1000):
    acc = 0
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)


def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=trials)


def log_ZS_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu,
                                           mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs


def log_ZM_hack((matrix, mu, Ne), N):
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS


def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
Example 24
def score_site((li_code, bi_code), bd, site):
    return sum(li_code[aa, n]
               for (aa, n) in zip(bd, site)) + bi_code[bd[-1], site[-2],
                                                       site[-1]]


def occs((li_code, bi_code), bd, sites):
    site_energies = [
        score_site((li_code, bi_code), bd, site) for site in sites
    ]
    #print "test background"
    #background = np.matrix([score_site(code,bd,random_site(L)) for i in trange(G)])
    #print "finish test background"
    mu = sum([mean([li_code[aa, b] for b in nucs])
              for aa in bd]) + mean(bi_code[bd[-1], b1, b2]
                                    for b1, b2 in nuc_pairs)
    sigma = sqrt(
        sum([variance([li_code[aa, b] for b in nucs]) for aa in bd]) +
        variance([bi_code[bd[-1], b1, b2]
                  for b1, b2 in nuc_pairs]))  # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    #test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    #print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)


def fitness(code, (bd, sites)):
    return occs(code, bd, sites)


def moran_process(code,
                  mutation_rate,
Example 25
 def aa_sigma(aa):
     return sqrt(variance([code[aa, b] for b in "ACGT"]))
Example 26
def log_ZS_naive((matrix, mu, Ne), trials=1000):
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1/(1+exp(ep-mu)))**(Ne-1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)

def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=trials)
    
def log_ZS_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs

def log_ZM_hack((matrix, mu, Ne), N):
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS

def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = (N * L * log(4)) + log_perc_below_threshold
Example 27
 def aa_sigma(aa):
     return sqrt(variance([code[aa,b] for b in "ACGT"]))
Example 28
    bd = [li_aa]*(L-2) + [aa1,aa2,aa12]
    site = "".join([li_b]*(L-2) + [b1,b2])
    sites = [site for i in range(n)]
    return bd,sites
    
def score_site((li_code,bi_code),bd,site):
    return sum(li_code[aa,n] for (aa,n) in zip(bd,site)) + bi_code[bd[-1],site[-2],site[-1]]

def occs((li_code,bi_code),bd,sites):
    site_energies = [score_site((li_code,bi_code),bd,site) for site in sites]
    #print "test background"
    #background = np.matrix([score_site(code,bd,random_site(L)) for i in trange(G)])
    #print "finish test background"
    mu = sum([mean([li_code[aa,b] for b in nucs]) for aa in bd]) + mean(bi_code[bd[-1],b1,b2] for b1,b2 in nuc_pairs)
    sigma = sqrt(sum([variance([li_code[aa,b] for b in nucs]) for aa in bd]) +
                 variance([bi_code[bd[-1],b1,b2] for b1,b2 in nuc_pairs])) # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    #test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu,sigma)*G
    #print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg/(fg+bg)

def fitness(code,(bd,sites)):
    return occs(code,bd,sites)
    
def moran_process(code,mutation_rate,N=1000,turns=10000,
                  init=sample_species,mutate=mutate,fitness=fitness,pop=None):
    mean_rec_muts,mean_site_muts = mutation_rate/3.0,mutation_rate
    site_mu = mean_site_muts/float(n*L)
    bd_mu = mean_rec_muts/float(L)
Example 29
 def finish(self):
     f = open("pos_variances.txt", "w")
     for i in range(len(self.posnames)):
         mean = utils.mean(self.counts[i])
         fields = [self.posnames[i],
                   str(mean),
                   str(utils.median(self.counts[i])),
                   str(utils.variance(self.counts[i])),
                   str(utils.moment(self.counts[i], mean, 3)),
                   str(utils.moment(self.counts[i], mean, 4)),
                   str(len([x for x in self.counts[i] if x > 0]))]
         f.write("\t".join(fields) + "\n")
     f.close()
Example 30
 def bi_aa_sigma(aa1,aa2,aa12):
     return sqrt(variance([bi_code[aa12,b1,b2] + li_code[aa1,b2] + li_code[aa2,b2] for b1,b2 in nuc_pairs]))
Example 31
def bd_variance_ref(code, bd):
    kmer_scores = [score_site(code, bd, kmer) for kmer in make_kmers(L)]
    return variance(kmer_scores)
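make_kmers (also used in Example 3) enumerates all 4**L sites; a minimal version via itertools, assuming the usual ACGT alphabet:

from itertools import product

def make_kmers(L, alphabet="ACGT"):
    # all length-L strings over the alphabet: 4**L of them for ACGT
    return ("".join(p) for p in product(alphabet, repeat=L))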
Example 32
import numpy as np
import math
from utils import variance 

x1 = np.array([3, 13, 19, 24, 29])
mean_x1 = np.mean(x1)
variance_x1 = variance(x1)
print("estimated population variance variance_x1:", variance_x1)

x2 = np.array([12, 10, 29, 33, 38])
mean_x2 = np.mean(x2)
variance_x2 = variance(x2)
print("estimated population variance variance_x2:", variance_x2)

num_measurements = x1.size
covariance = np.dot(x1 - mean_x1, x2 - mean_x2) / (num_measurements - 1)
print("covariance:", covariance)

correlation = covariance / (math.sqrt(variance_x1) * math.sqrt(variance_x2))
print("correlation (ranges from -1 to +1):", correlation)
Example 33
def site_sigma_from_matrix(matrix,correct=False):
    """return sd of site energies from matrix""" # agrees with estimate_site_sigma
    return sqrt(sum(map(lambda xs:variance(xs,correct=correct),matrix)))
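The sum works because a uniformly random site draws each matrix column independently, so the site-score variance is the sum of per-column variances. A standalone Monte Carlo check (with a local variance mirroring the utils-style correct flag, where correct=True selects the n-1 denominator):

import random
from math import sqrt

def variance(xs, correct=False):
    m = sum(xs) / float(len(xs))
    denom = len(xs) - 1 if correct else len(xs)
    return sum((x - m) ** 2 for x in xs) / float(denom)

L = 8
matrix = [[random.gauss(0, 1) for _ in range(4)] for _ in range(L)]
analytic = sqrt(sum(variance(row) for row in matrix))
scores = [sum(random.choice(row) for row in matrix) for _ in range(100000)]
print(analytic, sqrt(variance(scores, correct=True)))  # should nearly agree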
Example 34
        fix = np.array(f["fix"])
        scaling = float(np.array(f["scaling"]))

from utils import MDSampler, loadmd
from utils import variance, smile2mass

loadrange = ["arr_" + str(i) for i in range(args.loadrange)]
dataset = loadmd(args.dataset, loadrange, scaling, fix).to(device)
SMILE = smile2mass(args.smile)

if not args.double:
    dataset = dataset.to(torch.float32)

if args.double:
    pVariance = torch.tensor(
        [variance(torch.tensor(item).double(), K) for item in SMILE],
        dtype=torch.float64).reshape(1, -1).repeat(3, 1).permute(1,
                                                                 0).reshape(-1)
else:
    pVariance = torch.tensor(
        [variance(torch.tensor(item), K) for item in SMILE],
        dtype=torch.float32).reshape(1, -1).repeat(3, 1).permute(1,
                                                                 0).reshape(-1)
target = MDSampler(dataset, pVariance=pVariance)


def innerBuilder(num):
    maskList = []
    for i in range(nlayers):
        if i % 2 == 0:
            b = torch.zeros(num)
Example 35
def site_sigma_from_matrix(matrix):
    """return sd of site energies from matrix"""
    return sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
Example 36
 def bi_aa_sigma(aa1, aa2, aa12):
     return sqrt(
         variance([
             bi_code[aa12, b1, b2] + li_code[aa1, b2] + li_code[aa2, b2]
             for b1, b2 in nuc_pairs
         ]))
Example 37
def predict_median_Zb_from_matrix(matrix, G):
    score_mu = sum(mean(row) for row in matrix)
    score_sigma_sq = sum(variance(row, correct=False) for row in matrix)
    predicted_Zb = exp(-score_mu + log(G))  # prediction given matrix
    return predicted_Zb
Example 38
 def aa_sigma(aa):
     return sqrt(variance([code[aa, b1, b2] for b1, b2 in nuc_pairs]))
Example 39
def Z_approx(matrix,n,Ne,G=5*10**6):
    """use log fitness approximation to compute partition function"""
    nu = Ne - 1
    sigma_sq = sum(map(lambda xs:variance(xs,correct=False),matrix))
    Zb = Zb_from_matrix(matrix,G)
Example 40
 def test_variance(self):
     var = variance([0, 4], [8, None, None, None, 7], 7.5)
     self.assertEqual(var, 0.5)