def calculate_examplemixtures(sigma=1):
    n = 200
    dim = 4
    beta0 = [2, -1, 1, 0]
    beta0 = mrc.normalize(beta0)
    x = np.random.normal(3, 1, [n, dim])
    epsilon = sigma * mixturenoise(.3, 2, 1, -2, .5, n)
    Ix = np.dot(x, beta0)
    y = Ix + epsilon
    #### solving the ori
    beta_ini = np.random.uniform(-1, 1, dim)
    beta_ori, tauback, iterations = mrc.ori(y, x, 4, 200, beta_ini)
    Ixback = np.dot(x, beta_ori)
    #plt.plot(Ixback, y, 'o')
    #print(beta_ori)
    #print(tauback)
    #### solving the glm
    xx = sm.add_constant(x)
    Gaussian_model = sm.GLM(y, xx, family=sm.families.Gaussian())
    Gaussian_results = Gaussian_model.fit()
    beta_glm = mrc.normalize(Gaussian_results.params[1:])
    #plt.plot(np.dot(x, beta_glm), y, 'o')
    cos_glm = np.abs(np.dot(beta0, beta_glm))
    cos_ori = np.abs(np.dot(beta0, beta_ori))
    deltacos = cos_ori - cos_glm
    return (cos_ori, cos_glm, deltacos)
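# A minimal Monte Carlo driver for the mixture example (a sketch, not part of
# the original script): repeat calculate_examplemixtures() and report how often
# the ORI direction beats the GLM direction in absolute cosine. The trial count
# is an illustrative assumption.
def run_mixture_trials(sigma=1, trials=50):
    deltas = np.array([calculate_examplemixtures(sigma)[2] for _ in range(trials)])
    print('mean deltacos =', deltas.mean(),
          '| ORI wins in', np.mean(deltas > 0) * 100, '% of trials')
    return deltas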
def calculate_example1(sigma=1, n=100):
    beta0 = [2, -1, 1, 0]
    beta0 = mrc.normalize(beta0)
    x = np.random.normal(0, 1, [n, 4])
    epsilon = sigma * np.random.lognormal(0, 1, n)
    Ix = np.dot(x, beta0)
    y = (Ix * epsilon) / (1 + np.exp(-Ix + epsilon))
    #### solving the ori
    beta_ini = np.random.uniform(-1, 1, 4)
    beta_ori, tauback, iteration = mrc.ori(y, x, 4, 200, beta_ini)
    Ixback = np.dot(x, beta_ori)
    #### solving the glm
    xx = sm.add_constant(x)
    Gaussian_model = sm.GLM(y, xx, family=sm.families.Gaussian())
    Gaussian_results = Gaussian_model.fit()
    beta_glm = mrc.normalize(Gaussian_results.params[1:])
    cos_glm = np.abs(np.dot(beta0, beta_glm))
    cos_ori = np.abs(np.dot(beta0, beta_ori))
    deltacos = cos_ori - cos_glm
    return (cos_ori, cos_glm, deltacos)
def compute_example(sigma, trials):
    dcorr = np.zeros((trials, 3))
    dcos = np.zeros((trials, 3))
    for i in range(trials):
        print(sigma, i)
        x, y, Ix = data_example_1(sigma=sigma)
        d = x.shape[1]
        # random starting direction for the ORI search
        beta_start = mrc.normalize(np.random.uniform(-1, 1, d))
        beta_ori, tau_ori, n_iter = mrc.ori(y, x, betaini=beta_start, limit=0.0001)
        beta_glm, tau_glm = mrc.glm(y, x)
        Ix_ori = np.dot(x, beta_ori)
        Ix_glm = np.dot(x, beta_glm)
        cor_ori = np.corrcoef(Ix, Ix_ori)[0, 1]
        cor_glm = np.corrcoef(Ix, Ix_glm)[0, 1]
        # cosine with the true direction (the module-level beta_ini)
        cos_ori = np.abs(np.dot(beta_ori, beta_ini))
        cos_glm = np.abs(np.dot(beta_glm, beta_ini))
        dcorr[i, 0] = cor_ori
        dcorr[i, 1] = cor_glm
        dcorr[i, 2] = n_iter
        dcos[i, 0] = cos_ori
        dcos[i, 1] = cos_glm
        dcos[i, 2] = n_iter
    return dcorr, dcos
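# Example invocation (a sketch; assumes data_example_1 and the module-level
# beta_ini defined below): run 50 trials at sigma = 0.5 and summarize the
# recovered-index quality for ORI vs. GLM. The trial count is an assumption.
dcorr, dcos = compute_example(0.5, 50)
print('mean corr ORI:', dcorr[:, 0].mean(), '| mean corr GLM:', dcorr[:, 1].mean())
print('mean |cos| ORI:', dcos[:, 0].mean(), '| mean |cos| GLM:', dcos[:, 1].mean())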
def calculate_examplebehav(sigma=1, n=100, dim=4):
    xc = np.random.uniform(0, 10, (n, dim))  # xc = x continuous
    beta0 = [1, 1, 1, 1]
    beta0 = mrc.normalize(beta0)
    I0 = np.dot(xc, beta0)
    y = 1.5 * (I0 - 8) + np.random.lognormal(0, sigma, n)
    y[y < 0] = 0
    y[y > 10] = 10
    #plt.plot(I0, y, 'o')
    tau_real = st.kendalltau(np.dot(xc, beta0), y)[0]
    x = xc.astype(int)  # discretize the covariates
    #y = y.astype(int)
    # x[abs(x) < 2] = 0
    # x[abs(x) > 5] = 5
    tau0 = st.kendalltau(np.dot(x, beta0), y)[0]
    I00 = np.dot(x, beta0)
    #plt.plot(I00, y, 'o')
    beta_ini = np.random.uniform(-1, 1, dim)
    beta_ori, tauback, iterations = mrc.ori(y, x, 5, 100, beta_ini)
    Ix = np.dot(x, beta_ori)
    xx = sm.add_constant(x)
    Gaussian_model = sm.GLM(y, xx, family=sm.families.Gaussian())
    Gaussian_results = Gaussian_model.fit()
    beta_glm = mrc.normalize(Gaussian_results.params[1:])
    tauglm = st.kendalltau(np.dot(x, beta_glm), y)[0]
    #### STATISTICAL SIGNIFICANCE PART ####
    # For n = 100, d = 4: shape1 = 8.336884, shape2 = 51.11006.
    # Think about whether these estimates apply here.
    shape1 = 8.336884
    shape2 = 51.11006
    pval = 1 - bt.cdf(tauback, shape1, shape2, loc=0, scale=1)
    cos_ori = np.dot(beta0, beta_ori)
    cos_glm = np.dot(beta0, beta_glm)
    deltacos = cos_ori - cos_glm
    #plt.plot(Ix, y, 'o')
    return (cos_ori, cos_glm, deltacos)
def calculate_cauchy(sigma, n=100, dim=4):
    x = np.random.uniform(0, 1, (n, dim))
    beta0 = [2, 1, -1, -1]
    beta0 = mrc.normalize(beta0)
    noise = cauchy.rvs(loc=0, scale=.1, size=n)
    I0 = np.dot(x, beta0)
    y = I0 + sigma * noise
    beta_ini = np.random.uniform(-1, 1, dim)
    beta_ori, tauori, iterations = mrc.ori(y, x, 2, 500, beta_ini)
    Ix = np.dot(x, beta_ori)
    beta_ori = mrc.normalize(beta_ori)
    xx = sm.add_constant(x)
    Gaussian_model = sm.GLM(y, xx, family=sm.families.Gaussian())
    Gaussian_results = Gaussian_model.fit()
    #print(Gaussian_results.summary())
    beta_glm = mrc.normalize(Gaussian_results.params[1:])
    cos_glm = np.abs(np.dot(beta0, beta_glm))
    cos_ori = np.abs(np.dot(beta0, beta_ori))
    deltacos = cos_ori - cos_glm
    return (cos_ori, cos_glm, deltacos)
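# A noise-scale sweep for the Cauchy example (a sketch, not in the original):
# average deltacos over repeated draws at each sigma. The sigma grid and the
# repetition count are illustrative assumptions.
sigmas = [0.05, 0.1, 0.2, 0.5, 1.0]
for s in sigmas:
    deltas = [calculate_cauchy(s)[2] for _ in range(20)]
    print('sigma =', s, '| mean deltacos =', np.mean(deltas))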
def gradient_descent(x, y, i, beta=None, theta=None, learning_rate=2,
                     steps_max=1000, threshold=.0001, alpha=None):
    dim = len(x[0, ])
    if beta is None:
        beta = np.random.uniform(-1, 1, dim)
    if alpha is None:
        ib = i
    else:
        # restrict to an open ball of radius alpha around x[i]
        x0 = x[i, ]
        xb, yb = Openball(x, y, x0, alpha)
        ib = np.where(xb == x[i, ])[0][0]
    if theta is None:
        sdmax = max(np.std(x, axis=0))
        theta = 4 * sdmax
    betadisc = np.zeros((steps_max, dim))
    taudisc = np.zeros(steps_max) - 1
    beta_t = mrc.normalize(beta)
    for k in np.arange(1, steps_max):
        beta_t = step_gradient(x, y, ib, beta_t, theta, learning_rate)
        betadisc[k] = beta_t
        taudisc[k] = st.kendalltau(np.dot(x, betadisc[k, :]), y)[0]
        if taudisc[k] < taudisc[k - 1]:
            # the step made tau worse: halve the learning rate, widen theta,
            # and roll back to the previous iterate
            learning_rate = learning_rate / 2
            theta = theta + .1
            #print('new lr at k=', k, " lr=", learning_rate)
            beta_t = betadisc[k - 1]
            betadisc[k] = betadisc[k - 1]
            taudisc[k] = taudisc[k - 1]
            continue
        if k > 10 and abs(taudisc[k] - taudisc[k - 3]) < threshold:
            #print('convergence at k=', k)
            break
    return (betadisc[k], taudisc[k], k)
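# Usage sketch (assumes gradient_i and mrc.normalize are available from the
# surrounding module): fit the index direction on a simple monotone model.
# The data sizes and true beta below are illustrative assumptions.
x_demo = np.random.normal(0, 1, (100, 4))
beta_true = mrc.normalize([2, -1, 1, 0])
y_demo = np.exp(np.dot(x_demo, beta_true))  # monotone link, no noise
beta_hat, tau_hat, k_used = gradient_descent(x_demo, y_demo, i=0)
print('tau =', tau_hat, 'after', k_used, 'steps; |cos| =',
      abs(np.dot(beta_hat, beta_true)))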
def step_gradient(x, y, i, beta, theta, learning_rate=1):
    # one gradient step, followed by renormalization back to the unit sphere
    beta_next = beta - learning_rate * gradient_i(x, y, i, beta, theta)
    beta_next = mrc.normalize(beta_next)
    return beta_next
def orthonormal(input_v, base_v):
    # project input_v onto the orthogonal complement of base_v (base_v is
    # assumed unit-norm), then normalize the result
    v_orthogonal = input_v - np.dot(input_v, base_v) * base_v
    v_orthonormal = mrc.normalize(v_orthogonal)
    return v_orthonormal
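# Quick sanity check (a sketch): the output should be unit-length and
# orthogonal to the base vector. The vectors here are illustrative.
b = mrc.normalize(np.array([1.0, 2.0, 2.0]))
v = orthonormal(np.array([1.0, 0.0, 0.0]), b)
print(np.dot(v, b))        # ~0: orthogonal to base_v
print(np.linalg.norm(v))   # ~1: unit norm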
import numpy as np
import matplotlib.pyplot as plt
import mrc_functions as mrc
import pandas as pd
import seaborn as sns

# true index direction used throughout this example
beta_ini = mrc.normalize([2, -1, 1, 0])


def data_example_1(n=100, beta0=None, sigma=.5):
    dim = 4
    if beta0 is None:
        beta0 = [2, -1, 1, 0]
    beta0 = mrc.normalize(beta0)
    x = np.random.normal(0, 1, [n, dim])
    epsilon = np.random.lognormal(0, sigma, n)
    Ix = np.dot(x, beta0)
    y = (Ix * epsilon) / (1 + np.exp(-Ix + epsilon))
    return x, y, Ix


def graph_example1():
    x, y, Ix = data_example_1(100, beta_ini, 1)
    f = plt.figure()
    plt.plot(Ix, y, 'o')
    f.show()
    f.savefig('foo.pdf')
import numpy as np
import mrc_functions as mrc
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from scipy.stats import beta as bt

plt.style.use('seaborn-white')

################################
#### We create the data set ####
n = 100
sigma = .5
beta0 = [2, -1, 1, 0]
beta0 = mrc.normalize(beta0)
x = np.random.normal(0, 1, [n, 4])
epsilon = sigma * np.random.lognormal(0, 1, n)
Ix = np.dot(x, beta0)
y = (Ix * epsilon) / (1 + np.exp(-Ix + epsilon))

#### solving the ori
beta_ini = np.random.uniform(-1, 1, 4)
beta_ori, tauback, iteration = mrc.ori(y, x, 4, 200, beta_ini)
Ixori = np.dot(x, beta_ori)

#### solving the glm
xx = sm.add_constant(x)
Gaussian_model = sm.GLM(y, xx, family=sm.families.Gaussian())
Gaussian_results = Gaussian_model.fit()
beta_glm = mrc.normalize(Gaussian_results.params[1:])
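# A natural follow-up (a sketch, not in the original script): compare both
# recovered directions against beta0 by absolute cosine, as the
# calculate_example* helpers above do.
cos_ori = np.abs(np.dot(beta0, beta_ori))
cos_glm = np.abs(np.dot(beta0, beta_glm))
print('|cos| ORI =', cos_ori, '| |cos| GLM =', cos_glm,
      '| deltacos =', cos_ori - cos_glm)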
def mixturenoise(klimit, mu1, sigma1, mu2, sigma2, n):
    # draw each noise term from N(mu1, sigma1) with probability klimit,
    # otherwise from N(mu2, sigma2)
    epsilon = np.zeros(n)
    for i in np.arange(n):
        k = np.random.uniform(0, 1, 1)
        if k < klimit:
            mu, sigma = [mu1, sigma1]
        else:
            mu, sigma = [mu2, sigma2]
        epsilon[i] = np.random.normal(mu, sigma, 1)
    return epsilon


sigma = 1
n = 200
dim = 4
beta0 = [2, -1, 1, 0]
beta0 = mrc.normalize(beta0)
x = np.random.normal(3, 1, [n, dim])
epsilon = sigma * mixturenoise(.3, 2, 1, -2, .5, n)
Ix = np.dot(x, beta0)
y = Ix + epsilon

beta_ini = np.random.uniform(-1, 1, dim)
beta_ori, tauback, iterations = mrc.ori(y, x, 4, 200, beta_ini)
Ixback = np.dot(x, beta_ori)

plt.figure(dpi=300)
plt.plot(Ixback, y, 'o', color='steelblue')
plt.xlabel('I(x)')
plt.ylabel('y')
plt.savefig('../figures/example_mixture/mixture_realization_dec2019.png')
plt.show()
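# A vectorized alternative to the loop in mixturenoise (a sketch with the same
# parameterization): sample the component labels once, then draw all n values
# in a single call, which avoids the per-element Python loop.
def mixturenoise_vec(klimit, mu1, sigma1, mu2, sigma2, n):
    pick1 = np.random.uniform(0, 1, n) < klimit
    mu = np.where(pick1, mu1, mu2)
    sd = np.where(pick1, sigma1, sigma2)
    return np.random.normal(mu, sd)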