def makeLinearRegression(xObs, yObs, xerr, yerr):
    """Fit a Bayesian linear regression to (xObs, yObs) via linmix.

    Args:
        xObs: observed independent variable (array_like).
        yObs: observed dependent variable (array_like).
        xerr: 1-sigma measurement errors on xObs.
        yerr: 1-sigma measurement errors on yObs.

    Returns:
        tuple: posterior sample arrays (intercept 'alpha', slope 'beta',
        intrinsic scatter sqrt('sigsqr')).
    """
    # BUG FIX: the original used a Python 2 print statement, which is a
    # syntax error under Python 3 (the rest of this file is py3-style).
    print(len(xObs), len(yObs), len(xerr), len(yerr))
    n = len(xerr)
    delta = np.ones(n)   # all points treated as detections
    xycov = np.zeros(n)  # no covariance between x and y errors
    # Positional args after yerr: xycov, delta, K (mixture components),
    # nchains — matching the linmix.LinMix signature.
    model = linmix.LinMix(xObs, yObs, xerr, yerr, xycov, delta, 2, 2)
    model.run_mcmc(5000, 10000, silent=False)
    # return intercept, slope, scatter
    return (model.chain['alpha'], model.chain['beta'],
            np.sqrt(model.chain['sigsqr']))
def run_linmix(x, y, err_x, err_y, Nmin=5000, Nmax=10000, vb=True):  # pylint: disable = too-many-arguments
    '''Runs the Kelly regression algorithm through the package linmix.

    Args:
        x (array_like): The observed independent variable.
        y (array_like): The observed dependent variable.
        err_x (array_like): 1-sigma measurement errors in x.
        err_y (array_like): 1-sigma measurement errors in y.
        Nmin (int): Minimum number of MCMC iterations.
        Nmax (int): Maximum number of MCMC iterations.
        vb (bool): If True, let linmix print progress (verbose).

    For convenience, here are the remaining linmix arguments:

    Linmix Args:
        xycov (array_like): Covariance between the measurement errors
            in x and y.
        delta (array_like): Array indicating whether a data point is
            censored (i.e., not detected), or not. If delta[i] == 1,
            then the ith source is detected. If delta[i] == 0, then
            the ith source is not detected and y[i] will be interpreted
            as an upper limit. Note that if there are censored data
            points, then the maximum-likelihood estimate
            (alpha, beta, sigsqr) is not valid. By default, all data
            points are assumed to be detected.
        K (int): The number of Gaussians to use in the mixture model
            for the distribution of xi.
        nchains (int): The number of Monte Carlo Markov Chains to
            instantiate.

    Returns:
        tuple: (intercept, slope, sigma) posterior sample arrays,
        consistent with run_lrgs.
    '''
    # Make sure dimensions are correct
    assert np.size(x) == np.size(y)
    assert np.size(err_x) == np.size(err_y)
    assert np.size(x) == np.size(err_x)
    L = np.size(x)

    # FIX: Implement censored data!
    # Run linmix MCMC: all points marked as detections, no x/y error
    # covariance; K=2 mixture components, 2 chains.
    delta = np.ones(L)
    xycov = np.zeros(L)
    model = linmix.LinMix(x, y, err_x, err_y, xycov, delta, 2, 2)
    # BUG FIX: `vb` is a verbosity flag, so `silent` must be its negation.
    # The original passed silent=vb, which muted output exactly when
    # vb=True (verbose) was requested.
    model.run_mcmc(Nmin, Nmax, silent=not vb)

    # return intercept, slope, intrinsic scatter
    intercept = model.chain['alpha']
    slope = model.chain['beta']
    sigma = np.sqrt(model.chain['sigsqr'])

    # Return fit parameters consistently with run_lrgs
    return (intercept, slope, sigma)
def run():
    """Read 'test.dat' (generating it first if absent), run the LinMix
    MCMC with the per-point censoring flags, and write the posterior
    chain to 'test.pyout'.
    """
    import astropy.io.ascii as ascii
    try:
        a = ascii.read('test.dat')
    except FileNotFoundError:
        # BUG FIX: narrowed from a bare `except:`, which would also have
        # swallowed KeyboardInterrupt/SystemExit and masked real parse
        # errors. A missing file is the only case we want to recover from.
        generate_test_data()
        a = ascii.read('test.dat')
    lm = linmix.LinMix(a['x'], a['y'], a['xsig'], a['ysig'], delta=a['delta'])
    lm.run_mcmc()
    ascii.write(
        lm.chain[[
            'alpha', 'beta', 'sigsqr',
            'mu0', 'usqr', 'wsqr',
            'ximean', 'xisig', 'corr'
        ]], 'test.pyout')
def _kde_peak_and_interval(samples, ci_levs=(15.865, 84.135), step=0.01):
    """Return (peak, lo, hi) for a posterior sample array.

    lo/hi are the given percentiles (default: central 68.27% interval);
    peak is the mode of a Gaussian KDE evaluated on a grid of spacing
    `step` between lo and hi.
    """
    lo, hi = np.percentile(samples, list(ci_levs))
    kde = stats.gaussian_kde(samples)
    # BUG FIX: np.linspace requires an integer sample count; the original
    # passed the float returned by np.round, which raises TypeError on
    # modern NumPy.
    ndisc = int(np.round((hi - lo) / step))
    grid = np.linspace(lo, hi, ndisc)
    peak = grid[np.argmax(kde.evaluate(grid))]
    return peak, lo, hi


def do_regression(XX, YY, EX, EY, DELTA, label):
    """Run a LinMix regression of YY on XX with measurement errors EX/EY
    and censoring flags DELTA.

    Saves the posterior chain ('outfiles/<label>.regression.npz'), a
    text summary of alpha/beta/dispersion ('.regression.txt'), and a
    corner plot ('.corner.png'). Returns the raw chain.
    """
    # run the linear mixture regression
    lm = linmix.LinMix(XX, YY, EX, EY, delta=DELTA)
    lm.run_mcmc(miniter=10000)

    # save the chains (np.savez overwrites an existing file, so the
    # original `os.system('rm -rf ...')` shell-out is unnecessary)
    np.savez('outfiles/' + label + '.regression.npz', chain=lm.chain)

    # inference summaries
    alp = lm.chain['alpha']
    bet = lm.chain['beta']
    sca = np.sqrt(lm.chain['sigsqr'])
    cor = lm.chain['corr']

    # peak + central-68% interval for each parameter (was triplicated
    # inline code; now one helper)
    pk_alp, alp_lo, alp_hi = _kde_peak_and_interval(alp)
    pk_bet, bet_lo, bet_hi = _kde_peak_and_interval(bet)
    pk_sca, sca_lo, sca_hi = _kde_peak_and_interval(sca)

    # dump these to a text file
    alp_str = 'alpha = %5.2f + %5.2f / - %5.2f' % \
        (pk_alp, alp_hi - pk_alp, pk_alp - alp_lo)
    bet_str = 'beta = %5.2f + %5.2f / - %5.2f' % \
        (pk_bet, bet_hi - pk_bet, pk_bet - bet_lo)
    sca_str = 'dispersion = %5.2f + %5.2f / - %5.2f' % \
        (pk_sca, sca_hi - pk_sca, pk_sca - sca_lo)

    # mode 'w' truncates any existing file; the context manager closes
    # the handle even if a write fails
    with open('outfiles/' + label + '.regression.txt', 'w') as f:
        f.write(label + '\n')
        f.write('\n')
        f.write(alp_str + '\n')
        f.write(bet_str + '\n')
        f.write(sca_str)

    # make a covariance plot (savefig overwrites any existing file)
    posts = np.column_stack([alp, bet, sca, cor])
    # 1-, 2-, 3-sigma contour levels for a 2-D Gaussian
    levs = 1. - np.exp(-0.5 * (np.arange(3) + 1)**2)
    fig = corner.corner(
        posts,
        plot_datapoints=False,
        levels=levs,
        labels=[r'$\alpha$', r'$\beta$', r'$\sigma$', r'$\varrho$'])
    fig.savefig('outfiles/' + label + '.corner.png')
    fig.clf()
    return lm.chain
# Keep only the rows where value and uncertainty are non-NaN in both x
# and y (the string comparison reproduces the original screen exactly:
# str(float('nan')) == 'nan').
x = []
xsig = []
y = []
ysig = []
for xv, yv, xe, ye in zip(x1, y1, x1sig, y1sig):
    # One flat test replaces the original four-deep `if` nesting.
    if 'nan' not in (str(xv), str(yv), str(xe), str(ye)):
        x.append(xv)
        y.append(yv)
        xsig.append(xe)
        ysig.append(ye)

corr_coeff, p_value_pearson = stats.pearsonr(x, y)
tau, p_value_kendall = stats.kendalltau(x, y)

lm = linmix.LinMix(x, y, xsig, ysig, K=2)
lm.run_mcmc(silent=True)

# Overplot a thinned sample (every 25th draw) of posterior regression
# lines, each nearly transparent so density shows the uncertainty.
for i in range(0, len(lm.chain), 25):
    xs = np.arange(6, 13)
    ys = lm.chain[i]['alpha'] + xs * lm.chain[i]['beta']
    ax1.plot(xs, ys, color='r', alpha=0.02)

# Summarise the same thinned intercept/slope samples.
intercept_array = []
slope_array = []
for i in range(0, len(lm.chain), 25):
    intercept = lm.chain[i]['alpha']
    slope = lm.chain[i]['beta']
    intercept_array.append(intercept)
    slope_array.append(slope)
mean_intercept = np.mean(intercept_array)
std_intercept = np.std(intercept_array)
mean_slope = np.mean(slope_array)
# Mock data: 40 unique integers in [1, 99] for each axis.
X = random.sample(range(1, 100), 40)
Y = random.sample(range(1, 100), 40)
# this is a trick to have float errors spanning from the order of 10^2 to 10^-2
X_err = (1. * np.array(random.sample(range(1, 100), 40))) / (
    1. * np.array(random.sample(range(1, 100), 40)))
Y_err = (1. * np.array(random.sample(range(1, 100), 40))) / (
    1. * np.array(random.sample(range(1, 100), 40)))

# creates array since linmix works with arrays and not lists
Xfa = np.array(X)
Yfa = np.array(Y)
Xfa_err = np.array(X_err)
Yfa_err = np.array(Y_err)

# run a Monte carlo Markov Chain producing at least 5000 iterations
# (a.k.a. 5000 different linear regressions)
lm = linmix.LinMix(Xfa, Yfa, xsig=Xfa_err, ysig=Yfa_err, K=3)
lm.run_mcmc(miniter=5000, maxiter=100000, silent=True)

# print the average parameters and several statistical tests
# (BUG FIX: converted from Python 2 print statements, which are syntax
# errors under Python 3)
print('')
print(" Pearson test ", stats.pearsonr(X, Y))
print("Spearman test ", stats.spearmanr(X, Y))
print('')
print('----- Bayesian linear regression with error in both X and Y -----')
# BUG FIX: the original printed the 'alpha' chain under the label "Beta"
# and the 'beta' chain under "Alpha"; in linmix, alpha is the intercept
# and beta is the slope.
print('Alpha = ', lm.chain['alpha'].mean(), '+/-', lm.chain['alpha'].std())
print('Beta = ', lm.chain['beta'].mean(), '+/-', lm.chain['beta'].std())
print('Sigma = ', np.sqrt(lm.chain['sigsqr'].mean()), '+/-', np.sqrt(
    lm.chain['sigsqr'].std()))
print('Variance = ', lm.chain['sigsqr'].mean(), '+/-', lm.chain['sigsqr'].std())
print('Correlation = ', lm.chain['corr'].mean(), '+/-', lm.chain['corr'].std())
Y = np.log10(Y) # + fig, ax = plt.subplots(figsize=(10, 10)) ax.errorbar(X, Y, xerr=Xe, yerr=Ye, ls=" ", alpha=0.3) ax.scatter(X, Y, marker=".", s=20 / np.hypot(Xe, Ye)) #ax.set( # xlim=[-0.2, 0.8], ylim=[-0.2, 0.8], # xlabel=r"$\log_{10}\, \Pi$", ylabel=r"$\log_{10}\, \Lambda$", #) #ax.set_aspect("equal") #sns.despine() # - lm = linmix.LinMix(X[:-1], Y[:-1], Xe[:-1], Ye[:-1], K=2) lm.run_mcmc() dfchain = pd.DataFrame.from_records(lm.chain.tolist(), columns=lm.chain.dtype.names) dfchain dfchain.describe() pearsonr(X, Y) pd.DataFrame({"X": X, "Xe": Xe, "Y": Y, "Ye": Ye}).describe() # + vmin, vmax = -3.0, 1.0
# Censor low values: anything below 10 (with a nonzero error) is flagged
# as a non-detection and clipped to the y = 10 upper limit.
w10 = (y < 10) & (ysig != 0)
y[w10] = 10
delta = np.ones((len(x), ),
                dtype=int)  # should really be bool, but ints are easier
delta[w10] = 0

out = Table([x, y, xsig, ysig, delta],
            names=['x', 'y', 'xsig', 'ysig', 'delta'])

import astropy.io.ascii as ascii
ascii.write(out, 'test.dat')


#
def run():
    """Read 'test.dat' (generating it first if absent), run the LinMix
    MCMC with the censoring flags, and write the posterior chain to
    'test.pyout'.
    """
    import astropy.io.ascii as ascii
    try:
        a = ascii.read('test.dat')
    except FileNotFoundError:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt,
        # SystemExit, and real parse errors still propagate; only a
        # missing file triggers regeneration.
        generate_test_data()
        a = ascii.read('test.dat')
    lm = linmix.LinMix(a['x'], a['y'], a['xsig'], a['ysig'], delta=a['delta'])
    lm.run_mcmc()
    ascii.write(
        lm.chain[[
            'alpha', 'beta', 'sigsqr',
            'mu0', 'usqr', 'wsqr',
            'ximean', 'xisig', 'corr'
        ]], 'test.pyout')


# if __name__ == '__main__':
#     run()
lmr_chain = N.load('%s_MBH_stellarmass.npy' % linmixfilebase) lmr_alpha = lmr_chain['alpha'] lmr_beta = lmr_chain['beta'] lmupl_chain = N.load('%s_MBH_bulge_withlimits.npy' % linmixfilebase) lmupl_alpha = lmupl_chain['alpha'] lmupl_beta = lmupl_chain['beta'] lmnupl_chain = N.load('%s_MBH_bulge_withoutlimits.npy' % linmixfilebase) lmnupl_alpha = lmnupl_chain['alpha'] lmnupl_beta = lmnupl_chain['beta'] else: #Use linmix to fit to haring and rix x and ys lmhr = linmix.LinMix(xhr, yhr, xhrerr, yhrerr, K=2) lmhr.run_mcmc(silent=False, maxiter=5000) lmhr_alpha = lmhr.chain['alpha'] lmhr_beta = lmhr.chain['beta'] N.save('linmix_results_haringrixfit.npy', lmhr.chain) #Use linmix to fit to stellar mass and MBH relation from DISKDOM sample lmr = linmix.LinMix(results[mstarcol], results[mbhcol], results[dmstarcol], results[dmbhcol], K=2) lmr.run_mcmc(silent=False, maxiter=5000) lmr_alpha = lmr.chain['alpha'] lmr_beta = lmr.chain['beta']