def compute_XD_results(n_components=10, max_iter=500):
    clf = XDGMM(n_components, max_iter=max_iter, tol=1e-03,
                verbose=False, random_state=None)
    clf.fit(Z, Zerr)
    return clf
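# Hedged usage sketch for compute_XD_results above: Z and Zerr are free
# variables, so the shapes assumed here (N x D data with N x D x D diagonal
# measurement covariances) are an illustration, not taken from the source.
# The max_iter keyword assumes a newer astroML release (older ones spell it
# n_iter).
import numpy as np
from astroML.density_estimation import XDGMM

rng = np.random.default_rng(0)
Z = rng.normal(size=(200, 2))                  # hypothetical data matrix
Zerr = np.zeros((200, 2, 2))
Zerr[:, 0, 0] = Zerr[:, 1, 1] = 0.05 ** 2      # hypothetical diagonal errors
clf = compute_XD_results(n_components=3, max_iter=100)
print(clf.mu.shape, clf.V.shape)               # (3, 2) and (3, 2, 2)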
def XD_filter(y, yerr):
    clf = XDGMM(n_components=2, n_iter=4)
    Y = y.reshape(y.shape[0], 1)
    Yerr = np.zeros((y.shape[0], 1, 1))
    #diag = np.arange(Y.shape[-1])
    Yerr[:, 0, 0] = yerr ** 2
    clf.fit(Y, Yerr)
    return clf.mu, clf.V
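# Hedged usage sketch for XD_filter above; the bimodal toy sample and its
# errors are assumptions for illustration. The call is left commented because
# XD_filter's n_iter keyword requires an older astroML release (newer ones
# spell it max_iter).
import numpy as np

rng = np.random.default_rng(0)
y = np.concatenate([rng.normal(0.0, 1.0, 100), rng.normal(5.0, 1.0, 100)])
yerr = np.full(y.size, 0.1)
# mu, V = XD_filter(y, yerr)   # one (mean, variance) pair per component,
#                              # roughly the two cluster centers 0 and 5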
def mixture_fitting(args):
    '''component = 0: u-g, 1: g-r, 2: r-i'''
    zmin, zmax, component = args
    zspec, x, xerr, color, color_err = catalog_slicer(zmin, zmax, component)
    Y_xd = np.vstack([x, color[component, :]]).T
    Yerr_xd = np.zeros((Y_xd.shape[0], 2, 2))
    Yerr_xd[:, 0, 0] = xerr
    Yerr_xd[:, 1, 1] = color_err[component, component, :]
    # fit a two-component GMM in (m_i, color) space within the redshift bin
    clf_in = XDGMM(2, n_iter=400)
    clf_in.fit(Y_xd, Yerr_xd)
    # mixture component associated with the red population
    red_index = np.where(clf_in.mu[:, 1] == clf_in.mu[:, 1].max())[0]
    mu_red, V_red = clf_in.mu[red_index], clf_in.V[red_index][0]
    red_line = mu_red[0, 1] + V_red[0, 1] * (Y_xd[:, 0] - mu_red[0, 0]) / V_red[0, 0]
    red_scatter = V_red[1, 1] - V_red[0, 1] ** 2. / V_red[0, 0]
    chi_red = (Y_xd[:, 1] - red_line) ** 2. / (red_scatter + Yerr_xd[:, 1, 1])
    mask = chi_red < 2
    # UPDATE: I have converged on using g-r for masking purposes!
    # At this point we don't care which color component was used for masking:
    # we keep the masked galaxies (chisq < 2) and fit the i-band colors, so
    # this step is agnostic about the color component used for masking.
    # Note that we have used mu_red[0, 0] (the first component of the center
    # of the red galaxies) as m_ref.
    x_xd = x[mask]
    xerr_xd = xerr[mask]
    Y_xd = np.vstack([color[0, mask], color[1, mask], color[2, mask]]).T
    Yerr_xd = np.zeros((Y_xd.shape[0], 3, 3))
    for i in range(3):
        for j in range(3):
            Yerr_xd[:, i, j] = color_err[i, j, mask]
    # fit a two-component GMM to the remaining galaxies in the
    # three-dimensional color space
    clf_fi = XDGMM(2, n_iter=400)
    clf_fi.fit(Y_xd, Yerr_xd)
    pure_index = np.where(clf_fi.mu[:, 1] == clf_fi.mu[:, 1].max())
    mu_pure, V_pure = clf_fi.mu[pure_index], clf_fi.V[pure_index][0]
    dY_pure = Y_xd - mu_pure
    P = np.linalg.inv(V_pure + Yerr_xd)
    chi = np.einsum('mn,mn->m', np.einsum('ijk,ik->ij', P, dY_pure), dY_pure)
    pure_mask = chi < 2
    zred = zspec[mask][pure_mask]
    zred = zred.reshape(zred.shape[0], 1)
    ired = x_xd[pure_mask]
    ired = ired.reshape(ired.shape[0], 1)
    eired = xerr_xd[pure_mask]
    eired = eired.reshape(eired.shape[0], 1)
    cred = Y_xd[pure_mask]
    ecred = Yerr_xd[pure_mask].reshape(cred.shape[0], cred.shape[1] * cred.shape[1])
    return [mu_red[0, 0], np.hstack([zred, ired, eired, cred, ecred])]
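# Hedged note on the red-sequence step above: for a bivariate Gaussian with
# mean (mu_m, mu_c) and covariance V, the color at fixed magnitude m is
# Gaussian with conditional mean mu_c + V[0,1] * (m - mu_m) / V[0,0] and
# conditional variance V[1,1] - V[0,1]**2 / V[0,0] (the Schur complement),
# which is exactly what red_line and red_scatter compute. The numbers below
# are made up for a minimal sanity check.
import numpy as np

V = np.array([[0.04, 0.01],
              [0.01, 0.09]])
cond_var = V[1, 1] - V[0, 1] ** 2 / V[0, 0]
assert 0.0 < cond_var < V[1, 1]    # conditioning can only shrink the variance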
def compute_XD_results(x, y, dx, dy, n_components=6, n_iter=50):
    X = np.vstack([x, y]).T
    Xerr = np.zeros(X.shape + X.shape[-1:])
    diag = np.arange(X.shape[-1])
    Xerr[:, diag, diag] = np.vstack([dx ** 2, dy ** 2]).T
    clf = None
    while clf is None:
        try:
            clf = XDGMM(n_components, n_iter=n_iter, verbose=True)
            clf.fit(X, Xerr)
        except np.linalg.LinAlgError:
            # a singular covariance during the EM updates raises here;
            # retry, since a fresh stochastic initialization may succeed
            print('Error: Singular Matrix. Retrying...')
            clf = None
    return clf
def initialise(self):
    nmeas, ndim = self.data.shape
    lower_idxs = np.tril_indices(ndim, k=-1)
    if self.data_covariances is not None:
        xdgmm = XDGMM(1, 1000, verbose=True)
        xdgmm.fit(self.data, self.data_covariances)
        guess_mu = xdgmm.mu[0]
        guess_Sigma = xdgmm.V[0]
    else:
        gmm = GaussianMixture(1, max_iter=1000,
                              covariance_type='full').fit(self.data)
        guess_mu = gmm.means_[0]
        guess_Sigma = gmm.covariances_[0]
    guess_chol = np.linalg.cholesky(guess_Sigma)
    guess_packed_chol = guess_chol[lower_idxs]
    return guess_mu, guess_Sigma, guess_packed_chol, guess_chol
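# Hedged illustration of the Cholesky packing used by initialise above:
# np.tril_indices(ndim, k=-1) selects the strictly lower triangle, so
# guess_packed_chol holds only the off-diagonal Cholesky factors as a flat
# vector. The toy covariance below is an assumption for illustration.
import numpy as np

Sigma = np.array([[2.0, 0.6, 0.2],
                  [0.6, 1.0, 0.3],
                  [0.2, 0.3, 1.5]])
chol = np.linalg.cholesky(Sigma)           # lower-triangular L with Sigma = L L^T
packed = chol[np.tril_indices(3, k=-1)]    # strictly-lower entries only
print(packed.shape)                        # (3,) for ndim = 3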
def check_single_gaussian(N=100, D=3, sigma=0.1):
    np.random.seed(0)
    mu = np.random.random(D)
    V = np.random.random((D, D))
    V = np.dot(V, V.T)

    X = np.random.multivariate_normal(mu, V, size=N)
    Xerr = np.zeros((N, D, D))
    Xerr[:, range(D), range(D)] = sigma ** 2
    X += np.random.normal(0, sigma, X.shape)

    xdgmm = XDGMM(1)
    xdgmm.fit(X, Xerr)

    # because of sample variance, results will be similar
    # but not identical. We'll use a fudge factor of 0.1
    assert_allclose(mu, xdgmm.mu[0], atol=0.1)
    assert_allclose(V, xdgmm.V[0], atol=0.1)
def main(ps1_file, g_lim):
    try:
        ps1 = np.load(ps1_file)
    except (OSError, ValueError):
        # not a .npy file; fall back to reading an ASCII table
        ps1 = ascii.read(ps1_file)
    cut_ps1 = cut_func(ps1, g_lim=g_lim)

    ps1_c = coord.SkyCoord(ra=cut_ps1['ra'] * u.degree,
                           dec=cut_ps1['dec'] * u.degree)
    cut_ps1 = cut_ps1[ps1_c.separation(cluster_c) > (0.12 * u.degree)]

    # feature and covariance matrices
    X, Xcov = data_to_X_cov(cut_ps1)

    n_clusters = 8
    n_iter = 512
    xd_clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-4, verbose=True)
    xd_clf.fit(X[::100], Xcov[::100])    # fit on every 100th row to keep EM tractable

    # pickle this thing!
    with open("xd_control_clf.pickle", "wb") as f:
        pickle.dump(xd_clf, f)
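# Hedged sketch of reloading the fit that main() above pickles; the filename
# matches the one written by main(), everything else here is assumed.
import pickle

with open("xd_control_clf.pickle", "rb") as f:
    xd_clf = pickle.load(f)
print(xd_clf.mu.shape)    # one row per mixture component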
def compute_XD_results(n_components=10, n_iter=500):
    clf = XDGMM(n_components, n_iter=n_iter)
    clf.fit(X, Xerr)
    return clf
class Classifier(object):
    def __init__(self, algorithm='XD', n_comp=20):
        if algorithm == 'XD':
            self.algorithm = 'XD'
            self.lQSO_model = XDGMM(n_components=n_comp, verbose=True)
            self.dud_model = XDGMM(n_components=n_comp, verbose=True)
        elif algorithm == 'RandomForest':
            self.algorithm = 'RandomForest'
            self.trialRF = RandomForestClassifier()
            self.RF_params = {'n_estimators': (10, 50, 200),
                              'max_features': ['auto', 2, 4],
                              'criterion': ['gini', 'entropy'],
                              'min_samples_leaf': [1, 2]}

    def train(self, train, truth, covmat=1):
        if self.algorithm == 'XD':
            self.XDtrain(train, truth, covmat)
        elif self.algorithm == 'RandomForest':
            self.RFtrain(train, truth)

    def RFtrain(self, train, truth):
        tunedRF = grid_search.GridSearchCV(self.trialRF, self.RF_params,
                                           n_jobs=-1, cv=3, verbose=1)
        self.optRF = tunedRF.fit(train, truth)

    def XDtrain(self, train, truth, covmat=1):
        # fit one density model per class (lensed QSOs vs. duds)
        self.lQSO_model.fit(train[truth == 1], listify(covmat, np.sum(truth)))
        self.dud_model.fit(train[truth == 0], listify(covmat, np.sum(1 - truth)))

    def test(self, test, covmat=1):
        if self.algorithm == 'XD':
            self.XDprobs(test, covmat)
        elif self.algorithm == 'RandomForest':
            self.RFprobs(test)

    def RFprobs(self, test):
        self.dud_probs = self.optRF.predict_proba(test)[:, 0]
        self.lQSO_probs = self.optRF.predict_proba(test)[:, 1]

    def XDprobs(self, test, covmat):
        lQSO_like = np.sum(np.exp(self.lQSO_model.logprob_a(
            test, listify(covmat, test.shape[0]))), axis=1)
        dud_like = np.sum(np.exp(self.dud_model.logprob_a(
            test, listify(covmat, test.shape[0]))), axis=1)
        # Bayes' rule with module-level class priors
        norm = lQSO_like * lQSO_prior + dud_like * dud_prior
        self.lQSO_probs = (lQSO_like * lQSO_prior) / norm
        self.dud_probs = (dud_like * dud_prior) / norm

    def make_roc(self, truth):
        fpr, tpr, _ = metrics.roc_curve(truth, self.lQSO_probs, pos_label=1)
        plt.title('ROC Curve')
        plt.plot(fpr, tpr, 'b--')
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        return fpr, tpr

    def save(self, pkl_fname='classifiers.pkl'):
        outDict = {}
        if hasattr(self, 'lQSO_model'):
            outDict['lQSO_model'] = self.lQSO_model
        if hasattr(self, 'dud_model'):
            outDict['dud_model'] = self.dud_model
        if hasattr(self, 'optRF'):
            outDict['optRF'] = self.optRF
        with open(pkl_fname, 'wb') as outfile:
            pickle.dump(outDict, outfile)

    def load(self, pkl_fname='classifiers.pkl'):
        with open(pkl_fname, 'rb') as pkl_in:
            inDict = pickle.load(pkl_in)
        if 'lQSO_model' in inDict:
            self.lQSO_model = inDict['lQSO_model']
        if 'dud_model' in inDict:
            self.dud_model = inDict['dud_model']
        if 'optRF' in inDict:
            self.optRF = inDict['optRF']
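# Hedged usage sketch for Classifier above. listify, lQSO_prior, and
# dud_prior are module-level names the class relies on but which are not
# defined in this snippet, so the calls are left as an illustrative outline:
#
#   clf = Classifier(algorithm='XD', n_comp=5)
#   clf.train(train_features, truth_labels, covmat=feature_cov)
#   clf.test(test_features, covmat=feature_cov)
#   fpr, tpr = clf.make_roc(test_truth)
#   clf.save('classifiers.pkl')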
def compute_XD(n_clusters=12, rseed=0, n_iter=100, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-5, verbose=verbose)
    clf.fit(X, Xcov)
    return clf
# Read the data
ids, pmx, pmy, magK, pmex, pmey = np.genfromtxt('PM_final.dat', unpack=True,
                                                usecols=(0, 3, 4, 5, 8, 9))

# Filter by magnitude and proper-motion error
mag_mask = (magK < max_mag) & (magK > min_mag)
err_mask = (pmex**2 + pmey**2)**0.5 < max_err

dataset = np.vstack([pmx[mag_mask*err_mask], pmy[mag_mask*err_mask]]).T
dataerr = np.zeros(dataset.shape + dataset.shape[-1:])
diag = np.arange(dataset.shape[-1])
dataerr[:, diag, diag] = np.vstack([pmex[mag_mask*err_mask]**2,
                                    pmey[mag_mask*err_mask]**2]).T

clf = XDGMM(compo, itera, verbose=True)
clf.fit(dataset, dataerr)
samples = clf.sample(np.sum(mag_mask*err_mask))

clfu = mixture.VBGMM(compo, covariance_type='full', tol=1e-5, n_iter=1000)
clfu.fit((pmx[mag_mask*err_mask])[:, np.newaxis])
meu = np.hstack(clfu.means_)
stu = np.hstack(clfu.precs_)[0]
weu = np.hstack(clfu.weights_)

clfd = XDGMM(compo, itera, verbose=True)
clfd.fit(pmy[mag_mask*err_mask][:, np.newaxis],
         (pmey[mag_mask*err_mask]**2)[:, np.newaxis, np.newaxis])
samd = clfd.sample(np.sum(mag_mask*err_mask))

print('Centers:\n', clf.mu)
print('Covariance:\n', clf.V)
print('Alpha:\n', clf.alpha)
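# Hedged note: the dataerr block above builds a stack of per-star diagonal
# covariance matrices from the proper-motion errors. A minimal equivalent
# with made-up errors:
import numpy as np

ex = np.array([0.1, 0.2])    # hypothetical pm_x errors
ey = np.array([0.3, 0.4])    # hypothetical pm_y errors
cov = np.zeros((2, 2, 2))
d = np.arange(2)
cov[:, d, d] = np.vstack([ex**2, ey**2]).T
print(cov[0])                # [[0.01 0.  ], [0.   0.09]]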
def compute_XD(n_clusters=2, rseed=0, n_iter=30, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-5, verbose=verbose)
    clf.fit(newZ, Zcov)
    return clf
def compute_XD(n_clusters=12, rseed=0, max_iter=100, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, max_iter=max_iter, tol=1E-5, verbose=verbose)
    clf.fit(X, Xcov)
    return clf
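# Hedged compatibility shim: the compute_XD variants above mix the older
# astroML keyword n_iter with the newer max_iter. A small assumed helper
# like this keeps one call site working against either release:
def make_xdgmm(n_components, iterations, **kwargs):
    from astroML.density_estimation import XDGMM
    try:
        return XDGMM(n_components, max_iter=iterations, **kwargs)
    except TypeError:
        # older astroML has no max_iter keyword; fall back to n_iter
        return XDGMM(n_components, n_iter=iterations, **kwargs)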