def getCoverage(gmm, coords, sel_callback=None, repeat=10, rotate=True):
    """Estimate how well the selected part of ``coords`` covers all of ``coords``.

    A fresh GMM with the same number of components, dimensionality and
    covariances as ``gmm`` (but random amplitudes and, optionally, randomly
    rotated covariances) is centered on selected points.  Selected points
    get a contribution of ``1 / repeat`` per repetition; each unselected
    point gets the value of that GMM at its location relative to the value
    at its nearest selected neighbor, also divided by ``repeat``.

    Args:
        gmm: mixture providing ``K``, ``D`` and ``covar``.  It is not
            modified by this function.
        coords: array of sample positions; indexed with the boolean mask
            returned by ``sel_callback``.
        sel_callback: callable mapping ``coords`` to a boolean selection
            mask.  If ``None``, every point counts as fully covered.
        repeat: number of repetitions (selection and random GMM are
            re-drawn each time) over which the estimate is averaged.
        rotate: if true, apply an independent random rotation to each
            component covariance in every repetition.

    Returns:
        1-d array of length ``len(coords)`` with the coverage estimate.
    """
    if sel_callback is None:
        return np.ones(len(coords))

    from sklearn.neighbors import KDTree

    coverage = np.zeros(len(coords))
    for _ in range(repeat):
        sel = sel_callback(coords)
        inv_sel = np.logical_not(sel)
        coverage[sel] += 1. / repeat

        inside = coords[sel]
        outside = coords[inv_sel]
        if len(inside) == 0 or len(outside) == 0:
            # Nothing to extrapolate from (no selected point) or nothing to
            # extrapolate to (no rejected point); a KD-tree cannot be built
            # or queried on an empty array.
            continue

        gmm_ = pygmmis.GMM(K=gmm.K, D=gmm.D)
        # use the component count of the input GMM, not a global K
        gmm_.amp = np.random.rand(gmm.K)
        gmm_.amp /= gmm_.amp.sum()
        # copy: the rotations below write into covar[k] in place and must
        # not alter the caller's gmm
        gmm_.covar = np.array(gmm.covar)

        if rotate:
            # use random rotations for each component covariance
            # from http://www.mathworks.com/matlabcentral/newsreader/view_thread/298500
            # since we don't care about parity flips we don't have to check
            # the determinant of R (and hence don't need R)
            for k in range(gmm_.K):
                Q, _ = np.linalg.qr(np.random.normal(size=(gmm.D, gmm.D)),
                                    mode='complete')
                gmm_.covar[k] = np.dot(Q, np.dot(gmm_.covar[k], Q.T))

        outside_cov = coverage[inv_sel]
        tree = KDTree(inside)
        closest_inside = tree.query(outside, k=1,
                                    return_distance=False).flatten()
        for c in np.unique(closest_inside):
            # center every component on this selected point
            gmm_.mean[:] = inside[c]
            closest_to_c = (closest_inside == c)
            # value at the rejected points relative to the value at the center
            outside_cov[closest_to_c] += (gmm_(outside[closest_to_c])
                                          / gmm_(np.array([inside[c]]))
                                          / repeat)
        # boolean indexing above produced a copy, so write the result back
        coverage[inv_sel] = outside_cov
    return coverage
def loadPrior(self, priorRefList):
    """Populate the moment prior and load the matching mixture model.

    Catalogs are read from ``priorRefList`` in order.  A reference whose
    catalog cannot be read or added is reported and skipped; iteration
    stops after the first reference that was added successfully and whose
    'COV' metadata was stored in ``self.cov`` as a 6x6 array.

    Afterwards the flux/variance limits are copied from the prior,
    ``self.varBin`` is derived from the mid-point of the variance range,
    and ``self.gmm`` is loaded from
    ``gmm_files/gmm_<rerunLabel>_<label>.npz``.

    Args:
        priorRefList: iterable of references offering ``dataId`` and
            ``get('prior', immediate=True)``.
    """
    self.prior = bfd.MomentPrior()
    self.log.info('Load prior')

    cov_pending = True
    for priorRef in priorRefList:
        self.log.info("Adding prior %s" % priorRef.dataId)
        try:
            cat = priorRef.get('prior', immediate=True)
            self.prior.addCatalog(
                cat,
                self.config.invariantCovariance,
                self.config.sampleFraction,
                self.config.sampleSeed,
            )
            # Should be same for all prior catalogs
            if cov_pending:
                cov_values = cat.getTable().getMetadata().getArrayDouble('COV')
                self.cov = numpy.array(cov_values).reshape(6, 6)
                cov_pending = False
        except Exception as e:
            print('Failed to read', e)
            continue
        # one successfully loaded catalog is enough
        if not cov_pending:
            break

    self.fluxMin = self.prior.getFluxMin()
    self.fluxMax = self.prior.getFluxMax()
    self.varMin = self.prior.getVarMin()
    self.varMax = self.prior.getVarMax()

    # index of the variance bin that contains the centre of the range
    bins = numpy.arange(0.05, 1.25, 0.05)
    mid_var = (self.varMax + self.varMin) / 2.
    self.varBin = numpy.digitize([mid_var], bins)[0] - 1

    priorLabel = self.label
    self.log.info("Creating mixture model")
    self.gmm = pygmmis.GMM()
    file_name = 'gmm_files/gmm_%s_%s.npz' % (self.config.rerunLabel,
                                             priorLabel)
    self.gmm.load(file_name)
    self.log.info("File loaded")
for j in range(niter):
    # simulate fake gaussian
    # Pick a random index into ra.  np.random.rand() without a size returns
    # a plain float (same draw as rand(1)), which avoids int() on a
    # 1-element array -- a conversion NumPy has deprecated.
    rand = int(np.random.rand() * len(ra))
    # NOTE(review): ra/dec are rebound below to original + simulated points,
    # so from the second iteration on the centre may be taken from the
    # previous iteration's simulated sample -- confirm this is intended.
    randcenter = [ra[rand], dec[rand]]
    simcov = np.diagflat([0.1, 0.1]) / simamp
    simgauss = np.random.multivariate_normal(randcenter, simcov, gaussnum)
    randcenterlist.append(randcenter)
    # print "simgauss",simgauss.shape
    # plt.scatter(simgauss[:,0],simgauss[:,1])
    # plt.show()

    # append the simulated points to the original catalog
    ra = np.concatenate((origra, simgauss[:, 0]))
    dec = np.concatenate((origdec, simgauss[:, 1]))

    # ----------------gmm initialization----------------
    gmm = pygmmis.GMM(K=K, D=D)
    data = pygmmis.createShared(data)

    # positional uncertainties
    dispersion = .01
    default_covar = np.eye(D) * dispersion**2
    covar_cb = partial(pygmmis.covar_callback_default, default=default_covar)

    # background: footprint is the per-axis (min, max) bounding box of data
    footprint = data.min(axis=0), data.max(axis=0)
    bg = pygmmis.Background(footprint)
    bg.amp = 0.95
    bg.amp_max = 0.999
    bg.adjust_amp = True

    # run the fitter
# ---- run configuration ----
disp = 0.5         # additive noise dispersion
bg_amp = 0.0       # fraction of background samples
w = 0.1            # minimum covariance regularization [data units]
cutoff = 5         # cutoff distance between components [sigma]
seed = 8365        # seed value
oversampling = 10  # for missing data: imputation samples per observed sample

# show EM iteration results
logging.basicConfig(level=logging.INFO, format='%(message)s')

# define RNG for run
from numpy.random import RandomState
rng = RandomState(seed)

# draw N points from 3-component GMM
D = 2
gmm = pygmmis.GMM(K=3, D=2)

# amplitudes, normalized to sum to one
gmm.amp[:] = np.array([0.36060026, 0.27986906, 0.206774])
gmm.amp /= gmm.amp.sum()

# component means: one row per component
gmm.mean[:, :] = 10 * np.array([
    [0.08016886, 0.21300697],
    [0.70306351, 0.6709532],
    [0.01087670, 0.852077],
])

# component covariances: one 2x2 matrix per component
gmm.covar[:, :, :] = 100 * np.array([
    [[0.08530014, -0.00314178],
     [-0.00314178, 0.00541106]],
    [[0.03053402, 0.0125736],
     [0.0125736, 0.01075791]],
    [[0.00258605, 0.00409287],
     [0.00409287, 0.01065186]],
])

# data come from pure GMM model or one with background?
orig = gmm.draw(N, rng=rng)
if bg_amp == 0:
    bg = None
    orig_bg = orig
# per-cell counters on a C x C x C grid
cube_shape = (C,) * 3
count__cube = np.zeros(cube_shape)
count0_cube = np.zeros(cube_shape)

# one result slot per component and repetition
R = 10
amp0, frac, Omega, assoc_frac, posterior = (
    np.empty(R * K) for _ in range(5)
)

cutoff_nd = pygmmis.chi2_cutoff(D, cutoff=inner_cutoff)
counter = 0
for r in range(R):
    print("start")

    # create original sample from GMM
    gmm0 = pygmmis.GMM(K=K, D=D)
    initCube(gmm0, w=w*10, rng=rng)  # use larger size floor than in fit
    data0, nbh0 = drawWithNbh(gmm0, N, rng=rng)

    # apply selection
    sel0 = sel_callback(data0)

    # how often is each component used:
    # label every sample with the index of the component listed in nbh0
    comp0 = np.empty(len(data0), dtype='uint32')
    for k in range(gmm0.K):
        comp0[nbh0[k]] = k
    count0 = np.bincount(comp0, minlength=gmm0.K)

    # compute effective Omega: same tally restricted to selected samples
    comp = comp0[sel0]
    count = np.bincount(comp, minlength=gmm0.K)
sel_type = "boxWithHole" # type of selection disp = 0.7 # additive noise dispersion bg_amp = 0.0 # fraction of background samples w = 0.1 # minimum covariance regularization [data units] cutoff = 5 # cutoff distance between components [sigma] seed = 8366 # seed value pygmmis.VERBOSITY = 1 pygmmis.OVERSAMPLING = 10 # define RNG for run from numpy.random import RandomState rng = RandomState(seed) # draw N points from 3-component GMM D = 2 gmm = pygmmis.GMM(K=3, D=2) gmm.amp[:] = np.array([ 0.36060026, 0.27986906, 0.206774]) gmm.amp /= gmm.amp.sum() gmm.mean[:,:] = np.array([[ 0.08016886, 0.21300697], [ 0.70306351, 0.6709532 ], [ 0.01087670, 0.852077]])*10 gmm.covar[:,:,:] = np.array([[[ 0.08530014, -0.00314178], [-0.00314178, 0.00541106]], [[ 0.03053402, 0.0125736], [0.0125736, 0.01075791]], [[ 0.00258605, 0.00409287], [ 0.00409287, 0.01065186]]])*100 # data come from pure GMM model or one with background? orig = gmm.draw(N, rng=rng) if bg_amp == 0: