def bandwidth(self, X):
    """
    Estimate a clustering bandwidth from the data by random sampling.

    Random probe points are generated from the overall value range of X.
    The fraction of probes that lie close to the data (closer than
    ``self.sf`` times a typical nearest-neighbour spread) is scaled into a
    volume, and the returned bandwidth is the D-th root of that volume's
    per-cluster share (``self.Ncl`` clusters wanted).

    The estimate is stochastic: every call draws from ``randint`` and
    ``randn``.

    TODO Replace this with a method which treats the data like a distorted
    doughnut by estimating the limit cycle, and integrating an ellipsoid
    swept along the trajectory of the limit cycle with appropriate lengths.

    X -- 2-D array; shape[0] is used as the point count, shape[1] as the
         dimension
    """
    n_points, n_dims = X.shape[0], X.shape[1]
    # Number of points we would like each cluster to hold
    wanted_per_cluster = n_points / float(self.Ncl)
    debugLog(self, "Estimating bandwidth")

    # Number of random draws; grows exponentially with the dimension
    n_samples = 10 * (6 ** n_dims)

    # Random subsample of the data (indices drawn with replacement)
    picked = randint(0, n_points, n_samples)
    subsample = X[picked]

    # Random probe points scaled by the global value range of X.
    # NOTE(review): the original comment spoke of sampling an "n-box", but
    # randn draws from a normal distribution, so the probes are Gaussian
    # around X.min() rather than uniform in the box -- confirm whether a
    # uniform draw was intended.
    floor = X.min()
    extent = X.max() - floor
    probes = extent * randn(n_samples, n_dims) + floor

    # Spread (standard deviation) of the distance from each subsampled point
    # to its nearest neighbour among the points that were NOT picked
    unpicked = list(set(arange(n_points)).difference(picked))
    nearest_unpicked = cdist(X[unpicked], subsample).min(0)
    spread = std(nearest_unpicked)

    # Count the probes that have some data point within sf * spread
    nearest_data = cdist(X, probes).min(0)
    hits = sum(nearest_data < self.sf * spread)

    # Hit fraction times the box volume, with the sphere prefactor
    volume = nSphereVolume(n_dims) * ((hits / float(n_samples)) * extent ** n_dims)

    # Length scale of the share of the volume belonging to one cluster
    return ((volume * wanted_per_cluster) / n_points) ** (1.0 / n_dims)
def fit(self, t, par=None):
    """
    Fit one form per order and return the one with the lowest AIC.

    A candidate is built with ``self._makeForm(par, t, p)`` for every order
    p from 1 to ``self.ordPLim`` inclusive. The winner is logged and the
    fitter is marked dirty before it is returned.

    t   -- data handed straight to ``_makeForm``
    par -- passed through to ``_makeForm`` unchanged (defaults to None)

    Raises IndexError when ``self.ordPLim`` is below 1, because there is
    then no candidate to pick.
    """
    self.dirtyCheck()

    forms = []
    for order in xrange(1, self.ordPLim + 1):
        forms.append(self._makeForm(par, t, order))

    # Stable sort: among equal AIC values the lowest order wins
    best = sorted(forms, key=lambda form: form.AIC)[0]

    summary = "".join(
        [
            "Final Fourier, OrdP: ",
            str(best.p.getOrder()),
            " AIC: ",
            str(best.AIC),
            " Parameters: ",
            str(best.numParam),
        ]
    )
    debugLog(self, summary)

    self.makeDirty()
    return best
def cluster(self, X):
    """
    Cluster X with mean shift.

    Runs ``MeanShift`` with bandwidth ``self.bw`` and bin seeding enabled,
    logs how many distinct labels came out, and returns the pair
    ``(cluster_centers, labels)`` taken from the fitted estimator.
    """
    debugLog(self, "Clustering")

    shifter = MeanShift(bandwidth=self.bw, bin_seeding=True)
    shifter.fit(X)

    n_found = unique(shifter.labels_).size
    debugLog(self, "Number of estimated clusters : %d" % n_found)

    # Small clusters are not merged here; results are returned as-is
    return (shifter.cluster_centers_, shifter.labels_)
def getRBFParam(self, X, dX):
    """
    Compute the parameters for the composite radial basis function.

    Steps, in order:
      * estimate ``self.bw`` with ``self.bandwidth`` if it is still None
      * cluster X
      * if ``self.maxN`` is None, rescale ``self.Nsb`` by the ratio of the
        wanted cluster count (``self.Ncl``) to the number of centres found
      * build the cluster scales, the neighbour network, the basis
        functions with their derivatives and widths, and the M matrix

    Side effects: may overwrite ``self.bw`` and ``self.Nsb``.

    Returns the tuple ``(c, labels, basii, dbasii, sigma, M, A)``: cluster
    centres, per-sample labels, the three outputs of ``makeBasii``, the
    result of ``getMMat`` and the result of ``nnetwork``.
    """
    if self.bw is None:
        self.bw = self.bandwidth(X)

    centres, labels = self.cluster(X)

    if self.maxN is None:
        debugLog(self, "Adjusting number radial basis terms in a cluster so total is comparable.")
        n_found = float(centres.shape[0])
        # int() truncates toward zero
        self.Nsb = int(self.Nsb * self.Ncl / n_found)

    scales = CompositeRBFitter.clustScale(centres, labels, X)

    # The neighbour network is built from centre-to-centre distances
    separations = cdist(centres, centres)
    network = CompositeRBFitter.nnetwork(separations, scales)

    basii, dbasii, sigma = self.makeBasii(X, dX, scales, labels)
    mmat = CompositeRBFitter.getMMat(X, dX, basii, dbasii, sigma, labels)

    return (centres, labels, basii, dbasii, sigma, mmat, network)
def fit(self, t, par=None):
    """
    Fit a composite radial basis form to the residual left by ``par``.

    The residual ``t.flatdot(par) - par.C`` is approximated cluster by
    cluster: on every iteration the clusters are visited from worst to
    best current fit, each one's coefficients are updated by a least
    squares solve over its own samples plus those of its neighbours, and
    the model prediction is recomputed before the next cluster is visited.

    t   -- data object; this method uses getFlatX, getFlatdX, flatdot and
           getN on it
    par -- existing form whose residual is fitted. Although it defaults to
           None, ``par.C`` is read unconditionally, so None fails with
           AttributeError.

    Side effects: overwrites ``self.Ncl``, ``self.Nsb`` and possibly
    ``self.Nit``, and marks the fitter dirty.

    Returns a CompositeRBForm with ``RSS`` and ``AIC`` attributes set.

    NOTE: relies on Python 2 behaviour (``xrange``, and subscripting the
    list returned by ``zip``).
    """
    self.dirtyCheck()
    X = t.getFlatX().T
    dX = t.getFlatdX().T
    # Residual to be explained by the radial basis terms
    err = t.flatdot(par) - par.C
    c, labels, basii, dbasii, sigma, M, A = self.getRBFParam(X, dX)
    # M is indexed below as M[cluster, basis term, sample]; its shape
    # replaces the configured cluster and term counts
    self.Ncl = M.shape[0]
    if self.Ncl == 1:
        # A single cluster has no neighbours to alternate with, so a single
        # pass is used
        self.Nit = 1
    self.Nsb = M.shape[1]
    # Coefficients, one row per cluster, and the current model prediction
    P = zeros((self.Ncl, self.Nsb))
    merr = zeros_like(err)
    debugLog(CompositeRBForm, "Initial sum square error: " + str(sum(err ** 2)))
    for nit in xrange(self.Nit):
        # Cluster ids sorted by the mean squared residual over their own
        # samples, largest first, so the worst-fitted cluster goes first
        order = zip(
            *sorted(
                [(i, mean(((err - merr) ** 2)[in1d(labels, i)], 0)) for i in xrange(self.Ncl)],
                key=lambda x: x[1],
                reverse=True,
            )
        )[0]
        for k in order:
            # Row k of A selects the clusters treated as neighbours of k
            # (presumably a boolean adjacency row -- confirm against nnetwork)
            cl = A[k]
            adj = hstack([arange(A.shape[0])[cl], k])
            # Samples belonging to cluster k or to one of its neighbours
            Q = in1d(labels, adj)
            errs = err[Q]
            merrs = merr[Q]
            # Cluster k's basis terms evaluated at the selected samples
            m = vstack(M[k, :, Q])
            # Least squares correction for what the model does not yet explain
            p = lstsq(m, errs - merrs)[0]
            P[k] = P[k] + p
            # Recompute the full prediction: sum over clusters and terms
            merr = einsum("ij,ijk->k", P, M)
            debugLog(
                CompositeRBForm,
                (
                    "Iteration "
                    + str(nit)
                    + " cluster "
                    + str(k)
                    + " sum square error: "
                    + str(sum((err - merr) ** 2))
                ),
            )
        debugLog(CompositeRBForm, "Iteration " + str(nit) + " sum square error:" + str(sum((err - merr) ** 2)))
    # From here on `c` is the fitted form, no longer the cluster centres.
    # The coefficients are passed with their sign flipped.
    c = CompositeRBForm(-P, basii, dbasii, sigma, par, self.pen)
    c.RSS = sum(resid(t.flatdot(c), c.C) ** 2)
    c.AIC = AIC(c.RSS, t.getN(), c.numParam)
    debugLog(self, "Final Radial AIC: " + str(c.AIC) + " Parameters: " + str(c.numParam))
    self.makeDirty()
    return c
def dirtyCheck(self):
    """
    Log a warning if this fitter is flagged dirty.

    Only logs; it neither raises nor clears the flag.
    """
    if not self.dirty:
        return
    debugLog(self, "Attempt to use a dirty fitter!")