def factorize(self, show_progress=False, compute_w=True, compute_h=True,
              compute_err=True, niter=1):
    """ Factorize s.t. WH = data

    Parameters
    ----------
    show_progress : bool
        print some extra information to stdout.
    compute_h : bool
        iteratively update values for H.
    compute_w : bool
        iteratively update values for W.
    compute_err : bool
        compute Frobenius norm |data-WH| after each update and store
        it to .ferr[k].
    niter : int
        ignored; AA.factorize is always called with niter=1.

    Updated Values
    --------------
    .W : updated values for W.
    .H : updated values for H.
    .ferr : Frobenius norm |data-WH|.
    """
    AA.factorize(self, niter=1, show_progress=show_progress,
                 compute_w=compute_w, compute_h=compute_h,
                 compute_err=compute_err)
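# Usage sketch for the factorize() interface above. This is a hypothetical
# example: it assumes the method belongs to an AA subclass (here called
# Model) that is constructed as Model(data, num_bases=...) like the other
# classes in this file; the class name is an assumption, not taken from
# the source.
#
#   import numpy as np
#   data = np.random.random((10, 30))        # 10 dimensions, 30 samples
#   mdl = Model(data, num_bases=4)
#   mdl.factorize(compute_w=True, compute_h=True, compute_err=True)
#   rec = np.dot(mdl.W, mdl.H)               # reconstruction of data
#   print(mdl.ferr)                          # Frobenius errors per iteration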
def __init__(self, data, num_bases=4, dist_measure='l2', init='fastmap',
             **kwargs):
    AA.__init__(self, data, num_bases=num_bases)

    self._dist_measure = dist_measure
    self._init = init

    # assign the correct distance function
    if self._dist_measure == 'l1':
        self._distfunc = l1_distance
    elif self._dist_measure == 'l2':
        self._distfunc = l2_distance
    elif self._dist_measure == 'cosine':
        self._distfunc = cosine_distance
    elif self._dist_measure == 'abs_cosine':
        self._distfunc = abs_cosine_distance
    elif self._dist_measure == 'weighted_abs_cosine':
        self._distfunc = weighted_abs_cosine_distance
    elif self._dist_measure == 'kl':
        self._distfunc = kl_divergence
    else:
        raise ValueError("unknown dist_measure: %s" % self._dist_measure)
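# Design note: the if/elif chain above is a string-to-function dispatch. A
# minimal table-driven sketch of the same mapping (assuming the distance
# functions are importable from this module under the names used above):
#
#   _DIST_FUNCS = {
#       'l1': l1_distance,
#       'l2': l2_distance,
#       'cosine': cosine_distance,
#       'abs_cosine': abs_cosine_distance,
#       'weighted_abs_cosine': weighted_abs_cosine_distance,
#       'kl': kl_divergence,
#   }
#   try:
#       self._distfunc = _DIST_FUNCS[dist_measure]
#   except KeyError:
#       raise ValueError("unknown dist_measure: %s" % dist_measure)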
def __init__(self, data, num_bases=4, niter=100, show_progress=False,
             compW=True, compH=True, dist_measure='l2'):
    # call inherited method
    AA.__init__(self, data, num_bases=num_bases, niter=niter,
                show_progress=show_progress, compW=compW)

    self._dist_measure = dist_measure
    self._compH = compH

    # assign the correct distance function
    if self._dist_measure == 'l1':
        self._distfunc = l1_distance
    elif self._dist_measure == 'l2':
        self._distfunc = l2_distance
    elif self._dist_measure == 'cosine':
        self._distfunc = cosine_distance
    elif self._dist_measure == 'kl':
        self._distfunc = kl_divergence
    elif self._dist_measure == 'sparse_graph_l2':
        self._distfunc = sparse_graph_l2_distance
    else:
        raise ValueError("unknown dist_measure: %s" % self._dist_measure)
def __init__(self, data, num_bases=4, base_sel=3):
    # call inherited method
    AA.__init__(self, data, num_bases=num_bases)

    # base_sel should never be larger than the actual data dimension
    self._base_sel = base_sel
    if base_sel > self.data.shape[0]:
        self._base_sel = self.data.shape[0]
def factorize(self, show_progress=False, compute_w=True, compute_h=True,
              compute_err=True, robust_cluster=3, niter=1,
              robust_nselect=-1):
    """ Factorize s.t. WH = data

    Parameters
    ----------
    show_progress : bool
        print some extra information to stdout. False, default
    compute_h : bool
        iteratively update values for H. True, default
    compute_w : bool
        iteratively update values for W. True, default
    compute_err : bool
        compute Frobenius norm |data-WH| after each update and store
        it to .ferr[k].
    robust_cluster : int, optional
        set the number of clusters for robust map selection. 3, default
    robust_nselect : int, optional
        set the number of samples to consider for robust map
        selection. -1, default (automatically determine suitable number)

    Updated Values
    --------------
    .W : updated values for W.
    .H : updated values for H.
    .ferr : Frobenius norm |data-WH|.
    """
    self._robust_cluster = robust_cluster
    self._robust_nselect = robust_nselect

    if self._robust_nselect == -1:
        # heuristic: roughly 2*log(n) samples for n data columns
        self._robust_nselect = int(np.round(np.log(self.data.shape[1]) * 2))

    AA.factorize(self, niter=1, show_progress=show_progress,
                 compute_w=compute_w, compute_h=compute_h,
                 compute_err=compute_err)
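# The -1 default above triggers a sample-size heuristic of roughly 2*log(n)
# for n data columns. A standalone check of what that yields for a few
# dataset sizes (plain NumPy, no dependencies on this module):

import numpy as np

def _robust_nselect_heuristic(n_samples):
    # same formula as in factorize(): round(2 * ln(n)), as an int count
    return int(np.round(np.log(n_samples) * 2))

if __name__ == "__main__":
    for n in (10, 100, 1000, 100000):
        print(n, "->", _robust_nselect_heuristic(n))
    # 10 -> 5, 100 -> 9, 1000 -> 14, 100000 -> 23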
def initialization(self):
    # Fastmap-like initialization: set the starting index
    cur_p = 0

    # after 3 iterations the first "real" index is found
    for i in range(3):
        d = self._distance(cur_p)
        cur_p = np.argmax(d)

    self.select = []
    self.select.append(cur_p)

    if self._compH:
        self.H = np.zeros((self._num_bases, self._num_samples))

    if self._compW:
        AA.initialization(self)
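# A standalone sketch of the fastmap-like seeding used above: starting from
# an arbitrary column, repeatedly jump to the point farthest from the
# current one; after a few hops the dependence on the start point is gone.
# Plain NumPy, with Euclidean distance standing in for self._distance()
# (an assumption -- the real method supports several distance measures).

import numpy as np

def fastmap_seed(data, hops=3):
    # data: (dimensions, samples); returns the index of the seed column
    cur_p = 0
    for _ in range(hops):
        diff = data - data[:, cur_p:cur_p + 1]   # broadcast against column
        d = np.sqrt((diff ** 2).sum(axis=0))     # l2 distance to each column
        cur_p = int(np.argmax(d))
    return cur_p

if __name__ == "__main__":
    pts = np.random.random((2, 50))
    print("seed index:", fastmap_seed(pts))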
def update_w(self):
    """ compute new W """
    def select_hull_points(data, n=3):
        """ select data points for pairwise projections of the first
        n dimensions """

        # iterate over all projections and select data points
        idx = np.array([])

        # iterate over some pairwise combinations of dimensions
        for i in combinations(range(n), 2):
            # sample convex hull points in 2D projection
            convex_hull_d = quickhull(data[i, :].T)

            # get indices for convex hull data points
            idx = np.append(idx, vq(data[i, :], convex_hull_d.T))
            idx = np.unique(idx)

        return np.int32(idx)

    # determine convex hull data points using either PCA or random
    # projections
    method = 'randomprojection'

    if method == 'pca':
        pcamodel = PCA(self.data)
        pcamodel.factorize(show_progress=False)
        proj = pcamodel.H
    else:
        R = np.random.randn(self._base_sel, self._data_dimension)
        proj = np.dot(R, self.data)

    self._hull_idx = select_hull_points(proj, n=self._base_sel)
    aa_mdl = AA(self.data[:, self._hull_idx], num_bases=self._num_bases)

    # determine W
    aa_mdl.factorize(niter=50, compute_h=True, compute_w=True,
                     compute_err=True, show_progress=False)
    self.W = aa_mdl.W
    self._map_w_to_data()
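# Standalone sketch of the candidate-selection idea above: random-project
# the data to a few dimensions, take convex hulls of 2D coordinate pairs,
# and keep the union of hull vertices. scipy.spatial.ConvexHull stands in
# for the module's quickhull/vq pair (an assumption); it returns vertex
# indices directly, so no nearest-neighbor lookup is needed.

import numpy as np
from itertools import combinations
from scipy.spatial import ConvexHull

def hull_candidates(data, n_proj=3):
    # data: (dimensions, samples); returns column indices on some 2D hull
    R = np.random.randn(n_proj, data.shape[0])
    proj = R.dot(data)                         # (n_proj, samples)
    idx = set()
    for i, j in combinations(range(n_proj), 2):
        hull = ConvexHull(proj[[i, j], :].T)   # points as (samples, 2)
        idx.update(hull.vertices.tolist())
    return np.array(sorted(idx))

if __name__ == "__main__":
    X = np.random.random((20, 200))
    cand = hull_candidates(X, n_proj=3)
    print("kept %d of %d columns" % (len(cand), X.shape[1]))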
def __init__(self, data, num_bases=4, niter=100, show_progress=False,
             compW=True, compH=True, base_sel=3):
    # call inherited method
    AA.__init__(self, data, num_bases=num_bases, niter=niter,
                show_progress=show_progress, compW=compW)
    self._compH = compH

    # base_sel should never be larger than the actual data dimension
    if base_sel < self.data.shape[0]:
        self._base_sel = base_sel
    else:
        self._base_sel = self.data.shape[0]
def update_h(self):
    if self._method == 'pca':
        self.H = np.dot(pinv(self.W), self.data)

    elif self._method == 'nmf':
        mdl = NMF(self.data, num_bases=self._num_bases)
        mdl.W = self.W
        mdl.factorize(compute_w=False, niter=50)
        self.H = mdl.H.copy()

    elif self._method == 'aa':
        mdl = AA(self.data, num_bases=self._num_bases)
        mdl.W = self.W
        mdl.factorize(compute_w=False)
        self.H = mdl.H.copy()
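# Standalone sketch of the 'pca' branch above: with W held fixed, the
# least-squares coefficients are H = pinv(W) * data. Plain NumPy:

import numpy as np
from numpy.linalg import pinv

if __name__ == "__main__":
    data = np.random.random((10, 30))          # 10 dimensions, 30 samples
    W = np.random.random((10, 4))              # 4 fixed basis vectors
    H = np.dot(pinv(W), data)                  # least-squares coefficients
    err = np.linalg.norm(data - np.dot(W, H), 'fro')
    print("Frobenius error:", err)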
def updateW(self):
    def selectHullPoints(data, n=3):
        """ select data points for pairwise projections of the first
        n dimensions """

        # iterate over all projections and select data points
        idx = np.array([])

        # iterate over some pairwise combinations of dimensions
        for i in combinations(range(n), 2):
            # sample convex hull points in 2D projection
            convex_hull_d = quickhull(data[i, :].T)

            # get indices for convex hull data points
            idx = np.append(idx, vq(data[i, :], convex_hull_d.T))
            idx = np.unique(idx)

        return np.int32(idx)

    # determine convex hull data points only if the total
    # amount of available data is >50
    #if self.data.shape[1] > 50:
    pcamodel = PCA(self.data, show_progress=self._show_progress)
    pcamodel.factorize()
    self._hull_idx = selectHullPoints(pcamodel.H, n=self._base_sel)
    #else:
    #    self._hull_idx = range(self.data.shape[1])

    aa_mdl = AA(self.data[:, self._hull_idx], num_bases=self._num_bases,
                niter=self._niter, show_progress=self._show_progress,
                compW=True)

    # initialize W, H, and beta
    aa_mdl.initialization()

    # determine W
    aa_mdl.factorize()
    self.W = aa_mdl.W
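# The vq() call above maps each recovered hull point back to the index of
# its nearest data column. A minimal stand-in using plain NumPy (assuming
# vq behaves as a nearest-neighbor lookup, which its use here implies):

import numpy as np

def nearest_column_indices(data2d, hull_points):
    # data2d: (2, samples); hull_points: (2, k) -> index of closest column
    # squared l2 distance between every hull point and every data column
    d = ((hull_points[:, :, None] - data2d[:, None, :]) ** 2).sum(axis=0)
    return np.argmin(d, axis=1)

if __name__ == "__main__":
    pts = np.random.random((2, 100))
    hull = pts[:, [3, 17, 42]] + 1e-9           # points near columns 3, 17, 42
    print(nearest_column_indices(pts, hull))    # -> [ 3 17 42]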
def launch():
    seed = 1234
    np.random.seed(seed)
    light = Light()
    light.initials()
    light.file_snapshot()
    light.set_seed(seed)

    w, h = 28, 28
    fast_test = False
    test_ratio = 0.25
    valid_ratio = 0.25
    light.set("w", w)
    light.set("h", h)
    light.set("test_ratio", test_ratio)
    light.set("valid_ratio", valid_ratio)

    images = load_images(w=w, h=h)
    X = images.reshape((-1, w * h))

    # prepare
    X = shuffle(X)
    if fast_test:
        max_evaluations_hp = 1
        default_params = dict(max_epochs=2)
        X = X[0:100]
    else:
        default_params = dict()
        max_evaluations_hp = 20
    default_params["batch_size"] = 128
    #default_params["nb_layers"] = 1

    eval_function = lambda model, X_v, _: float(model.get_reconstruction_error(X_v))

    X_train_full, X_test = train_test_split(X, test_size=test_ratio)
    X_train, X_valid = train_test_split(X_train_full, test_size=valid_ratio)

    # show original data
    #X_ = X.reshape((X.shape[0], im[0], im[1]))
    #X_ = X_[0:10]
    #grid_plot(X_, imshow_options={"cmap": "gray"})
    #plt.savefig(dirname + "/orig.png")
    #plt.show()

    all_hp, all_scores = find_all_hp(
        AA,
        minimize_fn_with_hyperopt,
        X_train,
        X_valid,
        None,
        None,
        max_evaluations=max_evaluations_hp,
        default_params=default_params,
        not_allowed_params=["batch_size"],
        eval_function=eval_function
    )
    argmin = min(range(len(all_hp)), key=lambda i: all_scores[i])
    best_hp, best_score = all_hp[argmin], all_scores[argmin]
    best_hp.update(default_params)
    aa = AA(**best_hp)
    aa.fit(X_train_full, X_test)
    best_model = aa

    light.set("best_hp", best_hp)
    light.set("best_score", best_score)
    light.set("all_hp", all_hp)
    light.set("all_scores", all_scores)
    #light.set("best_model", light.insert_blob(best_model))

    names = best_model.capsule.batch_optimizer.stats[0].keys()
    stats = dict()
    for name in names:
        stats[name] = get_stat(name, best_model.capsule.batch_optimizer.stats)
    light.set("layer_weights",
              light.insert_blob([layer.W.get_value()
                                 for layer in best_model.all_layers[1:-1]]))
    light.set("best_model_stats", stats)
    light.set("nb_layers", aa.nb_layers * 2 - 1)

    # reconstructions
    R = np.arange(20)
    X_test_hat = best_model.capsule.predict(X_test[R]).tolist()
    light.set("reconstructions", light.insert_blob(X_test_hat))
    light.endings()
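# Sketch of the nested split used in launch(): test_ratio is taken from the
# full set first, then valid_ratio from the remainder, so 0.25/0.25 leaves
# about 56%/19%/25% of the original data for train/valid/test. Standalone
# check with scikit-learn (the import path here is an assumption; the
# original module's imports are not shown):

import numpy as np
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    X = np.random.random((1000, 784))
    X_train_full, X_test = train_test_split(X, test_size=0.25)
    X_train, X_valid = train_test_split(X_train_full, test_size=0.25)
    print(len(X_train), len(X_valid), len(X_test))   # 562 188 250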
def __init__(self, data, num_bases=4, method='pca', robust_map=True):
    AA.__init__(self, data, num_bases=num_bases)
    self.sub = []
    self._robust_map = robust_map
    self._method = method