Beispiel #1
0
 def factorize(self, show_progress=False, compute_w=True, compute_h=True,
               compute_err=True, niter=1):
     """ Factorize s.t. WH = data

         Thin wrapper around AA.factorize: the boolean switches are passed
         through unchanged while the iteration count is pinned to 1.

         Parameters
         ----------
         show_progress : bool
                 print some extra information to stdout.
         compute_h : bool
                 iteratively update values for H.
         compute_w : bool
                 iteratively update values for W.
         compute_err : bool
                 compute Frobenius norm |data-WH| after each update and store
                 it to .ferr[k].
         niter : int
                 accepted but not forwarded; AA.factorize is always called
                 with niter=1.

         Updated Values
         --------------
         .W : updated values for W.
         .H : updated values for H.
         .ferr : Frobenius norm |data-WH|.
     """
     # niter is not part of the forwarded options
     switches = {
         'show_progress': show_progress,
         'compute_w': compute_w,
         'compute_h': compute_h,
         'compute_err': compute_err,
     }
     AA.factorize(self, niter=1, **switches)
Beispiel #2
0
 def __init__(self, data, num_bases=4, dist_measure='l2',
              init='fastmap', **kwargs):
     """ Initialise the AA base part and select the distance function.

         Parameters
         ----------
         data : passed on to AA.__init__.
         num_bases : int
                 passed on to AA.__init__.
         dist_measure : str
                 one of 'l1', 'l2', 'cosine', 'abs_cosine',
                 'weighted_abs_cosine' or 'kl'; stored in ._dist_measure.
         init : str
                 stored in ._init.
         **kwargs : accepted but not used in this method.
     """
     AA.__init__(self, data, num_bases=num_bases)

     self._init = init
     self._dist_measure = dist_measure

     # map the measure name onto its distance function; for any other
     # value no _distfunc assignment is made here
     measure = self._dist_measure
     if measure == 'l1':
         self._distfunc = l1_distance
     elif measure == 'l2':
         self._distfunc = l2_distance
     elif measure == 'cosine':
         self._distfunc = cosine_distance
     elif measure == 'abs_cosine':
         self._distfunc = abs_cosine_distance
     elif measure == 'weighted_abs_cosine':
         self._distfunc = weighted_abs_cosine_distance
     elif measure == 'kl':
         self._distfunc = kl_divergence
Beispiel #3
0
    def factorize(self, show_progress=False, compute_w=True, compute_h=True,
                  compute_err=True, niter=1):
        """ Factorize s.t. WH = data

            Delegates to AA.factorize with the iteration count fixed to 1;
            all boolean options are handed over as given.

            Parameters
            ----------
            show_progress : bool
                    print some extra information to stdout.
            compute_h : bool
                    iteratively update values for H.
            compute_w : bool
                    iteratively update values for W.
            compute_err : bool
                    compute Frobenius norm |data-WH| after each update and store
                    it to .ferr[k].
            niter : int
                    accepted but not forwarded; AA.factorize is always called
                    with niter=1.

            Updated Values
            --------------
            .W : updated values for W.
            .H : updated values for H.
            .ferr : Frobenius norm |data-WH|.
        """
        # only the boolean options are forwarded, niter is not
        forwarded = dict(show_progress=show_progress,
                         compute_w=compute_w,
                         compute_h=compute_h,
                         compute_err=compute_err)
        AA.factorize(self, niter=1, **forwarded)
Beispiel #4
0
    def __init__(self, data, num_bases=4, dist_measure='l2',
                 init='fastmap', **kwargs):
        """ Initialise the AA base part and select the distance function.

            Parameters
            ----------
            data : passed on to AA.__init__.
            num_bases : int
                    passed on to AA.__init__.
            dist_measure : str
                    one of 'l1', 'l2', 'cosine', 'abs_cosine',
                    'weighted_abs_cosine' or 'kl'; stored in ._dist_measure.
            init : str
                    stored in ._init.
            **kwargs : accepted but not used in this method.
        """
        AA.__init__(self, data, num_bases=num_bases)

        self._init = init
        self._dist_measure = dist_measure

        # map the measure name onto its distance function; for any other
        # value no _distfunc assignment is made here
        chosen = self._dist_measure
        if chosen == 'l1':
            self._distfunc = l1_distance
        elif chosen == 'l2':
            self._distfunc = l2_distance
        elif chosen == 'cosine':
            self._distfunc = cosine_distance
        elif chosen == 'abs_cosine':
            self._distfunc = abs_cosine_distance
        elif chosen == 'weighted_abs_cosine':
            self._distfunc = weighted_abs_cosine_distance
        elif chosen == 'kl':
            self._distfunc = kl_divergence
Beispiel #5
0
	def __init__(self, data, num_bases=4, niter=100,
				show_progress=False, compW=True, compH=True,
				dist_measure='l2'):
		""" Initialise the AA base part and select the distance function.

		data, num_bases, niter, show_progress and compW are handed to
		AA.__init__; compH is stored in ._compH and dist_measure
		('l1', 'l2', 'cosine', 'kl' or 'sparse_graph_l2') in ._dist_measure.
		"""
		# call inherited method
		AA.__init__(self, data,
				num_bases=num_bases,
				niter=niter,
				show_progress=show_progress,
				compW=compW)

		self._compH = compH
		self._dist_measure = dist_measure

		# map the measure name onto its distance function; for any other
		# value no _distfunc assignment is made here
		measure = self._dist_measure
		if measure == 'l1':
			self._distfunc = l1_distance
		elif measure == 'l2':
			self._distfunc = l2_distance
		elif measure == 'cosine':
			self._distfunc = cosine_distance
		elif measure == 'kl':
			self._distfunc = kl_divergence
		elif measure == 'sparse_graph_l2':
			self._distfunc = sparse_graph_l2_distance
Beispiel #6
0
 def __init__(self, data, num_bases=4, base_sel=3):
     """ Initialise the AA base part and store a bounded base_sel.

         data and num_bases go to AA.__init__; base_sel is stored in
         ._base_sel, replaced by self.data.shape[0] when it exceeds it.
     """
     # call inherited method
     AA.__init__(self, data, num_bases=num_bases)

     # base sel should never be larger than the actual data dimension
     limit = self.data.shape[0]
     self._base_sel = limit if base_sel > limit else base_sel
Beispiel #7
0
    def __init__(self, data, num_bases=4, base_sel=3):
        """ Initialise the AA base part and store a bounded base_sel.

            data and num_bases go to AA.__init__; base_sel is stored in
            ._base_sel, replaced by self.data.shape[0] when it exceeds it.
        """
        # call inherited method
        AA.__init__(self, data, num_bases=num_bases)

        # base sel should never be larger than the actual data dimension
        upper = self.data.shape[0]
        self._base_sel = upper if base_sel > upper else base_sel
Beispiel #8
0
    def factorize(self,
                  show_progress=False,
                  compute_w=True,
                  compute_h=True,
                  compute_err=True,
                  robust_cluster=3,
                  niter=1,
                  robust_nselect=-1):
        """ Factorize s.t. WH = data
            
            Parameters
            ----------
            show_progress : bool
                    print some extra information to stdout.
                    False, default
            compute_h : bool
                    iteratively update values for H.
                    True, default
            compute_w : bool
                    iteratively update values for W.
                    default, True
            compute_err : bool
                    compute Frobenius norm |data-WH| after each update and store
                    it to .ferr[k].
            robust_cluster : int, optional
                    set the number of clusters for robust map selection.
                    3, default 
            niter : int, optional
                    accepted but not forwarded; AA.factorize is always called
                    with niter=1.
            robust_nselect : int, optional
                    set the number of samples to consider for robust map
                    selection.
                    -1, default (automatically determine suitable number,
                    round(2 * log(data.shape[1])) stored as an int)
            
            Updated Values
            --------------
            .W : updated values for W.
            .H : updated values for H.
            .ferr : Frobenius norm |data-WH|.
        """
        self._robust_cluster = robust_cluster
        self._robust_nselect = robust_nselect

        if self._robust_nselect == -1:
            # np.round returns a float; the parameter is documented as an
            # int sample count, so store the automatic value as an int too
            self._robust_nselect = int(
                np.round(np.log(self.data.shape[1]) * 2))

        AA.factorize(self,
                     niter=1,
                     show_progress=show_progress,
                     compute_w=compute_w,
                     compute_h=compute_h,
                     compute_err=compute_err)
Beispiel #9
0
    def initialization(self):
        """ Fastmap like initialization

            Starting from index 0, three times replace the current index by
            the argmax of self._distance(current index); the result becomes
            the single entry of .select. When ._compH is set, .H is created
            as zeros of shape (_num_bases, _num_samples); when ._compW is
            set, AA.initialization is run afterwards.
        """
        # set the starting index for fastmap initialization
        start = 0

        # after 3 iterations the first "real" index is found
        for _ in range(3):
            start = np.argmax(self._distance(start))

        self.select = [start]

        if self._compH:
            self.H = np.zeros((self._num_bases, self._num_samples))

        if self._compW:
            AA.initialization(self)
Beispiel #10
0
    def update_w(self):
        """ compute new W

            Projects the data (random projection; the PCA path is switched
            off by the hard-coded ``method``), collects hull point indices
            from the projection into ._hull_idx, fits an AA model with
            niter=50 on those columns and takes over its W before calling
            self._map_w_to_data().
        """
        def select_hull_points(data, n=3):
            """ select data points for pairwise projections of the first n
            dimensions """
            picked = np.array([])

            # walk over the pairwise combinations of the first n dimensions
            for dims in combinations(range(n), 2):
                # convex hull points of the 2D projection
                hull = quickhull(data[dims, :].T)

                # merge the indices reported by vq for the hull points and
                # drop duplicates
                picked = np.unique(
                    np.append(picked, vq(data[dims, :], hull.T)))

            return np.int32(picked)

        # determine convex hull data points using either PCA or random
        # projections
        method = 'randomprojection'
        if method != 'pca':
            R = np.random.randn(self._base_sel, self._data_dimension)
            proj = np.dot(R, self.data)
        else:
            pcamodel = PCA(self.data)
            pcamodel.factorize(show_progress=False)
            proj = pcamodel.H

        self._hull_idx = select_hull_points(proj, n=self._base_sel)
        hull_model = AA(self.data[:, self._hull_idx],
                        num_bases=self._num_bases)

        # determine W
        hull_model.factorize(niter=50, compute_h=True, compute_w=True,
                             compute_err=True, show_progress=False)

        self.W = hull_model.W
        self._map_w_to_data()
Beispiel #11
0
 def factorize(self, show_progress=False, compute_w=True, compute_h=True,
               compute_err=True, robust_cluster=3, niter=1, robust_nselect=-1):
     """ Factorize s.t. WH = data
         
         Parameters
         ----------
         show_progress : bool
                 print some extra information to stdout.
                 False, default
         compute_h : bool
                 iteratively update values for H.
                 True, default
         compute_w : bool
                 iteratively update values for W.
                 default, True
         compute_err : bool
                 compute Frobenius norm |data-WH| after each update and store
                 it to .ferr[k].
         robust_cluster : int, optional
                 set the number of clusters for robust map selection.
                 3, default 
         niter : int, optional
                 accepted but not forwarded; AA.factorize is always called
                 with niter=1.
         robust_nselect : int, optional
                 set the number of samples to consider for robust map
                 selection.
                 -1, default (automatically determine suitable number,
                 round(2 * log(data.shape[1])) stored as an int)
         
         Updated Values
         --------------
         .W : updated values for W.
         .H : updated values for H.
         .ferr : Frobenius norm |data-WH|.
     """
     self._robust_cluster = robust_cluster
     self._robust_nselect = robust_nselect
     
     if self._robust_nselect == -1:
         # np.round returns a float; the parameter is documented as an
         # int sample count, so store the automatic value as an int too
         self._robust_nselect = int(
             np.round(np.log(self.data.shape[1]) * 2))
     
     AA.factorize(self, niter=1, show_progress=show_progress, 
               compute_w=compute_w, compute_h=compute_h, 
               compute_err=compute_err)
Beispiel #12
0
    def update_w(self):
        """ compute new W

            Projects the data (random projection; the PCA path is switched
            off by the hard-coded ``method``), collects hull point indices
            from the projection into ._hull_idx, fits an AA model with
            niter=50 on those columns and takes over its W before calling
            self._map_w_to_data().
        """
        def select_hull_points(data, n=3):
            """ select data points for pairwise projections of the first n
            dimensions """
            indices = np.array([])

            # walk over the pairwise combinations of the first n dimensions
            for pair in combinations(range(n), 2):
                # convex hull points of the 2D projection
                hull_pts = quickhull(data[pair, :].T)

                # merge the indices reported by vq for the hull points and
                # drop duplicates
                indices = np.unique(
                    np.append(indices, vq(data[pair, :], hull_pts.T)))

            return np.int32(indices)

        # determine convex hull data points using either PCA or random
        # projections
        method = 'randomprojection'
        if method != 'pca':
            R = np.random.randn(self._base_sel, self._data_dimension)
            proj = np.dot(R, self.data)
        else:
            pcamodel = PCA(self.data)
            pcamodel.factorize(show_progress=False)
            proj = pcamodel.H

        self._hull_idx = select_hull_points(proj, n=self._base_sel)
        sub_model = AA(self.data[:, self._hull_idx],
                       num_bases=self._num_bases)

        # determine W
        sub_model.factorize(niter=50,
                            compute_h=True,
                            compute_w=True,
                            compute_err=True,
                            show_progress=False)

        self.W = sub_model.W
        self._map_w_to_data()
Beispiel #13
0
    def __init__(self, data, num_bases=4, niter=100, show_progress=False,
                 compW=True, compH=True, base_sel=3):
        """ Initialise the AA base part and store compH and a bounded
            base_sel.

            data, num_bases, niter, show_progress and compW are handed to
            AA.__init__; compH is kept in ._compH. base_sel is stored in
            ._base_sel only while it is smaller than self.data.shape[0],
            otherwise that shape value is stored instead.
        """
        # call inherited method
        AA.__init__(self, data, num_bases=num_bases, niter=niter,
                    show_progress=show_progress, compW=compW)

        self._compH = compH

        # base sel should never be larger than the actual
        # data dimension
        dim = self.data.shape[0]
        self._base_sel = base_sel if base_sel < dim else dim
Beispiel #14
0
 def update_h(self):
     """ compute new H for the current W, depending on ._method

         'pca' : H = pinv(W) . data
         'nmf' : H of an NMF model run with W fixed (niter=50)
         'aa'  : H of an AA model run with W fixed
         Any other value leaves H untouched.
     """
     # call form of print so the module also parses on Python 3; the
     # emitted text is unchanged.
     # NOTE(review): looks like leftover debug output - confirm whether
     # it should stay.
     print(self._method)

     if self._method == 'pca':
         self.H = np.dot(pinv(self.W), self.data)

     elif self._method == 'nmf':
         mdl = NMF(self.data, num_bases=self._num_bases)
         mdl.W = self.W
         mdl.factorize(compute_w=False, niter=50)
         self.H = mdl.H.copy()

     elif self._method == 'aa':
         mdl = AA(self.data, num_bases=self._num_bases)
         mdl.W = self.W
         mdl.factorize(compute_w=False)
         self.H = mdl.H.copy()
Beispiel #15
0
    def updateW(self):
        """ compute new W

            Runs a PCA model on the data, collects hull point indices from
            its H into ._hull_idx, fits an AA model on those columns and
            takes over its W.
        """
        def selectHullPoints(data, n=3):
            """ select data points for pairwise projections of the first n
            dimensions """
            found = np.array([])

            # walk over the pairwise combinations of the first n dimensions
            for dims in combinations(range(n), 2):
                # convex hull points of the 2D projection
                hull = quickhull(data[dims, :].T)

                # merge the indices reported by vq for the hull points and
                # drop duplicates
                found = np.unique(
                    np.append(found, vq(data[dims, :], hull.T)))

            return np.int32(found)

        # the hull points are always determined via PCA; a former
        # "only if self.data.shape[1] > 50, else use all columns" guard
        # is disabled
        pcamodel = PCA(self.data, show_progress=self._show_progress)
        pcamodel.factorize()
        self._hull_idx = selectHullPoints(pcamodel.H, n=self._base_sel)

        hull_columns = self.data[:, self._hull_idx]
        aa_mdl = AA(hull_columns, num_bases=self._num_bases,
                    niter=self._niter, show_progress=self._show_progress,
                    compW=True)

        # initialize W, H, and beta
        aa_mdl.initialization()

        # determine W
        aa_mdl.factorize()

        self.W = aa_mdl.W
Beispiel #16
0
def launch():
    """Search hyper-parameters for AA, refit the best setting and record
    the run through a Light instance.

    Steps, in order: seed numpy and Light, load and shuffle the images,
    split them into train/valid/test, run find_all_hp, refit AA with the
    lowest-scoring hyper-parameters on train+valid against test, then
    store hyper-parameters, scores, optimizer statistics, layer weights
    and 20 test reconstructions with light.set.
    """
    seed = 1234
    np.random.seed(seed)
    light = Light()
    light.initials()
    light.file_snapshot()
    light.set_seed(seed)

    w, h = 28, 28
    fast_test = False
    test_ratio = 0.25
    valid_ratio = 0.25

    for key, value in (("w", w),
                       ("h", h),
                       ("test_ratio", test_ratio),
                       ("valid_ratio", valid_ratio)):
        light.set(key, value)

    images = load_images(w=w, h=h)
    X = images.reshape((-1, w * h))

    # prepare
    X = shuffle(X)
    if fast_test:
        # quick smoke run: one evaluation, two epochs, first 100 samples
        max_evaluations_hp = 1
        default_params = {"max_epochs": 2}
        X = X[0:100]
    else:
        default_params = {}
        max_evaluations_hp = 20
    default_params["batch_size"] = 128
    # default_params["nb_layers"] = 1

    def eval_function(model, X_v, _):
        # score of a candidate model: its reconstruction error on X_v
        return float(model.get_reconstruction_error(X_v))

    X_train_full, X_test = train_test_split(X, test_size=test_ratio)
    X_train, X_valid = train_test_split(X_train_full, test_size=valid_ratio)

    # (disabled) show original data
    # X_ =  X.reshape((X.shape[0], im[0], im[1]))
    # X_ = X_[0:10]
    # grid_plot(X_, imshow_options={"cmap": "gray"})
    # plt.savefig(dirname+"/orig.png")
    # plt.show()

    all_hp, all_scores = find_all_hp(
        AA,
        minimize_fn_with_hyperopt,
        X_train,
        X_valid,
        None,
        None,
        max_evaluations=max_evaluations_hp,
        default_params=default_params,
        not_allowed_params=["batch_size"],
        eval_function=eval_function
    )

    # index of the smallest score (first one on ties)
    best_idx = min(range(len(all_hp)), key=lambda i: all_scores[i])
    best_hp = all_hp[best_idx]
    best_score = all_scores[best_idx]

    best_hp.update(default_params)
    aa = AA(**best_hp)
    aa.fit(X_train_full, X_test)
    best_model = aa

    light.set("best_hp", best_hp)
    light.set("best_score", best_score)
    light.set("all_hp", all_hp)
    light.set("all_scores", all_scores)
    # light.set("best_model", light.insert_blob(best_model))

    # one entry per statistic name found in the first stats record
    names = best_model.capsule.batch_optimizer.stats[0].keys()
    stats = {
        name: get_stat(name, best_model.capsule.batch_optimizer.stats)
        for name in names
    }

    inner_layers = best_model.all_layers[1:-1]
    layer_weights = [layer.W.get_value() for layer in inner_layers]
    light.set("layer_weights", light.insert_blob(layer_weights))
    light.set("best_model_stats", stats)
    light.set("nb_layers", aa.nb_layers * 2 - 1)

    # reconstructions
    R = np.arange(20)
    X_test_hat = best_model.capsule.predict(X_test[R]).tolist()
    light.set("reconstructions", light.insert_blob(X_test_hat))
    light.endings()
Beispiel #17
0
    def __init__(self, data, num_bases=4, method='pca', robust_map=True):
        """ Initialise the AA base part and remember the chosen options.

            data and num_bases go to AA.__init__; method is stored in
            ._method, robust_map in ._robust_map and .sub starts out as an
            empty list.
        """
        AA.__init__(self, data, num_bases=num_bases)

        self._method = method
        self._robust_map = robust_map
        self.sub = list()
Beispiel #18
0
 def __init__(self, data, num_bases=4, method='pca', robust_map=True):
     """ Initialise the AA base part and remember the chosen options.

         data and num_bases go to AA.__init__; method is stored in
         ._method, robust_map in ._robust_map and .sub starts out as an
         empty list.
     """
     AA.__init__(self, data, num_bases=num_bases)

     self._method = method
     self._robust_map = robust_map
     self.sub = list()