Example #1
0
    def initialize(self, X0=None, title='',**kwargs):
        """\
        Set the initial embedding and record a copy of it.

        Parameters:

        X0 : numpy array or None
        Initial embedding, of shape (self.N, self.dim). If None, an embedding
        is generated by misc.initial_embedding() with radius self.D_rms.

        title : str
        Text inserted in the header line printed when self.verbose > 0.

        **kwargs
        Extra keyword arguments forwarded to misc.initial_embedding() when X0
        is None; unused otherwise.
        """
        if self.verbose > 0:
            print('- MDS.initialize('+title+'):')

        if X0 is not None:
            #a given embedding must be an array of the expected shape
            assert isinstance(X0,np.ndarray)
            assert X0.shape == (self.N,self.dim)
            method_line = '  method : initialization given'
        else:
            X0 = misc.initial_embedding(
                self.N, dim=self.dim, radius=self.D_rms, **kwargs)
            method_line = '  method : random'
        if self.verbose > 0:
            print(method_line)

        #install the embedding, refresh derived quantities, then keep a copy
        #of the starting point in self.X0
        self.X = X0
        self.update()
        self.X0 = self.X.copy()

        if self.verbose > 0:
            print(f'  initial stress : {self.cost:0.2e}[{self.ncost:0.2e}]')
Example #2
0
    def initialize(self, X0=None, **kwargs):
        """\
        Set the initial embedding and record a copy of it.

        Parameters:

        X0 : numpy array or None
        Initial embedding, of shape (self.N, self.dim). If None, an embedding
        is generated by misc.initial_embedding() with radius 1.

        **kwargs
        Extra keyword arguments forwarded to misc.initial_embedding() when X0
        is None; unused otherwise.
        """
        if self.verbose > 0:
            print(self.indent + '  TSNE.initialize():')

        given = X0 is not None
        if given:
            #a given embedding must be an array of the expected shape
            assert isinstance(X0, np.ndarray)
            assert X0.shape == (self.N, self.dim)
        else:
            #a fixed unit radius is used for the random embedding
            X0 = misc.initial_embedding(
                self.N, dim=self.dim, radius=1, **kwargs)

        if self.verbose > 0:
            if given:
                print(self.indent + '    method : initialization given')
            else:
                print(self.indent + '    method : random')

        #self.update() receives the embedding; self.embedding is then copied
        #as the recorded starting point
        self.update(X0)
        self.embedding0 = self.embedding.copy()

        if self.verbose > 0:
            print(self.indent + f'    initial cost : {self.cost:0.2e}')
Example #3
0
    def initialize_X(self, X0=None, method='random', max_iters=50, **kwargs):
        """\
        Set initial embedding, either from a given array or by generating one.

        Parameters:

        X0 : numpy array or None
        Initial embedding, of shape (self.N, self.persp.dimX). If given, it is
        used as is and method is ignored.

        method : str
        How the initial embedding is produced when X0 is None. Options are:
        'random' : generated by misc.initial_embedding() with radius 1.
        'mds' : result of running mds.MDS on the average of self.D over its
        first axis.

        max_iters : int
        Passed to the MDS optimization when method is 'mds'.

        **kwargs
        Extra keyword arguments passed to the MDS optimization when method is
        'mds'; unused otherwise.

        Raises:

        ValueError
        If X0 is None and method is not one of the options above.
        """
        if self.verbose > 0:
            print('- Multiview.initialize_X():')

        if X0 is not None:
            if self.verbose > 0:
                print('  method : X0 given')
            assert isinstance(X0, np.ndarray)
            assert X0.shape == (self.N, self.persp.dimX)
            self.X = X0
        else:
            if self.verbose > 0:
                print('  method : ', method)
            if method == 'random':
                self.X = misc.initial_embedding(self.N,
                                                dim=self.persp.dimX,
                                                radius=1)
            elif method == 'mds':
                D = np.average(self.D, axis=0)
                vis = mds.MDS(D, dim=self.persp.dimX)
                vis.initialize()
                vis.optimize(max_iters=max_iters, **kwargs)
                self.X = vis.X
            else:
                #an unknown method used to fall through silently, leaving
                #self.X stale or undefined before self.update() was called
                raise ValueError(
                    f"unknown initialization method {method!r}; "
                    "expected 'random' or 'mds'")
        self.update()
        #keep a copy of the starting point
        self.X0 = self.X.copy()
Example #4
0
    def __init__(self, data, weights=None, data_args=None,
                 fixed_embedding=None, fixed_projections=None,
                 initial_embedding=None, initial_projections=None,
                 visualization_method='mds', visualization_args={},
                 total_cost_function='rms',
                 embedding_dimension=3, image_dimension=2,
                 projection_family='linear',projection_constraint='orthogonal',
                 hidden_samples=None,
                 sample_labels=None, perspective_labels=None,
                 sample_colors=None, image_colors=None,
                 verbose=0, indent='',
                 **kwargs):
        """\
        Initializes MPSE object.

        Sets up the distances and weights of each perspective, the projection
        object, one visualization instance per perspective, the cost and
        gradient functions, and the initial (or fixed) embedding and
        projections. Finishes by calling self.update(**kwargs).

        Parameters
        ----------

        data : list, length (n_perspectives)
        List containing distance/dissimilarity/feature data for each 
        perspective. Each array can be of the following forms:
        1) A 1D condensed distance array
        2) A square distance matrix
        3) An array containing features
        ***4) A dictionary describing a graph

        weights : None or string or function or array or list
        If visualization allows for it, weights to be used in computation of
        cost/gradient of each perspective.
        If a list or numpy array is given, it must have length equal to the
        number of perspectives and entry i is used for perspective i. It is
        copied, so the given object is not modified. Otherwise, the given
        weights are used for all perspectives.
        The possible weights are described in setup.setup_weights. These are:
        1) None : no weights are used
        2) string : method to compute weights based on distances
        3) function : function to compute weights based on distances
        4) array : array containing pairwise weights or node weights, depending
        on size (must be of length of distances or of samples).

        data_args : dictionary (optional) or list
        Optional arguments passed, along with data, to
        setup.setup_distances_from_multiple_perspectives().
        If a list is passed, then the length must be the number of perspectives
        and each element must be a dictionary. Then, each set of distances will
        be set up using a different set of arguments.

        fixed_embedding : array
        If an array is given, this is assumed to be the true embedding and
        by default optimization is done w.r.t. the projections only.

        fixed_projections : list or str
        If a list is given, this is assumed to be the true projections and by
        default optimization is done w.r.t. the embedding coordinates only.
        If a string is given, the projections are generated by
        self.proj.generate() using the string as method.

        initial_embedding : array
        If given, this is the initial embedding used. Must have shape
        (n_samples, embedding_dimension). Ignored if fixed_embedding is given.

        initial_projections : list or str
        If given, this is the initial projections used. If a string is given,
        the projections are generated by self.proj.generate() using the string
        as method. Ignored if fixed_projections is given.

        visualization_method : str or list
        Visualization method. Current options are 'mds' and 'tsne'.
        The visualization method can be different for different perspectives, 
        by passing a list of visualization methods instead.

        visualization_args : dict or list
        Dictionary with arguments to pass to each visualization method.
        Different arguments can be passed to different visualization methods
        by passing a list of dictionaries instead.

        total_cost_function : str or callable
        How the individual costs are combined into the total cost. If 'rms',
        the root mean square of the individual costs is used. Otherwise, must
        be a callable taking the array of individual costs.

        embedding_dimension : int > 0
        Dimension of the embedding.

        image_dimension : int
        Dimension of image (after projection).
        NOTE(review): the same value is passed unchanged to the projection
        object and to every visualization instance; per-perspective image
        dimensions are not handled here - confirm before passing a list.

        projection_family : str
        Projection family. Options are 'linear'.

        projection_constraint : str
        Constraints on projection family. Options are None, 'orthogonal',
        'similar'.

        hidden_samples : list (optional)
        If given, must be a list of length n_perspectives. It is stored as
        self.hidden_samples.

        sample_labels : list (optional)
        List containing labels of samples (used in plots).

        perspective_labels : list (optional)
        Labels of perspectives, of length n_perspectives. If None, the labels
        1,...,n_perspectives are used.

        sample_colors : array (optional)
        Array containing color value of samples (used in plots).

        image_colors : array-like, shape (n_perspectives, n_samples)
        Colors for each image.

        verbose : int >= 0
        Prints status of the setup if verbose > 0.

        indent : str
        String added before most printed lines.

        **kwargs
        Extra keyword arguments passed to self.proj.generate() when random
        initial projections are generated, and to self.update().
        """
        self.verbose, self.indent = verbose, indent
        if verbose > 0:
            print(indent+'mview.MPSE():')

        ##set up sets of distances from data
        self.distances = setup.setup_distances_from_multiple_perspectives(
            data, data_args)
        self.n_perspectives = len(self.distances)
        self.n_samples = scipy.spatial.distance.num_obs_y(self.distances[0])

        ##set up weights from data
        if isinstance(weights, (list, np.ndarray)):
            assert len(weights) == self.n_perspectives
            #copy the container: its entries are overwritten in the loop below
            #and the object given by the caller must not be modified
            self.weights = weights.copy()
        else:
            self.weights = [weights]*self.n_perspectives
        for i in range(self.n_perspectives):
            self.weights[i] = setup.setup_weights(self.distances[i], \
                                self.weights[i], min_weight = 0)

        ##set up parameters
        self.embedding_dimension = embedding_dimension
        self.image_dimension = image_dimension
        self.projection_family = projection_family
        self.projection_constraint = projection_constraint
        proj = projections.PROJ(embedding_dimension,image_dimension,
                                projection_family,projection_constraint)
        self.proj = proj

        ##set up hidden samples
        if hidden_samples is not None:
            assert isinstance(hidden_samples, list)
            assert len(hidden_samples) == self.n_perspectives
        self.hidden_samples = hidden_samples

        if verbose > 0:
            print(indent+'  data details:')
            print(indent+f'    number of perspectives : {self.n_perspectives}')
            print(indent+f'    number of samples : {self.n_samples}')
            print(indent+'  visualization details:')
            print(indent+'    embedding dimension :',self.embedding_dimension)
            print(indent+f'    image dimension : {self.image_dimension}')
            print(indent+f'    visualization type : {visualization_method}')

        #setup sample labels:
        if sample_labels is not None:
            assert len(sample_labels) == self.n_samples
        self.sample_labels = sample_labels
        #setup perspective labels:
        if perspective_labels is None:
            perspective_labels = range(1,self.n_perspectives+1)
        else:
            assert len(perspective_labels) == self.n_perspectives
        self.perspective_labels = perspective_labels
        
        #setup colors:
        self.sample_colors = sample_colors
        self.image_colors = image_colors

        #setup visualization instances:
        self.visualization_instances = []
        #the attribute keeps the value as given (str or list); the local
        #variable is expanded to one entry per perspective below
        self.visualization_method = visualization_method
        if isinstance(visualization_method,str):
            visualization_method = [visualization_method]*self.n_perspectives
        if isinstance(visualization_args,dict):
            visualization_args = [visualization_args]*self.n_perspectives
        for i in range(self.n_perspectives):
            assert visualization_method[i] in ['mds','tsne']
            if self.verbose > 0:
                print('  setup visualization instance for perspective',
                      self.perspective_labels[i],':')
            if visualization_method[i] == 'mds':
                vis = mds.MDS(self.distances[i],
                              weights = self.weights[i],
                              embedding_dimension=self.image_dimension,
                              verbose=self.verbose, indent=self.indent+'    ',
                              **visualization_args[i])
            elif visualization_method[i] == 'tsne':
                vis = tsne.TSNE(self.distances[i],
                                embedding_dimension=self.image_dimension,
                                verbose=self.verbose, indent=self.indent+'    ',
                                **visualization_args[i])
            self.visualization_instances.append(vis)
        #second name for the same list of visualization instances
        self.visualization = self.visualization_instances

        #setup objectives:
        if total_cost_function == 'rms':
            self.total_cost_function = lambda individual_costs : \
                np.sqrt(np.sum(individual_costs**2)/self.n_perspectives)
        else:
            assert callable(total_cost_function)
            self.total_cost_function = total_cost_function
        def cost_function(X,Q,Y=None,**kwargs):
            """\
            Returns total cost and array of individual costs for embedding X
            and projections Q. The images Y are computed from X and Q unless
            given. Extra keyword arguments are passed to each objective.
            """
            if Y is None:
                Y = self.proj.project(Q,X)
            individual_costs = np.zeros(self.n_perspectives)
            for k in range(self.n_perspectives):
                individual_costs[k] = \
                    self.visualization[k].objective(Y[k],**kwargs)
            cost = self.total_cost_function(individual_costs)
            return cost, individual_costs
        self.cost_function = cost_function

        #setup gradient function:
        if self.projection_family == 'linear':
            def gradient(embedding,projections,batch_size=None,indices=None,
                         return_embedding=True,return_projections=True,
                         return_cost=True, return_individual_costs=False):
                """\
                Returns MPSE gradient(s), along with cost and individual costs 
                (optional).

                Parameters
                ----------

                embedding : numpy array
                Current embedding.

                projections : numpy array
                Current projections (as a single array).

                batch_size : int or None
                Passed to the gradient of each visualization instance.

                indices : array or None
                Passed to the gradient of each visualization instance.

                return_embedding : boolean
                If True, returns MPSE gradient w.r.t. embedding.

                return_projections : boolean
                If True, returns MPSE gradient w.r.t. projections. 

                return_cost : boolean
                Not used in the body: the cost is always returned.

                return_individual_costs : boolean
                If True, returns individual embedding costs.
                """
                if return_embedding:
                    dX = np.zeros(embedding.shape)
                if return_projections:
                    dQ = []
                individual_costs = np.empty(self.n_perspectives)
                Y = self.proj.project(projections,embedding)
                for k in range(self.n_perspectives):
                    dY_k, cost_k = self.visualization[k].gradient(
                        Y[k],batch_size=batch_size,indices=indices)
                    individual_costs[k] = cost_k
                    if return_embedding:
                        #NOTE(review): the slice sizes 2 and 3 are hard-coded
                        #and equal the default image_dimension and
                        #embedding_dimension - confirm for other dimensions
                        dX += dY_k @ projections[k][:2, :3]
                    if return_projections:
                        dQ.append(dY_k.T @ embedding)
                if return_embedding:
                    dX /= self.n_perspectives
                cost = self.total_cost_function(individual_costs)
                if return_embedding is False:
                    grad = np.array(dQ)
                elif return_projections is False:
                    grad = dX
                else:
                    grad = [dX,np.array(dQ)]
                if return_individual_costs:
                    return grad, cost, individual_costs
                else:
                    return grad, cost
            self.gradient = gradient
        else:
            def gradient_X(X,Q,Y=None):
                """\
                Returns gradient w.r.t. the embedding, adding over all
                perspectives the visualization gradient at the image times
                the projection gradient.
                """
                pgradient = self.proj.compute_gradient(X[0],params_list=Q)
                if Y is None:
                    Y = self.proj.project(X,params_list=Q)
                gradient = np.zeros((self.n_samples,self.embedding_dimension))
                for k in range(self.n_perspectives):
                    gradient += self.visualization[k].gradient(Y[k]) \
                                @ pgradient[k]
                return gradient
            self.gradient_X = gradient_X

        #set up initial embedding and projections (fixed optional):
        if verbose > 0:
            print(indent+'  initialize:')
        #set fixed and initial embedding:
        if fixed_embedding is not None:
            if verbose > 0:
                print(indent+'    fixed embedding : True')
            self.embedding = fixed_embedding
            self.initial_embedding = fixed_embedding
            self.fixed_embedding = True
        else:
            if verbose > 0:
                print(indent+'    fixed embedding : False')
            if initial_embedding is None:
                if verbose > 0:
                    print(indent+'    initial embedding : random')
                self.initial_embedding = misc.initial_embedding(
                    self.n_samples,dim=self.embedding_dimension, radius=1)
            else:
                assert isinstance(initial_embedding,np.ndarray)
                assert initial_embedding.shape == (
                    self.n_samples, self.embedding_dimension)
                if verbose > 0:
                    print(indent+'    initial embedding : given')
                self.initial_embedding = initial_embedding
            self.embedding = self.initial_embedding
            self.fixed_embedding = False
        #set fixed and initial projections:
        if fixed_projections is not None:
            if isinstance(fixed_projections,str):
                fixed_projections = self.proj.generate(number= \
                            self.n_perspectives,method=fixed_projections)
            assert(all([isinstance(fp,np.ndarray) for fp in fixed_projections]))
            #NOTE(review): fixed projections are cut to their first 2 rows and
            #3 columns; the sizes are hard-coded and equal the default
            #image_dimension and embedding_dimension - confirm for other
            #dimensions
            fixed_projections = [f[:2, :3] for f in fixed_projections]
            self.projections = fixed_projections
            self.initial_projections = fixed_projections
            self.fixed_projections = True
            if verbose > 0:
                print(indent+'    fixed projections : True')
        else:
            if verbose > 0:
                print(indent+'    fixed projections : False')
            if initial_projections is None:
                if verbose > 0:
                    print(indent+'    initial projections : random')
                self.initial_projections = self.proj.generate(
                    number=self.n_perspectives, **kwargs)
            else:
                if verbose > 0:
                    print(indent+'    initial projections : given')
                if isinstance(initial_projections,str):
                    initial_projections = self.proj.generate(number= \
                            self.n_perspectives,method=initial_projections)
                self.initial_projections = initial_projections
            self.projections = self.initial_projections
            self.fixed_projections = False        
        #printed only on request, like every other status message here
        if verbose > 0:
            print(indent+' Projection is:')
            print(self.projections)

        self.initial_cost = None
        self.initial_individual_cost = None
        self.computation_history = []
        self.time = 0
        self.update(**kwargs)
Example #5
0
    def __init__(self,
                 data,
                 dim=2,
                 weights=None,
                 estimate=True,
                 safety=1e-4,
                 normalize=True,
                 initial_embedding='random',
                 sample_colors=None,
                 verbose=0,
                 indent='',
                 **kwargs):
        """\
        Initializes MDS object.

        Sets up distances and weights, defines the objective and gradient
        functions, and saves or computes the initial embedding.

        Parameters:

        data : array or dictionary
        Distance/dissimilarity/feature data, which can have any of the 
        following formats:
        1) a 1D condensed distance array
        2) a square distance matrix/array
        3) a feature array
        4) a dictionary describing a graph

        dim : int > 0
        Embedding dimension.

        weights : None or str or callable or array
        Weights to be used in defining MDS stress.

        estimate : bool
        Stored as self.estimate.

        safety : None or float
        If not None, must satisfy 0 < safety <= 1e-2. The minimum distance is
        set to the maximum distance times safety and all distances are raised
        to at least that value. If None, no minimum distance is used.

        normalize : bool
        Passed to the stress and gradient computations.

        initial_embedding : numpy array or 'random' or None
        If an array, it must have shape (n_samples, dim) and is used as the
        initial embedding. If 'random', the initial embedding is produced by
        misc.initial_embedding() with radius 1. If None, no initial embedding
        or initial cost is set.

        sample_colors : array (optional)
        Stored as self.sample_colors. If None, the first n_samples entries of
        the distances are used.

        verbose : int >= 0
        Print status of methods in MDS object if verbose > 0.

        indent : str
        When printing, add indent before printing every new line.

        **kwargs
        Extra keyword arguments passed to setup.setup_distances(), to
        misc.initial_embedding() and to the objective when computing the
        initial cost.
        """
        self.verbose = verbose
        self.indent = indent
        if self.verbose > 0:
            print(self.indent + 'mview.MDS():')

        self.distances = setup.setup_distances(data, **kwargs)
        self.n_samples = scipy.spatial.distance.num_obs_y(self.distances)

        #optionally bound the distances from below by a fraction of the
        #largest distance
        if safety is not None:
            assert safety > 0 and safety <= 1e-2
            self.minimum_distance = np.max(self.distances) * safety
            self.distances = np.maximum(self.distances, self.minimum_distance)
        else:
            self.minimum_distance = None

        self.weights = setup.setup_weights(self.distances, weights=weights)
        self.normalize = normalize

        self.sample_colors = (self.distances[0:self.n_samples]
                              if sample_colors is None else sample_colors)

        assert isinstance(dim, int)
        assert dim > 0
        self.dim = dim

        assert isinstance(estimate, bool)
        self.estimate = estimate

        def objective(X, **kwargs):
            #keyword arguments are accepted but not used
            return stress(self.distances, X,
                          weights=self.weights, normalize=self.normalize)

        self.objective = objective

        def gradient(embedding, batch_size=None, indices=None, **kwargs):
            #options shared by the full and batch gradient computations
            shared = dict(weights=self.weights,
                          normalize=self.normalize,
                          minimum_distance=self.minimum_distance)
            if batch_size is None or batch_size >= self.n_samples:
                return full_gradient(self.distances, embedding, **shared)
            return batch_gradient(self.distances, embedding,
                                  batch_size, indices, **shared)

        self.gradient = gradient

        if verbose > 0:
            is_weighted = self.weights is not None
            print(indent + '  data details:')
            print(indent + f'    number of samples : {self.n_samples}')
            print(indent + f'    weighted : {is_weighted}')
            print(indent + '  embedding details:')
            print(indent + f'    embedding dimension : {self.dim}')

        #save or compute initial embedding
        if isinstance(initial_embedding, np.ndarray):
            assert initial_embedding.shape == (self.n_samples, self.dim)
            if self.verbose > 0:
                print('    initial embedding : given')
            self.X0 = initial_embedding
            self.X = self.X0
        elif initial_embedding == 'random':
            self.X0 = misc.initial_embedding(
                self.n_samples, dim=self.dim, radius=1, **kwargs)
            self.X = self.X0
            if self.verbose > 0:
                print('    initial embedding : random')
        else:
            assert initial_embedding is None

        #save initial costs
        if initial_embedding is not None:
            self.initial_cost = self.objective(self.X0, **kwargs)
            self.cost = self.initial_cost
            if self.verbose > 0:
                print(f'    initial stress : {self.cost:0.2e}')

        self.computation_history = []