コード例 #1
0
 def __init__(self, params):
     """
     Construct generic factorization model.

     Every entry of ``params`` becomes an instance attribute. If ``V`` or ``H``
     is passed as a tuple it is split into ``V``/``V1`` and ``H``/``H1``
     respectively. All target and factor matrices that are present are then
     converted to double precision: sparse matrices to CSR format, anything
     else to ``numpy.matrix``.

     :param params: MF runtime and algorithm parameters and options. For detailed explanation
        of the general model parameters see :mod:`mf_run`. For algorithm specific
        model options see documentation of chosen factorization method.
     :type params: `dict`
     :raises utils.MFError: if ``V`` or ``H`` is a tuple that does not hold exactly
        two matrices, or if ``W`` is passed as a tuple.
     """
     def _as_double(X):
         # Single place for the dtype normalisation applied to every matrix.
         if sp.isspmatrix(X):
             return X.tocsr().astype('d')
         # asmatrix returns the input itself (no copy) when it already is a
         # float64 matrix, so no explicit dtype test is needed.
         return np.asmatrix(X, dtype='d')

     self.__dict__.update(params)
     # check if tuples of target and factor matrices are passed
     if isinstance(self.V, tuple):
         # ``!= 2`` also rejects too short tuples, which previously escaped
         # as a bare IndexError.
         if len(self.V) != 2:
             raise utils.MFError("Multiple NMF uses two target matrices.")
         self.V, self.V1 = self.V
     if isinstance(self.H, tuple):
         if len(self.H) != 2:
             raise utils.MFError("Multiple NMF uses two mixture matrices.")
         self.H, self.H1 = self.H
     if isinstance(self.W, tuple):
         raise utils.MFError("Multiple NMF uses one basis matrix.")
     # do not copy target and factor matrices into the program
     self.V = _as_double(self.V)
     for name in ("V1", "W", "H", "H1"):
         # getattr: V1/H1 exist only if a tuple (or an explicit entry in
         # ``params``) supplied them.
         X = getattr(self, name, None)
         # Identity test on purpose: ``X != None`` on a matrix is evaluated
         # element-wise and cannot be used as a condition.
         if X is not None:
             setattr(self, name, _as_double(X))
コード例 #2
0
 def __is_smdefined(self):
     """
     Check if MF and seeding methods are well defined.

     A seed given as a string is looked up in ``seeding.methods`` and replaced
     by a new instance of the registered method. Any other seed is accepted
     only if its lower-cased string form is a key of ``seeding.methods``.

     :raises utils.MFError: if the seeding method is not recognized.
     """
     known = seeding.methods
     if isinstance(self.seed, str):
         if self.seed not in known:
             raise utils.MFError("Unrecognized seeding method.")
         # swap the method name for an instance of the seeding method
         self.seed = known[self.seed]()
         return
     if str(self.seed).lower() not in known:
         raise utils.MFError("Unrecognized seeding method.")
コード例 #3
0
    def purity(self, membership=None, idx=None):
        """
        Compute the purity given a priori known groups of samples [Park2007]_.
        
        The purity is a measure of performance of a clustering method in recovering
        classes defined by a list a priori known (true class labels).
        
        Return the real number in [0,1]. The larger the purity, the better the
        clustering performance.
        
        :param membership: Specify known class membership for each sample. Any
           sized sequence of hashable labels is accepted (list, tuple, 1-d array).
        :type membership: `list`

        :param idx: Used in the multiple NMF model. In factorizations following
           standard NMF model or nonsmooth NMF model ``idx`` is always None.
        :type idx: None or `str` with values 'coef' or 'coef1' (`int` value of 0 or 1, respectively) 

        :raises utils.MFError: if ``membership`` is None or empty.
        """
        V = self.target(idx)
        # Explicit None/length test: plain truthiness is ambiguous for arrays.
        if membership is None or len(membership) == 0:
            raise utils.MFError(
                "Known class membership for each sample is not specified.")
        n = V.shape[1]
        # ravel() rather than squeeze(): the result stays one-dimensional even
        # when there is only a single sample.
        mbs = np.asarray(self.predict(what="samples", prob=False,
                                      idx=idx)).ravel()
        # Map every predicted cluster / known class to its set of sample indices.
        dmbs, dmembership = {}, {}
        for i, cluster in enumerate(mbs):
            dmbs.setdefault(cluster, set()).add(i)
        for i, label in enumerate(membership):
            dmembership.setdefault(label, set()).add(i)
        # For each predicted cluster count the samples of its best matching
        # known class; purity is the fraction of samples counted this way.
        return 1. / n * sum(
            np.max([len(members & dmembership[j]) for j in dmembership])
            for members in dmbs.values())
コード例 #4
0
    def _check_compatibility(self):
        """
        Check if chosen seeding method is compatible with chosen factorization
        method or fixed initialization is passed.

        The seed is resolved as follows:

        * no seed and no initial factors: the seed stays None when ``"none"``
          is listed in ``self.aseeds``, otherwise it becomes ``"random"``;
        * both basis and first mixture matrix are given: the seed is replaced
          by a fixed seeding object holding the passed factors.

        Afterwards the seeding method is validated and checked against the
        seeding methods allowed for this model.

        :raises utils.MFError: if initial factors are passed together with a
           seeding method other than ``"fixed"``.
        """

        self.check_V()

        W = self.basis()
        H = self.coef(0)
        # a second mixture matrix exists only in the multiple ('mm') model
        H1 = self.coef(1) if self.model_name == 'mm' else None
        if self.seed is None and W is None and H is None and H1 is None:
            self.seed = None if "none" in self.aseeds else "random"
        if W is not None and H is not None:
            # Compare by value: an identity test against a string literal
            # depends on interning and is not a reliable equality check.
            if self.seed is not None and self.seed != "fixed":
                raise utils.MFError("Initial factorization is fixed.")
            self.seed = seeding.fixed.Fixed()
            self.seed._set_fixed(W=W, H=H, H1=H1)

        self.__is_smdefined()
        self.__compatibility()
コード例 #5
0
 def predict(self, what='samples', prob=False, idx=None):
     """
     Compute the dominant basis components. The dominant basis component is computed as the row index for which
     the entry is the maximum within the column. 
     
     If ``prob`` is not specified, list is returned which contains computed index for each sample (feature). Otherwise
     tuple is returned where first element is a list as specified before and second element is a list of associated
     probabilities, relative contribution of the maximum entry within each column. 
     
     :param what: Specify target for dominant basis components computation. Two values are possible, 'samples' or
                  'features'. When what='samples' is specified, dominant basis component for each sample is determined based
                  on its associated entries in the mixture coefficient matrix (H). When what='features' computation is performed
                  on the transposed basis matrix (W.T). 
     :type what: `str`
     :param prob: Specify dominant basis components probability inclusion. 
     :type prob: `bool` equivalent
     :param idx: Used in the multiple NMF model. In factorizations following standard NMF model or nonsmooth NMF model
                 ``idx`` is always None.
     :type idx: None or `str` with values 'coef' or 'coef1' (`int` value of 0 or 1, respectively) 
     :raises utils.MFError: if ``what`` is neither 'samples' nor 'features', or the selected matrix is None.
     """
     if what == "samples":
         X = self.coef(idx)
     elif what == "features":
         X = self.basis().T
     else:
         X = None
     # Identity test on purpose: ``X == None`` on a matrix is evaluated
     # element-wise and cannot be used as a condition.
     if X is None:
         raise utils.MFError(
             "Dominant basis components can be computed for samples or features."
         )
     # per column: maximum entry (eX) and the row index holding it (idxX)
     eX, idxX = argmax(X, axis=0)
     if not prob:
         return idxX
     sums = X.sum(axis=0)
     # share of the maximum entry in its column sum
     probs = [e / sums[0, s] for e, s in zip(eX, range(X.shape[1]))]
     return idxX, probs
コード例 #6
0
 def __init__(self, params):
     """
     Construct the model named ``"smf"``.

     Every entry of ``params`` becomes an instance attribute; ``V1`` and ``H1``
     are then reset to None. The target matrix ``V`` is converted to double
     precision (CSR format if sparse, ``numpy.matrix`` otherwise) and the
     model's ``_compatibility`` check is run.

     :param params: Runtime and algorithm parameters and options.
     :type params: `dict`
     :raises utils.MFError: if an initial factor (``W`` or ``H``) is passed.
     """
     self.model_name = "smf"
     self.__dict__.update(params)
     self.V1 = self.H1 = None
     # do not copy target and factor matrices into the program
     if sp.isspmatrix(self.V):
         self.V = self.V.tocsr().astype('d')
     elif self.V.dtype == np.dtype(float):
         self.V = np.asmatrix(self.V)
     else:
         self.V = np.asmatrix(self.V, dtype='d')
     no_fixed_factors = (self.W is None and self.H is None
                         and self.H1 is None)
     if not no_fixed_factors:
         raise utils.MFError(
             "Fixed initialized is not supported by SMF model.")
     self._compatibility()
コード例 #7
0
    def distance(self, metric='euclidean', idx=None):
        """
        Return the loss function value between the target matrix ``V`` and
        the product of the factors ``W`` and ``H``.

        :param metric: Specify distance metric to be used. Possible are Euclidean and
           Kullback-Leibler (KL) divergence. Strictly, KL is not a metric. The
           name is matched case-insensitively.
        :type metric: `str` with values 'euclidean' or 'kl'

        :param idx: Used in the multiple MF model. In standard MF ``idx`` is always None.
           It is not read by this implementation.
        :type idx: None

        :raises utils.MFError: if ``metric`` is neither 'euclidean' nor 'kl'.
        """
        kind = metric.lower()
        if kind not in ('euclidean', 'kl'):
            raise utils.MFError("Unknown distance metric.")
        # current approximation of the target matrix
        approx = dot(self.W, self.H)
        if kind == 'euclidean':
            residual = self.V - approx
            return power(residual, 2).sum()
        # KL divergence: sum(V * log(V / WH) - V + WH)
        ratio = elop(self.V, approx, div)
        return (multiply(self.V, sop(ratio, op=log)) - self.V + approx).sum()
コード例 #8
0
 def __compatibility(self):
     """
     Check if MF model is compatible with the seeding method.

     The lower-cased string form of ``self.seed`` must be contained in
     ``self.aseeds``.

     :raises utils.MFError: if the seeding method is not among ``self.aseeds``.
     """
     seed_name = str(self.seed).lower()
     if seed_name in self.aseeds:
         return
     raise utils.MFError(
         "MF model is incompatible with the seeding method.")