Beispiel #1
0
def mseclustersfuzzy(X, B, donormalise=True, GDM=None):
    """Return the weighted MSE of every cluster, averaged over the datasets.

    Args:
        X: A dataset, or a collection of datasets. When
            ``ds.maxDepthOfArray`` reports a depth of 2 the input is wrapped
            so that it is processed as a collection holding one dataset.
        B: Membership matrix with one row per gene and one column per
            cluster. A 1-D input is reshaped into a single column.
        donormalise: When true, every dataset is passed through
            ``pp.normaliseSampleFeatureMat(x, 4)`` before the errors are
            computed.
        GDM: Optional boolean matrix (rows x datasets) used to select, for
            each dataset, the rows of ``B`` that take part in the
            calculation. Defaults to all-True.

    Returns:
        A 1-D array with one value per cluster: the mean over datasets of
        the per-dataset weighted MSE. A cluster whose membership column sums
        to zero yields NaN.
    """
    datasets = np.array(X)
    memberships = np.array(B)

    # A lone dataset is promoted to a one-element collection of datasets.
    if ds.maxDepthOfArray(datasets) == 2:
        datasets = np.expand_dims(datasets, axis=0)
    n_datasets = len(datasets)

    # A flat membership vector describes a single cluster.
    if len(memberships.shape) == 1:
        memberships = memberships.reshape(-1, 1)
    n_genes, n_clusters = memberships.shape[0], memberships.shape[1]

    if GDM is None:
        gene_mask = np.ones([n_genes, n_datasets], dtype=bool)
    else:
        gene_mask = np.array(GDM)

    # Clusters that turn out to be empty keep this NaN placeholder.
    mse = np.full([n_datasets, n_clusters], np.nan)

    # Total membership per cluster, taken over every row of the matrix.
    # NOTE(review): this total ignores the GDM mask applied below - confirm
    # that is intended.
    cluster_sizes = [np.sum(column) for column in memberships.transpose()]
    # Number of dimensions per dataset, read before any normalisation.
    dims_per_dataset = [x.shape[1] for x in datasets]

    if donormalise:
        datasets = [pp.normaliseSampleFeatureMat(x, 4) for x in datasets]

    for nx in range(n_datasets):
        for k in range(n_clusters):
            size = cluster_sizes[k]
            if size == 0:
                continue

            # Membership values of cluster k for the rows selected by the
            # mask of dataset nx.
            weights = memberships[gene_mask[:, nx], k]

            # Weighted mean for the cluster
            centre = nu.multiplyaxis(datasets[nx], weights, axis=1) / size
            # Errors, then weighted errors
            errors = nu.subtractaxis(datasets[nx], centre, axis=0)
            weighted_errors = nu.multiplyaxis(errors, weights, axis=1)
            # Sum of squared weighted errors
            squared_sum = np.sum(np.power(weighted_errors, 2))

            mse[nx, k] = squared_sum / dims_per_dataset[nx] / size

    return np.mean(mse, axis=0)
Beispiel #2
0
def generateCoPaM(U,
                  relabel_technique='minmin',
                  w=None,
                  X=None,
                  distCriterion='direct_euc',
                  K=0,
                  GDM=None):
    """Fuse a collection of partitions into one consensus partition matrix.

    The partitions are ordered by ascending mean cluster MSE (computed with
    ``mn.mseclustersfuzzy``), relabelled one by one against the running
    consensus with ``relabelClusts``, and accumulated as a weighted average.
    Entries of ``U`` that are themselves collections of partitions are first
    reduced by a recursive call.

    Args:
        U: Collection of partition matrices (rows x clusters); entries may
            be nested collections of partitions.
        relabel_technique: Forwarded to ``relabelClusts`` as ``method``.
        w: Weights of the partitions. ``None``, ``'all'`` or ``'equal'``
            give every partition weight 1; a single value is repeated for
            every partition; otherwise one entry per partition (an entry
            may be nested to match a nested entry of ``U``).
        X: Data forwarded to ``mn.mseclustersfuzzy`` and ``relabelClusts``.
            NOTE(review): the MSE ordering presumably cannot work with the
            default ``X=None`` - confirm with callers.
        distCriterion: Forwarded to ``relabelClusts``.
        K: Only forwarded to the recursive call made for the first (best)
            partition; afterwards it is replaced by the number of columns
            of that partition.
        GDM: Optional boolean array-like (rows x partitions) marking which
            rows each partition contributes to. A single column is
            repeated for every partition. Defaults to all-True.

    Returns:
        The consensus partition matrix as a float array.

    Raises:
        ValueError: If a partition does not have the same number of
            clusters as the first partition in the ordering.
    """
    nested_types = (list, tuple, np.ndarray)

    # Helping functions
    def calwmeans(w):
        """Return the mean weight of each entry of w, recursing into nests."""
        return np.array([
            np.mean(calwmeans(ww))
            if isinstance(ww, nested_types) else np.mean(ww) for ww in w
        ])

    def orderpartitions(U, method='rand', X=None, GDM=None):
        """Return (order, sorted scores) ranking the partitions in U."""
        if method == 'rand':
            return np.random.permutation(range(len(U))), None
        if method == 'mn':
            # TODO: Implement ranking partitions based on M-N plots
            raise NotImplementedError(
                'Ranking partitions based on the M-N plots logic has not been implemented yet.'
            )
        if method == 'mse':
            R = len(U)
            mses = np.zeros(R)
            for r in range(R):
                if isinstance(U[r][0][0], nested_types):
                    # Nested collection: score it by the mean of its
                    # members' scores.
                    mses[r] = np.mean(
                        orderpartitions(U[r], method=method, X=X, GDM=GDM)[1])
                else:
                    mses[r] = np.mean([
                        mn.mseclustersfuzzy(X,
                                            U[r],
                                            donormalise=False,
                                            GDM=GDM)
                    ])
            order = np.argsort(mses)
            return order, mses[order]
        # Fail loudly instead of implicitly returning None.
        raise ValueError(
            'Unknown partition ordering method: {}'.format(method))

    # Fix parameters
    Uloc = ds.listofarrays2arrayofarrays(U)
    R = len(Uloc)
    if GDM is None:
        GDMloc = np.ones([Uloc[0].shape[0], R], dtype=bool)
    else:
        # Convert first so that array-likes (e.g. lists) are accepted too.
        GDMloc = np.array(GDM)
        if GDMloc.shape[1] == 1 and R > 1:
            GDMloc = np.tile(GDMloc, [1, R])

    # isinstance, not `w is str`: the identity test against the str class
    # is never true for a string value such as 'all'.
    if w is None or (isinstance(w, str) and w in ['all', 'equal']):
        w = np.ones(R)
    elif ds.numel(w) == 1:
        w = np.array([w for _ in range(R)])
    wmeans = calwmeans(w)

    # Work!
    # Process the partitions from the lowest to the highest mean MSE.
    permR = orderpartitions(Uloc,
                            method='mse',
                            X=X,
                            GDM=None if GDM is None else GDMloc)[0]
    Uloc = Uloc[permR]
    if GDMloc.shape[1] > 1:
        GDMloc = GDMloc[:, permR]
    wmeans = wmeans[permR]

    # Uloc and wmeans are now in permuted order while w is still in the
    # caller's order, so the weights of Uloc[r] are w[permR[r]].
    if isinstance(Uloc[0][0][0], nested_types):
        Uloc[0] = generateCoPaM(Uloc[0],
                                relabel_technique=relabel_technique,
                                w=w[permR[0]],
                                X=X,
                                distCriterion=distCriterion,
                                K=K,
                                GDM=GDMloc)
    CoPaM = np.array(Uloc[0], dtype=float)
    K = CoPaM.shape[1]
    for r in range(1, R):
        if isinstance(Uloc[r][0][0], nested_types):
            Uloc[r] = generateCoPaM(Uloc[r],
                                    relabel_technique=relabel_technique,
                                    w=w[permR[r]],
                                    X=X,
                                    distCriterion=distCriterion,
                                    K=K,
                                    GDM=GDMloc)
        if Uloc[r].shape[1] != K:
            raise ValueError(
                'Inequal numbers of clusters in the partition {}.'.format(r))

        # Align the cluster labels of this partition with the consensus.
        Uloc[r] = relabelClusts(CoPaM,
                                Uloc[r],
                                method=relabel_technique,
                                X=X,
                                distCriterion=distCriterion)

        # Running weighted average, restricted to rows with a positive
        # accumulated weight: undo the previous normalisation, add the new
        # weighted partition, then normalise by the updated total weight.
        dotprod = np.dot(GDMloc[:, 0:r],
                         wmeans[0:r].transpose())  # (Mxr) * (rx1) = (Mx1)
        CoPaM[dotprod > 0] = nu.multiplyaxis(CoPaM[dotprod > 0],
                                             dotprod[dotprod > 0],
                                             axis=1)
        CoPaM[dotprod > 0] += wmeans[r] * Uloc[r][dotprod > 0]
        dotprod = np.dot(GDMloc[:, 0:(r + 1)], wmeans[0:(r + 1)].transpose())
        CoPaM[dotprod > 0] = nu.divideaxis(CoPaM[dotprod > 0],
                                           dotprod[dotprod > 0],
                                           axis=1)

    return CoPaM