コード例 #1
0
def cache_kmeans_init(X, K, methods, datasetID):
    """Pre-compute the 'KA' k-means initialisation and cache it per dataset.

    The initialisation is computed once for the largest value in ``K`` and
    stored in the module-level ``kmeans_init`` under ``datasetID``.  Nothing
    is cached when ``datasetID`` is -1, when ``methods`` holds no k-means
    entry, or when the resolved ``init`` parameter is not ``'KA'``.

    Args:
        X: Data to cluster; ``X.shape[0]`` is compared against
            ``maxgenesinsetforpdist`` to choose between the regular and the
            memory-saving initialiser.
        K: Cluster count(s); only ``np.max(K)`` is used.
        methods: Iterable of method specifications.  Each item is either a
            bare method name or a list/tuple/ndarray whose first element is
            the name and whose remaining elements are that method's
            parameters.
        datasetID: Key under which the initialisation is cached; -1 disables
            caching.

    Returns:
        None.
    """
    global kmeans_init

    if datasetID == -1:
        return

    # Resolve the k-means parameters.  When several k-means entries are
    # present, the last one determines the settings.
    kmeans_settings = None
    for method in methods:
        if isinstance(method, (list, tuple, np.ndarray)):
            spec = method
        else:
            spec = [method]
        if spec[0].lower() not in ('k-means', 'kmeans'):
            continue

        params = spec[1:]
        paramsloc = params.tolist() if isinstance(params, np.ndarray) else params
        # Same names and defaults as ckmeans (including 'n_init'), so a
        # k-means specification is resolved identically in both places.
        # Fresh lists are built for every call, as before.
        pnames = ['init', 'max_iter', 'n_jobs', 'distance', 'n_init']
        dflts = ['KA', 300, -1, 'euclidean', 1]
        init, _, _, distance, _ = ds.resolveargumentpairs(
            pnames, dflts, paramsloc)
        kmeans_settings = (init, distance)

    if kmeans_settings is None:
        return

    # Initialise over the largest K value and cache the result, but only for
    # the 'KA' initialisation.
    init, distance = kmeans_settings
    if init == 'KA':
        if X.shape[0] <= maxgenesinsetforpdist:
            initialiser = initclusterKA
        else:
            initialiser = initclusterKA_memorysaver
        kmeans_init[datasetID] = initialiser(X, np.max(K), distance)
コード例 #2
0
def ckmeans(X, K, datasetID=-1, params=()):
    """Cluster ``X`` into ``K`` clusters with k-means.

    Args:
        X: Data to cluster; passed to ``KMeans.fit``.
        K: Number of clusters.
        datasetID: If this key is present in the module-level ``kmeans_init``
            cache, the first ``K`` entries of the cached initialisation are
            used as the initial centres, overriding the ``init`` parameter.
        params: Parameters handed to ``ds.resolveargumentpairs`` together
            with the names and defaults listed below.  An ndarray is
            converted to a list first.  ``distance`` is only used by the
            'KA' initialisation; it is not passed to ``KMeans``.

    Returns:
        The result of ``clustVec2partMat`` applied to the fitted labels
        and ``K``.
    """
    global kmeans_init

    pnames = ['init', 'max_iter', 'n_jobs', 'distance', 'n_init']
    # Alternative defaults kept for reference: init='k-means++' with
    # n_init=10 instead of the 'KA' initialisation with a single run.
    dflts = ['KA', 300, -1, 'euclidean', 1]
    if isinstance(params, np.ndarray):
        paramsloc = params.tolist()
    else:
        paramsloc = params
    (init, max_iter, n_jobs, distance,
     n_init) = ds.resolveargumentpairs(pnames, dflts, paramsloc)

    if datasetID in kmeans_init:
        # Reuse the cached initialisation, truncated to the K requested here.
        init = kmeans_init[datasetID][0:K]
    elif init == 'KA':
        if X.shape[0] <= maxgenesinsetforpdist:
            init = initclusterKA(X, K, distance)
        else:
            init = initclusterKA_memorysaver(X, K, distance)

    kmeans_kwargs = dict(init=init, max_iter=max_iter, n_init=n_init)
    try:
        model = skcl.KMeans(K, n_jobs=n_jobs, **kmeans_kwargs)
    except TypeError:
        # scikit-learn deprecated KMeans' n_jobs argument in 0.23 and removed
        # it in 1.0 (assuming skcl is sklearn.cluster); on such versions the
        # constructor rejects the keyword, so build the model without it.
        model = skcl.KMeans(K, **kmeans_kwargs)

    C = model.fit(X).labels_
    return clustVec2partMat(C, K)
コード例 #3
0
def csoms(X, D, params=()):
    """Cluster ``X`` with a self-organising map of grid dimensions ``D``.

    Args:
        X: Data to cluster; converted with ``np.array`` and indexed as a
            two-dimensional array (rows are the items being clustered).
        D: Grid dimensions; ``D[0] * D[1]`` is the number of clusters.
        params: Parameters handed to ``ds.resolveargumentpairs`` for the
            names 'neighbour', 'learning_rate' and 'input_length_ratio'.
            An ndarray is converted to a list first.

    Returns:
        The result of ``clustVec2partMat`` applied to the index of the
        nearest trained centre for each row and the number of clusters.
    """
    option_names = ['neighbour', 'learning_rate', 'input_length_ratio']
    option_defaults = [0.1, 0.2, -1]
    option_pairs = params.tolist() if isinstance(params, np.ndarray) else params
    neighbour, learning_rate, input_length_ratio = ds.resolveargumentpairs(
        option_names, option_defaults, option_pairs)

    data = np.array(X)

    n_clusters = D[0] * D[1]
    n_rows = data.shape[0]
    n_dims = data.shape[1]

    som = sompy.SOM(D, data)
    som.set_parameter(neighbor=neighbour,
                      learning_rate=learning_rate,
                      input_length_ratio=input_length_ratio)

    # One centre per grid node, flattened to (n_clusters, n_dims).
    centres = som.train(n_rows).reshape(n_clusters, n_dims)

    # Assign every row to the centre at the smallest Euclidean distance.
    nearest = [
        np.argmin([spdist.euclidean(centre, row) for centre in centres])
        for row in data
    ]
    return clustVec2partMat(nearest, n_clusters)
コード例 #4
0
def chc(X, K, params=()):
    """Cluster ``X`` into at most ``K`` clusters by hierarchical clustering.

    Args:
        X: Data passed to ``sphc.linkage``.
        K: Maximum number of flat clusters (``criterion='maxclust'``).
        params: Parameters handed to ``ds.resolveargumentpairs`` for the
            names 'linkage_method' (default 'ward') and 'distance'
            (default 'euclidean').  An ndarray is converted to a list first.

    Returns:
        The result of ``clustVec2partMat`` applied to the flat cluster
        labels and ``K``.
    """
    option_names = ['linkage_method', 'distance']
    option_defaults = ['ward', 'euclidean']
    option_pairs = params.tolist() if isinstance(params, np.ndarray) else params
    linkage_method, distance = ds.resolveargumentpairs(
        option_names, option_defaults, option_pairs)

    # Build the linkage tree, then cut it into flat cluster labels.
    tree = sphc.linkage(X, method=linkage_method, metric=distance)
    labels = sphc.fcluster(tree, K, criterion='maxclust')
    return clustVec2partMat(labels, K)