コード例 #1
0
def agglom(E, k=100, linkage="complete", simdist_function="pearson_correlation"):
    """Hierarchical (agglomerative) clustering of E's columns via R hclust/cutree."""
    importr("cluster")
    # Expose the distance matrix to R under the global name "distances".
    ro.globalenv["distances"] = simdist(E, simdist_function, similarity=False)
    ro.r("hclust_results = hclust(as.dist(distances), method='{linkage}')".format(linkage=linkage))
    cut_labels = ro.r("labels = cutree(hclust_results, k={k})".format(k=k))
    return convert_labels2modules(list(cut_labels), E.columns)
コード例 #2
0
def kmedoids(E, number=100, simdist_function="pearson_correlation"):
    """k-medoids (PAM) clustering of E's columns through R's cluster::pam."""
    importr("cluster")

    dist_matrix = simdist(E, simdist_function, similarity=False)
    pam_fit = ro.r["pam"](dist_matrix, diss=True, k=number)
    return convert_labels2modules(list(pam_fit.rx2("clustering")), E.columns)
コード例 #3
0
def agglom(E, k=100, linkage="complete", simdist_function="pearson_correlation", **kwargs):
    """Hierarchical clustering via R hclust/cutree; extra kwargs go to simdist."""
    importr("cluster")
    # Push the distance matrix into R's global environment as "distances".
    ro.globalenv["distances"] = simdist(E, simdist_function, similarity=False, **kwargs)
    ro.r("hclust_results = hclust(as.dist(distances), method='{linkage}')".format(linkage=linkage))
    cut_labels = ro.r("labels = cutree(hclust_results, k={k})".format(k=k))
    return convert_labels2modules(list(cut_labels), E.columns)
コード例 #4
0
def kmedoids(E, k=100, simdist_function="pearson_correlation", **kwargs):
    """k-medoids (PAM) clustering via R; extra kwargs are forwarded to simdist."""
    importr("cluster")

    dist_matrix = simdist(E, simdist_function, similarity=False, **kwargs)
    pam_fit = ro.r["pam"](dist_matrix, diss=True, k=k)
    return convert_labels2modules(list(pam_fit.rx2("clustering")), E.columns)
コード例 #5
0
def mcl(E,
        simdist_function="pearson_correlation",
        inflation=10,
        threshold=None,
        **kwargs):
    """Markov CLustering of E's columns via the external `mcl` binary.

    Builds an abc-format edge list ("node1<TAB>node2<TAB>weight") from the
    pairwise similarities, pipes it to `mcl - --abc`, and parses the
    tab-separated clusters written to stdout.

    Parameters
    ----------
    E : DataFrame whose columns are the items to cluster.
    simdist_function : similarity measure name passed to simdist.
    inflation : mcl inflation parameter (-I).
    threshold : if given, similarities below it are masked out (set to NaN)
        and excluded from the edge list.
    **kwargs : forwarded to simdist.

    Returns
    -------
    list of modules, each a list of item labels (one mcl output line each).
    """
    similarities = simdist(E, simdist_function, **kwargs)

    if threshold is not None:
        similarities.values[similarities.values < threshold] = np.nan

    # BUG FIX: the original filtered with `similarity != np.nan`, which is
    # always True (NaN compares unequal to everything, itself included), so
    # thresholded edges were never actually dropped. Use np.isnan instead.
    # Also: Series.iteritems() was removed in pandas 2.0 — use .items().
    netinput = """
---8<------8<------8<------8<------8<---
""" + "\n".join([
        "\t".join([str(g) for g in edge]) + "\t" + str(similarity)
        for edge, similarity in similarities.stack().items()
        if edge[0] != edge[1] and not np.isnan(similarity)
    ]) + """
--->8------>8------>8------>8------>8---
    """

    p = sp.Popen(['mcl - --abc -I ' + str(inflation) + ' -o -'],
                 stdout=sp.PIPE,
                 stdin=sp.PIPE,
                 stderr=sp.PIPE,
                 shell=True)

    # BUG FIX: bytes(str) without an encoding raises TypeError on Python 3;
    # encode explicitly before writing to the subprocess's stdin.
    stdout_data = p.communicate(input=netinput.encode("utf-8"))

    output = stdout_data[0].decode("utf-8")

    # Each stdout line is one cluster: tab-separated member labels.
    modules = []
    for line in output.splitlines():
        modules.append(line.split("\t"))

    return modules
コード例 #6
0
def dclust(E, rho=0.5, delta=0.5, simdist_function="pearson_correlation"):
    """Density-peak clustering of E's columns through R's densityClust package."""
    ro.packages.importr("densityClust")

    dist_matrix = simdist(E, simdist_function, False)
    density_fit = ro.r["densityClust"](ro.r["as.dist"](dist_matrix))
    density_fit = ro.r["findClusters"](density_fit, rho=rho, delta=delta)

    return convert_labels2modules(list(density_fit.rx2("clusters")), E.columns)
コード例 #7
0
def dclust(E, rho=0.5, delta=0.5, simdist_function="pearson_correlation", **kwargs):
    """Density-peak clustering via R densityClust; extra kwargs go to simdist."""
    ro.packages.importr("densityClust")

    dist_matrix = simdist(E, simdist_function, False, **kwargs)
    density_fit = ro.r["densityClust"](ro.r["as.dist"](dist_matrix))
    density_fit = ro.r["findClusters"](density_fit, rho=rho, delta=delta)

    return convert_labels2modules(list(density_fit.rx2("clusters")), E.columns)
コード例 #8
0
def sota(E, maxCycles=1000, maxEpochs=1000, distance="euclidean", wcell=0.01, pcell=0.005, scell=0.001, delta=1e-04, neighb_level=0, alpha=0.95, unrest_growth=False):
    """Self-organising tree algorithm (SOTA) clustering via R's clValid::sota.

    Standardizes E, derives maxDiversity as the alpha-th percentile of the
    pairwise euclidean distances, and hands everything to R's sota().

    Returns a list of modules (lists of column labels).
    """
    importr("clValid")

    distances = simdist(standardize(E), "euclidean", False)
    # BUG FIX: the original referenced the undefined name
    # `maxDiversity_percentile` (NameError at runtime); the signature
    # parameter `alpha` is the intended percentile (cf. the kwargs variant
    # of this function). Also DataFrame.as_matrix() was removed from
    # pandas — use .values instead.
    maxDiversity = np.percentile(distances.values.flatten(), alpha)

    # Pass the raw ndarray (.values), matching the sibling implementation.
    rresults = ro.r["sota"](standardize(E).T.values, maxCycles, maxEpochs, distance, wcell, pcell, scell, delta, neighb_level, maxDiversity, unrest_growth)

    modules = convert_labels2modules(list(rresults.rx2("clust")), E.columns)

    return modules
コード例 #9
0
def sota(E, maxCycles=1000, maxEpochs=1000, distance="euclidean", wcell=0.01, pcell=0.005, scell=0.001, delta=1e-04, neighb_level=0, alpha=0.95, unrest_growth=False, **kwargs):
    """SOTA clustering via R's clValid::sota; extra kwargs go to simdist."""
    importr("clValid")

    dist_matrix = simdist(standardize(E), "euclidean", False, **kwargs)
    # maxDiversity is the alpha-th percentile of all pairwise distances.
    maxDiversity = np.percentile(dist_matrix.values.flatten(), alpha)

    sota_fit = ro.r["sota"](standardize(E).T.values, maxCycles, maxEpochs,
                            distance, wcell, pcell, scell, delta,
                            neighb_level, maxDiversity, unrest_growth)

    return convert_labels2modules(list(sota_fit.rx2("clust")), E.columns)
コード例 #10
0
def agglom(E,
           k=100,
           linkage="complete",
           simdist_function="pearson_correlation",
           **kwargs):
    """Agglomerative clustering of E's columns with scikit-learn.

    Parameters
    ----------
    E : DataFrame whose columns are the items to cluster.
    k : number of clusters.
    linkage : linkage criterion passed to AgglomerativeClustering.
    simdist_function : distance measure name passed to simdist.
    **kwargs : forwarded to simdist.

    Returns a list of modules (lists of column labels).
    """
    # BUG FIX: **kwargs was accepted but silently dropped; forward it to
    # simdist like every other kwargs-accepting wrapper in this file does.
    distances = simdist(E, simdist_function, similarity=False, **kwargs)
    agglom = sklearn.cluster.AgglomerativeClustering(n_clusters=int(k),
                                                     affinity="precomputed",
                                                     linkage=linkage)
    agglom.fit(distances)
    modules = convert_labels2modules(agglom.labels_, E.columns)
    return modules
コード例 #11
0
def spectral_similarity(E,
                        k=100,
                        seed=None,
                        simdist_function="pearson_correlation",
                        **kwargs):
    """Spectral clustering on a precomputed similarity matrix of E's columns."""
    sims = simdist(E, simdist_function, **kwargs)
    model = sklearn.cluster.SpectralClustering(n_clusters=int(k),
                                               affinity="precomputed",
                                               random_state=seed)
    # +1 shifts correlation-like similarities into a non-negative range.
    model.fit(sims + 1)

    return convert_labels2modules(model.labels_, E.columns)
コード例 #12
0
def affinity(E, preference_fraction=0.5, simdist_function="pearson_correlation", damping=0.5, max_iter=200):
    """Affinity-propagation clustering of E's columns via R's apcluster.

    The preference is set to `preference_fraction` times the range of the
    similarity values. Returns a list of modules (lists of column labels).

    NOTE(review): `damping` and `max_iter` are accepted but never forwarded
    to apcluster — confirm whether they should map to its parameters.
    """
    similarities = simdist(E, simdist_function)

    # BUG FIX: DataFrame.as_matrix() was removed from pandas; use .values
    # (the **kwargs variant of this function already does so).
    similarities_max, similarities_min = similarities.values.max(), similarities.values.min()
    preference = (similarities_max - similarities_min) * preference_fraction

    ro.packages.importr("apcluster")

    rresults = ro.r["apcluster"](s=ro.Matrix(similarities.values), p=preference)
    labels = np.array(ro.r["labels"](rresults, "enum"))

    modules = convert_labels2modules(labels, E.columns)

    return modules
コード例 #13
0
def affinity(E, preference_fraction=0.5, simdist_function="pearson_correlation", damping=0.5, max_iter=200, **kwargs):
    """Affinity-propagation clustering of E's columns via R's apcluster."""
    sims = simdist(E, simdist_function, **kwargs)

    # Preference as a fraction of the similarity value range.
    sim_values = sims.values
    preference = (sim_values.max() - sim_values.min()) * preference_fraction

    ro.packages.importr("apcluster")

    ap_fit = ro.r["apcluster"](s=ro.Matrix(sim_values), p=preference)
    cluster_labels = np.array(ro.r["labels"](ap_fit, "enum"))

    # NOTE(review): `damping` and `max_iter` are accepted but never used
    # here — confirm whether they should be forwarded to apcluster.
    return convert_labels2modules(cluster_labels, E.columns)
コード例 #14
0
def transitivity(E,
                 threshold=0.1,
                 simdist_function="pearson_correlation",
                 cutoff=-1,
                 **kwargs):
    """Transitivity clustering of E's columns using the external TransClust.jar.

    Writes a pairwise similarity file and a thresholded cost file into a
    temporary directory, runs TransClust (found via the PERSOFTWARELOCATION
    environment variable), then parses its item -> module-id output back
    into a list of modules.

    Parameters
    ----------
    E : DataFrame whose columns are the items to cluster.
    threshold : similarity level subtracted to form costs; similarities
        below it are clamped so their cost becomes the constant -1.
    simdist_function : similarity measure name passed to simdist.
    cutoff : -1 for hard clustering; any other value enables TransClust's
        fuzzy mode with this cutoff.
    **kwargs : forwarded to simdist.

    Returns
    -------
    list of modules (lists of column labels); items whose module id is
    <= 0 are treated as unassigned and dropped.
    """
    similarities = simdist(E, simdist_function, **kwargs)

    with TemporaryDirectory() as tmpdir:
        #tmpdir = "../tmp/"
        # save similarity and cost files
        # similarity file is only required for fuzzy clustering
        with open(tmpdir + "/sim.tsv", "w") as outfile:
            for i, (g1, col) in enumerate(similarities.iteritems()):
                for j, (g2, value) in enumerate(col.iteritems()):
                    outfile.write(g1 + "\t" + g2 + "\t" + str(value) + "\n")

        # Cost = similarity - threshold; sub-threshold similarities are first
        # clamped to (threshold - 1) so their final cost is exactly -1.
        cost = similarities.copy()
        cost.values[cost.values < threshold] = threshold - 1
        cost = cost - threshold
        with open(tmpdir + "/cost.tsv", "w") as outfile:
            # Cost-file layout: item count, one item name per line, then one
            # line per row holding that row's strict upper triangle.
            outfile.write(str(cost.shape[0]) + "\n")
            outfile.write("\n".join(cost.index) + "\n")
            # i counts down n..2; row.astype(str)[-i + 1:] keeps the i-1
            # entries right of the diagonal, so the (empty) last row is
            # skipped when zip exhausts the shorter range.
            for i, (j, row) in zip(range(cost.shape[0], 1, -1),
                                   cost.iterrows()):
                outfile.write("\t".join(row.astype(str)[-i + 1:]) + "\n")

        if cutoff == -1:
            fuzzytext = ""
            resultsfile = "results.tsv"
        else:
            # NOTE(review): in fuzzy mode TransClust is assumed to write its
            # output next to the -o path with a "_fuzzy" suffix — confirm.
            fuzzytext = " -fuzzy " + str(cutoff)
            resultsfile = "results.tsv_fuzzy"
        # run the transitivity clustering tool
        command = "java -jar " + os.environ[
            "PERSOFTWARELOCATION"] + "/TransClust.jar -i {tmpdir}/cost.tsv -o {tmpdir}/results.tsv -verbose -sim {tmpdir}/sim.tsv {fuzzytext}".format(
                **locals())

        sp.call(command, shell=True)

        # One row per item: index = item label, value = 1-based module id.
        results = pd.read_csv(tmpdir + "/" + resultsfile,
                              sep="\t",
                              squeeze=True,
                              index_col=0)

    # Convert the id series into modules; ids <= 0 are left unassigned.
    modules = [[] for i in range(results.max())]
    for g, moduleid in results.iteritems():
        if moduleid > 0:
            modules[moduleid - 1].append(g)

    return modules
コード例 #15
0
def spectral_similarity(E, k=100, seed=None, simdist_function="pearson_correlation"):
    """Spectral clustering on a precomputed similarity matrix of E's columns."""
    sims = simdist(E, simdist_function)
    model = sklearn.cluster.SpectralClustering(n_clusters=int(k),
                                               affinity="precomputed",
                                               random_state=seed)
    # +1 shifts correlation-like similarities into a non-negative range.
    model.fit(sims + 1)

    return convert_labels2modules(model.labels_, E.columns)