Ejemplo n.º 1
0
class AP(object):
    """Thin wrapper that delegates to an ``AffinityPropagation`` estimator.

    The wrapped estimator lives on ``self.model``; every public method
    forwards to the estimator method of the same name.

    NOTE(review): ``AffinityPropagation`` is presumably
    ``sklearn.cluster.AffinityPropagation`` -- its import is outside this
    block, confirm against the file header.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False):
        """Build the wrapped estimator.

        Every argument is passed through, unchanged, as the keyword argument
        of the same name to ``AffinityPropagation``.

        :param damping: forwarded as ``damping`` (default ``.5``).
        :param max_iter: forwarded as ``max_iter`` (default ``200``).
        :param convergence_iter: forwarded as ``convergence_iter``
            (default ``15``).
        :param copy: forwarded as ``copy`` (default ``True``).
        :param preference: forwarded as ``preference`` (default ``None``).
        :param affinity: forwarded as ``affinity`` (default ``'euclidean'``).
        :param verbose: forwarded as ``verbose`` (default ``False``).
        """
        self.model = AffinityPropagation(damping=damping,
                                         max_iter=max_iter,
                                         convergence_iter=convergence_iter,
                                         copy=copy,
                                         preference=preference,
                                         affinity=affinity,
                                         verbose=verbose)

    def fit(self, x, y=None):
        """Fit the wrapped estimator on ``x``.

        :param x: data handed to the estimator as ``X``.
        :param y: handed to the estimator as ``y``.
        :return: this wrapper, so calls can be chained
            (``AP().fit(x).predict(x)``).
        """
        self.model.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Return whatever the estimator's ``fit_predict(X=x, y=y)`` returns."""
        return self.model.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the wrapped estimator's parameters (``get_params(deep)``)."""
        return self.model.get_params(deep=deep)

    def predict(self, x):
        """Return whatever the estimator's ``predict(X=x)`` returns."""
        return self.model.predict(X=x)

    def set_params(self, **params):
        """Forward ``params`` to the estimator's ``set_params``.

        :return: this wrapper, so calls can be chained.
        """
        self.model.set_params(**params)
        return self

    def get_attributes(self):
        """Collect the fitted attributes of the wrapped estimator.

        :return: the tuple ``(cluster_centers_, cluster_centers_indices_,
            labels_, affinity_matrix_, n_iter_)`` read from ``self.model``.
        """
        fitted = self.model
        return (fitted.cluster_centers_,
                fitted.cluster_centers_indices_,
                fitted.labels_,
                fitted.affinity_matrix_,
                fitted.n_iter_)
Ejemplo n.º 2
0
class ComplexBuilder(object):
    """Cluster pairwise interaction distances into putative complexes.

    ``fit`` turns a table of pairwise metrics into a square distance matrix,
    embeds it with UMAP and runs the clustering algorithm selected at
    construction time.
    """

    # UMAP settings used by ``fit`` when the caller passes ``umapKwargs=None``.
    _DEFAULT_UMAP_KWARGS = {
        "min_dist": 1e-7,
        "n_neighbors": 4,
        "random_state": 350
    }
    # Method names that ``fit`` knows how to run.
    _SUPPORTED_METHODS = ("OPTICS", "AGGLOMERATIVE_CLUSTERING",
                          "AFFINITY_PROPAGATION", "HDBSCAN")
    # Row-wise reductions accepted as ``poolMethod``.
    _POOL_METHODS = ("mean", "max", "min")

    def __init__(self, method="HDBSCAN"):
        """Create the clusterer for ``method``.

        ``method`` is one of "OPTICS", "AGGLOMERATIVE_CLUSTERING",
        "AFFINITY_PROPAGATION" or "HDBSCAN". Any other value leaves
        ``self.clustering`` unset; ``fit`` rejects such an instance.
        """
        if method == "OPTICS":
            self.clustering = OPTICS(min_samples=2,
                                     metric="precomputed",
                                     n_jobs=4)
        elif method == "AGGLOMERATIVE_CLUSTERING":
            self.clustering = AgglomerativeClustering(affinity="precomputed")
        elif method == "AFFINITY_PROPAGATION":
            self.clustering = AffinityPropagation(affinity="precomputed")
        elif method == "HDBSCAN":
            self.clustering = hdbscan.HDBSCAN(min_cluster_size=2)
        self.method = method

    def set_params(self, params):
        """Forward the dict ``params`` to the clusterer's ``set_params``."""
        self.clustering.set_params(**params)

    @classmethod
    def _resolveUmapKwargs(cls, umapKwargs, metric):
        """Return a fresh UMAP keyword dict with ``metric`` set.

        Copies the caller's dict (or the class defaults when ``umapKwargs``
        is None) so that neither is modified.
        """
        base = cls._DEFAULT_UMAP_KWARGS if umapKwargs is None else umapKwargs
        resolved = dict(base)
        resolved["metric"] = metric
        return resolved

    def fit(self,
            X,
            metricColumns,
            scaler=None,
            inv=False,
            poolMethod="min",
            umapKwargs=None,
            generateSquareMatrix=True,
            preCompEmbedding=None,
            useSquareMatrixForCluster=False,
            entryColumns=None):
        """
        Fits predicted interactions to potential macromolecular complexes.

        Parameters
        ----------
        X : pandas.DataFrame or None
            Pairwise table (one row per pair) when ``generateSquareMatrix``
            is true, otherwise the data handed to UMAP directly.
        metricColumns : list
            Columns of ``X`` pooled into one distance per pair.
        scaler : callable, optional
            Applied to ``X[metricColumns]`` before pooling.
        inv : bool
            If true the pooled value is replaced by ``1 - value``.
        poolMethod : {"min", "max", "mean"}
            Row-wise reduction over ``metricColumns``.
        umapKwargs : dict, optional
            Keyword arguments for ``umap.UMAP``. ``None`` selects
            ``min_dist=1e-7, n_neighbors=4, random_state=350``. The dict is
            copied, never modified; its "metric" entry is always overridden.
        generateSquareMatrix : bool
            Build a square distance matrix from ``X`` and embed it with the
            "precomputed" metric; otherwise embed ``X`` with "correlation".
        preCompEmbedding : pandas.DataFrame, optional
            Ready-made embedding; when given, no UMAP run takes place.
        useSquareMatrixForCluster : bool
            HDBSCAN only: cluster ``X`` as a precomputed matrix instead of
            the embedding.
        entryColumns : list, optional
            The two columns naming the members of each pair. ``None``
            selects ``["E1", "E2"]``.

        Returns
        -------
        tuple
            For "HDBSCAN" a 7-tuple ``(cluster labels, labels, X,
            probabilities, ["None"] * n, embedding, pooledDistances)``.
            For "OPTICS" a 5-tuple ``(cluster labels, labels, X, ordered
            reachability, ordered core distances)``. For the two remaining
            methods a 5-tuple whose last two items are lists of "None".

        Raises
        ------
        ValueError
            If ``self.method`` is not supported, or if both ``X`` and
            ``preCompEmbedding`` are None.
        """
        # Fail before the expensive UMAP step instead of silently returning
        # None once it has finished.
        if self.method not in self._SUPPORTED_METHODS:
            raise ValueError(
                "Unknown clustering method {!r}. Expected one of {}.".format(
                    self.method, ", ".join(self._SUPPORTED_METHODS)))
        if entryColumns is None:
            entryColumns = ["E1", "E2"]

        pooledDistances = None
        if preCompEmbedding is not None:
            embed = preCompEmbedding.values
            labels = preCompEmbedding.index.values
            print("Info :: Aligned UMAP was precomputed. ")
        elif X is None:
            # Checked before X is touched so that every "no data" call ends
            # here rather than in an AttributeError on ``X.index``.
            raise ValueError(
                "X and preCompEmbedding are both None. No data for UMAP.")
        elif generateSquareMatrix:
            X, labels, pooledDistances = self._makeSquareMatrix(
                X, metricColumns, scaler, inv, poolMethod, entryColumns)
            print("Info :: Umap calculations started.")
            embed = umap.UMAP(
                **self._resolveUmapKwargs(umapKwargs, "precomputed")
            ).fit_transform(X)
        else:
            labels = X.index.values
            embed = umap.UMAP(
                **self._resolveUmapKwargs(umapKwargs, "correlation")
            ).fit_transform(X)

        # Only the HDBSCAN branch consumes ``embed``; the other methods
        # cluster ``X`` itself.
        if self.method == "OPTICS":
            clusterLabels = self.clustering.fit_predict(X)
            ordering = self.clustering.ordering_
            return (clusterLabels, labels, X,
                    self.clustering.reachability_[ordering],
                    self.clustering.core_distances_[ordering])
        if self.method in ("AGGLOMERATIVE_CLUSTERING",
                           "AFFINITY_PROPAGATION"):
            clusterResult = self.clustering.fit_predict(X)
            return (clusterResult, labels, X, ["None"] * labels.size,
                    ["None"] * labels.size)

        # Remaining supported method: HDBSCAN.
        if useSquareMatrixForCluster:
            # NOTE(review): the metric stays "precomputed" on the clusterer
            # after this call; a later fit on the embedding would need it
            # reset by the caller.
            self.set_params({"metric": "precomputed"})
            clusterResult = self.clustering.fit(X)
        else:
            clusterResult = self.clustering.fit(embed)
        return (clusterResult.labels_, labels, X,
                clusterResult.probabilities_, ["None"] * labels.size, embed,
                pooledDistances)

    def _makeSquareMatrix(self, X, metricColumns, scaler, inv, poolMethod,
                          entryColumns):
        """Build a symmetric distance matrix from a pairwise table.

        Side effect: a "meanDistance" column is written into the frame passed
        as ``X`` (it holds the pooled value, whatever ``poolMethod`` is).

        The distance is read from position 2 of each
        ``entryColumns + ["meanDistance"]`` row, so exactly two entry columns
        are expected.

        Returns ``(matrix, uniqueValues, X)`` where ``uniqueValues`` are the
        sorted unique entries labelling rows/columns of ``matrix`` and ``X``
        is the table without rows whose pooled distance is NaN.

        Raises ValueError if ``poolMethod`` is not "mean", "max" or "min".
        """
        if poolMethod not in self._POOL_METHODS:
            raise ValueError(
                "Unknown poolMethod {!r}. Expected one of {}.".format(
                    poolMethod, ", ".join(self._POOL_METHODS)))
        entryColumns = list(entryColumns)

        metricValues = X[metricColumns]
        if scaler is not None:
            metricValues = scaler(metricValues)
        # poolMethod names the reduction to call: .mean / .max / .min.
        X["meanDistance"] = getattr(metricValues, poolMethod)(axis=1)

        if inv:
            X['meanDistance'] = 1 - X['meanDistance']

        X = X.dropna(subset=["meanDistance"])

        uniqueValues = np.unique(X[entryColumns])
        positionOf = {value: n for n, value in enumerate(uniqueValues)}
        nCols = nRows = uniqueValues.size
        print("Info :: Creating {} x {} distance matrix".format(nCols, nCols))
        # Pairs absent from X keep the fill value.
        matrix = np.full(shape=(nRows, nCols),
                         fill_value=2.0 if scaler is not None else 1.0)
        columnNames = entryColumns + ["meanDistance"]
        for row in X[columnNames].values:
            nRow = positionOf[row[0]]
            nCol = positionOf[row[1]]
            # Mirror the value so the matrix stays symmetric; later rows
            # overwrite earlier ones for a repeated pair.
            matrix[nRow, nCol] = row[2]
            matrix[nCol, nRow] = row[2]
        if scaler is not None:
            # Min-max rescale to [0, 1]; the fill value takes part in it.
            lowest = np.min(matrix)
            matrix = (matrix - lowest) / (np.max(matrix) - lowest)
        np.fill_diagonal(matrix, 0)

        return matrix, uniqueValues, X
Ejemplo n.º 3
0
class AP(object):
    """Thin wrapper that delegates to an ``AffinityPropagation`` estimator.

    The wrapped estimator lives on ``self.ap_cluster``; the methods forward
    to it or read its fitted attributes.

    NOTE(review): ``AffinityPropagation`` is presumably
    ``sklearn.cluster.AffinityPropagation`` -- its import is outside this
    block, confirm against the file header.
    """

    def __init__(self,
                 damping=.5,
                 max_iter=200,
                 convergence_iter=15,
                 copy=True,
                 preference=None,
                 affinity='euclidean',
                 verbose=False,
                 random_state='warn'):
        """
        Build the wrapped estimator.

        Parameters
        ----------
        damping : optional
            Damping factor, between 0.5 and 1. The default is .5.
        max_iter : optional
            Maximum number of iterations. The default is 200.
        convergence_iter : optional
            Number of iterations without a change in the number of estimated
            clusters after which convergence stops. The default is 15.
        copy : optional
            Whether to copy the input data. The default is True.
        preference : optional
            Forwarded as ``preference``. The default is None.
        affinity : optional
            One of {"euclidean", "precomputed"}: Euclidean distance or a
            precomputed affinity. The default is 'euclidean'.
        verbose : optional
            Forwarded as ``verbose``. The default is False.
        random_state : optional
            Forwarded as ``random_state`` unless it is the string 'warn'
            (the default), in which case it is left out and the estimator's
            own default applies.

        Returns
        -------
        None.

        """
        self.ap_cluster = AffinityPropagation(
            **self._estimator_kwargs(random_state,
                                     damping=damping,
                                     max_iter=max_iter,
                                     convergence_iter=convergence_iter,
                                     copy=copy,
                                     preference=preference,
                                     affinity=affinity,
                                     verbose=verbose))

    @staticmethod
    def _estimator_kwargs(random_state, **kwargs):
        """Return ``kwargs`` plus ``random_state`` unless it is 'warn'.

        'warn' was a transitional default of the estimator (presumably
        scikit-learn 0.23/0.24 -- TODO confirm the pinned version); releases
        without it reject the string. Omitting the keyword keeps the old
        behaviour where the sentinel exists and works where it does not.
        """
        is_sentinel = isinstance(random_state, str) and random_state == 'warn'
        if not is_sentinel:
            kwargs['random_state'] = random_state
        return kwargs

    def fit(self, x, y=None):
        """Fit the wrapped estimator on ``x`` and return this wrapper."""
        self.ap_cluster.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Return whatever the estimator's ``fit_predict(X=x, y=y)`` returns."""
        return self.ap_cluster.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the wrapped estimator's parameters (``get_params(deep)``)."""
        return self.ap_cluster.get_params(deep=deep)

    def set_params(self, params):
        """Apply the dict ``params`` via the estimator's ``set_params``.

        Returns this wrapper so calls can be chained.
        """
        self.ap_cluster.set_params(**params)
        return self

    def predict(self, x):
        """Return whatever the estimator's ``predict(X=x)`` returns."""
        return self.ap_cluster.predict(X=x)

    def get_cluster_centers_indices(self):
        """Return the estimator's ``cluster_centers_indices_`` attribute."""
        return self.ap_cluster.cluster_centers_indices_

    def get_cluster_centers(self):
        """Return the estimator's ``cluster_centers_`` attribute."""
        return self.ap_cluster.cluster_centers_

    def get_labels(self):
        """Return the estimator's ``labels_`` attribute."""
        return self.ap_cluster.labels_

    def get_affinity_matrix(self):
        """Return the estimator's ``affinity_matrix_`` attribute."""
        return self.ap_cluster.affinity_matrix_

    def get_n_iter(self):
        """Return the estimator's ``n_iter_`` attribute."""
        return self.ap_cluster.n_iter_