class AP(object):
    """Delegating wrapper around an ``AffinityPropagation`` estimator.

    The wrapped estimator is stored on ``self.model``; every method below
    forwards to the method of the same name on that object.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False):
        """Build the wrapped estimator.

        All arguments are passed through unchanged, by keyword, to the
        ``AffinityPropagation`` constructor.

        :param damping: forwarded as ``damping``
        :param max_iter: forwarded as ``max_iter``
        :param convergence_iter: forwarded as ``convergence_iter``
        :param copy: forwarded as ``copy``
        :param preference: forwarded as ``preference``
        :param affinity: forwarded as ``affinity``
        :param verbose: forwarded as ``verbose``
        """
        settings = {
            "damping": damping,
            "max_iter": max_iter,
            "convergence_iter": convergence_iter,
            "copy": copy,
            "preference": preference,
            "affinity": affinity,
            "verbose": verbose,
        }
        self.model = AffinityPropagation(**settings)

    def fit(self, x, y=None):
        """Fit the wrapped estimator on ``x``; returns ``None``."""
        estimator = self.model
        estimator.fit(X=x, y=y)

    def fit_predict(self, x, y=None):
        """Return whatever the wrapped estimator's ``fit_predict`` returns."""
        estimator = self.model
        return estimator.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the wrapped estimator's parameters."""
        estimator = self.model
        return estimator.get_params(deep=deep)

    def predict(self, x):
        """Return the wrapped estimator's prediction for ``x``."""
        estimator = self.model
        return estimator.predict(X=x)

    def set_params(self, **params):
        """Forward keyword parameters to the wrapped estimator; returns ``None``."""
        estimator = self.model
        estimator.set_params(**params)

    def get_attributes(self):
        """Return the fitted attributes of the wrapped estimator as a tuple.

        The tuple order is ``(cluster_centers_, cluster_centers_indices_,
        labels_, affinity_matrix_, n_iter_)``.
        """
        estimator = self.model
        attribute_names = (
            "cluster_centers_",
            "cluster_centers_indices_",
            "labels_",
            "affinity_matrix_",
            "n_iter_",
        )
        return tuple(getattr(estimator, name) for name in attribute_names)
class ComplexBuilder(object):
    """Cluster pairwise interaction scores into putative complexes.

    The builder owns one clustering estimator, selected by ``method``, and
    runs it either on a square distance matrix assembled from pairwise
    scores or on a UMAP embedding of the input.
    """

    # Methods that ``fit`` knows how to dispatch on.
    _SUPPORTED_METHODS = (
        "OPTICS",
        "AGGLOMERATIVE_CLUSTERING",
        "AFFINITY_PROPAGATION",
        "HDBSCAN",
    )

    def __init__(self, method="HDBSCAN"):
        """Create the clustering estimator for ``method``.

        Parameters
        ----------
        method : str
            One of ``"OPTICS"``, ``"AGGLOMERATIVE_CLUSTERING"``,
            ``"AFFINITY_PROPAGATION"`` or ``"HDBSCAN"``. Any other value
            leaves ``self.clustering`` unset; ``fit`` rejects it with a
            ``ValueError``.
        """
        if method == "OPTICS":
            self.clustering = OPTICS(min_samples=2,
                                     metric="precomputed",
                                     n_jobs=4)
        elif method == "AGGLOMERATIVE_CLUSTERING":
            # NOTE(review): scikit-learn documents that the default
            # linkage ("ward") only accepts euclidean distances, so a
            # different linkage presumably has to be supplied through
            # set_params before fitting - confirm against the installed
            # scikit-learn version.
            self.clustering = AgglomerativeClustering(affinity="precomputed")
        elif method == "AFFINITY_PROPAGATION":
            self.clustering = AffinityPropagation(affinity="precomputed")
        elif method == "HDBSCAN":
            self.clustering = hdbscan.HDBSCAN(min_cluster_size=2)
        self.method = method

    def set_params(self, params):
        """Forward the dict ``params`` as keyword arguments to the estimator."""
        self.clustering.set_params(**params)

    def fit(self,
            X,
            metricColumns,
            scaler=None,
            inv=False,
            poolMethod="min",
            umapKwargs=None,
            generateSquareMatrix=True,
            preCompEmbedding=None,
            useSquareMatrixForCluster=False,
            entryColumns=None):
        """Fits predicted interactions to potential macromolecular complexes.

        Parameters
        ----------
        X : pandas.DataFrame or None
            Input data. With ``generateSquareMatrix`` it is treated as a
            table of pairs (``entryColumns`` plus ``metricColumns``) and is
            replaced by the square distance matrix; otherwise it is embedded
            directly with UMAP using the "correlation" metric.
        metricColumns : list
            Columns of ``X`` that are pooled into one distance per pair.
        scaler : callable or None
            Optional callable applied to ``X[metricColumns]`` before pooling.
        inv : bool
            If true the pooled value is replaced by ``1 - value``.
        poolMethod : {"min", "max", "mean"}
            Row-wise reduction used to pool the metric columns.
        umapKwargs : dict or None
            Keyword arguments for ``umap.UMAP``. ``None`` selects
            ``{"min_dist": 1e-7, "n_neighbors": 4, "random_state": 350}``.
            The dict is copied, so the caller's object is never modified.
        generateSquareMatrix : bool
            Build the square distance matrix from ``X`` (see above).
        preCompEmbedding : pandas.DataFrame or None
            Pre-computed embedding; when given, no UMAP is run and its index
            supplies the labels.
        useSquareMatrixForCluster : bool
            HDBSCAN only: cluster ``X`` with ``metric="precomputed"``
            instead of clustering the embedding.
        entryColumns : list or None
            The two columns naming the members of each pair. ``None``
            selects ``["E1", "E2"]``.

        Returns
        -------
        tuple
            The arity depends on ``self.method`` (kept for compatibility):

            * OPTICS: ``(clusterLabels, labels, X, reachability,
              coreDistances)``, the last two in cluster ordering.
            * AGGLOMERATIVE_CLUSTERING / AFFINITY_PROPAGATION:
              ``(clusterLabels, labels, X, ["None"] * n, ["None"] * n)``.
            * HDBSCAN: ``(clusterLabels, labels, X, probabilities,
              ["None"] * n, embed, pooledDistances)``.

        Raises
        ------
        ValueError
            If ``self.method`` is unsupported, or if both ``X`` and
            ``preCompEmbedding`` are ``None``.
        """
        # Fail before the expensive embedding instead of silently
        # returning None once it has been computed.
        if self.method not in self._SUPPORTED_METHODS:
            raise ValueError(
                "Unsupported clustering method {!r}. Expected one of: {}.".
                format(self.method, ", ".join(self._SUPPORTED_METHODS)))

        # Work on private copies: "metric" is written into the UMAP
        # settings below and must not leak into the caller's dict or into
        # a default shared between calls.
        if umapKwargs is None:
            umapSettings = {
                "min_dist": 1e-7,
                "n_neighbors": 4,
                "random_state": 350
            }
        else:
            umapSettings = dict(umapKwargs)
        if entryColumns is None:
            entryColumns = ["E1", "E2"]

        pooledDistances = None
        if preCompEmbedding is not None:
            embed = preCompEmbedding.values
            labels = preCompEmbedding.index.values
            print("Info :: Aligned UMAP was precomputed. ")
        elif X is None:
            # Checked before any use of X so that a missing input is
            # reported the same way whatever generateSquareMatrix is.
            raise ValueError(
                "X and preCompEmbedding are both None. No data for UMAP.")
        elif generateSquareMatrix:
            X, labels, pooledDistances = self._makeSquareMatrix(
                X, metricColumns, scaler, inv, poolMethod, entryColumns)
            print("Info :: Umap calculations started.")
            umapSettings["metric"] = "precomputed"
            embed = umap.UMAP(**umapSettings).fit_transform(X)
        else:
            labels = X.index.values
            umapSettings["metric"] = "correlation"
            embed = umap.UMAP(**umapSettings).fit_transform(X)

        if self.method == "OPTICS":
            clusterLabels = self.clustering.fit_predict(X)
            ordering = self.clustering.ordering_
            return (clusterLabels, labels, X,
                    self.clustering.reachability_[ordering],
                    self.clustering.core_distances_[ordering])

        if self.method in ("AGGLOMERATIVE_CLUSTERING", "AFFINITY_PROPAGATION"):
            clusterResult = self.clustering.fit_predict(X)
            return (clusterResult, labels, X, ["None"] * labels.size,
                    ["None"] * labels.size)

        # Only HDBSCAN is left after the validation above.
        if useSquareMatrixForCluster:
            # NOTE: this setting stays on the estimator for later calls.
            self.set_params({"metric": "precomputed"})
            clusterResult = self.clustering.fit(X)
        else:
            clusterResult = self.clustering.fit(embed)
        return (clusterResult.labels_, labels, X,
                clusterResult.probabilities_, ["None"] * labels.size, embed,
                pooledDistances)

    def _makeSquareMatrix(self, X, metricColumns, scaler, inv, poolMethod,
                          entryColumns):
        """Pool the metric columns and spread them into a square matrix.

        A ``meanDistance`` column is written into ``X`` (the column keeps
        that name whichever ``poolMethod`` is used), rows where it is NaN
        are dropped, and every remaining pair fills the two symmetric cells
        of the matrix. Pairs that never occur keep the fill value (2.0 with
        a scaler, 1.0 without). With a scaler the matrix is min-max
        normalised afterwards. The diagonal is always set to 0.

        Returns
        -------
        tuple
            ``(matrix, uniqueValues, X)`` where ``uniqueValues`` are the
            sorted unique entries labelling rows/columns and ``X`` is the
            NaN-filtered frame including ``meanDistance``.

        Raises
        ------
        ValueError
            If ``poolMethod`` is not "mean", "max" or "min".
        """
        if poolMethod not in ("mean", "max", "min"):
            raise ValueError(
                "Unknown poolMethod {!r}. Expected 'mean', 'max' or 'min'.".
                format(poolMethod))

        metrics = X[metricColumns] if scaler is None else scaler(
            X[metricColumns])
        # poolMethod names the row-wise reduction to apply.
        X["meanDistance"] = getattr(metrics, poolMethod)(axis=1)
        if inv:
            X["meanDistance"] = 1 - X["meanDistance"]
        X = X.dropna(subset=["meanDistance"])

        uniqueValues = np.unique(X[entryColumns])
        uniqueVDict = {value: n for n, value in enumerate(uniqueValues)}
        nCols = nRows = uniqueValues.size
        print("Info :: Creating {} x {} distance matrix".format(nCols, nCols))
        matrix = np.full(shape=(nRows, nCols),
                         fill_value=2.0 if scaler is not None else 1.0)

        columnNames = list(entryColumns) + ["meanDistance"]
        # Row by row on purpose: for a pair listed more than once the last
        # occurrence wins in both symmetric cells.
        for row in X[columnNames].values:
            nRow = uniqueVDict[row[0]]
            nCol = uniqueVDict[row[1]]
            matrix[[nRow, nCol], [nCol, nRow]] = row[2]

        if scaler is not None:
            matrix = (matrix - np.min(matrix)) / (np.max(matrix) -
                                                  np.min(matrix))
        np.fill_diagonal(matrix, 0)
        return matrix, uniqueValues, X
class AP(object):
    """Delegating wrapper around an ``AffinityPropagation`` estimator.

    The wrapped estimator is stored on ``self.ap_cluster``.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False, random_state='warn'):
        """
        Build the wrapped estimator; every argument is forwarded by keyword.

        Parameters
        ----------
        damping : optional
            Damping factor, between 0.5 and 1. The default is .5.
        max_iter : optional
            Maximum number of iterations. The default is 200.
        convergence_iter : optional
            Number of iterations with no change in the estimated number of
            clusters after which convergence stops. The default is 15.
        copy : optional
            Copy the input data. The default is True.
        preference : optional
            The default is None.
        affinity : optional
            {"euclidean", "precomputed"}: euclidean distance or a
            precomputed affinity. The default is 'euclidean'.
        verbose : optional
            The default is False.
        random_state : optional
            The default is 'warn'.

        Returns
        -------
        None.
        """
        settings = {
            "damping": damping,
            "max_iter": max_iter,
            "convergence_iter": convergence_iter,
            "copy": copy,
            "preference": preference,
            "affinity": affinity,
            "verbose": verbose,
            "random_state": random_state,
        }
        self.ap_cluster = AffinityPropagation(**settings)

    def _read(self, attribute):
        """Return the named attribute of the wrapped estimator."""
        return getattr(self.ap_cluster, attribute)

    def fit(self, x, y=None):
        """Fit the wrapped estimator on ``x``; returns ``None``."""
        estimator = self.ap_cluster
        estimator.fit(X=x, y=y)

    def fit_predict(self, x, y=None):
        """Return whatever the wrapped estimator's ``fit_predict`` returns."""
        estimator = self.ap_cluster
        return estimator.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the wrapped estimator's parameters."""
        estimator = self.ap_cluster
        return estimator.get_params(deep=deep)

    def set_params(self, params):
        """Forward the dict ``params`` as keyword arguments; returns ``None``."""
        estimator = self.ap_cluster
        estimator.set_params(**params)

    def predict(self, x):
        """Return the wrapped estimator's prediction for ``x``."""
        estimator = self.ap_cluster
        return estimator.predict(X=x)

    def get_cluster_centers_indices(self):
        """Return the estimator's ``cluster_centers_indices_``."""
        return self._read("cluster_centers_indices_")

    def get_cluster_centers(self):
        """Return the estimator's ``cluster_centers_``."""
        return self._read("cluster_centers_")

    def get_labels(self):
        """Return the estimator's ``labels_``."""
        return self._read("labels_")

    def get_affinity_matrix(self):
        """Return the estimator's ``affinity_matrix_``."""
        return self._read("affinity_matrix_")

    def get_n_iter(self):
        """Return the estimator's ``n_iter_``."""
        return self._read("n_iter_")