Code example #1
0
File: ap_sklearn.py  Project: kiminh/resys_recom
class AP(object):
    """Thin wrapper around sklearn's AffinityPropagation clusterer.

    All constructor arguments are forwarded unchanged to
    ``sklearn.cluster.AffinityPropagation``; the fitted attributes can be
    retrieved in one call via :meth:`get_attributes`.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False):
        """
        :param damping: damping factor for message updates, in [0.5, 1.0)
        :param max_iter: maximum number of iterations
        :param convergence_iter: number of iterations with no change in the
            estimated number of clusters that stops the algorithm
        :param copy: whether to make a copy of the input data
        :param preference: per-point preference; points with larger values
            are more likely to be chosen as exemplars (None -> sklearn default)
        :param affinity: 'euclidean' or 'precomputed'
        :param verbose: verbosity flag forwarded to the estimator
        """
        self.model = AffinityPropagation(damping=damping,
                                         max_iter=max_iter,
                                         convergence_iter=convergence_iter,
                                         copy=copy,
                                         preference=preference,
                                         affinity=affinity,
                                         verbose=verbose)

    def fit(self, x, y=None):
        """Fit the clusterer on x (y is ignored; kept for API symmetry).

        Returns self to allow sklearn-style chaining; the original returned
        None, so this is a backward-compatible improvement.
        """
        self.model.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Fit on x and return the cluster label of each sample."""
        return self.model.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the underlying estimator's parameters as a dict."""
        return self.model.get_params(deep=deep)

    def predict(self, x):
        """Predict the nearest cluster for each sample in x (requires fit)."""
        return self.model.predict(X=x)

    def set_params(self, **params):
        """Update estimator parameters; returns self for chaining."""
        self.model.set_params(**params)
        return self

    def get_attributes(self):
        """Return the fitted attributes as a 5-tuple.

        :return: (cluster_centers, cluster_centers_indices, labels,
                  affinity_matrix, n_iter) — only valid after fitting.
        """
        cluster_centers = self.model.cluster_centers_
        cluster_centers_indices = self.model.cluster_centers_indices_
        labels = self.model.labels_
        affinity_matrix = self.model.affinity_matrix_
        n_iter = self.model.n_iter_

        return cluster_centers, cluster_centers_indices, labels, affinity_matrix, n_iter
Code example #2
0
# Build a gene-set x gene-set Pearson correlation matrix and cluster it with
# Affinity Propagation (Python 2 script; relies on df_reconstituted_genesets
# and AffinityPropagation being defined/imported elsewhere in the file).
print "Calculating correlation matrix between gene sets... This will be used as similarity matrix for Affinity Propagation"
#matrix_corr = np.corrcoef(df_reconstituted_genesets, rowvar=0) # NUMPY approach | slighty faster than Pandas
matrix_corr = df_reconstituted_genesets.corr(
    method='pearson', min_periods=df_reconstituted_genesets.shape[0]
)  # PANDAS approach | Note that "min_periods=df_reconstituted_genesets_t.shape[0]" should not be needed, but it ensures that no "NaN" values are present to give weird results
print "Dimension of correlation matrix: [{} x {}]".format(
    matrix_corr.shape[0], matrix_corr.shape[1])

################## Running AP ##################

# Signature reference (sklearn defaults):
#sklearn.cluster.AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15, copy=True, preference=None, affinity='euclidean', verbose=False)
# affinity='precomputed' means matrix_corr is used directly as the similarity
# matrix; max_iter/convergence_iter raised far above the defaults to give the
# algorithm room to converge.
af_obj = AffinityPropagation(
    affinity='precomputed', max_iter=10000,
    convergence_iter=1000)  # using almost only default parameters
print "Affinity Propagation parameters:"
for param, val in af_obj.get_params().items():
    print "\t{}: {}".format(param, val)
print "Perfoming Affinity Propagation.."
af = af_obj.fit(matrix_corr)
n_iter = af.n_iter_  # number of iterations the algorithm actually ran
print "Affinity Propagation done"
print "Number of iterations used: {}".format(n_iter)

### Saving labels and centers
cluster_centers_indices = af.cluster_centers_indices_  # array, shape (n_clusters,) | indices of the exemplar samples
# cluster_centers_indices take on values in the range {0...n_samples-1}
labels = af.labels_  # array, shape (n_samples,) | Get the "labels"/assignments of each data point to a cluster index
# labels take on values in the range: {0...n_clusters-1}

### Display some stats
n_clusters = len(cluster_centers_indices)
Code example #3
0
File: network_plot.py  Project: xhyuo/DEPICT
# http://pandas.pydata.org/pandas-docs/stable/computation.html#correlation
# df.corr() # Compute pairwise correlation of columns, excluding NA/null values
#matrix_corr = df_reconstituted_genesets.corr(method='pearson', min_periods=df_reconstituted_genesets.shape[0]) # --> Note that "min_periods=df_reconstituted_genesets_t.shape[0]" should not be needed, but it ensures that no "NaN" values are present to give weird results

### METHOD IN USE
# Build a gene-set x gene-set Pearson correlation matrix and cluster it with
# Affinity Propagation (Python 2 script; depends on df_reconstituted_genesets
# and AffinityPropagation being defined/imported elsewhere in the file).
print "Calculating correlation matrix between gene sets... This will be used as similarity matrix for Affinity Propagation"
#matrix_corr = np.corrcoef(df_reconstituted_genesets, rowvar=0) # NUMPY approach | slighty faster than Pandas
matrix_corr = df_reconstituted_genesets.corr(method='pearson', min_periods=df_reconstituted_genesets.shape[0]) # PANDAS approach | Note that "min_periods=df_reconstituted_genesets_t.shape[0]" should not be needed, but it ensures that no "NaN" values are present to give weird results
print "Dimension of correlation matrix: [{} x {}]".format(matrix_corr.shape[0], matrix_corr.shape[1])

################## Running AP ##################

# Signature reference (sklearn defaults):
#sklearn.cluster.AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15, copy=True, preference=None, affinity='euclidean', verbose=False)
# affinity='precomputed' means matrix_corr is used directly as the similarity matrix.
af_obj = AffinityPropagation(affinity = 'precomputed', max_iter=10000, convergence_iter=1000) # using almost only default parameters
print "Affinity Propagation parameters:"
for param, val in af_obj.get_params().items():
	print "\t{}: {}".format(param, val)
print "Perfoming Affinity Propagation.."
af = af_obj.fit(matrix_corr)
n_iter = af.n_iter_  # number of iterations the algorithm actually ran
print "Affinity Propagation done"
print "Number of iterations used: {}".format(n_iter)

### Saving labels and centers
cluster_centers_indices = af.cluster_centers_indices_  # array, shape (n_clusters,) | indices of the exemplar samples
													   # cluster_centers_indices take on values in the range {0...n_samples-1}
labels = af.labels_ # array, shape (n_samples,) | Get the "labels"/assignments of each data point to a cluster index
					# labels take on values in the range: {0...n_clusters-1}


Code example #4
0
class AP(object):
    """Thin wrapper around sklearn's AffinityPropagation with getter helpers.

    Constructor arguments are forwarded unchanged to
    ``sklearn.cluster.AffinityPropagation``.
    """

    def __init__(self,
                 damping=.5,
                 max_iter=200,
                 convergence_iter=15,
                 copy=True,
                 preference=None,
                 affinity='euclidean',
                 verbose=False,
                 random_state='warn'):
        """
        Parameters
        ----------
        damping : float, optional
            Damping factor for message updates, in the range [0.5, 1.0).
            The default is .5.
        max_iter : int, optional
            Maximum number of iterations. The default is 200.
        convergence_iter : int, optional
            Number of iterations with no change in the estimated number of
            clusters that stops the algorithm. The default is 15.
        copy : bool, optional
            Whether to make a copy of the input data. The default is True.
        preference : array-like or float, optional
            Per-point preference; points with larger values are more likely
            to be chosen as exemplars. The default is None.
        affinity : {"euclidean", "precomputed"}, optional
            Euclidean distance, or a user-precomputed similarity matrix.
            The default is 'euclidean'.
        verbose : bool, optional
            Verbosity flag. The default is False.
        random_state : int, RandomState instance or 'warn', optional
            The default is 'warn' (sklearn's transitional sentinel value).

        Returns
        -------
        None.

        """
        self.ap_cluster = AffinityPropagation(
            damping=damping,
            max_iter=max_iter,
            convergence_iter=convergence_iter,
            copy=copy,
            preference=preference,
            affinity=affinity,
            verbose=verbose,
            random_state=random_state)

    def fit(self, x, y=None):
        """Fit on x (y is ignored; kept for API symmetry).

        Returns self to allow sklearn-style chaining; the original returned
        None, so this is a backward-compatible improvement.
        """
        self.ap_cluster.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Fit on x and return the cluster label of each sample."""
        return self.ap_cluster.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the underlying estimator's parameters as a dict."""
        return self.ap_cluster.get_params(deep=deep)

    def set_params(self, params):
        """Update estimator parameters from a mapping.

        Note: unlike sklearn's ``set_params(**kwargs)``, this takes a single
        dict argument (signature kept for interface compatibility).
        Returns self for chaining.
        """
        self.ap_cluster.set_params(**params)
        return self

    def predict(self, x):
        """Predict the nearest cluster for each sample in x (requires fit)."""
        return self.ap_cluster.predict(X=x)

    def get_cluster_centers_indices(self):
        """Indices of the exemplar samples chosen as cluster centers."""
        return self.ap_cluster.cluster_centers_indices_

    def get_cluster_centers(self):
        """Cluster center points (exemplar feature vectors)."""
        return self.ap_cluster.cluster_centers_

    def get_labels(self):
        """Cluster label of each training sample."""
        return self.ap_cluster.labels_

    def get_affinity_matrix(self):
        """Affinity (similarity) matrix used during fitting."""
        return self.ap_cluster.affinity_matrix_

    def get_n_iter(self):
        """Number of iterations run until convergence."""
        return self.ap_cluster.n_iter_
Code example #5
0
# Select the final sample and stack the six colour indices into the feature
# matrix X (one row per object). Python 2 script; depends on m83_data,
# colour1..colour6, pairwise_distances, AffinityPropagation and metrics
# being defined/imported elsewhere in the file.
x = m83_data['x'][final_data]
y = m83_data['y'][final_data]
id_ = m83_data['id_'][final_data]
X = np.vstack([colour1, colour2, colour3, colour4, colour5, colour6]).T #, colour4, colour5, colour6, colour7]).T

# NOTE(review): pairwise_distances returns *distances*, but AffinityPropagation
# below is fit with the default affinity='euclidean', so this matrix is treated
# as a feature matrix rather than precomputed similarities -- confirm intent.
similarity = pairwise_distances(X)
##############################################################################
# Compute Affinity Propagation
pref = -len(X)*0.1  # negative preference scaled by sample count -> fewer exemplars
damp = 0.95  # high damping to stabilise message updates
af = AffinityPropagation(preference=pref, damping=damp).fit(similarity)
cluster_centers_indices = af.cluster_centers_indices_  # indices of exemplar rows
labels = af.labels_  # cluster assignment per sample
print cluster_centers_indices
print af.get_params()
n_clusters_ = len(cluster_centers_indices)

print('Estimated number of clusters: %d' % n_clusters_)
print "objects: {}".format(len(colour1))
# Silhouette is computed on the original features X with the AP labels
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, labels))

##############################################################################
# Plot result
import matplotlib.pyplot as plt
from itertools import cycle

#plt.close('all')
fig = plt.figure()
ax = fig.add_subplot(111)
Code example #6
0
    [row['Plataforma'], row['Genero']], clf), axis=1)
# Deduplicate the scored rows and persist the final recommendation table.
# Depends on names defined elsewhere in the file (teste, cols, getPureza,
# getEntropia, p, timeStart, clf).
teste = teste.drop_duplicates()
pd.DataFrame(data=teste, columns=cols).to_csv(
    r'dados/teste_final.csv', sep=',', index=False)

timeEnd = time.strftime("%H:%M:%S")  # wall-clock end of the run
# Validating the model (purity / entropy of the clustering)
pureza = getPureza(teste)
entropia = getEntropia(teste)
# Collecting run results into a one-row summary frame
colunas = ['Versão', 'Pureza', 'Entropia', 'Começo',
           'Término', 'Qtde Clusters', 'Parâmetros do cluster']
qtdeCluster = teste['Agrupamento'].nunique()  # number of distinct clusters produced
result = pd.DataFrame(data=[], columns=colunas)
result = result.append(pd.DataFrame(
    data=[[p, pureza, entropia, timeStart, timeEnd, qtdeCluster, str(clf.get_params())]], columns=colunas))

# avaliacao = pd.DataFrame(data=[], columns={'Escolhas de jogos', 'Recomendações', 'Curtida'})
print("Término do processamento: ", time.strftime("%H:%M:%S"))
# Routes
# API info endpoint: returns the run-summary frame as JSON records.
@app.route('/api/probe', methods=['GET'])
def probe():
    return result.to_json(orient='records'), 200

# Busca de jogos
@app.route('/search/<name>', methods=['GET'])
def search(name):
    lista = []
    for i, row in teste[teste['Nome'].str.contains(name, case=False)].iterrows():
Code example #7
0
# Select the final sample and stack the six colour indices into the feature
# matrix X (one row per object). Python 2 script; depends on m83_data,
# colour1..colour6, pairwise_distances, AffinityPropagation and metrics
# being defined/imported elsewhere in the file.
x = m83_data['x'][final_data]
y = m83_data['y'][final_data]
id_ = m83_data['id_'][final_data]
X = np.vstack([colour1, colour2, colour3, colour4, colour5,
               colour6]).T  #, colour4, colour5, colour6, colour7]).T

# NOTE(review): pairwise_distances returns *distances*, but AffinityPropagation
# below is fit with the default affinity='euclidean', so this matrix is treated
# as a feature matrix rather than precomputed similarities -- confirm intent.
similarity = pairwise_distances(X)
##############################################################################
# Compute Affinity Propagation
pref = -len(X) * 0.1  # negative preference scaled by sample count -> fewer exemplars
damp = 0.95  # high damping to stabilise message updates
af = AffinityPropagation(preference=pref, damping=damp).fit(similarity)
cluster_centers_indices = af.cluster_centers_indices_  # indices of exemplar rows
labels = af.labels_  # cluster assignment per sample
print cluster_centers_indices
print af.get_params()
n_clusters_ = len(cluster_centers_indices)

print('Estimated number of clusters: %d' % n_clusters_)
print "objects: {}".format(len(colour1))
# Silhouette is computed on the original features X with the AP labels
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

##############################################################################
# Plot result
import matplotlib.pyplot as plt
from itertools import cycle

#plt.close('all')
fig = plt.figure()
ax = fig.add_subplot(111)