Python AffinityPropagation.fit Examples, sklearn.cluster.AffinityPropagation.fit Python Examples

Example #1

0

Show file

File: ooc.py Project: audy/bfc

def main():
    '''
    Hash sequence chunks into character n-gram vectors and cluster each chunk.

    Every chunk yielded by ``sequence_chunk_generator`` is vectorized with a
    ``HashingVectorizer``; an ``AffinityPropagation`` estimator is then fitted
    on the vectors and the number of distinct labels is logged.

        >>> main() # stuff happens
    '''
    args = parse_args()
    setup_logging(args.log, verbose=args.verbose)

    chunks = sequence_chunk_generator(args.fasta_file,
                                      chunk_size=args.chunk_size)

    ngram_bounds = (args.ngram_min, args.ngram_max)
    hasher = HashingVectorizer(analyzer='char',
                               n_features=2 ** 18,
                               ngram_range=ngram_bounds)

    # The same estimator object is re-fitted for every chunk.
    estimator = AffinityPropagation()

    for chunk in chunks:
        logging.info('hashing chunk')
        sequences = [str(record.seq) for record in chunk]
        chunk_vector = hasher.transform(sequences)

        logging.info('clustering')
        estimator.fit(chunk_vector)

        n_found = len(set(estimator.labels_))
        logging.info('got %s clusters' % n_found)

Example #2

0

Show file

File: ap_validation.py Project: M61A1/time-series-variability-tree

def run_affinity_propagation(affinities, preference):
    """Fit affinity propagation on a precomputed affinity matrix.

    Returns the number of exemplars (cluster centres) the fit produced.
    """
    model = AffinityPropagation(affinity='precomputed', preference=preference)
    model.fit(affinities)
    return len(model.cluster_centers_indices_)

Example #3

0

Show file

def test_affinity_propagation():
    """Check that the function and estimator APIs agree on labels and cluster count."""
    # Similarities are the negated squared Euclidean distances.
    similarities = -euclidean_distances(X, squared=True)
    preference = np.median(similarities) * 10

    # Functional API on the precomputed similarities.
    exemplar_indices, _ = affinity_propagation(
        similarities, preference=preference, random_state=39
    )
    assert n_clusters == len(exemplar_indices)

    # Estimator API, once on the precomputed similarities ...
    precomputed_model = AffinityPropagation(
        preference=preference, affinity="precomputed", random_state=28
    )
    labels_precomputed = precomputed_model.fit(similarities).labels_

    # ... and once on the raw samples.
    af = AffinityPropagation(preference=preference, verbose=True, random_state=37)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    n_found = len(af.cluster_centers_indices_)
    assert np.unique(labels).size == n_found
    assert n_clusters == n_found

    # Same result is expected when the input is not copied.
    _, labels_no_copy = affinity_propagation(
        similarities, preference=preference, copy=False, random_state=74
    )
    assert_array_equal(labels, labels_no_copy)

Example #4

0

Show file

File: clustering.py Project: egaebel/crime-on-the-move-back-end--Python

def affinity_propagation(crime_rows, column_names):
    """
        Cluster crime rows with affinity propagation.

        The first two fields of every row are used as the clustering input and
        the remaining fields are carried along as extra information. A default
        ``AffinityPropagation`` estimator is fitted on
        ``random_sampling(crime_xy, num_samples=5000)`` and then used to
        predict a label for every row.

        Returns whatever ``_format_clustering`` builds from the labels, the
        clustering input, the extra fields and ``column_names``.
    """
    coordinates = [row[0:2] for row in crime_rows]
    extra_fields = [row[2:] for row in crime_rows]
    print("Running Affinity Propagation")
    # TODO: Parameterize this
    model = AffinityPropagation()
    # Fit on a sample only, then assign every row to the learned exemplars.
    sample = random_sampling(coordinates, num_samples=5000)
    model.fit(sample)
    labels = model.predict(coordinates)
    print("formatting....")
    return _format_clustering(labels, coordinates, extra_fields, column_names)

Example #5

0

Show file

File: cluster.py Project: qmac/nba-analysis

def cluster(scope):
    """Cluster play-type data at team or player level and return it as JSON.

    Reads the ``playtype_data`` table through ``db_engine``, reduces it to the
    requested scope, normalizes the ``FEATURES`` columns, clusters them with
    affinity propagation and hands the label-sorted frame to
    ``clusters_to_json``.

    :param scope: ``'Team'`` (rows are averaged per team) or ``'Player'``.
    :return: the result of ``clusters_to_json(df, scope)``.
    :raises ValueError: if ``scope`` is neither ``'Team'`` nor ``'Player'``.
    """
    # Setup data
    df = pd.read_sql('playtype_data', db_engine)

    # Manipulate data into scope
    if scope == 'Team':
        # The axis is passed by keyword: the positional form was dropped by pandas.
        df = df.drop('Player', axis=1).groupby('Team', as_index=False).mean()
    elif scope == 'Player':
        df = df.drop('Team', axis=1)
    else:
        raise ValueError(
            "scope must be 'Team' or 'Player', got %r" % (scope,))

    # Normalize the data: centre on the mean, scale by the value range
    features = df[FEATURES]
    df[FEATURES] = (features - features.mean()) / (features.max() - features.min())

    # Run clustering
    clstr = AffinityPropagation()
    clstr.fit(df[FEATURES])

    # Clump results (DataFrame.sort no longer exists; sort_values replaces it)
    df['cluster'] = clstr.labels_
    df = df.sort_values('cluster')

    # Convert results to JSON for frontend
    return clusters_to_json(df, scope)

Example #6

0

Show file

File: visualize.py Project: juliakreutzer/loons

 def clusterAffinityPropagation(self):
     """
     Cluster both embeddings with affinity propagation.

     For ``self.emb1`` and ``self.emb2`` in turn, the matrix ``m`` is clustered
     and two mappings are stored on the instance: label -> list of the
     corresponding ``rd`` entries (``cluster1`` / ``cluster2``) and
     ``rd`` entry -> label (``word2cluster1`` / ``word2cluster2``).
     :return:
     """
     # A single estimator is re-fitted for each embedding.
     affin = AffinityPropagation()

     def group_by_label(emb):
         # Fit on the embedding matrix, then bucket emb.rd entries by label.
         affin.fit(emb.m)
         members_by_label = dict()
         for position, label in enumerate(affin.labels_):
             members_by_label.setdefault(label, list()).append(emb.rd[position])
         label_by_member = dict()
         for label, members in members_by_label.items():
             for member in members:
                 label_by_member[member] = label
         return members_by_label, label_by_member

     self.cluster1, self.word2cluster1 = group_by_label(self.emb1)
     self.cluster2, self.word2cluster2 = group_by_label(self.emb2)

Example #7

0

Show file

File: cluster.py Project: xiaoyiou/eotools

    def saxcluster(self, preference=None, lookup=True):
        """Cluster the SAX representatives with affinity propagation.

        :param preference: forwarded to ``AffinityPropagation``.
        :param lookup: when true, fit on the precomputed matrix returned by
            ``self.__saxDists()``; otherwise fit on the values of
            ``self.avdata`` with the estimator's default affinity.

        The input is cached in ``self.dists`` on first use. After fitting, the
        method fills ``cluster_sax`` (exemplar keys), ``cluster_centers``,
        ``clusters`` (exemplar key -> member row indices), ``asax_data``
        (exemplar key -> mean of the member rows of ``self.data``), ``ass``
        (per-row cluster position) and ``n_clusters``.
        """
        if lookup:
            cls = AffinityPropagation(preference=preference, affinity='precomputed')
        else:
            cls = AffinityPropagation(preference=preference)

        if self.dists is None:
            if lookup:
                self.dists = self.__saxDists()
            else:
                # Materialize: a dict view cannot be indexed on Python 3.
                self.dists = list(self.avdata.values())
        data = self.dists
        cls.fit(data)

        # Snapshot keys/values once; indexing a dict view fails on Python 3 and
        # rebuilding the list inside the loop below would be quadratic.
        reps = list(self.indexes.keys())
        members = list(self.indexes.values())

        self.cluster_sax = [reps[i] for i in cls.cluster_centers_indices_]
        self.cluster_centers = [self.avdata[sax] for sax in self.cluster_sax]
        self.clusters = collections.defaultdict(list)
        for ind, label in enumerate(cls.labels_):
            sax = self.cluster_sax[label]
            self.clusters[sax] += members[ind]
        self.asax_data = dict()
        for sax in self.clusters:
            self.asax_data[sax] = self.data[self.clusters[sax], :].mean(axis=0)
        self.ass = [0] * self.N
        for sax in self.cluster_sax:
            v = self.cluster_sax.index(sax)
            for ind in self.clusters[sax]:
                self.ass[ind] = v
        self.n_clusters = len(self.clusters)

Example #8

0

Show file

File: BoVWHelper.py Project: uysalaltas/UrineAnalysis

def affinity_descriptor(descriptor_list):
    """Cluster the descriptors with default affinity propagation.

    Returns the fitted estimator's ``cluster_centers_`` as the visual words.
    """
    print("Affinity Propagation starting...")
    model = AffinityPropagation()
    model.fit(descriptor_list)
    centres = model.cluster_centers_
    print("Visual words are ready.")
    return centres

Example #9

0

Show file

File: goal_cluster.py Project: a33kuo/procedural_knowledge

	def clustering(self):
		"""Two-layer clustering of ``self.goal_list``.

		The TF-IDF vectors are reduced with PCA, their cosine similarities are
		clustered with affinity propagation, and every resulting cluster is
		split again with ``subcluster_by_editdistance``. Returns a
		``defaultdict`` mapping each sub-goal to its items.
		"""
		# Calculate similarity matrix
		tfidf = self.create_tfidf_vector().toarray()
		reducer = PCA(n_components=300, copy=False)
		reduced = reducer.fit(tfidf).transform(tfidf)
		similarity = cosine_similarity(reduced, reduced)

		# Run affinity propagation
		af = AffinityPropagation()
		af.fit(similarity)
		exemplar_indices = af.cluster_centers_indices_

		# First layer: group every goal under the goal of its exemplar
		first_layer = defaultdict(list)
		for position, label in enumerate(af.labels_):
			exemplar_goal = self.goal_list[exemplar_indices[label]]
			first_layer[exemplar_goal].append(self.goal_list[position])

		# Second layer: sub-cluster each group
		goal_clusters = defaultdict(list)
		for goal, item_list in first_layer.items():
			refined = self.subcluster_by_editdistance(goal, item_list)
			for subgoal, items in refined.items():
				goal_clusters[subgoal] = items
		return goal_clusters

Example #10

0

Show file

File: specialism.py Project: deercoder/ndsb2015

def make_cluster_map(damping=0.992):
	"""Group the 121 fine classes into coarse clusters and pickle both mappings.

	Loads ``(test_labels, prediction)`` from ``f_path_pred``, averages the
	predictions of each true class into a 121x121 matrix, symmetrizes
	``1 - matrix`` (diagonal zeroed) and clusters its rows with affinity
	propagation. Writes ``coarse_to_fine.p`` and ``fine_to_coarse.p`` into
	``curdir``.

	:param damping: damping factor passed to the estimator.
	"""
	# NOTE(review): pickle.load can execute arbitrary code; only point
	# f_path_pred at files from a trusted source.
	with open(f_path_pred, 'rb') as pred_file:
		test_labels, prediction = pickle.load(pred_file)

	prob_conf = np.zeros((121, 121))
	for l in range(121):
		# flatnonzero always gives a 1-D index array, so a class with a single
		# sample still selects a 2-D slice (squeeze used to collapse it to a
		# scalar index, turning the row mean into one scalar).
		inds = np.flatnonzero(test_labels == l)
		prob_conf[l, :] = prediction[inds, :].mean(axis=0)
	D = 1 - prob_conf
	np.fill_diagonal(D, 0)
	D_p = 0.5 * (D + D.T)

	clst = AP(damping=damping,  # damping determines # of clusters
			max_iter=500,
			convergence_iter=15,
			affinity='euclidean',
			verbose=False)
	clst.fit(D_p)
	# Single-argument form prints the same text on Python 2 and Python 3.
	print('Number of cluster: %d' % len(clst.cluster_centers_))
	membership = np.c_[range(121), clst.labels_]

	fine_to_coarse = dict(membership)
	coarse_to_fine = {l: [] for l in clst.labels_}
	for k, v in fine_to_coarse.items():
		coarse_to_fine[v].append(k)

	# Context managers make sure the pickles are flushed and closed.
	with open(os.path.join(curdir, 'coarse_to_fine.p'), 'wb') as out_file:
		pickle.dump(coarse_to_fine, out_file)
	with open(os.path.join(curdir, 'fine_to_coarse.p'), 'wb') as out_file:
		pickle.dump(fine_to_coarse, out_file)

Example #11

0

Show file

File: main.py Project: rugbyprof/4553-Spatial-DS

def affinity():
    """Demo: cluster a synthetic 2-feature dataset with affinity propagation and plot it."""
    from numpy import unique
    from numpy import where
    from sklearn.datasets import make_classification
    from sklearn.cluster import AffinityPropagation
    from matplotlib import pyplot

    # Synthetic dataset
    features, _ = make_classification(
        n_samples=1000,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=4,
    )
    print(features)

    # Fit the model, then assign a cluster to every sample
    model = AffinityPropagation(damping=0.9)
    model.fit(features)
    assignments = model.predict(features)

    # One scatter call per cluster
    for cluster_id in unique(assignments):
        member_rows = where(assignments == cluster_id)
        pyplot.scatter(features[member_rows, 0], features[member_rows, 1])

    pyplot.show()

Example #12

0

Show file

File: Wikipedia_Clustering_Competition_Code.py Project: AkshayAgarwal13/Wikipedia_Clustering_Competition

def doAffinity(X):
    """Fit affinity propagation on ``X`` and return the predicted label of every row."""
    settings = {
        'damping': 0.5,
        'max_iter': 250,
        'affinity': 'euclidean',
    }
    estimator = AffinityPropagation(**settings)
    estimator.fit(X)
    return estimator.predict(X)

Example #13

0

Show file

File: gpr_model.py Project: rajeeja/Scratch

    def optimize_recommend(self, param_set,
                           max_recommend=3,
                           gamma=1.0, delta=1.0,
                           gpr=None, Xd=None,
                           return_data=False):
        """Optimizes GPR model, using each data point as initial value

        Clusters the result using Affinity Propagation, and returns
        the cluster representatives, choosing the number of clusters
        automatically. The results are decoded into parameter sets.

        ``gamma``, ``delta``, ``gpr`` and ``Xd`` are forwarded to
        ``self.optimize``; ``param_set`` is forwarded to
        ``self.decode_dummies``. A ``max_recommend`` below 1 means "no limit".
        Returns the decoded parameter sets, plus the selected rows when
        ``return_data`` is true.
        """

        x = self.optimize(gamma=gamma, delta=delta, gpr=gpr, Xd=Xd)
        aff = AffinityPropagation()
        aff.fit(x)
        # select the lowest gpr_optimum from each cluster
        # NOTE(review): the concat aligns on index, so this assumes x carries
        # a default 0..n-1 index -- confirm against self.optimize.
        x_rec = pd.concat([x, pd.DataFrame({'cluster_id' : aff.labels_})], axis=1)
        x_rec.sort_values(by=['cluster_id', 'gpr_optimum'], inplace=True)
        x_rec = x_rec.groupby('cluster_id').first()
        x_rec.sort_values(by=['gpr_optimum'], inplace=True)

        if max_recommend < 1:
            max_recommend = x.shape[0]
        # Truncate the cluster representatives. Slicing ``x`` here would throw
        # away the clustering above and return the first raw rows instead.
        x_rec = x_rec.iloc[:max_recommend]
        paramdictlist = self.decode_dummies(x_rec, param_set)
        if return_data:
            return paramdictlist, x_rec
        else:
            return paramdictlist

Example #14

0

Show file

File: detect_paraphrase_full.py Project: ids-cv/coling_iparaphrasing

def get_region2label_table(X, clutter, damping, metric='cosine'):
    '''
    Build the label-given-region table p(l|r) from an affinity clustering.

    X: input passed to ``cosine_similarity``
    clutter: percentile of the affinity matrix used as the preference
    damping: damping factor passed to ``AffinityPropagation``
    metric: cosine | iou  (only cosine is implemented)

    Returns a matrix of N_label x N_region in which every column sums to 1.
    Raises NotImplementedError (a RuntimeError) for 'iou' and ValueError for
    any other unknown metric.
    '''
    # compute affinity
    if metric == 'cosine':
        # shift cosine similarity from [-1, 1] into [0, 1]
        A = cosine_similarity(X)
        A = A / 2. + .5
    elif metric == 'iou':
        # NotImplementedError subclasses RuntimeError, so existing handlers still match.
        raise NotImplementedError("metric 'iou' is not implemented")
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError("unknown metric %r (expected 'cosine' or 'iou')" % (metric,))

    pref = np.percentile(A, clutter)

    # bbox clustering
    af = AffinityPropagation(preference=pref,
                             affinity='precomputed',
                             damping=damping)
    af.fit(A)

    # p(l|r): keep the exemplar columns, normalize each region's row,
    # then transpose to N_label x N_region
    Tcr = A[:, af.cluster_centers_indices_]
    Tcr /= Tcr.sum(axis=1, keepdims=True)
    Tcr = Tcr.T

    return Tcr

Example #15

0

Show file

File: clustering.py Project: doogyb/pun_detection

def affinity_propagation(words, algo="word2vec", use_model=False):
    """
        Clusters the words of a sentence with affinity propagation over a
        pairwise word-similarity matrix.

        :param words: input sentence
        :param algo: "word2vec" or "wordnet"
        :param use_model: forwarded to ``word2vec_distance`` for "word2vec"
        :return: two lists: the clusters (lists of words) and, in the same
                 order, the exemplar word of each cluster. Both are empty when
                 fewer than two words remain or no clusters were produced.
        :raises ValueError: if ``algo`` is not one of the supported names
        """

    words = semantic_similarity.pos_filter(words, False, strict=False)
    words = np.asarray(words)  # So that indexing with a list will work
    if algo == "word2vec":
        lev_similarity = np.array([[semantic_similarity.word2vec_distance(w1, w2, use_model=use_model)
                                    for w1 in words] for w2 in words])
    elif algo == "wordnet":
        # NOTE(review): this branch also calls word2vec_distance -- confirm
        # whether a wordnet-based measure was intended here.
        lev_similarity = np.array([[semantic_similarity.word2vec_distance(w1, w2) for w1 in words] for w2 in words])
    else:
        # Previously an unknown algo surfaced later as an unbound variable.
        raise ValueError("unknown algo %r (expected 'word2vec' or 'wordnet')" % (algo,))

    if len(lev_similarity) < 2:
        return [[], []]
    affprop = AffinityPropagation(affinity="precomputed", damping=0.5)
    affprop.fit(lev_similarity)
    # Without exemplars the -1 labels of a non-converged fit cannot be mapped
    # to a centre, so treat that like the "no labels" case.
    if np.isnan(np.sum(affprop.labels_)) or len(affprop.cluster_centers_indices_) == 0:
        print("No labels")
        return [[], []]

    clusters = []
    centroids = []
    for cluster_id in np.unique(affprop.labels_):
        centroids.append(words[affprop.cluster_centers_indices_[cluster_id]])
        cluster = np.unique(words[np.nonzero(affprop.labels_ == cluster_id)])
        clusters.append(list(cluster))

    return clusters, centroids

Example #16

0

Show file

def get_labels(data_as_list, algorithm='meanshift'):
    """Cluster the data with the chosen algorithm and return its labels.

    :param data_as_list: samples, converted with ``np.array`` before fitting
    :param algorithm: 'dbscan', 'kmeans', 'meanshift' or 'affinitypropagation'
    :return: the fitted estimator's ``labels_``; an empty list when
             ``algorithm`` is none of the supported names
    """
    dt = np.array(data_as_list)
    labels = []

    print('    Algorithm =', algorithm)

    if algorithm == 'dbscan':
        dbs = DBSCAN(eps=0.1)
        dbs.fit(dt)
        labels = dbs.labels_

    elif algorithm == 'kmeans':
        kmeans = KMeans(n_clusters=10)
        kmeans.fit(dt)
        labels = kmeans.labels_

    elif algorithm == 'meanshift':
        # Estimate the bandwidth from the data; fall back to a fixed value if
        # the estimation fails.
        try:
            bandwidth = estimate_bandwidth(dt, quantile=0.2, n_samples=len(dt))
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            bandwidth = 0.5
        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
        ms.fit(dt)
        labels = ms.labels_

    elif algorithm == 'affinitypropagation':
        af = AffinityPropagation()
        af.fit(dt)
        labels = af.labels_

    return labels

Example #17

0

Show file

File: ClusterAP.py Project: JaysonsdLin/MLPythonLib

def APWithSimilaryMatrix(similaryMatrix):
    """Run affinity propagation on a precomputed similarity matrix.

    The preference is twice the mean of the matrix. Returns the tuple
    ``(cluster_centers_indices_, labels_)`` of the fitted estimator.
    """
    preference = np.mean(similaryMatrix) * 2
    model = AffinityPropagation(
        max_iter=2000,
        preference=preference,
        affinity='precomputed',
    )
    model.fit(similaryMatrix)
    exemplars = model.cluster_centers_indices_
    assignments = model.labels_
    return (exemplars, assignments)

Example #18

0

Show file

File: test_sap.py Project: bioinfocao/pysapc

def clusterSimilarityWithSklearnAPC(data_file,damping=0.9,max_iter=200,convergence_iter=15,preference='min'):
    """
    Compare the Sparse Affinity Propagation (SAP) result with the scikit-learn
    Affinity Propagation (AP) result on the same similarity matrix.

    Note that scikit-learn AP converges on "no change in the number of
    estimated clusters" while SAP converges on "no change in the cluster
    assignment", so SAP may need more iterations and the final exemplar of a
    sample may differ slightly between the two.

    ``preference`` may be 'min' or 'median' (taken over the dense matrix);
    any other value is passed through unchanged. Returns the value of
    ``sparseAP_cy.arrSamePercent`` for the two exemplar arrays.
    """
    # Load the similarities and densify them once for both algorithms
    dense_similarities = loadMatrix(data_file).todense()

    # Resolve the symbolic preference names
    if preference == 'min':
        preference = np.min(dense_similarities)
    elif preference == 'median':
        preference = np.median(dense_similarities)

    print('{0}, start SKlearn Affinity Propagation'.format(datetime.now()))
    sk_model = AffinityPropagation(damping=damping, preference=preference,
                                   affinity='precomputed', verbose=True)
    sk_model.fit(dense_similarities)
    centre_indices = sk_model.cluster_centers_indices_
    # Exemplar index of every sample, looked up through its label
    sk_exemplars = np.asarray([centre_indices[label] for label in sk_model.labels_])

    print('{0}, start Fast Sparse Affinity Propagation Cluster'.format(datetime.now()))
    sparse_model = SAP(preference=preference, convergence_iter=convergence_iter,
                       max_iter=max_iter, damping=damping, verboseIter=100)
    sap_exemplars = sparse_model.fit_predict(dense_similarities)

    # Calculate similarity between sk_exemplars and sap_exemplars
    return sparseAP_cy.arrSamePercent(np.array(sk_exemplars), np.array(sap_exemplars))

Example #19

0

Show file

def Affinity_Propagation(data, SBS, C, EP, CP, selected_products):
    """Pick the cluster holding most of the selected products and split its members.

    ``data`` is clustered with affinity propagation (preference -200). Among
    the clusters of ``selected_products`` the most frequent one is chosen; its
    member indices below ``len(EP)`` go to the first returned list, all others
    to the second. ``SBS``, ``C`` and ``CP`` are not used.

    Returns ``(EP_New, CP_New, n_clusters)``.
    """
    ap = AffinityPropagation(preference=-200)
    ap.fit(data)

    n_clusters = len(ap.cluster_centers_)
    ep_count = len(EP)

    # Sample indices grouped by cluster label
    members_by_cluster = [[] for _ in range(n_clusters)]
    for sample_index, label in enumerate(ap.labels_):
        members_by_cluster[label].append(sample_index)

    # Cluster from which the majority of the products were selected previously
    selected_labels = [ap.labels_[product] for product in selected_products]
    cluster = max(set(selected_labels), key=selected_labels.count)

    chosen_members = members_by_cluster[cluster]
    EP_New = [index for index in chosen_members if index < ep_count]
    CP_New = [index for index in chosen_members if not (index < ep_count)]

    return EP_New, CP_New, n_clusters

Example #20

0

Show file

File: specialism.py Project: deercoder/ndsb2015

def make_cluster_map(damping=0.992):
    """Group the 121 fine classes into coarse clusters and pickle both mappings.

    Loads ``(test_labels, prediction)`` from ``f_path_pred``, averages the
    predictions of each true class into a 121x121 matrix, symmetrizes
    ``1 - matrix`` (diagonal zeroed) and clusters its rows with affinity
    propagation. Writes ``coarse_to_fine.p`` and ``fine_to_coarse.p`` into
    ``curdir``.

    :param damping: damping factor passed to the estimator.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # f_path_pred at files from a trusted source.
    with open(f_path_pred, 'rb') as pred_file:
        test_labels, prediction = pickle.load(pred_file)

    prob_conf = np.zeros((121, 121))
    for l in range(121):
        # flatnonzero always gives a 1-D index array, so a class with a single
        # sample still selects a 2-D slice (squeeze used to collapse it to a
        # scalar index, turning the row mean into one scalar).
        inds = np.flatnonzero(test_labels == l)
        prob_conf[l, :] = prediction[inds, :].mean(axis=0)
    D = 1 - prob_conf
    np.fill_diagonal(D, 0)
    D_p = 0.5 * (D + D.T)

    clst = AP(
        damping=damping,  # damping determines # of clusters
        max_iter=500,
        convergence_iter=15,
        affinity='euclidean',
        verbose=False)
    clst.fit(D_p)
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('Number of cluster: %d' % len(clst.cluster_centers_))
    membership = np.c_[range(121), clst.labels_]

    fine_to_coarse = dict(membership)
    coarse_to_fine = {l: [] for l in clst.labels_}
    for k, v in fine_to_coarse.items():
        coarse_to_fine[v].append(k)

    # Context managers make sure the pickles are flushed and closed.
    with open(os.path.join(curdir, 'coarse_to_fine.p'), 'wb') as out_file:
        pickle.dump(coarse_to_fine, out_file)
    with open(os.path.join(curdir, 'fine_to_coarse.p'), 'wb') as out_file:
        pickle.dump(fine_to_coarse, out_file)

Example #21

0

Show file

File: fly_trajectory_classifier.py Project: SashaRayshubskiy/osmotropotaxis_analysis_python

    def classify_core(self, N_CLUSTERS, clusterType, data_for_trial_type, begin_time, end_time):
        """Cluster one slice of the trial data with the requested algorithm.

        The slice covers frames ``begin_time`` to ``end_time`` (both scaled by
        ``self.griddy.TIME_GRID_SPACING``) on the second axis and the
        ``self.griddy.VEL_X`` entry of the third axis.

        :param N_CLUSTERS: cluster count for 'kmeans' and
            'AgglomerativeClustering'; replaced by ``max(labels) + 1`` for
            'affinity_propagation' and 'DBSCAN'
        :param clusterType: 'kmeans', 'affinity_propagation', 'DBSCAN' or
            'AgglomerativeClustering'
        :return: ``(labels, N_CLUSTERS)``; ``labels`` is None when
            ``clusterType`` is not recognized
        """

        BEGIN_TIME_FRAME = begin_time*self.griddy.TIME_GRID_SPACING
        END_TIME_FRAME = end_time*self.griddy.TIME_GRID_SPACING

        data = data_for_trial_type[:,BEGIN_TIME_FRAME:END_TIME_FRAME,self.griddy.VEL_X]

        labels = None
        if clusterType == 'kmeans':
            kmeans = KMeans(n_clusters=N_CLUSTERS)
            kmeans.fit(data)
            labels = kmeans.labels_
        elif clusterType == 'affinity_propagation':
            ap = AffinityPropagation(damping=0.75)
            ap.fit(data)
            labels = ap.labels_
            # Count from the labels just computed (this used to read
            # self.labels, i.e. state unrelated to this fit).
            N_CLUSTERS = np.max(labels)+1
        elif clusterType == 'DBSCAN':
            dbscan = DBSCAN()
            dbscan.fit(data)
            labels = dbscan.labels_
            N_CLUSTERS = np.max(labels)+1
            print('N_CLUSTERS=' + str(N_CLUSTERS))
        elif clusterType == 'AgglomerativeClustering':
            ac = AgglomerativeClustering(n_clusters=N_CLUSTERS)
            ac.fit(data)
            labels = ac.labels_
        else:
            print('ERROR: clusterType: ' + clusterType + ' is not recognized')

        return (labels, N_CLUSTERS)

Example #22

0

Show file

File: test_affinity_propagation.py Project: dPys/scikit-learn

def test_sparse_input_for_predict():
    """Non-regression test for issue #20049: ``predict`` must accept sparse input."""
    model = AffinityPropagation(affinity="euclidean", random_state=42)
    model.fit(X)
    sparse_queries = csr_matrix((2, 2))
    predicted = model.predict(sparse_queries)
    assert_array_equal(predicted, (2, 2))

Example #23

0

Show file

def affinity_propagation(feature_matrix):
    """Cluster the rows of a sparse feature matrix by their pairwise products.

    The product of ``feature_matrix`` with its transpose is densified and
    handed to a default ``AffinityPropagation`` estimator.

    :return: ``(estimator, labels)``
    """
    sim = feature_matrix * feature_matrix.T
    # toarray() yields a plain ndarray; todense() yields np.matrix, which
    # current scikit-learn estimators refuse as input.
    sim = sim.toarray()
    # NOTE(review): the estimator uses its default affinity here, so the rows
    # of ``sim`` are treated as feature vectors rather than as precomputed
    # similarities -- confirm this is intended.
    ap = AffinityPropagation()
    ap.fit(sim)
    clusters = ap.labels_
    return ap, clusters

Example #24

0

Show file

File: cluster_models.py Project: rupakc/Large-Scale-Preprocessing-Evaluation

def get_clustered_data(data_matrix,
                       clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean',
                       num_clusters=3):
    """Fit the requested clustering algorithm and return ``(labels, estimator)``.

    ``clustering_algorithm`` is compared case-insensitively against the names
    in ``model_constants``; anything unrecognized falls back to KMeans.
    ``distance_metric`` is only used by affinity propagation, DBSCAN, OPTICS
    and agglomerative clustering; ``num_clusters`` only by Birch,
    agglomerative clustering and KMeans.
    """
    requested = clustering_algorithm.lower()

    # Build the estimator first; fitting is identical for every algorithm.
    if requested == model_constants.AFFINITY_PROP:
        estimator = AffinityPropagation(affinity=distance_metric)
    elif requested == model_constants.DBSCAN:
        estimator = DBSCAN(metric=distance_metric)
    elif requested == model_constants.OPTICS:
        estimator = OPTICS(metric=distance_metric)
    elif requested == model_constants.MEANSHIFT:
        estimator = MeanShift()
    elif requested == model_constants.BIRCH:
        estimator = Birch(n_clusters=num_clusters)
    elif requested == model_constants.AGGLOMERATIVE:
        estimator = AgglomerativeClustering(n_clusters=num_clusters,
                                            affinity=distance_metric)
    else:
        estimator = KMeans(n_clusters=num_clusters, random_state=42)

    estimator.fit(data_matrix)
    return estimator.labels_, estimator

Example #25

0

Show file

File: distributedwordreps.py Project: werayuthgswu/cs224u-1

def cluster(mat, doc_indices):
    """Cluster the selected columns of ``mat`` and pair each index with its label.

    Returns ``zip(doc_indices, labels)``.
    """
    # One row per selected document after the transpose.
    doc_vectors = mat[:, doc_indices].T
    # Other clustering algorithms can easily be swapped in:
    # http://scikit-learn.org/stable/modules/classes.html#module-sklearn.cluster
    model = AffinityPropagation()
    model.fit(doc_vectors)
    return zip(doc_indices, model.labels_)

Example #26

0

Show file

def ward_method_clustering(nodes):
    """
    Groups user or transaction addresses with similar names.

    Despite its name, the function runs affinity propagation on the negated
    pairwise Levenshtein distances of the nodes.

    :param nodes: The nodes of the network graph
    :return: dict: A dictionary where each key is the exemplar node of a
    cluster and the value is the array of unique members of that cluster
    """

    nodes = list(nodes)  # allow positional indexing for any iterable
    levenshtein_distances = -1 * np.array(
        [[levenshtein_distance(w1, w2) for w1 in nodes] for w2 in nodes])
    affinity_propagation = AffinityPropagation(affinity="precomputed",
                                               damping=0.5)
    affinity_propagation.fit(levenshtein_distances)

    cluster_center_indices = affinity_propagation.cluster_centers_indices_
    labels = affinity_propagation.labels_

    # A dict, as documented: the previous list could not be keyed by exemplar.
    result = {}
    for cluster_id in np.unique(labels):
        if cluster_id < 0:
            # A negative label has no exemplar to report, so skip it.
            continue
        exemplar = nodes[cluster_center_indices[cluster_id]]
        # Collect every member of the cluster, not only the exemplar itself.
        member_indices = np.nonzero(labels == cluster_id)[0]
        result[exemplar] = np.unique([nodes[i] for i in member_indices])

    return result

Example #27

0

Show file

def affinity_propagation(principal_components, principal_df):
    """Cluster the principal components, plot the clusters and return the labelled frame.

    A copy of ``principal_df`` gets a ``'Segment'`` column holding the fitted
    labels and has columns 0, 1, 2 and ``'y'`` renamed to ``PC1``, ``PC2``,
    ``PC3`` and ``Race``. The first two components are scattered per cluster,
    and ``add_race_labels`` and ``calc_silhouette`` are called before the
    frame is returned.
    """
    final_df = pd.concat([principal_df], axis=1)

    # Fit, then assign a cluster to every sample
    model = AffinityPropagation(damping=0.9, random_state=0)
    model.fit(principal_components)
    y_hat = model.predict(principal_components)
    clusters = unique(y_hat)
    final_df['Segment'] = model.labels_

    # One scatter call per cluster, using the first two components
    for cluster_id in clusters:
        member_rows = where(y_hat == cluster_id)
        first_component = principal_components[member_rows, 0]
        second_component = principal_components[member_rows, 1]
        plt.scatter(first_component, second_component, s=75)

    new_names = {0: 'PC1', 1: 'PC2', 2: 'PC3', 'y': 'Race'}
    final_df.rename(new_names, axis=1, inplace=True)

    plt.title("Affinity Propagation")
    add_race_labels(final_df)
    calc_silhouette(data=principal_components,
                    prediction=y_hat,
                    n_clusters=len(clusters))
    return final_df

Example #28

0

Show file

    def __dtw_clustering(self, seq_f):
        """Cluster sequences with affinity propagation over pattern distances.

        :param seq_f: either a list of sequences or a list of
            ``(sequence, frequency)`` tuples. With tuples, the frequencies
            (scaled to at most ``2 * max distance``) are used as per-sample
            preferences; without them the estimator's default preference is
            used.
        :return: dict mapping each cluster label to the list of ``seq_f``
            entries assigned to it; empty for empty input.
        """
        if len(seq_f) == 0:
            # Nothing to cluster (and seq_f[0] below would fail).
            return dict()

        if isinstance(seq_f[0], tuple):
            seq = [item[0] for item in seq_f]
            freq = np.array([item[1] for item in seq_f])
        else:
            seq = seq_f
            # No frequencies supplied; previously ``freq`` stayed unbound and
            # the preference computation below raised a NameError.
            freq = None

        ### Computing the symmetric pairwise distance matrix
        n_seq = len(seq)
        p_dist = np.zeros((n_seq, n_seq))
        for i in range(n_seq):
            for j in range(i, n_seq):
                p_dist[i][j] = self.__pattern_distance(seq[i], seq[j])
                if i != j:
                    p_dist[j][i] = p_dist[i][j]

        # Turn distances into similarities: larger value = closer
        p_dist_max = np.max(p_dist)
        if p_dist_max == 0:
            p_dist_max = 2
        p_dist = p_dist_max - p_dist

        ### Affinity Propagation
        if freq is None:
            preference = None
        else:
            preference = 2 * p_dist_max * freq / max(freq)
        ap = AffinityPropagation(affinity='precomputed', preference=preference)
        ap.fit(p_dist)

        ### Arranging sequences by cluster label
        cluster_subseqs = dict()
        for entry, label in zip(seq_f, ap.labels_):
            cluster_subseqs.setdefault(label, []).append(entry)

        return cluster_subseqs

Example #29

0

Show file

File: cluster.py Project: fwzhuang/hair_modeling

def affinitypropagation(params):
    """Cluster a distance matrix loaded from ``params["distance_path"]``.

    The distances are converted to affinities with a Gaussian kernel
    (delta = 2) and clustered with affinity propagation on the precomputed
    affinities. Returns the labels as a tuple.
    """
    distance_path = '' + params["distance_path"]
    print(distance_path)
    distance = np.loadtxt(distance_path, dtype=np.float32)
    print(distance.shape)

    # Gaussian kernel: exp(-d^2 / (2 * delta^2))
    delta = 2
    affinity = np.exp(-distance ** 2 / (2. * delta ** 2))

    # Default settings apart from the precomputed affinity
    aff = AffinityPropagation(affinity='precomputed')
    print(aff)
    aff.fit(affinity)

    labels = aff.labels_
    print(labels,labels.shape)

    # Number of clusters, not counting the -1 label if present
    noise_offset = 1 if -1 in labels else 0
    no_clusters = len(set(labels)) - noise_offset
    print(no_clusters,"no_clusters")

    return tuple(labels.tolist())

Example #30

0

Show file

File: Joost_EPI_affinity_propagation_v1_0.py Project: kasperlab/Joost_et_al_2016_Cell_Systems

def affinity_propagation(dataset,
                         axis,
                         preference,
                         affinity,
                         damping=0.5,
                         max_iter=200,
                         convergence_iter=15,
                         copy=True,
                         verbose=False):
    """
    Thin wrapper around scikit-learn's AffinityPropagation.

    Builds the estimator from the given settings and fits it on the transposed
    dataset when ``axis == 0`` or on the dataset itself when ``axis == 1``.
    For any other ``axis`` the estimator is returned unfitted.
    """
    settings = dict(damping=damping,
                    max_iter=max_iter,
                    convergence_iter=convergence_iter,
                    copy=copy,
                    preference=preference,
                    affinity=affinity,
                    verbose=verbose)
    estimator = AffinityPropagation(**settings)

    if axis == 0:
        estimator.fit(dataset.T)
    elif axis == 1:
        estimator.fit(dataset)

    return estimator

Example #31

0

Show file

File: pipeline_full_131214.py Project: bsbell21/CapstoneProject

    def cluster(self, feat_mtx, df_lm_allusers):
        """Cluster the feature matrix and attach the labels to the score frame.

        Fits affinity propagation on ``feat_mtx``, stores the estimator and
        its labels on the instance, adds a ``'cluster'`` column to a copy of
        ``df_lm_allusers`` and sorts that copy by cluster and
        ``self.score_col``, both descending.

        :return: the sorted copy (also stored as
            ``self.df_least_misery_clustered``)
        """
        # clustering artists based on AffinityPropagation
        start = time.time()
        af = AffinityPropagation()
        af.fit(feat_mtx)
        self.labels = af.labels_
        self.af = af

        # Single-argument print calls produce the same text on Python 2 and 3.
        print('number of labels:  %s' % len(self.labels))
        print('labels %s' % (self.labels,))

        # adding cluster labels to least misery dataframe and sorting by score and cluster
        df_least_misery_clustered = df_lm_allusers.copy()
        print('len df least misery:  %s' % len(df_least_misery_clustered))

        sort_cols = ['cluster', self.score_col]
        df_least_misery_clustered['cluster'] = self.labels
        df_least_misery_clustered[sort_cols] = df_least_misery_clustered[sort_cols].astype(float)
        # descending, as the highest score is wanted first
        # (DataFrame.sort no longer exists; sort_values replaces it)
        df_least_misery_clustered = df_least_misery_clustered.sort_values(by=sort_cols, ascending=False)
        self.df_least_misery_clustered = df_least_misery_clustered
        end = time.time()
        print('clustering completed in:  %s' % (end - start))
        return df_least_misery_clustered

Example #32

0

Show file

File: getLines.py Project: HastingsGreer/gridslam2

def reduce(lines):
    """Merge similar lines by clustering them with affinity propagation.

    ``lines[:, 0]`` is divided column-wise by ``[300, 1]`` before clustering;
    the cluster centres are scaled back and returned with an extra axis
    inserted at position 1. Returns None when ``lines`` is None.
    """
    if lines is None:
        return None

    column_scale = np.array([[300, 1]])
    af = AffinityPropagation(preference=-.01)
    af.fit(lines[:, 0] / column_scale)

    merged = af.cluster_centers_ * column_scale
    return np.expand_dims(merged, 1)

Example #33

0

Show file

File: analyze.py Project: olveirap/data-science-utils

def cluster_analyze(dataframe, cluster_type='KMeans', n_clusters=None):
    """Fit a clustering model on ``dataframe`` and scatter-plot its first two columns.

    The predicted label of every row is written back into ``dataframe`` as
    column ``'y_pred'`` (the caller's frame is modified) and the figure is
    shown with ``plt.show()``.

    :param dataframe: pandas DataFrame; presumably all-numeric -- TODO confirm
    :param cluster_type: one of ``'KMeans'``, ``'dbscan'``, ``'affinity_prob'``,
        ``'spectral'`` or ``'birch'``
    :param n_clusters: required for ``'KMeans'`` and ``'spectral'``; must be
        falsy for the other types
    :raises AssertionError: when ``n_clusters`` does not match ``cluster_type``
    :raises ValueError: when ``cluster_type`` is not recognised
    """
    from sklearn.cluster import KMeans, DBSCAN, AffinityPropagation, SpectralClustering, Birch

    import matplotlib.pyplot as plt
    import numpy as np

    # ``.values`` replaces DataFrame.as_matrix(), which newer pandas removed.
    df_mat = dataframe.values
    if cluster_type == 'KMeans':
        assert n_clusters, "Number of clusters argument mandatory"
        # seed of 10 for reproducibility.
        clusterer = KMeans(n_clusters=n_clusters, random_state=10)
    elif cluster_type == 'dbscan':
        assert not n_clusters, "Number of clusters irrelevant for cluster type : %s" % (
            cluster_type)
        clusterer = DBSCAN(eps=0.5)
    elif cluster_type == 'affinity_prob':
        assert not n_clusters, "Number of clusters irrelevant for cluster type : %s" % (
            cluster_type)
        clusterer = AffinityPropagation(damping=.9, preference=-200)
    elif cluster_type == 'spectral':
        assert n_clusters, "Number of clusters argument mandatory"
        clusterer = SpectralClustering(n_clusters=n_clusters,
                                       eigen_solver='arpack',
                                       affinity="nearest_neighbors")
    elif cluster_type == 'birch':
        assert not n_clusters, "Number of clusters irrelevant for cluster type : %s" % (
            cluster_type)
        clusterer = Birch(n_clusters=2)
    else:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError("Unknown clustering algorithm type")

    plt.figure(figsize=(2 + 3, 9.5))
    colors = np.array(list('bgrcmykbgrcmykbgrcmykbgrcmyk'))
    colors = np.hstack([colors] * 20)

    clusterer.fit(df_mat)
    if hasattr(clusterer, 'labels_'):
        # The ``np.int`` alias was removed from NumPy; the builtin is equivalent.
        y_pred = clusterer.labels_.astype(int)
    else:
        y_pred = clusterer.predict(df_mat)
    dataframe['y_pred'] = y_pred

    # plot
    plt.title(cluster_type, size=18)
    plt.scatter(df_mat[:, 0], df_mat[:, 1])

    if hasattr(clusterer, 'cluster_centers_'):
        centers = clusterer.cluster_centers_
        center_colors = colors[:len(centers)]
        plt.scatter(centers[:, 0], centers[:, 1], s=100, c=center_colors)
    plt.show()

Example #34

0

Show file

File: document_clustering.py Project: 000Nelson000/text-analytics-with-python

def affinity_propagation(feature_matrix):
    """Cluster documents with affinity propagation on ``feature_matrix * feature_matrix.T``.

    :param feature_matrix: assumed to be a SciPy sparse matrix (the product
        must provide ``.toarray()``) -- TODO confirm against caller
    :return: tuple ``(fitted AffinityPropagation model, labels array)``
    """
    sim = feature_matrix * feature_matrix.T
    # toarray() gives a plain ndarray. todense() returns np.matrix, which
    # current scikit-learn input validation refuses.
    sim = sim.toarray()
    # NOTE(review): the estimator is not built with affinity='precomputed';
    # confirm that using ``sim`` as a feature matrix is intended.
    ap = AffinityPropagation()
    ap.fit(sim)
    clusters = ap.labels_
    return ap, clusters

Example #35

0

Show file

File: unsurpervised_models.py Project: sancicaXX/SklearnCustomization

def affinity_propagation(X, args=None):
    """
    AffinityPropagation clustering: a kind of graph clustering.

    :param X: data passed to ``AffinityPropagation.fit``
    :param args: optional mapping of keyword arguments for the
        ``AffinityPropagation`` constructor; ``None`` means no extra arguments
    :return: the fitted model
    """
    from sklearn.cluster import AffinityPropagation
    # ``None`` stands in for the former mutable ``{}`` default.
    model = AffinityPropagation(**(args or {}))
    model.fit(X)
    return model

Example #36

0

Show file

File: similarity.py Project: rmarren1/jhu-projects

def ap(X):
    """Run affinity propagation on a ``(matrix, params)`` pair and return the labels.

    NaN and infinite entries of the matrix are set to 0.0 in place (the
    caller's object is modified). The model is then fitted on ``-(matrix - 1)``
    with ``params`` passed as constructor keyword arguments.
    """
    x, params = X
    x[np.isnan(x) | np.isinf(x)] = 0.0
    shifted = x - 1
    model = AffinityPropagation(**params)
    model.fit(-shifted)
    return model.labels_

Example #37

0

Show file

File: ptcloud_dataset_graph_three_shapes_yefan.py Project: arqam-ai/SVR

def get_partition(matrix, preference, damping=0.75):
    """Return affinity-propagation labels for a precomputed affinity ``matrix``.

    :param matrix: precomputed affinities handed to ``fit``
    :param preference: ``preference`` argument of ``AffinityPropagation``
    :param damping: ``damping`` argument of ``AffinityPropagation``
    :return: the ``labels_`` of the fitted model
    """
    estimator = AffinityPropagation(
        damping=damping,
        affinity='precomputed',
        preference=preference,
    )
    estimator.fit(matrix)
    return estimator.labels_

Example #38

0

Show file

File: clustering_approach.py Project: urdaalex/Serpensoleum

def getNumClusters(doc_vectors):
    '''
    Estimate how many clusters the documents fall into.

    Fits affinity propagation (default settings) on ``doc_vectors``, a list of
    document vectors as returned by makeDocumentVectors, and returns the
    number of exemplars it found.
    '''
    model = AffinityPropagation()
    model.fit(doc_vectors)
    exemplar_indices = model.cluster_centers_indices_
    return len(exemplar_indices)

Example #39

0

Show file

File: clustering.py Project: spozi/temporal-summarization

    def affinityClustering(series):
        """Fit affinity propagation on ``series.tolist()`` and return the predicted labels.

        The same list of vectors is used for fitting and for ``predict``.
        """
        samples = series.tolist()
        model = AffinityPropagation()
        model.fit(samples)
        return model.predict(samples)

Example #40

0

Show file

def affinity_propagation(feature_matrix):
    '''
    Affinity propagation clustering.

    :param feature_matrix: assumed to be a SciPy sparse matrix (it must
        provide ``.toarray()``) -- TODO confirm against caller
    :return: tuple ``(fitted AffinityPropagation model, labels array)``
    '''
    ap = AffinityPropagation()
    # toarray() gives a plain ndarray. todense() returns np.matrix, which
    # current scikit-learn input validation refuses.
    ap.fit(feature_matrix.toarray())
    clusters = ap.labels_
    return ap, clusters

Example #41

0

Show file

File: cluster.py Project: DevinJeon/soma0612

    def cluster_prop(self, filtered_data):
        """Group property words by affinity propagation over their ``self.wmodel`` vectors.

        :param filtered_data: iterable of reviews; each review supplies
            ``review['index']`` and ``review['line']``, a list of dicts whose
            ``"prop"`` entry's first element is the property word
        :return: ``(cluster_dict, prop_dict, Aprop)`` where ``prop_dict`` maps a
            word to its ``freq``/``data``/``idx`` record, ``cluster_dict`` maps
            a cluster label to its ``word`` list, summed ``freq`` and ``seed``
            (its most frequent word), and ``Aprop`` is the fitted model
        """
        prop_dict = {}
        for review in filtered_data:
            for dicti in review['line']:
                prop = dicti["prop"][0]
                # ``in`` replaces dict.has_key(), which is gone in Python 3.
                if prop not in prop_dict:
                    prop_dict[prop] = {"freq": 0, "data": [], "idx": []}

                entry = prop_dict[prop]
                entry['idx'].append(review['index'])
                entry["freq"] += 1
                entry["data"].append(dicti)

        d_list = []
        word_list = []
        for word in prop_dict:
            try:
                vector = self.wmodel[word]
            except Exception:
                # Best effort: words the model cannot resolve are skipped.
                # Unlike a bare ``except:``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
            d_list.append(vector)
            word_list.append(word)

        Aprop = AffinityPropagation(damping=0.6, convergence_iter=100, max_iter=10000)
        Aprop.fit(d_list)

        cluster_dict = {}
        for word, label in zip(word_list, Aprop.labels_):
            if label not in cluster_dict:
                cluster_dict[label] = {"word": [], "freq": 0, "seed": "", "sim": 0.0}
            cluster_dict[label]["word"].append(word)

        for cluster in cluster_dict.values():
            cluster_freq = 0
            max_seed = ""
            max_freq = 0
            for word in cluster["word"]:
                freq = prop_dict[word]["freq"]
                cluster_freq += freq
                # Strict comparison keeps the first word among ties.
                if freq > max_freq:
                    max_freq = freq
                    max_seed = word

            cluster["freq"] = cluster_freq
            cluster["seed"] = max_seed

        return (cluster_dict, prop_dict, Aprop)

Example #42

0

Show file

File: 6-mining.py Project: luisc29/ide-usage-data

def clustering_affinity_propagation(data_res):
    """
    Fit sklearn's affinity propagation on the given data frame.

    Returns ``(predictions, cluster_centers, model)``: the labels predicted for
    ``data_res`` itself, the fitted cluster centres, and the fitted estimator.
    """
    model = AffinityPropagation()
    model.fit(data_res)
    return model.predict(data_res), model.cluster_centers_, model

Example #43

0

Show file

File: clustering_algorithms.py Project: avisochek/scastrap_data_pipeline

def affinityprop(lngs, lats, city, cluster_diameter):
	"""Cluster (lng, lat) points with affinity propagation.

	The points are stacked into one row per (lng, lat) pair, clustered, and the
	labels are converted with ``labels_to_index``. ``cluster_diameter`` is not
	used by this function.
	"""
	# These values are read but not used afterwards; the lookups are kept so a
	# ``city`` lacking one of the keys still fails here.
	city["area"]
	city["lng"]
	city["lat"]

	points = np.array([np.array(lngs), lats]).T
	model = AffinityPropagation(
		damping=0.75,
		max_iter=200,
		convergence_iter=15,
		copy=True,
		preference=None,
		affinity='euclidean',
		verbose=False,
	)
	model.fit(points)
	return labels_to_index(np.array(model.labels_))

Example #44

0

Show file

File: database.py Project: a33kuo/language-learner

def cluster_concepts(context="location"):
    """
    Cluster the concepts related to each row of the ``context`` table and
    store a category number for every concept.

    For every row returned by ``SELECT * FROM <context>`` the related concepts
    are fetched, their pairwise cosine similarities are clustered with
    AffinityPropagation, and ``<context>_concept.category`` is updated with the
    cluster number (numbering restarts at 1 for each row). The concept names
    of each cluster and the row's Chinese name are printed to stdout.

    :param context: table name; only "action" and "location" are handled by
        the row-unpacking code below
    """
    db = Database()
    concept_category = ConceptCategory()
    # The table name is interpolated directly into the SQL text.
    cmd = "SELECT * FROM %s" % (context)
    context_res = db.query_db(cmd)
    concept_list = []
    concept_matrix = []
    for item in context_res:
        # Start from empty lists for every context row.
        concept_list = []
        concept_matrix = []
        # NOTE(review): any other ``context`` value leaves context_id,
        # context_name and context_chinese unbound for the code below.
        if context == "action":
            context_id, context_chinese, context_name = item[:3]
        elif context == "location":
            context_id, context_name, context_chinese = item
        cmd = (
            "SELECT b.name, b.id FROM %s_concept AS a, concept AS b \
				WHERE a.%s_id = %s AND a.concept_id = b.id"
            % (context, context, context_id)
        )
        concept_res = db.query_db(cmd)
        # Nothing to cluster for this row.
        if len(concept_res) == 0:
            continue
        # This inner loop rebinds ``item``; the outer row was unpacked above.
        for item in concept_res:
            concept, concept_id = item
            concept_vector = concept_category.concept_axes.row_named(concept)
            concept_list.append((concept_id, concept))
            concept_matrix.append(concept_vector)
            # Run affinity propagation
        S = cosine_similarity(concept_matrix, concept_matrix)
        # NOTE(review): the estimator is created with default arguments even
        # though S is a similarity matrix -- confirm this is intended.
        af = AffinityPropagation()
        af.fit(S)
        cluster_centers_indices = af.cluster_centers_indices_
        labels = af.labels_
        count = 0
        # Maps the name of a cluster's exemplar concept to the
        # (concept_id, concept) pairs assigned to that cluster.
        clusters = defaultdict(list)
        for label in labels:
            clusters[concept_list[cluster_centers_indices[label]][1]].append(concept_list[count])
            count += 1
        category_num = 0
        # Category numbers follow the iteration order of ``clusters``.
        for key, value in clusters.items():
            category_num += 1
            for concept in value:
                cmd = (
                    "UPDATE %s_concept SET category = %d WHERE \
						%s_id = %s AND concept_id = %s"
                    % (context, category_num, context, context_id, concept[0])
                )
                db.query_db(cmd)
                # Python 2 print statement; the trailing comma keeps the
                # members of one cluster on the same output line.
                print concept[1].encode("utf-8") + " ",
            print ""
        print "----------" + context_chinese.encode("utf-8") + "----------"

Example #45

0

Show file

def train_model(X, quantile, shift=0, isKernel=False):
    """Fit an ``AffinityPropagation`` model on ``X`` or on its RBF kernel.

    With ``isKernel == False`` the model is fitted on ``X`` with preference
    ``np.percentile(X, quantile) - shift``. Otherwise it is fitted on the RBF
    kernel of ``X`` as a precomputed affinity, with the 0.318th percentile of
    the kernel as preference.

    :return: the fitted model
    """
    if isKernel == False:
        model = AffinityPropagation(preference=np.percentile(X, q=quantile) - shift)
        model.fit(X)
        return model

    kernel = pairwise_kernels(X, metric="rbf")
    # NOTE(review): this value is computed but never used in the kernel branch;
    # confirm whether it was meant to be the preference.
    preference = np.percentile(X, q=quantile) - shift
    model = AffinityPropagation(affinity='precomputed',
                                preference=np.percentile(kernel, q=0.318))
    model.fit(kernel)
    return model

Example #46

0

Show file

File: thesis_plots.py Project: halfdanrump/MarketSimulation

	def do_issue(data, data_name):
		"""Save two scatter plots for ``data``: reduced points and their affinity clusters.

		The data is first reduced to 1000 points with ``reduce_npoints_kmeans``
		and projected to three PCA components. The first figure colours the
		projection by the third component; the second colours it by the
		AffinityPropagation labels of the reduced points. ``dataset``,
		``figure_save_path`` and ``affinity_damping`` come from the enclosing
		scope.
		"""
		reduced_points, _labels, _km = reduce_npoints_kmeans(dataframe = data, dataset_name = dataset, data_name=data_name, n_datapoints = 1000, load_from_file = False)
		transformed_data, _pca, _components = calculate_pca(reduced_points, n_components=3)
		colormap = brewer2mpl.get_map('RdBu', 'diverging', 4, reverse=True)
		filename = figure_save_path + dataset + '_issue_29_1_%s_reduced_number_of_points.png'%data_name
		# Single-argument print calls give the same output on Python 2 and 3.
		print("Making scatter plot of %s data for dataset %s, where the number of points have been reduced by K-Means clustering"%(data_name, dataset))
		make_color_grouped_scatter_plot(data_frame=transformed_data, x_name='d1', y_name='d2', color_by='d3', filename=filename, colormap=colormap)

		ap = AffinityPropagation(damping=affinity_damping)
		ap.fit(reduced_points)
		print("Making scatter plot of Affinity Propagation clusters of %s data for dataset %s"%(data_name, dataset))
		filename = figure_save_path + dataset + '_issue_29_2_%s_affinity.png'%data_name
		make_scatter_plot_for_labelled_data(data_frame=transformed_data, x_name='d1', y_name='d2', labels=ap.labels_, filename=filename, colormap = colormap, legend=True)

Example #47

0

Show file

File: artist_term_clustering.py Project: bsbell21/CapstoneProject

    def cluster(self, feat_mtx):
        """Cluster ``feat_mtx`` and return a labelled, sorted copy of ``self.df_least_misery``.

        An ``AffinityPropagation`` model with default settings is fitted on
        ``feat_mtx``; the model is stored in ``self.af`` and its labels in
        ``self.labels``.

        :param feat_mtx: feature matrix handed to ``AffinityPropagation.fit``
        :return: copy of ``self.df_least_misery`` with a ``'cluster'`` column,
            sorted ascending by ``'cluster'`` and ``self.score_col``
        """
        af = AffinityPropagation()
        af.fit(feat_mtx)
        self.labels = af.labels_
        self.af = af

        # Attach the cluster labels, then order by cluster and score.
        df_least_misery_clustered = self.df_least_misery.copy()
        df_least_misery_clustered['cluster'] = self.labels
        sort_cols = ['cluster', self.score_col]
        df_least_misery_clustered[sort_cols] = df_least_misery_clustered[sort_cols].astype(float)
        # DataFrame.sort() was removed from pandas; sort_values() replaces it.
        df_least_misery_clustered = df_least_misery_clustered.sort_values(sort_cols)
        return df_least_misery_clustered

Example #48

0

Show file

File: clustering.py Project: kgori/treeCl

    def affinity_propagation(self, affinity_matrix=None, sigma=1, **kwargs):
        """
        Partition the data by affinity propagation on a precomputed affinity.

        :param affinity_matrix: affinity to cluster; when None it is computed
            as ``rbf(self.dm.values, sigma)``
        :param sigma: second argument of ``rbf``; only used when
            ``affinity_matrix`` is None
        :param kwargs: forwarded to ``AffinityPropagation``, e.g. damping=0.5,
            max_iter=200, convergence_iter=15, copy=True, preference=None,
            verbose=False
        :return: ``Partition`` built from the fitted labels
        """
        aff = rbf(self.dm.values, sigma) if affinity_matrix is None else affinity_matrix
        estimator = AffinityPropagation(affinity='precomputed', **kwargs)
        # The affinity is passed to fit() as a plain ndarray view.
        estimator.fit(aff.view(np.ndarray))
        return Partition(estimator.labels_)

Example #49

0

Show file

File: cup99KnNN.py Project: 357589873/mlstudy

def loadKmeansData(dataArrayTest,dataArrayTrain,k,m='load'):
    """Return ``(centroid, labelCluster, labelPre)``, loaded from disk or freshly computed.

    :param dataArrayTest: test samples; normalised and labelled when computing
    :param dataArrayTrain: training samples; normalised and clustered when computing
    :param k: unused by the current implementation
    :param m: ``'load'`` reads the pickles ``centroid``, ``labelCluster`` and
        ``labelPre`` from the working directory; any other value fits
        ``AffinityPropagation`` on the normalised training data
    :return: cluster centres, training labels, predicted test labels

    Computed results are not written to disk: saving was already disabled, so
    the cache files are no longer opened for writing, which only truncated them.
    """
    if m == 'load':
        # Pickles are binary data, so open in 'rb'; ``with`` closes each file.
        # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
        with open('centroid', 'rb') as centroidRead:
            centroid = pickle.load(centroidRead)
        with open('labelCluster', 'rb') as labelClusterRead:
            labelCluster = pickle.load(labelClusterRead)
        with open('labelPre', 'rb') as labelPreRead:
            labelPre = pickle.load(labelPreRead)
    else:
        dataArrayTestNorm = preprocessing.normalize(dataArrayTest)
        dataArrayTrainNorm = preprocessing.normalize(dataArrayTrain)
        clf = AffinityPropagation()
        pre = clf.fit(dataArrayTrainNorm)

        centroid = pre.cluster_centers_
        labelCluster = pre.labels_
        labelPre = clf.predict(dataArrayTestNorm)

    return centroid,labelCluster,labelPre

Example #50

0

Show file

File: stratified_rand.py Project: e-baumer/sampling

    def create_stratum(self, column_names, **kwargs):
        '''
        Use affinity propagation to find the strata for each column.

        column_names is a list of the covariates to be split into strata and
        used for classification. For each column a new column
        ``<column_name>_strata`` is added to ``self.data`` giving the strata
        designation for that variable. Columns with at most two unique values
        are used as their own strata (cast to int). ``kwargs`` is accepted but
        not used.

        Raises ValueError when a column contains NaN values.
        Returns the whole data frame (``self.data``).
        '''

        for colname in column_names:
            column = self.data[colname]
            # np.asarray accepts both a pandas Series and an ndarray;
            # Series.reshape no longer exists in pandas.
            X = np.asarray(column).reshape(-1, 1)

            if np.isnan(X).any():
                # Adjacent literals avoid embedding the source indentation in
                # the message, as the backslash continuation used to do.
                raise ValueError("There are NaN values in self.data[%s] that the "
                                 "clustering algorithm can't handle" % colname)

            string_name = colname + '_strata'
            if np.unique(column).shape[0] <= 2:
                self.data[string_name] = column.astype(int)
            else:
                af_model = AP(damping=0.9)
                strata_groups = af_model.fit(X)
                self.data[string_name] = strata_groups.labels_

        return self.data

Example #51

0

Show file

File: cerena_multivariate_utils.py Project: armatita/GEOMS2

def affinity_propagation_cluster_analysis(x,y,preference):
    """Cluster the points ``(x, y)`` with affinity propagation and plot the result.

    Every cluster is drawn in its own colour, with its exemplar marked by a
    '+' and a dashed line from the exemplar to each member. The figure is
    shown with ``plt.show()``.

    Adapted from:
    http://scikit-learn.org/stable/auto_examples/cluster/plot_affinity_propagation.html#example-cluster-plot-affinity-propagation-py

    :param x: 1-D array of x coordinates
    :param y: 1-D array of y coordinates, same length as ``x``
    :param preference: accepted but not used
        NOTE(review): the estimator is built with default settings; confirm
        whether this value was meant to be passed to it
    :return: array of cluster labels, one per point
    """
    X = np.hstack((x.reshape((x.shape[0],1)),y.reshape((y.shape[0],1))))
    af = AffinityPropagation()
    af = af.fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(np.unique(labels))

    # Cycling through the palette avoids an IndexError with many clusters.
    palette = 'bgrcmyk'
    for i in range(n_clusters_):
        color = palette[i % len(palette)]
        my_members = labels == i
        cluster_center = X[cluster_centers_indices[i]]
        plt.scatter(X[my_members, 0], X[my_members, 1],s=90,c=color,alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1],marker='+',s=280,c=color)
        for member in X[my_members]:
            # Connect the exemplar to each member; indexing the input array
            # ``x`` here drew every line to the same wrong point.
            plt.plot([cluster_center[0], member[0]], [cluster_center[1], member[1]],c=color,linestyle='--')
    # Pad the axes by 3% of the data range on each side.
    tolx = (X[:,0].max()-X[:,0].min())*0.03
    toly = (X[:,1].max()-X[:,1].min())*0.03
    plt.xlim(X[:,0].min()-tolx,X[:,0].max()+tolx)
    plt.ylim(X[:,1].min()-toly,X[:,1].max()+toly)
    plt.show()
    return labels

Example #52

0

Show file

File: spectral.py Project: jakobjoachim/text-mining-haw-bachelor

def affinity_propagation(x, damping=0.9):
  """Fit affinity propagation on ``x`` and summarise the result.

  Returns ``(model, (centroids, labels, k))`` where ``centroids`` are the
  fitted cluster centres, ``labels`` the label of each sample and ``k`` the
  number of centres.
  """
  settings = dict(
    damping=damping,
    max_iter=400,
    convergence_iter=30,
    copy=True,
    preference=None,
    affinity='euclidean',
    verbose=False,
  )
  ap = AffinityPropagation(**settings)
  ap.fit(x)
  centroids = ap.cluster_centers_
  summary = (centroids, ap.labels_, len(centroids))
  return ap, summary

Example #53

0

Show file

File: AlloyClustering.py Project: UWMad-Informatics/standardized

def AlloyClustering(k):
    """Cluster the alloys in ``../../AlloyComps.csv`` by composition.

    The Cu, Ni, Mn, P, Si and C columns are clustered with affinity
    propagation (default settings). ``k`` is not used by the active code.

    :return: tuple ``(labels, alloy_data)`` -- the cluster label of each row
        and the parsed data object
    """
    alloy_data = data_parser.parse("../../AlloyComps.csv")
    elements = ["Cu","Ni","Mn","P","Si","C"]
    composition = np.asarray(alloy_data.get_data(elements))
    estimator = AffinityPropagation()
    estimator.fit(composition)
    return (estimator.labels_, alloy_data)

Example #54

0

Show file

File: algorithms.py Project: kfrancischen/Constellation-clustering

	def runAffinityPropagation(self):
		'''
			Run affinity propagation on ``self.coordinates``.

			Stores the exemplar indices in ``self.center_id``, writes a
			'centroid_<label + 1>' assignment into every entry of
			``self.assignments`` and records ``self.silhouetteScore`` and
			``self.adjustedScore``.
		'''
		cosine_dist = distance.squareform(distance.pdist(self.coordinates, 'cosine'))
		# Every entry d becomes 2 - d before being used as precomputed affinity.
		distMatrix = 2 - cosine_dist
		model = AffinityPropagation(damping = self.damping, max_iter = self.max_iter,affinity = 'precomputed')
		model.fit(distMatrix)
		self.center_id = model.cluster_centers_indices_.tolist()
		belongs = model.labels_.tolist()
		for position, label in enumerate(belongs):
			self.assignments[position]['assignment'] = 'centroid_' + str(label + 1)
		# NOTE(review): the silhouette is computed on the transformed matrix
		# with metric='cosine' -- confirm this is the intended input.
		self.silhouetteScore = metrics.silhouette_score(distMatrix, model.labels_, metric = 'cosine')
		trueLabel = dataProcessing.getTrueLabel(self.assignments)
		self.adjustedScore = metrics.adjusted_rand_score(belongs, trueLabel)

Example #55

0

Show file

File: gpfit.py Project: marcocaccin/LearningMetaDynamics

def dataset_fringes(X, cluster_algo, min_compression=64):
    """Reduce ``X`` to representative samples chosen by a clustering algorithm.

    ``X`` is returned unchanged when ``cluster_algo`` is ``'none'`` or when it
    has at most ``min_compression`` rows. ``'AffinityPropagation'`` returns the
    exemplar rows and ``'DBSCAN'`` the nearest-centroid centres of its labels.
    ``'svm_outlier'`` is unfinished and any unknown name prints "BOH"; both
    return ``None``.
    """
    if cluster_algo == 'none' or len(X) <= min_compression:
        return X

    if cluster_algo == 'AffinityPropagation':
        algo = AffinityPropagation()
        neg_dist = -spsp.distance.squareform(sp.spatial.distance.pdist(X))
        algo.fit(neg_dist)
        return X[algo.cluster_centers_indices_]

    if cluster_algo == 'DBSCAN':
        algo = DBSCAN(metric='precomputed', min_samples=2)
        neg_dist = -spsp.distance.squareform(sp.spatial.distance.pdist(X))
        labels = algo.fit(neg_dist).labels_
        return NearestCentroid().fit(X, labels).centroids_

    if cluster_algo == 'svm_outlier':
        # UNFINISHED: the estimator is built but never fitted or used.
        algo = svm.OneClassSVM(nu=0.95 * 0.25 + 0.05,
                               kernel="rbf")
        return None

    print("BOH")
    return None

Example #56

0

Show file

File: clustering.py Project: SpatialTranscriptomicsResearch/st_pipeline

def affinity_umi_removal(molecular_barcodes, _):
    """
    Try to find clusters of similar UMIs using an affinity based approach.

    With two or fewer UMIs the work is delegated to ``countUMINaive``.
    Otherwise the negated pairwise Hamming distances are clustered with
    affinity propagation and one randomly chosen UMI per cluster is returned.
    NOTE(review): ``allowed_mismatches`` is not a parameter of this function;
    it has to come from an enclosing scope.

    :param molecular_barcodes: a list of UMIs
    :return: a list of unique UMIs
    :rtype: list
    """
    if len(molecular_barcodes) <= 2:
        return countUMINaive(molecular_barcodes, allowed_mismatches)

    words = np.asarray(molecular_barcodes)
    distances = np.array([[hamming_distance(w1,w2) for w1 in words] for w2 in words])
    lev_similarity = -1 * distances
    affprop = AffinityPropagation(affinity="precomputed", damping=0.5)
    affprop.fit(lev_similarity)

    labels = affprop.labels_
    unique_clusters = list()
    for cluster_id in np.unique(labels):
        # The exemplar is looked up but not used; the lookup is retained so an
        # invalid index still raises here.
        exemplar = words[affprop.cluster_centers_indices_[cluster_id]]
        members = np.unique(words[np.nonzero(labels == cluster_id)])
        unique_clusters.append(random.choice(members))
    return unique_clusters

Example #57

0

Show file

File: silhouette.py Project: slipguru/icing

    def _internal(preferences, affinity_matrix, dist_matrix,
                  idx, n_jobs, n, queue_y):
        for i in range(idx, n, n_jobs):
            ap = AffinityPropagation(preference=preferences[i],
                                     affinity='precomputed',
                                     max_iter=500)
            ap.fit(affinity_matrix)

            cluster_labels = ap.labels_.copy()
            nclusts = np.unique(cluster_labels).shape[0]
            save_results_clusters("res_ap_{:03d}_clust.csv"
                                  .format(nclusts),
                                  sample_names, ap.labels_)

            if nclusts > 1:
                try:
                    silhouette_list = silhouette_samples(dist_matrix, ap.labels_,
                                                     metric="precomputed")
                    queue_y[i] = np.mean(silhouette_list)
                except BaseException:
                    print(dist_matrix.shape, ap.labels_.shape)

Example #58

0

Show file

File: grey.py Project: WeiliangXing/Facebook-Data-Mining

def get_label_res2(similar_matrix, n_subs):
    """Cluster a precomputed similarity matrix and return the labels.

    :param similar_matrix: precomputed affinities passed to ``fit``
    :param n_subs: accepted but not used
    :return: the ``labels_`` array of the fitted model
    :raises AssertionError: unless the number of distinct labels is between 2
        and 9 inclusive
    """
    cluster = AffinityPropagation(damping=0.75, affinity='precomputed')

    res = cluster.fit(similar_matrix)

    size_labels = len(set(res.labels_))
    assert size_labels < 10, size_labels
    assert size_labels > 1, size_labels

    # Single-argument print call: same output on Python 2 and 3.
    print(res.labels_)
    return res.labels_

Example #59

0

Show file

File: thresholds.py Project: ericmjl/influenza-reassortment-detector

def compute_threshold(affmat):
    """
    Derive a threshold from affinity-propagation clusters of the sequences.

    ``affmat`` is clustered as a precomputed affinity. For each cluster the
    smallest value of the sub-matrix restricted to its members is taken, and
    the smallest of these values is returned. The result is never larger
    than 1, the starting value.
    """
    ap = AffinityPropagation(affinity='precomputed')
    ap.fit(affmat)

    # One row per entry of affmat.index, indexed by it, holding its label.
    clusters = pd.DataFrame(list(zip(affmat.index, ap.labels_)))
    clusters = clusters.set_index(0)
    clusters.columns = ['Cluster']

    minval = 1
    for _, members in clusters.groupby('Cluster'):
        accessions = members.index
        subset = affmat[accessions].loc[accessions, :]
        lowest = np.matrix(subset).min()
        if lowest < minval:
            minval = lowest

    return minval