def affinity(data):
    space = {'damping': hp.uniform('damping', 0.5, 0.99)}
    algo = partial(tpe.suggest, n_startup_jobs=10)
    best = fmin(hyper_affinity, space, algo=algo, max_evals=30)
    model = AffinityPropagation(damping=best['damping'])
    pred = model.fit_predict(data)  # fit once instead of refitting for every return value
    return best, pred, sil_score(data, pred), model
Example no. 2
def clustering(relation_all_df, all_keys, txt_name, method='AffinityPropagation', n_clusters=5):
    print('begin clustering')
    data = relation_all_df.iloc[:, :].values
    if method == 'AffinityPropagation':
        clustering = AffinityPropagation(damping=0.8).fit(data)
    elif method == 'AgglomerativeClustering':
        clustering = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')
        clustering.fit_predict(data)
    else:
        raise ValueError('unknown clustering method: %s' % method)

    res_dict = dict()
    for i in range(len(clustering.labels_)):
        res_dict.setdefault(clustering.labels_[i], []).append(all_keys[i])

    for k, v in res_dict.items():
        print(k,v)

    # for key in res_dict.keys():
    #     if type(key) is not str:
    #         res_dict[str(key)] = res_dict[key]
    #         del res_dict[key]

    with open(txt_name, 'w') as the_file:
        # the_file.write(json.dumps(list(res_dict.items())))
        if method == 'AgglomerativeClustering':
            the_file.write('n_clusters: \n')
            the_file.write(str(n_clusters) + '\n')
        for i in res_dict.keys():
            the_file.write(str(i) + '\n')
            the_file.write(','.join([str(x) for x in res_dict[i]]) + "\n")
Example no. 3
    def cluster(self, tracklet):
        # tracklet: [[pid, time, img_id, pseudo_id],...], only img_id is used
        ids = [arr[2] for arr in tracklet]  # materialize as a list; it is indexed again below
        # cluster = SpectralClustering(n_clusters=2, affinity='precomputed')
        track_features = []
        for i, img_i in enumerate(ids):
            track_features.append(self.feature[img_i])
        similarity = -euclidean_distances(track_features, squared=True)
        # cls = cluster.fit_predict(affinity)
        cluster = AffinityPropagation(preference=np.median(similarity))
        cls = cluster.fit_predict(track_features).reshape(-1)
        self.spectral_score += metrics.adjusted_rand_score(
            [info[0] for info in tracklet], cls)

        cls_cnt = len(set(cls))
        #
        # cluster = SpectralClustering(n_clusters=cls_cnt)
        # cls = cluster.fit_predict(track_features).reshape(-1)
        # self.ap_score += metrics.adjusted_rand_score([info[0] for info in tracklet], cls)

        cluster = KMeans(n_clusters=cls_cnt)
        cls = cluster.fit_predict(track_features).reshape(-1)
        self.kmeans_score += metrics.adjusted_rand_score(
            [info[0] for info in tracklet], cls)

        self.tracklet_cnt += 1
        return [(ids[i], cls[i]) for i in range(len(ids))]
Example no. 4
def get_w2v_field(zeta_res, model, zeta_scope, mode):

    if mode == 0:
        words = zeta_res.index[zeta_res[zeta_scope] > 0]
    else:
        words = zeta_res.index[zeta_res[zeta_scope] < 0]

    vecs = []
    for word in words:
        try:
            vecs.append(model[word])

        except KeyError:
            pass

    word_matrix = np.matrix(vecs)
    if mode == 0:
        clu = AffinityPropagation(
            preference=zeta_res[zeta_scope][zeta_res[zeta_scope] > 0])
    else:
        clu = AffinityPropagation(
            preference=zeta_res[zeta_scope][zeta_res[zeta_scope] < 0])
    clu.fit_predict(word_matrix)
    cluster_frame = pd.DataFrame(clu.cluster_centers_)
    cluster_frame["Category"] = mode

    return cluster_frame
Example no. 5
def AffProp(SM):

    af = AffinityPropagation(preference=None, affinity='precomputed')
    af.fit_predict(SM)
    cluster_centers_indices = af.cluster_centers_indices_
    labels_ = af.labels_

    n_clusters_ = len(cluster_centers_indices)
    return n_clusters_, labels_
Example no. 6
 def clusteringAlgorithm(self, typeOfAlgorithm): 
     #define the model
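     # NOTE: `x` used below is the dataset to cluster; it is assumed to be defined elsewhere (not shown in this snippet)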
     clusterPoint = None
     if typeOfAlgorithm == 'affinityPropagation': 
         model = AffinityPropagation(damping=0.9)
         model.fit(x)
         clusterPoint = model.predict(x)
     elif typeOfAlgorithm == 'agglomerativeClustering': 
         model = AgglomerativeClustering(n_clusters=2)
         model.fit(x)
         clusterPoint = model.fit_predict(x)
     elif typeOfAlgorithm == 'BIRCH': 
         model = Birch(threshold=0.01, n_clusters=2)
         model.fit(x)
         clusterPoint = model.predict(x)
     elif typeOfAlgorithm == "DBSCAN": 
         model = DBSCAN(eps=0.30, min_samples=9)
         clusterPoint = model.fit_predict(x)
     elif typeOfAlgorithm == "KMeans": 
         model = KMeans(n_clusters=2)
         model.fit(x)
         clusterPoint = model.predict(x)
     elif typeOfAlgorithm == "MiniBatchKMeans": 
         model = MiniBatchKMeans(n_clusters=2)
         model.fit(x)
         clusterPoint = model.predict(x)
     elif typeOfAlgorithm == "MeanShift": 
         model = MeanShift()
         model.fit(x)
         clusterPoint = model.fit_predict(x)
     elif typeOfAlgorithm == "OPTICS": 
         model = OPTICS(eps=0.8, min_samples=10)
         model.fit(x)
         clusterPoint = model.fit_predict(x)
     elif typeOfAlgorithm == "SpectralClustering":
         model = SpectralClustering(n_clusters=2)
         model.fit(x)   
         clusterPoint =model.fit_predict(x)
     elif typeOfAlgorithm == "GaussianMixture": 
         model = GaussianMixture(n_components=2)
         model.fit(x)
         clusterPoint = model.predict(x)
     else: 
         print('unknown clustering algorithm:', typeOfAlgorithm)
         return
     #retrieve unique clusters
     clusters = unique(clusterPoint)
     for cluster in clusters: 
         #get row indexes of the samples in this cluster
         rowIndexes = where(clusterPoint == cluster)
         #create scatter of these samples
         pyplot.scatter(x[rowIndexes, 0], x[rowIndexes, 1])
     pyplot.savefig('img/' + typeOfAlgorithm + '.png')
     pyplot.clf()
     pyplot.cla()
     pyplot.close()
def affinitypropagation(tfidf_matrix):
    ap_cluster = AffinityPropagation(damping=0.5,
                                     max_iter=200,
                                     convergence_iter=15,
                                     copy=True,
                                     preference=None,
                                     affinity='euclidean',
                                     verbose=False)
    labels = ap_cluster.fit_predict(tfidf_matrix)  # fit once and reuse the labels
    print('affinitypropagation number of clusters: ', end="")
    print(len(set(labels)))  # number of clusters
    return labels
def feat_based_cluster(uniquePhrases,
                       gold,
                       phraseFeats,
                       clutter,
                       damping,
                       affinity='cosine',
                       saveto=None):
    phrase_dic = read_phrase(uniquePhrases)
    gt_df = pd.read_csv(gold, encoding="utf-8")
    feat_matrix = np.load(phraseFeats)

    img_list = gt_df.image.unique()
    result = []
    for img in img_list:
        gt_df_img = gt_df.query('image == %i' % img)

        feats = []
        phrases = []
        for _, item in gt_df_img.iterrows():
            phrase = item.phrase
            phrases.append(phrase)
            feats.append(feat_matrix[phrase_dic[phrase]])

        feats = np.vstack(feats)

        labels = []
        if len(phrases) > 1:
            if affinity == 'cosine':
                similarity = cosine_similarity(feats)
                pref = np.percentile(similarity, clutter)
                af = AffinityPropagation(preference=pref,
                                         affinity='precomputed',
                                         damping=damping)
                labels = af.fit_predict(similarity)

            elif affinity == 'euclidean':
                distance = -euclidean_distances(feats, squared=True)
                pref = np.percentile(distance, clutter)
                af = AffinityPropagation(preference=pref, damping=damping)
                labels = af.fit_predict(feats)
            else:
                raise RuntimeError('invalid affinity metric')
            if np.isnan(labels).any():  # when af did not converge
                labels = np.arange(labels.size)
        else:
            labels = [1]

        for i in range(len(phrases)):
            result.append({
                'image': img,
                'phrase': phrases[i],
                'label': labels[i]
            })
    return pd.DataFrame(result)
Example no. 9
def clusterSamples(model,trainDataIn,testDataIn,params):
    
    if model == 'SOM':
        
        # Map size 
        msz0 = params[0]
        msz1 = params[1]

        #print('SOM size: ', trainDataIn.shape[0])
        sm = SOM.SOM('sm', trainDataIn, mapsize = [msz0, msz1],norm_method = 'var',initmethod='pca')
        sm.train(n_job = 1, shared_memory = 'no',verbose='off')
        #sm.set_data_labels(list(instancesCorpus))

        if params[2] == True:
            #print('Hitmap for CORPUS (train, red) and TARGET (test, blue) data:')        
            sm.hit_map(testDataIn)

        testData_proj = sm.project_data(testDataIn)
        trainData_proj = sm.project_data(trainDataIn)
        testData_loc = sm.ind_to_xy(testData_proj)[:,2]
        trainData_loc = sm.ind_to_xy(trainData_proj)[:,2]

        return trainData_loc, testData_loc, sm

    if model == 'AffinityPropagation':
        
        model = AffinityPropagation()
        model.fit(trainDataIn)
        
        return model.predict(trainDataIn), model.predict(testDataIn), model
    
    if model == 'DBSCAN':
        
        model = DBSCAN()
        model.fit(trainDataIn)
        
        return model.fit_predict(trainDataIn), model.fit_predict(testDataIn), model

    if model == 'KMeans':
        
        model = KMeans(n_clusters=params[0])
        model.fit(trainDataIn)
        
        return model.predict(trainDataIn), model.predict(testDataIn), model
    
    if model == 'AgglomerativeClustering':
        model = AgglomerativeClustering(n_clusters=params[0])
        model.fit(trainDataIn)
        
        return model.fit_predict(trainDataIn), model.fit_predict(testDataIn), model    
Example no. 10
def affinity_propagation(D, preference='median'):
    assert D.shape[0] == D.shape[1], 'Matrix is not square!'

    if preference in ['minimum', 'min']:
        preference = np.min(-D)
    elif preference in ['maximum', 'max']:
        preference = np.max(-D)
    elif preference in ['median', 'med']:
        preference = np.median(-D)
    else:
        raise ValueError('preference must be: {minimum, maximum, median}')

    clusterer = AffinityPropagation(affinity='precomputed', preference=preference)
    clusterer.fit_predict(-D)
    return clusterer
Example no. 11
def cluster(playlist):
    arq = 'Total ' + playlist + '.csv'
    n_clusters = 0
    Full_data = pd.read_csv(arq)
    Full_data = Full_data.dropna(axis=1, how='all')
    Full_data = Full_data.dropna(axis=0, how='any')
    ID = Full_data['id']
    Mode = Full_data['mode']
    length = Full_data['duration_ms']
    artist = Full_data['artist']
    Full_data = Full_data.drop(
        columns=['track', 'album_id', 'artist', 'id', 'mode'])
    Fdata = Full_data.values
    scaler = Scaler()
    data_u = scaler.fit_transform(Fdata)
    # pca_transf = PCA(0.8)
    # PCA_data = pca_transf.fit_transform(data_u)
    clusterer = AffinityPropagation(random_state=None, preference=-500)
    # clusterer = HDBSCAN(min_cluster_size=20)
    # clusterer = MeanShift()
    labels = clusterer.fit_predict(data_u)
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    labels.shape = (len(labels), 1)
    Full_data['cluster'] = labels + 1
    Full_data['id'] = ID
    Full_data['mode'] = Mode
    Full_data['artist'] = artist
    Full_data['duration_ms'] = length
    # Full_data.sort_values(by='cluster')
    Full_data.to_csv('clustered.csv', index=False)
    # sns.pairplot(Full_data, hue="cluster", palette='YlGnBu')
    # plt.show()
    return n_clusters
Example no. 12
def execute(args):
    ##############################################################################
    if len(args) < 1:
        usage()
        sys.exit()

    names, labels_true, X = parse(args[0])
    indices = [int(i) for i in args[1:]]
    relevant_names = names[1:]
    if len(indices) > 0:
        X = np.asarray([[sample[i] for i in indices] for sample in X])
        relevant_names = [relevant_names[i] for i in indices]
    print "Clustering on", str(relevant_names) + "..."

    ##############################################################################
    # Compute Affinity Propagation
    af = AffinityPropagation(preference=-50)
    # cluster_centers_indices = af.cluster_centers_indices_
    # labels = af.labels_
    #
    # n_clusters_ = len(cluster_centers_indices)

    y_pred = af.fit_predict(X)
    if y_pred is None or len(y_pred) == 0 or isinstance(y_pred[0], np.ndarray):
        return 0
    counts = get_cluster_counts(labels_true, y_pred)
    print(counts)
Example no. 13
def visual(c, X, y):
  from sklearn.cluster import AffinityPropagation
  cluster_object = AffinityPropagation()
  y_pred = cluster_object.fit_predict(X)
  colors = ['red', 'green', 'blue', 'cyan', 'black', 'yellow', 'magenta', 'brown', 'orange', 'silver', 'goldenrod', 'olive', 'dodgerblue']
  clusters = np.unique(y_pred)
  print("Cluster Labels")
  print(clusters)
  print("Evaluation")
  evaluation_labels(y, y_pred)
  evaluation(X, y_pred)
  for cluster in clusters:
    row_idx = np.where(y == cluster)
    plt.scatter(X[row_idx, 0], X[row_idx, 1])
  plt.title('Dataset')
  plt.xlabel('X1')
  plt.ylabel('X2')
  plt.legend()
  plt.show()
  for cluster in clusters:
    row_idx = np.where(y_pred == cluster)
    plt.scatter(X[row_idx, 0], X[row_idx, 1])
  plt.title('Clusters')
  plt.xlabel('X1')
  plt.ylabel('X2')
  plt.legend()
  plt.show()
Example no. 14
def cluster_trajectories( curves ):
    """Given a list of curves, cluster_trajectories will cluster them."""
    n_curves = len(curves)
    X_2B_clstrd = np.zeros( (n_curves, 4) )
    X_2B_clstrd[:,0] = np.array( [ curves[k][0, 0] for k in range(n_curves) ] )
    X_2B_clstrd[:,1] = np.array( [ curves[k][1, 0] for k in range(n_curves) ] )
    X_2B_clstrd[:,2] = np.array( [ curves[k][0,-1] for k in range(n_curves) ] )
    X_2B_clstrd[:,3] = np.array( [ curves[k][1,-1] for k in range(n_curves) ] )
        
    for col in range( 4 ):
        X_2B_clstrd[:,col] /=  X_2B_clstrd[:,col].std()
        
    def distance_metric(a,b):
        #A distance metric on R^4 modulo the involution
        #(x1,x2,x3,x4) -> (x3,x4,x1,x2)
        d = lambda a,b : np.sqrt( np.sum( (a-b)**2 ) )
        T = lambda x: np.array([x[2],x[3],x[0],x[1]])
        return min( d(a,b) , d(T(a),b) )
    from sklearn.cluster import AffinityPropagation
    clusterer = AffinityPropagation(affinity='precomputed', convergence_iter=100)
    aff = np.zeros((n_curves, n_curves))
    for i in range(n_curves):
        for j in range(i+1,n_curves):
            aff[i,j] = np.exp(-distance_metric( X_2B_clstrd[i], X_2B_clstrd[j])**2)
            aff[j,i] = aff[i,j]

    #clusterer.Affinity = aff
    cluster_labels = clusterer.fit_predict(aff)
    out = []
    for label in set(cluster_labels):
        cluster = [curves[k] for k in range(n_curves) if cluster_labels[k] == label]
        out.append(cluster)
    return [align_cluster(c) for c in out]
Example no. 15
def community_detection_by_affinity(graph, weight_type=WeightType.ABSOLUTE):

    # Has a high impact on Girvan-Newman clustering
    graph = nx.algorithms.tree.mst.maximum_spanning_tree(
        Graph.to_undirected(graph))

    mat, node_to_int = prepare_matrix(graph)

    af = AffinityPropagation(preference=-50)
    labels = af.fit_predict(mat)

    inv_node_to_int = {v: k for k, v in node_to_int.items()}

    clusters = {}
    for index, lab in enumerate(labels):
        class_name = inv_node_to_int[index]
        if lab not in clusters:
            clusters[lab] = []
        clusters[lab].append(class_name)

    print(f"\nClusters: {clusters}")
    print(f"Total Clusters: {len(clusters)}")

    pos = nx.spring_layout(graph)
    nx.draw_networkx(graph,
                     pos=pos,
                     edgelist=[],
                     node_color=labels,
                     with_labels=True,
                     node_size=250,
                     font_size=8)

    return [cluster for cluster in clusters.values()]
    def fit_predict(self, preprocessed_documents: List[List[tuple]], documents_features: List[np.ndarray],
                    dataset: Dataset, hyperparams=None, task=None) -> List[Segmentation]:
        assert len(documents_features) == len(preprocessed_documents)

        x_scaled = []
        for doc_features in documents_features:
            x_scaled.append(StandardScaler().fit_transform(doc_features))

        predicted_label_lists = []

        for i in range(len(documents_features)):
            start_time = time.time()

            x = documents_features[i]  # documents_features[i]  x_scaled[i]
            true_n_clusters = dataset.segmentations[i].author_count

            assert x.shape[0] == len(preprocessed_documents[i])

            diarizer = AffinityPropagation(damping=hyperparams['damping'],
                                           preference=hyperparams['preference'],
                                           copy=True, affinity='euclidean',
                                           max_iter=100, convergence_iter=5)

            labels = diarizer.fit_predict(x).tolist()
            predicted_label_lists.append(labels)

            estimated_n_clusters = len(set(labels))

            print('Document', i + 1, '/', len(documents_features), x.shape, 'in', time.time() - start_time, 's', )
            print('Real author count = {}, estimated = {}'.format(true_n_clusters, estimated_n_clusters))
            print()

        return generate_segmentation(preprocessed_documents, documents_features,
                                     predicted_label_lists, dataset.documents, task=task)
Example no. 17
def plotly_embedding(value, key):
    time5 = time.perf_counter()  # time.clock() was removed in Python 3.8
    # standardisation
    x_min, x_max = np.min(value, 0), np.max(value, 0)
    reducer = (value - x_min) / (x_max - x_min)
    # clusterisation

    clusterer = AffinityPropagation()
    cluster_labels = clusterer.fit_predict(reducer)
    X_projected = reducer
    x, y, z = np.random.multivariate_normal(np.array([0, 0, 0]), np.eye(3),
                                            400).transpose()
    trace1 = go.Scatter3d(x=X_projected[:, 0],
                          y=X_projected[:, 1],
                          z=X_projected[:, 2],
                          mode='markers',
                          marker=dict(size=12,
                                      color=cluster_labels,
                                      colorscale='Paired',
                                      opacity=0.8))

    data = [trace1]
    layout = go.Layout(title="TSNE", margin=dict(l=0, r=0, b=0, t=0))
    fig = dict(data=data, layout=layout)
    plot(fig)
def group_clusters(roughHull):
    # AffinityPropagation algorithm:
    af = AffinityPropagation(preference=-100).fit(roughHull.squeeze(1))
    cluster_indicators = af.labels_  # labels from the fit above; no need to refit
    
    # map the list that the affinity provided to the current roughHull, then
    # find the mean point of all of them
    cluster_centers = []
    current_cluster = 0
    while(current_cluster < len(af.cluster_centers_indices_)):
        i=0
        cluster=[]
        for x in cluster_indicators:
            if(x == current_cluster):
                cluster.append(roughHull[i][0].tolist())
            i+=1
        # now find the average point between these
        average = [0, 0]
        for y in cluster:
            average = [average[0] + y[0], average[1] + y[1]]
        average = [average[0]/len(cluster), average[1]/len(cluster)]
        cluster_centers.append(np.array([average]))
        current_cluster += 1
    cc = np.array(cluster_centers)
    return cc
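As a side note, the label-to-point mapping and mean-point computation in the loop above can be written more compactly. The sketch below is illustrative only (the function name is made up) and assumes, as in group_clusters, that roughHull is an OpenCV-style point array of shape (N, 1, 2).

import numpy as np
from sklearn.cluster import AffinityPropagation

def group_clusters_vectorized(roughHull):
    # Cluster the hull points once, then take the mean point of each cluster.
    pts = roughHull.squeeze(1).astype(float)
    labels = AffinityPropagation(preference=-100).fit_predict(pts)
    centers = np.array([pts[labels == lab].mean(axis=0) for lab in np.unique(labels)])
    return centers[:, None, :]  # shape (k, 1, 2), matching the loop version above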
def score_based_cluster(gold, pair_score, affinity_save_path, clutter,
                        damping):
    gt_df = pd.read_csv(gold, encoding="utf-8")

    img_list = gt_df.image.unique()

    result = []
    for img in img_list:
        gt_df_img = gt_df.query('image == %i' % img)

        phrases = []
        for _, item in gt_df_img.iterrows():
            phrase = item.phrase
            phrases.append(phrase)

        scores = np.load(affinity_save_path + '/' + str(img) + '.npy')
        if scores.size > 1:
            pref = np.percentile(scores, clutter)
            af = AffinityPropagation(preference=pref,
                                     affinity='precomputed',
                                     damping=damping)
            labels = af.fit_predict(scores)
            if np.isnan(labels).any():  # when af did not converge
                labels = np.arange(labels.size)
        else:
            labels = [1]

        for i in range(len(phrases)):
            result.append({
                'image': img,
                'phrase': phrases[i],
                'label': labels[i]
            })
    return pd.DataFrame(result)
Example no. 21
def hyper_affinity(args):
    global basic_data
    global all_data
    ap = AffinityPropagation(damping = args['damping'])
    pred = ap.fit_predict(basic_data)
    temp = sil_score(all_data, pred)
    # print(args)
    return -temp
Example no. 22
def apply_affinity_prop_consort(include_transformed):
    (X, y) = extract.generate_labelled_data(
        valid_labels=['1'],
        label_type='consort',
        include_transformed=include_transformed)
    am = AffinityPropagation()
    preds = am.fit_predict(X)
    return (X, preds)
Example no. 23
def cluster_ap_blobs():
    clustering_blobs = AffinityPropagation(affinity='euclidean',
                                           convergence_iter=5,
                                           damping=0.9,
                                           preference=-10.0)
    y_blobs = clustering_blobs.fit_predict(X_blobs)
    plt.scatter(X_blobs[:, 0], X_blobs[:, 1], c=y_blobs)
    print(y_blobs)
def test_sparse_input_for_fit_predict():
    # Test to make sure sparse inputs are accepted for fit_predict
    # (non-regression test for issue #20049)
    af = AffinityPropagation(affinity="euclidean", random_state=42)
    rng = np.random.RandomState(42)
    X = csr_matrix(rng.randint(0, 2, size=(5, 5)))
    labels = af.fit_predict(X)
    assert_array_equal(labels, (0, 1, 1, 2, 3))
Example no. 25
    def fit(self, vectors: [int, float]) -> [int, int]:
        vectors_ = list(zip(*vectors))[1]
        cluster_model = AffinityPropagation(damping=0.96, max_iter=10000, convergence_iter=15)
        cluster = cluster_model.fit_predict(vectors_)

        show_two_dimensions_plot(vectors_, cluster)

        return [(i, label) for i, label in enumerate(cluster)]
Example no. 26
class APModel(ClusteringModel):
    def __init__(self, n_clusters):
        super().__init__()
        self.n_clusters = n_clusters
        self.ap = AffinityPropagation(verbose=True)

    def fit_predict(self, feat):
        pred = self.ap.fit_predict(feat)
        return pred
Example no. 27
def semantic_clusters(lemmas, unique=True):
    words = lemmas
    if unique:
        words = list(set(lemmas))
    words = _filter_w2v(words)
    m = np.array(_get_matrix(words))
    agg = AffinityPropagation(affinity="precomputed")
    u = agg.fit_predict(m)
    return _group_words(words, agg.labels_)
Example no. 28
def get_affinity_clusters(listings):
    """Returns a list of cluster IDs based on relative similarity between
    listings."""
    a = get_similarity_matrix(listings)

    clf = AffinityPropagation(affinity='precomputed')
    clusters = clf.fit_predict(a)

    return clusters
Example no. 29
def main():
    args = parse_arguments()
    verb2vec, subject2vec, object2vec = get_vectors(args.vector_path)
    lines, _, _, _ = get_dict_and_samples(args.input_path, args.min_count, args.first_n, args.step)
    concatenated = concat_vectors(lines, verb2vec, subject2vec, object2vec)
    print(f"Shape: {concatenated.shape}")
    ap = AffinityPropagation()
    result = ap.fit_predict(concatenated)
    groups = group_result(result, lines)
    print(f"Number of clusters: {len(groups)}")
Example no. 30
def affinity(data,damping):
    # metric_list = ['euclidean', 'manhattan', 'chebyshev']
    # ap = AffinityPropagation(damping = args['damping'])

    db = AffinityPropagation(damping=damping)
    pred = db.fit_predict(data)  # fit_predict already fits the model; no separate fit needed
    score = sil_score(data,pred)
    print(score)
    return db,pred,score
Example no. 31
 def build_families(self, smiles, affin_matrix):
     cluster = AffinityPropagation()
     cls = cluster.fit_predict(affin_matrix)
     fam = {}
     for a, b in zip(smiles, cls):
         if b in fam:
             fam[b].add(a)
         else:
             fam[b] = set({a})
     return fam
Example no. 32
def _AffinityPropagation(corpus, labels):
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(corpus)
    affinity_propagation = AffinityPropagation(damping=.5,
                                               max_iter=200,
                                               convergence_iter=25,
                                               copy=False)
    result_affinity_propagation = affinity_propagation.fit_predict(X.toarray())
    print('AffinityPropagation:',
          normalized_mutual_info_score(result_affinity_propagation, labels))
def affinity_propagation():
    """
    AffinityPropagation creates clusters by sending messages between pairs of
    samples until convergence. The messages sent between pairs represent the
    suitability for one sample to be the exemplar of the other, which is updated
    in response to the values from other pairs. These updates occur iteratively
    until convergence, at which point the final exemplars are chosen and hence
    the final clustering is given.

    Algorithm:

    The messages sent between pairs belong to one of two categories. The first
    is the responsibility, r(i,k), which is the accumulated evidence that sample
    k should be the exemplar for sample i. The second is the availability, a(i,k),
    which is the accumulated evidence that sample i should choose sample k to be
    its exemplar, and considers the values for all other samples for which k should
    be an exemplar. In this case exemplars are chosen by samples if they are:

        - similar enough to many samples, and
        - chosen by many samples to be representative of themselves.
    """
    # Generate a generic data sample.
    n_samples = 300
    std = 0.3
    seed = 0
    centers = [ [-1., 0.], [0., 1.5], [1., 0.] ]
    data, target = make_blobs(n_samples = n_samples, centers = centers,
        cluster_std = std, random_state = seed)

    # Set the preference for each point: samples with large preference values
    # are more likely to be chosen as exemplars. The number of exemplars, i.e.,
    # clusters, is influenced by the input preference values. If preferences are
    # not passed as arguments, they will be set to the median of the input
    # similarities.
    # pref = [ np.random.randint(low = -50, high = 0) for x in range(n_samples)]
    pref = -50
    # Compute affinity propagation.
    clf = AffinityPropagation(preference = pref)
    aff_y = clf.fit_predict(data)
    # Find mismatches between predicted and true values.
    cnt = int(0)
    for idx in range(n_samples):
        if(target[idx] != aff_y[idx]): cnt += 1
    # Print results.
    print('Approximated number of clusters ', len(clf.cluster_centers_indices_))
    print('Accuracy ', float(n_samples - cnt) / float(n_samples))
    print('Homogeneity ', metrics.homogeneity_score(target, clf.labels_))
    print('Completeness ', metrics.completeness_score(target, clf.labels_))

    # Plot resulting clusters.
    plt.figure(figsize = (8,8))
    plt.scatter(data[:,0], data[:,1], c = aff_y, s = 50)
    plt.title('Affinity clustering')
    plt.show()
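The docstring above describes the responsibility r(i,k) and availability a(i,k) messages in words. For illustration only, here is a minimal NumPy sketch of those damped update rules on a similarity matrix whose diagonal holds the preferences; it is not the scikit-learn implementation and it omits convergence checks.

import numpy as np

def ap_messages(S, damping=0.5, max_iter=200):
    # S: (n, n) similarity matrix, with S[k, k] = preference of sample k.
    n = S.shape[0]
    R = np.zeros((n, n))  # responsibilities r(i, k)
    A = np.zeros((n, n))  # availabilities a(i, k)
    idx = np.arange(n)
    for _ in range(max_iter):
        # r(i,k) <- s(i,k) - max_{k' != k} [a(i,k') + s(i,k')]
        AS = A + S
        first = AS.argmax(axis=1)
        first_val = AS[idx, first]
        AS[idx, first] = -np.inf
        second_val = AS.max(axis=1)
        R_new = S - first_val[:, None]
        R_new[idx, first] = S[idx, first] - second_val
        R = damping * R + (1 - damping) * R_new
        # a(i,k) <- min(0, r(k,k) + sum_{i' not in {i,k}} max(0, r(i',k)))
        Rp = np.maximum(R, 0)
        Rp[idx, idx] = R[idx, idx]
        A_new = Rp.sum(axis=0)[None, :] - Rp
        diag = A_new[idx, idx].copy()
        A_new = np.minimum(A_new, 0)
        A_new[idx, idx] = diag  # a(k,k) is not clipped
        A = damping * A + (1 - damping) * A_new
    # each sample's exemplar maximizes a(i,k) + r(i,k)
    return (A + R).argmax(axis=1)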
Example no. 34
def cluster_articles():
  ms = MongoStore()
  articles = [a for a in ms.get_pending_articles()]

  if len(articles) > 0:

    tfidf = TfidfVectorizer(tokenizer=preprocess)


    good_articles = [article for article in articles 
                     if article["text_content"].strip() != ""]

    texts = [article["text_content"] for article in good_articles]

    X_tfidf = tfidf.fit_transform(texts)

    print(X_tfidf)

    ap = AffinityPropagation(damping=0.95, max_iter=4000, 
            convergence_iter=400, copy=True, preference=-4, 
            affinity='euclidean', verbose=True)

    C = ap.fit_predict(X_tfidf)
    print(X_tfidf.shape, C.shape)
    print(C)
    centers = ap.cluster_centers_indices_
    clusters = []
    for c, center in enumerate(centers):

        
        members = np.where(C == c)[0]
        K = cosine_similarity(X_tfidf[members], X_tfidf[center])
        member_sims = [(m, float(k)) for m, k in zip(members, K)]
        member_sims.sort(key=lambda x: x[1], reverse=True)

        cluster = {"articles": [], "date": datetime.now(), "summarized": False}

        if len([member for member, sim in member_sims if sim > .55]) >= 3:
            print(texts[center][:75].replace("\n", " "))

            for member, sim in member_sims:

                print "\t{:3.3f} ".format(sim), 
                print good_articles[member]["title"][:60].replace("\n", " ")
                cluster["articles"].append((good_articles[member]["_id"], sim))
        else:
            continue
        
        clusters.append(cluster)

    if len(clusters) > 0:
        ms.insert_clusters(clusters)

    ms.set_clustered_flag(articles)
Example no. 35
def evaluate_clustering():

    similarity_matrix = get_sense_similarity_submatrix(range(10000))
    matrix_size = len(similarity_matrix)
    print('got matrix')

    affinity_propagation = AffinityPropagation()
    labels1 = affinity_propagation.fit_predict(similarity_matrix)
    print('affinity propagation')

    dbscan = DBSCAN(min_samples=1)
    labels2 = dbscan.fit_predict(similarity_matrix)
    print('print dbscan')

    distance_matrix = np.ndarray((matrix_size, matrix_size))
    for i in range(matrix_size):
        for j in range(matrix_size):
            distance_matrix[i, j] = 1 - similarity_matrix[i, j]

    print(distance_matrix[1, 2])
    print(distance_matrix[1, 1])

    print('created distance matrix')

    cluster_map1 = cluster_evaluation.fpena_get_clusters(labels1)
    cluster_map2 = cluster_evaluation.fpena_get_clusters(labels2)

    print(cluster_map1)
    print(cluster_map2)

    sc1 = sklearn.metrics.silhouette_score(distance_matrix, labels1, metric='euclidean')
    sc2 = sklearn.metrics.silhouette_score(distance_matrix, labels2, metric='euclidean')
    sc5 = cluster_evaluation.fpena_evaluate(cluster_map1, distance_matrix)
    sc6 = cluster_evaluation.fpena_evaluate(cluster_map2, distance_matrix)

    num_elements1 = [len(values) for values in cluster_map1.values()]
    num_elements2 = [len(values) for values in cluster_map2.values()]
    print(num_elements1)
    print(num_elements2)

    print('Number of clusters Affinity Propagation: %f' % len(cluster_map1))
    print('Number of clusters DBSCAN: %f' % len(cluster_map2))
    print('Average elements per cluster Affinity Propagation: %f' % np.mean(num_elements1))
    print('Average elements per cluster DBSCAN: %f' % np.mean(num_elements2))
    print('Standard deviation per cluster Affinity Propagation: %f' % np.std(num_elements1))
    print('Standard deviation per cluster DBSCAN: %f' % np.std(num_elements2))
    print('Silhouette score Affinity Propagation (distance matrix): %f' % sc1)
    print('Silhouette score DBSCAN (distance matrix): %f' % sc2)
    print('Dunn index Affinity Propagation (distance matrix): %f' % sc5)
    print('Dunn index DBSCAN (distance matrix): %f' % sc6)
Example no. 36
File: geo.py Project: kedz/cuttsum
def geo_worker_(job_queue, result_queue, **kwargs):
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    geocache = get_resource_manager(u"GeoCacheResource")
    geoquery = GeoQuery(geocache.get_tsv_path())
    event = kwargs.get(u"event")

    while not job_queue.empty():
        try:
            string_tsv_path, geo_tsv_path = job_queue.get(block=False)

            with gzip.open(string_tsv_path, u"r") as f:
                string_df = pd.io.parsers.read_csv(f, sep="\t", quoting=3, header=0)

            loc_strings = [
                loc_string for loc_string in string_df[u"locations"].tolist() if not isinstance(loc_string, float)
            ]

            coords = []

            for loc_string in loc_strings:
                for location in loc_string.split(","):
                    coord = geoquery.lookup_location(location)
                    if coord is not None:
                        coords.append(coord)

            centers = set()
            if len(coords) > 0:
                coords = np.array(coords)
                D = -geoquery.compute_distances(coords[:, None], coords)
                ap = AffinityPropagation(affinity=u"precomputed")
                Y = ap.fit_predict(D)

                if ap.cluster_centers_indices_ is not None:
                    for center in ap.cluster_centers_indices_:
                        centers.add((coords[center][0], coords[center][1]))

                    centers = [{u"lat": lat, u"lng": lng} for lat, lng in centers]
                    centers_df = pd.DataFrame(centers, columns=[u"lat", u"lng"])

                    with gzip.open(geo_tsv_path, u"w") as f:
                        centers_df.to_csv(f, sep="\t", index=False, index_label=False, na_rep="nan")

            result_queue.put(None)
        except Queue.Empty:
            pass

    return True
Example no. 37
def mhd_cluster_trajectories( curves ):
    """Returns clusters based upon the modified Hausdorff distance."""
    n_curves = len(curves)
    from sklearn.cluster import AffinityPropagation
    from modified_Hausdorff_distance import modified_Hausdorff_distance as mhd
    clusterer = AffinityPropagation(affinity='precomputed', convergence_iter=100)
    aff = np.zeros((n_curves, n_curves))
    for i in range(n_curves):
        for j in range(i+1, n_curves):
            aff[i,j] = mhd( curves[i].transpose(), curves[j].transpose() )
            aff[j,i] = aff[i,j]

    #clusterer.Affinity = aff
    cluster_labels = clusterer.fit_predict(aff)
    out = []
    for label in set(cluster_labels):
        cluster = [curves[k] for k in range(n_curves) if cluster_labels[k] == label]
        out.append(cluster)
    return [align_cluster(c) for c in out]
Example no. 38
def plot_similarity_clusters(desc1, desc2, files, plot = None):
	"""
	find similar sounds using Affinity Propagation clusters

	:param desc1: first descriptor values
	:param desc2: second descriptor values
	:returns:
	  - euclidean_labels: labels of clusters
	""" 

	if plot == True:
		print((Fore.MAGENTA + "Clustering"))
	else:
		pass
         
	min_max = preprocessing.scale(np.vstack((desc1,desc2)).T, with_mean=False, with_std=False)          
	pca = PCA(n_components=2, whiten=True)
	y = pca.fit(min_max).transform(min_max)
	    
	euclidean = AffinityPropagation(convergence_iter=1800, affinity='euclidean')                           
	euclidean_labels= euclidean.fit_predict(y)

	if plot == True:

		time.sleep(5)  

		print((Fore.WHITE + "Each number represents the group to which the sound belongs as an exemplar of other(s). Group '0' is colored blue, group '1' is colored red, group '2' is colored yellow. Look at the plot to see which sounds are exemplars of others"))
		print(np.vstack((euclidean_labels,files)).T)

		time.sleep(6)

		plt.scatter(y[euclidean_labels==0,0], y[euclidean_labels==0,1], c='b')
		plt.scatter(y[euclidean_labels==1,0], y[euclidean_labels==1,1], c='r')
		plt.scatter(y[euclidean_labels==2,0], y[euclidean_labels==2,1], c='y')
		plt.scatter(y[euclidean_labels==3,0], y[euclidean_labels==3,1], c='g')
		plt.show()
	else:
		pass

	return euclidean_labels
Example no. 39
    def cluster(self, normalize=False):
        """
        Cluster the nodes based on the PMI similarity measure. The clustering algorithm used is affinity propagation,
        which automatically chooses the number of clusters.

        :param normalize: If true, then normalize the similarity measured (i.e., the PMI) to be between -1 and 1.
        :return: The cluster labels.
        """
        if normalize:
            # use normalized PMI for similarity metric
            similarity = self.pmi / -np.log(self.joint_probs)
            similarity[np.diag_indices_from(similarity)] = 1.0
        else:
            similarity = self.pmi
            similarity[np.diag_indices_from(similarity)] = 1.1 * similarity.max()
        clustering = AffinityPropagation(affinity='precomputed', verbose=self.verbose,
                                         preference=similarity.min())
        clusters = clustering.fit_predict(similarity)
        if self.verbose:
            print('Found', len(np.unique(clusters)), 'clusters.')

        return clusters
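As context for the docstring above, here is a minimal sketch of how such a PMI similarity matrix could be built from a symmetric co-occurrence count matrix and clustered with the same precomputed-affinity settings as the method. The helper name is illustrative only, and the sketch assumes every pair of nodes co-occurs at least once (no zero joint probabilities).

import numpy as np
from sklearn.cluster import AffinityPropagation

def cluster_by_pmi(cooc, normalize=False, verbose=False):
    joint = cooc / cooc.sum()                 # p(i, j)
    marg = joint.sum(axis=1)                  # p(i)
    pmi = np.log(joint / np.outer(marg, marg))
    if normalize:
        sim = pmi / -np.log(joint)            # normalized PMI, in [-1, 1]
        np.fill_diagonal(sim, 1.0)
    else:
        sim = pmi.copy()
        np.fill_diagonal(sim, 1.1 * sim.max())
    ap = AffinityPropagation(affinity='precomputed', verbose=verbose,
                             preference=sim.min())
    return ap.fit_predict(sim)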
Example no. 40
def create_tag_categories():
    """Cluster MSE tags in to categories using sklearn AffinityPropogation.

       Any existing category system in the database will be overwritten.
    """
    con = connect_db()
    cur = con.cursor()

    query = """
    SELECT T.id, T.name, COUNT(Q.question_id) AS count FROM
    (
        SELECT tags.id, tags.name, COUNT(qt.question_id) AS count FROM tags
        JOIN question_tags AS qt ON qt.tag_id=tags.id
        WHERE tags.name NOT IN ('advice', 'applications', 'big-list', 
        'education', 'intuition', 'learning', 'math-history', 'math-software',
        'reference-request', 'self-learning', 'soft-question', 'teaching',
        'alternative-proof-strategy', 'proof-writing', 'visualization',
        'alternative-proof', 'proof-strategy', 'proof-verification',
        'solution-verification', 'definition', 'examples-counterexamples',
        'mathematica', 'wolfram-alpha', 'maple', 'matlab', 'sage', 'octave',
        'floor-function', 'ceiling-function', 'article-writing', 'publishing',
        'combinatorial-species', 'gromov-hyperbolic-spaces', 'chemistry',
        'book-recommendation')
        GROUP BY tags.name
    ) AS T
    JOIN question_tags AS Q ON T.id=Q.tag_id
    GROUP BY T.id"""
    cur.execute(query)
    tag_ids = []
    tag_names = []
    tag_indices = dict()
    tag_name_indices = dict()
    counts = []
    for q in cur:
        tag_ids.append(q['id'])
        tag_names.append(q['name'])
        tag_indices[q['id']] = len(tag_ids) - 1
        tag_name_indices[q['name']] = len(tag_ids) - 1
        counts.append(q['count'])

    tag_ids = np.array(tag_ids)
    tag_names = np.array(tag_names)

    query = """
    SELECT t1.id AS tag1, t2.id AS tag2, COUNT(qt1.question_id) as count
    FROM question_tags AS qt1
    JOIN question_tags AS qt2 ON qt1.question_id=qt2.question_id
    JOIN tags AS t1 ON t1.id=qt1.tag_id
    JOIN tags AS t2 ON t2.id=qt2.tag_id
    WHERE t1.id IN ({taglist}) AND t2.id IN ({taglist})
    GROUP BY t1.name, t2.name""".format(taglist=','.join(str(i) for i in tag_ids))
    cur.execute(query)

    paircounts = [[0 for i in range(len(tag_ids))] for j in range(len(tag_ids))]
    for q in cur:
        t1 = q['tag1']
        i1 = tag_indices[t1]
        t2 = q['tag2']
        i2 = tag_indices[t2]
        c = q['count']
        if i1 == i2:
            paircounts[i1][i1] = int(c/2)
        else:
            paircounts[i1][i2] = c

    sim = np.array(paircounts, dtype=np.float_)

    cluster = AffinityPropagation(affinity='precomputed', damping=0.5)

    labels = cluster.fit_predict(sim)

    classes = sorted(list(set(labels)))

    catnames = {i:tag_names[cluster.cluster_centers_indices_[i]] for i in \
            range(len(cluster.cluster_centers_indices_))}
    cur.execute("DELETE FROM categories WHERE 1;")
    cur.execute("DELETE FROM tag_categories WHERE 1;")

    query = "INSERT INTO categories (id,name) VALUES "
    catnames = [tag_names[cluster.cluster_centers_indices_[c]] for c in classes]
    query += ','.join("({},'{}')".format(c,catnames[c]) for c in classes)
    cur.execute(query)

    query = "INSERT INTO tag_categories (tag_id, category_id) VALUES "
    query += ','.join("({},{})".format(tag_ids[i], labels[i]) for i \
            in range(len(labels)))
    cur.execute(query)
    con.commit()
def main(argv):
   inputFile = ''
   outputFile = ''
   imax = 0
   jmax = 0
   inputFile = sys.argv[1]
   outputFile = sys.argv[2]
   if (len(sys.argv) < 4):
      # pick a default value.
      thisDamping = .92
   else: 
      # The third argument contains parameters in the format of key1:value1|key2:value2. In this
      # case we are only expecting one: "damping"
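      # e.g. the third argument might be passed as "damping:0.92" (example value only)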
      paramList = sys.argv[3].split("|")
      for thisParam in paramList:
         # first and only parameter should be damping
         paramSplit = thisParam.split(":")
         if (paramSplit[0] == "damping"):
             thisDamping = float(paramSplit[1])
   print('Input file is:', inputFile)
   print('Output file is:', outputFile)
   print('thisDamping is:', str(thisDamping))

            
   with open(inputFile, 'r', newline='') as csvfile:
      csvReader = csv.reader(csvfile, delimiter=',',quotechar='|')
      # First line is the number of distinct nodes.
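      # e.g. an input file might start with a line "3" followed by rows like "a,b,0.8" (illustrative values)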
      headerRows = next(csvReader)
      imax = int(headerRows[0])
      jmax = int(headerRows[0])
      print(str(imax) + " " + str(jmax))

      # define the matrix         
      simMatrix = np.zeros((imax, jmax), dtype=np.float)
      currentNodeIndex = 0
      # We build a map between the matrix we want to build and the node identifiers
      # as we read in the rows.
      thisI = 0
      thisJ = 0
      nodeMap = dict()

      # we also want a list that maps the indices to the node names
      indexList = list()
      for row in csvReader:
         if (row[0] in nodeMap):
            thisI = nodeMap[row[0]]
         else:
            nodeMap[row[0]] = currentNodeIndex
            indexList.append(row[0])
            currentNodeIndex += 1

         if (row[1] in nodeMap):
            thisJ = nodeMap[row[1]]
         else:
            nodeMap[row[1]] = currentNodeIndex
            indexList.append(row[1])
            currentNodeIndex += 1

         # matrix is symmetric
         simMatrix[thisI, thisJ] = float(row[2])
         simMatrix[thisJ, thisI] = float(row[2])

      for i in range(0,imax):
         # Set all of the diagonals to 1
         simMatrix[i,i] = 1.

   db = AffinityPropagation(affinity='precomputed',damping=thisDamping)
   labels = db.fit_predict(simMatrix)

   # Number of clusters in labels, ignoring noise if present.
   n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

   #print('Estimated number of clusters: %d' % n_clusters_)
   print(labels, len(labels))

   with open(outputFile, 'w', newline='') as csvoutfile:
      csvWriter = csv.writer(csvoutfile, delimiter=',',quotechar='|')
      for i in range(0, imax):
         csvWriter.writerow([indexList[i], labels[i]])
print("size X",len(X))
#kernel = gaussian_kde(X_p.T)

pref = [(-(mvn.pdf(x,[0,0],[[1,0],[0,.1]])))*100 for x in X]

alpha = 1
dists = np.array([
                  -(
                    euclidean(
                            u/(alpha*-np.log(mvn.pdf(u,[0,0],[[.01,0],[0,.01]]))),
                             v/(alpha*-np.log(mvn.pdf(v,[0,0],[[.01,0],[0,.01]])))
                              )
                    
#                     (
#                       (-np.log(mvn.pdf(u,[0,0],[[1,0],[0,1]]))
#                       +(-np.log(mvn.pdf(v,[0,0],[[1,0],[0,1]])))
#                       )
#                     )
                ) for u in X for v in X]).reshape((len(X),len(X)))

ap = AffinityPropagation(affinity = "precomputed",
                            #preference=pref
                             )
labels = ap.fit_predict(dists)
print("n labels", len(set(labels)))
import matplotlib.pyplot as plt
cmap = dict((label,np.random.beta(1,1,3)) for label in labels)
for x,label in zip(X,labels):
    plt.scatter(x[0],x[1],color=cmap[label])
plt.show()
Example no. 43
def cluster_affinity_propagation(similarity_matrix, desired_keys=None):

    numpy_matrix = similarity_matrix_to_numpy(similarity_matrix, desired_keys)

    clusterer = AffinityPropagation()
    return clusterer.fit_predict(numpy_matrix)
Example no. 44
          data_thr.rateC, data_thr.rateCA]
Html_file = open("clustering_files/affinitypropagation.html", "w")

# consider only 10000 data (spectralclustering memory complexity):
ind = np.array(10000 * [1] + (X.shape[0] - 10000) * [0]).astype(bool)
ind = shuffle(ind)
data_thr10 = pd.DataFrame(X[ind])
data_thr10.columns = data.columns

scaler = StandardScaler()
X = scaler.fit_transform(X)

X = X[ind]

km = AffinityPropagation(damping=0.95)
preds = km.fit_predict(X)

print "components:", set(preds)
print np.bincount(preds)

data_thr10['preds'] = pd.Series(preds).astype("category")
color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"] * 25

title = str(np.bincount(preds))
TOOLS = "wheel_zoom,box_zoom,reset,box_select,pan"
plot_width = 900
plot_height = 300
x_name = 'rateCA'
y_name = 'rate'
xmin_p = np.percentile(data_thr10[x_name], 0.1)
Example no. 45
def main():
    options = docopt.docopt(__doc__)

    features_file = h5py.File(options['<keypoints>'])
    cap = cv2.VideoCapture(options['<video>'])

    frame_idx = -1
    tracks = None
    frame_pair = (None, None)
    tracking = Tracking()
    cluster_tracks = []
    video_writer = None
    clusters = []

    while options['--max-frames'] is None or frame_idx < int(options['--max-frames']):
        # Read in frame image
        rv, frame = cap.read()
        frame_idx += 1
        
        # If we failed to read in a frame, exit
        if not rv:
            break
        
        if options['--no-video']:
            output_frame = np.zeros_like(frame)
        else:
            output_frame = np.copy(frame)

        if video_writer is None:
            h, w = frame.shape[:2]
            video_writer = cv2.VideoWriter(options['<output>'], cv2.cv.FOURCC(*'MJPG'), 25, (w,h), )

        # Show progress
        if frame_idx % 100 == 0:
            print('Frame index: {0} => {1} tracks'.format(frame_idx, len(tracking.tracks)))
            
        # Convert to greyscale
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            
        # Update frame pair
        frame_pair = (frame_pair[1], frame_gray)
        
        # Work out where in the keypoints file, keypoints start and end
        frame_kp_start, n_kps = features_file['frames'][frame_idx]
        
        # Find keypoint locations and descriptors
        kp_locs = features_file['keypoints'][frame_kp_start:(frame_kp_start+n_kps)]
        kp_descs = features_file['descriptors'][frame_kp_start:(frame_kp_start+n_kps)]
        
        # Convert locations to image space
        kp_im_locs = np.array(kp_locs, dtype=np.float32)
        h, w = frame_gray.shape
        kp_im_locs[:,0] += 0.5*w
        kp_im_locs[:,1] += 0.5*h

        # Construct a list of key points
        kps = list(Keypoint(frame_idx, loc[:2], loc[2], desc) for loc, desc in zip(kp_im_locs, kp_descs))

        if options['--show-kps']:
            for kp in kps:
                x, y = kp.location
                cv2.circle(output_frame, (int(x), int(y)), 5, (0,0,200), lineType=cv2.CV_AA)
        
        # Track this frame's keypoints
        tracking.add_frame(frame_pair[0], frame_pair[1], frame_idx, kps)
        
        # All states and covariances for this frame
        frame_states, frame_covars, frame_track_kps = [], [], []
        for t in tracking.tracks:
            if t.final_frame_idx < frame_idx or t.initial_frame_idx > frame_idx:
                continue
            frame_states.append(t.states[frame_idx - t.initial_frame_idx])
            frame_covars.append(t.covariances[frame_idx - t.initial_frame_idx].copy())
            frame_track_kps.append(t.associated_keypoints[-1])

        # Draw trails if required
        trail_length = int(options['--trail-length'])
        if trail_length > 0:
            for t in tracking.tracks:
                if t.final_frame_idx <= frame_idx - trail_length or t.initial_frame_idx > frame_idx:
                    continue

                start_frame = frame_idx - trail_length + 1
                start_idx = start_frame - t.initial_frame_idx

                for s1, s2 in zip(t.states[start_idx:-1], t.states[start_idx+1:]):
                    cv2.line(output_frame, (int(s1[0]), int(s1[1])),
                            (int(s2[0]), int(s2[1])), (200,0,200),
                            lineType=cv2.CV_AA)
            
        # Convert states to an array
        frame_states = np.array(frame_states)

        if not options['--no-cluster']:
            # PDF of choosing kp uniformly from image
            h, w = frame.shape[:2]
            non_cluster_pdf = -30

            # Best existing cluster for each state and the associated PDF
            state_association = [(-1, non_cluster_pdf),] * frame_states.shape[0]

            # PDF of choosing states from each active cluster
            for c_idx, cluster in enumerate(clusters):
                # skip elderly clusters
                if cluster.last_update_frame_idx != frame_idx - 1:
                    continue

                cluster_mu, cluster_sigma = cluster.predict(frame_idx)

                for s_idx in xrange(len(state_association)):
                    s = frame_states[s_idx,:]
                    c = frame_covars[s_idx]
                    _, current_pdf = state_association[s_idx]

                    pdf = mv_gaussian_log_pdf(s, cluster_mu, cluster_sigma + c)[0]
                    if pdf > current_pdf:
                        state_association[s_idx] = (c_idx, pdf)

            # Go through associations
            unassigned_states, unassigned_covars = [], []
            cluster_states = [None,] * len(clusters)
            for s, c, assoc in zip(frame_states, frame_covars, state_association):
                c_idx = assoc[0]
                if c_idx < 0:
                    unassigned_states.append(s)
                    unassigned_covars.append(c)
                    continue
                
                if cluster_states[c_idx] is None:
                    cluster_states[c_idx] = [(s, c)]
                else:
                    cluster_states[c_idx].append((s, c))

            for cluster, assignment in zip(clusters, cluster_states):
                if assignment is None:
                    if cluster.final_frame_idx >= frame_idx - 3:
                        cluster.update(frame_idx)
                else:
                    states = np.array(list(s for s,c in assignment))

                    if states.shape[0] >= 2:
                        sigma = np.cov(states.T)
                    else:
                        sigma = cluster.covariances[-1].copy()

                    mu = np.mean(states, axis=0)
                    for _, cov in assignment:
                        sigma += cov

                    cluster.update(frame_idx, mu, sigma)

                    minx, maxx = states[:,0].min(), states[:,0].max()
                    miny, maxy = states[:,1].min(), states[:,1].max()

                    if maxx - minx > 300 or maxy - miny > 300:
                        continue

                    cv2.rectangle(output_frame,
                            (int(minx), int(miny)), (int(maxx), int(maxy)), (0,0,200), lineType=cv2.CV_AA)

                    state, cov = cluster.predict(frame_idx)
                    draw_cov(output_frame, cov[:2,:2], state[:2], (0,0,200), lineType=cv2.CV_AA)

        # Draw 'o' over each frame state
        sc = 10.0
        filtered_states = []
        filtered_covs = []

        for kp, s, c in zip(frame_track_kps, frame_states, frame_covars):
            # Extract sigmas
            sigmas = np.diag(np.linalg.cholesky(c))

            # Only sufficiently 'good' features pass
            if options['--max-position-sigma'] is not None:
                if np.any(sigmas[:2] > float(options['--max-position-sigma'])):
                    continue

            if options['--max-velocity-sigma'] is not None:
                if np.any(sigmas[2:4] > float(options['--max-velocity-sigma'])):
                    continue

            ## Only those with keypoints at this frame
            #if kp.frame_idx != frame_idx:
            #    continue
                
            # Only those with minimum velocity
            #speed = np.sqrt(np.sum(s[2:4]*s[2:4]))
            #if speed < 0.5:
            #    continue
            
            filtered_states.append(s)
            filtered_covs.append(c)

            if not options['--no-show-states']:
                draw_cov(output_frame, c[:2,:2], s[:2], (255,0,0), lineType=cv2.CV_AA)
                cv2.line(output_frame, (int(s[0]), int(s[1])), (int(s[0]+sc*s[2]),
                    int(s[1]+sc*s[3])), (0,200,0), lineType=cv2.CV_AA)
                draw_cov(output_frame, sc*sc*c[2:4,2:4], s[:2]+sc*s[2:4], (0,200,0), lineType=cv2.CV_AA)

        filtered_states = np.array(filtered_states)

        # Cluster unlabelled states
        if not options['--no-cluster'] and len(unassigned_states) > 4:
            cluster_states = np.copy(np.array(unassigned_states))
            cluster_covs = list(unassigned_covars)

            clustering = AffinityPropagation()
            labels = clustering.fit_predict(cluster_states)

            # Process labels
            for label in np.unique(labels):
                label_indices = np.nonzero(labels == label)[0]
                if label_indices.shape[0] < 2:
                    continue

                label_states = cluster_states[label_indices, :]
                label_covs = list(cluster_covs[i] for i in label_indices)
                
                mu = np.mean(label_states, axis=0)
                sigma = np.cov(label_states.T)

                for c in label_covs:
                    sigma += c

                new_cluster = Cluster()
                new_cluster.update(frame_idx, mu, sigma)
                clusters.append(new_cluster)

                draw_cov(output_frame, sigma[:2,:2], mu, (0,200,200), lineType=cv2.CV_AA)
        
        # Write output
        video_writer.write(output_frame)
        
    del video_writer
Example no. 46
build_class_labels()
num_classes = len(urls)



sim_matrix = np.zeros((num_classes, num_classes))
record_in_matrix(sim_matrix)
sim_matrix = np.sqrt(sim_matrix)

np.savetxt("sim_mat.txt", sim_matrix)


clst = AffinityPropagation(affinity='precomputed')
#clst = SpectralClustering(n_clusters=7,affinity='precomputed')
classes = clst.fit_predict(sim_matrix)


with open("ap/centers.txt", "w") as f:
    for cluster_id, center_idx in enumerate(clst.cluster_centers_indices_):
        f.write(all_urls[center_idx])
        f.write(" ")
        f.write(str(cluster_id))
        f.write("\n")


with open("ap/clusters.txt", "w") as f:
    for idx, cls in enumerate(classes):
        f.write(all_urls[idx])
        f.write(" ")
        f.write(str(cls))
Example no. 47
# R1 = C1.fit_predict(Gram)
# 
n = len(Gram)
Di = np.reshape(np.diag(Gram),(n,1))
M = Di.dot(np.ones((1,n)))

D = M + M.T - 2*Gram

C2 = AffinityPropagation(affinity='precomputed')
C1 = KMeans(n_clusters = 5)
C3 = AgglomerativeClustering(n_clusters=5, affinity='precomputed',linkage='average')
C4 = SpectralClustering(n_clusters=5,affinity='precomputed')
C5 = SpectralBiclustering(n_clusters=(5,5))

R1 = C1.fit_predict(D)
R2 = C2.fit_predict(D)
R3 = C3.fit_predict(D)
R4 = C4.fit_predict(Gram +11)
R5 = C5.fit(D)

print(R4)

modèle = TSNE(n_components=2,metric='precomputed')
Trans = modèle.fit_transform(D)

G_ACP = ACP(Gram,precomputed=True)

trace_ACP(G_ACP,[10]*5)
##

import propre_TSNE as pt
Example no. 49
# cluster3 = vectorLinspace([4,1],[7,9], num=50)
# cluster3 = cluster1 + np.random.normal(5,.1,cluster3.shape)
# cluster4 = vectorLinspace([-1,4],[-4,2], num=50)
# cluster4 = cluster1 + np.random.normal(-5,.1,cluster4.shape)

X = cluster1#np.append(cluster1,np.append(cluster2,np.append(cluster3,cluster4,axis=0),axis=0),axis=0)
print(X)
print(pearsonr(X[:,0],X[:,1]),spearmanr(X[:,0],X[:,1]))
dists = np.zeros((len(X),len(X)))
for i1,x1 in enumerate(X): 
    print(i1,"/",len(X))
    for i2,x2 in enumerate(X):
#        for i3,x3 in enumerate(X):
#            if i1 != i2 and i2 != i3 and i1 != i3:
#                 tmp = np.append(x1,np.append(x2,x3,axis=0),axis=0).reshape((-1,2))
#                 #print(tmp)
#                 c = spearmanr(tmp[:,0],tmp[:,1])[0]
        dists[i1,i2] = cosine(x1,x2)
print(dists)
from sklearn.cluster import AffinityPropagation
ap = AffinityPropagation(affinity="precomputed")
y_pred = ap.fit_predict(dists)
print(len(set(y_pred)))
cmap = dict((y,np.random.beta(1,1,3)) for y in y_pred)
import matplotlib.pyplot as plt
for x,y in zip(X,y_pred):
    #plt.annotate(y,x,color=cmap[y])
    pass
plt.scatter(X[:,0],X[:,1])
plt.scatter(cluster2[:,0],cluster2[:,1])
plt.show()
Example no. 50
from sklearn.cluster import AffinityPropagation

from sklearn.manifold import TSNE


dataset = pd.read_csv('~/data/gene_expr_170104.csv')
data = np.array(dataset)[:, 1:].astype(float).T




Y = TSNE().fit_transform(data)
clus = AffinityPropagation()

lab = clus.fit_predict(Y)

x, y  = Y.T



plt.scatter(x, y, alpha=0.9, c = plt.cm.Spectral(lab.astype(float) / lab.max()), edgecolors='none')
# for i, j, t in zip(x, y, range(x.shape[0])):
#     plt.text(i, j, t, color = 'purple')

plt.show()

x, y, = SOS(iterations=10, alpha=1, beta=0, delta=0, theta=3.5).fit_transform(data).T

plt.scatter(x, y, alpha=0.4, c = plt.cm.Spectral(lab.astype(float) / lab.max()), edgecolors='none')
# for i, j, t in zip(x, y, range(x.shape[0])):
Example no. 51
if embeddings.shape[1] != 2:
    print("tsne")
    tsne = TSNE(2)
    embeddings_transformed = tsne.fit_transform(embeddings)
else:
    embeddings_transformed = embeddings
    #tsne = TSNE(2)
    #embeddings_transformed = tsne.fit_transform(embeddings)

print("clustering")
c2c = [5,6,7,8]
labels = dict()
from sklearn.cluster import AffinityPropagation
ap = AffinityPropagation()
for c in c2c:
    labels[c] = ap.fit_predict([emb for emb,concept 
                                in zip(embeddings_transformed,concepts) if concept ==c])
print(labels)
print("plotting")
import matplotlib.pyplot as plt
import seaborn
cmap = dict((key,np.random.beta(1,1,3)) for key in cognate_classes)
counters = {5:0,6:0,7:0,8:0}
for asjp_word,emb,cognate_class,concept in zip(asjp_words,embeddings_transformed,cognate_classes,concepts):
#     plt.annotate(asjp_word,emb,color=cmap[cognate_class])
    if concept == 5:
        plt.subplot(2,2,1)
        label = labels[5][counters[5]]
        plt.annotate(asjp_word+"_"+str(label),emb,color=cmap[cognate_class])
        counters[5] += 1
        
    if concept == 6:
        for j in range(size_berlin):
            if i != j:
                matrix_berlin[i][j] = (list_of_berlin_person[i].distance_of_two_persons(list_of_berlin_person[j]))

    for i in range(size_newcomers):
        for j in range(size_newcomers):
            if i != j:
                matrix_newcomer[i][j] = (list_of_newcomer_person[i].distance_of_two_persons(list_of_newcomer_person[j]))

    print(matrix_berlin)
    print(matrix_newcomer)

    print('_____________________________________')
    clusterer.fit(matrix_newcomer, y=None)
    print('_____________________________________')
    clusterer.fit_predict(matrix_newcomer, y=None)
    print('_____________________________________')
    #
    af = AffinityPropagation().fit(matrix_newcomer)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    print(labels)
    n_clusters_ = len(cluster_centers_indices)

    print('Estimated number of clusters: %d' % n_clusters_)
    # print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    # print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    # print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    # print("Adjusted Rand Index: %0.3f"% metrics.adjusted_rand_score(labels_true, labels))
    # print("Adjusted Mutual Information: %0.3f"% metrics.adjusted_mutual_info_score(labels_true, labels))
    # print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean'))