def ap(z, pref_scale=1):
    """Cluster the rows of *z* with affinity propagation.

    Parameters
    ----------
    z : array-like, shape (n_samples, n_features)
        Observations; pairwise negative Euclidean distance is used as
        the similarity.
    pref_scale : float, optional
        Multiplier applied to the median similarity to obtain the
        preference ``p``.  Default 1 reproduces the original
        hard-coded behaviour.

    Returns
    -------
    labels : array of cluster labels, one per row of *z*.
    """
    # Negative distances: affinity propagation expects similarities,
    # so larger (less negative) means more alike.
    sim_matrix = -squareform(pdist(z, 'euclidean'))

    # Median similarity is the usual AP preference heuristic.
    p = np.median(sim_matrix) * pref_scale
    center, labels = affinity_propagation(sim_matrix, p=p, verbose=True,
                                          max_iter=2000)

    return labels
# Example #2
def ap(z):
    """Run affinity propagation on the rows of *z*.

    Similarity is the negative Euclidean distance between observations,
    and the preference is set to the median similarity.  Returns the
    cluster label of each row.
    """
    distances = pdist(z, 'euclidean')
    similarities = -squareform(distances)

    preference = np.median(similarities)
    _, labels = affinity_propagation(similarities,
                                     p=preference,
                                     verbose=True,
                                     max_iter=2000)
    return labels
# Example #3
def remove_duplicates(cc, sim_func=intersection_sim):
    """
    Use affinity propagation to remove duplicate candidates.

    Builds a pairwise similarity matrix over the keys of *cc* with
    *sim_func*, clusters it, and deletes every key that is not a
    cluster exemplar.  *cc* is modified in place.
    """
    # NOTE(review): legacy package name; modern installs expose this as
    # sklearn.cluster.affinity_propagation -- confirm the environment.
    from scikits.learn.cluster import affinity_propagation

    # Materialize the keys: we delete from cc below, and iterating a
    # live keys() view while mutating the dict raises on Python 3.
    li = list(cc)
    n = len(li)
    sim_matrix = np.zeros((n, n))
    for i, e1 in enumerate(li):
        for j, e2 in enumerate(li):
            sim_matrix[i, j] = sim_func(e1, e2)

    centers, _ = affinity_propagation(sim_matrix)

    # Keep only the exemplars; a set gives O(1) membership tests.
    exemplars = set(centers)
    for i, e in enumerate(li):
        if i not in exemplars:
            del cc[e]
# Example #4
def remove_duplicates(cc, sim_func=intersection_sim):
    """
    Use affinity propagation to remove duplicate candidates.

    Clusters the keys of *cc* by pairwise similarity (*sim_func*) and
    drops, in place, every key that is not chosen as an exemplar.
    """
    # NOTE(review): old-style import; newer installs name this
    # sklearn.cluster.affinity_propagation -- verify.
    from scikits.learn.cluster import affinity_propagation

    # Copy the keys up front: cc is mutated below, and deleting while
    # iterating a keys() view fails on Python 3.
    keys = list(cc)
    size = len(keys)
    sim_matrix = np.zeros((size, size))
    for i, e1 in enumerate(keys):
        for j, e2 in enumerate(keys):
            sim_matrix[i, j] = sim_func(e1, e2)

    centers, _ = affinity_propagation(sim_matrix)
    center_set = set(centers)

    # Everything that is not an exemplar is considered a duplicate.
    for i, e in enumerate(keys):
        if i not in center_set:
            del cc[e]
################################################################################
# Similarity matrix for affinity propagation:
# S[i, j] = -|xi|^2 - |xj|^2 + 2*xi.xj = -||xi - xj||^2,
# i.e. the negative squared Euclidean distance between rows of X.
X_norms = np.sum(X*X, axis=1)
S = - X_norms[:,np.newaxis] - X_norms[np.newaxis,:] + 2 * np.dot(X, X.T)
# Preference value.  NOTE(review): 12x the median similarity looks
# hand-tuned -- the common heuristic is the plain median.  Confirm.
p = 12*np.median(S)

################################################################################
# Compute Affinity Propagation
################################################################################

#af = AffinityPropagation()
#af.fit(S, p)
#cluster_centers_indices = af.cluster_centers_indices_
#labels = af.labels_

#import affinity
# Functional API: returns (exemplar indices, per-sample labels).
cluster_centers_indices,labels=affinity_propagation(S,p)

n_clusters_ = len(cluster_centers_indices)

# Python 2 print statement (this file predates Python 3).
print 'Estimated number of clusters: %d' % n_clusters_

################################################################################
# Plot result
################################################################################

import pylab as pl
from itertools import cycle

# Start from a clean figure before plotting the clusters.
pl.close('all')
pl.figure(1)
pl.clf()
# Example #6
        'WAG'  : 'Walgreen',
        'HD'   : 'Home Depot',
        'GSK'  : 'GlaxoSmithKline',
        'PFE'  : 'Pfizer',
        'SNY'  : 'Sanofi-Aventis',
        'NVS'  : 'Novartis',
        'KMB'  : 'Kimberly-Clark',
        'R'    : 'Ryder',
        'GD'   : 'General Dynamics',
        'RTN'  : 'Raytheon',
        'CVS'  : 'CVS',
        'CAT'  : 'Caterpillar',
        'DD'   : 'DuPont de Nemours',
    }

symbols, names = np.array(symbol_dict.items()).T

quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True)
                for symbol in symbols]

#volumes = np.array([q.volume for q in quotes]).astype(np.float)
open    = np.array([q.open   for q in quotes]).astype(np.float)
close   = np.array([q.close  for q in quotes]).astype(np.float)
variation = close - open
correlations = np.corrcoef(variation)

_, labels = cluster.affinity_propagation(correlations)

for i in range(labels.max()+1):
    print 'Cluster %i: %s' % ((i+1),
                              ', '.join(names[labels==i]))
    charfeat.set_features(li)
    #Get alphabet.
    feats_train = StringUlongFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    #CommUlongStringKernel needs sorted features.
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    use_sign = False

    #Compute kernel matrix between train features.
    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    km_train = kernel.get_kernel_matrix()
    return km_train


if __name__ == "__main__":
    # Cluster review sentences by string-kernel similarity.
    li = read_reviews()
    # Join token lists into sentence strings, dropping empty reviews.
    li = [" ".join(e) for e in li if e]
    mat = get_kernel_matrix(li)
    # Old scikits-style API: returns (exemplar indices, labels).
    center, labels = affinity_propagation(mat)

    li = np.array(li)
    for i in range(len(center)):
        # Print every sentence in cluster i, then the exemplar sentence.
        sents = li[np.where(labels==i)]
        for e in sents:
            print e, "#",
        print i, li[center[i]]
# Example #8
    charfeat.set_features(li)
    #Get alphabet.
    feats_train = StringUlongFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    #CommUlongStringKernel needs sorted features.
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    use_sign = False

    #Compute kernel matrix between train features.
    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    km_train = kernel.get_kernel_matrix()
    return km_train


if __name__ == "__main__":
    # Group reviews into clusters of similar sentences.
    li = read_reviews()
    # Flatten each tokenized review into one string; skip empties.
    li = [" ".join(e) for e in li if e]
    mat = get_kernel_matrix(li)
    # Legacy functional API: (exemplar indices, per-sample labels).
    center, labels = affinity_propagation(mat)

    li = np.array(li)
    for i in range(len(center)):
        # Dump the members of cluster i, then its exemplar.
        sents = li[np.where(labels == i)]
        for e in sents:
            print e, "#",
        print i, li[center[i]]