def ap(z, preference_scale=1, max_iter=2000):
    """Cluster the observations in ``z`` with affinity propagation.

    The similarity between two observations is the negated Euclidean
    distance between them.  The preference handed to
    ``affinity_propagation`` is the median of the full similarity matrix
    multiplied by ``preference_scale``.

    Parameters
    ----------
    z : array-like
        Observations in the layout ``scipy.spatial.distance.pdist``
        expects.
    preference_scale : number, optional
        Factor applied to the median similarity to obtain the preference.
        The default of 1 reproduces the previously hard-coded value.
    max_iter : int, optional
        Iteration cap forwarded to ``affinity_propagation``.  The default
        of 2000 reproduces the previously hard-coded value.

    Returns
    -------
    labels
        The second value returned by ``affinity_propagation``.
    """
    sim_matrix = -squareform(pdist(z, 'euclidean'))
    preference = np.median(sim_matrix) * preference_scale
    # Only the labels are returned; the first return value is discarded.
    _, labels = affinity_propagation(
        sim_matrix, p=preference, verbose=True, max_iter=max_iter)
    return labels
def remove_duplicates(cc, sim_func=intersection_sim):
    """Use affinity propagation to remove duplicate candidates.

    Pairwise similarities between the keys of ``cc`` are computed with
    ``sim_func`` and clustered with affinity propagation.  Every key whose
    index is not among the returned cluster centers is deleted from ``cc``
    in place.

    Parameters
    ----------
    cc : dict
        Candidates, keyed by the objects ``sim_func`` compares.  Modified
        in place.
    sim_func : callable
        ``sim_func(e1, e2)`` gives the value stored at ``[i, j]`` of the
        similarity matrix.

    Returns
    -------
    None
    """
    from scikits.learn.cluster import affinity_propagation

    # Snapshot the keys: entries are deleted from ``cc`` below, and the
    # matrix indices must keep referring to a stable ordering.
    li = list(cc)
    n_candidates = len(li)
    if n_candidates < 2:
        # Fewer than two candidates cannot contain a duplicate.
        return

    sim_matrix = np.zeros((n_candidates, n_candidates))
    for i, e1 in enumerate(li):
        for j, e2 in enumerate(li):
            sim_matrix[i, j] = sim_func(e1, e2)

    centers, _ = affinity_propagation(sim_matrix)
    # Set membership avoids rescanning the centers array for every key.
    exemplars = set(int(c) for c in centers)
    for i, e in enumerate(li):
        if i not in exemplars:
            del cc[e]
def remove_duplicates(cc, sim_func=intersection_sim):
    """Use affinity propagation to remove duplicate candidates.

    Pairwise similarities between the keys of ``cc`` are computed with
    ``sim_func`` and clustered with affinity propagation.  Every key whose
    index is not among the returned cluster centers is deleted from ``cc``
    in place.

    Parameters
    ----------
    cc : dict
        Candidates, keyed by the objects ``sim_func`` compares.  Modified
        in place.
    sim_func : callable
        ``sim_func(e1, e2)`` gives the value stored at ``[i, j]`` of the
        similarity matrix.

    Returns
    -------
    None
    """
    from scikits.learn.cluster import affinity_propagation

    # Snapshot the keys: entries are deleted from ``cc`` below, and the
    # matrix indices must keep referring to a stable ordering.
    li = list(cc)
    n_candidates = len(li)
    if n_candidates < 2:
        # Fewer than two candidates cannot contain a duplicate.
        return

    sim_matrix = np.zeros((n_candidates, n_candidates))
    for i, e1 in enumerate(li):
        for j, e2 in enumerate(li):
            sim_matrix[i, j] = sim_func(e1, e2)

    centers, _ = affinity_propagation(sim_matrix)
    # Set membership avoids rescanning the centers array for every key.
    exemplars = set(int(c) for c in centers)
    for i, e in enumerate(li):
        if i not in exemplars:
            del cc[e]
################################################################################
# Similarity matrix: S[i, j] = -||X[i] - X[j]||^2, written in its expanded form
# -||X[i]||^2 - ||X[j]||^2 + 2 * <X[i], X[j]> so it needs one matrix product.
X_norms = np.sum(X * X, axis=1)
S = -X_norms[:, np.newaxis] - X_norms[np.newaxis, :] + 2 * np.dot(X, X.T)
# Value passed as the second argument of affinity_propagation below:
# twelve times the median similarity.
p = 12 * np.median(S)

################################################################################
# Compute Affinity Propagation
################################################################################
cluster_centers_indices, labels = affinity_propagation(S, p)
n_clusters_ = len(cluster_centers_indices)

# Single-argument parenthesised print gives the same output on Python 2 and 3.
print('Estimated number of clusters: %d' % n_clusters_)

################################################################################
# Plot result
################################################################################
import pylab as pl
from itertools import cycle

# Start from a clean figure 1.
pl.close('all')
pl.figure(1)
pl.clf()
'WAG' : 'Walgreen', 'HD' : 'Home Depot', 'GSK' : 'GlaxoSmithKline', 'PFE' : 'Pfizer', 'SNY' : 'Sanofi-Aventis', 'NVS' : 'Novartis', 'KMB' : 'Kimberly-Clark', 'R' : 'Ryder', 'GD' : 'General Dynamics', 'RTN' : 'Raytheon', 'CVS' : 'CVS', 'CAT' : 'Caterpillar', 'DD' : 'DuPont de Nemours', } symbols, names = np.array(symbol_dict.items()).T quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True) for symbol in symbols] #volumes = np.array([q.volume for q in quotes]).astype(np.float) open = np.array([q.open for q in quotes]).astype(np.float) close = np.array([q.close for q in quotes]).astype(np.float) variation = close - open correlations = np.corrcoef(variation) _, labels = cluster.affinity_propagation(correlations) for i in range(labels.max()+1): print 'Cluster %i: %s' % ((i+1), ', '.join(names[labels==i]))
# Whitespace-collapsed excerpt. It begins inside a function whose header is
# not shown here (presumably get_kernel_matrix, which the driver below calls).
# Function tail: loads `li` into `charfeat`, builds StringUlongFeatures over
# its alphabet via obtain_from_char(charfeat, order-1, order, gap, reverse),
# applies the SortUlongString preprocessor (the kernel needs sorted features),
# and returns the CommUlongStringKernel matrix of the features with themselves.
# __main__ driver: reads the reviews, joins each non-empty one with spaces,
# runs affinity_propagation on the kernel matrix, then for each i in
# range(len(center)) prints every entry whose label equals i followed by "#",
# and finally prints i together with the entry at index center[i].
# NOTE(review): the `print a, b` statements are Python 2 syntax.
charfeat.set_features(li) #Get alphabet. feats_train = StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) #CommUlongStringKernel needs sorted features. preproc = SortUlongString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() use_sign = False #Compute kernel matrix between train features. kernel = CommUlongStringKernel(feats_train, feats_train, use_sign) km_train = kernel.get_kernel_matrix() return km_train if __name__ == "__main__": li = read_reviews() li = [" ".join(e) for e in li if e] mat = get_kernel_matrix(li) center, labels = affinity_propagation(mat) li = np.array(li) for i in range(len(center)): sents = li[np.where(labels==i)] for e in sents: print e, "#", print i, li[center[i]]
# Whitespace-collapsed excerpt. It begins inside a function whose header is
# not shown here (presumably get_kernel_matrix, which the driver below calls).
# Function tail: loads `li` into `charfeat`, builds StringUlongFeatures over
# its alphabet via obtain_from_char(charfeat, order - 1, order, gap, reverse),
# applies the SortUlongString preprocessor (the kernel needs sorted features),
# and returns the CommUlongStringKernel matrix of the features with themselves.
# __main__ driver: reads the reviews, joins each non-empty one with spaces,
# runs affinity_propagation on the kernel matrix, then for each i in
# range(len(center)) prints every entry whose label equals i followed by "#",
# and finally prints i together with the entry at index center[i].
# NOTE(review): the `print a, b` statements are Python 2 syntax.
charfeat.set_features(li) #Get alphabet. feats_train = StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) #CommUlongStringKernel needs sorted features. preproc = SortUlongString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() use_sign = False #Compute kernel matrix between train features. kernel = CommUlongStringKernel(feats_train, feats_train, use_sign) km_train = kernel.get_kernel_matrix() return km_train if __name__ == "__main__": li = read_reviews() li = [" ".join(e) for e in li if e] mat = get_kernel_matrix(li) center, labels = affinity_propagation(mat) li = np.array(li) for i in range(len(center)): sents = li[np.where(labels == i)] for e in sents: print e, "#", print i, li[center[i]]