Ejemplo n.º 1
0
def som_cluster_test(data, real_labels, outputfile=None):
    """Run SOM clustering for grid widths 6..39 and optionally log each result.

    For every ``k`` in ``range(6, 40)`` the data is clustered with
    ``Pycluster.somcluster`` on a ``k`` x 1 grid (``niter=5``, ``dist='u'``).
    The x grid coordinate assigned to each item is then renumbered to a
    compact label ``0..m-1`` and, when ``outputfile`` is given, the line
    produced by ``out_result(predicted, k, real_labels)`` is appended to it.
    The average wall-clock time per ``k`` is printed at the end.

    Args:
        data: Input passed unchanged to ``Pycluster.somcluster``.
        real_labels: Reference labels, forwarded to ``out_result``.
        outputfile: Path of the report file, or ``None`` to skip writing.

    Returns:
        None.
    """
    report = None
    if outputfile is not None:
        report = open(outputfile, 'w')

    # try/finally guarantees the report file is flushed and closed even if
    # clustering or result formatting raises part-way through the sweep.
    try:
        if report is not None:
            report.write(out_result_header())

        start = time.time()
        ks = range(6, 40)
        for k in ks:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and also parses on Python 3.
            print('som clustering when k=%d' % k)
            cells = Pycluster.somcluster(data, nxgrid=k, nygrid=1,
                                         niter=5, dist='u')[0]
            # With nygrid=1 only the x coordinate is used as the cluster id.
            grid_x = [xy[0] for xy in cells.tolist()]

            # Renumber the occupied cells to 0..m-1.  The numbering follows
            # the set's iteration order, exactly as the former
            # tuple(set(...)).index(...) lookup did, but in O(1) per item.
            distinct = tuple(set(grid_x))
            relabel = dict((cell, idx) for idx, cell in enumerate(distinct))
            predicted = [relabel[cell] for cell in grid_x]

            if report is not None:
                report.write(out_result(predicted, k, real_labels))

        elapsed = time.time() - start
        print('som clustering time: %.3f' % (elapsed / float(len(ks))))
    finally:
        if report is not None:
            report.close()
Ejemplo n.º 2
0
def self_organizing_map(flat_data, data):
    """Group ``data`` by self-organizing-map cell and pass the groups to ``make_plots``.

    ``pc.somcluster`` is run on ``flat_data.values()`` with a 5 x 5 grid,
    ``transpose=0``, ``inittau=0.02``, ``niter=100`` and ``dist='e'``.  Each
    element of ``data`` is then appended to the list keyed by the grid cell
    (as a tuple) found at the same position in the clustering result.

    NOTE(review): assumes ``data`` iterates in the same order as
    ``flat_data.values()`` -- confirm against the caller.
    NOTE(review): ``nclusters``, ``method`` and ``distance`` used in the plot
    title are not defined in this function; presumably module-level names --
    confirm.
    """
    # Fixed SOM settings, gathered in one place.
    som_options = dict(
        transpose=0,
        nxgrid=5,
        nygrid=5,
        inittau=0.02,
        niter=100,
        dist='e',
    )
    cell_ids, _cell_data = pc.somcluster(data=flat_data.values(),
                                         **som_options)

    # Bucket every item under the grid cell it was assigned to.
    clusters = defaultdict(list)
    for cell, item in zip(cell_ids, data):
        bucket = clusters[tuple(cell)]
        bucket.append(item)

    make_plots(
        'SOM (c=%s, m=%s, d=%s)' % (nclusters, method, distance),
        clusters,
        flat_data,
    )
Ejemplo n.º 3
0
def som_cluster_test(data, real_labels, outputfile=None):
    """Run SOM clustering for grid widths 6..39 and optionally log each result.

    For every ``k`` in ``range(6, 40)`` the data is clustered with
    ``Pycluster.somcluster`` on a ``k`` x 1 grid (``niter=5``, ``dist='u'``).
    The x grid coordinate assigned to each item is then renumbered to a
    compact label ``0..m-1`` and, when ``outputfile`` is given, the line
    produced by ``out_result(predicted, k, real_labels)`` is appended to it.
    The average wall-clock time per ``k`` is printed at the end.

    Args:
        data: Input passed unchanged to ``Pycluster.somcluster``.
        real_labels: Reference labels, forwarded to ``out_result``.
        outputfile: Path of the report file, or ``None`` to skip writing.

    Returns:
        None.
    """
    report = None
    if outputfile is not None:
        report = open(outputfile, 'w')

    # try/finally guarantees the report file is flushed and closed even if
    # clustering or result formatting raises part-way through the sweep.
    try:
        if report is not None:
            report.write(out_result_header())

        start = time.time()
        ks = range(6, 40)
        for k in ks:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and also parses on Python 3.
            print('som clustering when k=%d' % k)
            cells = Pycluster.somcluster(data,
                                         nxgrid=k,
                                         nygrid=1,
                                         niter=5,
                                         dist='u')[0]
            # With nygrid=1 only the x coordinate is used as the cluster id.
            grid_x = [xy[0] for xy in cells.tolist()]

            # Renumber the occupied cells to 0..m-1.  The numbering follows
            # the set's iteration order, exactly as the former
            # tuple(set(...)).index(...) lookup did, but in O(1) per item.
            distinct = tuple(set(grid_x))
            relabel = dict((cell, idx) for idx, cell in enumerate(distinct))
            predicted = [relabel[cell] for cell in grid_x]

            if report is not None:
                report.write(out_result(predicted, k, real_labels))

        elapsed = time.time() - start
        print('som clustering time: %.3f' % (elapsed / float(len(ks))))
    finally:
        if report is not None:
            report.close()
Ejemplo n.º 4
0
		input_vecs = utils.make_prices_diffs_vecs(data)
	else:
		input_vecs = utils.make_prices_vecs(data)

	# Run clustering algorithm.

	if algorithm_type == ClusterAlg.KMEANS:
		labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters, 
				dist = dist_measure, npass = number_of_iters, 
				method = dist_method)
	elif algorithm_type == ClusterAlg.HIERARCHICAL:
		tree = Pycluster.treecluster(input_vecs, method = dist_method,
				dist = dist_method)
		labels = tree.cut(number_of_clusters)
	elif algorithm_type == ClusterAlg.SELFORGMAPS:
		labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid, 
				nygrid = ygrid, niter = number_of_iters)

	# If algorithm is self-organizing maps each item is assigned to
	# a particular 2D point, so we need to create groups from 2D points.
	# See implementation of making groups from labels for details.

	if algorithm_type == ClusterAlg.SELFORGMAPS:
		clusters = utils.make_groups_from_labels(labels, data, True)
	else:
		clusters = utils.make_groups_from_labels(labels, data)

	# Check with which type of key we have to deal with.
	# Any better idea how to check if object is a pair? :)

	keys_are_2D_points = True
	sample_key = clusters.keys()[0]