Beispiel #1
0
  def test_simple_kmeans(self):
    # Run k-means on a tiny two-dimensional dataset from a fixed starting
    # point and check both the converged means and the reported error.
    #
    # Data points: (1,1), (1,2), (2,2), (3,3), (5,5); 3 clusters.
    # The third argument [0, 1, 4] appears to select dataset rows 0, 1 and 4
    # as the starting means (that is what the hand trace below assumes).
    dataset = [ [1,1], [1,2], [2,2], [3,3], [5,5] ]

    # Hand trace of the expected run:
    #   initial means:   [ 1, 1 ], [ 1, 2 ], [ 5, 5 ]
    #   new assignment:  [ 0, 1, 1, 1, 2 ]
    #   next means:      [ 1, 1 ], [ 2, 7 / 3.0 ], [ 5, 5 ]
    #   new assignment:  [ 0, 0, 1, 1, 2 ]
    #   next means:      [ 1, 1.5 ], [ 2.5, 2.5 ], [ 5, 5 ]
    #   total error:     (0.5^2 + 0.5^2 + 2 * 0.5^2 + 2 * 0.5^2) / 5.0
    (means, error) = clust.kmeans(dataset, 3, [ 0, 1, 4 ])
    # Parenthesized form: same output on Python 2, and valid syntax on
    # Python 3 (the bare print statement is a SyntaxError there).
    print(means)
    # Each expected mean must occur exactly once; the order of the returned
    # means is not checked.
    self.assertEqual(1, means.count([5, 5]))
    self.assertEqual(1, means.count([1, 1.5]))
    self.assertEqual(1, means.count([2.5, 2.5]))
    self.assertAlmostEqual(6 * math.pow(0.5, 2) / 5.0, error)
Beispiel #2
0
 def task(self):
     # Build a line-chart description of the best k-means error obtained for
     # each cluster count in self.CLUSTERS.
     #
     # Returns a nested dict with "chart", "xAxis", "yAxis", "title" and
     # "series" entries; the single series holds one {"x": clusters,
     # "y": error} point per cluster count.

     # Close the input file as soon as parsing is done instead of leaking
     # the handle.  Assumes parse_input reads eagerly -- the dataset is
     # reused for every kmeans call below, so it cannot be a one-shot stream.
     with open('adults.txt', 'r') as infile:
         dataset = clust.parse_input(infile, 1000)

     all_errors = []
     for clusters in self.CLUSTERS:
         # No initial means are passed to kmeans here; keep the lowest error
         # seen over self.SAMPLES runs.
         error = min(
             clust.kmeans(dataset, clusters)[1]
             for _ in range(self.SAMPLES)
         )
         all_errors.append({"x": clusters, "y": error})

     chart = {
         "chart": {
             "defaultSeriesType": "line"
         },
         "xAxis": {
             "title": {
                 "text": "Clusters"
             },
             "min": 1
         },
         "yAxis": {
             "title": {
                 "text": "Mean Squared Error"
             }
         },
         "title": {
             "text": "K-means Results"
         },
         "series": [{
             "data": all_errors
         }]
     }
     return chart
Beispiel #3
0
    def test_simple_kmeans(self):
        # Test out a simple kmeans example with a fixed starting point and
        # verify the final means and the reported error.
        #
        # Two-dimensional data points: (1,1), (1,2), (2,2), (3,3), (5,5)
        # 3 clusters
        # [0, 1, 4] looks like the dataset row indices used as the initial
        # means (consistent with the worked example below).
        dataset = [[1, 1], [1, 2], [2, 2], [3, 3], [5, 5]]

        # Worked example:
        # initial means:
        # [ 1, 1 ], [ 1, 2 ], [ 5, 5 ]
        # new assignment:
        # [ 0, 1, 1, 1, 2 ]
        # next means:
        # [ 1, 1 ], [ 2, 7 / 3.0 ], [ 5, 5 ]
        # new assignment:
        # [ 0, 0, 1, 1, 2 ]
        # next means:
        # [ 1, 1.5 ], [ 2.5, 2.5 ], [ 5, 5 ]
        # total error:
        # (0.5^2 + 0.5^2 + 2 * 0.5^2 + 2 * 0.5^2) / 5.0
        (means, error) = clust.kmeans(dataset, 3, [0, 1, 4])
        # print(...) with one argument prints the same text on Python 2 and
        # also parses on Python 3, unlike the bare print statement.
        print(means)
        # The order of the returned means is not asserted, only that each
        # expected mean is present exactly once.
        self.assertEqual(1, means.count([5, 5]))
        self.assertEqual(1, means.count([1, 1.5]))
        self.assertEqual(1, means.count([2.5, 2.5]))
        self.assertAlmostEqual(6 * math.pow(0.5, 2) / 5.0, error)
Beispiel #4
0
def vclust(cond, q=1, vrep='irate', vargs=None, cmeth='kmeans', nclust=16, cargs=None):
    """Cluster the events of ``cond`` and fuse the result back into it.

    The value of ``cond['evts']`` is first passed through the function
    registered under ``vrep`` in ``dist.VREPS``; the result is then
    partitioned by the clustering routine selected with ``cmeth``.

    Args:
        cond: object supporting ``cond['evts']``, ``cond.new(evts=...)`` and
            ``cond.fuse(...)``.
        q: not referenced in this function body; kept so existing callers
            keep working.
        vrep: key into ``dist.VREPS`` selecting the representation function.
        vargs: optional dict of keyword arguments forwarded to the
            representation function. ``None`` means no extra arguments.
        cmeth: clustering method, one of ``'tree'``, ``'kmeans'`` or
            ``'mixmod'``.
        nclust: number of clusters requested from the clustering routine.
        cargs: optional dict of keyword arguments forwarded to the
            clustering routine. ``None`` means no extra arguments. For
            ``'mixmod'`` this should contain e.g.
            ``{'model': "Gaussian_pk_Lk_Bk", 'reps': 1}``.

    Returns:
        Whatever ``cond.fuse(cond.new(evts=clsts))`` returns, where ``clsts``
        is the output of the selected clustering routine.

    Raises:
        ValueError: if ``cmeth`` is not a known clustering method.
            ``ValueError`` derives from ``StandardError`` on Python 2, so
            handlers written for the old exception type still catch it, and
            unlike ``StandardError`` it also exists on Python 3.
    """
    # None sentinels replace the shared mutable ``{}`` defaults.
    if vargs is None:
        vargs = {}
    if cargs is None:
        cargs = {}

    evts = dist.VREPS[vrep](cond['evts'], **vargs)
    if cmeth == 'tree':
        # Build the tree first, then cut it into the requested clusters.
        t = clust.vtree(evts, **cargs)
        clsts = t.cut(nclust)
    elif cmeth == 'kmeans':
        clsts = clust.kmeans(evts, nclust, **cargs)
    elif cmeth == 'mixmod':
        clsts = clust.mixmodpartition(evts, nclust, **cargs)
    else:
        raise ValueError("don't know clustering method %s" % cmeth)
    return cond.fuse(cond.new(evts=clsts))
Beispiel #5
0
 def task(self):
   # Produce the chart description (a nested dict) plotting, for every
   # cluster count in self.CLUSTERS, the lowest k-means error observed.

   # Use a context manager so the input file is closed once parsed rather
   # than left open.  Assumes parse_input consumes the file eagerly; the
   # dataset is reused across all kmeans calls, so it is not a lazy stream.
   with open('adults.txt', 'r') as source:
     dataset = clust.parse_input(source, 1000)

   all_errors = []
   for clusters in self.CLUSTERS:
     # kmeans is called without initial means; take the lowest error over
     # self.SAMPLES runs.
     error = min(clust.kmeans(dataset, clusters)[1]
                 for _ in range(self.SAMPLES))
     all_errors.append({ "x": clusters, "y": error })

   chart = {
     "chart": {"defaultSeriesType": "line"},
     "xAxis": {"title": {"text": "Clusters"}, "min": 1 },
     "yAxis": {"title": {"text": "Mean Squared Error"}},
     "title": {"text": "K-means Results"},
     "series": [ {"data": all_errors } ],
   }
   return chart
Beispiel #6
0
 def test_kmeans(self):
   # Check that kmeans returns exactly as many means as clusters requested,
   # for cluster counts 2, 3 and 4.  No initial means are passed, so kmeans
   # picks its own start (presumably random -- confirm in clust.kmeans).
   # range() instead of xrange() keeps the test runnable on Python 3.
   for i in range(2, 5):
     # The error value is not checked here; unpacking still verifies that
     # kmeans returns a pair.
     (means, _) = clust.kmeans(self.dataset, i)
     self.assertEqual(i, len(means))
Beispiel #7
0
 def test_kmeans(self):
     # For 2, 3 and 4 clusters, kmeans must return one mean per cluster.
     # kmeans is called without initial means, so it chooses its own
     # starting assignment (presumably random -- confirm in clust.kmeans).
     # range() is used rather than xrange() so this also runs on Python 3.
     for i in range(2, 5):
         # Only the means are inspected; the second element of the returned
         # pair (the error) is deliberately ignored.
         (means, _) = clust.kmeans(self.dataset, i)
         self.assertEqual(i, len(means))