Esempio n. 1
0
def main():
    """Drive k-means: run the initializer job, then iterate to convergence.

    Runs ``MRkMeansInit`` once, then runs ``MRkMeansIter`` repeatedly. After
    every job the centroids are read back from ``intermediateResults.txt``
    (presumably written there by the jobs via ``--pathName`` -- confirm
    against the job classes). Iteration stops once the summed ``dist``
    between the previous and the current centroids is no longer above 0.01.
    The summed movement is printed after every iteration.
    """
    # Single source of truth for the working directory; the trailing slash
    # matters because file names are appended by plain concatenation and the
    # same string is handed to the jobs as --pathName.
    centPath = "/Users/ashwin/repos/mlclass/kMeans3/"
    filePath = centPath + 'input.txt'
    centroidsFile = centPath + "intermediateResults.txt"

    kMeansStr = '3'
    jobArgs = [filePath, "--k", kMeansStr, "--pathName", centPath]

    #first run the initializer to get starting centroids
    mrJob = MRkMeansInit(args=list(jobArgs))
    with mrJob.make_runner() as runner:
        runner.run()

    #pull out the centroid values to compare with values after one iteration
    with open(centroidsFile) as fileIn:
        oldCentroids = json.load(fileIn)

    # Sentinel above the threshold so the loop body runs at least once.
    delta = 10
    #Begin iteration on change in centroids
    while delta > 0.01:
        #run one iteration
        mrJob2 = MRkMeansIter(args=list(jobArgs))
        with mrJob2.make_runner() as runner:
            runner.run()

        #compare new centroids to old ones
        with open(centroidsFile) as fileIn:
            newCentroids = json.load(fileIn)

        # Total movement over all centroids. Indexing oldCentroids (rather
        # than zipping) keeps an IndexError if the new list is longer.
        delta = sum(
            (dist(centroid, oldCentroids[i])
             for i, centroid in enumerate(newCentroids)),
            0.0)

        # Parenthesised single-argument form prints identically on Python 2
        # and is valid syntax on Python 3.
        print(delta)

        # The freshly parsed centroids are the baseline for the next pass,
        # so the file content does not need to be parsed a second time.
        oldCentroids = newCentroids
Esempio n. 2
0
def main():
    """Drive k-means: run the initializer job, then iterate to convergence.

    Runs ``MRkMeansInit`` once on ``input.txt`` under ``PROJECT_ROOT``, then
    runs ``MRkMeansIter`` repeatedly. After every job the centroids are read
    back from ``intermediateResults.txt`` under ``PROJECT_ROOT`` (presumably
    written there by the jobs -- confirm against the job classes). Iteration
    stops once the summed ``dist`` between the previous and the current
    centroids is no longer above 0.001. Progress is printed every iteration.
    """
    filePath = os.path.join(PROJECT_ROOT, 'input.txt')
    centPath = os.path.join(PROJECT_ROOT, 'intermediateResults.txt')

    #first run the initializer to get starting centroids
    mrJob = MRkMeansInit(args=[filePath])
    with mrJob.make_runner() as runner:
        runner.run()

    #pull out the centroid values to compare with values after one iteration
    with open(centPath) as fileIn:
        oldCentroids = json.load(fileIn)

    # Sentinel above the threshold so the loop body runs at least once.
    delta = 10
    #Begin iteration on change in centroids
    while delta > 0.001:
        #run one iteration
        mrJob2 = MRkMeansIter(args=[filePath])
        with mrJob2.make_runner() as runner:
            runner.run()

        #compare new centroids to old ones
        with open(centPath) as fileIn:
            newCentroids = json.load(fileIn)

        # Total movement over all centroids. Indexing oldCentroids (rather
        # than zipping) keeps an IndexError if the new list is longer.
        delta = sum(
            (dist(centroid, oldCentroids[i])
             for i, centroid in enumerate(newCentroids)),
            0.0)

        # Parenthesised single-argument form prints identically on Python 2
        # and is valid syntax on Python 3.
        print("delta={0},  centers={1}".format(delta, str(newCentroids)))

        # The freshly parsed centroids are the baseline for the next pass,
        # so the file content does not need to be parsed a second time.
        oldCentroids = newCentroids
Esempio n. 3
0
def main():
    """Run the k-means MapReduce pipeline until the centroids stop moving.

    Steps:
      1. Run ``MRkMeansInit`` on ``PROJECT_ROOT/input.txt``.
      2. Load the centroids from ``PROJECT_ROOT/intermediateResults.txt``
         (presumably produced by the job -- confirm against the job class).
      3. Repeatedly run ``MRkMeansIter``, reload the centroids, and sum
         ``dist`` between each new centroid and its predecessor.
      4. Stop when that sum is no longer greater than 0.001.

    The summed movement and the current centroids are printed each pass.
    """
    inputFile = os.path.join(PROJECT_ROOT, 'input.txt')
    centroidsFile = os.path.join(PROJECT_ROOT, 'intermediateResults.txt')

    def loadCentroids():
        # The context manager closes the handle even if reading or JSON
        # decoding raises.
        with open(centroidsFile) as handle:
            return json.load(handle)

    #first run the initializer to get starting centroids
    initJob = MRkMeansInit(args=[inputFile])
    with initJob.make_runner() as runner:
        runner.run()

    #pull out the centroid values to compare with values after one iteration
    previous = loadCentroids()

    # Start above the threshold so at least one iteration job is executed.
    delta = 10
    #Begin iteration on change in centroids
    while delta > 0.001:
        #run one iteration
        iterJob = MRkMeansIter(args=[inputFile])
        with iterJob.make_runner() as runner:
            runner.run()

        #compare new centroids to old ones
        current = loadCentroids()

        # Accumulate from 0.0 in list order; previous is indexed so that a
        # longer new list still raises IndexError instead of being ignored.
        delta = 0.0
        for position, centroid in enumerate(current):
            delta += dist(centroid, previous[position])

        # Parenthesised single-argument form prints identically on Python 2
        # and is valid syntax on Python 3.
        print("delta={0},  centers={1}".format(delta, str(current)))

        # Reuse the parsed result as the next baseline rather than parsing
        # the same file content again.
        previous = current