def test_fast_segmentation(self):
    """Run fast k-segmentation on weighted points derived from a coreset.

    Steps: generate ``n`` points with ``generate_input_file``, load
    "input.csv", prepend a 1-based index column, build a coreset, turn every
    coreset entry into weighted points (via ``utils.pt_on_line`` and
    ``Coreset.PiecewiseCoreset``), keep only rows with a positive weight and
    segment the stacked rows.  Everything is printed and visualised; the
    test makes no assertions.
    """
    n = 360
    k = 3
    epsilon = 1

    generate_input_file(n)
    raw = np.genfromtxt("input.csv", delimiter=" ")
    # First column is the 1-based running index of each loaded row.
    indexed = np.c_[np.arange(1, n + 1), raw]

    coresets = Coreset.build_coreset(indexed, k, epsilon)
    print(len(coresets))

    # Accumulator with four columns: three point columns plus the weight.
    weighted = np.empty((0, 4))
    for coreset in coresets:
        print("coreset range %s" % (coreset.e - coreset.b + 1,))

        index_range = xrange(int(coreset.b), int(coreset.e) + 1)
        line_pts = utils.pt_on_line(index_range, coreset.g)

        # TODO: 2nd parameter should be epsilon
        weights = Coreset.PiecewiseCoreset(len(line_pts[0]), epsilon)

        stacked = np.column_stack(
            (line_pts[0], line_pts[1], line_pts[2], weights))
        # Column 3 holds the weight; drop rows whose weight is not positive.
        positive = stacked[stacked[:, 3] > 0]
        weighted = np.concatenate((weighted, positive), axis=0)

    print("num of weighted points %s" % (len(weighted),))

    dividers = ksegment.coreset_k_segment_fast_segmentation(weighted, k)
    print("dividers %s" % (dividers,))

    cost = utils.calc_cost_dividers(indexed, dividers)
    print("dividers-cost: %s" % (cost,))
    utils.visualize_3d(indexed, dividers)
def test_fast_segmentation(self):
    """Build a coreset from generated points and segment it directly.

    Generates ``n`` points with ``generate_input_file``, loads "input.csv",
    prepends a 1-based index column, builds the coreset and passes it
    straight to the fast k-segmentation.  The coreset, the dividers and the
    cost of the dividers on the full data are printed; the test makes no
    assertions.
    """
    # generate points
    n, k, epsilon = 600, 6, 10
    generate_input_file(n)

    raw = np.genfromtxt("input.csv", delimiter=" ")
    # First column is the 1-based running index of each loaded row.
    indexed = np.c_[np.arange(1, n + 1), raw]

    coreset = Coreset.build_coreset(indexed, k, epsilon)
    print(coreset)

    dividers = ksegment.coreset_k_segment_fast_segmentation(
        coreset, k, epsilon)
    print("dividers %s" % (dividers,))

    cost = utils.calc_cost_dividers(indexed, dividers)
    print("dividers-cost: %s" % (cost,))
# start from here.
sc, config, infile = init_spark()
initial_num_of_partitions = config.getint("conf", "numOfPartitions")

# From text file to (key, numpy_array): every row is parsed into a float64
# vector, its 1-based line number is inserted as the first element, and the
# rows of each partition are handed to k_segment_coreset_read_point_batch.
points = (
    sc.textFile(infile, initial_num_of_partitions)
    .map(lambda row: np.fromstring(row, dtype=np.float64, sep=' '))
    .zipWithIndex()
    .map(lambda pair: np.insert(pair[0], 0, pair[1] + 1))
    .mapPartitionsWithIndex(k_segment_coreset_read_point_batch)
)


def computeTree(rdd, f):
    """Merge a keyed RDD pairwise until a single value is left.

    Each round merges the values that share a key with ``f``, halves every
    key so that neighbouring results pair up in the next round, and halves
    the number of partitions.  The loop stops when one partition remains.

    :param rdd: RDD of ``(key, value)`` pairs with integer keys.
    :param f: associative merge function passed to ``reduceByKey``.
    :return: the value of the first remaining pair after a final
        ``reduceByKey`` (per the original note, the coreset as a numpy
        array).
    """
    while rdd.getNumPartitions() != 1:
        # Floor division keeps keys and the partition count integral on both
        # Python 2 and Python 3 (plain ``/`` yields floats on Python 3).
        halved_partitions = rdd.getNumPartitions() // 2
        rdd = (rdd
               .reduceByKey(f)  # merge couple and reduce by half
               .map(lambda x: (x[0] // 2, x[1]))  # set new keys
               .partitionBy(halved_partitions))  # reduce num of partitions
    # The tree may not be a complete binary tree, so merge once more; after
    # that the first element is expected to hold everything.
    return rdd.reduceByKey(f).first()[1]


result = computeTree(points, k_segment_merge)
print(result)
print(len(result))
print(ksegment.coreset_k_segment_fast_segmentation(result, k, eps))