Example #1
    def test_fast_segmentation(self):
        # assumes: import numpy as np, plus the project-local modules
        # Coreset, ksegment, utils and the generate_input_file test helper
        n = 360
        k = 3
        epsilon = 1

        generate_input_file(n)
        data = np.genfromtxt("input.csv", delimiter=" ")
        p = np.c_[np.mgrid[1:n + 1], data]

        D = Coreset.build_coreset(p, k, epsilon)
        print(len(D))
        x = np.empty((0, 4))
        for coreset in D:
            print "coreset range", coreset.e - coreset.b + 1
            pts = utils.pt_on_line(xrange(int(coreset.b), int(coreset.e) + 1), coreset.g)
            # TODO: 2nd parameter should be epsilon
            w = Coreset.PiecewiseCoreset(len(pts[0]), epsilon)
            p_coreset = np.column_stack((pts[0], pts[1], pts[2], w))
            p_coreset_filtered = p_coreset[p_coreset[:, 3] > 0]
            # print "weighted points", p_coreset_filtered
            x = np.append(x, p_coreset_filtered, axis=0)
        print "num of weighted points", len(x)
        dividers = ksegment.coreset_k_segment_fast_segmentation(x, k)
        print "dividers", dividers
        print "dividers-cost:", utils.calc_cost_dividers(p, dividers)
        utils.visualize_3d(p, dividers)
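The generate_input_file helper is not shown here. A minimal sketch of what it could look like, assuming it only needs to write n space-separated 2-D points to input.csv (the real project helper may generate different data):

    import numpy as np

    def generate_input_file(n):
        # write n noisy 2-D points, one per row, space-separated, so the
        # test above can read them back with np.genfromtxt(..., delimiter=" ")
        rng = np.random.default_rng(0)
        data = np.cumsum(rng.standard_normal((n, 2)), axis=0)  # a 2-D random walk
        np.savetxt("input.csv", data, delimiter=" ")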
Example #2
    def test_fast_segmentation(self):
        # generate points (assumes the same numpy import and project helpers as Example #1)
        n = 600
        k = 6
        epsilon = 10
        generate_input_file(n)
        data = np.genfromtxt("input.csv", delimiter=" ")
        p = np.c_[np.mgrid[1:n + 1], data]

        D = Coreset.build_coreset(p, k, epsilon)
        print(D)
        dividers = ksegment.coreset_k_segment_fast_segmentation(D, k, epsilon)
        print "dividers", dividers
        print "dividers-cost:", utils.calc_cost_dividers(p, dividers)
Example #3

    # main driver starts here: initialise Spark and read the run configuration
    sc, config, infile = init_spark()
    initial_num_of_partitions = config.getint("conf", "numOfPartitions")

    points = sc.textFile(infile, initial_num_of_partitions) \
        .map(lambda row: np.fromstring(row, dtype=np.float64, sep=' ')) \
        .zipWithIndex() \
        .map(lambda pair: np.insert(pair[0], 0, pair[1] + 1)) \
        .mapPartitionsWithIndex(k_segment_coreset_read_point_batch)  # text rows -> (partition_key, numpy array) batches

    def computeTree(rdd, f):
        # pairwise-merge partitions until only one partition remains
        while rdd.getNumPartitions() != 1:
            rdd = (rdd
                   .reduceByKey(f)  # merge each pair of keyed coresets, halving the data
                   .map(lambda x: (x[0] // 2, x[1]))  # re-key so former neighbours share a key
                   .partitionBy(rdd.getNumPartitions() // 2))  # halve the number of partitions

        # a final reduce covers the case where the tree is not a complete binary
        # tree; first() now holds the single merged coreset, a numpy array
        return rdd.reduceByKey(f).first()[1]


    result = computeTree(points, k_segment_merge)
    print(result)
    print(len(result))
    # k and eps are assumed to be defined elsewhere, e.g. read from the same config file
    print(ksegment.coreset_k_segment_fast_segmentation(result, k, eps))
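computeTree reduces the RDD as a balanced binary merge tree: coresets in neighbouring partitions are merged pairwise until a single partition remains. A minimal non-Spark sketch of the same control flow, with plain list concatenation standing in for k_segment_merge:

    def compute_tree_local(parts, merge):
        # repeatedly merge neighbouring pairs; an odd part at the end is
        # carried over unmerged, mirroring the incomplete-tree case above
        while len(parts) > 1:
            parts = [merge(parts[i], parts[i + 1]) if i + 1 < len(parts) else parts[i]
                     for i in range(0, len(parts), 2)]
        return parts[0]

    # compute_tree_local([[1], [2], [3], [4], [5]], lambda a, b: a + b)
    # -> [1, 2, 3, 4, 5]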