Ejemplo n.º 1
0
    def test_best_fit_line_multiple_coresets(self):
        """Compare fitting costs of raw points against one and two coresets.

        Builds a point set P (an index column 1..N next to the generated
        data), splits it into P1 (first 1000 rows) and P2 (the rest), builds
        a coreset for P, P1 and P2, and checks that:

        * the weighted cost of the single coreset C against P's best-fit
          line equals the cost of P against that line (after int truncation);
        * the truncated weighted cost of C1 equals the truncated cost of P1;
        * the sum of the truncated weighted costs of C1 and C2 equals the
          truncated cost of P;
        * the line returned by ``utils.calc_best_fit_line_coreset(C1, C2)``
          matches P's best-fit line.
        """
        # generate points
        N = 1200
        # for example1 choose N that divides by 6
        data = example1(N)

        # Put a 1..N index column in front of the generated data.
        P = np.c_[np.mgrid[1:N + 1], data]
        P1 = P[:1000]
        P2 = P[1000:]

        C = Coreset.OneSegmentCorset(P)
        C1 = Coreset.OneSegmentCorset(P1)
        C2 = Coreset.OneSegmentCorset(P2)

        best_fit_line_P = utils.calc_best_fit_line(P)
        best_fit_line_P1 = utils.calc_best_fit_line(P1)

        # Cost of the full point set against its own best-fit line.
        original_cost = utils.sqrd_dist_sum(P, best_fit_line_P)
        single_coreset_cost = utils.sqrd_dist_sum(C.repPoints, best_fit_line_P) * C.weight
        C1_cost = int(utils.sqrd_dist_sum(C1.repPoints, best_fit_line_P) * C1.weight)
        P1_cost = int(utils.sqrd_dist_sum(P1, best_fit_line_P1))
        C2_cost = int(utils.sqrd_dist_sum(C2.repPoints, best_fit_line_P) * C2.weight)
        dual_coreset_cost = C1_cost + C2_cost

        # Costs are floats; they are truncated to int so that small rounding
        # differences do not fail the comparison.
        self.assertEqual(int(original_cost), int(single_coreset_cost))
        # NOTE(review): C1_cost is measured against P's line while P1_cost is
        # measured against P1's own line -- confirm this pairing is intended.
        self.assertEqual(C1_cost, P1_cost)
        self.assertEqual(int(original_cost), int(dual_coreset_cost))

        res2 = utils.calc_best_fit_line_coreset(C1, C2)

        # assertEqual evaluates ``not (a == b)``, which raises ValueError for
        # multi-element NumPy arrays, so compare element-wise instead.
        # Assumes both values are numeric array-likes of compatible shape --
        # TODO confirm.
        np.testing.assert_allclose(best_fit_line_P, res2)
Ejemplo n.º 2
0
def calc_prep_dist(P):
    """Build the table of best-fit-line costs for every contiguous segment of P.

    Entry ``[i, j]`` with ``i < j`` holds ``utils.sqrd_dist_sum`` of the rows
    ``P[i..j]`` (both ends included) against the line returned by
    ``utils.calc_best_fit_line_polyfit`` for those rows.  Every other entry
    (the diagonal and the lower triangle) is left at infinity.

    :param P: sequence of points; sliced as a 2-D array whenever it has more
        than one row.
    :return: ``len(P) x len(P)`` float array of segment costs.
    """
    count = len(P)
    costs = np.full((count, count), float("inf"))
    for start in range(count):
        # Only segments that end strictly after they start are evaluated.
        for end in range(start + 1, count):
            points = P[start:end + 1, :]
            line = utils.calc_best_fit_line_polyfit(points)
            costs[start, end] = utils.sqrd_dist_sum(points, line)
    return costs
Ejemplo n.º 3
0
    def test_OneSegmentCoreset_Cost(self):
        """Check that a one-segment coreset reproduces the costs of its points.

        Builds a coreset C1 for the first 1000 rows (P1) of the generated
        point set and verifies that:

        * the best-fit line of C1's representative points matches the
          best-fit line of P1;
        * the weighted cost of C1's representative points equals the cost of
          P1 (after int truncation), both against their own best-fit line and
          against the best-fit line of the full set P.
        """
        # generate points
        n = 1200
        data = example1(n)

        # Put a 1..n index column in front of the generated data.
        P = np.c_[np.mgrid[1:n + 1], data]
        P1 = P[:1000]
        C1 = Coreset.OneSegmentCorset(P1)

        best_fit_line_P = utils.calc_best_fit_line(P)
        best_fit_line_P1 = utils.calc_best_fit_line(P1)
        best_fit_line_C1 = utils.calc_best_fit_line(C1.repPoints)

        # Comparing ``a.all()`` with ``b.all()`` only compares two booleans
        # ("are all entries non-zero"), not the lines themselves; compare the
        # values element-wise instead.
        np.testing.assert_allclose(best_fit_line_P1, best_fit_line_C1)

        original_cost_not_best_fit_line = utils.sqrd_dist_sum(P1, best_fit_line_P)
        original_cost_best_fit_line = utils.sqrd_dist_sum(P1, best_fit_line_P1)
        single_coreset_cost_not_best_fit_line = utils.sqrd_dist_sum(C1.repPoints, best_fit_line_P) * C1.weight
        single_coreset_cost_best_fit_line = utils.sqrd_dist_sum(C1.repPoints, best_fit_line_C1) * C1.weight

        # Costs are floats; they are truncated to int so that small rounding
        # differences do not fail the comparison.
        self.assertEqual(int(original_cost_best_fit_line), int(single_coreset_cost_best_fit_line))
        self.assertEqual(int(original_cost_not_best_fit_line), int(single_coreset_cost_not_best_fit_line))
Ejemplo n.º 4
0
def calc_coreset_prep_dist(D):
    """Build the table of fitting costs for every contiguous run of coresets.

    Entry ``[i, j]`` with ``i <= j`` holds the weighted fitting cost of a
    single coreset built from ``D[i..j]`` (both ends included); entries
    below the diagonal stay at infinity.

    :param D: sequence of coreset objects, in order.
    :return: ``len(D) x len(D)`` float array of fitting costs.
    """
    count = len(D)
    costs = np.full((count, count), float("inf"))
    for start in range(count):
        # Runs begin at a starting coreset and finish at the same or a later
        # coreset, so only the upper triangle (diagonal included) is filled.
        for end in range(start, count):
            run = list(D[start:end + 1])
            # The second argument presumably marks the input as a list of
            # coresets rather than raw points -- TODO confirm.
            merged = Coreset.OneSegmentCorset(run, True)
            line = utils.calc_best_fit_line_polyfit(merged.repPoints, True)
            unweighted = utils.sqrd_dist_sum(merged.repPoints, line)
            costs[start, end] = unweighted * merged.weight
    return costs
Ejemplo n.º 5
0
def calc_coreset_prep_dist(D):
    """Compute fitting costs for every contiguous run of coresets in D.

    For each pair ``first_coreset <= second_coreset`` the coresets
    ``D[first_coreset..second_coreset]`` (both ends included) are merged into
    one coreset, a best-fit line is computed for its representative points,
    and the squared-distance sum to that line, multiplied by the merged
    coreset's weight, is stored at ``[first_coreset, second_coreset]``.
    Entries below the diagonal stay at infinity.

    :param D: sequence of coreset objects, in order.
    :return: ``len(D) x len(D)`` float array of fitting costs.
    """
    prep_dist = np.full((len(D), len(D)), float("inf"))
    for (first_coreset, second_coreset), _ in np.ndenumerate(prep_dist):
        # we only want to calculate for segments that start in
        # starting coreset endpoints and end in ending coreset endpoints
        if first_coreset > second_coreset:
            continue
        C = list(D[first_coreset:second_coreset + 1])
        # The second argument presumably marks the input as a list of
        # coresets rather than raw points -- TODO confirm.
        coreset_of_coresets = Coreset.OneSegmentCorset(C, True)
        best_fit_line = utils.calc_best_fit_line_polyfit(coreset_of_coresets.repPoints, True)
        fitting_cost = utils.sqrd_dist_sum(coreset_of_coresets.repPoints, best_fit_line) * coreset_of_coresets.weight
        prep_dist[first_coreset, second_coreset] = fitting_cost
    return prep_dist