def test_best_fit_line_multiple_coresets(self):
    """Check that one-segment coresets preserve cost and best-fit line.

    Builds a signal of ``N`` points (a 1..N index column prepended to the
    ``example1`` data), splits it into a 1000-row prefix ``P1`` and the
    remainder ``P2``, and builds a coreset for the whole signal and one for
    each part. Costs are truncated with ``int`` before being compared.

    Asserted properties:
      * the weighted cost of the full coreset against the full signal's
        best-fit line equals the raw cost of the signal against that line;
      * the prefix coreset cost equals the prefix cost;
      * the two partial coreset costs add up to the full-signal cost;
      * the line computed from the two coresets matches the full signal's
        best-fit line.
    """
    # generate points
    N = 1200  # for example1 choose N that divides by 6
    data = example1(N)
    P = np.c_[np.mgrid[1:N + 1], data]
    P1 = P[:1000]
    P2 = P[1000:]

    C = Coreset.OneSegmentCorset(P)
    C1 = Coreset.OneSegmentCorset(P1)
    C2 = Coreset.OneSegmentCorset(P2)

    best_fit_line_P = utils.calc_best_fit_line(P)
    best_fit_line_P1 = utils.calc_best_fit_line(P1)

    original_cost_not_best_fit_line = utils.sqrd_dist_sum(P, best_fit_line_P)
    single_coreset_cost = (
        utils.sqrd_dist_sum(C.repPoints, best_fit_line_P) * C.weight
    )

    # Each partial cost is truncated on its own before summing, exactly as
    # the comparison below expects.
    C1_cost = int(utils.sqrd_dist_sum(C1.repPoints, best_fit_line_P) * C1.weight)
    # NOTE(review): C1_cost is measured against the line of the full signal
    # while P1_cost is measured against P1's own line -- confirm intended.
    P1_cost = int(utils.sqrd_dist_sum(P1, best_fit_line_P1))
    C2_cost = int(utils.sqrd_dist_sum(C2.repPoints, best_fit_line_P) * C2.weight)
    dual_coreset_cost = C1_cost + C2_cost

    self.assertEqual(int(original_cost_not_best_fit_line), int(single_coreset_cost))
    self.assertEqual(C1_cost, P1_cost)
    self.assertEqual(int(original_cost_not_best_fit_line), int(dual_coreset_cost))

    res2 = utils.calc_best_fit_line_coreset(C1, C2)
    # assertEqual on multi-element arrays raises ValueError (ambiguous truth
    # value), so compare element-wise with a floating-point tolerance.
    # Assumes both values are numeric array-likes of the same shape.
    np.testing.assert_allclose(res2, best_fit_line_P, rtol=1e-5, atol=1e-8)
def calc_prep_dist(P):
    """Build the table of segment fitting costs for the points in ``P``.

    For every pair ``i < j`` the entry ``[i, j]`` holds
    ``utils.sqrd_dist_sum`` of rows ``i..j`` (inclusive) of ``P`` measured
    against the line that ``utils.calc_best_fit_line_polyfit`` returns for
    those same rows. All remaining entries (the diagonal and everything
    below it) are left at ``inf``.

    Returns:
        A ``len(P) x len(P)`` numpy array of costs.
    """
    n_points = len(P)
    costs = np.full((n_points, n_points), float("inf"))
    for start in range(n_points):
        # Only the strict upper triangle is filled: a segment needs an end
        # index that lies after its start index.
        for end in range(start + 1, n_points):
            rows = P[start:end + 1, :]
            line = utils.calc_best_fit_line_polyfit(rows)
            costs[start, end] = utils.sqrd_dist_sum(rows, line)
    return costs
def test_OneSegmentCoreset_Cost(self):
    """Check that a one-segment coreset reproduces line and fitting costs.

    Builds a signal of ``n`` points (a 1..n index column prepended to the
    ``example1`` data), takes its first 1000 rows as ``P1`` and builds a
    coreset ``C1`` for them. It then asserts that:
      * the best-fit line of the coreset's representative points matches
        the best-fit line of ``P1``;
      * the weighted coreset cost equals the cost of ``P1`` (after ``int``
        truncation), both against ``P1``'s own best-fit line and against
        the best-fit line of the full signal.
    """
    # generate points
    n = 1200
    data = example1(n)
    P = np.c_[np.mgrid[1:n + 1], data]
    P1 = P[:1000]

    C1 = Coreset.OneSegmentCorset(P1)

    best_fit_line_P = utils.calc_best_fit_line(P)
    best_fit_line_P1 = utils.calc_best_fit_line(P1)
    best_fit_line_C1 = utils.calc_best_fit_line(C1.repPoints)

    # Comparing ``a.all()`` with ``b.all()`` only compares two booleans and
    # passes for almost any pair of lines; compare the values element-wise.
    np.testing.assert_allclose(
        best_fit_line_C1, best_fit_line_P1, rtol=1e-5, atol=1e-8
    )

    original_cost_not_best_fit_line = utils.sqrd_dist_sum(P1, best_fit_line_P)
    original_cost_best_fit_line = utils.sqrd_dist_sum(P1, best_fit_line_P1)
    single_coreset_cost_not_best_fit_line = (
        utils.sqrd_dist_sum(C1.repPoints, best_fit_line_P) * C1.weight
    )
    single_coreset_cost_best_fit_line = (
        utils.sqrd_dist_sum(C1.repPoints, best_fit_line_C1) * C1.weight
    )

    self.assertEqual(
        int(original_cost_best_fit_line),
        int(single_coreset_cost_best_fit_line),
    )
    self.assertEqual(
        int(original_cost_not_best_fit_line),
        int(single_coreset_cost_not_best_fit_line),
    )
def calc_coreset_prep_dist(D):
    """Build the table of fitting costs for consecutive runs of coresets.

    For every pair ``first <= last`` the coresets ``D[first..last]``
    (inclusive) are merged with ``Coreset.OneSegmentCorset(..., True)``.
    Entry ``[first, last]`` is the squared-distance sum of the merged
    coreset's representative points to their polyfit best-fit line,
    multiplied by the merged coreset's weight. Entries below the diagonal
    stay at ``inf``.

    Returns:
        A ``len(D) x len(D)`` numpy array of costs.
    """
    count = len(D)
    costs = np.full((count, count), float("inf"))
    for first in range(count):
        # we only want segments that start at a starting coreset endpoint
        # and end at the endpoint of the same or a later coreset
        for last in range(first, count):
            members = list(D[first:last + 1])
            merged = Coreset.OneSegmentCorset(members, True)
            line = utils.calc_best_fit_line_polyfit(merged.repPoints, True)
            residual = utils.sqrd_dist_sum(merged.repPoints, line)
            costs[first, last] = residual * merged.weight
    return costs
def calc_coreset_prep_dist(D):
    """Compute the fitting cost of every consecutive run of coresets in ``D``.

    Entry ``[first_coreset, second_coreset]`` of the result, for
    ``first_coreset <= second_coreset``, is obtained by merging
    ``D[first_coreset..second_coreset]`` (inclusive) into a single coreset
    via ``Coreset.OneSegmentCorset(..., True)``, fitting a line to its
    representative points with ``utils.calc_best_fit_line_polyfit`` and
    scaling the resulting ``utils.sqrd_dist_sum`` by the merged coreset's
    weight. Entries below the diagonal are left at ``inf``.

    Args:
        D: Sequence of coreset objects, in order. Must support ``len`` and
            slicing.

    Returns:
        A ``len(D) x len(D)`` numpy array of costs.
    """
    size = len(D)
    prep_dist = np.full((size, size), float("inf"))
    for first_coreset in range(size):
        # we only want to calculate for segments that start in starting
        # coreset endpoints and end in ending coreset endpoints
        for second_coreset in range(first_coreset, size):
            run = list(D[first_coreset:second_coreset + 1])
            # The per-coreset weights are not collected here: only the
            # merged coreset's weight enters the cost below.
            coreset_of_coresets = Coreset.OneSegmentCorset(run, True)
            best_fit_line = utils.calc_best_fit_line_polyfit(
                coreset_of_coresets.repPoints, True
            )
            fitting_cost = (
                utils.sqrd_dist_sum(coreset_of_coresets.repPoints, best_fit_line)
                * coreset_of_coresets.weight
            )
            prep_dist[first_coreset, second_coreset] = fitting_cost
    return prep_dist