def test_calc_best_fit_line_weighted(self):
    """Fit a weighted line through three sample points and print it.

    All three weights are 1.0. Nothing is asserted; the fitted line is
    only written to stdout for manual inspection.
    """
    sample_points = np.array([
        [1, 3.2627812, -3.1364346],
        [2, 3.4707861, -3.28776192],
        [3, 3.67879099, -3.43908923],
    ])
    unit_weights = [1.0, 1.0, 1.0]
    # Single-argument print(...) behaves the same as the print statement.
    print(utils.calc_best_fit_line_polyfit(sample_points, unit_weights))
def calc_prep_dist(P):
    """Tabulate the line-fitting cost of every contiguous run of rows of P.

    For each pair ``start < end`` the rows ``start..end`` (inclusive) are
    fitted with ``utils.calc_best_fit_line_polyfit`` and the entry
    ``[start, end]`` is set to ``utils.sqrd_dist_sum`` of those rows against
    that line. Entries on and below the diagonal are left at ``inf``.

    Args:
        P: point table, sliced as ``P[start:end + 1, :]``.

    Returns:
        A ``len(P) x len(P)`` float array of costs.
    """
    n = len(P)
    costs = np.full((n, n), float("inf"))
    # Walk the strict upper triangle row by row.
    for start in range(n):
        for end in range(start + 1, n):
            rows = P[start:end + 1, :]
            fitted = utils.calc_best_fit_line_polyfit(rows)
            costs[start, end] = utils.sqrd_dist_sum(rows, fitted)
    return costs
def calc_weighted_prep_dist(pw):
    """Tabulate the weighted line-fitting cost of every contiguous run of rows.

    Entry ``[start, end]`` with ``start < end`` holds the cost of rows
    ``start..end`` (inclusive). Runs of exactly two rows cost 0 without any
    fitting. Longer runs are fitted with ``utils.calc_best_fit_line_polyfit``
    and scored with ``utils.sqrd_dist_sum_weighted``. Entries on and below
    the diagonal are left at ``inf``.

    Args:
        pw: table whose first three columns are passed as the points and
            whose remaining columns, flattened, are passed as the weights
            (presumably a single weight column -- confirm with callers).

    Returns:
        A ``len(pw) x len(pw)`` float array of costs.
    """
    n = len(pw)
    costs = np.full((n, n), float("inf"))
    for start in range(n - 1):
        # Two neighbouring rows are assigned zero cost directly.
        costs[start, start + 1] = 0
        for end in range(start + 2, n):
            stop = end + 1
            rows = pw[start:stop, :3]
            row_weights = pw[start:stop, 3:].flatten()
            fitted = utils.calc_best_fit_line_polyfit(rows, row_weights)
            costs[start, end] = utils.sqrd_dist_sum_weighted(
                rows, fitted, w=row_weights
            )
    return costs
def calc_coreset_prep_dist(D):
    """Tabulate the fitting cost of every contiguous run of coresets in D.

    For each pair ``first <= last`` the coresets ``D[first..last]`` are
    merged with ``Coreset.OneSegmentCorset(..., True)``. A line is fitted to
    the merged representative points, and entry ``[first, last]`` is set to
    ``utils.sqrd_dist_sum`` of those points against the line, multiplied by
    the merged coreset's ``weight``. Entries below the diagonal are left at
    ``inf``.

    NOTE(review): this source contains a second definition of
    ``calc_coreset_prep_dist`` further down; if both live in the same
    module, the later one replaces this one at import time.

    Args:
        D: sequence of coresets, sliced as ``D[first:last + 1]``.

    Returns:
        A ``len(D) x len(D)`` float array of costs.
    """
    n = len(D)
    costs = np.full((n, n), float("inf"))
    # Only runs that start at one coreset and end at the same or a later one.
    for first in range(n):
        for last in range(first, n):
            members = list(D[first:last + 1])
            merged = Coreset.OneSegmentCorset(members, True)
            # NOTE(review): other call sites pass weights as the second
            # argument; here it is the literal True -- confirm intent.
            fitted = utils.calc_best_fit_line_polyfit(merged.repPoints, True)
            residual = utils.sqrd_dist_sum(merged.repPoints, fitted)
            costs[first, last] = residual * merged.weight
    return costs
def calc_coreset_prep_dist(D):
    """Tabulate the fitting cost of every contiguous run of coresets in D.

    For each pair ``first_coreset <= second_coreset`` the coresets in
    ``D[first_coreset..second_coreset]`` are merged with
    ``Coreset.OneSegmentCorset(..., True)``. A line is fitted to the merged
    representative points, and the table entry is set to
    ``utils.sqrd_dist_sum`` of those points against the line, multiplied by
    the merged coreset's ``weight``. Entries below the diagonal are left at
    ``inf``.

    Args:
        D: sequence of coresets, sliced as ``D[first:second + 1]``.

    Returns:
        A ``len(D) x len(D)`` float array of costs.
    """
    prep_dist = np.full((len(D), len(D)), float("inf"))
    for (first_coreset, second_coreset), _ in np.ndenumerate(prep_dist):
        # We only want segments that start at a starting coreset endpoint
        # and end at an ending coreset endpoint (upper triangle + diagonal).
        if first_coreset > second_coreset:
            continue
        members = list(D[first_coreset:second_coreset + 1])
        coreset_of_coresets = Coreset.OneSegmentCorset(members, True)
        # NOTE(review): other call sites pass weights as the second
        # argument; here it is the literal True -- confirm intent.
        best_fit_line = utils.calc_best_fit_line_polyfit(
            coreset_of_coresets.repPoints, True
        )
        fitting_cost = (
            utils.sqrd_dist_sum(coreset_of_coresets.repPoints, best_fit_line)
            * coreset_of_coresets.weight
        )
        prep_dist[first_coreset, second_coreset] = fitting_cost
    return prep_dist
def BalancedPartition(P, a, bicritiriaEst, is_coreset=False):
    """Greedily cut P into consecutive blocks and package each block.

    Items are accumulated one at a time. A block is closed when the
    accumulated ``one_seg_cost`` exceeds ``bicritiriaEst`` (and, for raw
    points, enough points are both in the block and left over), or when the
    end of the input is reached. A closed block excludes the item that
    triggered the cut; that item starts the next block.

    Args:
        P: the input. With ``is_coreset`` false, a 2-D array of points whose
            first column is read as the signal index. With ``is_coreset``
            true, a list of coreset objects exposing ``.C.repPoints``,
            ``.b`` and ``.e``. P is not modified.
        a: not used by this function; kept for interface compatibility.
        bicritiriaEst: cost threshold above which a block is closed.
        is_coreset: whether P holds coreset objects instead of raw points.

    Returns:
        A list with one entry per block, in input order. Entries are
        ``coreset(C, g, b, e)`` objects. In coreset mode, a single input
        coreset whose cost alone exceeds the threshold is passed through
        unchanged. Empty input yields an empty list.
    """
    if len(P) == 0:
        # Nothing to partition; previously this crashed on P[0] / T[0].
        return []

    if is_coreset:
        dimensions = P[0].C.repPoints.shape[1]
        # Work on a copy so the sentinel below is not appended to the
        # caller's list.
        points = list(P)
        points.append(P[0])  # arbitrary coreset n+1 (sentinel)
    else:
        dimensions = P.shape[1]
        # arbitrary point n+1 (sentinel)
        points = np.vstack((P, np.zeros(dimensions)))

    n = len(points)
    min_block = dimensions + 1
    Q = []  # items of the block currently being accumulated
    D = []  # finished blocks

    for i, item in enumerate(points):
        Q.append(item)
        cost = one_seg_cost(np.asarray(Q), is_coreset)
        is_last = i == n - 1

        # A block of raw points can be closed only if:
        # 1) the cost passed the threshold,
        # 2) it holds more than dimensions + 1 points, and
        # 3) at least dimensions + 1 points remain to be packaged later.
        # Coresets only need condition 1. The last item always closes.
        size_ok = is_coreset or (
            len(Q) > min_block and min_block <= n - 1 - i
        )
        if not ((cost > bicritiriaEst and size_ok) or is_last):
            continue

        if is_coreset and len(Q) == 1:
            # A lone coreset cannot be split further: pass it through as
            # is, unless it is the trailing sentinel.
            if not is_last:
                D.append(Q[0])
                Q = []
            continue

        # Everything except the item that triggered the cut.
        T = Q[:-1]
        C = OneSegmentCorset(T, is_coreset)
        # NOTE(review): OneSegmentCorset is evaluated a second time on an
        # ndarray view of T; kept because it is unclear from here whether
        # C.repPoints is interchangeable with this result.
        g = utils.calc_best_fit_line_polyfit(
            OneSegmentCorset(np.asarray(T), is_coreset).repPoints
        )
        if is_coreset:
            b = T[0].b
            e = T[-1].e
        else:
            b = T[0][0]   # signal index of first item in T
            e = T[-1][0]  # signal index of last item in T
        D.append(coreset(C, g, b, e))
        Q = [Q[-1]]

    return D