def test_rdp_0(self):
    """Check the output of ``rdp.rdp`` for five points on a horizontal line."""
    line = np.array([[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]])

    reduced, removed = rdp.rdp(line)

    # Indexes kept by the reduction: first and last point only.
    expected_reduced = np.array([0, 4])
    np.testing.assert_array_equal(reduced, expected_reduced)

    # Bookkeeping array returned alongside the kept indexes.
    expected_removed = np.array([[0, 3]])
    np.testing.assert_array_equal(removed, expected_removed)
def main(args): # define clustering methods cmethod = {Clustering.single: clustering.single_linkage, Clustering.complete: clustering.complete_linkage, Clustering.average: clustering.average_linkage} # get the expected file from the input file dirname = os.path.dirname(args.i) filename = os.path.splitext(os.path.basename(args.i))[0] expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv') expected = None if os.path.exists(expected_file): with open(expected_file, 'r') as f: reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC) expected = list(reader) else: expected = [] points = np.genfromtxt(args.i, delimiter=',') ## Knee detection code ## points_reduced, points_removed = rdp.rdp(points, args.r) knees = kneedle.auto_knees(points_reduced) t_k = pp.filter_worst_knees(points_reduced, knees) t_k = pp.filter_corner_knees(points_reduced, t_k) filtered_knees = pp.filter_clustring(points_reduced, t_k, cmethod[args.c], args.t, args.m) if args.a: knees = pp.add_points_even(points, points_reduced, filtered_knees, points_removed) else: knees = rdp.mapping(filtered_knees, points_reduced, points_removed) ########################## """
def test_rdp_mapping_two(self):
    """Check ``rdp.mapping`` on a flat run followed by a straight descent."""
    curve = np.array([[0, 3], [1, 3], [2, 3], [3, 2], [4, 1], [5, 0]])
    reduced, removed = rdp.rdp(curve)

    # Indexes expressed in the reduced space ...
    reduced_indexes = np.array([0, 1, 2])
    mapped = rdp.mapping(reduced_indexes, reduced, removed)

    # ... must come back as indexes into the original array.
    np.testing.assert_array_equal(mapped, np.array([0, 2, 5]))
def test_rdp_mapping_line(self):
    """Check ``rdp.mapping`` for five points on a horizontal line."""
    line = np.array([[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]])
    reduced, removed = rdp.rdp(line)

    # Both reduced-space indexes are mapped back to the original array.
    reduced_indexes = np.array([0, 1])
    mapped = rdp.mapping(reduced_indexes, reduced, removed)

    np.testing.assert_array_equal(mapped, np.array([0, 4]))
def test_rdp_1(self):
    """Check the output of ``rdp.rdp`` for a curve with a single step up."""
    step = np.array([[1, 5], [2, 5], [3, 6], [4, 6], [5, 6]])

    reduced, removed = rdp.rdp(step)

    # Indexes kept by the reduction.
    expected_reduced = np.array([0, 1, 2, 4])
    np.testing.assert_array_equal(reduced, expected_reduced)

    # Bookkeeping array returned alongside the kept indexes.
    expected_removed = np.array([[0, 0], [1, 0], [2, 1]])
    np.testing.assert_array_equal(removed, expected_removed)
def main(args):
    """Profile one ``rdp`` run on a CSV trace and plot the retained points.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (path of the
            comma-separated input file) and ``args.r`` (forwarded to ``rdp``
            as its second argument).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # Only the rdp call itself is measured by the profiler.
    profiler = cProfile.Profile()
    profiler.enable()
    points_reduced, removed = rdp(points, args.r)
    profiler.disable()
    pstats.Stats(profiler).sort_stats('cumtime').print_stats()

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(points_reduced), space_saving)

    # Map every reduced-space index back through `mapping`.
    reduced_space = np.arange(0, len(points_reduced))
    indexes = mapping(reduced_space, points_reduced, removed)

    # Full trace first, then the selected points on top of it.
    plt.plot(points[:, 0], points[:, 1])
    selected = points[indexes]
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)
    plt.show()
def main(args):
    """Detect knees on a CSV trace, add extra points, log accuracy and plot.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (path of the
            comma-separated input file) and ``args.r`` (forwarded to ``rdp``);
            the whole object is also handed to ``postprocessing``.
    """
    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, removed = rdp(points, args.r)

    # Every reduced point except index 0 is a knee candidate.
    knees = np.arange(1, len(points_reduced))

    # NOTE(review): the mapped result is not used below; the call is kept
    # because this rewrite must not change what the function executes.
    raw_knees = mapping(knees, points_reduced, removed)

    rdp_knees = postprocessing(points_reduced, knees, args)

    logger.info(f'Add curvature points...')
    previous_size = len(rdp_knees)
    filtered_knees = add_points_even(points, points_reduced, rdp_knees, removed, plt)
    current_size = len(filtered_knees)
    logger.info(f'Add curvature points ({current_size-previous_size})')
    plot_knees(plt, points, filtered_knees, 'Knees (Add points)')

    # Performance evaluation of the final knee set.
    avg_x, avg_y, avg_slope, avg_coefficients, cost = accuracy_trace(points, filtered_knees)
    logger.info('Performance %s %s %s %s %s', avg_x, avg_y, avg_slope, avg_coefficients, cost)

    plot_knees(plt, points, filtered_knees, 'Knees (Final)')
    plt.show()
def test_rdp_mapping_four(self):
    """Check ``rdp.mapping`` on a curve with a plateau, a peak and a descent."""
    curve = np.array([
        [2, 0], [3, 1], [4, 2], [5, 2], [6, 2], [7, 3],
        [8, 4], [9, 3], [10, 2], [11, 1], [12, 0],
    ])
    reduced, removed = rdp.rdp(curve)

    # Five reduced-space indexes are mapped back to the original array.
    reduced_indexes = np.array([0, 1, 2, 3, 4])
    mapped = rdp.mapping(reduced_indexes, reduced, removed)

    np.testing.assert_array_equal(mapped, np.array([0, 2, 4, 6, 10]))
def kneedle_novel(points, args):
    """Run kneedle on the RDP-reduced curve and return indexes into ``points``.

    Args:
        points: array indexed by the indexes that ``rdp.rdp`` returns.
        args: object providing ``r`` (forwarded to ``rdp.rdp``), ``c``
            (``t`` of ``filter_corner_knees``), and ``t`` / ``k`` (forwarded
            to ``filter_clustring``).

    Returns:
        The result of ``rdp.mapping`` applied to the filtered knees.
    """
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    # Candidate detection on the reduced curve.
    candidates = kneedle.auto_knees(points_reduced, p=kneedle.PeakDetection.All)

    # Successive filters, each consuming the output of the previous one.
    without_worst = pp.filter_worst_knees(points_reduced, candidates)
    without_corners = pp.filter_corner_knees(points_reduced, without_worst, t=args.c)
    clustered = pp.filter_clustring(
        points_reduced, without_corners, clustering.average_linkage, args.t, args.k)

    # Translate reduced-space indexes back through rdp.mapping.
    return rdp.mapping(clustered, reduced, removed)
def main(args):
    """Score RDP + z-method knee detection over every trace file in a directory.

    For each matching file the trace is reduced with ``rdp.rdp``, knees are
    detected with ``zmethod.knees``, mapped back with ``rdp.mapping`` and
    scored with ``evaluation.mcc``. One score per file is written to
    ``eval_rdp_metric_output.csv`` inside the input directory.

    Args:
        args: parsed command-line arguments. Reads ``args.p`` (directory,
            ``~`` is expanded), ``args.tr`` (a ``Trace`` member selecting the
            file name pattern), ``args.r`` / ``args.c`` / ``args.d``
            (forwarded to ``rdp.rdp`` as ``t`` / ``cost`` / ``distance``) and
            ``args.x`` / ``args.y`` / ``args.z`` (forwarded to
            ``zmethod.knees`` as ``dx`` / ``dy`` / ``dz``).
    """
    path = os.path.expanduser(args.p)
    directory = os.path.normpath(path)

    # Pick the file name pattern once instead of repeating the listing code.
    if args.tr is Trace.all:
        pattern = r'w[0-9]*-(lru|arc)\.csv'
    elif args.tr is Trace.arc:
        pattern = r'w[0-9]*-arc\.csv'
    else:
        pattern = r'w[0-9]*-lru\.csv'
    files = [f for f in os.listdir(path) if re.match(pattern, f)]

    scores = []
    for name in tqdm(files):
        # os.path.join keeps the path valid even when args.p has no trailing
        # separator (plain string concatenation did not).
        points = np.genfromtxt(os.path.join(path, name), delimiter=',')

        # Open the expected file that sits next to the trace, if any.
        stem = os.path.splitext(os.path.basename(name))[0]
        expected_file = os.path.join(directory, f'{stem}_expected.csv')
        expected = []
        if os.path.exists(expected_file):
            # newline='' is what the csv module requires for file objects.
            with open(expected_file, 'r', newline='') as f:
                reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
                expected = list(reader)
        expected = np.array(expected)

        # Original x maximum and [max, min] of y, taken before the reduction.
        x_max = max(points[:, 0])
        y_range = [max(points[:, 1]), min(points[:, 1])]

        # Run rdp.
        reduced, removed = rdp.rdp(points, t=args.r, cost=args.c, distance=args.d)
        points_reduced = points[reduced]

        # Knee detection; index 0 is discarded before mapping back.
        knees = zmethod.knees(points_reduced, dx=args.x, dy=args.y, dz=args.z,
                              x_max=x_max, y_range=y_range)
        knees = knees[knees > 0]
        knees = rdp.mapping(knees, reduced, removed)

        # Without any knee there is nothing to evaluate: score it as 0.0.
        if len(knees) > 0:
            cm = evaluation.cm(points, knees, expected)
            mcc = evaluation.mcc(cm)
        else:
            mcc = 0.0
        scores.append(mcc)

    # Output the results, one score per row.
    output = os.path.join(directory, 'eval_rdp_metric_output.csv')
    with open(output, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([s] for s in scores)
def compute_knee_points(r, t):
    """Return the reduced curve, its removed bookkeeping and the filtered knees.

    The ``rdp`` result is memoised in ``points_cache`` under ``r``, so the
    reduction is computed at most once per distinct ``r``.

    Args:
        r: second argument of ``rdp``; also the cache key.
        t: forwarded to ``postprocessing`` as keyword ``t``.

    Returns:
        Tuple ``(points_reduced, removed, filtered_knees)``.
    """
    # Fill the cache on a miss, then always read from it.
    if r not in points_cache:
        fresh_reduced, fresh_removed = rdp(points, r)
        points_cache[r] = (fresh_reduced, fresh_removed)
    points_reduced, removed = points_cache[r]

    # Every reduced point except index 0 is a candidate.
    candidates = np.arange(1, len(points_reduced))
    filtered_knees = postprocessing(points_reduced, candidates, t=t)
    return points_reduced, removed, filtered_knees
def kneedle_novel(points, args):
    """Run kneedle on the RDP-reduced curve and return indexes into ``points``.

    Args:
        points: array indexed by the indexes that ``rdp.rdp`` returns.
        args: object providing ``r`` (forwarded to ``rdp.rdp``), ``c``
            (``t`` of ``filter_corner_knees``), and ``t`` / ``k`` (forwarded
            to ``filter_clusters``).

    Returns:
        The result of ``rdp.mapping`` applied to the filtered knees.
    """
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    # Candidate detection on the reduced curve.
    candidates = kneedle.auto_knees(points_reduced, p=kneedle.PeakDetection.All)

    # Successive filters, each consuming the output of the previous one.
    without_worst = pp.filter_worst_knees(points_reduced, candidates)
    without_corners = pp.filter_corner_knees(points_reduced, without_worst, t=args.c)
    clustered = pp.filter_clusters(
        points_reduced, without_corners, clustering.average_linkage, args.t, args.k)

    # Translate reduced-space indexes back through rdp.mapping.
    return rdp.mapping(clustered, reduced, removed)
def main(args):
    """Plot a trace, its RDP-reduced points and a convex hull.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path),
            ``args.r`` / ``args.c`` / ``args.d`` (forwarded to ``rdp.rdp``),
            ``args.ch`` (a ``ConvexHull`` member selecting the hull routine)
            and ``args.s`` (a ``ConvexHullSource`` member selecting whether
            the hull is computed on the raw or on the reduced points).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A single-column file only carries y values: use the row number as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    space_saving = round((1.0 - (len(reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)

    hull_imp = {
        ConvexHull.hull: ch.graham_scan,
        ConvexHull.upper: ch.graham_scan_upper,
        ConvexHull.lower: ch.graham_scan_lower,
    }

    selected = points[reduced]

    # The hull is computed either on the raw trace or on the reduced one.
    hull_source = points if args.s is ConvexHullSource.raw else selected
    hull = hull_imp[args.ch](hull_source)
    hull_points = hull_source[hull]
    logger.info(hull)

    # Raw trace, reduced points, then the hull outline.
    plt.plot(points[:, 0], points[:, 1])
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)

    hull_x = hull_points[:, 0]
    hull_y = hull_points[:, 1]
    plt.plot(hull_x, hull_y, 'o', mec='r', color='none', lw=1, markersize=10)
    plt.fill(hull_x, hull_y, edgecolor='r', fill=False)
    plt.show()
def main(args):
    """Select corner knees on the RDP-reduced trace and plot them.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path),
            ``args.r`` / ``args.c`` / ``args.d`` (forwarded to ``rdp.rdp``),
            ``args.t1`` (``t`` of ``select_corner_knees``) and ``args.t2``
            (``t`` of ``filter_clusters_corners``).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A single-column file only carries y values: use the row number as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    space_saving = round((1.0 - (len(reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)

    points_reduced = points[reduced]

    # Every reduced point except index 0 starts as a candidate.
    knees = np.arange(1, len(points_reduced))
    logger.info(f'Knees {len(knees)}')

    # Keep only the corner points.
    knees = pp.select_corner_knees(points_reduced, knees, t=args.t1)
    logger.info(f'Knees {len(knees)}')

    # Cluster the remaining points together.
    knees = pp.filter_clusters_corners(points_reduced, knees, clustering.average_linkage, t=args.t2)
    logger.info(f'Knees {len(knees)}')

    plt.plot(points[:, 0], points[:, 1])

    # Map the knees to the original space before plotting them.
    knees = rdp.mapping(knees, reduced, removed)
    knee_points = points[knees]
    plt.plot(knee_points[:, 0], knee_points[:, 1], marker='o', markersize=3, linestyle='None')
    plt.show()
def main(args):
    """Plot a trace together with the points kept by ``rdp.rdp``.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path) and
            ``args.r`` / ``args.c`` / ``args.d`` (forwarded to ``rdp.rdp``).
    """
    points = np.genfromtxt(args.i, delimiter=',')

    # A single-column file only carries y values: use the row number as x.
    if points.ndim == 1:
        points = np.array([np.arange(0, len(points)), points]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)

    space_saving = round((1.0 - (len(reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)

    # Raw trace first, reduced points on top of it.
    plt.plot(points[:, 0], points[:, 1])
    selected = points[reduced]
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)
    plt.show()
def main(args):
    """Compare several knee detection methods on one trace.

    Each method is run, its knees are filtered and ranked, the errors and
    costs are logged per method and the result is plotted.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path),
            ``args.r`` (forwarded to ``rdp.rdp``), ``args.c`` (a
            ``Clustering`` member selecting the linkage function) and
            ``args.t`` / ``args.m`` (forwarded to ``filter_clustring``).
    """
    # The expected knees live next to the input as '<name>_expected.csv'.
    base_dir = os.path.dirname(args.i)
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(base_dir), f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))

    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, points_removed = rdp.rdp(points, args.r)

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(points_reduced), space_saving)

    # The first seven names label `methods`; the last two are added by hand.
    names = [
        'kneedle',
        'kneedke(Rec)',
        'l-method',
        'dfdt',
        'menger',
        'curvature',
        'Tyler (RDP)',
        'Tyler',
        'RDP',
    ]
    methods = [
        kneedle.auto_knees,
        kneedle.multi_knee,
        lmethod.multi_knee,
        dfdt.multi_knee,
        menger.multi_knee,
        curvature.multi_knee,
        ps.knees,
    ]

    knees = []
    knees_raw = []

    # Elbow methods, all applied to the reduced curve.
    for method in methods:
        found = method(points_reduced)
        knees.append(found)
        knees_raw.append(rdp.mapping(found, points_reduced, points_removed))

    # Tyler: applied to the raw points, so no mapping is involved.
    tyler_knees = ps.knees(points)
    knees.append(tyler_knees)
    knees_raw.append(tyler_knees)

    # RDP: every reduced point except index 0.
    rdp_knees = np.arange(1, len(points_reduced))
    knees.append(rdp_knees)
    knees_raw.append(rdp.mapping(rdp_knees, points_reduced, points_removed))

    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage,
    }

    # Cluster and select points.
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        # Drop index 0 from the knees.
        k = k[k != 0]

        if n != 'Tyler':
            kept = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(
                points_reduced, kept, cmethod[args.c], args.t, args.m)
            rankings.append(ranking.slope_ranking(points_reduced, filtered_knees))
            filtered_knees_raw.append(
                rdp.mapping(filtered_knees, points_reduced, points_removed))
        else:
            # Raw-space knees: kept as they are, with a constant rank of 1.0.
            filtered_knees_raw.append(k)
            rankings.append(np.full(len(k), 1.0))

    logger.info(f'Model MSE(knees) MSE(exp) Cost(tr) Cost(kn)')
    logger.info(f'----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        # Without expected knees the two MSE columns are reported as NaN.
        error_mse = math.nan
        error_mse_exp = math.nan
        if len(expected) > 0:
            error_mse = evaluation.mse(points, k, expected, evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected, evaluation.Strategy.expected)

        _, _, _, _, cost_trace = evaluation.accuracy_trace(points, k)
        _, _, _, _, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(
            f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}'
        )

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)
def main(args):
    """Detect knees from the RDP-reduced trace, evaluate, and optionally save/plot.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path),
            ``args.r`` (forwarded to ``rdp.rdp``), ``args.c`` (``t`` of
            ``filter_corner_knees``), ``args.t`` / ``args.k`` (forwarded to
            ``filter_clusters``), ``args.a`` (use ``add_points_even`` instead
            of ``rdp.mapping``), ``args.o`` (write the knee points to
            ``<name>_output.csv``) and ``args.g`` (show the plot).
    """
    # The expected knees live next to the input as '<name>_expected.csv'.
    dirname = os.path.normpath(os.path.dirname(args.i))
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(dirname, f'{filename}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        # newline='' is what the csv module requires for file objects.
        with open(expected_file, 'r', newline='') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    expected = np.array(expected)

    points = np.genfromtxt(args.i, delimiter=',')

    # Knee detection: every reduced point except index 0 is a candidate,
    # then the candidates go through the three filters in sequence.
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]
    knees = np.arange(1, len(reduced))
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k, t=args.c)
    filtered_knees = pp.filter_clusters(points_reduced, t_k, clustering.average_linkage, args.t, args.k)

    # Either add even points or simply map the knees back.
    if args.a:
        knees = pp.add_points_even(points, reduced, filtered_knees, removed)
    else:
        knees = rdp.mapping(filtered_knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected, evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected, evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info(f'RMSE(knees) RMSE(exp) MCC')
    logger.info(f'-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E} {mcc:10.2E}')

    # Store the output next to the input file.
    if args.o:
        output = os.path.join(dirname, f'{filename}_output.csv')
        # Without newline='' the csv writer emits blank rows on Windows.
        with open(output, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(points[knees])

    # Display the result.
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()
def main(args):
    """Log the pairwise correlation between four evaluation metrics.

    The metrics (two RMSPE variants, trace cost and knee cost) are collected
    over a fixed grid of ``r`` and ``t`` values and fed to ``np.corrcoef``.

    Args:
        args: parsed command-line arguments. Only ``args.i`` (CSV path) is
            read.
    """
    # The expected knees live next to the input as '<name>_expected.csv'.
    base_dir = os.path.dirname(args.i)
    stem = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(os.path.normpath(base_dir), f'{stem}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            expected = list(csv.reader(f, quoting=csv.QUOTE_NONNUMERIC))
    expected = np.array(expected)

    points = np.genfromtxt(args.i, delimiter=',')

    rs = [0.75, 0.80, 0.85, 0.90, 0.95]
    ts = [0.01, 0.02, 0.03, 0.04, 0.05]

    evaluations = []
    for r in rs:
        # Knee detection: done once per r, shared by every t below.
        points_reduced, points_removed = rdp.rdp(points, r)
        candidates = np.arange(1, len(points_reduced))
        t_k = pp.filter_worst_knees(points_reduced, candidates)
        t_k = pp.filter_corner_knees(points_reduced, t_k)

        for t in ts:
            # Clustering.
            filtered_knees = pp.filter_clustring(
                points_reduced, t_k, clustering.average_linkage, t, ClusterRanking.left)
            final_knees = pp.add_points_even(
                points, points_reduced, filtered_knees, points_removed)

            # Evaluation: one row of four metrics per (r, t) pair.
            error_rmspe = evaluation.rmspe(
                points, final_knees, expected, evaluation.Strategy.knees)
            error_rmspe_exp = evaluation.rmspe(
                points, final_knees, expected, evaluation.Strategy.expected)
            _, _, _, _, cost_trace = evaluation.accuracy_trace(points, final_knees)
            _, _, _, _, cost_knee = evaluation.accuracy_knee(points, final_knees)
            evaluations.append([error_rmspe, error_rmspe_exp, cost_trace, cost_knee])

    # Correlation between the metric columns (hence the transpose).
    rho = np.corrcoef(np.array(evaluations).T)

    # The six distinct pairs, in row-major order of the upper triangle.
    logger.info(
        f'{rho[0, 1]}, {rho[0, 2]}, {rho[0, 3]}, {rho[1, 2]}, {rho[1, 3]}, {rho[2, 3]}'
    )
def main(args):
    """Detect knees with the z-method on the RDP-reduced trace and evaluate them.

    Args:
        args: parsed command-line arguments. Reads ``args.i`` (CSV path),
            ``args.r`` (forwarded to ``rdp.rdp``), ``args.x`` / ``args.y`` /
            ``args.z`` (forwarded to ``zmethod.knees`` as ``dx`` / ``dy`` /
            ``dz``), ``args.a`` (use ``add_points_even`` instead of
            ``rdp.mapping``), ``args.o`` (write the knee points to
            ``<name>_output.csv``) and ``args.g`` (show the plot).
    """
    # The expected knees live next to the input as '<name>_expected.csv'.
    dirname = os.path.normpath(os.path.dirname(args.i))
    filename = os.path.splitext(os.path.basename(args.i))[0]
    expected_file = os.path.join(dirname, f'{filename}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        # newline='' is what the csv module requires for file objects.
        with open(expected_file, 'r', newline='') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    expected = np.array(expected)

    points = np.genfromtxt(args.i, delimiter=',')

    # Original x maximum and [max, min] of y, taken before the reduction.
    x_max = max(points[:, 0])
    y_range = [max(points[:, 1]), min(points[:, 1])]

    # Run rdp.
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    # Knee detection; index 0 is discarded.
    knees = zmethod.knees(points_reduced, dx=args.x, dy=args.y, dz=args.z,
                          x_max=x_max, y_range=y_range)
    knees = knees[knees > 0]

    # Either add even points or simply map the knees back.
    if args.a:
        knees = pp.add_points_even(points, reduced, knees, removed)
    else:
        knees = rdp.mapping(knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected, evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected, evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info(f'RMSE(knees) RMSE(exp) MCC')
    logger.info(f'-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E} {mcc:10.2E}')

    # Store the output next to the input file.
    if args.o:
        output = os.path.join(dirname, f'{filename}_output.csv')
        # Without newline='' the csv writer emits blank rows on Windows.
        with open(output, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(points[knees])

    # Display the result.
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()