def test_rdp_mapping_two(self):
    """Map three RDP-space indexes back onto a curve with two segments."""
    trace = np.array([[0, 3], [1, 3], [2, 3], [3, 2], [4, 1], [5, 0]])
    reduced, removed = rdp.rdp(trace)
    mapped = rdp.mapping(np.array([0, 1, 2]), reduced, removed)
    np.testing.assert_array_equal(mapped, np.array([0, 2, 5]))
def main(args):
    """Detect knees on a trace with Kneedle over an RDP-simplified curve.

    Reads the trace from ``args.i``, loads the optional ground-truth file
    ``<input>_expected.csv`` from the same directory, runs Kneedle plus the
    post-processing filters, and maps the knees back to the full trace.
    """
    # map the CLI clustering choice onto the concrete linkage function
    cmethod = {Clustering.single: clustering.single_linkage,
               Clustering.complete: clustering.complete_linkage,
               Clustering.average: clustering.average_linkage}

    # derive the expected-knees file name from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # FIX: the original f-string had no placeholder and ignored `filename`
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')

    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)

    points = np.genfromtxt(args.i, delimiter=',')

    ## Knee detection code ##
    points_reduced, points_removed = rdp.rdp(points, args.r)
    knees = kneedle.auto_knees(points_reduced)
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k)
    filtered_knees = pp.filter_clustring(points_reduced, t_k, cmethod[args.c], args.t, args.m)

    # map back into the full-resolution trace, optionally adding evenly
    # spaced points along long smooth segments
    if args.a:
        knees = pp.add_points_even(points, points_reduced, filtered_knees, points_removed)
    else:
        knees = rdp.mapping(filtered_knees, points_reduced, points_removed)
    # NOTE(review): the original ended with a stray unterminated `"""` —
    # presumably the start of a commented-out section lost in truncation;
    # it was removed here because it makes the module unparsable.
def main(args):
    """Tune (r, t) with a genetic algorithm, then rank and plot the knees."""
    global points
    points = np.genfromtxt(args.i, delimiter=',')

    # search space: r (RDP threshold) in [.9, .99], t (clustering) in [.01, .1]
    bounds = np.asarray([[.9, .99], [0.01, 0.1]])
    best, score = genetic_algorithm(objective, bounds, selection, crossover, mutation)

    # truncate the tuned parameters to two decimal places
    r = round(best[0] * 100.0) / 100.0
    t = round(best[1] * 100.0) / 100.0
    logger.info('%s (%s, %s, %s) = %s', args.i, r, t, args.a, score)

    points_reduced, removed, knees = compute_knee_points(r, t)
    rankings = slope_ranking(points_reduced, knees)
    filtered_knees = mapping(knees, points_reduced, removed)

    # either show the ranked knees interactively or save them to args.o
    if args.o is None:
        plot_ranking(plt, points, filtered_knees, rankings, '')
        plt.show()
    else:
        plot_ranking(plt, points, filtered_knees, rankings, args.o)
        plt.savefig(args.o)
def main(args):
    """Run the full RDP + postprocessing knee pipeline and plot the result."""
    points = np.genfromtxt(args.i, delimiter=',')

    points_reduced, removed = rdp(points, args.r)

    # every simplified point except the first starts as a knee candidate
    knees = np.arange(1, len(points_reduced))
    raw_knees = mapping(knees, points_reduced, removed)

    rdp_knees = postprocessing(points_reduced, knees, args)

    # densify long smooth segments with evenly spaced extra points
    logger.info(f'Add curvature points...')
    previous_size = len(rdp_knees)
    filtered_knees = add_points_even(points, points_reduced, rdp_knees, removed, plt)
    current_size = len(filtered_knees)
    logger.info(f'Add curvature points ({current_size-previous_size})')
    plot_knees(plt, points, filtered_knees, 'Knees (Add points)')

    # performance evaluation over the full trace
    average_x, average_y, average_slope, average_coeffients, cost = accuracy_trace(points, filtered_knees)
    logger.info('Performance %s %s %s %s %s', average_x, average_y,
                average_slope, average_coeffients, cost)

    plot_knees(plt, points, filtered_knees, 'Knees (Final)')
    plt.show()
def test_rdp_mapping_line(self):
    """A straight line reduces to its two endpoints; mapping restores them."""
    trace = np.array([[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]])
    reduced, removed = rdp.rdp(trace)
    mapped = rdp.mapping(np.array([0, 1]), reduced, removed)
    np.testing.assert_array_equal(mapped, np.array([0, 4]))
def main(args):
    """Profile the RDP reduction and overlay the kept points on the trace."""
    points = np.genfromtxt(args.i, delimiter=',')

    # profile only the reduction step
    profiler = cProfile.Profile()
    profiler.enable()
    points_reduced, removed = rdp(points, args.r)
    profiler.disable()
    stats = pstats.Stats(profiler).sort_stats('cumtime')
    stats.print_stats()

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(points_reduced), space_saving)

    # map every reduced index back to the original trace
    indexes = mapping(np.arange(0, len(points_reduced)), points_reduced, removed)

    plt.plot(points[:, 0], points[:, 1])
    selected = points[indexes]
    plt.plot(selected[:, 0], selected[:, 1], marker='o', markersize=3)
    plt.show()
def test_rdp_mapping_four(self):
    """Map five RDP-space indexes of a multi-segment curve back to the trace."""
    trace = np.array([[2, 0], [3, 1], [4, 2], [5, 2], [6, 2], [7, 3],
                      [8, 4], [9, 3], [10, 2], [11, 1], [12, 0]])
    reduced, removed = rdp.rdp(trace)
    mapped = rdp.mapping(np.array([0, 1, 2, 3, 4]), reduced, removed)
    np.testing.assert_array_equal(mapped, np.array([0, 2, 4, 6, 10]))
def kneedle_novel(points, args):
    """Kneedle detection on an RDP-simplified trace plus post-processing.

    Returns knee indexes mapped back into the full-resolution trace.
    """
    reduced, removed = rdp.rdp(points, args.r)
    simplified = points[reduced]
    # detect, then progressively filter the candidate knees
    candidates = kneedle.auto_knees(simplified, p=kneedle.PeakDetection.All)
    candidates = pp.filter_worst_knees(simplified, candidates)
    candidates = pp.filter_corner_knees(simplified, candidates, t=args.c)
    candidates = pp.filter_clustring(simplified, candidates, clustering.average_linkage, args.t, args.k)
    return rdp.mapping(candidates, reduced, removed)
def main(args):
    """Evaluate Z-method knee detection (after RDP) over a directory of traces.

    For every matching trace the detected knees are compared against the
    ``<trace>_expected.csv`` ground truth and the MCC score is appended to
    ``eval_rdp_metric_output.csv`` in the same directory.
    """
    path = os.path.expanduser(args.p)

    # select the trace files for the requested cache policy
    if args.tr is Trace.all:
        pattern = r'w[0-9]*-(lru|arc)\.csv'
    elif args.tr is Trace.arc:
        pattern = r'w[0-9]*-arc\.csv'
    else:
        pattern = r'w[0-9]*-lru\.csv'
    files = [f for f in os.listdir(path) if re.match(pattern, f)]

    scores = []
    for file in tqdm(files):
        # FIX: os.path.join is robust to args.p lacking a trailing separator
        # (the original concatenated f'{path}{file}' directly)
        trace_file = os.path.join(path, file)
        points = np.genfromtxt(trace_file, delimiter=',')

        # open the expected (ground truth) file, if present
        dirname = os.path.dirname(trace_file)
        filename = os.path.splitext(os.path.basename(file))[0]
        # FIX: the original f-string had no placeholder and ignored `filename`
        expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')
        expected = []
        if os.path.exists(expected_file):
            with open(expected_file, 'r') as f:
                reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
                expected = list(reader)
        expected = np.array(expected)

        # original x maximum and y range (inputs to the Z-method)
        x_max = [max(x) for x in zip(*points)][0]
        y_range = [[max(y), min(y)] for y in zip(*points)][1]

        # run rdp
        reduced, removed = rdp.rdp(points, t=args.r, cost=args.c, distance=args.d)
        points_reduced = points[reduced]

        ## Knee detection code ##
        knees = zmethod.knees(points_reduced, dx=args.x, dy=args.y, dz=args.z,
                              x_max=x_max, y_range=y_range)
        knees = knees[knees > 0]
        knees = rdp.mapping(knees, reduced, removed)

        if len(knees) > 0:
            cm = evaluation.cm(points, knees, expected)
            mcc = evaluation.mcc(cm)
        else:
            # no knee detected: score the trace as 0
            mcc = 0.0
        scores.append(mcc)

    # output the results
    dirname = os.path.expanduser(args.p)
    output = os.path.join(os.path.normpath(dirname), 'eval_rdp_metric_output.csv')
    with open(output, 'w') as f:
        writer = csv.writer(f)
        for s in scores:
            writer.writerow([s])
def kneedle_novel(points, args):
    """Kneedle detection on an RDP-simplified trace plus post-processing.

    Variant using ``pp.filter_clusters``; returns knee indexes mapped back
    into the full-resolution trace.
    """
    reduced, removed = rdp.rdp(points, args.r)
    simplified = points[reduced]
    # detect, then progressively filter the candidate knees
    candidates = kneedle.auto_knees(simplified, p=kneedle.PeakDetection.All)
    candidates = pp.filter_worst_knees(simplified, candidates)
    candidates = pp.filter_corner_knees(simplified, candidates, t=args.c)
    candidates = pp.filter_clusters(simplified, candidates, clustering.average_linkage, args.t, args.k)
    return rdp.mapping(candidates, reduced, removed)
def main(args):
    """Select corner knees on an RDP-simplified trace and plot them."""
    points = np.genfromtxt(args.i, delimiter=',')

    # promote a 1-D series to (x, y) points with an implicit x axis
    if points.ndim == 1:
        y = points
        x = np.arange(0, len(y))
        points = np.array([x, y]).T

    reduced, removed = rdp.rdp(points, args.r, cost=args.c, distance=args.d)
    space_saving = round((1.0 - (len(reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(reduced), space_saving)

    points_reduced = points[reduced]

    # all rdp points are candidates, except extremes
    knees = np.arange(1, len(points_reduced))
    logger.info(f'Knees {len(knees)}')
    # filter out all non-corner points
    knees = pp.select_corner_knees(points_reduced, knees, t=args.t1)
    logger.info(f'Knees {len(knees)}')
    # cluster points together
    knees = pp.filter_clusters_corners(points_reduced, knees, clustering.average_linkage, t=args.t2)
    logger.info(f'Knees {len(knees)}')

    plt.plot(points[:, 0], points[:, 1])

    # map the points to the original space
    knees = rdp.mapping(knees, reduced, removed)
    knee_points = points[knees]
    plt.plot(knee_points[:, 0], knee_points[:, 1], marker='o', markersize=3, linestyle = 'None')
    plt.show()
def main(args):
    """Benchmark several knee-detection methods on one trace.

    Runs each method over the RDP-simplified trace, clusters/ranks the
    knees, and reports MSE (against the ``<input>_expected.csv`` ground
    truth, when present) plus trace/knee cost metrics.
    """
    # derive the expected-knees file name from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # FIX: the original f-string had no placeholder and ignored `filename`
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')
    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)

    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, points_removed = rdp.rdp(points, args.r)
    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0, 2)
    logger.info('Number of data points after RDP: %s(%s %%)', len(points_reduced), space_saving)

    names = ['kneedle', 'kneedke(Rec)', 'l-method', 'dfdt', 'menger',
             'curvature', 'Tyler (RDP)', 'Tyler', 'RDP']
    # NOTE: zip truncates to the 7 methods; 'Tyler' and 'RDP' are added below
    methods = [kneedle.auto_knees, kneedle.multi_knee, lmethod.multi_knee,
               dfdt.multi_knee, menger.multi_knee, curvature.multi_knee, ps.knees]

    knees = []
    knees_raw = []
    # Elbow methods (run in RDP space, mapped back to the raw trace)
    for m, n in zip(methods, names):
        tmp = m(points_reduced)
        knees.append(tmp)
        knees_raw.append(rdp.mapping(tmp, points_reduced, points_removed))

    # Tyler (runs directly on the full-resolution trace)
    candidates = ps.knees(points)
    knees.append(candidates)
    knees_raw.append(candidates)

    # RDP baseline: every simplified point (except the first) is a knee
    candidates = np.arange(1, len(points_reduced))
    knees.append(candidates)
    knees_raw.append(rdp.mapping(candidates, points_reduced, points_removed))

    cmethod = {Clustering.single: clustering.single_linkage,
               Clustering.complete: clustering.complete_linkage,
               Clustering.average: clustering.average_linkage}

    # Cluster and select points
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        # remove 0 index in the knees
        k = k[k != 0]
        if n == 'Tyler':
            # Tyler already works in raw space; give every knee rank 1.0
            filtered_knees_raw.append(k)
            rankings.append(np.full(len(k), 1.0))
        else:
            t_k = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(points_reduced, t_k, cmethod[args.c], args.t, args.m)
            rankings.append(ranking.slope_ranking(points_reduced, filtered_knees))
            filtered_knees_raw.append(rdp.mapping(filtered_knees, points_reduced, points_removed))

    logger.info(f'Model MSE(knees) MSE(exp) Cost(tr) Cost(kn)')
    logger.info(f'----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        if len(expected) > 0:
            error_mse = evaluation.mse(points, k, expected, evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected, evaluation.Strategy.expected)
        else:
            # no ground truth available
            error_mse = math.nan
            error_mse_exp = math.nan
        _, _, _, _, cost_trace = evaluation.accuracy_trace(points, k)
        _, _, _, _, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}')

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)
def main(args):
    """Detect knees on a trace (RDP candidates + cluster filtering) and score them.

    Compares the result against the ``<input>_expected.csv`` ground truth
    (RMSPE and MCC), optionally writes the knee points to
    ``<input>_output.csv`` and/or plots them.
    """
    # derive the expected-knees file name from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # FIX: the original f-string had no placeholder and ignored `filename`
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')
    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    expected = np.array(expected)

    points = np.genfromtxt(args.i, delimiter=',')

    ## Knee detection code ##
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]
    # every simplified point except the first is a candidate
    knees = np.arange(1, len(reduced))
    t_k = pp.filter_worst_knees(points_reduced, knees)
    t_k = pp.filter_corner_knees(points_reduced, t_k, t=args.c)
    filtered_knees = pp.filter_clusters(points_reduced, t_k, clustering.average_linkage, args.t, args.k)

    # map back into the raw trace, optionally adding evenly spaced points
    if args.a:
        knees = pp.add_points_even(points, reduced, filtered_knees, removed)
    else:
        knees = rdp.mapping(filtered_knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected, evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected, evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info(f'RMSE(knees) RMSE(exp) MCC')
    logger.info(f'-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E} {mcc:10.2E}')

    # store output
    if args.o:
        # FIX: reuse dirname/filename computed above; the original also had a
        # placeholder-less f-string here instead of f'{filename}_output.csv'
        output = os.path.join(os.path.normpath(dirname), f'{filename}_output.csv')
        dataset = points[knees]
        with open(output, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(dataset)

    # display result
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()
def add_points_even(points: np.ndarray, points_reduced: np.ndarray, knees: np.ndarray,
                    removed: np.ndarray, tx: float = 0.05, ty: float = 0.05) -> np.ndarray:
    """
    Add evenly spaced points between knees points.

    Whenever a smooth segment between two knee points is further away than tx
    (on the X-axis) and ty (on the Y-axis), evenly spaced points are added to
    the result. This function will map the knees (in RDP space) into the space
    of the complete set of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        points_reduced (np.ndarray): numpy array with the points (x, y) (simplified by RDP)
        knees (np.ndarray): knees indexes (in RDP space)
        removed (np.ndarray): the points that were removed
        tx (float): the threshold (X-axis) for adding points (in percentage, default 0.05)
        ty (float): the threshold (Y-axis) for adding points (in percentage, default 0.05)

    Returns:
        np.ndarray: the resulting knees (mapped into the complete set of points)
    """
    # new knees
    new_knees = []
    # compute the delta x and y for the complete trace
    # NOTE(review): uses first/last point deltas, so this assumes a monotonic
    # trace where the extremes bound the range — confirm against callers
    dx = math.fabs(points[-1][0] - points[0][0])
    dy = math.fabs(points[-1][1] - points[0][1])
    # compute the candidates
    candidates = []
    # check between knees: every consecutive pair of reduced points whose
    # relative span exceeds the thresholds marks a segment to densify
    for i in range(1, len(points_reduced)):
        left = i-1
        right = i
        pdx = math.fabs(points_reduced[right][0] - points_reduced[left][0])/dx
        pdy = math.fabs(points_reduced[right][1] - points_reduced[left][1])/dy
        if pdx > (2.0*tx) and pdy > ty:
            # candidates are appended in (left, right) pairs — consumed as
            # pairs below
            candidates.append(left)
            candidates.append(right)
    # Map candidates into the complete set of points
    candidates = np.array(candidates)
    candidates = rdp.mapping(candidates, points_reduced, removed)
    # Map knees into the complete set of points
    knees = rdp.mapping(knees, points_reduced, removed)
    # Process candidates as pairs
    for i in range(0, len(candidates), 2):
        left = candidates[i]
        right = candidates[i+1]
        # segment span relative to the whole trace (in raw-point space)
        pdx = math.fabs(points[right][0] - points[left][0])/dx
        # number of evenly spaced points needed so each gap is <= 2*tx
        number_points = int(math.ceil(pdx/(2.0*tx)))
        inc = int((right-left)/number_points)
        idx = left
        for _ in range(number_points):
            idx = idx + inc
            new_knees.append(idx)
    knees_idx = np.concatenate((knees, new_knees))
    # np.concatenate generates float array when one is empty (see https://github.com/numpy/numpy/issues/8878)
    knees_idx = knees_idx.astype(int)
    # deduplicate and return in ascending index order
    knees_idx = np.unique(knees_idx)
    knees_idx.sort()
    return knees_idx
def main(args):
    """Detect knees with the Z-method on an RDP-simplified trace and score them.

    Compares the result against the ``<input>_expected.csv`` ground truth
    (RMSPE and MCC), optionally writes the knee points to
    ``<input>_output.csv`` and/or plots them.
    """
    # derive the expected-knees file name from the input file
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # FIX: the original f-string had no placeholder and ignored `filename`
    expected_file = os.path.join(os.path.normpath(dirname), f'{filename}_expected.csv')
    expected = []
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    expected = np.array(expected)

    points = np.genfromtxt(args.i, delimiter=',')

    # get original x_max and y_ranges (inputs to the Z-method)
    x_max = [max(x) for x in zip(*points)][0]
    y_range = [[max(y), min(y)] for y in zip(*points)][1]

    # run rdp
    reduced, removed = rdp.rdp(points, args.r)
    points_reduced = points[reduced]

    ## Knee detection code ##
    knees = zmethod.knees(points_reduced, dx=args.x, dy=args.y, dz=args.z,
                          x_max=x_max, y_range=y_range)
    knees = knees[knees > 0]

    # map back into the raw trace, optionally adding evenly spaced points
    if args.a:
        knees = pp.add_points_even(points, reduced, knees, removed)
    else:
        knees = rdp.mapping(knees, reduced, removed)

    rmspe_k = evaluation.rmspe(points, knees, expected, evaluation.Strategy.knees)
    rmspe_e = evaluation.rmspe(points, knees, expected, evaluation.Strategy.expected)
    cm = evaluation.cm(points, knees, expected, t=0.01)
    mcc = evaluation.mcc(cm)

    logger.info(f'RMSE(knees) RMSE(exp) MCC')
    logger.info(f'-------------------------------------------')
    logger.info(f'{rmspe_k:10.2E} {rmspe_e:10.2E} {mcc:10.2E}')

    # store output
    if args.o:
        # FIX: reuse dirname/filename computed above; the original also had a
        # placeholder-less f-string here instead of f'{filename}_output.csv'
        output = os.path.join(os.path.normpath(dirname), f'{filename}_output.csv')
        dataset = points[knees]
        with open(output, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(dataset)

    # display result
    if args.g:
        x = points[:, 0]
        y = points[:, 1]
        plt.plot(x, y)
        plt.plot(x[knees], y[knees], 'r+')
        plt.show()
def add_points_even(points: np.ndarray, reduced: np.ndarray, knees: np.ndarray,
                    removed: np.ndarray, tx: float = 0.05, ty: float = 0.05,
                    extremes: bool = False) -> np.ndarray:
    """
    Add evenly spaced points between knees points.

    Whenever a smooth segment between two knee points is further away than tx
    (on the X-axis) and ty (on the Y-axis), evenly spaced points are added to
    the result. This function will map the knees (in RDP space) into the space
    of the complete set of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        reduced (np.ndarray): numpy array with the index of the reduced points (x, y) (simplified by RDP)
        knees (np.ndarray): knees indexes (in RDP space)
        removed (np.ndarray): the points that were removed
        tx (float): the threshold (X-axis) for adding points (in percentage, default 0.05)
        ty (float): the threshold (Y-axis) for adding points (in percentage, default 0.05)
        extremes (bool): if True adds the extreme points (first and last) (default False)

    Returns:
        np.ndarray: the resulting knees (mapped into the complete set of points)
    """
    points_reduced = points[reduced]
    # compute the delta x and y for the complete trace
    # (uses global min/max per axis, unlike the first/last-point variant)
    max_x, max_y = points.max(axis=0)
    min_x, min_y = points.min(axis=0)
    dx = math.fabs(max_x - min_x)
    dy = math.fabs(max_y - min_y)
    # compute the candidates
    candidates = []
    # check between knees: every consecutive pair of reduced points whose
    # relative span exceeds the thresholds marks a segment to densify
    for i in range(1, len(points_reduced)):
        left = i - 1
        right = i
        pdx = math.fabs(points_reduced[right][0] - points_reduced[left][0]) / dx
        pdy = math.fabs(points_reduced[right][1] - points_reduced[left][1]) / dy
        if pdx > (2.0 * tx) and pdy > ty:
            # candidates are appended in (left, right) pairs — consumed as
            # pairs below
            candidates.append(left)
            candidates.append(right)
    # Map candidates into the complete set of points
    candidates = np.array(candidates)
    candidates = rdp.mapping(candidates, reduced, removed)
    # new knees
    new_knees = []
    # Process candidates as pairs
    for i in range(0, len(candidates), 2):
        left = candidates[i]
        right = candidates[i + 1]
        # segment span relative to the whole trace (in raw-point space)
        pdx = math.fabs(points[right][0] - points[left][0]) / dx
        # number of evenly spaced points needed so each gap is <= 2*tx
        number_points = int(math.ceil(pdx / (2.0 * tx)))
        inc = int((right - left) / number_points)
        idx = left
        for _ in range(number_points):
            idx = idx + inc
            new_knees.append(idx)
    # filter worst knees that may be added due in this function
    # but keep the detected knees
    #new_knees = filter_worst_knees(points, new_knees)
    # Map knees into the complete set of points
    knees = rdp.mapping(knees, reduced, removed)
    # Add extremes points to the output
    if extremes:
        extremes_idx = [0, len(points) - 1]
        knees_idx = np.concatenate((knees, new_knees, extremes_idx))
    else:
        knees_idx = np.concatenate((knees, new_knees))
    # np.concatenate generates float array when one is empty (see https://github.com/numpy/numpy/issues/8878)
    knees_idx = knees_idx.astype(int)
    # deduplicate, sort, and drop any spurious knees introduced above
    knees_idx = np.unique(knees_idx)
    knees_idx.sort()
    #return knees_idx
    return filter_worst_knees(points, knees_idx)