def main():
    """Aggregate experiment result pickles into per-metric DataFrames.

    Globs result paths from -e/--experiment_paths_regexp, groups them by
    method name, evaluates four metrics per result, and pickles a
    {metric-name: DataFrame} dict to -o/--output_path.

    NOTE(review): this module defines ``main`` twice; this first copy is
    shadowed by the identical definition later in the file (dead code).
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--experiment_paths_regexp')
    parser.add_argument('-k', type=int)
    parser.add_argument('-o', '--output_path', required=True)
    args = parser.parse_args()
    paths = glob(args.experiment_paths_regexp)
    # print(args.experiment_paths_regexp)
    # print(paths)
    # group by method name (first positional arg of the experiment);
    # inside each group, paths are sorted by their U parameter
    groups_by_method = group_paths(paths,
                                   keyfunc=lambda p: p['args'][0],
                                   sort_keyfunc=lambda k: float(k['U']))
    # print(len(groups_by_method))
    setcover_obj_mat = []
    calculation_time_mat = []
    tree_size_obj_mean_mat = []
    tree_size_obj_median_mat = []
    index = []  # one row label (method name) per group
    for method, paths in groups_by_method:
        index.append(method)
        # NOTE(review): file handles are never closed and pickles are opened
        # in text mode
        results = [pkl.load(open(p)) for p in paths]
        setcover_obj_mat.append(
            [eval_setcover_obj_func(ctrees, args.k) for ctrees in results])
        calculation_time_mat.append(
            [eval_calculation_time(ctrees) for ctrees in results])
        tree_size_obj_mean_mat.append(
            [eval_tree_size_obj_mean(ctrees) for ctrees in results])
        tree_size_obj_median_mat.append(
            [eval_tree_size_obj_median(ctrees) for ctrees in results])
    print('index:', index)
    # NOTE(review): ``paths`` here is the loop variable leaked from the last
    # group; assumes every group shares the same sorted U values — confirm
    columns = [float(parse_result_path(p)['U']) for p in paths]
    print(columns)
    print(np.asarray(setcover_obj_mat).shape)
    print(np.asarray(tree_size_obj_mean_mat).shape)
    df1 = pd.DataFrame(setcover_obj_mat, index=index, columns=columns)
    df2 = pd.DataFrame(calculation_time_mat, index=index, columns=columns)
    df3 = pd.DataFrame(tree_size_obj_mean_mat, index=index, columns=columns)
    df4 = pd.DataFrame(tree_size_obj_median_mat, index=index, columns=columns)
    ret = {
        'setcover-objective': df1,
        'calculation-time': df2,
        'treesize-objective-mean': df3,
        'treesize-objective-median': df4
    }
    # NOTE(review): pickle output written in text mode ('w') and the handle
    # is never closed
    pkl.dump(ret, open(args.output_path, 'w'))
def group_paths(result_paths, keyfunc, sort_keyfunc=None):
    """Group result paths by a key derived from their parsed parameters.

    Parameters
    ----------
    result_paths : list of str
        Paths to experiment result files.
    keyfunc : callable
        Maps the parsed parameter dict of a path to its group key.
    sort_keyfunc : callable, optional
        Maps the parsed parameter dict to a sort key used to order paths
        inside each group; when None, groups are sorted by the
        (path, params) pairs themselves.

    Returns
    -------
    list of (key, [path, ...]) tuples — a flattened array of groups.
    """
    exp_params = [(p, parse_result_path(p)) for p in result_paths]

    # Py2-only tuple-parameter lambdas (``lambda (_, params): ...``) are
    # SyntaxErrors under Python 3 — replaced with explicit indexing.
    key = lambda pair: keyfunc(pair[1])
    # itertools.groupby only merges adjacent equal keys, so sort first
    exp_params = sorted(exp_params, key=key)

    ret = []
    for k, g in itertools.groupby(exp_params, key):
        if sort_keyfunc is None:
            sorted_g = sorted(g)  # sorted() consumes the iterator directly
        else:
            sorted_g = sorted(g, key=lambda pair: sort_keyfunc(pair[1]))
        ret.append((k, [p for p, _ in sorted_g]))
    return ret
def evaluate_general(
        result_paths, interactions_paths, events_paths,
        metrics,
        x_axis_name, x_axis_type,
        group_key,
        group_key_name_func,
        sort_keyfunc=[].__class__ and [] or None,  # see NOTE below
        xticks=[],
        K=10,
):
    """
    Return a 3D table

    group_key: the legend part
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis
    K: number of best trees kept per result (passed to k_best_trees)

    Returns {metric name: DataFrame(rows=groups, columns=xticks)}.

    NOTE(review): this definition is shadowed by an identical
    ``evaluate_general`` later in the file; only the later copy is live.
    NOTE(review): ``xticks=[]`` is a shared mutable default argument.
    """
    groups = group_paths(result_paths, group_key, sort_keyfunc)
    # inferring x labels
    if not xticks:
        xticks = set()
        for k, paths in groups:
            # NOTE(review): reads groups[0][1] instead of ``paths`` — only
            # the first group's x values are ever collected
            xticks |= set(get_values_by_key(groups[0][1],
                                            x_axis_name, x_axis_type))
        xticks = sorted(xticks)
    group_keys = [k for k, _ in groups]
    # NOTE(review): legend_names is computed but never used
    legend_names = [group_key_name_func(k) for k in group_keys]
    # get metric names from one example evaluation
    example_true_events = pkl.load(open(events_paths[0]))
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    metric_names = evaluate_meta_tree_result(
        example_true_events,
        k_best_trees(pkl.load(open(groups[0][1][0])), K),
        example_all_entry_ids,
        metrics
    ).keys()
    # extra computing
    # enhance groups with the interactions/events paths of each result path
    result_path2all_paths = {tpl[0]: tpl
                             for tpl in zip(result_paths,
                                            interactions_paths,
                                            events_paths)}
    enhanced_groups = defaultdict(list)
    # NOTE(review): the loop variable below shadows the ``result_paths``
    # parameter
    for k, result_paths in groups:
        i = 0
        for x in xticks:
            if i < len(result_paths) and x_axis_type(
                    parse_result_path(result_paths[i])[x_axis_name]) == x:
                enhanced_groups[k].append(
                    result_path2all_paths[result_paths[i]])
                i += 1
            else:
                # the result is absent for this x tick
                enhanced_groups[k].append((None, None, None))
    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in enhanced_groups[
                method]:
            if result_path is None:
                # missing result: one NaN per metric
                data2d.append([np.nan for m in metric_names])
            else:
                # NOTE(review): .values() alignment with metric_names relies
                # on consistent dict ordering across separate dicts — confirm
                data2d.append(
                    evaluate_meta_tree_result(
                        pkl.load(open(events_path)),
                        k_best_trees(pkl.load(open(result_path)), K),
                        get_interaction_ids(interactions_path),
                        metrics,
                    ).values()
                )
        data3d.append(data2d)
    print metric_names
    # method, x_axis, metric
    # some checking on size of results for different methods should be done
    # filling None if possible
    # 3d array: (method, U, metric)
    # change axis to (metric, method, U)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)
    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    for metric, matrix in itertools.izip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)
    return ret
def get_values_by_key(result_paths, key, map_func):
    """Extract one parsed parameter from each result path.

    For every path, parse its parameters, look up ``key`` and pass the raw
    value through ``map_func`` (e.g. ``float``).

    Returns the list of mapped values, one per path, in input order.
    """
    values = []
    for path in result_paths:
        params = parse_result_path(path)
        values.append(map_func(params[key]))
    return values
def evaluate_general(result_paths, interactions_paths, events_paths,
                     metrics,
                     x_axis_name, x_axis_type,
                     group_key,
                     group_key_name_func,
                     sort_keyfunc=None,
                     xticks=None,
                     K=10):
    """Evaluate grouped experiment results into per-metric DataFrames.

    group_key: the legend part (rows of each DataFrame)
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis (columns of each DataFrame)
    K: number of best trees kept per result (passed to k_best_trees)

    Returns {metric name: DataFrame(rows=group names, columns=xticks)}.
    Missing (group, xtick) results are filled with NaN.
    """
    # mutable default argument ``xticks=[]`` replaced by a None sentinel
    if xticks is None:
        xticks = []

    def _load_pickle(path):
        # binary mode + guaranteed close (was: pkl.load(open(path)))
        with open(path, 'rb') as f:
            return pkl.load(f)

    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # infer x labels from *every* group
    # (bug fix: the loop previously read groups[0][1] on each iteration, so
    # only the first group's x values were ever collected)
    if not xticks:
        xticks = set()
        for _, grp_paths in groups:
            xticks |= set(
                get_values_by_key(grp_paths, x_axis_name, x_axis_type))
        xticks = sorted(xticks)

    group_keys = [k for k, _ in groups]

    # run one example evaluation just to learn the metric names
    example_true_events = _load_pickle(events_paths[0])
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    metric_names = list(evaluate_meta_tree_result(
        example_true_events,
        k_best_trees(_load_pickle(groups[0][1][0]), K),
        example_all_entry_ids,
        metrics).keys())

    # enhance groups with the interactions/events paths of each result path
    result_path2all_paths = {
        tpl[0]: tpl
        for tpl in zip(result_paths, interactions_paths, events_paths)
    }
    enhanced_groups = defaultdict(list)
    # renamed loop variable: previously shadowed the result_paths parameter
    for k, grp_paths in groups:
        i = 0
        for x in xticks:
            if i < len(grp_paths) and x_axis_type(
                    parse_result_path(grp_paths[i])[x_axis_name]) == x:
                enhanced_groups[k].append(
                    result_path2all_paths[grp_paths[i]])
                i += 1
            else:
                # the result for this x tick is absent
                enhanced_groups[k].append((None, None, None))

    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in \
                enhanced_groups[method]:
            if result_path is None:
                # missing result: one NaN per metric
                data2d.append([np.nan for _m in metric_names])
            else:
                row = evaluate_meta_tree_result(
                    _load_pickle(events_path),
                    k_best_trees(_load_pickle(result_path), K),
                    get_interaction_ids(interactions_path),
                    metrics)
                # index by metric_names instead of .values() so columns
                # cannot silently mis-align if dict ordering differs
                data2d.append([row[m] for m in metric_names])
        data3d.append(data2d)

    # consistent with the print-function style used elsewhere in this file
    # (was the Py2-only statement form ``print metric_names``)
    print(metric_names)

    # data3d axes: (method, x, metric) -> swap to (metric, method, x)
    data3d = np.swapaxes(np.asarray(data3d), 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    # builtin zip replaces itertools.izip (removed in Python 3)
    for metric, matrix in zip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)
    return ret
def main():
    """Aggregate experiment result pickles into per-metric DataFrames.

    Globs result paths from -e/--experiment_paths_regexp, groups them by
    method name, evaluates four metrics per result, and pickles a
    {metric-name: DataFrame} dict to -o/--output_path.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--experiment_paths_regexp')
    parser.add_argument('-k', type=int)
    parser.add_argument('-o', '--output_path', required=True)
    args = parser.parse_args()

    paths = glob(args.experiment_paths_regexp)

    # group by method name (first positional arg of the experiment);
    # inside each group, paths are sorted by their U parameter
    groups_by_method = group_paths(paths,
                                   keyfunc=lambda p: p['args'][0],
                                   sort_keyfunc=lambda k: float(k['U']))

    setcover_obj_mat = []
    calculation_time_mat = []
    tree_size_obj_mean_mat = []
    tree_size_obj_median_mat = []
    index = []  # one row label (method name) per group
    columns = []
    for method, method_paths in groups_by_method:
        index.append(method)
        results = []
        for p in method_paths:
            # binary mode + guaranteed close (was: pkl.load(open(p)))
            with open(p, 'rb') as f:
                results.append(pkl.load(f))
        setcover_obj_mat.append(
            [eval_setcover_obj_func(ctrees, args.k) for ctrees in results])
        calculation_time_mat.append(
            [eval_calculation_time(ctrees) for ctrees in results])
        tree_size_obj_mean_mat.append(
            [eval_tree_size_obj_mean(ctrees) for ctrees in results])
        tree_size_obj_median_mat.append(
            [eval_tree_size_obj_median(ctrees) for ctrees in results])
        # U values of this (sorted) group become the DataFrame columns;
        # every group is assumed to share the same U values. Previously
        # this was computed after the loop from the leaked loop variable.
        columns = [float(parse_result_path(p)['U']) for p in method_paths]

    print('index:', index)
    print(columns)
    print(np.asarray(setcover_obj_mat).shape)
    print(np.asarray(tree_size_obj_mean_mat).shape)

    ret = {
        'setcover-objective': pd.DataFrame(
            setcover_obj_mat, index=index, columns=columns),
        'calculation-time': pd.DataFrame(
            calculation_time_mat, index=index, columns=columns),
        'treesize-objective-mean': pd.DataFrame(
            tree_size_obj_mean_mat, index=index, columns=columns),
        'treesize-objective-median': pd.DataFrame(
            tree_size_obj_median_mat, index=index, columns=columns),
    }
    # pickles must be written in binary mode (was 'w', which corrupts the
    # stream on Python 3/Windows); ``with`` guarantees flush and close
    with open(args.output_path, 'wb') as f:
        pkl.dump(ret, f)