def main():
    """Aggregate per-method experiment results into DataFrames and pickle them.

    Command-line arguments:
        -e/--experiment_paths_regexp: glob pattern matching result pickle files
        -k: number of trees fed to the set-cover objective
        -o/--output_path: destination path for the pickled dict of DataFrames
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--experiment_paths_regexp')
    parser.add_argument('-k', type=int)
    parser.add_argument('-o', '--output_path', required=True)

    args = parser.parse_args()

    paths = glob(args.experiment_paths_regexp)
    # Group runs by the first recorded CLI arg (the method name); within a
    # group, order runs by their float-valued 'U' parameter.
    groups_by_method = group_paths(paths,
                                   keyfunc=lambda p: p['args'][0],
                                   sort_keyfunc=lambda k: float(k['U']))
    setcover_obj_mat = []
    calculation_time_mat = []
    tree_size_obj_mean_mat = []
    tree_size_obj_median_mat = []

    index = []
    method_paths = []  # survives the loop; used below to derive columns
    for method, method_paths in groups_by_method:
        index.append(method)
        results = []
        for p in method_paths:
            # Pickle files must be read in binary mode.
            with open(p, 'rb') as f:
                results.append(pkl.load(f))
        setcover_obj_mat.append(
            [eval_setcover_obj_func(ctrees, args.k) for ctrees in results])
        calculation_time_mat.append(
            [eval_calculation_time(ctrees) for ctrees in results])
        tree_size_obj_mean_mat.append(
            [eval_tree_size_obj_mean(ctrees) for ctrees in results])
        tree_size_obj_median_mat.append(
            [eval_tree_size_obj_median(ctrees) for ctrees in results])

    print('index:', index)
    # NOTE(review): columns come from the LAST group's paths, as in the
    # original code — presumably every method shares the same 'U' values.
    columns = [float(parse_result_path(p)['U']) for p in method_paths]
    print(columns)
    print(np.asarray(setcover_obj_mat).shape)
    print(np.asarray(tree_size_obj_mean_mat).shape)
    df1 = pd.DataFrame(setcover_obj_mat, index=index, columns=columns)
    df2 = pd.DataFrame(calculation_time_mat, index=index, columns=columns)
    df3 = pd.DataFrame(tree_size_obj_mean_mat, index=index, columns=columns)
    df4 = pd.DataFrame(tree_size_obj_median_mat, index=index, columns=columns)

    ret = {
        'setcover-objective': df1,
        'calculation-time': df2,
        'treesize-objective-mean': df3,
        'treesize-objective-median': df4
    }

    # Pickle files must be written in binary mode ('w' fails under Python 3).
    with open(args.output_path, 'wb') as f:
        pkl.dump(ret, f)
# ---- Example #2 ----
def group_paths(result_paths, keyfunc, sort_keyfunc=None):
    """Group result paths by a key derived from their parsed parameters.

    result_paths: iterable of path strings understood by parse_result_path
    keyfunc: maps the parsed-parameter dict to a group key
    sort_keyfunc: optional; maps the parsed-parameter dict to the sort key
        used to order paths inside a group (otherwise (path, params) tuples
        are sorted directly)

    Returns a list of (key, [path, ...]) tuples.
    """
    exp_params = [(p, parse_result_path(p)) for p in result_paths]
    # Tuple-unpacking lambda parameters (`lambda (_, params): ...`) were
    # removed in Python 3 (PEP 3113); index into the pair explicitly.
    key = lambda pair: keyfunc(pair[1])
    exp_params = sorted(exp_params, key=key)  # groupby requires sorted input
    ret = []
    for k, g in itertools.groupby(exp_params, key):
        members = list(g)
        if sort_keyfunc is None:
            members.sort()
        else:
            members.sort(key=lambda pair: sort_keyfunc(pair[1]))
        ret.append((k, [p for p, _ in members]))
    return ret
def group_paths(result_paths, keyfunc, sort_keyfunc=None):
    """Group result paths by a key derived from their parsed parameters.

    result_paths: iterable of path strings understood by parse_result_path
    keyfunc: maps the parsed-parameter dict to a group key
    sort_keyfunc: optional; maps the parsed-parameter dict to the sort key
        used to order paths inside a group (otherwise (path, params) tuples
        are sorted directly)

    Returns a list of (key, [path, ...]) tuples.
    """
    exp_params = [(p, parse_result_path(p)) for p in result_paths]
    # Tuple-unpacking lambda parameters (`lambda (_, params): ...`) were
    # removed in Python 3 (PEP 3113); index into the pair explicitly.
    key = lambda pair: keyfunc(pair[1])
    exp_params = sorted(exp_params, key=key)  # groupby requires sorted input
    ret = []
    for k, g in itertools.groupby(exp_params, key):
        members = list(g)
        if sort_keyfunc is None:
            members.sort()
        else:
            members.sort(key=lambda pair: sort_keyfunc(pair[1]))
        ret.append((k, [p for p, _ in members]))
    return ret
# ---- Example #4 ----
def evaluate_general(
    result_paths,
    interactions_paths,
    events_paths,
    metrics,
    x_axis_name,
    x_axis_type,
    group_key,
    group_key_name_func,
    sort_keyfunc=None,
    xticks=None,
    K=10,
):
    """
    Return a dict: metric name -> DataFrame (index=group name, columns=xticks).

    group_key: the legend part
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis
    xticks: explicit x values; inferred from the results when falsy
    K: number of best trees to evaluate per result
    """
    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # Infer x labels when not given. `xticks=None` replaces the original
    # mutable default `[]`; `if not xticks` treats both identically.
    if not xticks:
        ticks = set()
        for _key, _paths in groups:
            # NOTE(review): reads groups[0][1] on every iteration, as in the
            # original — presumably all groups span the same x values.
            ticks |= set(get_values_by_key(groups[0][1], x_axis_name, x_axis_type))
        xticks = sorted(ticks)

    group_keys = [k for k, _ in groups]

    # Get metric names via one throw-away evaluation (extra computing).
    with open(events_paths[0], 'rb') as f:
        example_true_events = pkl.load(f)
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    with open(groups[0][1][0], 'rb') as f:
        example_trees = pkl.load(f)
    metric_names = list(evaluate_meta_tree_result(
        example_true_events, k_best_trees(example_trees, K),
        example_all_entry_ids, metrics
    ).keys())

    # Enhance groups with interactions/events paths keyed by result path.
    result_path2all_paths = {
        tpl[0]: tpl for tpl in zip(result_paths, interactions_paths, events_paths)
    }
    enhanced_groups = defaultdict(list)
    # renamed from `result_paths` to avoid shadowing the parameter
    for k, grp_paths in groups:
        i = 0
        for x in xticks:
            if i < len(grp_paths) and x_axis_type(
                    parse_result_path(grp_paths[i])[x_axis_name]) == x:
                enhanced_groups[k].append(result_path2all_paths[grp_paths[i]])
                i += 1
            else:
                # the result is absent for this x tick
                enhanced_groups[k].append((None, None, None))

    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in enhanced_groups[method]:
            if result_path is None:
                data2d.append([np.nan for _m in metric_names])
            else:
                with open(events_path, 'rb') as f:
                    true_events = pkl.load(f)
                with open(result_path, 'rb') as f:
                    trees = pkl.load(f)
                # list(...) because dict views are not sequences in Python 3
                data2d.append(list(evaluate_meta_tree_result(
                    true_events,
                    k_best_trees(trees, K),
                    get_interaction_ids(interactions_path),
                    metrics,
                ).values()))
        data3d.append(data2d)
    print(metric_names)

    # 3d array (method, x, metric) -> reorder axes to (metric, method, x)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    # itertools.izip was removed in Python 3; builtin zip is lazy there.
    for metric, matrix in zip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)

    return ret
# ---- Example #5 ----
def get_values_by_key(result_paths, key, map_func):
    """Extract parameter `key` from each parsed result path, mapped through `map_func`."""
    values = []
    for path in result_paths:
        params = parse_result_path(path)
        values.append(map_func(params[key]))
    return values
def evaluate_general(result_paths,
                     interactions_paths,
                     events_paths,
                     metrics,
                     x_axis_name,
                     x_axis_type,
                     group_key,
                     group_key_name_func,
                     sort_keyfunc=None,
                     xticks=None,
                     K=10):
    """
    Return a dict: metric name -> DataFrame (index=group name, columns=xticks).

    group_key: the legend part
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis
    xticks: explicit x values; inferred from the results when falsy
    K: number of best trees to evaluate per result
    """
    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # Infer x labels when not given. `xticks=None` replaces the original
    # mutable default `[]`; `if not xticks` treats both identically.
    if not xticks:
        ticks = set()
        for _key, _paths in groups:
            # NOTE(review): reads groups[0][1] on every iteration, as in the
            # original — presumably all groups span the same x values.
            ticks |= set(
                get_values_by_key(groups[0][1], x_axis_name, x_axis_type))
        xticks = sorted(ticks)

    group_keys = [k for k, _ in groups]

    # Get metric names via one throw-away evaluation (extra computing).
    with open(events_paths[0], 'rb') as f:
        example_true_events = pkl.load(f)
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    with open(groups[0][1][0], 'rb') as f:
        example_trees = pkl.load(f)
    metric_names = list(evaluate_meta_tree_result(
        example_true_events, k_best_trees(example_trees, K),
        example_all_entry_ids, metrics).keys())

    # Enhance groups with interactions/events paths keyed by result path.
    result_path2all_paths = {
        tpl[0]: tpl
        for tpl in zip(result_paths, interactions_paths, events_paths)
    }
    enhanced_groups = defaultdict(list)
    # renamed from `result_paths` to avoid shadowing the parameter
    for k, grp_paths in groups:
        i = 0
        for x in xticks:
            if i < len(grp_paths) and x_axis_type(
                    parse_result_path(grp_paths[i])[x_axis_name]) == x:
                enhanced_groups[k].append(result_path2all_paths[grp_paths[i]])
                i += 1
            else:
                # the result is absent for this x tick
                enhanced_groups[k].append((None, None, None))

    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in enhanced_groups[
                method]:
            if result_path is None:
                data2d.append([np.nan for _m in metric_names])
            else:
                with open(events_path, 'rb') as f:
                    true_events = pkl.load(f)
                with open(result_path, 'rb') as f:
                    trees = pkl.load(f)
                # list(...) because dict views are not sequences in Python 3
                data2d.append(list(evaluate_meta_tree_result(
                    true_events,
                    k_best_trees(trees, K),
                    get_interaction_ids(interactions_path),
                    metrics).values()))
        data3d.append(data2d)
    print(metric_names)

    # 3d array (method, x, metric) -> reorder axes to (metric, method, x)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    # itertools.izip was removed in Python 3; builtin zip is lazy there.
    for metric, matrix in zip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)

    return ret
def get_values_by_key(result_paths, key, map_func):
    """Extract parameter `key` from each parsed result path, mapped through `map_func`."""
    values = []
    for path in result_paths:
        params = parse_result_path(path)
        values.append(map_func(params[key]))
    return values
# ---- Example #8 ----
def main():
    """Aggregate per-method experiment results into DataFrames and pickle them.

    Command-line arguments:
        -e/--experiment_paths_regexp: glob pattern matching result pickle files
        -k: number of trees fed to the set-cover objective
        -o/--output_path: destination path for the pickled dict of DataFrames
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--experiment_paths_regexp')
    parser.add_argument('-k', type=int)
    parser.add_argument('-o', '--output_path', required=True)

    args = parser.parse_args()

    paths = glob(args.experiment_paths_regexp)
    # Group runs by the first recorded CLI arg (the method name); within a
    # group, order runs by their float-valued 'U' parameter.
    groups_by_method = group_paths(paths,
                                   keyfunc=lambda p: p['args'][0],
                                   sort_keyfunc=lambda k: float(k['U']))
    setcover_obj_mat = []
    calculation_time_mat = []
    tree_size_obj_mean_mat = []
    tree_size_obj_median_mat = []

    index = []
    method_paths = []  # survives the loop; used below to derive columns
    for method, method_paths in groups_by_method:
        index.append(method)
        results = []
        for p in method_paths:
            # Pickle files must be read in binary mode.
            with open(p, 'rb') as f:
                results.append(pkl.load(f))
        setcover_obj_mat.append(
            [eval_setcover_obj_func(ctrees, args.k) for ctrees in results])
        calculation_time_mat.append(
            [eval_calculation_time(ctrees) for ctrees in results])
        tree_size_obj_mean_mat.append(
            [eval_tree_size_obj_mean(ctrees) for ctrees in results])
        tree_size_obj_median_mat.append(
            [eval_tree_size_obj_median(ctrees) for ctrees in results])

    print('index:', index)
    # NOTE(review): columns come from the LAST group's paths, as in the
    # original code — presumably every method shares the same 'U' values.
    columns = [float(parse_result_path(p)['U']) for p in method_paths]
    print(columns)
    print(np.asarray(setcover_obj_mat).shape)
    print(np.asarray(tree_size_obj_mean_mat).shape)
    df1 = pd.DataFrame(setcover_obj_mat, index=index, columns=columns)
    df2 = pd.DataFrame(calculation_time_mat, index=index, columns=columns)
    df3 = pd.DataFrame(tree_size_obj_mean_mat, index=index, columns=columns)
    df4 = pd.DataFrame(tree_size_obj_median_mat, index=index, columns=columns)

    ret = {
        'setcover-objective': df1,
        'calculation-time': df2,
        'treesize-objective-mean': df3,
        'treesize-objective-median': df4
    }

    # Pickle files must be written in binary mode ('w' fails under Python 3).
    with open(args.output_path, 'wb') as f:
        pkl.dump(ret, f)