Example #1
def user_cluster_map_eval_ml_metrics(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "user cluster MAP evaluation:")

    print_and_log(logger, "user cluster MAP: {}".format(
            ml_metrics.mapk(result['gt_all_user_cluster'],
                            result['pred_all_user_cluster'])
            ))

    # use the distinct ground-truth chain lengths as the cutoff values k
    k_list = []
    for i in range(len(result['chain_name'])):
        k_list.append(len(result['gt_all_user_cluster'][i]))
    k_list = sorted(list(set(k_list)))
    if 0 in k_list:
        k_list.remove(0)
    print_and_log(logger, "all possible k: {}".format(k_list))

    for k in k_list:
        map_at_k = ml_metrics.mapk(result['gt_all_user_cluster'],
                                   result['pred_all_user_cluster'],
                                   k)
        print_and_log(logger, "user cluster MAP@{}: {}".format(
                int(k), map_at_k))

    return result
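For context, here is a minimal standalone sketch (not from the original repository) of how ml_metrics.mapk is typically called: it expects lists of lists and truncates each predicted list at k.

import ml_metrics

actual = [[1, 2, 3], [4, 5]]       # ground-truth items per chain
predicted = [[1, 9, 2], [5, 4]]    # ranked predictions per chain
print(ml_metrics.mapk(actual, predicted, k=3))   # mean average precision at k=3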
Example #2
def event_type_map_eval_given_gt(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "event type MAP evaluation:")

    result['et_ap'] = list()

    for a, p in zip(result['gt_all_event_id'], result['pred_all_event_id']):
        AP = precision_score(a, p, average='macro')
        result['et_ap'].append(AP)

    map_re = np.mean(result['et_ap'])
    result['et_map'] = map_re

    print_and_log(logger, "event type MAP: {}".format(round(map_re, 4)))

    return result
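Note that precision_score(a, p, average='macro') is scikit-learn's macro-averaged precision over the label classes of one chain, not a ranking-style average precision. A minimal sketch of the call, with made-up label lists:

from sklearn.metrics import precision_score

a = [0, 1, 2, 1]   # ground-truth event ids for one chain
p = [0, 1, 1, 1]   # predicted event ids for one chain (same length as a)
print(precision_score(a, p, average='macro'))   # unweighted mean of per-class precision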
Example #3
def event_type_map_eval(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "event type MAP evaluation:")

    result['et_ap'] = list()

    for a, p in zip(result['gt_all_event_id'], result['pred_all_event_id']):
        AP = compute_AP(a, p)
        result['et_ap'].append(AP)

    map_re = np.mean(result['et_ap'])
    result['et_map'] = map_re

    print_and_log(logger, "event type MAP: {}".format(round(map_re, 4)))

    return result
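compute_AP is a helper defined elsewhere in this code base and is not shown here. As a purely hypothetical sketch, a ranking-style average precision over one predicted sequence could look like this:

def compute_AP_sketch(actual, predicted):
    # Hypothetical illustration only; the project's real compute_AP may differ.
    if len(actual) == 0 or len(predicted) == 0:
        return 0.0
    hits, score = 0, 0.0
    for rank, item in enumerate(predicted, start=1):
        if item in actual:
            hits += 1
            score += hits / rank
    return score / min(len(actual), len(predicted))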
Example #4
def user_cluster_map_eval(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "user cluster MAP evaluation:")

    result['uc_ap'] = list()

    for a, p in zip(result['gt_all_user_cluster'],
                    result['pred_all_user_cluster']):
        AP = compute_AP(a, p)
        result['uc_ap'].append(AP)

    map_re = np.mean(result['uc_ap'])
    result['uc_map'] = map_re

    print_and_log(logger, "user cluster MAP: {}".format(round(map_re, 4)))

    return result
Example #5
def event_type_categorical_accuracy_eval_given_gt(
                config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "event type categorical accuracy evaluation:")

    y_true = []
    y_pred = []

    for a, p in zip(result['gt_all_event_id'], result['pred_all_event_id']):
        y_true += a
        y_pred += p

    result['et_cate'] = accuracy_score(y_true, y_pred)

    print_and_log(logger, "event type categorical accuracy: {}".format(
            round(result['et_cate'], 4)))

    return result
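The accuracy here is computed over all chains pooled together: the per-chain label lists are concatenated and scored with scikit-learn's accuracy_score. A minimal standalone sketch:

from sklearn.metrics import accuracy_score

y_true = [0, 1, 2] + [1, 1]   # two chains concatenated
y_pred = [0, 2, 2] + [1, 0]
print(accuracy_score(y_true, y_pred))   # 3 correct out of 5 -> 0.6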
Example #6
def user_cluster_categorical_accuracy_eval_given_gt(
                config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "user cluster categorical accuracy evaluation:")

    y_true = []
    y_pred = []

    for a, p in zip(result['gt_all_user_cluster'],
                    result['pred_all_user_cluster']):
        y_true += a
        y_pred += p

    result['uc_cate'] = accuracy_score(y_true, y_pred)

    print_and_log(logger, "user cluster categorical accuracy: {}".format(
            round(result['uc_cate'], 4)))

    return result
Example #7
def time_delay_overall_evaluation(config, logger, result, result_save_path,
                                  plot_ts=True, chain_length_eval=True):
    print_and_log(logger, "====================================")
    print_and_log(logger, "time delay evaluation:")

    # statistics
    pred_all = []
    gt_all = []
    avg_dtw = []
    avg_mse = []
    result["td_DTW"] = list()
    result["td_MSE"] = list()
    for i in range(len(result['chain_name'])):
        pred_time_delay = result['pred_all_time_delay'][i]
        gt_time_delay = result['gt_all_time_delay'][i]
        if len(pred_time_delay) == 0:
            pred_time_delay = [-1]
        if len(gt_time_delay) == 0:
            gt_time_delay = [-1]
        avg_dtw.append(fastdtw(gt_time_delay, pred_time_delay)[0])
        result["td_DTW"].append(avg_dtw[-1])
        if len(gt_time_delay) == len(pred_time_delay):
            avg_mse.append(mean_squared_error(gt_time_delay, pred_time_delay))
            result["td_MSE"].append(avg_mse[-1])
        else:
            result["td_MSE"].append('null')
        if len(result['pred_all_time_delay'][i]) != 0:
            pred_all += pred_time_delay
        if len(result['gt_all_time_delay'][i]) != 0:
            gt_all += gt_time_delay

    print_and_log(logger, "Average DTW: {}".format(round(np.mean(avg_dtw), 4)))
    if config['given_gt']:
        print_and_log(logger, "Average MSE: {}".format(np.mean(avg_mse)))
    print_and_log(logger, "MAX predicted: {}, ground truth: {}".format(
                          round(max(pred_all), 4),
                          round(max(gt_all), 4)))
    print_and_log(logger, "MIN predicted: {}, ground truth: {}".format(
                          round(min(pred_all), 4),
                          round(min(gt_all), 4)))
    print_and_log(logger, "MEAN predicted: {}, ground truth: {}".format(
                          round(np.mean(pred_all), 4),
                          round(np.mean(gt_all), 4)))
    print_and_log(logger, "STD predicted: {}, ground truth: {}".format(
                          round(np.std(pred_all), 4),
                          round(np.std(gt_all), 4)))

    # chain length evaluation
    if chain_length_eval:
        length_mae = []
        length_stat = dict()
        length_stat["gt_chain_0"] = 0
        length_stat["gt_chain_1"] = 0
        length_stat["Same_as_gt"] = 0
        length_stat["diff_1_to_10"] = 0
        length_stat["diff_10_to_100"] = 0
        length_stat["diff_100+"] = 0

    if chain_length_eval and 'chains_applied_keep_pred' in result:
        length_stat["applied_threshold"] = len(
                result["chains_applied_keep_pred"])

    sim_start = config['sim_period']['start'].split('T')[0]
    sim_end = config['sim_period']['end'].split('T')[0]

    if plot_ts:
        time_delay_plot_save_path = os.path.join(
                result_save_path, "time_delay_plot")
        if not os.path.exists(time_delay_plot_save_path):
            os.makedirs(time_delay_plot_save_path)

    if chain_length_eval or plot_ts:
        for i in range(len(result['chain_name'])):
            chain = result['chain_name'][i]
            pred_time_delay = result['pred_all_time_delay'][i]
            gt_time_delay = result['gt_all_time_delay'][i]

            if plot_ts:
                plot_time_delay_ts_for_one_chain(chain,
                                                 time_delay_plot_save_path,
                                                 pred_time_delay,
                                                 gt_time_delay,
                                                 sim_start, sim_end)

            if chain_length_eval:
                length_mae.append(
                        abs(len(pred_time_delay) - len(gt_time_delay)))

                if len(gt_time_delay) == 0:
                    length_stat["gt_chain_0"] += 1
                if len(gt_time_delay) == 1:
                    length_stat["gt_chain_1"] += 1
                if len(pred_time_delay) == len(gt_time_delay):
                    length_stat["Same_as_gt"] += 1
                if abs(len(pred_time_delay) - len(gt_time_delay)) < 10 and (
                        abs(len(pred_time_delay) - len(gt_time_delay)) >= 1):
                    length_stat["diff_1_to_10"] += 1
                if abs(len(pred_time_delay) - len(gt_time_delay)) < 100 and (
                        abs(len(pred_time_delay) - len(gt_time_delay)) >= 10):
                    length_stat["diff_10_to_100"] += 1
                if abs(len(pred_time_delay) - len(gt_time_delay)) >= 100:
                    length_stat["diff_100+"] += 1

    if chain_length_eval:
        length_mae = np.mean(length_mae)

        print_and_log(logger, "====================================")
        print_and_log(logger, "chain length evaluation:")

        print_and_log(logger, "MAE: {}".format(round(length_mae, 4)))

        print_and_log(logger, "Count of number of simulated "
                      "chains: {}".format(len(result['chain_name'])))

        print_and_log(logger, "Count of number of chains whose "
                      "ground truth length is 0: {}".format(
                              length_stat["gt_chain_0"]
                              ))

        print_and_log(logger, "Count of number of chains whose "
                      "ground truth length is 1: {}".format(
                              length_stat["gt_chain_1"]
                              ))

        if 'chains_applied_keep_pred' in result:
            print_and_log(logger, "Count of number of predicted chains that "
                          "length needed threshold to be applied: {}, "
                          "percentage: {} ".format(
                                  length_stat["applied_threshold"],
                                  round(length_stat["applied_threshold"]/len(
                                          result['chain_name']), 4)))

        print_and_log(logger, "Count of number of predicted "
                      "chains that has "
                      "same length as ground truth"
                      ": {}, percentage: {}".format(
                            length_stat["Same_as_gt"],
                            round(length_stat["Same_as_gt"]/len(
                                    result['chain_name']), 4)))
        print_and_log(logger, "Count of number of predicted chains that "
                      "length difference is 1 to 10: {},"
                      "percentage: {}".format(
                              length_stat["diff_1_to_10"],
                              round(length_stat["diff_1_to_10"]/len(
                                      result['chain_name']), 4)))
        print_and_log(logger, "Count of number of predicted chains that "
                      "length difference is 10 to 100: {}, "
                      "percentage: {}".format(
                              length_stat["diff_10_to_100"],
                              round(length_stat["diff_10_to_100"]/len(
                                      result['chain_name']), 4)))
        print_and_log(logger, "Count of number of predicted chains that "
                      "length difference is 100 and above: {}, "
                      "percentage: {}".format(
                              length_stat["diff_100+"],
                              round(length_stat["diff_100+"]/len(
                                      result['chain_name']), 4)))

    return result
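For reference, a minimal sketch (with made-up delays) of the two per-chain distance calls used above: fastdtw returns a (distance, path) tuple and tolerates sequences of different lengths, while mean_squared_error requires equal lengths, which is why the code guards on that.

from fastdtw import fastdtw
from sklearn.metrics import mean_squared_error

gt_time_delay = [0.5, 1.0, 2.0, 4.0]
pred_time_delay = [0.4, 1.2, 2.5]

dtw_distance, _ = fastdtw(gt_time_delay, pred_time_delay)
print(dtw_distance)
if len(gt_time_delay) == len(pred_time_delay):
    print(mean_squared_error(gt_time_delay, pred_time_delay))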
Example #8
def user_cluster_nlg_eval(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "user cluster average bleu scores:")
    # print_and_log(logger, "Please install nlg-eval package!\n"
    #               "Reference: https://github.com/Maluuba/nlg-eval")
    # print_and_log(logger, "After installing, please change the package "
    #               "__init__.py file (contact: [email protected]).")

    sys.path.append(config['nlgeval_repo_dir'])
    from nlgeval import compute_individual_metrics

    # avg bleu
    avg_bleu = dict()
    avg_bleu['Bleu_1'] = list()
    avg_bleu['Bleu_2'] = list()
    avg_bleu['Bleu_3'] = list()
    avg_bleu['Bleu_4'] = list()

    result['uc_bleu1'] = list()
    result['uc_bleu2'] = list()
    result['uc_bleu3'] = list()
    result['uc_bleu4'] = list()
    for i in range(len(result['chain_name'])):
        if len(result['gt_all_user_cluster'][i]) == 0:
            gt_chain = " ".join(['no_event_in_simperiod'])
        else:
            gt_chain = " ".join(
                    [str(ele) for ele in result['gt_all_user_cluster'][i]])
        if len(result['pred_all_user_cluster'][i]) == 0:
            hy_chain = " ".join(['no_event_in_simperiod'])
        else:
            hy_chain = " ".join(
                    [str(ele) for ele in result['pred_all_user_cluster'][i]])
        metrics_dict = compute_individual_metrics(gt_chain, hy_chain,
                                                  no_overlap=(False, True),
                                                  no_skipthoughts=True,
                                                  no_glove=True)
        result['uc_bleu1'].append(metrics_dict['Bleu_1'])
        avg_bleu['Bleu_1'].append(metrics_dict['Bleu_1'])

        if len(result['gt_all_user_cluster'][i]) >= 2:  # and (
                # len(result['pred_all_user_cluster'][i]) >= 2
                # ):
            result['uc_bleu2'].append(metrics_dict['Bleu_2'])
            avg_bleu['Bleu_2'].append(metrics_dict['Bleu_2'])
        else:
            result['uc_bleu2'].append('null')

        if len(result['gt_all_user_cluster'][i]) >= 3:  # and (
                # len(result['pred_all_user_cluster'][i])
                # ):
            result['uc_bleu3'].append(metrics_dict['Bleu_3'])
            avg_bleu['Bleu_3'].append(metrics_dict['Bleu_3'])
        else:
            result['uc_bleu3'].append('null')

        if len(result['gt_all_user_cluster'][i]) >= 4:  # and (
                # len(result['pred_all_user_cluster'][i]) >= 4
                # ):
            result['uc_bleu4'].append(metrics_dict['Bleu_4'])
            avg_bleu['Bleu_4'].append(metrics_dict['Bleu_4'])
        else:
            result['uc_bleu4'].append('null')

    for metric in avg_bleu:
        print_and_log(logger, "{}: {}".format(
                metric, round(np.average(avg_bleu[metric]), 4)))
#        print_and_log(logger, "{}: {}, calculated from {} values".format(
#                metric, round(np.average(avg_bleu[metric]), 4),
#                len(avg_bleu[metric])))
    return result
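The call above relies on a locally modified copy of nlg-eval (hence the non-standard no_overlap=(False, True) argument). With the stock package, an individual-metrics call looks roughly like this, assuming space-separated cluster-id strings:

from nlgeval import compute_individual_metrics

gt_chain = "3 7 7 1"    # ground-truth user clusters as a space-separated string
hy_chain = "3 7 1"      # predicted user clusters
metrics_dict = compute_individual_metrics([gt_chain], hy_chain,
                                          no_skipthoughts=True, no_glove=True)
print(metrics_dict['Bleu_1'], metrics_dict['Bleu_2'])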
Example #9
def result_evaluation_given_gt(config, result_save_path,
                               only_has_event=False):
    # load result pickle
    with open(os.path.join(result_save_path,
                           'result.pickle'), 'rb') as handle:
        result = pickle.load(handle)
    print('result.pickle loaded!')
    print("result.keys: {}\n\n".format(result.keys()))

    if only_has_event:
        # keep only chains that have at least one ground-truth event
        keys = ['chain_name',
                'pred_all_event_id', 'pred_all_event_type',
                'pred_all_time_delay', 'pred_all_user_cluster',
                'gt_all_event_id', 'gt_all_event_type',
                'gt_all_time_delay', 'gt_all_user_cluster']
        result_new = {key: list() for key in keys}

        for i in range(len(result['chain_name'])):
            if len(result['gt_all_event_id'][i]) != 0:
                for key in keys:
                    result_new[key].append(result[key][i])

        result = result_new

    # logger
    if only_has_event:
        logger = set_logger(os.path.join(
                result_save_path, 'evaluate_only_has_event_given_gt_' +
                dt.now().strftime("%Y-%m-%dT%H-%M-%SZ") + '.log'))
    else:
        logger = set_logger(os.path.join(
                result_save_path, 'evaluate_all_given_gt_' +
                dt.now().strftime("%Y-%m-%dT%H-%M-%SZ") + '.log'))

    print_and_log(logger, "Evaluation over {} simulated chains...".format(
            len(result['chain_name'])))

    # evaluation processes
    if config['event_type_nlg_eval']:
        result = event_type_nlg_eval(config, logger, result)

    if config['event_type_map_eval']:
        result = event_type_map_eval_given_gt(config, logger, result)

    if config['event_type_categorical_accuracy_eval_given_gt']:
        result = event_type_categorical_accuracy_eval_given_gt(
                config, logger, result)

    if config['event_type_percentage_eval']:
        result = event_type_percentage_eval(config, logger, result)

    if config['user_cluster_nlg_eval']:
        result = user_cluster_nlg_eval(config, logger, result)

    if config['user_cluster_map_eval']:
        result = user_cluster_map_eval_given_gt(config, logger, result)

    if config['user_cluster_categorical_accuracy_eval_given_gt']:
        result = user_cluster_categorical_accuracy_eval_given_gt(
                config, logger, result)

    if config['user_cluster_percentage_eval']:
        result = user_cluster_percentage_eval(config, logger, result)

    if config['time_delay_overall_evaluation']:
        if not only_has_event:
            result = time_delay_overall_evaluation(
                    config, logger, result, result_save_path,
                    plot_ts=config['plot_ts'], chain_length_eval=False)
        else:
            result = time_delay_overall_evaluation(
                    config, logger, result, result_save_path,
                    plot_ts=False, chain_length_eval=False)

    write_result_to_file(config, result, logger)

    del logger

    return
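Illustrative only: the config keys below are inferred from the lookups in result_evaluation_given_gt; a run driven by such a dict might look like this (paths and dates are placeholders, and additional keys may be required by the helpers it calls):

config = {
    'event_type_nlg_eval': False,
    'event_type_map_eval': True,
    'event_type_categorical_accuracy_eval_given_gt': True,
    'event_type_percentage_eval': False,
    'user_cluster_nlg_eval': False,
    'user_cluster_map_eval': True,
    'user_cluster_categorical_accuracy_eval_given_gt': True,
    'user_cluster_percentage_eval': False,
    'time_delay_overall_evaluation': True,
    'plot_ts': False,
    'given_gt': True,
    'sim_period': {'start': '2017-01-01T00:00:00Z',
                   'end': '2017-02-01T00:00:00Z'},
}
result_evaluation_given_gt(config, './output/run_01', only_has_event=True)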
Example #10
def user_cluster_percentage_eval(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "user cluster distribution evaluation:")

    gt_class_list = []
    pred_class_list = []
    for i in range(len(result['chain_name'])):
        gt_class_list += result['gt_all_user_cluster'][i]
        pred_class_list += result['pred_all_user_cluster'][i]

    gt_class_list_counter = Counter(gt_class_list)
    pred_class_list_counter = Counter(pred_class_list)

    # user cluster ids are assumed to be 0..100 here
    clusters = list(range(100 + 1))

    counts_per_class = []
    for i in range(len(clusters)):
        counts_per_class.append(
                gt_class_list_counter[i])
    gt_distribution = cal_distribution(counts_per_class)

    counts_per_class = []
    for i in range(len(clusters)):
        counts_per_class.append(
                pred_class_list_counter[i])
    pred_distribution = cal_distribution(counts_per_class)

    print_and_log(logger, "!!!!  ground truth distribution: ")
    for i in range(len(clusters)):
        print_and_log(logger, "{}: {}".format(
                i, round(gt_distribution[i], 4)))

    print_and_log(logger, "!!!!  prediction distribution: ")
    for i in range(len(clusters)):
        print_and_log(logger, "{}: {}".format(
                i, round(pred_distribution[i], 4)))
    return result
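cal_distribution is another helper defined elsewhere in this code base; from how it is used in the two distribution evaluations, it presumably normalizes per-class counts into fractions. A hypothetical sketch:

def cal_distribution_sketch(counts_per_class):
    # Hypothetical illustration only; the project's real cal_distribution may differ.
    total = sum(counts_per_class)
    if total == 0:
        return [0.0] * len(counts_per_class)
    return [count / total for count in counts_per_class]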
Example #11
def event_type_percentage_eval(config, logger, result):
    print_and_log(logger, "====================================")
    print_and_log(logger, "event type distribution evaluation:")

    gt_class_list = []
    pred_class_list = []
    for i in range(len(result['chain_name'])):
        gt_class_list += result['gt_all_event_type'][i]
        pred_class_list += result['pred_all_event_type'][i]

    gt_class_list_counter = Counter(gt_class_list)
    pred_class_list_counter = Counter(pred_class_list)

    eventtype_2_id = dict()
    for key in config['eventtype_2_id']:
        eventtype_2_id[key] = config['eventtype_2_id'][key]-1
    id_2_eventtype = dict(zip(eventtype_2_id.values(),
                              eventtype_2_id.keys()))

    counts_per_class = []
    for i in range(len(id_2_eventtype)):
        et = id_2_eventtype[i]
        counts_per_class.append(
                gt_class_list_counter[et])
    gt_distribution = cal_distribution(counts_per_class)

    counts_per_class = []
    for i in range(len(id_2_eventtype)):
        et = id_2_eventtype[i]
        counts_per_class.append(
                pred_class_list_counter[et])
    pred_distribution = cal_distribution(counts_per_class)

    print_and_log(logger, "!!!!  ground truth distribution: ")
    for i in range(len(id_2_eventtype)):
        et = id_2_eventtype[i]
        print_and_log(logger, "{}: {}".format(
                et, round(gt_distribution[i], 4)))

    print_and_log(logger, "!!!!  prediction distribution: ")
    for i in range(len(id_2_eventtype)):
        et = id_2_eventtype[i]
        print_and_log(logger, "{}: {}".format(
                et, round(pred_distribution[i], 4)))
    return result