Example #1
def rank_info(relative_ranks, method, seed, ratio, debug=False, delim=' '):
    # One output line per method: rank label, method name, seed, ratio,
    # the non-NaN mean of the relative ranks and, unless debugging,
    # the individual relative ranks themselves.
    return delim.join([
        RANK_STR, method,
        str(seed), '{:g}'.format(ratio),
        float2str(non_nan_mean(relative_ranks)),
        delim.join(map(str, relative_ranks)) if not debug else ''
    ])
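
A minimal usage sketch; RANK_STR, float2str and non_nan_mean are module-level helpers not shown in this example, so the stand-in definitions below are assumptions:

import numpy as np

RANK_STR = 'RANK'                                 # assumed label constant
float2str = '{:.4f}'.format                       # assumed float formatter
non_nan_mean = lambda xs: float(np.nanmean(xs))   # assumed NaN-aware mean

print(rank_info([1, 3, 2], 'KRL', seed=0, ratio=0.5))
# -> RANK KRL 0 0.5 2.0000 1 3 2
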
Example #2
def result_info(metric,
                results,
                method,
                seed,
                ratio,
                k_eval,
                debug=False,
                delim=' '):
    # Same layout as rank_info, but keyed by metric name and the k_eval
    # cut-off, and formatting each individual result with float2str.
    return delim.join([
        metric, method,
        str(seed), '{:g}'.format(ratio),
        str(k_eval),
        float2str(non_nan_mean(results)),
        delim.join(map(float2str, results)) if not debug else ''
    ])
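
Called analogously, with a metric name and a k_eval cut-off (reusing the assumed float2str and non_nan_mean stubs from the sketch above):

print(result_info('NDCG', [0.8, float('nan'), 0.6], 'KRR', seed=0, ratio=1.0, k_eval=10))
# -> NDCG KRR 0 1 10 0.7000 0.8000 nan 0.6000
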
Example #3
import itertools
import sys
import traceback

import numpy as np
import yaml

# NDCG, Precision, Percentile, Rank, get_result, the *_info helpers and the
# upper-case constants are module-level names defined elsewhere in this file.


def main():

    print('Calculating evaluation metrics from the predictions...')

    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.safe_load(f)

    analysis = config['analysis']
    assert analysis in [FULL, SAMPLE, KEEPK], 'Unknown analysis type {} specified in the config file'.format(analysis)
    data_name = config['data']
    assert data_name in [GEX, WES, CNV, METH], 'Unknown data type {} specified in the config file'.format(data_name)
    methods = config['methods']
    for method in methods:
        assert method in [KRL, LKRL, KBMTL, KRR, RF, EN], 'Unknown method {} specified in the config file'.format(method)

    num_folds = config['cv']
    seeds = np.array(config['seeds'], dtype=int)
    sample_ratios = np.array(config['sample_ratios'], dtype=float)
    keepk_ratios = np.array(config['keepk_ratios'], dtype=float)
    keepk = config['keepk']
    k_evals = np.array(config['k_evals'], dtype=int)

    krl_k = config['krl_k']
    krl_lambdas = np.array(config['krl_lambdas'], dtype=float)
    krl_gammas = np.array(config['krl_gammas'], dtype=float)
    lkrl_k = config['lkrl_k']
    lkrl_lambdas = np.array(config['lkrl_lambdas'], dtype=float)
    kbmtl_alphas = np.array(config['kbmtl_alphas'], dtype=float)
    kbmtl_betas = np.array(config['kbmtl_betas'], dtype=float)
    kbmtl_gammas = np.array(config['kbmtl_gammas'], dtype=float)

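    # Evaluation settings: aggregate per fold only for the FULL analysis,
    # and for the KEEPK analysis cap the ranking cut-off k_opt at keepk.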
    debug = False
    foldwise = analysis == FULL
    ratio_range = sample_ratios if analysis == SAMPLE else keepk_ratios
    cv_metric = NDCG
    single_cv_k = False
    k_percs = [1]
    k_opt = keepk if analysis == KEEPK and krl_k > keepk else krl_k
    cv_ks = set(list(k_evals) + [k_opt])
    rank_type = 'best_prediction'
    rank = True
    perc = True

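    # For every (seed, ratio) pair, evaluate each method fold by fold,
    # then rank the methods against each other per patient.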
    for seed in seeds:
        for ratio in ratio_range:
            abs_ranks, relative_ranks = {}, {}
            for method in methods:
                abs_ranks[method], relative_ranks[method] = [], []
                foldwise_ndcgk = {}
                foldwise_precisionk = {}
                percentiles = {}
                for fold1 in range(num_folds):
                    try:
                        results = get_result(method, analysis, data_name, fold1, num_folds, keepk, ratio, seed, cv_metric, cv_ks, krl_lambdas, krl_gammas, lkrl_lambdas, kbmtl_alphas, kbmtl_betas, kbmtl_gammas)
                    except Exception:
                        results = None
                        print('ERROR:', method, analysis, data_name, fold1, keepk, ratio, seed, cv_metric, cv_ks, file=sys.stderr)
                        traceback.print_exc()
                        break  # if any of the folds is missing, there is no point in continuing

                    for k in k_evals:
                        Y_true = results[k]['Y_true'] if not single_cv_k else results[next(iter(cv_ks))]['Y_true']
                        Y_pred = results[k]['Y_pred'] if not single_cv_k else results[next(iter(cv_ks))]['Y_pred']
                        if k not in foldwise_ndcgk: foldwise_ndcgk[k] = []
                        if k not in foldwise_precisionk: foldwise_precisionk[k] = []
                        foldwise_ndcgk[k].append(NDCG(Y_true, Y_pred, k))
                        foldwise_precisionk[k].append(Precision(Y_true, Y_pred, k))

                    for k in k_percs:
                        # do not re-optimise hyper-parameters for every k_perc; use k_opt
                        Y_true = results[k_opt]['Y_true'] if not single_cv_k else results[next(iter(cv_ks))]['Y_true']
                        Y_pred = results[k_opt]['Y_pred'] if not single_cv_k else results[next(iter(cv_ks))]['Y_pred']
                        if k not in percentiles: percentiles[k] = []
                        percentiles[k].extend(Percentile(Y_true, Y_pred, k))

                    # do not re-optimise hyper-parameters for method ranking; use k_opt
                    Y_true = results[k_opt]['Y_true'] if not single_cv_k else results[next(iter(cv_ks))]['Y_true']
                    Y_pred = results[k_opt]['Y_pred'] if not single_cv_k else results[next(iter(cv_ks))]['Y_pred']
                    abs_ranks[method].extend(Rank(Y_true, Y_pred, rank_type))

                if results is None:
                    del abs_ranks[method]
                    del relative_ranks[method]
                    print('SKIPPING method {}, ratio {:g}, seed {}'.format(method, ratio, seed), file=sys.stderr)
                    continue  # some of the folds were missing, so continue with the next method

                for k in k_evals:
                    if foldwise:
                        ndcg_result = [non_nan_mean(fold) for fold in foldwise_ndcgk[k]]
                        precision_result = [non_nan_mean(fold) for fold in foldwise_precisionk[k]]
                    else:
                        ndcg_result = list(itertools.chain.from_iterable(foldwise_ndcgk[k]))
                        precision_result = list(itertools.chain.from_iterable(foldwise_precisionk[k]))
                    print(ndcg_info(ndcg_result, method, seed, ratio, k, debug=debug, delim=DELIM))
                    print(precision_info(precision_result, method, seed, ratio, k, debug=debug, delim=DELIM))

                if perc:
                    for k in k_percs:
                        for p_tuple in percentiles[k]:
                            assert len(p_tuple) == k
                            for i, p in enumerate(p_tuple):
                                print(percentile_info(p, i, method, seed, ratio, k, delim=DELIM))
                            if debug:
                                print('...')
                                break

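            # Convert each method's absolute per-patient ranks into relative
            # ranks across methods (1 = best method for that patient).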
            if rank and len(abs_ranks) > 0:
                size = len(next(iter(abs_ranks.values())))
                assert np.all([size == len(abs_ranks[method]) for method in abs_ranks])
                for i in range(size):
                    this_patient = sorted([abs_ranks[method][i] for method in abs_ranks])
                    for method in abs_ranks:
                        relative_ranks[method].append(this_patient.index(abs_ranks[method][i]) + 1)
                for method in abs_ranks:
                    print(rank_info(relative_ranks[method], method, seed, ratio, debug=debug, delim=DELIM))

    print('Finished.')
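
For reference, main() expects a YAML file whose keys match the ones read above. A minimal sketch with illustrative values only, assuming the module constants (FULL, GEX, KRL, KRR, ...) hold the matching strings:

# config.yaml (hypothetical)
analysis: full          # must equal the FULL constant
data: gex               # one of GEX, WES, CNV, METH
methods: [krl, krr]     # subset of KRL, LKRL, KBMTL, KRR, RF, EN
cv: 5
seeds: [0, 1, 2]
sample_ratios: [1.0]
keepk_ratios: [1.0]
keepk: 10
k_evals: [1, 5, 10]
krl_k: 10
krl_lambdas: [0.1, 1.0]
krl_gammas: [0.001, 0.01]
lkrl_k: 10
lkrl_lambdas: [0.1, 1.0]
kbmtl_alphas: [1.0]
kbmtl_betas: [1.0]
kbmtl_gammas: [1.0]
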
Example #4
def update_metric(current_value, result, cv_metric, cv_k):
    # Add this fold's mean CV metric to the running total; once a fold is
    # missing or the total is already NaN, the total stays NaN.
    return (current_value + non_nan_mean(cv_metric(result['Y_true'], result['Y_pred'], cv_k))) \
            if result is not None and not np.isnan(current_value) else np.nan
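
A short sketch of the intended NaN propagation; non_nan_mean is stubbed with np.nanmean and the metric with a toy callable, both assumptions:

import numpy as np

non_nan_mean = np.nanmean                      # assumed helper
toy_metric = lambda y_true, y_pred, k: [0.5]   # stand-in for e.g. NDCG

total = 0.0
total = update_metric(total, {'Y_true': None, 'Y_pred': None}, toy_metric, 10)  # 0.5
total = update_metric(total, None, toy_metric, 10)                              # nan: missing fold
total = update_metric(total, {'Y_true': None, 'Y_pred': None}, toy_metric, 10)  # stays nan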