Example #1
0
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    results = glo.ex_load_result(ex, fname)
    value_accessor = lambda job_results: job_results["time_secs"]
    vf_pval = np.vectorize(value_accessor)
    # results['job_results'] is a dictionary:
    # {'test_result': (dict from running perform_test(te) '...':..., }
    times = vf_pval(results["job_results"])
    repeats, _, n_methods = results["job_results"].shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    # ns = np.array(results[xkey])
    # te_proportion = 1.0 - results['tr_proportion']
    # test_sizes = ns*te_proportion
    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results["method_job_funcs"]]
    for i in range(n_methods):
        te_proportion = 1.0 - results["tr_proportion"]
        fmt = line_styles[func_names[i]]
        # plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues,
                     time_avg[:, i],
                     yerr=time_std[:, i],
                     fmt=fmt,
                     label=method_label)

    ylabel = "Time (s)"
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks(xvalues, xvalues)
    plt.legend(loc="best")
    plt.gca().set_yscale("log")
    title = ("%s. %d trials. " % (results["prob_label"], repeats)
             if func_title is None else func_title(results))
    plt.title(title)
    # plt.grid()
    return results
Example #2
0
def plot_prob_reject(ex,
                     fname,
                     func_xvalues,
                     xlabel,
                     func_title=None,
                     return_plot_values=False):
    """
    plot the empirical probability that the statistic is above the threshold.
    This can be interpreted as type-1 error (when H0 is true) or test power
    (when H1 is true). The plot is against the specified x-axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - func_xvalues: function taking aggregated results dictionary and return the values
        to be used for the x-axis values.
    - xlabel: label of the x-axis.
    - func_title: a function: results dictionary -> title of the plot
    - return_plot_values: if true, also return a PlotValues as the second
      output value.

    Return loaded results
    """
    # from IPython.core.debugger import Tracer
    # Tracer()()

    results = glo.ex_load_result(ex, fname)

    def rej_accessor(jr):
        rej = jr["test_result"]["h0_rejected"]
        # When used with vectorize(), making the value float will make the resulting
        # numpy array to be of float. nan values can be stored.
        return float(rej)

    # value_accessor = lambda job_results: job_results['test_result']['h0_rejected']
    vf_pval = np.vectorize(rej_accessor)
    # results['job_results'] is a dictionary:
    # {'test_result': (dict from running perform_test(te) '...':..., }
    rejs = vf_pval(results["job_results"])
    repeats, _, n_methods = results["job_results"].shape

    # yvalues (corresponding to xvalues) x #methods
    mean_rejs = np.mean(rejs, axis=0)
    # print mean_rejs
    # std_pvals = np.std(rejs, axis=0)
    # std_pvals = np.sqrt(mean_rejs*(1.0-mean_rejs))

    xvalues = func_xvalues(results)

    # ns = np.array(results[xkey])
    # te_proportion = 1.0 - results['tr_proportion']
    # test_sizes = ns*te_proportion
    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results["method_job_funcs"]]
    plotted_methods = []
    for i in range(n_methods):
        te_proportion = 1.0 - results["tr_proportion"]
        fmt = line_styles[func_names[i]]
        # plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plotted_methods.append(method_label)
        plt.plot(xvalues, mean_rejs[:, i], fmt, label=method_label)
    """
    else:
        # h0 is true 
        z = stats.norm.isf( (1-confidence)/2.0)
        for i in range(n_methods):
            phat = mean_rejs[:, i]
            conf_iv = z*(phat*(1-phat)/repeats)**0.5
            #plt.errorbar(test_sizes, phat, conf_iv, fmt=line_styles[i], label=method_labels[i])
            plt.plot(test_sizes, mean_rejs[:, i], line_styles[i], label=method_labels[i])
    """

    ylabel = "Rejection rate"
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xticks(np.hstack((xvalues)))

    alpha = results["alpha"]
    plt.legend(loc="best")
    title = ("%s. %d trials. $\\alpha$ = %.2g." %
             (results["prob_label"], repeats, alpha)
             if func_title is None else func_title(results))
    plt.title(title)
    plt.grid()
    if return_plot_values:
        return results, PlotValues(xvalues=xvalues,
                                   methods=plotted_methods,
                                   plot_matrix=mean_rejs.T)
    else:
        return results
Example #3
0
def run_problem(prob_label):
    """Run the experiment"""
    ns, p, ds = get_ns_pqsource(prob_label)
    # ///////  submit jobs //////////
    # create folder name string
    # result_folder = glo.result_folder()
    from sbibm.third_party.kgof.config import expr_configs

    tmp_dir = expr_configs["scratch_path"]
    foldername = os.path.join(tmp_dir, "kgof_slurm", "e%d" % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    # engine = SerialComputationEngine()
    # engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x len(ns) x #methods
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
                    prob_label,
                    func_name,
                    n,
                    r,
                    alpha,
                    tr_proportion,
                )
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info("%s exists. Load and return." % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result not exists or rerun

                    # p: an UnnormalizedDensity object
                    job = Ex1Job(SingleResultAggregator(), p, ds, prob_label,
                                 r, f, n)
                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, n=%rd)" %
                            (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    # func_names = [f.__name__ for f in method_job_funcs]
    # func2labels = exglobal.get_func2label_map()
    # method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        "job_results": job_results,
        "data_source": ds,
        "alpha": alpha,
        "repeats": reps,
        "ns": ns,
        "p": p,
        "tr_proportion": tr_proportion,
        "method_job_funcs": method_job_funcs,
        "prob_label": prob_label,
    }

    # class name
    fname = "ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f_trp%.2f.p" % (
        ex,
        prob_label,
        n_methods,
        reps,
        min(ns),
        max(ns),
        alpha,
        tr_proportion,
    )

    glo.ex_save_result(ex, results, fname)
    logger.info("Saved aggregated results to %s" % fname)