def test_basinhopping_canned_example(self):
    """Basinhopping fits a parabola with superimposed local minima."""
    # Canned example taken directly from the scipy.optimize docs.
    def objective(x):
        return cos(14.5 * x - 0.3) + (x + 0.2) * x

    x0 = 1.0
    global_min, f_at_global_min = fit_timeseries(
        objective, x0,
        global_optimizer="basinhopping", local_optimizer="BFGS")

    # Known global minimum for this objective (per the scipy docs).
    npt.assert_almost_equal(global_min, -0.1951, 4)
    npt.assert_almost_equal(f_at_global_min, -1.0009, 4)
    def test_fit_timeseries_recovers_OU_params(self):
        """fit_timeseries recovers OU model params.

        Simulates Ornstein-Uhlenbeck histories of increasing length and
        checks how well fit_timeseries recovers the generating parameters
        (sigma=0.25, lambda=0.12, theta=0.5).

        Fix: Python 2 ``print`` statements and ``dict.iteritems()`` were
        ported to Python 3 syntax, consistent with the rest of the file.
        """
        final_errors = {}
        dt = 1
        for n_timepoints in range(1, 300):
            print("Building OU model for %i timepoints" % n_timepoints)
            # Run ou_process forward to generate a position history.
            ou = Process(start_coord=0.20, motion="Ornstein-Uhlenbeck",
                         history=None,
                         params={"lambda": 0.12, "delta": 0.25, "mu": 0.5})
            for t in range(0, n_timepoints):
                ou.update(dt)
            print(n_timepoints, ou.History)
            xs = array(ou.History)
            ts = arange(0, len(ou.History)) * dt
            print(xs, ts, dt)
            fn_to_optimize = make_OU_objective_fn(xs, ts)
            # Estimate correct parameters
            for niter in [5]:
                for local_optimizer in ['L-BFGS-B']:
                    print("Running optimizer:", local_optimizer)
                    # Using intentionally kinda bad starting estimates
                    start_Sigma = 0.1
                    start_Lambda = 0.0
                    start_Theta = mean(xs)
                    print("niter=", niter)
                    print("start_Theta: ", start_Theta)
                    print("n_timepoints: ", n_timepoints)
                    # Optimizer bounds for (sigma, lambda, theta).
                    xmax = array([1.0, 1.0, 1.0])
                    xmin = array([0.0, 0.0, -1.0])
                    x0 = array([start_Sigma, start_Lambda, start_Theta])

                    global_min, f_at_global_min = fit_timeseries(
                        fn_to_optimize, x0, xmin, xmax, stepsize=0.005,
                        niter=niter, local_optimizer=local_optimizer)

                    print("OU result:")
                    print(global_min, f_at_global_min)
                    # Error relative to the generating parameters above.
                    correct_values = array([0.25, 0.12, 0.5])
                    final_error = global_min - correct_values
                    print("Global min:", global_min)
                    final_errors["%s_%i_%i" % (local_optimizer, niter,
                                               n_timepoints)] = final_error
                    print("*" * 80)
                    print("%s error: %.4f,%.4f,%.4f" % (local_optimizer,
                          final_error[0], final_error[1], final_error[2]))
                    print("*" * 80)
        for opt, err in final_errors.items():
            print("%s error: %.4f,%.4f,%.4f" % (opt, err[0], err[1], err[2]))
# Beispiel #3 / 0 — separator lines from the code-sample aggregation,
# commented out: a bare `Beispiel` name would raise NameError at import time.
def benchmark(max_tp=300, output=None, verbose=False):
    """
    Verifies that fit_timeseries recovers OU model params.

    :param max_tp: maximum number of timepoints to test
    :param output: path prefix for the output log and CSV; when falsy,
        nothing is written to disk
    :param verbose: if True, echo the log to stdout
    :return: output dataframe of benchmarked data
        (columns: model, n_timepoints, mag_err)
    """
    if output:
        f = open(
            output + "fit_timeseries_benchmark" + str(max_tp) + "_log.txt",
            "w+")
    log = []

    # Generate several normal distributions.
    # NOTE(review): test_normal_data / BasicNormalData and the warm-up OU
    # process below are never read again. They are kept because they draw
    # from the global RNG; removing them would shift all later random draws.
    test_normal_data = {}
    n_obs = 1000
    dt = 0.01
    for delta in [float(i) / 100.0 for i in range(0, 100, 1)]:
        curr_data = norm.rvs(loc=0, size=n_obs, scale=delta**(2 * dt))
        test_normal_data[delta**(2 * dt)] = curr_data
    BasicNormalData = test_normal_data

    # Generate OU process for testing.
    ou_process = Process(start_coord=0.20,
                         motion="Ornstein-Uhlenbeck",
                         history=None,
                         params={
                             "lambda": 0.20,
                             "delta": 0.25,
                             "mu": 0.0
                         })
    # Run ou_process to get history.
    for t in range(1, 30):
        dt = 1
        ou_process.update(dt)
    OU = ou_process

    final_errors = {}
    dt = 1
    # Per-run accumulators for the output DataFrame columns.
    model, n_tp, key, sim_i, sim_o, exp_o, err_o, nLogLik, aic_o = (
        [] for i in range(9))
    for n_timepoints in range(1, max_tp + 1):
        log.append("Building OU model for %i timepoints" % n_timepoints)
        n_tp.append(n_timepoints)
        # Run the OU process to get a history; these are the generating
        # parameters the fit should recover (sigma=0.25, lambda=0.12,
        # theta=0.5).
        ou = Process(start_coord=0.20,
                     motion="Ornstein-Uhlenbeck",
                     history=None,
                     params={
                         "lambda": 0.12,
                         "delta": 0.25,
                         "mu": 0.5
                     })
        for t in range(0, n_timepoints):
            ou.update(dt)
        log.append(str(n_timepoints) + ", " + str(ou.History))
        xs = array(ou.History)
        ts = np.arange(0, len(ou.History)) * dt
        log.append(str(xs) + ", " + str(ts) + ", " + str(dt))
        fn_to_optimize = make_OU_objective_fn(xs, ts)

        # Estimate correct parameters.
        for niter in [5]:
            for local_optimizer in ['L-BFGS-B']:
                log.append("Running optimizer: " + str(local_optimizer))
                model.append(local_optimizer)
                # Using intentionally kinda bad starting estimates.
                start_Sigma = 0.1
                start_Lambda = 0.0
                start_Theta = np.mean(xs)
                log.append("niter: " + str(niter))
                log.append("start_Theta: " + str(start_Theta))
                key.append(["sigma", "lambda", "theta"])
                sim_i.append([start_Sigma, start_Lambda, start_Theta])
                log.append("n_timepoints: " + str(n_timepoints))
                # Optimizer bounds for (sigma, lambda, theta).
                xmax = array([1.0, 1.0, 1.0])
                xmin = array([0.0, 0.0, -1.0])
                x0 = array([start_Sigma, start_Lambda, start_Theta])

                global_min, f_at_global_min = \
                    fit_timeseries(fn_to_optimize, x0, xmin, xmax,
                                   stepsize=0.005, niter=niter,
                                   local_optimizer=local_optimizer)

                log.append("OU result:")
                Sigma, Lambda, Theta = global_min
                correct_values = array([0.25, 0.12, 0.5])
                exp_o.append([0.25, 0.12, 0.5])
                final_error = global_min - correct_values
                log.append("Global min: " + str(global_min))
                sim_o.append([Sigma, Lambda, Theta])
                log.append("f at Global min: " + str(f_at_global_min))
                nLogLik.append(f_at_global_min)
                # AIC calculated as 2*n_params - 2*ln(likelihood).
                # NOTE(review): `niter` (=5, the basinhopping iteration
                # count) is passed as the parameter count here, though the
                # model has 3 fitted params — confirm whether aic(3, ...)
                # was intended (cf. benchmark_simulated_dataset, which
                # passes len(simulation_params)).
                aic_t = aic(niter, f_at_global_min)
                log.append("aic: " + str(aic_t))
                aic_o.append(aic_t)

                final_errors["%s_%i_%i" % (local_optimizer, niter,
                                           n_timepoints)] = final_error
                log.append("*" * 80)
                log.append("%s error: %.4f,%.4f,%.4f" %
                           (local_optimizer, final_error[0], final_error[1],
                            final_error[2]))
                err_o.append([
                    abs(final_error[0]),
                    abs(final_error[1]),
                    abs(final_error[2])
                ])
                log.append("*" * 80)
                log.append("")

    df = pd.DataFrame(
        {
            "model": model,
            "n_timepoints": n_tp,
            "key": key,
            "sim_input": sim_i,
            "sim_output": sim_o,
            "exp_output": exp_o,
            "mag_err": err_o,
            "nLogLik": nLogLik,
            "aic": aic_o
        },
        columns=[
            "model", "n_timepoints", "key", "sim_input", "sim_output",
            "exp_output", "mag_err", "nLogLik", "aic"
        ])
    for opt, err in final_errors.items():
        log.append("%s error: %.4f,%.4f,%.4f" % (opt, err[0], err[1], err[2]))

    for line in log:
        if verbose:
            print(line)
        if output:
            f.write(line + "\n")

    if output:
        if verbose:
            # Fix: report the file name actually written (the message
            # previously said "...benchmark_log<max_tp>.txt" while the log
            # opened above is "...benchmark<max_tp>_log.txt").
            print("Output log saved to: " + output +
                  "fit_timeseries_benchmark" + str(max_tp) + "_log.txt")
            print("Output saved to: " + output + "fit_timeseries_benchmark" +
                  str(max_tp) + ".csv")
        df.to_csv(output + "fit_timeseries_benchmark" + str(max_tp) + ".csv",
                  index=False)
        f.close()

    # Return only the columns downstream consumers use.
    df = df[["model", "n_timepoints", "mag_err"]]
    return df
# Beispiel #4 / 0 — separator lines from the code-sample aggregation,
# commented out: a bare `Beispiel` name would raise NameError at import time.
def benchmark_simulated_dataset(n_timepoints, verbose=False,
                                simulation_type="Ornstein-Uhlenbeck",
                                simulation_params={"lambda": 0.12,
                                                   "delta": 0.25,
                                                   "mu": 0.5},
                                local_optimizer=['L-BFGS-B'],
                                local_optimizer_niter=[5],
                                local_optimizer_stepsize=0.005,
                                log=None, dt=0.1):
    """Simulate one dataset and benchmark fit_timeseries against it.

    :param n_timepoints: number of timepoints to simulate
    :param verbose: if True, print progress for each simulated timepoint
    :param simulation_type: motion model passed to Process
    :param simulation_params: generating parameters for the simulation
        (read-only here, so the shared default dict is safe)
    :param local_optimizer: list of local optimizer names to benchmark
        (read-only here, so the shared default list is safe)
    :param local_optimizer_niter: list of basinhopping niter values to test
    :param local_optimizer_stepsize: basinhopping stepsize
    :param log: optional list of log lines to append to; a fresh list is
        created when omitted
    :param dt: simulation timestep
    :return: (log, results) — results is a list of per-run result dicts

    Fix: ``log`` previously defaulted to a mutable ``[]``, so log lines
    silently accumulated across successive calls; it now defaults to
    ``None`` and a fresh list is created per call.
    """
    if log is None:
        log = []

    # Run the simulated process forward to generate a position history.
    ou = Process(start_coord=0.20,
                 motion=simulation_type,
                 history=[0.20, 0.20],
                 params=simulation_params)

    for t in range(0, n_timepoints):
        if verbose:
            print("simulating timepoint t:", t)
        ou.update(dt)

    # Set timepoints and position (x) values.
    xs = array(ou.History)
    ts = np.arange(0, len(ou.History)) * dt

    fn_to_optimize = make_OU_objective_fn(xs, ts)

    results = []

    # Estimate correct parameters for each tested local optimizer/niter
    # value. (Loop variable renamed so it no longer shadows the
    # `local_optimizer` parameter it iterates over.)
    for optimizer_name in local_optimizer:
        for niter in local_optimizer_niter:

            # Set up simulation parameters
            # (using intentionally kinda bad starting estimates).
            start_Sigma = 0.1
            start_Lambda = 0.0
            start_Theta = np.mean(xs)
            x0 = array([start_Sigma, start_Lambda, start_Theta])

            # Set optimizer min/max bounds for each parameter.
            xmax = array([1.0, 1.0, 1.0])
            xmin = array([0.0, 0.0, -1.0])

            # Results for this run are collected in the result dict.
            result = {}
            result['model'] = "_".join(
                [simulation_type, optimizer_name, "niter",
                 str(niter)])
            result['benchmark_name'] = "_".join(
                [optimizer_name, "niter",
                 str(niter), "t",
                 str(n_timepoints)])
            result['xs'] = xs
            result['ts'] = ts
            result['n_timepoints'] = n_timepoints
            result['local_optimizer'] = optimizer_name
            result['local_optimizer_niter'] = niter
            result['model_parameter_names'] = ["sigma", "lambda", "theta"]
            result['local_optimizer_start_param_values'] = [
                start_Sigma, start_Lambda, start_Theta
            ]
            # Simulation params map onto fitted params as
            # delta->sigma, lambda->lambda, mu->theta.
            result["expected_parameter_values"] = [
                simulation_params[param]
                for param in ['delta', 'lambda', 'mu']
            ]

            # Look up expected values from user input simulation parameters.
            exp_Sigma, exp_Lambda, exp_Mu = \
                result["expected_parameter_values"]

            # Fit the model to the data.
            inferred_params, nLogLik = \
                fit_timeseries(fn_to_optimize, x0, xmin, xmax,
                               stepsize=local_optimizer_stepsize,
                               niter=niter, local_optimizer=optimizer_name)

            for i, p in enumerate(result['model_parameter_names']):
                result["inferred_{}".format(p)] = inferred_params[i]

            # Report inferred parameter values.
            result["nLogLik"] = nLogLik
            result["AIC"] = aic(len(simulation_params), nLogLik)

            # Add results to the log now so later log entries with final
            # errors will appear last in the log.
            log.extend(log_lines_from_result_dict(result))

            # Calculate errors.
            expected = list(result['expected_parameter_values'])
            observed = list(inferred_params)
            parameter_names = result['model_parameter_names']
            model_fit_error_results = calculate_errors(observed, expected,
                                                       parameter_names)

            result.update(model_fit_error_results)
            # Add error results to the log.
            log.extend(log_lines_from_result_dict(model_fit_error_results))

            results.append(result)

    return log, results