import numpy as np
from scipy.optimize import leastsq

# Project-level helpers (err_array, deterministic_continuous_to_discrete_model,
# value_iteration, discrete_policy, mfmc_evaluation, hill_climb, fit_T,
# policy_wrt_approx_model, parallel) are assumed to be imported from the
# project's own modules.

def approx_model_policy(domain, data):
    """Fit the approximate dynamics parameters to the data by least squares,
    then plan with value iteration over the discretized model."""
    # Least-squares fit of the dynamics parameters, initialized at the true parameters.
    f = lambda pars: err_array(domain, pars, data)
    pars0 = domain.true_pars
    pars = leastsq(f, pars0)[0]
    # Discretize the fitted continuous dynamics and solve for a policy.
    dynamics = lambda s, u: domain.approx_dynamics(s, u, pars)
    T = deterministic_continuous_to_discrete_model(dynamics, domain.state_centers, domain.action_centers, domain.at_goal)
    states_to_actions, V = value_iteration(T, domain.state_centers, domain.reward, threshold=domain.value_iteration_threshold, pi_init=domain.pi_init)
    return discrete_policy(domain, states_to_actions)
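
# err_array is defined elsewhere in the project. A minimal sketch of a
# one-step prediction-error residual, assuming `data` is an iterable of
# (state, action, next_state) transitions and that domain.approx_dynamics
# returns the predicted next state, would look like the following
# (illustrative only; the project's err_array may differ):
def _err_array_sketch(domain, pars, data):
    residuals = []
    for s, u, s_next in data:
        pred = domain.approx_dynamics(s, u, pars)
        residuals.extend(np.ravel(np.asarray(pred) - np.asarray(s_next)))
    return residuals
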
def best_policy(domain, data):
    """Search over dynamics parameters for the setting whose induced policy
    scores best under mfmc_evaluation on the batch data, starting from the
    least-squares fit."""
    # The least-squares fit supplies the starting point for the parameter search.
    f = lambda pars: err_array(domain, pars, data)
    pars0 = domain.true_pars
    ml_start = leastsq(f, pars0)[0]
    # By default, hill-climb locally from the least-squares fit; the alternative
    # branch exhaustively scores a predefined grid of parameter settings in parallel.
    use_hill_climb = True
    if use_hill_climb:
        f = lambda pars: [tuple(pars), mfmc_evaluation(policy_wrt_approx_model(domain, pars), data, domain.distance_fn, domain.initstate, domain.episode_length, domain.at_goal)]
        pars = hill_climb(f, domain.optimization_pars, ml_start)
    else:
        f = lambda pars: [pars, mfmc_evaluation(policy_wrt_approx_model(domain, pars), data, domain.distance_fn, domain.initstate, domain.episode_length, domain.at_goal)]
        raw_returns = parallel.largeparmap(f, domain.initial_par_search_space)
        ind = np.argmax([raw[1] for raw in raw_returns])
        pars = raw_returns[ind][0]
    # Plan in the model induced by the best-scoring parameters.
    dynamics = lambda s, u: domain.approx_dynamics(s, u, pars)
    T = deterministic_continuous_to_discrete_model(dynamics, domain.state_centers, domain.action_centers, domain.at_goal)
    states_to_actions, V = value_iteration(T, domain.state_centers, domain.reward, threshold=domain.value_iteration_threshold)
    return discrete_policy(domain, states_to_actions)
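
# policy_wrt_approx_model is defined elsewhere in the project. Mirroring the
# planning steps used above, a plausible sketch (illustrative, not the
# project's definition) is:
def _policy_wrt_approx_model_sketch(domain, pars):
    dynamics = lambda s, u: domain.approx_dynamics(s, u, pars)
    T = deterministic_continuous_to_discrete_model(dynamics, domain.state_centers, domain.action_centers, domain.at_goal)
    states_to_actions, V = value_iteration(T, domain.state_centers, domain.reward, threshold=domain.value_iteration_threshold)
    return discrete_policy(domain, states_to_actions)
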
def discrete_model_policy(domain, data):
    """Fit a tabular transition model directly from the data, then plan with
    value iteration."""
    T = fit_T(data, domain.state_centers, domain.action_centers, domain.at_goal)
    states_to_actions, V = value_iteration(T, domain.state_centers, domain.reward, threshold=domain.value_iteration_threshold)
    return discrete_policy(domain, states_to_actions)
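
# fit_T is defined elsewhere in the project. One common way to build such a
# tabular model is to snap observed transitions to their nearest state/action
# centers and normalize the empirical counts. The sketch below illustrates
# that idea; the (state, action, next_state) data layout, the (nS, nA, nS)
# tensor shape, and the absorbing-goal handling are assumptions, not the
# project's actual implementation.
def _fit_T_sketch(data, state_centers, action_centers, at_goal):
    state_centers = np.asarray(state_centers)    # assumed shape (nS, d)
    action_centers = np.asarray(action_centers)  # assumed shape (nA, k)
    nS, nA = len(state_centers), len(action_centers)
    nearest = lambda x, centers: int(np.argmin(np.sum((centers - np.ravel(x)) ** 2, axis=1)))
    counts = np.zeros((nS, nA, nS))
    for s, u, s_next in data:
        i, a = nearest(s, state_centers), nearest(u, action_centers)
        j = i if at_goal(state_centers[i]) else nearest(s_next, state_centers)  # goal states absorb
        counts[i, a, j] += 1.0
    totals = counts.sum(axis=2, keepdims=True)
    # Visited (state, action) pairs use empirical frequencies; unvisited pairs self-loop.
    return np.where(totals > 0, counts / np.maximum(totals, 1.0), np.eye(nS)[:, None, :])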