def get_repl_result(init_dict):
    """
    Evaluate the replication result of John Rust's 1987 paper.

    :param init_dict: A dictionary containing the relevant variables for the
        replication.
    :return: The optimization result of the transition probabilities and cost
        parameters.
    """
    data_path = get_data_storage()
    group = init_dict["groups"]
    binsize = str(init_dict["binsize"])

    # Build the two pickle paths once instead of re-concatenating them
    # at every check.
    group_pkl = (
        data_path + "/pkl/group_data/" + group + "_" + binsize + ".pkl"
    )
    repl_pkl = (
        data_path + "/pkl/replication_data/rep_" + group + "_" + binsize + ".pkl"
    )

    # The raw data only has to be read once; afterwards the group pickle
    # exists on disk.
    if not os.path.isfile(group_pkl):
        data_reading()

    # Reuse the processed replication sample if it is already cached.
    if os.path.isfile(repl_pkl):
        data = pd.read_pickle(repl_pkl)
    else:
        data = data_processing(init_dict)

    return estimate(init_dict, data)
def inputs():
    """Run an NFXP estimation with a square-root cost function on the
    group-4 sample and collect everything the tests compare against."""
    discount_factor = 0.9999
    n_states = 90
    cost_scale = 0.01

    spec = {
        "model_specifications": {
            "discount_factor": discount_factor,
            "number_states": n_states,
            "maint_cost_func": "square_root",
            "cost_scale": cost_scale,
        },
        "optimizer": {
            "approach": "NFXP",
            "algorithm": "scipy_lbfgsb",
        },
    }

    df = pd.read_pickle(TEST_FOLDER + "group_4.pkl")
    trans_res, fixp_res = estimate(spec, df)

    # Drop the first observation of every bus (MultiIndex level 1 starts at 1).
    obs_slice = (slice(None), slice(1, None))

    return {
        "trans_est": trans_res["x"],
        "params_est": fixp_res["x"],
        "trans_ll": trans_res["fun"],
        "cost_ll": fixp_res["fun"],
        "states": df.loc[obs_slice, "state"].to_numpy(int),
        "decisions": df.loc[obs_slice, "decision"].to_numpy(int),
        "disc_fac": discount_factor,
        "num_states": n_states,
        "scale": cost_scale,
        "status": fixp_res["status"],
    }
def inputs():
    """Run an MPEC estimation with a cubic cost function on the group-4
    sample and collect the estimates the tests compare against."""
    discount_factor = 0.9999
    n_states = 90
    cost_scale = 1e-8
    n_params = 4

    # Bounds: expected values in [-inf, 50], cost parameters in [0, inf].
    lower = np.concatenate((np.full(n_states, -np.inf), np.full(n_params, 0.0)))
    upper = np.concatenate((np.full(n_states, 50.0), np.full(n_params, np.inf)))

    # Starting point: zero expected values, RC = 4, remaining thetas = 1.
    start = np.concatenate(
        (np.full(n_states, 0.0), np.array([4.0]), np.ones(n_params - 1))
    )

    spec = {
        "model_specifications": {
            "discount_factor": discount_factor,
            "number_states": n_states,
            "maint_cost_func": "cubic",
            "cost_scale": cost_scale,
        },
        "optimizer": {
            "approach": "MPEC",
            "algorithm": "LD_SLSQP",
            "gradient": "Yes",
            "params": start,
            "set_ftol_abs": 1e-15,
            "set_xtol_rel": 1e-15,
            "set_xtol_abs": 1e-3,
            "set_lower_bounds": lower,
            "set_upper_bounds": upper,
        },
    }

    df = pd.read_pickle(TEST_FOLDER + "group_4.pkl")
    _, fixp_res = estimate(spec, df)

    return {
        # Only the cost parameters (after the n_states expected values).
        "params_est": fixp_res["x"][n_states:].round(8),
        "cost_ll": fixp_res["fun"],
        "status": fixp_res["status"],
    }
def inputs():
    """Simulate a data set from known linear-cost parameters and check that
    NFXP estimation recovers them; returns baselines and estimates."""
    discount_factor = 0.9999
    n_states = 300
    n_buses = 200
    n_periods = 1000
    cost_scale = 0.001

    spec = {
        "groups": "group_4",
        "binsize": 5000,
        "model_specifications": {
            "discount_factor": discount_factor,
            "number_states": n_states,
            "maint_cost_func": "linear",
            "cost_scale": cost_scale,
        },
        "optimizer": {"approach": "NFXP", "algorithm": "scipy_L-BFGS-B"},
        "simulation": {
            "discount_factor": discount_factor,
            "seed": 123,
            "buses": n_buses,
            "periods": n_periods,
        },
    }

    # True parameters used to generate the synthetic sample.
    trans_base = np.loadtxt(TEST_FOLDER + "repl_test_trans.txt")
    params_base = np.loadtxt(TEST_FOLDER + "repl_params_linear.txt")

    trans_mat = create_transition_matrix(n_states, trans_base)
    costs = calc_obs_costs(n_states, lin_cost, params_base, cost_scale)
    ev_known = calc_fixp(trans_mat, costs, discount_factor)[0]

    df = simulate(spec["simulation"], ev_known, costs, trans_mat)
    trans_res, fixp_res = estimate(spec, df)

    return {
        "trans_base": trans_base,
        "params_base": params_base,
        "trans_est": trans_res["x"],
        "params_est": fixp_res["x"],
        "status": fixp_res["status"],
    }
def inputs():
    """Run an NFXP estimation with a linear cost function where the extra
    parameter ``omega`` is held fixed via an estimagic-style constraint."""
    discount_factor = 0.9999
    n_states = 90
    cost_scale = 1e-3

    start_params = pd.DataFrame(
        data=[10, 1, 0], columns=["value"], index=["RC", "theta_11", "omega"]
    )

    spec = {
        "model_specifications": {
            "discount_factor": discount_factor,
            "number_states": n_states,
            "maint_cost_func": "linear",
            "cost_scale": cost_scale,
        },
        "optimizer": {
            "approach": "NFXP",
            "algorithm": "scipy_L-BFGS-B",
            "gradient": "No",
            "params": start_params,
            # Keep omega at its starting value throughout the optimization.
            "constraints": [{"loc": "omega", "type": "fixed"}],
        },
    }

    df = pd.read_pickle(TEST_FOLDER + "group_4.pkl")
    trans_res, fixp_res = estimate(spec, df)

    # Drop the first observation of every bus (MultiIndex level 1 starts at 1).
    obs_slice = (slice(None), slice(1, None))

    return {
        "trans_est": trans_res["x"],
        "params_est": fixp_res["x"],
        "trans_ll": trans_res["fun"],
        "cost_ll": fixp_res["fun"],
        "states": df.loc[obs_slice, "state"].to_numpy(int),
        "decisions": df.loc[obs_slice, "decision"].to_numpy(int),
        "disc_fac": discount_factor,
        "num_states": n_states,
        "scale": cost_scale,
        "status": fixp_res["status"],
        "params": start_params,
    }
def inputs():
    """Run an MPEC estimation with a linear cost function and derive the
    implied demand function from the estimated parameters."""
    discount_factor = 0.9999
    n_states = 90
    n_params = 2
    cost_scale = 1e-3

    # Bounds: expected values in [-inf, 50], cost parameters in [0, inf].
    lower = np.concatenate((np.full(n_states, -np.inf), np.full(n_params, 0.0)))
    upper = np.concatenate((np.full(n_states, 50.0), np.full(n_params, np.inf)))

    # Starting point: zero expected values, RC = 4, remaining thetas = 1.
    start = np.concatenate(
        (np.full(n_states, 0.0), np.array([4.0]), np.ones(n_params - 1))
    )

    spec = {
        "model_specifications": {
            "discount_factor": discount_factor,
            "number_states": n_states,
            "maint_cost_func": "linear",
            "cost_scale": cost_scale,
        },
        "optimizer": {
            "approach": "MPEC",
            "algorithm": "LD_SLSQP",
            # NOTE(review): other fixtures in this suite pass "gradient"
            # instead of "derivative" — confirm which key estimate() reads.
            "derivative": "Yes",
            "params": start,
            "set_ftol_abs": 1e-15,
            "set_xtol_rel": 1e-15,
            "set_xtol_abs": 1e-3,
            "set_lower_bounds": lower,
            "set_upper_bounds": upper,
        },
    }

    demand_dict = {
        "RC_lower_bound": 2,
        "RC_upper_bound": 13,
        "demand_evaluations": 100,
        "tolerance": 1e-10,
        "num_periods": 12,
        "num_buses": 1,
    }

    # NOTE(review): this path repeats "/replication_test/" relative to the
    # other fixtures — verify TEST_FOLDER in this module does not already
    # end with that segment.
    df = pd.read_pickle(TEST_FOLDER + "/replication_test/group_4.pkl")
    trans_res, fixp_res = estimate(spec, df)

    # Demand evaluation needs transition probabilities plus the two cost
    # parameters at the tail of the MPEC solution vector.
    demand_params = np.concatenate((trans_res["x"], fixp_res["x"][-2:]))
    demand = get_demand(spec, demand_dict, demand_params)

    return {"demand_estimate": demand["demand"].astype(float).to_numpy()}
from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose
from ruspy.estimation.estimation import estimate
from ruspy.ruspy_config import TEST_RESOURCES_DIR
from ruspy.estimation.estimation_transitions import create_increases

# Location of the pre-computed replication fixtures used by this test module.
TEST_FOLDER = TEST_RESOURCES_DIR + "replication_test/"

# Replication setup for Rust's group-4 sample.
# NOTE(review): "beta"/"states" keys differ from the
# "model_specifications" layout used elsewhere — presumably consumed via
# the repl_4 code path of estimate(); confirm.
init_dict = {
    "groups": "group_4",
    "binsize": 5000,
    "beta": 0.9999,
    "states": 90
}

# The estimation runs once at import time so both fixtures below can share
# its results.
# NOTE(review): the file handle passed to pkl.load is never closed —
# consider a `with` block.
df = pkl.load(open(TEST_FOLDER + "group_4.pkl", "rb"))
result_trans, result_fixp = estimate(init_dict, df, repl_4=True)


@pytest.fixture
def inputs():
    """Collect the estimated transition/cost results for comparison."""
    out = dict()
    out["trans_est"] = result_trans["x"]
    out["params_est"] = result_fixp["x"]
    out["trans_ll"] = result_trans["fun"]
    out["cost_ll"] = result_fixp["fun"]
    return out


@pytest.fixture
def outputs():
    """Expected values fixture (definition continues beyond this chunk)."""
    out = dict()
"""Run script: replicate the Rust (1987) estimation from a YAML config."""
import os

import pandas as pd
import yaml

from ruspy.data.data_location import get_data_storage
from ruspy.data.data_processing import data_processing
from ruspy.data.data_reading import data_reading
from ruspy.estimation.estimation import estimate

data_path = get_data_storage()

with open("init_replication.yml") as y:
    init_dict = yaml.safe_load(y)

group = init_dict["replication"]["groups"]
binsize = str(init_dict["replication"]["binsize"])

group_pkl = f"{data_path}/pkl/group_data/{group}_{binsize}.pkl"
repl_pkl = f"{data_path}/pkl/replication_data/rep_{group}_{binsize}.pkl"

# The raw data only needs to be read once; afterwards the group pickle
# exists on disk.
if not os.path.isfile(group_pkl):
    data_reading()

# Reuse the processed replication sample if it is already cached.
if os.path.isfile(repl_pkl):
    data = pd.read_pickle(repl_pkl)
else:
    data = data_processing(init_dict["replication"])

result_transitions, result_fixp = estimate(init_dict["replication"], data)
print(result_transitions, result_fixp)
def sensitivity_simulation(specification, number_runs, alg_nfxp, tolerance=None, max_cont=20, max_nk=20):
    """
    performs a certain number of estimations with certain specifications
    on simulated data.

    Parameters
    ----------
    specification : tuple
        contains the information about which discount factor, cost function,
        grid size, derivative and approach is used for the estimation.
    number_runs : int
        number of runs per specification.
    alg_nfxp : string
        the algorithm used for the NFXP.
    tolerance : dict
        specifies the stopping tolerance for the optimizer of the NFXP.
    max_cont : int
        maximum number of contraction steps for the NFXP.
    max_nk : int
        maximum number of Newton-Kantorovich steps for the NFXP.

    Returns
    -------
    results : pd.DataFrame
        contains results such as likelihood, estimated parameters etc per run.
    """
    # set default tolerance depending on the chosen NFXP optimizer
    if tolerance is None:
        if alg_nfxp == "estimagic_bhhh":
            tolerance = {"tol": {"abs": 1e-05, "rel": 1e-08}}
        elif alg_nfxp == "scipy_L-BFGS-B":
            tolerance = {"gtol": 1e-05}

    # Initialize the set up for the nested fixed point algorithm
    stopping_crit_fixed_point = 1e-13
    switch_tolerance_fixed_point = 1e-2

    # Initialize the set up for MPEC
    rel_ipopt_stopping_tolerance = 1e-6

    # get specifications in order:
    # specification = ((disc_fac, (cost_func, scale), grid, gradient, approach), id)
    index_names = [
        "Discount Factor",
        "Cost Function",
        "Grid Size",
        "Analytical Gradient",
        "Approach",
    ]
    identifier = specification[1]
    indexer = list(specification[0])
    # use only the cost-function name (first element of the tuple) for the index
    indexer[1] = list(indexer[1])[0]
    specification = dict(zip(index_names, specification[0]))

    # load the pre-simulated data sets for this grid size
    # NOTE(review): the file handle passed to pickle.load is never closed —
    # consider a `with` block.
    data_sets = pickle.load(
        open(
            "data/simulated_data_" + str(specification["Grid Size"]) +
            ".pickle", "rb"))

    # set up empty dataframe for results, one row per run of this specification
    index = pd.MultiIndex.from_product(
        [*[[element] for element in indexer], range(number_runs)],
        names=[*index_names, "Run"],
    )
    columns = [
        "RC",
        "theta_11",
        "theta_12",
        "theta_13",
        "theta_30",
        "theta_31",
        "theta_32",
        "theta_33",
        "theta_34",
        "theta_35",
        "theta_36",
        "theta_37",
        "theta_38",
        "theta_39",
        "theta_310",
        "Likelihood",
        "Demand",
        "CPU Time",
        "Converged",
        "# of Major Iter.",
        "# of Func. Eval.",
        "# of Func. Eval. (Total)",
        "# of Bellm. Iter.",
        "# of N-K Iter.",
    ]
    results = pd.DataFrame(index=index, columns=columns)

    if specification["Approach"] == "NFXP":
        init_dict_nfxp = {
            "model_specifications": {
                "discount_factor": specification["Discount Factor"],
                "number_states": specification["Grid Size"],
                "maint_cost_func": specification["Cost Function"][0],
                "cost_scale": specification["Cost Function"][1],
            },
            "optimizer": {
                "approach": "NFXP",
                "algorithm": alg_nfxp,
                "gradient": specification["Analytical Gradient"],
                "algo_options": tolerance,
            },
            "alg_details": {
                "threshold": stopping_crit_fixed_point,
                "switch_tol": switch_tolerance_fixed_point,
                "max_contr_steps": max_cont,
                "max_newt_kant_steps": max_nk,
            },
        }

        # result fields filled by process_result for the NFXP
        column_slicer_nfxp = [
            "Likelihood",
            "CPU Time",
            "Converged",
            "# of Major Iter.",
            "# of Func. Eval.",
            "# of Bellm. Iter.",
            "# of N-K Iter.",
        ]

        for run in np.arange(number_runs):
            print(specification, run)
            # Run estimation
            data = data_sets[run]
            try:
                transition_result_nfxp, cost_result_nfxp = estimate(
                    init_dict_nfxp, data)
                # cost parameters: only the first len(x) of RC..theta_13 are set
                results.loc[(*indexer, run), (
                    slice("RC", "theta_13")
                )][:len(cost_result_nfxp["x"])] = cost_result_nfxp["x"]
                # transition parameters: only the first len(x) of
                # theta_30..theta_310 are set
                results.loc[(*indexer, run),
                            (slice("theta_30", "theta_310")
                             )][:len(transition_result_nfxp["x"]
                                     )] = transition_result_nfxp["x"]
                results.loc[(*indexer, run),
                            column_slicer_nfxp] = process_result(
                                specification["Approach"], cost_result_nfxp,
                                alg_nfxp)
                # quantity of interest: implied demand at the estimates
                results.loc[(*indexer, run), "Demand"] = get_qoi(
                    init_dict_nfxp,
                    np.concatenate(
                        (transition_result_nfxp["x"], cost_result_nfxp["x"])),
                )
            # the N-K step sometimes cannot be found due to a LinAlgError
            # somehow estimagic cannot handle this and translate it into
            # nonconvergence; instead it raises a ValueError
            # below I manually translate this into nonconvergence
            except ValueError:
                results.loc[(*indexer, run), :] = results.shape[1] * np.nan
                results.loc[(*indexer, run), "Converged"] = 0

        results.to_pickle("data/sensitivity/sensitivity_specification_" +
                          alg_nfxp + str(identifier) + ".pickle")

    elif specification["Approach"] == "MPEC":
        # number of cost parameters depends on the cost function's order
        if specification["Cost Function"][0] in [
                "linear", "square root", "hyperbolic"
        ]:
            num_cost_params = 2
        elif specification["Cost Function"][0] == "quadratic":
            num_cost_params = 3
        else:
            num_cost_params = 4

        init_dict_mpec = {
            "model_specifications": {
                "discount_factor": specification["Discount Factor"],
                "number_states": specification["Grid Size"],
                "maint_cost_func": specification["Cost Function"][0],
                "cost_scale": specification["Cost Function"][1],
            },
            "optimizer": {
                "approach": "MPEC",
                "algorithm": "ipopt",
                "gradient": specification["Analytical Gradient"],
                "tol": rel_ipopt_stopping_tolerance,
                # bounds: expected values in [-inf, 50],
                # cost parameters in [0, inf)
                "set_lower_bounds": np.concatenate((
                    np.full(specification["Grid Size"], -np.inf),
                    np.full(num_cost_params, 0.0),
                )),
                "set_upper_bounds": np.concatenate((
                    np.full(specification["Grid Size"], 50.0),
                    np.full(num_cost_params, np.inf),
                )),
            },
        }

        # result fields filled by process_result for MPEC
        column_slicer_mpec = [
            "Likelihood",
            "CPU Time",
            "Converged",
            "# of Major Iter.",
            "# of Func. Eval.",
            "# of Func. Eval. (Total)",
        ]

        for run in np.arange(number_runs):
            # Run estimation
            data = data_sets[run]
            transition_result_mpec, cost_result_mpec = estimate(
                init_dict_mpec, data)
            # cost parameters sit after the Grid Size expected values in the
            # MPEC solution vector
            results.loc[(*indexer, run), (
                slice("RC", "theta_13")
            )][:len(cost_result_mpec["x"][specification["Grid Size"]:]
                    )] = cost_result_mpec["x"][specification["Grid Size"]:]
            results.loc[(*indexer, run), (
                slice("theta_30", "theta_310")
            )][:len(transition_result_mpec["x"])] = transition_result_mpec["x"]
            results.loc[(*indexer, run), column_slicer_mpec] = process_result(
                specification["Approach"], cost_result_mpec, alg_nfxp)
            # quantity of interest: implied demand at the estimates
            results.loc[(*indexer, run), "Demand"] = get_qoi(
                init_dict_mpec,
                np.concatenate((
                    transition_result_mpec["x"],
                    cost_result_mpec["x"][specification["Grid Size"]:],
                )),
            )

        results.to_pickle("data/sensitivity/sensitivity_specification_" +
                          str(identifier) + ".pickle")

    return results
def get_iskhakov_results(
    discount_factor,
    approach,
    starting_cost_params,
    starting_expected_value_fun,
    number_runs,
    number_buses,
    number_periods,
    number_states,
    number_cost_params,
):
    """
    Run the Monte Carlo Simulation to replicate Iskhakov et al. (2016)

    Parameters
    ----------
    discount_factor : list
        beta vector for which to run the simulation.
    approach : list
        run with NFXP and/or MPEC.
    starting_cost_params : numpy.array
        contains the starting values for the cost parameters.
    starting_expected_value_fun : numpy.array
        contains the starting values of the expected values for MPEC.
    number_runs : float
        number of runs per beta and starting vector combination.
    number_buses : int
        number of buses per data set.
    number_periods : int
        number of months per data set.
    number_states : int
        number of grid points in which the mileage state is discretized.
    number_cost_params : int
        number of cost parameters.

    Returns
    -------
    results : pd.DataFrame
        contains the estimates for the structural parameters per run.
    """
    # Initialize the set up for the nested fixed point algorithm
    stopping_crit_fixed_point = 1e-13
    switch_tolerance_fixed_point = 1e-2

    # Initialize the set up for MPEC:
    # expected values in [-inf, 50], cost parameters in [0, inf)
    lower_bound = np.concatenate(
        (np.full(number_states, -np.inf), np.full(number_cost_params, 0.0)))
    upper_bound = np.concatenate(
        (np.full(number_states, 50.0), np.full(number_cost_params, np.inf)))
    rel_ipopt_stopping_tolerance = 1e-6

    init_dict_nfxp = {
        "model_specifications": {
            "number_states": number_states,
            "maint_cost_func": "linear",
            "cost_scale": 1e-3,
        },
        "optimizer": {
            "approach": "NFXP",
            "algorithm": "estimagic_bhhh",
            # implies that we use analytical first order derivatives as opposed
            # to numerical ones
            "gradient": "Yes",
        },
        "alg_details": {
            "threshold": stopping_crit_fixed_point,
            "switch_tol": switch_tolerance_fixed_point,
        },
    }

    init_dict_mpec = {
        "model_specifications": {
            "number_states": number_states,
            "maint_cost_func": "linear",
            "cost_scale": 1e-3,
        },
        "optimizer": {
            "approach": "MPEC",
            "algorithm": "ipopt",
            # implies that we use analytical first order derivatives as opposed
            # to numerical ones
            "gradient": "Yes",
            "tol": rel_ipopt_stopping_tolerance,
            "set_lower_bounds": lower_bound,
            "set_upper_bounds": upper_bound,
        },
    }

    # Initialize DataFrame to store the results of each run of the Monte Carlo
    # simulation
    index = pd.MultiIndex.from_product(
        [
            discount_factor,
            range(number_runs),
            range(starting_cost_params.shape[1]),
            approach,
        ],
        names=["Discount Factor", "Run", "Start", "Approach"],
    )
    columns = [
        "RC",
        "theta_11",
        "theta_30",
        "theta_31",
        "theta_32",
        "theta_33",
        "CPU Time",
        "Converged",
        "# of Major Iter.",
        "# of Func. Eval.",
        "# of Bellm. Iter.",
        "# of N-K Iter.",
    ]
    results = pd.DataFrame(index=index, columns=columns)

    # Main loop to calculate the results for each run
    for factor in discount_factor:
        # load simulated data for this discount factor
        mat = scipy.io.loadmat("data/RustBusTableXSimDataMC250_beta" +
                               str(int(100000 * factor)))

        for run in range(number_runs):
            # checkpoint the intermediate results every tenth run
            if run in np.arange(10, number_runs, 10):
                results.to_pickle("data/intermediate/results_" + str(factor))
            data = process_data(mat, run, number_buses, number_periods)

            for start in range(starting_cost_params.shape[1]):
                # Adapt the initialization dictionary of NFXP for this run
                init_dict_nfxp["model_specifications"][
                    "discount_factor"] = factor
                init_dict_nfxp["optimizer"]["params"] = pd.DataFrame(
                    starting_cost_params[:, start], columns=["value"])

                # Run NFXP using ruspy
                transition_result_nfxp, cost_result_nfxp = estimate(
                    init_dict_nfxp, data)

                # store the results of this run
                results.loc[factor, run, start,
                            "NFXP"] = process_result_iskhakov(
                                "NFXP", transition_result_nfxp,
                                cost_result_nfxp, number_states)

                # Adapt the initialization dictionary of MPEC for this run
                init_dict_mpec["model_specifications"][
                    "discount_factor"] = factor
                init_dict_mpec["optimizer"]["params"] = np.concatenate(
                    (starting_expected_value_fun,
                     starting_cost_params[:, start]))

                # Run MPEC using ruspy
                transition_result_mpec, cost_result_mpec = estimate(
                    init_dict_mpec, data)

                # store the results of this run; the Bellman/N-K iteration
                # columns only apply to NFXP and are left untouched for MPEC
                # NOTE(review): this is chained .loc indexing — verify the
                # assignment actually writes back into `results` rather than
                # into a temporary copy.
                results.loc[factor, run, start,
                            "MPEC"].loc[~results.columns.isin(
                                ["# of Bellm. Iter.", "# of N-K Iter."]
                            )] = process_result_iskhakov(
                                "MPEC", transition_result_mpec,
                                cost_result_mpec, number_states)

    return results