def check_estimation(self):
    """Check model attributes that are only relevant for estimation tasks.

    The instance has to be locked, no scratch file of another estimation may
    exist in the working directory, at least one parameter has to be free, and
    the estimation dataset has to exist. If ``maxfun`` is positive, the
    requested optimizer is validated against the optimizer options and the
    optimizers available for the selected version.

    Returns
    -------
    self
        The instance itself.

    Raises
    ------
    UserError
        If the number of fixed parameters equals ``num_paras`` or the
        estimation dataset does not exist.
    AssertionError
        If any of the remaining checks fails.

    """
    # The class instance needs to be locked.
    assert self.get_attr("is_locked")

    # The scratch file indicates another estimation running in this directory.
    assert not os.path.exists(".estimation.respy.scratch")

    # Distribute class attributes
    attr_names = (
        "optimizer_options",
        "optimizer_used",
        "optim_paras",
        "version",
        "maxfun",
        "num_paras",
        "file_est",
    )
    (
        optimizer_options,
        optimizer_used,
        optim_paras,
        version,
        maxfun,
        num_paras,
        file_est,
    ) = dist_class_attributes(self, *attr_names)

    # An estimation with all parameters fixed is pointless.
    num_fixed = sum(optim_paras["paras_fixed"])
    if num_fixed == num_paras:
        raise UserError("Estimation requires at least one free parameter")

    # The estimation dataset needs to be available.
    if not os.path.exists(file_est):
        raise UserError("Estimation dataset does not exist")

    if maxfun > 0:
        assert optimizer_used in optimizer_options

        # The requested optimizer has to be available for the chosen version.
        if version == "python":
            admissible = OPT_EST_PYTH
        elif version == "fortran":
            admissible = OPT_EST_FORT
        else:
            raise AssertionError
        assert optimizer_used in admissible

    return self
def stop():
    """Request the termination of a running estimation.

    If the scratch file ``.estimation.respy.scratch`` exists in the current
    directory, an empty file ``.stop.respy.scratch`` is created as the stop
    signal for the package.

    Raises
    ------
    UserError
        If ``.estimation.respy.scratch`` does not exist.

    """
    if not os.path.exists(".estimation.respy.scratch"):
        raise UserError("... no estimation running at this time")

    # Only the existence of the file matters, so it is left empty.
    with open(".stop.respy.scratch", "w"):
        pass
def dist_input_arguments(parser):
    """Parse and check the command line input of the script.

    Both the initialization file given on the command line and the file
    ``est.respy.info`` have to exist.

    Returns
    -------
    init_file
        The ``init_file`` attribute of the parsed arguments.

    Raises
    ------
    UserError
        If one of the two files does not exist.

    """
    init_file = parser.parse_args().init_file

    # Files that have to be present, checked in this order.
    required_files = [
        (init_file, "Initialization file does not exist"),
        (
            "est.respy.info",
            "Information on parameter values from last step unavailable",
        ),
    ]
    for path, message in required_files:
        if not os.path.exists(path):
            raise UserError(message)

    return init_file
def scripts_update(init_file):
    """Update the model parametrization in the initialization file.

    The parameter values of the last step are taken from ``get_est_info()``
    and written into the coefficient entries of the initialization
    dictionary, which then replaces ``init_file``.

    Raises
    ------
    UserError
        If the parameter values contain the missing value marker ``"---"`` or
        if their number does not match the number of types implied by the
        initialization file.

    """
    # Collect baseline update
    init_dict = read_init_file(init_file)
    paras_steps = get_est_info()["paras_step"]

    # The update is impossible if some parameter values could not be read.
    if "---" in paras_steps.tolist():
        raise UserError("Missing values in est.respy.info")

    # The size of the parameter vector has to fit the initialization file. This
    # might fail, for example, if the number of types was changed in the
    # initialization file and the update is requested with an earlier logfile.
    num_types = len(init_dict["TYPE SHARES"]["coeffs"]) / 2 + 1
    num_paras = len(paras_steps)
    expected_num_paras = 53 + (num_types - 1) * 6
    if num_paras != expected_num_paras:
        raise UserError("Info does not fit the current model specification")

    optim_paras = distribute_parameters(paras_steps, True)
    shocks_coeffs = paras_steps[43:53]

    # Sections that take their coefficients unchanged from the parameters.
    direct_updates = [
        ("COMMON", "coeffs_common"),
        ("OCCUPATION A", "coeffs_a"),
        ("OCCUPATION B", "coeffs_b"),
        ("EDUCATION", "coeffs_edu"),
        ("HOME", "coeffs_home"),
        ("BASICS", "delta"),
    ]
    for section, key_ in direct_updates:
        init_dict[section]["coeffs"] = optim_paras[key_]

    init_dict["SHOCKS"]["coeffs"] = shocks_coeffs

    # The leading entries of the type shares and shifts are skipped.
    type_shares = optim_paras["type_shares"]
    init_dict["TYPE SHARES"]["coeffs"] = type_shares[2:]
    flat_shifts = optim_paras["type_shifts"].flatten()
    init_dict["TYPE SHIFTS"]["coeffs"] = flat_shifts[4:]

    # Write to an intermediate file first so that the original file is not lost
    # if a problem occurs during printing.
    tmp_file = ".model.respy.ini"
    write_init_file(init_dict, tmp_file)
    shutil.move(tmp_file, init_file)
def scripts_check(request, respy_obj):
    """Run the checks for the requested task.

    Nothing is checked unless ``request`` equals ``"estimate"``. In that case
    each row of the processed dataset is checked against the state space and
    ``respy_obj.check_estimation()`` is called afterwards.

    Raises
    ------
    UserError
        If a row of the dataset has a negative schooling entry or refers to a
        state that the state space indexer does not provide.

    """
    # Distribute model parameters
    num_periods, edu_spec, num_types, optim_paras = dist_class_attributes(
        respy_obj, "num_periods", "edu_spec", "num_types", "optim_paras"
    )

    # Additional checks are only needed if an estimation is requested.
    if request != "estimate":
        return

    # Create the grid of the admissible states.
    state_space = StateSpace(
        num_periods, num_types, edu_spec["start"], edu_spec["max"], optim_paras
    )

    # Check the structure of the dataset row by row.
    data_array = process_dataset(respy_obj).to_numpy()

    for row in data_array:
        period = int(row[1])

        # Observable components of the state space and the lagged choice.
        exp_a, exp_b, edu, choice_lagged = row[4:].astype(int)

        try:
            # Schooling must not be negative.
            np.testing.assert_equal(edu >= 0, True)

            # The lookup fails if the state is infeasible in any period or not
            # defined for the requested period.
            idx = state_space.indexer[period, exp_a, exp_b, edu, choice_lagged - 1]
            np.testing.assert_equal(idx >= 0, True)
        except (IndexError, AssertionError):
            raise UserError(ERR_MSG)

    # We also take a special look at the optimizer options.
    respy_obj.check_estimation()
def dist_input_arguments(parser):
    """Parse and check the command line input of the script.

    Returns
    -------
    tuple
        The ``init_file`` and ``file_sim`` attributes of the parsed arguments.

    Raises
    ------
    UserError
        If the initialization file does not exist.

    """
    args = parser.parse_args()
    init_file, file_sim = args.init_file, args.file_sim

    # The initialization file has to be available.
    if not os.path.exists(init_file):
        raise UserError("Initialization file does not exist")

    return init_file, file_sim
def get_est_info():
    """Read the parameters from the last step of a previous estimation run.

    The information is parsed from fixed line numbers of ``est.respy.info`` in
    the current directory. Entries that cannot be converted to a number are
    replaced by the string ``"---"``.

    Returns
    -------
    dict
        The criterion values (``value_start``, ``value_step``,
        ``value_current``), the counters ``num_step`` and ``num_eval``, and
        the parameter arrays ``paras_start``, ``paras_step`` and
        ``paras_current``.

    Raises
    ------
    UserError
        If ``est.respy.info`` does not exist.

    """
    fname = "est.respy.info"
    labels = ["start", "step", "current"]
    converters = {"float": float, "int": int}

    def _process_value(input_, type_):
        """Convert a token, marking unparsable input as missing."""
        convert = converters[type_]
        try:
            return convert(input_)
        except ValueError:
            return "---"

    def _read_tokens(lineno):
        """Return the tokens of the requested line of the file."""
        return shlex.split(linecache.getline(fname, lineno))

    # We need to make sure that the updating file actually exists.
    if not os.path.exists(fname):
        raise UserError(
            "Parameter update impossible as file est.respy.info does not exist"
        )

    # Drop cached content so that the file is read afresh.
    linecache.clearcache()
    rslt = {}

    # Value of the criterion function
    tokens = _read_tokens(6)
    for pos, label in enumerate(labels):
        rslt["value_" + label] = _process_value(tokens[pos], "float")

    # Total number of evaluations and steps
    rslt["num_step"] = _process_value(_read_tokens(49)[3], "int")
    rslt["num_eval"] = _process_value(_read_tokens(51)[3], "int")

    # Parameter values: the block ends at the first line without tokens.
    rows = []
    for lineno in range(13, 99):
        tokens = _read_tokens(lineno)
        if not tokens:
            break
        rows.append(tokens)

    # The first column is skipped, the following ones hold the three sets.
    for column, label in enumerate(labels, start=1):
        values = [_process_value(row[column], "float") for row in rows]
        rslt["paras_" + label] = np.array(values)

    return rslt
def dist_input_arguments(parser):
    """Parse and check the command line input of the script.

    Returns
    -------
    tuple
        The ``single`` and ``init_file`` attributes of the parsed arguments.

    Raises
    ------
    AssertionError
        If ``single`` is not a boolean flag.
    UserError
        If the initialization file does not exist.

    """
    args = parser.parse_args()
    init_file, single = args.init_file, args.single

    # Check attributes
    assert single in (True, False)

    if not os.path.exists(init_file):
        raise UserError("Initialization file does not exist")

    return single, init_file
def scripts_compare(base_init, is_update):
    """Construct model fit statistics from the observed and simulated dataset.

    The model is set up from ``base_init`` (or from an updated copy of it if
    ``is_update`` is true), a dataset is simulated, and a report comparing
    both datasets is written to ``compare.respy.info``.

    Raises
    ------
    UserError
        If the names of the estimation and simulation file share the same
        part before the first dot.

    """
    # In case of updating, work on a copy of the initialization file that
    # receives the updated parameter values.
    init_file = base_init
    if is_update:
        init_file = "compare.respy.ini"
        shutil.copy(base_init, init_file)
        scripts_update(init_file)

    # Read in relevant model specification.
    respy_obj = RespyCls(init_file)
    respy_obj.write_out("compare.respy.ini")

    # Distribute some information for further processing.
    num_periods, num_agents_est, num_agents_sim = dist_class_attributes(
        respy_obj, "num_periods", "num_agents_est", "num_agents_sim"
    )

    # The comparison is refused if both file names share the same stem, as
    # the simulated dataset would then overwrite the estimation dataset.
    stem_est = respy_obj.attr["file_est"].split(".")[0]
    stem_sim = respy_obj.attr["file_sim"].split(".")[0]
    if stem_est == stem_sim:
        raise UserError(" Simulation would overwrite estimation dataset")

    data_obs = process_dataset(respy_obj)
    data_sim = respy_obj.simulate()[1]

    # Transition matrices are only constructed for more than one period.
    has_transitions = num_periods > 1
    if has_transitions:
        tf = [
            construct_transition_matrix(data_obs),
            construct_transition_matrix(data_sim),
        ]

    # Number of distinct periods in the observed dataset
    max_periods = len(data_obs["Period"].unique())

    # Prepare results
    rslt_initial = _prepare_initial(data_obs, data_sim, num_agents_est, num_agents_sim)
    rslt_choice, rmse_choice = _prepare_choices(data_obs, data_sim)
    rslt_a = _prepare_wages(data_obs, data_sim, "Occupation A")
    rslt_b = _prepare_wages(data_obs, data_sim, "Occupation B")

    sources = ["Observed", "Simulated"]

    with open("compare.respy.info", "w") as file_:
        file_.write("\n Comparing the Observed and Simulated Economy\n\n")
        file_.write(" Number of Periods: " + str(max_periods) + "\n\n")

        # Comparing the initial schooling shares
        file_.write("\n Initial Schooling Shares \n\n")
        fmt_initial = "{:>15}" * 3 + "\n"
        header = ["Level", "Observed", "Simulated"]
        file_.write(fmt_initial.format(*header) + "\n")
        for info in rslt_initial:
            info[1:] = [format_float(x) for x in info[1:]]
            file_.write(fmt_initial.format(*info))

        # Comparing the choice distributions
        file_.write("\n\n Choices \n\n")
        fmt_header = "{:>15}" * 7 + "\n"
        header = ["Data", "Period", "Count", "White", "Blue", "School", "Home"]
        file_.write(fmt_header.format(*header) + "\n")
        fmt_choice = "{:>15}" * 3 + "{:15.2f}" * 4 + "\n"
        for period in range(max_periods):
            for name in sources:
                entries = [name, period + 1] + rslt_choice[name][period]
                file_.write(fmt_choice.format(*entries))
            file_.write("\n")
        file_.write(" Overall RMSE {:14.5f}\n".format(rmse_choice))

        # Comparing the transition matrices
        if has_transitions:
            file_.write("\n\n Transition Matrix \n\n")
            fmt_header = "{:>15}" * 6 + "\n\n"
            choices = ["Work A", "Work B", "School", "Home"]
            file_.write(fmt_header.format(*(["", ""] + choices)))
            fmt_transition = "{:>15}{:>15}" + "{:15.4f}" * 4 + "\n"
            for i in range(4):
                for j, source in enumerate(sources):
                    entries = [source, choices[i]] + tf[j][i, :].tolist()
                    file_.write(fmt_transition.format(*entries))
                file_.write("\n")

        # Comparing the wages distributions
        file_.write("\n Outcomes \n\n")
        fmt_wages = "{:>15}" * 8 + "\n"
        header = ["Data", "Period", "Count", "Mean", "Std.", "25%", "50%", "75%"]
        file_.write(fmt_wages.format(*header) + "\n")

        for rslt, name in [(rslt_a, "Occupation A"), (rslt_b, "Occupation B")]:
            file_.write("\n " + name + " \n\n")
            for period in range(max_periods):
                for label in sources:
                    record = rslt[label][period]
                    prefix = [label, period + 1, int(record[0])]
                    # The occurrence of NAN requires special care.
                    stats = [format_float(x) for x in record[1:]]
                    file_.write(fmt_wages.format(*(prefix + stats)))
                file_.write("\n")
def check_model_attributes(attr_dict):
    """Check the attributes of a model specification.

    Types and admissible ranges of the entries of ``attr_dict`` are checked,
    followed by the model parameters, the constraints on the covariance matrix
    of the shocks, the parameter bounds, and the optimizer options.

    Raises
    ------
    AssertionError
        If an attribute has an invalid type or value.
    UserError
        If the constraints on the covariance matrix are misspecified.

    """
    a = attr_dict

    def _assert_finite_int(value):
        """Assert that the value is finite and an integer."""
        assert np.isfinite(value)
        assert isinstance(value, int)

    # Number of parameters
    assert isinstance(a["num_paras"], int)
    assert a["num_paras"] >= 53

    # Parallelism: several processes require the Fortran version and MPI.
    num_procs = a["num_procs"]
    assert isinstance(num_procs, int)
    assert num_procs > 0
    if num_procs > 1:
        assert a["version"] == "fortran"
        assert IS_PARALLELISM_MPI

    # Version of package
    version = a["version"]
    assert version in ["fortran", "python"]
    if version == "fortran":
        assert IS_FORTRAN

    # Several threads require the Fortran version and OpenMP.
    num_threads = a["num_threads"]
    assert isinstance(num_threads, int)
    assert num_threads >= 1
    if num_threads >= 2:
        assert version == "fortran"
        assert IS_PARALLELISM_OMP

    # Debug status and forward-looking agents
    for flag in ("is_debug", "is_myopic"):
        assert a[flag] in [True, False]

    # Seeds
    for seed in [a["seed_emax"], a["seed_sim"], a["seed_prob"]]:
        _assert_finite_int(seed)
        assert seed > 0

    # Number of agents
    for num_agents in [a["num_agents_sim"], a["num_agents_est"]]:
        _assert_finite_int(num_agents)
        assert num_agents > 0

    # Number of periods
    _assert_finite_int(a["num_periods"])
    assert a["num_periods"] > 0

    # Number of draws for Monte Carlo integration
    _assert_finite_int(a["num_draws_emax"])
    assert a["num_draws_emax"] >= 0

    # Window for smoothing parameter
    assert isinstance(a["tau"], float)
    assert a["tau"] > 0

    # Interpolation
    assert a["is_interpolated"] in [True, False]
    assert isinstance(a["num_points_interp"], int)
    assert a["num_points_interp"] > 0

    # Simulation of S-ML
    assert isinstance(a["num_draws_prob"], int)
    assert a["num_draws_prob"] > 0

    # Maximum number of iterations
    assert isinstance(a["maxfun"], int)
    assert a["maxfun"] >= 0

    # Optimizers
    assert a["optimizer_used"] in OPT_EST_FORT + OPT_EST_PYTH

    # Scaling
    precond_spec = a["precond_spec"]
    assert precond_spec["type"] in ["identity", "gradient", "magnitudes"]
    for key_ in ["minimum", "eps"]:
        assert isinstance(precond_spec[key_], float)
        assert precond_spec[key_] > 0.0

    # Education
    edu_spec = a["edu_spec"]
    edu_max = edu_spec["max"]
    assert isinstance(edu_max, int)
    assert edu_max > 0

    edu_start = edu_spec["start"]
    assert isinstance(edu_start, list)
    assert len(edu_start) == len(set(edu_start))
    assert all(isinstance(item, int) for item in edu_start)
    assert all(item > 0 for item in edu_start)
    assert all(item <= edu_max for item in edu_start)

    assert all(isinstance(item, float) for item in edu_spec["share"])
    assert all(0 <= item <= 1 for item in edu_spec["lagged"])
    assert all(0 <= item <= 1 for item in edu_spec["share"])
    np.testing.assert_almost_equal(np.sum(edu_spec["share"]), 1.0, decimal=4)

    # Derivatives
    assert a["derivatives"] in ["forward-differences"]

    # Check model parameters
    optim_paras = a["optim_paras"]
    check_model_parameters(optim_paras)

    # Parameter values that are compared against the bounds below.
    x = get_optim_paras(optim_paras, a["num_paras"], "all", True)

    # It is not clear at this point how to impose parameter constraints on the
    # covariance matrix in a flexible manner. So, either all fixed or none. As
    # a special case, all off-diagonal elements may be fixed to zero.
    shocks_coeffs = optim_paras["shocks_cholesky"][np.tril_indices(4)]
    shocks_fixed = np.array(optim_paras["paras_fixed"][43:53])

    all_free = not shocks_fixed.any()

    dim = len(optim_paras["shocks_cholesky"])
    lower_triangle = np.tril_indices(dim)

    # All mass on the diagonal means that the off-diagonal elements are zero.
    coeffs_matrix = np.zeros((dim, dim))
    coeffs_matrix[lower_triangle] = shocks_coeffs
    off_diagonals_zero = np.diag(coeffs_matrix).sum() == coeffs_matrix.sum()

    fixed_matrix = np.zeros((dim, dim), dtype=bool)
    fixed_matrix[lower_triangle] = shocks_fixed
    off_diagonals_fixed = fixed_matrix[np.tril_indices(dim, k=-1)].all()

    diagonal_matrix = off_diagonals_zero & off_diagonals_fixed

    if not (all_free or shocks_fixed.all() or diagonal_matrix):
        raise UserError(" Misspecified constraints for covariance matrix")

    # Fixed indicators and bounds are lists with one entry per parameter.
    for label in ["paras_fixed", "paras_bounds"]:
        assert isinstance(optim_paras[label], list)
        assert len(optim_paras[label]) == a["num_paras"]

    bounds = optim_paras["paras_bounds"]

    # The lower bound of the first parameter has to be non-negative
    # (presumably the discount rate -- TODO confirm).
    assert bounds[0][0] >= 0.00

    # Each parameter value has to lie within its printable bounds.
    for idx, (lower, upper) in enumerate(bounds):
        if lower is not None:
            assert isinstance(lower, float)
            assert lower <= x[idx]
            assert abs(lower) < PRINT_FLOAT
        if upper is not None:
            assert isinstance(upper, float)
            assert upper >= x[idx]
            assert abs(upper) < PRINT_FLOAT
        if (upper is not None) and (lower is not None):
            assert upper >= lower

    _check_optimizer_options(a["optimizer_options"])