def get_bounds(label, version):
    """Draw a random pair of valid bounds for a single parameter.

    A random wedge (distance between the bounds) is drawn first, then a lower
    bound such that ``lower + wedge`` stays below a label-specific ceiling.
    With a probability of 10% each, the lower and/or the upper bound is
    replaced by the corresponding entry in ``DEFAULT_BOUNDS``.

    Parameters
    ----------
    label : int or str
        Question label (1 to 45) or name of a preference parameter.
    version : str
        Either 'scaled_archimedean' or 'nonstationary'; only consulted for
        labels that are not questions.

    Returns
    -------
    list
        ``[lower, upper]``, each rounded to four decimals.

    Raises
    ------
    TrempyError
        If the version is unknown or the label does not belong to the version.
    """
    # The wedge is always the very first draw from the random stream.
    wedge = float(np.random.uniform(0.03, 0.50))

    is_question = label in range(1, 46)

    # Determine the ceiling that the upper bound must respect.
    if is_question:
        ceiling = 0.98
    elif version in ['scaled_archimedean']:
        if label in ['r_self', 'r_other']:
            ceiling = 5.0
        elif label in ['delta', 'self', 'other']:
            ceiling = 0.98
        else:
            raise TrempyError('flawed request for bounds')
    elif version in ['nonstationary']:
        if label in ['alpha', 'beta', 'gamma']:
            ceiling = 5.0
        elif label == 'y_scale' or label.startswith(
                ('discount_factors', 'unrestricted_weights')):
            ceiling = 0.98
        else:
            raise TrempyError('flawed request for bounds')
    else:
        raise TrempyError('version not implemented')

    lower = float(np.random.uniform(0.01, ceiling - wedge))

    # The upper bound is the lower bound shifted by the wedge.
    upper = lower + wedge

    # Discount factors start at zero to handle exponential and hyperbolic
    # discounting.
    if not is_question and label.startswith('discount_factors'):
        lower = 0.00

    # Occasionally fall back to the default bounds so they are covered too.
    # Both coins are always flipped, in this order.
    if np.random.choice([True, False], p=[0.1, 0.9]):
        lower = DEFAULT_BOUNDS[label][0]
    if np.random.choice([True, False], p=[0.1, 0.9]):
        upper = DEFAULT_BOUNDS[label][1]

    return [np.around(float(bound), decimals=4) for bound in (lower, upper)]
def get_values(self, perspective, which):
    """Collect parameter values in the order given by ``para_labels``.

    Parameters
    ----------
    perspective : str
        'econ' returns the stored values, 'optim' returns the values after
        passing each parameter through ``self._to_optimizer``.
    which : str
        'all' for every parameter, 'free' to leave out fixed parameters.

    Returns
    -------
    list
        One value per selected parameter.

    Raises
    ------
    TrempyError
        If a parameter is selected and ``perspective`` is neither 'econ' nor
        'optim'.
    """
    # Antibugging
    np.testing.assert_equal(which in ['all', 'free'], True)

    # Distribute class attributes
    para_objs = self.attr['para_objs']
    optimizer = self.attr['optimizer']

    only_free = which == 'free'

    collected = []
    for label in self.attr['para_labels']:
        for candidate in para_objs:
            # Fixed parameters are dropped when only free ones are requested.
            if only_free and candidate.get_attr('is_fixed'):
                continue

            # Only the parameter that carries the current label is relevant.
            if label != candidate.get_attr('label'):
                continue

            if perspective == 'econ':
                collected.append(candidate.get_attr('value'))
            elif perspective == 'optim':
                # The transformation depends on the choice of algorithm.
                collected.append(self._to_optimizer(candidate, optimizer))
            else:
                raise TrempyError('misspecified request')

    return collected
def set_values(self, perspective, which, values):
    """Assign new values to the parameters in the order of ``para_labels``.

    Parameters
    ----------
    perspective : str
        'econ' stores the entries of ``values`` unchanged, 'optim' first maps
        each entry through ``self._to_econ`` using the parameter's bounds.
    which : str
        'all' for every parameter, 'free' to leave fixed parameters untouched.
    values : sequence
        New values, indexed by the position of the selected parameter.

    Raises
    ------
    TrempyError
        If a parameter is selected and ``perspective`` is neither 'econ' nor
        'optim'.
    """
    # Antibugging
    np.testing.assert_equal(which in ['all', 'free'], True)

    # Distribute class attributes
    para_objs = self.attr['para_objs']
    optimizer = self.attr['optimizer']

    only_free = which == 'free'

    position = 0
    for label in self.attr['para_labels']:
        for target in para_objs:
            # Fixed parameters are dropped when only free ones are requested.
            if only_free and target.get_attr('is_fixed'):
                continue

            # Only the parameter that carries the current label is relevant.
            if label != target.get_attr('label'):
                continue

            if perspective == 'econ':
                new_value = values[position]
            elif perspective == 'optim':
                bounds = target.get_attr('bounds')
                new_value = self._to_econ(values[position], bounds, optimizer)
            else:
                raise TrempyError('misspecified request')

            target.set_attr('value', new_value)
            target.check_integrity()

            position += 1
def check_optional_args(init_dict):
    """Validate and complete the optional entries of an initialization dict.

    Only the 'nonstationary' version has optional entries; for every other
    version the dictionary is left untouched. Missing entries in the VERSION
    block are filled in place with their defaults, and all six
    ``unrestricted_weights_*`` labels must be present in the DISCOUNTING block.

    Raises
    ------
    AssertionError
        If 'discounting' or 'df_other' is present with an unsupported value.
    TrempyError
        If an ``unrestricted_weights_*`` label is missing, or is None without
        being fixed.
    """
    version_block = init_dict['VERSION']

    if version_block['version'] not in ['nonstationary']:
        return

    # Discounting defaults to None; a user-supplied value is validated.
    if 'discounting' in version_block:
        np.testing.assert_equal(
            version_block['discounting'] in ['hyperbolic', 'exponential', None], True)
    else:
        version_block['discounting'] = None

    # The discount function for "other" defaults to 'equal_univariate'.
    if 'df_other' in version_block:
        np.testing.assert_equal(
            version_block['df_other'] in ['free', 'linear', 'exponential', 'equal_univariate'],
            True)
    else:
        version_block['df_other'] = 'equal_univariate'

    # Fill in the stationary_model flag if the user did not specify it.
    version_block.setdefault('stationary_model', False)

    # Enforce that there is a boolean 'heterogeneity' flag. Default: False.
    version_block.setdefault('heterogeneity', False)

    discounting_block = init_dict['DISCOUNTING']
    for horizon in [0, 1, 3, 6, 12, 24]:
        label = 'unrestricted_weights_{}'.format(int(horizon))

        if label not in discounting_block:
            raise TrempyError(
                'Please set unused optional arguments to None in init file.'
            )

        # An unused optional argument (None) has to be fixed, otherwise the
        # optimizer would be asked to estimate it.
        value, is_fixed, _ = discounting_block[label]
        if value is None and is_fixed is False:
            raise TrempyError('Optional argument misspecified.')
def perturbate_single(init_dict, label, value=None):
    """Perturbate a single parameter and fix all other parameters for estimation.

    The bounds of the perturbed parameter are reset to its default bounds,
    which increases the scope for perturbations.

    Parameters
    ----------
    init_dict : dict
        Initialization dictionary; it is modified in place.
    label : str
        Label of the preference parameter to perturbate.
    value : float, optional
        New value for the parameter. If None, a value is drawn uniformly from
        the interior of the default bounds.

    Returns
    -------
    tuple
        ``(old_dict, init_dict)`` where ``old_dict`` is a deep copy of the
        input in which only the perturbed parameter was changed (freed, with
        default bounds, original value) and ``init_dict`` is the perturbed
        dictionary.

    Raises
    ------
    TrempyError
        If the label is not a preference parameter of the version, or if it is
        an unused optional argument (its current value is None).
    """
    old_dict = copy.deepcopy(init_dict)

    version = init_dict['VERSION']['version']
    if label not in PREFERENCE_PARAMETERS[version]:
        raise TrempyError('Version {0} has no parameters {1}'.format(version, label))

    # Handle optional parameters. An unused optional argument is stored with a
    # value of None. We look the entry up in the estimation groups instead of a
    # hard-coded block name and check it before anything is modified, so a
    # rejected request leaves init_dict untouched.
    if label.startswith('unrestricted_weights'):
        for group in ESTIMATION_GROUP[version]:
            entry = init_dict[group].get(label)
            if entry is not None and entry[0] is None:
                raise TrempyError(
                    'Cannot set value for unused argument: {}.'.format(label))

    # Fix variance for each question.
    for q in init_dict['QUESTIONS'].keys():
        init_dict['QUESTIONS'][q][1] = True

    # Fix every parameter except for perturbed one. The perturbed one is "un-fixed".
    for group in ESTIMATION_GROUP[version]:
        for key in init_dict[group].keys():
            current_value, _, bounds = init_dict[group][key]

            if key == label:
                # Reset bounds to default
                lower, upper = DEFAULT_BOUNDS[label]

                # If no value is specified, draw a random value.
                if value is None:
                    value = np.random.uniform(lower + SMALL_FLOAT, upper - SMALL_FLOAT)

                init_dict[group][key] = [value, False, [lower, upper]]

                # Also, override old bounds in old dict.
                old_dict[group][key] = [current_value, False, [lower, upper]]
            else:
                # Fix all other parameters.
                init_dict[group][key] = [current_value, True, bounds]

    return old_dict, init_dict
def get_optimal_compensations_scaled_archimedean(questions, upper, marginals, r_self,
                                                 r_other, delta, self, other):
    """Compute the optimal compensation for every question.

    A copula is built once from the preference parameters and then evaluated
    for each question.

    Returns
    -------
    dict
        Maps each question to its optimal compensation.

    Raises
    ------
    TrempyError
        If any question is at most 30 and different from 13; the error message
        states that temporal decisions are not implemented for this version.
    """
    if any(q <= 30 and not q == 13 for q in questions):
        raise TrempyError('Temporal decisions not implemented for scaled_archimedean.')

    copula = get_copula_scaled_archimedean(upper, marginals, r_self, r_other, delta, self,
                                           other)

    return {q: determine_optimal_compensation(copula, q) for q in questions}
def get_para(self, label):
    """Look up a single parameter by its label.

    Returns
    -------
    list
        ``[value, is_fixed, bounds]`` of the first parameter whose label
        matches.

    Raises
    ------
    TrempyError
        If no parameter carries the requested label.
    """
    requested = ('value', 'is_fixed', 'bounds')

    for candidate in self.attr['para_objs']:
        if label != candidate.get_attr('label'):
            continue
        return [candidate.get_attr(info) for info in requested]

    raise TrempyError('parameter not available')
def get_optimal_compensations(version, paras_obj, questions, **version_specific):
    """Dispatch the computation of optimal compensations to the model version.

    Parameters
    ----------
    version : str
        'scaled_archimedean' or 'nonstationary'.
    paras_obj : object
        Parameter container; its first ``nparas_econ`` economic values are the
        preference parameters.
    questions : iterable
        Questions for which compensations are computed.
    **version_specific
        For 'scaled_archimedean' the keys 'marginals' and 'upper' are required.

    Returns
    -------
    The result of the version-specific routine.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    nparas_econ = paras_obj.attr['nparas_econ']

    if version in ['scaled_archimedean']:
        # Version-specific objects that live outside of paras_obj.
        marginals = version_specific['marginals']
        upper = version_specific['upper']

        # Variable args
        r_self, r_other, delta, self, other = \
            paras_obj.get_values('econ', 'all')[:nparas_econ]

        return get_optimal_compensations_scaled_archimedean(
            questions, upper, marginals, r_self, r_other, delta, self, other)

    if version in ['nonstationary']:
        # TODO: How to handle optional arguments? If unrestricted_weights is not
        # generated, this does not work. The unpacking below requires exactly
        # sixteen preference parameters.
        (alpha, beta, gamma, y_scale,
         discount_factors_0, discount_factors_1, discount_factors_3,
         discount_factors_6, discount_factors_12, discount_factors_24,
         unrestricted_weights_0, unrestricted_weights_1, unrestricted_weights_3,
         unrestricted_weights_6, unrestricted_weights_12, unrestricted_weights_24) = \
            paras_obj.get_values('econ', 'all')[:nparas_econ]

        # Optional arguments
        discounting = paras_obj.attr['discounting']
        stationary_model = paras_obj.attr['stationary_model']
        df_other = paras_obj.attr['df_other']

        return get_optimal_compensations_nonstationary(
            questions, alpha, beta, gamma, y_scale,
            discount_factors_0, discount_factors_1, discount_factors_3,
            discount_factors_6, discount_factors_12, discount_factors_24,
            unrestricted_weights_0, unrestricted_weights_1, unrestricted_weights_3,
            unrestricted_weights_6, unrestricted_weights_12, unrestricted_weights_24,
            discounting, stationary_model, df_other)

    raise TrempyError('version not implemented')
def simulate(fname):
    """Simulate the model based on the initialization file.

    The random number generator is seeded with the simulation seed from the
    initialization file. For every agent and question a latent compensation is
    drawn around the optimal compensation, censored from below at the lower
    cutoff, and replaced by ``NEVER_SWITCHERS`` if it exceeds the upper cutoff.

    Side effects: the simulated dataset is pickled to
    ``<sim_file>.trempy.pkl`` and an info file is written to
    ``<sim_file>.trempy.info``.

    Parameters
    ----------
    fname : str
        Path to the initialization file.

    Returns
    -------
    tuple
        ``(df, fval)``: the simulated DataFrame indexed by
        (Individual, Question) and the value of the criterion function
        evaluated on it.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    model_obj = ModelCls(fname)
    version = model_obj.attr['version']

    # Get fixed args that do not change during simulation.
    args = [model_obj, 'sim_agents', 'questions', 'sim_seed', 'sim_file', 'paras_obj',
            'cutoffs']

    if version in ['scaled_archimedean']:
        args += ['upper', 'marginals']
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs, upper, marginals = \
            dist_class_attributes(*args)

        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs = \
            dist_class_attributes(*args)

        version_specific = dict()
    else:
        raise TrempyError('version not implemented')

    np.random.seed(sim_seed)
    m_optimal = get_optimal_compensations(version, paras_obj, questions, **version_specific)

    # First, get number of preference parameters. Paras with higher index belong to
    # questions!
    nparas_econ = paras_obj.attr['nparas_econ']

    # Now, get standard deviation for the error in each question.
    sds = paras_obj.get_values('econ', 'all')[nparas_econ:]
    heterogeneity = paras_obj.attr['heterogeneity']
    if heterogeneity:
        sds_time = sds[1]
        sds_risk = sds[2]

    # TODO: The draws could be vectorized across agents (one call to
    # np.random.normal with size=sim_agents per question). That changes the order
    # in which random numbers are consumed and thus the simulated dataset, so the
    # agent-by-agent loop is kept for now.
    data = []
    for i in range(sim_agents):
        for k, q in enumerate(questions):
            lower_cutoff, upper_cutoff = cutoffs[q]

            # If we estimate agent by agent, we use only two sds for time and risk
            # questions, scaled by the width of the cutoff interval.
            if heterogeneity:
                if q <= 30:
                    sds_current_q = sds_time * (upper_cutoff - lower_cutoff) / 200
                else:
                    sds_current_q = sds_risk * (upper_cutoff - lower_cutoff) / 20
            else:
                sds_current_q = sds[k]

            m_latent = np.random.normal(loc=m_optimal[q], scale=sds_current_q, size=1)
            m_observed = np.clip(m_latent, a_min=lower_cutoff, a_max=+np.inf)
            m_observed[m_observed > upper_cutoff] = NEVER_SWITCHERS

            # Store a plain float instead of the one-element array so that the
            # DataFrame does not depend on implicit array-to-scalar conversion.
            data.append([i, q, float(m_observed[0])])

    # Post-processing step. The builtin types replace the np.int / np.float
    # aliases, which are no longer available in current NumPy releases.
    df = pd.DataFrame(data, columns=['Individual', 'Question', 'Compensation'])
    dtype = {
        'Individual': int,
        'Question': int,
        'Compensation': float,
    }
    df = df.astype(dtype)
    df.set_index(['Individual', 'Question'], inplace=True, drop=False)
    df.sort_index(inplace=True)

    df.to_pickle(sim_file + '.trempy.pkl', protocol=2)

    x_econ_all_current = paras_obj.get_values('econ', 'all')

    fval, _ = criterion_function(df, questions, cutoffs, paras_obj, version, sds,
                                 **version_specific)

    write_info(version, x_econ_all_current, df, questions, fval, m_optimal,
               sim_file + '.trempy.info')

    return df, fval
def print_init_dict(dict_, fname='test.trempy.ini'):
    """Write an initialization dictionary to a file.

    Each block is written as a header followed by one line per label (labels
    in sorted order) and a trailing blank line. The CUTOFFS header is only
    written once the first cutoff that is actually specified is encountered.

    Parameters
    ----------
    dict_ : dict
        Initialization dictionary with one sub-dictionary per block.
    fname : str
        Name of the output file.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    version = dict_['VERSION']['version']

    blocks = ['VERSION', 'SIMULATION', 'ESTIMATION', 'SCIPY-BFGS', 'SCIPY-POWELL',
              'SCIPY-L-BFGS-B', 'CUTOFFS', 'QUESTIONS']

    # The remaining blocks depend on the version of the utility function.
    if version in ['scaled_archimedean']:
        blocks += ['UNIATTRIBUTE SELF', 'UNIATTRIBUTE OTHER', 'MULTIATTRIBUTE COPULA']
    elif version in ['nonstationary']:
        blocks += ['ATEMPORAL', 'DISCOUNTING']
    else:
        raise TrempyError('version not implemented')

    questions = list(dict_['QUESTIONS'].keys())
    cutoff_header_written = False

    with open(fname, 'w') as outfile:
        for block in blocks:
            in_cutoffs = block in ['CUTOFFS']

            # The CUTOFFS header is postponed until we know that at least one
            # cutoff needs to be printed.
            if not in_cutoffs:
                outfile.write(block + '\n\n')

            for label in sorted(dict_[block].keys()):
                info = dict_[block][label]

                # Translate the label to its internal counterpart.
                label_internal = label
                if version in ['scaled_archimedean'] and label in ['r']:
                    if 'SELF' in block:
                        label_internal = 'r_self'
                    elif 'OTHER' in block:
                        label_internal = 'r_other'

                is_coefficient = label_internal in PREFERENCE_PARAMETERS[version] + questions

                # Build the format string for the line.
                template = '{:<25}'
                if not is_coefficient:
                    # All other parameters are formatted as strings.
                    template += ' {:>25}\n'
                elif (isinstance(label_internal, str) and
                        label_internal.startswith('unrestricted_weights') and
                        info[0] is None):
                    # Optional arguments can be None and are printed as such.
                    template += ' {:>25} {:>10} '
                else:
                    # Preference parameters are formatted as floats.
                    template += ' {:25.4f} {:>10} '

                # Handle string output (e.g. "True" or "None").
                if label in ['detailed', 'version', 'heterogeneity']:
                    info = str(info)
                if label in ['discounting', 'stationary_model']:
                    info = 'None' if info is None else str(info)

                if is_coefficient and not in_cutoffs:
                    line, template = format_coefficient_line(label_internal, info, template)
                elif in_cutoffs:
                    line, template = format_cutoff_line(label, info)

                    # A cutoff without any specified bound is not printed.
                    if line.count('None') == 2:
                        continue

                    if not cutoff_header_written:
                        cutoff_header_written = True
                        outfile.write(block + '\n\n')
                else:
                    line = [label, info]

                outfile.write(template.format(*line))

            outfile.write('\n')
def get_copula_nonstationary(alpha, beta, gamma, y_scale,
                             discount_factors_0, discount_factors_1,
                             discount_factors_3, discount_factors_6,
                             discount_factors_12, discount_factors_24,
                             unrestricted_weights_0, unrestricted_weights_1,
                             unrestricted_weights_3, unrestricted_weights_6,
                             unrestricted_weights_12, unrestricted_weights_24,
                             discounting=None,
                             stationary_model=False,
                             df_other='equal_univariate'
                             ):
    """Access the nonstationary utility copula.

    Parameters
    ----------
    alpha, beta, gamma, y_scale
        Atemporal preference parameters, passed on unchanged.
    discount_factors_0, ..., discount_factors_24
        "Nonparametric" discount factors D_t for t in 0, 1, 3, 6, 12, 24.
    unrestricted_weights_0, ..., unrestricted_weights_24
        Weights c_t in the CES function. All six are used if ``df_other`` is
        'free'; only ``unrestricted_weights_0`` is used for 'linear' and
        'exponential'.
    discounting : {None, 'hyperbolic', 'exponential'}
        Passed on to the copula specification.
    stationary_model : bool
        If True, every weight is set to ``y_scale`` regardless of ``df_other``.
    df_other : {'equal_univariate', 'free', 'linear', 'exponential'}
        Structure imposed on the weights c_t.

    Returns
    -------
    UtilityCopulaCls
        The copula built from the specification.

    Raises
    ------
    AssertionError
        If ``discounting`` is not supported.
    TrempyError
        If ``df_other`` is 'free' but a weight is None, or if ``df_other`` is
        not supported and the model is not stationary.
    """
    # Anti-bugging.
    np.testing.assert_equal(discounting in [None, 'hyperbolic', 'exponential'], True)

    version = 'nonstationary'
    horizons = [0, 1, 3, 6, 12, 24]

    copula_spec = {'version': version}
    copula_spec[version] = {
        'discounting': discounting,
        'version': version,
        'y_scale': y_scale,
        'alpha': alpha,
        'gamma': gamma,
        'beta': beta,
    }

    # "Nonparametric" discount factors D_t for t in 0,1,3,6,12,24.
    dfx = {
        0: discount_factors_0,
        1: discount_factors_1,
        3: discount_factors_3,
        6: discount_factors_6,
        12: discount_factors_12,
        24: discount_factors_24,
    }
    copula_spec[version]['discount_factors'] = dfx

    if df_other in ['equal_univariate']:
        # We use the parametric restrictions on c_t derived from theory.
        dict_unrestricted = None
    elif df_other in ['free']:
        # The weight c_t in the CES function is free.
        dict_unrestricted = {
            0: unrestricted_weights_0,
            1: unrestricted_weights_1,
            3: unrestricted_weights_3,
            6: unrestricted_weights_6,
            12: unrestricted_weights_12,
            24: unrestricted_weights_24,
        }
        if None in dict_unrestricted.values():
            raise TrempyError(
                'discount function for other is set to free but contains None type')
    elif df_other in ['linear']:
        # Impose a linear structure on c_t in the CES function.
        dict_unrestricted = {
            t: max(0, y_scale + t * unrestricted_weights_0) for t in horizons
        }
    elif df_other in ['exponential']:
        # Impose an exponential structure on c_t in the CES function.
        dict_unrestricted = {t: y_scale * unrestricted_weights_0 ** t for t in horizons}
    else:
        # An unknown structure is only acceptable if the stationary model
        # overrides the weights below; otherwise there is nothing to build.
        if stationary_model is not True:
            raise TrempyError('discount function for other is not implemented')
        dict_unrestricted = None

    # The model becomes stationary.
    if stationary_model is True:
        dict_unrestricted = {key: y_scale for key in horizons}

    copula_spec[version]['unrestricted_weights'] = dict_unrestricted

    # Build copula
    copula = UtilityCopulaCls(copula_spec)

    return copula
def write_out(self, fname):
    """Write the current state of the class instance to an initialization file.

    An initialization dictionary is assembled from the class attributes and
    the parameter object and then handed to ``print_init_dict``.

    Parameters
    ----------
    fname : str
        Name of the file to write.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    version = self.get_attr('version')
    paras_obj = self.attr['paras_obj']
    questions = self.attr['questions']

    # Block labels: basis labels and version-specific labels.
    basis_labels = [
        'VERSION', 'SIMULATION', 'ESTIMATION', 'SCIPY-BFGS', 'SCIPY-POWELL',
        'SCIPY-L-BFGS-B', 'CUTOFFS', 'QUESTIONS'
    ]
    version_labels = []
    if version in ['scaled_archimedean']:
        version_labels += [
            'UNIATTRIBUTE SELF', 'UNIATTRIBUTE OTHER', 'MULTIATTRIBUTE COPULA'
        ]
    elif version in ['nonstationary']:
        version_labels += ['ATEMPORAL', 'DISCOUNTING']

    init_dict = {label: dict() for label in basis_labels + version_labels}

    # 1) Version
    init_dict['VERSION']['version'] = version
    for key in ('heterogeneity', 'stationary_model', 'discounting', 'df_other'):
        init_dict['VERSION'][key] = self.attr[key]

    # 2) Simulation
    for key, attr_name in (('agents', 'sim_agents'), ('seed', 'sim_seed'),
                           ('file', 'sim_file')):
        init_dict['SIMULATION'][key] = self.attr[attr_name]

    # 3) Estimation
    for key, attr_name in (('detailed', 'est_detailed'), ('optimizer', 'optimizer'),
                           ('agents', 'est_agents'), ('skip', 'num_skip'),
                           ('file', 'est_file'), ('maxfun', 'maxfun'),
                           ('start', 'start')):
        init_dict['ESTIMATION'][key] = self.attr[attr_name]

    # 4+5) Optimizer options
    optimizer_options = (
        ('SCIPY-BFGS', ('gtol', 'eps')),
        ('SCIPY-POWELL', ('xtol', 'ftol')),
        ('SCIPY-L-BFGS-B', ('gtol', 'ftol', 'eps')),
    )
    for optimizer_label, option_keys in optimizer_options:
        init_dict[optimizer_label] = dict()
        for key in option_keys:
            init_dict[optimizer_label][key] = self.attr['opt_options'][optimizer_label][key]

    # 6) Cutoffs
    init_dict['CUTOFFS'] = self.attr['cutoffs']

    # 7) Questions
    for q in questions:
        init_dict['QUESTIONS'][q] = paras_obj.get_para(q)

    # 8) Preference parameters
    if version in ['scaled_archimedean']:
        uniattribute = (('UNIATTRIBUTE SELF', 'r_self'), ('UNIATTRIBUTE OTHER', 'r_other'))
        for position, (block, r_label) in enumerate(uniattribute):
            init_dict[block]['marginal'] = self.attr['marginals'][position]
            init_dict[block]['r'] = paras_obj.get_para(r_label)
            init_dict[block]['max'] = self.attr['upper'][position]

        for label in ('delta', 'self', 'other'):
            init_dict['MULTIATTRIBUTE COPULA'][label] = paras_obj.get_para(label)

    elif version in ['nonstationary']:
        for label in ('alpha', 'beta', 'gamma', 'y_scale'):
            init_dict['ATEMPORAL'][label] = paras_obj.get_para(label)

        # All discount factors first, then all unrestricted weights.
        for prefix in ('discount_factors', 'unrestricted_weights'):
            for horizon in (0, 1, 3, 6, 12, 24):
                label = '{}_{}'.format(prefix, horizon)
                init_dict['DISCOUNTING'][label] = paras_obj.get_para(label)
    else:
        raise TrempyError('version not implemented')

    print_init_dict(init_dict, fname)
def __init__(self, init_dict):
    """Set up the parameter container from an initialization dictionary.

    Preference parameters are created first, in the order given by
    ``PREFERENCE_PARAMETERS[version]``, followed by one parameter per question
    in sorted order. The number of parameters of each kind is recorded in
    ``nparas_econ`` and ``nparas_questions``.

    Raises
    ------
    TrempyError
        If the version or a parameter label is not implemented.
    """
    version = init_dict['VERSION']['version']

    self.attr = dict()
    self.attr['heterogeneity'] = init_dict['VERSION']['heterogeneity']
    self.attr['optimizer'] = init_dict['ESTIMATION']['optimizer']
    self.attr['version'] = version
    self.attr['para_labels'] = []
    self.attr['para_objs'] = []

    if version in ['nonstationary']:
        for key in ('stationary_model', 'discounting', 'df_other'):
            self.attr[key] = init_dict['VERSION'][key]

    para_objs = self.attr['para_objs']
    para_labels = self.attr['para_labels']

    # Preference parameters are handled for each version separately.
    for label in PREFERENCE_PARAMETERS[version]:
        if version in ['scaled_archimedean']:
            if label in ['r_self']:
                spec = init_dict['UNIATTRIBUTE SELF']['r']
            elif label in ['r_other']:
                spec = init_dict['UNIATTRIBUTE OTHER']['r']
            else:
                spec = init_dict['MULTIATTRIBUTE COPULA'][label]
        elif version in ['nonstationary']:
            if label in ['alpha', 'beta', 'gamma', 'y_scale']:
                spec = init_dict['ATEMPORAL'][label]
            elif label.startswith(('discount_factors', 'unrestricted_weights')):
                spec = init_dict['DISCOUNTING'][label]
            else:
                raise TrempyError('parameter label not implemented')
        else:
            raise TrempyError('version not implemented')

        value, is_fixed, bounds = spec
        para_objs.append(ParaCls(label, value, is_fixed, bounds))
        para_labels.append(label)

    # Record the number of preference parameters so that the estimation step
    # can locate the standard deviations without hard-coded numbers.
    self.attr['nparas_econ'] = len(para_objs)

    # QUESTION specific parameters
    for label in sorted(init_dict['QUESTIONS'].keys()):
        value, is_fixed, bounds = init_dict['QUESTIONS'][label]
        para_objs.append(ParaCls(int(label), value, is_fixed, bounds))
        para_labels.append(int(label))

    self.attr['nparas_questions'] = len(para_objs) - self.attr['nparas_econ']

    self.check_integrity()
def __init__(self, fname):
    """Build the model instance from an initialization file.

    The file is parsed with ``read`` and its content is distributed to
    ``self.attr``. Labels of the parameter object that are not preference
    parameters of the version are treated as questions.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    init_dict = read(fname)
    version = init_dict['VERSION']['version']

    # We first tackle the more complex issue of parameter management.
    self.attr = dict()
    self.attr['version'] = version
    for key in ('heterogeneity', 'stationary_model', 'discounting', 'df_other'):
        self.attr[key] = init_dict['VERSION'][key]

    # Parameters
    paras_obj = ParasCls(init_dict)
    self.attr['paras_obj'] = paras_obj

    # Version specific parameters that don't change during estimation.
    if version in ['scaled_archimedean']:
        uniattribute_blocks = ('UNIATTRIBUTE SELF', 'UNIATTRIBUTE OTHER')

        # Information
        self.attr['upper'] = [init_dict[block]['max'] for block in uniattribute_blocks]

        # Marginal utility functions
        self.attr['marginals'] = [
            init_dict[block]['marginal'] for block in uniattribute_blocks
        ]
    elif version in ['nonstationary']:
        pass
    else:
        raise TrempyError('version not implemented')

    # Cutoffs
    self.attr['cutoffs'] = init_dict['CUTOFFS']

    # Simulation and estimation settings: (attribute, block, key)
    settings = (
        ('sim_agents', 'SIMULATION', 'agents'),
        ('sim_seed', 'SIMULATION', 'seed'),
        ('sim_file', 'SIMULATION', 'file'),
        ('est_detailed', 'ESTIMATION', 'detailed'),
        ('optimizer', 'ESTIMATION', 'optimizer'),
        ('est_agents', 'ESTIMATION', 'agents'),
        ('num_skip', 'ESTIMATION', 'skip'),
        ('est_file', 'ESTIMATION', 'file'),
        ('maxfun', 'ESTIMATION', 'maxfun'),
        ('start', 'ESTIMATION', 'start'),
    )
    for attr_name, block, key in settings:
        self.attr[attr_name] = init_dict[block][key]

    # Optimizer options
    optimizer_options = (
        ('SCIPY-BFGS', ('gtol', 'eps')),
        ('SCIPY-POWELL', ('xtol', 'ftol')),
        ('SCIPY-L-BFGS-B', ('gtol', 'ftol', 'eps')),
    )
    self.attr['opt_options'] = dict()
    for optimizer_label, option_keys in optimizer_options:
        self.attr['opt_options'][optimizer_label] = dict()
        for key in option_keys:
            self.attr['opt_options'][optimizer_label][key] = init_dict[optimizer_label][key]

    # Every label that is not a preference parameter is a question.
    questions = []
    for para_obj in paras_obj.get_attr('para_objs'):
        label = para_obj.get_attr('label')
        if label not in PREFERENCE_PARAMETERS[version]:
            questions.append(label)

    self.attr['questions'] = sorted(questions)
    self.attr['num_questions'] = len(questions)

    # We now need to check the integrity of the class instance.
    self._check_integrity()
def estimate(fname):
    """Estimate the model by the method of maximum likelihood.

    Parameters
    ----------
    fname : str
        Path to the initialization file.

    Returns
    -------
    list
        ``[f_step, x_econ_all_step]`` as reported by the estimation object,
        i.e. the value of the criterion function and the corresponding
        parameter vector.

    Raises
    ------
    TrempyError
        If there is no free parameter or the optimizer is not supported.
    """
    estimate_cleanup()

    model_obj = ModelCls(fname)

    # Distribute class parameters except for economic parameters and
    # version-specific objects.
    version, est_file, questions, paras_obj, start, cutoffs, maxfun, est_detailed, \
        opt_options, optimizer, est_agents, num_skip = dist_class_attributes(
            model_obj, 'version', 'est_file', 'questions', 'paras_obj', 'start', 'cutoffs',
            'maxfun', 'est_detailed', 'opt_options', 'optimizer', 'est_agents', 'num_skip')

    # Handle version-specific objects not included in the para_obj
    if version in ['scaled_archimedean']:
        upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        version_specific = dict()

    # We only need to continue if there is at least one parameter to actually estimate.
    if len(paras_obj.get_values('optim', 'free')) == 0:
        raise TrempyError('no free parameter to estimate')

    # Some initial setup
    df_obs = process(est_file, questions, num_skip, est_agents, cutoffs)

    estimate_obj = EstimateClass(
        df=df_obs, cutoffs=cutoffs, questions=questions, paras_obj=copy.deepcopy(paras_obj),
        max_eval=maxfun, optimizer=optimizer, version=version, **version_specific)

    if start in ['auto']:
        paras_obj = get_automatic_starting_values(paras_obj, df_obs, questions, version,
                                                  **version_specific)

    # Objects for scipy.minimize
    x_optim_free_start = paras_obj.get_values('optim', 'free')
    x_free_bounds = paras_obj.get_bounds('free')

    # We lock in an evaluation at the starting values as not all optimizers
    # actually start there.
    estimate_obj.evaluate(x_optim_free_start)

    # We simulate a sample at the starting point.
    if est_detailed:
        estimate_simulate('start', x_optim_free_start, model_obj, df_obs)

    # Optimization of likelihood function
    if maxfun > 1:
        # (optimizer label, scipy method, option keys, bounds)
        solver_specs = (
            ('SCIPY-BFGS', 'BFGS', ('gtol', 'eps'), None),
            ('SCIPY-POWELL', 'POWELL', ('ftol', 'xtol'), None),
            # Only this method is handed the bounds of the free parameters.
            ('SCIPY-L-BFGS-B', 'L-BFGS-B', ('gtol', 'ftol', 'eps'), x_free_bounds),
        )
        for optimizer_label, method, option_keys, bounds in solver_specs:
            if optimizer == optimizer_label:
                options = {key: opt_options[optimizer_label][key] for key in option_keys}
                break
        else:
            raise TrempyError('flawed choice of optimization method')

        try:
            opt = minimize(estimate_obj.evaluate, x_optim_free_start, method=method,
                           options=options, bounds=bounds)
        except MaxfunError:
            opt = dict()
            opt['message'] = 'Optimization reached maximum number of function evaluations.'
            opt['success'] = False
    else:
        # We are not faced with a serious estimation request.
        opt = dict()
        opt['message'] = 'Single evaluation of criterion function at starting values.'
        opt['success'] = False

    # Now we can wrap up all estimation related tasks.
    estimate_obj.finish(opt)

    # We simulate a sample at the stopping point.
    if est_detailed:
        x_econ_all_step = estimate_obj.get_attr('x_econ_all_step')
        paras_obj.set_values('econ', 'all', x_econ_all_step)
        x_optim_free_step = paras_obj.get_values('optim', 'free')
        estimate_simulate('stop', x_optim_free_step, model_obj, df_obs)

        shutil.copy('stop/compare.trempy.info', 'compare.trempy.info')

    # We only return the best value of the criterion function and the
    # corresponding parameter vector.
    return [estimate_obj.get_attr('f_step'), estimate_obj.get_attr('x_econ_all_step')]
def random_dict(constr):
    """Create a random initialization dictionary.

    Parameters
    ----------
    constr : dict or None
        Optional constraints on the random request. The keys read here are ``version``,
        ``all_questions`` (its presence alone requests all 45 questions), ``fname``,
        ``discounting``, ``heterogeneity``, ``maxfun``, ``num_agents``, ``est_file``,
        ``detailed``, ``start`` and ``optimizer``.

    Returns
    -------
    dict
        Dictionary with the groups ``VERSION``, ``QUESTIONS``, ``CUTOFFS``, ``SIMULATION``,
        ``ESTIMATION``, one group of options per optimizer and the version-specific
        preference parameter groups. Parameter entries have the form
        ``[value, is_fixed, bounds]``.

    Raises
    ------
    TrempyError
        If the version is not implemented.
    """
    dict_ = dict()

    # Random defaults. The order of the draws is kept stable on purpose.
    version = np.random.choice(['scaled_archimedean', 'nonstationary'])
    num_questions = np.random.randint(8, 14)
    fname = get_random_string()
    discounting = np.random.choice([None, 'exponential', 'hyperbolic'], p=[0.8, 0.1, 0.1])
    heterogeneity = np.random.choice([True, False], p=[0.1, 0.9])
    df_other = np.random.choice(
        ['equal_univariate', 'free', 'linear', 'exponential'], p=[0.7, 0.1, 0.1, 0.1])

    if constr is not None:
        # Handle version specific data.
        if 'version' in constr:
            version = constr['version']
        if 'all_questions' in constr:
            num_questions = 45
        if 'fname' in constr:
            fname = constr['fname']
        if 'discounting' in constr:
            discounting = constr['discounting']
        if 'heterogeneity' in constr:
            heterogeneity = constr['heterogeneity']

    dict_['VERSION'] = {'version': version}

    # Optional arguments for model type
    if version in ['nonstationary']:
        dict_['VERSION']['stationary_model'] = np.random.choice([False, True], p=[0.9, 0.1])
        dict_['VERSION']['heterogeneity'] = heterogeneity
        dict_['VERSION']['discounting'] = discounting
        dict_['VERSION']['df_other'] = df_other
    elif version in ['scaled_archimedean']:
        dict_['VERSION']['stationary_model'] = True
        dict_['VERSION']['heterogeneity'] = False
        dict_['VERSION']['discounting'] = None
        dict_['VERSION']['df_other'] = 'equal_univariate'
        heterogeneity = False

    sim_agents = np.random.randint(2, 10)

    is_fixed = np.random.choice(
        [True, False], size=num_questions + len(PREFERENCE_PARAMETERS[version]))

    # We need to ensure at least one parameter is free for a valid estimation request. The
    # array holds booleans, so the check and the assignment have to use booleans as well;
    # counting or assigning the string 'False' does not do that.
    if is_fixed.all():
        is_fixed[0] = False

    # Bounds and values. Be careful: the order of labels matters!
    bounds = [get_bounds(label, version) for label in PREFERENCE_PARAMETERS[version]]
    values = [
        get_value(bounds[i], label, version)
        for i, label in enumerate(PREFERENCE_PARAMETERS[version])
    ]

    if version in ['scaled_archimedean']:
        # Initial setup to ensure constraints across options.
        marginals = np.random.choice(['exponential', 'power'], 2)
        upper_bounds = np.random.randint(500, 800 + 1, 2)

        # We start with sampling all preference parameters.
        for i, group in enumerate(['UNIATTRIBUTE SELF', 'UNIATTRIBUTE OTHER']):
            dict_[group] = dict()
            dict_[group]['r'] = [values[i], is_fixed[i], bounds[i]]
            dict_[group]['max'] = upper_bounds[i]
            dict_[group]['marginal'] = marginals[i]

        dict_['MULTIATTRIBUTE COPULA'] = dict()
        for i, label in enumerate(['delta', 'self', 'other']):
            # We increment index because (r_self, r_other) are handled above.
            j = i + 2
            dict_['MULTIATTRIBUTE COPULA'][label] = [values[j], is_fixed[j], bounds[j]]

    elif version in ['nonstationary']:
        dict_['ATEMPORAL'] = dict()
        dict_['DISCOUNTING'] = dict()
        for i, label in enumerate(PREFERENCE_PARAMETERS[version]):
            if label in ['alpha', 'beta', 'gamma', 'y_scale']:
                dict_['ATEMPORAL'][label] = [values[i], is_fixed[i], bounds[i]]
            else:
                dict_['DISCOUNTING'][label] = [values[i], is_fixed[i], bounds[i]]

        # Handle optional arguments. If one argument is not used, set all to None and fix them.
        optional_args = [
            'unrestricted_weights_{}'.format(int(x)) for x in [0, 1, 3, 6, 12, 24]
        ]
        if df_other in ['equal_univariate']:
            for label in optional_args:
                dict_['DISCOUNTING'][label] = [None, True, [0.01, 1.00]]
        elif df_other in ['free']:
            pass
        elif df_other in ['linear', 'exponential']:
            # Every weight except the one ending in '_0' is switched off. The loop has to run
            # over the optional arguments; relying on the loop variable left over from the
            # preference parameters above would only touch a single entry.
            for label in optional_args:
                if not label.endswith('_0'):
                    dict_['DISCOUNTING'][label] = [None, True, [0.01, 1.00]]

    else:
        raise TrempyError('version not implemented')

    # General part of the init file that does not change with the version.
    if num_questions >= 45:
        questions = list(range(1, 46))
    elif version in ['scaled_archimedean']:
        # For this version we only sample from question 13 and questions 31 to 45.
        questions = np.random.choice(
            [13] + list(range(31, 46)), size=num_questions, replace=False)
    elif heterogeneity:
        # Questions 1 and 2 are always included, the remaining ones are sampled.
        questions = np.array([1, 2])
        questions = np.append(
            questions,
            np.random.choice(list(range(3, 46)), size=(num_questions - 2), replace=False))
    else:
        questions = np.random.choice(list(range(1, 46)), size=num_questions, replace=False)

    # The fixed-indicators of the questions follow those of the preference parameters.
    num_paras = len(PREFERENCE_PARAMETERS[version])

    dict_['QUESTIONS'] = dict()
    for i, q in enumerate(questions):
        q_bounds = get_bounds(q, version)
        q_value = get_value(q_bounds, q, version)
        dict_['QUESTIONS'][q] = [q_value, is_fixed[i + num_paras], q_bounds]

    # If heterogeneity is True, we want to unfix the first two questions and fix the rest.
    if heterogeneity:
        dict_['QUESTIONS'][1][1] = False
        dict_['QUESTIONS'][2][1] = False

        for q in questions:
            if q in [1, 2]:
                continue
            dict_['QUESTIONS'][q] = [0.5, True, [0, HUGE_FLOAT]]

    # We now add some cutoff values.
    dict_['CUTOFFS'] = dict()
    for q in questions:
        if np.random.choice([True, False]):
            dict_['CUTOFFS'][q] = get_cutoffs()

    # We now turn to all simulation details.
    dict_['SIMULATION'] = dict()
    dict_['SIMULATION']['agents'] = sim_agents
    dict_['SIMULATION']['seed'] = np.random.randint(1, 1000)
    dict_['SIMULATION']['file'] = fname

    # We sample valid estimation requests. The number of skipped plus estimated agents stays
    # below the number of simulated agents.
    est_agents = np.random.randint(1, sim_agents)
    num_skip = np.random.randint(0, sim_agents - est_agents)

    dict_['ESTIMATION'] = dict()
    dict_['ESTIMATION']['optimizer'] = np.random.choice(
        ['SCIPY-BFGS', 'SCIPY-L-BFGS-B', 'SCIPY-POWELL'])
    dict_['ESTIMATION']['detailed'] = np.random.choice([True, False], p=[0.9, 0.1])
    dict_['ESTIMATION']['start'] = np.random.choice(['init', 'auto'])
    dict_['ESTIMATION']['agents'] = est_agents
    dict_['ESTIMATION']['skip'] = num_skip
    dict_['ESTIMATION']['maxfun'] = np.random.randint(1, 10)
    dict_['ESTIMATION']['file'] = fname + '.trempy.pkl'

    # We sample optimizer options.
    dict_['SCIPY-BFGS'] = dict()
    dict_['SCIPY-BFGS']['gtol'] = np.random.lognormal()
    dict_['SCIPY-BFGS']['eps'] = np.random.lognormal()

    dict_['SCIPY-L-BFGS-B'] = dict()
    dict_['SCIPY-L-BFGS-B']['gtol'] = np.random.lognormal()
    dict_['SCIPY-L-BFGS-B']['ftol'] = np.random.lognormal()
    dict_['SCIPY-L-BFGS-B']['eps'] = np.random.lognormal()

    dict_['SCIPY-POWELL'] = dict()
    dict_['SCIPY-POWELL']['xtol'] = np.random.lognormal()
    dict_['SCIPY-POWELL']['ftol'] = np.random.lognormal()

    # Now we need to impose possible constraints.
    if constr is not None:
        if 'maxfun' in constr:
            dict_['ESTIMATION']['maxfun'] = constr['maxfun']

        if 'num_agents' in constr:
            # Simulate and estimate on the very same number of agents without skipping any.
            dict_['SIMULATION']['agents'] = constr['num_agents']
            dict_['ESTIMATION']['agents'] = constr['num_agents']
            dict_['ESTIMATION']['skip'] = 0

        if 'est_file' in constr:
            dict_['ESTIMATION']['file'] = constr['est_file']

        if 'detailed' in constr:
            dict_['ESTIMATION']['detailed'] = constr['detailed']

        if 'start' in constr:
            dict_['ESTIMATION']['start'] = constr['start']

        if 'optimizer' in constr:
            dict_['ESTIMATION']['optimizer'] = constr['optimizer']

    return dict_
def read(fname):
    """Read the initialization file.

    The file is scanned twice: a first pass only looks for the ``version`` flag because the
    version determines how the other lines are processed, a second pass builds the
    dictionary group by group.

    Parameters
    ----------
    fname : str
        Path to the initialization file. It must exist.

    Returns
    -------
    dict
        Nested dictionary ``{group: {flag: content}}``. The keys of the ``QUESTIONS`` and
        ``CUTOFFS`` groups are integers and ``CUTOFFS`` has an entry for every question in
        ``QUESTIONS_ALL``.

    Raises
    ------
    TrempyError
        If the file does not specify a version, if a flag is duplicated within a group, or if
        the version is not implemented.
    """
    # Check input
    np.testing.assert_equal(os.path.exists(fname), True)

    # Initialization
    dict_, group = {}, None

    with open(fname) as in_file:
        lines = in_file.readlines()

    # Get the version. This is necessary because version is needed always first!
    version = None
    for line in lines:
        list_ = shlex.split(line)

        # Determine special cases
        is_empty, is_group, is_comment = process_cases(list_)

        if is_group or is_comment or is_empty:
            continue

        flag, value = list_[:2]

        if flag in ['version']:
            version = value
            # We only needed the version flag
            break

    # Without a version the lookups below would fail with an UnboundLocalError that does not
    # tell the user what is wrong with the file.
    if version is None:
        raise TrempyError('version not specified')

    # Now process the file again.
    for line in lines:
        list_ = shlex.split(line)

        # Determine special cases
        is_empty, is_group, is_comment = process_cases(list_)

        # Applicability
        if is_empty or is_comment:
            continue

        # Prepare dictionary
        if is_group:
            group = ' '.join(list_)
            dict_[group] = dict()
            continue

        # Code below is only executed if the current line is not a group name
        flag, value = list_[:2]

        # Handle the VERSION block.
        if (group in ['VERSION']) and (flag in ['version']):
            version = value

        # Type conversions for the NON-CUTOFF block
        if group not in ['CUTOFFS']:
            value = type_conversions(flag, value)

        # We need to make sure questions and cutoffs are not duplicated.
        if flag in dict_[group]:
            raise TrempyError('duplicated information')

        # Handle the basic blocks
        if group in BASIC_GROUPS:
            if group in ['CUTOFFS']:
                dict_[group][flag] = process_cutoff_line(list_)
            elif group in ['QUESTIONS']:
                dict_[group][flag] = process_coefficient_line(group, list_, value)
            else:
                dict_[group][flag] = value

        # Handle blocks specific to the 'version' of the utility function.
        if group in ESTIMATION_GROUP[version]:
            if version in ['scaled_archimedean']:
                # 'max' and 'marginal' are plain values, everything else is a coefficient.
                if flag not in ['max', 'marginal']:
                    dict_[group][flag] = process_coefficient_line(group, list_, value)
                else:
                    dict_[group][flag] = value
            elif version in ['nonstationary']:
                dict_[group][flag] = process_coefficient_line(group, list_, value)
            else:
                raise TrempyError('version not implemented')

    # We allow for initialization files where no CUTOFFS are specified.
    if 'CUTOFFS' not in dict_:
        dict_['CUTOFFS'] = dict()

    # We want to ensure that the keys to the questions are integers
    for label in ['QUESTIONS', 'CUTOFFS']:
        dict_[label] = {int(key): entry for key, entry in dict_[label].items()}

    # We do some modifications on the cutoff values. Instead of None, we will simply use
    # HUGE_FLOAT and we fill up any missing cutoff values for any possible questions.
    for q in QUESTIONS_ALL:
        # Lower and upper default, a fresh list for every question.
        defaults = [-HUGE_FLOAT, HUGE_FLOAT]
        if q not in dict_['CUTOFFS']:
            dict_['CUTOFFS'][q] = defaults
        else:
            for i in range(2):
                if dict_['CUTOFFS'][q][i] is None:
                    dict_['CUTOFFS'][q][i] = defaults[i]

    # Enforce input requirements for optional arguments
    # such as: discounting, stationary_model, unrestricted_weights, heterogeneity,...
    check_optional_args(dict_)
    heterogeneity_preparations(dict_)

    return dict_