def __init__(self, initial, firststage, shock, opts, secondstage=None):
    """
    Build the market DataFrame and attach the model inputs.

    Parameters
    ----------
    initial: dict-like
        Must provide 'rank' (array-like exposing ``shape``) and 'treat'.
        'treat' is stored as a column and, stringified, selects the
        first-stage 'appadmit' entry
    firststage: dict
        Deep-copied before use; ['params']['appadmit'] of the copy is
        replaced by its 'treat<value>' sub-entry
    shock:
        Stored unchanged on the instance
    opts: dict
        Must contain 'reaction', which is handed to lu.reaction_spec
    secondstage: optional
        Stored unchanged; struc_param_unpacker() runs when np.any() of it
        is truthy
    """
    rank = initial['rank']
    treat = initial['treat']
    col_shape = rank.shape

    self.opts = opts

    self.data = pd.DataFrame(rank, columns=['OverallRank'])
    self.data['treat'] = treat
    self.data['RankTreat'] = rank * treat
    # NOTE(review): GPATreat is set to the treatment value alone, with no
    # GPA term - confirm this is intended
    self.data['GPATreat'] = treat
    self.active = np.array(self.data['OverallRank'] > 0)
    self.data['demand'] = np.zeros(col_shape)

    # Work on a private copy so the caller's first-stage dict is untouched
    self.firststage = deepcopy(firststage)
    # Select the functions associated with correct information structure
    appadmit_by_treat = self.firststage['params']['appadmit']
    self.firststage['params']['appadmit'] = \
        appadmit_by_treat['treat' + str(treat)]

    self.secondstage = secondstage
    if np.any(self.secondstage):
        self.struc_param_unpacker()

    self.shock = shock
    self.reac_vars = lu.reaction_spec(opts['reaction'])[0]
    # Every reaction variable gets a zero column plus a zero "_comp" column
    for rvar in self.reac_vars:
        for column in (rvar, rvar + '_comp'):
            self.data[column] = np.zeros(col_shape)

    self.data['revenue'] = self.data['demand'] * self.data['Tuition']
    self.add_donations()
    self.data['year'] = 2013
    self.data['const'] = 1
def perturb_policy(self, data):
    """
    Generate perturbations for policy functions.

    For every alternate policy, the row of the school stored in
    self.perturb['school'][policy] is filled with standard-normal draws
    (one draw per policy variable). All other rows stay at zero.

    Parameters
    ----------
    data: pandas DataFrame
        Raw data holding the policy variables. Only its per-variable
        standard deviation is computed, and at present just the number
        of entries of that result is used (it fixes the number of draws)

    Returns
    -------
    list of length lc.N_ALTPOL, with each element a pandas DataFrame,
    dimension nxp with n = (# of rows in self.data) and
    p = (# of equations to perturb). Each frame is filled with zeros,
    except the row corresponding to the perturbed school.
    Note that the first element of the list is all zeros (no perturbation)
    """
    pol_vars = lu.reaction_spec(self.opts['reaction'])[0]
    # Kept so the data-based alternative below remains a one-line switch
    stdev = np.std(data[pol_vars])

    baseline = pd.DataFrame(
        np.zeros((self.data.shape[0], len(pol_vars))), columns=pol_vars
    )
    perturbs = [baseline]
    # range (not xrange) so the method runs on Python 2 and Python 3 alike
    for policy in range(1, lc.N_ALTPOL):
        # NOTE: use np.random.normal(1, stdev) instead to have
        # data-based perturbations
        draw = np.random.normal(size=len(stdev))
        perturbed = deepcopy(baseline)
        perturbed.loc[self.perturb['school'][policy], :] = draw
        perturbs.append(perturbed)
    return perturbs
def lags(self):
    """
    Generate various lags (including tuition alignment)

    For "OverallRank", "Ranked", every variable of the "full" reaction
    specification and the "_comp" counterpart of each of those variables,
    a column named <variable> + "L" is added to self.data. It holds the
    value from the preceding row within the same "school" group
    (presumably the previous year - assumes rows are ordered by year
    within school; verify against caller).
    """
    reac_vars = list(lu.reaction_spec("full")[0])
    lag_vars = (["OverallRank", "Ranked"]
                + reac_vars
                + [el + "_comp" for el in reac_vars])
    for lvar in lag_vars:
        # Shift only the needed column per group: the result keeps the
        # original row index, so it aligns with self.data directly and the
        # whole frame is not re-shifted for every variable
        self.data[lvar + "L"] = self.data.groupby("school")[lvar].shift()
def __init__(self, fs_params, constants, opts, ss_params=None):
    """
    Store the simulation inputs and prepare empty output containers.

    Parameters
    ----------
    fs_params:
        Stored unchanged as self.fs_params
    constants:
        Stored unchanged as self.constants
    opts: dict
        Must contain 'reaction', which is handed to lu.reaction_spec
    ss_params: optional
        Stored unchanged as self.ss_params
    """
    self.fs_params = fs_params
    self.ss_params = ss_params
    self.constants = constants
    self.opts = opts
    self.reac_vars = lu.reaction_spec(opts['reaction'])[0]

    # One empty array per tracked outcome: the fixed outcomes first,
    # followed by every reaction variable
    tracked = ['demand', 'OverallRank', 'revenue'] + list(self.reac_vars)
    self.out = {name: np.array([]) for name in tracked}
def perturb_gen(self, rank):
    """
    Generate perturbation inputs (zeros) for simulation

    Parameters
    ----------
    rank: ndarray
        Rank array in simulated market to be used for dimensions

    Returns
    -------
    pandas DataFrame of zeros with one row per element of rank and one
    column per policy variable, followed by an 'entry' column
    """
    # Copy before extending: appending directly to the list handed back by
    # lu.reaction_spec would also change it for any other holder of it
    pol_vars = list(lu.reaction_spec(self.opts['reaction'])[0]) + ['entry']
    perturb = np.zeros((len(rank), len(pol_vars)))
    return pd.DataFrame(perturb, columns=pol_vars)
def competition(self):
    """
    Generate averages in competition sets

    Adds a "<variable>_comp" column to self.data for every variable of the
    "full" reaction specification and for "OverallRank". Within each year,
    and over the rows where the variable is not NaN, the column is set to
    the product of the matrix returned by lu.comp_mat_gen (built from that
    year's OverallRank values) with the variable's values.
    """
    base_vars = list(lu.reaction_spec("full")[0]) + ["OverallRank"]
    comp_names = [name + "_comp" for name in base_vars]

    # Start every competition column at zero, then fill it year by year
    placeholder = pd.DataFrame(
        np.zeros((self.data.shape[0], len(comp_names))),
        columns=comp_names,
    )
    self.data = self.data.join(placeholder)

    for year in self.data["year"].unique():
        for cvar in base_vars:
            # Only rows with an observed value of cvar take part
            observed = ~np.isnan(self.data[cvar])
            subset = deepcopy(self.data[observed])
            in_year = subset["year"] == year
            comp_mat = lu.comp_mat_gen(subset.loc[in_year, "OverallRank"])
            subset.loc[in_year, cvar + "_comp"] = np.dot(
                comp_mat, subset.loc[in_year, cvar]
            )
            # Write the updated rows back into the full data set
            self.data[observed] = subset
def test_reaction(self):
    """
    Check the simulated reaction variables against the 2013 data.

    Runs evolve_market('reaction'), then for each reaction variable
    computes the RMSE between self.market.data and the non-missing 2013
    observations in self.data, normalized by the observed range, and
    asserts that the results match the stored reference values.
    """
    self.evolve_market('reaction')
    in_2013 = self.data['year'] == 2013
    nrmse = []
    for rvar in lu.reaction_spec(self.opts['reaction'])[0]:
        observed = self.data.loc[in_2013, rvar]
        # Keep only positions where the 2013 observation is present
        mask = np.array(~np.isnan(observed))
        data = np.array(observed[mask])
        sq_err = (np.array(self.market.data[rvar][mask]) - data)**2
        rmse = np.sqrt(np.mean(sq_err))
        # Normalize for percentage
        nrmse.append(rmse / (max(data) - min(data)))
    expected = np.array([0.12452261429473817, 0.18392353736966649,
                         0.2677874577849565])
    print("RMSE RVARS")
    print(nrmse)
    print("Expected")
    # Show the reference values under their header so a failure is readable
    print(expected)
    assert np.allclose(np.array(nrmse), expected)
def reaction(self):
    """
    Estimate reaction functions

    Fits one gradient-boosted regression per reaction variable (from
    lu.reaction_spec(self.react)) on OverallRank, treat and year. Only
    rows of self.data_p whose OverallRank is strictly below the maximum
    OverallRank are used, and rows with missing values in the columns of
    a given fit are dropped.

    Returns
    -------
    dict mapping each reaction variable name to its fitted
    GradientBoostingRegressor
    """
    print(" * Estimating reaction functions")
    reac_vars = lu.reaction_spec(self.react)[0]
    features = ['OverallRank', 'treat', 'year']
    # 'loss' is left at its default (least squares in every scikit-learn
    # release; the old explicit name 'ls' is rejected by current ones).
    # min_samples_split is 2 because a one-sample node can never be split,
    # so this matches the former value of 1, which current releases reject.
    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
              'learning_rate': 0.01}
    models = {}
    below_max = self.data_p['OverallRank'] < np.max(self.data_p['OverallRank'])
    data = copy(self.data_p[below_max])
    for rvar in reac_vars:
        data_reac = data[features + [rvar]].dropna()
        model = ensemble.GradientBoostingRegressor(**params)
        model.fit(data_reac[features], data_reac[rvar])
        models[rvar] = model
        if self.opts['verbose']:
            print("--------- %s Policy Function Estimate ---------" % rvar)
            print("MSE (training): ", mean_squared_error(
                data_reac[rvar], model.predict(data_reac[features])
            ))
    return models
def perturb_gen(self, rank):
    """
    Generate perturbation inputs

    Parameters
    ----------
    rank: sized array-like
        Only its length is used, to fix the number of rows

    Returns
    -------
    pandas DataFrame of ones with one row per element of rank and one
    column per policy variable, followed by an 'entry' column
    """
    # Copy before extending: appending directly to the list handed back by
    # lu.reaction_spec would also change it for any other holder of it
    pol_vars = list(lu.reaction_spec(self.opts['reaction'])[0]) + ['entry']
    perturb = np.ones((len(rank), len(pol_vars)))
    return pd.DataFrame(perturb, columns=pol_vars)