def setup(wns=None, zns=None):
    if wns is None:
        wns = [100]
    if zns is None:
        zns = [100]
    dic = {}
    for wn, zn in it.product(wns, zns):
        params = load_params()
        wl = params['wl'][0]
        wu = params['wu'][0]
        w_grid = np.linspace(wl, wu, wn)
        params['w_grid'] = w_grid, 'Wage support.'
        ln_dist_lb, _ = params['ln_dist_lb']
        ln_dist_ub, _ = params['ln_dist_ub']
        ln_dist, _ = params['full_ln_dist']
        z_grid = np.linspace(ln_dist_lb, ln_dist_ub, zn)
        params['z_grid'] = z_grid, 'Truncated support of shocks.'
        params['wn'] = wn, ' '
        params['zn'] = zn, ' '
        dic[(wn, zn)] = params
    return dic
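# A minimal usage sketch (hedged), assuming `it` is itertools, `np` is numpy,
# and `load_params` is importable alongside setup(). It builds parameter dicts
# for a small grid of (wage, shock) resolutions and inspects one of them.
grids = setup(wns=[50, 100], zns=[50])
params_50_50 = grids[(50, 50)]
print(params_50_50['w_grid'][0].shape)  # (50,) -- the wage support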
def make_and_save(pi, lambda_, wses=None, nperiods=4, log=True,
                  figkwargs=None, axkwargs=None):
    """
    Wrapper for all the plotting functionality.

    Returns an axes and does IO.
    """
    # params are needed below even when wses is supplied.
    params = load_params()
    if wses is None:
        all_files = ar.get_all_files(params)
        wses = ar.read_output(all_files, kind='ws')
    df = get_df(pi, lambda_, wses[pi, lambda_], params)
    fig, ax = plot_wage_change_dist(df, pi, lambda_, nperiods=nperiods,
                                    log=log, figkwargs=figkwargs,
                                    axkwargs=axkwargs)
    savefig_(fig, pi, lambda_, nperiods=nperiods, log=log)
    return fig, ax
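# A usage sketch (hedged): the (pi, lambda_) values below are placeholders and
# must correspond to results already written to the results path so that
# ar.read_output can find them.
fig, ax = make_and_save(pi=0.02, lambda_=0.5, nperiods=8, log=True)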
def get_all_files(params=None):
    """Get the files from the results path."""
    if params is None:
        params = load_params()
    pth = params['results_path'][0]
    all_files = os.listdir(pth)
    return all_files
def main():
    params = load_params()
    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    keys = wses.keys()
    pis, lambdas = zip(*keys)  # unpack the (pi, lambda_) pairs
    pis_u, lambdas_u = sorted(set(pis)), sorted(set(lambdas))  # unique values
    for pi, lambda_ in keys:
        make_and_save(pi, lambda_)
        print('Saved {}, {}'.format(pi, lambda_))
def run():
    """
    This method is the main entry point for this processing block.
    """
    # pylint: disable=E1121
    ensure_data_directories_exist()
    params = load_params()  # type: dict
    input_metadata = load_metadata()  # type: FeatureCollection
    lcc = KMeansClustering.from_dict(params)
    result = lcc.process_feature(input_metadata)  # type: FeatureCollection
    save_metadata(result)
def __init__(self, hyperparams):
    self.pi, self.lambda_ = hyperparams
    self.piname = str(self.pi).replace('.', '')
    self.lname = str(self.lambda_).replace('.', '')
    self.out_name = '_'.join([self.piname, self.lname])
    params = load_params()
    params['pi'] = self.pi, 'inflation target'
    params['lambda_'] = self.lambda_, 'rigidity'
    params['call_dir'] = os.getcwd(), 'Path from which the script was called.'
    self.params = params
    self.res_by_run = []
    self.res_by_cat = defaultdict(list)
def get_g(pi, lambda_, period=28):
    """
    Helper function to get to a wage distribution.

    Warning: Will not touch the params in your global state. If you go on
    to do more things, make sure to adjust those params.
    """
    import analyze_run as ar
    params = load_params()
    params['pi'] = pi, 'a'
    params['lambda_'] = lambda_, 'b'
    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    ws = wses[(pi, lambda_)]
    pths, shks = sample_path(ws, params, nseries=1000, nperiods=30, seed=42)
    pth, shocks = pths[period], shks[period]  # use the requested period
    shocks = np.sort(shocks)
    g = ecdf(np.sort(pth))
    return g, shocks
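# A usage sketch (hedged): the (pi, lambda_) values are placeholders for a run
# that already exists on disk. The returned ECDF and sorted shocks can then be
# fed to downstream code such as get_rigid_output.
g, shocks = get_g(0.02, 0.5, period=28)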
def wage_dist_ecdf_refactor():
    """
    This implements the refactor of gp's to ecdfs.

    Also takes care of the output.
    """
    with open('results/fixup_notice.txt', 'a') as f:
        t = str(datetime.datetime.now())
        f.write("FIXED gps AT {}\n".format(t))
    params = load_params()
    params['results_path'] = 'results/', 'Results directory.'
    all_files = ar.get_all_files(params)
    gps = ar.read_output(all_files, kind='gp')
    wses = ar.read_output(all_files, kind='ws')
    z_grid = params['z_grid'][0]
    flex_ws = Interp(z_grid, ss_wage_flexible(params, shock=z_grid))
    for key in gps.iterkeys():
        piname, lambda_ = [str(x).replace('.', '') for x in key]
        out_name = 'results/gp_' + piname + '_' + lambda_ + '.pkl'
        # Keep a copy of the old result before overwriting it.
        shutil.copy2(out_name, 'results/replaced_results/')
        ws = wses[key]
        params['pi'] = key[0], 'inflation target'
        params['lambda_'] = key[1], 'rigidity'
        new_g, shocks = get_new_g(ws, params)
        with open(out_name, 'w') as f:
            cPickle.dump(new_g, f)
        print("Fixed wage distribution for {}.".format(key))
        new_rigid_out = get_rigid_output(ws, params, flex_ws, new_g, shocks)
        out_name = 'results/rigid_output_' + piname + '_' + lambda_ + '_.txt'
        with open(out_name, 'w') as f:
            f.write(str(new_rigid_out))
        with open('results/fixup_notice.txt', 'a') as f:
            f.write("Fixed {}\n".format(key))
def train_lstm(
    dim_proj=128,  # word embedding dimension and LSTM number of hidden units.
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=5000,  # The maximum number of epochs to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_words=10000,  # Vocabulary size
    optimizer=adadelta,  # sgd, adadelta and rmsprop available; sgd is very
                         # hard to use and not recommended (probably needs
                         # momentum and a decaying learning rate).
    encoder='lstm',  # TODO: can be removed, must be lstm.
    saveto='lstm_model.npz',  # The best model will be saved there
    validFreq=370,  # Compute the validation error after this many updates.
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Sequences longer than this get ignored
    batch_size=16,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    dataset='imdb',

    # Parameters for extra options
    noise_std=0.,
    use_dropout=True,  # if False slightly faster, but worse test error
                       # This frequently needs a bigger model.
    reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test examples.
):
    # Model options
    model_options = locals().copy()
    print "model options", model_options

    load_data, prepare_data = get_dataset(dataset)

    print 'Loading data'
    train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                   maxlen=maxlen)
    if test_size > 0:
        # The test set is sorted by size, but we want to keep random-size
        # examples. So we must select a random selection of the examples.
        idx = numpy.arange(len(test[0]))
        numpy.random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = numpy.max(train[1]) + 1
    model_options['ydim'] = ydim

    print 'Building model'
    # This creates the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params('lstm_model.npz', params)

    # This creates Theano Shared Variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copies of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, mask, y], cost, name='f_cost')

    grads = T.grad(cost, wrt=tparams.values())
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                        x, mask, y, cost)

    print 'Optimization'

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print "%d train examples" % len(train[0])
    print "%d valid examples" % len(valid[0])
    print "%d test examples" % len(test[0])

    history_errs = []
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format
                # This swaps the axis!
                # Return something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...',
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid,
                                           kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)

                    history_errs.append([valid_err, test_err])

                    if (uidx == 0 or
                            valid_err <= numpy.array(history_errs)[:, 0].min()):
                        best_p = unzip(tparams)
                        bad_counter = 0

                    print ('Train ', train_err, 'Valid ', valid_err,
                           'Test ', test_err)

                    if (len(history_errs) > patience and
                            valid_err >= numpy.array(history_errs)[:-patience,
                                                                   0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

            print 'Seen %d samples' % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interrupted"

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err

    if saveto:
        numpy.savez(saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    print 'The code ran for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
    print >> sys.stderr, ('Training took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
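# A usage sketch (hedged): the defaults above train on the IMDB dataset; a
# shorter run such as the one below is a reasonable quick check. The values
# are placeholders, not tuned settings.
if __name__ == '__main__':
    train_lstm(
        max_epochs=100,
        test_size=500,
    )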
saver = tf.train.Saver()
saver.save(sess, save_dir)
print('Model Trained and Saved')


# In[ ]:

helpers.save_params((seq_length, save_dir))


# In[ ]:

_, vocab_to_int, int_to_vocab, token_dict = helpers.load_preprocess()
seq_length, load_dir = helpers.load_params()


# In[ ]:

def get_tensors(loaded_graph):
    """
    Get input, initial state, final state, and probabilities tensor from <loaded_graph>
    :param loaded_graph: TensorFlow graph loaded from file
    :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)
    """
    InputTensor = loaded_graph.get_tensor_by_name("input:0")
    InitialStateTensor = loaded_graph.get_tensor_by_name("initial_state:0")
    # The remaining tensor names are assumed to follow the same naming
    # convention used above.
    FinalStateTensor = loaded_graph.get_tensor_by_name("final_state:0")
    ProbsTensor = loaded_graph.get_tensor_by_name("probs:0")
    return InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor
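# A usage sketch (hedged), assuming TensorFlow 1.x graphs/sessions as in the
# code above: restore the checkpoint saved under load_dir and pull out the
# named tensors.
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)
    input_tensor, initial_state, final_state, probs = get_tensors(loaded_graph)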
        new_num = np.max(old_nums) + 1
        new_dir = 'previous' + str(new_num)
    except ValueError:
        new_dir = 'previous0'
    os.makedirs(os.path.join('results', new_dir))
    for f in old_files:
        shutil.copy2(os.path.join('.', f), os.path.join('.', new_dir, f))


if __name__ == '__main__':
    import sys
    params_path = sys.argv[1]
    # keep load_params outside so that each fork has the same random seed.
    np.random.seed(42)
    params = load_params(params_path)
    try:
        os.makedirs('./results/intermediate')
    except OSError:
        pass
    move_prior_runs()
    write_metadeta(params)
    pi_low = params['pi_low'][0]
    pi_high = params['pi_high'][0]
    pi_n = params['pi_n'][0]
    pi_grid = np.linspace(pi_low, pi_high, pi_n)
    # Parallel(n_jobs=-1)(delayed(iter_bellman_wrapper)(unique_params)
    #                     for unique_params in unique_param_generator(params))
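    # A sequential sketch (hedged) of the commented-out joblib loop above,
    # assuming iter_bellman_wrapper and unique_param_generator are defined
    # elsewhere in this module.
    for unique_params in unique_param_generator(params):
        iter_bellman_wrapper(unique_params)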