def yield_jobs():
    """Generate condor job tuples for basis-learning experiments.

    For each sample size in the module-level ``n_samples`` list and each of
    ``n_runs`` random restarts, draws fresh random initial parameters, samples
    train/validation/test grid-world trajectories, builds a BellmanBasis, and
    yields one ``(condor_job, args)`` tuple per entry of ``training_methods``.

    NOTE(review): all configuration (``n_samples``, ``lam``, ``gam``, ``eps``,
    ``n_runs``, ``dim``, ``k``, ``reward_init``, ``m``, ``weighting``,
    ``beta_ratio``, ``partition``, ``losses``, ``nonlin``, ``patience``,
    ``max_iter``, ``training_methods``) comes from enclosing module scope —
    not visible in this chunk; verify against the surrounding file.

    Yields:
        (condor_job, [job_id_tuple, bb, m, tm, S, R, S_val, R_val,
                      S_test, R_test, Mphi, Mrew, patience, max_iter,
                      weighting]) — one per (sample size, run, method).
    """
    for i, n in enumerate(n_samples):
        # Mixing matrices for the (sampled) Bellman operator at this sample size.
        Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, sampled = True, eps = eps)
        for r in xrange(n_runs):
            # initialize features with unit norm
            theta_init = numpy.random.standard_normal((dim+1, k))
            if reward_init:
                theta_init[:-1, -1] = m.R  # XXX set last column to reward
                theta_init[-1, -1] = 0
            # Normalize every feature column to unit L2 norm.
            theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
            w_init = numpy.random.standard_normal((k+1, 1))
            w_init = w_init / numpy.linalg.norm(w_init)
            # sample data: training, validation, and test sets
            # Each sample_grid_world call returns (S, Sp, R, _); the last
            # next-state row is appended so S has n+1 rows.
            S, Sp, R, _, = mdp.sample_grid_world(n, distribution = weighting)
            # NOTE(review): training set is stacked with numpy.vstack while the
            # validation/test sets below use scipy.sparse.vstack — confirm this
            # dense/sparse asymmetry is intentional.
            S = numpy.vstack((S, Sp[-1, :]))
            S_val, Sp_val, R_val, _, = mdp.sample_grid_world(n, distribution = weighting)
            S_val = scipy.sparse.vstack((S_val, Sp_val[-1, :]))
            S_test, Sp_test, R_test, _, = mdp.sample_grid_world(n, distribution = weighting)
            S_test = scipy.sparse.vstack((S_test, Sp_test[-1, :]))
            bb = BellmanBasis(dim+1, k, beta_ratio, partition = partition,
                              theta = theta_init, w = w_init,
                              record_loss = losses, nonlin = nonlin)
            # One job per training method; (i, r, j) identifies
            # (sample-size index, run index, method index).
            for j, tm in enumerate(training_methods):
                yield (condor_job, [(i, r, j), bb, m, tm,
                                    S, R, S_val, R_val, S_test, R_test,
                                    Mphi, Mrew, patience, max_iter, weighting])
def yield_jobs():
    """Generate train_basis job tuples for basis-learning experiments.

    Iterates over sample sizes (or the full-information state count when
    ``n_samples`` is falsy) and ``n_runs`` random restarts; for each, draws
    random initial parameters, gathers training data, and yields one
    ``(train_basis, args)`` tuple per entry of ``training_methods``.

    NOTE(review): configuration (``n_samples``, ``n_states``, ``lam``, ``gam``,
    ``eps``, ``n_runs``, ``encoder``, ``k``, ``alpha``, ``reg``, ``nonlin``,
    ``nonzero``, ``training_methods``, ``run_param_keys``, ``env``, ``m``,
    ``losses``, ``max_iter``, ``patience``, ``min_imp``, ``min_delta``,
    ``fldir``, ``record_runs``, ``sample``, ``full_info``, …) comes from
    enclosing module scope — not visible in this chunk; verify against the
    surrounding file.

    Yields:
        (train_basis, [d_run_params, bb_params, bb_dict, env, m, losses,
                       X_data, R_data, Mphi, Mrew, max_iter, patience,
                       min_imp, min_delta, fldir, record_runs])
    """
    # Falls back to one "job" over all n_states when not sampling.
    for i, n in enumerate(n_samples or [n_states]):
        logger.info('creating job with %i samples/states' % n)

        # build bellman operator matrices
        logger.info('making mixing matrices')
        # sampled=True only when working from sampled trajectories.
        Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam,
                                                      sampled = bool(n_samples), eps = eps)
        for r in xrange(n_runs):
            n_features = encoder.n_features

            # initialize parameters
            theta_init = numpy.random.standard_normal((n_features, k))
            # Normalize every feature column to unit L2 norm.
            theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
            w_init = numpy.random.standard_normal((k+1, 1))
            w_init = w_init / numpy.linalg.norm(w_init)

            # sample or gather full info data
            X_data, R_data, weighting = sample(n) if n_samples else full_info()

            bb_params = [n_features, [k], beta_ratio]
            bb_dict = dict(alpha = alpha, reg_tuple = reg, nonlin = nonlin,
                           nonzero = nonzero, thetas = [theta_init])

            for j, tm in enumerate(training_methods):
                # Each training method pairs a list of losses with the
                # variables each loss is optimized with respect to.
                loss_list, wrt_list = tm
                assert len(loss_list) == len(wrt_list)

                # NOTE(review): assumes reg is a 2-tuple (name, value) when
                # set — confirm against where ``reg`` is defined.
                run_param_values = [k, tm, encoder, n,
                                    n_reward_samples, n_reward_runs,
                                    env_size, weighting, lam, gam, alpha, eta,
                                    reg[0]+str(reg[1]) if reg else 'None',
                                    nonlin if nonlin else 'None']
                d_run_params = dict(izip(run_param_keys, run_param_values))

                yield (train_basis, [d_run_params, bb_params, bb_dict, env, m, losses, # environment, model and loss list
                                     X_data, R_data, Mphi, Mrew, # training data
                                     max_iter, patience, min_imp, min_delta, # optimization params
                                     fldir, record_runs]) # recording params