def gather_data4(env, epochs, data_points, train=True, unpack=False):
    # Environments with short episodes reuse the plain epoch-based gatherer;
    # for the others, training data is capped by data_points instead.
    if env.spec.id in ['Pendulum-v0', 'MountainCarContinuous-v0']:
        return gather_data(env, epochs=epochs, unpack=unpack)
    elif train:
        return gather_data3(env, data_points=data_points, unpack=unpack)
    else:
        # Roll out a uniform-random policy for `epochs` full episodes.
        data = []
        count = 0
        while True:
            state = env.reset()
            while True:
                action = np.random.uniform(low=env.action_space.low,
                                           high=env.action_space.high)
                next_state, reward, done, _ = env.step(action)
                data.append([state, action, reward, next_state, done])
                state = np.copy(next_state)
                if done:
                    count += 1
                    break
            if count == epochs:
                break
        if not unpack:
            return data
        # Transpose the transitions and stack each field, dropping `done`.
        states, actions, rewards, next_states = [
            np.stack(ele, axis=0) for ele in list(zip(*data))[:-1]
        ]
        return states, actions, rewards[..., np.newaxis], next_states
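# Hedged usage sketch (not part of the original source): demonstrates the
# zip(*data)-and-stack idiom gather_data4 relies on. The toy shapes are
# assumptions chosen purely for illustration.
def _demo_unpack_transitions():
    data = [[np.zeros(3), np.zeros(1), 0.5, np.ones(3), False] for _ in range(10)]
    # Transpose the list of transitions, drop the trailing `done` field, and
    # stack each remaining field into a batched array.
    states, actions, rewards, next_states = [
        np.stack(ele, axis=0) for ele in list(zip(*data))[:-1]
    ]
    assert states.shape == (10, 3)
    assert rewards[..., np.newaxis].shape == (10, 1)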
def main_loop():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--unroll_steps", type=int, default=200)
    parser.add_argument("--discount_factor", type=float, default=.995)
    parser.add_argument("--gather_data_epochs", type=int, default=3,
                        help='Epochs for initial data gathering.')
    parser.add_argument("--train_hp_iterations", type=int, default=2000 * 10)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--no_samples", type=int, default=1)
    parser.add_argument("--basis_dim", type=int, default=256)
    parser.add_argument("--hidden_dim", type=int, default=32)
    parser.add_argument("--rffm_seed", type=int, default=1)
    parser.add_argument("--Agent", type=str, choices=['', '2', '3'], default='')
    parser.add_argument("--learn_reward", type=int, choices=[0, 1], default=1)
    parser.add_argument("--max_train_hp_datapoints", type=int, default=20000)
    parser.add_argument("--matern_param_reward", type=float, default=np.inf)
    parser.add_argument("--basis_dim_reward", type=int, default=600)
    parser.add_argument("--use_mean_reward", type=int, default=0)
    parser.add_argument("--update_hyperstate", type=int, default=1)
    parser.add_argument("--policy_use_hyperstate", type=int, default=1)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--learn_diff", type=int, choices=[0, 1], default=0)
    args = parser.parse_args()

    print(sys.argv)
    print(args)

    from blr_regression2_sans_hyperstate import Agent2
    from blr_regression2_tf import Agent3

    env = gym.make(args.environment)

    # One Bayesian linear regression model (over random Fourier features) per
    # state dimension, plus an optional model for the reward.
    regression_wrappers = [
        RegressionWrapper(input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
                          basis_dim=args.basis_dim,
                          length_scale=1.,
                          signal_sd=1.,
                          noise_sd=5e-4,
                          prior_sd=1.,
                          rffm_seed=args.rffm_seed,
                          train_hp_iterations=args.train_hp_iterations)
        for _ in range(env.observation_space.shape[0])
    ]
    if args.learn_reward == 1:
        regression_wrappers.append(
            RegressionWrapperReward(environment=args.environment,
                                    input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
                                    basis_dim=args.basis_dim_reward,
                                    length_scale=1.,
                                    signal_sd=1.,
                                    noise_sd=5e-4,
                                    prior_sd=1.,
                                    rffm_seed=args.rffm_seed,
                                    train_hp_iterations=args.train_hp_iterations,
                                    matern_param=args.matern_param_reward))

    # Selects Agent, Agent2 or Agent3 depending on --Agent.
    agent = eval('Agent' + args.Agent)(
        environment=env.spec.id,
        x_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        y_dim=env.observation_space.shape[0],
        state_dim=env.observation_space.shape[0],
        action_dim=env.action_space.shape[0],
        observation_space_low=env.observation_space.low,
        observation_space_high=env.observation_space.high,
        action_space_low=env.action_space.low,
        action_space_high=env.action_space.high,
        unroll_steps=args.unroll_steps,
        no_samples=args.no_samples,
        discount_factor=args.discount_factor,
        random_matrices=[rw.random_matrix for rw in regression_wrappers],
        biases=[rw.bias for rw in regression_wrappers],
        basis_dims=[rw.basis_dim for rw in regression_wrappers],
        hidden_dim=args.hidden_dim,
        learn_reward=args.learn_reward,
        use_mean_reward=args.use_mean_reward,
        update_hyperstate=args.update_hyperstate,
        policy_use_hyperstate=args.policy_use_hyperstate,
        learn_diff=args.learn_diff)

    flag = False
    data_buffer = gather_data(env, args.gather_data_epochs)
    data_buffer = scrub_data(args.environment, data_buffer, True)

    init_states = np.stack([env.reset() for _ in range(args.train_policy_batch_size)], axis=0)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if args.environment == 'Pendulum-v0' and args.learn_reward == 0:
            weights = pickle.load(open('../custom_environments/weights/pendulum_reward.p', 'rb'))
            sess.run(agent.assign_ops0,
                     feed_dict=dict(zip(agent.placeholders_reward, weights)))
        for epoch in range(1000):
            # Train hyperparameters and update the system model.
            states_actions, states, rewards, next_states = unpack(data_buffer)
            targets = np.concatenate([next_states - states if args.learn_diff else next_states,
                                      rewards], axis=-1)
            for i in range(env.observation_space.shape[0] + args.learn_reward):
                if not flag:
                    regression_wrappers[i]._train_hyperparameters(states_actions, targets[:, i:i + 1])
                    regression_wrappers[i]._reset_statistics(states_actions, targets[:, i:i + 1])
                else:
                    regression_wrappers[i]._update(states_actions, targets[:, i:i + 1])
            if len(data_buffer) >= args.max_train_hp_datapoints:
                flag = True
            if flag:
                data_buffer = []
            tmp_data_buffer = []

            # Fit the policy network.
            XX, Xy, hyperparameters = zip(*[[rw.XX, rw.Xy, rw.hyperparameters]
                                            for rw in regression_wrappers])
            agent._fit(args.cma_maxiter, np.copy(init_states),
                       [np.copy(ele) for ele in XX],
                       [np.copy(ele) for ele in Xy],
                       [np.copy(ele) for ele in hyperparameters], sess)

            # Get the hyperstate (Cholesky factor of the regularised Gram
            # matrix and Xy for every model) and the hyperparameters.
            hyperstate = list(zip(*[[scipy.linalg.cholesky(np.copy(rw.XX) +
                                                           (rw.noise_sd / rw.prior_sd)**2 * np.eye(rw.basis_dim),
                                                           lower=True)[np.newaxis, ...],
                                     np.copy(rw.Xy)[np.newaxis, ...]]
                                    for rw in regression_wrappers]))

            total_rewards = 0.
            state = env.reset()
            while True:
                #env.render()
                action = agent._forward(agent.thetas, state[np.newaxis, ...], hyperstate)[0]
                next_state, reward, done, _ = env.step(action)
                #hyperstate = update_hyperstate_old(agent, XX, hyperstate, hyperparameters, [state, action, reward, next_state, done], agent.state_dim+agent.learn_reward, args.learn_diff)
                hyperstate = update_hyperstate(agent, hyperstate, hyperparameters,
                                               [state, action, reward, next_state, done],
                                               agent.state_dim + agent.learn_reward,
                                               args.learn_diff)
                tmp_data_buffer.append([state, action, reward, next_state, done])
                total_rewards += float(reward)
                state = np.copy(next_state)
                if done:
                    print('epoch:', epoch, 'total_rewards:', total_rewards)
                    data_buffer.extend(scrub_data(args.environment, tmp_data_buffer, False))
                    break
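# Hedged sketch of what a hyperstate refresh can look like; this is an
# assumption about update_hyperstate, not its actual body. After observing a
# transition, the Bayesian linear regression sufficient statistics get a
# rank-1 update, XX <- XX + phi phi^T and Xy <- Xy + phi*y, and the
# regularised Gram matrix is re-factorised. `phi` stands in for the
# random-Fourier-feature vector of the (state, action) pair and `y` for the
# scalar target.
def _demo_rank1_hyperstate_update(Llower, Xy, phi, y):
    # Recover the regularised Gram matrix from its Cholesky factor.
    A = Llower.dot(Llower.T)
    # Rank-1 update with the new feature vector and target.
    A = A + np.outer(phi, phi)
    Xy = Xy + phi[:, np.newaxis] * y
    Llower = scipy.linalg.cholesky(A, lower=True)
    return Llower, Xy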
def plotting_experiments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str,
                        choices=['Pendulum-v0', 'MountainCarContinuous-v0',
                                 'MinitaurBulletEnv-v0', 'CartPoleBulletEnv-v0',
                                 'HumanoidBulletEnv-v0', 'AntBulletEnv-v0',
                                 'HopperBulletEnv-v0', 'HalfCheetahBulletEnv-v0',
                                 'Walker2DBulletEnv-v0', 'InvertedPendulumBulletEnv-v0',
                                 'InvertedDoublePendulumBulletEnv-v0',
                                 'InvertedPendulumSwingupBulletEnv-v0'],
                        default='Pendulum-v0')
    parser.add_argument("--train-hp-iterations", type=int, default=2000)
    parser.add_argument("--basis-dim", type=int, default=256)
    parser.add_argument("--basis-dim-reward", type=int, default=600)
    parser.add_argument("--matern-param", type=float, default=np.inf)
    parser.add_argument("--matern-param-reward", type=float, default=np.inf)
    # The next four flags are only used when --environment=MountainCarContinuous-v0.
    parser.add_argument("--train-hit-wall", type=int, default=0)
    parser.add_argument("--train-reach-goal", type=int, default=0)
    parser.add_argument("--test-hit-wall", type=int, default=0)
    parser.add_argument("--test-reach-goal", type=int, default=0)
    parser.add_argument("--update-hyperstate", type=int, default=0)
    args = parser.parse_args()
    print(args)

    import matplotlib.pyplot as plt
    from utils import get_mcc_policy

    if args.environment == 'MountainCarContinuous-v0':
        train_set_size = 1
    else:
        train_set_size = 3

    env = gym.make(args.environment)

    # One predictor per state dimension, plus one for the reward.
    predictors = []
    for i in range(env.observation_space.shape[0]):
        predictors.append(
            RegressionWrapper2(input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
                               basis_dim=args.basis_dim,
                               length_scale=1.,
                               signal_sd=1.,
                               noise_sd=5e-4,
                               prior_sd=1.,
                               rffm_seed=1,
                               train_hp_iterations=args.train_hp_iterations,
                               matern_param=args.matern_param))
    predictors.append(
        RegressionWrapperReward2(args.environment,
                                 input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
                                 basis_dim=args.basis_dim_reward,
                                 length_scale=1.,
                                 signal_sd=1.,
                                 noise_sd=5e-4,
                                 prior_sd=1.,
                                 rffm_seed=1,
                                 train_hp_iterations=args.train_hp_iterations,
                                 matern_param=args.matern_param_reward))

    if args.environment == 'MountainCarContinuous-v0':
        states, actions, rewards, next_states = get_mcc_policy(
            env, hit_wall=bool(args.train_hit_wall),
            reach_goal=bool(args.train_reach_goal), train=True)
    else:
        states, actions, rewards, next_states = gather_data(env, train_set_size, unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    for i in range(env.observation_space.shape[0]):
        predictors[i]._train_hyperparameters(states_actions, next_states[:, i:i + 1])
    predictors[-1]._train_hyperparameters(states_actions, rewards)

    while True:
        for i in range(env.observation_space.shape[0]):
            predictors[i]._reset_statistics(states_actions, next_states[:, i:i + 1],
                                            bool(args.update_hyperstate))
        predictors[-1]._reset_statistics(states_actions, rewards, bool(args.update_hyperstate))

        # Gather a fresh test episode.
        if args.environment == 'MountainCarContinuous-v0':
            states2, actions2, rewards2, next_states2 = get_mcc_policy(
                env, hit_wall=bool(args.test_hit_wall),
                reach_goal=bool(args.test_reach_goal), train=False)
        else:
            states2, actions2, rewards2, next_states2 = gather_data(env, 1, unpack=True, test=True)
        states_actions2 = np.concatenate([states2, actions2], axis=-1)

        plt.figure()
        # Row 1: one-step state predictions with error bars.
        for i in range(env.observation_space.shape[0]):
            plt.subplot(4, env.observation_space.shape[0], i + 1)
            predict_mu, predict_sigma = predictors[i]._predict(states_actions2, False)
            plt.plot(np.arange(len(next_states2[:, i:i + 1])), next_states2[:, i:i + 1])
            plt.errorbar(np.arange(len(predict_mu)), predict_mu,
                         yerr=np.sqrt(predict_sigma), color='m', ecolor='g')
            plt.grid()

        # Rows 2 and 4: multi-step rollouts sampled from the model, replaying
        # the test episode's action sequence from its initial state.
        traj_reward = []
        traj = []
        no_lines = 50
        state = np.tile(np.copy(states2[0:1, ...]), [no_lines, 1])
        for a in actions2:
            action = np.tile(a[np.newaxis, ...], [no_lines, 1])
            state_action = np.concatenate([state, action], axis=-1)

            mu_reward, sigma_reward = predictors[-1]._predict(state_action,
                                                              bool(args.update_hyperstate))
            reward = np.stack([np.random.normal(loc=mu, scale=sigma)
                               for mu, sigma in zip(mu_reward, sigma_reward)], axis=0)
            traj_reward.append(reward)

            mu_vec = []
            sigma_vec = []
            for i in range(env.observation_space.shape[0]):
                predict_mu, predict_sigma = predictors[i]._predict(state_action,
                                                                   bool(args.update_hyperstate))
                mu_vec.append(predict_mu)
                sigma_vec.append(predict_sigma)
            mu_vec = np.concatenate(mu_vec, axis=-1)
            sigma_vec = np.concatenate(sigma_vec, axis=-1)
            state = np.stack([np.random.multivariate_normal(mu, np.diag(sigma))
                              for mu, sigma in zip(mu_vec, sigma_vec)], axis=0)
            state = np.clip(state, env.observation_space.low, env.observation_space.high)
            traj.append(np.copy(state))

            for i in range(env.observation_space.shape[0]):
                predictors[i]._update_hyperstate(state_action, state[:, i:i + 1],
                                                 bool(args.update_hyperstate))
            predictors[-1]._update_hyperstate(state_action, reward,
                                              bool(args.update_hyperstate))

        traj_reward = np.stack(traj_reward, axis=-1)
        traj = np.stack(traj, axis=-1)

        plt.subplot(4, 1, 4)
        for j in range(no_lines):
            y = traj_reward[j, 0, :]
            plt.plot(np.arange(len(y)), y, color='r')
        plt.plot(np.arange(len(rewards2)), rewards2)
        plt.grid()

        for i in range(env.observation_space.shape[0]):
            plt.subplot(4, env.observation_space.shape[0],
                        env.observation_space.shape[0] + i + 1)
            for j in range(no_lines):
                y = traj[j, i, :]
                plt.plot(np.arange(len(y)), y, color='r')
            plt.plot(np.arange(len(next_states2[..., i])), next_states2[..., i])
            plt.grid()

        # Row 3: one-step reward predictions with error bars.
        plt.subplot(4, 1, 3)
        predict_mu, predict_sigma = predictors[-1]._predict(states_actions2, False)
        plt.plot(np.arange(len(rewards2)), rewards2)
        plt.errorbar(np.arange(len(predict_mu)), predict_mu,
                     yerr=np.sqrt(predict_sigma), color='m', ecolor='g')
        plt.grid()

        plt.show(block=True)
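# Hedged sketch of the posterior predictive the _predict calls above expose;
# an assumption about RegressionWrapper2, not its actual implementation. For
# Bayesian linear regression on features phi with
#   A = Phi^T Phi + (noise_sd/prior_sd)^2 I  and  b = Phi^T y:
#   mean     = phi A^{-1} b
#   variance = noise_sd^2 (1 + phi A^{-1} phi^T)
def _demo_blr_predict(phi, A, b, noise_sd=5e-4):
    w = np.linalg.solve(A, b)  # posterior mean weights
    mu = phi.dot(w)            # predictive mean, shape (n, 1)
    Ainv_phiT = np.linalg.solve(A, phi.T)
    var = noise_sd**2 * (1. + np.sum(phi * Ainv_phiT.T, axis=1, keepdims=True))
    return mu, var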
def main_loop():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--unroll_steps", type=int, default=200)
    parser.add_argument("--discount_factor", type=float, default=.995)
    parser.add_argument("--gather_data_epochs", type=int, default=3,
                        help='Epochs for initial data gathering.')
    parser.add_argument("--train_hp_iterations", type=int, default=2000 * 10)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--no_samples", type=int, default=1)
    parser.add_argument("--basis_dim", type=int, default=256)
    parser.add_argument("--hidden_dim", type=int, default=32)
    parser.add_argument("--rffm_seed", type=int, default=1)
    parser.add_argument("--Agent", type=str, choices=['', '2'], default='')
    parser.add_argument("--learn_reward", type=int, choices=[0, 1], default=1)
    parser.add_argument("--max_train_hp_datapoints", type=int, default=20000)
    parser.add_argument("--matern_param_reward", type=float, default=np.inf)
    parser.add_argument("--basis_dim_reward", type=int, default=600)
    parser.add_argument("--use_mean_reward", type=int, default=0)
    parser.add_argument("--update_hyperstate", type=int, default=1)
    parser.add_argument("--policy_use_hyperstate", type=int, default=1)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--learn_diff", type=int, choices=[0, 1], default=0)
    parser.add_argument("--dump_model", type=int, choices=[0, 1], default=0)
    args = parser.parse_args()

    print(sys.argv)
    print(args)

    from blr_regression2_sans_hyperstate_multioutput import Agent2

    env = gym.make(args.environment)

    # A single multi-output Bayesian linear regression model for the state and
    # a separate model for the reward.
    regression_wrapper_state = MultiOutputRegressionWrapper(
        input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        output_dim=env.observation_space.shape[0],
        basis_dim=args.basis_dim,
        length_scale=1.,
        signal_sd=1.,
        noise_sd=5e-4,
        prior_sd=1.,
        rffm_seed=args.rffm_seed,
        train_hp_iterations=args.train_hp_iterations)
    regression_wrapper_reward = RegressionWrapperReward(
        environment=args.environment,
        input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        basis_dim=args.basis_dim_reward,
        length_scale=1.,
        signal_sd=1.,
        noise_sd=5e-4,
        prior_sd=1.,
        rffm_seed=args.rffm_seed,
        train_hp_iterations=args.train_hp_iterations,
        matern_param=args.matern_param_reward)

    # Selects Agent or Agent2 depending on --Agent.
    agent = eval('Agent' + args.Agent)(
        environment=env.spec.id,
        x_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        y_dim=env.observation_space.shape[0],
        state_dim=env.observation_space.shape[0],
        action_dim=env.action_space.shape[0],
        observation_space_low=env.observation_space.low,
        observation_space_high=env.observation_space.high,
        action_space_low=env.action_space.low,
        action_space_high=env.action_space.high,
        unroll_steps=args.unroll_steps,
        no_samples=args.no_samples,
        discount_factor=args.discount_factor,
        random_matrix_state=regression_wrapper_state.random_matrix,
        bias_state=regression_wrapper_state.bias,
        basis_dim_state=regression_wrapper_state.basis_dim,
        random_matrix_reward=regression_wrapper_reward.random_matrix,
        bias_reward=regression_wrapper_reward.bias,
        basis_dim_reward=regression_wrapper_reward.basis_dim,
        #random_matrices=[rw.random_matrix for rw in regression_wrappers],
        #biases=[rw.bias for rw in regression_wrappers],
        #basis_dims=[rw.basis_dim for rw in regression_wrappers],
        hidden_dim=args.hidden_dim,
        learn_reward=args.learn_reward,
        use_mean_reward=args.use_mean_reward,
        update_hyperstate=args.update_hyperstate,
        policy_use_hyperstate=args.policy_use_hyperstate,
        learn_diff=args.learn_diff,
        dump_model=args.dump_model)

    #I have to work on the classes before working on the code below.
    flag = False
    data_buffer = gather_data(env, args.gather_data_epochs)
    data_buffer = scrub_data(args.environment, data_buffer, True)

    init_states = np.stack([env.reset() for _ in range(args.train_policy_batch_size)], axis=0)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if args.environment == 'Pendulum-v0' and args.learn_reward == 0:
            weights = pickle.load(open('../custom_environments/weights/pendulum_reward.p', 'rb'))
            sess.run(agent.assign_ops0,
                     feed_dict=dict(zip(agent.placeholders_reward, weights)))
        for epoch in range(1000):
            if epoch == 0:
                # Train hyperparameters and update the system model.
                states_actions, states, rewards, next_states = unpack(data_buffer)
                next_states_train = next_states.copy() - states.copy() \
                    if args.learn_diff else next_states.copy()
                rewards_train = rewards.copy()
                if not flag:
                    regression_wrapper_state._train_hyperparameters(states_actions, next_states_train)
                    regression_wrapper_state._reset_statistics(states_actions, next_states_train)
                    regression_wrapper_reward._train_hyperparameters(states_actions, rewards_train)
                    regression_wrapper_reward._reset_statistics(states_actions, rewards_train)
                else:
                    regression_wrapper_state._update(states_actions, next_states_train)
                    regression_wrapper_reward._update(states_actions, rewards_train)
            if len(data_buffer) >= args.max_train_hp_datapoints:
                flag = True
            if flag:
                data_buffer = []
            tmp_data_buffer = []

            # Fit the policy network.
            #XX, Xy, hyperparameters = zip(*[[rw.XX, rw.Xy, rw.hyperparameters] for rw in regression_wrappers])
            #eval('agent.'+args.fit_function)(args.cma_maxiter, np.copy(init_states), [np.copy(ele) for ele in XX], [np.copy(ele) for ele in Xy], [np.copy(ele) for ele in hyperparameters], sess)
            if epoch == 0:
                agent._fit(args.cma_maxiter, init_states.copy(),
                           regression_wrapper_state.XX.copy(),
                           regression_wrapper_state.Xy.copy(),
                           regression_wrapper_state.hyperparameters.copy(),
                           regression_wrapper_reward.XX.copy(),
                           regression_wrapper_reward.Xy.copy(),
                           regression_wrapper_reward.hyperparameters.copy(),
                           sess)

            # Get the hyperstate and hyperparameters.
            hyperstate_params = [regression_wrapper_state.Llower.copy()[None, ...],
                                 regression_wrapper_state.Xy.copy()[None, ...],
                                 regression_wrapper_reward.Llower.copy()[None, ...],
                                 regression_wrapper_reward.Xy.copy()[None, ...]]

            total_rewards = 0.
            state = env.reset()
            while True:
                #env.render()
                action = agent._forward(agent.thetas, state[np.newaxis, ...], hyperstate_params)[0]
                next_state, reward, done, _ = env.step(action)
                if env.spec.id == 'InvertedPendulumBulletEnv-v0':
                    # Reward shaping: substitute an observation component for
                    # the environment reward.
                    reward = next_state[2]
                hyperstate_params = update_hyperstate(agent, hyperstate_params,
                                                      regression_wrapper_state.hyperparameters.copy(),
                                                      regression_wrapper_reward.hyperparameters.copy(),
                                                      [state, action, reward, next_state, done],
                                                      args.learn_diff)
                tmp_data_buffer.append([state, action, reward, next_state, done])
                total_rewards += float(reward)
                state = np.copy(next_state)
                if done:
                    print('epoch:', epoch, 'total_rewards:', total_rewards)
                    # More reward shaping: pad the episode with a few extra
                    # random-action transitions.
                    if env.spec.id == 'InvertedPendulumBulletEnv-v0':
                        for _ in range(10):
                            action = np.random.uniform(low=env.action_space.low,
                                                       high=env.action_space.high)
                            next_state, _, done, _ = env.step(action)
                            tmp_data_buffer.append([state, action, next_state[2],
                                                    next_state, done])
                            state = next_state.copy()
                    data_buffer.extend(scrub_data(args.environment, tmp_data_buffer, False))
                    break
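# Hedged sketch of the multi-output trick the MultiOutputRegressionWrapper
# name suggests (an assumption, not its actual implementation): every state
# dimension shares one feature Gram matrix A, Xy gains a column per output,
# and a single linear solve yields all posterior means at once.
def _demo_multioutput_mean(phi, A, Xy_multi):
    # Xy_multi: (basis_dim, output_dim); one solve serves all outputs.
    W = np.linalg.solve(A, Xy_multi)  # (basis_dim, output_dim)
    return phi.dot(W)                 # (n, output_dim) predictive means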
def main():
    param = {'N_time_step': 100,
             'N_quench': 0,
             'Ti': 0.04,
             'action_set': 0,
             'hx_initial_state': -2.0,
             'hx_final_state': 2.0,
             'delta_t': 0.001,
             'hx_i': -4.0,
             'RL_CONSTRAINT': True,
             'L': 6,
             'J': 1.00,
             'hz': 1.0,
             'symmetrize': False}

    file_name = ut.make_file_name(param)
    res = ut.gather_data(param, "../data/")
    print(compute_observable.Ed_Ad_OP(res['h_protocol'], 4))
    plotting.protocol(range(100), res['h_protocol'][0])
    #plotting.protocol(range(100),res['h_protocol'][1])
    #print(res['fid'])
    #print(res.keys())
    print(file_name)
    #with open('
    exit()

    import os

    #===========================================================================
    # pca=PCA(n_components=2)
    # param['N_time_step']=10
    # dc=ut.gather_data(param,'../data/')
    # pca.fit(dc['h_protocol']/4.)
    # X=pca.transform(dc['h_protocol']/4.)
    #
    # plt.scatter(X[:,0],X[:,1])
    # plt.title('PCA, $t=0.1$, continuous protocol')
    # plt.savefig("PCA_AS2_t-0p1.pdf")
    # plt.show()
    # exit()
    #===========================================================================

    #===========================================================================
    # dataBB8=[]
    # param['action_set']=0
    # param['N_time_step']=60
    #
    # param['delta_t']=0.5/60.
    # dc=ut.gather_data(param,'../data/')
    # pca=PCA(n_components=2)
    # pca.fit(dc['h_protocol']/4.)
    # print(pca.explained_variance_ratio_)
    # exit()
    #
    # param['delta_t']=3.0/60.
    # dc=ut.gather_data(param,'../data/')
    # X=pca.transform(dc['h_protocol']/4.)
    #
    # title='PCA$_{50}$, $t=3.0$, continuous protocol, nStep$=60$'
    # out_file="PCA_AS0_t-3p0_nStep-60.pdf"
    # plotting.visne_2D(X[:,0],X[:,1],dc['fid'],zlabel="Fidelity",out_file=out_file,title=title,show=True,xlabel='PCA-1',ylabel='PCA-2')
    #===========================================================================

    #plt.scatter(X[:,0],X[:,1])
    #plt.title('PCA$_{50}$, $t=1.5$, continuous protocol, nStep$=60$')
    #plt.savefig("PCA_AS0_t-0p8_nStep-60.pdf")
    #plt.show()
    #exit()

    #===========================================================================
    # param['N_time_step']=2
    # param['action_set']=0
    # dc=ut.gather_data(param,'../data/')
    # print(dc['h_protocol'])
    # exit()
    # dataBB8=[]
    #===========================================================================

    #===============================================================================
    # param['action_set']=0
    # param['N_time_step']=60
    # param['delta_t']=0.5/60
    #
    # dc=ut.gather_data(param,'../data/')
    # protocols=dc['h_protocol']
    # sort_f=np.argsort(dc['fid'])[::-1]
    # print(sort_f[0])
    #
    # best_prot=protocols[sort_f[0:10]]
    # x=np.array(range(60))*1.0/60
    # plotting.protocol(protocols[:2],x,labels=dc['fid'][:2],show=True)
    # exit()
    #===============================================================================

    # Scan the total protocol duration T and record, for each available data
    # set, the Edwards-Anderson parameter and the mean number of fidelity
    # evaluations.
    param['N_time_step'] = 60
    param['action_set'] = 0
    dataBB8 = []
    compTime = []
    x = []
    for t in np.arange(0.1, 3.05, 0.1):
        dt = t / param['N_time_step']
        param['delta_t'] = dt
        # ut.gather_data was changed to return False when the file is not found.
        dc = ut.gather_data(param, '../data/')
        if dc is not False:
            eaop = compute_observable.Ed_Ad_OP(dc['h_protocol'], 4.0)
            print(t, eaop, dc['fid'].shape, '\t', np.mean(dc['n_fid']))
            compTime.append(np.mean(dc['n_fid']))
            dataBB8.append(eaop)
            x.append(t)
        else:
            print("Data not available for %.3f" % dt)

    y = compTime
    plotting.observable(y, x,
                        title='Depth of search for bang-bang protocol',
                        ylabel=r'\# of fidelity evaluations',
                        xlabel='$T$',
                        marker="-",
                        labels=['Obtained time (SGD)'])
    exit()

    #===========================================================================
    # param['action_set']=0
    # param['delta_t']=0.01
    #===========================================================================

    #===========================================================================
    # for i in range(2,300,4):
    #     param['N_time_step']=i
    #     is_there,dc=ut.gather_data(param,'../data/')
    #     if is_there:
    #         eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #         print(i,eaop,dc['fid'].shape,'\t',np.mean(dc['n_fid']))
    #         compTime.append(np.mean(dc['n_fid']))
    #         dataBB8.append(eaop)
    #         x.append(i)
    #     else:
    #         print("Data not available for %i"%i)
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=150
    # is_there,dc=ut.gather_data(param,'../data/')
    # x=np.arange(0,150*0.01,0.01)
    # plotting.protocol(dc['h_protocol'][:3],x,labels=dc['fid'][:3],show=True)
    # exit()
    #
    # x=np.array(range(2,300,4))*0.01
    #===========================================================================

    # Same scan over the number of time steps at fixed delta_t. Note that here
    # ut.gather_data returns an availability flag alongside the data.
    param['action_set'] = 0
    param['delta_t'] = 0.01
    mean_fid_BB = []
    h_protocol_BB = {}
    fid_BB = {}
    n_fid_BB = []
    x = []
    sigma_fid = []
    EA_OP = []
    for i in range(2, 300, 4):
        param['N_time_step'] = i
        data_is_available, dc = ut.gather_data(param, '../data/')
        if data_is_available:
            mean_fid_BB.append(np.mean(dc['fid']))
            sigma_fid.append(np.std(dc['fid']))
            fid_BB[i] = dc['fid']
            EA_OP.append(compute_observable.Ed_Ad_OP(dc['h_protocol'], 4.0))
            h_protocol_BB[i] = dc['h_protocol']
            n_fid_BB.append(np.mean(dc['n_fid']))
            x.append(i * param['delta_t'])

    #print(fid_BB[130])
    #mean=np.mean(fid_BB[130])
    #sns.distplot(fid_BB[130],bins=np.linspace(mean-0.005,mean+0.005,100))
    #plt.tick_params(labelleft='off')
    #plt.show()

    x = np.array(x)
    # Number of fidelity evaluations per protocol time step, N = T / delta_t.
    y = [n / (t / param['delta_t']) for n, t in zip(n_fid_BB, x)]
    plotting.observable(y, x,
                        title='Depth of search for bang-bang protocol',
                        ylabel=r'(\# of fidelity evaluations)/$N$',
                        xlabel='$T$',
                        marker="-",
                        labels=['Minimum time', 'Obtained time (SGD)'])
    #plotting.protocol(h_protocol_BB[130][20:25],np.arange(0,130,1)*param['delta_t'])
    exit()

    #pca.fit()
    #===========================================================================
    # dataCONT=[]
    # for t in range(2,300,4):
    #     print(t)
    #     param['N_time_step']=t
    #     dc=ut.gather_data(param,'../data/')
    #     eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #     print(eaop)
    #     dataCONT.append(eaop)
    #
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'wb') as f:
    #     pickle.dump(dataCONT,f)
    # exit()
    #===========================================================================

    #===========================================================================
    # param['action_set']=0
    # dataBB8=[]
    # for t in range(2,300,4):
    #     print(t)
    #     param['N_time_step']=t
    #     dc=ut.gather_data(param,'../data/')
    #     eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #     print(eaop)
    #     dataBB8.append(eaop)
    #
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'wb') as f:
    #     pickle.dump(dataBB8,f)
    # exit()
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=298
    # param['action_set']=0
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'rb') as f:
    #     dataBB8=pickle.load(f)
    #
    # param['action_set']=2
    # f="../data/EAOP_"+ut.make_file_name(param)
    # with open(f,'rb') as file:
    #     dataCONT=pickle.load(file)
    #
    # time_axis=np.array(range(2,300,4))*0.01
    # title="Edwards-Anderson parameter ($n=400$) vs. evolution time for SGD\n with the different action protocols ($L=1$)"
    # plotting.observable([dataBB8,dataCONT],[time_axis,time_axis],title=title,
    #                     out_file="SGD_EAOPvsT_AS0-2.pdf",show=True,
    #                     ylabel="$q_{EA}$",xlabel="$t$",labels=['bang-bang8','continuous'])
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=250
    # dc=ut.gather_data(param,'../data/')
    # sns.distplot(dc['fid'],kde=False,label='$t=%.3f$'%(param['N_time_step']*0.01))
    # plt.legend(loc='best')
    # plt.savefig('SGD_hist_fid_t2p5.pdf')
    # plt.show()
    # exit()
    #===========================================================================

    #===========================================================================
    # title="Fidelity ($n=400$) vs. evolution time for SGD\n with the different action protocols ($L=1$)"
    # plotting.observable(np.array(data),np.array(range(2,300,4))*0.01,title=title,
    #                     out_file="SGD_FvsT_AS2.pdf",show=True,
    #                     ylabel="$F$",xlabel="$t$",labels=['continuous'])
    #===========================================================================

    exit()
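# Hedged sketch of one common form of the Edwards-Anderson order parameter;
# an assumption about compute_observable.Ed_Ad_OP, not its actual body. Given
# N protocols h_alpha(t) normalised by h_max, the pair correlation
#   q_EA = (1/T) sum_t 1/(N(N-1)) sum_{alpha != beta} s_alpha(t) s_beta(t)
# measures how strongly the sampled protocols agree with one another.
def _demo_edwards_anderson(protocols, h_max):
    s = np.asarray(protocols, dtype=float) / h_max  # (N, T) normalised protocols
    N = s.shape[0]
    # sum_{alpha != beta} s_alpha s_beta = (sum_alpha s_alpha)^2 - sum_alpha s_alpha^2
    pair_sum = s.sum(axis=0)**2 - (s**2).sum(axis=0)
    q_t = pair_sum / (N * (N - 1))
    return q_t.mean()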
import utils

csv_files = utils.get_all_reports()
df = utils.gather_data(csv_files)
output_df = utils.create_empty_df(df['date'])
months = utils.get_input_months(df['date'])

# Accumulate the income of each (month, year) pair into the output table.
for (month, year) in months:
    income_sum = utils.income_month(df, year, month)
    output_df = utils.save_value_to_output(output_df, year, month, income_sum)

print(output_df)

# TODO create a data class
# TODO total expenses per month
# TODO monthly totals by budget
# TODO balance after each month by budget
# TODO difference within the month
# TODO percentage of unspent money
# TODO monthly account balance
# TODO monthly average of account balances
# TODO yearly balance after all months
# TODO port the calculations from the spreadsheet
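# Hedged sketch of what utils.income_month might do (an assumption, not its
# actual body): sum the positive entries of an assumed 'amount' column for one
# calendar month, with dates parsed from the 'date' column used above.
def _demo_income_month(df, year, month):
    import pandas as pd

    dates = pd.to_datetime(df['date'])
    mask = (dates.dt.year == year) & (dates.dt.month == month)
    amounts = df.loc[mask, 'amount']
    return amounts[amounts > 0].sum()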
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--train-hp-iterations", type=int, default=2000)
    args = parser.parse_args()
    print(args)

    env = gym.make(args.environment)

    states, actions, _, next_states = gather_data(env, 5, unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    output_dim = 128 * 2
    noise_sd_clip_threshold = 5e-5
    rffm = RandomFourierFeatureMapper(states_actions.shape[-1], int(output_dim))

    # Fit one set of kernel hyperparameters per state dimension by minimising
    # the negative log marginal likelihood with Nelder-Mead.
    hyperparameters = []
    for i in range(env.observation_space.shape[0]):
        thetas0 = np.array([1., 1., 5e-4, 1.])  # [length_scale, signal_sd, noise_sd, prior_sd]
        options = {'maxiter': args.train_hp_iterations, 'disp': True}
        _res = minimize(log_marginal_likelihood, thetas0, method='nelder-mead',
                        args=(rffm, states_actions, next_states[:, i:i + 1],
                              output_dim, noise_sd_clip_threshold),
                        options=options)
        length_scale, signal_sd, noise_sd, prior_sd = _res.x
        hyperparameters.append([length_scale, signal_sd,
                                np.maximum(noise_sd, noise_sd_clip_threshold),
                                prior_sd])
    print(hyperparameters)

    # Quick plotting experiment (sanity check).
    import matplotlib.pyplot as plt
    if args.environment == 'Pendulum-v0':
        states2, actions2, _, next_states2 = gather_data(env, 1, unpack=True)
    elif args.environment == 'MountainCarContinuous-v0':
        from utils import mcc_get_success_policy
        states2, actions2, next_states2 = mcc_get_success_policy(env)
    states_actions2 = np.concatenate([states2, actions2], axis=-1)

    states3, actions3, _, next_states3 = gather_data(env, 3, unpack=True)
    states_actions3 = np.concatenate([states3, actions3], axis=-1)

    predictors = []
    for i in range(env.observation_space.shape[0]):
        length_scale, signal_sd, noise_sd, prior_sd = hyperparameters[i]
        predictors.append(predictor(output_dim,
                                    length_scale=length_scale,
                                    signal_sd=signal_sd,
                                    noise_sd=noise_sd,
                                    prior_sd=prior_sd))

    for sa, ns in zip([states_actions, states_actions3], [next_states, next_states3]):
        plt.figure()
        # Top row: one-step predictions with error bars per state dimension.
        for i in range(env.observation_space.shape[0]):
            plt.subplot(2, env.observation_space.shape[0], i + 1)
            predictors[i].update(rffm, sa, ns[:, i:i + 1])
            predict_mu, predict_sigma = predictors[i].predict(rffm, states_actions2)
            plt.plot(np.arange(len(next_states2[:, i:i + 1])), next_states2[:, i:i + 1])
            plt.errorbar(np.arange(len(predict_mu)), predict_mu,
                         yerr=np.sqrt(predict_sigma), color='m', ecolor='g')
            plt.grid()

        # Bottom row: multi-step rollouts sampled from the model.
        traj = []
        no_lines = 50
        state = np.tile(np.copy(states2[0:1, ...]), [no_lines, 1])
        for a in actions2:
            action = np.tile(a[np.newaxis, ...], [no_lines, 1])
            state_action = np.concatenate([state, action], axis=-1)
            mu_vec = []
            sigma_vec = []
            for i in range(env.observation_space.shape[0]):
                predict_mu, predict_sigma = predictors[i].predict(rffm, state_action)
                mu_vec.append(predict_mu)
                sigma_vec.append(predict_sigma)
            mu_vec = np.concatenate(mu_vec, axis=-1)
            sigma_vec = np.concatenate(sigma_vec, axis=-1)
            state = np.stack([np.random.multivariate_normal(mu, np.diag(sigma))
                              for mu, sigma in zip(mu_vec, sigma_vec)], axis=0)
            traj.append(np.copy(state))
        traj = np.stack(traj, axis=-1)

        for i in range(env.observation_space.shape[0]):
            plt.subplot(2, env.observation_space.shape[0],
                        env.observation_space.shape[0] + i + 1)
            for j in range(no_lines):
                y = traj[j, i, :]
                plt.plot(np.arange(len(y)), y, color='r')
            plt.plot(np.arange(len(next_states2[..., i])), next_states2[..., i])
            plt.grid()

        plt.show(block=False)
    input("Press Enter to continue ...")
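# Hedged sketch of the objective Nelder-Mead minimises above; an assumption
# about log_marginal_likelihood, not the original function. It is the negative
# log marginal likelihood of Bayesian linear regression on n x d random
# features phi, with y = Phi w + eps, w ~ N(0, prior_sd^2 I),
# eps ~ N(0, noise_sd^2 I).
def _demo_neg_log_marginal_likelihood(phi, y, noise_sd, prior_sd):
    n, d = phi.shape
    A = phi.T.dot(phi) + (noise_sd / prior_sd)**2 * np.eye(d)
    L = np.linalg.cholesky(A)
    v = np.linalg.solve(L, phi.T.dot(y))  # L v = Phi^T y
    # Woodbury identity: y^T K^{-1} y with K = noise_sd^2 I + prior_sd^2 Phi Phi^T.
    quad = (y.T.dot(y) - v.T.dot(v)) / noise_sd**2
    # Matching determinant lemma for log|K|.
    logdet = (2. * np.log(np.diag(L)).sum()
              - d * np.log((noise_sd / prior_sd)**2)
              + n * np.log(noise_sd**2))
    return 0.5 * float(quad + logdet + n * np.log(2. * np.pi))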
# Load the five per-fold models of each first-level stack.
model1_folds = [load_model('stack_model1/5_folds_stack_model%d.h5' % i) for i in range(5)]
model2_folds = [load_model('stack_model2/5_folds_stack_model%d.h5' % i) for i in range(5)]

rep = results.tolist()  # convert to a list
final_results = []
for i in range(len(results)):
    if np.max(results[i]) < 0.95:
        # Low-confidence prediction: rebuild the stacked feature vector
        # (fold-averaged first-level predictions plus one-hot features) and
        # re-predict with the second-level model.
        initial_sample = gather_data(test[i:i + 1]).reset_index(drop=True)
        new_sample = np.hstack((
            0.2 * sum(m.predict(initial_sample) for m in model1_folds),
            0.2 * sum(m.predict(initial_sample) for m in model2_folds),
            feat_onehot(initial_sample, test.columns)))
        res = model.predict(new_sample)
        # NOTE: the original snippet is truncated at this append; passing
        # `res` is an assumption.
        final_results.append(res)
def main2():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    # The --kernel flag was missing from the original; the default below is an
    # assumption made so that `kern=args.kernel` can run.
    parser.add_argument("--kernel", type=str, default='matern')
    #parser.add_argument("--path", type=str, default='')
    args = parser.parse_args()
    print(args)

    uid = str(uuid.uuid4())
    env = gym.make(args.environment)

    #states_actions, rewards, states_actions2, rewards2 = pickle.load(open(args.path, 'rb'))
    states, actions, rewards, _ = gather_data(env, 3, unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    #rbf = RegressionWrappers(input_dim=states_actions.shape[-1], kern='rbf')
    #rbf._train_hyperparameters(states_actions, rewards)

    matern = RegressionWrappers(input_dim=states_actions.shape[-1], kern=args.kernel)
    matern._train_hyperparameters(states_actions, rewards)

    #rq = RegressionWrappers(input_dim=states_actions.shape[-1], kern='rq')
    #rq._train_hyperparameters(states_actions, rewards)

    states2, actions2, rewards2, _ = gather_data(env, 1, unpack=True)
    states_actions2 = np.concatenate([states2, actions2], axis=-1)

    pickle.dump([states_actions, rewards, states_actions2, rewards2],
                open(uid + '.p', 'wb'))

    #mu, sigma = rbf._predict(states_actions2, states_actions, rewards)
    #mu = np.squeeze(mu, axis=-1)
    #sd = np.sqrt(np.diag(sigma))
    #plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='m', ecolor='g')

    mu, sigma = matern._predict(states_actions2, states_actions, rewards)
    mu = np.squeeze(mu, axis=-1)
    sd = np.sqrt(np.diag(sigma))
    plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='y', ecolor='c')

    #mu, sigma = rq._predict(states_actions2, states_actions, rewards)
    #mu = np.squeeze(mu, axis=-1)
    #sd = np.sqrt(np.diag(sigma))
    #plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='b', ecolor='g')

    # Compare against the random-feature approximations (RWL), with and
    # without the Matern parameter.
    rwl = RWL(input_dim=states_actions.shape[-1], basis_dim=1024)
    rwl._train_hyperparameters(states_actions, rewards)
    rwl._reset_statistics(states_actions, rewards)
    mu, sigma = rwl._predict(states_actions2)
    plt.errorbar(np.arange(len(mu)), mu, yerr=np.sqrt(sigma), color='r', ecolor='k')

    rwl2 = RWL(input_dim=states_actions.shape[-1], basis_dim=1024, matern_param=0.)
    rwl2._train_hyperparameters(states_actions, rewards)
    rwl2._reset_statistics(states_actions, rewards)
    mu, sigma = rwl2._predict(states_actions2)
    plt.errorbar(np.arange(len(mu)), mu, yerr=np.sqrt(sigma), color='g', ecolor='b')

    plt.scatter(np.arange(len(rewards2)), rewards2)
    plt.grid()
    plt.title(uid)
    #plt.show()
    plt.savefig(uid + '.pdf')