def main():
    """Compare an expert policy against a behavior-cloned student on two
    environments, printing mean/std returns for each.

    Expects student models under 'Trained_model/<env>_model.h5' and expert
    policies under 'experts/<env>.pkl'.
    """
    import argparse
    import gym  # hoisted: originally re-imported on every loop iteration

    parser = argparse.ArgumentParser()
    parser.add_argument('envname1', type=str)
    parser.add_argument('envname2', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    env_names = [args.envname1, args.envname2]
    with tf.Session():
        exp_m, exp_s = [], []  # per-env expert mean/std returns
        st_m, st_s = [], []    # per-env student mean/std returns
        for ev in env_names:
            tf_util.initialize()
            model = keras.models.load_model('Trained_model/' + ev + '_model.h5')
            policy_fn = load_policy.load_policy('experts/' + ev + '.pkl')
            env = gym.make(ev)
            # Fall back to the env's own episode limit when --max_timesteps
            # was not given (argparse default is None, which is falsy).
            max_steps = args.max_timesteps or env.spec.timestep_limit
            print(ev)
            exp_mean, exp_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, policy_fn, args.render, max_steps)
            st_mean, st_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model.predict, args.render, max_steps)
            exp_m.append(exp_mean)
            exp_s.append(exp_std)
            st_m.append(st_mean)
            st_s.append(st_std)

        # Summary — zip over parallel lists replaces the original manual
        # `i = 0 ... i += 1` index counter.
        for ev, em, es, sm, ss in zip(env_names, exp_m, exp_s, st_m, st_s):
            print(ev)
            print('Expert\'s Mean: {} Std: {}'.format(em, es))
            print('Student\'s Mean: {} Std: {}'.format(sm, ss))
def main():
    """Run DAgger: iteratively train a student on an aggregated,
    expert-labeled dataset, evaluating the DAgger model, the expert, and a
    plain behavior-cloning baseline after every iteration, then plot.

    Expects expert rollouts under 'expert_data/<env>.pkl' and pre-trained
    models under 'Trained_model/'.
    """
    import argparse
    import gym  # hoisted out of the session block

    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()
    file_name = args.envname

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    # Load expert rollout data. pickle.load streams the file handle directly
    # instead of reading the whole file and re-parsing via pickle.loads.
    # (Trusted local file — pickle is acceptable here.)
    with open('expert_data/' + file_name + '.pkl', 'rb') as f:
        data = pickle.load(f)
    data_ob = data['observations']
    # Actions come back with a singleton middle axis; flatten to 2-D
    # (n_samples, action_dim) for Keras.
    data_ac = data['actions']
    data_ac = data_ac.reshape(data_ac.shape[0], data_ac.shape[2])

    EPOCHS = 50    # training epochs per DAgger iteration
    d_epochs = 5   # number of DAgger iterations

    with tf.Session():
        tf_util.initialize()
        model = keras.models.load_model(
            'Trained_model/' + args.envname + '_model.h5')
        model_dag = keras.models.load_model(
            'Trained_model/' + args.envname + '_DaggerModel.h5')
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit

        exp_mean_records, exp_std_records = [], []
        bc_mean_records, bc_std_records = [], []
        dag_mean_records, dag_std_records = [], []

        for i in range(d_epochs):
            print("Running Dagger iteration: {}".format(i))
            # 1) Fit the DAgger model on the aggregated dataset so far.
            model_dag.fit(data_ob, data_ac, epochs=EPOCHS,
                          validation_split=0.2, verbose=0,
                          callbacks=[PrintDot()])
            # 2) Roll out the current student to collect the states it visits.
            mean, std, observations, actions = my_utils.run_simulation(
                env, args.num_rollouts, model_dag.predict,
                args.render, max_steps)
            observations_st = np.array(observations)
            # 3) The expert labels the student-visited states.
            actions_ex = policy_fn(observations_st)
            # 4) Aggregate the newly labeled data into the training set.
            data_ob = np.concatenate((data_ob, observations_st))
            data_ac = np.concatenate((data_ac, actions_ex))

            # Evaluate all three policies after this iteration.
            print("Testing Dagger")
            dag_mean, dag_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model_dag.predict,
                args.render, max_steps)
            print('Testing Expert')
            exp_mean, exp_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, policy_fn, args.render, max_steps)
            print('Testing BC')
            bc_mean, bc_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model.predict, args.render, max_steps)
            dag_mean_records.append(dag_mean)
            dag_std_records.append(dag_std)
            exp_mean_records.append(exp_mean)
            exp_std_records.append(exp_std)
            bc_mean_records.append(bc_mean)
            bc_std_records.append(bc_std)

        # NOTE(review): the DAgger model is saved to the CWD while it was
        # *loaded* from 'Trained_model/' — confirm this asymmetry is intended.
        model_dag.save(file_name + '_DaggerModel.h5')
        dic = {'Expert': (exp_mean_records, exp_std_records),
               'Dagger': (dag_mean_records, dag_std_records),
               'BC': (bc_mean_records, bc_std_records)}
        my_utils.pltbars(args.envname, dic)
# NOTE(review): fragment — relies on names defined earlier in an enclosing
# scope not visible here (file_name, data_ob, data_ac, num_rollouts, render,
# build_model, EPOCHS, PrintDot); confirm against the full file.
# Trains a behavior-cloning model in 6 rounds of EPOCHS epochs each,
# evaluating after every round, then plots the learning progress and saves
# the model to '<file_name>_model.h5'.
means = []
stds = []
with tf.Session():
    tf_util.initialize()
    import gym
    env = gym.make(file_name)
    max_steps = env.spec.timestep_limit
    # Model sized from the expert data: obs dim in, action dim out.
    model = build_model(data_ob.shape[1], data_ac.shape[1])
    print("Features (observations):{}".format(data_ob.shape))
    print("Features (actions):{}".format(data_ac.shape))
    model.summary()
    # Store training stats
    for i in range(6):
        history = model.fit(data_ob, data_ac, epochs=EPOCHS,
                            validation_split=0.2, verbose=0,
                            callbacks=[PrintDot()])
        #plot_history(history)
        # Evaluate the partially-trained model to track return vs. training.
        mean, std, __, __ = my_utils.run_simulation(env, num_rollouts,
                                                    model.predict, render,
                                                    max_steps)
        means.append(mean)
        stds.append(std)
    dic = {'Behavior Cloning': (means, stds)}
    my_utils.plt_bars_23(file_name, dic)
    model.save(file_name + '_model.h5')
def main():
    """Evaluate BC and DAgger students with three hidden sizes (32/64/128)
    against the expert on one environment, printing mean/std returns.

    Model files live in the CWD; the 64-unit models use the default file
    names ('<env>_model.h5' / '<env>_DaggerModel.h5') with no size tag.
    """
    import argparse
    import gym  # hoisted out of the session block

    parser = argparse.ArgumentParser()
    parser.add_argument('ev', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()
    ev = args.ev

    with tf.Session():
        tf_util.initialize()
        # (print label, model file) pairs — replaces six copy-pasted
        # load/run/print stanzas; load order matches the original.
        student_files = [
            ('32', ev + '32_model.h5'),
            ('64', ev + '_model.h5'),
            ('128', ev + '128_model.h5'),
            ('32 DAgger', ev + '32_DaggerModel.h5'),
            ('64 DAgger', ev + '_DaggerModel.h5'),
            ('128 DAgger', ev + '128_DaggerModel.h5'),
        ]
        students = [(label, keras.models.load_model(path))
                    for label, path in student_files]
        policy_fn = load_policy.load_policy('experts/' + ev + '.pkl')

        env = gym.make(ev)
        max_steps = args.max_timesteps or env.spec.timestep_limit
        print(ev)

        # Expert first, then each student, preserving the original order of
        # simulation runs; prints happen after all runs, as before.
        exp_mean, exp_std, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, policy_fn, args.render, max_steps)
        results = []
        for label, model in students:
            mean, std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model.predict, args.render, max_steps)
            results.append((label, mean, std))

        print('Expert\'s Mean: {} Std: {}'.format(exp_mean, exp_std))
        for label, mean, std in results:
            print('Student({})\'s Mean: {} Std: {}'.format(label, mean, std))