def noescExpTest():
    env = ('AttFC_GyroErr-MotorVel_M4_Ep-v0',
           'AttFC_GyroErr-MotorVel_M4_Con-v0',
           'AttFC_GyroErr1_M4_Ep-v0')
    cur_env = env[2]

    tp = tg.trainParams()
    pp = tg.policyParams()

    # Set up parameters
    tp.timesteps_per_actorbatch = 500
    tp.optim_batchsize = 32
    tp.optim_epochs = 5

    # Set training length
    num_eps = 1
    tp.num_timesteps = num_eps * 1000

    # Name Model
    tp.modelName('z10')

    # Run Training
    with tg.U.tf.Graph().as_default():
        tg.train(tp, pp, cur_env)

    # Model Evaluation
    me = eg.ModelEval(tp.model_name, cur_env)
    me.evalModel(15)
    me.saveEval()
def runEvals(exps):
    # FINISH
    # Run and save an evaluation for all trained models
    for i in range(len(exps['Model'])):
        me = eg.ModelEval(exps['Model'][i], 'AttFC_GyroErr1_M4_Ep-v0')
        me.evalModel(50)
        me.saveEval()
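# Hedged usage sketch (not part of the original script): `exps` is assumed to be a
# dict-like table whose 'Model' entry lists saved model names from earlier training
# runs, which is how runEvals indexes it above. 'z11' is a placeholder name.
def exampleRunEvals():
    exps = {'Model': ['z10', 'z11']}
    runEvals(exps)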
def checkR(model):
    me = eg.ModelEval(model, 'AttFC_GyroErr1_M4_Ep-v0')
    me.evalModel(3)

    print('Average Reward: ', me.proc_eval.eps_avg_r)
    print('Max Rewards: ', me.proc_eval.eps_max_r)
    print('Rise times: ')
    print(me.proc_eval.eps_r_rise, me.proc_eval.eps_p_rise, me.proc_eval.eps_y_rise, sep='\n')

    print('-----Beginning-----')
    print('Rewards: ', me.eps[0]['rewards'][0:4], me.eps[1]['rewards'][0:4],
          me.eps[2]['rewards'][0:4], sep='\n')
    print('-----End-----')
    print('Rewards: ', me.eps[0]['rewards'][990:994], me.eps[1]['rewards'][990:994],
          me.eps[2]['rewards'][990:994], sep='\n')
    print('-------------')
    print('Desired: ', [me.eps[0]['droll_v'][0], me.eps[0]['dpitch_v'][0], me.eps[0]['dyaw_v'][0]])
    print('Total R: ', np.sum(me.eps[0]['rewards']), np.sum(me.eps[1]['rewards']),
          np.sum(me.eps[2]['rewards']))
def initRewards(exps):
    # FINISH
    # Creates rewards from untrained models to compare against trained models,
    # to assess which reward elements were learned
    for i in range(len(exps['Model'])):
        me = eg.ModelEval(exps['Model'][i], 'AttFC_GyroErr1_M4_Ep-v0')
        me.evalModel()
        me.saveEval(save_name='init' + str(i))
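# Hedged driver sketch (an assumption, not from the original code): evaluate the
# trained models and the untrained baselines back to back so the saved results can
# be compared to see which reward components were actually learned. The model names
# are placeholders.
def exampleBaselineComparison():
    exps = {'Model': ['z10', 'z11']}
    runEvals(exps)
    initRewards(exps)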