Example #1
def noescExpTest():
    env = ('AttFC_GyroErr-MotorVel_M4_Ep-v0',
           'AttFC_GyroErr-MotorVel_M4_Con-v0', 'AttFC_GyroErr1_M4_Ep-v0')
    cur_env = env[2]
    tp = tg.trainParams()
    pp = tg.policyParams()

    # Set up parameters
    tp.timesteps_per_actorbatch = 500
    tp.optim_batchsize = 32
    tp.optim_epochs = 5

    # Set training length
    num_eps = 1
    tp.num_timesteps = num_eps * 1000

    # Name Model
    tp.modelName('z10')

    # Run training
    with tg.U.tf.Graph().as_default():
        tg.train(tp, pp, cur_env)

    # Model Evaluation
    me = eg.ModelEval(tp.model_name, cur_env)
    me.evalModel(15)
    me.saveEval()
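These examples reference three names whose imports are not shown: np (NumPy), tg (the project's training helper module providing trainParams, policyParams, train, and U), and eg (the evaluation helper module providing ModelEval). A minimal sketch of the assumed import block; the module names below are placeholders, not the project's actual file names:

import numpy as np

# Placeholder module names -- substitute the project's actual training and
# evaluation helper modules that define these objects.
import train_helpers as tg   # assumed source of trainParams, policyParams, train, U
import eval_helpers as eg    # assumed source of ModelEval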
Example #2
def runEvals(exps):
    # Run and save an evaluation for every trained model listed in exps['Model']
    for i in range(len(exps['Model'])):
        me = eg.ModelEval(exps['Model'][i], 'AttFC_GyroErr1_M4_Ep-v0')
        me.evalModel(50)
        me.saveEval()
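A hypothetical call to runEvals; it assumes exps is a dict-like object whose 'Model' entry lists saved model names. The model names below are placeholders:

# Placeholder experiment table: 'Model' holds the names of trained models.
exps = {'Model': ['z10', 'z11']}
runEvals(exps)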
Example #3
def checkR(model):
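    # Quick diagnostic: evaluate the given model for a few episodes and print
    # reward and rise-time summaries plus a sample of per-step rewards.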
    me = eg.ModelEval(model, 'AttFC_GyroErr1_M4_Ep-v0')
    me.evalModel(3)
    print('Average Reward: ', me.proc_eval.eps_avg_r)
    print('Max Rewards: ', me.proc_eval.eps_max_r)
    print('Rise times: ')
    print(me.proc_eval.eps_r_rise,
          me.proc_eval.eps_p_rise,
          me.proc_eval.eps_y_rise,
          sep='\n')
    print('-----Beginning-----')
    print('Rewards: ',
          me.eps[0]['rewards'][0:4],
          me.eps[1]['rewards'][0:4],
          me.eps[2]['rewards'][0:4],
          sep='\n')
    print('-----End-----')
    print('Rewards: ',
          me.eps[0]['rewards'][990:994],
          me.eps[1]['rewards'][990:994],
          me.eps[2]['rewards'][990:994],
          sep='\n')
    print('-------------')
    print('Desired: ', [
        me.eps[0]['droll_v'][0], me.eps[0]['dpitch_v'][0],
        me.eps[0]['dyaw_v'][0]
    ])
    print('Total R: ', np.sum(me.eps[0]['rewards']),
          np.sum(me.eps[1]['rewards']), np.sum(me.eps[2]['rewards']))
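checkR takes a saved model name; a hypothetical call using the name set in Example #1 (the actual stored name may differ if the training code decorates it):

# Placeholder model name -- use whatever tp.model_name resolved to after training.
checkR('z10')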
Example #4
def initRewards(exps):
    # Run and save an evaluation of each untrained model so its rewards can be
    # compared against the trained models' rewards to assess which reward
    # elements were learned
    for i in range(len(exps['Model'])):
        me = eg.ModelEval(exps['Model'][i], 'AttFC_GyroErr1_M4_Ep-v0')
        me.evalModel()
        me.saveEval(save_name='init' + str(i))
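Because initRewards walks the same exps structure as runEvals, the untrained baselines and the trained-model evaluations can be produced back to back; a hypothetical pairing using the placeholder exps dict from above:

# Baseline (untrained) evaluations first, then the trained-model evaluations.
initRewards(exps)
runEvals(exps)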