from src.utils import load_qnet, error_info from collections import deque # if gpu is to be used use_cuda = torch.cuda.is_available() FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor Tensor = FloatTensor if __name__ == "__main__": env = gym.make("CartPole-v0") config = cartpole_config eval_qnet = QNet(config.state_dim,config.dqn_hidden_dims,config.action_size) load_qnet(eval_qnet,filename='qnet_cp_short.pth.tar') eval_qnet.eval() methods = ['Model', 'DR', 'WDR', 'Soft DR', 'Soft WDR', 'Model Bsl', 'DR Bsl', 'WDR Bsl', 'Soft DR Bsl', 'Soft WDR Bsl', 'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE', 'Soft WDR MSE', 'MRDR Q', 'MRDR', 'WMRDR', 'Soft MRDR', 'Soft WMRDR', 'MRDR-w Q', 'MRDR-w', 'WMRDR-w', 'Soft MRDR-w', 'Soft WMRDR-w', 'IS', 'WIS', 'Soft IS', 'Soft WIS', 'PDIS', 'WPDIS', 'Soft PDIS', 'Soft WPDIS'] num_method = len(methods) max_name_length = len(max(methods,key=len)) mse = [deque() for method in methods] ind_mse = [deque() for method in methods] for i_run in range(config.N):
from src.utils import load_qnet, error_info from collections import deque # if gpu is to be used use_cuda = False # torch.cuda.is_available() FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor Tensor = FloatTensor if __name__ == "__main__": env = gym.make("MountainCar-v0") config = mountaincar_config eval_qnet = QNet(config.state_dim,config.dqn_hidden_dims,config.action_size) load_qnet(eval_qnet,filename='qnet_mc.pth.tar') eval_qnet.eval() methods = ['Model', 'DR', 'WDR', 'Soft DR', 'Soft WDR', 'Model Bsl', 'DR Bsl', 'WDR Bsl', 'Soft DR Bsl', 'Soft WDR Bsl', 'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE', 'Soft WDR MSE', 'MRDR Q', 'MRDR', 'WMRDR', 'Soft MRDR', 'Soft WMRDR', 'MRDR-w Q', 'MRDR-w', 'WMRDR-w', 'Soft MRDR-w', 'Soft WMRDR-w', 'IS', 'WIS', 'Soft IS', 'Soft WIS', 'PDIS', 'WPDIS', 'Soft PDIS', 'Soft WPDIS'] num_method = len(methods) max_name_length = len(max(methods,key=len)) mse = [deque() for method in methods] ind_mse = [deque() for method in methods] for i_run in range(config.N):
mse_1, mse_2, mse_3 = error_info( results[i_method], target, methods[i_method].ljust(max_name_length)) mse[i_method] = mse_1 ind_mse[i_method] = mse_2 mse_w[i_method] = mse_3 return (mse, ind_mse, mse_w) if __name__ == "__main__": env = gym.make("MountainCar-v0") config = mountaincar_config eval_qnet = QNet(config.state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(eval_qnet, filename='qnet_mc.pth.tar') eval_qnet.eval() bhv_qnet = QNet(config.state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(bhv_qnet, filename='qnet_mc.pth.tar') # target policy bhv_qnet.eval() # 読み込んだモデルのモードを切り替える methods = [ 'Model', 'DR', 'DML_RepBM', 'DML_RepBM_estpz', 'DML_RepBM_estpz_wis', 'DML_RepBM_estpz_sis', 'DML_RepBM_estpz_swis', 'DML-DR-CROSS-K-ND', 'dml_dr_cross_k_estpz_nd', 'dml_dr_cross_k_estpz_wis_nd', 'dml_dr_cross_k_estpz_sis_nd', 'dml_dr_cross_k_estpz_swis_nd', 'dml_dr_cross_k_chunk_nd', 'WDR', 'Soft DR', 'Soft WDR', 'Model Bsl', 'DR Bsl', 'DR EstPz Bsl', 'WDR EstPz Bsl', 'WDR Bsl', 'Soft DR Bsl', 'Soft WDR Bsl', 'Model MSE', 'DR MSE', 'WDR MSE', 'Soft DR MSE',
for i_method in range(num_method): mse_1, mse_2, mse_3 = error_info(results[i_method], target, methods[i_method].ljust(max_name_length)) mse[i_method] = mse_1 ind_mse[i_method] = mse_2 mse_w[i_method] = mse_3 return(mse, ind_mse, mse_w) if __name__ == "__main__": env = gym.make("CartPole-v0") config = cartpole_config state_dim = config.state_dim + config.noise_dim eval_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(eval_qnet, filename='cp_s.pth.tar') # target policy eval_qnet.eval() # 読み込んだモデルのモードを切り替える bhv_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(bhv_qnet, filename='cp_s.pth.tar') # target policy bhv_qnet.eval() # 読み込んだモデルのモードを切り替える methods = ['DML-DR-CROSS-K-ND', 'dml_dr_cross_k_estpz_nd', 'dml_dr_cross_k_estpz_wis_nd', 'dml_dr_cross_k_estpz_sis_nd', 'dml_dr_cross_k_estpz_swis_nd', 'dml_dr_cross_k_chunk_nd', 'Model Bsl', 'DR Bsl', 'DR EstPz Bsl',
mse[i_method] = mse_1 ind_mse[i_method] = mse_2 mse_w[i_method] = mse_3 return(mse, ind_mse, mse_w) if __name__ == "__main__": env = gym.make("Acrobot-v1") config = acrobot_config noise_dim = config.noise_dim state_dim = config.state_dim + noise_dim eval_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(eval_qnet, filename='acrobat.pth.tar') # target policy eval_qnet.eval() # 読み込んだモデルのモードを切り替える bhv_qnet = QNet(state_dim, config.dqn_hidden_dims, config.action_size) load_qnet(bhv_qnet, filename='acrobat.pth.tar') # target policy bhv_qnet.eval() # 読み込んだモデルのモードを切り替える methods = ['DML-DR-CROSS-K-ND', 'dml_dr_cross_k_estpz_nd', 'dml_dr_cross_k_estpz_wis_nd', 'dml_dr_cross_k_estpz_sis_nd', 'dml_dr_cross_k_estpz_swis_nd', 'dml_dr_cross_k_chunk_nd', 'Model Bsl', 'DR Bsl',