Example #1
  # Actor-critic update (pre-0.4 PyTorch API): each action is reinforced with its reward
  # minus the value estimate, and the value head is regressed toward the reward.
  for (action, value), r in zip(saved_actions, rewards):
    reward = r - value.data[0,0]
    action.reinforce(reward)
    value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
  optimizer.zero_grad()
  # Backpropagate the value loss together with the stochastic action nodes,
  # whose gradients were set by reinforce() above.
  final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
  gradients = [torch.ones(1)] + [None] * len(saved_actions)
  autograd.backward(final_nodes, gradients)
  optimizer.step()
  # Clear the per-episode buffers.
  del model.rewards[:]
  del model.saved_actions[:]

# train: build the environment, the RL policy, and the CNN classifier
env = SenseEnv(vars(args))
print("action space: ", env.action_space())
model = Policy(env.observation_space(), env.action_space_n())
cnn = CNN(env.classification_n())
if args.gpu and torch.cuda.is_available():
  model.cuda()
  cnn.cuda()
# resume from pretrained weights if they exist
if args.model_path and os.path.exists(args.model_path+"/model.pkl"):
  print("loading pretrained models")
  model.load_state_dict(torch.load(args.model_path+"/model.pkl"))
  cnn.load_state_dict(torch.load(args.model_path+"/cnn.pkl"))

# loss functions and optimizers for the policy network and the CNN classifier
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

classifier_criterion = nn.CrossEntropyLoss()
classifier_optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)
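
Note: the update loop in Example #1 targets the pre-0.4 PyTorch API (Variable is deprecated and Tensor.reinforce() was removed in later releases). A rough equivalent against current PyTorch is sketched below; it assumes saved_actions stores (log_prob, value) pairs collected with torch.distributions.Categorical at action-selection time, rather than the raw stochastic action nodes.

import torch
import torch.nn.functional as F
from torch.distributions import Categorical

# At action-selection time (sketch): sample from the policy's output probabilities
# and keep the log-probability instead of calling reinforce() later.
#   probs, state_value = model(state)
#   dist = Categorical(probs)
#   action = dist.sample()
#   model.saved_actions.append((dist.log_prob(action), state_value))

# At update time: the same advantage-weighted REINFORCE term plus a value-regression loss.
policy_losses, value_losses = [], []
for (log_prob, value), r in zip(saved_actions, rewards):
    advantage = r - value.item()
    policy_losses.append(-log_prob * advantage)
    value_losses.append(F.smooth_l1_loss(value, torch.tensor([r])))
optimizer.zero_grad()
loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
loss.backward()
optimizer.step()
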
Example #2
    num_games = 20
    game_length = 1000
    e_greedy_inc = 0.05 / game_length  # raise epsilon by roughly 0.05 per game so early games stay exploratory
    mem_size = num_games * game_length

    cnn_features_TD = np.zeros((num_games, 40000), dtype=np.int8)
    cnn_labels_TD = np.zeros(num_games, dtype=np.int8)

    cnn_features_ED = np.zeros((num_games, 40000), dtype=np.int8)
    cnn_labels_ED = np.zeros(num_games, dtype=np.int8)

    TD_cnt = 0  # counter to keep track of how many times we touch in the training phase

    RL = DeepQNetwork(n_actions=env.action_space_n(),
                      n_features=env.observation_space(),
                      learning_rate=0.1,
                      e_greedy=0.9,
                      replace_target_iter=100,
                      memory_size=mem_size,
                      e_greedy_increment=e_greedy_inc)
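
    # Illustration (assumption): how the epsilon-greedy schedule above plays out, assuming
    # the agent starts with epsilon near 0 and adds e_greedy_increment on every learning
    # step, capping at e_greedy=0.9.
    eps_gain_per_game = e_greedy_inc * game_length  # 0.05 / 1000 * 1000 = 0.05 per game
    games_until_greedy = 0.9 / eps_gain_per_game    # ~18 of the 20 games ramp from exploration to exploitation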

    if args.mode == "train" or args.mode == "all":

        games_where_touched = 0
        total_steps = 0

        ep_r = np.zeros(num_games)
        ep_touch = np.zeros(num_games)

        for i_episode in range(num_games):