  rewards = torch.Tensor(rewards)
  rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
  for (action, value), r in zip(saved_actions, rewards):
    reward = r - value.data[0,0]
    action.reinforce(reward)
    value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
  optimizer.zero_grad()
  final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
  gradients = [torch.ones(1)] + [None] * len(saved_actions)
  autograd.backward(final_nodes, gradients)
  optimizer.step()
  del model.rewards[:]
  del model.saved_actions[:]
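
# Note: Variable.reinforce() and stochastic autograd nodes were removed after
# PyTorch 0.3, so the update above only runs on old PyTorch. Below is a minimal
# sketch of the same actor-critic update written for current PyTorch, assuming
# saved_actions were changed to store (log_prob, value) pairs instead of
# (action, value) -- an assumption, not what this example does.
import torch
import torch.nn.functional as F

def finish_episode_modern(saved_actions, rewards, optimizer):
  policy_losses = []
  value_losses = []
  for (log_prob, value), r in zip(saved_actions, rewards):
    advantage = r - value.item()                 # baseline-subtracted return
    policy_losses.append(-log_prob * advantage)  # REINFORCE term for the actor
    value_losses.append(F.smooth_l1_loss(value.squeeze(), torch.tensor(float(r))))
  optimizer.zero_grad()
  loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
  loss.backward()
  optimizer.step()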

# Train:
env = SenseEnv(vars(args))
print("action space: ",env.action_space())
model = Policy(env.observation_space(),env.action_space_n())
cnn = CNN(env.classification_n())
if args.gpu and torch.cuda.is_available():
  model.cuda()
  cnn.cuda()
if args.model_path:
  if os.path.exists(args.model_path+"/model.pkl"):
    print("loading pretrained models")
    model.load_state_dict(torch.load(args.model_path+"/model.pkl"))
    cnn.load_state_dict(torch.load(args.model_path+"/cnn.pkl"))

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
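
# How model.rewards and model.saved_actions get filled is not shown in this
# excerpt. On the pre-0.4 PyTorch API the sampling helper usually looks like the
# sketch below; SavedAction is assumed to be a namedtuple with 'action' and
# 'value' fields, consistent with the p.action access in the update above.
from collections import namedtuple
SavedAction = namedtuple('SavedAction', ['action', 'value'])

def select_action(state):
  state = torch.from_numpy(state).float().unsqueeze(0)
  probs, state_value = model(Variable(state))  # Policy assumed to return (action probs, state value)
  action = probs.multinomial()                 # stochastic node; supports .reinforce()
  model.saved_actions.append(SavedAction(action, state_value))
  return action.data
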
Example #2
    'moves': 5,
    'action': 1
}, {
    'moves': 10,
    'action': 3
}, {
    'moves': 15,
    'action': 4
}, {
    'moves': 10,
    'action': 3
}]  #top of pyramid
obj_file = None  # avoid a NameError when no .obj path is passed on the command line
if len(sys.argv) == 2:
    obj_file = sys.argv[1]
    print("loading", obj_file)
env = SenseEnv({'render': True, 'debug': True, 'obj_path': obj_file})
action_plan_counter = 0
action_step = 0


def max_action_plan_steps(action_plan):
    return sum([x['moves'] for x in action_plan])
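# e.g. the plan entries visible above contribute 5 + 10 + 15 + 10 = 40 moves to
# this total (plus any entries truncated from this excerpt)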


def process_action_plan(action_plan, action_plan_counter=0, action_step=0):
    max_steps = max_action_plan_steps(action_plan)
    if action_step == 0:
        start_for_current_action = 0
    else:
        start_for_current_action = sum(
            [x['moves'] for x in action_plan[:action_step]])
    R = r + args.gamma * R
    rewards.insert(0, R)
  rewards = torch.Tensor(rewards)
  rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
  for action, r in zip(model.saved_actions, rewards):
    action.reinforce(r)
  optimizer.zero_grad()
  autograd.backward(model.saved_actions, [None for _ in model.saved_actions])
  optimizer.step()
  del model.rewards[:]
  del model.saved_actions[:]
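
# The (truncated) loop above builds discounted returns back to front:
# R_t = r_t + gamma * R_{t+1}. A self-contained sketch of that computation
# (the gamma value here is just illustrative):
def discounted_returns(rewards, gamma=0.99):
  R = 0.0
  returns = []
  for r in reversed(rewards):
    R = r + gamma * R
    returns.insert(0, R)
  return returns

# e.g. discounted_returns([1, 0, 1]) -> [~1.98, 0.99, 1.0]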


# Training:

env = SenseEnv(vars(args))
print("action space: ",env.action_space())
print("class count: ",env.classification_n())
model = Policy(env.observation_space(),env.action_space_n())
cnn_lstm = CNNLSTM(env.classification_n())
if args.gpu and torch.cuda.is_available():
  model.cuda()
  cnn_lstm.cuda()
if model_path:
  if os.path.exists(model_path+"/model.pkl"):
    print("loading pretrained models")
    model.load_state_dict(torch.load(model_path+"/model.pkl"))
    cnn_lstm.load_state_dict(torch.load(model_path+"/cnn_lstm.pkl"))

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
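
# The episode-collection loop is cut from this excerpt. A rough sketch, assuming
# SenseEnv follows the gym-style reset()/step() convention returning
# (observation, reward, done, info) and that select_action() samples from the
# policy as sketched earlier -- both are assumptions, not shown in the source.
def run_episode(env, max_steps=1000):
  observation = env.reset()
  for _ in range(max_steps):
    action = select_action(observation)
    observation, reward, done, info = env.step(action)
    model.rewards.append(reward)  # consumed by the finish_episode-style update above
    if done:
      break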
                        type=str,
                        help='log experiment to tensorboard')
    parser.add_argument('--model_path',
                        type=str,
                        help='path to store/retrieve model at')
    parser.add_argument('--mode',
                        type=str,
                        default="train",
                        help='train/test/all model')
    parser.add_argument('--data_path',
                        type=str,
                        default="../../touchable_data/objects/")

    args = parser.parse_args()

    env = SenseEnv(vars(args))

    num_games = 20
    game_length = 1000
    e_greedy_inc = 0.05 / game_length  # want to increase by 0.05 per game so we spend enough time exploring
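    # sanity check: with game_length = 1000 this is 5e-05 per step, i.e. 0.05 per game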
    mem_size = num_games * game_length

    cnn_features_TD = np.zeros((num_games, 40000), dtype=np.int8)
    cnn_labels_TD = np.zeros(num_games, dtype=np.int8)

    cnn_features_ED = np.zeros((num_games, 40000), dtype=np.int8)
    cnn_labels_ED = np.zeros(num_games, dtype=np.int8)

    TD_cnt = 0  # counter to keep track of how many times we touch in the training phase