def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker (non-adaptive baseline).

    Runs episodes round-robin over the scene types, accumulating policy
    loss per episode, back-propagating through the local player model and
    pushing gradients to ``shared_model`` via ``optimizer``.

    Args:
        rank: Worker index; offsets the RNG seed and selects a GPU.
        args: Parsed experiment arguments (seeds, gpu_ids, scene_types, ...).
        create_shared_model: Factory used by ``initialize_agent`` to build
            the local model.
        shared_model: Globally shared model the worker syncs from and
            transfers gradients to.
        initialize_agent: Factory returning the navigation agent ("player").
        optimizer: Shared optimizer stepped after each gradient transfer.
        res_queue: Queue receiving per-episode results (via ``end_episode``).
        end_flag: Shared flag; the loop exits once ``end_flag.value`` is set.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    # Per-worker seeding so workers explore different scene orders.
    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # BUGFIX: set_device was previously called unconditionally; with a
        # CPU-only run (gpu_id == -1) that call raises. Guard it like the
        # CUDA seeding below.
        torch.cuda.set_device(gpu_id)
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # A random agent has no parameters to train; skip backprop for it.
    compute_grad = not isinstance(player, RandomNavigationAgent)
    model_options = ModelOptions()

    j = 0  # round-robin pointer into the shuffled scene-type order
    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(
            args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove
        )
        player_start_time = time.time()

        # Train on the new episode: run in chunks of num_steps, updating
        # the shared model after each chunk.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        # Detach losses from the graph for reporting.
        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    """A3C validation worker (non-adaptive baseline).

    Loads a checkpoint into a freshly created model, then evaluates
    ``max_count`` episodes on the scenes of ``scene_type``, reporting
    SPL and bucketed success metrics through ``res_queue``. A final
    ``{"END": True}`` sentinel is pushed when done.

    Args:
        rank: Worker index; offsets the RNG seed and selects a GPU.
        args: Parsed experiment arguments; ``max_episode_length`` is
            mutated here per scene type (200 for living rooms, else 100).
        model_to_open: Checkpoint path, or "" to evaluate a fresh model.
        model_create_fn: Factory building the model to load weights into.
        initialize_agent: Factory returning the navigation agent.
        res_queue: Queue receiving per-episode metrics.
        max_count: Number of validation episodes to run.
        scene_type: Scene category name, mapped to an index by
            ``name_to_num``.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)

    # Narrow to the single scene type this worker evaluates.
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        # map_location keeps the load CPU-side regardless of save device.
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state["model"])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    # NOTE: the original kept an unused round-robin counter ``j`` here;
    # scenes are already fixed by scene_type, so it has been removed.
    while count < max_count:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Roll out the episode (no gradient updates during validation).
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss (kept for parity with training; the values
            # are detached below but not reported to res_queue).
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """SAVN (meta-learning) training worker.

    For each episode, starts from the shared parameters theta and performs
    inner-loop SGD steps on the self-supervised "learned loss" while the
    agent acts; after the episode, differentiates the task loss w.r.t. the
    ORIGINAL theta (through the inner updates) and pushes that meta-gradient
    to ``shared_model``.

    Args:
        rank: Worker index; offsets the RNG seed and selects a GPU.
        args: Experiment arguments (inner_lr, gradient_limit, verbose, ...).
        create_shared_model: Factory used by ``initialize_agent``.
        shared_model: Globally shared model holding theta.
        initialize_agent: Factory returning the navigation agent.
        optimizer: Shared optimizer stepped after each meta-gradient.
        res_queue: Queue receiving per-episode results.
        end_flag: Shared flag; the loop exits once ``end_flag.value`` is set.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # BUGFIX: set_device was previously called unconditionally, which
        # raises for CPU-only runs (gpu_id == -1). Guard it like the seeding.
        torch.cuda.set_device(gpu_id)
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)

    model_options = ModelOptions()

    j = 0  # round-robin pointer into the shuffled scene-type order
    while not end_flag.value:
        start_time = time.time()
        new_episode(
            args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove
        )
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization. params_list[k] holds the
        # parameters after k inner-loop adaptation steps.
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a
            # mistake (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward, model_options, True)

            if player.done:
                break

            # Inner-loop adaptation, capped at gradient_limit steps
            # (a negative limit means unlimited).
            if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1

                # Compute the self-supervised interaction loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)

                if args.verbose:
                    print("inner gradient")
                # create_graph=True so the meta-gradient below can flow
                # through this inner step.
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr)
                )
                model_options.params = params_list[-1]

                # reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")

        # Compute the meta_gradient, i.e. differentiate w.r.t. theta
        # (the pre-adaptation parameters at params_list[0]).
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()

        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    glove_file=None,
    img_file=None,
):
    """SAVN (meta-learning) validation worker.

    Loads a checkpoint, then for each of ``max_count`` episodes performs
    the same inner-loop adaptation as training (on the learned loss only)
    while acting, and reports SPL / bucketed metrics. Optionally appends
    the agent's trajectory to a JSON file when ``args.record_route`` is set.

    Args:
        rank: Worker index; offsets the RNG seed and selects a GPU.
        args: Experiment arguments; ``max_episode_length`` is mutated per
            scene type (200 for living rooms, else 100).
        model_to_open: Checkpoint path, or None to evaluate a fresh model.
        model_create_fn: Factory building the model to load weights into.
        initialize_agent: Factory returning the navigation agent.
        res_queue: Queue receiving per-episode metrics; ends with
            ``{"END": True}``.
        max_count: Number of validation episodes to run.
        scene_type: Scene category name, mapped to an index by
            ``name_to_num``.
        glove_file: Optional embedding source forwarded to ``new_episode``.
        img_file: Optional image source. NOTE(review): the original passed
            ``img_file=None`` to ``new_episode`` regardless of this
            parameter; that behavior is preserved — confirm whether
            ``img_file`` should be forwarded instead.
    """
    # glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)

    # Narrow to the single scene type this worker evaluates.
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # BUGFIX: set_device was previously called unconditionally, which
        # raises for CPU-only runs (gpu_id == -1).
        torch.cuda.set_device(gpu_id)
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open is not None:
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state)

    # BUGFIX: the original created the player, synced it with the loaded
    # shared model, then called initialize_agent a second time — discarding
    # the synced checkpoint weights and validating an unsynced model. The
    # duplicate initialization is removed.
    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    player_actions = {}
    model_options = ModelOptions()

    while count < max_count:
        count += 1
        start_time = time.time()
        new_episode(
            args,
            player,
            scenes,
            possible_targets,
            targets,
            glove=glove_file,
            img_file=None,
        )
        player_start_state = copy.deepcopy(player.environment.controller.state)
        if args.verbose:
            print(player_start_state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization; params_list[k] holds the
        # parameters after k inner-loop adaptation steps.
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Trajectory record for optional route dumping.
        player_actions["scene"] = player.environment.scene_name
        player_actions["target_object"] = player.episode.task_data[0]
        player_actions["positions"] = []
        player_actions["positions"].append(str(player_start_state))

        while True:
            total_reward = run_episode(player, args, total_reward, model_options, False)
            player_actions["positions"].append(
                str(player.environment.controller.state)
            )
            if player.done:
                break

            # Inner-loop adaptation on the learned loss only (no reward
            # supervision at test time), capped at gradient_limit steps.
            if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1

                # Compute the self-supervised interaction loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr)
                )
                model_options.params = params_list[-1]

                reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        player_actions["success"] = player.success
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(
            spl, best_path_length, player.success, player.actions[-1], player.arrive
        )

        if args.record_route:
            # HACK: hard-coded machine-specific output path — should come
            # from args; left unchanged to preserve behavior.
            with open(
                "/home/duhm/Code/savn_deployment/players_action_test.json", "a"
            ) as write_file:
                json.dump(player_actions, write_file)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )

        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker (non-adaptive baseline).

    NOTE(review): this is a second definition of ``nonadaptivea3c_train``
    in the same module and shadows the earlier one; the original copy was
    littered with commented-out debug prints and live debug ``print``
    calls, which have been removed. One of the two definitions should be
    deleted — confirm which is canonical.

    Args:
        rank: Worker index; offsets the RNG seed and selects a GPU.
        args: Parsed experiment arguments (seeds, gpu_ids, scene_types, ...).
        create_shared_model: Factory used by ``initialize_agent``.
        shared_model: Globally shared model the worker syncs from and
            transfers gradients to.
        initialize_agent: Factory returning the navigation agent ("player").
        optimizer: Shared optimizer stepped after each gradient transfer.
        res_queue: Queue receiving per-episode results.
        end_flag: Shared flag; the loop exits once ``end_flag.value`` is set.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    # Per-worker seeding so workers explore different scene orders.
    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # BUGFIX: set_device was previously called unconditionally; with a
        # CPU-only run (gpu_id == -1) that call raises.
        torch.cuda.set_device(gpu_id)
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # A random agent has no parameters to train; skip backprop for it.
    compute_grad = not isinstance(player, RandomNavigationAgent)
    model_options = ModelOptions()

    j = 0  # round-robin pointer into the shuffled scene-type order
    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(
            args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove
        )
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        # Detach losses from the graph for reporting.
        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()