def __init__(self, args):
    """Build the MJOLNIR_O model: a GCN over the object adjacency matrix
    feeding an LSTM actor-critic head.

    Args:
        args: experiment config; reads ``action_space``,
            ``hidden_state_sz``, ``dropout_rate`` and ``glove_file``.
    """
    action_space = args.action_space
    hidden_state_sz = args.hidden_state_sz
    super(MJOLNIR_O, self).__init__()

    # get and normalize adjacency matrix; silence divide-by-zero warnings
    # raised by isolated nodes during row normalization.
    np.seterr(divide='ignore')
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))
    n = int(A.shape[0])
    self.n = n

    self.embed_action = nn.Linear(action_space, 10)
    # LSTM input: 10 (action embedding) + 5 graph features per node + 512 visual.
    lstm_input_sz = 10 + n * 5 + 512

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    # NOTE(review): the original computed nn.init.calculate_gain("relu")
    # here but never used it (this model has no conv layer); removed.
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)
    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    # glove embeddings for all the objs (frozen, 300-d per object).
    with open("./data/gcn/objects.txt") as f:
        objects = f.readlines()
    self.objects = [o.strip() for o in objects]
    all_glove = torch.zeros(n, 300)
    glove = Glove(args.glove_file)
    for i in range(n):
        all_glove[i, :] = torch.Tensor(
            glove.glove_embeddings[self.objects[i]][:])
    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    # Graph-convolution weights; per-node feature size is 401
    # (presumably 300 GloVe + detector features — TODO confirm).
    self.W0 = nn.Linear(401, 401, bias=False)
    self.W1 = nn.Linear(401, 401, bias=False)
    self.W2 = nn.Linear(401, 5, bias=False)
    self.W3 = nn.Linear(10, 1, bias=False)
    self.final_mapping = nn.Linear(n, 512)
def __init__(self, args):
    """Build the GCN_MLP model: CNN visual encoder + GCN over object
    adjacency + MLP head on top of a frozen ResNet18 tail.

    Args:
        args: experiment config; reads ``action_space``, ``glove_dim``,
            ``hidden_state_sz``, ``dropout_rate`` and ``glove_file``.
    """
    action_space = args.action_space
    target_embedding_sz = args.glove_dim
    resnet_embedding_sz = 512
    hidden_state_sz = args.hidden_state_sz
    super(GCN_MLP, self).__init__()

    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.embed_glove = nn.Linear(target_embedding_sz, 64)
    self.embed_action = nn.Linear(action_space, 10)

    # 138 = 64 image + 64 glove + 10 action channels.
    pointwise_in_channels = 138
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64 + 512
    mlp_input_sz = lstm_input_sz

    self.hidden_state_sz = hidden_state_sz
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    # NOTE(review): n is hard-coded to 83 here, while the sibling models
    # derive it from A.shape[0]; kept as-is to preserve behavior — confirm
    # it matches the adjacency matrix on disk.
    n = 83
    self.n = n

    # get and normalize adjacency matrix.
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))

    # last layer of resnet18, frozen.
    resnet18 = models.resnet18(pretrained=True)
    modules = list(resnet18.children())[-2:]
    self.resnet18 = nn.Sequential(*modules)
    for p in self.resnet18.parameters():
        p.requires_grad = False

    # glove embeddings for all the objs.
    # BUGFIX: the original opened objects.txt without closing it; use a
    # context manager so the handle is released.
    with open("./data/gcn/objects.txt") as f:
        objects = f.readlines()
    objects = [o.strip() for o in objects]
    all_glove = torch.zeros(n, 300)
    glove = Glove(args.glove_file)
    for i in range(n):
        all_glove[i, :] = torch.Tensor(
            glove.glove_embeddings[objects[i]][:])
    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    self.get_word_embed = nn.Linear(300, 512)
    self.get_class_embed = nn.Linear(1000, 512)
    self.W0 = nn.Linear(1024, 1024, bias=False)
    self.W1 = nn.Linear(1024, 1024, bias=False)
    self.W2 = nn.Linear(1024, 1, bias=False)
    self.final_mapping = nn.Linear(n, 512)
    # NOTE(review): removed unused local `hidden_o = mlp_input_sz // 2`.
    self.W0m = nn.Linear(mlp_input_sz, 512, bias=False)
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker: loops over shuffled scene types, rolls out
    episodes, and pushes gradients to the shared model until `end_flag`
    is set.

    Args:
        rank: worker index (used for seeding, GPU choice, proc title).
        args: experiment config.
        create_shared_model: factory for the per-worker model.
        shared_model: globally shared parameters.
        initialize_agent: factory for the navigation agent.
        optimizer: shared optimizer stepped after each gradient transfer.
        res_queue: queue for per-episode results.
        end_flag: shared flag; training stops when truthy.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch
    # BUGFIX: only bind a CUDA device when one was assigned;
    # torch.cuda.set_device(-1) raises on CPU-only runs.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # A random agent produces no gradients.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()
    j = 0

    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes[idx[j]], possible_targets,
                    targets[idx[j]], glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    """Validation worker: evaluates `max_count` episodes of `scene_type`
    with a (optionally checkpoint-loaded) model and reports SPL metrics.

    Args:
        rank: worker index (seeding / GPU choice).
        args: experiment config (max_episode_length is overridden here).
        model_to_open: checkpoint path, or "" to evaluate a fresh model.
        model_create_fn: model factory.
        initialize_agent: agent factory.
        res_queue: queue for per-episode results; a {"END": True} marker
            is pushed when done.
        max_count: number of episodes to run.
        scene_type: single scene category to evaluate.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state['model'])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0
    model_options = ModelOptions()
    # NOTE(review): removed a dead scene-type cursor `j` that was
    # incremented every episode but never read — this worker evaluates a
    # single scene_type.

    while count < max_count:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Roll out until the episode finishes.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss (reported values only; no gradient step here).
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )
        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    """SAVN validation worker: evaluates `max_count` episodes of
    `scene_type` with inner-loop adaptation disabled.

    Pushes per-episode SPL metrics to `res_queue` and a final
    {"END": True} marker.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    # BUGFIX: the original indexed scenes[0]/targets[0], silently ignoring
    # the requested scene_type; the sibling nonadaptivea3c_val indexes by
    # name_to_num(scene_type), so do the same here.
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch
    # BUGFIX: only bind a CUDA device when one was assigned;
    # torch.cuda.set_device(-1) raises on CPU-only runs.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open is not None:
        # NOTE(review): no map_location is given, so the checkpoint loads
        # onto the device it was saved from — confirm this is intended.
        saved_state = torch.load(model_to_open)
        shared_model.load_state_dict(saved_state)

    # BUGFIX: the original constructed a SECOND player right after syncing
    # the first, so validation ran with freshly initialized (un-synced)
    # weights. Create one player and sync it with the loaded model.
    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0
    model_options = ModelOptions()

    while count < max_count:
        count += 1
        start_time = time.time()
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        while True:
            total_reward = run_episode(player, args, total_reward, model_options, False)
            if player.done:
                break
            # Inner-loop adaptation is deliberately disabled at validation
            # time (the original guard is kept in the comment).
            if False:  # if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1
                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)
                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )
                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr))
                model_options.params = params_list[-1]
                episode_num += 1
                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """SAVN meta-training worker (MAML-style inner/outer loop).

    For each episode: copy theta from the shared model, adapt it with
    inner SGD steps on the learned loss, then compute the meta-gradient
    of the final RL loss w.r.t. the ORIGINAL theta and push it to the
    shared model.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch
    # BUGFIX: only bind a CUDA device when one was assigned;
    # torch.cuda.set_device(-1) raises on CPU-only runs.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    model_options = ModelOptions()
    j = 0

    while not end_flag.value:
        start_time = time.time()
        new_episode(args, player, scenes[idx[j]], possible_targets,
                    targets[idx[j]], glove=glove)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a
            # mistake (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward, model_options, True)
            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1
                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)
                if args.verbose:
                    print("inner gradient")
                # Inner gradient w.r.t. the current adapted parameters;
                # create_graph keeps it differentiable for the meta step.
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )
                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr))
                model_options.params = params_list[-1]
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")
        # Compute the meta_gradient, i.e. differentiate w.r.t. theta.
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()
        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()
def __init__(self, args):
    """Construct the MJOLNIR_R network.

    Combines a small CNN over frozen ResNet18 features, a GCN over the
    object adjacency matrix, and an LSTM actor-critic head.
    """
    action_space = args.action_space
    target_embedding_sz = args.glove_dim
    resnet_embedding_sz = 512
    hidden_state_sz = args.hidden_state_sz
    super(MJOLNIR_R, self).__init__()

    # Visual, target-word and previous-action encoders.
    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.embed_glove = nn.Linear(target_embedding_sz, 64)
    self.embed_action = nn.Linear(action_space, 10)

    pointwise_in_channels = 64 + 64 + 10  # image + glove + action channels
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64 + 512

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    # Initialize weights, then rescale/override the heads.
    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)
    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    # Load and row-normalize the object adjacency matrix; isolated nodes
    # would otherwise trigger divide-by-zero warnings.
    np.seterr(divide='ignore')
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))
    num_nodes = int(A.shape[0])
    self.n = num_nodes

    # Keep only the last two children of ResNet18, frozen.
    resnet18 = models.resnet18(pretrained=True)
    tail_layers = list(resnet18.children())[-2:]
    self.resnet18 = nn.Sequential(*tail_layers)
    for param in self.resnet18.parameters():
        param.requires_grad = False

    # Frozen 300-d GloVe vector per object class.
    with open("./data/gcn/objects.txt") as f:
        raw_names = f.readlines()
    self.objects = [name.strip() for name in raw_names]
    all_glove = torch.zeros(num_nodes, 300)
    glove = Glove(args.glove_file)
    for node_idx in range(num_nodes):
        all_glove[node_idx, :] = torch.Tensor(
            glove.glove_embeddings[self.objects[node_idx]][:])
    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    # Graph-convolution layers and projection to the 512-d joint space.
    self.W0 = nn.Linear(401, 401, bias=False)
    self.W1 = nn.Linear(401, 401, bias=False)
    self.W2 = nn.Linear(401, 5, bias=False)
    self.W3 = nn.Linear(10, 1, bias=False)
    self.final_mapping = nn.Linear(num_nodes, 512)
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    global_ep,
):
    """A3C training worker supporting iTHOR/RoboTHOR data sources,
    optional pinned scenes, curriculum learning, and periodic
    bird's-eye trajectory plotting.

    Args:
        rank: worker index (seeding, GPU choice, scene pinning).
        args: experiment config.
        create_shared_model / shared_model: model factory and shared params.
        initialize_agent: agent factory.
        optimizer: shared optimizer.
        res_queue: queue for per-episode results.
        end_flag: shared flag; training stops when truthy.
        global_ep: shared global episode counter (read for plot cadence).
    """
    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = args.scene_types
    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data
        scenes, possible_targets, targets = get_data(scene_types, args.train_scenes)
    elif args.data_source == "robothor":
        from datasets.robothor_data import get_data
        # check if use pinned_scene mode
        if args.pinned_scene:
            # TODO: design a flexible scene allocating strategy
            scene_types = [scene_types[(rank % len(scene_types))]]
            # if pinned_scene is set to True, pre-load all metadata for
            # controllers constructed in new_episode()
            pre_metadata = preload_metadata(args, scene_types)
        scenes, possible_targets, targets = get_data(scene_types)
        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch
    # BUGFIX: only bind a CUDA device when one was assigned;
    # torch.cuda.set_device(-1) raises on CPU-only runs.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # A random agent produces no gradients.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()
    j = 0

    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args, player, scenes[idx[j]], possible_targets,
                            targets[idx[j]], glove=glove, protos=protos,
                            pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)

            # Periodically plot the trajectory on a bird-view map.
            if args.demo_trajectory and global_ep.value % args.demo_trajectory_freq == 0:
                print(len(player.episode.episode_trajectories))  # todo delete
                trajectory_pil = get_trajectory(
                    scene,
                    [str(loc) for loc in player.episode.episode_trajectories],
                    birdview_root='./demo_robothor/data/birdview/',
                    init_loc_str=player.episode.init_pos_str,
                    target_loc_str=player.episode.target_pos_str,
                    actions=player.episode.actions_taken,
                    success=player.success,
                    target_name=player.episode.target_object)
                # NOTE(review): 'output_trajecgtory' is misspelled but kept
                # as-is — other tooling may already expect this path.
                demo_out_dir = os.path.join(
                    args.log_dir, '../output_trajecgtory', args.title)
                os.makedirs(demo_out_dir, exist_ok=True)
                trajectory_pil.save(os.path.join(
                    demo_out_dir,
                    '{}_init_{}_target_{}_iter{}.png'.format(
                        player.episode.object_type,
                        player.episode.init_pos_str,
                        player.episode.target_pos_str,
                        global_ep.value
                    )))
                print('ploting {}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value
                ))

            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            policy_loss=loss['policy_loss'],
            value_loss=loss['value_loss']
        )
        reset_player(player)
        j = (j + 1) % len(scene_types)

    player.exit()
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    """Validation worker for a single scene_type supporting iTHOR and
    RoboTHOR data sources, curriculum metadata, and optional offline
    shortest-path data for SPL computation.

    Pushes per-episode metrics to `res_queue`, then a final marker
    containing the scene_type and the number of episodes completed.
    """
    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = [scene_type]
    offline_shortest_data = None
    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data, name_to_num
        scenes, possible_targets, targets = get_data(scene_types, args.val_scenes)
        # scene_types holds exactly one entry, so index 0 is the requested
        # type. (Removed an unused `num = name_to_num(scene_type)` local.)
        scenes = scenes[0]
        targets = targets[0]
    elif args.data_source == "robothor":
        from datasets.robothor_data import get_data
        # TODO: design a flexible scene allocating strategy
        pre_metadata = preload_metadata(args, scene_types)
        scenes, possible_targets, targets = get_data(scene_types)
        scenes = scenes[0]
        targets = targets[0]
        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)
        if args.offline_shortest_data:
            offline_shortest_data = load_offline_shortest_path_data(
                args, scenes)

    setproctitle.setproctitle("Val Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open != "":
        saved_state = torch.load(model_to_open,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0
    model_options = ModelOptions()

    while count < max_count:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args, player, scenes, possible_targets, targets,
                            glove=glove, protos=protos,
                            pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        if scene is None:
            # Episode iteration exhausted — stop early.
            break
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Roll out until the episode finishes (no sync / loss at val time).
        while not player.done:
            total_reward = run_episode(player, args, total_reward, model_options, False)
            if not player.done:
                reset_player(player)

        if offline_shortest_data:
            # assume data_source == robothor and curriculum_learning is True
            scene = player.environment.scene_name
            episode_id = player.episode.episode_id
            best_path_length = offline_shortest_data[scene][episode_id]
            spl = player.success * (best_path_length / float(player.eps_len))
        else:
            spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        if args.curriculum_learning:
            end_episode(player,
                        res_queue,
                        total_time=time.time() - player_start_time,
                        total_reward=total_reward,
                        spl=spl,
                        **bucketed_spl,
                        scene_type=scene_type,
                        difficulty=player.episode.difficulty)
        else:
            end_episode(
                player,
                res_queue,
                total_time=time.time() - player_start_time,
                total_reward=total_reward,
                spl=spl,
                **bucketed_spl,
                scene_type=scene_type,
            )
        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({
        "END": True,
        "scene_type": scene_type,
        "total_episodes": count
    })
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker: loops over shuffled scene types, rolls out
    episodes, and pushes gradients to the shared model until `end_flag`
    is set.

    (Cleaned up: removed leftover commented-out debug prints and the
    stray loop-progress prints that spammed stdout every episode.)
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch
    # BUGFIX: only bind a CUDA device when one was assigned;
    # torch.cuda.set_device(-1) raises on CPU-only runs.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # A random agent produces no gradients.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()
    j = 0

    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes[idx[j]], possible_targets,
                    targets[idx[j]], glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()