def forward(self, map, goal_pos):
    batch_size = len(map)
    map = torch.cat(map, dim=0)

    # Discard any extra channels beyond the ones this objective consumes
    if map.size(1) > self.channels_in:
        map = map[:, 0:self.channels_in, :, :]

    loss_out = None
    for i in range(batch_size):
        goal_pos_i = goal_pos[i]
        map_i = map[i:i+1]
        goal_coords_in_map = as_to_img(goal_pos_i, self.map_world_size).long()

        # Sample random negative locations uniformly over the map
        neg_samples = 1
        neg_coords_size = list(goal_coords_in_map.size())
        neg_coords_size[0] = neg_coords_size[0] * neg_samples
        all_coords_size = list(goal_coords_in_map.size())
        all_coords_size[0] += neg_coords_size[0]

        goal_negative_coords_in_map = empty_float_tensor(neg_coords_size)
        range_min = 0
        range_max = self.map_world_size
        goal_negative_coords_in_map.uniform_(range_min, range_max)
        goal_negative_coords_in_map = cuda_var(goal_negative_coords_in_map.long(), self.is_cuda, self.cuda_device)

        sample_pt_coords = torch.cat([goal_coords_in_map, goal_negative_coords_in_map], dim=0).long()

        # Label 1 for the true goal location, 0 for the negative samples
        sample_pt_labels = cuda_var(empty_float_tensor([all_coords_size[0]]).long(), self.is_cuda, self.cuda_device)
        sample_pt_labels[0] = 1
        sample_pt_labels[1:] = 0

        sample_pt_features = self.gather_2d(map_i, sample_pt_coords)

        if DBG:
            self.plot_pts(map[0], sample_pt_coords)

        pt_predictions = self.goal_linear(sample_pt_features)
        aux_loss_goal = self.loss(pt_predictions, sample_pt_labels)

        # Track goal vs. non-goal classification accuracy
        _, pred_idx = torch.max(pt_predictions.data, 1)
        correct = torch.sum((pred_idx == sample_pt_labels.data).long())
        total = float(len(sample_pt_labels))
        accuracy = correct / total
        self.accuracy_meter.put(accuracy)
        log_value(self.name + "/accuracy", self.accuracy_meter.get())

        if loss_out is None:
            loss_out = aux_loss_goal
        else:
            loss_out += aux_loss_goal

    # TODO: Consider batch size / count
    return loss_out, batch_size
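# gather_2d above is assumed to pick out the feature vector at each 2D coordinate
# of a [1 x C x H x W] feature map, returning an [N x C] matrix. A minimal sketch
# of that assumed behavior follows; the name and indexing convention are
# hypothetical, not the original implementation:
def gather_2d_sketch(feature_map, coords):
    # feature_map: [1 x C x H x W] tensor; coords: [N x 2] long tensor of indices
    rows = [feature_map[0, :, int(c[0]), int(c[1])].unsqueeze(0) for c in coords]
    return torch.cat(rows, dim=0)  # [N x C]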
def forward(self, instructions_batch):
    token_lists, _ = instructions_batch
    batch_size = len(token_lists)

    dims = (self.num_layers, batch_size, self.hidden_dim)
    hidden = (Variable(empty_float_tensor(*dims, self.is_cuda, self.cuda_device)),
              Variable(empty_float_tensor(*dims, self.is_cuda, self.cuda_device)))

    # Pad text tokens with 0's. Note: pack_padded_sequence requires the batch
    # sorted by decreasing length, so token_lists[0] is assumed to be the longest.
    text_lengths = np.array([len(tokens) for tokens in token_lists])
    tokens_batch = [[] for _ in range(batch_size)]
    for i in range(batch_size):
        num_zeros = text_lengths[0] - text_lengths[i]
        tokens_batch[i] = token_lists[i] + [0] * num_zeros

    tokens_batch = cuda_var(torch.from_numpy(np.array(tokens_batch)), self.is_cuda, self.cuda_device)

    # Swap so batch dimension is second, sequence dimension is first
    tokens_batch = tokens_batch.transpose(0, 1)

    emb_sentence = self.embedding(tokens_batch)
    packed_input = pack_padded_sequence(emb_sentence, text_lengths)
    lstm_out_packed, _ = self.lstm(packed_input, hidden)

    # Return the average output embedding over each sequence
    lstm_out, seq_lengths = pad_packed_sequence(lstm_out_packed)
    lstm_out = lstm_out.transpose(0, 1)

    sum_emb_list = []
    for i, seq_out in enumerate(lstm_out):
        seq_len = seq_lengths[i]
        sum_emb = torch.sum(seq_out[:seq_len], 0) / seq_len
        sum_emb_list.append(sum_emb.view(1, -1))

    return torch.cat(sum_emb_list)
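# Usage sketch for the sentence embedding above. Since the padding loop assumes the
# first sequence is the longest and pack_padded_sequence requires decreasing
# lengths, a hypothetical caller would sort the batch first. The instance name
# "sentence_embedding" is an assumption for illustration:
def embed_instructions_sketch(sentence_embedding, token_lists):
    token_lists = sorted(token_lists, key=len, reverse=True)  # longest first
    return sentence_embedding((token_lists, None))  # [batch_size x hidden_dim]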
def forward(self, images, instructions, instruction_masks):
    emb = self.sentence_embedding(instructions, torch.sum(instruction_masks, 1))

    # If the embedding returns an internal auxiliary loss, pass it along
    emb_loss = cuda_var(torch.zeros([1]), self.is_cuda, self.cuda_device)
    if type(emb) is tuple:
        emb, emb_loss = emb

    feature_map = self.feature_net(images)
    feature_map = self.dropout2d(feature_map)

    if self.ground_loss:
        self.lang_filter.precompute_conv_weights(emb)
        ground_map = self.lang_filter(feature_map)
        feature_map = torch.cat([feature_map, ground_map], dim=1)

    # TODO: Testing breaking of gradients between ResNet and UNet
    if cut_gradients:
        feature_map_fwd = Variable(feature_map.data)
    else:
        feature_map_fwd = feature_map

    #if self.ground_loss:
    #    feature_map_fwd = feature_map_fwd[:, 0:3, :, :]

    pred_mask = self.unet(feature_map_fwd, emb)

    return pred_mask, feature_map, emb_loss
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for paper and presentation
    if False:
        self.save_viz(images_np_pure)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > 0.5 else 0
    output_action[3] = output_stop

    return output_action
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None

    # Sanity check: every token must index into the embedding table
    for tok in instruction:
        if tok >= self.params["vocab_size"] or tok < 0:
            raise Exception("Word embeddings out of bounds")

    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    # Force a stop when the episode is about to exhaust its allotted steps
    output_stop = 1 if (stop_prob > 0.5 or self.seq_step >= self.trajectory_len - 5) else 0
    output_action[3] = output_stop

    #print("action: ", output_action)

    return output_action
def forward(self, cam_pose):
    batch_size = len(cam_pose)
    out_cpu = empty_float_tensor([batch_size, self.map_size, self.map_size, 2])

    # TODO: parallelize this loop
    for i in range(batch_size):
        mapping_i_np = self.projector.get_projection_mapping(
            cam_pose[i].position.cpu().data.numpy(),
            cam_pose[i].orientation.cpu().data.numpy(),
            range1=True)
        mapping_i = torch.from_numpy(mapping_i_np).float()
        out_cpu[i, :, :, :] = mapping_i

    out = cuda_var(out_cpu, self.is_cuda, self.cuda_device)
    return out
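# The [batch x map_size x map_size x 2] mapping returned above, with coordinates in
# [-1, 1] (range1=True), matches the grid format expected by F.grid_sample. A
# plausible consumer warps first-person features into the top-down map frame as
# sketched below; this is an assumed usage, not code from the source:
import torch.nn.functional as F

def project_features_sketch(projector_module, fpv_features, cam_pose):
    grid = projector_module(cam_pose)         # [B x H x W x 2] in [-1, 1]
    return F.grid_sample(fpv_features, grid)  # [B x C x H x W] top-down features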
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))

    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    print("action: ", output_action)

    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
    output_action[3] = output_stop

    return output_action
def cuda_var(self, tensor):
    return cuda_var(tensor, self.is_cuda, self.cuda_device)
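# The module-level cuda_var and empty_float_tensor helpers used throughout are
# assumed to behave roughly as sketched below; these are hypothetical
# reimplementations for reference, the real definitions live elsewhere:
def cuda_var_sketch(tensor, cuda=False, device=None):
    if cuda:
        tensor = tensor.cuda(device)
    return Variable(tensor)

def empty_float_tensor_sketch(sizes, cuda=False, device=None):
    # Zero-filled float tensor, optionally moved to the given CUDA device
    tensor = torch.zeros(*sizes)
    if cuda:
        tensor = tensor.cuda(device)
    return tensor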
def train_top_down_pred():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)

    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(max_size=setup["max_envs"])
    all_instr = {**train_instructions, **dev_instructions, **test_instructions}

    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs, dataset_name="supervised", eval=True, seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc.
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]

            print("Your current environment:")
            with open("/storage/dxsun/unreal_config_nl/configs/configs/random_config_" + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)

            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file until the user issues a command or a new instruction
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str, word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(tmp, token2term)

                #import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0],
                                              [0, S, 0],
                                              [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)
                affine_pred_to_g = np.dot(affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g, 32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())

                # Create a batch axis for pytorch
                mask_pred_g = torch.from_numpy(mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                # Normalize both channels for visualization
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                #presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()

                pred_viz_np = presenter.overlaid_image(image.data, mask_pred_np, channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data, label_mask.data, channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np
                viz_img = presenter.overlay_text(viz_img_np, instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s], seg_idxs[0][s], tok_instruction)
                write_instruction("")
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction
    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction))

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    passive_mode_debug_projections = True
    if passive_mode_debug_projections:
        self.show_landmark_locations(loop=False, states=state)
        self.reset()

    # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
    if self.params.get("run_auxiliaries_at_test_time"):
        _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store, reduce_average=True)
        overlaid = self.get_overlaid_classification_results(whole_batch=False)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
def sup_loss_on_batch(self, batch, eval=False, viz=False):
    if eval:
        self.eval()
    else:
        self.train()

    images = cuda_var(batch["images"], self.is_cuda, self.cuda_device)
    instructions = cuda_var(batch["instr"], self.is_cuda, self.cuda_device)
    instruction_masks = cuda_var(batch["instr_mask"], self.is_cuda, self.cuda_device)
    label_masks = cuda_var(batch["traj_labels"], self.is_cuda, self.cuda_device)

    # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner list
    # is over the segments. Loop through and accumulate loss for each batch sequentially, and for each segment.
    # Reset model state (embedding etc) between batches, but not between segments.
    # We don't process each batch in batch-mode, because it's complicated, with the varying number of segments and all.

    batch_size = len(images)
    total_class_loss = Variable(empty_float_tensor([1], self.is_cuda, self.cuda_device), requires_grad=True)
    total_ground_loss = Variable(empty_float_tensor([1], self.is_cuda, self.cuda_device), requires_grad=True)
    count = 0

    label_masks = self.label_pool(label_masks)
    mask_pred, features, emb_loss = self(images, instructions, instruction_masks)

    if BCE:
        mask_pred_flat = mask_pred.view(-1, 1)
        label_masks_flat = label_masks - torch.min(label_masks)
        label_masks_flat = label_masks_flat / (torch.max(label_masks_flat) + 1e-9)
        label_masks_flat = label_masks_flat.view(-1, 1).clamp(0, 1)
        main_loss = self.mask_loss(mask_pred_flat, label_masks_flat)
    elif NLL:
        mask_pred_1 = F.softmax(mask_pred, 1, _stacklevel=5)
        mask_pred_2 = 1 - mask_pred_1
        mask_pred_1 = mask_pred_1.unsqueeze(1)
        mask_pred_2 = mask_pred_2.unsqueeze(1)
        mask_pred = torch.cat((mask_pred_1, mask_pred_2), dim=1)
        label_masks = label_masks.clamp(0, 1)
        if self.is_cuda:
            label_masks = label_masks.type(torch.cuda.LongTensor)
        else:
            label_masks = label_masks.type(torch.LongTensor)
        main_loss = self.mask_loss(mask_pred, label_masks)
    elif CE:
        # Crossentropy2D internally applies logsoftmax to mask_pred,
        # but labels are already assumed to be a valid probability distribution, so no softmax is applied
        main_loss = self.mask_loss(mask_pred, label_masks)
        # For nice plotting, we must apply the softmax manually
        mask_pred = self.spatialsoftmax(mask_pred)
    else:
        main_loss = self.mask_loss(mask_pred, label_masks)

    # Sum emb loss if batch size > 1
    if type(emb_loss) == tuple:
        emb_loss = sum(emb_loss)

    # Extract the feature vectors corresponding to every landmark's location in the map.
    # Apply a linear layer to classify which of the 64 landmarks it is.
    # The landmark positions have to be divided by the same factor as the ResNet scaling factor.
    lcount = 0
    for i in range(batch_size):
        if self.class_loss and len(batch["lm_pos"][i]) > 0:
            lcount += 1
            landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda, self.cuda_device)
            landmark_indices = cuda_var(batch["lm_indices"][i], self.is_cuda, self.cuda_device)
            landmark_coords = (landmark_pos / 8).long()
            lm_features = self.gather2d(features[i:i+1, 0:32], landmark_coords)
            lm_pred = self.aux_class_linear(lm_features)
            class_loss = self.aux_loss(lm_pred, landmark_indices)
            total_class_loss = total_class_loss + class_loss

        if self.ground_loss and len(batch["lm_pos"][i]) > 0:
            landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda, self.cuda_device)
            landmark_mentioned = cuda_var(batch["lm_mentioned"][i], self.is_cuda, self.cuda_device)
            landmark_coords = (landmark_pos / 8).long()
            g_features = self.gather2d(features[i:i+1, 32:35], landmark_coords)
            lm_pred = self.aux_ground_linear(g_features)
            ground_loss = self.aux_loss(lm_pred, landmark_mentioned)
            total_ground_loss = total_ground_loss + ground_loss

    total_class_loss = total_class_loss / (lcount + 1e-9)
    total_ground_loss = total_ground_loss / (lcount + 1e-9)
    count += 1

    # Just visualization and debugging code
    if self.get_iter() % 50 == 0:
        presenter = Presenter()
        pred_viz_np = presenter.overlaid_image(images[0].data, mask_pred[0].data)
        labl_viz_np = presenter.overlaid_image(images[0].data, label_masks[0].data)
        comp = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
        presenter.show_image(comp, "path_pred")

        if hasattr(self.sentence_embedding, "save_att_map"):
            self.sentence_embedding.save_att_map(self.get_iter(), i)

    total_loss = main_loss + 0.1 * total_class_loss + 0.001 * emb_loss + 0.1 * total_ground_loss
    total_loss = total_loss / (count + 1e-9)

    self.write_summaires("eval" if eval else "train", self.get_iter(),
                         total_loss, main_loss, emb_loss, total_class_loss, total_ground_loss)
    self.inc_iter()

    return total_loss
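# Usage sketch for sup_loss_on_batch above: a minimal outer training step, assuming
# the model is a torch module whose dataloader yields collated batches. The
# optimizer choice and function name are illustrative, not from the source:
def train_epoch_sketch(model, dataloader, optimizer):
    for batch in dataloader:
        loss = model.sup_loss_on_batch(batch, eval=False)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()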
def evaluate():
    P.initialize_experiment()

    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()
    model.eval()

    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner list
        # is over the segments. Loop through and accumulate loss for each batch sequentially, and for each segment.
        # Reset model state (embedding etc) between batches, but not between segments.
        # We don't process each batch in batch-mode, because it's complicated, with the varying number of segments and all.
        # TODO: This code is outdated and wrongly discretizes the goal location. Grab the fixed version from the old branch.

        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)
        for i in range(batch_size):
            num_segments = len(instructions[i])
            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda, model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda, model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda, model.cuda_device)
                label_mask = model.label_pool(label_mask)

                # Argmax location of the goal channel in the label mask
                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                goal_mask_l_flat = np.reshape(goal_mask_l_np, [-1])
                max_index_l = np.argmax(goal_mask_l_flat)
                argmax_loc_l = np.asarray([int(max_index_l / goal_mask_l_np.shape[1]),
                                           int(max_index_l % goal_mask_l_np.shape[1])])

                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(image, instruction, instruction_mask)

                # Argmax location of the goal channel in the prediction
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                goal_mask_flat = np.reshape(goal_mask_np, [-1])
                max_index = np.argmax(goal_mask_flat)
                argmax_loc = np.asarray([int(max_index / goal_mask_np.shape[1]),
                                         int(max_index % goal_mask_np.shape[1])])

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    print("avg dist: ", total_dist / float(count))
    print("success rate: ", success / float(count))
def reset(self):
    self.last_h = cuda_var(torch.zeros(1, 1, self.hidden_size), self.is_cuda, self.cuda_device)
    self.last_c = cuda_var(torch.zeros(1, 1, self.hidden_size), self.is_cuda, self.cuda_device)
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction
    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction))

    #if first_step:
    #    say(debug_untokenize_instruction(instruction))

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    print(f"P(STOP): {stop_prob}")
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
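# Rollout sketch tying the get_action policies above to the environment: a
# hypothetical evaluation loop against PomdpInterface. env.reset mirrors how it
# appears elsewhere in the source; the env.step return signature is an assumption:
def rollout_sketch(policy, env, instruction, max_steps=100):
    state = env.reset(0)
    for _ in range(max_steps):
        action = policy.get_action(state, instruction)  # [x, y, theta, pstop]
        if action[3] >= 0.5:  # get_action sets index 3 to a hard 0/1 stop flag
            break
        state, reward, done = env.step(action)
        if done:
            break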