def test(rank):
    """Run the navigation + VQA pipeline on generated questions in simulation.

    Loads the planner/controller navigation model and the VQA model from the
    checkpoints named in ``args``, builds the simulated environment for a
    fixed scene file, and then, for every generated question:

    1. rolls the planner out for at most ``max_action`` steps, turning each
       predicted action into a 2-D position update,
    2. executes one robot primitive (``UR5_action`` mode 2 or mode 1) from
       the accumulated positions,
    3. captures a new camera frame and asks the VQA model for a binary
       answer, which is printed.

    Args:
        rank: Not used by this function.

    Raises:
        Exception: If the VQA model predicts a class other than 0 or 1.
    """
    nav_model_kwargs = {'question_vocab': load_vocab(args.vocab_json)}
    nav_model = NavPlannerControllerModel(**nav_model_kwargs)
    nav_checkpoint = torch.load(args.nav_weight)  # load checkpoint weights
    nav_model.load_state_dict(nav_checkpoint['state'])
    print('--- nav_model loaded checkpoint ---')

    # CNN used by data2input to turn camera frames into planner features.
    cnn_kwargs = {'num_classes': 191, 'pretrained': True}
    cnn = MultitaskCNN(**cnn_kwargs)
    cnn.eval()
    cnn.cuda()

    vqa_model_kwargs = {'vocab': load_vocab(args.vocab_json)}
    vqa_model = VqaLstmCnnAttentionModel(**vqa_model_kwargs)
    vqa_checkpoint = torch.load(args.vqa_weight)  # load checkpoint weights
    vqa_model.load_state_dict(vqa_checkpoint['state'])
    print('--- vqa_model loaded checkpoint ---')

    # Create the simulation environment.
    scene = "test-10-obj-100.txt"
    my_env = enviroment.Environment(is_testing=0, testing_file=scene)
    object_exist_list = my_env.ur5.object_type
    print("Objetcts that exist: ")
    print(object_exist_list)

    # Create the testing questions and their vocabulary.
    my_question = Qusetion(object_exist_list)
    testing_questions = my_question.createQueue()
    vocab = my_question.create_vocab()

    for question in testing_questions:
        planner_hidden = None
        max_action = 30
        position = [0, 0]
        action_in_raw = [0]  # start action fed to the planner
        actions = []

        print(question['question'])

        # Encode the question; a trailing 0 is appended to the token ids.
        questionTokens = my_question.tokenize(question['question'],
                                              punctToRemove=['?'],
                                              addStartToken=False)
        encoded_question_raw = my_question.encode(
            questionTokens, vocab['questionTokenToIdx'])
        encoded_question_raw.append(0)
        encoded_question_raw = np.array(encoded_question_raw)
        encoded_question_tensor = _dataset_to_tensor(encoded_question_raw)
        encoded_question = Variable(encoded_question_tensor)
        encoded_question = encoded_question.unsqueeze(0)
        print(encoded_question)

        action_times = 0
        push_signal = 0  # becomes 1 once the planner emits action 0
        push_point = 0   # step index at which action 0 was last emitted

        while action_times < max_action:
            action_in_tensor = _dataset_to_tensor(action_in_raw)
            action_in = Variable(action_in_tensor)
            action_in = action_in.unsqueeze(0)
            action_in = action_in.unsqueeze(0)

            # Frame before the manipulation, used as planner input.
            _, rgb_image_raw = my_env.camera.get_camera_data()
            position_in, planner_img_feats_var = data2input(
                position, rgb_image_raw, cnn)

            output_data, planner_hidden = nav_model.planner_step(
                encoded_question, planner_img_feats_var, action_in,
                position_in, planner_hidden)
            planner_possi = F.log_softmax(output_data, dim=1)
            planner_data = planner_possi.data.numpy()[0]

            # Greedy decoding: first index holding the maximum log-probability.
            action_out = np.argmax(planner_data)
            actions.append(action_out)
            action_in_raw = [action_out]

            if action_out == 9:
                print('stop')
                break
            elif action_out == 0:
                push_signal = 1
                push_point = action_times
            else:
                dx, dy = order2action(action_out)
                position[0] += dx
                position[1] += dy
            action_times += 1

        if len(actions) > 2 and push_signal == 0:
            # Sucking: start and end point are both the final position.
            action_position = position + position
            my_env.UR5_action(action_position, 2)
        elif len(actions) > 2 and push_signal == 1:
            # Pushing: moves up to and including push_point define the start
            # point; all moves together define the end point.
            position_start = [0, 0]
            position_end = [0, 0]
            for i, action in enumerate(actions):
                dx, dy = order2action(action)
                if i <= push_point:
                    position_start[0] += dx
                    position_start[1] += dy
                position_end[0] += dx
                position_end[1] += dy
            action_position = position_start + position_end
            my_env.UR5_action(action_position, 1)

        # Answer from the frame captured after the manipulation; the
        # pre-action frame would not show the effect of the executed action.
        _, rgb_image_after = my_env.camera.get_camera_data()
        shrink = cv.resize(rgb_image_after, (224, 224),
                           interpolation=cv.INTER_AREA)
        shrink = np.array(shrink)
        shrink = shrink.transpose((2, 0, 1))
        shrink = shrink.reshape(1, 3, 224, 224)
        shrink = (shrink / 255.0).astype(np.float32)
        images = torch.FloatTensor(shrink)
        images = Variable(images)
        images = images.unsqueeze(0)

        # The encoded question from above is reused for the VQA model.
        scores, _ = vqa_model(images, encoded_question)
        scores = scores.data.numpy()[0]
        answer_predict = np.argmax(scores)
        if answer_predict == 0:
            print('--- Predict: Exists not')
        elif answer_predict == 1:
            print('--- Predict: Exists')
        else:
            raise Exception('Prediction neither 0 nor 1')
class EqaDataset(Dataset):
    """Dataset of questions, answers and navigation action sequences.

    What ``__getitem__`` returns depends on ``input_type``:

    * ``'ques'``            -- question-only VQA samples.
    * ``'ques,image'``      -- VQA samples with the last ``num_frames`` frames.
    * ``'cnn'`` / ``'cnn+q'``   -- navigation samples with CNN features.
    * ``'lstm'`` / ``'lstm+q'`` -- navigation samples padded to full length.
    * ``'pacman'``          -- planner/controller hierarchical samples.

    For every mode except ``'ques'`` the samples are restricted to
    ``self.available_idx``, i.e. questions whose environment is currently
    loaded (see ``_load_envs``).
    """

    def __init__(self,
                 questions_h5,
                 vocab,
                 num_frames=1,
                 data_json=False,
                 split='train',
                 gpu_id=0,
                 input_type='ques',
                 max_threads_per_gpu=10,
                 to_cache=False,
                 target_obj_conn_map_dir=False,
                 map_resolution=1000,
                 overfit=False,
                 max_controller_actions=5,
                 max_actions=None):
        """Read question data into memory and optionally load environments.

        Args:
            questions_h5: Mapping indexed by ``'idx'``, ``'questions'``,
                ``'answers'``, ``'action_labels'`` and ``'action_lengths'``
                (presumably an open HDF5 file -- confirm against the caller).
            vocab: Passed to ``load_vocab``.
            num_frames: Number of frames/actions per sample in the
                ``'ques,image'`` and ``'cnn'`` modes.
            data_json: Path of a JSON file with the keys ``'envs'``,
                ``'<split>_env_idx'``, ``'<split>_pos_queue'`` and
                ``'<split>_boxes'``; ``False`` skips all environment data.
            split: Split name used to select keys in ``data_json``.
            gpu_id: Device handed to each ``RenderAPIThread``.
            input_type: One of the modes listed in the class docstring.
            max_threads_per_gpu: Number of render threads created, and the
                maximum number of environments picked per load.
            to_cache: Cache rendered images / features per sample index.
            target_obj_conn_map_dir: Forwarded to ``House3DUtils``; when not
                ``False`` the navigation modes also set up the episode target.
            map_resolution: Forwarded to ``local_create_house``.
            overfit: Restrict the dataset to the first environment.
            max_controller_actions: Controller action limit forwarded to
                ``flat_to_hierarchical_actions``.
            max_actions: If set, keep only the last ``max_actions`` actions
                (and camera positions) of every sequence.
        """
        self.questions_h5 = questions_h5
        self.vocab = load_vocab(vocab)
        self.num_frames = num_frames
        self.max_controller_actions = max_controller_actions

        np.random.seed()

        self.data_json = data_json
        self.split = split
        self.gpu_id = gpu_id

        self.input_type = input_type

        self.max_threads_per_gpu = max_threads_per_gpu

        self.target_obj_conn_map_dir = target_obj_conn_map_dir
        self.map_resolution = map_resolution
        self.overfit = overfit

        self.to_cache = to_cache
        self.img_data_cache = {}

        print('Reading question data into memory from', questions_h5)
        self.idx = _dataset_to_tensor(questions_h5['idx'])
        self.questions = _dataset_to_tensor(questions_h5['questions'])
        self.answers = _dataset_to_tensor(questions_h5['answers'])
        self.actions = _dataset_to_tensor(questions_h5['action_labels'])
        self.action_lengths = _dataset_to_tensor(
            questions_h5['action_lengths'])
        print('... finished running dataset_to_tensor operations from',
              questions_h5)

        if max_actions:
            # Rebuild the action arrays with a fixed length: slot 0 holds the
            # value 1, followed by the last `max_actions` actions of each
            # sequence, followed by one zero slot.
            print('... entering max_actions conditions block from',
                  questions_h5)
            assert isinstance(max_actions, int)
            num_data_items = self.actions.shape[0]
            new_actions = np.zeros((num_data_items, max_actions + 2),
                                   dtype=np.int64)
            new_lengths = np.ones(
                (num_data_items, ), dtype=np.int64) * max_actions
            for i in range(num_data_items):
                action_length = int(self.action_lengths[i])
                new_actions[i, 0] = 1
                new_actions[i, 1:max_actions + 1] = self.actions[
                    i, action_length - max_actions:action_length].numpy()
            self.actions = torch.LongTensor(new_actions)
            self.action_lengths = torch.LongTensor(new_lengths)
            print('... finished running max_actions conditions block from',
                  questions_h5)

        if self.data_json != False:
            print('... entering data_json false condition block from',
                  questions_h5)
            # Context manager so the file handle is closed deterministically.
            with open(self.data_json, 'r') as f:
                data = json.load(f)
            self.envs = data['envs']

            self.env_idx = data[self.split + '_env_idx']
            self.env_list = [self.envs[x] for x in self.env_idx]
            self.env_set = list(set(self.env_list))
            self.env_set.sort()

            if self.overfit:
                self.env_idx = self.env_idx[:1]
                self.env_set = self.env_list = [
                    self.envs[x] for x in self.env_idx
                ]
                print('Trying to overfit to [house %s]' % self.env_set[0])
                logging.info('Trying to overfit to [house {}]'.format(
                    self.env_set[0]))

            print(questions_h5,
                  'Total envs: %d' % len(list(set(self.envs))))
            print(
                questions_h5, 'Envs in %s: %d' %
                (self.split, len(list(set(self.env_idx)))))

            if input_type != 'ques':
                # If training, randomly sample and load a subset of
                # environments, train on those, and then cycle through to
                # load the rest.
                #
                # On the validation and test set, load in order, and cycle
                # through.
                #
                # For both, add optional caching so that if all environments
                # have been cycled through once, then no need to re-load and
                # instead, just the cache can be used.
                self.api_threads = []
                self._load_envs(start_idx=0, in_order=True)

                cnn_kwargs = {'num_classes': 191, 'pretrained': True}
                self.cnn = MultitaskCNN(**cnn_kwargs)
                self.cnn.eval()
                self.cnn.cuda()

            self.pos_queue = data[self.split + '_pos_queue']
            self.boxes = data[self.split + '_boxes']

            if max_actions:
                # Keep camera positions aligned with the truncated actions.
                for i in range(len(self.pos_queue)):
                    self.pos_queue[i] = self.pos_queue[i][-1 * max_actions:]
            print('... finished running data_json false condition block from',
                  questions_h5)

        if input_type == 'pacman':
            print('... entering input_type pacman condition block from',
                  questions_h5)
            self.planner_actions = self.actions.clone().fill_(0)
            # -1 marks padding; __getitem__ derives controller_mask from it.
            self.controller_actions = self.actions.clone().fill_(-1)

            self.planner_action_lengths = self.action_lengths.clone().fill_(0)
            self.controller_action_lengths = self.action_lengths.clone().fill_(
                0)

            self.planner_hidden_idx = self.actions.clone().fill_(0)

            self.planner_pos_queue_idx, self.controller_pos_queue_idx = [], []

            # parsing flat actions to planner-controller hierarchy
            for i in tqdm(range(len(self.actions))):
                pa, ca, pq_idx, cq_idx, ph_idx = flat_to_hierarchical_actions(
                    actions=self.actions[i][:self.action_lengths[i] + 1],
                    controller_action_lim=max_controller_actions)

                self.planner_actions[i][:len(pa)] = torch.Tensor(pa)
                self.controller_actions[i][:len(ca)] = torch.Tensor(ca)

                self.planner_action_lengths[i] = len(pa) - 1
                self.controller_action_lengths[i] = len(ca)

                self.planner_pos_queue_idx.append(pq_idx)
                self.controller_pos_queue_idx.append(cq_idx)

                self.planner_hidden_idx[i][:len(ca)] = torch.Tensor(ph_idx)
            print(
                '... finished running input_type pacman condition block from',
                questions_h5)
        print('... finished instantiating EqaDataset from', questions_h5)

    def _pick_envs_to_load(self,
                           split='train',
                           max_envs=10,
                           start_idx=0,
                           in_order=False):
        """Return up to ``max_envs`` environment ids from ``self.env_set``.

        For the ``'val'``/``'test'`` splits, or when ``in_order`` is true, a
        contiguous slice starting at ``start_idx`` is returned. Otherwise the
        ids are sampled at random, with replacement only when ``max_envs`` is
        not smaller than the number of available environments.
        """
        if split in ['val', 'test'] or in_order == True:
            return self.env_set[start_idx:start_idx + max_envs]

        num_envs = len(self.env_set)
        env_inds = np.random.choice(num_envs,
                                    max_envs,
                                    replace=not (max_envs < num_envs))
        return [self.env_set[x] for x in env_inds]

    def _load_envs(self, start_idx=-1, in_order=False):
        """Load the next batch of environments and refresh ``available_idx``.

        Args:
            start_idx: Index into ``self.env_set`` to start from; ``-1``
                continues right after the last environment of the previous
                batch.
            in_order: Forwarded to ``_pick_envs_to_load``.
        """
        #self._clear_memory()
        if start_idx == -1:
            start_idx = self.env_set.index(self.pruned_env_set[-1]) + 1

        # Pick envs
        self.pruned_env_set = self._pick_envs_to_load(
            split=self.split,
            max_envs=self.max_threads_per_gpu,
            start_idx=start_idx,
            in_order=in_order)

        if len(self.pruned_env_set) == 0:
            return

        # Load api threads
        start = time.time()
        if len(self.api_threads) == 0:
            for _ in range(self.max_threads_per_gpu):
                self.api_threads.append(
                    objrender.RenderAPIThread(w=224,
                                              h=224,
                                              device=self.gpu_id))

        self.cfg = load_config('../../House3D/tests/config.json')

        print('[%.02f] Loaded %d api threads' %
              (time.time() - start, len(self.api_threads)))
        start = time.time()

        # Load houses
        from multiprocessing import Pool
        _args = ([h, self.cfg, self.map_resolution]
                 for h in self.pruned_env_set)

        with Pool(len(self.pruned_env_set)) as pool:
            self.all_houses = pool.starmap(local_create_house, _args)

        print('[%.02f] Loaded %d houses' %
              (time.time() - start, len(self.all_houses)))
        start = time.time()

        # Load envs
        self.env_loaded = {}
        for i, house in enumerate(self.all_houses):
            print('[%02d/%d][split:%s][gpu:%d][house:%s]' %
                  (i + 1, len(self.all_houses), self.split, self.gpu_id,
                   house.house['id']))
            environment = Environment(self.api_threads[i], house, self.cfg)
            self.env_loaded[house.house['id']] = House3DUtils(
                environment,
                target_obj_conn_map_dir=self.target_obj_conn_map_dir,
                build_graph=False)

        # [TODO] Unused till now
        self.env_ptr = -1

        print('[%.02f] Loaded %d house3d envs' %
              (time.time() - start, len(self.env_loaded)))

        # Mark available data indices.
        # [TODO] only keeping legit sequences (action length of at least 5);
        # needed for things to play well with old data.
        self.available_idx = [
            i for i, v in enumerate(self.env_list)
            if v in self.env_loaded and not (self.action_lengths[i] < 5)
        ]

        print('Available inds: %d' % len(self.available_idx))

        # Flag to check if loaded envs have been cycled through or not
        # [TODO] Unused till now
        self.all_envs_loaded = False

    def _clear_api_threads(self):
        """Delete every render thread and reset ``self.api_threads``."""
        for _ in range(len(self.api_threads)):
            del self.api_threads[0]
        self.api_threads = []

    def _clear_memory(self):
        """Drop the episode house, loaded environments and render threads."""
        if hasattr(self, 'episode_house'):
            del self.episode_house
        if hasattr(self, 'env_loaded'):
            del self.env_loaded
        if hasattr(self, 'api_threads'):
            del self.api_threads
        self.api_threads = []

    def _check_if_all_envs_loaded(self):
        """Return True once the cache holds one entry per question.

        When that is the case ``available_idx`` is widened to every sample.
        """
        print('[CHECK][Cache:%d][Total:%d]' %
              (len(self.img_data_cache), len(self.env_list)))
        if len(self.img_data_cache) == len(self.env_list):
            self.available_idx = list(range(len(self.env_list)))
            return True
        return False

    def set_camera(self, e, pos, robot_height=1.0):
        """Place the camera of ``e`` at ``pos``.

        ``pos`` must have four entries; entries 0, 2 and 3 are written to the
        camera's x, z and yaw, while y is set to ``robot_height``.
        """
        assert len(pos) == 4

        e.env.cam.pos.x = pos[0]
        e.env.cam.pos.y = robot_height
        e.env.cam.pos.z = pos[2]
        e.env.cam.yaw = pos[3]

        e.env.cam.updateDirection()

    def render(self, e):
        """Render the current camera view of ``e``."""
        return e.env.render()

    def get_frames(self, e, pos_queue, preprocess=True):
        """Render one float32 frame per camera position.

        Args:
            e: Environment wrapper exposing ``e.env``.
            pos_queue: List of camera positions; any non-list value is
                wrapped into a one-element list.
            preprocess: When true, move the last axis first (HWC -> CHW for
                an image-shaped render) and scale values by ``1 / 255``.

        Returns:
            ``np.ndarray`` stacking the frames along a new first axis.
        """
        if not isinstance(pos_queue, list):
            pos_queue = [pos_queue]

        res = []
        for pos in pos_queue:
            self.set_camera(e, pos)
            # np.asarray copies only when needed; np.array(copy=False) raises
            # on NumPy >= 2 whenever the dtype conversion requires a copy.
            img = np.asarray(self.render(e), dtype=np.float32)

            if preprocess == True:
                img = img.transpose(2, 0, 1)
                img = img / 255.0

            res.append(img)

        return np.array(res)

    def _encode_frames(self, e, pos_queue):
        """Render ``pos_queue`` in ``e`` and return CNN features as ndarray."""
        images = self.get_frames(e, pos_queue, preprocess=True)
        return self.cnn(Variable(
            torch.FloatTensor(images).cuda())).data.cpu().numpy().copy()

    def _padded_img_feats(self, index):
        """Return (optionally cached) CNN features zero-padded to full length.

        The result has ``self.actions.shape[1]`` rows.
        """
        if self.to_cache == True and index in self.img_data_cache:
            return self.img_data_cache[index]

        raw_img_feats = self._encode_frames(
            self.env_loaded[self.env_list[index]], self.pos_queue[index])
        img_feats = np.zeros(
            (self.actions.shape[1], raw_img_feats.shape[1]),
            dtype=np.float32)
        img_feats[:raw_img_feats.shape[0], :] = raw_img_feats

        if self.to_cache == True:
            self.img_data_cache[index] = img_feats
        return img_feats

    def _find_episode_target(self, index):
        """Match the annotated target boxes of a sample against its house.

        Returns:
            ``(target_obj_id, target_room)``; each is ``False`` when no
            object / room with an identical bounding box was found.
        """
        house = self.env_loaded[self.env_list[index]]
        target_obj_id, target_room = False, False

        bbox_obj = [
            x for x in self.boxes[index]
            if x['type'] == 'object' and x['target'] == True
        ][0]['box']
        for obj_id in house.objects:
            box2 = house.objects[obj_id]['bbox']
            if all(bbox_obj['min'][x] == box2['min'][x] for x in range(3)) and \
                    all(bbox_obj['max'][x] == box2['max'][x] for x in range(3)):
                target_obj_id = obj_id
                break

        # NOTE(review): the room box is the first one whose 'target' flag is
        # False; kept exactly as before -- confirm against the data schema.
        bbox_room = [
            x for x in self.boxes[index]
            if x['type'] == 'room' and x['target'] == False
        ][0]
        for room in house.env.house.all_rooms:
            if all(room['bbox']['min'][i] == bbox_room['box']['min'][i]
                   for i in range(3)) and \
                    all(room['bbox']['max'][i] == bbox_room['box']['max'][i]
                        for i in range(3)):
                target_room = room
                break

        return target_obj_id, target_room

    def _begin_episode(self, index, target_obj_id, target_room):
        """Register the target on the house and store per-episode state."""
        house = self.env_loaded[self.env_list[index]]
        house.set_target_object(house.objects[target_obj_id], target_room)

        # [NOTE] only works for batch size = 1
        self.episode_pos_queue = self.pos_queue[index]
        self.episode_house = house
        self.target_room = target_room
        self.target_obj = house.objects[target_obj_id]

    def get_hierarchical_features_till_spawn(self,
                                             actions,
                                             backtrack_steps=0,
                                             max_controller_actions=5):
        """Build planner/controller inputs up to the agent's spawn point.

        Uses the episode state (``episode_house``, ``episode_pos_queue``)
        stored by the last evaluation-mode ``__getitem__`` call.

        Args:
            actions: Flat action sequence of the episode.
            backtrack_steps: How many steps before the end of ``actions``
                the agent is spawned.
            max_controller_actions: Controller limit forwarded to
                ``flat_to_hierarchical_actions``. Note that the counting
                loop below reads ``self.max_controller_actions`` instead.

        Returns:
            Tuple ``(planner_actions_in, planner_img_feats, controller_step,
            controller_action_in, controller_img_feat, spawn_position,
            counter)``.
        """
        action_length = len(actions) - 1
        pa, ca, pq_idx, cq_idx, ph_idx = flat_to_hierarchical_actions(
            actions=actions, controller_action_lim=max_controller_actions)

        # Count how many actions of the same type have been encountered
        # before starting navigation (walking backwards from the spawn point).
        backtrack_controller_steps = actions[1:action_length -
                                             backtrack_steps + 1:][::-1]
        counter = 0

        if len(backtrack_controller_steps) > 0:
            # The bounds check precedes the element comparison so the index
            # can never run past the end of backtrack_controller_steps.
            while ((counter <= self.max_controller_actions)
                   and (counter < len(backtrack_controller_steps))
                   and (backtrack_controller_steps[counter] ==
                        backtrack_controller_steps[0])):
                counter += 1

        target_pos_idx = action_length - backtrack_steps

        # The spawn step is a controller step unless it coincides with a
        # planner position.
        controller_step = True
        if target_pos_idx in pq_idx:
            controller_step = False

        pq_idx_pruned = [v for v in pq_idx if v <= target_pos_idx]
        pa_pruned = pa[:len(pq_idx_pruned) + 1]

        raw_img_feats = self._encode_frames(self.episode_house,
                                            self.episode_pos_queue)

        controller_img_feat = torch.from_numpy(
            raw_img_feats[target_pos_idx].copy())
        controller_action_in = pa_pruned[-1] - 2

        planner_img_feats = torch.from_numpy(
            raw_img_feats[pq_idx_pruned].copy())
        planner_actions_in = torch.from_numpy(np.array(pa_pruned[:-1]) - 1)

        return planner_actions_in, planner_img_feats, controller_step, controller_action_in, \
            controller_img_feat, self.episode_pos_queue[target_pos_idx], counter

    def __getitem__(self, index):
        """Return the sample at ``index``; the layout depends on input_type.

        For every mode except ``'ques'`` the given ``index`` is first mapped
        through ``self.available_idx``. An unknown ``input_type`` yields
        ``None``; the ``'pacman'`` evaluation path also yields ``None`` when
        the episode target cannot be resolved.
        """
        # [VQA] question-only
        if self.input_type == 'ques':
            idx = self.idx[index]
            question = self.questions[index]
            answer = self.answers[index]

            return (idx, question, answer)

        # [VQA] question+image
        elif self.input_type == 'ques,image':
            index = self.available_idx[index]

            idx = self.idx[index]
            question = self.questions[index]
            answer = self.answers[index]

            action_length = self.action_lengths[index]
            actions = self.actions[index]

            actions_in = actions[action_length - self.num_frames:action_length]
            actions_out = actions[action_length - self.num_frames +
                                  1:action_length + 1]

            if self.to_cache == True and index in self.img_data_cache:
                images = self.img_data_cache[index]
            else:
                # last `num_frames` camera positions
                pos_queue = self.pos_queue[index][-self.num_frames:]
                images = self.get_frames(self.env_loaded[self.env_list[index]],
                                         pos_queue,
                                         preprocess=True)
                if self.to_cache == True:
                    self.img_data_cache[index] = images.copy()

            return (idx, question, answer, images, actions_in, actions_out,
                    action_length)

        # [NAV] question+cnn
        elif self.input_type in ['cnn', 'cnn+q']:
            index = self.available_idx[index]

            idx = self.idx[index]
            question = self.questions[index]
            answer = self.answers[index]

            action_length = self.action_lengths[index]
            actions = self.actions[index]

            if self.to_cache == True and index in self.img_data_cache:
                img_feats = self.img_data_cache[index]
            else:
                img_feats = self._encode_frames(
                    self.env_loaded[self.env_list[index]],
                    self.pos_queue[index])
                if self.to_cache == True:
                    self.img_data_cache[index] = img_feats

            # for val or test (evaluation), or
            # when target_obj_conn_map_dir is defined (reinforce),
            # load entire shortest path navigation trajectory
            # and load connectivity map for intermediate rewards
            if self.split in ['val', 'test'
                              ] or self.target_obj_conn_map_dir != False:
                target_obj_id, target_room = self._find_episode_target(index)
                assert target_obj_id != False
                assert target_room != False
                self._begin_episode(index, target_obj_id, target_room)

                actions_in = actions[:action_length]
                actions_out = actions[1:action_length + 1] - 2

                return (idx, question, answer, img_feats, actions_in,
                        actions_out, action_length)

            # if action_length is n
            # images.shape[0] is also n
            # actions[0] is <START>
            # actions[n] is <END>

            # grab `num_frames` consecutive frames at a random offset
            # [NOTE]: this'll break for longer-than-5 navigation sequences
            start_idx = np.random.choice(img_feats.shape[0] + 1 -
                                         self.num_frames)
            img_feats = img_feats[start_idx:start_idx + self.num_frames]

            actions_in = actions[start_idx:start_idx + self.num_frames]
            actions_out = actions[start_idx + self.num_frames] - 2

            return (idx, question, answer, img_feats, actions_in, actions_out,
                    action_length)

        # [NAV] question+lstm
        elif self.input_type in ['lstm', 'lstm+q']:
            index = self.available_idx[index]

            idx = self.idx[index]
            question = self.questions[index]
            answer = self.answers[index]

            action_length = self.action_lengths[index]
            actions = self.actions[index]

            if self.split == 'train':
                img_feats = self._padded_img_feats(index)

            actions_in = actions.clone() - 1
            actions_out = actions[1:].clone() - 2

            actions_in[action_length:].fill_(0)
            # The mask is taken before padding is zeroed, so padded slots
            # (negative after the shift) stay masked out.
            mask = actions_out.clone().gt(-1)
            if len(actions_out) > action_length:
                actions_out[action_length:].fill_(0)

            # for val or test (evaluation), or
            # when target_obj_conn_map_dir is defined (reinforce),
            # load entire shortest path navigation trajectory
            # and load connectivity map for intermediate rewards
            if self.split in ['val', 'test'
                              ] or self.target_obj_conn_map_dir != False:
                target_obj_id, target_room = self._find_episode_target(index)
                assert target_obj_id != False
                assert target_room != False
                self._begin_episode(index, target_obj_id, target_room)

                return (idx, question, answer, False, actions_in, actions_out,
                        action_length, mask)

            return (idx, question, answer, img_feats, actions_in, actions_out,
                    action_length, mask)

        # [NAV] planner-controller
        elif self.input_type in ['pacman']:
            index = self.available_idx[index]

            idx = self.idx[index]
            question = self.questions[index]
            answer = self.answers[index]

            action_length = self.action_lengths[index]
            actions = self.actions[index]

            planner_actions = self.planner_actions[index]
            controller_actions = self.controller_actions[index]

            planner_action_length = self.planner_action_lengths[index]
            controller_action_length = self.controller_action_lengths[index]

            planner_hidden_idx = self.planner_hidden_idx[index]

            if self.split == 'train':
                img_feats = self._padded_img_feats(index)

            if self.split in ['val', 'test'
                              ] or self.target_obj_conn_map_dir != False:
                target_obj_id, target_room = self._find_episode_target(index)
                # Unlike the cnn/lstm modes, an unresolved target is reported
                # by returning None instead of asserting.
                if target_obj_id == False or target_room == False:
                    return None
                self._begin_episode(index, target_obj_id, target_room)

                return (idx, question, answer, actions, action_length)

            planner_pos_queue_idx = self.planner_pos_queue_idx[index]
            controller_pos_queue_idx = self.controller_pos_queue_idx[index]

            planner_img_feats = np.zeros(
                (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
            planner_img_feats[:planner_action_length] = img_feats[
                planner_pos_queue_idx]

            planner_actions_in = planner_actions.clone() - 1
            planner_actions_out = planner_actions[1:].clone() - 2

            planner_actions_in[planner_action_length:].fill_(0)
            planner_mask = planner_actions_out.clone().gt(-1)
            if len(planner_actions_out) > planner_action_length:
                planner_actions_out[planner_action_length:].fill_(0)

            controller_img_feats = np.zeros(
                (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
            controller_img_feats[:controller_action_length] = img_feats[
                controller_pos_queue_idx]

            controller_actions_in = actions[1:].clone() - 2
            if len(controller_actions_in) > controller_action_length:
                controller_actions_in[controller_action_length:].fill_(0)

            # Work on a copy: zero-filling the stored row in place would wipe
            # its -1 padding and make every later fetch of this sample return
            # an all-true controller_mask.
            controller_out = controller_actions.clone()
            controller_mask = controller_out.clone().gt(-1)
            if len(controller_out) > controller_action_length:
                controller_out[controller_action_length:].fill_(0)

            # zero out forced controller return
            for i in range(controller_action_length):
                if i >= self.max_controller_actions - 1 and controller_out[i] == 0 and \
                        (self.max_controller_actions == 1 or
                         controller_out[i - self.max_controller_actions + 1:i].sum()
                         == self.max_controller_actions - 1):
                    controller_mask[i] = 0

            return (idx, question, answer, planner_img_feats,
                    planner_actions_in, planner_actions_out,
                    planner_action_length, planner_mask, controller_img_feats,
                    controller_actions_in, planner_hidden_idx, controller_out,
                    controller_action_length, controller_mask)

    def __len__(self):
        """Number of samples: all questions for 'ques', else available ones."""
        if self.input_type == 'ques':
            return len(self.questions)
        return len(self.available_idx)
class EqaDataset(Dataset): def __init__(self, questions_h5, vocab, num_frames=1, data_json=False, split='train', gpu_id=0, input_type='ques', max_threads_per_gpu=10, to_cache=False, target_obj_conn_map_dir=False, map_resolution=1000): self.questions_h5 = questions_h5 self.vocab = load_vocab(vocab) self.num_frames = num_frames np.random.seed() self.data_json = data_json self.split = split self.gpu_id = gpu_id self.input_type = input_type self.max_threads_per_gpu = max_threads_per_gpu self.target_obj_conn_map_dir = target_obj_conn_map_dir self.map_resolution = map_resolution self.to_cache = to_cache self.img_data_cache = {} if self.data_json != False: data = json.load(open(self.data_json, 'r')) self.envs = data['envs'] self.env_idx = data[self.split + '_env_idx'] self.env_list = [self.envs[x] for x in self.env_idx] self.env_set = list(set(self.env_list)) self.env_set.sort() print('Total envs: %d' % len(list(set(self.envs)))) print('Envs in %s: %d' % (self.split, len(list(set(self.env_idx))))) if input_type != 'ques': '''' If training, randomly sample and load a subset of environments, train on those, and then cycle through to load the rest. On the validation and test set, load in order, and cycle through. For both, add optional caching so that if all environments have been cycled through once, then no need to re-load and instead, just the cache can be used. 
''' self.api_threads = [] self._load_envs(start_idx=0, in_order=True) cnn_kwargs = {'num_classes': 191, 'pretrained': True} self.cnn = MultitaskCNN(**cnn_kwargs) self.cnn.eval() self.cnn.cuda() self.pos_queue = data[self.split + '_pos_queue'] self.boxes = data[self.split + '_boxes'] print('Reading question data into memory') self.idx = _dataset_to_tensor(questions_h5['idx']) self.questions = _dataset_to_tensor(questions_h5['questions']) self.answers = _dataset_to_tensor(questions_h5['answers']) self.actions = _dataset_to_tensor(questions_h5['action_labels']) self.action_lengths = _dataset_to_tensor( questions_h5['action_lengths']) if input_type == 'pacman': self.planner_actions = self.actions.clone().fill_(0) self.controller_actions = self.actions.clone().fill_(-1) self.planner_action_lengths = self.action_lengths.clone().fill_(0) self.controller_action_lengths = self.action_lengths.clone().fill_( 0) self.planner_hidden_idx = self.actions.clone().fill_(0) self.planner_pos_queue_idx, self.controller_pos_queue_idx = [], [] # parsing flat actions to planner-controller hierarchy for i in tqdm(range(len(self.actions))): pa, ca, pq_idx, cq_idx, ph_idx = flat_to_hierarchical_actions(self.actions[i][:self.action_lengths[i]+1]) self.planner_actions[i][:len(pa)] = torch.Tensor(pa) self.controller_actions[i][:len(ca)] = torch.Tensor(ca) self.planner_action_lengths[i] = len(pa)-1 self.controller_action_lengths[i] = len(ca) self.planner_pos_queue_idx.append(pq_idx) self.controller_pos_queue_idx.append(cq_idx) self.planner_hidden_idx[i][:len(ca)] = torch.Tensor(ph_idx) def _pick_envs_to_load(self, split='train', max_envs=10, start_idx=0, in_order=False): if split in ['val', 'test'] or in_order == True: pruned_env_set = self.env_set[start_idx:start_idx + max_envs] else: if max_envs < len(self.env_set): env_inds = np.random.choice( len(self.env_set), max_envs, replace=False) else: env_inds = np.random.choice( len(self.env_set), max_envs, replace=True) pruned_env_set = 
[self.env_set[x] for x in env_inds] return pruned_env_set def _load_envs(self, start_idx=-1, in_order=False): if start_idx == -1: start_idx = self.env_set.index(self.pruned_env_set[-1]) + 1 # Pick envs self.pruned_env_set = self._pick_envs_to_load( split=self.split, max_envs=self.max_threads_per_gpu, start_idx=start_idx, in_order=in_order) if len(self.pruned_env_set) == 0: return # Load api threads start = time.time() if len(self.api_threads) == 0: for i in range(len(self.pruned_env_set)): self.api_threads.append( objrender.RenderAPIThread( w=224, h=224, device=self.gpu_id)) self.cfg = load_config('../House3D/tests/config.json') print('[%.02f] Loaded %d api threads' % (time.time() - start, len(self.api_threads))) start = time.time() # Load houses from multiprocessing import Pool _args = ([h, self.cfg, self.map_resolution] for h in self.pruned_env_set) with Pool(len(self.pruned_env_set)) as pool: self.all_houses = pool.starmap(local_create_house, _args) print('[%.02f] Loaded %d houses' % (time.time() - start, len(self.all_houses))) start = time.time() # Load envs self.env_loaded = {} for i in range(len(self.all_houses)): print('[%02d/%d][split:%s][gpu:%d][house:%s]' % (i + 1, len(self.all_houses), self.split, self.gpu_id, self.all_houses[i].house['id'])) self.env_loaded[self.all_houses[i].house['id']] = House3DUtils( Environment(self.api_threads[i], self.all_houses[i], self.cfg), target_obj_conn_map_dir=self.target_obj_conn_map_dir, build_graph=False) # [TODO] Unused till now self.env_ptr = -1 print('[%.02f] Loaded %d house3d envs' % (time.time() - start, len(self.env_loaded))) # Mark available data indices self.available_idx = [ i for i, v in enumerate(self.env_list) if v in self.env_loaded ] print('Available inds: %d' % len(self.available_idx)) # Flag to check if loaded envs have been cycled through or not # [TODO] Unused till now self.all_envs_loaded = False def _clear_api_threads(self): for i in range(len(self.api_threads)): del self.api_threads[0] 
# Tail of a definition that begins before this chunk; kept verbatim.
self.api_threads = []


# NOTE(review): every function below takes ``self`` -- they look like methods of
# a dataset class whose header lies before this chunk; confirm the enclosing
# scope when restoring the file's indentation.
def _check_if_all_envs_loaded(self):
    """Print cache progress and report whether every environment is cached.

    Returns:
        True when ``self.img_data_cache`` has as many entries as
        ``self.env_list``; in that case ``self.available_idx`` is reset to
        every position of ``self.env_list``.  False otherwise.
    """
    num_cached = len(self.img_data_cache)
    num_envs = len(self.env_list)
    print('[CHECK][Cache:%d][Total:%d]' % (num_cached, num_envs))
    if num_cached != num_envs:
        return False
    self.available_idx = list(range(num_envs))
    return True


def set_camera(self, e, pos, robot_height=1.0):
    """Move the camera of environment wrapper ``e`` to ``pos``.

    Args:
        e: object exposing ``e.env.cam``.
        pos: sequence of exactly four values; ``pos[0]`` -> x, ``pos[2]`` -> z,
            ``pos[3]`` -> yaw.  ``pos[1]`` is ignored.
        robot_height: value written to the camera's y coordinate.

    Raises:
        AssertionError: if ``pos`` does not have four entries.
    """
    assert len(pos) == 4
    cam = e.env.cam
    cam.pos.x = pos[0]
    cam.pos.y = robot_height
    cam.pos.z = pos[2]
    cam.yaw = pos[3]
    cam.updateDirection()


def render(self, e):
    """Return whatever ``e.env.render()`` produces for the current camera."""
    return e.env.render()


def get_frames(self, e, pos_queue, preprocess=True):
    """Render one frame per camera pose and stack them into an array.

    Args:
        e: environment wrapper forwarded to ``set_camera`` / ``render``.
        pos_queue: a list of poses, or a single pose (wrapped into a list).
        preprocess: when truthy, each frame is transposed with axes
            ``(2, 0, 1)`` and divided by 255.

    Returns:
        ``np.ndarray`` stacking the float32 frames in pose order.
    """
    if not isinstance(pos_queue, list):
        pos_queue = [pos_queue]

    frames = []
    for pos in pos_queue:
        self.set_camera(e, pos)
        # np.asarray instead of np.array(copy=False): same result on NumPy 1.x,
        # but does not raise on NumPy >= 2 when the float32 cast needs a copy.
        img = np.asarray(self.render(e), dtype=np.float32)
        if preprocess:
            img = img.transpose(2, 0, 1)
            img = img / 255.0
        frames.append(img)
    return np.array(frames)


def _cnn_features(self, images):
    """Run ``self.cnn`` on a batch of frames and return a NumPy copy of the output."""
    batch = Variable(torch.FloatTensor(images).cuda())
    return self.cnn(batch).data.cpu().numpy().copy()


def _cached_img_feats(self, index, pad):
    """Fetch (or compute and, if enabled, cache) CNN features for sample ``index``.

    With ``pad`` set, the features are written into a zero array that has
    ``self.actions.shape[1]`` rows.
    """
    if self.to_cache == True and index in self.img_data_cache:
        return self.img_data_cache[index]

    house = self.env_loaded[self.env_list[index]]
    images = self.get_frames(house, self.pos_queue[index], preprocess=True)
    img_feats = self._cnn_features(images)
    if pad:
        padded = np.zeros(
            (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
        padded[:img_feats.shape[0], :] = img_feats
        img_feats = padded
    if self.to_cache == True:
        self.img_data_cache[index] = img_feats
    return img_feats


def _set_episode_target(self, index):
    """Resolve the target object/room of sample ``index`` and store episode state.

    Matches the annotated boxes in ``self.boxes[index]`` against the loaded
    environment by exact bounding-box equality, registers the match through
    ``set_target_object`` and records ``episode_pos_queue``,
    ``episode_house``, ``target_room`` and ``target_obj`` on ``self``.

    Raises:
        AssertionError: if no object or no room matches.
        IndexError: if ``self.boxes[index]`` holds no matching annotation.
    """
    house = self.env_loaded[self.env_list[index]]
    target_obj_id, target_room = False, False

    bbox_obj = [
        x for x in self.boxes[index]
        if x['type'] == 'object' and x['target'] == True
    ][0]['box']
    for obj_id in house.objects:
        box2 = house.objects[obj_id]['bbox']
        if all(bbox_obj['min'][k] == box2['min'][k] for k in range(3)) and \
                all(bbox_obj['max'][k] == box2['max'][k] for k in range(3)):
            target_obj_id = obj_id
            break

    # NOTE(review): the room annotation is selected with ``target == False``
    # (unlike the object above) -- kept as in the original; confirm intent.
    bbox_room = [
        x for x in self.boxes[index]
        if x['type'] == 'room' and x['target'] == False
    ][0]
    for room in house.env.house.all_rooms:
        if all(room['bbox']['min'][k] == bbox_room['box']['min'][k] for k in range(3)) and \
                all(room['bbox']['max'][k] == bbox_room['box']['max'][k] for k in range(3)):
            target_room = room
            break

    assert target_obj_id != False
    assert target_room != False
    house.set_target_object(house.objects[target_obj_id], target_room)

    # [NOTE] only works for batch size = 1
    self.episode_pos_queue = self.pos_queue[index]
    self.episode_house = house
    self.target_room = target_room
    self.target_obj = house.objects[target_obj_id]


def get_hierarchical_features_till_spawn(self, actions, backtrack_steps=0):
    """Build planner/controller inputs for the current episode up to a spawn step.

    Relies on ``self.episode_house`` and ``self.episode_pos_queue``, which
    ``__getitem__`` stores when it resolves an episode target.

    Args:
        actions: flat action sequence passed to ``flat_to_hierarchical_actions``.
        backtrack_steps: offset subtracted from the last index
            (``len(actions) - 1``) to obtain the target position index.

    Returns:
        Tuple ``(planner_actions_in, planner_img_feats, controller_step,
        controller_action_in, controller_img_feat, target_pos)`` where
        ``controller_step`` is True when the target index is not one of the
        planner queue indices; the two controller entries are ``False`` when
        ``controller_step`` is False, and ``target_pos`` is
        ``self.episode_pos_queue[target_pos_idx]``.
    """
    action_length = len(actions) - 1
    pa, _ca, pq_idx, _cq_idx, _ph_idx = flat_to_hierarchical_actions(actions)

    target_pos_idx = action_length - backtrack_steps
    controller_step = target_pos_idx not in pq_idx

    pq_idx_pruned = [v for v in pq_idx if v <= target_pos_idx]
    pa_pruned = pa[:len(pq_idx_pruned) + 1]

    images = self.get_frames(
        self.episode_house, self.episode_pos_queue, preprocess=True)
    raw_img_feats = self._cnn_features(images)

    controller_img_feat, controller_action_in = False, False
    if controller_step:
        controller_img_feat = torch.from_numpy(
            raw_img_feats[target_pos_idx].copy())
        controller_action_in = pa_pruned[-1] - 2

    planner_img_feats = torch.from_numpy(raw_img_feats[pq_idx_pruned].copy())
    planner_actions_in = torch.from_numpy(np.array(pa_pruned[:-1]) - 1)

    return (planner_actions_in, planner_img_feats, controller_step,
            controller_action_in, controller_img_feat,
            self.episode_pos_queue[target_pos_idx])


def __getitem__(self, index):
    """Return one sample; the tuple layout depends on ``self.input_type``.

    * ``'ques'``: ``(idx, question, answer)``.
    * ``'ques,image'``: adds the last ``num_frames`` rendered frames and the
      matching action windows.
    * ``'cnn'`` / ``'cnn+q'``: CNN features with either the full trajectory
      (val/test, or when ``target_obj_conn_map_dir`` is set) or a random
      window of ``num_frames`` steps.
    * ``'lstm'`` / ``'lstm+q'``: padded features, shifted actions and a mask.
    * ``'pacman'``: planner and controller inputs/targets with masks.

    For every type other than ``'ques'`` the incoming ``index`` is first
    mapped through ``self.available_idx``.  Any other ``input_type`` falls
    through and yields ``None``.
    """
    # [VQA] question-only
    if self.input_type == 'ques':
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        return (idx, question, answer)

    # [VQA] question+image
    elif self.input_type == 'ques,image':
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]

        actions_in = actions[action_length - self.num_frames:action_length]
        actions_out = actions[action_length - self.num_frames + 1:
                              action_length + 1]

        if self.to_cache == True and index in self.img_data_cache:
            images = self.img_data_cache[index]
        else:
            pos_queue = self.pos_queue[index][-self.num_frames:]  # last frames
            images = self.get_frames(
                self.env_loaded[self.env_list[index]],
                pos_queue,
                preprocess=True)
            if self.to_cache == True:
                self.img_data_cache[index] = images.copy()

        return (idx, question, answer, images, actions_in, actions_out,
                action_length)

    # [NAV] question+cnn
    elif self.input_type in ['cnn', 'cnn+q']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]

        img_feats = self._cached_img_feats(index, pad=False)

        # for val or test (evaluation), or
        # when target_obj_conn_map_dir is defined (reinforce),
        # load entire shortest path navigation trajectory
        # and load connectivity map for intermediate rewards
        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._set_episode_target(index)

            actions_in = actions[:action_length]
            actions_out = actions[1:action_length + 1] - 2

            return (idx, question, answer, img_feats, actions_in,
                    actions_out, action_length)

        # if action_length is n
        # images.shape[0] is also n
        # actions[0] is <START>
        # actions[n] is <END>

        # grab 5 random frames
        # [NOTE]: this'll break for longer-than-5 navigation sequences
        start_idx = np.random.choice(img_feats.shape[0] + 1 -
                                     self.num_frames)
        img_feats = img_feats[start_idx:start_idx + self.num_frames]

        actions_in = actions[start_idx:start_idx + self.num_frames]
        actions_out = actions[start_idx + self.num_frames] - 2

        return (idx, question, answer, img_feats, actions_in, actions_out,
                action_length)

    # [NAV] question+lstm
    elif self.input_type in ['lstm', 'lstm+q']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]

        if self.split == 'train':
            img_feats = self._cached_img_feats(index, pad=True)

        actions_in = actions.clone() - 1
        actions_out = actions[1:].clone() - 2

        actions_in[action_length:].fill_(0)
        # The mask is taken before the padded targets are overwritten with 0.
        mask = actions_out.clone().gt(-1)
        if len(actions_out) > action_length:
            actions_out[action_length:].fill_(0)

        # for val or test (evaluation), or
        # when target_obj_conn_map_dir is defined (reinforce),
        # load entire shortest path navigation trajectory
        # and load connectivity map for intermediate rewards
        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._set_episode_target(index)
            return (idx, question, answer, False, actions_in, actions_out,
                    action_length, mask)

        return (idx, question, answer, img_feats, actions_in, actions_out,
                action_length, mask)

    # [NAV] planner-controller
    elif self.input_type in ['pacman']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]

        planner_actions = self.planner_actions[index]
        controller_actions = self.controller_actions[index]
        planner_action_length = self.planner_action_lengths[index]
        controller_action_length = self.controller_action_lengths[index]
        planner_hidden_idx = self.planner_hidden_idx[index]

        if self.split == 'train':
            img_feats = self._cached_img_feats(index, pad=True)

        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._set_episode_target(index)
            return (idx, question, answer, actions, action_length)

        planner_pos_queue_idx = self.planner_pos_queue_idx[index]
        controller_pos_queue_idx = self.controller_pos_queue_idx[index]

        planner_img_feats = np.zeros(
            (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
        planner_img_feats[:planner_action_length] = img_feats[
            planner_pos_queue_idx]

        planner_actions_in = planner_actions.clone() - 1
        planner_actions_out = planner_actions[1:].clone() - 2

        planner_actions_in[planner_action_length:].fill_(0)
        planner_mask = planner_actions_out.clone().gt(-1)
        if len(planner_actions_out) > planner_action_length:
            planner_actions_out[planner_action_length:].fill_(0)

        controller_img_feats = np.zeros(
            (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
        controller_img_feats[:controller_action_length] = img_feats[
            controller_pos_queue_idx]

        controller_actions_in = actions[1:].clone() - 2
        if len(controller_actions_in) > controller_action_length:
            controller_actions_in[controller_action_length:].fill_(0)

        # Work on a copy: filling the padded tail in place would overwrite
        # self.controller_actions and make the mask wrong the next time this
        # sample is read.
        controller_out = controller_actions.clone()
        controller_mask = controller_out.clone().gt(-1)
        if len(controller_out) > controller_action_length:
            controller_out[controller_action_length:].fill_(0)

        return (idx, question, answer, planner_img_feats,
                planner_actions_in, planner_actions_out,
                planner_action_length, planner_mask, controller_img_feats,
                controller_actions_in, planner_hidden_idx, controller_out,
                controller_action_length, controller_mask)


def __len__(self):
    """Number of samples: all questions for ``'ques'``, else the loaded subset."""
    if self.input_type == 'ques':
        return len(self.questions)
    else:
        return len(self.available_idx)
class EqaDataset(Dataset):
    """Question / action-trajectory dataset read from an HDF5-style mapping.

    Questions, answers, actions, robot positions, action lengths and masks
    are converted with ``_dataset_to_tensor`` at construction time; images
    stay as the ``questions_h5['images']`` handle and are indexed per sample.
    """

    def __init__(self, questions_h5, vocab, num_frames=1, split='train',
                 gpu_id=0, input_type='ques', max_threads_per_gpu=10,
                 map_resolution=1000):
        """Store the configuration, load the data and build the CNN on the GPU."""
        self.questions_h5 = questions_h5
        self.vocab = load_vocab(vocab)
        np.random.seed()

        self.split = split
        self.gpu_id = gpu_id
        self.num_frames = num_frames
        self.input_type = input_type
        self.max_threads_per_gpu = max_threads_per_gpu
        self.map_resolution = map_resolution

        print('Reading question data into memory')
        self.questions = _dataset_to_tensor(questions_h5['questions'])
        self.answers = _dataset_to_tensor(questions_h5['answers'])
        # A trailing singleton axis is appended to the action tensor.
        self.actions = _dataset_to_tensor(questions_h5['actions']).unsqueeze(2)
        self.robot_positions = _dataset_to_tensor(
            questions_h5['robot_positions'], dtype=np.float32)
        self.action_images = questions_h5['images']
        self.action_lengths = _dataset_to_tensor(
            questions_h5['action_lengths'])
        self.action_masks = _dataset_to_tensor(questions_h5['mask'])

        # if input_type != 'ques':
        # Legacy note kept from the original (a no-op string literal); no
        # environment loading or caching logic is visible in this class.
        '''
        If training, randomly sample and load a subset of environments,
        train on those, and then cycle through to load the rest.

        On the validation and test set, load in order, and cycle through.

        For both, add optional caching so that if all environments
        have been cycled through once, then no need to re-load and
        instead, just the cache can be used.
        '''

        self.cnn = MultitaskCNN(num_classes=191, pretrained=True)
        self.cnn.eval()
        self.cnn.cuda()

    def __getitem__(self, index):
        """Return the sample at ``index``; layout depends on ``input_type``.

        * ``'pacman'`` with split ``'val'``/``'test'``:
          ``(idx, question, answer, actions, robot_positions, action_lengths)``.
        * ``'pacman'`` with split ``'train'``: additionally runs the CNN on
          the sample's images and returns features, cloned action
          inputs/targets, positions and a boolean mask.
        * ``'ques,image'``: the last ``num_frames`` images (as float32) with
          the matching action windows.

        Any other combination yields ``None``.
        """
        mode = self.input_type

        if mode == 'pacman':
            question = self.questions[index]
            answer = self.answers[index]
            actions = self.actions[index]
            masks = self.action_masks[index]
            positions = self.robot_positions[index]
            lengths = self.action_lengths[index]

            # Evaluation splits get the raw tensors back.
            if self.split in ('val', 'test'):
                return (index, question, answer, actions, positions, lengths)

            if self.split != 'train':
                return None

            # Training: encode the stored images with the CNN.
            frames = torch.FloatTensor(self.action_images[index]).cuda()
            feats = self.cnn(Variable(frames)).data.cpu().numpy().copy()

            actions_in = actions.clone()
            actions_out = actions[1:].clone()
            bool_mask = masks[:39].clone().gt(0)
            positions = positions.clone()
            return (index, question, answer, feats, actions_in, actions_out,
                    positions, bool_mask, lengths)

        if mode == 'ques,image':
            question = self.questions[index]
            answer = self.answers[index]
            length = self.action_lengths[index]
            actions = self.actions[index]

            first = length - self.num_frames
            actions_in = actions[first:length]
            actions_out = actions[first + 1:length + 1]
            images = self.action_images[index][first:length].astype(
                np.float32)
            return (index, question, answer, images, actions_in, actions_out,
                    length)

        return None

    def __len__(self):
        """Number of questions held in memory (same for every input type)."""
        return len(self.questions)
def test(rank, test_model_dir):
    """Roll out the planner greedily in the simulator for every generated question.

    Loads a ``NavPlannerControllerModel`` checkpoint, builds the test scene
    and, for each question, repeatedly feeds the current camera image,
    position and previous action to ``model.planner_step``.  The
    highest-scoring action is taken until the stop action (9) is produced
    or 30 steps have run.  The length of the resulting action list decides
    which arm primitive is executed: 3-19 actions -> ``UR5_action(..., 2)``
    (sucking), 20 or more -> ``UR5_action(..., 1)`` (pushing); shorter
    sequences trigger nothing.

    Args:
        rank: unused in this function; kept so existing callers keep working.
        test_model_dir: path handed to ``torch.load``; the loaded object must
            hold the model weights under the ``'state'`` key.
    """
    max_action = 30
    stop_action = 9

    model = NavPlannerControllerModel(
        question_vocab=load_vocab(args.vocab_json))
    checkpoint = torch.load(test_model_dir)  # load checkpoint
    model.load_state_dict(checkpoint['state'])
    # Inference only: switch the planner to eval mode (as is done for the CNN
    # below) so dropout / batch-norm layers, if any, behave deterministically.
    model.eval()

    cnn = MultitaskCNN(num_classes=191, pretrained=True)
    cnn.eval()
    cnn.cuda()

    # create simulation environment
    scene = "test-10-obj-00.txt"
    my_env = enviroment.Environment(is_testing=1, testing_file=scene)
    object_exist_list = my_env.ur5.object_type
    print("the objetct which is exist:")
    print(object_exist_list)

    # create testing questions
    my_question = Qusetion(object_exist_list)
    testing_questions = my_question.createQueue()
    vocab = my_question.create_vocab()

    for question in testing_questions:
        planner_hidden = None
        position = [0, 0]
        action_in_raw = [0]  # start action
        actions = []

        print(question['question'])
        question_tokens = my_question.tokenize(
            question['question'], punctToRemove=['?'], addStartToken=False)
        encoded_question_raw = my_question.encode(
            question_tokens, vocab['questionTokenToIdx'])
        encoded_question_raw.append(0)
        encoded_question = Variable(
            _dataset_to_tensor(np.array(encoded_question_raw))).unsqueeze(0)

        action_times = 0
        while action_times < max_action:
            # Previous action with two leading singleton axes added.
            action_in = Variable(_dataset_to_tensor(action_in_raw))
            action_in = action_in.unsqueeze(0).unsqueeze(0)

            _, rgb_image_raw = my_env.camera.get_camera_data()
            position_in, planner_img_feats_var = data2input(
                position, rgb_image_raw, cnn)

            output_data, planner_hidden = model.planner_step(
                encoded_question, planner_img_feats_var, action_in,
                position_in, planner_hidden)
            planner_scores = F.log_softmax(output_data, dim=1).data.numpy()[0]

            # Greedy choice: first index holding the maximum score.
            action_out = np.argmax(planner_scores)
            actions.append(action_out)
            action_in_raw = [action_out]

            if action_out == stop_action:
                print('stop')
                break

            dx, dy = order2action(action_out)
            position[0] += dx
            position[1] += dy
            action_times += 1

        if 2 < len(actions) < 20:
            # sucking: start and end point are both the final position
            my_env.UR5_action(position + position, 2)
        elif len(actions) >= 20:
            # pushing: start point accumulates the first half of the moves,
            # end point accumulates all of them
            position_start = [0, 0]
            position_end = [0, 0]
            half = len(actions) / 2
            for step, action in enumerate(actions):
                dx, dy = order2action(action)
                if step < half:
                    position_start[0] += dx
                    position_start[1] += dy
                position_end[0] += dx
                position_end[1] += dy
            my_env.UR5_action(position_start + position_end, 1)
class EqaDataset(Dataset):
    """Question / action-trajectory dataset with optional heat-map input.

    Tensors are built from ``questions_h5`` with ``_dataset_to_tensor`` at
    construction time; ``images`` and ``heatmaps`` stay as handles into
    ``questions_h5`` and are indexed per sample.
    """

    def __init__(self, questions_h5, vocab, num_frames=1, split='train',
                 gpu_id=0, input_type='ques', max_threads_per_gpu=10,
                 map_resolution=1000):
        """Store the configuration, load the data and build the CNN on the GPU."""
        self.questions_h5 = questions_h5
        self.vocab = load_vocab(vocab)
        np.random.seed()

        self.split = split
        self.gpu_id = gpu_id
        self.num_frames = num_frames
        self.input_type = input_type
        self.max_threads_per_gpu = max_threads_per_gpu
        self.map_resolution = map_resolution

        print('Reading question data into memory')
        self.questions = _dataset_to_tensor(questions_h5['questions'])
        self.answers = _dataset_to_tensor(questions_h5['answers'])
        # A trailing singleton axis is appended to the action tensor.
        self.actions = _dataset_to_tensor(questions_h5['actions']).unsqueeze(2)
        self.robot_positions = _dataset_to_tensor(
            questions_h5['robot_positions'], dtype=np.float32)
        self.action_images = questions_h5['images']
        self.action_maps = questions_h5['heatmaps']
        self.action_lengths = _dataset_to_tensor(
            questions_h5['action_lengths'])
        self.action_masks = _dataset_to_tensor(questions_h5['mask'])

        self.cnn = MultitaskCNN(num_classes=191, pretrained=True)
        self.cnn.eval()
        self.cnn.cuda()

    def _planner_sample(self, index, with_map):
        """Build the 'nomap' / 'addmap' sample; ``with_map`` adds the heat map."""
        question = self.questions[index]
        answer = self.answers[index]
        # Answer ids above 13 are shifted down by one.
        if answer > 13:
            answer = answer - 1
        actions = self.actions[index]
        masks = self.action_masks[index]
        positions = self.robot_positions[index]
        lengths = self.action_lengths[index]

        # Evaluation splits get the raw tensors back.
        if self.split in ('val', 'test'):
            return (index, question, answer, actions, positions, lengths)

        if self.split != 'train':
            return None

        # Training: encode the first stored image as a batch of one.
        first_image = self.action_images[index][0]
        image_var = Variable(torch.FloatTensor(first_image).cuda())
        image_var = image_var.unsqueeze(0)
        img_feats = self.cnn(image_var).data.cpu().numpy().copy()

        if with_map:
            first_map = self.action_maps[index][0]
            map_feats = Variable(torch.FloatTensor(first_map).cuda())

        actions_in = actions.clone()
        actions_out = actions[1:].clone()
        bool_mask = masks[:39].clone().gt(0)
        positions = positions.clone()

        if with_map:
            return (index, question, answer, img_feats, map_feats,
                    actions_in, actions_out, positions, bool_mask, lengths)
        return (index, question, answer, img_feats, actions_in, actions_out,
                positions, bool_mask, lengths)

    def __getitem__(self, index):
        """Return the sample at ``index``; layout depends on ``input_type``.

        * ``'nomap'`` / ``'addmap'`` with split ``'val'``/``'test'``:
          ``(idx, question, answer, actions, robot_positions, action_lengths)``.
        * ``'nomap'`` with split ``'train'``: CNN features of the first image
          plus cloned action inputs/targets, positions and a boolean mask.
        * ``'addmap'`` with split ``'train'``: as above with the first heat
          map inserted after the image features.
        * ``'ques,image'``: the first two stored images (as float32) with the
          action windows ending at the sample's action length.

        Any other combination yields ``None``.
        """
        mode = self.input_type

        if mode == 'nomap':
            return self._planner_sample(index, with_map=False)

        if mode == 'addmap':
            return self._planner_sample(index, with_map=True)

        if mode == 'ques,image':
            question = self.questions[index]
            answer = self.answers[index]
            if answer > 13:
                answer = answer - 1
            length = self.action_lengths[index]
            actions = self.actions[index]

            first = length - self.num_frames
            actions_in = actions[first:length]
            actions_out = actions[first + 1:length + 1]
            images = self.action_images[index][0:2].astype(np.float32)
            return (index, question, answer, images, actions_in, actions_out,
                    length)

        return None

    def __len__(self):
        """Number of questions held in memory (same for every input type)."""
        return len(self.questions)