def __getitem__(self, idx):
    """Return one k-shot episode: (support_ques, support_ans, query_ques, query_ans).

    Launches ``self.k_shot`` threads that each sample a support problem from
    one randomly chosen module, while the main thread samples the query
    problem from the same module.  All four return values are padded
    ``torch.LongTensor``s.  ``idx`` is ignored — sampling is random.
    """
    # Shared list the worker threads append encoded (q, a) pairs into.
    # CPython's list.append is atomic, but the resulting ORDER of support
    # examples is nondeterministic (depends on thread scheduling).
    problem_data = []
    problem_threads = []
    # Pick one module uniformly at random; [1] takes the module object from
    # the (name, module) pair.
    sample_module = self.sampled_modules[np.random.randint(
        0, len(self.sampled_modules))][1]
    # Fan out k_shot support-problem samplers.  NOTE(review): threads give no
    # CPU parallelism under the GIL — presumably used to overlap any native
    # work inside sample_from_module; confirm before simplifying.
    for _ in range(self.k_shot):
        problem_threads.append(
            Thread(target=self.supportProblem, args=(
                sample_module,
                problem_data,
            )))
        problem_threads[-1].start()
    # Query problem is drawn from the same module on the main thread.
    problem = sample_from_module(sample_module, show_dropped=False)[0]
    # Encode question/answer, pad ragged cells with PAD via DataFrame.fillna,
    # and shape as a (1, seq_len) batch.
    query_ques = torch.LongTensor(
        pd.DataFrame(np_encode_string(str(
            problem[0]))).fillna(PAD).values.reshape(1, -1))
    query_ans = torch.LongTensor(
        pd.DataFrame(np_encode_string(str(
            problem[1]))).fillna(PAD).values.reshape(1, -1))
    # Wait for every support sampler before reading problem_data.
    for p_t in problem_threads:
        p_t.join()
    # NOTE(review): zip(*problem_data) raises if k_shot == 0 — assumed > 0.
    support_ques, support_ans = zip(*problem_data)
    # Pad the variable-length support sequences into dense (k_shot, max_len)
    # tensors the same way as the query.
    support_ques = torch.LongTensor(
        pd.DataFrame(support_ques).fillna(PAD).values)
    support_ans = torch.LongTensor(
        pd.DataFrame(support_ans).fillna(PAD).values)
    return support_ques, support_ans, query_ques, query_ans
def __getitem__(self, idx):
    """Return one (question, answer) pair of encoded token arrays.

    ``idx`` is ignored: a module is drawn uniformly at random each call.
    """
    # Choose a random (name, module) pair; [1] is the module itself.
    pick = np.random.randint(0, len(self.sampled_modules), (1))[0]
    module = self.sampled_modules[pick][1]
    problem = sample_from_module(module, show_dropped=False)[0]
    # np_encode_string tokenizes and adds BOS and EOS markers.
    ques = np_encode_string(str(problem[0]))
    anws = np_encode_string(str(problem[1]))
    return ques, anws
def __getitem__(self, idx):
    """Return one (question, answer) pair, choosing the module eps-greedily.

    With probability ``starting_eps + current_iteration * eps_grad`` a module
    is picked uniformly (explore); otherwise it is drawn from the learned
    ``category_probabilities`` (exploit).  ``idx`` is ignored.
    """
    eps = self.starting_eps + self.current_iteration * self.eps_grad
    if np.random.random() < eps:
        # Explore: uniform over all modules.
        choice = np.random.randint(0, len(self.sampled_modules))
    else:
        # Exploit: sample a category index from the probability vector.
        choice = torch.multinomial(self.category_probabilities, 1)[0]
    selected_module = self.sampled_modules[choice][1]
    problem = sample_from_module(selected_module, show_dropped=False)[0]
    # Tokenize and wrap with BOS/EOS markers.
    ques = np_encode_string(str(problem[0]))
    anws = np_encode_string(str(problem[1]))
    return ques, anws
def predict_single(question, model, device='cpu', beam_size=5,
                   max_token_seq_len=MAX_ANSWER_SIZE, n_best=1):
    """Run beam-search inference for a single question string.

    Args:
        question: raw question text; encoded via ``np_encode_string``.
        model: the trained seq2seq model wrapped by ``Generator``.
        device: device the scores live on (tensors are moved to CPU to read).
        beam_size: beam width for the search.
        max_token_seq_len: maximum decoded answer length.
        n_best: number of hypotheses kept per input.

    Returns:
        A list of ``{"resp": decoded_string, "score": float}`` dicts, one per
        returned hypothesis.
    """
    generator = Generator(model, device, beam_size=beam_size,
                          max_token_seq_len=max_token_seq_len, n_best=n_best)
    # Single-question "batch" of encoded token ids.
    batch_qs = torch.LongTensor([np_encode_string(question)])
    all_hyp, all_scores = generator.generate_batch(batch_qs)
    # Fix: the original also decoded all_hyp[0][0] into an unused local and
    # carried a block of dead commented-out padding code; both removed.
    resps = []
    for idx_seqs, scores in zip(all_hyp, all_scores):
        for idx_seq, score in zip(idx_seqs, scores):
            resps.append({
                "resp": np_decode_string(np.array(idx_seq)),
                "score": score.cpu().item(),
            })
    return resps
def __getitem__(self, idx):
    """Return one (question, answer) pair at curriculum-scaled difficulty.

    Difficulty grows linearly from 0 to 1 over ``total_iterations``; the
    module pool is rebuilt at the current difficulty on every call.
    ``idx`` is ignored.
    """
    difficulty = self.current_iteration / self.total_iterations
    # Rebuild and re-filter the module pool for the current difficulty.
    initial_modules = modules.train(_make_entropy_fn(difficulty, 1))
    filtered_modules = _filter_and_flatten(self.categories, initial_modules)
    self.sampled_modules = list(six.iteritems(filtered_modules))
    # Uniform random module choice; [1] is the module object.
    pick = np.random.randint(0, len(self.sampled_modules), (1))[0]
    problem = sample_from_module(self.sampled_modules[pick][1],
                                 show_dropped=False)[0]
    # Tokenize and add BOS/EOS markers.
    ques = np_encode_string(str(problem[0]))
    anws = np_encode_string(str(problem[1]))
    # NOTE(review): this counter is per-dataset-instance state; with
    # multi-worker DataLoaders each worker advances its own copy — confirm
    # that is intended.
    self.current_iteration += 1
    return ques, anws
def predict_multiple(questions, model, device='cpu', beam_size=5,
                     max_token_seq_len=MAX_ANSWER_SIZE, n_best=1,
                     batch_size=1, num_workers=1):
    """Run beam-search inference over an iterable of question strings.

    Args:
        questions: iterable of raw question texts.
        model: trained seq2seq model wrapped by ``Generator``.
        device: target device passed through to ``Generator``/``predict``.
        beam_size: beam width for the search.
        max_token_seq_len: maximum decoded answer length.
        n_best: hypotheses kept per input.
        batch_size: DataLoader batch size (default 1 preserves old behavior).
        num_workers: accepted for API compatibility; currently unused.

    Returns:
        Whatever ``predict`` returns for the batched questions.
    """
    encoded = [np_encode_string(q) for q in questions]
    # Fix: batch_size was hard-coded to 1, silently ignoring the parameter.
    # NOTE(review): num_workers stays disabled — it was deliberately
    # commented out in the original; confirm safety before enabling.
    loader = data.DataLoader(encoded, batch_size=batch_size, shuffle=False,
                             collate_fn=question_to_batch_collate_fn)
    generator = Generator(model, device, beam_size=beam_size,
                          max_token_seq_len=max_token_seq_len, n_best=n_best)
    return predict(generator, loader, device)
def supportProblem(self, sample_module, problem_data):
    """Sample one support problem from ``sample_module`` and append its
    encoded (question, answer) pair to the shared ``problem_data`` list.

    Designed as a Thread target: mutates ``problem_data`` in place and
    returns nothing.
    """
    problem = sample_from_module(sample_module, show_dropped=False)[0]
    encoded_pair = (
        np_encode_string(str(problem[0])),
        np_encode_string(str(problem[1])),
    )
    problem_data.append(encoded_pair)