def vectorize(self, observations):
    """Turn a batch of observations into input, target and candidate tensors.

    The observations are padded with ``PaddingUtils.pad_text`` (labels and
    eval labels both count as targets because ``eval_labels=True``), wrapped
    in ``Variable`` objects and, when ``self.use_cuda`` is set, copied into
    the preallocated ``self.xs`` / ``self.ys`` buffers.

    Candidates are only collected when no observation carries ``'labels'``
    and ``self.rank`` is truthy.

    Returns:
        A 7-tuple ``(xs, ys, labels, valid_inds, cands, valid_cands,
        is_training)``.  All seven entries are ``None`` when ``pad_text``
        reports no inputs.  ``cands`` is a list with one ``LongTensor`` per
        observation that has ``'label_candidates'``; ``valid_cands`` holds
        the matching ``(batch_position, observation_index, candidate_texts)``
        triples.  Both are ``None`` when candidates are not collected.
    """
    is_training = any(['labels' in ob for ob in observations])

    padded = PaddingUtils.pad_text(
        observations, self.dict, end_idx=self.END_IDX,
        null_idx=self.NULL_IDX, dq=True, eval_labels=True,
        truncate=self.truncate)
    xs, ys, labels, valid_inds, _, _ = padded
    if xs is None:
        return (None,) * 7

    has_targets = ys is not None
    xs = torch.LongTensor(xs)
    if has_targets:
        ys = torch.LongTensor(ys)

    if not self.use_cuda:
        xs = Variable(xs)
        if has_targets:
            ys = Variable(ys)
    else:
        # Reuse the buffers held on the agent instead of allocating new ones.
        self.xs.resize_(xs.size())
        self.xs.copy_(xs)
        xs = Variable(self.xs)
        if has_targets:
            self.ys.resize_(ys.size())
            self.ys.copy_(ys)
            ys = Variable(self.ys)

    if is_training or not self.rank:
        # No ranking requested for this batch.
        return xs, ys, labels, valid_inds, None, None, is_training

    cands = []
    valid_cands = []
    for batch_pos, obs_idx in enumerate(valid_inds):
        observation = observations[obs_idx]
        if 'label_candidates' not in observation:
            continue
        cand_texts = list(observation['label_candidates'])
        # Each candidate is padded as if it were an observation's text.
        cand_ids, _, _, kept, *_ = PaddingUtils.pad_text(
            [{'text': text} for text in cand_texts], self.dict,
            null_idx=self.NULL_IDX, dq=True, truncate=self.truncate)
        valid_cands.append(
            (batch_pos, obs_idx, [cand_texts[k] for k in kept]))
        cand_tensor = torch.LongTensor(cand_ids)
        cands.append(cand_tensor.cuda() if self.use_cuda else cand_tensor)

    return xs, ys, labels, valid_inds, cands, valid_cands, is_training
def vectorize(self, observations):
    """Convert a list of observations into input & target tensors.

    The observations are padded with ``PaddingUtils.pad_text`` (no end token
    is requested: ``end_idx=None``), converted to ``LongTensor`` and wrapped
    in ``Variable``.  When ``self.use_cuda`` is set the data is copied into
    the preallocated ``self.xs`` / ``self.ys`` buffers.

    Returns:
        A 5-tuple ``(xs, ys, labels, valid_inds, is_training)``.
        ``is_training`` is true when any observation contains ``'labels'``.
        All five entries are ``None`` when ``pad_text`` reports no inputs;
        ``ys`` alone is ``None`` when ``pad_text`` reports no targets.
    """
    is_training = any('labels' in obs for obs in observations)
    xs, ys, labels, valid_inds, _, _ = PaddingUtils.pad_text(
        observations, self.dict, end_idx=None,
        null_idx=self.NULL_IDX, dq=True, eval_labels=True,
        truncate=self.truncate)
    if xs is None:
        # Same arity as the normal return so callers can always unpack
        # five values.
        return None, None, None, None, None

    xs = torch.LongTensor(xs)
    if ys is not None:
        # Targets may be absent; LongTensor(None) would raise.
        ys = torch.LongTensor(ys)

    if self.use_cuda:
        # copy to gpu
        self.xs.resize_(xs.size())
        self.xs.copy_(xs)
        xs = Variable(self.xs)
        if ys is not None:
            self.ys.resize_(ys.size())
            self.ys.copy_(ys)
            ys = Variable(self.ys)
    else:
        xs = Variable(xs)
        if ys is not None:
            ys = Variable(ys)

    return xs, ys, labels, valid_inds, is_training
def vectorize(self, observations):
    """Build input and target tensors from a batch of observations.

    Returns:
        A 5-tuple ``(xs, ys, labels, valid_inds, is_training)``.
        ``is_training`` is true when any observation contains ``'labels'``.
        All five entries are ``None`` when ``PaddingUtils.pad_text`` reports
        no inputs; ``ys`` alone is ``None`` when it reports no targets.
    """
    def to_variable(indices):
        # Lists of indices -> LongTensor (on the GPU if enabled) -> Variable.
        tensor = torch.LongTensor(indices)
        if self.use_cuda:
            tensor = tensor.cuda()
        return Variable(tensor)

    is_training = False
    for observation in observations:
        if 'labels' in observation:
            is_training = True
            break

    # pad_text parses the observations with the provided dictionary and
    # hands back padded lists of indices.
    padded = PaddingUtils.pad_text(
        observations, self.dict, end_idx=self.END_IDX,
        null_idx=self.NULL_IDX, dq=False, eval_labels=True)
    xs, ys, labels, valid_inds, _, _ = padded
    if xs is None:
        return (None,) * 5

    xs = to_variable(xs)
    if ys is not None:
        ys = to_variable(ys)
    return xs, ys, labels, valid_inds, is_training
def vectorize(self, observations):
    """Convert a list of observations into input & target tensors.

    The observations are padded with ``PaddingUtils.pad_text`` and wrapped
    in ``Variable``; when ``self.use_cuda`` is set they are first copied into
    the preallocated ``self.xs`` / ``self.ys`` buffers.
    NOTE(review): ``xs`` / ``ys`` are used as tensors right away
    (``.size()``), so ``pad_text`` is assumed to return tensors here --
    confirm against the ``PaddingUtils`` version in use.

    When there are no targets and ``self.rank`` is truthy, the
    ``'label_candidates'`` of each valid observation are parsed with
    ``self.parse`` and packed into one rectangular tensor padded with
    ``self.NULL_IDX``.

    Returns:
        A 6-tuple ``(xs, ys, labels, valid_inds, cands, valid_cands)``.
        All six entries are ``None`` when ``pad_text`` reports no inputs.
        ``cands`` is ``None`` unless at least one observation supplied
        candidates; ``valid_cands`` is ``None`` when ranking is skipped,
        otherwise a list of ``(batch_position, observation_index,
        candidate_texts)`` triples.
    """
    xs, ys, labels, valid_inds, _, _ = PaddingUtils.pad_text(
        observations, self.dict, self.END_IDX, self.NULL_IDX, dq=True,
        eval_labels=False, truncate=self.truncate)
    if xs is None:
        return None, None, None, None, None, None

    if self.use_cuda:
        # copy to gpu; `non_blocking` replaces the old `async` argument,
        # which is a reserved word (and a syntax error) on Python >= 3.7
        self.xs.resize_(xs.size())
        self.xs.copy_(xs, non_blocking=True)
        xs = Variable(self.xs)
        if ys is not None:
            self.ys.resize_(ys.size())
            self.ys.copy_(ys, non_blocking=True)
            ys = Variable(self.ys)
    else:
        xs = Variable(xs)
        if ys is not None:
            ys = Variable(ys)

    # set up candidates
    cands = None
    valid_cands = None
    if ys is None and self.rank:
        # only do ranking when no targets available and ranking flag set
        parsed_cs = []
        valid_cands = []
        for i, v in enumerate(valid_inds):
            if 'label_candidates' in observations[v]:
                # each candidate tuple is a pair of the parsed version and
                # the original full string
                cs = list(observations[v]['label_candidates'])
                curr_dqs = [deque(maxlen=self.truncate) for _ in cs]
                for dq, c in zip(curr_dqs, cs):
                    # extendleft over the reversed tokens restores the
                    # original order; with maxlen set, overflow falls off
                    # the right end, so the leading tokens are kept
                    dq.extendleft(reversed(self.parse(c)))
                parsed_cs.append(curr_dqs)
                valid_cands.append((i, v, cs))

        if len(parsed_cs) > 0:
            # TODO: store lengths of cands separately, so don't have zero
            # padding for varying number of cands per example
            # found cands, pack them into tensor
            max_c_len = max(max(len(c) for c in cs) for cs in parsed_cs)
            max_c_cnt = max(len(cs) for cs in parsed_cs)
            padded_cs = []
            for cs in parsed_cs:
                # pad every candidate to the longest candidate ...
                rows = [list(c) + [self.NULL_IDX] * (max_c_len - len(c))
                        for c in cs]
                # ... and every example to the largest candidate count,
                # using whole null rows so the nesting stays rectangular
                rows.extend([self.NULL_IDX] * max_c_len
                            for _ in range(max_c_cnt - len(rows)))
                padded_cs.append(rows)
            cands = torch.LongTensor(padded_cs)
            if self.use_cuda:
                # copy to gpu
                self.cands.resize_(cands.size())
                self.cands.copy_(cands, non_blocking=True)
                cands = Variable(self.cands)
            else:
                cands = Variable(cands)

    return xs, ys, labels, valid_inds, cands, valid_cands
def vectorize(self, observations, seq_len, is_training):
    """Build lists of input and target tensors from a batch of observations.

    Training: the ``'text2vec'`` entries of the observations are appended to
    ``self.next_batch``.  Nothing is produced until more than
    ``self.batchsize`` entries are buffered; then every complete group of
    ``self.batchsize`` entries is removed from the buffer and turned into one
    (data, targets) pair.  Data is the first ``seq_len`` rows of the
    transposed group, targets are all rows after the first.

    Evaluation: the observations are padded with ``PaddingUtils.pad_text``
    and returned as a single (data, targets) pair.

    Returns:
        A 5-tuple ``(data_list, targets_list, labels, valid_inds, y_lens)``.
        During training the last three entries are ``None``, and all five
        are ``None`` while the buffer is still filling up.
    """
    if not is_training:
        # here we get valid examples and pad them
        xs, ys, labels, valid_inds, _, y_lens = PaddingUtils.pad_text(
            observations, self.dict, end_idx=self.END_IDX,
            null_idx=self.NULL_IDX)
        on_gpu = self.use_cuda
        if xs is not None:
            xs = Variable(torch.LongTensor(xs))
            if on_gpu:
                xs = xs.cuda()
        if ys is not None:
            ys = Variable(torch.LongTensor(ys))
            if on_gpu:
                ys = ys.cuda()
        return [xs], [ys], labels, valid_inds, y_lens

    for obs in observations:
        if obs and 'text2vec' in obs:
            self.next_batch += obs['text2vec']

    if len(self.next_batch) <= self.batchsize:
        return None, None, None, None, None

    data_list, targets_list = [], []
    # number of complete batches currently buffered
    num_batches = len(self.next_batch) // self.batchsize
    for _ in range(num_batches):
        rows = self.next_batch[:self.batchsize]
        self.next_batch = self.next_batch[self.batchsize:]
        columns = torch.LongTensor(rows).t().contiguous()
        data = Variable(columns[:seq_len])
        targets = Variable(columns[1:])
        if self.use_cuda:
            data = data.cuda()
            targets = targets.cuda()
        data_list.append(data)
        targets_list.append(targets)

    return data_list, targets_list, None, None, None
def vectorize(self, observations, seq_len, is_training):
    """Convert a list of observations into input & target tensors.

    Training: the ``'text2vec'`` entries of the observations are appended to
    ``self.next_batch``.  Nothing is produced until more than
    ``self.batchsize`` entries are buffered; then every complete group of
    ``self.batchsize`` entries is removed from the buffer and turned into one
    (data, targets) pair.  Data is the first ``seq_len`` rows of the
    transposed group, targets are all rows after the first.

    Evaluation: the observations are padded with ``PaddingUtils.pad_text``
    and returned as a single (data, targets) pair.
    NOTE(review): the padded values are wrapped in ``Variable`` directly, so
    ``pad_text`` is assumed to return tensors here -- confirm against the
    ``PaddingUtils`` version in use.

    Returns:
        A 5-tuple ``(data_list, targets_list, labels, valid_inds, y_lens)``.
        During training the last three entries are ``None``, and all five
        are ``None`` while the buffer is still filling up.  During
        evaluation the single entry of ``data_list`` / ``targets_list`` is
        ``None`` when ``pad_text`` returned ``None`` for it.
    """
    labels = None
    valid_inds = None
    y_lens = None
    if is_training:
        for obs in observations:
            if obs and 'text2vec' in obs:
                self.next_batch += obs['text2vec']
        if len(self.next_batch) <= self.batchsize:
            return None, None, None, None, None

        data_list = []
        targets_list = []
        # total is the number of batches
        total = len(self.next_batch) // self.batchsize
        for _ in range(total):
            batch = self.next_batch[:self.batchsize]
            self.next_batch = self.next_batch[self.batchsize:]

            source = torch.LongTensor(batch).t().contiguous()
            data = Variable(source[:seq_len])
            targets = Variable(source[1:])

            if self.use_cuda:
                data = data.cuda()
                targets = targets.cuda()

            data_list.append(data)
            targets_list.append(targets)
    else:
        # here we get valid examples and pad them
        xs, ys, labels, valid_inds, _, y_lens = PaddingUtils.pad_text(
            observations, self.dict, self.END_IDX, self.NULL_IDX)

        # Either value may be None (no valid examples / no labels);
        # wrapping None in a Variable would raise, so pass it through.
        if xs is not None:
            xs = Variable(xs)
            if self.use_cuda:
                xs = xs.cuda()
        if ys is not None:
            ys = Variable(ys)
            if self.use_cuda:
                ys = ys.cuda()

        data_list = [xs]
        targets_list = [ys]

    return data_list, targets_list, labels, valid_inds, y_lens