def __call__(self, texts_a: List[str], texts_b: Optional[List[str]] = None) -> List[InputFeatures]:
    """Tokenize text (pairs) into BERT input features via ``convert_examples_to_features``.

    When both texts are given, text_a and text_b are joined with the [SEP] token.

    Args:
        texts_a: list of texts,
        texts_b: list of texts, it could be None, e.g. single sentence classification task

    Returns:
        batch of InputFeatures with subtokens, subtoken ids, subtoken mask, segment mask.
    """
    if texts_b is None:
        # Single-sentence mode: pad with None so pairing below still works.
        texts_b = [None] * len(texts_a)
    # unique_id is not consumed downstream, so a constant 0 is used for every example.
    examples = []
    for text_a, text_b in zip(texts_a, texts_b):
        examples.append(InputExample(unique_id=0, text_a=text_a, text_b=text_b))
    return convert_examples_to_features(examples, self.max_seq_length, self.tokenizer)
def __call__(self, batch: List[List[str]]) -> List[List[InputFeatures]]:
    """Call BERT convert_examples_to_features function to tokenize and create masks.

    Args:
        batch: list of elements where the first element represents the batch with contexts
            and the rest of elements represent response candidates batches

    Returns:
        list of feature batches with subtokens, subtoken ids, subtoken mask, segment mask
        for the context and each of response candidates separately.
    """
    if isinstance(batch[0], str):
        # A single flat example was passed; wrap it so it looks like a batch of one.
        batch = [batch]
    # Transpose: group the i-th item of every input list into one sample.
    samples = [list(row) for row in zip(*batch)]
    # TODO: add unique id (unique_id=0 is a placeholder; it is not used downstream)
    examples = [
        [InputExample(unique_id=0, text_a=text_a, text_b=None) for text_a in sample]
        for sample in samples
    ]
    return [
        convert_examples_to_features(example, self.max_seq_length, self.tokenizer)
        for example in examples
    ]
def __call__(self, batch: List[List[str]]) -> List[List[InputFeatures]]:
    """Call BERT convert_examples_to_features function to tokenize and create masks.

    Args:
        batch: list of elemenents where the first element represents the batch with contexts
            and the rest of elements represent response candidates batches

    Returns:
        list of feature batches with subtokens, subtoken ids, subtoken mask, segment mask.
    """
    if isinstance(batch[0], str):
        # A single flat example was passed; wrap it so it looks like a batch of one.
        batch = [batch]
    cont_resp_pairs = []
    if len(batch[0]) == 1:
        # Inference-like case: only contexts are available, no responses.
        contexts = batch[0]
        # NOTE(review): responses_empt has len(batch) items while contexts has 1;
        # zip truncates to the shorter, so effectively one (context, None) pair is
        # produced — presumably intentional, but confirm against callers.
        responses_empt = [None] * len(batch)
        cont_resp_pairs.append(zip(contexts, responses_empt))
    else:
        # Each batch element is (context, candidate_1, ..., candidate_k); pair the
        # context column with every candidate column in turn.
        contexts = [el[0] for el in batch]
        for i in range(1, len(batch[0])):
            responses = []
            for el in batch:
                responses.append(el[i])
            cont_resp_pairs.append(zip(contexts, responses))
    # unique_id=0 is a placeholder; it is not used downstream.
    examples = []
    for s in cont_resp_pairs:
        # s is a single-use zip iterator; it is consumed exactly once here.
        ex = [InputExample(unique_id=0, text_a=context, text_b=response) for context, response in s]
        examples.append(ex)
    features = [convert_examples_to_features(el, self.max_seq_length, self.tokenizer) for el in examples]
    return features
def __call__(self, batch):
    """Tokenize a batch with BERT convert_examples_to_features and build input masks.

    Args:
        batch: list of lists of texts (or a single list of texts, which is treated
            as a batch of one); each inner list is one column of the batch

    Returns:
        list of feature batches with subtokens, subtoken ids, subtoken mask,
        segment mask — one feature batch per transposed sample.
    """
    if isinstance(batch[0], str):
        # A single flat example was passed; wrap it so it looks like a batch of one.
        batch = [batch]
    # Transpose: group the i-th item of every input list into one sample.
    samples = [list(row) for row in zip(*batch)]
    # TODO: add unique id (unique_id=0 is a placeholder; it is not used downstream)
    examples = [
        [InputExample(unique_id=0, text_a=text_a, text_b=None) for text_a in sample]
        for sample in samples
    ]
    return [
        convert_examples_to_features(example, self.max_seq_length, self.tokenizer)
        for example in examples
    ]