class InputConfig(ConfigBase):
    """Tensorizer configs for a two-sided (right/left) input plus labels."""

    # Token tensorizer configs for each side; both default to a stock
    # RoBERTaTensorizer.Config().
    # NOTE(review): "right"/"left" presumably name the two texts of a pair —
    # confirm against the model that consumes this config.
    right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    # Dense-feature tensorizer configs for each side; both default to None.
    right_dense: Optional[FloatListTensorizer.Config] = None
    left_dense: Optional[FloatListTensorizer.Config] = None
    # Label tensorizer config; defaults to a stock LabelTensorizer.Config().
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
def test_roberta_tensorizer(self):
    """Compare RoBERTaTensorizer and its scripted impl with fixed expected outputs.

    The word "Prototype" is run through the config-built tensorizer and then
    through the object returned by ``RoBERTaTensorizerScriptImpl(...)
    .torchscriptify()``; each result is compared, element by element, with
    the same hard-coded lists.
    """
    sentence = "Prototype"
    # Expected values, in the order they are zipped against the results.
    expected_outputs = [
        [[0, 4, 5, 2]],  # tokens
        [[1, 1, 1, 1]],  # pad masks
        [[0, 0, 0, 0]],  # segment labels
        [[0, 1, 2, 3]],  # positions
    ]

    def check_against_expected(actual_tensors):
        # Pairwise comparison; zip stops at the shorter of the two sequences.
        for actual, wanted in zip(actual_tensors, expected_outputs):
            self.assertEqual(actual.tolist(), wanted)

    # Build the tensorizer from the GPT-2 BPE test fixtures.
    bpe_config = GPT2BPETokenizer.Config(
        bpe_encoder_path="pytext/data/test/data/gpt2_encoder.json",
        bpe_vocab_path="pytext/data/test/data/gpt2_vocab.bpe",
    )
    tensorizer_config = RoBERTaTensorizer.Config(
        tokenizer=bpe_config,
        vocab_file="pytext/data/test/data/gpt2_dict.txt",
        max_seq_len=256,
    )
    tensorizer = RoBERTaTensorizer.from_config(tensorizer_config)

    # Path 1: numberize a single row, then tensorize a one-row batch.
    numberized_row = tensorizer.numberize({"text": sentence})
    check_against_expected(tensorizer.tensorize([numberized_row]))

    # Path 2: the scripted implementation is handed pre-tokenized input, so
    # it is built with a DoNothingTokenizer and reuses the vocab and max
    # sequence length of the tensorizer above.
    scripted = RoBERTaTensorizerScriptImpl(
        tokenizer=DoNothingTokenizer(),
        vocab=tensorizer.vocab,
        max_seq_len=tensorizer.max_seq_len,
    ).torchscriptify()
    tokens_per_sentence = [tensorizer.tokenizer.tokenize(sentence)]
    numberized = scripted.numberize(tokens_per_sentence)
    # zip over a single iterable wraps each numberized component in a
    # 1-tuple, i.e. a batch holding one row.
    tokens_2d, segment_labels_2d, seq_lens_1d, positions_2d = zip(numberized)
    check_against_expected(
        scripted.tensorize(tokens_2d, segment_labels_2d, seq_lens_1d, positions_2d)
    )
class RegressionModelInput(ConfigBase):
    """Tensorizer configs for a model input with tokens and numeric labels."""

    # Token tensorizer config; defaults to a stock RoBERTaTensorizer.Config().
    tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    # Label tensorizer config; defaults to a stock
    # NumericLabelTensorizer.Config().
    labels: NumericLabelTensorizer.Config = NumericLabelTensorizer.Config(
    )
class InputConfig(ConfigBase):
    """Tensorizer configs: one token field, two optional dense fields, labels."""

    # Token tensorizer config; defaults to a stock RoBERTaTensorizer.Config().
    tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    # Both dense-feature configs default to None, so the annotation is spelled
    # Optional[...] explicitly rather than relying on an implicit Optional.
    # NOTE(review): "right"/"left" presumably name the two sides of a paired
    # input — confirm against the model that consumes this config.
    right_dense: Optional[FloatListTensorizer.Config] = None
    left_dense: Optional[FloatListTensorizer.Config] = None
    # Label tensorizer config; defaults to a stock LabelTensorizer.Config().
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class InputConfig(ConfigBase):
    """Tensorizer configs for a model input with tokens and labels."""

    # Token tensorizer config; defaults to a stock RoBERTaTensorizer.Config().
    tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
    # Label tensorizer config; defaults to a stock LabelTensorizer.Config().
    labels: LabelTensorizer.Config = LabelTensorizer.Config()