class InputConfig(ConfigBase):
            right_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            left_tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            right_dense: Optional[FloatListTensorizer.Config] = None
            left_dense: Optional[FloatListTensorizer.Config] = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()
Exemple #2
0
    def test_roberta_tensorizer(self):
        text = "Prototype"
        tokens = [[0, 4, 5, 2]]
        pad_masks = [[1, 1, 1, 1]]
        segment_labels = [[0, 0, 0, 0]]
        positions = [[0, 1, 2, 3]]
        expected = [tokens, pad_masks, segment_labels, positions]

        tensorizer = RoBERTaTensorizer.from_config(
            RoBERTaTensorizer.Config(
                tokenizer=GPT2BPETokenizer.Config(
                    bpe_encoder_path="pytext/data/test/data/gpt2_encoder.json",
                    bpe_vocab_path="pytext/data/test/data/gpt2_vocab.bpe",
                ),
                vocab_file="pytext/data/test/data/gpt2_dict.txt",
                max_seq_len=256,
            ))
        tensors = tensorizer.tensorize([tensorizer.numberize({"text": text})])
        for tensor, expect in zip(tensors, expected):
            self.assertEqual(tensor.tolist(), expect)

        tensorizer_impl = RoBERTaTensorizerScriptImpl(
            tokenizer=DoNothingTokenizer(),
            vocab=tensorizer.vocab,
            max_seq_len=tensorizer.max_seq_len,
        ).torchscriptify()
        per_sentence_tokens = [tensorizer.tokenizer.tokenize(text)]
        tokens_2d, segment_labels_2d, seq_lens_1d, positions_2d = zip(
            *[tensorizer_impl.numberize(per_sentence_tokens)])
        script_tensors = tensorizer_impl.tensorize(tokens_2d,
                                                   segment_labels_2d,
                                                   seq_lens_1d, positions_2d)
        for tensor, expect in zip(script_tensors, expected):
            self.assertEqual(tensor.tolist(), expect)
Exemple #3
0
 class RegressionModelInput(ConfigBase):
     tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
     labels: NumericLabelTensorizer.Config = NumericLabelTensorizer.Config(
     )
        class InputConfig(ConfigBase):
            tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
            right_dense: FloatListTensorizer.Config = None
            left_dense: FloatListTensorizer.Config = None

            labels: LabelTensorizer.Config = LabelTensorizer.Config()
Exemple #5
0
 class InputConfig(ConfigBase):
     tokens: RoBERTaTensorizer.Config = RoBERTaTensorizer.Config()
     labels: LabelTensorizer.Config = LabelTensorizer.Config()