def test_input_too_long(self):
        torch.manual_seed(0)
        config = BartConfig(
            vocab_size=257,
            d_model=32,
            encoder_layers=1,
            decoder_layers=1,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            # So any input longer than 4 tokens should raise an exception
            max_position_embeddings=4,
            encoder_attention_heads=1,
            decoder_attention_heads=1,
            max_length=4,
            min_length=1,
            forced_eos_token_id=None,
        )
        model = BartForConditionalGeneration(config)
        # Bias the output logits towards "L" (token id 76)
        V, C = model.lm_head.weight.shape

        bias = torch.zeros(V)
        bias[76] = 10

        model.lm_head.bias = torch.nn.Parameter(bias)

        # Generated with:
        # import tempfile
        # from tokenizers import Tokenizer, models
        # from transformers import PreTrainedTokenizerFast
        # model_max_length = 4
        # vocab = [(chr(i), i) for i in range(256)]
        # tokenizer = Tokenizer(models.Unigram(vocab))
        # with tempfile.NamedTemporaryFile() as f:
        #     tokenizer.save(f.name)
        #     real_tokenizer = PreTrainedTokenizerFast(tokenizer_file=f.name, model_max_length=model_max_length)
        # real_tokenizer._tokenizer.save("tokenizer.json")
        # # + add missing config.json with albert as model_type
        tokenizer = AutoTokenizer.from_pretrained(
            "Narsil/small_summarization_test")
        summarizer = pipeline(task="summarization",
                              model=model,
                              tokenizer=tokenizer)

        with self.assertLogs("transformers", level="WARNING"):
            with self.assertRaises(IndexError):
                _ = summarizer("This is a test")

        output = summarizer("This is a test",
                            truncation=TruncationStrategy.ONLY_FIRST)
        # Token id 2 is BART's default BOS token.
        self.assertEqual(output, [{"summary_text": "\x02 L L L"}])
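The test above forces an IndexError on over-length input and then retries with truncation enabled. A hedged sketch of the same call pattern outside the test harness follows; the public checkpoint name and the input text are illustrative assumptions, not part of the test.

from transformers import pipeline
from transformers.tokenization_utils_base import TruncationStrategy

# Illustrative checkpoint; any seq2seq summarization model behaves the same way.
summarizer = pipeline(task="summarization", model="sshleifer/distilbart-cnn-12-6")
long_text = "This is a test. " * 2000  # well beyond the model's 1024-position limit
# ONLY_FIRST clips the encoded input to the tokenizer's model_max_length,
# so it can no longer index past the model's position embeddings.
output = summarizer(long_text, truncation=TruncationStrategy.ONLY_FIRST)
print(output[0]["summary_text"])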
Example #2
def _get_models(config):
    # The config value is either a single checkpoint name or an
    # "encoder,decoder" pair separated by a comma.
    encoder_decoder_tuples = tuple(config['encoder_decoder_model_name_or_path'].split(','))
    enc_model = encoder_decoder_tuples[0]
    # Reuse the encoder checkpoint as the decoder when only one name is given.
    dec_model = encoder_decoder_tuples[0] if len(encoder_decoder_tuples) == 1 else encoder_decoder_tuples[1]
    share_model = 'share_model' in config and config['share_model']
    if 'bart' in enc_model:
        # BART is already a full encoder-decoder model, so load it directly.
        model = BartLMHeadModel.from_pretrained(enc_model, torchscript=True)
        _reset_bart_config(model.config)
        _reset_bart_config(model.base_model.config)
    else:
        # Otherwise combine separate encoder and decoder checkpoints.
        model = EncoderDecoderModel.from_encoder_decoder_pretrained(enc_model, dec_model,
                                                                    share_model=share_model)
    return model
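A hedged usage sketch for _get_models: the config keys mirror the ones the function reads, while the checkpoint names are placeholder assumptions, and the surrounding repository is expected to provide BartLMHeadModel, EncoderDecoderModel and _reset_bart_config.

# Single BART checkpoint: taken through the 'bart' branch.
bart_model = _get_models({'encoder_decoder_model_name_or_path': 'facebook/bart-base'})

# Separate encoder and decoder checkpoints combined into an EncoderDecoderModel.
enc_dec_model = _get_models({
    'encoder_decoder_model_name_or_path': 'bert-base-uncased,bert-base-uncased',
    'share_model': False,
})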
Example #3
def __init__(self, ckpt_path="./n_title_epoch_3"):
    # Load a fine-tuned KoBART checkpoint onto the GPU and grab its tokenizer.
    self.model = BartForConditionalGeneration.from_pretrained(ckpt_path).cuda()
    self.tokenizer = get_kobart_tokenizer()
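A companion method for the class above, sketched here as an assumption; only the model and tokenizer attributes set in __init__ come from the source, while the method name and generation hyperparameters are illustrative.

def summarize(self, text, max_length=64, num_beams=5):
    # Encode the input and move it onto the GPU alongside the model.
    input_ids = self.tokenizer.encode(text, return_tensors="pt").cuda()
    summary_ids = self.model.generate(
        input_ids,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)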
Example #4
def load_model():
    model = BartForConditionalGeneration.from_pretrained('./translation_binary')
    # tokenizer = get_kobart_tokenizer()
    return model
Example #5
def load_model():
    model = BartForConditionalGeneration.from_pretrained('./n_title_epoch_9/')
    # tokenizer = get_kobart_tokenizer()
    return model
Example #6
def load_model():
    model = BartForConditionalGeneration.from_pretrained('./kobart_summary')
    return model