import torch
from transformers import AutoTokenizer, BartConfig, BartForConditionalGeneration, pipeline
from transformers.tokenization_utils import TruncationStrategy

# unittest.TestCase method: relies on self.assertLogs / self.assertRaises / self.assertEqual.
def test_input_too_long(self):
    torch.manual_seed(0)
    config = BartConfig(
        vocab_size=257,
        d_model=32,
        encoder_layers=1,
        decoder_layers=1,
        encoder_ffn_dim=32,
        decoder_ffn_dim=32,
        # So any text > 4 should raise an exception
        max_position_embeddings=4,
        encoder_attention_heads=1,
        decoder_attention_heads=1,
        max_length=4,
        min_length=1,
        forced_eos_token_id=None,
    )
    model = BartForConditionalGeneration(config)
    # Bias output towards L
    V, C = model.lm_head.weight.shape

    bias = torch.zeros(V)
    bias[76] = 10  # token id 76 is chr(76) == "L"
    model.lm_head.bias = torch.nn.Parameter(bias)

    # # Generated with:
    # import tempfile
    # from tokenizers import Tokenizer, models
    # from transformers import PreTrainedTokenizerFast
    # model_max_length = 4
    # vocab = [(chr(i), i) for i in range(256)]
    # tokenizer = Tokenizer(models.Unigram(vocab))
    # with tempfile.NamedTemporaryFile() as f:
    #     tokenizer.save(f.name)
    #     real_tokenizer = PreTrainedTokenizerFast(tokenizer_file=f.name, model_max_length=model_max_length)
    # real_tokenizer._tokenizer.save("tokenizer.json")
    # # + add missing config.json with albert as model_type
    tokenizer = AutoTokenizer.from_pretrained("Narsil/small_summarization_test")
    summarizer = pipeline(task="summarization", model=model, tokenizer=tokenizer)

    # Without truncation, the 4-position model cannot encode the longer input.
    with self.assertLogs("transformers", level="WARNING"):
        with self.assertRaises(IndexError):
            _ = summarizer("This is a test")

    output = summarizer("This is a test", truncation=TruncationStrategy.ONLY_FIRST)
    # 2 is default BOS from Bart.
    self.assertEqual(output, [{"summary_text": "\x02 L L L"}])
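# Hedged usage sketch of the behavior the test exercises: passing a truncation
# strategy makes the summarization pipeline clip over-length inputs instead of
# raising; the checkpoint name below is an illustrative assumption, not part of
# the original test.
from transformers import pipeline
from transformers.tokenization_utils import TruncationStrategy

summarizer = pipeline(task="summarization", model="sshleifer/distilbart-cnn-12-6")
print(summarizer("A very long article ...", truncation=TruncationStrategy.ONLY_FIRST))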
from transformers import EncoderDecoderModel
# BartLMHeadModel and _reset_bart_config are assumed to be defined or imported
# elsewhere in this repo (BartLMHeadModel dates from older transformers releases).

def _get_models(config):
    # "encoder,decoder" pair; a single entry reuses the same name for both.
    encoder_decoder_tuples = tuple(config['encoder_decoder_model_name_or_path'].split(','))
    enc_model = encoder_decoder_tuples[0]
    dec_model = encoder_decoder_tuples[0] if len(encoder_decoder_tuples) == 1 else encoder_decoder_tuples[1]
    share_model = 'share_model' in config and config['share_model']
    if 'bart' in enc_model:
        # BART is already a full encoder-decoder; load it directly.
        model = BartLMHeadModel.from_pretrained(enc_model, torchscript=True)
        _reset_bart_config(model.config)
        _reset_bart_config(model.base_model.config)
    else:
        model = EncoderDecoderModel.from_encoder_decoder_pretrained(enc_model, dec_model, share_model=share_model)
    return model
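# Hedged usage sketch for _get_models: shows the shape of the config dict the
# function reads. The model names are illustrative assumptions, and actually
# running it requires this repo's own environment (where BartLMHeadModel and
# _reset_bart_config live).
config = {
    # comma-separated "encoder,decoder"; a single name is reused for both
    "encoder_decoder_model_name_or_path": "bert-base-uncased,bert-base-uncased",
    "share_model": False,  # optional key; falsy or absent means no weight sharing
}
model = _get_models(config)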
from transformers import BartForConditionalGeneration
from kobart import get_kobart_tokenizer

def __init__(self, ckpt_path="./n_title_epoch_3"):
    # Load the fine-tuned KoBART checkpoint onto the GPU.
    self.model = BartForConditionalGeneration.from_pretrained(ckpt_path).cuda()
    self.tokenizer = get_kobart_tokenizer()
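# Hedged usage sketch: the class owning the __init__ above is unnamed here, so
# "TitleGenerator" is a hypothetical name; generate()/decode() are standard
# transformers APIs, and a CUDA device is assumed since the model is moved with .cuda().
gen = TitleGenerator(ckpt_path="./n_title_epoch_3")
input_ids = gen.tokenizer("Long article body ...", return_tensors="pt").input_ids.cuda()
title_ids = gen.model.generate(input_ids, max_length=32, num_beams=4)
print(gen.tokenizer.decode(title_ids[0], skip_special_tokens=True))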
from transformers import BartForConditionalGeneration

def load_model():
    # Load the fine-tuned translation checkpoint; the tokenizer is loaded elsewhere.
    model = BartForConditionalGeneration.from_pretrained('./translation_binary')
    # tokenizer = get_kobart_tokenizer()
    return model
from transformers import BartForConditionalGeneration

def load_model():
    # Load the title-generation checkpoint (epoch 9); the tokenizer is loaded elsewhere.
    model = BartForConditionalGeneration.from_pretrained('./n_title_epoch_9/')
    # tokenizer = get_kobart_tokenizer()
    return model
from transformers import BartForConditionalGeneration

def load_model():
    # Load the KoBART summarization checkpoint.
    model = BartForConditionalGeneration.from_pretrained('./kobart_summary')
    return model
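# Hedged usage sketch for the load_model() helpers above: pairs a loaded
# checkpoint with the kobart tokenizer referenced in the commented-out lines.
# The input text and generation settings are illustrative assumptions.
from kobart import get_kobart_tokenizer

model = load_model()
tokenizer = get_kobart_tokenizer()
input_ids = tokenizer("Long source document ...", return_tensors="pt").input_ids
summary_ids = model.generate(input_ids, max_length=64, num_beams=5)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))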