def __init__(
    self,
    model_name_or_path: str = "facebook/rag-token-nq",
    retriever: Optional[DensePassageRetriever] = None,
    generator_type: RAGeneratorType = RAGeneratorType.TOKEN,
    top_k_answers: int = 2,
    max_length: int = 200,
    min_length: int = 2,
    num_beams: int = 2,
    embed_title: bool = True,
    prefix: Optional[str] = None,
    use_gpu: bool = True,
):
    """
    Load a RAG model from Transformers along with passage_embedding_model.
    See https://huggingface.co/transformers/model_doc/rag.html for more details

    :param model_name_or_path: Directory of a saved model or the name of a public model e.g.
                               'facebook/rag-token-nq', 'facebook/rag-sequence-nq'.
                               See https://huggingface.co/models for a full list of available models.
    :param retriever: `DensePassageRetriever` used to embed passages
    :param generator_type: Which RAG generator implementation to use: RAG-TOKEN or RAG-SEQUENCE
    :param top_k_answers: Number of independently generated texts to return
    :param max_length: Maximum length of generated text
    :param min_length: Minimum length of generated text
    :param num_beams: Number of beams for beam search. 1 means no beam search.
    :param embed_title: Embed the title of the passage while generating embeddings
    :param prefix: The prefix used by the generator's tokenizer.
    :param use_gpu: Whether to use GPU (if available)
    """
    self.model_name_or_path = model_name_or_path
    self.max_length = max_length
    self.min_length = min_length
    self.generator_type = generator_type
    self.num_beams = num_beams
    self.embed_title = embed_title
    self.prefix = prefix
    self.retriever = retriever

    if top_k_answers > self.num_beams:
        top_k_answers = self.num_beams
        logger.warning(f"top_k_answers value should not be greater than num_beams, hence setting it to {num_beams}")

    self.top_k_answers = top_k_answers

    if use_gpu and torch.cuda.is_available():
        # GPU execution is not supported yet, so fail fast instead of
        # silently falling back to CPU.
        raise AttributeError("Currently RAGenerator does not support GPU, try with use_gpu=False")
    else:
        self.device = torch.device("cpu")

    self.tokenizer = RagTokenizer.from_pretrained(model_name_or_path)

    if self.generator_type == RAGeneratorType.SEQUENCE:
        raise NotImplementedError("RagSequenceForGeneration is not implemented yet")
        # TODO: Enable when transformers have it. Refer https://github.com/huggingface/transformers/issues/7905
        # Also refer https://github.com/huggingface/transformers/issues/7829
        # self.model = RagSequenceForGeneration.from_pretrained(model_name_or_path)
    else:
        self.model = RagTokenForGeneration.from_pretrained(model_name_or_path)
def token_model(self):
    return (
        RagTokenForGeneration.from_pretrained_question_encoder_generator(
            "facebook/dpr-question_encoder-single-nq-base", "facebook/bart-large-cnn"
        )
        .to(torch_device)
        .eval()
    )
def test_rag_token_from_pretrained(self):
    rag_config = self.get_rag_config()
    rag_decoder_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    rag_question_encoder_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
        "facebook/dpr-question_encoder-single-nq-base"
    )
    rag_retriever = RagRetriever(
        rag_config,
        question_encoder_tokenizer=rag_question_encoder_tokenizer,
        generator_tokenizer=rag_decoder_tokenizer,
    )

    input_ids = rag_question_encoder_tokenizer(
        "who sings does he love me with reba", return_tensors="pt"
    ).input_ids
    decoder_input_ids = rag_decoder_tokenizer("Linda Davis", return_tensors="pt").input_ids

    input_ids = input_ids.to(torch_device)
    decoder_input_ids = decoder_input_ids.to(torch_device)

    with tempfile.TemporaryDirectory() as tmp_dirname:
        rag_token = RagTokenForGeneration.from_pretrained_question_encoder_generator(
            "facebook/dpr-question_encoder-single-nq-base",
            "facebook/bart-large-cnn",
            retriever=rag_retriever,
            config=rag_config,
        ).to(torch_device)
        # check that the from_pretrained methods work; from_pretrained is a
        # classmethod, so the reloaded model must be rebound to the name
        rag_token.save_pretrained(tmp_dirname)
        rag_token = RagTokenForGeneration.from_pretrained(tmp_dirname, retriever=rag_retriever)
        rag_token.to(torch_device)

        with torch.no_grad():
            output = rag_token(
                input_ids,
                labels=decoder_input_ids,
            )

        loss_pretrained = output.loss
        del rag_token

    question_encoder = AutoModel.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
    generator = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
    rag_token = RagTokenForGeneration(
        config=rag_config,
        question_encoder=question_encoder,
        generator=generator,
        retriever=rag_retriever,
    )
    rag_token.to(torch_device)

    with torch.no_grad():
        output = rag_token(
            input_ids,
            labels=decoder_input_ids,
        )

    loss_init = output.loss

    self.assertAlmostEqual(loss_pretrained.item(), loss_init.item(), places=4)
from transformers import pipeline

# Question and context for extractive QA
question = "What is the capital of the Netherlands?"

# The 'r' prefix means raw string, so escape codes (e.g. \n) are ignored
context = r"The four largest cities in the Netherlands are Amsterdam, Rotterdam, The Hague and Utrecht.[17] Amsterdam is the country's most populous city and nominal capital,[18] while The Hague holds the seat of the States General, Cabinet and Supreme Court.[19] The Port of Rotterdam is the busiest seaport in Europe, and the busiest in any country outside East Asia and Southeast Asia, behind only China and Singapore."

# Generate an answer to the question given the context
qa = pipeline("question-answering")
answer = qa(question=question, context=context)

# Print the answer
print(f"Question: {question}")
print(f"Answer: '{answer['answer']}' with score {answer['score']}")

# Test that RAG is working
from transformers import RagTokenizer, RagRetriever, RagTokenForGeneration

tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
)
model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever)

input_dict = tokenizer.prepare_seq2seq_batch(
    "who holds the record in 100m freestyle", return_tensors="pt"
)
generated = model.generate(input_ids=input_dict["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])
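# Note: prepare_seq2seq_batch is deprecated in newer Transformers releases.
# A minimal sketch of the equivalent call on those versions (assumes
# RagTokenizer is directly callable, like other HF tokenizers):
input_dict = tokenizer("who holds the record in 100m freestyle", return_tensors="pt")
generated = model.generate(input_ids=input_dict["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])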
def __init__(
    self,
    model_name_or_path: str = "facebook/rag-token-nq",
    model_version: Optional[str] = None,
    retriever: Optional[DensePassageRetriever] = None,
    generator_type: RAGeneratorType = RAGeneratorType.TOKEN,
    top_k: int = 2,
    max_length: int = 200,
    min_length: int = 2,
    num_beams: int = 2,
    embed_title: bool = True,
    prefix: Optional[str] = None,
    use_gpu: bool = True,
):
    """
    Load a RAG model from Transformers along with passage_embedding_model.
    See https://huggingface.co/transformers/model_doc/rag.html for more details

    :param model_name_or_path: Directory of a saved model or the name of a public model e.g.
                               'facebook/rag-token-nq', 'facebook/rag-sequence-nq'.
                               See https://huggingface.co/models for a full list of available models.
    :param model_version: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash.
    :param retriever: `DensePassageRetriever` used to embed passages
    :param generator_type: Which RAG generator implementation to use: RAG-TOKEN or RAG-SEQUENCE
    :param top_k: Number of independently generated texts to return
    :param max_length: Maximum length of generated text
    :param min_length: Minimum length of generated text
    :param num_beams: Number of beams for beam search. 1 means no beam search.
    :param embed_title: Embed the title of the passage while generating embeddings
    :param prefix: The prefix used by the generator's tokenizer.
    :param use_gpu: Whether to use GPU (if available)
    """
    # save init parameters to enable export of component config as YAML
    self.set_config(
        model_name_or_path=model_name_or_path,
        model_version=model_version,
        retriever=retriever,
        generator_type=generator_type,
        top_k=top_k,
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        embed_title=embed_title,
        prefix=prefix,
        use_gpu=use_gpu,
    )

    self.model_name_or_path = model_name_or_path
    self.max_length = max_length
    self.min_length = min_length
    self.generator_type = generator_type
    self.num_beams = num_beams
    self.embed_title = embed_title
    self.prefix = prefix
    self.retriever = retriever

    if top_k > self.num_beams:
        top_k = self.num_beams
        logger.warning(f"top_k value should not be greater than num_beams, hence setting it to {num_beams}")

    self.top_k = top_k

    self.device, _ = initialize_device_settings(use_cuda=use_gpu)

    self.tokenizer = RagTokenizer.from_pretrained(model_name_or_path)

    if self.generator_type == RAGeneratorType.SEQUENCE:
        raise NotImplementedError("RagSequenceForGeneration is not implemented yet")
        # TODO: Enable when transformers have it. Refer https://github.com/huggingface/transformers/issues/7905
        # Also refer https://github.com/huggingface/transformers/issues/7829
        # self.model = RagSequenceForGeneration.from_pretrained(model_name_or_path)
    else:
        self.model = RagTokenForGeneration.from_pretrained(model_name_or_path, revision=model_version).to(self.device)
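# A minimal usage sketch for the RAGenerator defined above; hedged — this
# assumes the Haystack 0.x API (Document, DensePassageRetriever) that this
# __init__ belongs to, and the document_store wiring is elided:
from haystack import Document

retriever = DensePassageRetriever(document_store=document_store)  # document_store assumed to exist
generator = RAGenerator(model_name_or_path="facebook/rag-token-nq", retriever=retriever, top_k=1)

# The retriever embeds passages that do not already carry embeddings before generation.
docs = [Document(text="The capital of Germany is the city state of Berlin.")]
result = generator.predict(query="What is the capital of Germany?", documents=docs, top_k=1)
print(result["answers"])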
def main():
    global args, best_acc1
    args = parser.parse_args()

    #########################################################################################
    # Create options
    #########################################################################################

    options = {
        'vqa': {
            'trainsplit': args.vqa_trainsplit
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            # safe_load avoids arbitrary object construction and the
            # missing-Loader warning of plain yaml.load
            options_yaml = yaml.safe_load(handle)
        options = utils.update_values(options, options_yaml)
    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    if args.help_opt:
        return

    # Set datasets options
    if 'vgenome' not in options:
        options['vgenome'] = None

    #########################################################################################
    # Create needed datasets
    #########################################################################################

    trainset = datasets.factory_VQA(options['vqa']['trainsplit'],
                                    options['vqa'],
                                    options['coco'],
                                    options['vgenome'])
    train_loader = trainset.data_loader(batch_size=options['optim']['batch_size'],
                                        num_workers=args.workers,
                                        shuffle=True)

    if options['vqa']['trainsplit'] == 'train':
        valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        val_loader = valset.data_loader(batch_size=2, num_workers=args.workers)

    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'], options['coco'])
        test_loader = testset.data_loader(batch_size=options['optim']['batch_size'],
                                          num_workers=args.workers)

    #########################################################################################
    # Create model, criterion and optimizer
    #########################################################################################

    config = RagConfig.from_pretrained("facebook/rag-token-nq")
    config.index_name = "legacy"
    config.use_dummy_dataset = False
    config.question_encoder.return_dict = True
    config.n_docs = 10
    # config.n_docs = 15

    if not args.evaluate and not args.resume:
        tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base", config=config)
        retriever = RagRetriever.from_pretrained("facebook/rag-token-base", config=config)
        model = RagTokenForGeneration.from_pretrained("facebook/rag-token-base",
                                                      retriever=retriever,
                                                      config=config)
    else:
        checkpoint_dir = os.path.join(options['logs']['dir_logs'],
                                      "epoch_{}".format(args.start_epoch))
        tokenizer = RagTokenizer.from_pretrained(checkpoint_dir, config=config)
        retriever = RagRetriever.from_pretrained(checkpoint_dir, config=config)
        model = RagTokenForGeneration.from_pretrained(checkpoint_dir,
                                                      retriever=retriever,
                                                      config=config)
    model.cuda()
    criterion = criterions.factory(options['vqa'], cuda=True)

    # Exclude biases and LayerNorm weights from weight decay. Note that both
    # groups currently use 0.0, so the split has no effect until a nonzero
    # decay is set for the first group.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=options['optim']['lr'], eps=1e-8)
    # optimizer = torch.optim.SGD(optimizer_grouped_parameters, lr=options['optim']['lr'], momentum=0.9)
    #########################################################################################
    # args.resume: resume from a checkpoint OR create logs directory
    #########################################################################################

    exp_logger = None

    # Or create logs directory
    # os.system('mkdir -p ' + options['logs']['dir_logs'])
    path_new_opt = os.path.join(options['logs']['dir_logs'],
                                os.path.basename(args.path_opt))
    path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
    with open(path_new_opt, 'w') as f:
        yaml.dump(options, f, default_flow_style=False)
    with open(path_args, 'w') as f:
        yaml.dump(vars(args), f, default_flow_style=False)

    if exp_logger is None:
        # Set loggers
        exp_name = os.path.basename(options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(exp_logger.info['model_params']))

    #########################################################################################
    # args.evaluate: on valset OR/AND on testset
    #########################################################################################

    if args.evaluate:
        path_logger_json = os.path.join(options['logs']['dir_logs'], 'logger.json')

        if options['vqa']['trainsplit'] == 'train':
            acc1, val_results = engine.validate(val_loader, model, retriever, tokenizer,
                                                criterion, exp_logger, args.start_epoch, 100)
            # save results and compute OpenEnd accuracy
            exp_logger.to_json(path_logger_json)
            save_results(val_results, args.start_epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
        return
    else:
        for epoch in range(args.start_epoch + 1, options['optim']['epochs']):
            engine.train(train_loader, model, retriever, tokenizer, criterion,
                         optimizer, exp_logger, epoch, args.print_freq)

            # remember best prec@1 and save checkpoint
            # (best-accuracy tracking is not implemented yet, so every epoch
            # is saved as the current best)
            is_best = True
            save_checkpoint({
                'epoch': epoch,
                'arch': options['model']['arch'],
                'best_acc1': best_acc1,
                'exp_logger': exp_logger
            }, model, tokenizer, retriever,
                options['logs']['dir_logs'], args.save_model, True)
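# make_meters() is used above but not defined in this excerpt. A minimal sketch
# of what such a helper could look like, assuming the AvgMeter/SumMeter classes
# from this project's logger module; the exact meter names are assumptions,
# not the original implementation:
def make_meters():
    return {
        'loss': logger.AvgMeter(),        # running average of the training loss
        'acc1': logger.AvgMeter(),        # running average of top-1 accuracy
        'batch_time': logger.AvgMeter(),  # per-batch wall-clock time
        'data_time': logger.AvgMeter(),   # per-batch data-loading time
        'epoch_time': logger.SumMeter(),  # total time for the epoch
    }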