import argparse
from os.path import dirname, basename

from fairseq import checkpoint_utils, tasks
from fairseq.models.roberta import RobertaModel, RobertaHubInterface


def loadRobertaCheckpoint(pathBERTCheckpoint, pathData, from_pretrained=False):
    """
    Load a RoBERTa model from a checkpoint.
    To load a pretrained model from fairseq, set from_pretrained=True.
    """
    if from_pretrained:
        # Requires a connection to download the BPE; may fail for trained
        # checkpoints that contain a cfg entry.
        roberta = RobertaModel.from_pretrained(dirname(pathBERTCheckpoint),
                                               basename(pathBERTCheckpoint),
                                               pathData)
    else:
        # Set up the args Namespace
        model_args = argparse.Namespace(task='masked_lm',
                                        seed=-1,
                                        output_dictionary_size=-1,
                                        data=pathData,
                                        path=pathBERTCheckpoint)

        # Set up the task
        task = tasks.setup_task(model_args)

        # Load the model
        models, _model_args = checkpoint_utils.load_model_ensemble([model_args.path], task=task)
        model = models[0]

        # Wrap in a RobertaHubInterface (consistent with RobertaModel.from_pretrained)
        roberta = RobertaHubInterface(_model_args, task, model)

    return roberta
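# Usage sketch (assumption, not from the source): load a local checkpoint
# together with the binarized data directory holding its dictionary.
roberta = loadRobertaCheckpoint('checkpoints/checkpoint_best.pt',  # hypothetical path
                                'data-bin/',                       # hypothetical path
                                from_pretrained=False)
roberta.eval()  # disable dropout before inference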
def _load_model(self, path: str, bpe: str, bpe_filename: str) -> RobertaHubInterface:
    if path in ("xlmr.large", "xlmr.base"):
        # Official XLM-R checkpoints are fetched straight from the torch hub.
        return hub.load("pytorch/fairseq", path, force_reload=True)
    else:
        # Prefer an exported model.pt; fall back to the best training checkpoint.
        checkpoint_file = "model.pt" if os.path.exists(os.path.join(path, "model.pt")) else "checkpoint_best.pt"
        loaded = hub_utils.from_pretrained(
            model_name_or_path=path,
            checkpoint_file=checkpoint_file,
            data_name_or_path=path,
            bpe=bpe,
            # The same SentencePiece model is passed under both keyword names.
            sentencepiece_vocab=os.path.join(path, bpe_filename),
            sentencepiece_model=os.path.join(path, bpe_filename),
            load_checkpoint_heads=True,
            archive_map=RobertaModel.hub_models(),
            cpu=False
        )
        return RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
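# Usage sketch (assumption, not from the source): `loader` stands for an
# instance of the class that defines _load_model; the directory and file
# names below are hypothetical.
roberta = loader._load_model("models/roberta_custom",
                             bpe="sentencepiece",
                             bpe_filename="sentencepiece.bpe.model")
roberta.eval()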
def evaluate_task(self):
    checkpoints_output_dir = os.path.join("checkpoints", self.model_name, self.task.spec().output_path())
    checkpoint_file = "checkpoint_last.pt" if self.task.spec().no_dev_set else "checkpoint_best.pt"
    loaded = hub_utils.from_pretrained(
        model_name_or_path=checkpoints_output_dir,
        checkpoint_file=checkpoint_file,
        data_name_or_path=self.task_output_dir,
        bpe="sentencepiece",
        sentencepiece_vocab=os.path.join(self.model_dir, "sentencepiece.bpe.model"),
        load_checkpoint_heads=True,
        archive_map=RobertaModel.hub_models())
    roberta = RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
    evaluator = TaskEvaluator(self.task, self.task_id, roberta, self.input_dir, checkpoints_output_dir)
    return evaluator.evaluate()
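# Sketch (assumption, not from the source): once loaded with
# load_checkpoint_heads=True, fine-tuned heads can be queried directly.
# 'sentence_classification_head' is fairseq's default head name; the real
# name depends on how the task registered it.
tokens = roberta.encode('Przykładowy tekst do klasyfikacji.')  # "Sample text to classify."
logprobs = roberta.predict('sentence_classification_head', tokens)
predicted_label = logprobs.argmax(dim=1).item()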
from fairseq import hub_utils
from fairseq.models.roberta import RobertaModel, RobertaHubInterface
import os
from tqdm import tqdm

model_path = "polish_roberta_large_no_finetune"

loaded = hub_utils.from_pretrained(
    model_name_or_path=model_path,
    data_name_or_path=model_path,
    bpe="sentencepiece",
    sentencepiece_vocab=os.path.join(model_path, "sentencepiece.bpe.model"),
    load_checkpoint_heads=True,
    archive_map=RobertaModel.hub_models(),
    cpu=False)
roberta = RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
roberta.eval()
roberta.cuda()

# "Ala <mask>, kota" plays on the Polish primer sentence "Ala ma kota" ("Ala has a cat").
preds = roberta.fill_mask('Ala <mask>, kota', topk=3)
# import pdb; pdb.set_trace()


def predict(f_in_path, f_out_path):
    f_in = open(f_in_path, 'r', newline='\n')
    f_out = open(f_out_path, 'w', newline='\n')
    for line in tqdm(f_in, total=19986):  # total: expected number of input lines
        _, _, before, after = line.split('\t')
        before = ' '.join(
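# For reference: fill_mask returns a list of topk
# (filled_sentence, probability, token) tuples, so `preds` above can be
# inspected like this (sketch, not from the source):
for filled, prob, token in preds:
    print(f'{token!r} ({prob:.3f}): {filled}')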
vocab_model_file = "wikipedia_upper_voc_32000_sen10000000.model"
vocab_path = os.path.join(root_path, "vocab", vocab_model_file)

#%%
loaded = hub_utils.from_pretrained(
    model_name_or_path=model_path,
    checkpoint_file=checkpoint_file,
    data_name_or_path='./',
    bpe="sentencepiece",
    sentencepiece_vocab=vocab_path,
    load_checkpoint_heads=True,
    archive_map=RobertaModel.hub_models(),
    cpu=True
)
roberta = RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
roberta.eval()

#%%
def print_mask(s, predicted):
    print(s)
    for p in predicted:
        print(f'\t{p[2]} - {p[0]} - confidence {p[1]}')

sentences = [
    # "Bolesław Bierut took power in <mask>." (expected: 1948)
    'Bolesław Bierut objął rządy w <mask> roku.',
    # "The greatest <mask> in the modern world is hunger."
    'Największym <mask> we współczesnym świecie jest głód.',
    # "Wikipedia began as a complementary project for <mask>, a free online encyclopedia" (expected: Nupedii)
    'Wikipedia powstała jako projekt uzupełniający dla <mask>, darmowej encyklopedii internetowej',
    # "Nicolaus Copernicus worked in Olsztyn, the one who <mask> the Earth and stopped the Sun."
    'W Olsztynie pracował Mikołaj Kopernik, ten który <mask> ziemię a wstrzymał słońce.',
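# Plausible driver cell (assumption, not from the source): fill each mask
# and pretty-print the top predictions with the helper defined above.
for s in sentences:
    print_mask(s, roberta.fill_mask(s, topk=5))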