def build_model(cls, args, task):
    model_fast = RobertaModel.build_model(args, task)
    model_slow = RobertaModel.build_model(args, task)
    if args.roberta_model_path != "":
        state = checkpoint_utils.load_checkpoint_to_cpu(args.roberta_model_path)
        model_fast.load_state_dict(state["model"], strict=True, args=args)
        model_slow.load_state_dict(state["model"], strict=True, args=args)
    else:
        model_slow.load_state_dict(model_fast.state_dict(), strict=True, args=args)

    proj = None
    if args.use_proj:
        # NOTE: always uses share_proj
        langs = ["share_lang"]
        proj = build_projection_dict(langs, args.encoder_embed_dim,
                                     args.activation_fn, args.fp16)

    if "xlco_queue_size" in args:
        xlco_queue_size = args.xlco_queue_size
    else:
        xlco_queue_size = 1
    print("xlco_queue_size is set as %d" % xlco_queue_size, flush=True)
    queue = torch.randn(xlco_queue_size, args.encoder_embed_dim)

    return cls(model_fast, model_slow, queue, proj=proj)
def add_args(parser): RobertaModel.add_args(parser) parser.add_argument( "--no-final-layer-norm", action="store_true", help=("don't add final layernorm (only applicable when " "--encoder-normalize-before=True"), )
def __init__(self):
    if not os.path.exists(AGGREGATOR_DIR):
        os.makedirs(AGGREGATOR_DIR)
    if not os.path.isfile(AGGREGATOR_2015_2016):
        print("Downloading aggregators from s3...")
        wget.download(AGGREGATOR_2015_2016_URL, AGGREGATOR_2015_2016,
                      bar=self._download_progress_bar)
    if not os.path.isfile(AGGREGATOR_2015_2017):
        print("Downloading aggregators from s3...")
        wget.download(AGGREGATOR_2015_2017_URL, AGGREGATOR_2015_2017,
                      bar=self._download_progress_bar)
    if not os.path.isfile(AGGREGATOR_2015_2016_8_dim):
        print("Downloading aggregators from s3...")
        wget.download(AGGREGATOR_2015_2016_8_dim_URL, AGGREGATOR_2015_2016_8_dim,
                      bar=self._download_progress_bar)
    if not os.path.isfile(AGGREGATOR_2015_2017_8_dim):
        print("Downloading aggregators from s3...")
        wget.download(AGGREGATOR_2015_2017_8_dim_URL, AGGREGATOR_2015_2017_8_dim,
                      bar=self._download_progress_bar)
    if not os.path.isfile(ROBERTA_STS_PATH + '/checkpoint_best.pt'):
        print("Downloading ROBERTA STS model from s3...")
        wget.download(ROBERTA_STS_URL, ROBERTA_STS_PATH + '/checkpoint_best.pt',
                      bar=self._download_progress_bar)
    if not os.path.isfile(ROBERTA_MNLI_PATH + '/model_mnli.pt'):
        print("Downloading ROBERTA MNLI model from s3...")
        wget.download(ROBERTA_MNLI_URL, ROBERTA_MNLI_PATH + '/model_mnli.pt',
                      bar=self._download_progress_bar)

    self.roberta_STS = RobertaModel.from_pretrained(
        checkpoint_file='checkpoint_best.pt',
        model_name_or_path=ROBERTA_STS_PATH)
    self.roberta_STS.eval()
    self.roberta_MNLI = RobertaModel.from_pretrained(
        checkpoint_file='model_mnli.pt',
        model_name_or_path=ROBERTA_MNLI_PATH)
    self.roberta_MNLI.eval()

    self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    self.gpt_model = GPT2LMHeadModel.from_pretrained('gpt2')

    self.agg_one = load(AGGREGATOR_2015_2016)
    self.agg_two = load(AGGREGATOR_2015_2017)
    self.agg_one_8_dim = load(AGGREGATOR_2015_2016_8_dim)
    self.agg_two_8_dim = load(AGGREGATOR_2015_2017_8_dim)
def loadRobertaCheckpoint(pathBERTCheckpoint, pathData, from_pretrained=False):
    """
    Load a RoBERTa model from a checkpoint.
    If loading a pretrained model from fairseq, set from_pretrained=True.
    """
    if from_pretrained:
        # Requires a connection to download the BPE; may fail for trained
        # checkpoints that contain a cfg
        roberta = RobertaModel.from_pretrained(dirname(pathBERTCheckpoint),
                                               basename(pathBERTCheckpoint),
                                               pathData)
    else:
        # Set up the args Namespace
        model_args = argparse.Namespace(task='masked_lm',
                                        seed=-1,
                                        output_dictionary_size=-1,
                                        data=pathData,
                                        path=pathBERTCheckpoint)

        # Set up the task
        task = tasks.setup_task(model_args)

        # Load the model
        models, _model_args = checkpoint_utils.load_model_ensemble(
            [model_args.path], task=task)
        model = models[0]

        # Wrap up in a RobertaHubInterface (to be consistent with
        # RobertaModel.from_pretrained)
        roberta = RobertaHubInterface(_model_args, task, model)

    return roberta
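# Hedged usage sketch for loadRobertaCheckpoint above (not from the original
# project): the checkpoint and data-bin paths below are placeholders.
if __name__ == '__main__':
    roberta = loadRobertaCheckpoint('/path/to/checkpoint_best.pt',
                                    '/path/to/data-bin',
                                    from_pretrained=False)
    roberta.eval()
    tokens = roberta.encode('Hello world!')      # LongTensor of BPE token ids
    features = roberta.extract_features(tokens)  # (1, seq_len, hidden_dim)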
def __init__(self, device, model_path='res/roberta.large'):
    super().__init__()
    self.device = device
    with torch.no_grad():
        self.roberta = RobertaModel.from_pretrained(
            model_path, checkpoint_file='model.pt')
    self.roberta.eval()
def Roberta_feature_extraction(ids, texts, feature_file_name):
    roberta = RobertaModel.from_pretrained('roberta.large', checkpoint_file='model.pt')
    roberta.eval()
    feature_dict = {}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        tokens = roberta.encode(title)
        # assert tokens.tolist() == [0, 31414, 232, 328, 2]
        print(tokens.tolist())
        roberta.decode(tokens)  # 'Hello world!'

        # Extract the last layer's features
        last_layer_features = roberta.extract_features(tokens)
        # assert last_layer_features.size() == torch.Size([1, 5, 1024])
        print(torch.mean(last_layer_features, 1, True))
        # print(last_layer_features.detach().numpy().shape())
        print(len(torch.mean(last_layer_features, 1, True).detach().numpy().tolist()[0][0]))
        # print(np.mean(last_layer_features.detach().numpy(), axis=0).tolist()[0])
        print(torch.mean(last_layer_features, 1, True).detach().numpy().tolist()[0][0])

        # Mean-pool the last-layer features over the sequence as this item's embedding
        feature_dict[id] = torch.mean(last_layer_features, 1, True).detach().numpy().tolist()[0][0]

    np.save(feature_file_name, feature_dict)
def __init__(self, args, encoder):
    super().__init__(encoder)
    self.args = args

    # We follow BERT's random weight initialization
    self.apply(init_bert_params)

    self.classification_heads = nn.ModuleDict()

    ############ Adding the pretrained SSL models to extract features ############
    if self.args.a_only or self.args.all_in:
        self.roberta_vqwav2vec = RobertaModel.from_pretrained(
            '/hpc/gsir059/INTERSPEECH/MOSI-SEMI/trained_ssl/wav2vec/vq-wav2vec-Kmeans-Roberta',
            checkpoint_file='bert_kmeans.pt')

        if self.args.frozen_ssl:
            for param in self.roberta_vqwav2vec.parameters():
                param.requires_grad = False

    if self.args.t_only or self.args.all_in:
        roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')

        ############ Freezing the pretrained SSL parameters ############
        self.model_text2vec = roberta
        if self.args.frozen_ssl:
            for param in self.model_text2vec.parameters():
                param.requires_grad = False
def __init__(self):
    self.model = RobertaModel.from_pretrained(
        "/data/models/roberta.large",
        checkpoint_file="model.pt",
    )
    self.model.to("cpu")
    self.model.eval()
def from_pretrained(cls, hparams: HyperOptArgumentParser, lm_head: bool = False):
    if not os.path.exists("pretrained/"):
        os.mkdir("pretrained/")

    pretrained_model = hparams.pretrained_model
    if pretrained_model == "roberta.base":
        download_file_maybe_extract(
            ROBERTA_BASE_URL,
            directory="pretrained",
            check_files=[ROBERTA_BASE_MODEL_NAME],
        )
    elif pretrained_model == "roberta.large":
        download_file_maybe_extract(
            ROBERTA_LARGE_URL,
            directory="pretrained",
            check_files=[ROBERTA_LARGE_MODEL_NAME],
        )
    else:
        raise Exception(f"{pretrained_model} is an invalid RoBERTa model.")

    roberta = RobertaModel.from_pretrained("pretrained/" + pretrained_model,
                                           checkpoint_file="model.pt")
    roberta.eval()
    tokenizer = RoBERTaTextEncoder(
        roberta.encode, roberta.task.source_dictionary.__dict__["indices"])
    return RoBERTa(roberta=roberta, tokenizer=tokenizer,
                   hparams=hparams, lm_head=lm_head)
def predict():
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", default=None, type=str, required=True, help="")
    parser.add_argument("--task", default=None, type=str, required=True, help="")
    parser.add_argument("--data_dir", default=None, type=str, required=True, help="")
    args = parser.parse_args()
    # print(args)

    roberta = RobertaModel.from_pretrained(
        args.output_dir,  # './outputs/RTE/7/',
        checkpoint_file='checkpoint_best.pt',
        data_name_or_path=args.data_dir)

    label_fn = lambda label: roberta.task.label_dictionary.string(
        [label + roberta.task.target_dictionary.nspecial])
    # print(label_fn)

    ncorrect, nsamples = 0, 0
    roberta.cuda()
    roberta.eval()

    with open('../data-superglue-csv/' + args.task + '/test.tsv') as fin:
        fin.readline()
        preds = []
        for index, line in enumerate(fin):
            tokens = line.strip().split('\t')
            # print(tokens)
            sent1, sent2 = tokens[0], tokens[1]
            # print(sent1, "\n", sent2)
            tokens = roberta.encode(sent1, sent2)
            # print(tokens)
            if len(tokens) > 512:
                # Truncate to 512 tokens: keep the leading <s> and the last 511 tokens
                # print(len(tokens))
                # print(tokens)
                tokens = torch.cat((tokens[0].reshape(1), tokens[-511:]), 0)
                # print(tokens)
            logits = roberta.predict('sentence_classification_head', tokens)
            prediction = F.log_softmax(logits, dim=-1).argmax().item()
            # print(prediction)
            prediction_label = label_fn(prediction)
            # print(prediction_label)
            preds.append(prediction_label)
    print(preds)

    with open(args.output_dir + 'pred_results', "w") as writer:
        # print(label_list)
        for i in range(len(preds)):
            # json_i = "\"idx: %d, \"label\": \"label_i\""
            writer.write("{\"idx\": %d, \"label\": \"%s\"}\n" % (i, preds[i]))
def sentence_predict(task, ckpdir, ckpname, savedir, datadir=None):
    if datadir is None:
        datadir = 'data/{}-bin/'.format(task)
        if task == "AX":
            datadir = 'data/MNLI-bin/'

    roberta = RobertaModel.from_pretrained(ckpdir, ckpname, datadir)
    roberta.cuda()
    roberta.eval()
    label_fn = lambda label: roberta.task.label_dictionary.string(
        [label + roberta.task.target_dictionary.nspecial])

    tasks = [task]
    testfiles = [
        os.path.join(datadir, '../glue_data/{}/test.tsv'.format(task))
    ]
    if task == "AX":
        testfiles = [
            os.path.join(datadir, '../glue_data/diagnostic/diagnostic.tsv')
        ]
    elif task == "MNLI":
        tasks = ["MNLI-m", "MNLI-mm"]
        testfiles = [
            os.path.join(datadir, '../glue_data/MNLI/test_matched.tsv'),
            os.path.join(datadir, '../glue_data/MNLI/test_mismatched.tsv')
        ]

    for task, testfile in zip(tasks, testfiles):
        with open(os.path.join(savedir, '{}.tsv'.format(task)), 'wt') as out_file:
            tsv_writer = csv.writer(out_file, delimiter='\t')
            tsv_writer.writerow(['index', 'prediction'])
            with open(testfile) as fin:
                fin.readline()
                for index, line in tqdm(enumerate(fin)):
                    tokens = line.strip().split('\t')
                    if task in ['CoLA', 'SST-2']:
                        tokens = roberta.encode(tokens[1])
                    elif task == "MRPC":
                        tokens = roberta.encode(tokens[3], tokens[4])
                    elif task == "STS-B":
                        tokens = roberta.encode(tokens[7], tokens[8])
                    elif task in ["MNLI-m", "MNLI-mm"]:
                        tokens = roberta.encode(tokens[8], tokens[9])
                    elif task in ["RTE", "QNLI", "QQP", "AX"]:
                        tokens = roberta.encode(tokens[1], tokens[2])

                    if task == "STS-B":
                        prediction_label = roberta.predict(
                            'sentence_classification_head', tokens,
                            return_logits=True).item()
                        prediction_label = min(1.0, max(0.0, prediction_label))
                    else:
                        prediction = roberta.predict(
                            'sentence_classification_head', tokens).argmax().item()
                        if 'MNLI' in task:
                            prediction = 2 - prediction
                        prediction_label = label_fn(prediction)
                    tsv_writer.writerow([index, prediction_label])
def evaluate(words: List[str], path: Path = None,
             model: RobertaModel = None, print_step: int = 1000):
    if not model:
        model = RobertaModel.from_pretrained('../models/robbert', checkpoint_file='model.pt')
        model.eval()
    wordlistfiller = WordListFiller(words, model=model)

    dataset_path = path if path is not None else models_path / ("-".join(words) + ".tsv")

    correct = 0
    total = 0
    errors = 0
    with open(dataset_path) as input_file:
        for line in input_file:
            sentence, index = line.split('\t')
            expected = words[int(index.strip())]
            try:
                predicted = wordlistfiller.find_optimal_word(sentence)
                if predicted is None:
                    errors += 1
                elif predicted == expected:
                    correct += 1
                total += 1
                if total % print_step == 0:
                    print("{0:.2f}%".format(100 * correct / total),
                          correct, total, str(errors) + " errors",
                          expected, predicted, sentence, sep=' / ')
            except Exception:
                print("Error with", line)
                errors += 1
                total += 1
    return correct, total, errors
def __init__(self, model_dir=MODEL_DIR, ckpt_file=CHECKPOINT_FILE, use_gpu=False):
    self.model = RobertaModel.from_pretrained(model_dir, checkpoint_file=ckpt_file)
    self.model.eval()  # disable dropout
    if use_gpu:
        self.model.cuda()
def __init__(self):
    super().__init__(embedding_dim=768)
    self.roberta = RobertaModel.from_pretrained(
        "/Users/mark/Documents/Datasets/Pretrained_models/RoBERTa/roberta.base",
        checkpoint_file="model.pt",
    )
    self.fitted: bool = False
def __init__(self, cfg: Wav2BertConfig, w2v_encoder: BaseFairseqModel):
    super().__init__()
    self.cfg = cfg
    self.w2v_encoder = w2v_encoder

    from fairseq.models.roberta import RobertaModel
    if os.path.isfile(os.path.join(cfg.bert_path, 'model.pt')):
        print('loading bert from cfg path')
        bert = RobertaModel.from_pretrained(cfg.bert_path, checkpoint_file='model.pt')
    else:
        print('loading bert from relative path')
        bert = RobertaModel.from_pretrained('models/roberta.base', checkpoint_file='model.pt')

    self.bert_layers = bert.model.encoder.sentence_encoder.layers
    self.proj = Linear(cfg.encoder_embed_dim, len(bert.task.target_dictionary))
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.encoder_type = opt['encoder_type']
    if opt['encoder_type'] == EncoderModelType.ROBERTA:
        from fairseq.models.roberta import RobertaModel
        self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
        hidden_size = self.bert.args.encoder_embed_dim
        self.pooler = LinearPooler(hidden_size)
    else:
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        hidden_size = self.bert_config.hidden_size

    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.decoder_opt = opt['answer_opt']
    self.task_types = opt["task_types"]
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']

    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        task_type = self.task_types[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if task_type == TaskType.Span:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SeqenceLabeling:
            out_proj = nn.Linear(hidden_size, lab)
        elif task_type == TaskType.MaskLM:
            if opt['encoder_type'] == EncoderModelType.ROBERTA:
                # TODO: xiaodl
                out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
            else:
                out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
        else:
            if decoder_opt == 1:
                out_proj = SANClassifier(hidden_size, hidden_size, lab, opt,
                                         prefix='answer', dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
        self.scoring_list.append(out_proj)

    self.opt = opt
    self._my_init()
def __init__(self, configs):
    super(RobertaACSA, self).__init__()
    self.configs = configs
    self.roberta = RobertaModel.from_pretrained('pretrained/roberta.large',
                                                checkpoint_file='model.pt')
    self.linear_hidden = torch.nn.Linear(configs.ROBERTA_DIM, configs.LINEAR_HIDDEN_DIM)
    self.linear_output = torch.nn.Linear(configs.LINEAR_HIDDEN_DIM, 3)
    self.dropout_output = torch.nn.Dropout(0.1)
def add_args(parser):
    RobertaModel.add_args(parser)

    # add args for Linformer
    parser.add_argument('--compressed', type=int,
                        help='compressed ratio of sequence length')
    parser.add_argument('--shared-kv-compressed', type=int,
                        help='share compressed matrix between k and v, in each layer')
    parser.add_argument('--shared-layer-kv-compressed', type=int,
                        help='share compressed matrix between k and v and across all layers')
    parser.add_argument('--freeze-compress', type=int,
                        help='freeze the parameters in compressed layer')
def __init__(self):
    self.model = RobertaModel.from_pretrained(
        "/data/models/icebert-base-36k",
        checkpoint_file="model.pt",
        bpe="gpt2",
        gpt2_encoder_json="/data/models/icebert-base-36k/icebert-bpe-vocab.json",
        gpt2_vocab_bpe="/data/models/icebert-base-36k/icebert-bpe-merges.txt",
    )
    self.model.to("cpu")
    self.model.eval()
def load_roberta(name=None, roberta_cache_path=None, roberta_use_gpu=False):
    if not roberta_cache_path:
        # Load the RoBERTa model from torch hub
        roberta = torch.hub.load('pytorch/fairseq', name)
    else:
        roberta = RobertaModel.from_pretrained(roberta_cache_path, checkpoint_file='model.pt')
    roberta.eval()
    if roberta_use_gpu:
        roberta.cuda()
    return roberta
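# Hedged usage sketch for load_roberta above (not from the original project):
# either pull roberta.base from torch hub, or pass a local cache directory
# that contains model.pt.
import torch

if __name__ == '__main__':
    roberta = load_roberta(name='roberta.base', roberta_use_gpu=False)
    tokens = roberta.encode('Hello world!')
    with torch.no_grad():
        features = roberta.extract_features(tokens)  # (1, seq_len, 768) for roberta.base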
def predict():
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", default=None, type=str, required=True, help="")
    parser.add_argument("--task", default=None, type=str, required=True, help="")
    parser.add_argument("--data_dir", default=None, type=str, required=True, help="")
    args = parser.parse_args()
    # print(args)

    roberta = RobertaModel.from_pretrained(
        args.output_dir,  # './outputs/RTE/7/',
        checkpoint_file='checkpoint_best.pt',
        data_name_or_path=args.data_dir)

    label_fn = lambda label: roberta.task.label_dictionary.string(
        [label + roberta.task.target_dictionary.nspecial])
    # print(label_fn)

    ncorrect, nsamples = 0, 0
    roberta.cuda()
    roberta.eval()

    with open('../data-superglue-csv/' + args.task + '/val.tsv') as fin:
        fin.readline()
        logits = np.array([])
        num_classes = 2
        for index, line in enumerate(fin):
            tokens = line.strip().split('\t')
            sent1, sent2 = tokens[0], tokens[1]
            tokens = roberta.encode(sent1, sent2)
            # predict() returns a (1, num_classes) tensor; flatten it into the
            # running logits array
            logit = roberta.predict('sentence_classification_head',
                                    tokens).detach().cpu().numpy().flatten()
            logits = np.append(logits, logit)
            print(logit)

    logits = logits.reshape((-1, num_classes))
    preds = np.argmax(logits, -1)
    print(preds)

    with open(args.output_dir + 'eval_results2', "w") as writer:
        # print(label_list)
        for i in range(len(preds)):
            # json_i = "\"idx: %d, \"label\": \"label_i\""
            writer.write("{\"idx\": %d, \"label\": \"%s\"}\n" % (i, preds[i]))
def __init__(self, model_dir, model_name, device):
    self.model = RobertaModel.from_pretrained(model_dir, checkpoint_file=model_name)
    self.model.to(device=device)
    self.device = device
    self.bpe = self.model.bpe
    self.task = self.model.task
    self.max_sentence_length = 256
    self.cosine_similarity = torch.nn.CosineSimilarity(dim=0)
    self.mask = "<mask>"
    self.start_sentence = "<s>"
    self.period = '.'
def load_roberta(name=None, roberta_cache_path=None):
    if not roberta_cache_path:
        roberta = torch.hub.load('pytorch/fairseq', name)
    else:
        roberta = RobertaModel.from_pretrained(roberta_cache_path, checkpoint_file='model.pt')
    roberta.eval()
    if torch.cuda.is_available():
        roberta.cuda()
    return roberta
def mnli_dev(ckpdir, ckpname, datadir=None):
    task = 'MNLI'
    if datadir is None:
        datadir = 'data/{}-bin/'.format(task)

    roberta = RobertaModel.from_pretrained(ckpdir, ckpname, datadir)
    roberta.cuda()
    roberta.eval()
    label_fn = lambda label: roberta.task.label_dictionary.string(
        [label + roberta.task.target_dictionary.nspecial])
    str2label = lambda str: roberta.task.label_dictionary.encode_line(str)[0].item() \
        - roberta.task.target_dictionary.nspecial

    tasks = [task]
    testfiles = [
        os.path.join(datadir, '../glue_data/{}/test.tsv'.format(task))
    ]
    tasks = ["MNLI-m", "MNLI-mm"]
    testfiles = [
        os.path.join(datadir, '../glue_data/MNLI/dev_matched.tsv'),
        os.path.join(datadir, '../glue_data/MNLI/dev_mismatched.tsv')
    ]

    for task, testfile in zip(tasks, testfiles):
        tv_loss = 0
        tv_low = 0
        tv_high = 0
        accuracy = 0
        print("Task: {}".format(task))
        with open(testfile) as fin:
            fin.readline()
            pbar = tqdm(enumerate(fin))
            for index, line in pbar:
                tokens = line.strip().split('\t')
                input_token = roberta.encode(tokens[8], tokens[9])
                log_softmax_out = roberta.predict('sentence_classification_head', input_token)
                prediction = 2 - log_softmax_out.argmax().item()
                labels = np.array([str2label(t.lower()) for t in tokens[10:15]])
                labels = np.array([sum(labels == l) for l in range(3)]) / 5
                assert sum(labels) == 1
                tv_loss += sum(
                    abs(l1 - math.exp(l2)) for l1, l2 in zip(
                        labels, log_softmax_out.detach().cpu().numpy()[0]))
                tv_high += 2 - max(labels)
                tv_low += sum(abs(labels - 1 / 3))
                accuracy += prediction == labels.argmax()
                pbar.set_description(
                    "tv: {:.4f}/ {:.4f}-{:.4f}, accu: {:.4f} ".format(
                        tv_loss / (index + 1), tv_low / (index + 1),
                        tv_high / (index + 1), accuracy / (index + 1)))
def load_model(self, model_name):
    full_model_name = 'models/' + model_name + '.pt'
    if not os.path.exists(full_model_name):
        print(f"{model_name} model not found on models/ directory. "
              f"Downloading from torch.hub ....")
        pretrained = torch.hub.load('pytorch/fairseq', model_name)
        torch.save(pretrained.model, full_model_name)
    pretrained = RobertaModel.from_pretrained(model_name)
    pretrained.eval()
    self.model = pretrained
def fit(self, sentences):
    if self.model is None:
        from fairseq.models.roberta import RobertaModel
        from fairseq.data.encoders.fastbpe import fastBPE
        self.model = RobertaModel.from_pretrained(
            'PhoBERT_base_fairseq', checkpoint_file='model.pt')
        self.model.eval()
        args = BPE()
        self.model.bpe = fastBPE(args)
    return self
def __init__(self, pretrain="auxiliary_data/PhoBERT_base_fairseq"):
    self.phoBERT = RobertaModel.from_pretrained(pretrain, checkpoint_file='model.pt')
    self.phoBERT.eval()

    parser = options.get_preprocessing_parser()
    parser.add_argument('--bpe-codes', type=str, help='path to fastBPE BPE',
                        default=pretrain + "/bpe.codes")
    args, unknown = parser.parse_known_args()
    self.phoBERT.bpe = fastBPE(args)  # Incorporate the BPE encoder into PhoBERT
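# Hedged usage sketch (not from the original project): once the fastBPE encoder
# is attached, the hub-interface helpers work as with any fairseq RoBERTa model.
# `PhoBertEncoder` is a hypothetical name for the class whose __init__ is above.
import torch

encoder = PhoBertEncoder(pretrain="auxiliary_data/PhoBERT_base_fairseq")
tokens = encoder.phoBERT.encode("Xin chào thế giới")     # BPE ids for a Vietnamese sentence
with torch.no_grad():
    features = encoder.phoBERT.extract_features(tokens)  # (1, seq_len, hidden_dim)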
def __init__(self, args):
    super().__init__()
    roberta_model_dir = args.roberta_model_dir
    roberta_model_name = args.roberta_model_name
    roberta_vocab_name = args.roberta_vocab_name
    self.dict_file = "{}/{}".format(roberta_model_dir, roberta_vocab_name)
    self.model = RobertaModel.from_pretrained(
        roberta_model_dir, checkpoint_file=roberta_model_name)
    self.bpe = self.model.bpe
    self.task = self.model.task
    self._build_vocab()
    self._init_inverse_vocab()
def load_bert(self, bertpath):
    if not bertpath:
        return None
    print("LOADING BERT....")
    roberta = RobertaModel.from_pretrained(bertpath, checkpoint_file='bert_kmeans.pt')
    roberta = roberta.eval()
    if torch.cuda.is_available():
        print('moving ROBERTA to CUDA')
        roberta.cuda()
    return roberta
def get_clsemb(model, dataset):
    ckp = sys.argv[3]
    datapath = sys.argv[4]
    p = sys.argv[5]
    head_name = sys.argv[6]

    roberta = RobertaModel.from_pretrained(
        ckp, checkpoint_file='checkpoint_best.pt', data_name_or_path=datapath)
    roberta.cuda()
    roberta.eval()
    label_fn = lambda label: roberta.task.label_dictionary.string(
        [label + roberta.task.label_dictionary.nspecial])

    # test_examples, test_ys = get_test_examples('ruletaker/test.input0.bpe', 'rawrule/d0/test.label')
    # for i in tqdm(batches):
    #     xs = test_examples[i:i+bs]
    #     ys = test_ys[i:i+bs]
    #     xbatch = collate_tokens([roberta.encode(test_examples[j]) for j in range(i, min(len(test_ys), i+bs))], pad_idx=1)
    #     pred = label_fn(roberta.predict('ruletaker_head', xbatch)).argmax(dim=1).cpu().data.numpy()
    #     correct_cnt += np.sum(np.array(ys) == np.array(pred))

    for split in ['train', 'dev', 'test']:
        examples, ys = get_examples(os.path.join(p, '%s.input0' % split),
                                    os.path.join(p, '%s.label' % split))
        print("loaded data from", os.path.join(p, '%s.input0' % split),
              "and", os.path.join(p, '%s.label' % split))
        correct_cnt = 0
        # bs = 8
        # batches = range(0, len(test_examples), bs)
        out = torch.zeros((len(ys), 25, 1024))
        preds = []
        with torch.no_grad():
            for i in tqdm(range(len(examples)), desc=split):
                x = examples[i]
                y = ys[i]
                tokens = roberta.encode(x)
                pred = label_fn(roberta.predict(head_name, tokens).argmax().item())
                preds.append(pred)
                if pred == y:
                    correct_cnt += 1
                features = roberta.extract_features(tokens, return_all_hiddens=True)
                cls_embs = torch.zeros((len(features), 1024))
                for k in range(len(features)):
                    cls_embs[k] = features[k][0, 0, :]
                out[i] = cls_embs.cpu()
        print("acc is", correct_cnt / len(ys))
        torch.save(out, 'out/{0}_{1}_{2}_embs.pt'.format(model, dataset, split))
        torch.save(preds, 'out/{0}_{1}_{2}_preds.pt'.format(model, dataset, split))
        torch.save([int(xx) for xx in ys], 'out/{0}_{1}_{2}_labels.pt'.format(model, dataset, split))