def __init__(self, tokenizer):
    """Multiple-choice RoBERTa paired with a masked-LM head, combined via a learned scalar."""
    super(RobertaForMultipleChoiceWithLM2, self).__init__()
    # Both heads are initialised from the same local pretrained weights file.
    self.roberta_lm = RobertaForMaskedLM.from_pretrained(
        'pre_weights/roberta-large_model.bin',
        config=RobertaConfig.from_pretrained('roberta-large'))
    self.roberta = RobertaForMultipleChoice.from_pretrained(
        'pre_weights/roberta-large_model.bin',
        config=RobertaConfig.from_pretrained('roberta-large'))
    self.tokenizer = tokenizer
    # Learnable scalar -- presumably weights the LM score against the
    # multiple-choice score in forward(); TODO confirm.
    self.lamda = nn.Parameter(torch.tensor([1.0]))
def init_model(self, model_name):
    """Instantiate self.model for the given model_name and move it to the configured device.

    Dispatch order matters: exact names ('Bert', 'Roberta', 'Albert',
    'RobertaLM', 'RobertaLM2') are tested first, then substring matches
    ('GNN', 'LM', 'KBERT').
    """
    if model_name == 'Bert':
        config = BertConfig.from_pretrained('bert-base-uncased')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = BertForMultipleChoice.from_pretrained(
            'pre_weights/bert-base-uncased_model.bin', config=config)
    elif model_name == 'Roberta':
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
        # print('load csqa pretrain weights...')
        # self.model.load_state_dict(torch.load(
        #     'checkpoints/commonsenseQA_pretrain_temp.pth'
        # ))
    elif model_name == 'Albert':
        self.model = AlbertForMultipleChoice.from_pretrained(
            'pre_weights/albert-xxlarge_model.bin',
            config=AlbertConfig.from_pretrained('albert-xxlarge-v1'))
    elif model_name == 'RobertaLM':
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    elif model_name == 'RobertaLM2':
        self.model = RobertaForMultipleChoiceWithLM2(self.tokenizer)
    elif 'GNN' in model_name:
        self.model = SOTA_goal_model(self.args)
    elif 'LM' in model_name:
        # Same setup as the exact 'RobertaLM' branch, reached for any other
        # name that merely contains 'LM'.
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    elif 'KBERT' in model_name:
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    else:
        # Unknown name: self.model keeps whatever value it had before.
        pass
    self.model.to(self.args['device'])
    # Wrap in DataParallel when multi-GPU use is both possible and requested.
    if torch.cuda.device_count() > 1 and self.args['use_multi_gpu']:
        print("{} GPUs are available. Let's use them.".format(
            torch.cuda.device_count()))
        self.model = torch.nn.DataParallel(self.model)
def __init__(self):
    """RoBERTa backbone with a 2-way head and a learned weighting over hidden layers."""
    super(TweetModel, self).__init__()
    config = RobertaConfig.from_pretrained(
        ROOT_PATH + '/input/roberta-base/config.json',
        output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(
        ROOT_PATH + '/input/roberta-base/pytorch_model.bin', config=config)
    self.dropout = nn.Dropout(0.5)
    # Multi-sample dropout probability comes from a module-level constant.
    self.high_dropout = nn.Dropout(USE_MULTI_SAMPLE_DROPOUT_RATE)
    # Two outputs -- presumably start/end logits for span prediction;
    # confirm in forward().
    self.fc = nn.Linear(config.hidden_size, 2)
    nn.init.normal_(self.fc.weight, std=0.02)
    nn.init.normal_(self.fc.bias, 0)
    # Number of transformer layers to mix; -1 means "use all of them".
    if USE_BERT_LAST_N_LAYERS == -1:
        n_weights = config.num_hidden_layers
    else:
        n_weights = USE_BERT_LAST_N_LAYERS  # config.num_hidden_layers + 1
    self.n_layers = n_weights
    # All but the last mixing weight start at -3, biasing the mixture
    # towards the final layer -- assumes a softmax over layer_weights in
    # forward(); TODO confirm.
    weights_init = torch.zeros(n_weights).float()
    weights_init.data[:-1] = -3
    self.layer_weights = torch.nn.Parameter(weights_init)
    self.multi_layer_dropout = nn.Dropout(0.2)
def Bertolo_feature_extraction(ids, texts, feature_file_name):
    """Compute a mean-pooled BERTolo embedding for each text and save them to disk.

    Args:
        ids: sequence of identifiers, parallel to ``texts``.
        texts: sequence of strings to embed.
        feature_file_name: path handed to ``np.save``; the saved object is a
            dict mapping id -> list of floats (one embedding per text).
    """
    config = RobertaConfig.from_pretrained("./bert-like models/bertolo/config.json")
    tokenizer1 = AutoTokenizer.from_pretrained("./bertolo", normalization=True)
    model = AutoModel.from_pretrained("./bertolo", config=config)
    feature_dict = {}
    # Iterate ids/texts in lockstep (avoids index bookkeeping and the
    # builtin-shadowing local ``id`` of the original).
    for text_id, title in zip(ids, texts):
        print(text_id)
        input_ids = tokenizer1.encode(title, return_tensors="pt")
        print(input_ids)
        # Inference only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            features = model(input_ids)[0]  # last hidden state, (1, seq, dim)
        print(features.size())
        # Mean-pool over the token dimension -> one vector per text.
        feature = torch.mean(features, 1, True).detach().numpy()
        feature = list(feature[0][0])
        print(len(feature))
        # BUG FIX: the original wrote ``feature_dict[tumblr_id] = feature``
        # with ``tumblr_id`` undefined in this scope (NameError on the first
        # iteration); the intended key is the current id.
        feature_dict[text_id] = feature
    np.save(feature_file_name, feature_dict)
def get_classification_roberta():
    """Build and compile a 3-class sentiment classifier on top of TF-RoBERTa.

    Returns:
        A compiled keras Model taking (ids, att, tti) int32 inputs of length
        Config.Train.max_len and producing softmax probabilities over 3 classes.
    """
    seq_len = Config.Train.max_len
    input_ids = keras.layers.Input(shape=(seq_len, ), dtype=tf.int32, name='ids')
    attention = keras.layers.Input(shape=(seq_len, ), dtype=tf.int32, name='att')
    token_types = keras.layers.Input(shape=(seq_len, ), dtype=tf.int32, name='tti')

    # Pretrained encoder; only its last hidden state ([0]) feeds the head.
    backbone = TFRobertaModel.from_pretrained(
        Config.Roberta.model,
        config=RobertaConfig.from_pretrained(Config.Roberta.config))
    hidden = backbone(input_ids, attention_mask=attention,
                      token_type_ids=token_types)[0]

    # Classification head: dropout -> average pooling -> dense softmax.
    hidden = keras.layers.Dropout(0.2)(hidden)
    pooled = keras.layers.GlobalAveragePooling1D()(hidden)
    probabilities = keras.layers.Dense(3, activation='softmax',
                                       name='sentiment')(pooled)

    model = keras.models.Model(inputs=[input_ids, attention, token_types],
                               outputs=probabilities)
    # Cosine-decayed LR with label smoothing on the categorical loss.
    model.compile(
        loss=keras.losses.CategoricalCrossentropy(
            label_smoothing=Config.Train.label_smoothing),
        optimizer=keras.optimizers.Adam(
            learning_rate=keras.experimental.CosineDecay(5e-5, 1000)),
        metrics=['acc'])
    return model
def __init__(self):
    """RoBERTa encoder followed by two parallel 1-D CNN stacks, one per output head."""
    super(TweetModel, self).__init__()
    config = RobertaConfig.from_pretrained('roberta/config.json',
                                           output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(
        'roberta/pytorch_model.bin', config=config)
    self.dropout = nn.Dropout(0.15)
    # Head 1: hidden_size -> 128 -> 64, kernel-size-2 convolutions with
    # batch norm and LeakyReLU.
    self.cnn1 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                              torch.nn.BatchNorm1d(128),
                              torch.nn.LeakyReLU())
    self.cnn1_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                torch.nn.BatchNorm1d(64),
                                torch.nn.LeakyReLU())
    # Head 2: identical architecture, independent weights.
    self.cnn2 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                              torch.nn.BatchNorm1d(128),
                              torch.nn.LeakyReLU())
    self.cnn2_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                torch.nn.BatchNorm1d(64),
                                torch.nn.LeakyReLU())
    # One scalar output per head -- presumably start/end span logits;
    # confirm in forward().
    self.fc1 = nn.Linear(64, 1)
    self.fc2 = nn.Linear(64, 1)
    nn.init.normal_(self.fc1.weight, std=0.02)
    nn.init.normal_(self.fc1.bias, 0)
    nn.init.normal_(self.fc2.weight, std=0.02)
    nn.init.normal_(self.fc2.bias, 0)
def __init__(self):
    """Load the roberta-base tokenizer and encoder, exposing all hidden states."""
    self.tok = RobertaTokenizer.from_pretrained("roberta-base")
    # The encoder is built from a config with output_hidden_states enabled,
    # so its forward pass returns every layer's activations.
    self.config = RobertaConfig.from_pretrained("roberta-base")
    self.config.output_hidden_states = True
    self.model = RobertaModel.from_pretrained("roberta-base",
                                              config=self.config)
def load_model(args):
    """Load a BERT/RoBERTa encoder, its embedding sub-module, and tokenizer onto args.device.

    Returns:
        (model, model_embedding, tokenizer); model and embedding are each
        wrapped in DataParallel when args.n_gpu > 1.
    """
    # NOTE(review): both weight paths are hard-coded absolute cache paths and
    # will only resolve on the original author's machine.
    if args.transformer_model.startswith('bert'):
        path = '/home/yinfan/.cache/torch/transformers/bert-base-uncased-pytorch_model.bin'
        config = BertConfig.from_pretrained(args.transformer_model,
                                            output_hidden_states=True)
        tokenizer = BertTokenizer.from_pretrained(args.transformer_model,
                                                  do_lower_case=True)
        model = BertModel.from_pretrained(
            path,
            from_tf=bool('.ckpt' in args.transformer_model),
            config=config)
    else:
        path = '/home/yinfan/.cache/torch/transformers/roberta-base-pytorch_model.bin'
        tokenizer = RobertaTokenizer.from_pretrained(args.transformer_model)
        config = RobertaConfig.from_pretrained(args.transformer_model,
                                               output_hidden_states=True)
        model = RobertaModel.from_pretrained(
            path,
            from_tf=bool('.ckpt' in args.transformer_model),
            config=config)
    # roberta = RobertaModel.from_pretrained(args.roberta_model, cache_dir=args.cache_dir, config=config)
    # Expose the embedding layer as a separate, independently device-placed module.
    model_embedding = model.embeddings
    model_embedding.to(args.device)
    if args.n_gpu > 1:
        model_embedding = torch.nn.DataParallel(model_embedding)
    model.to(args.device)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # Optionally re-initialise every weight to train from scratch.
    if args.untrained_transformer == 1:
        model.apply(init_weights)
    return model, model_embedding, tokenizer
def __init__(self, config: Bunch) -> None:
    """Set up the BERTweet tokenizer, data processor, classifier, and loss.

    Args:
        config: experiment configuration; must provide
            pretrained_model_base_path and max_tokens_per_tweet.
    """
    pl.LightningModule.__init__(self)
    self.config = config
    # fastBPE codes + fairseq Dictionary reproduce BERTweet's original
    # tokenization pipeline.
    bpe_codes_path = os.path.join(
        config.pretrained_model_base_path,
        "BERTweet_base_transformers/bpe.codes",
    )
    bpe = fastBPE(Namespace(bpe_codes=bpe_codes_path))
    vocab = Dictionary()
    vocab.add_from_file(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/dict.txt",
        ))
    tokenizer = BertweetTokenizer(self.config.max_tokens_per_tweet, bpe,
                                  vocab)
    self.data_processor = BertweetDataProcessor(config, tokenizer)
    # BERTweet ships RoBERTa-formatted config and weights.
    model_config = RobertaConfig.from_pretrained(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/config.json",
        ))
    self.model = RobertaForSequenceClassification.from_pretrained(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/model.bin",
        ),
        config=model_config,
    )
    self.loss = CrossEntropyLoss()
def __init__(self):
    """BERTweet encoder with a deep MLP head for binary classification.

    The first dense layer consumes 768 * 4 features -- presumably a
    concatenation of four encoder hidden states; confirm in forward().
    """
    self.num_labels: int = 2
    config: RobertaConfig = RobertaConfig.from_pretrained(
        "./BERTweet_base_transformers/config.json",
        output_hidden_states=True,
    )
    super().__init__(config)
    self.bertweet: RobertaModel = RobertaModel.from_pretrained(
        "./BERTweet_base_transformers/model.bin", config=config)
    # MLP head: 768*4 -> 1024 -> 512 -> 256 -> num_labels.
    self.dense = nn.Linear(
        in_features=768 * 4,
        out_features=1024,
    )
    self.dropout = nn.Dropout(p=0.15)
    self.dense_2 = nn.Linear(
        in_features=1024,
        out_features=512,
    )
    self.dense_3 = nn.Linear(
        in_features=512,
        out_features=256,
    )
    self.classifier = nn.Linear(
        in_features=256,
        out_features=self.num_labels,
    )
def load_model(model_path, model_name, num_classes):
    """Restore a trained SequenceClassifier and its tokenizer in eval mode.

    Args:
        model_path: path to the saved state_dict.
        model_name: HF identifier; 'bert-base-uncased' selects BERT,
            anything else selects RoBERTa.
        num_classes: number of output classes for the classifier head.

    Returns:
        (model, tokenizer) with the model in eval mode.
    """
    # The original duplicated this if/else: once for tokenizer/config and
    # again for the backbone. One branch now builds all three together.
    if model_name == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)
        config = BertConfig.from_pretrained(model_name)
        transformer_model = BertModel.from_pretrained(model_name, config=config)
    else:
        tokenizer = RobertaTokenizer.from_pretrained(model_name, do_lower_case=True)
        config = RobertaConfig.from_pretrained(model_name)
        transformer_model = RobertaModel.from_pretrained(model_name, config=config)
    # The backbone keeps a reference to this config object, so toggling the
    # flag after construction is still seen at forward time.
    config.output_hidden_states = True
    # NOTE(review): n_layers is not defined in this function -- presumably a
    # module-level constant; confirm.
    model = SequenceClassifier(transformer_model, config, n_layers, num_classes)
    # '{model_path}'.format(model_path=model_path) was an identity
    # transformation; pass the path straight through.
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model, tokenizer
def _init_deep_model(self, model_type, model_path, num_labels, num_regs=None):
    """Load a classification model and tokenizer chosen by substring of model_type.

    Branch order matters: 'electra_multitask' must be tested before
    'electra' because the latter is a substring of the former.

    Args:
        model_type: string matched against 'roberta' / 'electra_multitask' / 'electra'.
        model_path: checkpoint path or HF identifier.
        num_labels: classification label count.
        num_regs: regression-head count, used only by the multitask Electra.

    Returns:
        (config, tokenizer, model) with the model in eval mode on self.device.

    Raises:
        NotImplementedError: if model_type matches none of the branches.
    """
    if 'roberta' in model_type:
        tokenizer = RobertaTokenizer.from_pretrained(model_path)
        config = RobertaConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        model = RobertaForSequenceClassification.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    elif 'electra_multitask' in model_type:
        tokenizer = ElectraTokenizer.from_pretrained(model_path)
        # Extra marker token for the multitask input format; vocab_size is
        # updated so the embedding matrix matches the grown tokenizer.
        tokenizer.add_special_tokens({'additional_special_tokens': ['[VALUES]']})
        config = ElectraConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        config.num_regs = num_regs
        config.vocab_size = len(tokenizer)
        model = ElectraForSequenceClassificationMultiTask.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    elif 'electra' in model_type:
        tokenizer = ElectraTokenizer.from_pretrained(model_path)
        config = ElectraConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        model = ElectraForSequenceClassification.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    else:
        raise NotImplementedError()
    return config, tokenizer, model
def __init__(self, model_directory: str, predictor_name: str, device="cuda") -> None:
    """Restore a fine-tuned classifier and its AdamW optimizer state from disk.

    Args:
        model_directory: directory containing config, tokenizer files,
            model weights, and optimizer.pt.
        predictor_name: identifier for this predictor (not used in this
            constructor -- confirm how callers use it).
        device: torch device string for the model.
    """
    self.device = device
    self.config = RobertaConfig.from_pretrained(model_directory)

    # Load in model related information
    self._tokenizer = RobertaTokenizerFast.from_pretrained(
        model_directory, add_special_tokens=False)
    self._model = model = RobertaForSequenceClassification.from_pretrained(
        model_directory, config=self.config).to(device)
    self._model.eval()

    # Prepare optimizer.
    # NOTE(review): neither group sets "weight_decay", so both fall back to
    # the AdamW default and the bias/LayerNorm split below has no effect --
    # confirm whether the groups were meant to use different decay values.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ]
        },
    ]
    self._optimizer = AdamW(optimizer_grouped_parameters)
    # Resume the optimizer exactly where the checkpoint left off.
    self._optimizer.load_state_dict(
        torch.load(os.path.join(model_directory, "optimizer.pt")))
def __init__(self, args, tokenizer, train_dataset=None, dev_dataset=None, test_dataset=None):
    """Relation-extraction trainer: stores datasets, builds config/model, picks a device."""
    self.args = args
    self.tokenizer = tokenizer
    self.train_dataset = train_dataset
    self.dev_dataset = dev_dataset
    self.test_dataset = test_dataset
    # Label maps are loaded from file; id2label keys are stringified because
    # that is the serialized form transformers configs use.
    self.id2label = load_id2label(args.id2label)
    self.num_labels = len(self.id2label)
    self.config = RobertaConfig.from_pretrained(
        args.model_name_or_path,
        num_labels=self.num_labels,
        finetuning_task="VLSP2020-Relex",
        id2label={str(i): label for i, label in self.id2label.items()},
        label2id={label: i for i, label in self.id2label.items()},
    )
    # Two model variants: "es" uses entity-start representations, "all"
    # concatenates everything (per the class names; confirm in the models).
    if self.args.model_type == "es":
        self.model = RobertaEntityStarts.from_pretrained(
            args.model_name_or_path, config=self.config)
    elif self.args.model_type == "all":
        self.model = RobertaConcatAll.from_pretrained(
            args.model_name_or_path, config=self.config)
    # GPU or CPU
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.model.to(self.device)
def build_model(self):
    """Assemble and compile the TF model: RoBERTa encoder plus an LSTM head
    producing start/end outputs."""
    ids = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)
    att = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)
    tok = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)

    # Network architecture
    config = RobertaConfig.from_pretrained(self.config.data.roberta.path +
                                           self.config.data.roberta.config)
    bert_model = TFRobertaModel.from_pretrained(
        self.config.data.roberta.path +
        self.config.data.roberta.roberta_weights,
        config=config)
    x = bert_model(ids, attention_mask=att, token_type_ids=tok)
    # The head is built incrementally by init_head/add_* helper methods,
    # which presumably populate self.start_head and self.end_head --
    # confirm in those helpers.
    self.init_head(x[0])
    self.add_dropout(0.1)
    self.add_lstm(64, True)
    self.add_dropout(0.1)
    self.add_dense(1)
    self.add_activation('softmax')
    self.model = tf.keras.models.Model(
        inputs=[ids, att, tok], outputs=[self.start_head, self.end_head])
    self.model.compile(loss=self.config.model.loss,
                       optimizer=self.config.model.optimizer)
def __init__(self, args, device='cpu'):
    """Entailment-tracking model: RoBERTa encoder, transformer reasoning stack, output heads."""
    super().__init__()
    self.args = args
    self.device = device
    self.epoch = 0
    self.dropout = nn.Dropout(self.args.dropout)

    # Entailment Tracking
    # roberta_model_path = '/research/king3/ik_grp/yfgao/pretrain_models/huggingface/roberta-base'
    roberta_model_path = args.pretrained_lm_path
    roberta_config = RobertaConfig.from_pretrained(roberta_model_path,
                                                   cache_dir=None)
    self.roberta = RobertaModel.from_pretrained(roberta_model_path,
                                                cache_dir=None,
                                                config=roberta_config)
    # Extra transformer encoder on top of RoBERTa: 12 heads, FFN width 4x hidden.
    encoder_layer = TransformerEncoderLayer(self.args.bert_hidden_size, 12,
                                            4 * self.args.bert_hidden_size)
    encoder_norm = nn.LayerNorm(self.args.bert_hidden_size)
    self.transformer_encoder = TransformerEncoder(encoder_layer,
                                                  args.trans_layer,
                                                  encoder_norm)
    self._reset_transformer_parameters()
    # 3-way entailment classifier -- presumably entail/contradict/neutral; confirm.
    self.w_entail = nn.Linear(self.args.bert_hidden_size, 3, bias=True)

    # Logic Reasoning
    # One learned embedding per entailment state.
    self.entail_emb = nn.Parameter(
        torch.rand(3, self.args.bert_hidden_size))
    nn.init.normal_(self.entail_emb)
    self.w_selfattn = nn.Linear(self.args.bert_hidden_size * 2, 1, bias=True)
    self.w_output = nn.Linear(self.args.bert_hidden_size * 2, 4, bias=True)
def main():
    """Load a RobertaDot query encoder from the previous run and train/evaluate it."""
    args = run_parse_args()
    logger.info(args)

    # Setup CUDA, GPU: a single model device (chosen GPU index) or CPU fallback.
    args.use_gpu = torch.cuda.is_available() and not args.no_cuda
    args.model_device = torch.device(
        f"cuda:{args.model_gpu_index}" if args.use_gpu else "cpu")
    args.n_gpu = torch.cuda.device_count()

    # Setup logging
    logger.warning("Model Device: %s, n_gpu: %s", args.model_device,
                   args.n_gpu)

    # Set seed
    set_seed(args)

    # Restore the encoder saved by the previous query-encoder run.
    checkpoint_dir = os.path.join(args.query_output_root,
                                  args.previous_qencoder, "model")
    logger.info(f"load from {checkpoint_dir}")
    encoder_config = RobertaConfig.from_pretrained(checkpoint_dir)
    model = RobertaDot.from_pretrained(checkpoint_dir, config=encoder_config)
    model.to(args.model_device)

    logger.info("Training/evaluation parameters %s", args)
    # Evaluation
    train(args, model)
def main():
    """Train the language-inference model end to end from command-line arguments."""
    args = build_parser().parse_args()
    print("Creating snapshot directory if not exist...")
    if not os.path.exists(args.snapshots_path):
        os.mkdir(args.snapshots_path)
    print("Loading Roberta components...")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    config = RobertaConfig.from_pretrained("roberta-base",
                                           output_hidden_states=True)
    # NOTE(review): RobertaModel(config) builds a randomly initialised
    # encoder -- no pretrained weights are loaded here. Confirm whether
    # RobertaModel.from_pretrained was intended.
    base_model = RobertaModel(config).cuda()
    model = LangInferModel(base_model, config, args.span_heads).cuda()
    optimizer = configure_adam_optimizer(model, args.lr, args.weight_decay,
                                         args.adam_epsilon)
    print("Preparing the data for training...")
    train_loader, test_loaders = build_data_loaders(args, tokenizer)
    criterion = nn.CrossEntropyLoss()
    print(
        f"Training started for {args.epoch_num} epochs. Might take a while...")
    train(args.epoch_num, model, optimizer, criterion, train_loader,
          test_loaders, args.snapshots_path)
    print("Training is now finished. You can check out the results now")
def main():
    """Fine-tune RoBERTa for sequence classification on the configured dataset
    and save the resulting weights."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    config = RobertaConfig.from_pretrained(cf.model_base,
                                           num_labels=cf.num_labels,
                                           finetuning_task=cf.finetuning_task)
    tokenizer = RobertaTokenizer.from_pretrained(cf.model_base,
                                                 do_lower_case=True)
    model = RobertaForSequenceClassification.from_pretrained(cf.model_base,
                                                             config=config)
    model.to(device)

    # Build the training set: raw text -> tokenized features -> dataset.
    train_raw_text = get_raw_text(cf.train_file_dir)
    train_features = tokenize_raw_text(train_raw_text, tokenizer)
    train_dataset = create_dataset(train_features)

    optimizer = AdamW(model.parameters(),
                      lr=cf.learning_rate,
                      eps=cf.adam_epsilon)

    # BUG FIX: the original passed the undefined name ``dataset`` here
    # (NameError at runtime); the intended argument is the ``train_dataset``
    # built above.
    global_step, training_loss = train(train_dataset,
                                       model,
                                       optimizer,
                                       batch_size=cf.train_batch_size,
                                       num_epochs=cf.num_epochs)

    torch.save(model.state_dict(), cf.model_file_dir)
def __init__(self, device):
    """Frozen BERTweet embedding provider: loads encoder, BPE codes, and vocabulary."""
    super(RobertaTweetEmbedding, self).__init__(device=device)
    self.config = RobertaConfig.from_pretrained(
        '../data/models/BERTweet_base_transformers/config.json')
    self.model = RobertaModel.from_pretrained(
        '../data/models/BERTweet_base_transformers/model.bin',
        config=self.config)
    self.model.eval(
    )  # disable dropout (or leave in train mode to finetune)
    self.model.to(self.device)
    self.pad_token_id = self.config.pad_token_id
    self.embedding_dim = self.model.config.hidden_size

    # Load BPE encoder.
    # NOTE(review): parse_args() here consumes the *process* command line;
    # if the host program defines other CLI flags this will conflict.
    # argparse.Namespace(bpe_codes=...) would avoid touching sys.argv.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--bpe-codes',
        default="../data/models/BERTweet_base_transformers/bpe.codes",
        required=False,
        type=str,
        help='path to fastBPE BPE')
    args = parser.parse_args()
    self.bpe = fastBPE(args)

    # Load the dictionary
    self.vocab = Dictionary()
    self.vocab.add_from_file(
        "../data/models/BERTweet_base_transformers/dict.txt")
def __init__(self, args):
    """Biaffine dependency-parser LightningModule.

    Derives tag inventories from the training data, computes derived
    hyperparameters (GPU count, total steps), builds the parser config and
    model, and sets up attachment-score metrics.

    Args:
        args: argparse.Namespace, or an equivalent dict in eval mode.
    """
    super().__init__()
    if not isinstance(args, argparse.Namespace):
        # eval mode: hyperparameters arrive as a plain dict
        assert isinstance(args, dict)
        args = argparse.Namespace(**args)

    # compute other fields according to args
    train_dataset = DependencyDataset(file_path=os.path.join(
        args.data_dir, f"train.{args.data_format}"),
                                      bert=args.bert_dir)
    # Save this information onto args for convenient evaluation later.
    args.pos_tags = train_dataset.pos_tags
    args.dep_tags = train_dataset.dep_tags
    args.ignore_pos_tags = train_dataset.ignore_pos_tags if args.ignore_punct else set(
    )
    # args.gpus may be a comma-separated id list ("0,1") or a plain count.
    args.num_gpus = len(
        [x for x in str(args.gpus).split(",")
         if x.strip()]) if "," in args.gpus else int(args.gpus)
    # Total number of optimisation steps (used by LR schedulers).
    args.t_total = (len(train_dataset) //
                    (args.accumulate_grad_batches * args.num_gpus) +
                    1) * args.max_epochs
    self.save_hyperparameters(args)
    self.args = args
    bert_name = args.bert_name
    if bert_name == 'roberta-large':
        bert_config = RobertaConfig.from_pretrained(args.bert_dir)
        DependencyConfig = RobertaDependencyConfig
    elif bert_name == 'bert':
        bert_config = BertConfig.from_pretrained(args.bert_dir)
        DependencyConfig = BertDependencyConfig
    else:
        raise ValueError("Unknown bert name!!")
    # The dependency config embeds the full backbone config via __dict__.
    self.model_config = DependencyConfig(
        pos_tags=args.pos_tags,
        dep_tags=args.dep_tags,
        pos_dim=args.pos_dim,
        additional_layer=args.additional_layer,
        additional_layer_dim=args.additional_layer_dim,
        additional_layer_type=args.additional_layer_type,
        arc_representation_dim=args.arc_representation_dim,
        tag_representation_dim=args.tag_representation_dim,
        biaf_dropout=args.biaf_dropout,
        **bert_config.__dict__)
    self.model = BiaffineDependencyParser(args.bert_dir,
                                          config=self.model_config)
    if args.freeze_bert:
        # Train only the parser head; keep the encoder weights fixed.
        for param in self.model.bert.parameters():
            param.requires_grad = False
    self.train_stat = AttachmentScores()
    self.val_stat = AttachmentScores()
    self.test_stat = AttachmentScores()
    self.ignore_pos_tags = list(args.ignore_pos_tags)
def __init__(self, args):
    """RoBERTa + GCN model: text encoder and graph encoder fused via self-attention."""
    super(SOTA_goal_model, self).__init__()
    self.args = args
    # roberta_config = AlbertConfig.from_pretrained('albert-base-v2')
    # self.roberta = AlbertForMultipleChoice.from_pretrained(
    #     'pre_weights/albert-base-v2-pytorch_model.bin', config=roberta_config)
    roberta_config = RobertaConfig.from_pretrained('roberta-large')
    roberta_config.attention_probs_dropout_prob = 0.2
    roberta_config.hidden_dropout_prob = 0.2
    # Optionally use the LM-augmented multiple-choice variant.
    if args.get('with_lm'):
        self.roberta = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=roberta_config)
    else:
        self.roberta = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=roberta_config)
    from utils.attentionUtils import SelfAttention
    self.gcn = GCNNet()
    # Fuse the RoBERTa vector with the 128-d GCN vector, then self-attend.
    self.merge_fc1 = nn.Linear(roberta_config.hidden_size + 128, 512)
    self.attn = SelfAttention(512, 8)
    # self.roberta_fc1 = nn.Linear(roberta_config.hidden_size, 128)  # project the roberta vector down to the gcn dimension
    # self.gcn_fc1 = nn.Linear(128, 128)  # same as above
    self.fc3 = nn.Linear(512 + roberta_config.hidden_size, 1)
    self.dropout = nn.Dropout(0.2)
def get_training_objects(params):
    """Define and return training objects.

    Builds the binary RoBERTa classifier, an AdamW optimizer whose
    bias/LayerNorm parameters are exempt from weight decay, and a linear
    warmup schedule.

    Returns:
        (model, optimizer, scheduler)
    """
    config = RobertaConfig.from_pretrained(params["model_name"], num_labels=2)
    model = RobertaForSequenceClassification.from_pretrained(
        params["model_name"], config=config)
    model.to(params["device"])

    # Bias and LayerNorm weights must not be weight-decayed.
    no_decay = ["bias", "LayerNorm.weight"]

    def _wants_decay(param_name):
        # True for parameters that should receive weight decay.
        return not any(marker in param_name for marker in no_decay)

    decayed, exempt = [], []
    for name, param in model.named_parameters():
        (decayed if _wants_decay(name) else exempt).append(param)

    grouped_params = [
        {"params": decayed, "weight_decay": params["weight_decay"]},
        {"params": exempt, "weight_decay": 0.0},
    ]
    optimizer = AdamW(grouped_params, lr=params["lr"], eps=params["adam_epsilon"])
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=params["warmup_steps"],
        num_training_steps=params["total_steps"],
    )
    return model, optimizer, scheduler
def predict_pair(model_args, data_args, training_args):
    """Run pair-classification inference over the test set and dump results.

    The tokenizer/model family is chosen by substring of
    model_args.model_type (roberta / electra, defaulting to bert). Each
    output line holds the first three fields of the test row plus the
    predicted class index and its probability.
    """
    # Set seed
    set_seed(training_args.seed)
    if 'roberta' in model_args.model_type:
        tokenizer = RobertaTokenizer.from_pretrained(model_args.tokenizer_name_or_path)
        config = RobertaConfig.from_pretrained(model_args.model_name_or_path)
        config.num_labels = data_args.num_labels
        model = RobertaForSequenceClassification.from_pretrained(model_args.model_name_or_path, config=config)
    elif 'electra' in model_args.model_type:
        tokenizer = ElectraTokenizer.from_pretrained(model_args.tokenizer_name_or_path)
        config = ElectraConfig.from_pretrained(model_args.model_name_or_path)
        config.num_labels = data_args.num_labels
        model = ElectraForSequenceClassification.from_pretrained(model_args.model_name_or_path, config=config)
    else:
        # default -> bert
        tokenizer = BertTokenizer.from_pretrained(model_args.tokenizer_name_or_path)
        config = BertConfig.from_pretrained(model_args.model_name_or_path)
        config.num_labels = data_args.num_labels
        model = BertForSequenceClassification.from_pretrained(model_args.model_name_or_path, config=config)
    model.to(training_args.device)
    # NOTE(review): pickle.load on the test file -- only safe for trusted data.
    test_df = pickle.load(open(data_args.test_data_file, 'rb'))
    test_dataset = get_dataset(data_args, tokenizer, test_df, model_args.model_type)
    data_collator = MyDataCollator()
    # Distributed (local_rank set) vs single-node multi-GPU setup.
    if training_args.local_rank != -1:
        sampler = SequentialDistributedSampler(test_dataset)
        model = torch.nn.DataParallel(model)
    else:
        n_gpu = torch.cuda.device_count()
        if n_gpu > 1:
            model = torch.nn.DataParallel(model)
        sampler = SequentialSampler(test_dataset)
    print(len(test_dataset))
    dataloader = DataLoader(
        test_dataset,
        sampler=sampler,
        batch_size=training_args.eval_batch_size,
        collate_fn=data_collator,
    )
    model.eval()
    all_probs = []
    for inputs in tqdm(dataloader):
        for k, v in inputs.items():
            inputs[k] = v.to(training_args.device)
        # Labels are dropped: inference only.
        inputs.pop('labels')
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs[0]
        probs = torch.softmax(logits, dim=-1)
        maxp, maxi = torch.max(probs, dim=-1)
        # Stored as (predicted_index, probability) pairs.
        result = [(_i, _p) for _p, _i in zip(maxp, maxi)]
        all_probs.extend(result)
    with open('./{}_{}.answer_classify.result'.format(data_args.data_type, model_args.model_type), 'w', encoding='utf-8') as fout:
        for i in range(len(test_df)):
            fout.write('{} | {} | {} | {} | {}\n'.format(test_df[i][0], test_df[i][1], test_df[i][2], all_probs[i][0], all_probs[i][1]))
def __init__(self, class_count: int, label_str: str, model_name_str: str = 'roberta-base'):
    """Initialise the base classifier with a RoBERTa config and tokenizer.

    Args:
        class_count: number of target classes.
        label_str: label identifier forwarded to the base class.
        model_name_str: HF model identifier for config and tokenizer.
    """
    tokenizer = RobertaTokenizer.from_pretrained(model_name_str)
    config = RobertaConfig.from_pretrained(model_name_str)
    super().__init__(class_count, label_str, config, tokenizer,
                     model_name_str)
def __init__(self, config):
    """Wrap RobertaForSequenceClassification, unfreeze it, and add a linear head.

    Args:
        config: project config providing bert_path, num_classes, hidden_size.
    """
    super(Model, self).__init__()
    model_config = RobertaConfig.from_pretrained(
        config.bert_path, num_labels=config.num_classes)
    self.roberta = RobertaForSequenceClassification.from_pretrained(
        config.bert_path, config=model_config)
    # BUG FIX: the original iterated ``self.bert.parameters()`` but no
    # ``self.bert`` attribute exists on this module (AttributeError at
    # construction); the backbone is stored as ``self.roberta``.
    for param in self.roberta.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self):
    """Randomly initialised roberta-large encoder with a single-logit classification head."""
    super().__init__()
    config = RobertaConfig.from_pretrained('roberta-large',
                                           output_hidden_states=True)
    # NOTE(review): RobertaModel(config=...) builds a randomly initialised
    # encoder -- no pretrained weights are loaded here. Confirm whether
    # from_pretrained was intended (or weights are loaded elsewhere).
    self.roberta = RobertaModel(config=config)
    # One output unit for the head (e.g. a score/regression-style logit).
    config.num_labels = 1
    self.classifier = RobertaClassificationHead(config=config)
    # self._debug = 1
    self._debug = -1  # presumably non-negative values enable debug tracing; confirm in forward()
def __init__(self):
    """RoBERTa token model with a 2-way linear head over hidden states."""
    super(TokenModel, self).__init__()
    # NOTE: the module-level ``config`` object supplies checkpoint paths,
    # while ``self.config`` is the transformers RobertaConfig -- beware the
    # name shadowing.
    self.config = RobertaConfig.from_pretrained(config.roberta_config,
                                                output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(config.roberta_model,
                                                config=self.config)
    self.dropout = nn.Dropout(p=0.5)
    # Two outputs per token -- presumably start/end or in/out tagging;
    # confirm in forward().
    self.fc = nn.Linear(self.config.hidden_size, 2)
    nn.init.normal_(self.fc.weight, std=0.02)
    nn.init.normal_(self.fc.bias, 0)
def __init__(self, PATH="data/roberta/"):
    """RoBERTa encoder with two small linear heads.

    Args:
        PATH: directory holding the HF config and pytorch_model.bin weights.
    """
    super(EmotionModel, self).__init__()
    roberta_config = RobertaConfig.from_pretrained(PATH, return_dict=False)
    self.bert_model = RobertaModel.from_pretrained(PATH + "pytorch_model.bin",
                                                   config=roberta_config)
    self.dropout = nn.Dropout(0.1)
    # linear1 maps the 768-d hidden state to one logit; linear2 takes the
    # hidden state plus that extra scalar (768 + 1 inputs).
    self.linear1 = nn.Linear(768, 1)
    self.linear2 = nn.Linear(768 + 1, 1)
def __init__(self, learning_rate: float, roberta_type: str = 'roberta-base'):
    """Wrapper around a pretrained multiple-choice RoBERTa.

    Args:
        learning_rate: optimiser learning rate, stored as ``self.lr``.
        roberta_type: HF identifier to load the config and weights from.
    """
    super().__init__()
    config = RobertaConfig.from_pretrained(roberta_type)
    config.num_labels = 2
    self.num_labels = config.num_labels
    self.config = config
    self.lr = learning_rate
    # BUG FIX: the original wrote
    #     RobertaForMultipleChoice(config).from_pretrained(...)
    # which first builds a randomly initialised model, then calls the
    # *classmethod* from_pretrained on that instance -- discarding the
    # configured object it just created. Call from_pretrained on the class
    # with the prepared config (which already carries num_labels).
    self.model = RobertaForMultipleChoice.from_pretrained(roberta_type,
                                                          config=config)