def __init__(self, config, num=0):
    super(roBerta, self).__init__()
    model_config = RobertaConfig()
    model_config.vocab_size = config.vocab_size
    model_config.hidden_size = config.hidden_size[0]
    model_config.num_attention_heads = 16
    # method used to compute the loss
    self.loss_method = config.loss_method
    self.multi_drop = config.multi_drop
    self.roberta = RobertaModel(model_config)
    if config.requires_grad:
        for param in self.roberta.parameters():
            param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.hidden_size = config.hidden_size[num]
    if self.loss_method in ['binary', 'focal_loss', 'ghmc']:
        self.classifier = nn.Linear(self.hidden_size, 1)
    else:
        self.classifier = nn.Linear(self.hidden_size, self.num_labels)
    self.text_linear = nn.Linear(config.embeding_size, config.hidden_size[0])
    self.vocab_layer = nn.Linear(config.hidden_size[0], config.vocab_size)
    self.classifier.apply(self._init_weights)
    self.roberta.apply(self._init_weights)
    self.text_linear.apply(self._init_weights)
    self.vocab_layer.apply(self._init_weights)
def __init__(self, args, config, dataloader, ckpdir):
    self.device = torch.device('cuda') if (
        args.gpu and torch.cuda.is_available()) else torch.device('cpu')
    if torch.cuda.is_available():
        print('[Runner] - CUDA is available!')
    self.model_kept = []
    self.global_step = 1
    self.log = SummaryWriter(ckpdir)

    self.args = args
    self.config = config
    self.dataloader = dataloader
    self.ckpdir = ckpdir

    # optimizer
    self.learning_rate = float(config['optimizer']['learning_rate'])
    self.warmup_proportion = config['optimizer']['warmup_proportion']
    self.gradient_accumulation_steps = config['optimizer']['gradient_accumulation_steps']
    self.gradient_clipping = config['optimizer']['gradient_clipping']

    # Training details
    self.apex = config['runner']['apex']
    self.total_steps = config['runner']['total_steps']
    self.log_step = config['runner']['log_step']
    self.save_step = config['runner']['save_step']
    self.duo_feature = config['runner']['duo_feature']
    self.max_keep = config['runner']['max_keep']

    # Model configs
    self.semantic_config = RobertaConfig(**config['semantic'])
    self.acoustic_config = RobertaConfig(**config['acoustic'])
def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()

    # Check that exactly one of pretrained_model_name, config_filename,
    # and vocab_size was passed in.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "ROBERTA constructor."
        )

    # NOTE: the branches below re-check the same condition, so the
    # final else is unreachable after the check above.
    if vocab_size is not None:
        config = RobertaConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = RobertaModel(config)
    elif pretrained_model_name is not None:
        model = RobertaModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = RobertaConfig.from_json_file(config_filename)
        model = RobertaModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must "
            "be passed into the ROBERTA constructor"
        )

    model.to(self._device)
    self.add_module("roberta", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
def __init__(self, tokenizer):
    super(RobertaForMultipleChoiceWithLM2, self).__init__()
    self.roberta_lm = RobertaForMaskedLM.from_pretrained(
        'pre_weights/roberta-large_model.bin',
        config=RobertaConfig.from_pretrained('roberta-large'))
    self.roberta = RobertaForMultipleChoice.from_pretrained(
        'pre_weights/roberta-large_model.bin',
        config=RobertaConfig.from_pretrained('roberta-large'))
    self.tokenizer = tokenizer
    self.lamda = nn.Parameter(torch.tensor([1.0]))
def init_model(self, model_name):
    if model_name == 'Bert':
        config = BertConfig.from_pretrained('bert-base-uncased')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = BertForMultipleChoice.from_pretrained(
            'pre_weights/bert-base-uncased_model.bin', config=config)
    elif model_name == 'Roberta':
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
        # print('load csqa pretrain weights...')
        # self.model.load_state_dict(torch.load(
        #     'checkpoints/commonsenseQA_pretrain_temp.pth'
        # ))
    elif model_name == 'Albert':
        self.model = AlbertForMultipleChoice.from_pretrained(
            'pre_weights/albert-xxlarge_model.bin',
            config=AlbertConfig.from_pretrained('albert-xxlarge-v1'))
    elif model_name == 'RobertaLM':
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    elif model_name == 'RobertaLM2':
        self.model = RobertaForMultipleChoiceWithLM2(self.tokenizer)
    elif 'GNN' in model_name:
        self.model = SOTA_goal_model(self.args)
    elif 'LM' in model_name:
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    elif 'KBERT' in model_name:
        config = RobertaConfig.from_pretrained('roberta-large')
        config.hidden_dropout_prob = 0.2
        config.attention_probs_dropout_prob = 0.2
        self.model = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=config)
    else:
        pass

    self.model.to(self.args['device'])
    if torch.cuda.device_count() > 1 and self.args['use_multi_gpu']:
        print("{} GPUs are available. Let's use them.".format(
            torch.cuda.device_count()))
        self.model = torch.nn.DataParallel(self.model)
def load_model(model_path, model_name, num_classes):
    if model_name == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)
        config = BertConfig.from_pretrained(model_name)
    else:
        tokenizer = RobertaTokenizer.from_pretrained(model_name, do_lower_case=True)
        config = RobertaConfig.from_pretrained(model_name)
    # The classifier head consumes intermediate hidden states, so enable them
    # before the backbone is instantiated.
    config.output_hidden_states = True

    if model_name == 'bert-base-uncased':
        transformer_model = BertModel.from_pretrained(model_name, config=config)
    else:
        transformer_model = RobertaModel.from_pretrained(model_name, config=config)

    model = SequenceClassifier(transformer_model, config, n_layers, num_classes)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model, tokenizer
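A minimal usage sketch for the loader above; the checkpoint path, model name, and class count are assumptions, `n_layers` is expected to be defined at module level by the original code, and SequenceClassifier's exact forward signature is also an assumption.

import torch

# Hypothetical inputs; adapt the paths and the forward call to the real SequenceClassifier.
model, tokenizer = load_model('checkpoints/classifier.pt', 'roberta-base', num_classes=2)
encoded = tokenizer("an example sentence", return_tensors='pt')
with torch.no_grad():
    output = model(encoded['input_ids'], encoded['attention_mask'])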
def __init__(self, args, tokenizer,
             train_dataset=None, dev_dataset=None, test_dataset=None):
    self.args = args
    self.tokenizer = tokenizer
    self.train_dataset = train_dataset
    self.dev_dataset = dev_dataset
    self.test_dataset = test_dataset
    self.id2label = load_id2label(args.id2label)
    self.num_labels = len(self.id2label)

    self.config = RobertaConfig.from_pretrained(
        args.model_name_or_path,
        num_labels=self.num_labels,
        finetuning_task="VLSP2020-Relex",
        id2label={str(i): label for i, label in self.id2label.items()},
        label2id={label: i for i, label in self.id2label.items()},
    )
    if self.args.model_type == "es":
        self.model = RobertaEntityStarts.from_pretrained(
            args.model_name_or_path, config=self.config)
    elif self.args.model_type == "all":
        self.model = RobertaConcatAll.from_pretrained(
            args.model_name_or_path, config=self.config)

    # GPU or CPU
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.model.to(self.device)
def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    attention_mask = None
    if self.use_attention_mask:
        attention_mask = random_attention_mask([self.batch_size, self.seq_length])

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                    self.type_vocab_size)

    config = RobertaConfig(
        vocab_size=self.vocab_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        intermediate_size=self.intermediate_size,
        hidden_act=self.hidden_act,
        hidden_dropout_prob=self.hidden_dropout_prob,
        attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        is_decoder=False,
        initializer_range=self.initializer_range,
    )

    return config, input_ids, token_type_ids, attention_mask
def __init__(self):
    super(TweetModel, self).__init__()
    config = RobertaConfig.from_pretrained('roberta/config.json',
                                           output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained('roberta/pytorch_model.bin',
                                                config=config)
    self.dropout = nn.Dropout(0.15)
    self.cnn1 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                              torch.nn.BatchNorm1d(128),
                              torch.nn.LeakyReLU())
    self.cnn1_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                torch.nn.BatchNorm1d(64),
                                torch.nn.LeakyReLU())
    self.cnn2 = nn.Sequential(torch.nn.Conv1d(config.hidden_size, 128, 2),
                              torch.nn.BatchNorm1d(128),
                              torch.nn.LeakyReLU())
    self.cnn2_1 = nn.Sequential(torch.nn.Conv1d(128, 64, 2),
                                torch.nn.BatchNorm1d(64),
                                torch.nn.LeakyReLU())
    self.fc1 = nn.Linear(64, 1)
    self.fc2 = nn.Linear(64, 1)
    nn.init.normal_(self.fc1.weight, std=0.02)
    nn.init.normal_(self.fc1.bias, 0)
    nn.init.normal_(self.fc2.weight, std=0.02)
    nn.init.normal_(self.fc2.bias, 0)
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    config = RobertaConfig.from_pretrained(cf.model_base,
                                           num_labels=cf.num_labels,
                                           finetuning_task=cf.finetuning_task)
    tokenizer = RobertaTokenizer.from_pretrained(cf.model_base, do_lower_case=True)
    model = RobertaForSequenceClassification.from_pretrained(cf.model_base, config=config)
    model.to(device)

    train_raw_text = get_raw_text(cf.train_file_dir)
    train_features = tokenize_raw_text(train_raw_text, tokenizer)
    train_dataset = create_dataset(train_features)

    optimizer = AdamW(model.parameters(), lr=cf.learning_rate, eps=cf.adam_epsilon)
    # Pass the dataset built above; the original referenced an undefined name.
    global_step, training_loss = train(train_dataset, model, optimizer,
                                       batch_size=cf.train_batch_size,
                                       num_epochs=cf.num_epochs)
    torch.save(model.state_dict(), cf.model_file_dir)
def load_model(args):
    if args.transformer_model.startswith('bert'):
        path = '/home/yinfan/.cache/torch/transformers/bert-base-uncased-pytorch_model.bin'
        config = BertConfig.from_pretrained(args.transformer_model,
                                            output_hidden_states=True)
        tokenizer = BertTokenizer.from_pretrained(args.transformer_model,
                                                  do_lower_case=True)
        model = BertModel.from_pretrained(
            path, from_tf=bool('.ckpt' in args.transformer_model), config=config)
    else:
        path = '/home/yinfan/.cache/torch/transformers/roberta-base-pytorch_model.bin'
        tokenizer = RobertaTokenizer.from_pretrained(args.transformer_model)
        config = RobertaConfig.from_pretrained(args.transformer_model,
                                               output_hidden_states=True)
        model = RobertaModel.from_pretrained(
            path, from_tf=bool('.ckpt' in args.transformer_model), config=config)
        # roberta = RobertaModel.from_pretrained(args.roberta_model, cache_dir=args.cache_dir, config=config)

    model_embedding = model.embeddings
    model_embedding.to(args.device)
    if args.n_gpu > 1:
        model_embedding = torch.nn.DataParallel(model_embedding)

    model.to(args.device)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.untrained_transformer == 1:
        model.apply(init_weights)

    return model, model_embedding, tokenizer
def __init__(self, args, device='cpu'):
    super().__init__()
    self.args = args
    self.device = device
    self.epoch = 0
    self.dropout = nn.Dropout(self.args.dropout)

    # Entailment Tracking
    # roberta_model_path = '/research/king3/ik_grp/yfgao/pretrain_models/huggingface/roberta-base'
    roberta_model_path = args.pretrained_lm_path
    roberta_config = RobertaConfig.from_pretrained(roberta_model_path, cache_dir=None)
    self.roberta = RobertaModel.from_pretrained(roberta_model_path,
                                                cache_dir=None,
                                                config=roberta_config)
    encoder_layer = TransformerEncoderLayer(self.args.bert_hidden_size, 12,
                                            4 * self.args.bert_hidden_size)
    encoder_norm = nn.LayerNorm(self.args.bert_hidden_size)
    self.transformer_encoder = TransformerEncoder(encoder_layer, args.trans_layer,
                                                  encoder_norm)
    self._reset_transformer_parameters()
    self.w_entail = nn.Linear(self.args.bert_hidden_size, 3, bias=True)

    # Logic Reasoning
    self.entail_emb = nn.Parameter(torch.rand(3, self.args.bert_hidden_size))
    nn.init.normal_(self.entail_emb)
    self.w_selfattn = nn.Linear(self.args.bert_hidden_size * 2, 1, bias=True)
    self.w_output = nn.Linear(self.args.bert_hidden_size * 2, 4, bias=True)
def __init__(self, data_dir: Path, tokenizer: PreTrainedTokenizer,
             dataset: Dataset, local_rank=-1):
    assert data_dir, "data_dir input needed"
    self.model_dir = f"{data_dir}/results"
    self.dataset = dataset
    self.config = RobertaConfig(
        vocab_size=52_000,
        max_position_embeddings=514,
        num_attention_heads=12,
        num_hidden_layers=6,
        type_vocab_size=1,
    )
    self.training_args = TrainingArguments(
        run_name=data_dir.name,
        local_rank=local_rank,
        learning_rate=0.00005,  # default 0.00005
        output_dir=f"{self.model_dir}",
        overwrite_output_dir=False,
        num_train_epochs=1,
        per_device_train_batch_size=48,  # Nvidia K80 99%
        seed=42,
        save_steps=10_000,
        save_total_limit=1,
    )
    self.data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
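A hedged sketch of how these pieces might be wired into the Hugging Face Trainer for masked-language-model pretraining; `pretrainer` is a hypothetical instance of the class defined above, not a name from the original code.

from transformers import RobertaForMaskedLM, Trainer

# Build a fresh MLM from the config and train it with the prepared arguments,
# collator, and dataset held by the wrapper object.
model = RobertaForMaskedLM(config=pretrainer.config)
trainer = Trainer(
    model=model,
    args=pretrainer.training_args,
    data_collator=pretrainer.data_collator,
    train_dataset=pretrainer.dataset,
)
trainer.train()
trainer.save_model(pretrainer.model_dir)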
def __init__(self):
    self.num_labels: int = 2
    config: RobertaConfig = RobertaConfig.from_pretrained(
        "./BERTweet_base_transformers/config.json",
        output_hidden_states=True,
    )
    super().__init__(config)
    self.bertweet: RobertaModel = RobertaModel.from_pretrained(
        "./BERTweet_base_transformers/model.bin", config=config)
    self.dense = nn.Linear(
        in_features=768 * 4,
        out_features=1024,
    )
    self.dropout = nn.Dropout(p=0.15)
    self.dense_2 = nn.Linear(
        in_features=1024,
        out_features=512,
    )
    self.dense_3 = nn.Linear(
        in_features=512,
        out_features=256,
    )
    self.classifier = nn.Linear(
        in_features=256,
        out_features=self.num_labels,
    )
def load_transformer_model(model_dir):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file('{}/config.json'.format(model_dir))
    model = RobertaForSequenceClassification.from_pretrained(model_dir, config=config)
    model = model.to(device)
    return model
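A brief usage sketch for the loader above; the model directory, tokenizer name, and example sentence are assumptions, and the `.logits` attribute assumes a transformers version that returns model outputs as objects rather than tuples.

import torch
from transformers import RobertaTokenizer

# Hypothetical checkpoint directory; run a single sentence through the classifier.
model = load_transformer_model('checkpoints/roberta-classifier')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
inputs = tokenizer("an example sentence", return_tensors='pt').to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits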
def __init__(self, config: Bunch) -> None:
    pl.LightningModule.__init__(self)
    self.config = config
    bpe_codes_path = os.path.join(
        config.pretrained_model_base_path,
        "BERTweet_base_transformers/bpe.codes",
    )
    bpe = fastBPE(Namespace(bpe_codes=bpe_codes_path))
    vocab = Dictionary()
    vocab.add_from_file(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/dict.txt",
        ))
    tokenizer = BertweetTokenizer(self.config.max_tokens_per_tweet, bpe, vocab)
    self.data_processor = BertweetDataProcessor(config, tokenizer)
    model_config = RobertaConfig.from_pretrained(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/config.json",
        ))
    self.model = RobertaForSequenceClassification.from_pretrained(
        os.path.join(
            config.pretrained_model_base_path,
            "BERTweet_base_transformers/model.bin",
        ),
        config=model_config,
    )
    self.loss = CrossEntropyLoss()
def roberta_build(self, sparse=False, base_model=None, density=1.0, eval=True):
    if base_model is None:
        config = RobertaConfig(
            vocab_size=52_000,
            max_position_embeddings=514,
            num_attention_heads=12,
            num_hidden_layers=6,
            type_vocab_size=1,
        )
        model = RobertaForMaskedLM(config=config).cuda()
    else:
        model = base_model

    if sparse:
        mp = BlockSparseModelPatcher()
        mp.add_pattern(r"roberta\.encoder\.layer\.[0-9]+.intermediate\.dense",
                       {"density": density})
        mp.add_pattern(r"roberta\.encoder\.layer\.[0-9]+.output\.dense",
                       {"density": density})
        mp.patch_model(model)

    if eval:
        model.eval()
    return model, model.num_parameters()
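A hedged usage sketch comparing a dense and a block-sparse build; `builder` stands in for whatever object owns roberta_build in the original code, and the density value is illustrative.

# Dense baseline vs. block-sparse variant at 25% density (illustrative values).
dense_model, dense_params = builder.roberta_build(eval=False)
sparse_model, sparse_params = builder.roberta_build(sparse=True, density=0.25)
print(f"dense: {dense_params} params, sparse: {sparse_params} params")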
def __init__(self):
    self.config = RobertaConfig.from_pretrained("roberta-base")
    self.config.output_hidden_states = True
    self.tok = RobertaTokenizer.from_pretrained("roberta-base")
    self.model = RobertaModel.from_pretrained("roberta-base", config=self.config)
def __init__(self, args):
    super(SOTA_goal_model, self).__init__()
    self.args = args
    # roberta_config = AlbertConfig.from_pretrained('albert-base-v2')
    # self.roberta = AlbertForMultipleChoice.from_pretrained(
    #     'pre_weights/albert-base-v2-pytorch_model.bin', config=roberta_config)
    roberta_config = RobertaConfig.from_pretrained('roberta-large')
    roberta_config.attention_probs_dropout_prob = 0.2
    roberta_config.hidden_dropout_prob = 0.2
    if args.get('with_lm'):
        self.roberta = RobertaForMultipleChoiceWithLM.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=roberta_config)
    else:
        self.roberta = RobertaForMultipleChoice.from_pretrained(
            'pre_weights/roberta-large_model.bin', config=roberta_config)
    from utils.attentionUtils import SelfAttention
    self.gcn = GCNNet()
    self.merge_fc1 = nn.Linear(roberta_config.hidden_size + 128, 512)
    self.attn = SelfAttention(512, 8)
    # self.roberta_fc1 = nn.Linear(roberta_config.hidden_size, 128)  # project the roberta vector down to the gcn dimension
    # self.gcn_fc1 = nn.Linear(128, 128)  # likewise
    self.fc3 = nn.Linear(512 + roberta_config.hidden_size, 1)
    self.dropout = nn.Dropout(0.2)
def main():
    args = build_parser().parse_args()

    print("Creating snapshot directory if not exist...")
    if not os.path.exists(args.snapshots_path):
        os.mkdir(args.snapshots_path)

    print("Loading Roberta components...")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    config = RobertaConfig.from_pretrained("roberta-base", output_hidden_states=True)
    base_model = RobertaModel(config).cuda()
    model = LangInferModel(base_model, config, args.span_heads).cuda()
    optimizer = configure_adam_optimizer(model, args.lr, args.weight_decay,
                                         args.adam_epsilon)

    print("Preparing the data for training...")
    train_loader, test_loaders = build_data_loaders(args, tokenizer)
    criterion = nn.CrossEntropyLoss()

    print(f"Training started for {args.epoch_num} epochs. Might take a while...")
    train(args.epoch_num, model, optimizer, criterion, train_loader,
          test_loaders, args.snapshots_path)
    print("Training is now finished. You can check out the results now")
def get_training_objects(params):
    """ Define and return training objects """
    config = RobertaConfig.from_pretrained(params["model_name"], num_labels=2)
    model = RobertaForSequenceClassification.from_pretrained(
        params["model_name"], config=config)
    model.to(params["device"])

    no_decay = ["bias", "LayerNorm.weight"]
    gpd_params = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": params["weight_decay"],
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(gpd_params, lr=params["lr"], eps=params["adam_epsilon"])
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=params["warmup_steps"],
        num_training_steps=params["total_steps"],
    )
    return model, optimizer, scheduler
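A minimal sketch of calling this helper; the params dict below is illustrative, not the original project's configuration.

import torch

# Hypothetical hyperparameters matching the keys the helper reads.
params = {
    "model_name": "roberta-base",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "weight_decay": 0.01,
    "lr": 2e-5,
    "adam_epsilon": 1e-8,
    "warmup_steps": 0,
    "total_steps": 1000,
}
model, optimizer, scheduler = get_training_objects(params)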
def get_classification_roberta():
    ids = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model, config=config)

    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)
    x = keras.layers.Dropout(0.2)(x[0])
    x = keras.layers.GlobalAveragePooling1D()(x)
    x = keras.layers.Dense(3, activation='softmax', name='sentiment')(x)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=x)

    lr_schedule = keras.experimental.CosineDecay(5e-5, 1000)
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer, metrics=['acc'])
    return model
def __init__(self, cfg, device):
    super().__init__()
    tokenizer = RobertaTokenizerFast.from_pretrained('./bird_bpe_vocab', max_len=256)
    _config = RobertaConfig(
        vocab_size=tokenizer._tokenizer.get_vocab_size(),
        hidden_size=512,
        num_hidden_layers=4,
        num_attention_heads=8,
        max_position_embeddings=256,
        pad_token_id=1,
        eos_token_id=0,
        bos_token_id=2,
        output_attentions=False,
        output_hidden_states=False,
    )
    _model = RobertaForMaskedLM(_config)
    _model.load_state_dict(torch.load('bert_small/checkpoint-1100/pytorch_model.bin'))
    _model.eval()

    self.tokenizer = tokenizer
    self._model = _model
    self.device = device
    self.pad_token = 0
    self.batch_size = cfg.batch_size
    self.proj = None
    if cfg.proj_lang:
        self.proj = nn.Sequential(
            *[EqualisedLinearLayer(512, cfg.latent_dim,
                                   weight_scaling=cfg.weight_scaling),
              nn.Tanh()])
def Bertolo_feature_extraction(ids, texts, feature_file_name):
    config = RobertaConfig.from_pretrained("./bert-like models/bertolo/config.json")
    tokenizer1 = AutoTokenizer.from_pretrained("./bertolo", normalization=True)
    model = AutoModel.from_pretrained("./bertolo", config=config)

    feature_dict = {}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        # input_ids = torch.tensor([tokenizer.encode(tumblr_text)])
        input_ids = tokenizer1.encode(title, return_tensors="pt")
        print(input_ids)
        # with torch.no_grad():
        features = model(input_ids)[0]  # model outputs are now tuples
        print(features.size())
        feature = torch.mean(features, 1, True).detach().numpy()
        print(feature[0])
        feature = list(feature[0][0])
        print(feature)
        print(len(feature))
        # Key by the current id; the original referenced an undefined `tumblr_id`.
        feature_dict[id] = feature
    np.save(feature_file_name, feature_dict)
def __init__(self, args):
    super().__init__()
    if not isinstance(args, argparse.Namespace):  # eval mode
        assert isinstance(args, dict)
        args = argparse.Namespace(**args)

    # Compute other fields according to args.
    train_dataset = DependencyDataset(
        file_path=os.path.join(args.data_dir, f"train.{args.data_format}"),
        bert=args.bert_dir)
    # Save this information to args so that evaluation can reuse it.
    args.pos_tags = train_dataset.pos_tags
    args.dep_tags = train_dataset.dep_tags
    args.ignore_pos_tags = train_dataset.ignore_pos_tags if args.ignore_punct else set()
    args.num_gpus = (len([x for x in str(args.gpus).split(",") if x.strip()])
                     if "," in args.gpus else int(args.gpus))
    args.t_total = (len(train_dataset) //
                    (args.accumulate_grad_batches * args.num_gpus) + 1) * args.max_epochs
    self.save_hyperparameters(args)
    self.args = args

    bert_name = args.bert_name
    if bert_name == 'roberta-large':
        bert_config = RobertaConfig.from_pretrained(args.bert_dir)
        DependencyConfig = RobertaDependencyConfig
    elif bert_name == 'bert':
        bert_config = BertConfig.from_pretrained(args.bert_dir)
        DependencyConfig = BertDependencyConfig
    else:
        raise ValueError("Unknown bert name!!")

    self.model_config = DependencyConfig(
        pos_tags=args.pos_tags,
        dep_tags=args.dep_tags,
        pos_dim=args.pos_dim,
        additional_layer=args.additional_layer,
        additional_layer_dim=args.additional_layer_dim,
        additional_layer_type=args.additional_layer_type,
        arc_representation_dim=args.arc_representation_dim,
        tag_representation_dim=args.tag_representation_dim,
        biaf_dropout=args.biaf_dropout,
        **bert_config.__dict__)
    self.model = BiaffineDependencyParser(args.bert_dir, config=self.model_config)

    if args.freeze_bert:
        for param in self.model.bert.parameters():
            param.requires_grad = False

    self.train_stat = AttachmentScores()
    self.val_stat = AttachmentScores()
    self.test_stat = AttachmentScores()
    self.ignore_pos_tags = list(args.ignore_pos_tags)
def build_model(self):
    ids = tf.keras.layers.Input((self.config.data.roberta.max_len, ), dtype=tf.int32)
    att = tf.keras.layers.Input((self.config.data.roberta.max_len, ), dtype=tf.int32)
    tok = tf.keras.layers.Input((self.config.data.roberta.max_len, ), dtype=tf.int32)

    # Network architecture
    config = RobertaConfig.from_pretrained(self.config.data.roberta.path +
                                           self.config.data.roberta.config)
    bert_model = TFRobertaModel.from_pretrained(
        self.config.data.roberta.path + self.config.data.roberta.roberta_weights,
        config=config)

    x = bert_model(ids, attention_mask=att, token_type_ids=tok)

    self.init_head(x[0])
    self.add_dropout(0.1)
    self.add_lstm(64, True)
    self.add_dropout(0.1)
    self.add_dense(1)
    self.add_activation('softmax')

    self.model = tf.keras.models.Model(
        inputs=[ids, att, tok], outputs=[self.start_head, self.end_head])
    self.model.compile(loss=self.config.model.loss,
                       optimizer=self.config.model.optimizer)
def _init_deep_model(self, model_type, model_path, num_labels, num_regs=None):
    if 'roberta' in model_type:
        tokenizer = RobertaTokenizer.from_pretrained(model_path)
        config = RobertaConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        model = RobertaForSequenceClassification.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    elif 'electra_multitask' in model_type:
        tokenizer = ElectraTokenizer.from_pretrained(model_path)
        tokenizer.add_special_tokens({'additional_special_tokens': ['[VALUES]']})
        config = ElectraConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        config.num_regs = num_regs
        config.vocab_size = len(tokenizer)
        model = ElectraForSequenceClassificationMultiTask.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    elif 'electra' in model_type:
        tokenizer = ElectraTokenizer.from_pretrained(model_path)
        config = ElectraConfig.from_pretrained(model_path)
        config.num_labels = num_labels
        model = ElectraForSequenceClassification.from_pretrained(model_path, config=config)
        model.eval()
        model.to(self.device)
    else:
        raise NotImplementedError()

    return config, tokenizer, model
def main():
    args = run_parse_args()
    logger.info(args)

    # Setup CUDA, GPU
    args.use_gpu = torch.cuda.is_available() and not args.no_cuda
    args.model_device = torch.device(f"cuda:{args.model_gpu_index}" if args.use_gpu else "cpu")
    args.n_gpu = torch.cuda.device_count()

    # Setup logging
    logger.warning("Model Device: %s, n_gpu: %s", args.model_device, args.n_gpu)

    # Set seed
    set_seed(args)

    load_model_path = os.path.join(args.query_output_root,
                                   args.previous_qencoder, "model")
    logger.info(f"load from {load_model_path}")
    config = RobertaConfig.from_pretrained(load_model_path)
    model = RobertaDot.from_pretrained(load_model_path, config=config)
    model.to(args.model_device)

    logger.info("Training/evaluation parameters %s", args)
    # Evaluation
    train(args, model)
def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    input_mask = None
    if self.use_input_mask:
        input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                    self.type_vocab_size)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    config = RobertaConfig(
        vocab_size=self.vocab_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        intermediate_size=self.intermediate_size,
        hidden_act=self.hidden_act,
        hidden_dropout_prob=self.hidden_dropout_prob,
        attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        initializer_range=self.initializer_range,
    )

    return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_roberta_model(tokens_train, attn_mask_train, num_classes):
    config = RobertaConfig(vocab_size=50021,
                           hidden_size=1024,
                           num_hidden_layers=16,
                           num_attention_heads=16,
                           intermediate_size=2048,
                           attention_probs_dropout_prob=0.3,
                           hidden_dropout_prob=0.3)
    bert = TFRobertaModel(config)

    # dense1 = Dense(500, activation='relu')
    dense2 = Dense(368, activation='relu')
    dense3 = Dense(num_classes, activation='softmax')
    dropout = Dropout(0.3)

    tokens = Input(shape=(tokens_train.shape[1],), dtype=tf.int32)
    attn_mask = Input(shape=(attn_mask_train.shape[1],), dtype=tf.int32)

    pooled_output = bert(tokens, attn_mask).pooler_output
    med = dropout(dense2(pooled_output))
    # Feed the intermediate representation to the classifier head;
    # the original passed pooled_output directly, leaving `med` unused.
    final = dense3(med)

    model = Model(inputs=[tokens, attn_mask], outputs=final)
    return model
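A short usage sketch for the builder above, assuming tokens_train and attn_mask_train are integer arrays produced by a RoBERTa tokenizer and labels_train is one-hot encoded; the compile settings are illustrative, since the function returns an uncompiled model.

# Hypothetical training call; labels_train is assumed to be one-hot encoded.
model = create_roberta_model(tokens_train, attn_mask_train, num_classes=3)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
model.fit([tokens_train, attn_mask_train], labels_train, epochs=2, batch_size=16)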