Example #1
from transformers import XLMRobertaConfig, XLMRobertaModel


def xlmr_model():
    # Build a deliberately tiny XLM-RoBERTa model, e.g. as a lightweight test fixture.
    config = XLMRobertaConfig(
        vocab_size=251000,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act='gelu',
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=256,
        type_vocab_size=2,
        is_decoder=False,
        initializer_range=0.02,
    )
    return XLMRobertaModel(config=config)
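
For reference, a minimal usage sketch (not part of the original snippet) that exercises the tiny fixture; the batch shape and random token ids below are arbitrary:

import torch

model = xlmr_model()
model.eval()

# Dummy batch: 2 sequences of 10 arbitrary token ids drawn from the fixture's vocabulary.
input_ids = torch.randint(0, model.config.vocab_size, (2, 10))
with torch.no_grad():
    outputs = model(input_ids)
print(outputs.last_hidden_state.shape)  # torch.Size([2, 10, 32]) -> (batch, seq_len, hidden_size)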
Example #2
import pathlib

import torch
from fairseq.models.roberta import RobertaModel as FairseqRobertaModel
from fairseq.modules import TransformerSentenceEncoderLayer

from transformers import (
    XLMRobertaConfig,
    XLMRobertaXLForMaskedLM,
    XLMRobertaXLForSequenceClassification,
)
from transformers.models.bert.modeling_bert import (
    BertIntermediate,
    BertLayer,
    BertOutput,
    BertSelfAttention,
    BertSelfOutput,
)
from transformers.models.roberta.modeling_roberta import RobertaAttention

SAMPLE_TEXT = "Hello world! cécé herlolip"  # any short sample sentence works for the parity check


def convert_xlm_roberta_xl_checkpoint_to_pytorch(
    roberta_checkpoint_path: str, pytorch_dump_folder_path: str, classification_head: bool
):
    """
    Copy/paste/tweak the fairseq RoBERTa weights into our BERT-style structure.
    """
    roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
    roberta.eval()  # disable dropout
    roberta_sent_encoder = roberta.model.encoder.sentence_encoder
    config = XLMRobertaConfig(
        vocab_size=roberta_sent_encoder.embed_tokens.num_embeddings,
        hidden_size=roberta.cfg.model.encoder_embed_dim,
        num_hidden_layers=roberta.cfg.model.encoder_layers,
        num_attention_heads=roberta.cfg.model.encoder_attention_heads,
        intermediate_size=roberta.cfg.model.encoder_ffn_embed_dim,
        max_position_embeddings=514,
        type_vocab_size=1,
        layer_norm_eps=1e-5,  # PyTorch default used in fairseq
    )
    if classification_head:
        config.num_labels = roberta.model.classification_heads["mnli"].out_proj.weight.shape[0]

    print("Our RoBERTa config:", config)

    model = XLMRobertaXLForSequenceClassification(config) if classification_head else XLMRobertaXLForMaskedLM(config)
    model.eval()

    # Now let's copy all the weights.
    # Embeddings
    model.roberta.embeddings.word_embeddings.weight = roberta_sent_encoder.embed_tokens.weight
    model.roberta.embeddings.position_embeddings.weight = roberta_sent_encoder.embed_positions.weight
    model.roberta.embeddings.token_type_embeddings.weight.data = torch.zeros_like(
        model.roberta.embeddings.token_type_embeddings.weight
    )  # just zero them out b/c RoBERTa doesn't use them.

    model.roberta.encoder.LayerNorm.weight = roberta_sent_encoder.layer_norm.weight
    model.roberta.encoder.LayerNorm.bias = roberta_sent_encoder.layer_norm.bias

    for i in range(config.num_hidden_layers):
        # Encoder: start of layer
        layer: BertLayer = model.roberta.encoder.layer[i]
        roberta_layer: TransformerSentenceEncoderLayer = roberta_sent_encoder.layers[i]

        attention: RobertaAttention = layer.attention
        attention.self_attn_layer_norm.weight = roberta_layer.self_attn_layer_norm.weight
        attention.self_attn_layer_norm.bias = roberta_layer.self_attn_layer_norm.bias

        # self attention
        self_attn: BertSelfAttention = layer.attention.self
        assert (roberta_layer.self_attn.k_proj.weight.data.shape ==
                roberta_layer.self_attn.q_proj.weight.data.shape ==
                roberta_layer.self_attn.v_proj.weight.data.shape == torch.Size(
                    (config.hidden_size, config.hidden_size)))

        self_attn.query.weight.data = roberta_layer.self_attn.q_proj.weight
        self_attn.query.bias.data = roberta_layer.self_attn.q_proj.bias
        self_attn.key.weight.data = roberta_layer.self_attn.k_proj.weight
        self_attn.key.bias.data = roberta_layer.self_attn.k_proj.bias
        self_attn.value.weight.data = roberta_layer.self_attn.v_proj.weight
        self_attn.value.bias.data = roberta_layer.self_attn.v_proj.bias

        # self-attention output
        self_output: BertSelfOutput = layer.attention.output
        assert self_output.dense.weight.shape == roberta_layer.self_attn.out_proj.weight.shape
        self_output.dense.weight = roberta_layer.self_attn.out_proj.weight
        self_output.dense.bias = roberta_layer.self_attn.out_proj.bias

        # this one is final layer norm
        layer.LayerNorm.weight = roberta_layer.final_layer_norm.weight
        layer.LayerNorm.bias = roberta_layer.final_layer_norm.bias

        # intermediate
        intermediate: BertIntermediate = layer.intermediate
        assert intermediate.dense.weight.shape == roberta_layer.fc1.weight.shape
        intermediate.dense.weight = roberta_layer.fc1.weight
        intermediate.dense.bias = roberta_layer.fc1.bias

        # output
        bert_output: BertOutput = layer.output
        assert bert_output.dense.weight.shape == roberta_layer.fc2.weight.shape
        bert_output.dense.weight = roberta_layer.fc2.weight
        bert_output.dense.bias = roberta_layer.fc2.bias
        # end of layer

    if classification_head:
        model.classifier.dense.weight = roberta.model.classification_heads["mnli"].dense.weight
        model.classifier.dense.bias = roberta.model.classification_heads["mnli"].dense.bias
        model.classifier.out_proj.weight = roberta.model.classification_heads["mnli"].out_proj.weight
        model.classifier.out_proj.bias = roberta.model.classification_heads["mnli"].out_proj.bias
    else:
        # LM Head
        model.lm_head.dense.weight = roberta.model.encoder.lm_head.dense.weight
        model.lm_head.dense.bias = roberta.model.encoder.lm_head.dense.bias
        model.lm_head.layer_norm.weight = roberta.model.encoder.lm_head.layer_norm.weight
        model.lm_head.layer_norm.bias = roberta.model.encoder.lm_head.layer_norm.bias
        model.lm_head.decoder.weight = roberta.model.encoder.lm_head.weight
        model.lm_head.decoder.bias = roberta.model.encoder.lm_head.bias

    # Let's check that we get the same results.
    input_ids: torch.Tensor = roberta.encode(SAMPLE_TEXT).unsqueeze(0)  # batch of size 1

    our_output = model(input_ids)[0]
    if classification_head:
        their_output = roberta.model.classification_heads["mnli"](roberta.extract_features(input_ids))
    else:
        their_output = roberta.model(input_ids)[0]
    print(our_output.shape, their_output.shape)
    max_absolute_diff = torch.max(torch.abs(our_output - their_output)).item()
    print(f"max_absolute_diff = {max_absolute_diff}")  # ~ 1e-7
    success = torch.allclose(our_output, their_output, atol=1e-3)
    print("Do both models output the same tensors?",
          "🔥" if success else "💩")
    if not success:
        raise Exception("Something went wRoNg")

    pathlib.Path(pytorch_dump_folder_path).mkdir(parents=True, exist_ok=True)
    print(f"Saving model to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)
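
As a usage sketch, the converter is typically driven from the command line; the argument names below follow the pattern of similar conversion scripts and are assumptions, not part of the snippet above:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Illustrative flag names; adapt them to your checkpoint layout.
    parser.add_argument("--roberta_checkpoint_path", type=str, required=True, help="Path to the fairseq checkpoint directory.")
    parser.add_argument("--pytorch_dump_folder_path", type=str, required=True, help="Where to save the converted Hugging Face model.")
    parser.add_argument("--classification_head", action="store_true", help="Also convert the 'mnli' classification head.")
    args = parser.parse_args()
    convert_xlm_roberta_xl_checkpoint_to_pytorch(
        args.roberta_checkpoint_path, args.pytorch_dump_folder_path, args.classification_head
    )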