def create_and_check_xlm_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    input_mask,
):
    model = XLMModel(config=config)
    model.to(torch_device)
    model.eval()
    outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids)
    outputs = model(input_ids, langs=token_type_ids)
    outputs = model(input_ids)
    sequence_output = outputs[0]
    result = {
        "sequence_output": sequence_output,
    }
    self.parent.assertListEqual(
        list(result["sequence_output"].size()),
        [self.batch_size, self.seq_length, self.hidden_size])
def get_transformers_model(
    settings: Dict[str, Any],
    model_name: str,
    pretrained: bool = True,
    ckptdir: Optional[Path] = None,
) -> PreTrainedModel:
    model_path = model_name if pretrained else str(ckptdir)
    config = AutoConfig.from_pretrained(model_path)
    config.attention_probs_dropout_prob = settings.get(
        'encoder_attn_dropout_rate', 0.1)
    config.hidden_dropout_prob = settings.get('encoder_ffn_dropout_rate', 0.1)
    config.layer_norm_eps = settings.get('layer_norm_eps', 1e-5)

    if pretrained:
        model = AutoModel.from_pretrained(model_name, config=config)
        return model

    # If only the model structure is needed (without pretrained parameters),
    # the concrete model class has to be instantiated explicitly.
    if 'xlm' in model_name:
        model = XLMModel(config=config)
    elif 'albert' in model_name:
        model = AlbertModel(config=config)
    elif 'roberta' in model_name:
        model = RobertaModel(config=config)
    elif 'deberta-v2' in model_name:
        model = DebertaV2Model(config=config)
    elif 'deberta' in model_name:
        model = DebertaModel(config=config)
    elif 'bert' in model_name:
        model = BertModel(config=config)
    elif 'electra' in model_name:
        model = ElectraModel(config=config)
    else:
        model = BertModel(config=config)
    return model
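# Hedged usage sketch for get_transformers_model above: the settings keys mirror
# the ones the function reads, and 'roberta-base' is only an illustrative
# checkpoint name, not one taken from the original code.
settings = {
    'encoder_attn_dropout_rate': 0.1,
    'encoder_ffn_dropout_rate': 0.1,
    'layer_norm_eps': 1e-5,
}
encoder = get_transformers_model(settings, 'roberta-base', pretrained=True)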
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLMModel(config)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
    self.dropout = nn.Dropout(0.1)
def xlm_model():
    config = XLMConfig(
        vocab_size=93000,
        emb_dim=32,
        n_layers=5,
        n_heads=4,
        dropout=0.1,
        max_position_embeddings=512,
        lang2id={
            "ar": 0,
            "bg": 1,
            "de": 2,
            "el": 3,
            "en": 4,
            "es": 5,
            "fr": 6,
            "hi": 7,
            "ru": 8,
            "sw": 9,
            "th": 10,
            "tr": 11,
            "ur": 12,
            "vi": 13,
            "zh": 14,
        },
    )
    return XLMModel(config=config)
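# Hedged example of exercising the xlm_model() fixture above; the token ids are
# arbitrary, and the printed shape follows from emb_dim=32 in the config. Note
# that langs only affects the output when the config enables language
# embeddings (n_langs > 1); otherwise the model silently ignores it.
import torch

model = xlm_model()
input_ids = torch.tensor([[0, 42, 7, 1]])
langs = torch.full_like(input_ids, model.config.lang2id["en"])
outputs = model(input_ids, langs=langs)
print(outputs.last_hidden_state.shape)  # torch.Size([1, 4, 32])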
def __init__(self, config):
    super(XLMForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLMModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.init_weights()
def __init__(self,
             vocabs: Dict[str, Vocabulary],
             config: Config,
             pre_load_model: bool = True):
    super().__init__(config=config)

    if pre_load_model:
        self.xlm = XLMModel.from_pretrained(self.config.model_name,
                                            output_hidden_states=True)
    else:
        xlm_config = XLMConfig.from_pretrained(self.config.model_name,
                                               output_hidden_states=True)
        self.xlm = XLMModel(xlm_config)

    self.source_lang_id = self.xlm.config.lang2id.get(
        self.config.source_language)
    self.target_lang_id = self.xlm.config.lang2id.get(
        self.config.target_language)
    if None in (self.source_lang_id, self.target_lang_id):
        raise ValueError(
            f'Invalid lang_id for XLM model.'
            f' Valid ids are: {self.xlm.config.lang2id.keys()}')

    self.mlp = None
    if self.config.use_mlp:
        self.mlp = nn.Sequential(
            nn.Linear(self.xlm.config.hidden_size, self.config.hidden_size),
            nn.Tanh(),
        )
        output_size = self.config.hidden_size
    else:
        output_size = self.xlm.config.hidden_size

    self._sizes = {
        const.TARGET: output_size,
        const.TARGET_LOGITS: output_size,
        const.TARGET_SENTENCE: 2 * output_size,
        const.SOURCE: output_size,
        const.SOURCE_LOGITS: output_size,
    }

    self.vocabs = {
        const.TARGET: vocabs[const.TARGET],
        const.SOURCE: vocabs[const.SOURCE],
    }

    self.output_embeddings = self.xlm.embeddings

    if self.config.freeze:
        for param in self.xlm.parameters():
            param.requires_grad = False
def __init__(self, config):
    # The base-class initializers need the instance passed explicitly when
    # called unbound like this.
    BertPreTrainedModel.__init__(self, config)
    XLMPreTrainedModel.__init__(self, config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.transformer = XLMModel(config)
    self.dropout = nn.Dropout(0.1)
    # Classify on the concatenation of the BERT and XLM hidden states.
    self.classifier = nn.Linear(config.hidden_size + config.hidden_size,
                                config.num_labels)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels_list
    self.transformer = XLMModel(config)
    self.dropout = nn.Dropout(config.dropout)
    self.pooler = nn.Sequential(
        nn.Linear(config.hidden_size, config.hidden_size),
        nn.Tanh())
    self.classifiers = nn.ModuleList([
        nn.Linear(config.hidden_size, num_label)
        for num_label in self.num_labels
    ])
    self.init_weights()
def __init__(self, config: Munch = None, **kwargs):
    """Initialize a new XLM synapse module.

    Args:
        config (:obj:`munch.Munch`, `required`): munched config class.
    """
    super(XLMSynapse, self).__init__(config=config, **kwargs)
    if config is None:
        config = XLMSynapse.default_config()
    bittensor.config.Config.update_with_kwargs(config.synapse, kwargs)
    XLMSynapse.check_config(config)
    self.config = config

    # Build config.
    xlm_config = XLMConfig(
        vocab_size=bittensor.__vocab_size__,
        emb_dim=bittensor.__network_dim__,
        n_layers=config.synapse.n_layers,
        n_heads=config.synapse.n_heads,
        # More needed
    )

    # model layer: encodes tokenized sequences to network dim.
    self.xlm = XLMModel(xlm_config)

    # pooler layer: pools the hidden units for use by the pkm dendrite rpc query.
    self.pooler = XLMPooler(xlm_config)

    # router: (PKM layer) queries network using embeddings as context.
    self.router = PKMRouter(config, query_dim=bittensor.__network_dim__)

    # hidden layer: transforms context and encoding to network dimension hidden units.
    self.hidden_layer = nn.Linear(bittensor.__network_dim__,
                                  bittensor.__network_dim__)

    # target layer: maps from hidden layer to vocab dimension for each token.
    self.target_layer = nn.Linear(bittensor.__network_dim__,
                                  bittensor.__vocab_size__,
                                  bias=False)

    # Loss function.
    self.loss_fct = nn.CrossEntropyLoss()

    self.to(self.device)
def create_and_check_xlm_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    choice_labels,
    input_mask,
):
    model = XLMModel(config=config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids, lengths=input_lengths, langs=token_type_ids)
    result = model(input_ids, langs=token_type_ids)
    result = model(input_ids)
    self.parent.assertEqual(
        result.last_hidden_state.shape,
        (self.batch_size, self.seq_length, self.hidden_size))
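# Standalone sketch of the shape check performed by the test helper above, with
# assumed toy sizes (batch_size=2, seq_length=7, emb_dim=32) in place of the
# tester attributes; these values are illustrative, not from the original test.
import torch
from transformers import XLMConfig, XLMModel

config = XLMConfig(vocab_size=99, emb_dim=32, n_layers=2, n_heads=4)
model = XLMModel(config).eval()
input_ids = torch.randint(0, 99, (2, 7))
with torch.no_grad():
    result = model(input_ids)
assert result.last_hidden_state.shape == (2, 7, 32)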