Example #1
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

        input_lengths = None
        if self.use_input_lengths:
            input_lengths = (
                ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
            )  # small variation of seq_length

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

        sequence_labels = None
        token_labels = None
        is_impossible_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = XLMConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            bos_token_id=self.bos_token_id,
        )

        return (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            choice_labels,
            input_mask,
        )
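The `ids_tensor` helper used throughout this example is not shown here; it comes from the transformers test utilities. A minimal sketch of what it provides (assuming TensorFlow; the real helper lives in the test suite and may differ in details):

import tensorflow as tf

def ids_tensor(shape, vocab_size, dtype=tf.int32):
    # Random integer ids in [0, vocab_size), cast to the requested dtype.
    # Minimal stand-in for the transformers test helper of the same name.
    ids = tf.random.uniform(shape, minval=0, maxval=vocab_size, dtype=tf.int32)
    return tf.cast(ids, dtype)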
Example #2
    def test_TFXLMForQuestionAnsweringSimple(self):
        from transformers import XLMConfig, TFXLMForQuestionAnsweringSimple
        keras.backend.clear_session()
        # pretrained_weights = 'xlm-mlm-enfr-1024'
        tokenizer_file = 'xlm_xlm-mlm-enfr-1024.pickle'
        tokenizer = self._get_tokenzier(tokenizer_file)
        text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
        config = XLMConfig()
        model = TFXLMForQuestionAnsweringSimple(config)
        predictions = model.predict(inputs)
        onnx_model = keras2onnx.convert_keras(model, model.name)
        self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx, predictions, self.model_files))
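The names `keras`, `keras2onnx`, `run_onnx_runtime`, `_get_tokenzier`, `_prepare_inputs`, and `self.model_files` are provided by the surrounding keras2onnx test harness and are not defined in this snippet. Roughly the following imports are assumed (a sketch; the exact keras import depends on how the harness is configured):

import keras2onnx             # Keras-to-ONNX converter exercised by the test
from tensorflow import keras  # assumed; the harness may use the standalone keras package instead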
Example #3
    def __init__(self, config: Munch = None, **kwargs):
        """ Initialize a new XLM synapse module.

        Args:
            config (:obj:`munch.Munch`, `optional`):
                    munched config class; defaults to XLMSynapse.default_config() when not provided.
        """
        super(XLMSynapse, self).__init__(config=config, **kwargs)
        if config is None:
            config = XLMSynapse.default_config()
        bittensor.config.Config.update_with_kwargs(config.synapse, kwargs)
        XLMSynapse.check_config(config)
        self.config = config

        # Build config.
        xlm_config = XLMConfig(
            vocab_size=bittensor.__vocab_size__,
            emb_dim=bittensor.__network_dim__,
            n_layers=config.synapse.n_layers,
            n_heads=config.synapse.n_heads,
            # More needed
        )

        # model layer: encodes tokenized sequences to network dim.
        self.xlm = XLMModel(xlm_config)

        # pooler layer: pools the hidden units for use by the pkm dendrite rpc query.
        self.pooler = XLMPooler(xlm_config)

        # router: (PKM layer) queries network using embeddings as context
        self.router = PKMRouter(config, query_dim=bittensor.__network_dim__)

        # hidden layer: transforms context and encoding to network dimension hidden units.
        self.hidden_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__network_dim__)

        # target layer: maps from hidden layer to vocab dimension for each token.
        self.target_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__vocab_size__,
                                      bias=False)

        # Loss function
        self.loss_fct = nn.CrossEntropyLoss()

        self.to(self.device)
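A rough sketch of how the encoder, hidden, and target layers compose in a local forward pass. The router and pooler wiring is bittensor-specific and omitted here; the function name and shapes are illustrative, not the module's actual forward method:

import torch

def local_forward_sketch(synapse: 'XLMSynapse', input_ids: torch.LongTensor) -> torch.Tensor:
    # Encode token ids into network-dimension hidden states with the XLM encoder.
    encoding = synapse.xlm(input_ids=input_ids)[0]   # [batch, seq, network_dim]
    # Transform to hidden units, then map each token to vocabulary logits.
    hidden = synapse.hidden_layer(encoding)          # [batch, seq, network_dim]
    logits = synapse.target_layer(hidden)            # [batch, seq, vocab_size]
    return logits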
Example #4
def xlm_convert_to_huggingface(args):
   """
   Given a Facebook XLM model checkpoint and a BPE merges file, create and save
   a Hugging Face XLMTokenizer and an XLMModel.
   """
   xlm_pth = torch.load(args.checkpoint, map_location=torch.device('cpu'))

   with NamedTemporaryFile() as tfile:
      tfile.write(b'{}')
      tfile.flush()
      tokenizer = XLMTokenizer(
         tfile.name,
         args.merges,
         do_lowercase_and_remove_accent=False)
   tokenizer.encoder = convert_vocab(xlm_pth['dico_word2id'])
   vocab_size = len(tokenizer)

   params = xlm_pth['params']
   xlm_config = XLMConfig(
      emb_dim=params['emb_dim'],
      vocab_size=params['n_words'],
      n_layers=params['n_layers'],
      n_heads=params['n_heads'],
      n_langs=params['n_langs'],
      sinusoidal_embeddings=params['sinusoidal_embeddings'],
      use_lang_emb=params['use_lang_emb'],
      is_encoder=params['encoder_only'],
      output_hidden_states=True,
      n_words=params['n_words'],
   )
   
   # Provide both config and state dict to model init
   model = XLMModel.from_pretrained(
      None,
      config=xlm_config,
      state_dict=xlm_pth['model'])

   # Save
   save_directory = Path(args.output_dir)
   if not save_directory.exists():
      save_directory.mkdir(parents=True, exist_ok=True)
   model.save_pretrained(str(save_directory))
   tokenizer.save_pretrained(str(save_directory))
   tokenizer.save_vocabulary(str(save_directory))
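A hypothetical command-line wrapper for the converter above; the argument names are inferred from the attributes the function reads (`checkpoint`, `merges`, `output_dir`) and are not part of the original script:

import argparse

if __name__ == '__main__':
   parser = argparse.ArgumentParser(description='Convert a Facebook XLM checkpoint to Hugging Face format')
   parser.add_argument('--checkpoint', required=True, help='path to the original XLM .pth checkpoint')
   parser.add_argument('--merges', required=True, help='path to the BPE merges file')
   parser.add_argument('--output_dir', required=True, help='directory for the converted model and tokenizer')
   xlm_convert_to_huggingface(parser.parse_args())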
Example #5
    def get_config(self):
        return XLMConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            num_labels=self.num_labels,
            bos_token_id=self.bos_token_id,
        )
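A config built by `get_config` is normally passed straight to a model class inside the individual checks. A minimal usage sketch, assuming the PyTorch `XLMModel` (the TF classes accept the same config) and a `tester` instance exposing the method above:

from transformers import XLMModel

config = tester.get_config()   # `tester` is the test-helper instance defining get_config (illustrative name)
model = XLMModel(config)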
Example #6
    config = RobertaConfig(vocab_size=50265,
                           max_position_embeddings=514,
                           num_attention_heads=12,
                           num_hidden_layers=12,
                           type_vocab_size=1,
                           )
    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', do_lower_case=False)
    model = RobertaForMaskedLM.from_pretrained('./multi-label_LM/multi-label_RoBerta_e10_b16', config=config)
    # 12-layer, 768-hidden, 12-heads, 125M parameters, roberta-base using the bert-base architecture

elif args.LM == 'XLM':
    from transformers import XLMConfig, XLMTokenizer, XLMWithLMHeadModel

    config = XLMConfig(vocab_size=64139,
                       emb_dim=1024,
                       max_position_embeddings=512,
                       n_heads=8,
                       n_layers=6,
                       )

    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-enfr-1024', do_lower_case=False)
    model = XLMWithLMHeadModel.from_pretrained('./multi-label_LM/multi-label_XLM_e10_b16', config=config)
    # 6-layer, 1024-hidden, 8-heads
    # XLM English-French model trained on the concatenation of English and French wikipedia

else:
    print('need to define LM from Bert, RoBerta, or XLM')

print(model)

def freeze_layer_fun(freeze_layer):
    for name, param in model.named_parameters():