def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
    input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

    input_lengths = None
    if self.use_input_lengths:
        input_lengths = (
            ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
        )  # small variation of seq_length

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

    sequence_labels = None
    token_labels = None
    is_impossible_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

    config = XLMConfig(
        vocab_size=self.vocab_size,
        n_special=self.n_special,
        emb_dim=self.hidden_size,
        n_layers=self.num_hidden_layers,
        n_heads=self.num_attention_heads,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        gelu_activation=self.gelu_activation,
        sinusoidal_embeddings=self.sinusoidal_embeddings,
        asm=self.asm,
        causal=self.causal,
        n_langs=self.n_langs,
        max_position_embeddings=self.max_position_embeddings,
        initializer_range=self.initializer_range,
        summary_type=self.summary_type,
        use_proj=self.use_proj,
        bos_token_id=self.bos_token_id,
    )

    return (
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    )

def xlm_model():
    config = XLMConfig(
        vocab_size=93000,
        emb_dim=32,
        n_layers=5,
        n_heads=4,
        dropout=0.1,
        max_position_embeddings=512,
        lang2id={
            "ar": 0,
            "bg": 1,
            "de": 2,
            "el": 3,
            "en": 4,
            "es": 5,
            "fr": 6,
            "hi": 7,
            "ru": 8,
            "sw": 9,
            "th": 10,
            "tr": 11,
            "ur": 12,
            "vi": 13,
            "zh": 14,
        },
    )
    return XLMModel(config=config)

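# Hedged usage sketch (not part of the original fixture): a minimal forward pass through
# the randomly initialised XLMModel returned by xlm_model() above. The torch import,
# batch shape, and token ids are illustrative assumptions.
import torch

model = xlm_model()
input_ids = torch.randint(0, model.config.vocab_size, (1, 12))  # (batch_size, seq_len)
outputs = model(input_ids=input_ids)
last_hidden_state = outputs[0]  # shape: (1, 12, emb_dim=32)
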
def test_TFXLMForQuestionAnsweringSimple(self):
    from transformers import XLMConfig, TFXLMForQuestionAnsweringSimple
    keras.backend.clear_session()
    # pretrained_weights = 'xlm-mlm-enfr-1024'
    tokenizer_file = 'xlm_xlm-mlm-enfr-1024.pickle'
    tokenizer = self._get_tokenzier(tokenizer_file)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    config = XLMConfig()
    model = TFXLMForQuestionAnsweringSimple(config)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                                     predictions, self.model_files))

def __init__(self, vocabs: Dict[str, Vocabulary], config: Config, pre_load_model: bool = True):
    super().__init__(config=config)

    if pre_load_model:
        self.xlm = XLMModel.from_pretrained(
            self.config.model_name, output_hidden_states=True)
    else:
        xlm_config = XLMConfig.from_pretrained(
            self.config.model_name, output_hidden_states=True)
        self.xlm = XLMModel(xlm_config)

    self.source_lang_id = self.xlm.config.lang2id.get(
        self.config.source_language)
    self.target_lang_id = self.xlm.config.lang2id.get(
        self.config.target_language)
    if None in (self.source_lang_id, self.target_lang_id):
        raise ValueError(
            f'Invalid lang_id for XLM model.'
            f' Valid ids are: {self.xlm.config.lang2id.keys()}')

    self.mlp = None
    if self.config.use_mlp:
        self.mlp = nn.Sequential(
            nn.Linear(self.xlm.config.hidden_size, self.config.hidden_size),
            nn.Tanh(),
        )
        output_size = self.config.hidden_size
    else:
        output_size = self.xlm.config.hidden_size

    self._sizes = {
        const.TARGET: output_size,
        const.TARGET_LOGITS: output_size,
        const.TARGET_SENTENCE: 2 * output_size,
        const.SOURCE: output_size,
        const.SOURCE_LOGITS: output_size,
    }

    self.vocabs = {
        const.TARGET: vocabs[const.TARGET],
        const.SOURCE: vocabs[const.SOURCE],
    }

    self.output_embeddings = self.xlm.embeddings

    if self.config.freeze:
        for param in self.xlm.parameters():
            param.requires_grad = False

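# Hedged sketch (not part of the original class): one way the looked-up language ids can
# be expanded into the `langs` tensor that XLMModel.forward expects (one id per token).
# The helper name and the commented call are illustrative assumptions.
import torch

def make_langs_tensor(input_ids: torch.Tensor, lang_id: int) -> torch.Tensor:
    """Return a tensor shaped like input_ids, filled with a single language id."""
    return torch.full_like(input_ids, lang_id)

# e.g. encoding the target side of a batch:
#   target_langs = make_langs_tensor(target_ids, self.target_lang_id)
#   hidden_states = self.xlm(input_ids=target_ids, langs=target_langs)[0]
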
def __init__(self, config: Munch = None, **kwargs):
    """ Initialize a new XLM synapse module.

    Args:
        config (:obj:`munch.Munch`, `required`): munched config class.
    """
    super(XLMSynapse, self).__init__(config=config, **kwargs)
    if config is None:
        config = XLMSynapse.default_config()
    bittensor.config.Config.update_with_kwargs(config.synapse, kwargs)
    XLMSynapse.check_config(config)
    self.config = config

    # Build config.
    xlm_config = XLMConfig(
        vocab_size=bittensor.__vocab_size__,
        emb_dim=bittensor.__network_dim__,
        n_layers=config.synapse.n_layers,
        n_heads=config.synapse.n_heads,
        # More needed
    )

    # model layer: encodes tokenized sequences to network dim.
    self.xlm = XLMModel(xlm_config)

    # pooler layer: pools the hidden units for use by the pkm dendrite rpc query.
    self.pooler = XLMPooler(xlm_config)

    # router: (PKM layer) queries network using embeddings as context
    self.router = PKMRouter(config, query_dim=bittensor.__network_dim__)

    # hidden layer: transforms context and encoding to network dimension hidden units.
    self.hidden_layer = nn.Linear(bittensor.__network_dim__, bittensor.__network_dim__)

    # target layer: maps from hidden layer to vocab dimension for each token.
    self.target_layer = nn.Linear(bittensor.__network_dim__, bittensor.__vocab_size__, bias=False)

    # Loss function
    self.loss_fct = nn.CrossEntropyLoss()

    self.to(self.device)

def xlm_convert_to_huggingface(args):
    """
    Given a Facebook XLM model checkpoint and a BPE merges file, create and save
    a HuggingFace XLMTokenizer and XLMModel.
    """
    xlm_pth = torch.load(args.checkpoint, map_location=torch.device('cpu'))

    with NamedTemporaryFile() as tfile:
        tfile.write(b'{}')
        tfile.flush()
        tokenizer = XLMTokenizer(
            tfile.name, args.merges, do_lowercase_and_remove_accent=False)
    tokenizer.encoder = convert_vocab(xlm_pth['dico_word2id'])
    vocab_size = len(tokenizer)

    params = xlm_pth['params']
    xlm_config = XLMConfig(
        emb_dim=params['emb_dim'],
        vocab_size=params['n_words'],
        n_layers=params['n_layers'],
        n_heads=params['n_heads'],
        n_langs=params['n_langs'],
        sinusoidal_embeddings=params['sinusoidal_embeddings'],
        use_lang_emb=params['use_lang_emb'],
        is_encoder=params['encoder_only'],
        output_hidden_states=True,
        n_words=params['n_words'],
    )

    # Provide both config and state dict to model init
    model = XLMModel.from_pretrained(
        None, config=xlm_config, state_dict=xlm_pth['model'])

    # Save
    save_directory = Path(args.output_dir)
    if not save_directory.exists():
        save_directory.mkdir(parents=True, exist_ok=True)
    model.save_pretrained(str(save_directory))
    tokenizer.save_pretrained(str(save_directory))
    tokenizer.save_vocabulary(str(save_directory))

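# Hedged usage sketch (assumed, not from the original script): a minimal CLI wrapper.
# The argument names --checkpoint, --merges, and --output_dir mirror the attributes
# accessed by xlm_convert_to_huggingface() above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Convert a Facebook XLM checkpoint to HuggingFace format.')
    parser.add_argument('--checkpoint', required=True, help='Path to the XLM .pth checkpoint')
    parser.add_argument('--merges', required=True, help='Path to the BPE merges file')
    parser.add_argument('--output_dir', required=True, help='Where to save the converted model')
    xlm_convert_to_huggingface(parser.parse_args())
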
def get_config(self):
    return XLMConfig(
        vocab_size=self.vocab_size,
        n_special=self.n_special,
        emb_dim=self.hidden_size,
        n_layers=self.num_hidden_layers,
        n_heads=self.num_attention_heads,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        gelu_activation=self.gelu_activation,
        sinusoidal_embeddings=self.sinusoidal_embeddings,
        asm=self.asm,
        causal=self.causal,
        n_langs=self.n_langs,
        max_position_embeddings=self.max_position_embeddings,
        initializer_range=self.initializer_range,
        summary_type=self.summary_type,
        use_proj=self.use_proj,
        num_labels=self.num_labels,
        bos_token_id=self.bos_token_id,
    )

    config = RobertaConfig(vocab_size=50265,
                           max_position_embeddings=514,
                           num_attention_heads=12,
                           num_hidden_layers=12,
                           type_vocab_size=1,
                           )
    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', do_lower_case=False)
    model = RobertaForMaskedLM.from_pretrained('./multi-label_LM/multi-label_RoBerta_e10_b16', config=config)
    # 12-layer, 768-hidden, 12-heads, 125M parameters, roberta-base using the bert-base architecture
elif args.LM == 'XLM':
    from transformers import XLMConfig, XLMTokenizer, XLMWithLMHeadModel
    config = XLMConfig(vocab_size=64139,
                       emb_dim=1024,
                       max_position_embeddings=512,
                       n_heads=8,
                       n_layers=6,
                       )
    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-enfr-1024', do_lower_case=False)
    model = XLMWithLMHeadModel.from_pretrained('./multi-label_LM/multi-label_XLM_e10_b16', config=config)
    # 6-layer, 1024-hidden, 8-heads
    # XLM English-French model trained on the concatenation of English and French wikipedia
else:
    print('need to define LM from Bert,RoBerta,XLM')

print(model)

def freeze_layer_fun(freeze_layer):
    for name, param in model.named_parameters():

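# Hedged sketch (not in the original script): how the XLM branch above can be used for
# masked-token prediction once `tokenizer` and `model` are loaded. The sample sentence is
# an illustrative assumption; tokenizer.mask_token is used rather than hard-coding XLM's
# mask symbol.
import torch

text = f"Paris is the {tokenizer.mask_token} of France."
encoded = tokenizer(text, return_tensors='pt')
with torch.no_grad():
    logits = model(**encoded)[0]  # (1, seq_len, vocab_size)
mask_positions = (encoded['input_ids'][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
predicted_ids = logits[0, mask_positions].argmax(dim=-1)
print(tokenizer.decode(predicted_ids))
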
def load_model(args):
    if 'bert-base-multilingual' in args['model_checkpoint']:
        # bert-base-multilingual-uncased or bert-base-multilingual-cased
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained(args['model_checkpoint'])
        config = BertConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'xlm-mlm' in args['model_checkpoint']:
        # xlm-mlm-100-1280
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = XLMTokenizer.from_pretrained(args['model_checkpoint'])
        config = XLMConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = XLMForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = XLMForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = XLMForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'xlm-roberta' in args['model_checkpoint']:
        # xlm-roberta-base or xlm-roberta-large
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = XLMRobertaTokenizer.from_pretrained(
            args['model_checkpoint'])
        config = XLMRobertaConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = XLMRobertaForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = XLMRobertaForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = XLMRobertaForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'fasttext' in args['model_checkpoint']:
        # Prepare config & tokenizer
        vocab_path = args['vocab_path']
        config_path = None
        word_tokenizer = args['word_tokenizer_class']()
        emb_path = args['embedding_path'][args['model_checkpoint']]
        _, vocab_map = load_vocab(vocab_path)
        tokenizer = SimpleTokenizer(vocab_map, word_tokenizer, lower=args["lower"])
        vocab_list = list(tokenizer.vocab.keys())

        config = BertConfig.from_pretrained('bert-base-uncased')
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']
        config.num_hidden_layers = args["num_layers"]

        embeddings = gen_embeddings(vocab_list, emb_path, emb_dim=300)
        config.hidden_size = 300
        config.num_attention_heads = 10
        config.vocab_size = len(embeddings)

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
    elif 'scratch' in args['model_checkpoint']:
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        config = BertConfig.from_pretrained("bert-base-uncased")
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']
        config.num_hidden_layers = args["num_layers"]
        config.hidden_size = 300
        config.num_attention_heads = 10

        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config=config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config=config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config=config)
    elif 'indobenchmark' in args['model_checkpoint']:
        # indobenchmark models
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained(args['model_checkpoint'])
        config = BertConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        model_class = None
        if 'sequence_classification' == args['task']:
            model_class = AlbertForSequenceClassification if 'lite' in args[
                'model_checkpoint'] else BertForSequenceClassification
        elif 'token_classification' == args['task']:
            model_class = AlbertForWordClassification if 'lite' in args[
                'model_checkpoint'] else BertForWordClassification
        elif 'multi_label_classification' == args['task']:
            model_class = AlbertForMultiLabelClassification if 'lite' in args[
                'model_checkpoint'] else BertForMultiLabelClassification
        model = model_class.from_pretrained(args['model_checkpoint'],
                                            config=config)
    return model, tokenizer, vocab_path, config_path

def load_eval_model(args):
    vocab_path = f'./{args["model_dir"]}/{args["dataset"]}/{args["experiment_name"]}/vocab.txt'
    config_path = f'./{args["model_dir"]}/{args["dataset"]}/{args["experiment_name"]}/config.json'
    model_path = f'./{args["model_dir"]}/{args["dataset"]}/{args["experiment_name"]}/best_model_0.th'

    # Load for word2vec and fasttext
    if 'word2vec' in args['model_type'] or 'fasttext' in args['model_type']:
        emb_path = args['embedding_path'][args['model_type']]
        model, tokenizer = load_word_embedding_model(
            args['model_type'], args['task'], vocab_path,
            args['word_tokenizer_class'], emb_path, args['num_labels'],
            lower=args['lower'])
        return model, tokenizer

    # Load config & tokenizer
    if 'albert' in args['model_type']:
        config = AlbertConfig.from_json_file(config_path)
        tokenizer = BertTokenizer(vocab_path)
    elif 'babert' in args['model_type']:
        config = BertConfig.from_json_file(config_path)
        tokenizer = BertTokenizer(vocab_path)
    elif 'scratch' in args['model_type']:
        config = BertConfig.from_pretrained('bert-base-uncased')
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    elif 'bert-base-multilingual' in args['model_type']:
        config = BertConfig.from_pretrained(args['model_type'])
        tokenizer = BertTokenizer.from_pretrained(args['model_type'])
    elif 'xlm-mlm-100-1280' in args['model_type']:
        config = XLMConfig.from_pretrained(args['model_type'])
        tokenizer = XLMTokenizer.from_pretrained(args['model_type'])
    elif 'xlm-roberta' in args['model_type']:
        config = XLMRobertaConfig.from_pretrained(args['model_type'])
        tokenizer = XLMRobertaTokenizer.from_pretrained(args['model_type'])
    else:
        raise ValueError('Invalid `model_type` argument values')

    # Get model class
    base_cls, pred_cls = get_model_class(args['model_type'], args['task'])

    # Adjust config
    if type(args['num_labels']) == list:
        config.num_labels = max(args['num_labels'])
        config.num_labels_list = args['num_labels']
    else:
        config.num_labels = args['num_labels']

    # Instantiate model
    model = pred_cls(config=config)
    base_model = base_cls.from_pretrained(model_path, from_tf=False, config=config)

    # Plug the pretrained base model into the classification model
    if 'bert' in model.__dir__():
        model.bert = base_model
    elif 'albert' in model.__dir__():
        model.albert = base_model
    elif 'roberta' in model.__dir__():
        model.roberta = base_model
    elif 'transformer' in model.__dir__():
        model.transformer = base_model
    else:
        raise ValueError(
            'Model attribute not found, is there any change in the `transformers` library?'
        )
    return model, tokenizer

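# Hedged usage sketch (assumed, not from the original code): the keys below mirror the
# args lookups inside load_eval_model(); the directory names and label count are
# illustrative placeholders.
eval_args = {
    'model_dir': 'save',
    'dataset': 'my_dataset',
    'experiment_name': 'exp0',
    'model_type': 'xlm-mlm-100-1280',
    'task': 'sequence_classification',
    'num_labels': 3,
}
model, tokenizer = load_eval_model(eval_args)
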
def load_model(args):
    if 'albert-large-wwmlm-512' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-large-wwmlm-512/albert_large_model_bpe_wwmlm_512_vocab_uncased_30000.txt"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(
            "../embeddings/albert-large-wwmlm-512/albert_large_model_bpe_wwmlm_512_albert_large_config.json"
        )
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-large-wwmlm-512/albert_large_model_bpe_wwmlm_512_pytorch_albert_large_512_629k.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'albert-base-wwmlm-512' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-base-wwmlm-512/albert_base_model_bpe_wwmlm_512_vocab_uncased_30000.txt"
        config_path = "../embeddings/albert-base-wwmlm-512/albert_base_model_bpe_wwmlm_512_albert_base_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-base-wwmlm-512/albert_base_model_bpe_wwmlm_512_pytorch_model_albert_base_162k.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'albert-large-wwmlm-128' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-large-wwmlm-128/albert_large_model_bpe_wwmlm_128_vocab_uncased_30000.txt"
        config_path = "../embeddings/albert-large-wwmlm-128/albert_large_model_bpe_wwmlm_128_albert_large_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-large-wwmlm-128/albert_large_model_bpe_wwmlm_128_pytorch_albert_large_128_500k.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'babert-bpe-mlm-large-512' == args['model_checkpoint']:
        # babert_bpe
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-bpe-mlm-large-512/babert_model_bpe_mlm_uncased_large_512_dup10-5_vocab_uncased_30522.txt"
        config_path = "../embeddings/babert-bpe-mlm-large-512/babert_model_bpe_mlm_uncased_large_512_dup10-5_bert_large_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-bpe-mlm-large-512/babert_model_bpe_mlm_uncased_large_512_dup10-5_pytorch_babert_uncased_large_512_dup10-5_1120k.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'albert-base-uncased-112500' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-base-uncased-112500/vocab.txt"
        config_path = "../embeddings/albert-base-uncased-112500/bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-base-uncased-112500/albert_base_uncased_112500.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'albert-base-uncased-96000' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-base-uncased-96000/vocab.txt"
        config_path = "../embeddings/albert-base-uncased-96000/bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-base-uncased-96000/albert_base_uncased_96000.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'albert-base-uncased-191k' == args['model_checkpoint']:
        vocab_path = "../embeddings/albert-base-uncased-191k/pytorch_models_albert_base_uncased_191500_vocab_uncased_30000.txt"
        config_path = "../embeddings/albert-base-uncased-191k/pytorch_models_albert_base_uncased_191500_albert_base_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = AlbertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = AlbertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = AlbertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = AlbertForMultiLabelClassification(config)

        # Plug pretrained bert model
        albert_model = AlbertModel.from_pretrained(
            "../embeddings/albert-base-uncased-191k/pytorch_models_albert_base_uncased_191500_pytorch_model_albert_base_191k.bin",
            from_tf=False, config=config)
        model.albert = albert_model
    elif 'babert-opensubtitle' == args['model_checkpoint']:
        # babert-opensubtitle
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-opensubtitle/vocab.txt"
        config_path = "../embeddings/babert-opensubtitle/bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-opensubtitle/model.ckpt-1000000.index",
            from_tf=True, config=config)
        model.bert = bert_model.bert
    elif 'babert-bpe-mlm-large-uncased-1100k' == args['model_checkpoint']:
        # babert_bpe
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-bpe-mlm-large-uncased-1100k/pytorch_models_babert_uncased_large_1100k_vocab_uncased_30522.txt"
        config_path = "../embeddings/babert-bpe-mlm-large-uncased-1100k/pytorch_models_babert_uncased_large_1100k_bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-bpe-mlm-large-uncased-1100k/pytorch_models_babert_uncased_large_1100k_pytorch_model_babert_large_1100k.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'babert-bpe-mlm-large-uncased-1m' == args['model_checkpoint']:
        # babert_bpe
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-bpe-mlm-large-uncased-1m/pytorch_models_babert_uncased_large_1mil_vocab_uncased_30522.txt"
        config_path = "../embeddings/babert-bpe-mlm-large-uncased-1m/pytorch_models_babert_uncased_large_1mil_bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-bpe-mlm-large-uncased-1m/pytorch_models_babert_uncased_large_1mil_pytorch_model_babert_large_1mil.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'babert-base-512' == args['model_checkpoint']:
        # babert_bpe
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-base-512/pytorch_models_babert_base_512_vocab_uncased_30522.txt"
        config_path = "../embeddings/babert-base-512/pytorch_models_babert_base_512_bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-base-512/pytorch_models_babert_base_512_pytorch_model_babert_base_uncased_512.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'babert-bpe-mlm-large-uncased' == args['model_checkpoint']:
        # babert_bpe
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-bpe-mlm-large-uncased/pytorch_models_babert_uncased_large_vocab_uncased_30522.txt"
        config_path = "../embeddings/babert-bpe-mlm-large-uncased/pytorch_models_babert_uncased_large_bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-bpe-mlm-large-uncased/pytorch_models_babert_uncased_large_pytorch_model_babert_large_778500.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'babert-bpe-mlm-uncased-128-dup10-5' == args['model_checkpoint']:
        # babert_bpe_wwmlm
        # Prepare config & tokenizer
        vocab_path = "../embeddings/babert-bpe-mlm-uncased-128-dup10-5/vocab.txt"
        config_path = "../embeddings/babert-bpe-mlm-uncased-128-dup10-5/bert_config.json"
        tokenizer = BertTokenizer(vocab_path)
        config = BertConfig.from_json_file(config_path)
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)

        # Plug pretrained bert model
        bert_model = BertForPreTraining.from_pretrained(
            "../embeddings/babert-bpe-mlm-uncased-128-dup10-5/pytorch_model.bin",
            config=config)
        model.bert = bert_model.bert
    elif 'bert-base-multilingual' in args['model_checkpoint']:
        # bert-base-multilingual-uncased or bert-base-multilingual-cased
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained(args['model_checkpoint'])
        config = BertConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'xlm-mlm' in args['model_checkpoint']:
        # xlm-mlm-100-1280
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = XLMTokenizer.from_pretrained(args['model_checkpoint'])
        config = XLMConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = XLMForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = XLMForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = XLMForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'xlm-roberta' in args['model_checkpoint']:
        # xlm-roberta-base or xlm-roberta-large
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = XLMRobertaTokenizer.from_pretrained(
            args['model_checkpoint'])
        config = XLMRobertaConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = XLMRobertaForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = XLMRobertaForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = XLMRobertaForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    elif 'word2vec' in args['model_checkpoint'] or 'fasttext' in args['model_checkpoint']:
        # Prepare config & tokenizer
        vocab_path = args['vocab_path']
        config_path = None
        word_tokenizer = args['word_tokenizer_class']()
        emb_path = args['embedding_path'][args['model_checkpoint']]
        _, vocab_map = load_vocab(vocab_path)
        tokenizer = SimpleTokenizer(vocab_map, word_tokenizer, lower=args["lower"])
        vocab_list = list(tokenizer.vocab.keys())

        config = BertConfig.from_pretrained('bert-base-uncased')
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']
        config.num_hidden_layers = args["num_layers"]

        if args['model_checkpoint'] == 'word2vec-twitter':
            embeddings = gen_embeddings(vocab_list, emb_path)
            config.hidden_size = 400
            config.num_attention_heads = 8

        if args['model_checkpoint'] == 'fasttext-cc-id' \
                or args['model_checkpoint'] == 'fasttext-cc-id-300-no-oov-uncased' \
                or args['model_checkpoint'] == 'fasttext-4B-id-300-no-oov-uncased':
            embeddings = gen_embeddings(vocab_list, emb_path, emb_dim=300)
            config.hidden_size = 300
            config.num_attention_heads = 10

        config.vocab_size = len(embeddings)

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config)
            model.bert.embeddings.word_embeddings.weight.data.copy_(
                torch.FloatTensor(embeddings))
    elif 'scratch' in args['model_checkpoint']:
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        config = BertConfig.from_pretrained("bert-base-uncased")
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']
        config.num_hidden_layers = args["num_layers"]
        config.hidden_size = 300
        config.num_attention_heads = 10

        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification(config=config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification(config=config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification(config=config)
    elif 'indobenchmark' in args['model_checkpoint']:
        # indobenchmark models
        # Prepare config & tokenizer
        vocab_path, config_path = None, None
        tokenizer = BertTokenizer.from_pretrained(args['model_checkpoint'])
        config = BertConfig.from_pretrained(args['model_checkpoint'])
        if type(args['num_labels']) == list:
            config.num_labels = max(args['num_labels'])
            config.num_labels_list = args['num_labels']
        else:
            config.num_labels = args['num_labels']

        # Instantiate model
        if 'sequence_classification' == args['task']:
            model = BertForSequenceClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'token_classification' == args['task']:
            model = BertForWordClassification.from_pretrained(
                args['model_checkpoint'], config=config)
        elif 'multi_label_classification' == args['task']:
            model = BertForMultiLabelClassification.from_pretrained(
                args['model_checkpoint'], config=config)
    return model, tokenizer, vocab_path, config_path

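# Hedged usage sketch (assumed, not from the original code): calling load_model() so that
# the 'xlm-mlm' branch above is taken. The keys come from the args lookups in the function;
# the values are illustrative.
example_args = {
    'model_checkpoint': 'xlm-mlm-100-1280',
    'task': 'sequence_classification',
    'num_labels': 3,
}
model, tokenizer, vocab_path, config_path = load_model(example_args)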