# Assumes: import torch.nn as nn; from transformers import XLNetConfig, XLNetModel
def __init__(self, cfg):
    super(DSB_XLNetModel, self).__init__()
    self.cfg = cfg
    cate_col_size = len(cfg.cate_cols)
    cont_col_size = len(cfg.cont_cols)
    # Embed all categorical columns with a single shared embedding table.
    self.cate_emb = nn.Embedding(cfg.total_cate_size, cfg.emb_size, padding_idx=0)
    # Project the concatenated categorical embeddings to half the hidden size.
    self.cate_proj = nn.Sequential(
        nn.Linear(cfg.emb_size * cate_col_size, cfg.hidden_size // 2),
        nn.LayerNorm(cfg.hidden_size // 2),
    )
    # Project the continuous features to the other half of the hidden size.
    self.cont_emb = nn.Sequential(
        nn.Linear(cont_col_size, cfg.hidden_size // 2),
        nn.LayerNorm(cfg.hidden_size // 2),
    )
    self.config = XLNetConfig(
        3,  # vocab_size (not used)
        d_model=cfg.hidden_size,
        n_layer=cfg.nlayers,
        n_head=cfg.nheads,
        d_inner=cfg.hidden_size,
        # ff_activation="gelu",
        # untie_r=True,
        # attn_type="bi",
        # initializer_range=0.02,
        # layer_norm_eps=1e-12,
        dropout=cfg.dropout,
        # mem_len=None,
        # reuse_len=None,
        # bi_data=False,
        # clamp_len=-1,
        # same_length=False,
        # summary_type="last",
        # summary_use_proj=True,
        # summary_activation="tanh",
        summary_last_dropout=cfg.dropout,
        # start_n_top=5,
        # end_n_top=5,
    )
    self.encoder = XLNetModel(self.config)

    # Regression head: two linear layers with LayerNorm, dropout, and ReLU in between.
    def get_reg():
        return nn.Sequential(
            nn.Linear(cfg.hidden_size, cfg.hidden_size),
            nn.LayerNorm(cfg.hidden_size),
            nn.Dropout(cfg.dropout),
            nn.ReLU(),
            nn.Linear(cfg.hidden_size, cfg.target_size),
        )

    self.reg_layer = get_reg()
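# Usage sketch for the constructor above: a minimal sketch, assuming DSB_XLNetModel
# is an nn.Module whose forward pass consumes the layers defined in __init__. The
# cfg fields mirror exactly the attributes the constructor reads; all concrete
# values and column names below are illustrative assumptions.
from types import SimpleNamespace

cfg = SimpleNamespace(
    cate_cols=["event_code", "title"],  # hypothetical categorical columns
    cont_cols=["elapsed_time"],         # hypothetical continuous column
    total_cate_size=500,                # size of the shared category vocabulary
    emb_size=100,
    hidden_size=512,                    # must be divisible by nheads
    nlayers=2,
    nheads=8,
    dropout=0.2,
    target_size=1,                      # regression output dimension
)
model = DSB_XLNetModel(cfg)
print(model.encoder.config.d_model)  # 512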
# Assumes: import os; from transformers import XLNetConfig, XLNetModel;
# from transformers.modeling_utils import SequenceSummary
@classmethod
def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a language model either by supplying

    * the name of a remote model on s3 ("xlnet-base-cased" ...)
    * or a local path of a model trained via transformers ("some_dir/huggingface_model")
    * or a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: name or path of a model
    :param language: (Optional) Name of the language the model was trained for (e.g. "german").
                     If not supplied, FARM will try to infer it from the model name.
    :return: Language Model
    """
    xlnet = cls()
    if "farm_lm_name" in kwargs:
        xlnet.name = kwargs["farm_lm_name"]
    else:
        xlnet.name = pretrained_model_name_or_path
    # We need to differentiate between loading a model saved in FARM format
    # and one saved in Pytorch-Transformers format.
    farm_lm_config = os.path.join(pretrained_model_name_or_path, "language_model_config.json")
    if os.path.exists(farm_lm_config):
        # FARM style
        config = XLNetConfig.from_pretrained(farm_lm_config)
        farm_lm_model = os.path.join(pretrained_model_name_or_path, "language_model.bin")
        xlnet.model = XLNetModel.from_pretrained(farm_lm_model, config=config, **kwargs)
        xlnet.language = xlnet.model.config.language
    else:
        # Pytorch-Transformers style
        xlnet.model = XLNetModel.from_pretrained(pretrained_model_name_or_path, **kwargs)
        xlnet.language = cls._infer_language_from_name(pretrained_model_name_or_path)
        config = xlnet.model.config
    # XLNet does not provide a pooled_output by default. Therefore, we need to initialize an extra pooler.
    # The pooler takes the last hidden representation and feeds it to a dense layer of (hidden_dim x hidden_dim).
    # We don't want a dropout at the end of the pooler, since we already apply dropout in the adaptive model
    # before feeding everything to the prediction head.
    config.summary_last_dropout = 0
    xlnet.pooler = SequenceSummary(config)
    xlnet.pooler.apply(xlnet.model._init_weights)
    return xlnet
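# Usage sketch for the classmethod above: a minimal sketch, assuming it lives on a
# FARM LanguageModel subclass (called XLNet here, which is an assumption about the
# enclosing class name). "xlnet-base-cased" is a real Hugging Face model identifier;
# the local directory path is a placeholder.
xlnet_lm = XLNet.load("xlnet-base-cased")       # remote model, transformers style
# xlnet_lm = XLNet.load("some_dir/farm_model")  # local model, FARM style
print(xlnet_lm.name, xlnet_lm.language)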
import os

import torch
from transformers import (
    CONFIG_NAME,
    WEIGHTS_NAME,
    XLNetConfig,
    XLNetForQuestionAnswering,
    XLNetForSequenceClassification,
    XLNetLMHeadModel,
    load_tf_weights_in_xlnet,
)

# Number of output labels per GLUE task, as defined in the transformers conversion script.
GLUE_TASKS_NUM_LABELS = {
    "cola": 2,
    "mnli": 3,
    "mrpc": 2,
    "sst-2": 2,
    "sts-b": 1,
    "qqp": 2,
    "qnli": 2,
    "rte": 2,
    "wnli": 2,
}


def convert_xlnet_checkpoint_to_pytorch(
    tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None
):
    # Initialise the PyTorch model
    config = XLNetConfig.from_json_file(bert_config_file)
    finetuning_task = finetuning_task.lower() if finetuning_task is not None else ""
    if finetuning_task in GLUE_TASKS_NUM_LABELS:
        print(
            "Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config))
        )
        # The number of labels is carried on the config; the head model takes only the config.
        config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task]
        model = XLNetForSequenceClassification(config)
    elif "squad" in finetuning_task:
        model = XLNetForQuestionAnswering(config)
    else:
        model = XLNetLMHeadModel(config)

    # Load weights from the TF checkpoint
    load_tf_weights_in_xlnet(model, config, tf_checkpoint_path, finetuning_task)

    # Save the PyTorch model and its configuration
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
    print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path)))
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path)))
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
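# Usage sketch for the converter above: the real transformers script wraps this
# function in an argparse CLI; the checkpoint and config paths below are
# placeholders for files from the original TF XLNet release.
convert_xlnet_checkpoint_to_pytorch(
    tf_checkpoint_path="xlnet_cased_L-12_H-768_A-12/xlnet_model.ckpt",
    bert_config_file="xlnet_cased_L-12_H-768_A-12/xlnet_config.json",
    pytorch_dump_folder_path="./xlnet-base-pytorch",
    finetuning_task="sts-b",  # any key in GLUE_TASKS_NUM_LABELS, "squad", or None
)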