Example #1
import torch.nn as nn
from transformers import XLNetConfig, XLNetModel


class DSB_XLNetModel(nn.Module):
    def __init__(self, cfg):
        super(DSB_XLNetModel, self).__init__()
        self.cfg = cfg
        cate_col_size = len(cfg.cate_cols)
        cont_col_size = len(cfg.cont_cols)
        # Shared embedding table for all categorical columns (index 0 is padding)
        self.cate_emb = nn.Embedding(cfg.total_cate_size,
                                     cfg.emb_size,
                                     padding_idx=0)
        # Project the concatenated categorical embeddings to half the hidden size
        self.cate_proj = nn.Sequential(
            nn.Linear(cfg.emb_size * cate_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        # Project the continuous features to the other half of the hidden size
        self.cont_emb = nn.Sequential(
            nn.Linear(cont_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        self.config = XLNetConfig(
            3,  # vocab_size (not used)
            d_model=cfg.hidden_size,
            n_layer=cfg.nlayers,
            n_head=cfg.nheads,
            d_inner=cfg.hidden_size,
            #ff_activation="gelu",
            #untie_r=True,
            #attn_type="bi",
            #initializer_range=0.02,
            #layer_norm_eps=1e-12,
            dropout=cfg.dropout,
            #mem_len=None,
            #reuse_len=None,
            #bi_data=False,
            #clamp_len=-1,
            #same_length=False,
            #summary_type="last",
            #summary_use_proj=True,
            #summary_activation="tanh",
            summary_last_dropout=cfg.dropout,
            #start_n_top=5,
            #end_n_top=5,
        )

        self.encoder = XLNetModel(self.config)

        def get_reg():
            # Small MLP head mapping the encoder output to the target size
            return nn.Sequential(
                nn.Linear(cfg.hidden_size, cfg.hidden_size),
                nn.LayerNorm(cfg.hidden_size),
                nn.Dropout(cfg.dropout),
                nn.ReLU(),
                nn.Linear(cfg.hidden_size, cfg.target_size),
            )

        self.reg_layer = get_reg()
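
A minimal usage sketch (not part of the original example): it instantiates the model with a bare config object whose attribute names are taken from the __init__ above; all concrete values are illustrative assumptions.

from types import SimpleNamespace

# Hypothetical configuration; every value here is an assumption for illustration.
cfg = SimpleNamespace(
    cate_cols=["event_code", "title"],    # categorical feature columns
    cont_cols=["elapsed_time", "count"],  # continuous feature columns
    total_cate_size=500,                  # size of the shared categorical vocabulary
    emb_size=64,
    hidden_size=128,                      # must be divisible by nheads
    nlayers=2,
    nheads=4,
    dropout=0.1,
    target_size=1,
)
model = DSB_XLNetModel(cfg)
print(model)
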
Example #2
    @classmethod
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a language model either by supplying

        * the name of a remote model on s3 ("xlnet-base-cased" ...)
        * or a local path of a model trained via transformers ("some_dir/huggingface_model")
        * or a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: name or path of a model
        :param language: (Optional) Name of language the model was trained for (e.g. "german").
                         If not supplied, FARM will try to infer it from the model name.
        :return: Language Model

        """
        xlnet = cls()
        if "farm_lm_name" in kwargs:
            xlnet.name = kwargs["farm_lm_name"]
        else:
            xlnet.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and in Pytorch-Transformers format
        farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                      "language_model_config.json")
        if os.path.exists(farm_lm_config):
            # FARM style
            config = XLNetConfig.from_pretrained(farm_lm_config)
            farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                         "language_model.bin")
            xlnet.model = XLNetModel.from_pretrained(farm_lm_model,
                                                     config=config,
                                                     **kwargs)
            xlnet.language = xlnet.model.config.language
        else:
            # Pytorch-Transformers style
            xlnet.model = XLNetModel.from_pretrained(
                pretrained_model_name_or_path, **kwargs)
            xlnet.language = cls._infer_language_from_name(
                pretrained_model_name_or_path)
            config = xlnet.model.config
        # XLNet does not provide a pooled_output by default. Therefore, we need to initialize an extra pooler.
        # The pooler takes the last hidden representation & feeds it to a dense layer of (hidden_dim x hidden_dim).
        # We don't want dropout at the end of the pooler, since the adaptive model already applies dropout before
        # everything is fed to the prediction head.
        config.summary_last_dropout = 0
        xlnet.pooler = SequenceSummary(config)
        xlnet.pooler.apply(xlnet.model._init_weights)
        return xlnet
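
A usage sketch (not from the original source), assuming the method above is the load classmethod of FARM's XLNet language-model wrapper (farm.modeling.language_model.XLNet); the model name is an example.

from farm.modeling.language_model import XLNet  # assumed location of the wrapper class

lm = XLNet.load("xlnet-base-cased")
print(lm.language)   # set in load(): inferred from the model name
print(lm.pooler)     # SequenceSummary pooler initialized at the end of load()
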

Example #3

import os

import torch
from transformers import (CONFIG_NAME, WEIGHTS_NAME, XLNetConfig,
                          XLNetForQuestionAnswering,
                          XLNetForSequenceClassification, XLNetLMHeadModel,
                          load_tf_weights_in_xlnet)

# Standard GLUE task -> number-of-labels mapping, used below to pick the right head
GLUE_TASKS_NUM_LABELS = {
    "cola": 2, "mnli": 3, "mrpc": 2, "sst-2": 2, "sts-b": 1,
    "qqp": 2, "qnli": 2, "rte": 2, "wnli": 2,
}


def convert_xlnet_checkpoint_to_pytorch(tf_checkpoint_path,
                                        bert_config_file,
                                        pytorch_dump_folder_path,
                                        finetuning_task=None):
    # Initialise PyTorch model
    config = XLNetConfig.from_json_file(bert_config_file)

    finetuning_task = finetuning_task.lower() if finetuning_task is not None else ""
    if finetuning_task in GLUE_TASKS_NUM_LABELS:
        print(
            "Building PyTorch XLNetForSequenceClassification model from configuration: {}"
            .format(str(config)))
        model = XLNetForSequenceClassification(
            config, num_labels=GLUE_TASKS_NUM_LABELS[finetuning_task])
    elif 'squad' in finetuning_task:
        model = XLNetForQuestionAnswering(config)
    else:
        model = XLNetLMHeadModel(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_xlnet(model, config, tf_checkpoint_path,
                             finetuning_task)

    # Save pytorch-model
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path,
                                             WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path,
                                            CONFIG_NAME)
    print("Save PyTorch model to {}".format(
        os.path.abspath(pytorch_weights_dump_path)))
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print("Save configuration file to {}".format(
        os.path.abspath(pytorch_config_dump_path)))
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
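
A usage sketch (not from the original source); the paths are placeholders for an actual pretrained XLNet TensorFlow checkpoint, its JSON config, and an existing output directory.

convert_xlnet_checkpoint_to_pytorch(
    tf_checkpoint_path="xlnet_cased_L-12_H-768_A-12/xlnet_model.ckpt",  # hypothetical path
    bert_config_file="xlnet_cased_L-12_H-768_A-12/xlnet_config.json",   # hypothetical path
    pytorch_dump_folder_path="xlnet-base-cased-pytorch",                # output directory (must exist)
    finetuning_task="sts-b",  # a GLUE task name, so a sequence-classification head is built
)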