Example #1
    def __init__(self, config):
        super(XLNetForXMLC, self).__init__(config)
        self.num_labels = config.num_labels
        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)
        self.init_weights()
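For context, a minimal forward pass that would pair with a constructor like this one. It is only a sketch: the method name forward and the arguments input_ids and attention_mask are assumptions, not part of the original snippet. XLNetModel returns the last hidden state first, SequenceSummary pools it (for XLNet the default summary is the last token), and the linear projection produces the label logits.

    # Hypothetical companion to the constructor above -- a sketch, not the original code.
    def forward(self, input_ids, attention_mask=None):
        # transformer returns a tuple; element 0 is the last hidden state
        # of shape (batch, seq_len, d_model)
        transformer_outputs = self.transformer(input_ids, attention_mask=attention_mask)
        output = transformer_outputs[0]
        # SequenceSummary pools the sequence into (batch, d_model)
        output = self.sequence_summary(output)
        # project to (batch, num_labels)
        logits = self.logits_proj(output)
        return logits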
Example #2
def xlnetModel(*args, **kwargs):
    """
    xlnetModel is the basic XLNet Transformer model from
        "XLNet: Generalized Autoregressive Pretraining for Language Understanding"
        by Zhilin Yang, Zihang Dai, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')

        #  Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> indexed_tokens_1 = tokenizer.encode(text_1)
        >>> indexed_tokens_2 = tokenizer.encode(text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load xlnetModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
        >>> model.eval()

        # Predict hidden states features for each layer
        >>> with torch.no_grad():
                hidden_states_1, mems = model(tokens_tensor_1)
                hidden_states_2, mems = model(tokens_tensor_2, mems=mems)
    """
    model = XLNetModel.from_pretrained(*args, **kwargs)
    return model
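The same model can also be loaded without torch.hub by using the transformers package directly. The sketch below assumes transformers is installed; reusing mems across calls as in the docstring depends on the installed version (mem_len / use_mems settings), so it only extracts the hidden states here.

# A sketch using the transformers package directly instead of torch.hub
# (assumes `pip install transformers`); equivalent in spirit to the example above.
import torch
from transformers import XLNetModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetModel.from_pretrained('xlnet-large-cased')
model.eval()

tokens_tensor_1 = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
tokens_tensor_2 = torch.tensor([tokenizer.encode("Jim Henson was a puppeteer")])

with torch.no_grad():
    # element 0 of the output is the last hidden state, shape (batch, seq_len, d_model)
    hidden_states_1 = model(tokens_tensor_1)[0]
    hidden_states_2 = model(tokens_tensor_2)[0]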
Example #3
    def __init__(self, config):

        super(XlnetForMultiLable, self).__init__(config)
        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.apply(self._init_weights)
Example #4
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a language model either by supplying

        * the name of a remote model on s3 ("xlnet-base-cased" ...)
        * or a local path of a model trained via transformers ("some_dir/huggingface_model")
        * or a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: name or path of a model
        :param language: (Optional) Name of language the model was trained for (e.g. "german").
                         If not supplied, FARM will try to infer it from the model name.
        :return: Language Model

        """
        xlnet = cls()
        if "farm_lm_name" in kwargs:
            xlnet.name = kwargs["farm_lm_name"]
        else:
            xlnet.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and in Pytorch-Transformers format
        farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                      "language_model_config.json")
        if os.path.exists(farm_lm_config):
            # FARM style
            config = XLNetConfig.from_pretrained(farm_lm_config)
            farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                         "language_model.bin")
            xlnet.model = XLNetModel.from_pretrained(farm_lm_model,
                                                     config=config,
                                                     **kwargs)
            xlnet.language = xlnet.model.config.language
        else:
            # Pytorch-Transformers style
            xlnet.model = XLNetModel.from_pretrained(
                pretrained_model_name_or_path, **kwargs)
            xlnet.language = cls._infer_language_from_name(
                pretrained_model_name_or_path)
            config = xlnet.model.config
        # XLNet does not provide a pooled_output by default. Therefore, we need to initialize an extra pooler.
        # The pooler takes the last hidden representation & feeds it to a dense layer of (hidden_dim x hidden_dim).
        # We don't want a dropout at the end of the pooler, since dropout is already applied in the adaptive model
        # before everything is fed to the prediction head
        config.summary_last_dropout = 0
        xlnet.pooler = SequenceSummary(config)
        xlnet.pooler.apply(xlnet.model._init_weights)
        return xlnet
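To illustrate how the extra pooler set up in load() is typically consumed afterwards. This is only a sketch; the method name forward and its arguments are assumptions, not FARM's actual API.

    # Hypothetical usage of the pooler initialised in load() -- a sketch only.
    def forward(self, input_ids, attention_mask=None):
        # XLNetModel returns (last_hidden_state, mems, ...) -- no pooled output
        output_tuple = self.model(input_ids, attention_mask=attention_mask)
        sequence_output = output_tuple[0]
        # SequenceSummary (with summary_last_dropout=0, see above) produces the
        # pooled representation that downstream prediction heads expect
        pooled_output = self.pooler(sequence_output)
        return sequence_output, pooled_output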
Example #5
    def __init__(self, config):
        super().__init__(config)
        self.attn_type = config.attn_type
        self.same_length = config.same_length

        self.transformer = XLNetModel(config)
        self.lm_loss = nn.Linear(config.d_model, config.n_token, bias=True)
        self.init_weights()
        self.tie_weights()
Example #6
    def __init__(self, config, weight=None):
        super(XLNetForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.weight = weight

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)

        self.init_weights()
Example #7
    def __init__(self, config):
        super(XLNetForXMLC, self).__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)
        self.loss_fct = HingeLoss(margin=1.0, squared=True)

        self.init_weights()
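HingeLoss is not defined in this snippet. A minimal squared hinge loss over {0, 1} multi-label targets, consistent with margin=1.0 and squared=True, might look like the following; this is an assumption about its behaviour, not the project's actual implementation.

# Hypothetical stand-in for the HingeLoss used above (margin=1.0, squared=True) -- a sketch only.
import torch
import torch.nn as nn

class HingeLoss(nn.Module):
    def __init__(self, margin=1.0, squared=True):
        super().__init__()
        self.margin = margin
        self.squared = squared

    def forward(self, logits, targets):
        # map {0,1} multi-label targets to {-1,+1}
        y = 2.0 * targets.float() - 1.0
        loss = torch.clamp(self.margin - y * logits, min=0.0)
        if self.squared:
            loss = loss ** 2
        return loss.mean()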
Example #8
    def __init__(self, config):
        super(XLNetCalculator, self).__init__(config)
        self.start_n_top = config.start_n_top
        self.end_n_top = config.end_n_top

        self.transformer = XLNetModel(config)
        self.start_logits = PoolerStartLogits(config)
        self.end_logits = PoolerEndLogits(config)
        self.answer_class = PoolerAnswerMode(config, num_mode=5)

        self.init_weights()
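PoolerStartLogits and PoolerEndLogits come from transformers' modeling utilities; the sketch below shows how they are typically applied during training, with gold start positions available. The argument p_mask follows those utilities, the rest of the names are assumed, and PoolerAnswerMode is a custom class so it is left out here.

    # A sketch of how the poolers above are typically applied (training-time path);
    # not the original code, and PoolerAnswerMode is omitted because it is custom.
    def forward(self, input_ids, attention_mask=None, start_positions=None, p_mask=None):
        hidden_states = self.transformer(input_ids, attention_mask=attention_mask)[0]
        # per-token start logits, shape (batch, seq_len)
        start_logits = self.start_logits(hidden_states, p_mask=p_mask)
        # end logits conditioned on the gold start position
        end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask)
        return start_logits, end_logits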
Example #9
    def __init__(self, cfg):
        super(DSB_XLNetModel, self).__init__()
        self.cfg = cfg
        cate_col_size = len(cfg.cate_cols)
        cont_col_size = len(cfg.cont_cols)
        self.cate_emb = nn.Embedding(cfg.total_cate_size,
                                     cfg.emb_size,
                                     padding_idx=0)
        self.cate_proj = nn.Sequential(
            nn.Linear(cfg.emb_size * cate_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        self.cont_emb = nn.Sequential(
            nn.Linear(cont_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        self.config = XLNetConfig(
            3,  # not used            
            d_model=cfg.hidden_size,
            n_layer=cfg.nlayers,
            n_head=cfg.nheads,
            d_inner=cfg.hidden_size,
            #ff_activation="gelu",
            #untie_r=True,
            #attn_type="bi",
            #initializer_range=0.02,
            #layer_norm_eps=1e-12,
            dropout=cfg.dropout,
            #mem_len=None,
            #reuse_len=None,
            #bi_data=False,
            #clamp_len=-1,
            #same_length=False,
            #summary_type="last",
            #summary_use_proj=True,
            #summary_activation="tanh",
            summary_last_dropout=cfg.dropout,
            #start_n_top=5,
            #end_n_top=5,
        )

        self.encoder = XLNetModel(self.config)

        def get_reg():
            return nn.Sequential(
                nn.Linear(cfg.hidden_size, cfg.hidden_size),
                nn.LayerNorm(cfg.hidden_size),
                nn.Dropout(cfg.dropout),
                nn.ReLU(),
                nn.Linear(cfg.hidden_size, cfg.target_size),
            )

        self.reg_layer = get_reg()
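The constructor only builds the layers; presumably the categorical and continuous projections are concatenated per time step and fed to XLNetModel via inputs_embeds instead of token ids. A hedged forward sketch, assuming a transformers version that supports inputs_embeds and assuming the input names cate_x, cont_x, and mask (torch is assumed imported in the module):

    # A sketch of the likely forward pass (input names cate_x, cont_x, mask are assumed).
    def forward(self, cate_x, cont_x, mask):
        batch_size, seq_len, _ = cate_x.size()
        # (batch, seq, n_cate_cols, emb) -> (batch, seq, n_cate_cols * emb)
        cate_emb = self.cate_emb(cate_x).view(batch_size, seq_len, -1)
        cate_emb = self.cate_proj(cate_emb)            # (batch, seq, hidden // 2)
        cont_emb = self.cont_emb(cont_x)               # (batch, seq, hidden // 2)
        seq_emb = torch.cat([cate_emb, cont_emb], 2)   # (batch, seq, hidden)
        # feed precomputed embeddings to XLNet instead of token ids
        encoded = self.encoder(inputs_embeds=seq_emb, attention_mask=mask)[0]
        # regress from the final position of the sequence
        return self.reg_layer(encoded[:, -1])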
Example #10
    def __init__(self, config, lambd, mean_pool=False):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)

        self.lambd = lambd
        print("Gradient reversal Prarameter is: {}".format(self.lambd))
        self.grl = GradientReversal(self.lambd)
        self.domain_classifier = nn.Linear(config.hidden_size, 2)

        self.init_weights()
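GradientReversal is referenced above but not defined in the snippet. A common implementation acts as the identity in the forward pass and flips the gradient sign (scaled by lambd) in the backward pass, DANN-style. A minimal sketch, which may differ from the project's actual class:

# Hypothetical GradientReversal layer matching the usage above -- a sketch only.
import torch
import torch.nn as nn

class _ReverseGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # identity forward, gradient multiplied by -lambd on the way back
        return -ctx.lambd * grad_output, None

class GradientReversal(nn.Module):
    def __init__(self, lambd=1.0):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        return _ReverseGrad.apply(x, self.lambd)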
Example #11
    def __init__(self, data):
        super(GazLSTM, self).__init__()

        self.gpu = data.HP_gpu
        self.use_biword = data.use_bigram
        self.hidden_dim = data.HP_hidden_dim
        self.word_emb_dim = data.word_emb_dim
        self.biword_emb_dim = data.biword_emb_dim
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.num_layer = data.HP_num_layer
        self.model_type = data.model_type
        self.use_bert = data.use_bert
        self.device = data.device

        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.word_emb_dim, padding_idx=0)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))

        if self.use_biword:
            self.biword_embedding = nn.Embedding(data.biword_alphabet.size(), self.biword_emb_dim, padding_idx=0)
            if data.pretrain_biword_embedding is not None:
                self.biword_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))

        char_feature_dim = self.word_emb_dim
        if self.use_biword:
            char_feature_dim += self.biword_emb_dim

        if self.use_bert:
            char_feature_dim = char_feature_dim + 768 * 2
        print('total char_feature_dim is {}'.format(char_feature_dim))
        print('bert + bert_wwm multi feature')
        ## lstm model
        if self.model_type == 'lstm':
            lstm_hidden = self.hidden_dim
            if self.bilstm_flag:
                self.hidden_dim *= 2
            self.NERmodel = NERmodel(model_type='lstm', input_dim=char_feature_dim, hidden_dim=lstm_hidden,
                                     num_layer=self.lstm_layer, biflag=self.bilstm_flag)
            self.hidden2tag = nn.Linear(self.hidden_dim, data.label_alphabet_size + 2)
        # ## cnn model
        # if self.model_type == 'cnn':
        #     self.NERmodel = NERmodel(model_type='cnn', input_dim=char_feature_dim, hidden_dim=self.hidden_dim,
        #                              num_layer=self.num_layer, dropout=data.HP_dropout, gpu=self.gpu)
        #
        # ## attention model
        if self.model_type == 'transformer':
            self.NERmodel = NERmodel(model_type='transformer', input_dim=char_feature_dim, hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer, dropout=data.HP_dropout)
            self.hidden2tag = nn.Linear(480, data.label_alphabet_size + 2)

        self.drop = nn.Dropout(p=data.HP_dropout)

        self.crf = CRF(data.label_alphabet_size, self.gpu, self.device)

        if self.use_bert:
            self.bert_encoder = BertModel.from_pretrained('transformer_cpt/bert/')
            self.xlnet_encoder = XLNetModel.from_pretrained('transformer_cpt/chinese_xlnet_base_pytorch')
            self.bert_encoder_wwm = BertModel.from_pretrained('transformer_cpt/chinese_roberta_wwm_ext_pytorch/')
            for p in self.bert_encoder.parameters():
                p.requires_grad = False
            # for p in self.xlnet_encoder.parameters():
            #     p.requires_grad = False
            for p in self.bert_encoder_wwm.parameters():
                p.requires_grad = False
        if self.gpu:
            self.word_embedding = self.word_embedding.cuda(self.device)
            if self.use_biword:
                self.biword_embedding = self.biword_embedding.cuda(self.device)
            self.NERmodel = self.NERmodel.cuda(self.device)
            self.hidden2tag = self.hidden2tag.cuda(self.device)
            self.crf = self.crf.cuda(self.device)
            if self.use_bert:
                self.bert_encoder = self.bert_encoder.cuda(self.device)
                # self.xlnet_encoder = self.xlnet_encoder.cuda(self.device)
                self.bert_encoder_wwm = self.bert_encoder_wwm.cuda(self.device)
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 pretrained_model: str = None,
                 requires_grad: bool = True,
                 transformer_weights_model: str = None,
                 num_labels: int = 2,
                 predictions_file=None,
                 layer_freeze_regexes: List[str] = None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._predictions = []

        self._pretrained_model = pretrained_model

        if 't5' in pretrained_model:
            self._padding_value = 1  # The index assumed for the padding token
            if transformer_weights_model:  # Override for RoBERTa only for now
                logging.info(f"Loading Transformer weights model from {transformer_weights_model}")
                transformer_model_loaded = load_archive(transformer_weights_model)
                self._transformer_model = transformer_model_loaded.model._transformer_model
            else:
                self._transformer_model = T5Model.from_pretrained(pretrained_model)
            self._dropout = torch.nn.Dropout(self._transformer_model.config.dropout_rate)  # T5Config uses dropout_rate
        elif 'roberta' in pretrained_model:
            self._padding_value = 1  # The index of the RoBERTa padding token
            if transformer_weights_model:  # Override for RoBERTa only for now
                logging.info(f"Loading Transformer weights model from {transformer_weights_model}")
                transformer_model_loaded = load_archive(transformer_weights_model)
                self._transformer_model = transformer_model_loaded.model._transformer_model
            else:
                self._transformer_model = RobertaModel.from_pretrained(pretrained_model)
            self._dropout = torch.nn.Dropout(self._transformer_model.config.hidden_dropout_prob)
        elif 'xlnet' in pretrained_model:
            self._padding_value = 5  # The index of the XLNet padding token
            self._transformer_model = XLNetModel.from_pretrained(pretrained_model)
            self.sequence_summary = SequenceSummary(self._transformer_model.config)
        elif 'albert' in pretrained_model:
            self._transformer_model = AlbertModel.from_pretrained(pretrained_model)
            self._padding_value = 0  # The index of the BERT padding token
            self._dropout = torch.nn.Dropout(self._transformer_model.config.hidden_dropout_prob)
        elif 'bert' in pretrained_model:
            self._transformer_model = BertModel.from_pretrained(pretrained_model)
            self._padding_value = 0  # The index of the BERT padding token
            self._dropout = torch.nn.Dropout(self._transformer_model.config.hidden_dropout_prob)
        else:
            raise ValueError(f"Unsupported pretrained model: {pretrained_model}")

        for name, param in self._transformer_model.named_parameters():
            if layer_freeze_regexes and requires_grad:
                grad = not any([bool(re.search(r, name)) for r in layer_freeze_regexes])
            else:
                grad = requires_grad
            if grad:
                param.requires_grad = True
            else:
                param.requires_grad = False

        transformer_config = self._transformer_model.config
        transformer_config.num_labels = num_labels
        self._output_dim = self._transformer_model.config.hidden_size

        # unifying the classification layer across all models
        self._classifier = Linear(self._output_dim, num_labels)
        self._classifier.weight.data.normal_(mean=0.0, std=0.02)
        self._classifier.bias.data.zero_()

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        self._debug = -1
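For XLNet the snippet stores a SequenceSummary instead of a dropout because XLNetModel has no pooled output. A hedged sketch of how the two cases are usually distinguished when computing logits; the method name and inputs are assumed, not the project's actual forward.

    # A sketch of how the per-architecture pooling above is typically consumed
    # (not the project's actual forward; names are assumed).
    def _pool_and_classify(self, input_ids, attention_mask=None):
        outputs = self._transformer_model(input_ids, attention_mask=attention_mask)
        if 'xlnet' in self._pretrained_model:
            # XLNet: pool the last hidden state with SequenceSummary
            pooled = self.sequence_summary(outputs[0])
        else:
            # BERT/RoBERTa/ALBERT: use the pooled output plus dropout
            pooled = self._dropout(outputs[1])
        return self._classifier(pooled)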
Example #13
    def __init__(self,
                 vocab: Vocabulary,
                 pretrained_model: str = None,
                 requires_grad: bool = True,
                 probe_type: str = None,
                 layer_freeze_regexes: List[str] = None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._pretrained_model = pretrained_model
        if 'roberta' in pretrained_model:
            self._padding_value = 1  # The index of the RoBERTa padding token
            self._transformer_model = RobertaModel.from_pretrained(
                pretrained_model)
            self._dropout = torch.nn.Dropout(
                self._transformer_model.config.hidden_dropout_prob)
        elif 'xlnet' in pretrained_model:
            self._padding_value = 5  # The index of the XLNet padding token
            self._transformer_model = XLNetModel.from_pretrained(
                pretrained_model)
            self.sequence_summary = SequenceSummary(
                self._transformer_model.config)
        elif 'albert' in pretrained_model:
            self._transformer_model = AlbertModel.from_pretrained(
                pretrained_model)
            self._padding_value = 0  # The index of the BERT padding token
            self._dropout = torch.nn.Dropout(
                self._transformer_model.config.hidden_dropout_prob)
        elif 'bert' in pretrained_model:
            self._transformer_model = BertModel.from_pretrained(
                pretrained_model)
            self._padding_value = 0  # The index of the BERT padding token
            self._dropout = torch.nn.Dropout(
                self._transformer_model.config.hidden_dropout_prob)
        else:
            raise ValueError(f"Unsupported pretrained model: {pretrained_model}")

        if probe_type == 'MLP':
            layer_freeze_regexes = ["embeddings", "encoder"]

        for name, param in self._transformer_model.named_parameters():
            if layer_freeze_regexes and requires_grad:
                grad = not any(
                    [bool(re.search(r, name)) for r in layer_freeze_regexes])
            else:
                grad = requires_grad
            if grad:
                param.requires_grad = True
            else:
                param.requires_grad = False

        transformer_config = self._transformer_model.config
        transformer_config.num_labels = 1
        self._output_dim = self._transformer_model.config.hidden_size

        # unifying the classification layer across all models
        self._classifier = Linear(self._output_dim, 1)
        self._classifier.weight.data.normal_(mean=0.0, std=0.02)
        self._classifier.bias.data.zero_()

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        self._debug = 2
    def __init__(self,
                 vocab: Vocabulary,
                 model_name: str,
                 k=12,
                 output_dim=1,
                 freeze_embeddings=False,
                 temperature=1,
                 train_with_regular_softmax=False,
                 use_similarity=False,
                 pass_probabilities_to_classifier=False,
                 use_straight_through_gumbel_softmax=False,
                 anneal_temperature=False,
                 train_generator=True,
                 use_kld_loss=False,
                 generate_until_dot=False,
                 lm_loss_coeff=1,
                 use_cls=False,
                 pass_only_generated=False,
                 sim_coeff=1,
                 dropout=0.1,
                 train_with_just_sim_loss_for_epochs_num=-1,
                 decouple_gen_and_cls_embs=False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 load_weights=False,
                 zero_generated_out=False,
                 output_several_results_on_every_step=False,
                 results_each_step=0,
                 use_repetition_loss=False,
                 sequence_ngram_n=1,
                 rep_coeff=1,
                 use_similarity_btw_question_and_answers=False,
                 anneal_repetition_loss=False,
                 anneal_kld_loss=False,
                 add_cls_after_epoch_num=-1,
                 train_lm_generator=False,
                 gen_lm_loss_coeff=1,
                 train_cls_without_lm_loss=False):
        super(GeneralGenerationForClassfiication, self).__init__(vocab)
        self.gen_model = XLNetLMHeadModel.from_pretrained(model_name,
                                                          dropout=dropout)
        self.tokenizer = XLNetTokenizer.from_pretrained(model_name)
        self.gen_word_embedding = self.gen_model.transformer.word_embedding
        self.gen_embeddings_weight = self.gen_word_embedding.weight

        if use_cls:
            self.cls_model = XLNetModel.from_pretrained(model_name)
            self.cls_word_embedding = self.cls_model.word_embedding
            self.cls_embeddings_weight = self.cls_word_embedding.weight
        if use_kld_loss:
            self.freezed_lm = XLNetLMHeadModel.from_pretrained(model_name)
            self.freezed_lm.requires_grad_(False)

        n_embd = 768 if 'base' in model_name else 1024
        self.cls = nn.Linear(n_embd, output_dim, bias=True)
        self.use_cls = use_cls
        self.use_similarity = use_similarity
        self.train_generator = train_generator
        self.dropout = nn.Dropout(dropout)
        self.k = k

        self.use_kld_loss = use_kld_loss
        self.lm_loss_coeff = lm_loss_coeff
        self.anneal_kld_loss = anneal_kld_loss
        self.sim_coeff = sim_coeff
        self.use_repetition_loss = use_repetition_loss
        self.rep_coeff = rep_coeff
        self.anneal_repetition_loss = anneal_repetition_loss
        self.sequence_ngram_n = sequence_ngram_n

        if freeze_embeddings:
            self.gen_embeddings_weight.requires_grad = False
            self.gen_word_embedding.requires_grad_(False)

        if not train_generator:
            self.gen_model.requires_grad_(False)
            self.gen_embeddings_weight.requires_grad = False
            generate_until_dot = True

        self.temperature = temperature
        self.train_with_regular_softmax = train_with_regular_softmax
        self.use_straight_through_gumbel_softmax = use_straight_through_gumbel_softmax
        self.anneal_temperature = anneal_temperature
        self.topk_gs = output_several_results_on_every_step
        self.results_each_step = results_each_step

        self.generate_until_dot = generate_until_dot
        self.pass_only_generated = pass_only_generated

        self.train_with_just_sim_loss_for_epochs_num = train_with_just_sim_loss_for_epochs_num
        self.add_cls_after_epoch_num = add_cls_after_epoch_num
        self.use_similarity_btw_question_and_answers = use_similarity_btw_question_and_answers
        self.decouple_gen_and_cls_embs = decouple_gen_and_cls_embs
        self.pass_probabilities_to_classifier = pass_probabilities_to_classifier
        self.zero_generated_out = zero_generated_out
        self.supervised_generator = train_lm_generator
        self.gen_lm_loss_coeff = gen_lm_loss_coeff
        self.train_cls_without_sup_gen = train_cls_without_lm_loss

        if load_weights:
            initializer(self)

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "sim_accuracy": CategoricalAccuracy(),
            "kld_loss": Average(),
            "repetition_loss": Average(),
            "classification_loss": Average(),
            "similarity_loss": Average(),
        }
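The second constructor wires a generator (XLNetLMHeadModel) to a classifier through the embedding matrices. With a Gumbel-softmax over the vocabulary, generated token distributions are usually turned into differentiable input embeddings by multiplying them with the embedding weight, which is what gen_embeddings_weight / cls_embeddings_weight suggest. A minimal sketch of that step; the function and tensor names are assumed.

# A sketch of the Gumbel-softmax -> embedding trick implied by the weights above -- not the project's code.
import torch.nn.functional as F

def soft_tokens_to_embeddings(logits, embedding_weight, temperature=1.0, hard=False):
    # relaxed one-hot over the vocabulary, shape (batch, seq, vocab)
    probs = F.gumbel_softmax(logits, tau=temperature, hard=hard)
    # differentiable "embedding lookup": (batch, seq, vocab) @ (vocab, emb) -> (batch, seq, emb)
    return probs @ embedding_weight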