def __init__(
    self,
    language_model,
    prediction_heads,
    embeds_dropout_prob,
    lm_output_types,
    device,
    loss_aggregation_fn=None,
    head_feats=False,
    freeze_model=False,
    custom_pooling_strategy=None,
    prediction_layer=-1,
):
    self.head_feats = head_feats
    self.pooler = None
    super(CustomAdaptiveModel, self).__init__(
        language_model,
        prediction_heads,
        embeds_dropout_prob,
        lm_output_types,
        device,
        loss_aggregation_fn,
    )
    if freeze_model:
        for p in self.language_model.parameters():
            p.requires_grad = False
    if custom_pooling_strategy is not None:
        config = self.language_model.model.config
        config.summary_type = custom_pooling_strategy
        self.pooler = SequenceSummary(config)
        self.pooler.apply(self.language_model.model._init_weights)
        logger.info(f"Using custom pooling strategy: {custom_pooling_strategy}")
    self.prediction_layer = prediction_layer
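# Hedged standalone sketch (not part of the class above): SequenceSummary pools
# a sequence of hidden states into one vector according to config.summary_type
# ("last", "first", "mean", or "cls_index"), which is exactly what the
# custom_pooling_strategy argument overrides. The import path matches
# transformers 4.x; the shapes and the XLNet config are illustrative assumptions.
import torch
from transformers import XLNetConfig
from transformers.modeling_utils import SequenceSummary

config = XLNetConfig()
config.summary_type = "mean"  # e.g. the value passed as custom_pooling_strategy
pooler = SequenceSummary(config)
hidden_states = torch.randn(2, 16, config.d_model)  # (batch, seq_len, hidden)
pooled = pooler(hidden_states)                      # -> (batch, hidden)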
def __init__(self, hparams):
    super().__init__()
    self.hparams = hparams
    self.save_hyperparameters()
    config = AutoConfig.from_pretrained(self.hparams.model_name)
    self.model = AutoModel.from_pretrained(self.hparams.model_name)
    self.pooler = SequenceSummary(config)
    self.classifier = nn.Linear(config.d_model, self.hparams.num_classes)
    self.concept_store = torch.load(self.hparams.concept_store)
    self.phrase_logits = TimeDistributed(
        nn.Linear(config.d_model, self.hparams.num_classes))
    self.sequence_summary = SequenceSummary(config)
    self.topk = self.hparams.topk
    # self.topk_gil_mlp = TimeDistributed(
    #     nn.Linear(config.d_model, self.hparams.num_classes))
    self.topk_gil_mlp = nn.Linear(config.d_model, self.hparams.num_classes)
    self.multihead_attention = torch.nn.MultiheadAttention(
        config.d_model, dropout=0.2, num_heads=8)
    self.activation = nn.ReLU()
    self.lamda = self.hparams.lamda
    self.gamma = self.hparams.gamma
    self.dropout = nn.Dropout(config.dropout)
    self.loss = nn.CrossEntropyLoss()
def __init__(self, config):
    super().__init__(config)
    config.num_labels = 1
    self.transformer = OpenAIGPTModel(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.multiple_choice_head = SequenceSummary(config)
    self.persona_head = SequenceSummary(config)
    self.init_weights()
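# Hedged sketch (an assumption, not the original forward pass): with the GPT
# default summary_type = "cls_index", each SequenceSummary head above picks the
# hidden state at a per-sequence token position, e.g. the last token of each
# candidate, and projects it to a single score (num_labels = 1).
import torch
from transformers import OpenAIGPTConfig
from transformers.modeling_utils import SequenceSummary

config = OpenAIGPTConfig(summary_type="cls_index", num_labels=1)
head = SequenceSummary(config)
hidden = torch.randn(2, 4, 20, config.n_embd)         # (batch, choices, seq, hidden)
cls_index = torch.full((2, 4), 19, dtype=torch.long)  # token position to summarize
scores = head(hidden, cls_index).squeeze(-1)          # -> (batch, choices)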
def __init__(self, transformer_model: PreTrainedModel,
             config: PretrainedConfig, pad_idx: int, cls_idx: int):
    super(GPT2Classifier, self).__init__()
    self.transformer = transformer_model
    self.head = SequenceSummary(config)
    self.pad_idx = pad_idx
    self.cls_idx = cls_idx
def __init__(self, config: transformers.PretrainedConfig):
    super().__init__(config)
    self.config = config
    assert hasattr(config, NUM_SEQUENCE_LABELS)
    self.num_labels = getattr(config, NUM_SEQUENCE_LABELS)
    self.sequence_summary = SequenceSummary(config)
    self.classifier = nn.Linear(config.hidden_size, self.num_labels)
def __init__(self, config):
    super(XLNetForXMLC, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj = nn.Linear(config.d_model, config.num_labels)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = 3  # RTE Task
    self.num_labels_3way = 3  # RTE SPs multi-label task
    self.num_labels_multi = 5  # RTE span detection task
    self.start_n_top = config.start_n_top
    self.end_n_top = config.end_n_top
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj_3way = nn.Linear(config.d_model, self.num_labels_3way)
    self.logits_proj_multi = nn.Linear(config.d_model, self.num_labels_multi)
    self.weights_3way = [1, 1.3, 3.3]
    self.weights_multi = [15, 10, 15, 5, 5]
    # `device` is assumed to be defined at module level.
    self.class_weights_3way = torch.FloatTensor(self.weights_3way).to(device)
    self.class_weights_multi = torch.FloatTensor(self.weights_multi).to(device)
    # RTE span detection task
    self.start_logits = PoolerStartLogits(config)
    self.end_logits = PoolerEndLogits(config)
    self.answer_class = PoolerAnswerClass(config)
    self.init_weights()
def __init__(self, config):
    super(XLMForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLMModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.init_weights()
def __init__(self, config):
    print("************ THIS MODEL COMES FROM CS224N PROJECT ************")
    super().__init__(config)
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.multiple_choice_head = SequenceSummary(config)
    self.init_weights()
def __init__(self, config):
    super(GPT2DoubleHeadsModel, self).__init__(config)
    config.num_labels = 1
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.multiple_choice_head = SequenceSummary(config)
    self.init_weights()
def __init__(self, config):
    super(XLNetForXMC, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.init_weights()
def __init__(self, config):
    super(XLNetForSequenceClassificationGivenEmbedding, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModelWithoutEmbedding(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj = nn.Linear(config.d_model, 1)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    config.num_labels = 1
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.cls_head = SequenceSummary(config)
    self.init_weights()
def __init__(self, model, device, pretrained_config):
    super(modified_XLNet, self).__init__()
    self.model = model
    self.cls_linear_1 = nn.Linear(768, 300)
    self.cls_linear_2 = nn.Linear(300, 2)
    self.device = device
    self.dropout_1 = nn.Dropout(0.5)
    self.dropout_2 = nn.Dropout(0.5)
    self.sequence_summary = SequenceSummary(pretrained_config)
def __init__(self, config, weight=None):
    super(FlaubertForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.weight = weight
    self.transformer = FlaubertModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.init_weights()
def __init__(self, dropout_rate=0.3, n_outputs=2):
    super(XLNETClassifier, self).__init__()
    self.pretrained_model = XLNetModel.from_pretrained("xlnet-base-cased")
    self.sequence_summary = SequenceSummary(self.pretrained_model.config)
    self.d1 = torch.nn.Dropout(dropout_rate)
    self.l1 = torch.nn.Linear(768, 64)
    self.bn1 = torch.nn.LayerNorm(64)
    self.d2 = torch.nn.Dropout(dropout_rate)
    self.l2 = torch.nn.Linear(64, n_outputs)
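# Hedged sketch of a matching forward pass (an assumption; the original forward
# method is not shown, and the ReLU between the two linear layers is a guess):
# run XLNet, pool the token states with SequenceSummary, then apply the
# dropout/linear/norm stack defined above.
def forward(self, input_ids, attention_mask=None):
    outputs = self.pretrained_model(input_ids, attention_mask=attention_mask)
    pooled = self.sequence_summary(outputs[0])  # (batch, 768)
    x = self.d1(pooled)
    x = self.bn1(self.l1(x))
    x = torch.nn.functional.relu(x)
    x = self.d2(x)
    return self.l2(x)  # (batch, n_outputs) logits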
def __init__(self, config):
    super().__init__(config)
    config.num_labels = 1
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.debias_head = nn.functional.linear
    self.multiple_choice_head = SequenceSummary(config)
    self.init_weights()
def __init__(self, config):
    super(XLNet_Reader, self).__init__(config)
    self.config = config
    self.xlnet = XLNetModel(config)
    self.sentence_summary = SequenceSummary(config)
    # self.dropout = nn.Dropout(0.3)
    # self.aggregation = nn.LSTM(config.hidden_size, config.hidden_size,
    #                            batch_first=True, num_layers=1,
    #                            bidirectional=True)
    # self.batch_norm_for_rnn = nn.BatchNorm1d(config.hidden_size)
    self.dropout = nn.Dropout(0.1)
def __init__(self, pretrained_model_type, pretrained_model, tagset_size,
             class_size, dropout=0., device=None, extFeats_dim=None,
             multi_class=False, task_st='NN', task_sc='CLS'):
    """Initialize model."""
    super(Transformers_joint_slot_and_intent, self).__init__()
    self.tagset_size = tagset_size
    self.class_size = class_size
    self.dropout = dropout
    self.device = device
    self.extFeats_dim = extFeats_dim
    self.multi_class = multi_class
    self.task_st = task_st  # 'NN', 'NN_crf'
    self.task_sc = task_sc  # None, 'CLS', 'max', 'CLS_max'

    self.dropout_layer = nn.Dropout(p=self.dropout)
    self.pretrained_model_type = pretrained_model_type
    self.pretrained_model = pretrained_model
    if self.pretrained_model_type == 'xlnet':
        self.sequence_summary = SequenceSummary(self.pretrained_model.config)
    self.embedding_dim = self.pretrained_model.config.hidden_size

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    self.append_feature_dim = 0
    if self.extFeats_dim:
        self.append_feature_dim += self.extFeats_dim
        self.extFeats_linear = nn.Linear(self.append_feature_dim,
                                         self.append_feature_dim)
    else:
        self.extFeats_linear = None

    # The linear layer that maps from hidden state space to tag space
    if self.task_st == 'NN':
        self.hidden2tag = nn.Linear(
            self.embedding_dim + self.append_feature_dim, self.tagset_size)
    else:
        self.hidden2tag = nn.Linear(
            self.embedding_dim + self.append_feature_dim, self.tagset_size + 2)
        self.crf_layer = crf.CRF(self.tagset_size, self.device)

    if self.task_sc == 'CLS' or self.task_sc == 'max':
        self.hidden2class = nn.Linear(self.embedding_dim, self.class_size)
    elif self.task_sc == 'CLS_max':
        self.hidden2class = nn.Linear(self.embedding_dim * 2, self.class_size)
    else:
        pass
def __init__(self, config, *args, **kwargs):
    super().__init__(config)
    self.logits_device = torch.device(kwargs.pop("logits_device"))
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config, **kwargs)
    self.sequence_summary = SequenceSummary(config).to(self.logits_device)
    self.logits_proj = nn.Linear(config.d_model,
                                 config.num_labels).to(self.logits_device)
    self.init_weights()
def __init__(self, config, pos_weight=None):
    super(XLNetForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.pos_weight = pos_weight
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj = nn.Linear(config.d_model, config.num_labels)
    self.init_weights()
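# Hedged sketch (an assumption, not the original training loop) of how the
# multi-label head above is typically trained: the pooled logits are scored
# with BCEWithLogitsLoss, where pos_weight up-weights positive labels.
import torch
import torch.nn as nn

num_labels = 4
logits = torch.randn(2, num_labels)  # stand-in for logits_proj(sequence_summary(...))
labels = torch.randint(0, 2, (2, num_labels)).float()
pos_weight = torch.ones(num_labels) * 2.0  # e.g. the pos_weight passed to __init__
loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(logits, labels)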
def __init__(self, config):
    super(XLNetForXMLC, self).__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj = nn.Linear(config.d_model, config.num_labels)
    self.loss_fct = HingeLoss(margin=1.0, squared=True)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    # `self.num_size` is not set anywhere in this __init__; it is assumed to be
    # a class attribute or set upstream, presumably the width of extra features
    # concatenated to the pooled output before the projection.
    hidden_size = config.d_model + self.num_size
    self.logits_proj = nn.Linear(hidden_size, config.num_labels)
    self.init_weights()
def __init__(self, model_name, num_fine_labels, num_coarse_labels):
    super().__init__()
    self.transformer = XLNetModel.from_pretrained(model_name,
                                                  num_labels=num_fine_labels)
    self.sequence_summary = SequenceSummary(self.transformer.config)
    self.classifier_coarse = nn.Linear(self.transformer.config.d_model,
                                       num_coarse_labels)
    self.classifier_fine = nn.Linear(self.transformer.config.d_model,
                                     num_fine_labels)
def __init__(self, config):
    super(GPT2ForSequenceRanking, self).__init__(config)
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    config.summary_type = 'mean'
    self.good_head = SequenceSummary(config)
    self.size = config.n_embd
    self.init_weights()
def __init__(self, config):
    super(GPT2MultiHeadsAdversarialClModel, self).__init__(config)
    config.num_labels = 1
    self.transformer = GPT2Model(config)
    self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
    self.multiple_choice_head = SequenceSummary(config)
    if not hasattr(config, 'cls'):
        config.cls = {}
    # set default values
    if 'default' not in config.cls:
        config.cls['default'] = {
            "summary_first_dropout": 0.1,
            "summary_proj_to_labels": True,
            "summary_type": 'cls_index',
            "summary_use_proj": True,
            "is_adversarial": False,
        }
    self.cl_heads = {}
    for cl_name, cl_config in config.cls.items():
        if cl_name != 'default':
            assert 'labels' in cl_config, \
                f'no labels set in config for classifier {cl_name}'
            _cl_config = copy.deepcopy(config.cls['default'])
            _cl_config.update(cl_config)
            _cl_config['num_labels'] = len(_cl_config['labels'])
            _cl_config['hidden_size'] = config.hidden_size
            self.cl_heads[cl_name] = SequenceSummary(
                PretrainedConfig(**_cl_config))
            setattr(self.cl_heads[cl_name], 'is_adversarial',
                    _cl_config.get('is_adversarial', False))
            self.add_module(name=f'cl_head_{cl_name}',
                            module=self.cl_heads[cl_name])
    config.num_labels = 1
    self.init_weights()
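# Hedged example (an assumption, not from the original source) of the per-head
# config dict the loop above consumes: each non-'default' key names a
# classifier head, its value must contain a 'labels' list, and it may override
# any of the 'default' summary settings.
example_cls = {
    "sentiment": {"labels": ["neg", "pos"]},
    "domain": {"labels": ["news", "chat"], "is_adversarial": True},
}
# e.g. set config.cls = example_cls before constructing the model; the head
# flagged is_adversarial is presumably trained against the main objective.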
def __init__(self, config, lambd, mean_pool=False):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    self.logits_proj = nn.Linear(config.d_model, config.num_labels)
    self.lambd = lambd
    print("Gradient reversal parameter is: {}".format(self.lambd))
    self.grl = GradientReversal(self.lambd)
    self.domain_classifier = nn.Linear(config.hidden_size, 2)
    self.init_weights()
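# Minimal hedged sketch of the GradientReversal module used above. The original
# implementation is not shown; this is the standard DANN-style layer: identity
# on the forward pass, negated and scaled gradient on the backward pass.
import torch

class GradientReversalFn(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Reverse and scale the gradient; no gradient w.r.t. lambd.
        return -ctx.lambd * grad_output, None

class GradientReversal(torch.nn.Module):
    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        return GradientReversalFn.apply(x, self.lambd)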
def __init__(self, config, lossfct=None, CEL_type='mean', quick_return=False):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.lossfct = lossfct
    self.transformer = XLNetModel(config)
    self.sequence_summary = SequenceSummary(config)
    # self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.CEL_type = CEL_type
    self.quick_return = quick_return
    self.init_weights()
def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a pretrained model by supplying

    * the name of a remote model on s3 ("distilbert-base-german-cased" ...)
    * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
    * OR a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
    :type pretrained_model_name_or_path: str
    """
    distilbert = cls()
    if "farm_lm_name" in kwargs:
        distilbert.name = kwargs["farm_lm_name"]
    else:
        distilbert.name = pretrained_model_name_or_path
    # We need to differentiate between loading model using FARM format and Pytorch-Transformers format
    farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                  "language_model_config.json")
    if os.path.exists(farm_lm_config):
        # FARM style
        distilbert_config = DistilBertConfig.from_pretrained(farm_lm_config)
        farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                     "language_model.bin")
        distilbert.model = DistilBertModel.from_pretrained(
            farm_lm_model, config=distilbert_config, **kwargs)
        distilbert.language = distilbert.model.config.language
    else:
        # Pytorch-Transformers style
        distilbert.model = DistilBertModel.from_pretrained(
            pretrained_model_name_or_path, **kwargs)
        distilbert.language = cls._infer_language_from_name(
            pretrained_model_name_or_path)
    config = distilbert.model.config

    # DistilBERT does not provide a pooled_output by default. Therefore, we need
    # to initialize an extra pooler. The pooler takes the first hidden
    # representation and feeds it to a dense layer of (hidden_dim x hidden_dim).
    # We don't want dropout at the end of the pooler, since we do that already
    # in the adaptive model before we feed everything to the prediction head.
    config.summary_last_dropout = 0
    config.summary_type = 'first'
    config.summary_activation = 'tanh'
    distilbert.pooler = SequenceSummary(config)
    distilbert.pooler.apply(distilbert.model._init_weights)
    return distilbert
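# Hedged standalone sketch of what the extra pooler does, using only torch and
# transformers (the model name is an example; summary_use_proj/summary_proj_to_labels
# are set explicitly here as an assumption, to guarantee the dense projection
# regardless of which summary attributes DistilBertConfig defines).
import torch
from transformers import DistilBertModel
from transformers.modeling_utils import SequenceSummary

model = DistilBertModel.from_pretrained("distilbert-base-uncased")
config = model.config
config.summary_last_dropout = 0
config.summary_type = "first"
config.summary_activation = "tanh"
config.summary_use_proj = True
config.summary_proj_to_labels = False
pooler = SequenceSummary(config)

input_ids = torch.tensor([[101, 7592, 102]])
hidden_states = model(input_ids)[0]  # (1, seq_len, hidden)
pooled = pooler(hidden_states)       # (1, hidden): tanh(dense(first token))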
def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a language model either by supplying

    * the name of a remote model on s3 ("xlnet-base-cased" ...)
    * or a local path of a model trained via transformers ("some_dir/huggingface_model")
    * or a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: name or path of a model
    :param language: (Optional) Name of language the model was trained for (e.g. "german").
                     If not supplied, FARM will try to infer it from the model name.
    :return: Language Model
    """
    xlnet = cls()
    if "farm_lm_name" in kwargs:
        xlnet.name = kwargs["farm_lm_name"]
    else:
        xlnet.name = pretrained_model_name_or_path
    # We need to differentiate between loading model using FARM format and Pytorch-Transformers format
    farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                  "language_model_config.json")
    if os.path.exists(farm_lm_config):
        # FARM style
        config = XLNetConfig.from_pretrained(farm_lm_config)
        farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                     "language_model.bin")
        xlnet.model = XLNetModel.from_pretrained(farm_lm_model, config=config,
                                                 **kwargs)
        xlnet.language = xlnet.model.config.language
    else:
        # Pytorch-Transformers style
        xlnet.model = XLNetModel.from_pretrained(pretrained_model_name_or_path,
                                                 **kwargs)
        xlnet.language = cls._infer_language_from_name(
            pretrained_model_name_or_path)
    config = xlnet.model.config

    # XLNet does not provide a pooled_output by default. Therefore, we need to
    # initialize an extra pooler. The pooler takes the last hidden
    # representation and feeds it to a dense layer of (hidden_dim x hidden_dim).
    # We don't want dropout at the end of the pooler, since we do that already
    # in the adaptive model before we feed everything to the prediction head.
    config.summary_last_dropout = 0
    xlnet.pooler = SequenceSummary(config)
    xlnet.pooler.apply(xlnet.model._init_weights)
    return xlnet