def __init__(self, config):
    super(XLMForTokenClassification, self).__init__(config)
    self.num_labels = config.num_labels

    self.transformer = XLMModel(config)
    self.dropout = nn.Dropout(config.dropout)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    self.init_weights()
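Only the constructor of the token-classification head is shown above. A minimal sketch of a matching forward pass, assuming the usual token-classification pattern (run the transformer, apply dropout to the per-token hidden states, project to num_labels, and compute a cross-entropy loss when labels are given); the argument names and the loss handling are assumptions, not taken from the original:

def forward(self, input_ids, attention_mask=None, labels=None):
    # Hypothetical forward pass; not part of the original snippet.
    outputs = self.transformer(input_ids, attention_mask=attention_mask)
    sequence_output = self.dropout(outputs[0])   # (batch, seq_len, hidden_size)
    logits = self.classifier(sequence_output)    # (batch, seq_len, num_labels)

    if labels is not None:
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return loss, logits
    return logits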
def __init__(self, config, weight=None):
    super(XLMForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.weight = weight

    self.transformer = XLMModel(config)
    self.sequence_summary = SequenceSummary(config)

    self.init_weights()
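Again only the constructor is shown. A minimal sketch of a matching forward pass, assuming SequenceSummary pools the sequence and projects to num_labels (as it does when summary_proj_to_labels is enabled) and that weight is a per-class weight tensor for an imbalanced-data CrossEntropyLoss; argument names are assumptions:

def forward(self, input_ids, attention_mask=None, labels=None):
    # Hypothetical forward pass; not part of the original snippet.
    outputs = self.transformer(input_ids, attention_mask=attention_mask)
    logits = self.sequence_summary(outputs[0])   # (batch, num_labels)

    if labels is not None:
        # weight is assumed to be per-class weights passed at construction time
        loss_fct = nn.CrossEntropyLoss(weight=self.weight)
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return loss, logits
    return logits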
def xlmModel(*args, **kwargs):
    """
    # Load xlmModel
    >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
    >>> model.eval()

    # Predict hidden states features for each layer
    >>> with torch.no_grad():
            hidden_states_1 = model(tokens_tensor_1)[0]
            hidden_states_2 = model(tokens_tensor_2)[0]
    """
    model = XLMModel.from_pretrained(*args, **kwargs)
    return model
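The docstring example assumes tokens_tensor_1 and tokens_tensor_2 already exist. A minimal sketch of how they could be built with the matching XLM tokenizer; the sentences are placeholders, not from the original:

import torch
from pytorch_transformers import XLMTokenizer

# Hypothetical inputs for the docstring example above (placeholder sentences).
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokens_tensor_1 = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
tokens_tensor_2 = torch.tensor([tokenizer.encode("Jim Henson was a puppeteer")])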
def __init__(self, cfg):
    super(DSB_XLMModel, self).__init__()
    self.cfg = cfg
    cate_col_size = len(cfg.cate_cols)
    cont_col_size = len(cfg.cont_cols)

    # Categorical columns: one shared embedding table, projected to half the hidden size.
    self.cate_emb = nn.Embedding(cfg.total_cate_size, cfg.emb_size, padding_idx=0)
    self.cate_proj = nn.Sequential(
        nn.Linear(cfg.emb_size * cate_col_size, cfg.hidden_size // 2),
        nn.LayerNorm(cfg.hidden_size // 2),
    )
    # Continuous columns: projected to the other half of the hidden size.
    self.cont_emb = nn.Sequential(
        nn.Linear(cont_col_size, cfg.hidden_size // 2),
        nn.LayerNorm(cfg.hidden_size // 2),
    )

    # XLM encoder configured from cfg; the vocabulary size is irrelevant here.
    self.config = XLMConfig(
        3,  # not used
        emb_dim=cfg.hidden_size,
        n_layers=cfg.nlayers,
        n_heads=cfg.nheads,
        dropout=cfg.dropout,
        attention_dropout=cfg.dropout,
        gelu_activation=True,
        sinusoidal_embeddings=False,
        causal=False,
        asm=False,
        n_langs=1,
        use_lang_emb=True,
        max_position_embeddings=cfg.seq_len,
        embed_init_std=(cfg.hidden_size) ** -0.5,
        layer_norm_eps=1e-12,
        init_std=0.02,
        bos_index=0,
        eos_index=1,
        pad_index=2,
        unk_index=3,
        mask_index=5,
        is_encoder=True,
        summary_type="first",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=cfg.dropout,
        start_n_top=5,
        end_n_top=5,
        mask_token_id=0,
        lang_id=0,
    )
    self.encoder = XLMModel(self.config)

    # Regression head: hidden_size -> hidden_size -> target_size.
    def get_reg():
        return nn.Sequential(
            nn.Linear(cfg.hidden_size, cfg.hidden_size),
            nn.LayerNorm(cfg.hidden_size),
            nn.Dropout(cfg.dropout),
            nn.ReLU(),
            nn.Linear(cfg.hidden_size, cfg.target_size),
        )

    self.reg_layer = get_reg()
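The two projections above each produce hidden_size // 2 features, so their concatenation matches the encoder's hidden size. A minimal sketch of a forward pass under that reading, assuming a transformers version whose XLMModel accepts inputs_embeds; the argument names (cate_x, cont_x, mask) and the choice of the last time step for the regression head are assumptions, not taken from the original:

def forward(self, cate_x, cont_x, mask):
    # Hypothetical forward pass; the original snippet only shows __init__.
    batch_size, seq_len, _ = cate_x.size()

    # Embed each categorical column, flatten per step, project to hidden_size // 2.
    cate_emb = self.cate_emb(cate_x).view(batch_size, seq_len, -1)
    cate_emb = self.cate_proj(cate_emb)

    # Project continuous columns to the other hidden_size // 2.
    cont_emb = self.cont_emb(cont_x)

    # Concatenate to form per-step input embeddings for the XLM encoder.
    seq_emb = torch.cat([cate_emb, cont_emb], dim=2)   # (batch, seq_len, hidden_size)
    encoded = self.encoder(inputs_embeds=seq_emb, attention_mask=mask)[0]

    # Regression head on the final position of the sequence.
    return self.reg_layer(encoded[:, -1])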