Example #1
import torch.nn as nn
from transformers import XLMModel, XLMPreTrainedModel


# Constructor of an XLM token-classification head; the class line and imports
# are reconstructed from the super() call (assumed to subclass XLMPreTrainedModel).
class XLMForTokenClassification(XLMPreTrainedModel):
    def __init__(self, config):
        super(XLMForTokenClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.transformer = XLMModel(config)         # XLM encoder backbone
        self.dropout = nn.Dropout(config.dropout)   # dropout before the classifier
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)  # per-token logits

        self.init_weights()
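The constructor above only wires the modules together. Below is a minimal usage sketch, assuming the stock transformers implementation of XLMForTokenClassification; the config values are illustrative assumptions, not taken from the example.

# Minimal sketch, assuming the stock transformers XLMForTokenClassification;
# all config values below are illustrative.
import torch
from transformers import XLMConfig, XLMForTokenClassification

config = XLMConfig(emb_dim=128, n_layers=2, n_heads=4, num_labels=9)
model = XLMForTokenClassification(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (1, 12))  # dummy token ids
with torch.no_grad():
    outputs = model(input_ids)
print(outputs[0].shape)  # per-token logits: (batch, seq_len, num_labels)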
Example #2
import torch.nn as nn
from transformers import XLMModel, XLMPreTrainedModel
from transformers.modeling_utils import SequenceSummary  # import path may vary by version


# Constructor of an XLM sequence-classification head; the class line and imports
# are reconstructed from the super() call (assumed to subclass XLMPreTrainedModel).
class XLMForSequenceClassification(XLMPreTrainedModel):
    def __init__(self, config, weight=None):
        super(XLMForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.weight = weight                             # optional per-class loss weights

        self.transformer = XLMModel(config)              # XLM encoder backbone
        self.sequence_summary = SequenceSummary(config)  # pools token states into one vector

        self.init_weights()
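The `weight` argument is not used in the constructor itself; in classification heads like this one it is typically passed on to a weighted cross-entropy loss in the forward pass. A small, self-contained sketch of that pattern follows; the class weights, logits, and labels are dummies introduced only for illustration.

# Sketch of the usual role of `weight`: per-class weights for the loss.
# All tensors below are dummies; only the pattern is the point.
import torch
import torch.nn as nn

num_labels = 3
class_weights = torch.tensor([1.0, 2.0, 0.5])         # hypothetical class weights
loss_fct = nn.CrossEntropyLoss(weight=class_weights)

logits = torch.randn(4, num_labels)                   # dummy sequence-level logits
labels = torch.tensor([0, 2, 1, 1])
loss = loss_fct(logits.view(-1, num_labels), labels.view(-1))
print(loss.item())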
Example #3
def xlmModel(*args, **kwargs):
    """
        # Load xlmModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
        >>> model.eval()

        # Predict hidden states for the input tokens
        >>> with torch.no_grad():
        ...     hidden_states_1 = model(tokens_tensor_1)[0]
        ...     hidden_states_2 = model(tokens_tensor_2)[0]
    """
    model = XLMModel.from_pretrained(*args, **kwargs)
    return model
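The helper is a thin wrapper around XLMModel.from_pretrained, so the torch.hub call in the docstring is roughly equivalent to loading the model and its tokenizer directly, as in the sketch below (assumes a recent transformers install; the pretrained weights are downloaded on first use).

# Sketch of the equivalent direct load, without torch.hub.
import torch
from transformers import XLMModel, XLMTokenizer

tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
model.eval()

inputs = tokenizer("Who was Jim Henson?", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs[0].shape)  # last hidden states: (batch, seq_len, emb_dim)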
Example #4
import torch.nn as nn
from transformers import XLMConfig, XLMModel


# Tabular-sequence model that feeds categorical and continuous features into an
# XLM encoder; the class line and imports are reconstructed from the method body.
class DSB_XLMModel(nn.Module):
    def __init__(self, cfg):
        super(DSB_XLMModel, self).__init__()
        self.cfg = cfg
        cate_col_size = len(cfg.cate_cols)
        cont_col_size = len(cfg.cont_cols)
        # one shared embedding table for all categorical columns
        self.cate_emb = nn.Embedding(cfg.total_cate_size,
                                     cfg.emb_size,
                                     padding_idx=0)
        # project concatenated categorical embeddings to half the hidden size
        self.cate_proj = nn.Sequential(
            nn.Linear(cfg.emb_size * cate_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        # project continuous features to the other half of the hidden size
        self.cont_emb = nn.Sequential(
            nn.Linear(cont_col_size, cfg.hidden_size // 2),
            nn.LayerNorm(cfg.hidden_size // 2),
        )
        self.config = XLMConfig(
            3,  # vocab size, not used here
            emb_dim=cfg.hidden_size,
            n_layers=cfg.nlayers,
            n_heads=cfg.nheads,
            dropout=cfg.dropout,
            attention_dropout=cfg.dropout,
            gelu_activation=True,
            sinusoidal_embeddings=False,
            causal=False,
            asm=False,
            n_langs=1,
            use_lang_emb=True,
            max_position_embeddings=cfg.seq_len,
            embed_init_std=(cfg.hidden_size)**-0.5,
            layer_norm_eps=1e-12,
            init_std=0.02,
            bos_index=0,
            eos_index=1,
            pad_index=2,
            unk_index=3,
            mask_index=5,
            is_encoder=True,
            summary_type="first",
            summary_use_proj=True,
            summary_activation=None,
            summary_proj_to_labels=True,
            summary_first_dropout=cfg.dropout,
            start_n_top=5,
            end_n_top=5,
            mask_token_id=0,
            lang_id=0,
        )

        # XLM transformer encoder consuming the projected feature embeddings
        self.encoder = XLMModel(self.config)

        # regression head mapping the encoder output to the prediction target
        def get_reg():
            return nn.Sequential(
                nn.Linear(cfg.hidden_size, cfg.hidden_size),
                nn.LayerNorm(cfg.hidden_size),
                nn.Dropout(cfg.dropout),
                nn.ReLU(),
                nn.Linear(cfg.hidden_size, cfg.target_size),
            )

        self.reg_layer = get_reg()
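The constructor reads several attributes from cfg (cate_cols, cont_cols, total_cate_size, emb_size, hidden_size, nlayers, nheads, dropout, seq_len, target_size). A minimal sketch of a compatible config object is below; the attribute names come from the code above, while the column names and sizes are purely illustrative.

# Minimal sketch of a cfg the constructor above can consume.
# Attribute names come from the code; the values are illustrative.
from types import SimpleNamespace

cfg = SimpleNamespace(
    cate_cols=["event_code", "title"],   # hypothetical categorical columns
    cont_cols=["elapsed_time"],          # hypothetical continuous column
    total_cate_size=100,                 # shared vocabulary for all categorical columns
    emb_size=32,
    hidden_size=64,                      # must be divisible by nheads
    nlayers=2,
    nheads=4,
    dropout=0.1,
    seq_len=100,
    target_size=1,
)
model = DSB_XLMModel(cfg)
print(model)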