Example #1
    def __init__(self,
                 num_classes: int,
                 vocabulary_size: int,
                 embedding_size: int = 300,
                 hidden_size: int = 100,
                 swem_mode: str = "concat",
                 swem_window: int = 2,
                 activation_function: str = "relu",
                 drop_out: float = 0.2,
                 padding_idx: int = 0,
                 pretrained_vec=None,
                 update_embedding: bool = True,
                 apply_sm: bool = True):
        """
        Args:
            num_classes (int) : number of classes
            vocabulary_size (int): number of items in the vocabulary
            embedding_size (int): size of the embeddings
            swem_mode (str):
            activation_function (str)
            drop_out (float) : default 0.2; drop out rate applied to the embedding layer
            padding_idx (int): default 0; Embedding will not use this index
            pretrained_vec (nd.array): default None : numpy matrix containing pretrained word vectors
            update_embedding (boolean) : default True : train or freeze the embedding layer

        """
        super(SWEM, self).__init__()

        self.swem_mode = swem_mode
        self.swem_window = swem_window
        self.apply_sm = apply_sm
        self.drop_out = drop_out
        self.pretrained_vec = pretrained_vec
        self.embedding_size = embedding_size
        self.embedding = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=embedding_size,
            padding_idx=padding_idx,
        )

        if self.pretrained_vec is not None:
            self.embedding.weight.data.copy_(
                torch.from_numpy(self.pretrained_vec))
        else:
            init.xavier_uniform_(self.embedding.weight)
        # Train or freeze the embedding weights depending on update_embedding
        self.embedding.weight.requires_grad = update_embedding

        if self.swem_mode == "concat":
            in_size = embedding_size * 2
        else:
            in_size = embedding_size

        # TODO: the AdaptiveAvgPool1d setup only supports swem_window=2, i.e. bigrams
        self.hier_pool = nn.AdaptiveAvgPool1d(self.embedding_size)

        self.fc1 = nn.Linear(in_size, hidden_size)
        self.activation = get_activation_func(activation_function.lower())
        self.fc2 = nn.Linear(hidden_size, out_features=num_classes)
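
A minimal usage sketch for this constructor (the SWEM class itself and its forward signature are assumed from the snippet above; the vocabulary and label counts are illustrative):

import torch

# Hypothetical sizes; replace with values derived from your dataset.
model = SWEM(num_classes=4, vocabulary_size=20000, swem_mode="concat")
token_ids = torch.randint(1, 20000, (8, 32))  # batch of 8 padded sequences of length 32
logits = model(token_ids)                     # assumes forward() takes a LongTensor of token ids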
Example #2
    def __init__(
        self,
        num_classes: int,
        vocabulary_size: int,
        embedding_size: int,
        ngram: int = 55,
        fc_hidden_sizes: list = [25],
        fc_activation_functions: list = ["relu"],
        fc_dropouts: list = [0.5],
        pretrained_vec=None,
        update_embedding: bool = True,
        padding_idx: int = 0,
        apply_sm: bool = True,
        device: str = "cuda",
    ):
        super(LEAM, self).__init__()

        self.num_classes = num_classes
        self.pretrained_vec = pretrained_vec
        self.apply_sm = apply_sm
        self.device = device

        self.hidden_sizes = fc_hidden_sizes
        self.embedding = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=embedding_size,
            padding_idx=padding_idx,
        )
        if self.pretrained_vec is not None:
            self.embedding.weight.data.copy_(
                torch.from_numpy(self.pretrained_vec))
        else:
            init.xavier_uniform_(self.embedding.weight)
        # Train or freeze the embedding weights depending on update_embedding
        self.embedding.weight.requires_grad = update_embedding

        self.embedding_class = nn.Embedding(num_classes, embedding_size)
        self.conv = torch.nn.Conv1d(in_channels=num_classes,
                                    out_channels=num_classes,
                                    kernel_size=2 * ngram + 1,
                                    padding=ngram)

        self.hidden_sizes = [embedding_size] + self.hidden_sizes + [num_classes]
        modules = []
        for i in range(len(self.hidden_sizes) - 1):
            modules.append(
                nn.Linear(in_features=self.hidden_sizes[i],
                          out_features=self.hidden_sizes[i + 1]))
            if i < len(self.hidden_sizes) - 2:
                modules.append(get_activation_func(fc_activation_functions[i]))
                if fc_dropouts[i] is not None:
                    if fc_dropouts[i] > 0.0:
                        modules.append(torch.nn.Dropout(p=fc_dropouts[i]))
        self.module_list = nn.ModuleList(modules)
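
A hedged instantiation sketch based on the signature above (embedding_size has no default and must be passed; all values are illustrative):

# ngram controls the width of the label-attention convolution (kernel_size = 2 * ngram + 1).
model = LEAM(num_classes=4,
             vocabulary_size=20000,
             embedding_size=300,
             ngram=10,
             fc_hidden_sizes=[25],
             device="cpu")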
Example #3
    def __init__(self,
                 num_classes: int,
                 vocabulary_size: int,
                 embedding_size: int = 128,
                 region_size: int = 7,
                 max_sent_len: int = 256,
                 activation_function: str = "relu",
                 padding_idx: int = 0,
                 drop_out: float = 0.2,
                 pretrained_vec=None,
                 update_embedding: bool = True,
                 device: str = "cuda",
                 apply_sm: bool = True):
        """
        Args:
            num_classes (int) : number of classes
            vocabulary_size (int): number of items in the vocabulary
            embedding_size (int): size of the embeddings
            padding_idx (int): default 0; Embedding will not use this index
            drop_out (float) : default 0.2; drop out rate applied to the embedding layer
            pretrained_vec (nd.array): default None : numpy matrix containing pretrained word vectors
            update_embedding: bool (boolean) : default True : option to train/freeze embedding layer weights
        """
        super(EXAM, self).__init__()
        self.num_classes = num_classes
        self.max_sent_len = max_sent_len
        self.region_size = region_size
        self.region_radius = self.region_size // 2
        self.embedding_size = embedding_size
        self.drop_out = drop_out
        self.pretrained_vec = pretrained_vec
        self.device = torch.device(device)
        self.apply_sm = apply_sm

        # Embedding layers required for the region embedding (Word Context Scenario)
        self.embedding = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=region_size * self.embedding_size,
            padding_idx=padding_idx,
        )
        self.embedding_region = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=self.embedding_size,
            padding_idx=padding_idx,
        )

        self.activation = get_activation_func(activation_function.lower())
        self.max_pool_1d = nn.AdaptiveMaxPool1d(output_size=1)

        # EXAM adds 2 extra linear layers (dense1/dense2) on top of the default region embedding models
        self.dense0 = nn.Linear(self.embedding_size, num_classes)
        self.dense1 = nn.Linear(self.max_sent_len - 2 * self.region_radius,
                                self.max_sent_len * 2)
        self.dense2 = nn.Linear(self.max_sent_len * 2, 1)

        if self.pretrained_vec is not None:
            self.embedding.weight.data.copy_(
                torch.from_numpy(self.pretrained_vec))
        else:
            init.xavier_uniform_(self.embedding.weight)
        # Train or freeze the embedding weights depending on update_embedding
        self.embedding.weight.requires_grad = update_embedding
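
A hedged instantiation sketch based on the signature above (inputs are expected to be padded to max_sent_len; values are illustrative):

# region_size is the width of the local word-context window; it should be odd
# so that region_radius = region_size // 2 is symmetric around the center word.
model = EXAM(num_classes=4,
             vocabulary_size=20000,
             region_size=7,
             max_sent_len=256,
             device="cpu")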
Example #4
    def __init__(self,
                 num_classes: int,
                 vocabulary_size: int,
                 embedding_size: int = 300,
                 embedding_mode: str = "avg",
                 fc_hidden_sizes: list = [256, 128, 64],
                 fc_activation_functions: list = ["relu", "relu", "relu"],
                 fc_dropouts: list = [0.2, None, None],
                 padding_idx: int = 0,
                 pretrained_vec=None,
                 update_embedding: bool = True,
                 apply_sm: bool = True):
        """
        Args:
            num_classes (int) : number of classes
            vocabulary_size (int): number of items in the vocabulary
            embedding_size (int): size of the embeddings
            embedding_mode (str): "avg","max" or "concat"
            fc_activation_functions (str)
            dropout (float) : default 0.2; drop out rate applied to the embedding layer
            padding_idx (int): default 0; Embedding will not use this index
            pretrained_vec (nd.array): default None : numpy matrix containing pretrained word vectors
            update_embedding (boolean) : default True : train (True) or freeze(False) the embedding layer

        """
        super(MLP, self).__init__()

        self.embedding_mode = embedding_mode
        self.hidden_sizes = fc_hidden_sizes
        self.pretrained_vec = pretrained_vec
        self.embedding = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=embedding_size,
            padding_idx=padding_idx,
        )
        self.apply_sm = apply_sm
        if self.pretrained_vec is not None:
            self.embedding.weight.data.copy_(
                torch.from_numpy(self.pretrained_vec))
        else:
            init.xavier_uniform_(self.embedding.weight)
        # Train or freeze the embedding weights depending on update_embedding
        self.embedding.weight.requires_grad = update_embedding

        if self.embedding_mode == "concat":
            in_size = embedding_size * 2
        else:
            in_size = embedding_size

        # Dynamic setup of MLP given the input parameters
        self.hidden_sizes = [in_size] + self.hidden_sizes + [num_classes]
        modules = []
        for i in range(len(self.hidden_sizes) - 1):
            modules.append(
                nn.Linear(in_features=self.hidden_sizes[i],
                          out_features=self.hidden_sizes[i + 1]))
            if i < len(self.hidden_sizes) - 2:
                modules.append(get_activation_func(fc_activation_functions[i]))
                if fc_dropouts[i] is not None:
                    if fc_dropouts[i] > 0.0:
                        modules.append(torch.nn.Dropout(p=fc_dropouts[i]))
        self.module_list = nn.ModuleList(modules)
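
A hedged instantiation sketch based on the signature above; note that fc_activation_functions and fc_dropouts must provide one entry per hidden layer (values are illustrative):

model = MLP(num_classes=4,
            vocabulary_size=20000,
            embedding_mode="max",
            fc_hidden_sizes=[128, 64],
            fc_activation_functions=["relu", "relu"],
            fc_dropouts=[0.2, None])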
Example #5
    def __init__(self,
                 num_classes: int,
                 vocabulary_size: int,
                 embedding_size: int = 300,
                 hidden_size: int = 100,
                 rnn_type: str = "lstm",
                 rnn_num_layers: int = 2,
                 rnn_bidirectional: bool = True,
                 rnn_dropout: float = 0.2,
                 activation_function: str = "tanh",
                 drop_out: float = 0.4,
                 padding_idx: int = 0,
                 pretrained_vec=None,
                 update_embedding: bool = True,
                 apply_sm: bool = True):
        super(TextRCNN, self).__init__()

        self.rnn_type = rnn_type.lower()
        self.apply_sm = apply_sm
        self.pretrained_vec = pretrained_vec

        self.embedding = nn.Embedding(
            num_embeddings=vocabulary_size,
            embedding_dim=embedding_size,
            padding_idx=padding_idx,
        )

        if self.pretrained_vec is not None:
            self.embedding.weight.data.copy_(
                torch.from_numpy(self.pretrained_vec))
        else:
            init.xavier_uniform_(self.embedding.weight)
        # Train or freeze the embedding weights depending on update_embedding
        self.embedding.weight.requires_grad = update_embedding

        if rnn_bidirectional:
            h_size = hidden_size * 2
        else:
            h_size = hidden_size

        if self.rnn_type == "lstm":
            self.rnn_encoder = nn.LSTM(
                input_size=embedding_size,
                hidden_size=hidden_size,
                num_layers=rnn_num_layers,
                batch_first=True,
                bidirectional=rnn_bidirectional,
                dropout=rnn_dropout,
            )

        elif self.rnn_type == "gru":
            self.rnn_encoder = nn.GRU(
                input_size=embedding_size,
                hidden_size=hidden_size,
                num_layers=rnn_num_layers,
                batch_first=True,
                bidirectional=rnn_bidirectional,
                dropout=rnn_dropout,
            )
        else:
            raise NotImplementedError(f"Unsupported rnn_type: {rnn_type}")

        self.fc1 = nn.Linear(h_size + embedding_size, h_size)
        self.activation = get_activation_func(activation_function.lower())
        self.fc2 = nn.Linear(h_size, num_classes)
        self.dropout = nn.Dropout(p=drop_out)
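
A hedged instantiation sketch based on the signature above (values are illustrative; rnn_type accepts "lstm" or "gru"):

# With rnn_bidirectional=True the recurrent output size is 2 * hidden_size,
# which fc1 combines with the embedding features (in_features = h_size + embedding_size).
model = TextRCNN(num_classes=4,
                 vocabulary_size=20000,
                 rnn_type="gru",
                 hidden_size=100,
                 rnn_bidirectional=True)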