Example #1
    def __init__(
        self,
        dictionary,
        embed_dim,
        hidden_dim,
        num_layers,
        bidirectional=True,
        word_delimiter="@SPACE",
    ):
        super().__init__()

        self.word_split = DelimiterSplit(dictionary, word_delimiter)

        self.dictionary = dictionary
        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_chars = rnn.Embedding(
            num_embeddings=num_embeddings,
            embedding_dim=embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=False,
        )

        self.bidirectional = bidirectional
        if self.bidirectional:
            assert hidden_dim % 2 == 0
        self.lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
            embed_dim,
            hidden_dim // 2 if bidirectional else hidden_dim,
            num_layers=num_layers,
            bidirectional=bidirectional,
        )
Example #2
    def __init__(
        self,
        dictionary,
        num_chars,
        char_embed_dim,
        token_embed_dim,
        normalize_embed,
        char_rnn_units,
        char_rnn_layers,
        hidden_dim,
        num_layers,
        dropout_in,
        dropout_out,
        residual_level,
        bidirectional,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in

        self.embed_chars = char_encoder.CharRNNModel(
            dictionary=dictionary,
            num_chars=num_chars,
            char_embed_dim=char_embed_dim,
            char_rnn_units=char_rnn_units,
            char_rnn_layers=char_rnn_layers,
        )

        self.embed_tokens = None
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=len(dictionary),
                embedding_dim=token_embed_dim,
                padding_idx=dictionary.pad(),
                freeze_embed=False,
                normalize_embed=normalize_embed,
            )

        self.word_dim = char_rnn_units + token_embed_dim

        self.bilstm = rnn.BiLSTM(
            num_layers=num_layers,
            bidirectional=bidirectional,
            embed_dim=self.word_dim,
            hidden_dim=hidden_dim,
            dropout=dropout_out,
            residual_level=residual_level,
        )

        # disables sorting and word-length thresholding if True
        # (enables ONNX tracing of length-sorted input with batch_size = 1)
        self.onnx_export_model = False
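
The onnx_export_model flag above is consulted elsewhere in the encoder's forward pass to skip sorting and word-length thresholding during tracing. A minimal sketch of how such a flag is typically used (the helper below is hypothetical and only illustrates the pattern; it is not part of the original source):

import torch

def maybe_sort_by_length(tokens, lengths, onnx_export_model):
    # Hypothetical helper: when exporting to ONNX, the trace must avoid
    # data-dependent reordering, so the batch is assumed to be a single,
    # already length-sorted sequence and is returned unchanged.
    if onnx_export_model:
        return tokens, lengths, None
    # Otherwise sort by descending length (as packed-sequence RNN
    # utilities require) and keep the permutation to undo it later.
    sorted_lengths, perm = torch.sort(lengths, descending=True)
    return tokens[perm], sorted_lengths, perm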
Example #3
    def __init__(
        self,
        dictionary,
        num_chars=50,
        char_embed_dim=32,
        convolutions_params='((128, 3), (128, 5))',
        nonlinear_fn_type='tanh',
        pool_type='max',
        num_highway_layers=0,
    ):
        super().__init__()
        self.dictionary = dictionary
        self.padding_idx = dictionary.pad()
        self.convolutions_params = convolutions_params
        self.num_highway_layers = num_highway_layers

        if nonlinear_fn_type == "tanh":
            nonlinear_fn = nn.Tanh
        elif nonlinear_fn_type == "relu":
            nonlinear_fn = nn.ReLU
        else:
            raise Exception(
                "Invalid nonlinear type: {}".format(nonlinear_fn_type))
        self.pool_type = pool_type

        self.embed_chars = rnn.Embedding(
            num_embeddings=num_chars,
            embedding_dim=char_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=False,
        )
        self.convolutions = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(
                    char_embed_dim,
                    num_filters,
                    kernel_size,
                    padding=kernel_size,
                ), nonlinear_fn())
            for (num_filters, kernel_size) in self.convolutions_params
        ])
        conv_output_dim = sum(out_dim
                              for (out_dim, _) in self.convolutions_params)

        highway_layers = []
        for _ in range(self.num_highway_layers):
            highway_layers.append(HighwayLayer(conv_output_dim))
        self.highway_layers = nn.ModuleList(highway_layers)
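
Note that the convolutions_params default above is a string; the encoder examples later on this page parse it with ast.literal_eval before passing it in, so the loop over (num_filters, kernel_size) pairs sees an actual tuple of tuples. For the default value, the concatenated filter outputs give conv_output_dim = 128 + 128 = 256, which a short standalone check confirms (names here are illustrative only):

from ast import literal_eval

convolutions_params = literal_eval("((128, 3), (128, 5))")
conv_output_dim = sum(out_dim for (out_dim, _) in convolutions_params)
assert conv_output_dim == 256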
Example #4
    def _load_byte_embedding(self):
        """
        Function to load the pre-trained byte embeddings. We need to ensure that
        the embeddings account for special yoda tags as well.
        """
        char_embed_weights = self.npz_weights["char_embed"]

        num_tags = len(TAGS)
        weights = np.zeros(
            (char_embed_weights.shape[0] + num_tags + 1, char_embed_weights.shape[1]),
            dtype="float32",
        )
        weights[1:-num_tags, :] = char_embed_weights

        self.embed_chars = rnn.Embedding(
            num_embeddings=self.num_embeddings,
            embedding_dim=self.char_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=self._finetune_pretrained_weights,
        )
        self.embed_chars.weight.data.copy_(torch.FloatTensor(weights))
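
The slice weights[1:-num_tags, :] above copies the pretrained rows starting at index 1, leaving row 0 zero (presumably reserved, e.g. for padding) and keeping the final num_tags rows free for the special tags mentioned in the docstring. A small NumPy sketch of that layout with made-up sizes (purely illustrative):

import numpy as np

# Made-up sizes: 4 pretrained byte embeddings of dimension 3, plus 2 tags.
char_embed_weights = np.arange(12, dtype="float32").reshape(4, 3)
num_tags = 2

weights = np.zeros(
    (char_embed_weights.shape[0] + num_tags + 1, char_embed_weights.shape[1]),
    dtype="float32",
)
weights[1:-num_tags, :] = char_embed_weights

# Row 0 and the last num_tags rows stay zero; rows 1..4 hold the
# pretrained embeddings.
assert weights.shape == (7, 3)
assert np.array_equal(weights[1:5], char_embed_weights)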
Example #5
    def __init__(self, dictionary, num_chars, char_embed_dim, char_rnn_units,
                 char_rnn_layers):
        super().__init__()
        self.num_chars = num_chars
        self.padding_idx = dictionary.pad()
        self.embed_chars = rnn.Embedding(
            num_embeddings=num_chars,
            embedding_dim=char_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=False,
        )

        assert (
            char_rnn_units % 2 == 0
        ), "char_rnn_units must be even (to be divided evenly between directions)"
        self.char_lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
            char_embed_dim,
            char_rnn_units // 2,
            num_layers=char_rnn_layers,
            bidirectional=True,
        )

        self.onnx_export_model = False
Example #6
    def __init__(
        self,
        dictionary,
        num_chars=50,
        embed_dim=32,
        token_embed_dim=256,
        freeze_embed=False,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_output_dim=256,
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        hidden_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        residual_level=None,
        bidirectional=False,
        word_dropout_params=None,
    ):

        super().__init__(dictionary)
        self.dictionary = dictionary
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.residual_level = residual_level
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional

        convolutions_params = literal_eval(char_cnn_params)
        self.char_cnn_encoder = char_encoder.CharCNNModel(
            dictionary,
            num_chars,
            embed_dim,
            convolutions_params,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
        )

        self.embed_tokens = None
        num_tokens = len(dictionary)
        self.padding_idx = dictionary.pad()
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=num_tokens,
                embedding_dim=token_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=freeze_embed,
            )
        self.word_dim = (sum(out_dim for (out_dim, _) in convolutions_params) +
                         token_embed_dim)

        self.layers = nn.ModuleList([])
        for layer in range(num_layers):
            is_layer_bidirectional = self.bidirectional and layer == 0
            if is_layer_bidirectional:
                assert hidden_dim % 2 == 0, (
                    "encoder_hidden_dim must be even if encoder_bidirectional "
                    "(to be divided evenly between directions)"
                )
            self.layers.append(
                rnn.LSTMSequenceEncoder.LSTM(
                    self.word_dim if layer == 0 else hidden_dim,
                    hidden_dim // 2 if is_layer_bidirectional else hidden_dim,
                    num_layers=1,
                    dropout=self.dropout_out,
                    bidirectional=is_layer_bidirectional,
                )
            )

        self.num_layers = len(self.layers)
        self.word_dropout_module = None
        if (
            word_dropout_params
            and word_dropout_params["word_dropout_freq_threshold"] is not None
            and word_dropout_params["word_dropout_freq_threshold"] > 0
        ):
            self.word_dropout_module = word_dropout.WordDropout(
                dictionary, word_dropout_params
            )
Example #7
    def __init__(
        self,
        dictionary,
        num_chars,
        char_embed_dim,
        token_embed_dim,
        freeze_embed=False,
        char_rnn_units=256,
        char_rnn_layers=1,
        hidden_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        residual_level=None,
        bidirectional=False,
        word_dropout_params=None,
    ):

        super().__init__(dictionary)
        self.dictionary = dictionary
        self.num_chars = num_chars
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.residual_level = residual_level
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional
        num_tokens = len(dictionary)
        self.padding_idx = dictionary.pad()

        self.embed_chars = rnn.Embedding(
            num_embeddings=num_chars,
            embedding_dim=char_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
        )

        assert (
            char_rnn_units % 2 == 0
        ), "char_rnn_units must be even (to be divided evenly between directions)"
        self.char_lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
            char_embed_dim,
            char_rnn_units // 2,
            num_layers=char_rnn_layers,
            bidirectional=True,
        )

        self.embed_tokens = None
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=num_tokens,
                embedding_dim=token_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=freeze_embed,
            )

        self.word_dim = char_rnn_units + token_embed_dim

        self.layers = nn.ModuleList([])
        for layer in range(num_layers):
            is_layer_bidirectional = self.bidirectional and layer == 0
            if is_layer_bidirectional:
                assert hidden_dim % 2 == 0, (
                    "encoder_hidden_dim must be even if encoder_bidirectional "
                    "(to be divided evenly between directions)"
                )
            self.layers.append(
                rnn.LSTMSequenceEncoder.LSTM(
                    self.word_dim if layer == 0 else hidden_dim,
                    hidden_dim // 2 if is_layer_bidirectional else hidden_dim,
                    num_layers=1,
                    dropout=self.dropout_out,
                    bidirectional=is_layer_bidirectional,
                )
            )

        self.num_layers = len(self.layers)
        self.word_dropout_module = None
        if (
            word_dropout_params
            and word_dropout_params["word_dropout_freq_threshold"] is not None
            and word_dropout_params["word_dropout_freq_threshold"] > 0
        ):
            self.word_dropout_module = word_dropout.WordDropout(
                dictionary, word_dropout_params
            )

        # disables sorting and word-length thresholding if True
        # (enables ONNX tracing of length-sorted input with batch_size = 1)
        self.onnx_export_model = False
Example #8
    def __init__(
        self,
        dictionary,
        num_chars=50,
        char_embed_dim=32,
        convolutions_params="((128, 3), (128, 5))",
        nonlinear_fn_type="tanh",
        num_highway_layers=0,
        # A value of -1 for char_cnn_output_dim implies no projection layer
        # at the output of the highway network
        char_cnn_output_dim=-1,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
        weights_file=None,
    ):
        super().__init__()
        self.dictionary = dictionary
        self.padding_idx = dictionary.pad()
        self.use_pretrained_weights = use_pretrained_weights

        self.convolutions_params = convolutions_params
        self.num_highway_layers = num_highway_layers
        self.char_embed_dim = char_embed_dim
        self.num_embeddings = num_chars
        self.char_cnn_output_dim = char_cnn_output_dim
        self.filter_dims = sum(f[0] for f in self.convolutions_params)

        # If specified, load the pretrained weights from file
        if use_pretrained_weights:
            self._weight_file = weights_file
            self._finetune_pretrained_weights = finetune_pretrained_weights
            self._load_weights()
        else:
            if nonlinear_fn_type == "tanh":
                nonlinear_fn = nn.Tanh
            elif nonlinear_fn_type == "relu":
                nonlinear_fn = nn.ReLU
            else:
                raise Exception("Invalid nonlinear type: {}".format(nonlinear_fn_type))

            self.embed_chars = rnn.Embedding(
                num_embeddings=num_chars,
                embedding_dim=char_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=False,
            )
            self.convolutions = nn.ModuleList(
                [
                    nn.Sequential(
                        nn.Conv1d(
                            char_embed_dim,
                            num_filters,
                            kernel_size,
                            padding=kernel_size,
                        ),
                        nonlinear_fn(),
                    )
                    for (num_filters, kernel_size) in self.convolutions_params
                ]
            )

            highway_layers = []
            for _ in range(self.num_highway_layers):
                highway_layers.append(HighwayLayer(self.filter_dims))
            self.highway_layers = nn.ModuleList(highway_layers)

            if char_cnn_output_dim != -1:
                self.projection = nn.Linear(
                    self.filter_dims, self.char_cnn_output_dim, bias=True
                )
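
As the comment in the argument list notes, char_cnn_output_dim doubles as a switch: -1 leaves the concatenated filter outputs unprojected, while any other value adds a Linear projection down to that size. The encoder in the next example applies the same convention when it computes word_dim. A minimal illustration of the arithmetic (the helper name is made up):

def char_cnn_word_dim(convolutions_params, char_cnn_output_dim):
    # Made-up helper: -1 means "no projection", so the output width is the
    # sum of all filter counts; otherwise it is the projection size itself.
    filter_dims = sum(out_dim for (out_dim, _) in convolutions_params)
    return filter_dims if char_cnn_output_dim == -1 else char_cnn_output_dim

assert char_cnn_word_dim(((128, 3), (128, 5)), -1) == 256
assert char_cnn_word_dim(((128, 3), (128, 5)), 64) == 64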
Example #9
    def __init__(
        self,
        dictionary,
        num_chars=50,
        unk_only_char_encoding=False,
        embed_dim=32,
        token_embed_dim=256,
        freeze_embed=False,
        normalize_embed=False,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        char_cnn_output_dim=-1,
        hidden_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        residual_level=None,
        bidirectional=False,
        word_dropout_params=None,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
        weights_file=None,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in

        convolutions_params = literal_eval(char_cnn_params)
        self.char_cnn_encoder = char_encoder.CharCNNModel(
            dictionary,
            num_chars,
            embed_dim,
            convolutions_params,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
            char_cnn_output_dim,
            use_pretrained_weights,
            finetune_pretrained_weights,
            weights_file,
        )

        self.embed_tokens = None
        num_tokens = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.unk_idx = dictionary.unk()
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=num_tokens,
                embedding_dim=token_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=freeze_embed,
                normalize_embed=normalize_embed,
            )
        self.word_dim = (
            char_cnn_output_dim
            if char_cnn_output_dim != -1
            else sum(out_dim for (out_dim, _) in convolutions_params)
        )
        self.token_embed_dim = token_embed_dim

        self.unk_only_char_encoding = unk_only_char_encoding
        if self.unk_only_char_encoding:
            assert char_cnn_output_dim == token_embed_dim, (
                "char_cnn_output_dim (%d) must equal to token_embed_dim (%d)"
                % (char_cnn_output_dim, token_embed_dim)
            )
            self.word_dim = token_embed_dim
        else:
            self.word_dim = self.word_dim + token_embed_dim

        self.bilstm = rnn.BiLSTM(
            num_layers=num_layers,
            bidirectional=bidirectional,
            embed_dim=self.word_dim,
            hidden_dim=hidden_dim,
            dropout=dropout_out,
            residual_level=residual_level,
        )

        # Variable tracker
        self.tracker = VariableTracker()
        # Initialize adversarial mode
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)