Example #1
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 activation='relu',
                 initial_method=None):
        super(Conv, self).__init__()
        self.conv = nn.Conv1d(in_channels=in_channels,
                              out_channels=out_channels,
                              kernel_size=kernel_size,
                              stride=stride,
                              padding=padding,
                              dilation=dilation,
                              groups=groups,
                              bias=bias)
        # xavier_uniform_(self.conv.weight)

        activations = {'relu': nn.ReLU(), 'tanh': nn.Tanh()}
        if activation in activations:
            self.activation = activations[activation]
        else:
            raise Exception('Should choose activation function from: ' +
                            ', '.join([x for x in activations]))
        initial_parameter(self, initial_method)
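For context, a minimal standalone sketch of what this wrapper assembles: an nn.Conv1d followed by the chosen activation. The channel counts, kernel size, and input shape below are illustrative assumptions, not values from the snippet.

import torch
import torch.nn as nn

# Illustrative shapes only: 64 input channels, 128 output channels, kernel size 3.
conv = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
activation = nn.ReLU()

x = torch.randn(8, 64, 50)      # (batch, in_channels, sequence_length)
out = activation(conv(x))
print(out.shape)                # torch.Size([8, 128, 50]); padding=1 preserves the length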
Example #2
    def __init__(self, size_layer, activation='relu', initial_method=None, dropout=0.0):
        """Multilayer Perceptrons as a decoder

        :param size_layer: list of int, define the size of MLP layers.
        :param activation: str or function, the activation function for hidden layers.
        :param initial_method: str, the name of init method.
        :param dropout: float, the probability of dropout.

        .. note::
            There is no activation function applying on output layer.

        """
        super(MLP, self).__init__()
        self.hiddens = nn.ModuleList()
        self.output = None
        for i in range(1, len(size_layer)):
            if i + 1 == len(size_layer):
                self.output = nn.Linear(size_layer[i-1], size_layer[i])
            else:
                self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i]))

        self.dropout = nn.Dropout(p=dropout)

        actives = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
        }
        if activation in actives:
            self.hidden_active = actives[activation]
        elif callable(activation):
            self.hidden_active = activation
        else:
            raise ValueError("should set activation correctly: {}".format(activation))
        initial_parameter(self, initial_method)
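A standalone sketch of the layer split this constructor produces, assuming a hypothetical size_layer=[256, 128, 10]: every entry but the last becomes a hidden layer, and the last pair becomes the output layer, which gets no activation.

import torch
import torch.nn as nn

size_layer = [256, 128, 10]                      # assumed sizes: input, hidden, output
hiddens = nn.ModuleList(
    nn.Linear(size_layer[i - 1], size_layer[i]) for i in range(1, len(size_layer) - 1)
)
output = nn.Linear(size_layer[-2], size_layer[-1])

x = torch.randn(32, size_layer[0])
for layer in hiddens:
    x = torch.relu(layer(x))                     # activation on hidden layers only
logits = output(x)                               # no activation on the output layer
print(logits.shape)                              # torch.Size([32, 10])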
Example #3
    def __init__(self,
                 size_layer,
                 activation='relu',
                 initial_method=None,
                 dropout=0.0):
        super(MLP, self).__init__()
        self.hiddens = nn.ModuleList()
        self.output = None
        for i in range(1, len(size_layer)):
            if i + 1 == len(size_layer):
                self.output = nn.Linear(size_layer[i - 1], size_layer[i])
            else:
                self.hiddens.append(nn.Linear(size_layer[i - 1],
                                              size_layer[i]))

        self.dropout = nn.Dropout(p=dropout)

        actives = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
        }
        if activation in actives:
            self.hidden_active = actives[activation]
        elif callable(activation):
            self.hidden_active = activation
        else:
            raise ValueError(
                "should set activation correctly: {}".format(activation))
        initial_parameter(self, initial_method)
Example #4
    def __init__(self, in_channels, out_channels, kernel_sizes,
                 stride=1, padding=0, dilation=1,
                 groups=1, bias=True, activation="relu", initial_method=None):
        super(ConvMaxpool, self).__init__()

        # convolution
        if isinstance(kernel_sizes, (list, tuple, int)):
            if isinstance(kernel_sizes, int):
                out_channels = [out_channels]
                kernel_sizes = [kernel_sizes]

            self.convs = nn.ModuleList([nn.Conv1d(
                in_channels=in_channels,
                out_channels=oc,
                kernel_size=ks,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=bias)
                for oc, ks in zip(out_channels, kernel_sizes)])

        else:
            raise Exception(
                'Incorrect kernel sizes: should be list, tuple or int')

        # activation function
        if activation == 'relu':
            self.activation = F.relu
        else:
            raise Exception(
                "Undefined activation function: choose from: relu")

        initial_parameter(self, initial_method)
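A standalone sketch of the per-kernel-size convolution list built above, followed by the max-over-time pooling the class name suggests. The forward pass is not part of the snippet, so the pooling step is an assumption, and the shapes are illustrative.

import torch
import torch.nn as nn
import torch.nn.functional as F

in_channels, out_channels, kernel_sizes = 100, [40, 30, 30], [3, 4, 5]
convs = nn.ModuleList(
    nn.Conv1d(in_channels, oc, ks) for oc, ks in zip(out_channels, kernel_sizes)
)

x = torch.randn(8, in_channels, 25)                          # (batch, channels, seq_len)
pooled = [F.relu(conv(x)).max(dim=-1)[0] for conv in convs]  # max over time per kernel size
print(torch.cat(pooled, dim=-1).shape)                       # torch.Size([8, 100]) = sum(out_channels)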
Example #5
    def __init__(
        self,
        input_size,
        attention_unit=300,
        attention_hops=10,
        drop=0.5,
        initial_method=None,
    ):
        r"""
        
        :param int input_size: 输入tensor的hidden维度
        :param int attention_unit: 输出tensor的hidden维度
        :param int attention_hops:
        :param float drop: dropout概率,默认值为0.5
        :param str initial_method: 初始化参数方法
        """
        super(SelfAttention, self).__init__()

        self.attention_hops = attention_hops
        self.ws1 = nn.Linear(input_size, attention_unit, bias=False)
        self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False)
        self.I = torch.eye(attention_hops, requires_grad=False)
        self.I_origin = self.I
        self.drop = nn.Dropout(drop)
        self.tanh = nn.Tanh()
        initial_parameter(self, initial_method)
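The forward computation is not shown, but the parameters above (ws1, ws2, and an identity matrix with attention_hops rows) match the usual multi-hop self-attention recipe; a standalone sketch under that assumption, with an assumed input_size:

import torch
import torch.nn as nn

input_size, attention_unit, attention_hops = 256, 300, 10
ws1 = nn.Linear(input_size, attention_unit, bias=False)
ws2 = nn.Linear(attention_unit, attention_hops, bias=False)
I = torch.eye(attention_hops)

H = torch.randn(4, 30, input_size)                          # (batch, seq_len, hidden)
A = torch.softmax(ws2(torch.tanh(ws1(H))), dim=1)           # (batch, seq_len, hops)
M = torch.bmm(A.transpose(1, 2), H)                         # (batch, hops, hidden)
penalty = torch.norm(torch.bmm(A.transpose(1, 2), A) - I) ** 2  # pushes hops toward different positions
print(M.shape)                                              # torch.Size([4, 10, 256])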
Example #6
    def __init__(self,
                 num_tags,
                 include_start_end_trans=False,
                 allowed_transitions=None,
                 initial_method=None):

        super(ConditionalRandomField, self).__init__()

        self.include_start_end_trans = include_start_end_trans
        self.num_tags = num_tags

        # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
        self.trans_m = nn.Parameter(torch.randn(num_tags, num_tags))
        if self.include_start_end_trans:
            self.start_scores = nn.Parameter(torch.randn(num_tags))
            self.end_scores = nn.Parameter(torch.randn(num_tags))

        if allowed_transitions is None:
            constrain = torch.zeros(num_tags + 2, num_tags + 2)
        else:
            constrain = torch.full((num_tags + 2, num_tags + 2),
                                   fill_value=-10000.0,
                                   dtype=torch.float)
            for from_tag_id, to_tag_id in allowed_transitions:
                constrain[from_tag_id, to_tag_id] = 0
        self._constrain = nn.Parameter(constrain, requires_grad=False)

        initial_parameter(self, initial_method)
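The constraint matrix above is the only part that depends on allowed_transitions; a standalone sketch with num_tags=3 and a purely hypothetical transition list (indices num_tags and num_tags + 1 stand for the extra start and end positions):

import torch

num_tags = 3
allowed_transitions = [(3, 0), (0, 1), (1, 2), (2, 4)]   # hypothetical: start->0, 0->1, 1->2, 2->end

constrain = torch.full((num_tags + 2, num_tags + 2), fill_value=-10000.0, dtype=torch.float)
for from_tag_id, to_tag_id in allowed_transitions:
    constrain[from_tag_id, to_tag_id] = 0
print(constrain)   # 0 where a transition is allowed, -10000 everywhere else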
Example #7
    def __init__(self,
                 mode,
                 Cell,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 bias=True,
                 batch_first=False,
                 input_dropout=0,
                 hidden_dropout=0,
                 bidirectional=False):
        super(VarRNNBase, self).__init__()
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.input_dropout = input_dropout
        self.hidden_dropout = hidden_dropout
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self._all_cells = nn.ModuleList()
        for layer in range(self.num_layers):
            for direction in range(self.num_directions):
                input_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                cell = Cell(input_size, self.hidden_size, bias)
                self._all_cells.append(
                    VarRnnCellWrapper(cell, self.hidden_size, input_dropout,
                                      hidden_dropout))
        initial_parameter(self)
Example #8
    def __init__(self,
                 input_size,
                 output_size,
                 bias=True,
                 initial_method=None):
        super(Linear, self).__init__()
        self.linear = nn.Linear(input_size, output_size, bias)
        initial_parameter(self, initial_method)
Example #9
    def __init__(self, hidden_size, bias=True):
        super(ArcBiaffine, self).__init__()
        self.U = nn.Parameter(torch.Tensor(hidden_size, hidden_size), requires_grad=True)
        self.has_bias = bias
        if self.has_bias:
            self.bias = nn.Parameter(torch.Tensor(hidden_size), requires_grad=True)
        else:
            self.register_parameter("bias", None)
        initial_parameter(self)
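The forward pass is not shown, but U and bias shaped this way fit the standard biaffine arc-scoring formula; a standalone sketch under that assumption (randn stands in for the real initialization done by initial_parameter):

import torch
import torch.nn as nn

hidden_size = 8
U = nn.Parameter(torch.randn(hidden_size, hidden_size))
bias = nn.Parameter(torch.randn(hidden_size))

head = torch.randn(2, 5, hidden_size)              # (batch, seq_len, hidden)
dep = torch.randn(2, 5, hidden_size)
scores = dep @ U @ head.transpose(-1, -2)          # (batch, dependent position, head position)
scores = scores + head.matmul(bias).unsqueeze(1)   # per-head bias term
print(scores.shape)                                # torch.Size([2, 5, 5])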
Example #10
    def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None):
        super(LSTMCharEmbedding, self).__init__()
        self.hidden_size = char_emb_size if hidden_size is None else hidden_size

        self.lstm = nn.LSTM(input_size=char_emb_size,
                            hidden_size=self.hidden_size,
                            num_layers=1,
                            bias=True,
                            batch_first=True)
        initial_parameter(self, initial_method)
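A minimal usage sketch of the character-level LSTM above; the batch of already-embedded character sequences is an assumed input layout.

import torch
import torch.nn as nn

char_emb_size = 50
lstm = nn.LSTM(input_size=char_emb_size, hidden_size=char_emb_size,
               num_layers=1, bias=True, batch_first=True)

chars = torch.randn(16, 12, char_emb_size)   # (words, chars_per_word, char_emb_size)
outputs, (h_n, c_n) = lstm(chars)
print(h_n.shape)                             # torch.Size([1, 16, 50]): one hidden state per word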
Example #11
    def __init__(self,
                 Cell,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 bias=True,
                 batch_first=False,
                 layer_dropout=0,
                 step_dropout=0,
                 bidirectional=False,
                 initial_method=None,
                 **kwargs):
        """
        :param Cell:
        :param input_size:
        :param hidden_size:
        :param num_layers:
        :param bias:
        :param batch_first:
        :param layer_dropout:
        :param step_dropout:
        :param bidirectional:
        :param kwargs:
        """

        super(MaskedRNNBase, self).__init__()
        self.Cell = Cell
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.layer_dropout = layer_dropout
        self.step_dropout = step_dropout
        self.bidirectional = bidirectional
        num_directions = 2 if bidirectional else 1

        self.all_cells = []
        for layer in range(num_layers):  # initialize all cells
            for direction in range(num_directions):
                layer_input_size = input_size if layer == 0 else hidden_size * num_directions

                cell = self.Cell(layer_input_size, hidden_size, self.bias,
                                 **kwargs)
                self.all_cells.append(cell)
                self.add_module('cell%d' %
                                (layer * num_directions + direction),
                                cell)  # Max's code is really well written
        initial_parameter(self, initial_method)
Example #12
    def __init__(self,
                 char_emb_size=50,
                 feature_maps=(40, 30, 30),
                 kernels=(3, 4, 5),
                 initial_method=None):
        super(ConvCharEmbedding, self).__init__()
        self.convs = nn.ModuleList([
            nn.Conv2d(1,
                      feature_maps[i],
                      kernel_size=(char_emb_size, kernels[i]),
                      bias=True,
                      padding=(0, 4)) for i in range(len(kernels))
        ])

        initial_parameter(self, initial_method)
Example #13
    def __init__(self,
                word_vocab_size,
                word_emb_dim,
                pos_vocab_size,
                pos_emb_dim,
                rnn_layers,
                rnn_hidden_size,
                arc_mlp_size,
                label_mlp_size,
                num_label,
                dropout,
                use_var_lstm=False,
                use_greedy_infer=False):

        super(BiaffineParser, self).__init__()
        self.word_embedding = nn.Embedding(num_embeddings=word_vocab_size, embedding_dim=word_emb_dim)
        self.pos_embedding = nn.Embedding(num_embeddings=pos_vocab_size, embedding_dim=pos_emb_dim)
        if use_var_lstm:
            self.lstm = VarLSTM(input_size=word_emb_dim + pos_emb_dim,
                                hidden_size=rnn_hidden_size,
                                num_layers=rnn_layers,
                                bias=True,
                                batch_first=True,
                                input_dropout=dropout,
                                hidden_dropout=dropout,
                                bidirectional=True)
        else:
            self.lstm = nn.LSTM(input_size=word_emb_dim + pos_emb_dim,
                                hidden_size=rnn_hidden_size,
                                num_layers=rnn_layers,
                                bias=True,
                                batch_first=True,
                                dropout=dropout,
                                bidirectional=True)

        rnn_out_size = 2 * rnn_hidden_size
        self.arc_head_mlp = nn.Sequential(nn.Linear(rnn_out_size, arc_mlp_size),
                                          nn.ELU())
        self.arc_dep_mlp = copy.deepcopy(self.arc_head_mlp)
        self.label_head_mlp = nn.Sequential(nn.Linear(rnn_out_size, label_mlp_size),
                                            nn.ELU())
        self.label_dep_mlp = copy.deepcopy(self.label_head_mlp)
        self.arc_predictor = ArcBiaffine(arc_mlp_size, bias=True)
        self.label_predictor = LabelBilinear(label_mlp_size, label_mlp_size, num_label, bias=True)
        self.normal_dropout = nn.Dropout(p=dropout)
        self.timestep_dropout = TimestepDropout(p=dropout)
        self.use_greedy_infer = use_greedy_infer
        initial_parameter(self)
Example #14
    def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None):
        """
        Character Level Word Embedding
        :param char_emb_size: the size of character level embedding. Default: 50
            say 26 characters, each embedded to 50 dim vector, then the input_size is 50.
        :param feature_maps: tuple of int. The length of the tuple is the number of convolution operations
            over characters. The i-th integer is the number of filters (dim of out channels) for the i-th
            convolution.
        :param kernels: tuple of int. The width of each kernel.
        """
        super(ConvCharEmbedding, self).__init__()
        self.convs = nn.ModuleList([
            nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4))
            for i in range(len(kernels))])

        initial_parameter(self, initial_method)
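A standalone sketch of one of those character convolutions; the batch size and word length below are assumptions, and the input layout is (batch, 1, char_emb_size, word_length).

import torch
import torch.nn as nn

char_emb_size, n_filters, kernel_width = 50, 40, 3
conv = nn.Conv2d(1, n_filters, kernel_size=(char_emb_size, kernel_width),
                 bias=True, padding=(0, 4))

x = torch.randn(16, 1, char_emb_size, 12)
out = conv(x)        # the height collapses to 1; padding=(0, 4) widens the time axis
print(out.shape)     # torch.Size([16, 40, 1, 18])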
Example #15
    def __init__(self,
                 input_size,
                 hidden_size=100,
                 num_layers=1,
                 dropout=0.0,
                 bidirectional=False,
                 initial_method=None):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers,
                            bias=True,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=bidirectional)
        initial_parameter(self, initial_method)
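A minimal usage sketch of the wrapped LSTM; the input size, hidden size, and batch shape are assumptions.

import torch
import torch.nn as nn

lstm = nn.LSTM(300, 100, 1, bias=True, batch_first=True,
               dropout=0.0, bidirectional=True)

x = torch.randn(8, 20, 300)          # (batch, seq_len, input_size)
output, (h_n, c_n) = lstm(x)
print(output.shape)                  # torch.Size([8, 20, 200]): 2 * hidden_size when bidirectional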
Example #16
    def __init__(self,
                 tag_size,
                 include_start_end_trans=False,
                 initial_method=None):
        super(ConditionalRandomField, self).__init__()

        self.include_start_end_trans = include_start_end_trans
        self.tag_size = tag_size

        # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
        self.trans_m = nn.Parameter(torch.randn(tag_size, tag_size))
        if self.include_start_end_trans:
            self.start_scores = nn.Parameter(torch.randn(tag_size))
            self.end_scores = nn.Parameter(torch.randn(tag_size))

        # self.reset_parameter()
        initial_parameter(self, initial_method)
Example #17
    def __init__(
        self,
        input_size,
        attention_unit=300,
        attention_hops=10,
        drop=0.5,
        initial_method=None,
    ):
        super(SelfAttention, self).__init__()

        self.attention_hops = attention_hops
        self.ws1 = nn.Linear(input_size, attention_unit, bias=False)
        self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False)
        self.I = torch.eye(attention_hops, requires_grad=False)
        self.I_origin = self.I
        self.drop = nn.Dropout(drop)
        self.tanh = nn.Tanh()
        initial_parameter(self, initial_method)
Example #18
    def __init__(self,
                 tag_size,
                 include_start_end_trans=True,
                 initial_method=None):
        """
        :param tag_size: int, num of tags
        :param include_start_end_trans: bool, whether to include start/end tag
        """
        super(ConditionalRandomField, self).__init__()

        self.include_start_end_trans = include_start_end_trans
        self.tag_size = tag_size

        # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
        self.transition_m = nn.Parameter(torch.randn(tag_size, tag_size))
        if self.include_start_end_trans:
            self.start_scores = nn.Parameter(torch.randn(tag_size))
            self.end_scores = nn.Parameter(torch.randn(tag_size))

        # self.reset_parameter()
        initial_parameter(self, initial_method)
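A standalone sketch of how these parameters score a tag path. Emission scores and the forward algorithm are omitted since they are not part of the snippet, and the path is illustrative.

import torch
import torch.nn as nn

tag_size = 5
trans_m = nn.Parameter(torch.randn(tag_size, tag_size))   # trans_m[i, j]: score of moving from tag i to tag j
start_scores = nn.Parameter(torch.randn(tag_size))
end_scores = nn.Parameter(torch.randn(tag_size))

path = [2, 0, 3]                                          # illustrative tag sequence
score = start_scores[path[0]] + end_scores[path[-1]]
score = score + sum(trans_m[a, b] for a, b in zip(path[:-1], path[1:]))
print(float(score))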
Example #19
    def __init__(self,
                 Cell,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 bias=True,
                 batch_first=False,
                 dropout=(0, 0),
                 bidirectional=False,
                 initializer=None,
                 initial_method=None,
                 **kwargs):

        super(VarMaskedRNNBase, self).__init__()
        self.Cell = Cell
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.bidirectional = bidirectional
        self.lstm = False
        num_directions = 2 if bidirectional else 1

        self.all_cells = []
        for layer in range(num_layers):
            for direction in range(num_directions):
                layer_input_size = input_size if layer == 0 else hidden_size * num_directions

                cell = self.Cell(layer_input_size,
                                 hidden_size,
                                 self.bias,
                                 p=dropout,
                                 initializer=initializer,
                                 **kwargs)
                self.all_cells.append(cell)
                self.add_module(
                    'cell%d' % (layer * num_directions + direction), cell)
        initial_parameter(self, initial_method)
Example #20
    def __init__(self,
                 input_size,
                 hidden_size,
                 bias=True,
                 p=(0.5, 0.5),
                 initializer=None,
                 initial_method=None):
        super(VarFastLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(torch.Tensor(4 * hidden_size, input_size))
        self.weight_hh = Parameter(torch.Tensor(4 * hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(4 * hidden_size))
            self.bias_hh = Parameter(torch.Tensor(4 * hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)

        self.initializer = default_initializer(
            self.hidden_size) if initializer is None else initializer
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError(
                "input dropout probability has to be between 0 and 1, "
                "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError(
                "hidden state dropout probability has to be between 0 and 1, "
                "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None
        initial_parameter(self, initial_method)
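The noise_in and noise_hidden fields support variational dropout: one Bernoulli mask sampled per sequence and reused at every time step. The sampling code is not part of the snippet, so the sketch below is an assumption about how such a mask is typically built and applied.

import torch

batch, hidden_size, p_hidden = 4, 8, 0.5
# Sample one mask per sequence and rescale so the expected activation is unchanged.
noise_hidden = torch.bernoulli(torch.full((batch, hidden_size), 1 - p_hidden)) / (1 - p_hidden)

h = torch.randn(batch, hidden_size)
for _ in range(10):                  # every time step reuses the same mask
    h = torch.tanh(h) * noise_hidden
print(h.shape)                       # torch.Size([4, 8])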