def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True,
             activation='relu', initial_method=None):
    super(Conv, self).__init__()
    self.conv = nn.Conv1d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias)
    # xavier_uniform_(self.conv.weight)

    activations = {'relu': nn.ReLU(), 'tanh': nn.Tanh()}
    if activation in activations:
        self.activation = activations[activation]
    else:
        raise Exception(
            'Should choose activation function from: '
            + ', '.join([x for x in activations]))
    initial_parameter(self, initial_method)
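# Hedged usage sketch (not from the original source). It exercises only the
# attributes the constructor above defines (self.conv, self.activation); how
# the real forward chains them is an assumption.
import torch

conv = Conv(in_channels=100, out_channels=64, kernel_size=3, padding=1)
x = torch.randn(8, 100, 50)         # (batch, in_channels, seq_len)
y = conv.activation(conv.conv(x))   # (8, 64, 50); padding=1 preserves the length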
def __init__(self, size_layer, activation='relu', initial_method=None, dropout=0.0):
    """Multilayer Perceptrons as a decoder

    :param size_layer: list of int, define the size of MLP layers.
    :param activation: str or function, the activation function for hidden layers.
    :param initial_method: str, the name of init method.
    :param dropout: float, the probability of dropout.

    .. note::
        There is no activation function applying on output layer.
    """
    super(MLP, self).__init__()
    self.hiddens = nn.ModuleList()
    self.output = None
    for i in range(1, len(size_layer)):
        if i + 1 == len(size_layer):
            self.output = nn.Linear(size_layer[i - 1], size_layer[i])
        else:
            self.hiddens.append(nn.Linear(size_layer[i - 1], size_layer[i]))
    self.dropout = nn.Dropout(p=dropout)

    actives = {
        'relu': nn.ReLU(),
        'tanh': nn.Tanh(),
    }
    if activation in actives:
        self.hidden_active = actives[activation]
    elif callable(activation):  # isinstance(activation, callable) raises TypeError
        self.hidden_active = activation
    else:
        raise ValueError("should set activation correctly: {}".format(activation))
    initial_parameter(self, initial_method)
def __init__(self, size_layer, activation='relu', initial_method=None, dropout=0.0):
    super(MLP, self).__init__()
    self.hiddens = nn.ModuleList()
    self.output = None
    for i in range(1, len(size_layer)):
        if i + 1 == len(size_layer):
            self.output = nn.Linear(size_layer[i - 1], size_layer[i])
        else:
            self.hiddens.append(nn.Linear(size_layer[i - 1], size_layer[i]))
    self.dropout = nn.Dropout(p=dropout)

    actives = {
        'relu': nn.ReLU(),
        'tanh': nn.Tanh(),
    }
    if activation in actives:
        self.hidden_active = actives[activation]
    elif callable(activation):  # isinstance(activation, callable) raises TypeError
        self.hidden_active = activation
    else:
        raise ValueError("should set activation correctly: {}".format(activation))
    initial_parameter(self, initial_method)
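# Hedged usage sketch covering the two MLP constructors above: MLP([64, 32, 10])
# yields one hidden Linear(64, 32) plus an output Linear(32, 10). The order of
# activation and dropout in the real forward is an assumption; per the docstring,
# no activation is applied to the output layer.
import torch

mlp = MLP([64, 32, 10], activation='relu', dropout=0.1)
x = torch.randn(4, 64)
for layer in mlp.hiddens:
    x = mlp.dropout(mlp.hidden_active(layer(x)))
logits = mlp.output(x)  # (4, 10): raw scores, no output activation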
def __init__(self, in_channels, out_channels, kernel_sizes, stride=1,
             padding=0, dilation=1, groups=1, bias=True,
             activation="relu", initial_method=None):
    super(ConvMaxpool, self).__init__()

    # convolution
    if isinstance(kernel_sizes, (list, tuple, int)):
        if isinstance(kernel_sizes, int):
            out_channels = [out_channels]
            kernel_sizes = [kernel_sizes]
        self.convs = nn.ModuleList([nn.Conv1d(
            in_channels=in_channels,
            out_channels=oc,
            kernel_size=ks,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
            for oc, ks in zip(out_channels, kernel_sizes)])
    else:
        raise Exception('Incorrect kernel sizes: should be list, tuple or int')

    # activation function
    if activation == 'relu':
        self.activation = F.relu
    else:
        raise Exception("Undefined activation function: choose from: relu")

    initial_parameter(self, initial_method)
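# Hedged usage sketch: with kernel_sizes=(3, 4, 5) the constructor above builds
# one Conv1d per kernel width (the classic multi-width CNN encoder). The
# max-pooling over positions shown here is what the class name implies, but the
# forward is not shown, so treat it as an assumption.
import torch

cm = ConvMaxpool(in_channels=128, out_channels=(40, 30, 30), kernel_sizes=(3, 4, 5))
x = torch.randn(8, 128, 30)                     # (batch, in_channels, seq_len)
feats = [cm.activation(conv(x)).max(dim=-1)[0]  # max over sequence positions
         for conv in cm.convs]
out = torch.cat(feats, dim=-1)                  # (8, 40 + 30 + 30)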
def __init__(self, input_size, attention_unit=300, attention_hops=10,
             drop=0.5, initial_method=None):
    r"""
    :param int input_size: hidden dimension of the input tensor
    :param int attention_unit: dimension of the intermediate attention projection (output of ws1)
    :param int attention_hops: number of attention hops (rows of the attention matrix)
    :param float drop: dropout probability, 0.5 by default
    :param str initial_method: parameter initialization method
    """
    super(SelfAttention, self).__init__()
    self.attention_hops = attention_hops
    self.ws1 = nn.Linear(input_size, attention_unit, bias=False)
    self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False)
    self.I = torch.eye(attention_hops, requires_grad=False)
    self.I_origin = self.I
    self.drop = nn.Dropout(drop)
    self.tanh = nn.Tanh()
    initial_parameter(self, initial_method)
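# Hedged sketch of the computation these submodules support: the structured
# self-attention of Lin et al. (2017), A = softmax(ws2(tanh(ws1(H)))), one
# attention distribution per hop; self.I backs the ||A A^T - I|| orthogonality
# penalty. The real forward (masking, penalty term) is not shown here.
import torch

attn = SelfAttention(input_size=256, attention_unit=300, attention_hops=10)
H = torch.randn(4, 50, 256)                                 # (batch, seq_len, hidden)
A = torch.softmax(attn.ws2(attn.tanh(attn.ws1(H))), dim=1)  # (4, 50, 10)
M = A.transpose(1, 2).bmm(H)                                # (4, 10, 256): one vector per hop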
def __init__(self, num_tags, include_start_end_trans=False,
             allowed_transitions=None, initial_method=None):
    super(ConditionalRandomField, self).__init__()
    self.include_start_end_trans = include_start_end_trans
    self.num_tags = num_tags

    # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
    self.trans_m = nn.Parameter(torch.randn(num_tags, num_tags))
    if self.include_start_end_trans:
        self.start_scores = nn.Parameter(torch.randn(num_tags))
        self.end_scores = nn.Parameter(torch.randn(num_tags))

    if allowed_transitions is None:
        constrain = torch.zeros(num_tags + 2, num_tags + 2)
    else:
        constrain = torch.full((num_tags + 2, num_tags + 2),
                               fill_value=-10000.0, dtype=torch.float)
        for from_tag_id, to_tag_id in allowed_transitions:
            constrain[from_tag_id, to_tag_id] = 0
    self._constrain = nn.Parameter(constrain, requires_grad=False)

    initial_parameter(self, initial_method)
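# Hedged sketch of building allowed_transitions: each (from_tag_id, to_tag_id)
# pair gets constraint score 0, everything else stays at -10000 (effectively
# forbidden). Reading indices num_tags and num_tags + 1 as the virtual start/end
# tags is an assumption based on the (num_tags + 2)-sized constraint matrix.
num_tags = 3                        # e.g. O=0, B=1, I=2 in a BIO scheme
start, end = num_tags, num_tags + 1
allowed = ([(start, 0), (start, 1)]                 # may begin with O or B
           + [(0, 0), (0, 1), (1, 0), (1, 1), (1, 2),
              (2, 0), (2, 1), (2, 2)]               # every pair except O -> I
           + [(0, end), (1, end), (2, end)])        # any real tag may end
crf = ConditionalRandomField(num_tags, include_start_end_trans=True,
                             allowed_transitions=allowed)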
def __init__(self, mode, Cell, input_size, hidden_size, num_layers=1,
             bias=True, batch_first=False, input_dropout=0,
             hidden_dropout=0, bidirectional=False):
    super(VarRNNBase, self).__init__()
    self.mode = mode
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.batch_first = batch_first
    self.input_dropout = input_dropout
    self.hidden_dropout = hidden_dropout
    self.bidirectional = bidirectional
    self.num_directions = 2 if bidirectional else 1
    self._all_cells = nn.ModuleList()
    for layer in range(self.num_layers):
        for direction in range(self.num_directions):
            input_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
            cell = Cell(input_size, self.hidden_size, bias)
            self._all_cells.append(VarRnnCellWrapper(
                cell, self.hidden_size, input_dropout, hidden_dropout))
    initial_parameter(self)
def __init__(self, input_size, output_size, bias=True, initial_method=None):
    super(Linear, self).__init__()
    self.linear = nn.Linear(input_size, output_size, bias)
    initial_parameter(self, initial_method)
def __init__(self, hidden_size, bias=True):
    super(ArcBiaffine, self).__init__()
    self.U = nn.Parameter(torch.Tensor(hidden_size, hidden_size), requires_grad=True)
    self.has_bias = bias
    if self.has_bias:
        self.bias = nn.Parameter(torch.Tensor(hidden_size), requires_grad=True)
    else:
        self.register_parameter("bias", None)
    initial_parameter(self)
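# Hedged sketch of the biaffine arc score these parameters support:
# score(head, dep) = head^T U dep (+ bias . head). The computation matches the
# shapes of U and bias; the class's actual forward is not shown, so this is
# illustrative only.
import torch

arc = ArcBiaffine(hidden_size=500)
head = torch.randn(2, 20, 500)    # (batch, seq_len, hidden) head representations
dep = torch.randn(2, 20, 500)     # dependent representations
scores = head.matmul(arc.U).bmm(dep.transpose(1, 2))      # (2, 20, 20)
if arc.has_bias:
    scores = scores + head.matmul(arc.bias).unsqueeze(2)  # broadcast over dependents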
def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None):
    super(LSTMCharEmbedding, self).__init__()
    self.hidden_size = char_emb_size if hidden_size is None else hidden_size

    self.lstm = nn.LSTM(input_size=char_emb_size,
                        hidden_size=self.hidden_size,
                        num_layers=1,
                        bias=True,
                        batch_first=True)
    initial_parameter(self, initial_method)
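# Hedged usage sketch: the wrapped LSTM is batch_first, so each word's character
# sequence is one batch element; taking the final hidden state as the word
# vector is the usual design and is assumed here.
import torch

char_emb = LSTMCharEmbedding(char_emb_size=50, hidden_size=64)
chars = torch.randn(32, 10, 50)     # (num_words, max_word_len, char_emb_size)
_, (h_n, _) = char_emb.lstm(chars)  # h_n: (num_layers, num_words, hidden) = (1, 32, 64)
word_vecs = h_n.squeeze(0)          # (32, 64): one vector per word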
def __init__(self, Cell, input_size, hidden_size, num_layers=1, bias=True,
             batch_first=False, layer_dropout=0, step_dropout=0,
             bidirectional=False, initial_method=None, **kwargs):
    """
    :param Cell: the RNN cell class to stack (e.g. an LSTM or GRU cell)
    :param input_size: dimension of the input features
    :param hidden_size: dimension of the hidden state
    :param num_layers: number of stacked layers
    :param bias: whether the cells use bias terms
    :param batch_first: whether input is (batch, seq, feature)
    :param layer_dropout: dropout applied between layers
    :param step_dropout: dropout applied between timesteps
    :param bidirectional: whether to run the RNN in both directions
    :param kwargs: extra arguments forwarded to the cell constructor
    """
    super(MaskedRNNBase, self).__init__()
    self.Cell = Cell
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.batch_first = batch_first
    self.layer_dropout = layer_dropout
    self.step_dropout = step_dropout
    self.bidirectional = bidirectional
    num_directions = 2 if bidirectional else 1

    self.all_cells = []
    for layer in range(num_layers):  # initialize all cells
        for direction in range(num_directions):
            layer_input_size = input_size if layer == 0 else hidden_size * num_directions
            cell = self.Cell(layer_input_size, hidden_size, self.bias, **kwargs)
            self.all_cells.append(cell)
            self.add_module('cell%d' % (layer * num_directions + direction), cell)  # Max's code really is nicely written
    initial_parameter(self, initial_method)
def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30),
             kernels=(3, 4, 5), initial_method=None):
    super(ConvCharEmbedding, self).__init__()
    self.convs = nn.ModuleList([
        nn.Conv2d(1, feature_maps[i],
                  kernel_size=(char_emb_size, kernels[i]),
                  bias=True, padding=(0, 4))
        for i in range(len(kernels))])
    initial_parameter(self, initial_method)
def __init__(self, word_vocab_size, word_emb_dim, pos_vocab_size, pos_emb_dim,
             rnn_layers, rnn_hidden_size, arc_mlp_size, label_mlp_size,
             num_label, dropout, use_var_lstm=False, use_greedy_infer=False):
    super(BiaffineParser, self).__init__()
    self.word_embedding = nn.Embedding(num_embeddings=word_vocab_size,
                                       embedding_dim=word_emb_dim)
    self.pos_embedding = nn.Embedding(num_embeddings=pos_vocab_size,
                                      embedding_dim=pos_emb_dim)
    if use_var_lstm:
        self.lstm = VarLSTM(input_size=word_emb_dim + pos_emb_dim,
                            hidden_size=rnn_hidden_size,
                            num_layers=rnn_layers,
                            bias=True,
                            batch_first=True,
                            input_dropout=dropout,
                            hidden_dropout=dropout,
                            bidirectional=True)
    else:
        self.lstm = nn.LSTM(input_size=word_emb_dim + pos_emb_dim,
                            hidden_size=rnn_hidden_size,
                            num_layers=rnn_layers,
                            bias=True,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=True)

    rnn_out_size = 2 * rnn_hidden_size
    self.arc_head_mlp = nn.Sequential(nn.Linear(rnn_out_size, arc_mlp_size),
                                      nn.ELU())
    self.arc_dep_mlp = copy.deepcopy(self.arc_head_mlp)
    self.label_head_mlp = nn.Sequential(nn.Linear(rnn_out_size, label_mlp_size),
                                        nn.ELU())
    self.label_dep_mlp = copy.deepcopy(self.label_head_mlp)
    self.arc_predictor = ArcBiaffine(arc_mlp_size, bias=True)
    self.label_predictor = LabelBilinear(label_mlp_size, label_mlp_size,
                                         num_label, bias=True)
    self.normal_dropout = nn.Dropout(p=dropout)
    self.timestep_dropout = TimestepDropout(p=dropout)
    self.use_greedy_infer = use_greedy_infer
    initial_parameter(self)
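# Hedged instantiation sketch; all hyperparameters below are illustrative, not
# values from the original source. Note the design choice above: copy.deepcopy
# gives the dep MLPs the same architecture as their head counterparts but with
# independent weights.
parser = BiaffineParser(word_vocab_size=10000, word_emb_dim=100,
                        pos_vocab_size=50, pos_emb_dim=50,
                        rnn_layers=3, rnn_hidden_size=256,
                        arc_mlp_size=500, label_mlp_size=100,
                        num_label=40, dropout=0.3,
                        use_var_lstm=False, use_greedy_infer=False)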
def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30),
             kernels=(3, 4, 5), initial_method=None):
    """
    Character-level word embedding.

    :param char_emb_size: the size of the character-level embedding. Default: 50.
        E.g. with 26 characters, each embedded to a 50-dim vector, the input size is 50.
    :param feature_maps: tuple of int. The length of the tuple is the number of
        convolution operations over characters. The i-th integer is the number of
        filters (dim of out channels) for the i-th convolution.
    :param kernels: tuple of int. The width of each kernel.
    """
    super(ConvCharEmbedding, self).__init__()
    self.convs = nn.ModuleList([
        nn.Conv2d(1, feature_maps[i],
                  kernel_size=(char_emb_size, kernels[i]),
                  bias=True, padding=(0, 4))
        for i in range(len(kernels))])
    initial_parameter(self, initial_method)
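# Hedged usage sketch covering both ConvCharEmbedding constructors above: each
# Conv2d slides a (char_emb_size x kernel) window over the 1-channel "image" of
# a word's characters; max-pooling over positions (assumed here) yields a fixed
# 40 + 30 + 30 = 100-dim word vector.
import torch

emb = ConvCharEmbedding(char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5))
chars = torch.randn(32, 1, 50, 10)    # (num_words, 1, char_emb_size, max_word_len)
feats = [conv(chars).squeeze(2).max(dim=-1)[0] for conv in emb.convs]
word_vecs = torch.cat(feats, dim=-1)  # (32, 100)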
def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0,
             bidirectional=False, initial_method=None):
    super(LSTM, self).__init__()
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True,
                        batch_first=True, dropout=dropout,
                        bidirectional=bidirectional)
    initial_parameter(self, initial_method)
def __init__(self, tag_size, include_start_end_trans=False, initial_method=None):
    super(ConditionalRandomField, self).__init__()
    self.include_start_end_trans = include_start_end_trans
    self.tag_size = tag_size

    # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
    self.trans_m = nn.Parameter(torch.randn(tag_size, tag_size))
    if self.include_start_end_trans:
        self.start_scores = nn.Parameter(torch.randn(tag_size))
        self.end_scores = nn.Parameter(torch.randn(tag_size))

    # self.reset_parameter()
    initial_parameter(self, initial_method)
def __init__(self, input_size, attention_unit=300, attention_hops=10,
             drop=0.5, initial_method=None):
    super(SelfAttention, self).__init__()
    self.attention_hops = attention_hops
    self.ws1 = nn.Linear(input_size, attention_unit, bias=False)
    self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False)
    self.I = torch.eye(attention_hops, requires_grad=False)
    self.I_origin = self.I
    self.drop = nn.Dropout(drop)
    self.tanh = nn.Tanh()
    initial_parameter(self, initial_method)
def __init__(self, tag_size, include_start_end_trans=True, initial_method=None):
    """
    :param tag_size: int, num of tags
    :param include_start_end_trans: bool, whether to include transition scores for the start/end tags
    """
    super(ConditionalRandomField, self).__init__()
    self.include_start_end_trans = include_start_end_trans
    self.tag_size = tag_size

    # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score
    self.transition_m = nn.Parameter(torch.randn(tag_size, tag_size))
    if self.include_start_end_trans:
        self.start_scores = nn.Parameter(torch.randn(tag_size))
        self.end_scores = nn.Parameter(torch.randn(tag_size))

    # self.reset_parameter()
    initial_parameter(self, initial_method)
def __init__(self, Cell, input_size, hidden_size, num_layers=1, bias=True,
             batch_first=False, dropout=(0, 0), bidirectional=False,
             initializer=None, initial_method=None, **kwargs):
    super(VarMaskedRNNBase, self).__init__()
    self.Cell = Cell
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.batch_first = batch_first
    self.bidirectional = bidirectional
    self.lstm = False
    num_directions = 2 if bidirectional else 1

    self.all_cells = []
    for layer in range(num_layers):
        for direction in range(num_directions):
            layer_input_size = input_size if layer == 0 else hidden_size * num_directions
            cell = self.Cell(layer_input_size, hidden_size, self.bias,
                             p=dropout, initializer=initializer, **kwargs)
            self.all_cells.append(cell)
            self.add_module('cell%d' % (layer * num_directions + direction), cell)
    initial_parameter(self, initial_method)
def __init__(self, input_size, hidden_size, bias=True, p=(0.5, 0.5),
             initializer=None, initial_method=None):
    super(VarFastLSTMCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    self.weight_ih = Parameter(torch.Tensor(4 * hidden_size, input_size))
    self.weight_hh = Parameter(torch.Tensor(4 * hidden_size, hidden_size))
    if bias:
        self.bias_ih = Parameter(torch.Tensor(4 * hidden_size))
        self.bias_hh = Parameter(torch.Tensor(4 * hidden_size))
    else:
        self.register_parameter('bias_ih', None)
        self.register_parameter('bias_hh', None)

    self.initializer = default_initializer(self.hidden_size) if initializer is None else initializer
    self.reset_parameters()

    p_in, p_hidden = p
    if p_in < 0 or p_in > 1:
        raise ValueError("input dropout probability has to be between 0 and 1, "
                         "but got {}".format(p_in))
    if p_hidden < 0 or p_hidden > 1:
        raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                         "but got {}".format(p_hidden))
    self.p_in = p_in
    self.p_hidden = p_hidden
    self.noise_in = None
    self.noise_hidden = None
    initial_parameter(self, initial_method)
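# Hedged sketch of the variational dropout that noise_in/noise_hidden support
# (Gal & Ghahramani, 2016): one Bernoulli mask per sequence, reused at every
# timestep, rather than a fresh mask per step. Standalone illustration only.
import torch

batch, input_size, hidden_size = 8, 100, 200
p_in, p_hidden = 0.5, 0.5
# sample the masks once, before unrolling the sequence
noise_in = torch.bernoulli(torch.full((batch, input_size), 1 - p_in)) / (1 - p_in)
noise_hidden = torch.bernoulli(torch.full((batch, hidden_size), 1 - p_hidden)) / (1 - p_hidden)
# then at every step t: x_t = x_t * noise_in and h_{t-1} = h_{t-1} * noise_hidden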