def init_func(m):
    """Initialize the parameters of a single module in place.

    ``init_type`` and ``gain`` are not parameters: they are read from the
    enclosing scope, as is the ``init`` module.

    * Modules whose class name contains ``Conv`` or ``Linear`` and that have a
      ``weight`` attribute get their weight filled according to ``init_type``
      ('normal', 'xavier', 'kaiming' or 'orthogonal') and their bias, when
      present, set to zero.
    * Modules whose class name contains ``BatchNorm2d`` get weights drawn from
      a normal distribution with mean 1.0 and std ``gain`` and a zero bias.
    * Every other module is left untouched.

    Raises:
        NotImplementedError: if ``init_type`` is not one of the supported names.
    """
    classname = m.__class__.__name__
    is_conv_or_linear = 'Conv' in classname or 'Linear' in classname
    if hasattr(m, 'weight') and is_conv_or_linear:
        if init_type == 'normal':
            # For the 'normal' scheme, gain is used as the standard deviation.
            init.normal_(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal_(m.weight.data, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(m.weight.data, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        if getattr(m, 'bias', None) is not None:
            init.constant_(m.bias.data, 0.0)
    elif 'BatchNorm2d' in classname:
        # A batch-norm layer built with affine=False has weight and bias set
        # to None; skip them instead of failing on ``None.data``.
        if getattr(m, 'weight', None) is not None:
            init.normal_(m.weight.data, 1.0, gain)
        if getattr(m, 'bias', None) is not None:
            init.constant_(m.bias.data, 0.0)
 def init_weights(self):
     """Apply orthogonal initialization to the weight matrices of ``self.rnn``.

     Only parameters with more than one dimension are touched; parameters
     with one dimension or fewer keep their current values.
     """
     matrices = (p for p in self.rnn.parameters() if p.dim() > 1)
     for matrix in matrices:
         weight_init.orthogonal_(matrix)
Exemple #3
0
 def _initialize_gru(self):
     for param in self.gru.parameters():
         if len(param.shape) >= 2:
             init.orthogonal_(param.data)
         else:
             init.normal_(param.data)
Exemple #4
0
 def init_weight(self):
     """Orthogonally initialize the first-layer weight matrices of ``self.gru``.

     The hidden-to-hidden matrix is initialized first, then the
     input-to-hidden matrix.
     """
     for attr in ('weight_hh_l0', 'weight_ih_l0'):
         init.orthogonal_(getattr(self.gru, attr))
Exemple #5
0
    def __init__(self, hps, *_):
        """Create the embeddings, linear maps and two bidirectional LSTMs.

        Args:
            hps: mapping of hyper-parameters. Keys read here: 'batch_size',
                'sent_hdim', 'sent_edim', 'pos_edim', 'role_edim', 'vword',
                'vbio', 'vpos', 'vdep', 'vframe', 'rec_layers' and
                'word_embeddings' (a NumPy array, since it is handed to
                ``torch.from_numpy``).
            *_: any further positional arguments are accepted and ignored.
        """
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        # Input width of the first LSTM: three word-sized slots plus one
        # POS-sized slot.
        sent_embedding_dim = 3*hps['sent_edim'] + 1*hps['pos_edim']
        ## for the region mark
        sent_embedding_dim += 1
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        # NOTE: this value is overwritten with the constant 2 further down.
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']

        self.word_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        # Dependency embeddings reuse the POS embedding width.
        self.dep_embeddings = nn.Embedding(self.dep_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size, hps['sent_edim'])
        #self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        # Second word-embedding table whose weights are overwritten with the
        # array supplied in hps['word_embeddings'].
        # NOTE(review): the name suggests these are kept frozen, but nothing
        # here disables gradients for them - confirm against the training code.
        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))

        self.role_embeddings = nn.Embedding(self.tagset_size, role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size, frame_embedding_dim)

        # Linear maps applied to 2*hidden-sized vectors (the width of a
        # bidirectional LSTM output).
        self.hidden2tag_M = nn.Linear(2*lstm_hidden_dim, 2*lstm_hidden_dim)
        self.hidden2tag_M_copy = nn.Linear(2*lstm_hidden_dim, 2*lstm_hidden_dim)
        self.hidden2tag_H = nn.Linear(2*lstm_hidden_dim, 2*lstm_hidden_dim)
        self.MLP = nn.Linear(2*lstm_hidden_dim, self.dep_size)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.num_layers = 2
        self.BiLSTM_share = nn.LSTM(input_size=sent_embedding_dim, hidden_size=lstm_hidden_dim, batch_first=True,
                              bidirectional=True, num_layers=self.num_layers)

        # Orthogonal init of entries [0] and [1] of the first two weight groups.
        # NOTE(review): for a 2-layer bidirectional LSTM, all_weights presumably
        # holds one [w_ih, w_hh, b_ih, b_hh] list per layer and direction, so
        # only the first layer's two directions would be covered here -
        # confirm whether the second layer was meant to be included.
        init.orthogonal_(self.BiLSTM_share.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_share.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_share.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_share.all_weights[1][1])

        self.num_layers = 2
        # Second BiLSTM; its input width equals the 2*hidden output width of
        # a bidirectional LSTM with the same hidden size.
        self.BiLSTM_SRL = nn.LSTM(input_size=lstm_hidden_dim * 2, hidden_size=lstm_hidden_dim, batch_first=True,
                                    bidirectional=True, num_layers=self.num_layers)

        # Same partial orthogonal initialization as for BiLSTM_share above.
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])


        # non-linear map to role embedding
        # (only the linear layer is created here; any non-linearity must be
        # applied by the caller)
        self.role_map = nn.Linear(in_features=role_embedding_dim * 2, out_features=self.hidden_dim * 4)

        # Init hidden state
        # init_hidden() is defined outside this block; it is called once per
        # stored state.
        self.hidden = self.init_hidden()
        self.hidden_2 = self.init_hidden()
        self.hidden_3 = self.init_hidden()
        self.hidden_4 = self.init_hidden()
Exemple #6
0
def init_ortho(module):
    """Orthogonally initialize every 2-D parameter of ``module`` in place.

    Parameters of any other rank (for example 1-D biases) are left as they are.
    """
    two_dimensional = (p for p in module.parameters() if p.dim() == 2)
    for matrix in two_dimensional:
        init.orthogonal_(matrix)
Exemple #7
0
    def __init__(self, hps, *_):
        """Create all sub-modules and parameters of the joint SRL/dependency tagger.

        Builds the dropout layers, three bidirectional LSTMs (BiLSTM_0,
        BiLSTM_1, BiLSTM_SRL), the embedding tables, the primary and four
        auxiliary (FF/BB/FB/BF) projection heads for both SRL and dependency
        prediction, and the predicate-identification layers.

        Args:
            hps: mapping of hyper-parameters. Keys read here: 'batch_size',
                'sent_hdim', 'sent_edim', 'role_edim', 'vword', 'vbio',
                'vpos', 'vdep', 'vframe', 'rec_layers', 'svdep' and
                'word_embeddings' (a NumPy array, since it is handed to
                ``torch.from_numpy``).
            *_: any further positional arguments are accepted and ignored.
        """
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        # Input widths of the dependency and SRL encoders.
        sent_embedding_dim_DEP = 2 * hps['sent_edim']
        # NOTE(review): the extra 16 presumably matches the 16-dim
        # region_embeddings table created below - confirm against forward().
        sent_embedding_dim_SRL = 2 * hps['sent_edim'] + 16
        ## for the region mark
        # role_embedding_dim / frame_embedding_dim are computed but not used
        # again inside this constructor.
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        # NOTE: overwritten below with 1, 1 and finally 3.
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        # One independent dropout module (p=0.3) per usage site.
        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.SRL_hidden_dropout = nn.Dropout(p=0.3)
        self.DEP_hidden_dropout_1 = nn.Dropout(p=0.3)
        self.DEP_hidden_dropout_2 = nn.Dropout(p=0.3)
        self.SRL_proj_word_dropout = nn.Dropout(p=0.3)
        self.SRL_proj_predicate_dropout = nn.Dropout(p=0.3)
        self.DEP_proj_word_dropout = nn.Dropout(p=0.3)
        self.DEP_proj_predicate_dropout = nn.Dropout(p=0.3)
        # (sic) the attribute name is misspelled; other code may rely on it.
        self.Idenficiation_dropout = nn.Dropout(p=0.3)
        #self.use_dropout = nn.Dropout(p=0.2)

        # The BiLSTM encoder
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        self.num_layers = 1
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])

        # Second embedding table whose weights are overwritten with the array
        # supplied in hps['word_embeddings'].
        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        # Orthogonal init of entries [0] and [1] of the first two weight groups.
        # NOTE(review): for a single-layer bidirectional LSTM these presumably
        # are the input/hidden weight matrices of both directions - confirm.
        init.orthogonal_(self.BiLSTM_0.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][1])

        self.num_layers = 1
        # Input width 2*hidden equals the output width of a bidirectional
        # LSTM with the same hidden size.
        self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_1.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][1])

        # SRL: primary prediction
        self.num_layers = 3
        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])

        self.word_fixed_embeddings_SRL = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_SRL.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        # Dependency-label embeddings whose width is pos_size.
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        # Two-entry table of 16-dim vectors.
        self.region_embeddings = nn.Embedding(2, 16)
        self.elmo_emb_size = 200
        #L + self.elmo_emb_size * 1 + 1 * self.pos_size
        self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL,
                                  hidden_size=lstm_hidden_dim,
                                  batch_first=True,
                                  bidirectional=True,
                                  num_layers=self.num_layers)

        # NOTE(review): BiLSTM_SRL has three layers, but only weight groups
        # [0] and [1] are orthogonalized; the remaining groups presumably keep
        # PyTorch's default initialization - confirm this is intended.
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])

        # Projection to elmo_emb_size with ReLU, plus a learnable 2-element
        # weight vector and a learnable scalar.
        self.elmo_mlp = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, self.elmo_emb_size), nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

        # Primary SRL scoring parameter, initialized with torch.rand;
        # shape (hidden + 1, tagset_size * (hidden + 1)).
        self.W_R = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))
        #self.W_share = nn.Parameter(torch.rand(lstm_hidden_dim, lstm_hidden_dim))

        self.Non_Predicate_Proj = nn.Linear(2 * lstm_hidden_dim,
                                            lstm_hidden_dim)
        self.Predicate_Proj = nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim)

        # Width shared by all four auxiliary SRL heads below. Each head has
        # two hidden -> cvt_hidden_dim projections and a parameter of shape
        # (cvt_hidden_dim + 1, tagset_size * cvt_hidden_dim).
        self.cvt_hidden_dim = 200
        ## SRL: auxiliary prediction: fwd-fwd
        self.Non_Predicate_Proj_FF = nn.Linear(lstm_hidden_dim,
                                               self.cvt_hidden_dim)
        self.Predicate_Proj_FF = nn.Linear(lstm_hidden_dim,
                                           self.cvt_hidden_dim)
        self.W_R_FF = nn.Parameter(
            torch.rand(self.cvt_hidden_dim + 1,
                       self.tagset_size * self.cvt_hidden_dim))

        ## SRL: auxiliary prediction: bwd-bwd
        self.Non_Predicate_Proj_BB = nn.Linear(lstm_hidden_dim,
                                               self.cvt_hidden_dim)
        self.Predicate_Proj_BB = nn.Linear(lstm_hidden_dim,
                                           self.cvt_hidden_dim)
        self.W_R_BB = nn.Parameter(
            torch.rand(self.cvt_hidden_dim + 1,
                       self.tagset_size * self.cvt_hidden_dim))

        ## SRL: auxiliary prediction: fwd-bwd
        self.Non_Predicate_Proj_FB = nn.Linear(lstm_hidden_dim,
                                               self.cvt_hidden_dim)
        self.Predicate_Proj_FB = nn.Linear(lstm_hidden_dim,
                                           self.cvt_hidden_dim)
        self.W_R_FB = nn.Parameter(
            torch.rand(self.cvt_hidden_dim + 1,
                       self.tagset_size * self.cvt_hidden_dim))

        ## SRL: auxiliary prediction: bwd-fwd
        self.Non_Predicate_Proj_BF = nn.Linear(lstm_hidden_dim,
                                               self.cvt_hidden_dim)
        self.Predicate_Proj_BF = nn.Linear(lstm_hidden_dim,
                                           self.cvt_hidden_dim)
        self.W_R_BF = nn.Parameter(
            torch.rand(self.cvt_hidden_dim + 1,
                       self.tagset_size * self.cvt_hidden_dim))

        # Dependency extractor: primary prediction
        self.hidden2tag_1 = nn.Linear(4 * lstm_hidden_dim, lstm_hidden_dim)
        self.hidden2tag_2 = nn.Linear(4 * lstm_hidden_dim, lstm_hidden_dim)
        # Shape (hidden + 1, specific_dep_size * hidden), torch.rand-initialized.
        self.W_dep = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.specific_dep_size * lstm_hidden_dim))
        # Bias-free map from specific_dep_size to pos_size.
        self.tag2hidden = nn.Linear(self.specific_dep_size,
                                    self.pos_size,
                                    bias=False)

        # Dependency extractor: auxiliary FF
        self.hidden2tag_1_FF = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.hidden2tag_2_FF = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.MLP_FF = nn.Linear(2 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP_FF_2 = nn.Linear(2 * lstm_hidden_dim, self.specific_dep_size)

        # Dependency extractor: auxiliary BB
        self.hidden2tag_1_BB = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.hidden2tag_2_BB = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.MLP_BB = nn.Linear(2 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP_BB_2 = nn.Linear(2 * lstm_hidden_dim, self.specific_dep_size)

        # Dependency extractor: auxiliary FB
        self.hidden2tag_1_FB = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.hidden2tag_2_FB = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.MLP_FB = nn.Linear(2 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP_FB_2 = nn.Linear(2 * lstm_hidden_dim, self.specific_dep_size)

        # Dependency extractor: auxiliary BF
        self.hidden2tag_1_BF = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.hidden2tag_2_BF = nn.Linear(lstm_hidden_dim, lstm_hidden_dim)
        self.MLP_BF = nn.Linear(2 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP_BF_2 = nn.Linear(2 * lstm_hidden_dim, self.specific_dep_size)

        # Predicate identification
        # Two-stage head ending in a 3-way output.
        self.MLP_identification = nn.Linear(4 * lstm_hidden_dim,
                                            2 * lstm_hidden_dim)
        self.Idenficiation = nn.Linear(2 * lstm_hidden_dim, 3)

        # Init hidden state
        # init_hidden_spe() / init_hidden_share() are defined outside this
        # block; the first three states use the former, the fourth the latter.
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_4 = self.init_hidden_share()
 def _initialize_weights(self):
     init.orthogonal_(self.conv2d.weight, init.calculate_gain('relu'))
Exemple #9
0
    def __init__(self,
                 input_size,
                 hidden_size,
                 rnn_type='lstm',
                 num_layers=1,
                 num_hidden_layers=2,
                 bias=True,
                 batch_first=True,
                 dropout=0,
                 bidirectional=False,
                 nr_cells=5,
                 read_heads=2,
                 cell_size=10,
                 nonlinearity='tanh',
                 gpu_id=-1,
                 independent_linears=False,
                 share_memory=True,
                 debug=False,
                 clip=20):
        """Build the controller RNNs, their memories and the output layer.

        Args:
            input_size: width of the external input; the first controller
                layer receives ``input_size + read_vectors_size`` features.
            hidden_size: controller hidden width, also used as output size.
            rnn_type: 'rnn', 'gru' or 'lstm' (case-insensitive). Any other
                value creates no RNN, so registering layer 0 fails with an
                IndexError.
            num_layers: number of controller layers; one RNN per layer.
            num_hidden_layers: ``num_layers`` argument of each wrapped RNN.
            bias: forwarded to each RNN.
            batch_first: stored on the instance; the RNNs themselves are
                always built with ``batch_first=True``.
            dropout: forwarded to each RNN.
            bidirectional: stored on the instance only; it is not forwarded
                to the RNNs built here.
            nr_cells: ``mem_size`` of each ``Memory``.
            read_heads: number of read heads of each ``Memory``.
            cell_size: ``cell_size`` of each ``Memory``.
            nonlinearity: forwarded to ``nn.RNN`` when ``rnn_type`` is 'rnn'.
            gpu_id: CUDA device index, or -1 to leave every module where it is.
            independent_linears: forwarded to each ``Memory``.
            share_memory: when true a single ``Memory`` is created for all
                layers; otherwise one ``Memory`` per layer.
            debug: stored on the instance only.
            clip: stored on the instance only.
        """
        super(DNC, self).__init__()
        # todo: separate weights and RNNs for the interface and output vectors

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn_type = rnn_type
        self.num_layers = num_layers
        self.num_hidden_layers = num_hidden_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.nr_cells = nr_cells
        self.read_heads = read_heads
        self.cell_size = cell_size
        self.nonlinearity = nonlinearity
        self.gpu_id = gpu_id
        self.independent_linears = independent_linears
        self.share_memory = share_memory
        self.debug = debug
        self.clip = clip

        self.w = self.cell_size
        self.r = self.read_heads

        self.read_vectors_size = self.r * self.w * 3
        self.output_size = self.hidden_size

        self.nn_input_size = self.input_size + self.read_vectors_size
        self.nn_output_size = self.output_size + self.read_vectors_size

        self.rnns = []
        self.memories = []

        rnn_kind = self.rnn_type.lower()
        for layer in range(self.num_layers):
            # Only the first layer sees the raw input; deeper layers consume
            # the previous layer's output joined with the read vectors.
            layer_input_size = (self.nn_input_size
                                if layer == 0 else self.nn_output_size)
            if rnn_kind == 'rnn':
                self.rnns.append(
                    nn.RNN(layer_input_size,
                           self.output_size,
                           bias=self.bias,
                           nonlinearity=self.nonlinearity,
                           batch_first=True,
                           dropout=self.dropout,
                           num_layers=self.num_hidden_layers))
            elif rnn_kind == 'gru':
                self.rnns.append(
                    nn.GRU(layer_input_size,
                           self.output_size,
                           bias=self.bias,
                           batch_first=True,
                           dropout=self.dropout,
                           num_layers=self.num_hidden_layers))
            elif rnn_kind == 'lstm':
                self.rnns.append(
                    nn.LSTM(layer_input_size,
                            self.output_size,
                            bias=self.bias,
                            batch_first=True,
                            dropout=self.dropout,
                            num_layers=self.num_hidden_layers))
            # Assign as an attribute as well: the modules live in a plain
            # Python list, which nn.Module does not track.
            setattr(self, rnn_kind + '_layer_' + str(layer), self.rnns[layer])

            # memories for each layer
            if not self.share_memory:
                self.memories.append(
                    Memory(input_size=self.output_size,
                           mem_size=self.nr_cells,
                           cell_size=self.w,
                           read_heads=self.r,
                           gpu_id=self.gpu_id,
                           independent_linears=self.independent_linears))
                setattr(self, 'rnn_layer_memory_' + str(layer),
                        self.memories[layer])

        # only one memory shared by all layers
        if self.share_memory:
            self.memories.append(
                Memory(input_size=self.output_size,
                       mem_size=self.nr_cells,
                       cell_size=self.w,
                       read_heads=self.r,
                       gpu_id=self.gpu_id,
                       independent_linears=self.independent_linears))
            setattr(self, 'rnn_layer_memory_shared', self.memories[0])

        # final output layer
        self.output = nn.Linear(self.nn_output_size, self.output_size)
        orthogonal_(self.output.weight)

        if self.gpu_id != -1:
            for rnn in self.rnns:
                rnn.cuda(self.gpu_id)
            for memory in self.memories:
                memory.cuda(self.gpu_id)
            # Use the same device as the RNNs and memories; a bare .cuda()
            # would pick the current default device instead of gpu_id.
            self.output.cuda(self.gpu_id)
Exemple #10
0
def weight_init(m):
    '''
    Initialize the parameters of a single module in place.

    Snippet adapted from https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5
    Usage:
        model = Model()
        model.apply(weight_init)

    Scheme per module type:
        Conv1d / ConvTranspose1d ........ weight ~ normal_, bias ~ normal_
        Conv2d/3d, ConvTranspose2d/3d,
        Linear .......................... weight ~ xavier_normal_, bias ~ normal_
        BatchNorm1d/2d/3d ............... weight ~ normal_(mean=1, std=0.02), bias = 0
        LSTM / LSTMCell / GRU / GRUCell . orthogonal_ for parameters with two
                                          or more dimensions, normal_ otherwise
    Missing biases (bias=False) and missing batch-norm affine parameters
    (affine=False) are skipped. Any other module is left untouched.
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d,
                        nn.ConvTranspose3d, nn.Linear)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both attributes are None; there is nothing to
        # initialize and dereferencing ``.data`` would raise.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #11
0
def weight_init(m):
    '''
    Initialize the parameters of a single module in place.

    Usage:
        model = Model()
        model.apply(weight_init)

    Scheme per module type:
        Conv1d / ConvTranspose1d ........ weight ~ normal_, bias ~ normal_
        Conv2d .......................... weight ~ kaiming_normal_ (leaky_relu,
                                          a=0.2, fan_in), bias ~ normal_
        Conv3d, ConvTranspose2d/3d ...... weight ~ kaiming_normal_ (leaky_relu,
                                          a=0, fan_in), bias ~ normal_
        BatchNorm1d/2d/3d ............... weight ~ normal_(mean=1, std=0.02), bias = 0
        Linear .......................... weight ~ xavier_normal_, bias ~ normal_
        LSTM / LSTMCell / GRU / GRUCell . orthogonal_ for parameters with two
                                          or more dimensions, normal_ otherwise
    Missing biases (bias=False) and missing batch-norm affine parameters
    (affine=False) are skipped. Any other module is left untouched.
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d,
                        nn.ConvTranspose3d)):
        # Only plain Conv2d uses a negative slope of 0.2; the other
        # convolution types use 0.
        negative_slope = 0.2 if isinstance(m, nn.Conv2d) else 0
        torch.nn.init.kaiming_normal_(m.weight.data,
                                      a=negative_slope,
                                      mode='fan_in',
                                      nonlinearity='leaky_relu')
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both attributes are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        # nn.Linear(..., bias=False) has bias set to None.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #12
0
def weight_init(m):
    '''
    Initialize the parameters of a single module in place.

    Taken from: https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5

    Scheme per module type:
        Conv1d / ConvTranspose1d ........ weight ~ normal_, bias ~ normal_
        Conv2d/3d, ConvTranspose2d/3d,
        Linear .......................... weight ~ xavier_normal_, bias ~ normal_
        BatchNorm1d/2d/3d ............... weight ~ normal_(mean=1, std=0.02), bias = 0
        LSTM / LSTMCell / GRU / GRUCell . orthogonal_ for parameters with two
                                          or more dimensions, normal_ otherwise
    Missing biases (bias=False) and missing batch-norm affine parameters
    (affine=False) are skipped. A fixed list of other layer types and every
    module that has child modules are skipped silently; anything else
    triggers a printed warning.
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d,
                        nn.ConvTranspose3d, nn.Linear)):
        init.xavier_normal_(m.weight.data)
        # Covers nn.Linear(..., bias=False) as well as bias-free convolutions.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both attributes are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, (
            nn.Dropout,
            nn.ReLU,
            nn.ELU,
            nn.LeakyReLU,
            nn.Sigmoid,
            nn.Tanh,
            nn.MaxPool2d,
            nn.AvgPool2d,
            nn.InstanceNorm2d,
            nn.Embedding,
    )):
        # Deliberately left with whatever initialization they already have.
        pass
    elif len(m._modules) > 0:
        # Container: nothing is initialized on the container itself.
        pass
    else:
        print("!! Warning: {} has no deafault initialization scheme".format(
            type(m)))
Exemple #13
0
    def init_weights(self):
        """Initialize the first weight group of the three GRU layers in place.

        For each of ``gru_layer1``, ``gru_layer2`` and ``gru_layer3``, entries
        0 and 1 of ``all_weights[0]`` receive an orthogonal initialization
        with gain sqrt(2), and entries 2 and 3 are drawn uniformly between
        0.1 and 1.
        """
        gain = np.sqrt(2.0)
        for gru in (self.gru_layer1, self.gru_layer2, self.gru_layer3):
            first_group = gru.all_weights[0]
            init.orthogonal_(first_group[0], gain=gain)
            init.orthogonal_(first_group[1], gain=gain)
            # The bounds used to be passed as (1, 0.1). Recent PyTorch
            # releases reject a lower bound above the upper bound, so they
            # are given in ascending order; the sampled interval is the same.
            init.uniform_(first_group[2], 0.1, 1)
            init.uniform_(first_group[3], 0.1, 1)
Exemple #14
0
def torch_weight_init(m):
    '''
    Initialize the parameters of a single module in place.

    Usage:
        model = Model()
        model.apply(torch_weight_init)

    Scheme per module type:
        Conv1d, Linear .................. weight ~ xavier_uniform_, bias = 0
        Conv2d/3d, ConvTranspose2d/3d ... weight ~ xavier_normal_, bias ~ normal_
        ConvTranspose1d ................. weight ~ normal_, bias ~ normal_
        BatchNorm1d/2d .................. weight ~ normal_(mean=1, std=0.02),
                                          bias left unchanged
        BatchNorm3d ..................... weight ~ normal_(mean=1, std=0.02), bias = 0
        GRU ............................. weight_ih ~ xavier_uniform_,
                                          weight_hh ~ orthogonal_, biases = 0
        LSTM / LSTMCell / GRUCell ....... orthogonal_ for parameters with two
                                          or more dimensions, normal_ otherwise
    Missing biases (bias=False) and missing batch-norm affine parameters
    (affine=False) are skipped. Any other module is left untouched.
    '''
    if isinstance(m, (nn.Conv1d, nn.Linear)):
        init.xavier_uniform_(m.weight.data)
        # nn.Linear(..., bias=False) and bias-free convolutions have bias None.
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d,
                        nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose1d):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both attributes are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        # Only the 3-D variant resets its bias; the 1-D and 2-D variants
        # keep whatever bias they already have.
        if isinstance(m, nn.BatchNorm3d) and m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.GRU):
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                torch.nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #15
0
def weight_init(m):
    """
    Initialise the parameters of a single module in place, by module type.

    Usage:
        model = Model()
        model.apply(weight_init)

    Scheme per module type:
        * Conv1d / ConvTranspose1d: standard-normal weight and bias.
        * Conv2d / Conv3d / ConvTranspose2d / ConvTranspose3d:
          Xavier-normal weight, standard-normal bias.
        * BatchNorm1d / 2d / 3d: weight ~ N(1, 0.02), bias = 0.
        * Linear: Xavier-normal weight, standard-normal bias.
        * LSTM / LSTMCell / GRU / GRUCell: orthogonal for parameters with
          two or more dimensions, standard normal for the rest.
        * Embedding: uniform in [-0.5 / dim, 0.5 / dim].

    Modules of any other type are left untouched.  Optional parameters that
    are absent (``bias=False``, ``affine=False``) are skipped instead of
    raising ``AttributeError`` on ``None``.
    """
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d,
                        nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both weight and bias are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        # A Linear built with bias=False has bias set to None.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.Embedding):
        embed_size = m.weight.size(-1)
        # Guard against a zero-width embedding (division by zero below).
        if embed_size > 0:
            init_range = 0.5 / embed_size
            init.uniform_(m.weight.data, -init_range, init_range)
    def __init__(self, input_size, output_size, use_noisy_net=False):
        """Assemble the convolutional trunk plus the actor and critic heads.

        ``output_size`` is the width of the last actor layer.  When
        ``use_noisy_net`` is true every dense layer is built with
        ``NoisyLinear`` instead of ``nn.Linear``.  ``input_size`` is accepted
        but not read here: the first convolution is fixed at 4 input channels.
        """
        super(CnnActorCriticNetwork, self).__init__()

        if use_noisy_net:
            print('use NoisyNet')
        dense = NoisyLinear if use_noisy_net else nn.Linear

        # Layers are constructed in the same order as they are applied.
        trunk_layers = [
            nn.Conv2d(in_channels=4, out_channels=32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU(),
            Flatten(),
            dense(2304, 256),
            nn.ReLU(),
            dense(256, 448),
            nn.ReLU(),
        ]
        self.feature = nn.Sequential(*trunk_layers)

        self.actor = nn.Sequential(
            dense(448, 448),
            nn.ReLU(),
            dense(448, output_size),
        )

        self.extra_layer = nn.Sequential(dense(448, 448), nn.ReLU())

        self.critic_ext = dense(448, 1)
        self.critic_int = dense(448, 1)

        # Baseline init for every conv / nn.Linear module: orthogonal weight
        # with gain sqrt(2) and a zero bias.
        baseline_gain = np.sqrt(2)
        for module in self.modules():
            for layer_type in (nn.Conv2d, nn.Linear):
                if isinstance(module, layer_type):
                    init.orthogonal_(module.weight, baseline_gain)
                    module.bias.data.zero_()

        # Both value heads are then re-initialised with a small gain.
        for critic in (self.critic_ext, self.critic_int):
            init.orthogonal_(critic.weight, 0.01)
            critic.bias.data.zero_()

        # Per-head gains for the plain nn.Linear layers (exact type match, so
        # subclasses are not touched here).
        for head, gain in ((self.actor, 0.01), (self.extra_layer, 0.1)):
            for layer in head:
                if type(layer) is nn.Linear:
                    init.orthogonal_(layer.weight, gain)
                    layer.bias.data.zero_()
Exemple #17
0
def weight_init(m):
    """
    Function to initialize the weight of a layer.

    Usage:
        network = Model()
        network.apply(weight_init)

    The module is modified in place according to its type:
        * Conv1d / ConvTranspose1d: standard-normal weight and bias.
        * Conv2d / Conv3d / ConvTranspose2d / ConvTranspose3d:
          Xavier-normal weight, standard-normal bias.
        * BatchNorm1d / 2d / 3d: weight ~ N(1, 0.02), bias = 0.
        * Linear: Xavier-normal weight, standard-normal bias.
        * LSTM / LSTMCell / GRU / GRUCell: orthogonal for parameters with
          two or more dimensions, standard normal for the rest.

    Any other module type is left unchanged.  Missing optional parameters
    (``bias=False`` on Linear, ``affine=False`` on BatchNorm) are skipped
    rather than dereferenced as ``None``.
    """
    conv_normal = (nn.Conv1d, nn.ConvTranspose1d)
    conv_xavier = (nn.Conv2d, nn.Conv3d,
                   nn.ConvTranspose2d, nn.ConvTranspose3d)
    batch_norms = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    recurrent = (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)

    if isinstance(m, conv_normal):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, conv_xavier):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, batch_norms):
        # affine=False leaves weight and bias as None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        # bias=False leaves bias as None.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, recurrent):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #18
0
    def __init__(self, hps, *_):
        """Create the embeddings, BiLSTM encoders and scoring parameters.

        Args:
            hps: mapping of hyper-parameters.  The keys read here are
                'batch_size', 'sent_hdim', 'sent_edim', 'pos_edim',
                'role_edim', 'vword', 'vbio', 'vpos', 'vdep', 'vframe',
                'rec_layers', 'svdep' and 'word_embeddings' (handed to
                ``torch.from_numpy``, so it has to be a NumPy array).
            *_: any further positional arguments are accepted and ignored.
        """
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        # Input widths of the DEP and SRL encoders, both derived from the word
        # embedding size.
        sent_embedding_dim_DEP = 2 * hps['sent_edim']
        sent_embedding_dim_SRL = 2 * hps['sent_edim'] + 16 + 16
        ## for the region mark
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        # NOTE: this value is overwritten further down (self.num_layers is
        # re-assigned before each LSTM is built and ends up as 3).
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        # Embedding tables created with PyTorch's default initialisation.
        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size,
                                               hps['sent_edim'])
        # NOTE(review): the embedding width here is self.pos_size, not an
        # *_edim hyper-parameter - confirm this is intended.
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        self.region_embeddings = nn.Embedding(2, 16)
        #self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        # Embedding tables whose weights are overwritten with the matrix in
        # hps['word_embeddings'].  Nothing here disables their gradients.
        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.role_embeddings = nn.Embedding(self.tagset_size,
                                            role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size,
                                             frame_embedding_dim)

        # Projection of 1024-wide inputs down to elmo_emb_size, plus a
        # two-element mixing weight and a scalar scale.
        self.elmo_emb_size = 200
        self.elmo_mlp_word = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                           nn.ReLU())
        self.elmo_word = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma_word = nn.Parameter(torch.ones(1))

        # Same structure, but fed with 2 * lstm_hidden_dim wide inputs.
        self.elmo_mlp = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, self.elmo_emb_size), nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.DEP_link_H_dropout = nn.Dropout(p=0.5)
        self.DEP_link_M_dropout = nn.Dropout(p=0.5)
        self.DEP_tag_H_dropout = nn.Dropout(p=0.5)
        self.DEP_tag_M_dropout = nn.Dropout(p=0.5)

        # NOTE(review): the next two lines re-assign attributes already set
        # above with the same p=0.3, so they are redundant.
        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.hidden_state_dropout = nn.Dropout(p=0.3)
        self.dropout_1 = nn.Dropout(p=0.3)
        self.dropout_2 = nn.Dropout(p=0.3)

        self.hidden_state_dropout_SRL = nn.Dropout(p=0.3)
        self.dropout_1_SRL = nn.Dropout(p=0.3)
        self.dropout_2_SRL = nn.Dropout(p=0.3)

        #self.use_dropout = nn.Dropout(p=0.2)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.num_layers = 1
        self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        # Orthogonal init of the first two tensors in the first two weight
        # groups; the remaining entries of each group keep PyTorch's defaults.
        init.orthogonal_(self.BiLSTM_0.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][1])

        # Second single-layer BiLSTM; its input width is 2 * lstm_hidden_dim.
        self.num_layers = 1
        self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_1.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][1])

        # Three-layer BiLSTM for the SRL input.
        self.num_layers = 3
        self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL,
                                  hidden_size=lstm_hidden_dim,
                                  batch_first=True,
                                  bidirectional=True,
                                  num_layers=self.num_layers)

        # NOTE(review): only all_weights[0] and all_weights[1] are touched.
        # With num_layers=3 and bidirectional=True the LSTM presumably exposes
        # further weight groups that keep their default init - confirm whether
        # the deeper layers were meant to be orthogonal too.
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])

        # Projections from 2 * ldims to ldims and the matching weight
        # matrices (drawn with torch.rand) for the "link" and "tag" parts.
        self.ldims = lstm_hidden_dim
        self.hidLayerFOH_link = nn.Linear(self.ldims * 2, self.ldims)
        self.hidLayerFOM_link = nn.Linear(self.ldims * 2, self.ldims)
        self.W_R_link = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1, 1 + lstm_hidden_dim))

        self.hidLayerFOH_tag = nn.Linear(self.ldims * 2, self.ldims)
        self.hidLayerFOM_tag = nn.Linear(self.ldims * 2, self.ldims)
        self.W_R_tag = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.dep_size * (1 + lstm_hidden_dim)))

        self.Non_Predicate_Proj = nn.Linear(2 * lstm_hidden_dim,
                                            lstm_hidden_dim)
        self.Predicate_Proj = nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim)
        self.W_R = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        # Two learnable float32 row vectors of width word_emb_dim, both
        # starting as all ones.
        self.VR_word_embedding = nn.Parameter(
            torch.from_numpy(np.ones((1, self.word_emb_dim), dtype='float32')))

        self.VR_word_embedding_random = nn.Parameter(
            torch.from_numpy(np.ones((1, self.word_emb_dim), dtype='float32')))

        # Init hidden state
        # (init_hidden_spe / init_hidden_share are defined elsewhere in the
        # class and are not visible from here.)
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_4 = self.init_hidden_share()
Exemple #19
0
 def init_hyper(m):
     """Orthogonally initialise a plain ``nn.Linear`` layer and zero its bias.

     Only modules whose type is exactly ``nn.Linear`` are touched; subclasses
     and every other module type are left as they are.  A layer built with
     ``bias=False`` gets its weight initialised and the bias step skipped.
     """
     if type(m) is nn.Linear:
         orthogonal_(m.weight.data, gain=1.0)
         # bias is None when the layer was created with bias=False.
         if m.bias is not None:
             m.bias.data.fill_(0.0)
Exemple #20
0
    def __init__(
        self,
        input_dim,
        hidden_dim,
        kernel_size,
        padding_mode='zeros',
        batchnorm=True,
        use_attention=True,
        timesteps=64
    ):  # Timesteps is funky here... but go ahead and try this until you figure out the exact training length
        """Create the gates, recurrent kernels and scaling parameters.

        Referenced from https://github.com/happyjin/ConvGRU-pytorch

        Args:
            input_dim: channel count added to ``hidden_dim`` to form the input
                width of the ``a_wu_gate`` and ``i_w_gate`` convolutions.
            hidden_dim: channel count of every gate output and of the
                recurrent kernels.
            kernel_size: spatial size of the square recurrent kernels
                ``w_exc`` and ``w_inh``; ``kernel_size // 2`` is stored as
                padding.
            padding_mode: accepted but not read in this constructor.
            batchnorm: stored on ``self.batchnorm``.
            use_attention: when true the 1x1 ``a_wu_gate`` convolution is
                created (orthogonal weight, bias set to 1).
            timesteps: the gate biases are drawn from
                ``U(1, timesteps - 1)`` before the log is taken.
        """
        super(ConvGRUCell, self).__init__()
        self.padding = kernel_size // 2
        hidden_size = hidden_dim
        self.batchnorm = batchnorm
        self.timesteps = timesteps
        self.use_attention = use_attention

        if self.use_attention:
            self.a_wu_gate = nn.Conv2d(hidden_size + input_dim,
                                       hidden_size,
                                       1,
                                       padding=1 // 2)
            init.orthogonal_(self.a_wu_gate.weight)
            init.constant_(self.a_wu_gate.bias, 1.)
        # 1x1 convolutions for the two gates and the two state initialisers.
        self.i_w_gate = nn.Conv2d(hidden_size + input_dim, hidden_size, 1)
        self.e_w_gate = nn.Conv2d(hidden_size * 2, hidden_size, 1)
        self.inh_init = nn.Conv2d(1, hidden_size, 1, padding=1 // 2)
        self.exc_init = nn.Conv2d(1, hidden_size, 1, padding=1 // 2)

        # Recurrent kernels; allocated uninitialised and filled orthogonally
        # further down.
        spatial_h_size = kernel_size
        self.h_padding = spatial_h_size // 2
        self.w_exc = nn.Parameter(
            torch.empty(hidden_size, hidden_size, spatial_h_size,
                        spatial_h_size))
        self.w_inh = nn.Parameter(
            torch.empty(hidden_size, hidden_size, spatial_h_size,
                        spatial_h_size))

        # Per-channel scalars, filled from U(0, 0.1) further down.
        self.alpha = nn.Parameter(torch.empty((hidden_size, 1, 1)))
        self.mu = nn.Parameter(torch.empty((hidden_size, 1, 1)))

        self.gamma = nn.Parameter(torch.empty((hidden_size, 1, 1)))
        self.kappa = nn.Parameter(torch.empty((hidden_size, 1, 1)))

        # Two single-group GroupNorm layers (kept under the name ``bn``).
        self.bn = nn.ModuleList([
            nn.GroupNorm(1, hidden_size, eps=1e-03, affine=True)
            for _ in range(2)
        ])

        init.orthogonal_(self.w_inh)
        init.orthogonal_(self.w_exc)

        init.orthogonal_(self.i_w_gate.weight)
        init.orthogonal_(self.e_w_gate.weight)

        for bn in self.bn:
            init.constant_(bn.weight, 0.1)

        init.uniform_(self.alpha, a=0., b=0.1)
        init.uniform_(self.mu, a=0., b=0.1)
        init.uniform_(self.gamma, a=0., b=0.1)
        init.uniform_(self.kappa, a=0., b=0.1)

        # Init gate biases: draw from U(1, timesteps - 1) and take the log in
        # place.  The earlier code called the out-of-place ``log()`` and
        # dropped its result, so the bias stayed on the raw uniform scale.
        # NOTE(review): this looks like the "chrono" bias initialisation -
        # confirm against the reference implementation.
        init.uniform_(self.i_w_gate.bias.data, 1, self.timesteps - 1)
        self.i_w_gate.bias.data.log_()
        # The second gate gets the negated bias of the first.
        self.e_w_gate.bias.data = -self.i_w_gate.bias.data
    def __init__(self, hps, *_):
        """Create the embeddings, char CNN, BiLSTM encoders and scorers.

        Args:
            hps: mapping of hyper-parameters.  The keys read here are
                'batch_size', 'sent_hdim', 'sent_edim', 'pos_edim',
                'role_edim', 'vword', 'vbio', 'vpos', 'vdep', 'vchar',
                'vframe', 'rec_layers', 'svdep' and 'word_embeddings'
                (handed to ``torch.from_numpy``, so it has to be a NumPy
                array).
            *_: any further positional arguments are accepted and ignored.
        """
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        # Input widths of the DEP and SRL encoders (identical in this variant).
        sent_embedding_dim_DEP = 2 * hps['sent_edim'] + 16
        sent_embedding_dim_SRL = 2 * hps['sent_edim'] + 16
        ## for the region mark
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.char_size = hps['vchar']
        self.frameset_size = hps['vframe']
        # NOTE: this value is overwritten further down (self.num_layers is
        # re-assigned before each LSTM is built and ends up as 3).
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        # Embedding tables created with PyTorch's default initialisation.
        self.char_embeddings = nn.Embedding(self.char_size, 50)
        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size,
                                               hps['sent_edim'])
        # NOTE(review): the embedding width here is self.pos_size, not an
        # *_edim hyper-parameter - confirm this is intended.
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        self.region_embeddings = nn.Embedding(2, 16)
        #self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        # Embedding tables whose weights are overwritten with the matrix in
        # hps['word_embeddings'].  Nothing here disables their gradients.
        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        # NOTE(review): exact repeat of the four lines above - the table is
        # rebuilt and filled with the same matrix, so this looks redundant.
        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        # Character-level CNN from the project's ``layer`` module (its
        # implementation is not visible from here).
        self.charCNN = layer.CharCNN(num_of_conv=3,
                                     in_channels=1,
                                     out_channels=50,
                                     kernel_size=[2, 3, 4],
                                     in_features=50,
                                     out_features=100)

        self.role_embeddings = nn.Embedding(self.tagset_size,
                                            role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size,
                                             frame_embedding_dim)

        # Linear chain 4h -> 2h -> dep_size -> pos_size (h = lstm_hidden_dim).
        self.hidden2tag = nn.Linear(4 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP = nn.Linear(2 * lstm_hidden_dim, self.dep_size)
        self.tag2hidden = nn.Linear(self.dep_size, self.pos_size)

        # Linear chain 2h -> 2h -> 4 -> pos_size.
        self.hidden2tag_spe = nn.Linear(2 * lstm_hidden_dim,
                                        2 * lstm_hidden_dim)
        self.MLP_spe = nn.Linear(2 * lstm_hidden_dim, 4)
        self.tag2hidden_spe = nn.Linear(4, self.pos_size)

        #self.elmo_embeddings_0 = nn.Embedding(vocab_size, 1024)
        #self.elmo_embeddings_0.weight.data.copy_(torch.from_numpy(hps['elmo_embeddings_0']))

        #self.elmo_embeddings_1 = nn.Embedding(vocab_size, 1024)
        #self.elmo_embeddings_1.weight.data.copy_(torch.from_numpy(hps['elmo_embeddings_1']))

        # Projection of 1024-wide inputs down to elmo_emb_size, plus a
        # two-element mixing weight and a scalar scale.
        self.elmo_emb_size = 200
        self.elmo_mlp_word = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                           nn.ReLU())
        self.elmo_word = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma_word = nn.Parameter(torch.ones(1))

        # Same structure, but fed with 2 * lstm_hidden_dim wide inputs.
        self.elmo_mlp = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, self.elmo_emb_size), nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.hidden_state_dropout = nn.Dropout(p=0.3)
        # NOTE(review): the next three lines re-assign the attributes set just
        # above with the same p=0.3, so they are redundant.
        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.hidden_state_dropout = nn.Dropout(p=0.3)
        self.dropout_1 = nn.Dropout(p=0.3)
        self.dropout_2 = nn.Dropout(p=0.3)

        self.hidden_state_dropout_SRL = nn.Dropout(p=0.3)
        self.dropout_1_SRL = nn.Dropout(p=0.3)
        self.dropout_2_SRL = nn.Dropout(p=0.3)

        #self.use_dropout = nn.Dropout(p=0.2)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.num_layers = 1
        self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        # Orthogonal init of the first two tensors in the first two weight
        # groups; the remaining entries of each group keep PyTorch's defaults.
        init.orthogonal_(self.BiLSTM_0.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][1])

        # Second single-layer BiLSTM; its input width is 2 * lstm_hidden_dim.
        self.num_layers = 1
        self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_1.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][1])

        # Three-layer BiLSTM for the SRL input; its width is the SRL embedding
        # width plus elmo_emb_size.
        self.num_layers = 3
        self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL +
                                  self.elmo_emb_size * 1,
                                  hidden_size=lstm_hidden_dim,
                                  batch_first=True,
                                  bidirectional=True,
                                  num_layers=self.num_layers)

        # NOTE(review): only all_weights[0] and all_weights[1] are touched.
        # With num_layers=3 and bidirectional=True the LSTM presumably exposes
        # further weight groups that keep their default init - confirm whether
        # the deeper layers were meant to be orthogonal too.
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])

        # Projections from 2h to h and a weight matrix drawn with torch.rand
        # for the DEP part.
        self.Non_Predicate_Proj_DEP = nn.Linear(2 * lstm_hidden_dim,
                                                lstm_hidden_dim)
        self.Predicate_Proj_DEP = nn.Linear(2 * lstm_hidden_dim,
                                            lstm_hidden_dim)
        self.W_R_DEP = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1, self.dep_size * lstm_hidden_dim))

        # Same layout for the part sized by tagset_size.
        self.Non_Predicate_Proj = nn.Linear(2 * lstm_hidden_dim,
                                            lstm_hidden_dim)
        self.Predicate_Proj = nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim)
        self.W_R = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        # Init hidden state
        # (the init_hidden_* helpers are defined elsewhere in the class and
        # are not visible from here.)
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_4 = self.init_hidden_share()

        # Init hidden state
        # NOTE(review): hidden, hidden_2 and hidden_3 are assigned a second
        # time here; only the attributes after them are new.
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_DEP_base = self.init_hidden_spe()
        self.hidden_DEP = self.init_hidden_spe()
        self.hidden_SRL_base = self.init_hidden_spe()
        self.hidden_SRL = self.init_hidden_SRL()
        self.hidden_PI = self.init_hidden_share()
Exemple #22
0
    def __init__(self, hps, *_):
        """Create the embeddings, the four BiLSTM encoders and the scorers.

        Args:
            hps: mapping of hyper-parameters.  The keys read here are
                'batch_size', 'sent_hdim', 'sent_edim', 'pos_edim',
                'role_edim', 'vword', 'vbio', 'vpos', 'vdep', 'vframe',
                'rec_layers', 'svdep' and 'word_embeddings' (handed to
                ``torch.from_numpy``, so it has to be a NumPy array).
            *_: any further positional arguments are accepted and ignored.
        """
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        # Input widths of the two first-level encoders.  The pos_edim term is
        # multiplied by 0, so it does not contribute to the SRL width.
        sent_embedding_dim_DEP = 2 * hps['sent_edim']
        sent_embedding_dim_SRL = 2 * hps['sent_edim'] + 0 * hps['pos_edim'] + 16

        self.sent_embedding_dim_DEP = sent_embedding_dim_DEP
        ## for the region mark
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        # Stored as given; the LSTMs below use their own *_num_layers values.
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        # Embedding tables created with PyTorch's default initialisation.
        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size,
                                               hps['sent_edim'])
        # NOTE(review): the embedding width here is self.pos_size, not an
        # *_edim hyper-parameter - confirm this is intended.
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        self.region_embeddings = nn.Embedding(2, 16)
        # self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        # Embedding tables whose weights are overwritten with the matrix in
        # hps['word_embeddings'].  Nothing here disables their gradients.
        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.role_embeddings = nn.Embedding(self.tagset_size,
                                            role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size,
                                             frame_embedding_dim)

        # Linear chain 4h -> 2h -> dep_size -> pos_size (h = lstm_hidden_dim).
        self.hidden2tag = nn.Linear(4 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP = nn.Linear(2 * lstm_hidden_dim, self.dep_size)
        self.tag2hidden = nn.Linear(self.dep_size, self.pos_size)

        # Linear chain 2h -> 2h -> 4 -> pos_size.
        self.hidden2tag_spe = nn.Linear(2 * lstm_hidden_dim,
                                        2 * lstm_hidden_dim)
        self.MLP_spe = nn.Linear(2 * lstm_hidden_dim, 4)
        self.tag2hidden_spe = nn.Linear(4, self.pos_size)

        # self.elmo_embeddings_0 = nn.Embedding(vocab_size, 1024)
        # self.elmo_embeddings_0.weight.data.copy_(torch.from_numpy(hps['elmo_embeddings_0']))

        # self.elmo_embeddings_1 = nn.Embedding(vocab_size, 1024)
        # self.elmo_embeddings_1.weight.data.copy_(torch.from_numpy(hps['elmo_embeddings_1']))

        # Projection of 1024-wide inputs down to elmo_emb_size, plus a
        # two-element mixing weight and a scalar scale.
        self.elmo_emb_size = 200
        self.elmo_mlp_word = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                           nn.ReLU())
        self.elmo_word = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma_word = nn.Parameter(torch.ones(1))

        # Same structure, but fed with 2 * lstm_hidden_dim wide inputs.
        self.elmo_mlp = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, self.elmo_emb_size), nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

        # Dropout layers with p=0.5 (0.3 for hidden_state_dropout_DEP).
        self.SRL_input_dropout = nn.Dropout(p=0.5)
        self.DEP_input_dropout = nn.Dropout(p=0.5)
        self.hidden_state_dropout_DEP = nn.Dropout(p=0.3)

        self.hidden_state_dropout_1 = nn.Dropout(p=0.5)
        self.hidden_state_dropout_2 = nn.Dropout(p=0.5)
        self.head_dropout = nn.Dropout(p=0.5)
        self.dep_dropout = nn.Dropout(p=0.5)

        # The "*_unlabeled" dropout layers all use the lighter p=0.2.
        self.DEP_input_dropout_unlabeled = nn.Dropout(p=0.2)
        self.hidden_state_dropout_1_unlabeled = nn.Dropout(p=0.2)
        self.hidden_state_dropout_2_unlabeled = nn.Dropout(p=0.2)
        self.head_dropout_unlabeled = nn.Dropout(p=0.2)
        self.dep_dropout_unlabeled = nn.Dropout(p=0.2)

        self.head_dropout_unlabeled_FF = nn.Dropout(p=0.2)
        self.dep_dropout_unlabeled_FF = nn.Dropout(p=0.2)
        self.head_dropout_unlabeled_BB = nn.Dropout(p=0.2)
        self.dep_dropout_unlabeled_BB = nn.Dropout(p=0.2)
        self.head_dropout_unlabeled_FB = nn.Dropout(p=0.2)
        self.dep_dropout_unlabeled_FB = nn.Dropout(p=0.2)
        self.head_dropout_unlabeled_BF = nn.Dropout(p=0.2)
        self.dep_dropout_unlabeled_BF = nn.Dropout(p=0.2)
        # self.use_dropout = nn.Dropout(p=0.2)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.SA_primary_num_layers = 1
        self.BiLSTM_SA_primary = nn.LSTM(input_size=sent_embedding_dim_DEP,
                                         hidden_size=lstm_hidden_dim,
                                         batch_first=True,
                                         bidirectional=True,
                                         num_layers=self.SA_primary_num_layers)

        # Orthogonal init of the first two tensors in the first two weight
        # groups; the remaining entries of each group keep PyTorch's defaults.
        init.orthogonal_(self.BiLSTM_SA_primary.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SA_primary.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SA_primary.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SA_primary.all_weights[1][1])

        # Second-level "SA" encoder; its input width is 2 * lstm_hidden_dim.
        self.SA_high_num_layers = 1
        self.BiLSTM_SA_high = nn.LSTM(input_size=lstm_hidden_dim * 2,
                                      hidden_size=lstm_hidden_dim,
                                      batch_first=True,
                                      bidirectional=True,
                                      num_layers=self.SA_high_num_layers)

        init.orthogonal_(self.BiLSTM_SA_high.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SA_high.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SA_high.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SA_high.all_weights[1][1])

        # First-level SRL encoder over the SRL input embedding.
        self.SRL_primary_num_layers = 1
        self.BiLSTM_SRL_primary = nn.LSTM(
            input_size=sent_embedding_dim_SRL,
            hidden_size=lstm_hidden_dim,
            batch_first=True,
            bidirectional=True,
            num_layers=self.SRL_primary_num_layers)

        init.orthogonal_(self.BiLSTM_SRL_primary.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL_primary.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL_primary.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL_primary.all_weights[1][1])

        # Second-level SRL encoder with two stacked layers.
        self.SRL_high_num_layers = 2
        self.BiLSTM_SRL_high = nn.LSTM(input_size=2 * lstm_hidden_dim,
                                       hidden_size=lstm_hidden_dim,
                                       batch_first=True,
                                       bidirectional=True,
                                       num_layers=self.SRL_high_num_layers)

        # NOTE(review): only all_weights[0] and all_weights[1] are touched.
        # With num_layers=2 and bidirectional=True the LSTM presumably exposes
        # further weight groups that keep their default init - confirm whether
        # the second layer was meant to be orthogonal too.
        init.orthogonal_(self.BiLSTM_SRL_high.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL_high.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL_high.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL_high.all_weights[1][1])

        # non-linear map to role embedding
        # (only the Linear layer is created here; no activation is attached.)
        self.role_map = nn.Linear(in_features=role_embedding_dim * 2,
                                  out_features=self.hidden_dim * 4)

        self.map_dim = lstm_hidden_dim

        # Scorer over the full 2 * ldims wide states: two projections to ldims
        # and a weight matrix drawn with torch.rand.
        self.ldims = lstm_hidden_dim
        self.hidLayerFOH_SRL = nn.Linear(self.ldims * 2, self.ldims)
        self.hidLayerFOM_SRL = nn.Linear(self.ldims * 2, self.ldims)
        self.W_R_SRL = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        # Four further scorers (FF, BB, BF, FB) with the same layout, except
        # that their projections take ldims wide inputs.
        self.hidLayerFOH_SRL_FF = nn.Linear(self.ldims, self.ldims)
        self.hidLayerFOM_SRL_FF = nn.Linear(self.ldims, self.ldims)
        self.W_R_SRL_FF = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        self.hidLayerFOH_SRL_BB = nn.Linear(self.ldims, self.ldims)
        self.hidLayerFOM_SRL_BB = nn.Linear(self.ldims, self.ldims)
        self.W_R_SRL_BB = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        self.hidLayerFOH_SRL_BF = nn.Linear(self.ldims, self.ldims)
        self.hidLayerFOM_SRL_BF = nn.Linear(self.ldims, self.ldims)
        self.W_R_SRL_BF = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        self.hidLayerFOH_SRL_FB = nn.Linear(self.ldims, self.ldims)
        self.hidLayerFOM_SRL_FB = nn.Linear(self.ldims, self.ldims)
        self.W_R_SRL_FB = nn.Parameter(
            torch.rand(lstm_hidden_dim + 1,
                       self.tagset_size * (lstm_hidden_dim + 1)))

        # Learnable float32 row vector as wide as the DEP input, starting as
        # all ones.
        self.VR_embedding = nn.Parameter(
            torch.from_numpy(
                np.ones((1, sent_embedding_dim_DEP), dtype='float32')))

        # Two-layer MLP from 2h wide states to pos_size outputs.
        self.mid_hidden = lstm_hidden_dim
        self.POS_MLP = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim), nn.ReLU(),
            nn.Linear(lstm_hidden_dim, self.pos_size))

        # Initial hidden states, one per encoder; the init_* helpers are
        # defined elsewhere in the class and are not visible from here.
        self.SRL_primary_hidden = self.init_SRL_primary()
        self.SRL_high_hidden = self.init_SRL_high()
        self.SA_primary_hidden = self.init_SA_primary()
        self.SA_high_hidden = self.init_SA_high()
Exemple #23
0
    def __init__(
        self,
        d_feat=6,
        output_dim=1,
        freq_dim=10,
        hidden_size=64,
        dropout_W=0.0,
        dropout_U=0.0,
        device="cpu",
    ):
        """Allocate every learnable tensor and activation held by this module.

        Args:
            d_feat: stored as ``input_dim``; row count of the ``W_i``,
                ``W_ste``, ``W_fre``, ``W_c`` and ``W_o`` matrices.
            output_dim: stored as ``output_dim``; column count of ``W_p`` and
                input width of ``fc_out``.
            freq_dim: stored as ``freq_dim``; column count of ``W_fre`` and
                ``U_fre`` and row count of ``U_a``.
            hidden_size: stored as ``hidden_dim``.
            dropout_W: stored unchanged on the instance.
            dropout_U: stored unchanged on the instance.
            device: stored unchanged on the instance; no tensor is moved here.
        """
        super().__init__()

        def xavier(rows, cols):
            # New (rows, cols) parameter filled by Xavier-uniform initialisation.
            return nn.Parameter(init.xavier_uniform_(torch.empty(rows, cols)))

        def orthogonal(rows, cols):
            # New (rows, cols) parameter filled by orthogonal initialisation.
            return nn.Parameter(init.orthogonal_(torch.empty(rows, cols)))

        n_in, n_out, n_freq, n_hid = d_feat, output_dim, freq_dim, hidden_size

        self.input_dim = n_in
        self.output_dim = n_out
        self.freq_dim = n_freq
        self.hidden_dim = n_hid
        self.device = device

        # NOTE: parameters are created in a fixed order so that both the
        # registration order and the random-number draws stay stable.
        self.W_i = xavier(n_in, n_hid)
        self.U_i = orthogonal(n_hid, n_hid)
        self.b_i = nn.Parameter(torch.zeros(n_hid))

        self.W_ste = xavier(n_in, n_hid)
        self.U_ste = orthogonal(n_hid, n_hid)
        self.b_ste = nn.Parameter(torch.ones(n_hid))

        self.W_fre = xavier(n_in, n_freq)
        self.U_fre = orthogonal(n_hid, n_freq)
        self.b_fre = nn.Parameter(torch.ones(n_freq))

        self.W_c = xavier(n_in, n_hid)
        self.U_c = orthogonal(n_hid, n_hid)
        self.b_c = nn.Parameter(torch.zeros(n_hid))

        self.W_o = xavier(n_in, n_hid)
        self.U_o = orthogonal(n_hid, n_hid)
        self.b_o = nn.Parameter(torch.zeros(n_hid))

        self.U_a = orthogonal(n_freq, 1)
        self.b_a = nn.Parameter(torch.zeros(n_hid))

        self.W_p = xavier(n_hid, n_out)
        self.b_p = nn.Parameter(torch.zeros(n_out))

        self.activation = nn.Tanh()
        self.inner_activation = nn.Hardsigmoid()
        self.dropout_W = dropout_W
        self.dropout_U = dropout_U
        self.fc_out = nn.Linear(n_out, 1)

        self.states = []
Exemple #24
0
def weight_init(m):
    '''
    Re-initialise one module in place, picking the scheme from its type.

    Usage:
        model = Model()
        model.apply(weight_init)

    Schemes applied:
        Conv1d / ConvTranspose1d ........ weight ~ N(0, 1), bias ~ N(0, 1)
        Conv2d/3d / ConvTranspose2d/3d .. weight Xavier-normal, bias ~ N(0, 1)
        BatchNorm1d/2d/3d ............... weight ~ N(1, 0.02), bias = 0
        Linear .......................... weight Xavier-normal, bias ~ N(0, 1)
        LSTM / LSTMCell / GRU / GRUCell . tensors with >= 2 dims orthogonal,
                                          all others ~ N(0, 1)

    Modules of any other type are left untouched. Optional tensors that are
    absent (``bias=False`` layers, ``affine=False`` batch norms) are skipped
    instead of raising ``AttributeError``.

    Args:
        m: the module to initialise.
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d,
                        nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both weight and bias are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        # Linear(..., bias=False) has no bias tensor to initialise.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #25
0
 def _initialize_weights(self):
     """Orthogonally initialise the four conv kernels.

     ``conv1``..``conv3`` use the gain recommended for ReLU; ``conv4`` uses
     the default gain.
     """
     relu_gain = init.calculate_gain("relu")
     for attr in ("conv1", "conv2", "conv3"):
         init.orthogonal_(getattr(self, attr).weight, relu_gain)
     init.orthogonal_(self.conv4.weight)
Exemple #26
0
 def init_weight(self):
     """Initialise ``hidden_proj`` and the first-layer GRU tensors in place.

     ``hidden_proj.weight`` gets Xavier-normal values, the GRU's
     ``weight_hh_l0`` and ``weight_ih_l0`` get orthogonal values, and both
     first-layer GRU biases are set to zero.
     """
     init.xavier_normal_(self.hidden_proj.weight)
     rnn = self.gru
     # Hidden-to-hidden first, then input-to-hidden, to keep the draw order.
     for matrix_name in ("weight_hh_l0", "weight_ih_l0"):
         init.orthogonal_(getattr(rnn, matrix_name))
     for bias_name in ("bias_ih_l0", "bias_hh_l0"):
         getattr(rnn, bias_name).data.fill_(0.0)
Exemple #27
0
 def init_weights(self):
     """Orthogonally initialise every ``self.rnn`` parameter with more than one dimension.

     One-dimensional parameters are left as they are.
     """
     matrices = (p for p in self.rnn.parameters() if p.dim() > 1)
     for matrix in matrices:
         weight_init.orthogonal_(matrix)
Exemple #28
0
 def reset_parameters(self):
     """Re-draw ``weight`` orthogonally (scaled by ``gain``) and ``bias`` uniformly.

     When a bias exists it is sampled from U(-b, b) with
     b = 1 / sqrt(fan_in of ``weight``); a ``None`` bias is left alone.
     """
     init.orthogonal_(self.weight, self.gain)
     if self.bias is None:
         return
     fan_in = init._calculate_fan_in_and_fan_out(self.weight)[0]
     limit = 1 / math.sqrt(fan_in)
     init.uniform_(self.bias, -limit, limit)
Exemple #29
0
 def init_params(self):
     """Initialise every entry of ``self.LSTMLayer.all_weights`` in place.

     For each entry, items 0 and 1 are initialised orthogonally and
     items 2 and 3 are zeroed.
     """
     for group in self.LSTMLayer.all_weights:
         for matrix in (group[0], group[1]):
             init.orthogonal_(matrix)
         for vector in (group[2], group[3]):
             init.zeros_(vector)
 def _initialize_weights(self):
     """Apply orthogonal initialisation to ``conv1`` through ``conv4``.

     The first three kernels are scaled by the ReLU gain; the last one keeps
     the default gain.
     """
     gain_for_relu = init.calculate_gain('relu')
     for index in (1, 2, 3):
         kernel = getattr(self, 'conv%d' % index).weight
         init.orthogonal_(kernel, gain_for_relu)
     init.orthogonal_(self.conv4.weight)
Exemple #31
0
    def __init__(self, hps, *_):
        """Build all embeddings, projections, dropouts and BiLSTMs of the tagger.

        Args:
            hps: mapping of hyper-parameters. Keys read here:
                ``batch_size``, ``sent_hdim``, ``sent_edim``, ``pos_edim``,
                ``role_edim``, ``vword``, ``vbio``, ``vpos``, ``vdep``,
                ``vframe``, ``rec_layers``, ``svdep`` and ``word_embeddings``
                (a NumPy array copied into the two fixed word-embedding
                tables of shape ``(vword, sent_edim)``).
            *_: extra positional arguments are accepted and ignored.

        Note:
            ``self.num_layers`` is first set from ``hps['rec_layers']`` but is
            then overwritten with literal values while the LSTMs are built
            (its final value is 2).
        """
        super(BiLSTMTagger, self).__init__()

        def _orthogonalize_lstm(lstm):
            # ``all_weights`` has one entry per (layer, direction); items 0 and
            # 1 of each entry are the input-hidden and hidden-hidden matrices.
            # Walking every entry also covers the second layer of stacked
            # LSTMs, which fixed indices [0] and [1] would miss.
            for direction_weights in lstm.all_weights:
                init.orthogonal_(direction_weights[0])
                init.orthogonal_(direction_weights[1])

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        sent_embedding_dim_DEP = 2 * hps['sent_edim'] + 1 * hps['pos_edim']
        # NOTE(review): the + 16 presumably matches the width of
        # ``region_embeddings`` (the region mark) -- confirm against forward().
        sent_embedding_dim_SRL = 2 * hps['sent_edim'] + 16
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size,
                                               hps['sent_edim'])
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        self.region_embeddings = nn.Embedding(2, 16)
        #self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        # Two embedding tables whose weights are overwritten with the
        # embeddings supplied in ``hps['word_embeddings']``.
        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size,
                                                      hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(
            torch.from_numpy(hps['word_embeddings']))

        self.role_embeddings = nn.Embedding(self.tagset_size,
                                            role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size,
                                             frame_embedding_dim)

        self.hidden2tag = nn.Linear(4 * lstm_hidden_dim, 2 * lstm_hidden_dim)
        self.MLP = nn.Linear(2 * lstm_hidden_dim, self.specific_dep_size)
        self.tag2hidden = nn.Linear(self.specific_dep_size, self.pos_size)

        self.Head_Proj = nn.Linear(4 * lstm_hidden_dim, lstm_hidden_dim)
        self.W_share = nn.Parameter(
            torch.rand(lstm_hidden_dim, self.dep_size * lstm_hidden_dim))
        self.Dep_Proj = nn.Linear(4 * lstm_hidden_dim, lstm_hidden_dim)

        self.MLP_identification = nn.Linear(4 * lstm_hidden_dim,
                                            2 * lstm_hidden_dim)
        self.Idenficiation = nn.Linear(2 * lstm_hidden_dim, 3)

        self.Non_Predicate_Proj = nn.Linear(2 * lstm_hidden_dim,
                                            lstm_hidden_dim)
        self.Predicate_Proj = nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim)

        self.MLP_classifier_1 = nn.Linear(400, 400)
        self.MLP_classifier_0 = nn.Linear(400, self.tagset_size)

        self.elmo_emb_size = 200
        self.elmo_mlp_word = nn.Sequential(nn.Linear(1024, self.elmo_emb_size),
                                           nn.ReLU())
        self.elmo_word = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma_word = nn.Parameter(torch.ones(1))

        self.elmo_mlp = nn.Sequential(
            nn.Linear(2 * lstm_hidden_dim, self.elmo_emb_size), nn.ReLU())
        self.elmo_w = nn.Parameter(torch.Tensor([0.5, 0.5]))
        self.elmo_gamma = nn.Parameter(torch.ones(1))

        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.3)
        self.hidden_state_dropout = nn.Dropout(p=0.3)
        self.dropout_1 = nn.Dropout(p=0.3)
        self.dropout_2 = nn.Dropout(p=0.3)
        self.label_dropout_3 = nn.Dropout(p=0.3)
        self.label_dropout_4 = nn.Dropout(p=0.3)
        self.id_dropout = nn.Dropout(p=0.3)
        #self.use_dropout = nn.Dropout(p=0.2)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.num_layers = 1
        self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        _orthogonalize_lstm(self.BiLSTM_0)

        self.num_layers = 1
        self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2,
                                hidden_size=lstm_hidden_dim,
                                batch_first=True,
                                bidirectional=True,
                                num_layers=self.num_layers)

        _orthogonalize_lstm(self.BiLSTM_1)

        self.num_layers = 2
        self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL,
                                  hidden_size=lstm_hidden_dim,
                                  batch_first=True,
                                  bidirectional=True,
                                  num_layers=self.num_layers)

        # Two stacked bidirectional layers -> four weight groups, all of
        # which receive the orthogonal initialisation.
        _orthogonalize_lstm(self.BiLSTM_SRL)

        self.More_1 = nn.Linear(2 * lstm_hidden_dim, lstm_hidden_dim)
        self.W_R = nn.Linear(lstm_hidden_dim, self.dep_size)

        # Init hidden state
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_4 = self.init_hidden_share()
Exemple #32
0
def weight_init(m): # https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5
    """Re-initialise one module in place, picking the scheme from its type.

    Intended to be passed to ``model.apply(weight_init)``.

    Schemes applied:
        Conv1d / ConvTranspose1d ........ weight ~ N(0, 1), bias ~ N(0, 1)
        Conv2d/3d / ConvTranspose2d/3d .. weight Kaiming-normal, bias ~ N(0, 1)
        BatchNorm1d/2d/3d ............... weight ~ N(1, 0.02), bias = 0
        Linear .......................... weight Kaiming-normal, bias ~ N(0, 1)
        LSTM / LSTMCell / GRU / GRUCell . tensors with >= 2 dims orthogonal,
                                          all others ~ N(0, 1)

    Modules of any other type are left untouched. Optional tensors that are
    absent (``bias=False`` layers, ``affine=False`` batch norms) are skipped
    instead of raising ``AttributeError``.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d,
                        nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With affine=False both weight and bias are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight.data)
        # Linear(..., bias=False) has no bias tensor to initialise.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Exemple #33
0
 def __init__(self):
     """Register ``test_weight``, a 5x5 parameter filled by orthogonal initialisation."""
     super(DummyTorchModule, self).__init__()
     # torch.Tensor(5, 5) allocates the storage; orthogonal_ overwrites it in place.
     self.test_weight = torch.nn.Parameter(
         init.orthogonal_(torch.Tensor(5, 5)))