Example #1
    def __init__(self, input_size,n_head=4, identity=False):
        super(multiSeqAttnMatch, self).__init__()

        self.hidden_size = input_size // n_head
        self.w = nn.Parameter(torch.FloatTensor(n_head, input_size, self.hidden_size))
        init.xavier_normal(self.w)
        self.n_head = n_head
Example #2
def weights_init_xavier(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('Linear') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('BatchNorm2d') != -1:
        init.normal(m.weight.data, 1.0, 0.02)
        init.constant(m.bias.data, 0.0)
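
A minimal usage sketch (not part of the original project): an initializer like weights_init_xavier is normally run over every submodule with Module.apply. The sketch uses the in-place underscore API (xavier_normal_, normal_, constant_); the plain init.xavier_normal / init.normal / init.constant calls seen throughout these examples were deprecated in favor of those underscore variants in PyTorch 0.4.

import torch.nn as nn
from torch.nn import init

def weights_init_xavier_(m):
    # same classname dispatch as weights_init_xavier above, with the current in-place API
    classname = m.__class__.__name__
    if classname.find('Conv') != -1 or classname.find('Linear') != -1:
        init.xavier_normal_(m.weight, gain=0.02)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight, 1.0, 0.02)
        init.constant_(m.bias, 0.0)

net = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))
net.apply(weights_init_xavier_)  # apply() visits every submodule recursively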
Example #3
    def __init__(self, args):
        super(BiGRU, self).__init__()
        self.hidden_dim = args.hidden_dim
        self.batch_size = args.batch_size
        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)
        self.word_embeddings = nn.Embedding(args.embed_num, args.embedding_dim)
        self.bigru = nn.GRU(args.embedding_dim, args.hidden_dim, bidirectional=True, dropout=args.dropout_model)

        # self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
        # self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
        self.hidden2label = nn.Linear(args.hidden_dim * 2, args.class_num)
        self.hidden = self.init_hidden(args.batch_size)
        pretrained_weight = np.array(args.pretrained_weight)
        # print(pretrained_weight.shape)
        self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # init the recurrent weights: all_weights[d] = [weight_ih, weight_hh, bias_ih, bias_hh]
        # for direction d; a GRU has no forget gate, so no bias blocks are filled here
        init.xavier_normal(self.bigru.all_weights[0][0], gain=np.sqrt(2.0))
        init.xavier_normal(self.bigru.all_weights[0][1], gain=np.sqrt(2.0))
        init.xavier_normal(self.bigru.all_weights[1][0], gain=np.sqrt(2.0))
        init.xavier_normal(self.bigru.all_weights[1][1], gain=np.sqrt(2.0))
        # self.bigru.all_weights[0][3].data.fill_(0.1)
        # self.bigru.all_weights[0][2].data.fill_(0.1)
        # self.bigru.all_weights[1][3].data.fill_(0.1)
        # self.bigru.all_weights[1][2].data.fill_(0.1)

        self.bn1 = nn.BatchNorm1d(600)
Example #4
    def __init__(self, args):
        super(BiLSTM_1, self).__init__()
        self.args = args
        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)
        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} |||||".format(args.max_norm))
            self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        self.bilstm = nn.LSTM(D, self.hidden_dim, num_layers=self.num_layers, bias=True, bidirectional=True,
                              dropout=self.args.dropout)
        print(self.bilstm)
        if args.init_weight:
            print("Initing W .......")
            init.xavier_normal(self.bilstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
            init.xavier_normal(self.bilstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))
            init.xavier_normal(self.bilstm.all_weights[1][0], gain=np.sqrt(args.init_weight_value))
            init.xavier_normal(self.bilstm.all_weights[1][1], gain=np.sqrt(args.init_weight_value))
            # print("eeeeeeeeeeeeeeeeeeeeeeee")
            # fan_in, fan_out = BiLSTM_1.calculate_fan_in_and_fan_out(self.bilstm.all_weights[1][1])
            # print(" in {} out {} ".format(fan_in, fan_out))
            # std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
            # print("aaaaaaaaaaaaa {} ".format(std))
            # print("self.bilstm.all_weights {} ".format(self.bilstm.all_weights))
            # self.bilstm.all_weights[0][3].data.fill_(0)
            # self.bilstm.all_weights[0][2].data.fill_(0)
            # self.bilstm.all_weights[1][3].data.fill_(0)
            # self.bilstm.all_weights[1][2].data.fill_(0)
            # self.bilstm.all_weights[0][3].data[20:40].fill_(1)
            # self.bilstm.all_weights[0][3].data[0:20].fill_(0)
            # self.bilstm.all_weights[0][3].data[40:80].fill_(0)
            # # self.bilstm.all_weights[0][3].data[40:].fill_(0)
            # self.bilstm.all_weights[0][2].data[20:40].fill_(1)
            # self.bilstm.all_weights[0][2].data[0:20].fill_(0)
            # self.bilstm.all_weights[0][2].data[40:80].fill_(0)
            # # self.bilstm.all_weights[0][2].data[40:].fill_(0)
            # self.bilstm.all_weights[1][3].data[20:40].fill_(1)
            # self.bilstm.all_weights[1][3].data[0:20].fill_(0)
            # self.bilstm.all_weights[1][3].data[40:80].fill_(0)
            # # self.bilstm.all_weights[1][3].data[40:].fill_(0)
            # self.bilstm.all_weights[1][2].data[20:40].fill_(1)
            # self.bilstm.all_weights[1][2].data[0:20].fill_(0)
            # self.bilstm.all_weights[1][2].data[40:80].fill_(0)
            # # self.bilstm.all_weights[1][2].data[40:].fill_(0)

        # self.hidden2label1 = nn.Linear(self.hidden_dim * 2, self.hidden_dim)
        # self.hidden2label2 = nn.Linear(self.hidden_dim, C)
        self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        print("self.hidden", self.hidden)
Example #5
 def __init__(self, args):
     super(BiLSTM, self).__init__()
     self.hidden_dim = args.hidden_dim
     self.batch_size = args.batch_size
     self.dropout = nn.Dropout(args.dropout)
     self.dropout_embed = nn.Dropout(args.dropout_embed)
     self.word_embeddings = nn.Embedding(args.embed_num, args.embedding_dim, max_norm=5.0)
     self.lstm = nn.LSTM(args.embedding_dim, args.hidden_dim, bidirectional=True, dropout=args.dropout_model)
     self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
     self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
     self.hidden = self.init_hidden(args.batch_size)
     pretrained_weight = np.array(args.pretrained_weight)
     # print(pretrained_weight.shape)
     self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrained_weight))
     # weight modify
     init.xavier_normal(self.lstm.all_weights[0][0], gain=np.sqrt(2.0))
     init.xavier_normal(self.lstm.all_weights[0][1], gain=np.sqrt(2.0))
     init.xavier_normal(self.lstm.all_weights[1][0], gain=np.sqrt(2.0))
     init.xavier_normal(self.lstm.all_weights[1][1], gain=np.sqrt(2.0))
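     # Note on the hard-coded slices below (they only make sense if hidden_dim == 20):
     # PyTorch stores the LSTM gate biases concatenated in the order [input, forget, cell,
     # output], each block of size hidden_dim, so [20:40] is the forget-gate block (set to 1)
     # and the remaining blocks are zeroed. all_weights[d][2] is bias_ih and all_weights[d][3]
     # is bias_hh for direction d of this bidirectional layer.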
     self.lstm.all_weights[0][3].data[20:40].fill_(1)
     self.lstm.all_weights[0][3].data[0:20].fill_(0)
     self.lstm.all_weights[0][3].data[40:80].fill_(0)
     self.lstm.all_weights[0][2].data[20:40].fill_(1)
     self.lstm.all_weights[0][2].data[0:20].fill_(0)
     self.lstm.all_weights[0][2].data[40:80].fill_(0)
     self.lstm.all_weights[1][3].data[20:40].fill_(1)
     self.lstm.all_weights[1][3].data[0:20].fill_(0)
     self.lstm.all_weights[1][3].data[40:80].fill_(0)
     self.lstm.all_weights[1][2].data[20:40].fill_(1)
     self.lstm.all_weights[1][2].data[0:20].fill_(0)
     self.lstm.all_weights[1][2].data[40:80].fill_(0)
Example #6
    def __init__(self, args):
        super(DEEP_CNN_MUI, self).__init__()
        self.args = args
        
        V = args.embed_num
        V_mui = args.embed_num_mui
        D = args.embed_dim
        C = args.class_num
        Ci = 2
        Co = args.kernel_num
        Ks = args.kernel_sizes
        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)

        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
            pretrained_weight_static = np.array(args.pretrained_weight_static)
            self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
            # whether to fine-tune the word embedding (set requires_grad=False to freeze it)
            self.embed_no_static.weight.requires_grad = True
        # cons layer
        self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
        self.convs2 = [nn.Conv2d(1, Co, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
        print(self.convs1)
        print(self.convs2)

        if args.init_weight:
            print("Initing W .......")
            for (conv1, conv2) in zip(self.convs1, self.convs2):
                init.xavier_normal(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv1.bias, 0, 0)
                init.xavier_normal(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv2.bias, 0, 0)

        # dropout
        self.dropout = nn.Dropout(args.dropout)
        # linear
        in_fea = len(Ks) * Co
        self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
        self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
Example #7
 def init_func(m):
     classname = m.__class__.__name__
     if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
         if init_type == 'normal':
             init.normal(m.weight.data, 0.0, gain)
         elif init_type == 'xavier':
             init.xavier_normal(m.weight.data, gain=gain)
         elif init_type == 'kaiming':
             init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
         elif init_type == 'orthogonal':
             init.orthogonal(m.weight.data, gain=gain)
         else:
             raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
         if hasattr(m, 'bias') and m.bias is not None:
             init.constant(m.bias.data, 0.0)
     elif classname.find('BatchNorm2d') != -1:
         init.normal(m.weight.data, 1.0, gain)
         init.constant(m.bias.data, 0.0)
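
A hedged sketch (an assumption, not code from the source project) of the wrapper such an init_func usually lives in: init_type and gain are free variables of the closure, and Module.apply dispatches it over every layer. The current underscore initializers are used here.

import torch.nn as nn
from torch.nn import init

def init_weights(net, init_type='xavier', gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
            if init_type == 'xavier':
                init.xavier_normal_(m.weight, gain=gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight, a=0, mode='fan_in')
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias, 0.0)
        elif 'BatchNorm2d' in classname:
            init.normal_(m.weight, 1.0, gain)
            init.constant_(m.bias, 0.0)
    net.apply(init_func)

init_weights(nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8)), init_type='xavier')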
Example #8
File: utils.py Project: phonx/MUNIT
 def init_fun(m):
     classname = m.__class__.__name__
     if (classname.find('Conv') == 0 or classname.find('Linear') == 0) and hasattr(m, 'weight'):
         # print m.__class__.__name__
         if init_type == 'gaussian':
             init.normal(m.weight.data, 0.0, 0.02)
         elif init_type == 'xavier':
             init.xavier_normal(m.weight.data, gain=math.sqrt(2))
         elif init_type == 'kaiming':
             init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
         elif init_type == 'orthogonal':
             init.orthogonal(m.weight.data, gain=math.sqrt(2))
         elif init_type == 'default':
             pass
         else:
             assert 0, "Unsupported initialization: {}".format(init_type)
         if hasattr(m, 'bias') and m.bias is not None:
             init.constant(m.bias.data, 0.0)
Example #9
    def __init__(self, args):
        super(CNN, self).__init__()
        self.args = args
        # self.conv1l = nn.Conv2d(3,20,5,stride=1,bias=True)
        # init.xavier_normal()

        Ci = 1
        self.embed = nn.Embedding(args.embed_num, args.embedding_dim)
        # print(self.embed)
        # pretrained_weight is a numpy matrix of shape (num_embeddings, embedding_dim)
        # print(len(args.pretrained_weight))

        pretrained_weight = np.array(args.pretrained_weight)
        # print(pretrained_weight.shape)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))

        # print(self.embed)                         # 15453 Embedding(15453, 128)

        self.convs1 = [nn.Conv2d(Ci, args.kernel_num, (K, args.embedding_dim)) for K in args.kernel_sizes]
        # init.xavier_normal([(conv.weight, gain=np.sqrt(2.0) for conv in self.convsl)])
        for conv in self.convs1:
            init.xavier_normal(conv.weight, gain=np.sqrt(2.0))
            # init.normal(conv.weight, mean=0, std=0.1)
            # init.constant(conv.bias, 0.1)
        # print(self.convs1)

        # self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        # self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        # self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        # self.conv16 = nn.Conv2d(Ci, Co, (6, D))

        self.dropout = nn.Dropout(args.dropout)
        self.fc1 = nn.Linear(len(args.kernel_sizes) * args.kernel_num, args.class_num)  # len(Ks)*Co -> C
        # self.fc1 = nn.Linear(len(Ks) * Co * 2, C)  # len(Ks)*Co*2 -> C
        # self.bn = nn.BatchNorm1d(1, momentum=0.5)
        self.bn = nn.BatchNorm2d(1)
Example #10
    def __init__(self, args):
        super(LSTM, self).__init__()
        self.args = args
        # print(args)

        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} |||||".format(args.max_norm))
            self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
        # word embedding
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # lstm
        self.lstm = nn.LSTM(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)

        if args.init_weight:
            print("Initing W .......")
            # n = self.lstm.input_size * self.lstm
            init.xavier_normal(self.lstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
            init.xavier_normal(self.lstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))

        # linear
        self.hidden2label = nn.Linear(self.hidden_dim, C)
        # hidden
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        # dropout
        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)
Example #11
 def __init__(self, in_size, out_size, kernel_size=3,stride=1, padding=1, activation=nn.ReLU(), space_dropout=False):
     super(UNetUpBlock, self).__init__()
     self.conv0 = nn.Conv2d(in_size, out_size, 3, stride=1, padding=1)
     self.conv = nn.Conv2d(in_size, out_size, kernel_size, stride=1, padding=1)
     self.conv2 = nn.Conv2d(out_size, out_size, kernel_size,stride=1, padding=1)
     init.xavier_normal(self.conv0.weight,gain=np.sqrt(2))
     init.xavier_normal(self.conv.weight,gain=np.sqrt(2))
     init.xavier_normal(self.conv2.weight,gain=np.sqrt(2))
     init.constant(self.conv0.bias, 0.1)
     init.constant(self.conv.bias, 0.1)
     init.constant(self.conv2.bias, 0.1)
     self.activation = activation
     self.upsampler = nn.Upsample(scale_factor=2)
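
A small side note, not from the source: gain=np.sqrt(2) is the gain recommended for ReLU activations, so the same value can be taken from PyTorch's gain table.

from torch.nn import init

relu_gain = init.calculate_gain('relu')  # sqrt(2) ~ 1.414, the value passed as gain above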
Example #12
    def __init__(self, in_size, out_size, kernel_size=3, stride=1, padding=1, activation = nn.ReLU(), downsample=True):
        super(UNetConvBlock, self).__init__()
        self.conv_down = nn.Conv2d(in_size, in_size, kernel_size, stride=2, padding=1)
        self.conv = nn.Conv2d(in_size, out_size, kernel_size, stride=1, padding=padding)
        self.conv2 = nn.Conv2d(out_size, out_size, kernel_size,stride=1, padding=1)
        init.xavier_normal(self.conv_down.weight,gain=np.sqrt(2))
        init.xavier_normal(self.conv.weight,gain=np.sqrt(2))
        init.xavier_normal(self.conv2.weight,gain=np.sqrt(2))
        init.constant(self.conv_down.bias,0.1)
        init.constant(self.conv.bias, 0.1)
        init.constant(self.conv2.bias, 0.1)

        self.activation = activation
        self.downsample = downsample
Example #13
    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super(MultiHeadAttention, self).__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
        self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
        self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))

        self.attention = ScaledDotProductAttention(d_model)
        self.layer_norm = LayerNormalization(d_model)
        self.proj = Linear(n_head*d_v, d_model)

        self.dropout = nn.Dropout(dropout)

        init.xavier_normal(self.w_qs)
        init.xavier_normal(self.w_ks)
        init.xavier_normal(self.w_vs)
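
A hedged alternative, assumed rather than taken from the source: applied to the stacked (n_head, d_model, d_k) parameters, Xavier derives fan-in and fan-out from the full 3-D shape; initializing each head's 2-D slice separately instead gives every head the statistics of an ordinary d_model -> d_k linear projection.

import torch
from torch.nn import init

n_head, d_model, d_k = 8, 512, 64        # illustrative sizes, not from the source
w_qs = torch.empty(n_head, d_model, d_k)
for head in range(n_head):
    init.xavier_normal_(w_qs[head])      # per-head fan_in = d_model, fan_out = d_k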
Example #14
    def __init__(self, **kwargs):
        super(TextCNN, self).__init__()
        self.input_size = kwargs['input_size']
        self.hidden_size = kwargs['hidden_size']
        self.output_size = kwargs['output_size']
        if 'kernel_num' in kwargs:
            self.kernel_num = kwargs['kernel_num']
        else:
            self.kernel_num = 256
        if 'kernel_sizes' in kwargs:
            self.kernel_sizes = kwargs['kernel_sizes']
        else:
            self.kernel_sizes = [1, 2, 3, 4]
        if 'embed_size' in kwargs:
            self.embed_size = kwargs['embed_size']
        else:
            self.embed_size = kwargs['hidden_size']
        if 'dropout' in kwargs:
            self.dropout = kwargs['dropout']
        else:
            self.dropout = 0.1
        if 'wide_conv' in kwargs:
            self.wide_conv = kwargs['wide_conv']
        else:
            self.wide_conv = False
        if 'init_weight' in kwargs:
            self.init_weight = kwargs['init_weight']
        else:
            self.init_weight = False
        if 'init_weight_value' in kwargs:
            self.init_weight_value = kwargs['init_weight_value']
        else:
            self.init_weight_value = 2.0
        if 'batch_normal' in kwargs:
            self.batch_normal = kwargs['batch_normal']
        else:
            self.batch_normal = False
        if 'batch_normal_momentum' in kwargs:
            self.batch_normal_momentum = kwargs['batch_normal_momentum']
        else:
            self.batch_normal_momentum = 0.1
        if 'batch_normal_affine' in kwargs:
            self.batch_normal_affine = kwargs['batch_normal_affine']
        else:
            self.batch_normal_affine = False

        Ci = 1  # input channels; text input uses a single channel
        Co = self.kernel_num  # output channel
        Ks = self.kernel_sizes  # list

        if 'max_norm' in kwargs:
            self.embed = nn.Embedding(self.input_size,
                                      self.embed_size,
                                      max_norm=kwargs['max_norm'])
        else:
            self.embed = nn.Embedding(self.input_size,
                                      self.embed_size,
                                      scale_grad_by_freq=True)
        if 'word_embedding' in kwargs:
            pretrained_weight = torch.from_numpy(kwargs['word_embedding'])
            self.embed.weight.data.copy_(pretrained_weight)
            self.embed.weight.requires_grad = True
        if self.wide_conv is True:
            self.convs1 = [
                nn.Conv2d(in_channels=Ci,
                          out_channels=Co,
                          kernel_size=(K, self.embed_size),
                          stride=(1, 1),
                          padding=(K // 2, 0),
                          dilation=1,
                          bias=True) for K in Ks
            ]
        else:
            self.convs1 = [
                nn.Conv2d(in_channels=Ci,
                          out_channels=Co,
                          kernel_size=(K, self.embed_size),
                          bias=True) for K in Ks
            ]
        if self.init_weight:
            for conv in self.convs1:
                init.xavier_normal(conv.weight.data,
                                   gain=np.sqrt(self.init_weight_value))
                fanin, fanout = self.cal_fanin_fanout(conv.weight.data)
                std = np.sqrt(self.init_weight_value) * np.sqrt(
                    2.0 / (fanin + fanout))
                init.uniform(conv.bias, 0, 0)

        self.dropout = nn.Dropout(self.dropout)
        in_fea = len(Ks) * Co
        self.f1 = nn.Linear(in_fea, in_fea // 2, bias=True)
        self.f2 = nn.Linear(in_fea // 2, self.output_size, bias=True)
        if self.batch_normal:
            self.convs1_bn = nn.BatchNorm2d(
                num_features=Co,
                momentum=self.batch_normal_momentum,
                affine=self.batch_normal_affine)
            self.f1_bn = nn.BatchNorm1d(num_features=in_fea // 2,
                                        momentum=self.batch_normal_momentum,
                                        affine=self.batch_normal_affine)
            self.f2_bn = nn.BatchNorm1d(num_features=self.output_size,
                                        momentum=self.batch_normal_momentum,
                                        affine=self.batch_normal_affine)
Example #15
 def __init__(self, d_in, d_out, bias=True):
     super(Linear, self).__init__()
     self.linear = nn.Linear(d_in, d_out, bias=bias)
     init.xavier_normal(self.linear.weight)
Example #16
    def __init__(self, args):
        super(CNN_MUI, self).__init__()
        self.args = args
        
        V = args.embed_num
        V_mui = args.embed_num_mui
        D = args.embed_dim
        C = args.class_num
        Ci = 2
        Co = args.kernel_num
        Ks = args.kernel_sizes

        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            # self.embed_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
            # self.embed_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
            pretrained_weight_static = np.array(args.pretrained_weight_static)
            self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
            # whether to fine-tune the word embedding (set requires_grad=False to freeze it)
            self.embed_no_static.weight.requires_grad = True
            # self.embed_static.weight.requires_grad = False

        if args.wide_conv is True:
            print("using wide convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
                                     padding=(K//2, 0), bias=True) for K in Ks]
        else:
            print("using narrow convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
        # self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0)) for K in Ks]
        print(self.convs1)

        if args.init_weight:
            print("Initing W .......")
            for conv in self.convs1:
                init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv.bias, 0, 0)
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)

        in_fea = len(Ks) * Co
        self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
        self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)

        if args.batch_normalizations is True:
            print("using batch_normalizations in the model......")
            self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                            affine=args.batch_norm_affine)
            self.fc1_bn = nn.BatchNorm1d(num_features=in_fea//2, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
            self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
Example #17
 def weights_init(m):
     for _, mi in m._modules.items():
         if isinstance(mi, nn.Conv2d) or isinstance(mi, nn.Linear):
             xavier_normal(mi.weight.data)
             if mi.bias is not None:
                 # Xavier init needs a tensor with at least two dimensions; zero the 1-D bias instead
                 mi.bias.data.zero_()
Example #18
 def __init__(self, d_in, d_out, bias=True):
     super(XavierLinear, self).__init__()
     self.linear = nn.Linear(d_in, d_out, bias=bias)
     init.xavier_normal(self.linear.weight)
Example #19
 def __init__(self, vocab_size, hidden_size):
     super(AnswerModule, self).__init__()
     self.z = nn.Linear(2 * hidden_size, vocab_size)
     init.xavier_normal(self.z.state_dict()['weight'])
     self.dropout = nn.Dropout(0.1)
Example #20
    def __init__(self):
        super(Discriminator, self).__init__()

        self.conv1 = nn.Conv2d(1,32,kernel_size=3,stride=2,padding=1)   # 256x256
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)    # 128x128
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)    # 64x64
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)    # 32x32
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)    # 16x16
        self.conv6 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)    # 8x8
        self.conv7 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1)    # 4x4
        self.conv8 = nn.Conv2d(512, 1024, kernel_size=4, stride=1, padding=0)    # 1x1
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.bn5 = nn.BatchNorm2d(256)
        self.bn6 = nn.BatchNorm2d(512)
        self.bn7 = nn.BatchNorm2d(512)
        self.sigmoid = nn.Sigmoid()
        self.lrelu = nn.LeakyReLU(negative_slope=0.2)
        init.xavier_normal(self.conv1.weight, gain=np.sqrt(2))
        init.constant(self.conv1.bias, 0.1)
        init.xavier_normal(self.conv2.weight, gain=np.sqrt(2))
        init.constant(self.conv2.bias, 0.1)
        init.xavier_normal(self.conv3.weight, gain=np.sqrt(2))
        init.constant(self.conv3.bias, 0.1)
        init.xavier_normal(self.conv4.weight, gain=np.sqrt(2))
        init.constant(self.conv4.bias, 0.1)
        init.xavier_normal(self.conv5.weight, gain=np.sqrt(2))
        init.constant(self.conv5.bias, 0.1)
        init.xavier_normal(self.conv6.weight, gain=np.sqrt(2))
        init.constant(self.conv6.bias, 0.1)
        init.xavier_normal(self.conv7.weight, gain=np.sqrt(2))
        init.constant(self.conv7.bias, 0.1)
        init.xavier_normal(self.conv8.weight, gain=np.sqrt(2))
        init.constant(self.conv8.bias, 0.1)
Example #21
 def weight_init(m):
     if isinstance(m, nn.Conv2d):
         init.xavier_normal(m.weight)
         init.constant(m.bias, 0)
Example #22
def weights_init(m):
    classname = m.__class__.__name__
    if 'Linear' in classname:
        init.xavier_normal(m.weight.data)
        init.constant(m.bias, 0.0)
Example #23
 def init_weight(self):
     init.xavier_normal(self.img_embed.weight)
     init.xavier_normal(self.att_embed.weight)
Example #24
def init_weights(layer):
    if isinstance(layer, nn.Linear):
        xavier_normal(layer.weight.data)
Example #25
def xavier_init(model):
    for param in model.parameters():
        if len(param.size()) == 2:
            xavier_normal(param)
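
A usage sketch (assumed, not from the source project): only parameters with exactly two dimensions, such as Linear weight matrices, are re-initialized; 1-D biases are skipped because Xavier needs both a fan-in and a fan-out dimension.

import torch.nn as nn

model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
xavier_init(model)  # Linear weights get Xavier-normal values, biases are left untouched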
Example #26
    def __init__(self, dimensions, **kwargs):
        super(VAE, self).__init__()
        assert len(dimensions) > 1

        # unpack dimension of vae
        self.embedding_dim = dimensions[0]
        self.hidden_dims = dimensions[1:-1]
        self.latent_dim = dimensions[-1]
        self.dec_final_act = kwargs['decoder_final_activation']
        self.device = torch.device('cuda' if (
            torch.cuda.is_available()) else 'cpu')
        self.is_logits = kwargs.get('logits', False)

        if self.is_logits:
            self.reconstruction_loss = nn.modules.loss.MSELoss()
        else:
            self.reconstruction_loss = self.binary_cross_entropy

        # Construct layers for encoder and decoder block
        # Encoder block
        self.enc_hidden_layers = nn.Sequential()
        self.enc_hidden_layers.add_module(
            'hidden_layer_0', nn.Linear(self.embedding_dim,
                                        self.hidden_dims[0]))
        self.enc_hidden_layers.add_module('h_layer_act_0', nn.ReLU())

        for i, _ in enumerate(self.hidden_dims[:-1]):
            self.enc_hidden_layers.add_module(
                'hidden_layer_{}'.format(i + 1),
                nn.Linear(self.hidden_dims[i], self.hidden_dims[i + 1])),
            self.enc_hidden_layers.add_module('h_layer_act_{}'.format(i + 1),
                                              nn.ReLU())

        # define mean and log variance of vae
        self.z_mean = nn.Linear(self.hidden_dims[-1], self.latent_dim)
        self.z_log_var = nn.Linear(self.hidden_dims[-1], self.latent_dim)
        # ~Encoder block

        # Decoder block
        dec_hidden_layers = nn.Sequential()
        dec_hidden_layers.add_module(
            'hidden_layer_0', nn.Linear(self.latent_dim, self.hidden_dims[-1]))
        dec_hidden_layers.add_module('h_layer_act_0', nn.ReLU())
        reversed_hidden_dims = list(reversed(self.hidden_dims))
        for i, _ in enumerate(reversed(self.hidden_dims)):
            if i == (len(reversed_hidden_dims) - 1):
                dec_hidden_layers.add_module(
                    'hidden_layer_{}'.format(i + 1),
                    nn.Linear(reversed_hidden_dims[i], self.embedding_dim)),
            else:
                dec_hidden_layers.add_module(
                    'hidden_layer_{}'.format(i + 1),
                    nn.Linear(reversed_hidden_dims[i],
                              reversed_hidden_dims[i + 1])),
                dec_hidden_layers.add_module('h_layer_act_{}'.format(i + 1),
                                             nn.ReLU())

        # Final activation function of decoder depends on data
        if self.dec_final_act is not None:
            if self.dec_final_act == 'sigmoid':
                dec_hidden_layers.add_module('dec_final_act', nn.Sigmoid())
            elif self.dec_final_act == 'tanh':
                dec_hidden_layers.add_module('dec_final_act', nn.Tanh())
            elif self.dec_final_act == 'relu':
                dec_hidden_layers.add_module('dec_final_act', nn.ReLU())
            else:
                pass
        self.decoder = dec_hidden_layers
        # ~ Decoder block

        for m in self.modules():
            if isinstance(m, nn.Linear):
                init.xavier_normal(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()

        self.to(self.device)
Example #27
def main():
    print("Loading data from '%s'" % opt.data)

    dataset = torch.load(opt.data)
    if opt.model_type == 'nmt':
        if dataset.get("type", "text") not in ["bitext", "text"]:
            print("WARNING: The provided dataset is not bilingual!")
    elif opt.model_type == 'lm':
        if dataset.get("type", "text") != 'monotext':
            print("WARNING: The provided dataset is not monolingual!")
    else:
        raise NotImplementedError('Not valid model type %s' % opt.model_type)

    dict_checkpoint = (opt.train_from
                       if opt.train_from else opt.train_from_state_dict)
    if dict_checkpoint:
        print('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        if opt.model_type == 'nmt':
            assert checkpoint.get('type', None) is None or \
                checkpoint['type'] == "nmt", \
                "The loaded model is not neural machine translation!"
        elif opt.model_type == 'lm':
            assert checkpoint['type'] == "lm", \
                "The loaded model is not a language model!"
        dataset['dicts'] = checkpoint['dicts']

    trainData = onmt.Dataset(dataset['train']['src'],
                             dataset['train']['tgt'],
                             opt.batch_size,
                             opt.gpus,
                             data_type=dataset.get("type", "text"))
    validData = onmt.Dataset(dataset['valid']['src'],
                             dataset['valid']['tgt'],
                             opt.batch_size,
                             opt.gpus,
                             volatile=True,
                             data_type=dataset.get("type", "text"))

    dicts = dataset['dicts']
    model_opt = checkpoint['opt'] if dict_checkpoint else opt
    if dicts.get('tgt', None) is None:
        # Makes the code compatible with the language model
        dicts['tgt'] = dicts['src']
    if opt.model_type == 'nmt':
        print(' * vocabulary size. source = %d; target = %d' %
              (dicts['src'].size(), dicts['tgt'].size()))
    elif opt.model_type == 'lm':
        print(' * vocabulary size = %d' % (dicts['src'].size()))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')

    if opt.model_type == 'nmt':

        decoder = onmt.Decoders.getDecoder(model_opt.decoder_type)(
            model_opt, dicts['tgt'])
        encoder = onmt.Encoders.getEncoder(model_opt.encoder_type)(
            model_opt, dicts['src'])

        model = onmt.Models.NMTModel(encoder, decoder)

    elif opt.model_type == 'lm':
        model = onmt.LanguageModel.LM(model_opt, dicts['src'])

    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, dicts['tgt'].size()), nn.LogSoftmax())

    if opt.train_from:
        print('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        model_state_dict = {
            k: v
            for k, v in chk_model.state_dict().items() if 'generator' not in k
        }
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        print('Loading model from state_dict at %s' %
              opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        model_opt.start_epoch = opt.start_epoch
        model_opt.epochs = opt.epochs

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
        generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)
        model_opt["gpus"] = opt.gpus

    model.generator = generator

    if not opt.train_from_state_dict and not opt.train_from:
        for p in model.parameters():
            #p.data.uniform_(-opt.param_init, opt.param_init)
            if len(p.data.size()) > 1:
                init.xavier_normal(p.data)
            else:
                p.data.uniform_(-opt.param_init, opt.param_init)
        model.initialize_parameters(opt.param_init)
        model.load_pretrained_vectors(opt)

    if (not opt.train_from_state_dict
            and not opt.train_from) or opt.change_optimizer:
        optim = onmt.Optim(opt.optim,
                           opt.learning_rate,
                           opt.max_grad_norm,
                           lr_decay=opt.learning_rate_decay,
                           start_decay_at=opt.start_decay_at)
        optim.set_parameters(model.parameters())
        model_opt.learning_rate = opt.learning_rate
        model_opt.learning_rate_decay = opt.learning_rate_decay
        model_opt.save_each = opt.save_each

    else:
        print('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())
        optim.set_parameters(model.parameters())

    nParams = sum([p.nelement() for p in model.parameters()])
    print('* number of parameters: %d' % nParams)

    if opt.train_from or opt.train_from_state_dict:
        print(model_opt)

    model_opt.use_learning_rate_decay = opt.use_learning_rate_decay
    trainModel(model, trainData, validData, dataset, optim, model_opt)
Example #28
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        xavier_normal(m.weight.data)
        if m.bias is not None:
            # Xavier init needs a tensor with at least two dimensions; zero the 1-D bias instead
            m.bias.data.zero_()
Example #29
def init_linear(linear):
    init.xavier_normal(linear.weight)
    linear.bias.data.zero_()
Example #30
    def __init__(self, args):
        super(CNN_Text, self).__init__()
        self.args = args
        
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            # self.embed.weight.data.uniform(-0.1, 0.1)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
            # fine-tune the word embedding (set requires_grad=False to keep it fixed)
            self.embed.weight.requires_grad = True
        print("dddd {} ".format(self.embed.weight.data.size()))

        if args.wide_conv is True:
            print("using wide convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
                                     padding=(K//2, 0), dilation=1, bias=False) for K in Ks]
        else:
            print("using narrow convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
        # self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0)) for K in Ks]
        print(self.convs1)

        # for con in self.convs1:
            # print("PP {} ".format(con.weight))
        if args.init_weight:
            print("Initing W .......")
            for conv in self.convs1:
                init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
                fan_in, fan_out = CNN_Text.calculate_fan_in_and_fan_out(conv.weight.data)
                print(" in {} out {} ".format(fan_in, fan_out))
                std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
                print("aaaaaaaaaaaaa {} ".format(std))
                # init.uniform(conv.bias, 0, 0)

        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)
        in_fea = len(Ks) * Co
        # self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
        # self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
        self.fc = nn.Linear(in_features=in_fea, out_features=C, bias=True)
        # whether to use batch normalizations
        if args.batch_normalizations is True:
            print("using batch_normalizations in the model......")
            self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                            affine=args.batch_norm_affine)
            self.fc1_bn = nn.BatchNorm1d(num_features=in_fea//2, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
            self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
Example #31
 def weight_init(m):
     if isinstance(m, nn.Conv2d):
         init.xavier_normal(m.weight)
         init.constant(m.bias, 0)