def __init__(self, args):
        super(DEEP_CNN_MUI, self).__init__()
        self.args = args
        
        V = args.embed_num
        V_mui = args.embed_num_mui
        D = args.embed_dim
        C = args.class_num
        Ci = 2
        Co = args.kernel_num
        Ks = args.kernel_sizes
        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)

        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
            pretrained_weight_static = np.array(args.pretrained_weight_static)
            self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
            # whether to fine-tune the word embedding
            self.embed_no_static.weight.requires_grad = True
        # conv layers
        self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
        self.convs2 = [nn.Conv2d(1, Co, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
        print(self.convs1)
        print(self.convs2)

        if args.init_weight:
            print("Initing W .......")
            for (conv1, conv2) in zip(self.convs1, self.convs2):
                init.xavier_normal(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv1.bias, 0, 0)
                init.xavier_normal(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv2.bias, 0, 0)

        # dropout
        self.dropout = nn.Dropout(args.dropout)
        # linear
        in_fea = len(Ks) * Co
        self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
        self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
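
One caveat worth noting in the snippet above: convs1 and convs2 are plain Python lists, so their Conv2d parameters are not registered with the module (they are missed by .parameters(), .cuda(), and state_dict()). A minimal sketch of the usual fix, reusing the same Ci, D, Co, and Ks variables; this wrapping is an editorial suggestion, not part of the original code:

        self.convs1 = nn.ModuleList(
            nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks)
        self.convs2 = nn.ModuleList(
            nn.Conv2d(1, Co, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks)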
Example #2
 def fwd_split(self, input, batch, depth,
               random_split=False, mode='train', epoch=0):
     length = self.split.n
     var = 0.0
     # Iterate over scales
     e = Variable(torch.zeros(self.batch_size, length)).type(dtype)
     mask = (input[:, :, 0] >= 0).type(dtype).squeeze()
     Phis, Bs, Inputs_N, Samples = ([] for ii in range(4))
     for scale in range(depth):
         logits, probs, input_n, Phi = self.split(e, input,
                                                  mask, scale=scale)
         # Sample from probabilities and update embeddings
         if random_split:
             rand = (Variable(torch.zeros(self.batch_size, length))
                     .type(dtype))
             init.uniform(rand)
             sample = (rand > 0.5).type(dtype)
         else:
             rand = (Variable(torch.zeros(self.batch_size, length))
                     .type(dtype))
             init.uniform(rand)
             sample = (probs > rand).type(dtype)
         e = 2 * e + sample
         # Appends
         Samples.append(sample)
         Phis.append(Phi)
         Bs.append(probs)
         Inputs_N.append(input_n)
          # variance of Bernoulli probabilities
         var += self.compute_variance(probs, mask)
     # computes log probabilities of binary actions for the policy gradient
     Log_Probs = self.log_probabilities(Bs, Samples, mask, depth)
      # pad masked positions with a large value (infty) so they do not affect the embedding argsort
     infty = 1e6
     e = e * mask + (1 - mask) * infty
     return var, Phis, Bs, Inputs_N, e, Log_Probs
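
Note that these snippets use the pre-0.4 spellings init.uniform, init.xavier_uniform, init.constant, and so on; current PyTorch exposes the same routines as in-place functions with a trailing underscore, and the older names are deprecated aliases. A minimal equivalence sketch with the current API (the tensor shape is arbitrary):

import torch
from torch.nn import init

t = torch.empty(3, 5)
init.uniform_(t, a=-0.05, b=0.05)   # current spelling of init.uniform(t, -0.05, 0.05)
init.xavier_uniform_(t)             # current spelling of init.xavier_uniform(t)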
Example #3
    def __init__(self, batchNorm=True, div_flow=20):
        super(FlowNetCImg, self).__init__()

        self.batchNorm = batchNorm
        self.div_flow = div_flow

        self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
        self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
        self.conv_redir = conv(self.batchNorm,
                               256,
                               32,
                               kernel_size=1,
                               stride=1)

        #if args.fp16:
        #    self.corr = nn.Sequential(
        #        tofp32(),
        #        Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1),
        #        tofp16())
        #else:
        self.corr = Correlation(pad_size=20,
                                kernel_size=1,
                                max_displacement=20,
                                stride1=1,
                                stride2=2,
                                corr_multiply=1)

        self.corr_activation = nn.LeakyReLU(0.1, inplace=True)
        self.conv3_1 = conv(self.batchNorm, 473, 256)
        self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
        self.conv4_1 = conv(self.batchNorm, 512, 512)
        self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
        self.conv5_1 = conv(self.batchNorm, 512, 512)
        self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
        self.conv6_1 = conv(self.batchNorm, 1024, 1024)

        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)
                # init_deconv_bilinear(m.weight)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
Example #4
    def __init__(self, with_bn=True, fp16=False):
        super(FlowNetC, self).__init__()

        self.with_bn = with_bn
        self.fp16 = fp16

        self.conv1 = conv(3, 64, kernel_size=7, stride=2, with_bn=with_bn)
        self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv_redir = conv(256,
                               32,
                               kernel_size=1,
                               stride=1,
                               with_bn=with_bn)

        corr = Correlation(pad_size=20,
                           kernel_size=1,
                           max_displacement=20,
                           stride1=1,
                           stride2=2,
                           corr_multiply=1)
        self.corr = nn.Sequential(tofp32(), corr, tofp16()) if fp16 else corr

        self.corr_activation = nn.LeakyReLU(0.1, inplace=True)
        self.conv3_1 = conv(473, 256, with_bn=with_bn)
        self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
        self.conv4_1 = conv(512, 512, with_bn=with_bn)
        self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
        self.conv5_1 = conv(512, 512, with_bn=with_bn)
        self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
        self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    nn_init.uniform(m.bias)
                nn_init.xavier_uniform(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    nn_init.uniform(m.bias)
                nn_init.xavier_uniform(m.weight)
Example #5
 def init_param(self, param):
     if len(param.size()) < 2:
         init.uniform(param)
     else:            
         init.xavier_uniform(param)
Example #6
    def __init__(self,
                 input_size,
                 output_size,
                 hidden_size,
                 dtype,
                 n_layers=1,
                 batch_size=1,
                 scale=1.0,
                 final_layer_flag=0,
                 policy_flag=0):
        super(PMLP, self).__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.batch_size = batch_size
        self.scale = scale  # scale output of actor from [-1,1] to range of action space [-scale,scale]. set to 1 for critic
        self.final_layer_flag = final_layer_flag  # 1 for actor, 0 for critic (since critic range need not be restricted to [-1,1])
        self.dtype = dtype
        self.policy_flag = policy_flag
        if policy_flag:
            self.action_log_std = nn.Parameter(torch.zeros(1, output_size))

        #self.control_gru_list = []
        self.control_hidden_list = []
        self.control_h2o_list = []

        #self.gru_00 = nn.GRUCell(self.input_size, self.hidden_size)
        self.l_00 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
        self.h2o_0 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
        #self.gru_10 = nn.GRUCell(self.input_size, self.hidden_size)
        self.l_10 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
        self.h2o_1 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
        #self.gru_20 = nn.GRUCell(self.input_size, self.hidden_size)
        self.l_20 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
        self.h2o_2 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
        #self.gru_30 = nn.GRUCell(self.input_size, self.hidden_size)
        self.l_30 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
        self.h2o_3 = nn.Linear(self.hidden_size, self.output_size).type(dtype)

        init_fanin(self.l_00.weight)
        init_fanin(self.l_10.weight)
        init_fanin(self.l_20.weight)
        init_fanin(self.l_30.weight)

        init.uniform(self.h2o_0.weight, -3e-3, 3e-3)
        init.uniform(self.h2o_0.bias, -3e-3, 3e-3)
        init.uniform(self.h2o_1.weight, -3e-3, 3e-3)
        init.uniform(self.h2o_1.bias, -3e-3, 3e-3)
        init.uniform(self.h2o_2.weight, -3e-3, 3e-3)
        init.uniform(self.h2o_2.bias, -3e-3, 3e-3)
        init.uniform(self.h2o_3.weight, -3e-3, 3e-3)
        init.uniform(self.h2o_3.bias, -3e-3, 3e-3)

        if n_layers == 2:

            #self.gru_01 = nn.GRUCell(self.hidden_size, self.hidden_size)
            #self.gru_11 = nn.GRUCell(self.hidden_size, self.hidden_size)
            #self.gru_21 = nn.GRUCell(self.hidden_size, self.hidden_size)
            #self.gru_31 = nn.GRUCell(self.hidden_size, self.hidden_size)
            self.l_01 = nn.Linear(self.hidden_size,
                                  self.hidden_size).type(dtype)
            self.l_11 = nn.Linear(self.hidden_size,
                                  self.hidden_size).type(dtype)
            self.l_21 = nn.Linear(self.hidden_size,
                                  self.hidden_size).type(dtype)
            self.l_31 = nn.Linear(self.hidden_size,
                                  self.hidden_size).type(dtype)

            init_fanin(self.l_01.weight)
            init_fanin(self.l_11.weight)
            init_fanin(self.l_21.weight)
            init_fanin(self.l_31.weight)

        self.control_hidden_list.append(
            [self.l_00, self.l_10, self.l_20, self.l_30])
        if n_layers == 2:
            self.control_hidden_list.append(
                [self.l_01, self.l_11, self.l_21, self.l_31])

        self.control_h2o_list = [
            self.h2o_0, self.h2o_1, self.h2o_2, self.h2o_3
        ]

        #self.alpha = []
        #for i in range(4):
        #	self.alpha.append(Alpha(n_layers))

        #self.init_controls(self.control_hidden_list, self.control_h2o_list, self.alpha)
        #self.h_0 = Variable(torch.zeros(batch_size, hidden_size), requires_grad=True)
        #if n_layers == 2:
        #	self.h_1 = Variable(torch.zeros(batch_size, hidden_size), requires_grad=True)

        self.hidden_list = []
        self.h2o_list = []
        self.phase_list = []

        # run a dummy forward/backward pass to initialize gradients of the control hidden and h2o layers
        dummy_x = Variable(torch.zeros(batch_size, input_size),
                           requires_grad=False).type(dtype)
        dummy_y = Variable(torch.zeros(batch_size, output_size),
                           requires_grad=False).type(dtype)
        dummy_criterion = nn.MSELoss()

        if n_layers == 1:
            for l, h2o in zip(self.control_hidden_list[0],
                              self.control_h2o_list):
                dummy_h = F.relu(l(dummy_x))
                dummy_o = h2o(dummy_h)
                dummy_loss = dummy_criterion(dummy_o, dummy_y)
                dummy_loss.backward()

        if n_layers == 2:
            for l0, l1, h2o in zip(self.control_hidden_list[0],
                                   self.control_hidden_list[1],
                                   self.control_h2o_list):
                dummy_h0 = F.relu(l0(dummy_x))
                dummy_h1 = l1(dummy_h0)
                dummy_o = h2o(dummy_h1)
                dummy_loss = dummy_criterion(dummy_o, dummy_y)
                dummy_loss.backward()
Example #7
 def init_weights(self):
     initrange = 0.01
     init.uniform(self.embeddings_pri.weight, -1 * initrange, initrange)
     init.uniform(self.embeddings_sec.weight, -1 * initrange, initrange)
Example #8
import matplotlib
import matplotlib.pyplot as plt

from NoiseNet import NoiseNet

from learn.load import load_noise as load
from learn.train import train

from torch.nn.init import uniform

train_loader = load()

# Initialize Model

model = NoiseNet()
uniform(model.fc1.weight.data, a=0.005, b=0.015)
criterion = nn.MSELoss()

# setup optimization routine
learning_rate = 1e-4
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# Training Function

log_interval = 2048
# Actual Loop

nepochs = 80000

data = next(enumerate(train_loader))[1]
Example #9
def init_embeddings(embeddings):
    init.uniform(embeddings.weight, -0.05, 0.05)
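
A quick usage sketch for the helper above, assuming the usual from torch.nn import init import; the vocabulary size and embedding dimension are made-up values:

import torch.nn as nn

embed = nn.Embedding(10000, 300)   # hypothetical vocab size and dimension
init_embeddings(embed)             # embedding weights now uniform in [-0.05, 0.05]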
Example #10
    def __init__(self, args):
        super(DEEP_CNN_MUI, self).__init__()
        self.args = args

        V = args.embed_num
        V_mui = args.embed_num_mui
        D = args.embed_dim
        C = args.class_num
        Ci = 2
        Co = args.kernel_num
        Ks = args.kernel_sizes
        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V,
                                                D,
                                                max_norm=args.max_norm,
                                                scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui,
                                             D,
                                             max_norm=args.max_norm,
                                             scale_grad_by_freq=True)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)

        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed_no_static.weight.data.copy_(
                torch.from_numpy(pretrained_weight))
            pretrained_weight_static = np.array(args.pretrained_weight_static)
            self.embed_static.weight.data.copy_(
                torch.from_numpy(pretrained_weight_static))
            # whether to fine-tune the word embedding
            self.embed_no_static.weight.requires_grad = True
        # conv layers
        self.convs1 = [
            nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0), bias=True)
            for K in Ks
        ]
        self.convs2 = [
            nn.Conv2d(1, Co, (K, D), stride=1, padding=(K // 2, 0), bias=True)
            for K in Ks
        ]
        print(self.convs1)
        print(self.convs2)

        if args.init_weight:
            print("Initing W .......")
            for (conv1, conv2) in zip(self.convs1, self.convs2):
                init.xavier_normal(conv1.weight.data,
                                   gain=np.sqrt(args.init_weight_value))
                init.uniform(conv1.bias, 0, 0)
                init.xavier_normal(conv2.weight.data,
                                   gain=np.sqrt(args.init_weight_value))
                init.uniform(conv2.bias, 0, 0)

        # dropout
        self.dropout = nn.Dropout(args.dropout)
        # linear
        in_fea = len(Ks) * Co
        self.fc1 = nn.Linear(in_features=in_fea,
                             out_features=in_fea // 2,
                             bias=True)
        self.fc2 = nn.Linear(in_features=in_fea // 2,
                             out_features=C,
                             bias=True)
Example #11
 def reset_parameters(self):
     self.mlp.reset_parameters()
      # Initialize the last (softmax) layer with small uniform weights
     last_linear = self.mlp.get_linear_layer(self.num_layers)
     init.uniform(last_linear.weight.data, -0.005, 0.005)
Example #12
import math
import operator
from functools import reduce
from torch.nn import init

def XavierFill(tensor):
    """Caffe2 XavierFill Implementation"""
    size = reduce(operator.mul, tensor.shape, 1)
    fan_in = size / tensor.shape[0]
    scale = math.sqrt(3 / fan_in)
    return init.uniform(tensor, -scale, scale)
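
A brief usage sketch on a made-up weight tensor (the shape is illustrative only):

import torch

w = torch.empty(64, 128)
XavierFill(w)   # fills w uniformly in [-sqrt(3 / fan_in), sqrt(3 / fan_in)]; fan_in is 128 here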
Example #13
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.autograd import Variable
from visdom import Visdom
viz = Visdom()

num_data=1000
num_epoch=400

x = init.uniform(torch.Tensor(num_data,1),-10,10)
y = init.uniform(torch.Tensor(num_data,1),-10,10)
z = x**2 + y**2

x_noise = x + init.normal(torch.FloatTensor(num_data,1),std=0.5)
y_noise = y + init.normal(torch.FloatTensor(num_data,1),std=0.5)
z_noise = x_noise**2 + y_noise**2
data_noise = torch.cat([x_noise,y_noise,z_noise],1)

# visualize data

win_1=viz.scatter(
		X=data_noise,
		opts=dict(
			markersize=5,
			markercolor=np.ndarray(shape=[num_data,3],dtype=float,buffer=[51,153,255]*np.ones(shape=[num_data,3]))
			)
		)
Example #14
def UniInitializer(param):
    uniform(param, -0.005, 0.005)
Example #15
def init_linear(linear):
    init.uniform(linear.weight, -0.05, 0.05)
    init.constant(linear.bias, 0.)
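
A small usage sketch, again assuming from torch.nn import init is in scope; the layer sizes are illustrative:

import torch.nn as nn

fc = nn.Linear(128, 64)
init_linear(fc)   # weight uniform in [-0.05, 0.05], bias set to 0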
Example #16
    def __init__(self, input_channels=12, with_bn=True):
        super(FlowNetS, self).__init__()

        self.with_bn = with_bn
        self.conv1 = conv(input_channels,
                          64,
                          kernel_size=7,
                          stride=2,
                          with_bn=with_bn)
        self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv3_1 = conv(256, 256, with_bn=with_bn)
        self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
        self.conv4_1 = conv(512, 512, with_bn=with_bn)
        self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
        self.conv5_1 = conv(512, 512, with_bn=with_bn)
        self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
        self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    nn_init.uniform(m.bias)
                nn_init.xavier_uniform(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    nn_init.uniform(m.bias)
                nn_init.xavier_uniform(m.weight)
Example #17
                         tra_type=args.tra_type, rnn_mode=args.rnn_type, cla_dropout=args.cla_dropout)

    if args.load is not None:
        # Load network
        print("### Loading .. ###")
        basedir = './models/'
        net.load_state_dict(torch.load(basedir + args.load + '/best', map_location=lambda storage, loc: storage))
    else:
        # Weight initialization: Xavier init to accelerate training
        weights = {}  # store the weights in this dict for convenience
        for name, param in net.named_parameters():
            if 'weight' in name:  # all weights
                weight_init.xavier_uniform(param, gain=1.6)
                if args.rnn_type == 'SRU':
                    print('SRU mode')
                    weight_init.uniform(param, -0.05, 0.05)
                weights[name] = param
            if 'bias' in name:  # all biases
                weight_init.constant(param, 0)
            if args.rnn_type == 'LSTM':  # only LSTM biases
                if ('bias_ih' in name) or ('bias_hh' in name):
                    no4 = int(len(param) / 4)
                    no2 = int(len(param) / 2)
                    weight_init.constant(param, 0)
                    weight_init.constant(param[no4:no2], 1)

    if args.cuda == True:
        net = net.cuda()

    # optimizer = optim.Adam(net.parameters(), weight_decay=args.weight_decay)
    optimizer = optim.Adam(net.parameters())
Example #18
 def __init__(self):
     """"Constructor of the class"""
     super(ExactMatchChannel, self).__init__()
     self.alpha = nn.Parameter(torch.FloatTensor(1))
     # Initializing the value of alpha
     init.uniform(self.alpha)
Example #19
    def __init__(self, args):
        print("Decoder model")
        super(Decoder_WordLstm, self).__init__()
        self.args = args

        # self.lstm = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True)
        self.lstmcell = nn.LSTMCell(input_size=self.args.hidden_size,
                                    hidden_size=self.args.rnn_hidden_dim,
                                    bias=True)
        init.xavier_uniform(self.lstmcell.weight_ih)
        init.xavier_uniform(self.lstmcell.weight_hh)
        self.lstmcell.bias_hh.data.uniform_(
            -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)),
            np.sqrt(6 / (self.args.rnn_hidden_dim + 1)))
        self.lstmcell.bias_ih.data.uniform_(
            -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)),
            np.sqrt(6 / (self.args.rnn_hidden_dim + 1)))

        self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size,
                                      embedding_dim=self.args.pos_dim)
        init.uniform(self.pos_embed.weight,
                     a=-np.sqrt(3 / self.args.pos_dim),
                     b=np.sqrt(3 / self.args.pos_dim))
        self.pos_embed.weight.requires_grad = True

        self.linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2 +
                                self.args.hidden_size,
                                out_features=self.args.label_size,
                                bias=False)

        # self.non_linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2, out_features=self.args.hidden_size,
        #                             bias=True)

        self.combine_linear = nn.Linear(
            in_features=self.args.rnn_hidden_dim * 2 + self.args.pos_dim,
            out_features=self.args.hidden_size,
            bias=True)

        init.xavier_uniform(self.linear.weight)
        # init.xavier_uniform(self.non_linear.weight)
        init.xavier_uniform(self.combine_linear.weight)
        # self.non_linear.bias.data.uniform_(-np.sqrt(6 / (self.args.hidden_size + 1)),
        #                                    np.sqrt(6 / (self.args.hidden_size + 1)))
        self.combine_linear.bias.data.uniform_(
            -np.sqrt(6 / (self.args.hidden_size + 1)),
            np.sqrt(6 / (self.args.hidden_size + 1)))

        self.dropout = nn.Dropout(self.args.dropout)

        self.softmax = nn.LogSoftmax()

        self.bucket = Variable(torch.zeros(1, self.args.label_size)).type(
            torch.FloatTensor)
        self.bucket_rnn = Variable(torch.zeros(
            1, self.args.rnn_hidden_dim)).type(torch.FloatTensor)
        if self.args.use_cuda is True:
            self.bucket = self.bucket.cuda()
            self.bucket_rnn = self.bucket_rnn.cuda()

        self.z_bucket = Variable(torch.zeros(1, self.args.hidden_size)).type(
            torch.FloatTensor)
        self.h_bucket = Variable(torch.zeros(
            1, self.args.rnn_hidden_dim)).type(torch.FloatTensor)
        self.c_bucket = Variable(torch.zeros(
            1, self.args.rnn_hidden_dim)).type(torch.FloatTensor)
        if self.args.use_cuda is True:
            self.z_bucket = self.z_bucket.cuda()
            self.h_bucket = self.h_bucket.cuda()
            self.c_bucket = self.c_bucket.cuda()
Example #20
 def init_hidden(self, initrange):
     for ww in self.parameters():
         init.uniform(ww.data, -1 * initrange, initrange)
     weight = next(self.parameters()).data
     return autograd.Variable(
         weight.new(1, self.hidden_size).zero_().cuda())
Example #21
 def _layer_init(self, layer, x):
     init.uniform(layer.weight, a=-x, b=x)
     init.constant(layer.bias, 0)
Example #22
    def __init__(self, args):
        super(CNN_MUI, self).__init__()
        self.args = args
        
        V = args.embed_num
        V_mui = args.embed_num_mui
        D = args.embed_dim
        C = args.class_num
        Ci = 2
        Co = args.kernel_num
        Ks = args.kernel_sizes

        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            # self.embed_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
            self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
            # self.embed_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
            pretrained_weight_static = np.array(args.pretrained_weight_static)
            self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
            # whether to fine-tune the word embedding
            self.embed_no_static.weight.requires_grad = True
            # self.embed_static.weight.requires_grad = False

        if args.wide_conv is True:
            print("using wide convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
                                     padding=(K//2, 0), bias=True) for K in Ks]
        else:
            print("using narrow convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
        # self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0)) for K in Ks]
        print(self.convs1)

        if args.init_weight:
            print("Initing W .......")
            for conv in self.convs1:
                init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
                init.uniform(conv.bias, 0, 0)
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)

        in_fea = len(Ks) * Co
        self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
        self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)

        if args.batch_normalizations is True:
            print("using batch_normalizations in the model......")
            self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                            affine=args.batch_norm_affine)
            self.fc1_bn = nn.BatchNorm1d(num_features=in_fea//2, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
            self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
Example #23
 def init_weights(self):
     init.uniform(self.lstm.weight_ih_l0, a=-0.01, b=0.01)
     init.orthogonal(self.lstm.weight_hh_l0)
     self.lstm.weight_ih_l0.requires_grad = True
     self.lstm.weight_hh_l0.requires_grad = True      
Example #24
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.autograd import Variable
from visdom import Visdom
viz = Visdom()

# data generation

num_data = 1000
num_epoch = 1000

noise = init.normal(torch.FloatTensor(num_data, 1), std=0.5)
x = init.uniform(torch.Tensor(num_data, 1), -15, 10)
y = -x**3 - 8 * (x**2) + 7 * x + 3
x_noise = x + noise
y_noise = -x_noise**3 - 8 * (x_noise**2) + 7 * x_noise + 3

input_data = torch.cat([x, y_noise], 1)

win = viz.scatter(
    X=input_data,
    opts=dict(
        xtickmin=-15,
        xtickmax=10,
        xtickstep=1,
        ytickmin=-300,
        ytickmax=200,
        ytickstep=1,
Example #25
    def __init__(self, config):
        super(Encoder, self).__init__()
        self.config = config

        # random
        self.char_embed = nn.Embedding(self.config.embed_char_num,
                                       self.config.embed_char_dim,
                                       sparse=False,
                                       padding_idx=self.config.char_paddingId)
        self.char_embed.weight.requires_grad = True

        self.bichar_embed = nn.Embedding(
            self.config.embed_bichar_num,
            self.config.embed_bichar_dim,
            sparse=False,
            padding_idx=self.config.bichar_paddingId)
        self.bichar_embed.weight.requires_grad = True

        # fix the word embedding
        self.static_char_embed = nn.Embedding(
            self.config.static_embed_char_num,
            self.config.embed_char_dim,
            sparse=False,
            padding_idx=self.config.static_char_paddingId)
        init.uniform(self.static_char_embed.weight,
                     a=-np.sqrt(3 / self.config.embed_char_dim),
                     b=np.sqrt(3 / self.config.embed_char_dim))
        self.static_bichar_embed = nn.Embedding(
            self.config.static_embed_bichar_num,
            self.config.embed_bichar_dim,
            sparse=False,
            padding_idx=self.config.static_bichar_paddingId)
        init.uniform(self.static_bichar_embed.weight,
                     a=-np.sqrt(3 / self.config.embed_bichar_dim),
                     b=np.sqrt(3 / self.config.embed_bichar_dim))

        # load external word embedding
        if config.char_pretrained_embed is True:
            self.static_char_embed.weight.data.copy_(
                self.config.char_pretrain_embed)
            for index in range(self.config.embed_char_dim):
                self.static_char_embed.weight.data[
                    self.config.static_char_paddingId][index] = 0
        self.static_char_embed.weight.requires_grad = False

        if config.bichar_pretrained_embed is True:
            self.static_bichar_embed.weight.data.copy_(
                self.config.bichar_pretrain_embed)
            for index in range(self.config.embed_bichar_dim):
                self.static_bichar_embed.weight.data[
                    self.config.static_bichar_paddingId][index] = 0
        self.static_bichar_embed.weight.requires_grad = False

        # LSTMCell
        self.lstm_left = nn.LSTMCell(input_size=self.config.rnn_dim,
                                     hidden_size=self.config.rnn_hidden_dim,
                                     bias=True)
        self.lstm_right = nn.LSTMCell(input_size=self.config.rnn_dim,
                                      hidden_size=self.config.rnn_hidden_dim,
                                      bias=True)

        # init lstm weight and bias
        init.xavier_uniform(self.lstm_left.weight_ih)
        init.xavier_uniform(self.lstm_left.weight_hh)
        init.xavier_uniform(self.lstm_right.weight_ih)
        init.xavier_uniform(self.lstm_right.weight_hh)
        value = np.sqrt(6 / (self.config.rnn_hidden_dim + 1))
        self.lstm_left.bias_hh.data.uniform_(-value, value)
        self.lstm_left.bias_ih.data.uniform_(-value, value)
        self.lstm_right.bias_hh.data.uniform_(-value, value)
        self.lstm_right.bias_ih.data.uniform_(-value, value)

        self.dropout = nn.Dropout(self.config.dropout)
        self.dropout_embed = nn.Dropout(self.config.dropout_embed)

        self.input_dim = (self.config.embed_char_dim +
                          self.config.embed_bichar_dim) * 2

        self.liner = nn.Linear(in_features=self.input_dim,
                               out_features=self.config.rnn_dim,
                               bias=True)

        # init linear
        init.xavier_uniform(self.liner.weight)
        init_linear_value = np.sqrt(6 / (self.config.rnn_dim + 1))
        self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
Example #26
def init_fanin(tensor):
    fanin = tensor.size(1)
    v = 1.0 / np.sqrt(fanin)
    init.uniform(tensor, -v, v)
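
A short sketch of the fan-in rule applied to a linear layer, as is common in DDPG-style actor-critic implementations; it assumes numpy and torch.nn.init are imported as np and init, and the sizes are illustrative:

import torch.nn as nn

fc = nn.Linear(400, 300)
init_fanin(fc.weight)   # bound = 1 / sqrt(400) = 0.05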
Example #27
    def __init__(self, args):
        print("Decoder model")
        super(Decoder_WordLstm, self).__init__()
        self.args = args

        self.pos_paddingKey = self.args.create_alphabet.pos_PaddingID
        print("pos_paddingKey", self.pos_paddingKey)
        print("appID", self.args.create_alphabet.appID)

        # self.lstm = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True)
        self.lstmcell = nn.LSTMCell(input_size=self.args.hidden_size,
                                    hidden_size=self.args.rnn_hidden_dim,
                                    bias=True)
        init.xavier_uniform(self.lstmcell.weight_ih)
        init.xavier_uniform(self.lstmcell.weight_hh)
        self.lstmcell.bias_hh.data.uniform_(
            -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)),
            np.sqrt(6 / (self.args.rnn_hidden_dim + 1)))
        self.lstmcell.bias_ih.data.uniform_(
            -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)),
            np.sqrt(6 / (self.args.rnn_hidden_dim + 1)))

        # self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size, embedding_dim=self.args.pos_dim,
        #                               padding_idx=self.pos_paddingKey)
        self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size,
                                      embedding_dim=self.args.pos_dim)
        init.uniform(self.pos_embed.weight,
                     a=-np.sqrt(3 / self.args.pos_dim),
                     b=np.sqrt(3 / self.args.pos_dim))
        for i in range(self.args.pos_dim):
            self.pos_embed.weight.data[self.pos_paddingKey][i] = 0
        self.pos_embed.weight.requires_grad = True

        self.linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2 +
                                self.args.hidden_size,
                                out_features=self.args.label_size,
                                bias=False)

        # self.non_linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2, out_features=self.args.hidden_size,
        #                             bias=True)

        self.combine_linear = nn.Linear(
            in_features=self.args.rnn_hidden_dim * 2 + self.args.pos_dim,
            out_features=self.args.hidden_size,
            bias=True)

        init.xavier_uniform(self.linear.weight)
        # init.xavier_uniform(self.non_linear.weight)
        init.xavier_uniform(self.combine_linear.weight)
        # self.non_linear.bias.data.uniform_(-np.sqrt(6 / (self.args.hidden_size + 1)),
        #                                    np.sqrt(6 / (self.args.hidden_size + 1)))
        self.combine_linear.bias.data.uniform_(
            -np.sqrt(6 / (self.args.hidden_size + 1)),
            np.sqrt(6 / (self.args.hidden_size + 1)))

        self.dropout = nn.Dropout(self.args.dropout)

        self.softmax = nn.LogSoftmax(dim=1)

        self.bucket = Variable(torch.zeros(1, self.args.label_size))
        self.bucket_rnn = Variable(torch.zeros(1, self.args.rnn_hidden_dim))
        if self.args.use_cuda is True:
            self.bucket = self.bucket.cuda()
            self.bucket_rnn = self.bucket_rnn.cuda()
Example #28
	def __init__(self, kwargs):
		super(TextCNNNet, self).__init__()
		self.input_size = kwargs['input_size']
		self.hidden_size = kwargs['hidden_size']
		self.output_size = kwargs['output_size']
		if 'kernel_num' in kwargs:
			self.kernel_num = kwargs['kernel_num']
		else:
			self.kernel_num = 128
		if 'kernel_sizes' in kwargs:
			self.kernel_sizes = kwargs['kernel_sizes']
		else:
			self.kernel_sizes = [1, 2, 3, 4]
		if 'embed_size' in kwargs:
			self.embed_size = kwargs['embed_size']
		else:
			self.embed_size = kwargs['hidden_size']		
		if 'dropout' in kwargs:
			self.dropout = kwargs['dropout']
		else:
			self.dropout = 0.1
		if 'wide_conv' in kwargs:
			self.wide_conv = kwargs['wide_conv']
		else:
			self.wide_conv = True
		if 'init_weight' in kwargs:
			self.init_weight = kwargs['init_weight']
		else:
			self.init_weight = False
		if 'init_weight_value' in kwargs:
			self.init_weight_value = kwargs['init_weight_value']
		else:
			self.init_weight_value = 2.0		
		if 'batch_normal' in kwargs:
			self.batch_normal = kwargs['batch_normal']
		else:
			self.batch_normal = False
		if 'batch_normal_momentum' in kwargs:
			self.batch_normal_momentum = kwargs['batch_normal_momentum']
		else:
			self.batch_normal_momentum = 0.1
		if 'batch_normal_affine' in kwargs:
			self.batch_normal_affine = kwargs['batch_normal_affine']
		else:
			self.batch_normal_affine = False

		Ci = 1	# input channels: text input uses a single channel
		Co = self.kernel_num	# output channel
		Ks = self.kernel_sizes	# list
		
		if 'max_norm' in kwargs:
			self.embed = nn.Embedding(self.input_size, self.embed_size, max_norm=kwargs['max_norm'])
		else:
			self.embed = nn.Embedding(self.input_size, self.embed_size, scale_grad_by_freq=True)
		if 'word_embedding' in kwargs:
			pretrained_weight = torch.from_numpy(kwargs['word_embedding'])
			self.embed.weight.data.copy_(pretrained_weight)
			self.embed.weight.requires_grad = True
		if self.wide_conv is True:
			self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, self.embed_size), stride=(1, 1), padding=(K//2 ,0), dilation=1, bias=True) for K in Ks]
		else:
			self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, self.embed_size), bias=True) for K in Ks]
		if self.init_weight:
			for conv in self.convs1:
				init.xavier_normal(conv.weight.data, gain=np.sqrt(self.init_weight_value))
				fanin, fanout = self.cal_fanin_fanout(conv.weight.data)
				std = np.sqrt(self.init_weight_value) * np.sqrt(2.0 / (fanin+fanout))
				init.uniform(conv.bias, 0, 0)

		self.dropout = nn.Dropout(self.dropout)
		in_fea = len(Ks) * Co

		self.f1 = nn.Linear(in_fea, in_fea//2, bias=True)
		self.f2 = nn.Linear(in_fea//2, self.output_size, bias=True)
		self.h2o = nn.Linear(in_fea, self.output_size)
		self.softmax = nn.LogSoftmax()

		if self.batch_normal:
			self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=self.batch_normal_momentum, affine=self.batch_normal_affine)
			self.f1_bn = nn.BatchNorm1d(num_features=in_fea//2, momentum=self.batch_normal_momentum, affine=self.batch_normal_affine)
			self.f2_bn = nn.BatchNorm1d(num_features=self.output_size, momentum=self.batch_normal_momentum, affine=self.batch_normal_affine)
Example #29
    # load the training data, then further partition into training and validation sets, preserving the
    # ratio of positive to negative training examples
    train_data = imdbTrainDataset()
    train_dataloader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)


    # load the model
    model = CNN(vocab_size=20000, embedding_dim=128, hidden_dim=50, label_size=1, batch_size=batch_size, seq_len=250)
    model.cuda()

    # model._parameters = init.xavier_normal(list(model.parameters()))
    # or
    for param in model.parameters():
        # init.xavier_normal(param)
        init.uniform(param)
    loss_fn = torch.nn.BCEWithLogitsLoss()
    # loss_fn = torch.nn.CrossEntropyLoss()
    loss_name = "bce"

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    start_clock = time.clock()
    epoch = 0

    print("time_stamp: {}".format(time_stamp))
    print()
    print("model: {}".format(args.model))
    print("data: {}".format(args.data))
    print("features: {}".format(args.feats))
    print("null features: {}".format(args.null))
Example #30
    def __init__(self, args, batchNorm=True):
        super(FlowNetS, self).__init__()

        self.batchNorm = batchNorm
        self.conv1 = conv(self.batchNorm, 12, 64, kernel_size=7, stride=2)
        self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
        self.conv3_1 = conv(self.batchNorm, 256, 256)
        self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
        self.conv4_1 = conv(self.batchNorm, 512, 512)
        self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
        self.conv5_1 = conv(self.batchNorm, 512, 512)
        self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
        self.conv6_1 = conv(self.batchNorm, 1024, 1024)

        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=False)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)
Example #31
    def __init__(self, batchNorm=False, div_flow=20.):
        super(FlowNet2, self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        #self.rgb_max = args.rgb_max
        self.rgb_max = 1
        #self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        # if args.fp16:
        #     self.resample1 = nn.Sequential(
        #                     tofp32(),
        #                     Resample2d(),
        #                     tofp16())
        # else:
        self.resample1 = Resample2d()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        # if args.fp16:
        #     self.resample2 = nn.Sequential(
        #                     tofp32(),
        #                     Resample2d(),
        #                     tofp16())
        # else:
        self.resample2 = Resample2d()

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(batchNorm=self.batchNorm)

        # Block (FlowNetSD)
        self.flownets_d = FlowNetSD.FlowNetSD(batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')

        # if args.fp16:
        #     self.resample3 = nn.Sequential(
        #                     tofp32(),
        #                     Resample2d(),
        #                     tofp16())
        # else:
        self.resample3 = Resample2d()

        # if args.fp16:
        #     self.resample4 = nn.Sequential(
        #                     tofp32(),
        #                     Resample2d(),
        #                     tofp16())
        # else:
        self.resample4 = Resample2d()

        # Block (FLowNetFusion)
        self.flownetfusion = FlowNetFusion.FlowNetFusion(
            batchNorm=self.batchNorm)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)
Example #32
 def init_hidden(self, initrange):
     for ww in self.parameters():
         init.uniform(ww.data, -1 * initrange, initrange)
Example #33
def trainClassifier(allDataTrain,targetDataTrain,allDataTest,targetDataTest,learning_rate,momentum,maxEpoch,saveModel,results):
    classifier = buildClassifierModel(embedding_dim, 1).cuda()
    for param in classifier.parameters():
        init.uniform(param, -1 * 0.0000001, 0.0000001)
    loss_function = nn.BCELoss(size_average=True).cuda()
    optimizer = optim.RMSprop(classifier.parameters(), lr=learning_rate, alpha=0.99, eps=1e-08, weight_decay=0,momentum=momentum, centered=False)
    lossVal = mres.LossValues()
    errors = []
    lrStr = mres.floatToStr("%2.15f",learning_rate)
    momentumStr = mres.floatToStr("%2.15f",momentum)
    epc = 0
    numberOfDoc = math.floor((allDataTrain.size()[0]/batch_size)*batch_size)
    for fold in range(folds):
        for epoch in range(maxEpoch):
            print("class :  %s fold %d epoch %d" % (classname,fold,epoch))
            epc += 1
            inds = torch.range(1, numberOfDoc, batch_size).long()
            shuffle = torch.randperm(inds.size()[0])
            lr=optimizer.param_groups[0]['lr']
            lrStr = mres.floatToStr("%2.15f",lr)
            for i in range(int(numberOfDoc/batch_size)):
                start = inds[shuffle[i]] - 1
                endd = inds[shuffle[i]] + batch_size - 1
                inp = autograd.Variable(allDataTrain[start:endd].data.cuda(), requires_grad=False)
                target = autograd.Variable(torch.Tensor(batch_size).copy_(targetDataTrain[start:endd]).cuda(), requires_grad=False)
                classifier.zero_grad()
                pred = classifier.forward(inp)
                loss = loss_function(pred,target)
                print("fold %d epoch %d lr %s - pred %f target %f loss %f " % (fold,epoch,lrStr,pred.data[0][0],target.data[0], loss.data[0]))
                loss.backward()
                optimizer.step()
                errors.append(loss.data[0])
                lossVal.y.append(loss.data[0])
                mean = torch.mean(torch.Tensor(errors))
                lossVal.mean.append(mean)

            if epoch % 50 == 0 and epoch != 0:
                trainresults = mres.testClassifier(classifier,allDataTrain,targetDataTrain)
                res = "train - lr %s mmt %s maxepoch %d epoch %d score %d/%d - trueNegPred/allNeg:%d/%d=%f  truePosPred/allPos:%d/%d=%f" % (
                lrStr, momentumStr, maxEpoch, epoch+1,trainresults.correct, trainresults.all,trainresults.trueNegatives,trainresults.allNegatives,
                trainresults.negRate, trainresults.truePositives, trainresults.allPositives,trainresults.posRate)
                results.append(res)

                testresults = mres.testClassifier(classifier, allDataTest, targetDataTest)
                res = "test - lr %s mmt %s maxepoch %d epoch %d score %d/%d - trueNegPred/allNeg:%d/%d=%f  truePosPred/allPos:%d/%d=%f" % (
                    lrStr, momentumStr, maxEpoch, epoch+1,testresults.correct, testresults.all,
                    testresults.trueNegatives, testresults.allNegatives,
                    testresults.negRate, testresults.truePositives, testresults.allPositives, testresults.posRate)
                results.append(res)

        trainresults = mres.testClassifier(classifier, allDataTrain, targetDataTrain)
        res = "train - lr %s mmt %s maxepoch %d epoch %d score %d/%d - trueNegPred/allNeg:%d/%d=%f  truePosPred/allPos:%d/%d=%f" % (
            lrStr, momentumStr, maxEpoch, maxEpoch, trainresults.correct, trainresults.all,
            trainresults.trueNegatives, trainresults.allNegatives,
            trainresults.negRate, trainresults.truePositives, trainresults.allPositives, trainresults.posRate)
        results.append(res)

        testresults = mres.testClassifier(classifier, allDataTest, targetDataTest)
        res = "test - lr %s mmt %s maxepoch %d epoch %d score %d/%d - trueNegPred/allNeg:%d/%d=%f  truePosPred/allPos:%d/%d=%f" % (
            lrStr, momentumStr, maxEpoch, maxEpoch, testresults.correct, testresults.all,
            testresults.trueNegatives, testresults.allNegatives,
            testresults.negRate, testresults.truePositives, testresults.allPositives, testresults.posRate)
        results.append(res)

    if saveModel == True:
        lossVal.x = range(folds * maxEpoch * int(numberOfDoc/batch_size))
        lrStr = mres.floatToStr("%2.15f",learning_rate)
        fname = "%scdlc-mlp-batch-loss-values-%s-%s-%d.bin" % (path,lrStr,momentumStr,maxEpoch)
        fh = open(fname, 'wb')  # Save loss values as a pickle file
        pickle.dump(lossVal, fh)
        fh.close()
    return classifier
Example No. 34
    def __init__(self, args):
        print("Encoder model --- LSTM")
        super(Encoder_WordLstm, self).__init__()
        self.args = args

        # randomly initialized (non-static) char / bichar embeddings; the padding rows are zeroed
        self.char_embed = nn.Embedding(self.args.embed_char_num,
                                       self.args.embed_char_dim)
        for index in range(self.args.embed_char_dim):
            self.char_embed.weight.data[
                self.args.create_alphabet.char_PaddingID][index] = 0
        self.char_embed.weight.requires_grad = True

        self.bichar_embed = nn.Embedding(self.args.embed_bichar_num,
                                         self.args.embed_bichar_dim)
        for index in range(self.args.embed_bichar_dim):
            self.bichar_embed.weight.data[
                self.args.create_alphabet.bichar_PaddingID][index] = 0
        self.bichar_embed.weight.requires_grad = True

        # fix the word embedding
        self.static_char_embed = nn.Embedding(self.args.static_embed_char_num,
                                              self.args.embed_char_dim)
        init.uniform(self.static_char_embed.weight,
                     a=-np.sqrt(3 / self.args.embed_char_dim),
                     b=np.sqrt(3 / self.args.embed_char_dim))
        self.static_bichar_embed = nn.Embedding(
            self.args.static_embed_bichar_num, self.args.embed_bichar_dim)
        init.uniform(self.static_bichar_embed.weight,
                     a=-np.sqrt(3 / self.args.embed_bichar_dim),
                     b=np.sqrt(3 / self.args.embed_bichar_dim))
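
        # (added note) U(-sqrt(3/d), sqrt(3/d)) gives each static embedding dimension a
        # variance of 1/d, so the embedding vectors start out with roughly unit norm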

        # load external word embedding
        if args.char_Embedding is True:
            print("char_Embedding")
            pretrained_char_weight = np.array(args.pre_char_word_vecs)
            self.static_char_embed.weight.data.copy_(
                torch.from_numpy(pretrained_char_weight))
            for index in range(self.args.embed_char_dim):
                self.static_char_embed.weight.data[
                    self.args.create_static_alphabet.char_PaddingID][index] = 0
            self.static_char_embed.weight.requires_grad = False

        if args.bichar_Embedding is True:
            print("bichar_Embedding")
            pretrained_bichar_weight = np.array(args.pre_bichar_word_vecs)
            self.static_bichar_embed.weight.data.copy_(
                torch.from_numpy(pretrained_bichar_weight))
            # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_PaddingID])
            # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_UnkID])
            for index in range(self.args.embed_bichar_dim):
                self.static_bichar_embed.weight.data[
                    self.args.create_static_alphabet.
                    bichar_PaddingID][index] = 0
            self.static_bichar_embed.weight.requires_grad = False

        self.lstm_left = nn.LSTM(input_size=self.args.hidden_size,
                                 hidden_size=self.args.rnn_hidden_dim,
                                 dropout=self.args.dropout,
                                 bias=True)
        self.lstm_right = nn.LSTM(input_size=self.args.hidden_size,
                                  hidden_size=self.args.rnn_hidden_dim,
                                  dropout=self.args.dropout,
                                  bias=True)

        # init lstm weight and bias
        init.xavier_uniform(self.lstm_left.weight_ih_l0)
        init.xavier_uniform(self.lstm_left.weight_hh_l0)
        init.xavier_uniform(self.lstm_right.weight_ih_l0)
        init.xavier_uniform(self.lstm_right.weight_hh_l0)
        value = np.sqrt(6 / (self.args.rnn_hidden_dim + 1))
        self.lstm_left.bias_ih_l0.data.uniform_(-value, value)
        self.lstm_left.bias_hh_l0.data.uniform_(-value, value)
        self.lstm_right.bias_ih_l0.data.uniform_(-value, value)
        self.lstm_right.bias_hh_l0.data.uniform_(-value, value)
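        # (added note) the bias bound sqrt(6 / (H + 1)) is the same Xavier-style formula
        # used for the linear-layer bias further down, with H = rnn_hidden_dim here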

        self.hidden_l = self.init_hidden_cell(self.args.batch_size)
        self.hidden_r = self.init_hidden_cell(self.args.batch_size)

        self.dropout = nn.Dropout(self.args.dropout)
        self.dropout_embed = nn.Dropout(self.args.dropout_embed)

        self.input_dim = (self.args.embed_char_dim +
                          self.args.embed_bichar_dim) * 2
        self.liner = nn.Linear(in_features=self.input_dim,
                               out_features=self.args.hidden_size,
                               bias=True)

        # init linear
        init.xavier_uniform(self.liner.weight)
        init_linear_value = np.sqrt(6 / (self.args.hidden_size + 1))
        self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
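
    # NOTE (added sketch): init_hidden_cell is called in __init__ above but is not part of
    # this snippet. A minimal sketch, assuming single-layer, unidirectional LSTMs, so the
    # hidden/cell states have shape (num_layers=1, batch, rnn_hidden_dim); the original
    # helper may differ (e.g. it may move the tensors to the GPU).
    def init_hidden_cell(self, batch_size):
        h0 = Variable(torch.zeros(1, batch_size, self.args.rnn_hidden_dim))
        c0 = Variable(torch.zeros(1, batch_size, self.args.rnn_hidden_dim))
        return h0, c0
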
    def __init__(self, args):
        super(HighWay_CNN, self).__init__()
        self.args = args
        
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        if args.max_norm is not None:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
            # self.embed.weight.data.uniform(-0.1, 0.1)
        else:
            print("max_norm = {} ".format(args.max_norm))
            self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
            # fixed the word embedding
            self.embed.weight.requires_grad = True
        print("dddd {} ".format(self.embed.weight.data.size()))

        if args.wide_conv is True:
            print("using wide convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
                                     padding=(K//2, 0), dilation=1, bias=True) for K in Ks]
        else:
            print("using narrow convolution")
            self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
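        # (added note) with padding K//2 the convolution is "wide": the feature map keeps
        # roughly the input length even when the kernel overhangs the sentence edges,
        # whereas the narrow branch only slides over full K-sized windows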
        # self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0)) for K in Ks]
        print(self.convs1)

        # for con in self.convs1:
            # print("PP {} ".format(con.weight))
        if args.init_weight:
            print("Initing W .......")
            for conv in self.convs1:
                init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
                fan_in, fan_out = HighWay_CNN.calculate_fan_in_and_fan_out(conv.weight.data)
                print(" in {} out {} ".format(fan_in, fan_out))
                std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
                print("aaaaaaaaaaaaa {} ".format(std))
                init.uniform(conv.bias, 0, 0)

        self.dropout = nn.Dropout(args.dropout)
        # self.dropout = nn.Dropout2d(args.dropout)
        # self.dropout = nn.AlphaDropout(args.dropout)

        in_fea = len(Ks) * Co
        # self.fc1 = nn.Linear(in_features=in_fea, out_features=C, bias=True)
        self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea, bias=True)
        # self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)

        # highway gate layer
        # self.gate_layer = nn.Linear(in_features=in_fea, out_features=C, bias=True)
        self.gate_layer = nn.Linear(in_features=in_fea, out_features=in_fea, bias=True)
        # self.gate_layer.bias.data.fill_(-1)

        # last liner
        self.logit_layer = nn.Linear(in_features=in_fea, out_features=C, bias=True)
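        # (added note) the forward pass is not shown here; a highway layer typically
        # combines fc1 and gate_layer as
        #     g = sigmoid(gate_layer(x));  y = g * relu(fc1(x)) + (1 - g) * x
        # before y is fed to logit_layer for classification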

        # whether to use batch normalizations
        if args.batch_normalizations is True:
            print("using batch_normalizations in the model......")
            self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                            affine=args.batch_norm_affine)
            # num_features must match the fc1 output size (in_fea, see above)
            self.fc1_bn = nn.BatchNorm1d(num_features=in_fea, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)
            self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                         affine=args.batch_norm_affine)