def __init__(self, args):
    super(DEEP_CNN_MUI, self).__init__()
    self.args = args
    V = args.embed_num
    V_mui = args.embed_num_mui
    D = args.embed_dim
    C = args.class_num
    Ci = 2
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {}".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {}".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
        pretrained_weight_static = np.array(args.pretrained_weight_static)
        self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
        # whether to fix the word embedding
        self.embed_no_static.weight.requires_grad = True
    # conv layers; nn.ModuleList (rather than a plain Python list) registers the
    # convolutions as submodules so their parameters are tracked by the model
    self.convs1 = nn.ModuleList(
        [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks])
    self.convs2 = nn.ModuleList(
        [nn.Conv2d(1, Co, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks])
    print(self.convs1)
    print(self.convs2)
    if args.init_weight:
        print("Initializing W .......")
        for (conv1, conv2) in zip(self.convs1, self.convs2):
            init.xavier_normal_(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform_(conv1.bias, 0, 0)  # uniform on [0, 0] zeroes the bias
            init.xavier_normal_(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform_(conv2.bias, 0, 0)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    # linear
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
def fwd_split(self, input, batch, depth, random_split=False, mode='train', epoch=0):
    length = self.split.n
    var = 0.0
    # Iterate over scales
    e = Variable(torch.zeros(self.batch_size, length)).type(dtype)
    mask = (input[:, :, 0] >= 0).type(dtype).squeeze()
    Phis, Bs, Inputs_N, Samples = ([] for ii in range(4))
    for scale in range(depth):
        logits, probs, input_n, Phi = self.split(e, input, mask, scale=scale)
        # Sample from the probabilities and update the embeddings
        rand = Variable(torch.zeros(self.batch_size, length)).type(dtype)
        init.uniform_(rand)
        if random_split:
            sample = (rand > 0.5).type(dtype)
        else:
            sample = (probs > rand).type(dtype)
        e = 2 * e + sample
        # Append per-scale results
        Samples.append(sample)
        Phis.append(Phi)
        Bs.append(probs)
        Inputs_N.append(input_n)
        # variance of the Bernoulli probabilities
        var += self.compute_variance(probs, mask)
    # compute log probabilities of the binary actions for the policy gradient
    Log_Probs = self.log_probabilities(Bs, Samples, mask, depth)
    # pad masked-out embeddings with a large constant so they do not affect argsort
    infty = 1e6
    e = e * mask + (1 - mask) * infty
    return var, Phis, Bs, Inputs_N, e, Log_Probs
def __init__(self, batchNorm=True, div_flow=20):
    super(FlowNetCImg, self).__init__()
    self.batchNorm = batchNorm
    self.div_flow = div_flow

    self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
    self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
    self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
    self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1)

    # if args.fp16:
    #     self.corr = nn.Sequential(
    #         tofp32(),
    #         Correlation(pad_size=20, kernel_size=1, max_displacement=20,
    #                     stride1=1, stride2=2, corr_multiply=1),
    #         tofp16())
    # else:
    self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                            stride1=1, stride2=2, corr_multiply=1)
    self.corr_activation = nn.LeakyReLU(0.1, inplace=True)
    self.conv3_1 = conv(self.batchNorm, 473, 256)
    self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
    self.conv4_1 = conv(self.batchNorm, 512, 512)
    self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
    self.conv5_1 = conv(self.batchNorm, 512, 512)
    self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
    self.conv6_1 = conv(self.batchNorm, 1024, 1024)

    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                init.uniform_(m.bias)
            init.xavier_uniform_(m.weight)
            # init_deconv_bilinear(m.weight)

    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
def __init__(self, with_bn=True, fp16=False):
    super(FlowNetC, self).__init__()
    self.with_bn = with_bn
    self.fp16 = fp16

    self.conv1 = conv(3, 64, kernel_size=7, stride=2, with_bn=with_bn)
    self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv_redir = conv(256, 32, kernel_size=1, stride=1, with_bn=with_bn)

    corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                       stride1=1, stride2=2, corr_multiply=1)
    # in fp16 mode, run the correlation layer in fp32 and cast back
    self.corr = nn.Sequential(tofp32(), corr, tofp16()) if fp16 else corr
    self.corr_activation = nn.LeakyReLU(0.1, inplace=True)
    self.conv3_1 = conv(473, 256, with_bn=with_bn)
    self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
    self.conv4_1 = conv(512, 512, with_bn=with_bn)
    self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
    self.conv5_1 = conv(512, 512, with_bn=with_bn)
    self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
    self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                nn_init.uniform_(m.bias)
            nn_init.xavier_uniform_(m.weight)
def init_param(self, param):
    # 1-d parameters (e.g. biases) cannot use Xavier, which needs >= 2 dims
    if len(param.size()) < 2:
        init.uniform_(param)
    else:
        init.xavier_uniform_(param)
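# Usage sketch for init_param (illustration only, not from the original source;
# `net` and the layer sizes are assumptions). 1-d parameters such as biases fall
# back to plain uniform because Xavier initialization needs at least 2 dimensions:
#
#     fc = nn.Linear(128, 64)
#     net.init_param(fc.weight)  # 2-d -> init.xavier_uniform_
#     net.init_param(fc.bias)    # 1-d -> init.uniform_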
def __init__(self, input_size, output_size, hidden_size, dtype, n_layers=1,
             batch_size=1, scale=1.0, final_layer_flag=0, policy_flag=0):
    super(PMLP, self).__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.batch_size = batch_size
    # scale the actor's output from [-1, 1] to the action-space range
    # [-scale, scale]; set to 1 for the critic
    self.scale = scale
    # 1 for actor, 0 for critic (the critic's output need not be restricted to [-1, 1])
    self.final_layer_flag = final_layer_flag
    self.dtype = dtype
    self.policy_flag = policy_flag
    if policy_flag:
        self.action_log_std = nn.Parameter(torch.zeros(1, output_size))

    # self.control_gru_list = []
    self.control_hidden_list = []
    self.control_h2o_list = []

    # self.gru_00 = nn.GRUCell(self.input_size, self.hidden_size)
    self.l_00 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
    self.h2o_0 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
    # self.gru_10 = nn.GRUCell(self.input_size, self.hidden_size)
    self.l_10 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
    self.h2o_1 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
    # self.gru_20 = nn.GRUCell(self.input_size, self.hidden_size)
    self.l_20 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
    self.h2o_2 = nn.Linear(self.hidden_size, self.output_size).type(dtype)
    # self.gru_30 = nn.GRUCell(self.input_size, self.hidden_size)
    self.l_30 = nn.Linear(self.input_size, self.hidden_size).type(dtype)
    self.h2o_3 = nn.Linear(self.hidden_size, self.output_size).type(dtype)

    init_fanin(self.l_00.weight)
    init_fanin(self.l_10.weight)
    init_fanin(self.l_20.weight)
    init_fanin(self.l_30.weight)
    # small uniform init for the output layers
    for h2o in (self.h2o_0, self.h2o_1, self.h2o_2, self.h2o_3):
        init.uniform_(h2o.weight, -3e-3, 3e-3)
        init.uniform_(h2o.bias, -3e-3, 3e-3)

    if n_layers == 2:
        # self.gru_01 = nn.GRUCell(self.hidden_size, self.hidden_size)  (and gru_11/21/31)
        self.l_01 = nn.Linear(self.hidden_size, self.hidden_size).type(dtype)
        self.l_11 = nn.Linear(self.hidden_size, self.hidden_size).type(dtype)
        self.l_21 = nn.Linear(self.hidden_size, self.hidden_size).type(dtype)
        self.l_31 = nn.Linear(self.hidden_size, self.hidden_size).type(dtype)
        init_fanin(self.l_01.weight)
        init_fanin(self.l_11.weight)
        init_fanin(self.l_21.weight)
        init_fanin(self.l_31.weight)

    self.control_hidden_list.append([self.l_00, self.l_10, self.l_20, self.l_30])
    if n_layers == 2:
        self.control_hidden_list.append([self.l_01, self.l_11, self.l_21, self.l_31])
    self.control_h2o_list = [self.h2o_0, self.h2o_1, self.h2o_2, self.h2o_3]

    # self.alpha = []
    # for i in range(4):
    #     self.alpha.append(Alpha(n_layers))
    # self.init_controls(self.control_hidden_list, self.control_h2o_list, self.alpha)
    # self.h_0 = Variable(torch.zeros(batch_size, hidden_size), requires_grad=True)
    # if n_layers == 2:
    #     self.h_1 = Variable(torch.zeros(batch_size, hidden_size), requires_grad=True)

    self.hidden_list = []
    self.h2o_list = []
    self.phase_list = []

    # A dummy forward/backward pass is needed so that .grad is materialized
    # on the control hidden and h2o layers:
    dummy_x = Variable(torch.zeros(batch_size, input_size), requires_grad=False).type(dtype)
    dummy_y = Variable(torch.zeros(batch_size, output_size), requires_grad=False).type(dtype)
    dummy_criterion = nn.MSELoss()
    if n_layers == 1:
        for l, h2o in zip(self.control_hidden_list[0], self.control_h2o_list):
            dummy_h = F.relu(l(dummy_x))
            dummy_o = h2o(dummy_h)
            dummy_loss = dummy_criterion(dummy_o, dummy_y)
            dummy_loss.backward()
    if n_layers == 2:
        for l0, l1, h2o in zip(self.control_hidden_list[0],
                               self.control_hidden_list[1],
                               self.control_h2o_list):
            dummy_h0 = F.relu(l0(dummy_x))
            dummy_h1 = l1(dummy_h0)
            dummy_o = h2o(dummy_h1)
            dummy_loss = dummy_criterion(dummy_o, dummy_y)
            dummy_loss.backward()
def init_weights(self):
    initrange = 0.01
    init.uniform_(self.embeddings_pri.weight, -initrange, initrange)
    init.uniform_(self.embeddings_sec.weight, -initrange, initrange)
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.nn.init import uniform_
from NoiseNet import NoiseNet
from learn.load import load_noise as load
from learn.train import train

train_loader = load()

# Initialize model
model = NoiseNet()
uniform_(model.fc1.weight.data, a=0.005, b=0.015)
criterion = nn.MSELoss()

# Set up the optimization routine
learning_rate = 1e-4
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# Training configuration
log_interval = 2048

# Actual loop
nepochs = 80000
data = next(enumerate(train_loader))[1]
def init_embeddings(embeddings):
    init.uniform_(embeddings.weight, -0.05, 0.05)
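# Usage sketch for init_embeddings (illustration only, not from the original
# source; the vocabulary and embedding sizes are assumptions):
import torch.nn as nn

embed = nn.Embedding(10000, 300)  # hypothetical 10k vocabulary, dim 300
init_embeddings(embed)            # weights are now drawn from U(-0.05, 0.05)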
def reset_parameters(self):
    self.mlp.reset_parameters()
    # Initialize the weights of the last (softmax) linear layer
    last_linear = self.mlp.get_linear_layer(self.num_layers)
    init.uniform_(last_linear.weight.data, -0.005, 0.005)
import math
import operator
from functools import reduce  # reduce is not a builtin in Python 3

def XavierFill(tensor):
    """Caffe2 XavierFill implementation."""
    size = reduce(operator.mul, tensor.shape, 1)
    fan_in = size / tensor.shape[0]
    scale = math.sqrt(3 / fan_in)
    return init.uniform_(tensor, -scale, scale)
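# Usage sketch for XavierFill (illustration only, not from the original source).
# For a weight of shape (out, in) it matches Caffe2: fan_in = numel / shape[0],
# and the tensor is filled in place with U(-sqrt(3 / fan_in), sqrt(3 / fan_in)):
import torch

w = torch.empty(64, 128)
XavierFill(w)  # fan_in = 128, so bounds are +/- sqrt(3 / 128)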
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.autograd import Variable
from visdom import Visdom

viz = Visdom()

num_data = 1000
num_epoch = 400

x = init.uniform_(torch.Tensor(num_data, 1), -10, 10)
y = init.uniform_(torch.Tensor(num_data, 1), -10, 10)
z = x**2 + y**2

x_noise = x + init.normal_(torch.FloatTensor(num_data, 1), std=0.5)
y_noise = y + init.normal_(torch.FloatTensor(num_data, 1), std=0.5)
z_noise = x_noise**2 + y_noise**2
data_noise = torch.cat([x_noise, y_noise, z_noise], 1)

# visualize the data; markercolor expects one RGB row (0-255) per point
win_1 = viz.scatter(
    X=data_noise,
    opts=dict(
        markersize=5,
        markercolor=np.tile(np.array([51, 153, 255]), (num_data, 1)),
    )
)
def UniInitializer(param):
    # assumes `from torch.nn.init import uniform_`
    uniform_(param, -0.005, 0.005)
def init_linear(linear):
    init.uniform_(linear.weight, -0.05, 0.05)
    init.constant_(linear.bias, 0.)
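# Usage sketch for init_linear (illustration only, not from the original source;
# the layer size is an assumption):
import torch.nn as nn

fc = nn.Linear(256, 10)
init_linear(fc)  # weight ~ U(-0.05, 0.05), bias = 0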
def __init__(self, input_channels=12, with_bn=True):
    super(FlowNetS, self).__init__()
    self.with_bn = with_bn

    self.conv1 = conv(input_channels, 64, kernel_size=7, stride=2, with_bn=with_bn)
    self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv3_1 = conv(256, 256, with_bn=with_bn)
    self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
    self.conv4_1 = conv(512, 512, with_bn=with_bn)
    self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
    self.conv5_1 = conv(512, 512, with_bn=with_bn)
    self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
    self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                nn_init.uniform_(m.bias)
            nn_init.xavier_uniform_(m.weight)
              tra_type=args.tra_type, rnn_mode=args.rnn_type,
              cla_dropout=args.cla_dropout)

if args.load is not None:
    # Load a saved network
    print("### Loading .. ###")
    basedir = './models/'
    net.load_state_dict(torch.load(basedir + args.load + '/best',
                                   map_location=lambda storage, loc: storage))
else:
    # Weight initialization --> accelerate training with Xavier
    weight_dict = {}  # store the weights in this dict for convenience
    for name, param in net.named_parameters():
        if 'weight' in name:  # all weights
            weight_init.xavier_uniform_(param, gain=1.6)
            if args.rnn_type == 'SRU':
                print('SRU mode')
                weight_init.uniform_(param, -0.05, 0.05)
            weight_dict[name] = param
        if 'bias' in name:  # all biases
            weight_init.constant_(param, 0)
            if args.rnn_type == 'LSTM':  # only LSTM biases
                if ('bias_ih' in name) or ('bias_hh' in name):
                    no4 = int(len(param) / 4)
                    no2 = int(len(param) / 2)
                    weight_init.constant_(param, 0)
                    weight_init.constant_(param[no4:no2], 1)  # forget-gate bias = 1

if args.cuda:
    net = net.cuda()

# optimizer = optim.Adam(net.parameters(), weight_decay=args.weight_decay)
optimizer = optim.Adam(net.parameters())
def __init__(self): """"Constructor of the class""" super(ExactMatchChannel, self).__init__() self.alpha = nn.Parameter(torch.FloatTensor(1)) # Initializing the value of alpha init.uniform(self.alpha)
def __init__(self, args): print("Decoder model") super(Decoder_WordLstm, self).__init__() self.args = args # self.lstm = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True) self.lstmcell = nn.LSTMCell(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True) init.xavier_uniform(self.lstmcell.weight_ih) init.xavier_uniform(self.lstmcell.weight_hh) self.lstmcell.bias_hh.data.uniform_( -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)), np.sqrt(6 / (self.args.rnn_hidden_dim + 1))) self.lstmcell.bias_ih.data.uniform_( -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)), np.sqrt(6 / (self.args.rnn_hidden_dim + 1))) self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size, embedding_dim=self.args.pos_dim) init.uniform(self.pos_embed.weight, a=-np.sqrt(3 / self.args.pos_dim), b=np.sqrt(3 / self.args.pos_dim)) self.pos_embed.weight.requires_grad = True self.linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2 + self.args.hidden_size, out_features=self.args.label_size, bias=False) # self.non_linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2, out_features=self.args.hidden_size, # bias=True) self.combine_linear = nn.Linear( in_features=self.args.rnn_hidden_dim * 2 + self.args.pos_dim, out_features=self.args.hidden_size, bias=True) init.xavier_uniform(self.linear.weight) # init.xavier_uniform(self.non_linear.weight) init.xavier_uniform(self.combine_linear.weight) # self.non_linear.bias.data.uniform_(-np.sqrt(6 / (self.args.hidden_size + 1)), # np.sqrt(6 / (self.args.hidden_size + 1))) self.combine_linear.bias.data.uniform_( -np.sqrt(6 / (self.args.hidden_size + 1)), np.sqrt(6 / (self.args.hidden_size + 1))) self.dropout = nn.Dropout(self.args.dropout) self.softmax = nn.LogSoftmax() self.bucket = Variable(torch.zeros(1, self.args.label_size)).type( torch.FloatTensor) self.bucket_rnn = Variable(torch.zeros( 1, self.args.rnn_hidden_dim)).type(torch.FloatTensor) if self.args.use_cuda is True: self.bucket = self.bucket.cuda() self.bucket_rnn = self.bucket_rnn.cuda() self.z_bucket = Variable(torch.zeros(1, self.args.hidden_size)).type( torch.FloatTensor) self.h_bucket = Variable(torch.zeros( 1, self.args.rnn_hidden_dim)).type(torch.FloatTensor) self.c_bucket = Variable(torch.zeros( 1, self.args.rnn_hidden_dim)).type(torch.FloatTensor) if self.args.use_cuda is True: self.z_bucket = self.z_bucket.cuda() self.h_bucket = self.h_bucket.cuda() self.c_bucket = self.c_bucket.cuda()
def init_hidden(self, initrange):
    for ww in self.parameters():
        init.uniform_(ww.data, -initrange, initrange)
    weight = next(self.parameters()).data
    return autograd.Variable(weight.new(1, self.hidden_size).zero_().cuda())
def _layer_init(self, layer, x):
    init.uniform_(layer.weight, a=-x, b=x)
    init.constant_(layer.bias, 0)
def __init__(self, args):
    super(CNN_MUI, self).__init__()
    self.args = args
    V = args.embed_num
    V_mui = args.embed_num_mui
    D = args.embed_dim
    C = args.class_num
    Ci = 2
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {}".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        # self.embed_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {}".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
        # self.embed_static = nn.Embedding(V, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
        pretrained_weight_static = np.array(args.pretrained_weight_static)
        self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
        # whether to fix the word embedding
        self.embed_no_static.weight.requires_grad = True
        # self.embed_static.weight.requires_grad = False
    if args.wide_conv is True:
        print("using wide convolution")
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                       stride=(1, 1), padding=(K // 2, 0), bias=True) for K in Ks])
    else:
        print("using narrow convolution")
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks])
    print(self.convs1)
    if args.init_weight:
        print("Initializing W .......")
        for conv in self.convs1:
            init.xavier_normal_(conv.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform_(conv.bias, 0, 0)  # uniform on [0, 0] zeroes the bias
    self.dropout = nn.Dropout(args.dropout)
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
    if args.batch_normalizations is True:
        print("using batch_normalizations in the model......")
        self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                        affine=args.batch_norm_affine)
        self.fc1_bn = nn.BatchNorm1d(num_features=in_fea // 2, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
        self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
def init_weights(self):
    init.uniform_(self.lstm.weight_ih_l0, a=-0.01, b=0.01)
    init.orthogonal_(self.lstm.weight_hh_l0)
    self.lstm.weight_ih_l0.requires_grad = True
    self.lstm.weight_hh_l0.requires_grad = True
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.autograd import Variable
from visdom import Visdom

viz = Visdom()

# data generation
num_data = 1000
num_epoch = 1000

noise = init.normal_(torch.FloatTensor(num_data, 1), std=0.5)
x = init.uniform_(torch.Tensor(num_data, 1), -15, 10)
y = -x**3 - 8 * (x**2) + 7 * x + 3
x_noise = x + noise
y_noise = -x_noise**3 - 8 * (x_noise**2) + 7 * x_noise + 3
input_data = torch.cat([x, y_noise], 1)

win = viz.scatter(
    X=input_data,
    opts=dict(
        xtickmin=-15,
        xtickmax=10,
        xtickstep=1,
        ytickmin=-300,
        ytickmax=200,
        ytickstep=1,
def __init__(self, config):
    super(Encoder, self).__init__()
    self.config = config

    # randomly initialized embeddings
    self.char_embed = nn.Embedding(self.config.embed_char_num, self.config.embed_char_dim,
                                   sparse=False, padding_idx=self.config.char_paddingId)
    self.char_embed.weight.requires_grad = True
    self.bichar_embed = nn.Embedding(self.config.embed_bichar_num, self.config.embed_bichar_dim,
                                     sparse=False, padding_idx=self.config.bichar_paddingId)
    self.bichar_embed.weight.requires_grad = True

    # fixed (static) word embeddings
    self.static_char_embed = nn.Embedding(self.config.static_embed_char_num,
                                          self.config.embed_char_dim, sparse=False,
                                          padding_idx=self.config.static_char_paddingId)
    init.uniform_(self.static_char_embed.weight,
                  a=-np.sqrt(3 / self.config.embed_char_dim),
                  b=np.sqrt(3 / self.config.embed_char_dim))
    self.static_bichar_embed = nn.Embedding(self.config.static_embed_bichar_num,
                                            self.config.embed_bichar_dim, sparse=False,
                                            padding_idx=self.config.static_bichar_paddingId)
    init.uniform_(self.static_bichar_embed.weight,
                  a=-np.sqrt(3 / self.config.embed_bichar_dim),
                  b=np.sqrt(3 / self.config.embed_bichar_dim))

    # load external word embeddings
    if config.char_pretrained_embed is True:
        self.static_char_embed.weight.data.copy_(self.config.char_pretrain_embed)
        for index in range(self.config.embed_char_dim):
            self.static_char_embed.weight.data[self.config.static_char_paddingId][index] = 0
        self.static_char_embed.weight.requires_grad = False
    if config.bichar_pretrained_embed is True:
        self.static_bichar_embed.weight.data.copy_(self.config.bichar_pretrain_embed)
        for index in range(self.config.embed_bichar_dim):
            self.static_bichar_embed.weight.data[self.config.static_bichar_paddingId][index] = 0
        self.static_bichar_embed.weight.requires_grad = False

    # LSTM cells
    self.lstm_left = nn.LSTMCell(input_size=self.config.rnn_dim,
                                 hidden_size=self.config.rnn_hidden_dim, bias=True)
    self.lstm_right = nn.LSTMCell(input_size=self.config.rnn_dim,
                                  hidden_size=self.config.rnn_hidden_dim, bias=True)

    # init lstm weights and biases
    init.xavier_uniform_(self.lstm_left.weight_ih)
    init.xavier_uniform_(self.lstm_left.weight_hh)
    init.xavier_uniform_(self.lstm_right.weight_ih)
    init.xavier_uniform_(self.lstm_right.weight_hh)
    value = np.sqrt(6 / (self.config.rnn_hidden_dim + 1))
    self.lstm_left.bias_hh.data.uniform_(-value, value)
    self.lstm_left.bias_ih.data.uniform_(-value, value)
    self.lstm_right.bias_hh.data.uniform_(-value, value)
    self.lstm_right.bias_ih.data.uniform_(-value, value)

    self.dropout = nn.Dropout(self.config.dropout)
    self.dropout_embed = nn.Dropout(self.config.dropout_embed)

    self.input_dim = (self.config.embed_char_dim + self.config.embed_bichar_dim) * 2
    self.liner = nn.Linear(in_features=self.input_dim,
                           out_features=self.config.rnn_dim, bias=True)

    # init linear
    init.xavier_uniform_(self.liner.weight)
    init_linear_value = np.sqrt(6 / (self.config.rnn_dim + 1))
    self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
def init_fanin(tensor):
    # DDPG-style fan-in initialization: U(-1/sqrt(fan_in), 1/sqrt(fan_in))
    fanin = tensor.size(1)
    v = 1.0 / np.sqrt(fanin)
    init.uniform_(tensor, -v, v)
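# Usage sketch for init_fanin (illustration only, not from the original source;
# the layer sizes are assumptions). This is the DDPG-style hidden-layer rule:
import torch.nn as nn

hidden = nn.Linear(400, 300)    # weight shape (300, 400), so fan_in = 400
init_fanin(hidden.weight.data)  # bounds are +/- 1/sqrt(400) = +/- 0.05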
def __init__(self, args): print("Decoder model") super(Decoder_WordLstm, self).__init__() self.args = args self.pos_paddingKey = self.args.create_alphabet.pos_PaddingID print("pos_paddingKey", self.pos_paddingKey) print("appID", self.args.create_alphabet.appID) # self.lstm = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True) self.lstmcell = nn.LSTMCell(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, bias=True) init.xavier_uniform(self.lstmcell.weight_ih) init.xavier_uniform(self.lstmcell.weight_hh) self.lstmcell.bias_hh.data.uniform_( -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)), np.sqrt(6 / (self.args.rnn_hidden_dim + 1))) self.lstmcell.bias_ih.data.uniform_( -np.sqrt(6 / (self.args.rnn_hidden_dim + 1)), np.sqrt(6 / (self.args.rnn_hidden_dim + 1))) # self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size, embedding_dim=self.args.pos_dim, # padding_idx=self.pos_paddingKey) self.pos_embed = nn.Embedding(num_embeddings=self.args.pos_size, embedding_dim=self.args.pos_dim) init.uniform(self.pos_embed.weight, a=-np.sqrt(3 / self.args.pos_dim), b=np.sqrt(3 / self.args.pos_dim)) for i in range(self.args.pos_dim): self.pos_embed.weight.data[self.pos_paddingKey][i] = 0 self.pos_embed.weight.requires_grad = True self.linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2 + self.args.hidden_size, out_features=self.args.label_size, bias=False) # self.non_linear = nn.Linear(in_features=self.args.rnn_hidden_dim * 2, out_features=self.args.hidden_size, # bias=True) self.combine_linear = nn.Linear( in_features=self.args.rnn_hidden_dim * 2 + self.args.pos_dim, out_features=self.args.hidden_size, bias=True) init.xavier_uniform(self.linear.weight) # init.xavier_uniform(self.non_linear.weight) init.xavier_uniform(self.combine_linear.weight) # self.non_linear.bias.data.uniform_(-np.sqrt(6 / (self.args.hidden_size + 1)), # np.sqrt(6 / (self.args.hidden_size + 1))) self.combine_linear.bias.data.uniform_( -np.sqrt(6 / (self.args.hidden_size + 1)), np.sqrt(6 / (self.args.hidden_size + 1))) self.dropout = nn.Dropout(self.args.dropout) self.softmax = nn.LogSoftmax(dim=1) self.bucket = Variable(torch.zeros(1, self.args.label_size)) self.bucket_rnn = Variable(torch.zeros(1, self.args.rnn_hidden_dim)) if self.args.use_cuda is True: self.bucket = self.bucket.cuda() self.bucket_rnn = self.bucket_rnn.cuda()
def __init__(self, kwargs):
    super(TextCNNNet, self).__init__()
    self.input_size = kwargs['input_size']
    self.hidden_size = kwargs['hidden_size']
    self.output_size = kwargs['output_size']
    # optional settings with defaults; kwargs.get replaces the repetitive
    # if/else blocks and fixes a missing assignment for batch_normal_momentum
    self.kernel_num = kwargs.get('kernel_num', 128)
    self.kernel_sizes = kwargs.get('kernel_sizes', [1, 2, 3, 4])
    self.embed_size = kwargs.get('embed_size', kwargs['hidden_size'])
    self.dropout = kwargs.get('dropout', 0.1)
    self.wide_conv = kwargs.get('wide_conv', True)
    self.init_weight = kwargs.get('init_weight', False)
    self.init_weight_value = kwargs.get('init_weight_value', 2.0)
    self.batch_normal = kwargs.get('batch_normal', False)
    self.batch_normal_momentum = kwargs.get('batch_normal_momentum', 0.1)
    self.batch_normal_affine = kwargs.get('batch_normal_affine', False)

    Ci = 1                   # input channels: text is treated as a single channel
    Co = self.kernel_num     # output channels
    Ks = self.kernel_sizes   # list of kernel heights

    if 'max_norm' in kwargs:
        self.embed = nn.Embedding(self.input_size, self.embed_size, max_norm=kwargs['max_norm'])
    else:
        self.embed = nn.Embedding(self.input_size, self.embed_size, scale_grad_by_freq=True)
    if 'word_embedding' in kwargs:
        pretrained_weight = torch.from_numpy(kwargs['word_embedding'])
        self.embed.weight.data.copy_(pretrained_weight)
        self.embed.weight.requires_grad = True

    if self.wide_conv is True:
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, self.embed_size),
                       stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=True) for K in Ks])
    else:
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co,
                       kernel_size=(K, self.embed_size), bias=True) for K in Ks])
    if self.init_weight:
        for conv in self.convs1:
            init.xavier_normal_(conv.weight.data, gain=np.sqrt(self.init_weight_value))
            init.uniform_(conv.bias, 0, 0)  # uniform on [0, 0] zeroes the bias

    self.dropout = nn.Dropout(self.dropout)
    in_fea = len(Ks) * Co
    self.f1 = nn.Linear(in_fea, in_fea // 2, bias=True)
    self.f2 = nn.Linear(in_fea // 2, self.output_size, bias=True)
    self.h2o = nn.Linear(in_fea, self.output_size)
    self.softmax = nn.LogSoftmax(dim=1)

    if self.batch_normal:
        self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=self.batch_normal_momentum,
                                        affine=self.batch_normal_affine)
        self.f1_bn = nn.BatchNorm1d(num_features=in_fea // 2, momentum=self.batch_normal_momentum,
                                    affine=self.batch_normal_affine)
        self.f2_bn = nn.BatchNorm1d(num_features=self.output_size,
                                    momentum=self.batch_normal_momentum,
                                    affine=self.batch_normal_affine)
# Load the training data, then further partition into training and validation
# sets, preserving the ratio of positive to negative training examples.
train_data = imdbTrainDataset()
train_dataloader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)

# Load the model
model = CNN(vocab_size=20000, embedding_dim=128, hidden_dim=50, label_size=1,
            batch_size=batch_size, seq_len=250)
model.cuda()

# model._parameters = init.xavier_normal_(list(model.parameters()))
# or:
for param in model.parameters():
    # init.xavier_normal_(param)
    init.uniform_(param)

loss_fn = torch.nn.BCEWithLogitsLoss()
# loss_fn = torch.nn.CrossEntropyLoss()
loss_name = "bce"
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

start_clock = time.perf_counter()  # time.clock() was removed in Python 3.8
epoch = 0

print("time_stamp: {}".format(time_stamp))
print()
print("model: {}".format(args.model))
print("data: {}".format(args.data))
print("features: {}".format(args.feats))
print("null features: {}".format(args.null))
def __init__(self, args, batchNorm=True):
    super(FlowNetS, self).__init__()
    self.batchNorm = batchNorm

    self.conv1 = conv(self.batchNorm, 12, 64, kernel_size=7, stride=2)
    self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
    self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
    self.conv3_1 = conv(self.batchNorm, 256, 256)
    self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
    self.conv4_1 = conv(self.batchNorm, 512, 512)
    self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
    self.conv5_1 = conv(self.batchNorm, 512, 512)
    self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
    self.conv6_1 = conv(self.batchNorm, 1024, 1024)

    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                init.uniform_(m.bias)
            init.xavier_uniform_(m.weight)
def __init__(self, batchNorm=False, div_flow=20.):
    super(FlowNet2, self).__init__()
    self.batchNorm = batchNorm
    self.div_flow = div_flow
    # self.rgb_max = args.rgb_max
    self.rgb_max = 1
    # self.args = args
    self.channelnorm = ChannelNorm()

    # First block (FlowNetC)
    self.flownetc = FlowNetC.FlowNetC(batchNorm=self.batchNorm)
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    # if args.fp16:
    #     self.resample1 = nn.Sequential(tofp32(), Resample2d(), tofp16())
    # else:
    self.resample1 = Resample2d()

    # Block (FlowNetS1)
    self.flownets_1 = FlowNetS.FlowNetS(batchNorm=self.batchNorm)
    self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
    # (the same fp16 wrapping as resample1 would apply to resample2-4)
    self.resample2 = Resample2d()

    # Block (FlowNetS2)
    self.flownets_2 = FlowNetS.FlowNetS(batchNorm=self.batchNorm)

    # Block (FlowNetSD)
    self.flownets_d = FlowNetSD.FlowNetSD(batchNorm=self.batchNorm)
    self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
    self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')
    self.resample3 = Resample2d()
    self.resample4 = Resample2d()

    # Block (FlowNetFusion)
    self.flownetfusion = FlowNetFusion.FlowNetFusion(batchNorm=self.batchNorm)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                init.uniform_(m.bias)
            init.xavier_uniform_(m.weight)
def init_hidden(self, initrange):
    for ww in self.parameters():
        init.uniform_(ww.data, -initrange, initrange)
def trainClassifier(allDataTrain, targetDataTrain, allDataTest, targetDataTest,
                    learning_rate, momentum, maxEpoch, saveModel, results):
    classifier = buildClassifierModel(embedding_dim, 1).cuda()
    for param in classifier.parameters():
        init.uniform_(param, -1e-7, 1e-7)
    loss_function = nn.BCELoss(size_average=True).cuda()
    optimizer = optim.RMSprop(classifier.parameters(), lr=learning_rate, alpha=0.99,
                              eps=1e-08, weight_decay=0, momentum=momentum, centered=False)
    lossVal = mres.LossValues()
    errors = []
    lrStr = mres.floatToStr("%2.15f", learning_rate)
    momentumStr = mres.floatToStr("%2.15f", momentum)
    epc = 0
    numberOfDoc = math.floor((allDataTrain.size()[0] / batch_size) * batch_size)
    for fold in range(folds):
        for epoch in range(maxEpoch):
            print("class : %s fold %d epoch %d" % (classname, fold, epoch))
            epc += 1
            # torch.range is deprecated; arange with an exclusive end is equivalent here
            inds = torch.arange(1, numberOfDoc + 1, batch_size).long()
            shuffle = torch.randperm(inds.size()[0])
            lr = optimizer.param_groups[0]['lr']
            lrStr = mres.floatToStr("%2.15f", lr)
            for i in range(int(numberOfDoc / batch_size)):
                start = inds[shuffle[i]] - 1
                endd = inds[shuffle[i]] + batch_size - 1
                inp = autograd.Variable(allDataTrain[start:endd].data.cuda(), requires_grad=False)
                target = autograd.Variable(
                    torch.Tensor(batch_size).copy_(targetDataTrain[start:endd]).cuda(),
                    requires_grad=False)
                classifier.zero_grad()
                pred = classifier.forward(inp)
                loss = loss_function(pred, target)
                print("fold %d epoch %d lr %s - pred %f target %f loss %f" %
                      (fold, epoch, lrStr, pred.data[0][0], target.data[0], loss.data[0]))
                loss.backward()
                optimizer.step()
                errors.append(loss.data[0])
                lossVal.y.append(loss.data[0])
            mean = torch.mean(torch.Tensor(errors))
            lossVal.mean.append(mean)
            if epoch % 50 == 0 and epoch != 0:
                trainresults = mres.testClassifier(classifier, allDataTrain, targetDataTrain)
                res = ("train - lr %s mmt %s maxepoch %d epoch %d score %d/%d - "
                       "trueNegPred/allNeg:%d/%d=%f truePosPred/allPos:%d/%d=%f" % (
                           lrStr, momentumStr, maxEpoch, epoch + 1,
                           trainresults.correct, trainresults.all,
                           trainresults.trueNegatives, trainresults.allNegatives, trainresults.negRate,
                           trainresults.truePositives, trainresults.allPositives, trainresults.posRate))
                results.append(res)
                testresults = mres.testClassifier(classifier, allDataTest, targetDataTest)
                res = ("test - lr %s mmt %s maxepoch %d epoch %d score %d/%d - "
                       "trueNegPred/allNeg:%d/%d=%f truePosPred/allPos:%d/%d=%f" % (
                           lrStr, momentumStr, maxEpoch, epoch + 1,
                           testresults.correct, testresults.all,
                           testresults.trueNegatives, testresults.allNegatives, testresults.negRate,
                           testresults.truePositives, testresults.allPositives, testresults.posRate))
                results.append(res)
    trainresults = mres.testClassifier(classifier, allDataTrain, targetDataTrain)
    res = ("train - lr %s mmt %s maxepoch %d epoch %d score %d/%d - "
           "trueNegPred/allNeg:%d/%d=%f truePosPred/allPos:%d/%d=%f" % (
               lrStr, momentumStr, maxEpoch, maxEpoch,
               trainresults.correct, trainresults.all,
               trainresults.trueNegatives, trainresults.allNegatives, trainresults.negRate,
               trainresults.truePositives, trainresults.allPositives, trainresults.posRate))
    results.append(res)
    testresults = mres.testClassifier(classifier, allDataTest, targetDataTest)
    res = ("test - lr %s mmt %s maxepoch %d epoch %d score %d/%d - "
           "trueNegPred/allNeg:%d/%d=%f truePosPred/allPos:%d/%d=%f" % (
               lrStr, momentumStr, maxEpoch, maxEpoch,
               testresults.correct, testresults.all,
               testresults.trueNegatives, testresults.allNegatives, testresults.negRate,
               testresults.truePositives, testresults.allPositives, testresults.posRate))
    results.append(res)
    if saveModel == True:
        lossVal.x = range(folds * maxEpoch * int(numberOfDoc / batch_size))
        lrStr = mres.floatToStr("%2.15f", learning_rate)
        fname = "%scdlc-mlp-batch-loss-values-%s-%s-%d.bin" % (path, lrStr, momentumStr, maxEpoch)
        with open(fname, 'wb') as fh:
            # Save the loss values as a pickle file
            pickle.dump(lossVal, fh)
    return classifier
def __init__(self, args): print("Encoder model --- LSTM") super(Encoder_WordLstm, self).__init__() self.args = args # random self.char_embed = nn.Embedding(self.args.embed_char_num, self.args.embed_char_dim) for index in range(self.args.embed_char_dim): self.char_embed.weight.data[ self.args.create_alphabet.char_PaddingID][index] = 0 self.char_embed.weight.requires_grad = True self.bichar_embed = nn.Embedding(self.args.embed_bichar_num, self.args.embed_bichar_dim) for index in range(self.args.embed_bichar_dim): self.bichar_embed.weight.data[ self.args.create_alphabet.bichar_PaddingID][index] = 0 self.bichar_embed.weight.requires_grad = True # fix the word embedding self.static_char_embed = nn.Embedding(self.args.static_embed_char_num, self.args.embed_char_dim) init.uniform(self.static_char_embed.weight, a=-np.sqrt(3 / self.args.embed_char_dim), b=np.sqrt(3 / self.args.embed_char_dim)) self.static_bichar_embed = nn.Embedding( self.args.static_embed_bichar_num, self.args.embed_bichar_dim) init.uniform(self.static_bichar_embed.weight, a=-np.sqrt(3 / self.args.embed_bichar_dim), b=np.sqrt(3 / self.args.embed_bichar_dim)) # load external word embedding if args.char_Embedding is True: print("char_Embedding") pretrained_char_weight = np.array(args.pre_char_word_vecs) self.static_char_embed.weight.data.copy_( torch.from_numpy(pretrained_char_weight)) for index in range(self.args.embed_char_dim): self.static_char_embed.weight.data[ self.args.create_static_alphabet.char_PaddingID][index] = 0 self.static_char_embed.weight.requires_grad = False if args.bichar_Embedding is True: print("bichar_Embedding") pretrained_bichar_weight = np.array(args.pre_bichar_word_vecs) self.static_bichar_embed.weight.data.copy_( torch.from_numpy(pretrained_bichar_weight)) # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_PaddingID]) # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_UnkID]) for index in range(self.args.embed_bichar_dim): self.static_bichar_embed.weight.data[ self.args.create_static_alphabet. bichar_PaddingID][index] = 0 self.static_bichar_embed.weight.requires_grad = False self.lstm_left = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, dropout=self.args.dropout, bias=True) self.lstm_right = nn.LSTM(input_size=self.args.hidden_size, hidden_size=self.args.rnn_hidden_dim, dropout=self.args.dropout, bias=True) # init lstm weight and bias init.xavier_uniform(self.lstm_left.weight_ih_l0) init.xavier_uniform(self.lstm_left.weight_hh_l0) init.xavier_uniform(self.lstm_right.weight_ih_l0) init.xavier_uniform(self.lstm_right.weight_hh_l0) value = np.sqrt(6 / (self.args.rnn_hidden_dim + 1)) self.lstm_left.bias_ih_l0.data.uniform_(-value, value) self.lstm_left.bias_hh_l0.data.uniform_(-value, value) self.lstm_right.bias_ih_l0.data.uniform_(-value, value) self.lstm_right.bias_hh_l0.data.uniform_(-value, value) self.hidden_l = self.init_hidden_cell(self.args.batch_size) self.hidden_r = self.init_hidden_cell(self.args.batch_size) self.dropout = nn.Dropout(self.args.dropout) self.dropout_embed = nn.Dropout(self.args.dropout_embed) self.input_dim = (self.args.embed_char_dim + self.args.embed_bichar_dim) * 2 self.liner = nn.Linear(in_features=self.input_dim, out_features=self.args.hidden_size, bias=True) # init linear init.xavier_uniform(self.liner.weight) init_linear_value = np.sqrt(6 / (self.args.hidden_size + 1)) self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
def __init__(self, args):
    super(HighWay_CNN, self).__init__()
    self.args = args
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {}".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        # self.embed.weight.data.uniform_(-0.1, 0.1)
    else:
        print("max_norm = {}".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # fix the word embedding
        self.embed.weight.requires_grad = True
    print("embedding weight size: {}".format(self.embed.weight.data.size()))
    if args.wide_conv is True:
        print("using wide convolution")
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                       stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=True) for K in Ks])
    else:
        print("using narrow convolution")
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks])
    print(self.convs1)
    if args.init_weight:
        print("Initializing W .......")
        for conv in self.convs1:
            init.xavier_normal_(conv.weight.data, gain=np.sqrt(args.init_weight_value))
            fan_in, fan_out = HighWay_CNN.calculate_fan_in_and_fan_out(conv.weight.data)
            print("fan_in {} fan_out {}".format(fan_in, fan_out))
            std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
            print("std {}".format(std))
            init.uniform_(conv.bias, 0, 0)  # uniform on [0, 0] zeroes the bias
    self.dropout = nn.Dropout(args.dropout)
    # self.dropout = nn.Dropout2d(args.dropout)
    # self.dropout = nn.AlphaDropout(args.dropout)
    in_fea = len(Ks) * Co
    # self.fc1 = nn.Linear(in_features=in_fea, out_features=C, bias=True)
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea, bias=True)
    # highway gate layer
    self.gate_layer = nn.Linear(in_features=in_fea, out_features=in_fea, bias=True)
    # self.gate_layer.bias.data.fill_(-1)
    # final projection
    self.logit_layer = nn.Linear(in_features=in_fea, out_features=C, bias=True)
    # whether to use batch normalization
    if args.batch_normalizations is True:
        print("using batch_normalizations in the model......")
        self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                        affine=args.batch_norm_affine)
        self.fc1_bn = nn.BatchNorm1d(num_features=in_fea // 2, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
        self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)