def init_weights(self):
    """Initialize the weights of ``conv1``, ``conv2`` and ``conv3`` in place.

    Each layer's weight is drawn from the Xavier (Glorot) uniform
    distribution with gain 1, and each bias is set to the constant 0.1.
    """
    for conv in (self.conv1, self.conv2, self.conv3):
        # ``init.xavier_uniform`` / ``init.constant`` (no underscore) are
        # deprecated aliases; the trailing-underscore functions are the
        # supported in-place initialisers and behave identically.
        init.xavier_uniform_(conv.weight, gain=1)
        init.constant_(conv.bias, 0.1)
def _weights_init(m, ih_std=0.08, hh_std=0.08):
    """Initialise a single module; intended for use with ``Module.apply``.

    Args:
        m: the module to initialise. Only ``nn.LSTM`` and ``nn.Linear``
            instances are touched; anything else is left unchanged.
        ih_std: standard deviation of the zero-mean normal used for the
            LSTM input-to-hidden weights.
        hh_std: standard deviation of the zero-mean normal used for the
            LSTM hidden-to-hidden weights.
    """
    if isinstance(m, nn.LSTM):
        # Only the first layer / forward direction (``*_l0``) is
        # re-initialised, exactly as before; deeper layers keep their
        # default initialisation.
        m.weight_ih_l0.data.normal_(0, ih_std)
        m.weight_hh_l0.data.normal_(0, hh_std)
    elif isinstance(m, nn.Linear):
        # In-place replacement for the deprecated ``xavier_uniform`` alias.
        nn.init.xavier_uniform_(m.weight.data)
        # ``nn.Linear(..., bias=False)`` has ``bias is None``; skip it
        # instead of raising AttributeError.
        if m.bias is not None:
            m.bias.data.fill_(0)
def conv_init(m):
    """Initialise convolution and batch-norm modules by class name.

    Meant to be passed to ``Module.apply``. A module whose class name
    contains ``'Conv'`` gets Xavier-uniform weights with gain ``sqrt(2)``
    and a zero bias; one whose class name contains ``'BatchNorm'`` gets
    weight 1 and bias 0. Every other module is ignored.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    # Matching is by name, so wrapper classes (e.g. a user-defined
    # "ConvBlock") and bias-less / non-affine layers can reach these
    # branches without a ``weight`` or ``bias`` tensor: skip missing ones.
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if 'Conv' in classname:
        if weight is not None:
            init.xavier_uniform_(weight, gain=np.sqrt(2))
        if bias is not None:
            init.constant_(bias, 0)
    elif 'BatchNorm' in classname:
        if weight is not None:
            init.constant_(weight, 1)
        if bias is not None:
            init.constant_(bias, 0)
def __init__(self, input_dim, hidden_dim, latent_dim, max_num_nodes, pool='sum'):
    '''
    Build the graph-convolution encoder layers and the MLP VAE.

    Args:
        input_dim: input feature dimension for node.
        hidden_dim: hidden dim for 2-layer gcn.
        latent_dim: dimension of the latent representation of graph.
        max_num_nodes: stored on the instance; also sizes the VAE output,
            which has ``max_num_nodes * (max_num_nodes + 1) // 2`` entries.
        pool: stored as ``self.pool``; not otherwise used in this method.
    '''
    super(GraphVAE, self).__init__()
    self.conv1 = model.GraphConv(input_dim=input_dim, output_dim=hidden_dim)
    self.bn1 = nn.BatchNorm1d(input_dim)
    self.conv2 = model.GraphConv(input_dim=hidden_dim, output_dim=hidden_dim)
    self.bn2 = nn.BatchNorm1d(input_dim)
    self.act = nn.ReLU()

    output_dim = max_num_nodes * (max_num_nodes + 1) // 2
    #self.vae = model.MLP_VAE_plain(hidden_dim, latent_dim, output_dim)
    self.vae = model.MLP_VAE_plain(input_dim * input_dim, latent_dim, output_dim)
    #self.feature_mlp = model.MLP_plain(latent_dim, latent_dim, output_dim)

    self.max_num_nodes = max_num_nodes

    relu_gain = nn.init.calculate_gain('relu')
    for m in self.modules():
        if isinstance(m, model.GraphConv):
            # ``init.xavier_uniform`` is a deprecated alias. The in-place
            # variant fills the tensor directly, so the old
            # ``m.weight.data = ...`` reassignment is unnecessary.
            init.xavier_uniform_(m.weight.data, gain=relu_gain)
        elif isinstance(m, nn.BatchNorm1d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    self.pool = pool
def xavier(param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Args:
        param: tensor with at least two dimensions, modified in place.
    """
    # ``init.xavier_uniform`` is a deprecated alias of this function.
    init.xavier_uniform_(param)
def __init__(self, input_channels=12, with_bn=True):
    """Create the FlowNetS layers and initialise every (transposed) conv.

    Args:
        input_channels: number of channels fed to ``conv1``.
        with_bn: stored on the instance and forwarded to every ``conv``
            helper call.
    """
    super(FlowNetS, self).__init__()
    self.with_bn = with_bn

    # Layers produced by the ``conv`` helper.
    self.conv1 = conv(input_channels, 64, kernel_size=7, stride=2, with_bn=with_bn)
    self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
    self.conv3_1 = conv(256, 256, with_bn=with_bn)
    self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
    self.conv4_1 = conv(512, 512, with_bn=with_bn)
    self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
    self.conv5_1 = conv(512, 512, with_bn=with_bn)
    self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
    self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

    # Layers produced by the ``deconv`` helper.
    # NOTE(review): 1026 / 770 / 386 / 194 presumably equal
    # (deconv output + skip features + 2 flow channels) — confirm against
    # forward().
    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    # Bias-free 2 -> 2 channel transposed convs (kernel 4, stride 2, pad 1).
    self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    for m in self.modules():
        # Conv2d and ConvTranspose2d were handled by two identical
        # branches; one isinstance check covers both. The un-suffixed
        # ``uniform`` / ``xavier_uniform`` aliases are deprecated.
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                nn_init.uniform_(m.bias)
            nn_init.xavier_uniform_(m.weight)
def xavier(self, param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Args:
        param: tensor with at least two dimensions, modified in place.
    """
    # ``init.xavier_uniform`` is a deprecated alias of this function.
    init.xavier_uniform_(param)
def conv_init(m):
    """Xavier-initialise the weight of any module whose class name has 'Conv'.

    Meant to be passed to ``Module.apply``. The gain is ``sqrt(2)``; biases
    and all other modules are left untouched.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        # The match is by name only, so a wrapper class without its own
        # ``weight`` can land here; skip it rather than raise.
        weight = getattr(m, 'weight', None)
        if weight is not None:
            # In-place replacement for the deprecated ``xavier_uniform``.
            init.xavier_uniform_(weight, gain=np.sqrt(2))
def __init__(self, args, config, label_alphabet):
    """Build the BERT encoder, projection parameters, CRF and attention.

    Args:
        args: run options; this method reads ``ifgpu``,
            ``target_hidden_dim``, ``relation_hidden_dim``,
            ``relation_threds``, ``dropout`` and ``step``, and passes the
            whole object to ``RelationAttention``.
        config: BERT configuration; ``hidden_size`` and
            ``initializer_range`` are read here and the object is passed
            to ``BertModel``.
        label_alphabet: object whose ``size()`` gives the number of labels.
    """
    super(opinionMining, self).__init__()
    print("build network...")
    self.gpu = args.ifgpu
    self.label_size = label_alphabet.size()
    self.bert_encoder_dim = config.hidden_size
    self.target_hidden_dim = args.target_hidden_dim
    self.relation_hidden_dim = args.relation_hidden_dim
    self.relation_threds = args.relation_threds
    self.drop = args.dropout
    self.step = args.step

    # encoder
    self.bert = BertModel(config)

    # target syn
    self.targetSyn_r = nn.Parameter(
        torch.Tensor(self.target_hidden_dim, self.bert_encoder_dim))
    self.targetSyn_s = nn.Parameter(
        torch.Tensor(self.target_hidden_dim, self.bert_encoder_dim))
    # relation syn
    self.relationSyn_u = nn.Parameter(
        torch.Tensor(self.relation_hidden_dim, self.bert_encoder_dim))
    self.relationSyn_s = nn.Parameter(
        torch.Tensor(self.relation_hidden_dim, self.bert_encoder_dim))
    # The parameters above are allocated uninitialised; fill them with the
    # in-place initialiser (``init.xavier_uniform`` is deprecated).
    init.xavier_uniform_(self.targetSyn_r)
    init.xavier_uniform_(self.targetSyn_s)
    init.xavier_uniform_(self.relationSyn_u)
    init.xavier_uniform_(self.relationSyn_s)

    # crf
    # The tag projection has ``label_size + 2`` rows.
    # NOTE(review): the two extra rows are presumably the CRF start/stop
    # tags — confirm in CRF.
    self.targetHidden2Tag = nn.Parameter(
        torch.Tensor(self.label_size + 2, self.target_hidden_dim))
    self.targetHidden2Tag_b = nn.Parameter(
        torch.Tensor(1, self.label_size + 2))
    init.xavier_uniform_(self.targetHidden2Tag)
    init.xavier_uniform_(self.targetHidden2Tag_b)
    self.crf = CRF(self.label_size, self.gpu)

    # relation
    self.relationAttention = RelationAttention(args)

    # other
    self.dropout = nn.Dropout(self.drop)
    self.softmax = nn.Softmax(dim=2)

    if self.gpu:
        self.bert = self.bert.cuda()
        # Move the raw parameters by swapping their storage, so the
        # registered Parameter objects stay the same.
        self.targetSyn_r.data = self.targetSyn_r.cuda()
        self.targetSyn_s.data = self.targetSyn_s.cuda()
        self.relationSyn_u.data = self.relationSyn_u.cuda()
        self.relationSyn_s.data = self.relationSyn_s.cuda()
        self.targetHidden2Tag.data = self.targetHidden2Tag.cuda()
        self.targetHidden2Tag_b.data = self.targetHidden2Tag_b.cuda()
        self.relationAttention = self.relationAttention.cuda()
        self.dropout = self.dropout.cuda()
        self.softmax = self.softmax.cuda()

    def init_weights(module):
        # Only BERT layer-norm modules are touched: both beta and gamma
        # are redrawn from N(0, initializer_range).
        # NOTE(review): a zero-mean gamma is unusual for layer norm —
        # confirm this is intended.
        if isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)

    self.apply(init_weights)
def __init__(self, input_dim, hidden_dim, embedding_dim, label_dim, num_layers,
             pred_hidden_dims=[50], concat=True, bn=True, dropout=0.0,
             args=None, device='cpu'):
    """Build the graph-convolution stack and initialise its weights.

    Args:
        input_dim: forwarded to ``build_conv_layers`` as the input size.
        hidden_dim: forwarded to ``build_conv_layers``.
        embedding_dim: forwarded to ``build_conv_layers``.
        label_dim: stored as ``self.label_dim``.
        num_layers: forwarded to ``build_conv_layers``; also used to size
            ``self.pred_input_dim`` when ``concat`` is true.
        pred_hidden_dims: only printed here (the list default is never
            mutated by this method).
        concat: when true ``pred_input_dim`` is
            ``hidden_dim * (num_layers - 1) + embedding_dim``, otherwise
            ``embedding_dim``; ``add_self`` is its negation.
        bn: stored as ``self.bn``.
        dropout: forwarded to ``build_conv_layers``.
        args: optional options object; when given, ``args.bias`` replaces
            the default ``self.bias = True``.
        device: stored, and used to place the activation module.
    """
    super(GcnEncoderGraph, self).__init__()
    print('Whether concat', concat)
    self.device = device
    self.concat = concat
    add_self = not concat
    self.bn = bn
    self.num_layers = num_layers
    self.num_aggs = 1

    self.bias = True
    if args is not None:
        self.bias = args.bias

    # NOTE(review): the node-level GCN sizes (1536, 16, 2) are hard-coded;
    # their meaning is defined by self.GCN.
    self.conv_node_first, self.conv_node_last = self.GCN(1536, 16, 2)
    self.conv_first, self.conv_block, self.conv_last = self.build_conv_layers(
        input_dim, hidden_dim, embedding_dim, num_layers, add_self,
        normalize=True, dropout=dropout)
    self.act = nn.ReLU().to(device)
    self.label_dim = label_dim

    if concat:
        self.pred_input_dim = hidden_dim * (num_layers - 1) + embedding_dim
    else:
        self.pred_input_dim = embedding_dim

    relu_gain = nn.init.calculate_gain('relu')

    # The trailing-underscore initialisers replace the deprecated
    # ``init.xavier_uniform`` / ``init.constant`` aliases; they fill the
    # tensors in place, so no ``.data`` reassignment is needed.
    for m in self.modules():
        if isinstance(m, GraphConv):
            print('m', m)
            init.xavier_uniform_(m.weight.data, gain=relu_gain)
            print('weight', m.weight.data)
            print('weight', m.weight)
            if m.bias is not None:
                init.constant_(m.bias.data, 0.0)

    # Kept as a second pass so random draws happen in the original order:
    # all GraphConv modules first, then all GraphConvolution modules.
    for m in self.modules():
        if isinstance(m, GraphConvolution):
            print('m2', m)
            for weight in (m.weight1, m.weight2, m.weight3, m.weight4):
                init.xavier_uniform_(weight.data, gain=relu_gain * 5)
            for bias in (m.bias1, m.bias2, m.bias3, m.bias4):
                init.constant_(bias.data, 0.0)

    print('num_layers: ', num_layers)
    print('pred_hidden_dims: ', pred_hidden_dims)
    print('hidden_dim: ', hidden_dim)
    print('embedding_dim: ', embedding_dim)
    print('label_dim', label_dim)
def __init__(self, max_num_nodes, input_dim, hidden_dim, embedding_dim,
             label_dim, num_layers, num_pool_matrix=2,
             num_pool_final_matrix=1, pool_sizes=[4], pred_hidden_dims=[50],
             concat=True, bn=True, dropout=0.0, mask=0, args=None,
             device='cpu'):
    '''
    Build the post-pooling convolution stacks and the prediction head.

    Args:
        max_num_nodes: not used by this method.
        input_dim, hidden_dim, embedding_dim, label_dim: layer sizes,
            forwarded to the parent constructor and the layer builders.
        num_layers: number of gc layers before each pooling
        num_pool_matrix: multiplier on ``pred_input_dim`` for the input of
            each post-pooling convolution stack.
        num_pool_final_matrix: when positive it scales the prediction-head
            input size; otherwise the fallback head sizes are used.
        pool_sizes: one post-pooling stack is built per entry (the list
            default is never mutated here).
        pred_hidden_dims: hidden sizes passed to ``build_pred_layers``.
        concat: selects the head input size; ``add_self`` is its negation.
        bn: not used by this method.
        dropout: forwarded to ``build_conv_layers``.
        mask: stored as ``self.mask``.
        args: options object; ``args.con_final`` is read unconditionally.
        device: stored and forwarded to the parent constructor.
    '''
    super(WavePoolingGcnEncoder, self).__init__(
        input_dim, hidden_dim, embedding_dim, label_dim, num_layers,
        pred_hidden_dims=pred_hidden_dims, concat=concat, args=args,
        device=device)
    add_self = not concat
    self.mask = mask
    self.pool_sizes = pool_sizes
    self.num_pool_matrix = num_pool_matrix
    self.num_pool_final_matrix = num_pool_final_matrix
    # NOTE(review): ``args`` defaults to None but is dereferenced here, so
    # the default raises AttributeError. The right fallback for
    # ``con_final`` is not visible from this method.
    self.con_final = args.con_final
    self.device = device
    print('Device_-wave: ', device)

    self.conv_first_after_pool = nn.ModuleList()
    self.conv_block_after_pool = nn.ModuleList()
    self.conv_last_after_pool = nn.ModuleList()
    print('input_dim', input_dim)
    for i in range(len(pool_sizes)):
        print('In WavePooling', self.pred_input_dim * self.num_pool_matrix)
        conv_first2, conv_block2, conv_last2 = self.build_conv_layers(
            self.pred_input_dim * self.num_pool_matrix, hidden_dim,
            embedding_dim, num_layers, add_self, normalize=True,
            dropout=dropout)
        self.conv_first_after_pool.append(conv_first2)
        self.conv_block_after_pool.append(conv_block2)
        self.conv_last_after_pool.append(conv_last2)

    if self.num_pool_final_matrix > 0:
        final_dim = self.pred_input_dim * self.num_pool_final_matrix
        if concat:
            # ``con_final`` adds one more ``pred_input_dim`` slice to the
            # head input than the plain per-pool concatenation.
            if self.con_final:
                stacked_dim = self.pred_input_dim * (len(pool_sizes) + 1)
            else:
                stacked_dim = self.pred_input_dim * (len(pool_sizes))
            self.pred_model = self.build_pred_layers(
                stacked_dim + final_dim, pred_hidden_dims, label_dim,
                num_aggs=self.num_aggs)
        else:
            self.pred_model = self.build_pred_layers(
                final_dim, pred_hidden_dims, label_dim,
                num_aggs=self.num_aggs)
    else:
        if concat:
            # NOTE(review): unlike every other call, this one passes
            # ``label_dim`` in the hidden-dims position and hard-codes the
            # input size to 512 — looks like a missing argument; confirm
            # against build_pred_layers' signature.
            self.pred_model = self.build_pred_layers(
                512, label_dim, num_aggs=self.num_aggs)
        else:
            self.pred_model = self.build_pred_layers(
                self.pred_input_dim, pred_hidden_dims, label_dim,
                num_aggs=self.num_aggs)

    # Re-initialise every GraphConv (including those built by the parent)
    # with 5x the ReLU gain. The in-place ``*_`` functions replace the
    # deprecated ``init.xavier_uniform`` / ``init.constant`` aliases.
    wide_gain = nn.init.calculate_gain('relu') * 5
    for m in self.modules():
        if isinstance(m, GraphConv):
            init.xavier_uniform_(m.weight.data, gain=wide_gain)
            if m.bias is not None:
                init.constant_(m.bias.data, 0.0)
def __init__(self, input_dim, output_dim, context_dim, att_hidden_dim, config):
    """Create the gate, attention and history-attention projections.

    Args:
        input_dim: size of the ``W_*x`` projection inputs.
        output_dim: size of every gate output.
        context_dim: size of the context fed to ``att_ctx``; also part of
            the recurrent gate input width.
        att_hidden_dim: hidden size of both attention MLPs.
        config: options object; ``parent_hidden_state_feed`` and
            ``decoder_dropout`` are read and the object itself is stored.
    """
    super(CondAttLSTM, self).__init__()

    self.output_dim = output_dim
    self.context_dim = context_dim
    self.input_dim = input_dim

    def xavier_linear(in_features, out_features, bias_value=None):
        # Linear layer with Xavier-uniform weights; a bias is created only
        # when ``bias_value`` is given, and is filled with that constant.
        layer = nn.Linear(in_features, out_features,
                          bias=bias_value is not None)
        init.xavier_uniform_(layer.weight)
        if bias_value is not None:
            # In-place fill instead of swapping in a fresh
            # ``torch.FloatTensor`` parameter: same values, and the bias
            # keeps the dtype/device nn.Linear gave it.
            init.constant_(layer.bias, bias_value)
        return layer

    def orthogonal_linear(in_features, out_features):
        # Bias-free linear layer with orthogonally initialised weights.
        layer = nn.Linear(in_features, out_features, bias=False)
        init.orthogonal_(layer.weight)
        return layer

    # Width of the input to the W_i / W_f / W_c / W_o projections.
    recurrent_in = output_dim + context_dim + output_dim + output_dim

    # Layers are created in the original order so parameter registration
    # and random draws are unchanged.

    # one W for all x
    self.W_ix = xavier_linear(input_dim, output_dim, 0.0)
    # input gate
    self.W_i = orthogonal_linear(recurrent_in, output_dim)

    # forget gate (bias starts at 1.0, unlike the other gates)
    self.W_fx = xavier_linear(input_dim, output_dim, 1.0)
    self.W_f = orthogonal_linear(recurrent_in, output_dim)

    # memory cell new value
    self.W_cx = xavier_linear(input_dim, output_dim, 0.0)
    self.W_c = orthogonal_linear(recurrent_in, output_dim)

    # output gate
    self.W_ox = xavier_linear(input_dim, output_dim, 0.0)
    self.W_o = orthogonal_linear(recurrent_in, output_dim)

    # attention layer
    self.att_ctx = xavier_linear(context_dim, att_hidden_dim, 0.0)
    self.att_h = xavier_linear(output_dim, att_hidden_dim)
    self.att = xavier_linear(att_hidden_dim, 1, 0.0)

    # attention over history
    self.h_att_hist = xavier_linear(output_dim, att_hidden_dim, 0.0)
    self.h_att_h = xavier_linear(output_dim, att_hidden_dim)
    self.h_att = xavier_linear(att_hidden_dim, 1, 0.0)

    self.softmax = nn.Softmax(dim=-1)

    self.parent_hidden_state_feed = config.parent_hidden_state_feed
    self.dropout = config.decoder_dropout
    self.config = config
def my_weight_init(m):
    """Xavier-initialise ``nn.Linear`` weights and zero their biases.

    Meant to be passed to ``Module.apply``; other modules are ignored.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, torch.nn.Linear):
        # In-place variants of the deprecated ``xavier_uniform`` /
        # ``constant`` aliases.
        init.xavier_uniform_(m.weight.data)
        # Layers built with ``bias=False`` have no bias tensor to fill.
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
def init_weight(self):
    """Xavier-initialise every ``nn.Conv2d`` weight under this module.

    Uses gain ``sqrt(2)``. Biases and non-Conv2d modules are not modified.
    """
    gain = np.sqrt(2.0)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            # In-place replacement for the deprecated ``xavier_uniform``.
            init.xavier_uniform_(m.weight, gain=gain)
def weights_init(m):
    """Initialise ``nn.Conv2d`` and ``nn.Linear`` modules.

    Meant to be passed to ``Module.apply``. Conv2d weights are drawn from
    the Xavier uniform distribution and their bias is zeroed; Linear
    weights are drawn from a standard normal and their bias is left as is.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, nn.Conv2d):
        # In-place replacement for the deprecated ``xavier_uniform``.
        init.xavier_uniform_(m.weight.data)
        # ``nn.Conv2d(..., bias=False)`` has no bias tensor to zero.
        if m.bias is not None:
            m.bias.data.zero_()
    if isinstance(m, nn.Linear):
        # In-place replacement for the deprecated ``init.normal``
        # (defaults: mean 0, std 1).
        init.normal_(m.weight.data)
def __init__(self, use_cuda):
    """Build the conv -> conv -> BiLSTM -> linear -> softmax stack.

    Args:
        use_cuda: stored as ``self.use_cuda``; not otherwise used here.
    """
    super(Net, self).__init__()
    self.classes = 10 + 1
    self.use_cuda = use_cuda
    self.image_H = 36

    # CNN
    # conv1: the kernel spans the full image height (image_H x 2).
    self.conv1_input_chanel = 1
    self.conv1_output_chanel = 10
    self.conv1_kernelsize = (self.image_H, 2)
    self.conv1 = nn.Conv2d(self.conv1_input_chanel,
                           self.conv1_output_chanel,
                           self.conv1_kernelsize)
    # initialization (in-place variants of the deprecated
    # ``init.xavier_uniform`` / ``init.constant`` aliases)
    init.xavier_uniform_(self.conv1.weight, gain=np.sqrt(2))
    init.constant_(self.conv1.bias, 0.1)

    # conv2
    self.conv2_input_chanel = 10
    self.conv2_output_chanel = 20
    self.conv2_kernelsize = (1, 2)
    self.conv2 = nn.Conv2d(self.conv2_input_chanel,
                           self.conv2_output_chanel,
                           self.conv2_kernelsize)
    # initialization
    init.xavier_uniform_(self.conv2.weight, gain=np.sqrt(2))
    init.constant_(self.conv2.bias, 0.1)

    # batch norm (before activation)
    self.conv2_bn = nn.BatchNorm2d(self.conv2_output_chanel)
    # # drop out (after activation)
    # self.conv2_drop = nn.Dropout2d()

    self.conv2_H = 1  # height of feature map after conv2

    # LSTM
    # number of features = H * cnn_output_chanel = 1 * 20 = 20
    self.lstm_input_size = self.conv2_H * self.conv2_output_chanel
    self.lstm_hidden_size = 32
    self.lstm_num_layers = 2
    self.lstm_hidden = None
    self.lstm_cell = None

    self.lstm = nn.LSTM(self.lstm_input_size, self.lstm_hidden_size,
                        self.lstm_num_layers, batch_first=True,
                        bidirectional=True)
    # # initialization
    # init.xavier_uniform(self.lstm.weights, gain=np.sqrt(2))
    # init.constant(self.lstm.bias, 0.1)

    # FC: convert to 11-d probability vector
    # The LSTM is bidirectional, hence twice the hidden size.
    self.fc_input_size = self.lstm_hidden_size * 2
    self.fc_output_size = self.classes
    self.fc = nn.Linear(self.fc_input_size, self.fc_output_size)
    # initialization
    init.xavier_uniform_(self.fc.weight, gain=np.sqrt(2))
    init.constant_(self.fc.bias, 0.1)

    # softmax:
    # NOTE(review): no ``dim`` is given, so PyTorch picks one implicitly
    # and warns; the right axis depends on forward(), which is not shown.
    self.softmax = nn.Softmax()
def initialize_weights(m):
    """Xavier-initialise ``nn.Linear`` and ``nn.ConvTranspose2d`` weights.

    Meant to be passed to ``Module.apply``. Biases and every other module
    type are left untouched.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, (nn.Linear, nn.ConvTranspose2d)):
        # In-place replacement for the deprecated ``xavier_uniform``.
        init.xavier_uniform_(m.weight.data)
# Number of samples per input window (window duration is in milliseconds).
input_time_length = int(timeWindowDuration / 1000 * samplingRate)  # train_set.X.shape[1]
in_chans = train_set.X[0].shape[0]

# Build whichever architecture is enabled. If none of the three flags is
# set, ``model`` stays undefined and the code below raises.
if Deep4:
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = Deep4Net(in_chans=in_chans, n_classes=1,
                     input_time_length=input_time_length,
                     pool_time_stride=pool_time_stride,
                     final_conv_length=2,
                     stride_before_pool=True).create_network()
elif ResNet:
    model_name = 'resnet-xavier-uniform'
    # ``str.lstrip('resnet-')`` strips any leading run of the characters
    # r/e/s/n/t/- rather than the literal prefix, so it would mangle an
    # init name starting with one of them. Remove the exact prefix.
    prefix = 'resnet-'
    if model_name.startswith(prefix):
        init_name = model_name[len(prefix):]
    else:
        init_name = model_name
    from torch.nn import init
    # In-place ``*_`` initialisers replace the deprecated un-suffixed
    # aliases.
    init_fn = {
        'he-uniform': lambda w: init.kaiming_uniform_(w, a=0),
        'he-normal': lambda w: init.kaiming_normal_(w, a=0),
        'xavier-uniform': lambda w: init.xavier_uniform_(w, gain=1),
        'xavier-normal': lambda w: init.xavier_normal_(w, gain=1),
    }[init_name]
    model = EEGResNet(in_chans=in_chans, n_classes=1,
                      input_time_length=input_time_length,
                      final_pool_length=2, n_first_filters=48,
                      conv_weight_init_fn=init_fn).create_network()
elif EEGNet_v4:
    model = EEGNetv4(in_chans=in_chans, n_classes=1, final_conv_length=2,
                     input_time_length=input_time_length).create_network()

# remove softmax: copy every child except the one named 'softmax'
new_model = nn.Sequential()
for name, module in model.named_children():
    if name == 'softmax':
        continue
    new_model.add_module(name, module)
def init_weights(self):
    """Xavier-initialise the weights of the four affine layers in place.

    Touches ``affine_v``, ``affine_g``, ``affine_h`` and ``affine_s`` in
    that order; biases are left untouched.
    """
    for layer in (self.affine_v, self.affine_g, self.affine_h, self.affine_s):
        # In-place replacement for the deprecated ``xavier_uniform``.
        init.xavier_uniform_(layer.weight)
def __init__(
    self,
    max_num_nodes,
    input_dim,
    hidden_dim,
    embedding_dim,
    label_dim,
    num_layers,
    assign_hidden_dim,
    assign_ratio=0.25,
    assign_num_layers=-1,
    num_pooling=1,
    pred_hidden_dims=[50],
    concat=True,
    bn=True,
    dropout=0.0,
    linkpred=True,
    assign_input_dim=-1,
    args=None,
):
    """
    Build the post-pooling, assignment and prediction layers.

    Args:
        max_num_nodes: the first assignment size is
            ``int(max_num_nodes * assign_ratio)``.
        input_dim, hidden_dim, embedding_dim, label_dim: layer sizes,
            forwarded to the parent constructor and the layer builders.
        num_layers: number of gc layers before each pooling
        assign_hidden_dim: hidden size of the assignment conv layers.
        assign_ratio: factor applied to the assignment size after every
            pooling stage.
        assign_num_layers: layers in each assignment stack; -1 means
            ``num_layers``.
        num_pooling: number of pooling stages to build.
        pred_hidden_dims: hidden sizes of the final prediction head (the
            list default is never mutated here).
        concat: selects the prediction input sizes; ``add_self`` is its
            negation.
        bn: not used by this method.
        dropout: forwarded to the post-pooling ``build_conv_layers`` call.
        linkpred: flag to turn on link prediction side objective
        assign_input_dim: input size of the first assignment stack; -1
            means ``input_dim``.
        args: forwarded to the parent constructor.
    """
    super(SoftPoolingGcnEncoder, self).__init__(
        input_dim,
        hidden_dim,
        embedding_dim,
        label_dim,
        num_layers,
        pred_hidden_dims=pred_hidden_dims,
        concat=concat,
        args=args,
    )
    add_self = not concat
    self.num_pooling = num_pooling
    self.linkpred = linkpred
    self.assign_ent = True

    # GC
    # NOTE(review): these are plain Python lists, so only the stack from
    # the LAST loop iteration is registered as a submodule (through the
    # ``self.conv_*2`` attributes). With num_pooling > 1 the earlier
    # stacks are invisible to ``self.modules()`` and ``parameters()``.
    # Moving to nn.ModuleList would fix that but changes state_dict keys,
    # so it is left for a deliberate migration.
    self.conv_first_after_pool = []
    self.conv_block_after_pool = []
    self.conv_last_after_pool = []
    for i in range(num_pooling):
        # use self to register the modules in self.modules()
        self.conv_first2, self.conv_block2, self.conv_last2 = self.build_conv_layers(
            self.pred_input_dim,
            hidden_dim,
            embedding_dim,
            num_layers,
            add_self,
            normalize=True,
            dropout=dropout,
        )
        self.conv_first_after_pool.append(self.conv_first2)
        self.conv_block_after_pool.append(self.conv_block2)
        self.conv_last_after_pool.append(self.conv_last2)

    # assignment
    if assign_num_layers == -1:
        assign_num_layers = num_layers
    if assign_input_dim == -1:
        assign_input_dim = input_dim

    # Same registration caveat as the post-pooling lists above.
    self.assign_conv_first_modules = []
    self.assign_conv_block_modules = []
    self.assign_conv_last_modules = []
    self.assign_pred_modules = []
    assign_dim = int(max_num_nodes * assign_ratio)
    for i in range(num_pooling):
        self.assign_conv_first, self.assign_conv_block, self.assign_conv_last = self.build_conv_layers(
            assign_input_dim,
            assign_hidden_dim,
            assign_dim,
            assign_num_layers,
            add_self,
            normalize=True,
        )
        # NOTE(review): this uses ``num_layers`` rather than
        # ``assign_num_layers``; the two differ only when the caller
        # overrides assign_num_layers — confirm which is intended.
        assign_pred_input_dim = (assign_hidden_dim * (num_layers - 1) + assign_dim
                                 if concat else assign_dim)
        self.assign_pred = self.build_pred_layers(assign_pred_input_dim, [],
                                                  assign_dim, num_aggs=1)

        # next pooling layer
        assign_input_dim = embedding_dim
        assign_dim = int(assign_dim * assign_ratio)

        self.assign_conv_first_modules.append(self.assign_conv_first)
        self.assign_conv_block_modules.append(self.assign_conv_block)
        self.assign_conv_last_modules.append(self.assign_conv_last)
        self.assign_pred_modules.append(self.assign_pred)

    self.pred_model = self.build_pred_layers(
        self.pred_input_dim * (num_pooling + 1),
        pred_hidden_dims,
        label_dim,
        num_aggs=self.num_aggs,
    )

    # In-place ``*_`` initialisers replace the deprecated
    # ``init.xavier_uniform`` / ``init.constant`` aliases.
    relu_gain = nn.init.calculate_gain("relu")
    for m in self.modules():
        if isinstance(m, GraphConv):
            init.xavier_uniform_(m.weight.data, gain=relu_gain)
            if m.bias is not None:
                init.constant_(m.bias.data, 0.0)
def init_weights(self):
    """Initialize the weights.

    Applies Xavier uniform initialisation, in place, to the weight of
    ``affine_v``, ``affine_g``, ``affine_h``, ``L1``, ``L2``,
    ``affine_audio`` and ``affine_video`` (in that order). Biases are not
    modified.
    """
    layers = (
        self.affine_v,
        self.affine_g,
        self.affine_h,
        self.L1,
        self.L2,
        self.affine_audio,
        self.affine_video,
    )
    for layer in layers:
        # In-place replacement for the deprecated ``xavier_uniform``.
        init.xavier_uniform_(layer.weight)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Assemble encoder, decoder, optional inference network and generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu: the model is moved to CUDA when ``gpu >= 0`` and to the CPU
            otherwise. NOTE(review): this was documented as a bool, but
            ``False >= 0`` is true, so it presumably is a device index
            with a negative value meaning CPU — confirm with callers.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
    Returns:
        the NMTModel (or ViNMTModel when an inference network is built),
        with the generator attached as ``model.generator``.
    """
    # Function-scope import: the file only has the deprecated
    # ``xavier_uniform`` alias in scope.
    from torch.nn.init import xavier_uniform_

    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    # NOTE(review): ``src_dict``, ``src_feature_dicts`` and
    # ``src_embeddings`` are only bound for the "text" model type, but
    # they are read below by the share_embeddings check and by
    # make_inference_network — "img"/"audio" raise NameError there.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     tgt_feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network.
    inference_network = make_inference_network(
        model_opt,
        src_embeddings, tgt_embeddings,
        src_dict, src_feature_dicts,
        tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    # Make NMTModel(= encoder + decoder + inference network).
    model = (
        NMTModel(encoder, decoder)
        if inference_network is None
        else ViNMTModel(
            encoder, decoder,
            inference_network,
            dist_type=model_opt.p_dist_type,
            dbg=model_opt.dbg_inf,
            use_prior=model_opt.use_generative_model > 0)
    )
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # Previous generator, kept for reference (it used to sit here as a
        # no-op string-literal statement):
        # generator = nn.Sequential(
        #     nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
        #     nn.LogSoftmax(dim=1))
        generator = Generator(
            in_dim=model_opt.decoder_rnn_size,
            out_dim=len(fields["tgt"].vocab),
            mode=model_opt.mode,
        )
        if model_opt.share_decoder_embeddings:
            # NOTE(review): indexing assumes ``Generator`` supports
            # ``[0]`` like the nn.Sequential it replaced — confirm.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        #model.load_state_dict(checkpoint['model'])
        # strict=False tolerates missing/unexpected keys in the checkpoint.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to matrices; 1-D parameters keep
            # whatever the uniform init above gave them.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu >= 0:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, gpu, checkpoint=None):
    """Build the GANRBM model: seq2seq parts, discriminator and generator.

    Args:
        model_opt: options object; vocabulary sizes, padding indices,
            ``share_embeddings``, ``rnn_size``, ``dec_max_len``,
            ``gan_loss_type``, ``param_init`` and the RBM paths are read.
        gpu: not used by this function.
        checkpoint: optional dict with 'encoder', 'decoder' and
            'generator' state dicts, plus optional 'ael' and 'disor'.

    Returns:
        the assembled ``GANRBM`` model.
    """
    ################# The canical seq2seq ###############################
    embeddings = make_embeddings(model_opt, model_opt.enc_numwords,
                                 model_opt.enc_padding_idx)
    encoder = make_encoder(model_opt, embeddings)

    if model_opt.share_embeddings:
        # Fixed: this read the misspelled name ``embddings`` and raised
        # NameError whenever embeddings were shared.
        decoder = make_decoder(model_opt, embeddings)
    else:
        tgt_embedding = make_embeddings(model_opt, model_opt.dec_numwords,
                                        model_opt.dec_padding_idx)
        decoder = make_decoder(model_opt, tgt_embedding)

    ################## Discriminator ####################################
    discor = make_dbm_discriminator(model_opt)
    # Discriminator(model_opt.word_vec_size, filter_num=32, filter_sizes=[1,2],
    #               hidden_size=model_opt.hidden_size, class_num=1)

    ################## Generator (Projection Layer) #####################
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, model_opt.dec_numwords))

    # AEL
    ael = ApproxEmbedding(decoder.embeddings)

    # normalizer
    q_norm, r_norm = make_qr_norm(model_opt)

    # final model
    model = GANRBM(encoder, decoder, ael, generator, discor,
                   dec_max_len=model_opt.dec_max_len,
                   type_loss=model_opt.gan_loss_type,
                   q_normalizer=q_norm,
                   r_normalizer=r_norm)

    # Load the model states from checkpoint or initialize them.
    # remove rbm part
    if model_opt.param_init != 0.0:
        print('Intializing model parameters.')
        for p in model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        for p in generator.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
    else:
        # Glorot init for matrices only. ``nn.init.xavier_uniform_`` is
        # the in-place replacement for the deprecated ``xavier_uniform``.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        for p in generator.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    # special intialization for DBM
    # Runs after the random init so the loaded RBM weights are kept.
    if model_opt.rbm_path is not None:
        print("Initial rmb", model_opt.rbm_path)
        model.disor.rq_rbm.load_model(
            os.path.join(model_opt.rbm_path, model_opt.rbm_rq_prefix))
        model.disor.qr_rbm.load_model(
            os.path.join(model_opt.rbm_path, model_opt.rbm_qr_prefix))

    if checkpoint is not None:
        print('Loading model parameters.')
        load_temp = ['encoder', 'decoder', 'generator']
        model.encoder.load_state_dict(checkpoint['encoder'])
        model.decoder.load_state_dict(checkpoint['decoder'])
        model.generator.load_state_dict(checkpoint['generator'])
        # 'ael' and 'disor' are optional in a checkpoint.
        if 'ael' in checkpoint:
            model.ael.load_state_dict(checkpoint['ael'])
            load_temp.append('ael')
        if 'disor' in checkpoint:
            model.disor.load_state_dict(checkpoint['disor'])
            load_temp.append('disor')
        print("Load", load_temp)

    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Assemble encoder, decoder, document store and generator into a model.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
    Returns:
        the NMTModel (or TwoEncoderModel, depending on
        ``model_opt.encoder_type``) with the generator attached.
    Raises:
        ValueError: if ``model_opt.encoder_type`` is not one of
            'BiAttEncoder', 'transformer' or 'PostEncoder'.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts,
                                     for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict, feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Fixed: the device was hard-coded to "cuda", so the documents went to
    # the GPU even when ``gpu`` was false and the model stayed on the CPU.
    device = torch.device("cuda" if gpu else "cpu")
    all_docs = load_all_docs(model_opt, fields, device)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type in ('BiAttEncoder', 'transformer'):
        model = TwoEncoderModel(encoder, decoder, all_docs, src_embeddings)
    elif model_opt.encoder_type == "PostEncoder":
        model = NMTModel(encoder, decoder)
    else:
        # Previously this fell through and failed on the next line with an
        # UnboundLocalError for ``model``.
        raise ValueError(
            "Unsupported encoder type %s" % (model_opt.encoder_type))
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init for matrices only. ``nn.init.xavier_uniform_``
            # is the in-place replacement for the deprecated
            # ``xavier_uniform`` alias.
            for p in model.parameters():
                if p.dim() > 1:
                    nn.init.xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    nn.init.xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def __init__(self, hidden_size, batch_size, output_size, num_layers,
             is_dilation=True, is_bn=True):
    """Build the Conv1d output block and the linear state transition.

    Args:
        hidden_size: channel count of every Conv1d / BatchNorm1d and the
            width of the transition layer.
        batch_size: stored as ``self.batch_size``.
        output_size: stored as ``self.output_size``.
        num_layers: ``num_layers - 1`` hidden conv layers (and batch norms)
            are created, followed by ``conv_out``.
        is_dilation: when true, hidden conv ``i`` uses dilation and padding
            ``2**i``; otherwise both are 1.
        is_bn: stored as ``self.is_bn``.
    """
    super(Graph_RNN_structure, self).__init__()
    ## model configuration
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.output_size = output_size
    self.num_layers = num_layers  # num_layers of cnn_output
    self.is_bn = is_bn

    ## model
    self.relu = nn.ReLU()
    # self.linear_output = nn.Linear(hidden_size, 1)
    # self.linear_output_simple = nn.Linear(hidden_size, output_size)
    # for state transition use only, input is null
    # self.gru = nn.GRU(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

    # use CNN to produce output prediction
    # self.cnn_output = nn.Sequential(
    #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1),
    #     # nn.BatchNorm1d(hidden_size),
    #     nn.ReLU(),
    #     nn.Conv1d(hidden_size, 1, kernel_size=3, dilation=1, padding=1)
    # )

    # With kernel_size=3, padding equal to the dilation keeps the sequence
    # length unchanged.
    if is_dilation:
        self.conv_block = nn.ModuleList(
            [nn.Conv1d(hidden_size, hidden_size, kernel_size=3,
                       dilation=2**i, padding=2**i)
             for i in range(num_layers-1)])
    else:
        self.conv_block = nn.ModuleList(
            [nn.Conv1d(hidden_size, hidden_size, kernel_size=3,
                       dilation=1, padding=1)
             for i in range(num_layers-1)])
    self.bn_block = nn.ModuleList(
        [nn.BatchNorm1d(hidden_size) for i in range(num_layers-1)])
    self.conv_out = nn.Conv1d(hidden_size, 1, kernel_size=3, dilation=1,
                              padding=1)

    # # use CNN to do state transition
    # self.cnn_transition = nn.Sequential(
    #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1),
    #     # nn.BatchNorm1d(hidden_size),
    #     nn.ReLU(),
    #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1)
    # )

    # use linear to do transition, same as GCN mean aggregator
    self.linear_transition = nn.Sequential(
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU()
    )

    # GRU based output, output a single edge prediction at a time
    # self.gru_output = nn.GRU(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
    # use a list to keep all generated hidden vectors, each hidden has size batch*hidden_dim*1, and the list size is expanding
    # when using convolution to compute attention weight, we need to first concat the list into a pytorch variable: batch*hidden_dim*current_num_nodes
    self.hidden_all = []

    ## initialize
    # ``init.xavier_uniform_`` replaces the deprecated un-suffixed alias;
    # it fills the tensor in place, so no ``.data`` reassignment is needed.
    relu_gain = nn.init.calculate_gain('relu')
    sigmoid_gain = nn.init.calculate_gain('sigmoid')
    for m in self.modules():
        if isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight.data, gain=relu_gain)
        if isinstance(m, nn.Conv1d):
            init.xavier_uniform_(m.weight.data, gain=relu_gain)
        if isinstance(m, nn.BatchNorm1d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        if isinstance(m, nn.GRU):
            # No GRU is created above (those lines are commented out), so
            # this branch is currently never taken.
            init.xavier_uniform_(m.weight_ih_l0.data, gain=sigmoid_gain)
            init.xavier_uniform_(m.weight_hh_l0.data, gain=sigmoid_gain)
            m.bias_ih_l0.data = torch.ones(m.bias_ih_l0.data.size(0)) * 0.25
            m.bias_hh_l0.data = torch.ones(m.bias_hh_l0.data.size(0)) * 0.25
def __init__(self, batchNorm=True, div_flow=20):
    """Create the FlowNetC layers and initialise every (transposed) convolution.

    Args:
        batchNorm: forwarded as the first argument to each ``conv`` block
            factory; presumably enables batch normalisation -- TODO confirm
            against ``conv``.
        div_flow: stored on the instance; not used further in this method.
    """
    super(FlowNetC, self).__init__()

    self.batchNorm = batchNorm
    self.div_flow = div_flow
    bn = self.batchNorm

    # Feature extractor (presumably shared by both input images -- see forward).
    self.conv1 = conv(bn, 3, 64, kernel_size=7, stride=2)
    self.conv2 = conv(bn, 64, 128, kernel_size=5, stride=2)
    self.conv3 = conv(bn, 128, 256, kernel_size=5, stride=2)
    self.conv_redir = conv(bn, 256, 32, kernel_size=1, stride=1)

    # Correlation between the two feature maps, followed by its activation.
    self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                            stride1=1, stride2=2, corr_multiply=1)
    self.corr_activation = nn.LeakyReLU(0.1, inplace=True)

    # Contracting part. The 473 input channels are presumably the correlation
    # output concatenated with the 32 conv_redir channels -- TODO confirm.
    self.conv3_1 = conv(bn, 473, 256)
    self.conv4 = conv(bn, 256, 512, stride=2)
    self.conv4_1 = conv(bn, 512, 512)
    self.conv5 = conv(bn, 512, 512, stride=2)
    self.conv5_1 = conv(bn, 512, 512)
    self.conv6 = conv(bn, 512, 1024, stride=2)
    self.conv6_1 = conv(bn, 1024, 1024)

    # Expanding part.
    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026, 256)
    self.deconv3 = deconv(770, 128)
    self.deconv2 = deconv(386, 64)

    # One flow predictor per resolution.
    self.predict_flow6 = predict_flow(1024)
    self.predict_flow5 = predict_flow(1026)
    self.predict_flow4 = predict_flow(770)
    self.predict_flow3 = predict_flow(386)
    self.predict_flow2 = predict_flow(194)

    def flow_upsampler():
        # Learned 2x upsampling of a 2-channel map (4x4 kernel, stride 2, padding 1).
        return nn.ConvTranspose2d(in_channels=2, out_channels=2, kernel_size=4,
                                  stride=2, padding=1, bias=True)

    self.upsampled_flow6_to_5 = flow_upsampler()
    self.upsampled_flow5_to_4 = flow_upsampler()
    self.upsampled_flow4_to_3 = flow_upsampler()
    self.upsampled_flow3_to_2 = flow_upsampler()

    # Uniform bias (when present) and Xavier-uniform weight for every
    # convolution and transposed convolution registered so far.
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            if m.bias is not None:
                init.uniform(m.bias)
            init.xavier_uniform(m.weight)

    # Parameter-free final upsampling; created after the init loop.
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
def _init_lstm(self, weight): for w in weight.chunk(4, 0): init.xavier_uniform(w)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the encoder(s), decoder and generator and assemble them into a model.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (or NMTModelGCN / NMTModelGCN_DGL when
        ``model_opt.encoder_type == 'gcn'``), with ``generator`` attached.

    NOTE(review): for the "img" and "audio" model types neither ``encoder2``
    nor ``src_dict`` is assigned, so ``encoder2_type != 'none'`` or
    ``share_embeddings`` would hit an unbound local on those paths -- confirm
    these combinations are never requested.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        # Embeddings for the second source stream, only when a second encoder
        # is requested.
        if not model_opt.encoder2_type == 'none':
            src_dict2 = fields["src2"].vocab
            feature_dicts2 = onmt.io.collect_feature_vocabs(fields, 'src2')
            src_embeddings2 = make_embeddings(model_opt, src_dict2,
                                              feature_dicts2)

        if 'morph' in fields and hasattr(fields["morph"], 'vocab'):
            # Morphology-aware path: both encoders receive morph embeddings.
            morph_dict = fields["morph"].vocab
            morph_embeddings = make_morph_embeddings(model_opt, morph_dict, [])
            encoder = make_encoder(model_opt, src_embeddings, morph_embeddings)
            # NOTE(review): the second encoder is always built as 'rnn' here,
            # whatever non-'none' value encoder2_type has.
            encoder2 = make_encoder(
                model_opt, src_embeddings2, morph_embeddings,
                encoder_type='rnn'
            ) if not model_opt.encoder2_type == 'none' else None
        else:
            encoder = make_encoder(model_opt, src_embeddings,
                                   encoder_type=model_opt.encoder_type
                                   )  # gcn features must go here
            if model_opt.encoder2_type == 'none':
                encoder2 = None
            else:
                if model_opt.encoder2_type == 'gcn':
                    # The GCN second encoder reuses the primary source embeddings.
                    encoder2 = make_encoder(
                        model_opt, src_embeddings,
                        encoder_type='gcn')  # gcn features must go here
                elif model_opt.encoder2_type == 'rnn':
                    encoder2 = make_encoder(model_opt, src_embeddings2,
                                            encoder_type='rnn')
                else:
                    raise ValueError("Not implemented yet.")
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder2_type == 'none':
        encoder2 = None
    # The model class is chosen by the primary encoder type (and use_dgl).
    if model_opt.encoder_type == 'gcn':
        if model_opt.use_dgl:
            model = NMTModelGCN_DGL(encoder, decoder, encoder2=encoder2)
        else:
            model = NMTModelGCN(encoder, decoder, encoder2=encoder2)
    else:
        model = NMTModel(encoder, decoder, encoder2=encoder2)
    model.model_type = model_opt.model_type  # text

    # Make Generator.
    if not model_opt.copy_attn:
        # NOTE(review): LogSoftmax is built without an explicit `dim`.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder word embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init runs after (and overrides) the uniform init, but only
            # for parameters with more than one dimension.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        # Pretrained vectors are loaded last, so they are not overwritten by
        # the random initialisation above.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # When encoder2 is None this hasattr check is simply false.
        if hasattr(model.encoder2, 'embeddings'):
            model.encoder2.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc2, model_opt.fix_word_vecs_enc2)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def _init_weights_layer(self, layer): '''Method to initialise the weights for a given layer''' if isinstance(layer, nn.Linear): xavier_uniform(layer.weight.data)
def __init__(self):
    """Define the layers of the keypoint-regression CNN.

    The network takes a single-channel (grayscale) image and ends in a linear
    layer with 136 outputs (presumably 2 values for each of 68 keypoints).

    Shape notes below assume a 224x224 input and a 2x2 max-pool after every
    convolution; the pooling is applied in ``forward`` (not shown here), so
    treat them as a guide -- TODO confirm against ``forward``.
    """
    super(Net, self).__init__()

    # 1 input channel -> 32 feature maps, 5x5 kernel.
    # 224 -> (224 - 5) / 1 + 1 = 220, pooled to 110.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
    # Xavier initialisation instead of constant weights (reviewer feedback).
    I.xavier_uniform(self.conv1.weight)

    # Shared 2x2 max-pool with stride 2.
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    # 3x3 kernel (previously 4x4; reviewer feedback was to avoid even-sized
    # kernels because they have no true centre).
    # 110 -> (110 - 3) / 1 + 1 = 108, pooled to 54.
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
    I.xavier_uniform(self.conv2.weight)

    # 54 -> (54 - 3) / 1 + 1 = 52, pooled to 26.
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

    # 26 -> (26 - 2) / 1 + 1 = 25, pooled to 12.
    self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2)

    # Flattened (256, 12, 12) feature map -> 3000 hidden units.
    self.fc1 = nn.Linear(in_features=256 * 12 * 12, out_features=3000)

    # Dropout with p=0.4.
    self.fc_drop2 = nn.Dropout(p=0.4)

    self.fc2 = nn.Linear(in_features=3000, out_features=1000)
    self.fc3 = nn.Linear(in_features=1000, out_features=136)
def run_experiment(train_set, valid_set, test_set, model_name, optimizer_name,
                   init_lr, scheduler_name, use_norm_constraint, weight_decay,
                   schedule_weight_decay, restarts, max_epochs,
                   max_increase_epochs, np_th_seed):
    """Build a model, optimizer and (optional) scheduler, then run an Experiment.

    Args:
        train_set: training data; ``train_set.X`` and ``train_set.y`` are read
            to derive the channel count and the number of classes.
        valid_set: validation data, or None to disable early stopping.
        test_set: test data, passed through to ``Experiment``.
        model_name: 'deep', 'shallow', or one of 'resnet-he-uniform',
            'resnet-he-normal', 'resnet-xavier-normal', 'resnet-xavier-uniform'.
        optimizer_name: 'adam' or 'adamw'.
        init_lr: initial learning rate for the optimizer.
        scheduler_name: None, 'cosine' or 'cut_cosine'.
        use_norm_constraint: if true, use ``MaxNormDefaultConstraint``.
        weight_decay: weight decay passed to the optimizer.
        schedule_weight_decay: passed to ``ScheduledOptimizer``; must be true
            exactly when ``optimizer_name == 'adamw'`` if a scheduler is used.
        restarts: per-period epoch counts, or None. Exactly one of
            ``restarts`` and ``max_epochs`` must be given.
        max_epochs: total number of epochs, or None (then ``sum(restarts)``).
        max_increase_epochs: patience for the 'valid_misclass' early stop;
            required when ``valid_set`` is given.
        np_th_seed: seed for ``set_random_seeds`` and the batch iterator.

    Returns:
        The ``Experiment`` instance after ``run()`` has completed.

    Raises:
        ValueError: on an unknown model, optimizer or scheduler name.
    """
    set_random_seeds(np_th_seed, cuda=True)
    # torch.backends.cudnn.benchmark = True  # sometimes crashes?
    if valid_set is not None:
        assert max_increase_epochs is not None
    assert (max_epochs is None) != (restarts is None)
    if max_epochs is None:
        max_epochs = np.sum(restarts)
    n_classes = int(np.max(train_set.y) + 1)
    n_chans = int(train_set.X.shape[1])
    input_time_length = 1000

    resnet_prefix = 'resnet-'
    if model_name == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=2).create_network()
    elif model_name == 'shallow':
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length=30).create_network()
    elif model_name in [
        'resnet-he-uniform',
        'resnet-he-normal',
        'resnet-xavier-normal',
        'resnet-xavier-uniform'
    ]:
        # Slice the prefix off. str.lstrip('resnet-') strips a *set of
        # characters*, not a prefix, and would eat into any init name that
        # starts with one of r/e/s/n/t/-.
        init_name = model_name[len(resnet_prefix):]
        from torch.nn import init
        init_fn = {
            'he-uniform': lambda w: init.kaiming_uniform(w, a=0),
            'he-normal': lambda w: init.kaiming_normal(w, a=0),
            'xavier-uniform': lambda w: init.xavier_uniform(w, gain=1),
            'xavier-normal': lambda w: init.xavier_normal(w, gain=1)
        }[init_name]
        model = EEGResNet(in_chans=n_chans, n_classes=n_classes,
                          input_time_length=input_time_length,
                          final_pool_length=10, n_first_filters=48,
                          conv_weight_init_fn=init_fn).create_network()
    else:
        raise ValueError("Unknown model name {:s}".format(model_name))
    if 'resnet' not in model_name:
        to_dense_prediction_model(model)
    model.cuda()
    model.eval()

    # Dry run on one trial to find how many predictions one input window yields.
    out = model(np_to_var(train_set.X[:1, :, :input_time_length, None]).cuda())
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), weight_decay=weight_decay,
                               lr=init_lr)
    elif optimizer_name == 'adamw':
        optimizer = AdamW(model.parameters(), weight_decay=weight_decay,
                          lr=init_lr)
    else:
        # Fail here with a clear message instead of an unbound `optimizer`
        # further down.
        raise ValueError(
            "Unknown optimizer name {!r}".format(optimizer_name))

    iterator = CropsFromTrialsIterator(batch_size=60,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input,
                                       seed=np_th_seed)

    if scheduler_name is not None:
        assert schedule_weight_decay == (optimizer_name == 'adamw')
        scheduler_classes = {
            'cosine': CosineAnnealing,
            'cut_cosine': CutCosineAnnealing,
        }
        if scheduler_name not in scheduler_classes:
            raise ValueError("Unknown scheduler")
        # Count the batches of one epoch by iterating the training set once.
        n_updates_per_epoch = sum(
            1 for _ in iterator.get_batches(train_set, shuffle=True))
        if restarts is None:
            n_updates_per_period = n_updates_per_epoch * max_epochs
        else:
            n_updates_per_period = np.array(restarts) * n_updates_per_epoch
        scheduler = scheduler_classes[scheduler_name](n_updates_per_period)
        optimizer = ScheduledOptimizer(
            scheduler, optimizer,
            schedule_weight_decay=schedule_weight_decay)

    monitors = [
        LossMonitor(),
        MisclassMonitor(col_suffix='sample_misclass'),
        CroppedTrialMisclassMonitor(input_time_length=input_time_length),
        RuntimeMonitor()
    ]

    if use_norm_constraint:
        model_constraint = MaxNormDefaultConstraint()
    else:
        model_constraint = None

    # Average the predictions over dimension 2 before the NLL loss.
    loss_function = lambda preds, targets: F.nll_loss(th.mean(preds, dim=2),
                                                      targets)

    if valid_set is not None:
        run_after_early_stop = True
        do_early_stop = True
        remember_best_column = 'valid_misclass'
        stop_criterion = Or([
            MaxEpochs(max_epochs),
            NoDecrease('valid_misclass', max_increase_epochs)
        ])
    else:
        run_after_early_stop = False
        do_early_stop = False
        remember_best_column = None
        stop_criterion = MaxEpochs(max_epochs)

    exp = Experiment(model, train_set, valid_set, test_set,
                     iterator=iterator,
                     loss_function=loss_function,
                     optimizer=optimizer,
                     model_constraint=model_constraint,
                     monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column=remember_best_column,
                     run_after_early_stop=run_after_early_stop,
                     cuda=True,
                     do_early_stop=do_early_stop)
    exp.run()
    return exp
def train(*, dataset='mnist'):
    """Train a tied-weight, thresholded autoencoder on the GPU.

    The model is ``Xrec = sigmoid(hid @ w1.T + bias)`` where
    ``hid = X @ w1`` with every activation that is not above 1.0 zeroed. The
    loss is the squared reconstruction error summed over features and averaged
    over the batch, optimised with Adadelta for 1000 passes over the data.

    Every 100 updates the loss and the per-sample count of active hidden units
    are printed, and image grids of the reconstructions and of the weights
    are written to ``x.png`` and ``w1.png``.

    Args:
        dataset: dataset name forwarded to ``load_dataset(..., split='train')``.
    """
    z1 = 100  # number of hidden units
    batch_size = 64
    lr = 0.1

    dataset = load_dataset(dataset, split='train')
    x0, _ = dataset[0]
    c, h, w = x0.size()
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1
    )

    # Single weight matrix, used for both encoding and (transposed) decoding.
    w1 = torch.rand(w*h*c, z1).cuda()
    w1 = Variable(w1, requires_grad=True)
    xavier_uniform(w1.data)

    bias = torch.zeros(w*h*c).cuda()
    bias = Variable(bias, requires_grad=True)

    print(w1.is_leaf, bias.is_leaf)

    params = [w1, bias]
    optimizer = torch.optim.Adadelta(params, lr=lr)
    sigmoid = torch.nn.Sigmoid()

    nb_updates = 0
    for _epoch in range(1000):
        for X, _ in dataloader:
            optimizer.zero_grad()
            X = Variable(X)
            X = X.cuda()
            X = X.view(X.size(0), -1)

            hid = torch.matmul(X, w1)
            # Hard threshold: keep only activations above 1.0.
            hid = hid * (hid > 1.0).float()
            Xrec = torch.matmul(hid, w1.transpose(1, 0).contiguous()) + bias
            Xrec = sigmoid(Xrec)

            e1 = ((Xrec - X)**2).sum(1).mean()
            # Placeholders for two further loss terms; currently identical to
            # e1 so the three-column log line keeps its format.
            e2 = e1
            e3 = e1
            loss = e1
            loss.backward()
            optimizer.step()

            if nb_updates % 100 == 0:
                # NOTE(review): `.data[0]` is the pre-0.4 PyTorch way to read
                # a scalar loss; newer releases need `.item()` -- confirm the
                # targeted version before changing.
                print('loss : %.3f %.3f %.3f' % (e1.data[0], e2.data[0], e3.data[0]))
                active = (hid.data>0).float().sum(1)
                print('nbActive : {:.4f} +- {:.4f}'.format(active.mean(), active.std()))

                im = Xrec.data.cpu().numpy()
                im = im.reshape(im.shape[0], c, h, w)
                im = grid_of_images_default(im, normalize=True)
                imsave('x.png', im)

                # One image per hidden unit: (features, z1) -> (z1, c, h, w).
                im = w1.data.cpu().numpy()
                im = im.reshape((c, h, w, z1)).transpose((3, 0, 1, 2))
                im = grid_of_images_default(im, normalize=True)
                imsave('w1.png', im)
            nb_updates += 1
def weight_init(m):
    """Initialise an ``nn.Linear`` module in place; ignore everything else.

    The weight gets Xavier-uniform values and the bias, when the layer has
    one, is filled with 0.01. Only modules whose type is exactly
    ``nn.Linear`` are touched (subclasses are skipped, as before).

    Args:
        m: a module, typically supplied by ``model.apply(weight_init)``.
    """
    if type(m) is not nn.Linear:
        return
    nninit.xavier_uniform(m.weight)
    # Layers built with bias=False have `bias is None`; skip them instead of
    # raising AttributeError.
    if m.bias is not None:
        m.bias.data.fill_(0.01)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the encoder, decoder and generator and assemble them into an NMTModel.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training. When None, parameters are
                    initialised from ``model_opt`` instead of being loaded.
    Returns:
        the NMTModel, with ``generator`` attached and moved to GPU or CPU.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # ---- encoder ----
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # ---- decoder ----
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     tgt_feature_dicts, for_encoder=False)

    # Sharing the embedding matrix requires preprocessing with share_vocab,
    # i.e. identical source and target vocabularies.
    if model_opt.share_embeddings:
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # ---- model (= encoder + decoder) ----
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # ---- generator ----
    if model_opt.copy_attn:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)
    else:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder word embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight

    # ---- parameters: initialise from options, or restore from checkpoint ----
    if checkpoint is None:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for module in (model, generator):
                for p in module.parameters():
                    p.data.uniform_(-model_opt.param_init,
                                    model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init runs after the uniform init and only touches
            # parameters with more than one dimension.
            for module in (model, generator):
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform(p)

        # Pretrained vectors go in last so the random init cannot clobber them.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)
    else:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

    # Attaching the generator registers its parameters with the model.
    model.generator = generator

    # Move the whole model to the requested device.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def init_weights(m):
    """Initialise an ``nn.Conv2d`` module in place; ignore everything else.

    The weight gets Xavier-uniform values with gain ``sqrt(2)`` and the bias,
    when the layer has one, is drawn from the default normal distribution of
    ``init.normal``. Only modules whose type is exactly ``nn.Conv2d`` are
    touched (subclasses are skipped, as before).

    Args:
        m: a module, typically supplied by ``model.apply(init_weights)``.
    """
    if type(m) is not nn.Conv2d:
        return
    init.xavier_uniform(m.weight, gain=np.sqrt(2.0))
    # Convolutions built with bias=False have `bias is None`; skip them
    # instead of passing None to the initialiser.
    if m.bias is not None:
        init.normal(m.bias)
def xavier_weights_init(m):
    """Initialise any module whose class name contains 'Conv', in place.

    The weight gets Xavier-uniform values with gain ``sqrt(2)`` and the bias,
    when the layer has one, is set to the constant 0.1. Modules whose class
    name does not contain 'Conv' are left untouched.

    Args:
        m: a module, typically supplied by ``model.apply(xavier_weights_init)``.
    """
    classname = m.__class__.__name__
    if 'Conv' not in classname:
        return
    init.xavier_uniform(m.weight, gain=np.sqrt(2))
    # Convolutions built with bias=False have `bias is None`; skip them
    # instead of passing None to the initialiser.
    if m.bias is not None:
        init.constant(m.bias, 0.1)
def _init_weight(self):
    """Initialise the transitions matrix.

    The matrix gets Xavier-uniform values, after which row ``START`` and
    column ``STOP`` are filled with -10000.
    """
    init.xavier_uniform(self.transitions)

    # No tag may transition to START, and STOP may not transition to any tag.
    impossible = -10000.
    scores = self.transitions.data
    scores[START, :].fill_(impossible)
    scores[:, STOP].fill_(impossible)