def __init__(self, input_size, width=3, dropout=0.2, nopad=False):
    super(GatedConv, self).__init__()
    self.conv = onmt.modules.WeightNormConv2d(
        input_size, 2 * input_size, kernel_size=(width, 1), stride=(1, 1),
        padding=(width // 2 * (1 - nopad), 0))
    init.xavier_uniform_(self.conv.weight, gain=(4 * (1 - dropout)) ** 0.5)
    self.dropout = nn.Dropout(dropout)
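# Hedged side note (not part of the module above): the gain
# (4 * (1 - dropout)) ** 0.5 appears to follow the gated-convolution (GLU)
# recipe from convolutional seq2seq models, widening Xavier's bound to
# compensate for dropout with keep probability (1 - dropout). The check below
# only assumes a plain nn.Conv2d with the same (width, 1) kernel; sizes are
# illustrative.
import math
import torch.nn as nn
from torch.nn import init

width, input_size, dropout = 3, 8, 0.2
conv = nn.Conv2d(input_size, 2 * input_size, kernel_size=(width, 1))
gain = (4 * (1 - dropout)) ** 0.5
init.xavier_uniform_(conv.weight, gain=gain)

fan_in = input_size * width * 1        # in_channels * kH * kW
fan_out = 2 * input_size * width * 1   # out_channels * kH * kW
bound = gain * math.sqrt(6.0 / (fan_in + fan_out))
assert conv.weight.abs().max().item() <= bound + 1e-6  # uniform in [-bound, bound]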
def _reset_parameters(self):
    init.xavier_uniform_(self.weights)
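# Hedged sketch of the kind of module such a reset assumes: a class that owns
# a `weights` parameter and re-draws it with Xavier uniform. `TinyBilinear` is
# illustrative, not the original class.
import torch
import torch.nn as nn
from torch.nn import init


class TinyBilinear(nn.Module):
    def __init__(self, dim1, dim2):
        super(TinyBilinear, self).__init__()
        self.weights = nn.Parameter(torch.empty(dim1, dim2))
        self._reset_parameters()

    def _reset_parameters(self):
        init.xavier_uniform_(self.weights)

    def forward(self, x, y):
        # score[i, j] = x_i @ W @ y_j
        return x @ self.weights @ y.t()


scores = TinyBilinear(4, 6)(torch.randn(2, 4), torch.randn(3, 6))  # shape (2, 3)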
def __init__(self, in_dim1, in_dim2, in_dim3, n_layer):
    super(cnn, self).__init__()
    self.embed = torch.nn.Embedding(85, 32)
    init.xavier_uniform_(self.embed.weight, gain=1.0)
    # Two stacked LSTM blocks: Xavier-normal init for both the input-hidden
    # and hidden-hidden weight matrices of each layer.
    self.lstm1 = nn.LSTM(in_dim1, in_dim2, n_layer, batch_first=True)
    init.xavier_normal_(self.lstm1.all_weights[0][0], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm1.all_weights[0][1], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm1.all_weights[1][0], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm1.all_weights[1][1], gain=np.sqrt(2.0))
    self.batchnorm2 = nn.BatchNorm1d(25)
    self.lstm2 = nn.LSTM(in_dim2, in_dim3, n_layer, batch_first=True)
    init.xavier_normal_(self.lstm2.all_weights[0][0], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm2.all_weights[0][1], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm2.all_weights[1][0], gain=np.sqrt(2.0))
    init.xavier_normal_(self.lstm2.all_weights[1][1], gain=np.sqrt(2.0))
    self.batchnorm3 = nn.BatchNorm1d(25)
    # Fully connected head: Xavier-uniform weights, constant 0.1 biases.
    self.linear1 = nn.Linear(in_dim3, 128)
    init.xavier_uniform_(self.linear1.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear1.bias, 0.1)
    self.dp1 = nn.Dropout(p=0.5)
    self.relu1 = nn.ReLU(inplace=True)
    self.linear2 = nn.Linear(128, 64)
    init.xavier_uniform_(self.linear2.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear2.bias, 0.1)
    self.dp2 = nn.Dropout(p=0.5)
    self.relu2 = nn.ReLU(inplace=True)
    self.linear22 = nn.Linear(64, 64)
    init.xavier_uniform_(self.linear22.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear22.bias, 0.1)
    self.dp22 = nn.Dropout(p=0.5)
    self.relu22 = nn.ReLU(inplace=True)
    self.linear3 = nn.Linear(64, 32)
    init.xavier_uniform_(self.linear3.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear3.bias, 0.1)
    self.dp3 = nn.Dropout(p=0.5)
    self.relu3 = nn.ReLU(inplace=True)
    self.linear4 = nn.Linear(32, 32)
    init.xavier_uniform_(self.linear4.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear4.bias, 0.1)
    self.relu4 = nn.ReLU(inplace=True)
    self.linear5 = nn.Linear(32, 16)
    init.xavier_uniform_(self.linear5.weight, gain=np.sqrt(2.0))
    init.constant_(self.linear5.bias, 0.1)
    self.relu5 = nn.ReLU(inplace=True)
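# Hedged alternative (assumes only torch and numpy): instead of indexing
# lstm.all_weights[layer][slot] by hand, iterating named_parameters covers
# every weight_ih_l*/weight_hh_l* matrix of every layer without risk of
# missing or repeating an index. Sizes are illustrative.
import numpy as np
import torch.nn as nn
from torch.nn import init

lstm = nn.LSTM(input_size=32, hidden_size=64, num_layers=2, batch_first=True)
for name, param in lstm.named_parameters():
    if 'weight' in name:
        init.xavier_normal_(param, gain=np.sqrt(2.0))
    elif 'bias' in name:
        init.constant_(param, 0.0)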
def _weights_init(m):
    classname = m.__class__.__name__
    print(classname)
    if isinstance(m, nn.Linear):
        xavier_uniform_(m.weight)
        m.bias.data.fill_(0.0)
def weight_init(m):
    if isinstance(m, nn.Conv2d):
        init.xavier_uniform_(m.weight)
        # init.xavier_normal_(m.weight)
        init.constant_(m.bias, 0)
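# Hedged usage sketch: per-module init functions of this shape are normally
# passed to Module.apply, which visits every submodule recursively. Assumes
# the weight_init function above is in scope; the small net is illustrative.
import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(16, 32, kernel_size=3, padding=1),
)
net.apply(weight_init)  # every Conv2d gets Xavier weights and zero bias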
def __init__(self, args, word_padding_idx, vocab_size, device, checkpoint=None):
    self.args = args
    super(Summarizer, self).__init__()
    # self.spm = spm
    self.vocab_size = vocab_size
    self.device = device
    # src_dict = fields["src"].vocab
    # tgt_dict = fields["tgt"].vocab
    src_embeddings = torch.nn.Embedding(self.vocab_size, self.args.emb_size,
                                        padding_idx=word_padding_idx)
    tgt_embeddings = torch.nn.Embedding(self.vocab_size, self.args.emb_size,
                                        padding_idx=word_padding_idx)
    if (self.args.share_embeddings):
        tgt_embeddings.weight = src_embeddings.weight
    if self.args.model_type == 'hier':
        if (self.args.hier):
            self.encoder = TransformerInterEncoder(
                self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
                self.args.ff_size, self.args.enc_dropout, src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads, device=device)
        else:
            self.encoder = TransformerEncoder(
                self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
                self.args.ff_size, self.args.enc_dropout, src_embeddings)
    elif self.args.model_type == 'he':
        self.encoder = TransformerEncoderHE(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads, device=device)
    elif self.args.model_type == 'order':
        self.encoder = TransformerEncoderOrder(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads, device=device)
    elif self.args.model_type == 'query':
        self.encoder = TransformerEncoderQuery(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads,
            num_query_layers=self.args.query_layers, device=device)
    elif self.args.model_type == 'heq':
        self.encoder = TransformerEncoderHEQ(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads, device=device)
    elif self.args.model_type == 'heo':
        self.encoder = TransformerEncoderHEO(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads, device=device)
    elif self.args.model_type == 'hero':
        self.encoder = TransformerEncoderHERO(
            self.args.enc_layers, self.args.enc_hidden_size, self.args.heads,
            self.args.ff_size, self.args.enc_dropout, src_embeddings,
            inter_layers=self.args.inter_layers,
            inter_heads=self.args.inter_heads, device=device)
    self.decoder = TransformerDecoder(
        self.args.dec_layers, self.args.dec_hidden_size, heads=self.args.heads,
        d_ff=self.args.ff_size, dropout=self.args.dec_dropout,
        embeddings=tgt_embeddings, device=device)
    self.generator = get_generator(self.args.dec_hidden_size, self.vocab_size,
                                   device)
    if self.args.share_decoder_embeddings:
        self.generator[0].weight = self.decoder.embeddings.weight
    if checkpoint is not None:
        # Rename legacy LayerNorm parameters (a_2/b_2) to weight/bias
        # before loading the checkpoint.
        keys = list(checkpoint['model'].keys())
        for k in keys:
            if ('a_2' in k):
                checkpoint['model'][k.replace('a_2', 'weight')] = \
                    checkpoint['model'][k]
                del (checkpoint['model'][k])
            if ('b_2' in k):
                checkpoint['model'][k.replace('b_2', 'bias')] = \
                    checkpoint['model'][k]
                del (checkpoint['model'][k])
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        # Glorot/Xavier initialization for every weight matrix.
        for p in self.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)
    self.to(device)
def _init_fc_parameters(fc, gain=1., bias=0.):
    init.xavier_uniform_(fc.weight.data, gain)
    fc.bias.data.fill_(bias)
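# Hedged usage sketch: applying the helper above to a freshly built
# classification head. Assumes _init_fc_parameters is in scope; the layer
# sizes are illustrative.
import torch.nn as nn

head = nn.Linear(256, 10)
_init_fc_parameters(head, gain=1.0, bias=0.0)  # Xavier weights, zero bias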
def main():
    # create model
    best_prec1 = 0
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    torch.cuda.set_device(0)
    if args.basenet == 'ResNeXt':
        model = ResNeXt101_64x4d(args.class_num)
        # net = torch.nn.DataParallel(Network, device_ids=[0])
        cudnn.benchmark = True
        if args.resume:
            model.load_state_dict(torch.load(args.resume))
        else:
            state_dict = torch.load('resnext101_64x4d-e77a0586.pth')
            state_dict.pop('last_linear.bias')
            state_dict.pop('last_linear.weight')
            model.load_state_dict(state_dict, strict=False)
            init.xavier_uniform_(model.last_linear.weight.data)
            model.last_linear.bias.data.zero_()
        # Freeze the first two feature blocks.
        for p in model.features[0].parameters():
            p.requires_grad = False
        for p in model.features[1].parameters():
            p.requires_grad = False
    elif args.basenet == 'pnasnet':
        model = pnasnet5large(args.class_num, None)
        # net = torch.nn.DataParallel(Network, device_ids=[0])
        cudnn.benchmark = True
        if args.resume:
            model.load_state_dict(torch.load(args.resume))
        else:
            state_dict = torch.load('pnasnet5large-bf079911.pth')
            state_dict.pop('last_linear.bias')
            state_dict.pop('last_linear.weight')
            model.load_state_dict(state_dict, strict=False)
            init.xavier_uniform_(model.last_linear.weight.data)
            model.last_linear.bias.data.zero_()
    model = model.cuda()
    cudnn.benchmark = True

    # Dataset
    Dataset_train = Tiangong(root=args.dataset_root, mode='trainval')
    Dataloader_train = data.DataLoader(Dataset_train, args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True, pin_memory=True)
    Dataset_val = Tiangong(root=args.dataset_root, mode='val')
    Dataloader_val = data.DataLoader(Dataset_val, batch_size=1,
                                     num_workers=args.num_workers,
                                     shuffle=True, pin_memory=True)

    criterion = nn.CrossEntropyLoss().cuda()
    Optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(Optimizer, epoch)
        # train for one epoch
        train(Dataloader_train, model, criterion, Optimizer, epoch)
        # train(Dataloader_train, Network, criterion, Optimizer, epoch)
        # evaluate on validation set
        # prec1 = validate(Dataloader_val, model, criterion)
        # prec1 = validate(Dataloader_val, Network, criterion)
        # remember best prec@1 and save checkpoint
        # is_best = prec1 > best_prec1
        # best_prec1 = max(prec1, best_prec1)
        # if is_best:
        torch.save(
            model.state_dict(),
            'weights/fixblock_Newtrain_' + args.basenet + '/' +
            '_Tiangong_RMSProp_' + repr(epoch) + '.pth')
def _reset_parameters(self):
    xavier_uniform_(self.in_proj_weight)
    if self.in_proj_bias is not None:
        constant_(self.in_proj_bias, 0.)
        constant_(self.out_proj.bias, 0.)
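# Hedged sketch of the state such a reset assumes: a packed query/key/value
# projection (one (3*E, E) matrix) plus an output projection, as in a typical
# multi-head attention block. Names mirror the method above; the module
# itself is illustrative, not the original class.
import torch
import torch.nn as nn
from torch.nn.init import xavier_uniform_, constant_


class PackedProjections(nn.Module):
    def __init__(self, embed_dim):
        super(PackedProjections, self).__init__()
        # Q, K and V projections stacked row-wise into one matrix.
        self.in_proj_weight = nn.Parameter(torch.empty(3 * embed_dim, embed_dim))
        self.in_proj_bias = nn.Parameter(torch.zeros(3 * embed_dim))
        self.out_proj = nn.Linear(embed_dim, embed_dim)
        self._reset_parameters()

    def _reset_parameters(self):
        xavier_uniform_(self.in_proj_weight)
        if self.in_proj_bias is not None:
            constant_(self.in_proj_bias, 0.)
            constant_(self.out_proj.bias, 0.)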
def __init__(self, args, batchNorm=False, div_flow=20., requires_grad=False):
    super(FlowNet2, self).__init__()
    self.batchNorm = batchNorm
    self.div_flow = div_flow
    self.rgb_max = args.rgb_max
    self.args = args
    self.channelnorm = ChannelNorm()

    # First Block (FlowNetC)
    self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
    self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
    if args.fp16:
        self.resample1 = nn.Sequential(tofp32(), Resample2d(), tofp16())
    else:
        self.resample1 = Resample2d()

    # Block (FlowNetS1)
    self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
    self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
    if args.fp16:
        self.resample2 = nn.Sequential(tofp32(), Resample2d(), tofp16())
    else:
        self.resample2 = Resample2d()

    # Block (FlowNetS2)
    self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)

    # Block (FlowNetSD)
    self.flownets_d = FlowNetSD.FlowNetSD(args, batchNorm=self.batchNorm)
    self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
    self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')
    if args.fp16:
        self.resample3 = nn.Sequential(tofp32(), Resample2d(), tofp16())
    else:
        self.resample3 = Resample2d()
    if args.fp16:
        self.resample4 = nn.Sequential(tofp32(), Resample2d(), tofp16())
    else:
        self.resample4 = Resample2d()

    # Block (FlowNetFusion)
    self.flownetfusion = FlowNetFusion.FlowNetFusion(
        args, batchNorm=self.batchNorm)

    # Xavier-initialize all (transposed) convolution weights; uniform biases.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m.bias is not None:
                init.uniform_(m.bias)
            init.xavier_uniform_(m.weight)
        if isinstance(m, nn.ConvTranspose2d):
            if m.bias is not None:
                init.uniform_(m.bias)
            init.xavier_uniform_(m.weight)
            # init_deconv_bilinear(m.weight)

    if not requires_grad:
        for param in self.parameters():
            param.requires_grad = False
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build embeddings.
    if model_opt.model_type == "text":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"
        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using custom layernorm.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    return model
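# Hedged illustration of the `param_init_glorot` branch above: Xavier/Glorot
# init is applied only to parameters with dim() > 1, because 1-D tensors
# (biases, LayerNorm parameters) have no fan_in/fan_out to derive a bound
# from. The toy model below is an assumption, not an OpenNMT component.
import torch.nn as nn
from torch.nn.init import xavier_uniform_

toy = nn.Sequential(nn.Linear(16, 32), nn.LayerNorm(32), nn.Linear(32, 8))
for p in toy.parameters():
    if p.dim() > 1:
        xavier_uniform_(p)  # weight matrices only; biases/LayerNorm untouched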