Example #1
0
 def __init__(self, input_size, width=3, dropout=0.2, nopad=False):
     """Gated temporal convolution over a (time, 1) window.

     Args:
         input_size: number of input channels; the conv emits twice as many
             so the output can be split into value/gate halves by the caller.
         width: temporal kernel width.
         dropout: dropout probability; also folded into the init gain.
         nopad: when True, no temporal padding is applied.
     """
     super(GatedConv, self).__init__()
     # Pad to keep the time dimension unless padding was disabled.
     pad_rows = width // 2 * (1 - nopad)
     self.conv = onmt.modules.WeightNormConv2d(
         input_size,
         2 * input_size,
         kernel_size=(width, 1),
         stride=(1, 1),
         padding=(pad_rows, 0))
     # Gain compensates for both the GLU gating and the dropout rate.
     init_gain = (4 * (1 - dropout)) ** 0.5
     init.xavier_uniform_(self.conv.weight, gain=init_gain)
     self.dropout = nn.Dropout(dropout)
Example #2
0
 def _reset_parameters(self):
     """Re-initialize the module's weight matrix with Xavier-uniform values.

     NOTE(review): relies on the attribute being named ``self.weights``
     (plural) — confirm against the enclosing module's constructor.
     """
     init.xavier_uniform_(self.weights)
Example #3
0
    def __init__(self, in_dim1, in_dim2, in_dim3, n_layer):
        """Build the embedding -> 2x LSTM -> fully-connected stack.

        Args:
            in_dim1: input feature size of the first LSTM.
            in_dim2: hidden size of the first LSTM / input of the second.
            in_dim3: hidden size of the second LSTM / input of the FC stack.
            n_layer: number of layers in each LSTM.
        """
        super(cnn, self).__init__()

        def _init_lstm(lstm):
            # Xavier-init every weight matrix (w_ih, w_hh) of every layer.
            # The original code hard-coded all_weights[0]/[1] indices, hit
            # [1][1] twice, never touched [0][1], and crashed for n_layer==1;
            # looping fixes all of that. 1-D biases are skipped because
            # xavier init requires at least 2 dimensions.
            for layer_params in lstm.all_weights:
                for param in layer_params:
                    if param.dim() > 1:
                        init.xavier_normal_(param, gain=np.sqrt(2.0))

        def _init_linear(linear):
            # Shared init for the fully-connected stack: Xavier weights,
            # small positive bias.
            init.xavier_uniform_(linear.weight, gain=np.sqrt(2.0))
            init.constant_(linear.bias, 0.1)

        self.embed = torch.nn.Embedding(85, 32)
        init.xavier_uniform_(self.embed.weight, gain=1.0)

        self.lstm1 = nn.LSTM(in_dim1, in_dim2, n_layer, batch_first=True)
        _init_lstm(self.lstm1)

        self.batchnorm2 = nn.BatchNorm1d(25)

        self.lstm2 = nn.LSTM(in_dim2, in_dim3, n_layer, batch_first=True)
        _init_lstm(self.lstm2)

        self.batchnorm3 = nn.BatchNorm1d(25)

        self.linear1 = nn.Linear(in_dim3, 128)
        _init_linear(self.linear1)
        self.dp1 = nn.Dropout(p=0.5)
        self.relu1 = nn.ReLU(inplace=True)

        self.linear2 = nn.Linear(128, 64)
        _init_linear(self.linear2)
        self.dp2 = nn.Dropout(p=0.5)
        self.relu2 = nn.ReLU(inplace=True)

        self.linear22 = nn.Linear(64, 64)
        _init_linear(self.linear22)
        self.dp22 = nn.Dropout(p=0.5)
        self.relu22 = nn.ReLU(inplace=True)

        self.linear3 = nn.Linear(64, 32)
        _init_linear(self.linear3)
        self.dp3 = nn.Dropout(p=0.5)
        self.relu3 = nn.ReLU(inplace=True)

        self.linear4 = nn.Linear(32, 32)
        _init_linear(self.linear4)
        self.relu4 = nn.ReLU(inplace=True)

        self.linear5 = nn.Linear(32, 16)
        # BUG FIX: the original re-initialized linear2 here (copy-paste),
        # leaving linear5 with PyTorch's default init.
        _init_linear(self.linear5)

        self.relu5 = nn.ReLU(inplace=True)
Example #4
0
def _weights_init(m):
    classname = m.__class__.__name__
    print(classname)
    if isinstance(m, nn.Linear):
        xavier_uniform_(m.weight)
        m.bias.data.fill_(0.0)
 def weight_init(m):
     """Initialize a module in-place; intended for use with ``model.apply``.

     Conv2d layers get Xavier-uniform weights and zero bias; other module
     types are left untouched.
     """
     if isinstance(m, nn.Conv2d):
         init.xavier_uniform_(m.weight)
         # init.constant (no trailing underscore) is the long-deprecated
         # alias that recent torch releases removed; use the in-place
         # variant. Also guard against bias=False convolutions.
         if m.bias is not None:
             init.constant_(m.bias, 0)
    def __init__(self,
                 args,
                 word_padding_idx,
                 vocab_size,
                 device,
                 checkpoint=None):
        """Build the Summarizer: embeddings, encoder variant, decoder, generator.

        Args:
            args: hyper-parameter namespace; this method reads emb_size,
                share_embeddings, model_type, hier, enc_layers,
                enc_hidden_size, heads, ff_size, enc_dropout, inter_layers,
                inter_heads, query_layers, dec_layers, dec_hidden_size,
                dec_dropout and share_decoder_embeddings.
            word_padding_idx: vocabulary index used as padding_idx for both
                source and target embeddings.
            vocab_size: size of the vocabulary (shared by src and tgt here).
            device: device the finished model is moved to.
            checkpoint: optional dict with a 'model' state dict to restore;
                when None, all multi-dim parameters are Xavier-initialized.
        """
        self.args = args
        super(Summarizer, self).__init__()
        # self.spm = spm
        self.vocab_size = vocab_size
        self.device = device
        # src_dict = fields["src"].vocab
        # tgt_dict = fields["tgt"].vocab

        # Separate src/tgt embedding tables over the same vocabulary.
        src_embeddings = torch.nn.Embedding(self.vocab_size,
                                            self.args.emb_size,
                                            padding_idx=word_padding_idx)
        tgt_embeddings = torch.nn.Embedding(self.vocab_size,
                                            self.args.emb_size,
                                            padding_idx=word_padding_idx)

        # Tie the two tables so they share one weight matrix.
        if (self.args.share_embeddings):
            tgt_embeddings.weight = src_embeddings.weight

        # Encoder selection: each model_type maps to a different Transformer
        # encoder variant; they share the same core hyper-parameters.
        if self.args.model_type == 'hier':
            if (self.args.hier):
                self.encoder = TransformerInterEncoder(
                    self.args.enc_layers,
                    self.args.enc_hidden_size,
                    self.args.heads,
                    self.args.ff_size,
                    self.args.enc_dropout,
                    src_embeddings,
                    inter_layers=self.args.inter_layers,
                    inter_heads=self.args.inter_heads,
                    device=device)
            else:
                # Flat (non-hierarchical) fallback for model_type 'hier'.
                self.encoder = TransformerEncoder(self.args.enc_layers,
                                                  self.args.enc_hidden_size,
                                                  self.args.heads,
                                                  self.args.ff_size,
                                                  self.args.enc_dropout,
                                                  src_embeddings)

        elif self.args.model_type == 'he':
            self.encoder = TransformerEncoderHE(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                device=device)

        elif self.args.model_type == 'order':
            self.encoder = TransformerEncoderOrder(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                device=device)

        elif self.args.model_type == 'query':
            # Only the 'query' variant takes the extra num_query_layers knob.
            self.encoder = TransformerEncoderQuery(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                num_query_layers=self.args.query_layers,
                device=device)

        elif self.args.model_type == 'heq':
            self.encoder = TransformerEncoderHEQ(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                device=device)

        elif self.args.model_type == 'heo':
            self.encoder = TransformerEncoderHEO(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                device=device)

        elif self.args.model_type == 'hero':
            self.encoder = TransformerEncoderHERO(
                self.args.enc_layers,
                self.args.enc_hidden_size,
                self.args.heads,
                self.args.ff_size,
                self.args.enc_dropout,
                src_embeddings,
                inter_layers=self.args.inter_layers,
                inter_heads=self.args.inter_heads,
                device=device)

        # NOTE(review): an unrecognized model_type leaves self.encoder unset
        # and will fail later with AttributeError — confirm callers always
        # pass one of the types handled above.
        self.decoder = TransformerDecoder(self.args.dec_layers,
                                          self.args.dec_hidden_size,
                                          heads=self.args.heads,
                                          d_ff=self.args.ff_size,
                                          dropout=self.args.dec_dropout,
                                          embeddings=tgt_embeddings,
                                          device=device)

        self.generator = get_generator(self.args.dec_hidden_size,
                                       self.vocab_size, device)
        if self.args.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding table.
            self.generator[0].weight = self.decoder.embeddings.weight

        if checkpoint is not None:
            # checkpoint['model']
            # Backward compatibility: old checkpoints used custom LayerNorm
            # parameter names a_2/b_2; rename them to weight/bias before
            # loading strictly.
            keys = list(checkpoint['model'].keys())
            for k in keys:
                if ('a_2' in k):
                    checkpoint['model'][k.replace(
                        'a_2', 'weight')] = checkpoint['model'][k]
                    del (checkpoint['model'][k])
                if ('b_2' in k):
                    checkpoint['model'][k.replace(
                        'b_2', 'bias')] = checkpoint['model'][k]
                    del (checkpoint['model'][k])
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            # Fresh model: Xavier-init every matrix-shaped parameter; 1-D
            # parameters (biases, norms) keep their module defaults.
            for p in self.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        self.to(device)
Example #7
0
 def _init_fc_parameters(fc, gain=1., bias=0.):
     """Initialize one fully-connected layer in place.

     The weight matrix gets Xavier-uniform values scaled by ``gain``; the
     bias vector is filled with the constant ``bias``.
     """
     fc.bias.data.fill_(bias)
     init.xavier_uniform_(fc.weight.data, gain)
Example #8
0
def main():
    """Build the backbone, load or initialize weights, and run training.

    Reads all configuration from the module-level ``args`` namespace and
    saves a model checkpoint after every epoch. Validation / best-model
    tracking is currently disabled.
    """
    best_prec1 = 0  # kept for the (currently disabled) validation logic
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    torch.cuda.set_device(0)
    if args.basenet == 'ResNeXt':
        model = ResNeXt101_64x4d(args.class_num)
        cudnn.benchmark = True
        if args.resume:
            # BUG FIX: this branch previously called Network.load_state_dict,
            # but no `Network` exists in this scope, so resuming raised
            # NameError. Load into the freshly built model (as the pnasnet
            # branch below already does).
            model.load_state_dict(torch.load(args.resume))
        else:
            # Start from ImageNet weights, dropping the classifier head so a
            # fresh args.class_num-way head can be initialized below.
            state_dict = torch.load('resnext101_64x4d-e77a0586.pth')
            state_dict.pop('last_linear.bias')
            state_dict.pop('last_linear.weight')
            model.load_state_dict(state_dict, strict=False)
            init.xavier_uniform_(model.last_linear.weight.data)
            model.last_linear.bias.data.zero_()
        # Freeze the first two feature stages of the backbone.
        for p in model.features[0].parameters():
            p.requires_grad = False
        for p in model.features[1].parameters():
            p.requires_grad = False
    elif args.basenet == 'pnasnet':
        model = pnasnet5large(args.class_num, None)
        cudnn.benchmark = True
        if args.resume:
            model.load_state_dict(torch.load(args.resume))
        else:
            state_dict = torch.load('pnasnet5large-bf079911.pth')
            state_dict.pop('last_linear.bias')
            state_dict.pop('last_linear.weight')
            model.load_state_dict(state_dict, strict=False)
            init.xavier_uniform_(model.last_linear.weight.data)
            model.last_linear.bias.data.zero_()
    else:
        # Fail fast: previously an unknown basenet fell through and crashed
        # later with an unhelpful NameError on `model`.
        raise ValueError('unsupported basenet: %r' % (args.basenet,))
    model = model.cuda()
    cudnn.benchmark = True

    # Datasets and loaders.
    dataset_train = Tiangong(root=args.dataset_root, mode='trainval')
    dataloader_train = data.DataLoader(dataset_train,
                                       args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True,
                                       pin_memory=True)

    dataset_val = Tiangong(root=args.dataset_root, mode='val')
    dataloader_val = data.DataLoader(dataset_val,
                                     batch_size=1,
                                     num_workers=args.num_workers,
                                     shuffle=True,
                                     pin_memory=True)

    criterion = nn.CrossEntropyLoss().cuda()

    # Optimize only the unfrozen parameters.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # Train for one epoch.
        train(dataloader_train, model, criterion, optimizer, epoch)

        # Validation / best-checkpoint tracking is disabled; a checkpoint is
        # saved unconditionally after every epoch instead.
        # prec1 = validate(dataloader_val, model, criterion)
        # best_prec1 = max(prec1, best_prec1)
        torch.save(
            model.state_dict(), 'weights/fixblock_Newtrain_' + args.basenet +
            '/' + '_Tiangong_RMSProp_' + repr(epoch) + '.pth')
    def _reset_parameters(self):
        """Re-initialize the attention projections.

        The packed input projection gets Xavier-uniform weights; when an
        input-projection bias exists, both it and the output-projection bias
        are zeroed.
        """
        xavier_uniform_(self.in_proj_weight)
        if self.in_proj_bias is None:
            return
        constant_(self.in_proj_bias, 0.)
        constant_(self.out_proj.bias, 0.)
    def __init__(self,
                 args,
                 batchNorm=False,
                 div_flow=20.,
                 requires_grad=False):
        """Assemble the full FlowNet2 stack.

        Args:
            args: namespace; must provide ``rgb_max`` and ``fp16``.
            batchNorm: forwarded to every sub-network.
            div_flow: flow scaling factor stored for the forward pass.
            requires_grad: when False (default) every parameter is frozen,
                e.g. when FlowNet2 is used as a fixed flow extractor.
        """
        super(FlowNet2, self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        def _make_resample():
            # This construction was duplicated four times below. Resample2d
            # runs in fp32, so in fp16 mode it is sandwiched between casts.
            if args.fp16:
                return nn.Sequential(tofp32(), Resample2d(), tofp16())
            return Resample2d()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample1 = _make_resample()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample2 = _make_resample()

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)

        # Block (FlowNetSD)
        self.flownets_d = FlowNetSD.FlowNetSD(args, batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')
        self.resample3 = _make_resample()
        self.resample4 = _make_resample()

        # Block (FLowNetFusion)
        self.flownetfusion = FlowNetFusion.FlowNetFusion(
            args, batchNorm=self.batchNorm)

        # Xavier-init all conv/deconv weights; biases get U(0, 1) as in the
        # original implementation. The two isinstance branches had identical
        # bodies, so they are merged here.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)
                # init_deconv_bilinear(m.weight)

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False
Example #11
0
def build_base_model(model_opt, fields, gpu, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build embeddings.
    if model_opt.model_type == "text":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = build_encoder(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = build_decoder(model_opt, tgt_emb)

    # Build NMTModel(= encoder + decoder).
    # Exhaustive selection so `device` is always bound (the original chain
    # ended with `elif not gpu:`, leaving the binding non-obvious).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding table.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm
        # parameter names (a_2/b_2 -> weight/bias).
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        # Fresh model: optional uniform init, then optional Glorot init on
        # matrix-shaped parameters (the latter overwrites the former).
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    return model