Beispiel #1
0
 def init_weights(self):
     """
     Initialize the three convolution layers in place.

     The weights of ``conv1``, ``conv2`` and ``conv3`` are drawn with Xavier
     (Glorot) uniform initialization using a gain of 1, and every bias is
     set to the constant 0.1.
     """
     for conv in (self.conv1, self.conv2, self.conv3):
         # Use the in-place ``*_`` initializers; the non-underscore
         # aliases are deprecated and emit a warning on every call.
         init.xavier_uniform_(conv.weight, gain=1)
         init.constant_(conv.bias, 0.1)
Beispiel #2
0
def _weights_init(m, ih_std=0.08, hh_std=0.08):
    if isinstance(m, nn.LSTM):
        m.weight_ih_l0.data.normal_(0, ih_std)
        m.weight_hh_l0.data.normal_(0, hh_std)
    elif isinstance(m, nn.Linear):
        xavier_uniform(m.weight.data)
        m.bias.data.fill_(0)
def conv_init(m):
    """Initialize convolution and batch-norm modules in place.

    The module kind is detected from its class name:

    * names containing ``'Conv'``: Xavier-uniform weights with gain
      ``sqrt(2)`` and zero bias;
    * names containing ``'BatchNorm'``: weight set to 1 and bias set to 0.

    Any other module is left untouched.

    Args:
        m: the module to initialize.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        init.xavier_uniform_(m.weight, gain=np.sqrt(2))
        # Convolutions built with ``bias=False`` have ``bias is None``.
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif 'BatchNorm' in classname:
        # Batch-norm layers built with ``affine=False`` have no weight/bias.
        if m.weight is not None:
            init.constant_(m.weight, 1)
        if m.bias is not None:
            init.constant_(m.bias, 0)
Beispiel #4
0
    def __init__(self, input_dim, hidden_dim, latent_dim, max_num_nodes, pool='sum'):
        '''
        Build two graph-convolution layers, their batch norms and the MLP VAE.

        Args:
            input_dim: input feature dimension for node.
            hidden_dim: hidden dim for 2-layer gcn.
            latent_dim: dimension of the latent representation of graph.
            max_num_nodes: stored on ``self.max_num_nodes``; the VAE output
                size is ``max_num_nodes * (max_num_nodes + 1) // 2``.
            pool: stored unchanged on ``self.pool``.
        '''
        super(GraphVAE, self).__init__()
        self.conv1 = model.GraphConv(input_dim=input_dim, output_dim=hidden_dim)
        # NOTE(review): both batch norms are sized with input_dim rather than
        # hidden_dim — confirm this matches the tensors they are applied to.
        self.bn1 = nn.BatchNorm1d(input_dim)
        self.conv2 = model.GraphConv(input_dim=hidden_dim, output_dim=hidden_dim)
        self.bn2 = nn.BatchNorm1d(input_dim)
        self.act = nn.ReLU()

        output_dim = max_num_nodes * (max_num_nodes + 1) // 2
        #self.vae = model.MLP_VAE_plain(hidden_dim, latent_dim, output_dim)
        self.vae = model.MLP_VAE_plain(input_dim * input_dim, latent_dim, output_dim)
        #self.feature_mlp = model.MLP_plain(latent_dim, latent_dim, output_dim)

        self.max_num_nodes = max_num_nodes
        relu_gain = nn.init.calculate_gain('relu')
        for m in self.modules():
            if isinstance(m, model.GraphConv):
                # In-place initializer; the deprecated ``init.xavier_uniform``
                # alias only forwarded to this function after warning.
                init.xavier_uniform_(m.weight.data, gain=relu_gain)
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        self.pool = pool
Beispiel #5
0
def xavier(param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Args:
        param: tensor to initialize; it is modified in place and nothing is
            returned.
    """
    # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform`` alias.
    init.xavier_uniform_(param)
Beispiel #6
0
    def __init__(self, input_channels=12, with_bn=True):
        """Build the encoder, decoder and flow-prediction layers.

        Args:
            input_channels: number of channels fed to the first convolution.
            with_bn: forwarded to every ``conv(...)`` block of the encoder
                and stored on ``self.with_bn``.
        """
        super(FlowNetS, self).__init__()

        self.with_bn = with_bn

        # Encoder blocks.
        self.conv1 = conv(input_channels,
                          64,
                          kernel_size=7,
                          stride=2,
                          with_bn=with_bn)
        self.conv2 = conv(64, 128, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv3 = conv(128, 256, kernel_size=5, stride=2, with_bn=with_bn)
        self.conv3_1 = conv(256, 256, with_bn=with_bn)
        self.conv4 = conv(256, 512, stride=2, with_bn=with_bn)
        self.conv4_1 = conv(512, 512, with_bn=with_bn)
        self.conv5 = conv(512, 512, stride=2, with_bn=with_bn)
        self.conv5_1 = conv(512, 512, with_bn=with_bn)
        self.conv6 = conv(512, 1024, stride=2, with_bn=with_bn)
        self.conv6_1 = conv(1024, 1024, with_bn=with_bn)

        # Decoder blocks.
        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        # One flow predictor per decoder resolution.
        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        # Bias-free transposed convolutions (2 -> 2 channels, kernel 4,
        # stride 2, padding 1) applied to the predicted flow maps.
        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1,
                                                       bias=False)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1,
                                                       bias=False)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1,
                                                       bias=False)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1,
                                                       bias=False)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        # Conv2d and ConvTranspose2d modules get the same treatment:
        # uniform bias (when present) followed by Xavier-uniform weights.
        # The in-place ``*_`` initializers replace the deprecated aliases.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    nn_init.uniform_(m.bias)
                nn_init.xavier_uniform_(m.weight)
 def xavier(self, param):
     """Fill ``param`` in place with Xavier (Glorot) uniform values.

     Args:
         param: tensor to initialize; modified in place, nothing is returned.
     """
     # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform`` alias.
     init.xavier_uniform_(param)
Beispiel #8
0
def conv_init(m):
    """Apply Xavier-uniform weights (gain ``sqrt(2)``) to convolution modules.

    A module is treated as a convolution when its class name contains
    ``'Conv'``; every other module is left untouched. Biases are not changed.

    Args:
        m: the module to initialize in place.
    """
    if 'Conv' in m.__class__.__name__:
        # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform`` alias.
        init.xavier_uniform_(m.weight, gain=np.sqrt(2))
Beispiel #9
0
    def __init__(self, args, config, label_alphabet):
        """Build the BERT encoder, projection parameters, CRF and attention.

        Args:
            args: run options; this constructor reads ``ifgpu``,
                ``target_hidden_dim``, ``relation_hidden_dim``,
                ``relation_threds``, ``dropout`` and ``step``, and passes the
                whole object to ``RelationAttention``.
            config: BERT configuration; ``hidden_size`` and
                ``initializer_range`` are read here and the object is passed
                to ``BertModel``.
            label_alphabet: object whose ``size()`` gives the label count.
        """
        super(opinionMining, self).__init__()
        print("build network...")
        self.gpu = args.ifgpu
        self.label_size = label_alphabet.size()
        self.bert_encoder_dim = config.hidden_size
        self.target_hidden_dim = args.target_hidden_dim
        self.relation_hidden_dim = args.relation_hidden_dim
        self.relation_threds = args.relation_threds
        self.drop = args.dropout
        self.step = args.step

        # encoder
        self.bert = BertModel(config)

        # target syn
        self.targetSyn_r = nn.Parameter(
            torch.Tensor(self.target_hidden_dim, self.bert_encoder_dim))
        self.targetSyn_s = nn.Parameter(
            torch.Tensor(self.target_hidden_dim, self.bert_encoder_dim))
        # relation syn
        self.relationSyn_u = nn.Parameter(
            torch.Tensor(self.relation_hidden_dim, self.bert_encoder_dim))
        self.relationSyn_s = nn.Parameter(
            torch.Tensor(self.relation_hidden_dim, self.bert_encoder_dim))
        # ``torch.Tensor(rows, cols)`` allocates uninitialized memory, so each
        # parameter is filled right away. The in-place ``xavier_uniform_``
        # replaces the deprecated ``xavier_uniform`` alias.
        init.xavier_uniform_(self.targetSyn_r)
        init.xavier_uniform_(self.targetSyn_s)
        init.xavier_uniform_(self.relationSyn_u)
        init.xavier_uniform_(self.relationSyn_s)

        # crf
        # NOTE(review): the "+ 2" presumably accounts for two extra CRF
        # tags (e.g. start/stop) — confirm against the CRF implementation.
        self.targetHidden2Tag = nn.Parameter(
            torch.Tensor(self.label_size + 2, self.target_hidden_dim))
        self.targetHidden2Tag_b = nn.Parameter(
            torch.Tensor(1, self.label_size + 2))
        init.xavier_uniform_(self.targetHidden2Tag)
        init.xavier_uniform_(self.targetHidden2Tag_b)

        self.crf = CRF(self.label_size, self.gpu)

        # relation
        self.relationAttention = RelationAttention(args)

        # other
        self.dropout = nn.Dropout(self.drop)
        self.softmax = nn.Softmax(dim=2)

        if self.gpu:
            # Sub-modules are moved as a whole; the bare parameters have
            # their storage replaced by a CUDA copy.
            self.bert = self.bert.cuda()
            self.targetSyn_r.data = self.targetSyn_r.cuda()
            self.targetSyn_s.data = self.targetSyn_s.cuda()
            self.relationSyn_u.data = self.relationSyn_u.cuda()
            self.relationSyn_s.data = self.relationSyn_s.cuda()
            self.targetHidden2Tag.data = self.targetHidden2Tag.cuda()
            self.targetHidden2Tag_b.data = self.targetHidden2Tag_b.cuda()
            self.relationAttention = self.relationAttention.cuda()
            self.dropout = self.dropout.cuda()
            self.softmax = self.softmax.cuda()

        def init_weights(module):
            # Re-draw both BERTLayerNorm parameters from a zero-mean normal
            # with the configured standard deviation.
            if isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)

        self.apply(init_weights)
Beispiel #10
0
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 embedding_dim,
                 label_dim,
                 num_layers,
                 pred_hidden_dims=[50],
                 concat=True,
                 bn=True,
                 dropout=0.0,
                 args=None,
                 device='cpu'):
        """Build the graph-convolution stack and initialize its weights.

        Args:
            input_dim: input size passed to ``build_conv_layers``.
            hidden_dim: hidden size passed to ``build_conv_layers``.
            embedding_dim: output size passed to ``build_conv_layers``.
            label_dim: stored on ``self.label_dim``.
            num_layers: number of layers passed to ``build_conv_layers``.
            pred_hidden_dims: only printed here; never mutated.
            concat: when true, ``pred_input_dim`` is
                ``hidden_dim * (num_layers - 1) + embedding_dim``, otherwise
                ``embedding_dim``. ``add_self`` is its negation.
            bn: stored on ``self.bn``.
            dropout: dropout value passed to ``build_conv_layers``.
            args: optional options object; when given, ``args.bias``
                overrides the default ``self.bias = True``.
            device: stored on ``self.device``; the activation is moved to it.
        """
        super(GcnEncoderGraph, self).__init__()
        print('Whether concat', concat)
        self.device = device
        self.concat = concat
        add_self = not concat
        self.bn = bn
        self.num_layers = num_layers
        self.num_aggs = 1
        self.bias = True
        if args is not None:
            self.bias = args.bias

        # NOTE(review): the sizes (1536, 16, 2) are hard-coded rather than
        # derived from the constructor arguments — confirm they are intended.
        self.conv_node_first, self.conv_node_last = self.GCN(1536, 16, 2)
        self.conv_first, self.conv_block, self.conv_last = self.build_conv_layers(
            input_dim,
            hidden_dim,
            embedding_dim,
            num_layers,
            add_self,
            normalize=True,
            dropout=dropout)
        self.act = nn.ReLU().to(device)
        self.label_dim = label_dim
        if concat:
            self.pred_input_dim = hidden_dim * (num_layers - 1) + embedding_dim
        else:
            self.pred_input_dim = embedding_dim

        # The in-place ``*_`` initializers below replace the deprecated
        # ``init.xavier_uniform`` / ``init.constant`` aliases.
        relu_gain = nn.init.calculate_gain('relu')
        for m in self.modules():
            if isinstance(m, GraphConv):
                print('m', m)
                init.xavier_uniform_(m.weight.data, gain=relu_gain)
                print('weight', m.weight.data)
                print('weight', m.weight)

                if m.bias is not None:
                    init.constant_(m.bias.data, 0.0)
        # Second pass: GraphConvolution modules use a gain five times larger
        # and carry four weight/bias pairs each.
        for m in self.modules():
            if isinstance(m, GraphConvolution):
                print('m2', m)
                init.xavier_uniform_(m.weight1.data, gain=relu_gain * 5)
                init.xavier_uniform_(m.weight2.data, gain=relu_gain * 5)
                init.xavier_uniform_(m.weight3.data, gain=relu_gain * 5)
                init.xavier_uniform_(m.weight4.data, gain=relu_gain * 5)

                init.constant_(m.bias1.data, 0.0)
                init.constant_(m.bias2.data, 0.0)
                init.constant_(m.bias3.data, 0.0)
                init.constant_(m.bias4.data, 0.0)

        print('num_layers: ', num_layers)
        print('pred_hidden_dims: ', pred_hidden_dims)
        print('hidden_dim: ', hidden_dim)
        print('embedding_dim: ', embedding_dim)
        print('label_dim', label_dim)
Beispiel #11
0
    def __init__(self,
                 max_num_nodes,
                 input_dim,
                 hidden_dim,
                 embedding_dim,
                 label_dim,
                 num_layers,
                 num_pool_matrix=2,
                 num_pool_final_matrix=1,
                 pool_sizes=[4],
                 pred_hidden_dims=[50],
                 concat=True,
                 bn=True,
                 dropout=0.0,
                 mask=0,
                 args=None,
                 device='cpu'):
        '''
        Build the post-pooling convolution stacks and the prediction head.

        Args:
            max_num_nodes: not read by this constructor.
            input_dim, hidden_dim, embedding_dim, label_dim, num_layers:
                forwarded to the parent constructor; ``hidden_dim``,
                ``embedding_dim`` and ``num_layers`` also size the
                post-pooling convolution stacks.
            num_pool_matrix: multiplier applied to ``pred_input_dim`` for the
                input size of each post-pooling stack.
            num_pool_final_matrix: when greater than 0, multiplier applied to
                ``pred_input_dim`` in the prediction-head input size.
            pool_sizes: one post-pooling stack is built per entry.
            pred_hidden_dims: hidden sizes passed to ``build_pred_layers``.
            concat: forwarded to the parent; also selects the prediction-head
                input size.
            bn: not read by this constructor and not forwarded to the parent.
            dropout: passed to ``build_conv_layers`` for the post-pooling
                stacks (not forwarded to the parent).
            mask: stored on ``self.mask``.
            args: options object; ``args.con_final`` is read
                unconditionally, so ``None`` is not usable despite the
                default.
            device: forwarded to the parent and stored on ``self.device``.
        '''

        super(WavePoolingGcnEncoder,
              self).__init__(input_dim,
                             hidden_dim,
                             embedding_dim,
                             label_dim,
                             num_layers,
                             pred_hidden_dims=pred_hidden_dims,
                             concat=concat,
                             args=args,
                             device=device)
        add_self = not concat
        self.mask = mask
        self.pool_sizes = pool_sizes
        self.num_pool_matrix = num_pool_matrix
        self.num_pool_final_matrix = num_pool_final_matrix
        self.con_final = args.con_final
        self.device = device

        print('Device_-wave: ', device)

        self.conv_first_after_pool = nn.ModuleList()
        self.conv_block_after_pool = nn.ModuleList()
        self.conv_last_after_pool = nn.ModuleList()
        print('input_dim', input_dim)
        # One convolution stack per pooling stage; the entry values of
        # ``pool_sizes`` are not used here, only their count.
        for _ in pool_sizes:
            print('In WavePooling', self.pred_input_dim * self.num_pool_matrix)
            conv_first2, conv_block2, conv_last2 = self.build_conv_layers(
                self.pred_input_dim * self.num_pool_matrix,
                hidden_dim,
                embedding_dim,
                num_layers,
                add_self,
                normalize=True,
                dropout=dropout)

            self.conv_first_after_pool.append(conv_first2)
            self.conv_block_after_pool.append(conv_block2)
            self.conv_last_after_pool.append(conv_last2)

        if self.num_pool_final_matrix > 0:

            if concat:

                if self.con_final:
                    self.pred_model = self.build_pred_layers(
                        self.pred_input_dim * (len(pool_sizes) + 1) +
                        self.pred_input_dim * self.num_pool_final_matrix,
                        pred_hidden_dims,
                        label_dim,
                        num_aggs=self.num_aggs)
                else:
                    self.pred_model = self.build_pred_layers(
                        self.pred_input_dim * (len(pool_sizes)) +
                        self.pred_input_dim * self.num_pool_final_matrix,
                        pred_hidden_dims,
                        label_dim,
                        num_aggs=self.num_aggs)

            else:

                self.pred_model = self.build_pred_layers(
                    self.pred_input_dim * self.num_pool_final_matrix,
                    pred_hidden_dims,
                    label_dim,
                    num_aggs=self.num_aggs)

        else:
            if concat:

                # NOTE(review): unlike every other call, this one passes only
                # two positional arguments (512, label_dim) and a hard-coded
                # input size — verify against build_pred_layers' signature.
                self.pred_model = self.build_pred_layers(
                    512, label_dim, num_aggs=self.num_aggs)
            else:
                self.pred_model = self.build_pred_layers(
                    self.pred_input_dim,
                    pred_hidden_dims,
                    label_dim,
                    num_aggs=self.num_aggs)

        # Re-initialize every GraphConv (including those created by the
        # parent) with five times the ReLU gain. The in-place ``*_``
        # initializers replace the deprecated aliases.
        scaled_gain = nn.init.calculate_gain('relu') * 5
        for m in self.modules():
            if isinstance(m, GraphConv):
                init.xavier_uniform_(m.weight.data, gain=scaled_gain)
                if m.bias is not None:
                    init.constant_(m.bias.data, 0.0)
Beispiel #12
0
    def __init__(self, input_dim, output_dim, context_dim, att_hidden_dim,
                 config):
        """Create and initialize the gate, cell and attention projections.

        Input projections (``W_*x``) and attention layers use Xavier-uniform
        weights; the recurrent projections (``W_i``, ``W_f``, ``W_c``,
        ``W_o``) use orthogonal weights. All biases start at zero except the
        forget-gate bias, which starts at 1.

        Args:
            input_dim: size of the input fed to the ``W_*x`` projections.
            output_dim: size of every gate / cell projection output.
            context_dim: size of the context fed to ``att_ctx``.
            att_hidden_dim: hidden size of both attention sub-networks.
            config: options object; ``parent_hidden_state_feed`` and
                ``decoder_dropout`` are read and the object itself is kept.
        """

        super(CondAttLSTM, self).__init__()

        self.output_dim = output_dim
        self.context_dim = context_dim
        self.input_dim = input_dim

        # Input width shared by the four bias-free recurrent projections.
        gate_in_dim = output_dim + context_dim + output_dim + output_dim

        # The in-place ``xavier_uniform_`` / ``orthogonal_`` initializers
        # replace the deprecated non-underscore aliases throughout.

        # one W for all x
        self.W_ix = nn.Linear(input_dim, output_dim)
        init.xavier_uniform_(self.W_ix.weight)
        self.W_ix.bias = nn.Parameter(torch.FloatTensor(output_dim).zero_())

        # input gate
        self.W_i = nn.Linear(gate_in_dim, output_dim, bias=False)
        init.orthogonal_(self.W_i.weight)

        # forget gate (bias starts at 1.0)
        self.W_fx = nn.Linear(input_dim, output_dim)
        init.xavier_uniform_(self.W_fx.weight)
        self.W_fx.bias = nn.Parameter(torch.FloatTensor(output_dim).fill_(1.0))

        self.W_f = nn.Linear(gate_in_dim, output_dim, bias=False)
        init.orthogonal_(self.W_f.weight)

        # memory cell new value
        self.W_cx = nn.Linear(input_dim, output_dim)
        init.xavier_uniform_(self.W_cx.weight)
        self.W_cx.bias = nn.Parameter(torch.FloatTensor(output_dim).zero_())

        self.W_c = nn.Linear(gate_in_dim, output_dim, bias=False)
        init.orthogonal_(self.W_c.weight)

        # output gate
        self.W_ox = nn.Linear(input_dim, output_dim)
        init.xavier_uniform_(self.W_ox.weight)
        self.W_ox.bias = nn.Parameter(torch.FloatTensor(output_dim).zero_())

        self.W_o = nn.Linear(gate_in_dim, output_dim, bias=False)
        init.orthogonal_(self.W_o.weight)

        # attention layer
        self.att_ctx = nn.Linear(context_dim, att_hidden_dim)
        init.xavier_uniform_(self.att_ctx.weight)
        self.att_ctx.bias = nn.Parameter(
            torch.FloatTensor(att_hidden_dim).zero_())
        self.att_h = nn.Linear(output_dim, att_hidden_dim, bias=False)
        init.xavier_uniform_(self.att_h.weight)
        self.att = nn.Linear(att_hidden_dim, 1)
        init.xavier_uniform_(self.att.weight)
        self.att.bias = nn.Parameter(torch.FloatTensor(1).zero_())

        # attention over history
        self.h_att_hist = nn.Linear(output_dim, att_hidden_dim)
        init.xavier_uniform_(self.h_att_hist.weight)
        self.h_att_hist.bias = nn.Parameter(
            torch.FloatTensor(att_hidden_dim).zero_())
        self.h_att_h = nn.Linear(output_dim, att_hidden_dim, bias=False)
        init.xavier_uniform_(self.h_att_h.weight)
        self.h_att = nn.Linear(att_hidden_dim, 1)
        init.xavier_uniform_(self.h_att.weight)
        self.h_att.bias = nn.Parameter(torch.FloatTensor(1).zero_())

        self.softmax = nn.Softmax(dim=-1)

        self.parent_hidden_state_feed = config.parent_hidden_state_feed
        self.dropout = config.decoder_dropout
        self.config = config
Beispiel #13
0
def my_weight_init(m):
    """Initialize a linear layer with Xavier-uniform weights and zero bias.

    Modules that are not ``torch.nn.Linear`` are left untouched.

    Args:
        m: the module to initialize in place.
    """
    if isinstance(m, torch.nn.Linear):
        # The in-place ``*_`` initializers replace the deprecated aliases.
        init.xavier_uniform_(m.weight.data)
        # A layer built with ``bias=False`` has ``bias is None``.
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
Beispiel #14
0
 def init_weight(self):
     """Apply Xavier-uniform weights (gain ``sqrt(2)``) to every ``nn.Conv2d``.

     All sub-modules returned by ``self.modules()`` are visited; biases and
     modules of other types are left unchanged.
     """
     for m in self.modules():
         if isinstance(m, nn.Conv2d):
             # ``xavier_uniform_`` replaces the deprecated alias.
             init.xavier_uniform_(m.weight, gain=np.sqrt(2.0))
def weights_init(m):
    """Initialize ``nn.Conv2d`` and ``nn.Linear`` modules in place.

    * ``nn.Conv2d``: Xavier-uniform weights and zero bias.
    * ``nn.Linear``: weights drawn from a standard normal distribution; the
      bias is left unchanged.

    Any other module is left untouched.

    Args:
        m: the module to initialize.
    """
    if isinstance(m, nn.Conv2d):
        # The in-place ``*_`` initializers replace the deprecated aliases.
        init.xavier_uniform_(m.weight.data)
        # Convolutions built with ``bias=False`` have ``bias is None``.
        if m.bias is not None:
            m.bias.data.zero_()
    if isinstance(m, nn.Linear):
        init.normal_(m.weight.data)
    def __init__(self, use_cuda):
        """Build the conv -> conv -> BiLSTM -> linear -> softmax layers.

        Args:
            use_cuda: stored on ``self.use_cuda``; not otherwise used here.
        """
        super(Net, self).__init__()

        self.classes = 10 + 1
        self.use_cuda = use_cuda
        self.image_H = 36

        # The in-place ``xavier_uniform_`` / ``constant_`` initializers used
        # below replace the deprecated non-underscore aliases.

        # CNN
        # conv1: the kernel spans the full image height (image_H x 2).
        self.conv1_input_chanel = 1
        self.conv1_output_chanel = 10
        self.conv1_kernelsize = (self.image_H, 2)
        self.conv1 = nn.Conv2d(self.conv1_input_chanel,
                               self.conv1_output_chanel, self.conv1_kernelsize)

        # initialization
        init.xavier_uniform_(self.conv1.weight, gain=np.sqrt(2))
        init.constant_(self.conv1.bias, 0.1)

        # conv2
        self.conv2_input_chanel = 10
        self.conv2_output_chanel = 20
        self.conv2_kernelsize = (1, 2)
        self.conv2 = nn.Conv2d(self.conv2_input_chanel,
                               self.conv2_output_chanel, self.conv2_kernelsize)

        # initialization
        init.xavier_uniform_(self.conv2.weight, gain=np.sqrt(2))
        init.constant_(self.conv2.bias, 0.1)

        # batch norm (before activation)
        self.conv2_bn = nn.BatchNorm2d(
            self.conv2_output_chanel)  # batch normalization

        # # drop out (after activation)
        # self.conv2_drop = nn.Dropout2d()

        self.conv2_H = 1  # height of feature map after conv2

        # LSTM
        # number of features = conv2_H * conv2_output_chanel = 1 * 20 = 20
        self.lstm_input_size = self.conv2_H * self.conv2_output_chanel
        self.lstm_hidden_size = 32
        self.lstm_num_layers = 2
        self.lstm_hidden = None
        self.lstm_cell = None

        self.lstm = nn.LSTM(self.lstm_input_size,
                            self.lstm_hidden_size,
                            self.lstm_num_layers,
                            batch_first=True,
                            bidirectional=True)
        # # initialization
        # init.xavier_uniform(self.lstm.weights, gain=np.sqrt(2))
        # init.constant(self.lstm.bias, 0.1)

        # FC: convert to 11-d probability vector
        # The LSTM is bidirectional, hence twice the hidden size.
        self.fc_input_size = self.lstm_hidden_size * 2
        self.fc_output_size = self.classes
        self.fc = nn.Linear(self.fc_input_size, self.fc_output_size)
        # initialization
        init.xavier_uniform_(self.fc.weight, gain=np.sqrt(2))
        init.constant_(self.fc.bias, 0.1)

        # softmax:
        # NOTE(review): no ``dim`` is given, so the axis is chosen implicitly
        # from the input rank — confirm the intended axis and pass it.
        self.softmax = nn.Softmax()
Beispiel #17
0
def xavier(param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Args:
        param: tensor to initialize; it is modified in place and nothing is
            returned.
    """
    # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform`` alias.
    init.xavier_uniform_(param)
Beispiel #18
0
def initialize_weights(m):
    """Apply Xavier-uniform weights to linear and transposed-conv modules.

    Only ``nn.Linear`` and ``nn.ConvTranspose2d`` modules are changed; their
    biases, and all other modules, are left untouched.

    Args:
        m: the module to initialize in place.
    """
    if isinstance(m, (nn.Linear, nn.ConvTranspose2d)):
        # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform`` alias.
        init.xavier_uniform_(m.weight.data)
            # Number of samples per window.
            # NOTE(review): presumably timeWindowDuration is in milliseconds
            # and samplingRate in Hz — confirm against the caller.
            input_time_length = int(timeWindowDuration/1000*samplingRate) # train_set.X.shape[1]
            # Channel count is read from the first training example.
            in_chans=train_set.X[0].shape[0]


            # Build exactly one network depending on which flag is set; every
            # variant is created with a single output (n_classes=1).
            if Deep4:    
                # final_conv_length determines the size of the receptive field of the ConvNet
                model = Deep4Net(in_chans=in_chans, n_classes=1, input_time_length=input_time_length,
                             pool_time_stride=pool_time_stride,
                                    final_conv_length=2, stride_before_pool=True).create_network()
            elif ResNet:
                model_name = 'resnet-xavier-uniform'
                # NOTE(review): str.lstrip removes any leading characters from
                # the set "resnet-", not the literal prefix. It yields the
                # right key for the four names below only because none of them
                # starts with one of those characters.
                init_name = model_name.lstrip('resnet-')
                from torch.nn import init
                # Map the initializer name to a one-argument weight-init callable.
                init_fn = {'he-uniform': lambda w: init.kaiming_uniform(w, a=0),
                'he-normal': lambda w: init.kaiming_normal(w, a=0),
                'xavier-uniform': lambda w: init.xavier_uniform(w, gain=1),
                'xavier-normal': lambda w: init.xavier_normal(w, gain=1)}[init_name]

                model = EEGResNet(in_chans=in_chans, n_classes=1, input_time_length=input_time_length,
                final_pool_length=2, n_first_filters=48,
                conv_weight_init_fn=init_fn).create_network()
            elif EEGNet_v4:
                model = EEGNetv4(in_chans=in_chans, n_classes=1, final_conv_length=2, input_time_length=input_time_length).create_network()


            # remove softmax
            # Copy every child module, in order, into a fresh Sequential,
            # skipping the one registered under the name 'softmax'.
            new_model = nn.Sequential()
            for name, module in model.named_children():
                if name == 'softmax':
                    continue
                new_model.add_module(name, module)
Beispiel #20
0
 def init_weights(self):
     """Apply Xavier-uniform initialization to the four affine layers.

     The weights of ``affine_v``, ``affine_g``, ``affine_h`` and
     ``affine_s`` are re-drawn in place; biases are left unchanged.
     """
     for layer in (self.affine_v, self.affine_g, self.affine_h,
                   self.affine_s):
         # ``xavier_uniform_`` replaces the deprecated alias.
         init.xavier_uniform_(layer.weight)
Beispiel #21
0
    def __init__(
        self,
        max_num_nodes,
        input_dim,
        hidden_dim,
        embedding_dim,
        label_dim,
        num_layers,
        assign_hidden_dim,
        assign_ratio=0.25,
        assign_num_layers=-1,
        num_pooling=1,
        pred_hidden_dims=[50],
        concat=True,
        bn=True,
        dropout=0.0,
        linkpred=True,
        assign_input_dim=-1,
        args=None,
    ):
        """
        Build the post-pooling and assignment networks and the prediction head.

        Args:
            max_num_nodes: the first assignment dimension is
                ``int(max_num_nodes * assign_ratio)``.
            input_dim, hidden_dim, embedding_dim, label_dim, num_layers:
                forwarded to the parent constructor; ``hidden_dim``,
                ``embedding_dim`` and ``num_layers`` also size the
                post-pooling convolution stacks.
            assign_hidden_dim: hidden size of the assignment stacks.
            assign_ratio: factor applied to the assignment dimension at
                every pooling stage.
            assign_num_layers: layer count of the assignment stacks; ``-1``
                means "use ``num_layers``".
            num_pooling: number of pooling stages to build.
            pred_hidden_dims: hidden sizes passed to ``build_pred_layers``.
            concat: forwarded to the parent; also selects the input size of
                the assignment prediction layers.
            bn: not read by this constructor and not forwarded to the parent.
            dropout: passed to ``build_conv_layers`` for the post-pooling
                stacks.
            linkpred: stored on ``self.linkpred``.
            assign_input_dim: input size of the first assignment stack;
                ``-1`` means "use ``input_dim``".
            args: forwarded to the parent constructor.
        """

        super(SoftPoolingGcnEncoder, self).__init__(
            input_dim,
            hidden_dim,
            embedding_dim,
            label_dim,
            num_layers,
            pred_hidden_dims=pred_hidden_dims,
            concat=concat,
            args=args,
        )
        add_self = not concat
        self.num_pooling = num_pooling
        self.linkpred = linkpred
        self.assign_ent = True

        # GC
        # NOTE(review): these are plain Python lists, and the ``self.*2``
        # attributes are overwritten on every iteration, so with
        # num_pooling > 1 only the last stage stays reachable as an
        # attribute — confirm earlier stages are registered as intended.
        self.conv_first_after_pool = []
        self.conv_block_after_pool = []
        self.conv_last_after_pool = []
        for _ in range(num_pooling):
            # use self to register the modules in self.modules()
            self.conv_first2, self.conv_block2, self.conv_last2 = self.build_conv_layers(
                self.pred_input_dim,
                hidden_dim,
                embedding_dim,
                num_layers,
                add_self,
                normalize=True,
                dropout=dropout,
            )
            self.conv_first_after_pool.append(self.conv_first2)
            self.conv_block_after_pool.append(self.conv_block2)
            self.conv_last_after_pool.append(self.conv_last2)

        # assignment
        assign_dims = []
        if assign_num_layers == -1:
            assign_num_layers = num_layers
        if assign_input_dim == -1:
            assign_input_dim = input_dim

        self.assign_conv_first_modules = []
        self.assign_conv_block_modules = []
        self.assign_conv_last_modules = []
        self.assign_pred_modules = []
        assign_dim = int(max_num_nodes * assign_ratio)
        for _ in range(num_pooling):
            assign_dims.append(assign_dim)
            self.assign_conv_first, self.assign_conv_block, self.assign_conv_last = self.build_conv_layers(
                assign_input_dim,
                assign_hidden_dim,
                assign_dim,
                assign_num_layers,
                add_self,
                normalize=True,
            )
            # The conditional applies to the whole sum: with ``concat`` the
            # size is hidden * (num_layers - 1) + assign_dim, else assign_dim.
            # NOTE(review): this uses num_layers, not assign_num_layers —
            # confirm that is intended when the two differ.
            assign_pred_input_dim = (assign_hidden_dim * (num_layers - 1) +
                                     assign_dim if concat else assign_dim)
            self.assign_pred = self.build_pred_layers(assign_pred_input_dim,
                                                      [],
                                                      assign_dim,
                                                      num_aggs=1)

            # next pooling layer
            assign_input_dim = embedding_dim
            assign_dim = int(assign_dim * assign_ratio)

            self.assign_conv_first_modules.append(self.assign_conv_first)
            self.assign_conv_block_modules.append(self.assign_conv_block)
            self.assign_conv_last_modules.append(self.assign_conv_last)
            self.assign_pred_modules.append(self.assign_pred)

        self.pred_model = self.build_pred_layers(
            self.pred_input_dim * (num_pooling + 1),
            pred_hidden_dims,
            label_dim,
            num_aggs=self.num_aggs,
        )

        # The in-place ``*_`` initializers replace the deprecated aliases.
        relu_gain = nn.init.calculate_gain("relu")
        for m in self.modules():
            if isinstance(m, GraphConv):
                init.xavier_uniform_(m.weight.data, gain=relu_gain)
                if m.bias is not None:
                    init.constant_(m.bias.data, 0.0)
Beispiel #22
0
    def init_weights(self):
        """Initialize the weights.

        Applies Xavier-uniform initialization in place to the weights of
        ``affine_v``, ``affine_g``, ``affine_h``, ``L1``, ``L2``,
        ``affine_audio`` and ``affine_video``. Biases are left unchanged.
        """
        layers = (
            self.affine_v,
            self.affine_g,
            self.affine_h,
            self.L1,
            self.L2,
            self.affine_audio,
            self.affine_video,
        )
        for layer in layers:
            # ``xavier_uniform_`` replaces the deprecated alias.
            init.xavier_uniform_(layer.weight)
Beispiel #23
0
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the encoder, decoder, optional inference network and generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu: the model is moved to CUDA when ``gpu >= 0``, otherwise to CPU.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel (or ViNMTModel when an inference network is requested),
        with the generator attached as ``model.generator``.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    # NOTE(review): src_dict, src_feature_dicts and src_embeddings are only
    # bound for the "text" model type, yet they are read below when
    # share_embeddings is set or an inference network is requested — confirm
    # those options are never combined with "img" / "audio".
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        src_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         src_feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    tgt_feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     tgt_feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make inference network.
    inference_network = make_inference_network(
        model_opt,
        src_embeddings, tgt_embeddings,
        src_dict, src_feature_dicts,
        tgt_dict, tgt_feature_dicts
    ) if model_opt.inference_network_type != "none" else None

    # Make NMTModel(= encoder + decoder + inference network).
    model = (
        NMTModel(encoder, decoder)
        if inference_network is None
        else ViNMTModel(
            encoder, decoder,
            inference_network,
            dist_type=model_opt.p_dist_type,
            dbg=model_opt.dbg_inf,
            use_prior=model_opt.use_generative_model > 0)
    )
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # Previous generator, kept for reference:
        #   generator = nn.Sequential(
        #       nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
        #       nn.LogSoftmax(dim=1))
        generator = Generator(
            in_dim = model_opt.decoder_rnn_size,
            out_dim = len(fields["tgt"].vocab),
            mode = model_opt.mode,
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        #model.load_state_dict(checkpoint['model'])
        # strict=False tolerates missing / unexpected keys in the model state.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Imported locally so the block does not depend on a module-level
            # import of the in-place initializer; it replaces the deprecated
            # ``xavier_uniform`` alias.
            from torch.nn.init import xavier_uniform_

            # Glorot initialization only applies to tensors with 2+ dims.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu >= 0:
        model.cuda()
    else:
        model.cpu()

    return model
def make_base_model(model_opt, gpu, checkpoint=None):
    """Assemble the GANRBM model: seq2seq encoder/decoder plus discriminator.

    Args:
        model_opt: options object. The attributes read directly here are
            ``enc_numwords``, ``enc_padding_idx``, ``dec_numwords``,
            ``dec_padding_idx``, ``share_embeddings``, ``rnn_size``,
            ``dec_max_len``, ``gan_loss_type``, ``param_init``, ``rbm_path``,
            ``rbm_rq_prefix`` and ``rbm_qr_prefix``; it is also forwarded to
            the ``make_*`` helpers.
        gpu: accepted for signature compatibility; not used in this function.
        checkpoint: optional mapping holding ``'encoder'``, ``'decoder'`` and
            ``'generator'`` state dicts and, optionally, ``'ael'`` and
            ``'disor'``.

    Returns:
        The assembled ``GANRBM`` model.
    """
    # ---------------- Canonical seq2seq ----------------
    embeddings = make_embeddings(model_opt, model_opt.enc_numwords,
                                 model_opt.enc_padding_idx)
    encoder = make_encoder(model_opt, embeddings)
    if model_opt.share_embeddings:
        # Reuse the encoder embeddings for the decoder. (The original code
        # referenced a misspelled name here and raised NameError.)
        decoder = make_decoder(model_opt, embeddings)
    else:
        tgt_embedding = make_embeddings(model_opt, model_opt.dec_numwords,
                                        model_opt.dec_padding_idx)
        decoder = make_decoder(model_opt, tgt_embedding)

    # ---------------- Discriminator ----------------
    discor = make_dbm_discriminator(model_opt)

    # ---------------- Generator (projection layer) ----------------
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, model_opt.dec_numwords))

    # Approximate embedding layer built on top of the decoder embeddings.
    ael = ApproxEmbedding(decoder.embeddings)
    # Normalizers.
    q_norm, r_norm = make_qr_norm(model_opt)

    # Final model.
    model = GANRBM(encoder,
                   decoder,
                   ael,
                   generator,
                   discor,
                   dec_max_len=model_opt.dec_max_len,
                   type_loss=model_opt.gan_loss_type,
                   q_normalizer=q_norm,
                   r_normalizer=r_norm)

    # Initialise parameters: uniform in [-param_init, param_init] when
    # param_init is non-zero, otherwise Xavier-uniform on every parameter
    # with more than one dimension.
    if model_opt.param_init != 0.0:
        print('Intializing model parameters.')
        for p in model.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        for p in generator.parameters():
            p.data.uniform_(-model_opt.param_init, model_opt.param_init)
    else:
        for p in model.parameters():
            if p.dim() > 1:
                xavier_uniform(p)
        for p in generator.parameters():
            if p.dim() > 1:
                xavier_uniform(p)

    # Special initialisation for the DBM: load both RBMs from disk. This runs
    # after the generic initialisation above so it is not overwritten by it.
    if model_opt.rbm_path is not None:
        print("Initial rmb", model_opt.rbm_path)
        model.disor.rq_rbm.load_model(
            os.path.join(model_opt.rbm_path, model_opt.rbm_rq_prefix))
        model.disor.qr_rbm.load_model(
            os.path.join(model_opt.rbm_path, model_opt.rbm_qr_prefix))

    # Restore sub-module states from the checkpoint; 'ael' and 'disor' are
    # optional entries.
    if checkpoint is not None:
        print('Loading model parameters.')
        load_temp = ['encoder', 'decoder', 'generator']
        model.encoder.load_state_dict(checkpoint['encoder'])
        model.decoder.load_state_dict(checkpoint['decoder'])
        model.generator.load_state_dict(checkpoint['generator'])
        if 'ael' in checkpoint:
            model.ael.load_state_dict(checkpoint['ael'])
            load_temp.append('ael')
        if 'disor' in checkpoint:
            model.disor.load_state_dict(checkpoint['disor'])
            load_temp.append('disor')
        print("Load", load_temp)
    return model
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the encoder/decoder model (optionally document-aware) and generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel (or TwoEncoderModel) with ``generator`` attached.
    Raises:
        AssertionError: if ``model_opt.model_type`` is unsupported, or if
            ``share_embeddings`` is set while source and target vocabularies
            differ.
        ValueError: if ``model_opt.encoder_type`` is not one of
            ``'BiAttEncoder'``, ``'transformer'`` or ``'PostEncoder'``.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    src_dict = fields["src"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
    src_embeddings = make_embeddings(model_opt,
                                     src_dict,
                                     feature_dicts,
                                     for_encoder=True)
    encoder = make_encoder(model_opt, src_embeddings)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Load the documents onto the device the model will end up on; the
    # original always requested "cuda", even when `gpu` was falsy.
    device = torch.device("cuda" if gpu else "cpu")
    all_docs = load_all_docs(model_opt, fields, device)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder_type in ('BiAttEncoder', 'transformer'):
        model = TwoEncoderModel(encoder, decoder, all_docs, src_embeddings)
    elif model_opt.encoder_type == "PostEncoder":
        model = NMTModel(encoder, decoder)
    else:
        # Previously this fell through and failed later on an unbound `model`.
        raise ValueError(
            "Unsupported encoder type %s" % (model_opt.encoder_type,))

    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Applied after the uniform init, so it overrides it for every
            # parameter with more than one dimension.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #26
0
    def __init__(self, hidden_size, batch_size, output_size, num_layers, is_dilation=True, is_bn=True):
        """Create the convolutional output stack and the linear transition layer.

        Args:
            hidden_size: channel width of every hidden Conv1d / BatchNorm1d and
                of the transition Linear layer.
            batch_size: stored on the instance; not otherwise used here.
            output_size: stored on the instance; not otherwise used here.
            num_layers: ``num_layers - 1`` hidden conv / batch-norm pairs are
                created, followed by one single-channel output conv.
            is_dilation: when truthy, hidden conv ``i`` uses dilation and
                padding ``2**i``; otherwise both are 1.
            is_bn: stored on the instance; not otherwise used here.
        """
        super(Graph_RNN_structure, self).__init__()

        # Configuration kept on the instance.
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.output_size = output_size
        self.num_layers = num_layers  # num_layers of cnn_output
        self.is_bn = is_bn

        self.relu = nn.ReLU()

        # Hidden convolutions: kernel 3 with padding equal to the dilation.
        hidden_depth = num_layers - 1
        hidden_convs = []
        for level in range(hidden_depth):
            spread = 2 ** level if is_dilation else 1
            hidden_convs.append(
                nn.Conv1d(hidden_size, hidden_size, kernel_size=3,
                          dilation=spread, padding=spread))
        self.conv_block = nn.ModuleList(hidden_convs)
        self.bn_block = nn.ModuleList(
            [nn.BatchNorm1d(hidden_size) for _ in range(hidden_depth)])
        # Final convolution down to a single output channel.
        self.conv_out = nn.Conv1d(hidden_size, 1, kernel_size=3, dilation=1, padding=1)

        # Linear state transition followed by ReLU.
        self.linear_transition = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
        )

        # Growing list of generated hidden vectors (per the original author's
        # note: each is batch*hidden_dim*1 and they are concatenated before
        # the convolution is applied).
        self.hidden_all = []

        # Parameter initialisation, dispatched on module type.
        relu_gain = nn.init.calculate_gain('relu')
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv1d)):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=relu_gain)
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.GRU):
                sigmoid_gain = nn.init.calculate_gain('sigmoid')
                m.weight_ih_l0.data = init.xavier_uniform(m.weight_ih_l0.data,
                                                          gain=sigmoid_gain)
                m.weight_hh_l0.data = init.xavier_uniform(m.weight_hh_l0.data,
                                                          gain=sigmoid_gain)
                m.bias_ih_l0.data = torch.ones(m.bias_ih_l0.data.size(0)) * 0.25
                m.bias_hh_l0.data = torch.ones(m.bias_hh_l0.data.size(0)) * 0.25
Beispiel #27
0
    def __init__(self, batchNorm=True, div_flow=20):
        """Create the FlowNetC layers and initialise conv / deconv parameters.

        Args:
            batchNorm: forwarded as the first argument to every ``conv``
                helper call.
            div_flow: stored on the instance; not used in the constructor.
        """
        super(FlowNetC, self).__init__()

        self.batchNorm = batchNorm
        self.div_flow = div_flow
        use_bn = self.batchNorm

        # Feature extraction layers.
        self.conv1 = conv(use_bn, 3, 64, kernel_size=7, stride=2)
        self.conv2 = conv(use_bn, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(use_bn, 128, 256, kernel_size=5, stride=2)
        self.conv_redir = conv(use_bn, 256, 32, kernel_size=1, stride=1)

        # Correlation layer and its activation.
        self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                                stride1=1, stride2=2, corr_multiply=1)
        self.corr_activation = nn.LeakyReLU(0.1, inplace=True)

        # Contracting part.
        self.conv3_1 = conv(use_bn, 473, 256)
        self.conv4 = conv(use_bn, 256, 512, stride=2)
        self.conv4_1 = conv(use_bn, 512, 512)
        self.conv5 = conv(use_bn, 512, 512, stride=2)
        self.conv5_1 = conv(use_bn, 512, 512)
        self.conv6 = conv(use_bn, 512, 1024, stride=2)
        self.conv6_1 = conv(use_bn, 1024, 1024)

        # Expanding part.
        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        # Flow prediction heads, one per scale.
        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        def flow_upsampler():
            # 2 channels in and out; kernel 4, stride 2, padding 1.
            return nn.ConvTranspose2d(2, 2, kernel_size=4, stride=2,
                                      padding=1, bias=True)

        self.upsampled_flow6_to_5 = flow_upsampler()
        self.upsampled_flow5_to_4 = flow_upsampler()
        self.upsampled_flow4_to_3 = flow_upsampler()
        self.upsampled_flow3_to_2 = flow_upsampler()

        # Uniform bias (when present) and Xavier-uniform weights for every
        # Conv2d and ConvTranspose2d registered so far.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform(m.bias)
                init.xavier_uniform(m.weight)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
Beispiel #28
0
 def _init_lstm(self, weight):
     for w in weight.chunk(4, 0):
         init.xavier_uniform(w)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build an NMT model with an optional second encoder and a generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training.
    Returns:
        the NMTModel (or one of its GCN variants) with ``generator`` attached.
    Raises:
        AssertionError: for an unsupported ``model_type``, or when
            ``share_embeddings`` is set for a non-text model or with
            differing source/target vocabularies.
        ValueError: for an ``encoder2_type`` other than ``'none'``, ``'gcn'``
            or ``'rnn'`` on the non-morph text path.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict, feature_dicts)
        if model_opt.encoder2_type != 'none':
            # Separate vocabulary / embeddings for the second source stream.
            src_dict2 = fields["src2"].vocab
            feature_dicts2 = onmt.io.collect_feature_vocabs(fields, 'src2')
            src_embeddings2 = make_embeddings(model_opt, src_dict2,
                                              feature_dicts2)

        if 'morph' in fields and hasattr(fields["morph"], 'vocab'):
            # Morphology-aware encoders share one morph embedding table.
            morph_dict = fields["morph"].vocab
            morph_embeddings = make_morph_embeddings(model_opt, morph_dict, [])
            encoder = make_encoder(model_opt, src_embeddings, morph_embeddings)
            if model_opt.encoder2_type != 'none':
                encoder2 = make_encoder(model_opt,
                                        src_embeddings2,
                                        morph_embeddings,
                                        encoder_type='rnn')
            else:
                encoder2 = None
        else:
            encoder = make_encoder(model_opt,
                                   src_embeddings,
                                   encoder_type=model_opt.encoder_type
                                   )  # gcn features must go here
            if model_opt.encoder2_type == 'none':
                encoder2 = None
            elif model_opt.encoder2_type == 'gcn':
                encoder2 = make_encoder(
                    model_opt, src_embeddings,
                    encoder_type='gcn')  # gcn features must go here
            elif model_opt.encoder2_type == 'rnn':
                encoder2 = make_encoder(model_opt,
                                        src_embeddings2,
                                        encoder_type='rnn')
            else:
                raise ValueError("Not implemented yet.")
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers, model_opt.brnn,
                               model_opt.rnn_size, model_opt.dropout,
                               model_opt.sample_rate, model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt,
                                     tgt_dict,
                                     feature_dicts,
                                     for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Source embeddings only exist for text models; fail with a clear
        # message instead of an unbound-variable error.
        if model_opt.model_type != "text":
            raise AssertionError('share_embeddings is only supported for '
                                 'the "text" model type!')
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    if model_opt.encoder2_type == 'none':
        encoder2 = None
    if model_opt.encoder_type == 'gcn':
        if model_opt.use_dgl:
            model = NMTModelGCN_DGL(encoder, decoder, encoder2=encoder2)
        else:
            model = NMTModelGCN(encoder, decoder, encoder2=encoder2)
    else:
        model = NMTModel(encoder, decoder, encoder2=encoder2)
    model.model_type = model_opt.model_type  # text

    # Make Generator.
    if not model_opt.copy_attn:
        # Normalise over the vocabulary (last) dimension explicitly; the
        # implicit-dim form of LogSoftmax is deprecated.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size, fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Applied after the uniform init, so it overrides it for every
            # parameter with more than one dimension.
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        # hasattr() is False for a None encoder2, so the second branch is
        # skipped when no second encoder was built.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.encoder2, 'embeddings'):
            model.encoder2.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc2, model_opt.fix_word_vecs_enc2)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
 def _init_weights_layer(self, layer):
     '''Method to initialise the weights for a given layer'''
     if isinstance(layer, nn.Linear):
         xavier_uniform(layer.weight.data)
    def __init__(self):
        """Define four conv layers, a shared max-pool and three linear layers.

        The final linear layer emits 136 values (2 per keypoint for 68
        keypoints, per the original assignment notes). The shape notes below
        assume a 1x224x224 input and a 2x2 pool after every convolution; the
        forward pass is not shown here, so treat them as a guide to confirm.
        """
        super(Net, self).__init__()

        # 1 grayscale input channel -> 32 feature maps, 5x5 kernel.
        # Xavier initialisation replaces an earlier constant fill of 0.01.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        I.xavier_uniform(self.conv1.weight)
        # 224 -> 220 after conv1, -> 110 after pooling.

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Odd 3x3 kernel (a 4x4 kernel was used previously; even-sized kernels
        # have no true centre).
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        I.xavier_uniform(self.conv2.weight)
        # 110 -> 108 after conv2, -> 54 after pooling.

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        # 54 -> 52 after conv3, -> 26 after pooling.

        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2)
        # 26 -> 25 after conv4, -> 12 after pooling.

        # Fully connected head on the flattened 256 x 12 x 12 feature map.
        self.fc1 = nn.Linear(in_features=256 * 12 * 12, out_features=3000)

        # Dropout with p=0.4.
        self.fc_drop2 = nn.Dropout(p=0.4)

        self.fc2 = nn.Linear(in_features=3000, out_features=1000)

        self.fc3 = nn.Linear(in_features=1000, out_features=136)
def run_experiment(train_set, valid_set, test_set, model_name, optimizer_name,
                   init_lr, scheduler_name, use_norm_constraint, weight_decay,
                   schedule_weight_decay, restarts, max_epochs,
                   max_increase_epochs, np_th_seed):
    """Build model, optimizer and stop criterion, run the Experiment, return it.

    Args:
        train_set, valid_set, test_set: datasets handed to ``Experiment``;
            ``train_set.X`` / ``train_set.y`` are read to size the model.
            ``valid_set`` may be None, which disables early stopping.
        model_name: ``'deep'``, ``'shallow'`` or one of the
            ``'resnet-<init>'`` names listed below.
        optimizer_name: ``'adam'`` or ``'adamw'``.
        init_lr: learning rate passed to the optimizer.
        scheduler_name: None, ``'cosine'`` or ``'cut_cosine'``.
        use_norm_constraint: when truthy a ``MaxNormDefaultConstraint`` is used.
        weight_decay: weight decay passed to the optimizer.
        schedule_weight_decay: forwarded to ``ScheduledOptimizer``; must be
            True exactly when ``optimizer_name == 'adamw'`` if a scheduler
            is used.
        restarts: per-period epoch counts; exactly one of ``restarts`` and
            ``max_epochs`` must be None.
        max_epochs: total number of epochs when ``restarts`` is None.
        max_increase_epochs: patience for the ``NoDecrease`` stop criterion;
            required when ``valid_set`` is given.
        np_th_seed: seed for ``set_random_seeds`` and the batch iterator.

    Returns:
        The ``Experiment`` instance after ``run()`` has completed.

    Raises:
        ValueError: for an unknown model, optimizer or scheduler name.
    """
    set_random_seeds(np_th_seed, cuda=True)
    #torch.backends.cudnn.benchmark = True# sometimes crashes?
    if valid_set is not None:
        assert max_increase_epochs is not None
    assert (max_epochs is None) != (restarts is None)
    if max_epochs is None:
        max_epochs = np.sum(restarts)
    n_classes = int(np.max(train_set.y) + 1)
    n_chans = int(train_set.X.shape[1])
    input_time_length = 1000
    if model_name == 'deep':
        model = Deep4Net(n_chans,
                         n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=2).create_network()
    elif model_name == 'shallow':
        model = ShallowFBCSPNet(n_chans,
                                n_classes,
                                input_time_length=input_time_length,
                                final_conv_length=30).create_network()
    elif model_name in [
            'resnet-he-uniform', 'resnet-he-normal', 'resnet-xavier-normal',
            'resnet-xavier-uniform'
    ]:
        # Drop the literal "resnet-" prefix. str.lstrip() strips a *set* of
        # characters rather than a prefix and only worked by coincidence.
        init_name = model_name[len('resnet-'):]
        from torch.nn import init
        init_fn = {
            'he-uniform': lambda w: init.kaiming_uniform(w, a=0),
            'he-normal': lambda w: init.kaiming_normal(w, a=0),
            'xavier-uniform': lambda w: init.xavier_uniform(w, gain=1),
            'xavier-normal': lambda w: init.xavier_normal(w, gain=1)
        }[init_name]
        model = EEGResNet(in_chans=n_chans,
                          n_classes=n_classes,
                          input_time_length=input_time_length,
                          final_pool_length=10,
                          n_first_filters=48,
                          conv_weight_init_fn=init_fn).create_network()
    else:
        raise ValueError("Unknown model name {:s}".format(model_name))
    if 'resnet' not in model_name:
        to_dense_prediction_model(model)
    model.cuda()
    model.eval()

    # One dummy forward pass to find out how many predictions the network
    # emits per input window.
    out = model(np_to_var(train_set.X[:1, :, :input_time_length, None]).cuda())

    n_preds_per_input = out.cpu().data.numpy().shape[2]

    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               weight_decay=weight_decay,
                               lr=init_lr)
    elif optimizer_name == 'adamw':
        optimizer = AdamW(model.parameters(),
                          weight_decay=weight_decay,
                          lr=init_lr)
    else:
        # Previously fell through and failed later on an unbound `optimizer`.
        raise ValueError("Unknown optimizer name {!r}".format(optimizer_name))

    iterator = CropsFromTrialsIterator(batch_size=60,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input,
                                       seed=np_th_seed)

    if scheduler_name is not None:
        assert schedule_weight_decay == (optimizer_name == 'adamw')
        # Both schedulers are configured identically; only the class differs.
        if scheduler_name == 'cosine':
            scheduler_cls = CosineAnnealing
        elif scheduler_name == 'cut_cosine':
            scheduler_cls = CutCosineAnnealing
        else:
            raise ValueError("Unknown scheduler")
        # Count the batches of one epoch by iterating the training set once.
        n_updates_per_epoch = sum(
            1 for _ in iterator.get_batches(train_set, shuffle=True))
        if restarts is None:
            n_updates_per_period = n_updates_per_epoch * max_epochs
        else:
            n_updates_per_period = np.array(restarts) * n_updates_per_epoch
        scheduler = scheduler_cls(n_updates_per_period)
        optimizer = ScheduledOptimizer(
            scheduler,
            optimizer,
            schedule_weight_decay=schedule_weight_decay)
    monitors = [
        LossMonitor(),
        MisclassMonitor(col_suffix='sample_misclass'),
        CroppedTrialMisclassMonitor(input_time_length=input_time_length),
        RuntimeMonitor()
    ]

    if use_norm_constraint:
        model_constraint = MaxNormDefaultConstraint()
    else:
        model_constraint = None

    def loss_function(preds, targets):
        # Average the predictions over dimension 2 before the NLL loss.
        return F.nll_loss(th.mean(preds, dim=2), targets)

    if valid_set is not None:
        run_after_early_stop = True
        do_early_stop = True
        remember_best_column = 'valid_misclass'
        stop_criterion = Or([
            MaxEpochs(max_epochs),
            NoDecrease('valid_misclass', max_increase_epochs)
        ])
    else:
        run_after_early_stop = False
        do_early_stop = False
        remember_best_column = None
        stop_criterion = MaxEpochs(max_epochs)

    exp = Experiment(model,
                     train_set,
                     valid_set,
                     test_set,
                     iterator=iterator,
                     loss_function=loss_function,
                     optimizer=optimizer,
                     model_constraint=model_constraint,
                     monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column=remember_best_column,
                     run_after_early_stop=run_after_early_stop,
                     cuda=True,
                     do_early_stop=do_early_stop)
    exp.run()
    return exp
Beispiel #33
0
def train(*, dataset='mnist'):
    """Train a tied-weight, thresholded autoencoder on CUDA and dump image grids.

    Each flattened image is projected through ``w1``; hidden activations that
    are not greater than 1.0 are zeroed, and the input is reconstructed with
    the transpose of ``w1`` plus a bias, followed by a sigmoid. The squared
    reconstruction error (summed per sample, averaged over the batch) is
    minimised with Adadelta for 1000 passes over the data loader. Every 100
    updates the loss and the per-sample count of positive hidden units are
    printed, and reconstruction / weight grids are written to ``x.png`` and
    ``w1.png``.

    Args:
        dataset: name handed to ``load_dataset`` (with ``split='train'``).
    """
    # Hyper-parameters. z2, lr, momentum and gamma are only referenced by
    # disabled experiments (a second weight level and a manual momentum update).
    z1 = 100
    z2 = 512
    batch_size = 64
    lr = 0.1
    momentum = 0.9
    gamma = 5.0

    dataset = load_dataset(dataset, split='train')
    sample, _ = dataset[0]
    c, h, w = sample.size()
    n_pixels = w * h * c
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=1)

    # Encoder matrix, also used transposed as the decoder; re-initialised in
    # place with Xavier-uniform after being created on the GPU.
    w1 = Variable(torch.rand(n_pixels, z1).cuda(), requires_grad=True)
    xavier_uniform(w1.data)
    # Disabled experiment: second-level matrices w1_2 (z1 x z2) and wx_2
    # (n_pixels x z2), created and initialised the same way.

    bias = Variable(torch.zeros(n_pixels).cuda(), requires_grad=True)

    print(w1.is_leaf, bias.is_leaf)

    grads = {}

    def save_grad(v):
        # Hook factory for the disabled manual-momentum update; never
        # registered in the active code path.
        def hook(grad):
            v.grad = grad
            if not hasattr(v, 'mem'):
                v.mem = 0.0
            v.mem = v.mem * momentum + v.grad.data * (1 - momentum)
        return hook

    params = [w1, bias]
    optimizer = torch.optim.Adadelta(params, lr=0.1)
    squash = torch.nn.Sigmoid()

    nb_updates = 0
    for _ in range(1000):
        for X, y in dataloader:
            optimizer.zero_grad()
            X = Variable(X).cuda()
            X = X.view(X.size(0), -1)

            # Encode, then keep only activations above the 1.0 threshold.
            hid = torch.matmul(X, w1)
            hid = hid * (hid > 1.0).float()
            # Decode with the tied (transposed) weights.
            Xrec = squash(torch.matmul(hid, w1.transpose(1, 0).contiguous()) + bias)

            e1 = ((Xrec - X) ** 2).sum(1).mean()
            # e2 / e3 are placeholders for disabled regularisers.
            e2 = e3 = loss = e1
            loss.backward()
            optimizer.step()

            if nb_updates % 100 == 0:
                print('loss : %.3f %.3f %.3f' % (e1.data[0], e2.data[0], e3.data[0]))

                active = (hid.data > 0).float().sum(1)
                print('nbActive : {:.4f} +- {:.4f}'.format(active.mean(), active.std()))

                # Grid of reconstructions for the current batch.
                recon = Xrec.data.cpu().numpy()
                recon = recon.reshape(recon.shape[0], c, h, w)
                imsave('x.png', grid_of_images_default(recon, normalize=True))

                # Grid of learned filters, one image per hidden unit.
                filters = w1.data.cpu().numpy()
                filters = filters.reshape((c, h, w, z1)).transpose((3, 0, 1, 2))
                imsave('w1.png', grid_of_images_default(filters, normalize=True))

            nb_updates += 1
Beispiel #34
0
def weight_init(m):
    """Initialise a fully connected layer in place.

    Takes a single module, so it can be passed to ``nn.Module.apply``.
    Modules whose type is not exactly ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    # Exact type match rather than isinstance: subclasses of nn.Linear are
    # skipped, which is what the original comparison did.
    if type(m) is not nn.Linear:
        return
    nninit.xavier_uniform(m.weight)
    # A layer built with ``bias=False`` has ``bias is None``; nothing to fill.
    if m.bias is not None:
        m.bias.data.fill_(0.01)
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """
    Build the NMT model: encoder, decoder and generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel.
    Raises:
        AssertionError: if `model_opt.model_type` is not one of "text",
            "img" or "audio"; if `share_embeddings` is requested for a
            non-text model; or if `share_embeddings` is requested while
            the source and target vocabularies differ.
    """
    # Explicit raise (same exception type as the former `assert`) so the
    # check is not stripped when Python runs with -O.
    if model_opt.model_type not in ["text", "img", "audio"]:
        raise AssertionError(
            "Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # Source embeddings are only built in the "text" branch above; for
        # "img"/"audio" the names used below would be unbound (NameError).
        if model_opt.model_type != "text":
            raise AssertionError('share_embeddings is only supported for '
                                 'the "text" model type.')

        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax())
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder word embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init needs fan-in/fan-out, so only tensors with more
            # than one dimension are touched (1-D tensors keep their values).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        # Pretrained vectors are loaded after the random initialisation so
        # they are not overwritten by it.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
Beispiel #36
0
def init_weights(m):
    """Initialise a 2-D convolution layer in place.

    The weight gets Xavier-uniform values with gain sqrt(2); the bias, when
    the layer has one, is drawn from ``init.normal`` with its default
    arguments. Modules whose type is not exactly ``nn.Conv2d`` are left
    untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    # Exact type match rather than isinstance: subclasses of nn.Conv2d are
    # skipped, which is what the original comparison did.
    if type(m) is not nn.Conv2d:
        return
    init.xavier_uniform(m.weight, gain=np.sqrt(2.0))
    # A layer built with ``bias=False`` has ``bias is None``; skip it
    # instead of crashing inside the initialiser.
    if m.bias is not None:
        init.normal(m.bias)
Beispiel #37
0
def xavier_weights_init(m):
    """Initialise convolution-like modules in place.

    A module is selected when its class name contains ``'Conv'``. Its weight
    gets Xavier-uniform values with gain sqrt(2) and its bias is set to the
    constant 0.1. Because the match is by name only, modules that have no
    ``weight`` (or whose ``bias`` is ``None``) are tolerated and the missing
    tensor is simply skipped.

    Args:
        m: the module to (possibly) initialise.
    """
    if 'Conv' not in m.__class__.__name__:
        return
    # The name match also catches container classes such as a user-defined
    # "ConvBlock", which carry no parameters of their own.
    weight = getattr(m, 'weight', None)
    if weight is not None:
        init.xavier_uniform(weight, gain=np.sqrt(2))
    # Layers built with ``bias=False`` expose ``bias`` as None.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        init.constant(bias, 0.1)
Beispiel #38
0
 def _init_weight(self):
     """Initialise the transitions matrix."""
     forbidden = -10000.
     trans = self.transitions
     init.xavier_uniform(trans)
     # No label may transition to START, and STOP may not transition to any
     # label. NOTE(review): presumably indexed as [to, from] - confirm.
     trans.data[START, :].fill_(forbidden)
     trans.data[:, STOP].fill_(forbidden)