Example #1
    def __init__(self, input_dim, n_hidden, n_layer,
                 dropout, n_hop):
        super().__init__()
        self._init_h = nn.Parameter(torch.Tensor(n_layer, n_hidden))
        self._init_c = nn.Parameter(torch.Tensor(n_layer, n_hidden))
        self._init_i = nn.Parameter(torch.Tensor(input_dim))
        init.uniform_(self._init_h, -INI, INI)
        init.uniform_(self._init_c, -INI, INI)
        init.uniform_(self._init_i, -0.1, 0.1)
        self._lstm = nn.LSTM(
            input_dim, n_hidden, n_layer,
            bidirectional=False, dropout=dropout
        )
        self._lstm_cell = None

        # attention parameters
        self._attn_wm = nn.Parameter(torch.Tensor(input_dim, n_hidden))
        self._attn_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self._attn_v = nn.Parameter(torch.Tensor(n_hidden))
        init.xavier_normal_(self._attn_wm)
        init.xavier_normal_(self._attn_wq)
        init.uniform_(self._attn_v, -INI, INI)

        # hop parameters
        self._hop_wm = nn.Parameter(torch.Tensor(input_dim, n_hidden))
        self._hop_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self._hop_v = nn.Parameter(torch.Tensor(n_hidden))
        init.xavier_normal_(self._hop_wm)
        init.xavier_normal_(self._hop_wq)
        init.uniform_(self._hop_v, -INI, INI)
        self._n_hop = n_hop
Example #2
    def __init__(self, layers, activations, fl_init, action_dim) -> None:
        super(CriticNet, self).__init__()
        self.layers: nn.ModuleList = nn.ModuleList()
        self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
        self.activations = activations

        assert len(layers) >= 3, "Invalid layer schema {} for critic network".format(
            layers
        )

        assert layers[-1] == 1, "Only one output node for the critic net"

        for i, layer in enumerate(layers[1:]):
            # Batch norm only applied to pre-action layers
            if i == 0:
                self.layers.append(nn.Linear(layers[i], layer))
                self.batch_norm_ops.append(nn.BatchNorm1d(layers[i]))
            elif i == 1:
                self.layers.append(nn.Linear(layers[i] + action_dim, layer))
                self.batch_norm_ops.append(nn.BatchNorm1d(layers[i]))
            # Actions skip input layer
            else:
                self.layers.append(nn.Linear(layers[i], layer))

            # If last layer use simple uniform init (as outlined in DDPG paper)
            if i + 1 == len(layers[1:]):
                init.uniform_(self.layers[i].weight, -fl_init, fl_init)
                init.uniform_(self.layers[i].bias, -fl_init, fl_init)
            # Else use fan in uniform init (as outlined in DDPG paper)
            else:
                fan_in_init(self.layers[i].weight, self.layers[i].bias)
Example #3
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     if args:
         ptr_net = args[0]
     else:
         ptr_net = kwargs['ptr_net']
     assert isinstance(ptr_net, LSTMPointerNet)
     self._stop = nn.Parameter(
         torch.Tensor(self._lstm_cell.input_size))
     init.uniform_(self._stop, -INI, INI)
Example #4
 def __init__(self, input_dim, n_hidden, n_layer, dropout, bidirectional):
     super().__init__()
     self._init_h = nn.Parameter(
         torch.Tensor(n_layer*(2 if bidirectional else 1), n_hidden))
     self._init_c = nn.Parameter(
         torch.Tensor(n_layer*(2 if bidirectional else 1), n_hidden))
     init.uniform_(self._init_h, -INI, INI)
     init.uniform_(self._init_c, -INI, INI)
     self._lstm = nn.LSTM(input_dim, n_hidden, n_layer,
                          dropout=dropout, bidirectional=bidirectional)
Example #5
    def __init__(self, vocab_size, emb_dim,
                 n_hidden, bidirectional, n_layer, dropout=0.0):
        super().__init__()
        # the embedding weight is shared between encoder and decoder,
        # and is also used as the final projection to vocabulary logits;
        # it can be initialized with pretrained word vectors
        self._embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self._enc_lstm = nn.LSTM(
            emb_dim, n_hidden, n_layer,
            bidirectional=bidirectional, dropout=dropout
        )
        # initial encoder LSTM states are learned parameters
        state_layer = n_layer * (2 if bidirectional else 1)
        self._init_enc_h = nn.Parameter(
            torch.Tensor(state_layer, n_hidden)
        )
        self._init_enc_c = nn.Parameter(
            torch.Tensor(state_layer, n_hidden)
        )
        init.uniform_(self._init_enc_h, -INIT, INIT)
        init.uniform_(self._init_enc_c, -INIT, INIT)

        # vanilla LSTM / LN LSTM
        self._dec_lstm = MultiLayerLSTMCells(
            2*emb_dim, n_hidden, n_layer, dropout=dropout
        )
        # project encoder final states to decoder initial states
        enc_out_dim = n_hidden * (2 if bidirectional else 1)
        self._dec_h = nn.Linear(enc_out_dim, n_hidden, bias=False)
        self._dec_c = nn.Linear(enc_out_dim, n_hidden, bias=False)
        # multiplicative attention
        self._attn_wm = nn.Parameter(torch.Tensor(enc_out_dim, n_hidden))
        self._attn_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        init.xavier_normal_(self._attn_wm)
        init.xavier_normal_(self._attn_wq)
        # project decoder output to emb_dim, then
        # apply weight matrix from embedding layer
        self._projection = nn.Sequential(
            nn.Linear(2*n_hidden, n_hidden),
            nn.Tanh(),
            nn.Linear(n_hidden, emb_dim, bias=False)
        )
        # functional object for easier usage
        self._decoder = AttentionalLSTMDecoder(
            self._embedding, self._dec_lstm,
            self._attn_wq, self._projection
        )
Example #6
    def __init__(self, layers, activations, fl_init) -> None:
        super(ActorNet, self).__init__()
        self.layers: nn.ModuleList = nn.ModuleList()
        self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
        self.activations = activations

        assert len(layers) >= 2, "Invalid layer schema {} for actor network".format(
            layers
        )

        for i, layer in enumerate(layers[1:]):
            self.layers.append(nn.Linear(layers[i], layer))
            self.batch_norm_ops.append(nn.BatchNorm1d(layers[i]))
            # If last layer use simple uniform init (as outlined in DDPG paper)
            if i + 1 == len(layers[1:]):
                init.uniform_(self.layers[i].weight, -fl_init, fl_init)
                init.uniform_(self.layers[i].bias, -fl_init, fl_init)
            # Else use fan in uniform init (as outlined in DDPG paper)
            else:
                fan_in_init(self.layers[i].weight, self.layers[i].bias)
Example #7
 def __init__(self, context_dim, state_dim, input_dim, bias=True):
     super().__init__()
     self._v_c = nn.Parameter(torch.Tensor(context_dim))
     self._v_s = nn.Parameter(torch.Tensor(state_dim))
     self._v_i = nn.Parameter(torch.Tensor(input_dim))
     init.uniform_(self._v_c, -INIT, INIT)
     init.uniform_(self._v_s, -INIT, INIT)
     init.uniform_(self._v_i, -INIT, INIT)
     if bias:
         self._b = nn.Parameter(torch.zeros(1))
     else:
         self.register_parameter('_b', None)
Example #8
def kaiming_bias_init(b, weight, **kwargs):
    # bound the bias by 1/sqrt(fan_in) of the associated weight tensor,
    # matching PyTorch's default bias initialization for Linear/Conv layers
    fan_in, _ = init._calculate_fan_in_and_fan_out(weight)
    bound = 1 / math.sqrt(fan_in)
    return init.uniform_(b, -bound, bound)
Example #9
def glorot(shape):
    """Glorot & Bengio (AISTATS 2010) init."""
    init_range = np.sqrt(6.0 / (shape[0] + shape[1]))
    tensor = torch.empty(shape)
    uniform_(tensor, a=-init_range, b=init_range)
    return tensor
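A minimal usage sketch for the glorot helper above (its imports are assumed to be in scope); wrapping the result in nn.Parameter is illustrative, not taken from the original code:

import torch.nn as nn

# register the Glorot-initialized tensor as a learnable module parameter
weight = nn.Parameter(glorot((128, 64)))
print(weight.abs().max())  # bounded by sqrt(6 / (128 + 64)) ~= 0.177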
Example #10
def fan_in_init(weight_tensor, bias_tensor) -> None:
    """ Fan in initialization as described in DDPG paper."""
    val_range = 1.0 / np.sqrt(weight_tensor.size(1))
    init.uniform_(weight_tensor, -val_range, val_range)
    init.uniform_(bias_tensor, -val_range, val_range)
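A brief usage sketch for fan_in_init above with arbitrary layer sizes; the bound comes from weight_tensor.size(1), i.e. the fan-in of the layer:

import torch.nn as nn

# nn.Linear(400, 300) has a (300, 400) weight, so the bound is 1/sqrt(400) = 0.05
layer = nn.Linear(400, 300)
fan_in_init(layer.weight, layer.bias)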
Example #11
 def reset_parameters(self, dim):
     init.uniform_(self.z0, -math.sqrt(1 / dim), math.sqrt(1 / dim))
     init.uniform_(self.log_alpha, -math.sqrt(1 / dim), math.sqrt(1 / dim))
     init.uniform_(self.beta, -math.sqrt(1 / dim), math.sqrt(1 / dim))
Example #12
    def __init__(self,
                 args,
                 batchNorm=False,
                 div_flow=20.,
                 requires_grad=False):
        super(FlowNet2, self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4,
                                     mode='bilinear')  #bilinear

        if args.fp16:
            self.resample1 = nn.Sequential(tofp32(), Resample2d(), tofp16())
        else:
            self.resample1 = Resample2d()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        if args.fp16:
            self.resample2 = nn.Sequential(tofp32(), Resample2d(), tofp16())
        else:
            self.resample2 = Resample2d()

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)

        # Block (FlowNetSD)
        self.flownets_d = FlowNetSD.FlowNetSD(args, batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')

        if args.fp16:
            self.resample3 = nn.Sequential(tofp32(), Resample2d(), tofp16())
        else:
            self.resample3 = Resample2d()

        if args.fp16:
            self.resample4 = nn.Sequential(tofp32(), Resample2d(), tofp16())
        else:
            self.resample4 = Resample2d()

        # Block (FLowNetFusion)
        self.flownetfusion = FlowNetFusion.FlowNetFusion(
            args, batchNorm=self.batchNorm)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
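                    # no bounds given, so init.uniform_ falls back to its defaults a=0.0, b=1.0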
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)
                # init_deconv_bilinear(m.weight)

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False
Example #13
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #14
 def reset_parameters(self):
     init.uniform_(self.init_param, -1 / 2, 1 / 2)
Example #15
 def reset_parameters(self) -> None:
     bound = 1 / math.sqrt(self.weight.size(1))
     init.uniform_(self.weight, -bound, bound)
     if self.bias is not None:
         init.uniform_(self.bias, -bound, bound)
Example #16
 def reset_bn_parameters(self):
     self.reset_running_stats()
     init.uniform_(self.gamma)
     init.zeros_(self.beta)
Example #17
def fan_in_init(tensor) -> None:
    """ Fan in initialization as described in DDPG paper."""
    val_range = 1. / np.sqrt(tensor.size(1))
    init.uniform_(tensor, -val_range, val_range)
Example #18
 def linear_init(model):
     for para in model.parameters():
         init.uniform_(para, -0.05, 0.05)
Example #19
 def reset_parameters(self):
     self.reset_running_stats()
     init.uniform_(self.weight)
     init.zeros_(self.bias)
Example #20
 def reset_parameters(self):
     sd = 1.0 / math.sqrt(self.hidden_size)
     for p in self.parameters():
         if p.requires_grad:
             init.uniform_(p, -sd, sd)
Example #21
def kaiming_uniform_(tensor: Tensor, fan: int, a: float = 0., nonlinearity: str = 'leaky_relu') -> Tensor:
    bound = calculate_gain(nonlinearity, a) * (3.0 / fan) ** 0.5
    return init.uniform_(tensor, a=-bound, b=+bound)
Example #22
def xavier_uniform_(tensor: Tensor, fan_in: int, fan_out: int, gain: float = 1.) -> Tensor:
    bound = gain * (6.0 / (fan_in + fan_out)) ** 0.5
    return init.uniform_(tensor, a=-bound, b=+bound)
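Unlike torch.nn.init, the two helpers above take the fans explicitly instead of inferring them from the tensor shape. A quick sketch of the bounds they produce, assuming both functions and their imports are in scope:

import torch

w = torch.empty(128, 256)
# Xavier bound: 1.0 * sqrt(6 / (256 + 128)) = 0.125
xavier_uniform_(w, fan_in=256, fan_out=128)
# Kaiming bound: calculate_gain('relu') * sqrt(3 / 256) ~= 1.414 * 0.108 ~= 0.153
kaiming_uniform_(w, fan=256, nonlinearity='relu')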
Example #23
    def __init__(
        self,
        emb_size,
        emb_dimension,
        batch_size,
        only_cpu,
        only_gpu,
        only_fst,
        only_snd,
        mix,
        neg_weight,
        negative,
        lr,
        lap_norm,
        fast_neg,
        record_loss,
        async_update,
        num_threads,
    ):
        """ initialize embedding on CPU 

        Paremeters
        ----------
        emb_size int : number of nodes
        emb_dimension int : embedding dimension
        batch_size int : number of node sequences in each batch
        only_cpu bool : training with CPU
        only_gpu bool : training with GPU
        only_fst bool : only embedding for first-order proximity
        only_snd bool : only embedding for second-order proximity
        mix bool : mixed training with CPU and GPU
        negative int : negative samples for each positive node pair
        neg_weight float : negative weight
        lr float : initial learning rate
        lap_norm float : weight of laplacian normalization
        fast_neg bool : do negative sampling inside a batch
        record_loss bool : print the loss during training
        use_context_weight : give different weights to the nodes in a context window
        async_update : asynchronous training
        """
        super(SkipGramModel, self).__init__()
        self.emb_size = emb_size
        self.batch_size = batch_size
        self.only_cpu = only_cpu
        self.only_gpu = only_gpu
        if only_fst:
            self.fst = True
            self.snd = False
            self.emb_dimension = emb_dimension
        elif only_snd:
            self.fst = False
            self.snd = True
            self.emb_dimension = emb_dimension
        else:
            self.fst = True
            self.snd = True
            self.emb_dimension = int(emb_dimension / 2)
        self.mixed_train = mix
        self.neg_weight = neg_weight
        self.negative = negative
        self.lr = lr
        self.lap_norm = lap_norm
        self.fast_neg = fast_neg
        self.record_loss = record_loss
        self.async_update = async_update
        self.num_threads = num_threads

        # initialize the device as cpu
        self.device = torch.device("cpu")

        # embedding
        initrange = 1.0 / self.emb_dimension
        if self.fst:
            self.fst_u_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.uniform_(self.fst_u_embeddings.weight.data, -initrange,
                          initrange)
        if self.snd:
            self.snd_u_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.uniform_(self.snd_u_embeddings.weight.data, -initrange,
                          initrange)
            self.snd_v_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.constant_(self.snd_v_embeddings.weight.data, 0)

        # lookup_table is used for fast sigmoid computing
        self.lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))
        self.lookup_table[0] = 0.
        self.lookup_table[-1] = 1.
        if self.record_loss:
            self.logsigmoid_table = torch.log(
                torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)))
            self.loss_fst = []
            self.loss_snd = []

        # indexes to select positive/negative node pairs from batch_walks
        self.index_emb_negu, self.index_emb_negv = init_emb2neg_index(
            self.negative, self.batch_size)

        # adam
        if self.fst:
            self.fst_state_sum_u = torch.zeros(self.emb_size)
        if self.snd:
            self.snd_state_sum_u = torch.zeros(self.emb_size)
            self.snd_state_sum_v = torch.zeros(self.emb_size)
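The lookup_table above trades a little precision for speed by precomputing sigmoid on a grid from -6.01 to 6.01 with step 0.01. Below is a minimal sketch of how such a table could be indexed; fast_sigmoid and the index arithmetic are illustrative assumptions, not the repository's own helper:

import torch

lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))

def fast_sigmoid(score):
    # map each score to the nearest grid index, clamped to the table range
    idx = torch.clamp((score + 6.01) / 0.01, 0, lookup_table.numel() - 1).long()
    return lookup_table[idx]

print(fast_sigmoid(torch.tensor([0.0, 3.0])))  # ~[0.5, 0.953]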
Example #24
def init_lstm_(lstm, init_weight=0.1):
    """
    Initializes weights of LSTM layer.
    Weights and biases are initialized with uniform(-init_weight, init_weight)
    distribution.

    :param lstm: instance of torch.nn.LSTM
    :param init_weight: range for the uniform initializer
    """
    # Initialize hidden-hidden weights
    init.uniform_(lstm.weight_hh_l0.data, -init_weight, init_weight)
    # Initialize input-hidden weights:
    init.uniform_(lstm.weight_ih_l0.data, -init_weight, init_weight)

    # Initialize bias. PyTorch LSTM has two biases, one for input-hidden GEMM
    # and the other for hidden-hidden GEMM. Here input-hidden bias is
    # initialized with uniform distribution and hidden-hidden bias is
    # initialized with zeros.
    init.uniform_(lstm.bias_ih_l0.data, -init_weight, init_weight)
    init.zeros_(lstm.bias_hh_l0.data)

    if lstm.bidirectional:
        init.uniform_(lstm.weight_hh_l0_reverse.data, -init_weight,
                      init_weight)
        init.uniform_(lstm.weight_ih_l0_reverse.data, -init_weight,
                      init_weight)

        init.uniform_(lstm.bias_ih_l0_reverse.data, -init_weight, init_weight)
        init.zeros_(lstm.bias_hh_l0_reverse.data)
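A short usage sketch for init_lstm_ above with arbitrary dimensions; note that the helper only touches the layer-0 (and layer-0 reverse) parameters, so deeper layers of a multi-layer LSTM keep PyTorch's default initialization:

import torch.nn as nn

lstm = nn.LSTM(input_size=256, hidden_size=512, num_layers=1, bidirectional=True)
init_lstm_(lstm, init_weight=0.1)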
Example #25
def std_uniform_init(W, hidden_size):
    stdv = 1.0 / math.sqrt(hidden_size)
    return init.uniform_(W, -stdv, stdv)
Example #26
    def __init__(self, args, batchNorm=True, div_flow=20):
        super(FlowNetC, self).__init__()

        self.batchNorm = batchNorm
        self.div_flow = div_flow

        self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
        self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
        self.conv_redir = conv(self.batchNorm,
                               256,
                               32,
                               kernel_size=1,
                               stride=1)

        if args.fp16:
            self.corr = nn.Sequential(
                tofp32(),
                Correlation(pad_size=20,
                            kernel_size=1,
                            max_displacement=20,
                            stride1=1,
                            stride2=2,
                            corr_multiply=1), tofp16())
        else:
            self.corr = Correlation(pad_size=20,
                                    kernel_size=1,
                                    max_displacement=20,
                                    stride1=1,
                                    stride2=2,
                                    corr_multiply=1)

        self.corr_activation = nn.LeakyReLU(0.1, inplace=True)
        self.conv3_1 = conv(self.batchNorm, 473, 256)
        self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
        self.conv4_1 = conv(self.batchNorm, 512, 512)
        self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
        self.conv5_1 = conv(self.batchNorm, 512, 512)
        self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
        self.conv6_1 = conv(self.batchNorm, 1024, 1024)

        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2,
                                                       2,
                                                       4,
                                                       2,
                                                       1,
                                                       bias=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

            if isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)
                # init_deconv_bilinear(m.weight)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
Example #27
 def init(self):
     bound = math.sqrt(1.0 / self.embedding_size)
     uniform_(self.emb_relations.weight.data, -bound, bound)
     uniform_(self.emb_entities.weight.data, -bound, bound)
     uniform_(self.emb_types.weight.data, -bound, bound)
Example #28
 def reset_parameters(self):
     init.uniform_(self.initial_param, -math.sqrt(0.5),
                   math.sqrt(0.5)).cuda()
Example #29
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     bound = 0.01
     init.uniform_(self.bias, -bound, bound)
Example #30
 def reset_parameters(self, dim):
     init.uniform_(self.w, -math.sqrt(1 / dim), math.sqrt(1 / dim))
     init.uniform_(self.u, -math.sqrt(1 / dim), math.sqrt(1 / dim))
     init.uniform_(self.b, -math.sqrt(1 / dim), math.sqrt(1 / dim))
Example #31
    def __init__(self, input_dim, n_hidden,
                 dropout, side_dim, attention_type):
        # attention type: seneca, bidaf, mask
        assert attention_type in ['seneca', 'bidaf', 'mask']
        n_layer = 1
        n_hop = 1
        super().__init__()
        self._init_h = nn.Parameter(torch.Tensor(n_layer, n_hidden))
        self._init_c = nn.Parameter(torch.Tensor(n_layer, n_hidden))
        self._init_i = nn.Parameter(torch.Tensor(input_dim))
        init.uniform_(self._init_h, -INI, INI)
        init.uniform_(self._init_c, -INI, INI)
        init.uniform_(self._init_i, -0.1, 0.1)
        self._lstm = nn.LSTM(
            input_dim, n_hidden, n_layer,
            bidirectional=False, dropout=dropout
        )
        self._lstm_cell = None

        # attention parameters
        self._attn_wm = nn.Parameter(torch.Tensor(input_dim, n_hidden))
        self._attn_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self._attn_v = nn.Parameter(torch.Tensor(n_hidden))
        init.xavier_normal_(self._attn_wm)
        init.xavier_normal_(self._attn_wq)
        init.uniform_(self._attn_v, -INI, INI)

        # hop parameters
        self._hop_wm = nn.Parameter(torch.Tensor(input_dim, n_hidden))
        self._hop_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self._hop_v = nn.Parameter(torch.Tensor(n_hidden))
        init.xavier_normal_(self._hop_wm)
        init.xavier_normal_(self._hop_wq)
        init.uniform_(self._hop_v, -INI, INI)
        self._n_hop = n_hop

        # side info attention
        if attention_type == 'seneca':
            self.side_wm = nn.Parameter(torch.Tensor(side_dim, n_hidden))
            self.side_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
            self.side_v = nn.Parameter(torch.Tensor(n_hidden))
            init.xavier_normal_(self.side_wm)
            init.xavier_normal_(self.side_wq)
            init.uniform_(self.side_v, -INI, INI)


        self._attn_ws = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        init.xavier_normal_(self._attn_ws)


        # pad entity put in graph enc now
        # self._pad_entity = nn.Parameter(torch.Tensor(side_dim))
        # init.uniform_(self._pad_entity)

        # stop token
        self._stop = nn.Parameter(torch.Tensor(input_dim))
        init.uniform_(self._stop, -INI, INI)
Example #32
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.bias is not None:
         bound = 1 / math.sqrt(self.C)
         init.uniform_(self.bias, -bound, bound)
Example #33
 def reset_parameters(self):
     bound = 1 / self.weight.shape[0]
     init.uniform_(self.weight, 0, bound)
     if self.bias is not None:
         bound = 1 / math.sqrt(self.weight.shape[0])
         init.uniform_(self.bias, -bound, bound)
Example #34
def uniform(shape, low=-0.1, high=0.1):
    tensor = torch.empty(shape)
    uniform_(tensor, a=low, b=high)
    return tensor
Example #35
 def reset_parameters(self):
     init.uniform_(self.weight)
     init.zeros_(self.bias)
     self.mean.zero_()
     self.var.fill_(1)
Example #36
 def init_param(self, param):
     if len(param.size()) < 2:
         init.uniform_(param)
     else:
         init.xavier_uniform_(param)
Example #37
    def __init__(self,
                 in_channel=1,
                 out_channel=[32, 64, 128, 256],
                 dropout_prob=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6]):
        super(NaimishNet, self).__init__()

        #Architecture summary
        #Activation Layer 1 to 5 is ELU
        #Activation Layer 6: Linear activation
        #Dropout increases in steps of .1, from .1 to .6, from layer 1 to 6
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        #LAYER 1
        #IN (1, 224, 224)
        #OUT conv1: ( 32, 221, 221)
        #maxpool1: (32, 110, 110)
        #Layer 1 out: (32, 110, 110)
        self.conv1 = nn.Sequential(
            OrderedDict([('conv1', nn.Conv2d(in_channel, out_channel[0], 4)),
                         ('elu_1', nn.ELU()),
                         ('bn1', nn.BatchNorm2d(out_channel[0])),
                         ('dropout_1', nn.Dropout2d(dropout_prob[0]))]))

        #LAYER 2
        #IN ( 32, 110, 110)
        #conv2 ( 64, 108, 108)
        #maxpool2: (64, 53, 53)
        self.conv2 = nn.Sequential(
            OrderedDict([('conv2', nn.Conv2d(out_channel[0], out_channel[1],
                                             3)), ('elu_2', nn.ELU()),
                         ('bn2', nn.BatchNorm2d(out_channel[1])),
                         ('dropout_2', nn.Dropout2d(dropout_prob[1]))]))

        #LAYER 3
        #IN (64, 53, 53)
        #Conv3: (128, 52, 52)
        #maxpool: (128, 26, 26)
        self.conv3 = nn.Sequential(
            OrderedDict([('conv3', nn.Conv2d(out_channel[1], out_channel[2],
                                             2)), ('elu_3', nn.ELU()),
                         ('bn3', nn.BatchNorm2d(out_channel[2])),
                         ('dropout_3', nn.Dropout2d(dropout_prob[2]))]))

        #Layer 4
        #IN (128, 26, 26)
        #conv4: (256, 26, 26)
        #maxpool4: ( 256, 13, 13)
        self.conv4 = nn.Sequential(
            OrderedDict([('conv4', nn.Conv2d(out_channel[2], out_channel[3],
                                             1)), ('elu_4', nn.ELU()),
                         ('bn4', nn.BatchNorm2d(out_channel[3])),
                         ('dropout_4', nn.Dropout2d(dropout_prob[3]))]))

        #IN (256, 13, 13)
        #Flatten (256 * 13 * 13)
        self.fc1 = nn.Sequential(
            OrderedDict([('fc1',
                          nn.Linear(in_features=13 * 13 * 256,
                                    out_features=1000)), ('elu_5', nn.ELU()),
                         ('bn5', nn.BatchNorm1d(1000)),
                         ('dropout_5', nn.Dropout2d(dropout_prob[4]))]))

        self.fc2 = nn.Sequential(
            OrderedDict([('fc2', nn.Linear(in_features=1000,
                                           out_features=500)),
                         ('tanh_6', nn.Tanh()), ('bn6', nn.BatchNorm1d(500)),
                         ('dropout_6', nn.Dropout2d(dropout_prob[5]))]))

        #Layer 7
        #OUT FKP: (X, Y)
        self.fc3 = nn.Linear(in_features=500, out_features=136)

        #Custom weights initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight = I.uniform_(m.weight, a=0.0, b=1.0)
            elif isinstance(m, nn.Linear):
                m.weight = I.xavier_uniform_(m.weight, gain=1)
Example #38
 def init_weights(self):
     init.uniform_(self.lstm.weight_ih_l0, a=-0.01, b=0.01)
     init.orthogonal_(self.lstm.weight_hh_l0)
     self.lstm.weight_ih_l0.requires_grad = True
     self.lstm.weight_hh_l0.requires_grad = True
Example #39
lin_nn_model = nn.Sequential(
    nn.Linear(d, d_1, bias=False),
    nn.Linear(d_1, d_2, bias=False)
)
ReLU_model = nn.Sequential(
    nn.Linear(d, d_1),
    nn.ReLU(),
    nn.Linear(d_1, d_2)
)
loss = nn.MSELoss()

param_iter = lin_nn_model.parameters()
w1 = next(param_iter)
w2 = next(param_iter)
init.uniform_(w1, a=0, b=0.01)
init.constant_(w2, w1.norm() / 10)  # This is definitely true! Compute the gradient!

learning_rate = 0.01
time_range = range(2000)
for i in range(1):
    x = data[i, :, :-1]
    y = data[i, :, -1].unsqueeze(1)
    r1, r2, r3 = [], [], []
    for t in time_range:
        y_lin_pred = lin_model(x)
        lin_risk = loss(y_lin_pred, y)
        y_lin_nn_pred = lin_nn_model(x)
        lin_nn_risk = loss(y_lin_nn_pred, y)
        y_ReLU_pred = ReLU_model(x)
        ReLU_risk = loss(y_ReLU_pred, y)