Example #1
    def __first_forward(self, x):
        assert self.training
        self._forwardfunc = self.__normal_forward

        kernel = self.constval * self.alpha * F.layer_norm(self.weight,
                                                           self.norm_shape,
                                                           weight=None,
                                                           bias=None,
                                                           eps=self.gamma)
        output = F.conv2d(x,
                          kernel,
                          bias=self.bias,
                          padding=self.padding,
                          stride=self.stride)

        outputvar = output.var()
        nn.init.constant_(self.alpha, torch.sqrt(2 / outputvar))

        kernel = self.constval * self.alpha * F.layer_norm(self.weight,
                                                           self.norm_shape,
                                                           weight=None,
                                                           bias=None,
                                                           eps=self.gamma)
        output = F.conv2d(x,
                          kernel,
                          bias=self.bias,
                          padding=self.padding,
                          stride=self.stride)

        if self.activation is not None:
            output = self.activation(output)
        return output
Example #2
    def forward(self, word_input, look_ahead_mask, target_padding_mask, *args):
        """

        :param word_input: (batch, target_seq_len, d_model)
        :param look_ahead_mask: (batch, target_seq_len, )
        :param target_padding_mask: (batch, target_seq_len, )
        :param args:
        :return: output of shape (batch, target_seq_len, d_model) and the self-attention weights
        """

        # (batch, target_seq_len, d_model)
        if target_padding_mask is None and look_ahead_mask is None:
            combined_mask = None
        elif target_padding_mask is not None and look_ahead_mask is not None:
            combined_mask = torch.max(target_padding_mask, look_ahead_mask)
        else:
            combined_mask = target_padding_mask if target_padding_mask is not None else look_ahead_mask

        attn1, attn_weights_block1 = self.mha(word_input, word_input, word_input, mask=combined_mask)
        attn1 = self.dropout1(attn1)
        out1 = F.layer_norm(word_input + attn1, normalized_shape=[attn1.size(-1)])

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        output = F.layer_norm(out1 + ffn_output, normalized_shape=[ffn_output.size(-1)])
        return output, attn_weights_block1
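The mask combination and the post-norm residual used above can be exercised on their own. A minimal sketch with made-up shapes (the real d_model, masks, and sublayers come from the surrounding decoder layer; the 1-means-masked convention is an assumption):

import torch
import torch.nn.functional as F

batch, target_seq_len, d_model = 2, 5, 16
word_input = torch.randn(batch, target_seq_len, d_model)

# torch.max keeps a position masked if either mask marks it (assuming 1 = masked).
target_padding_mask = torch.zeros(batch, 1, target_seq_len)
look_ahead_mask = torch.triu(torch.ones(target_seq_len, target_seq_len), diagonal=1)
combined_mask = torch.max(target_padding_mask, look_ahead_mask)  # broadcasts to (batch, t, t)

# The post-norm residual applied twice in the forward pass above:
sublayer_out = torch.randn_like(word_input)
out = F.layer_norm(word_input + sublayer_out, normalized_shape=[word_input.size(-1)])
print(out.shape)  # torch.Size([2, 5, 16])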
Example #3
def prep_vis_pe(bbox_preds, cls_probs):
    """
    Args:
        bbox_preds: raw pre-processed bbox predictions from detector, shape = (batch, detections, 6)
        cls_probs: raw pre-processed class probabilities from detector, shape = (batch, detections, num classes + 1)

    Returns:
        vis_pe: visual positional embedding, which is norm bbox + norm area + box score
            shape = (batch, detections, num_classes + 6 + 1)
    """
    batch_size = bbox_preds.shape[0]
    num_detections = bbox_preds.shape[1]
    num_classes = cls_probs.shape[2]-1
    max_x1s, _ = torch.max(bbox_preds[:, :, 0], dim=1)
    max_x2s, _ = torch.max(bbox_preds[:, :, 2], dim=1)
    max_y1s, _ = torch.max(bbox_preds[:, :, 1], dim=1)
    max_y2s, _ = torch.max(bbox_preds[:, :, 3], dim=1)
    w_ests = torch.max(max_x1s, max_x2s)*1.+1e-5
    h_ests = torch.max(max_y1s, max_y2s)*1.+1e-5
    bbox_preds[:, :, [0, 2]] = torch.div(bbox_preds[:, :, [0, 2]], w_ests.unsqueeze(1).unsqueeze(2))
    bbox_preds[:, :, [1, 3]] = torch.div(bbox_preds[:, :, [1, 3]], h_ests.unsqueeze(1).unsqueeze(2))

    rel_area = (bbox_preds[:, :, 3]-bbox_preds[:, :, 1])*(bbox_preds[:, :, 2]-bbox_preds[:, :, 0])
    rel_area.clamp_(0)

    vis_pe = torch.cat((bbox_preds[:, :, :4],
                        rel_area.view(batch_size, num_detections, 1),
                        bbox_preds[:, :, 5:]), dim=-1)
    vis_pe = torch.cat((F.layer_norm(vis_pe, [6]), F.layer_norm(cls_probs, [num_classes+1])), dim=-1)

    return vis_pe
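A quick shape-only smoke test for prep_vis_pe. The random tensors below merely stand in for real detector output (values are not meaningful; the 6-column bbox layout with the score in the last column follows the code above):

import torch
import torch.nn.functional as F

batch, detections, num_classes = 2, 10, 80
bbox_preds = torch.rand(batch, detections, 6) * 640   # columns 0-3: x1, y1, x2, y2; column 5: score
cls_probs = torch.softmax(torch.randn(batch, detections, num_classes + 1), dim=-1)

vis_pe = prep_vis_pe(bbox_preds, cls_probs)
print(vis_pe.shape)  # torch.Size([2, 10, 87]) == (batch, detections, num_classes + 6 + 1)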
Example #4
def get_img_tensors(preds, fc_layer, fc_dim, num_classes, max_detections=100):
    """
    Args:
        preds: predictions from a detectron2 detector, a list of instances
        fc_layer: 0-indexed layer to pull features from (in prior literature FC6 = 0 (first FC layer), and
                    FC7 = 1 (2nd FC layer))
        fc_dim: the dimensionality of the flattened vector (usually 2048)

    Returns:
        box_features: tensor of box features from the FC layer output, shape = (number of regions, fc-dim)
        vis_pe: visual positional embedding, which is bbox + area + box score
        both are end row padded to the max detection limit
    """
    h, w = preds['instances'].image_size
    fields = preds['instances'].get_fields()

    fc_box_features = fields['fc_box_features'][:, fc_layer*fc_dim:(fc_layer+1)*fc_dim]
    probs = fields['probs']
    boxes = fields['pred_boxes'].clone()

    num_detections = fc_box_features.shape[0]

    boxes.scale(scale_x=1/w, scale_y=1/h)
    areas = boxes.area().unsqueeze(dim=1)
    scores = fields['scores'].unsqueeze(dim=1)

    bbox_areas = torch.cat([boxes.tensor, areas, scores], dim=1)
    # 4 coordinates + 1 bbox area + 1 score, +1 for background class
    vis_pe = torch.cat((F.layer_norm(bbox_areas, [6]), F.layer_norm(probs, [num_classes+1])), dim=-1)

    box_features = F.pad(fc_box_features, [0, 0, 0, max_detections-num_detections])
    vis_pe = F.pad(vis_pe, [0, 0, 0, max_detections-num_detections])

    return box_features, vis_pe
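One hypothetical way to build a preds dict for get_img_tensors uses detectron2's Instances and Boxes structures. This is only a sketch, assuming detectron2 is installed and that the detector has been patched to expose fc_box_features and probs as the docstring implies:

import torch
from detectron2.structures import Boxes, Instances

num_det, num_classes, fc_dim = 5, 80, 2048
inst = Instances(image_size=(480, 640))                  # (h, w)
inst.fc_box_features = torch.randn(num_det, 2 * fc_dim)  # FC6 and FC7 features concatenated
inst.probs = torch.softmax(torch.randn(num_det, num_classes + 1), dim=1)
inst.pred_boxes = Boxes(torch.tensor([[10., 20., 110., 220.]]).repeat(num_det, 1))
inst.scores = torch.rand(num_det)
preds = {'instances': inst}

box_features, vis_pe = get_img_tensors(preds, fc_layer=1, fc_dim=fc_dim, num_classes=num_classes)
print(box_features.shape, vis_pe.shape)  # (100, 2048) and (100, 87) after padding to max_detections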
Example #5
 def forward(self, x):
     # x = torch.from_numpy(x)
     x = F.layer_norm(x, x.size())
     x = F.leaky_relu(self.fc1(x))
     x = F.layer_norm(x, x.size())
     x = self.fc2(x)
     return x
Example #6
    def get_rcnn(self, img_path):
        if os.path.exists(img_path) and os.path.exists(
                img_path.replace('.npy', '_cls_prob.npy')):
            img = torch.from_numpy(np.load(img_path))
            img_id = img_path.split('/')[-1].split('.')[0]
            cls_label = torch.from_numpy(
                np.load(img_path.replace('.npy', '_cls_prob.npy')))
            with h5py.File(self.region_bbox_file, 'r') as region_bbox_f:
                vis_pe = torch.from_numpy(region_bbox_f[img_id][:])

            # lazy normalization of the coordinates...

            w_est = torch.max(vis_pe[:, [0, 2]]) * 1. + 1e-5
            h_est = torch.max(vis_pe[:, [1, 3]]) * 1. + 1e-5
            vis_pe[:, [0, 2]] /= w_est
            vis_pe[:, [1, 3]] /= h_est
            rel_area = (vis_pe[:, 3] - vis_pe[:, 1]) * (vis_pe[:, 2] -
                                                        vis_pe[:, 0])
            rel_area.clamp_(0)

            vis_pe = torch.cat(
                (vis_pe[:, :4], rel_area.view(-1, 1), vis_pe[:, 5:]),
                -1)  # confidence score
            normalized_coord = F.normalize(vis_pe.data[:, :5] - 0.5, dim=-1)
            vis_pe = torch.cat((F.layer_norm(vis_pe, [6]), \
                                F.layer_norm(cls_label, [1601])), dim=-1)  # 1601 hard coded...
        else:
            img = torch.randn(100, 2048)
            vis_pe = torch.randn(100, 1601 + 6)
        return img, vis_pe
Example #7
    def forward(self, x):
        y, _ = self.multi_head_attention(x, x, x)
        x = F.layer_norm(x + y, (self.embed_dim, ))

        y = self.feed_forward(x)
        y = self.activation(y)
        return F.layer_norm(x + y, (self.embed_dim, ))
Example #8
File: net.py Project: TYSSSY/APBGCN
    def forward(self, t, adj):  # adj=dataset.skeleton_
        if self.sequential:  # sequential architecture
            for i in range(self.num_layers):
                t = rearrange(fn.relu(self.spatial_layers[i](t, adj)),
                              'b n c -> n b c')
                t = rearrange(
                    fn.relu(
                        fn.layer_norm(self.temporal_layers[i]
                                      (t), t.shape[1:]) + t), 'n b c -> b n c')
        else:  # parallel architecture
            s = t
            t = get_synergy(t)
            #t = self.temporal_in(t.permute(1,0))
            t = torch.unsqueeze(t.permute(1, 0), 1)
            t = self.temporal_in(rearrange(t, 'b n c -> n b c'))
            #print(t.shape)
            #print(p)
            for i in range(self.num_layers):
                s = fn.relu(self.spatial_layers[i](s, adj))
                t = fn.relu(
                    fn.layer_norm(self.temporal_layers[i](t), t.shape[1:]) + t)
            if self.trainable_factor:
                factor = fn.sigmoid(self.spatial_factor).to("cuda")
                #t = factor * rearrange(s, 'b n c -> b (n c)') + (1. - factor) * t
                t = factor * s + (1. - factor) * rearrange(t, 'n b c -> b n c')

            else:
                t = (s + rearrange(t, 'n b c -> b n c')) * 0.5
        #t = self.bottle_neck(t)
        t = rearrange(self.bottle_neck(t), 'b n c -> b (n c)')
        t = self.final_layer(t)
        # return fn.sigmoid(t)  # dimension (b, n, oc)
        return t
Example #9
    def forward(self, enc_demo_seq, enc_demo_key_seq, e_l, e_r, start_ind, end_ind, inputs, timestep=None):
        """Performs multi-layered, multi-headed attention."""

        if self._hp.forced_attention:
            return batchwise_index(enc_demo_seq, timestep[:,0].long()), None

        # Get (initial) attention key
        if self._hp.one_hot_attn_time_cond and timestep is not None:
            one_hot_timestep = make_one_hot(timestep.long(), self._hp.max_seq_len).float()
        else:
            one_hot_timestep = timestep
        args = [one_hot_timestep] if self._hp.timestep_cond_attention else []
        
        query = self.query_net(e_l, e_r, *args)
        
        # Attend
        s_ind, e_ind = (torch.floor(start_ind), torch.ceil(end_ind)) if self._hp.mask_inf_attention \
                                                                     else (inputs.start_ind, inputs.end_ind)
        norm_shape_k = query.shape[1:]
        norm_shape_v = enc_demo_seq.shape[2:]
        raw_attn_output, att_weights = None, None
        for attention, predictor in zip(self.attention_layers, self.predictor_layers):
            raw_attn_output, att_weights = attention(query, enc_demo_key_seq, enc_demo_seq, s_ind, e_ind,
                                                     forced_attention_step=timestep if self._hp.forced_attention else None)
            x = F.layer_norm(raw_attn_output, norm_shape_v)
            query = F.layer_norm(predictor(x) + query, norm_shape_k)  # skip connections around attention and predictor

        return apply_linear(self.out, raw_attn_output, dim=1), att_weights     # output non-normalized output of final attention layer
Example #10
    def forward(self, tgt_seq, encoder_outputs, V, return_attns=False):

        dec_init_state = encoder_outputs['dec_init_state']
        encoder_states = encoder_outputs['encoder_states']
        keys = encoder_outputs['encoder_states']
        src_mask = encoder_outputs['src_mask']

        tgt_embed = self.tgt_embed_layer(tgt_seq)
        tgt_embed = self.dropout(tgt_embed)

        dec_states = None
        for l, rnn in enumerate(self.layer_stack):
            if l == 0:
                dec_states, _ = rnn(tgt_embed, dec_init_state)
                dec_states = F.layer_norm(dec_states, (self.d_model, ))
                context_txt, txt_attention = self.attention_txt(
                    dec_states, keys, encoder_states, src_mask)
                context_img, img_attention = self.attention_img(
                    dec_states, V, V)
            else:
                prev_states = dec_states
                dec_input = torch.cat([prev_states, context_txt, context_img],
                                      2)
                dec_states, last_hidden = rnn(dec_input, dec_init_state)
                dec_states = self.dropout(dec_states)
                if l >= 2:
                    dec_states = self.residual_scaler * (dec_states +
                                                         prev_states)
                dec_states = F.layer_norm(dec_states, (self.d_model, ))
        return dec_states, last_hidden.squeeze(0), (txt_attention,
                                                    img_attention)
Example #11
 def forward(self, x):
     tensor_1, tensor_2 = torch.split(x, int(x.size()[1]/2), dim=1)
     act_1 = tanh(tensor_1)
     act_2 = sigmoid(tensor_2)
     norm_1 = layer_norm(act_1, act_1.size()[1:])
     norm_2 = layer_norm(act_2, act_2.size()[1:])
     return torch.mul(norm_1, norm_2)
Example #12
    def forward(self, X: torch.FloatTensor,
                idx: torch.LongTensor) -> torch.FloatTensor:
        """
        Making a forward pass of layer normalization.

        Arg types:
            * **X** (Pytorch Float Tensor) - Input tensor,
                with shape (batch_size, feature_dim, num_nodes, seq_len).
            * **idx** (Pytorch Long Tensor) - Input indices.

        Return types:
            * **X** (PyTorch Float Tensor) - Output tensor,
                with shape (batch_size, feature_dim, num_nodes, seq_len).
        """
        if self._elementwise_affine:
            return F.layer_norm(
                X,
                tuple(X.shape[1:]),
                self._weight[:, idx, :],
                self._bias[:, idx, :],
                self._eps,
            )
        else:
            return F.layer_norm(X, tuple(X.shape[1:]), self._weight,
                                self._bias, self._eps)
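What the indexed branch does can be seen with plain tensors. A minimal sketch (shapes are hypothetical): the affine weight and bias cover every node, and idx selects the subset present in the batch so the sliced parameters match X.shape[1:]:

import torch
import torch.nn.functional as F

batch_size, feature_dim, total_nodes, seq_len = 4, 32, 207, 12
weight = torch.ones(feature_dim, total_nodes, seq_len)   # learnable in the real module
bias = torch.zeros(feature_dim, total_nodes, seq_len)

idx = torch.arange(100)                                   # nodes used in this batch
X = torch.randn(batch_size, feature_dim, idx.numel(), seq_len)
out = F.layer_norm(X, tuple(X.shape[1:]), weight[:, idx, :], bias[:, idx, :], 1e-5)
print(out.shape)  # torch.Size([4, 32, 100, 12])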
Example #13
def LayerNormLSTMScript(training: bool, zoneout_prob: float, input, h0, c0,
                        kernel, recurrent_kernel, bias, gamma, gamma_h, beta_h,
                        zoneout_mask):
    time_steps = input.shape[0]
    batch_size = input.shape[1]
    hidden_size = recurrent_kernel.shape[0]

    h = [h0]
    c = [c0]
    Wx = F.layer_norm(input @ kernel, (hidden_size * 4, ), weight=gamma[0])
    for t in range(time_steps):
        v = F.layer_norm(h[t] @ recurrent_kernel, (hidden_size * 4, ),
                         weight=gamma[1]) + Wx[t] + bias
        i, g, f, o = torch.chunk(v, 4, 1)
        i = torch.sigmoid(i)
        g = torch.tanh(g)
        f = torch.sigmoid(f)
        o = torch.sigmoid(o)
        c.append(f * c[t] + i * g)
        h.append(o * torch.tanh(
            F.layer_norm(c[-1], (hidden_size, ), weight=gamma_h, bias=beta_h)))
        if zoneout_prob:
            if training:
                h[-1] = (h[-1] - h[-2]) * zoneout_mask[t] + h[-2]
            else:
                h[-1] = zoneout_prob * h[-2] + (1 - zoneout_prob) * h[-1]
    h = torch.stack(h)
    c = torch.stack(c)
    return h, c
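A hypothetical call showing the tensor shapes LayerNormLSTMScript expects (names and sizes here are assumptions; gamma holds one scale vector for the input projection and one for the recurrent projection):

import torch

time_steps, batch, input_size, hidden = 7, 3, 10, 20
x = torch.randn(time_steps, batch, input_size)
h0 = torch.zeros(batch, hidden)
c0 = torch.zeros(batch, hidden)
kernel = torch.randn(input_size, 4 * hidden)
recurrent_kernel = torch.randn(hidden, 4 * hidden)
bias = torch.zeros(4 * hidden)
gamma = torch.ones(2, 4 * hidden)                # gamma[0] scales Wx, gamma[1] scales Rh
gamma_h, beta_h = torch.ones(hidden), torch.zeros(hidden)
zoneout_mask = torch.ones(time_steps, batch, hidden)

h, c = LayerNormLSTMScript(False, 0.0, x, h0, c0, kernel, recurrent_kernel,
                           bias, gamma, gamma_h, beta_h, zoneout_mask)
print(h.shape, c.shape)  # both (time_steps + 1, batch, hidden), including the initial state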
Example #14
    def get_rcnn(self, path):
        img_id = path.split('/')[-1].split('.')[0]
        with h5py.File(self.region_det_file_prefix + '_feat' + img_id[-3:] + '.h5', 'r') as region_feat_f, \
                h5py.File(self.region_det_file_prefix + '_cls' + img_id[-3:] + '.h5', 'r') as region_cls_f, \
                h5py.File(self.region_bbox_file, 'r') as region_bbox_f:

            img = torch.from_numpy(region_feat_f[img_id][:]).float()
            cls_label = torch.from_numpy(region_cls_f[img_id][:]).float()
            vis_pe = torch.from_numpy(region_bbox_f[img_id][:])

        # lazy normalization of the coordinates...

        w_est = torch.max(vis_pe[:, [0, 2]]) * 1. + 1e-5
        h_est = torch.max(vis_pe[:, [1, 3]]) * 1. + 1e-5
        vis_pe[:, [0, 2]] /= w_est
        vis_pe[:, [1, 3]] /= h_est
        rel_area = (vis_pe[:, 3] - vis_pe[:, 1]) * (vis_pe[:, 2] -
                                                    vis_pe[:, 0])
        rel_area.clamp_(0)

        vis_pe = torch.cat(
            (vis_pe[:, :4], rel_area.view(-1, 1), vis_pe[:, 5:]),
            -1)  # confidence score
        normalized_coord = F.normalize(vis_pe.data[:, :5] - 0.5, dim=-1)
        vis_pe = torch.cat((F.layer_norm(vis_pe, [6]), \
                            F.layer_norm(cls_label, [1601])), dim=-1)  # 1601 hard coded...

        return img, vis_pe
Example #15
File: image.py Project: tadejmagajna/flair
    def forward(self, x):
        x = self.conv_features(x)  # [b, d, h, w]
        b, d, h, w = x.shape
        if self.use_transformer:
            # add positional encodings
            y = torch.stack([
                torch.cat([torch.arange(h).unsqueeze(1)] * w, dim=1),
                torch.cat([torch.arange(w).unsqueeze(0)] * h, dim=0),
            ])  # [2, h, w]
            y = y.view([2, h * w]).transpose(1, 0)  # [h*w, 2]
            y = y.type(torch.float32).to(flair.device)
            y = self.position_features(y).transpose(1, 0).view(
                [d, h, w])  # [h*w, d] => [d, h, w]
            y = y.unsqueeze(dim=0)  # [1, d, h, w]
            x = x + y  # [b, d, h, w] + [1, d, h, w] => [b, d, h, w]
            # reshape the pixels into the sequence
            x = x.view([b, d, h * w])  # [b, d, h*w]
            # layer norm after convolution and positional encodings
            x = F.layer_norm(x.permute([0, 2, 1]), (d, )).permute([0, 2, 1])
            # add <cls> token
            x = torch.cat([x, torch.stack([self.cls_token] * b)],
                          dim=2)  # [b, d, h*w+1]
            # transformer requires input in the shape [h*w+1, b, d]
            x = (
                x.view([b * d, h * w + 1]).transpose(1,
                                                     0).view([h * w + 1, b, d])
            )  # [b, d, h*w+1] => [b*d, h*w+1] => [h*w+1, b*d] => [h*w+1, b, d]
            x = self.transformer(x)  # [h*w+1, b, d]
            # the output is an embedding of <cls> token
            x = x[-1, :, :]  # [b, d]
        else:
            x = x.view([-1, self._feat_dim])
            x = F.layer_norm(x, (self._feat_dim, ))

        return x
Example #16
def LayerNormGRUScript(training: bool, zoneout_prob: float, input, h0, kernel,
                       recurrent_kernel, bias, recurrent_bias, gamma,
                       zoneout_mask):
    time_steps = input.shape[0]
    batch_size = input.shape[1]
    hidden_size = recurrent_kernel.shape[0]

    h = [h0]
    Wx = F.layer_norm(input @ kernel,
                      (hidden_size * 3, ), weight=gamma[0]) + bias
    for t in range(time_steps):
        Rh = F.layer_norm(h[t] @ recurrent_kernel, (hidden_size * 3, ),
                          weight=gamma[1]) + recurrent_bias
        vx = torch.chunk(Wx[t], 3, 1)
        vh = torch.chunk(Rh, 3, 1)

        z = torch.sigmoid(vx[0] + vh[0])
        r = torch.sigmoid(vx[1] + vh[1])
        g = torch.tanh(vx[2] + r * vh[2])

        h.append(z * h[t] + (1 - z) * g)
        if zoneout_prob:
            if training:
                h[-1] = (h[-1] - h[-2]) * zoneout_mask[t] + h[-2]
            else:
                h[-1] = zoneout_prob * h[-2] + (1 - zoneout_prob) * h[-1]

    h = torch.stack(h)
    return h
Example #17
 def encode(self, src_seq, src_mask=None):
     src_embed = self.src_embed_layer(src_seq)
     src_embed = self.dropout(src_embed)
     enc_states = src_embed
     for l, rnn in enumerate(self.encoder_rnns):
         prev_states = enc_states
         if src_mask is not None:
             prev_states = pack_padded_sequence(prev_states,
                                                lengths=src_mask.sum(1),
                                                batch_first=True)
         enc_states, (enc_last_hidden, _) = rnn(prev_states)
         if src_mask is not None:
             enc_states, _ = pad_packed_sequence(enc_states,
                                                 batch_first=True)
         enc_states = self.dropout(enc_states)
         if l >= 2:
             enc_states = self.residual_scaler * (enc_states + prev_states)
         if self.layer_norm:
             enc_states = F.layer_norm(enc_states,
                                       (self._hidden_size * 2, ))
     enc_states = self.project_nn(enc_states)
     if self.layer_norm:
         enc_states = F.layer_norm(enc_states, (self._hidden_size, ))
     encoder_outputs = {
         "encoder_states": enc_states,
         "keys": enc_states,
         "src_mask": src_mask
     }
     return encoder_outputs
Example #18
    def forward(self, src_seq):

        src_embed = self.src_embed_layer(src_seq)
        src_embed = self.dropout(src_embed)
        src_mask = src_seq.clone()
        src_mask = torch.where(src_seq != 0,
                               torch.tensor(1).cuda(),
                               torch.tensor(0).cuda())

        enc_states = src_embed
        for l, rnn in enumerate(self.layer_stack):
            prev_states = enc_states
            prev_states = pack_padded_sequence(prev_states,
                                               lengths=src_mask.sum(1),
                                               batch_first=True,
                                               enforce_sorted=False)
            enc_states = rnn(prev_states)
            enc_states, _ = pad_packed_sequence(enc_states, batch_first=True)
            enc_states = self.dropout(enc_states)
            if l >= 2:
                prev_states, _ = pad_packed_sequence(prev_states,
                                                     batch_first=True)
                enc_states = self.residual_scaler * (enc_states + prev_states)
            enc_states = F.layer_norm(enc_states, (self.d_model * 2, ))
        enc_states = self.project_nn(enc_states)
        enc_states = F.layer_norm(enc_states, (self.d_model, ))

        encoder_outputs = {
            "encoder_states": enc_states,
            "keys": enc_states,
            "src_mask": src_mask,
            "dec_init_state": None
        }

        return encoder_outputs
Example #19
    def forward(self, x_t, h_t, c_t, m_t):
        x_concat = self.conv_x(x_t)
        x_concat = f.layer_norm(x_concat, x_concat.size()[1:])
        h_concat = self.conv_h(h_t)
        h_concat = f.layer_norm(h_concat, h_concat.size()[1:])
        m_concat = self.conv_m(m_t)
        m_concat = f.layer_norm(m_concat, m_concat.size()[1:])
        i_x, f_x, g_x, i_x_prime, f_x_prime, g_x_prime, o_x = torch.split(
            x_concat, self.num_hidden, dim=1)
        i_h, f_h, g_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)
        i_m, f_m, g_m = torch.split(m_concat, self.num_hidden, dim=1)

        i_t = torch.sigmoid(i_x + i_h)
        f_t = torch.sigmoid(f_x + f_h + self._forget_bias)
        g_t = torch.tanh(g_x + g_h)

        c_new = f_t * c_t + i_t * g_t

        i_t_prime = torch.sigmoid(i_x_prime + i_m)
        f_t_prime = torch.sigmoid(f_x_prime + f_m + self._forget_bias)
        g_t_prime = torch.tanh(g_x_prime + g_m)

        m_new = f_t_prime * m_t + i_t_prime * g_t_prime

        mem = torch.cat((c_new, m_new), 1)
        o_t = torch.sigmoid(o_x + o_h + self.conv_o(mem))
        h_new = o_t * torch.tanh(self.conv_last(mem))

        return h_new, c_new, m_new
Example #20
    def forward(self, x):
        x = (x - torch.mean(x)) / torch.var(x)
        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv1(x))
        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv2(x))
        x = F.layer_norm(x, x.shape)
        x = F.max_pool2d(x, 2, 2)
        x = F.dropout2d(x, 0.3)

        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv3(x))
        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv4(x))
        x = F.layer_norm(x, x.shape)
        x = F.max_pool2d(x, 2, 2)
        x = F.dropout2d(x, 0.3)

        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv5(x))
        x = F.pad(x, (1, 1, 1, 1))
        x = F.relu(self.conv6(x))
        x = F.layer_norm(x, x.shape)
        x = F.max_pool2d(x, 2, 2)
        x = F.dropout2d(x, 0.3)

        x = x.view(-1, 3 * 3 * 256)
        x = self.fc1(x)
        x = F.dropout(x, 0.3)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)

        return F.log_softmax(x, dim=1)
Example #21
 def forward(self, x):
     ### YOUR CODE HERE ###
     att = self.attention(x, x, x)
     interm = x + F.layer_norm(att, normalized_shape=att.size()[1:])
     ff = self.feed_forward(interm.float())
     transformed_skip = interm + F.layer_norm(
         ff, normalized_shape=ff.size()[1:]).double()
     return transformed_skip
Example #22
    def forward(self, inputs):
        qkv = self.produce_qkv(inputs)
        queries, keys, values = qkv.split(self.in_channels, -1)
        attention = self.attention(queries, keys, values)
        outputs = F.layer_norm(attention + inputs, (self.in_channels,))

        outputs = F.layer_norm(self.linear(outputs) + outputs, (self.in_channels,))
        return outputs 
Example #23
 def forward(self, x, z, src_mask, trg_mask):
     att_out, _ = self.self_masked_attention(x, x, x, trg_mask)
     middle_out_1 = F.layer_norm(self.dropout(att_out) + x, x.shape)
     att_out_2, _ = self.encoder_attention(middle_out_1, z, z, src_mask)
     middle_out_2 = F.layer_norm(self.dropout(att_out_2) + middle_out_1, middle_out_1.shape)
     fc_out = self.fc(middle_out_2)
     out = F.layer_norm(middle_out_2 + self.dropout(fc_out), middle_out_2.shape)
     return out
Example #24
 def forward(self, x):
     # print(x.size())
     # print('x', x)
     x = F.layer_norm(x, x.size())
     x = F.leaky_relu(self.fc1(x))
     x = F.layer_norm(x, x.size())
     x = torch.sigmoid(self.fc2(x))
     return x
Example #25
 def forward(self, x):
     normalized_shape = x.size()[1:]
     if self.affine:
         return F.layer_norm(x, normalized_shape,
                             self.weight.expand(normalized_shape),
                             self.bias.expand(normalized_shape))
     else:
         return F.layer_norm(x, normalized_shape)
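For the non-affine branch, F.layer_norm over x.size()[1:] matches nn.LayerNorm built with the same shape and elementwise_affine=False. A small sanity sketch (not part of the original module):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(4, 8, 16)
normalized_shape = x.size()[1:]
module = nn.LayerNorm(normalized_shape, elementwise_affine=False)
assert torch.allclose(F.layer_norm(x, normalized_shape), module(x), atol=1e-6)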
Example #26
File: layer.py Project: salvaRC/El-GNNino
 def forward(self, input, idx):
     if self.elementwise_affine:
         return F.layer_norm(input, tuple(input.shape[1:]),
                             self.weight[:, idx, :], self.bias[:, idx, :],
                             self.eps)
     else:
         return F.layer_norm(input, tuple(input.shape[1:]), self.weight,
                             self.bias, self.eps)
Example #27
 def forward(self, x):
     x = torch.tanh(F.layer_norm(self.conv1(x), (20, 24, 24)))
     x = F.max_pool2d(x, 2, 2)
     x = torch.tanh(F.layer_norm(self.conv2(x), (50, 8, 8)))
     x = F.max_pool2d(x, 2, 2)
     x = x.view(-1, 4 * 4 * 50)
     x = torch.tanh(F.layer_norm(self.fc1(x), (500, )))
     x = F.layer_norm(self.fc2(x), (10, ))
     return F.log_softmax(x, dim=1)
Example #28
 def forward(self, y_s, y_t):
     y_s = F.layer_norm(y_s, torch.Size(
         (self.n_cls, )), None, None, 1e-7) * self.multiplier
     y_t = F.layer_norm(y_t, torch.Size(
         (self.n_cls, )), None, None, 1e-7) * self.multiplier
     p_s = F.log_softmax(y_s / self.T, dim=1)
     p_t = F.softmax(y_t / self.T, dim=1)
     loss = F.kl_div(p_s, p_t, size_average=False) * (self.T**
                                                      2) / y_s.shape[0]
     return loss
Example #29
    def forward(self, inp, attn_out=None):
        assert inp.size(1) == self.d_model, "Feature dimension not match!!"

        if self.pre_lnorm:
            inp = F.layer_norm(inp.transpose(1,2), (self.d_model,)).transpose(1,2)
        relu_out1 = self.drop1(F.relu(self.ff1_net(inp)))
        out2 = self.drop2(self.ff2_net(relu_out1))
        output = out2 + inp
        if not self.pre_lnorm:
            output = F.layer_norm(output.transpose(1,2), (self.d_model,)).transpose(1,2)
        return output
Example #30
File: VSRGNN.py Project: AIM-SE/NeuRec
    def ATTNCell(self, A, hidden):

        attn1 = self.mattn(hidden, hidden, hidden)
        attn1 = F.layer_norm(attn1 + hidden, (self.hidden_size, ))
        attn2 = self.mattn(attn1, hidden, hidden)
        attn2 = F.dropout(attn2, self.dropout, training=self.training)
        attn2 = F.layer_norm(attn1 + attn2, (self.hidden_size, ))

        x = self.feedforward(attn2)
        x = F.layer_norm(attn2 + x, (self.hidden_size, ))

        return x