def parallel_matmul(self, lm_output, logit_weights, parallel_output, topo):
        if topo is not None and topo.mp_info.size > 1:
            input_parallel = paddle.distributed.collective._c_identity(
                lm_output, group=None)

            logits = paddle.matmul(
                input_parallel, logit_weights, transpose_y=True)

            if parallel_output:
                return logits

            return paddle.distributed.collective._c_concat(logits, group=None)
        else:
            logits = paddle.matmul(lm_output, logit_weights, transpose_y=True)
            return logits
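When no model-parallel group is active, the helper above reduces to a plain projection onto the transposed embedding matrix. A minimal standalone sketch of that branch with assumed dummy shapes:

import paddle

# hypothetical sizes: 2 token positions, hidden size 8, vocab size 16
lm_output = paddle.rand([2, 8])
logit_weights = paddle.rand([16, 8])   # word-embedding weights, [vocab, hidden]
logits = paddle.matmul(lm_output, logit_weights, transpose_y=True)  # [2, 16]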
Example #2
 def __scaled_dot_product_attention(self, q, k, v, r, t, attn_mask):
     q_w, q_r, q_t = q
     score_w = paddle.matmul(q_w, k, transpose_y=True)
     score_r = paddle.matmul(q_r, r, transpose_y=True)
     score_r = self.__rel_shift(score_r, k.shape[2])
     score_t = paddle.matmul(q_t, t, transpose_y=True)
     score = score_w + score_r + score_t
     score = score * (self.d_key**-0.5)
     if attn_mask is not None:
         score += attn_mask
     weights = F.softmax(score)
     if self.dropout:
         weights = self.dropout(weights)
     out = paddle.matmul(weights, v)
     return out
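For comparison, a minimal sketch of the plain scaled-dot-product core shared by the three score terms above, without the relative-position and time terms (shapes are assumptions):

import paddle
import paddle.nn.functional as F

q = paddle.rand([2, 4, 5, 16])   # [batch, heads, seq_len, d_key]
k = paddle.rand([2, 4, 5, 16])
v = paddle.rand([2, 4, 5, 16])
score = paddle.matmul(q, k, transpose_y=True) * (16 ** -0.5)   # [2, 4, 5, 5]
weights = F.softmax(score)
out = paddle.matmul(weights, v)                                # [2, 4, 5, 16]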
Example #3
    def forward(self, graph, feat):
        """Forward
        Args:
            graph: hetergeneous graph built by pgl.HeterGraph.
            inputs: node features/representation from graph/previous layer.
        """
        if self.num_bases < self.num_rels:
            weight = paddle.transpose(self.weight, perm=[1, 0, 2])
            weight = paddle.matmul(self.w_comp, weight)
            weight = paddle.transpose(weight, perm=[1, 0, 2])
        else:
            weight = self.weight

        def send_func(src_feat, dst_feat, edge_feat):
            """
            send function
            """
            return src_feat

        def recv_func(msg):
            """
            receive function
            """
            return msg.reduce_mean(msg['h'])

        feat_list = []
             
        for idx, etype in enumerate(self.etypes):
            sub_g = graph[graph.edge_types[idx]]
            sub_g.tensor()
            if self.norm:
                norm = GF.degree_norm(sub_g)
                feat = feat * norm
            w = weight[idx, :, :].squeeze()
            h = paddle.matmul(feat, w)
            msg = sub_g.send(send_func, src_feat={'h':h})
            h = sub_g.recv(recv_func, msg)
            feat_list.append(h)
        h = paddle.stack(feat_list, axis=0)
        h = paddle.sum(h, axis=0)
        if self.act == 'relu':
            Act = paddle.nn.ReLU()
            h = Act(h)
        else:
            Act = paddle.nn.Sigmoid()
            h = Act(h)

        return h
Example #4
    def _get_rand_mask(self, blocked_query_mask, blocked_key_mask,
                       rand_mask_idx, batch_size, sequence_length):
        '''
        return random mask: [B, H, L-G, bs, R * bs]
        '''
        # rand_mask_idx: [H, T]
        # blocked_query_mask: [B, L, bs]
        # blocked_key_mask: [B, L, bs]
        bs = self.block_size
        B = batch_size
        L = sequence_length // bs
        H = self.num_heads
        G = self.num_global_blocks
        GB = self.num_global_blocks_back
        GF = self.num_global_blocks_front
        R = self.num_rand_blocks
        temp_block_key_mask = paddle.unsqueeze(blocked_key_mask, 1)
        temp_block_key_mask = paddle.expand(temp_block_key_mask, [B, H, L, -1])
        temp_block_key_mask_list = [
            paddle.gather_nd(temp_block_key_mask[b], rand_mask_idx)
            for b in range(B)
        ]
        temp_block_key_mask = paddle.concat(temp_block_key_mask_list, 0)
        temp_block_key_mask = paddle.reshape(temp_block_key_mask,
                                             [B, H, L - G, 1, R * bs])

        temp_blocked_query_mask = paddle.unsqueeze(
            blocked_query_mask[:, GF:-GB], 1)
        temp_blocked_query_mask = paddle.expand(temp_blocked_query_mask,
                                                [B, H, L - G, -1])
        temp_blocked_query_mask = paddle.reshape(temp_blocked_query_mask,
                                                 [B, H, L - G, bs, 1])

        rand_mask = paddle.matmul(temp_blocked_query_mask, temp_block_key_mask)
        return rand_mask
Example #5
    def forward(self, inputs):
        token_ids = inputs['token_ids']
        type_ids = inputs['type_ids']
        pos_ids = inputs['pos_ids']
        generation_mask = inputs['generation_mask']
        latent_id = inputs['latent_id']
        data_id = inputs['data_id']

        # [-1, 1, latent_type_size]
        latent_id = F.one_hot(latent_id, self.latent_type_size)
        # [-1, 1, hidden_size]
        latent_emb = paddle.matmul(
            latent_id, self.latent_weight, transpose_y=True)

        caches = self.plato2_encoder.gen_caches(token_ids)

        # [-1, seq_len + 1, hidden_size]
        enc_out, new_caches = self.plato2_encoder(
            caches, token_ids, type_ids, pos_ids, generation_mask, latent_emb)

        pred_ids = self.decode(inputs, new_caches)

        nsp_inputs = self.gen_nsp_input(token_ids, pred_ids)
        # [-1, 2]
        probs = self.nsp_predictor(nsp_inputs)

        return self.get_results(data_id, token_ids, pred_ids, probs)
Example #6
    def forward(self, inputs, encoder_word_pos, gsrm_word_pos):
        b, c, h, w = inputs.shape
        conv_features = paddle.reshape(inputs, shape=[-1, c, h * w])
        conv_features = paddle.transpose(conv_features, perm=[0, 2, 1])
        # transformer encoder
        b, t, c = conv_features.shape

        enc_inputs = [conv_features, encoder_word_pos, None]
        word_features = self.wrap_encoder_for_feature(enc_inputs)

        # pvam
        b, t, c = word_features.shape
        word_features = self.fc0(word_features)
        word_features_ = paddle.reshape(word_features, [-1, 1, t, c])
        word_features_ = paddle.tile(word_features_,
                                     [1, self.max_length, 1, 1])
        word_pos_feature = self.emb(gsrm_word_pos)
        word_pos_feature_ = paddle.reshape(word_pos_feature,
                                           [-1, self.max_length, 1, c])
        word_pos_feature_ = paddle.tile(word_pos_feature_, [1, 1, t, 1])
        y = word_pos_feature_ + word_features_
        y = F.tanh(y)
        attention_weight = self.fc1(y)
        attention_weight = paddle.reshape(attention_weight,
                                          shape=[-1, self.max_length, t])
        attention_weight = F.softmax(attention_weight, axis=-1)
        pvam_features = paddle.matmul(attention_weight,
                                      word_features)  #[b, max_length, c]
        return pvam_features
Example #7
    def model(self,
              input_ids,
              position_ids=None,
              attention_mask=None,
              masked_positions=None,
              use_cache=False,
              cache=None):
        outputs = self.gpt(input_ids,
                           position_ids=position_ids,
                           attention_mask=attention_mask,
                           use_cache=use_cache,
                           cache=cache)
        if use_cache:
            encoder_outputs, cached_kvs = outputs[:2]
        else:
            encoder_outputs = outputs
        logits = paddle.matmul(
            encoder_outputs,
            self.gpt.embeddings.word_embeddings.weight,
            transpose_y=True)

        if use_cache:
            return logits, cached_kvs
        else:
            return logits
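The output projection above reuses the word-embedding matrix (weight tying); a small sketch of that step alone, with assumed shapes:

import paddle

hidden = paddle.rand([2, 6, 32])           # [batch, seq_len, hidden]
word_embeddings = paddle.rand([100, 32])   # [vocab_size, hidden]
logits = paddle.matmul(hidden, word_embeddings, transpose_y=True)   # [2, 6, 100]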
Example #8
    def generate_relative_positions_embeddings(self,
                                               length,
                                               depth,
                                               max_relative_position=127):
        vocab_size = max_relative_position * 2 + 1
        range_vec = paddle.arange(length)
        range_mat = paddle.tile(range_vec, repeat_times=[length]).reshape(
            (length, length))
        distance_mat = range_mat - paddle.t(range_mat)
        distance_mat_clipped = paddle.clip(distance_mat.astype('float32'),
                                           -max_relative_position,
                                           max_relative_position)
        final_mat = distance_mat_clipped + max_relative_position
        embeddings_table = np.zeros([vocab_size, depth])

        for pos in range(vocab_size):
            for i in range(depth // 2):
                embeddings_table[pos, 2 * i] = np.sin(
                    pos / np.power(10000, 2 * i / depth))
                embeddings_table[pos, 2 * i + 1] = np.cos(
                    pos / np.power(10000, 2 * i / depth))

        embeddings_table_tensor = paddle.to_tensor(embeddings_table,
                                                   dtype='float32')
        flat_relative_positions_matrix = final_mat.reshape((-1, ))
        one_hot_relative_positions_matrix = paddle.nn.functional.one_hot(
            flat_relative_positions_matrix.astype('int64'),
            num_classes=vocab_size)
        embeddings = paddle.matmul(one_hot_relative_positions_matrix,
                                   embeddings_table_tensor)
        my_shape = final_mat.shape
        my_shape.append(depth)
        embeddings = embeddings.reshape(my_shape)
        return embeddings
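The final matmul against the one-hot matrix is a differentiable row gather from the sinusoidal table; a small sketch of that equivalence with assumed sizes:

import paddle
import paddle.nn.functional as F

table = paddle.rand([7, 4])                       # [vocab_size, depth]
idx = paddle.to_tensor([0, 3, 6], dtype='int64')
one_hot = F.one_hot(idx, num_classes=7)           # [3, 7]
rows = paddle.matmul(one_hot, table)              # [3, 4], same values as paddle.gather(table, idx)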
Example #9
    def forward(self, x):
        b, c, h, w = x.shape
        x = paddle.reshape(x, [b, c, h * w])
        mu = paddle.tile(self.mu, [b, 1, 1])

        with paddle.no_grad():
            for i in range(self.stage_num):
                x_t = paddle.transpose(x, [0, 2, 1])
                z = paddle.bmm(x_t, mu)
                z = F.softmax(z, axis=2)
                z_ = F.normalize(z, axis=1, p=1)
                mu = paddle.bmm(x, z_)
                mu = F.normalize(mu, axis=1, p=2)

        z_t = paddle.transpose(z, [0, 2, 1])
        x = paddle.matmul(mu, z_t)
        x = paddle.reshape(x, [b, c, h, w])

        if self.training:
            mu = paddle.mean(mu, 0, keepdim=True)
            if paddle.distributed.get_world_size() > 1:
                paddle.distributed.reduce(
                    mu / paddle.distributed.get_world_size(), 0)
            mu = F.normalize(mu, axis=1, p=2)
            self.mu = self.mu * (1 - self.momentum) + mu * self.momentum
        return x
Example #10
 def get_active_filter(self, in_nc, out_nc, kernel_size):
     start, end = compute_start_end(self._kernel_size[0], kernel_size)
     filters = self.weight[:in_nc, :out_nc, start:end, start:end]
     if self.transform_kernel != False and kernel_size < self._kernel_size[
             0]:
         start_filter = self.weight[:in_nc, :out_nc, :, :]
         for i in range(len(self.ks_set) - 1, 0, -1):
             src_ks = self.ks_set[i]
             if src_ks <= kernel_size:
                 break
             target_ks = self.ks_set[i - 1]
             start, end = compute_start_end(src_ks, target_ks)
             _input_filter = start_filter[:, :, start:end, start:end]
             _input_filter = paddle.reshape(
                 _input_filter,
                 shape=[(_input_filter.shape[0] * _input_filter.shape[1]),
                        -1])
             _input_filter = paddle.matmul(
                 _input_filter,
                 self.__getattr__('%dto%d_matrix' %
                                  (src_ks, target_ks)), False, False)
             _input_filter = paddle.reshape(
                 _input_filter,
                 shape=[
                     filters.shape[0], filters.shape[1], target_ks, target_ks
                 ])
             start_filter = _input_filter
         filters = start_filter
     return filters
Example #11
    def forward(self, input, label, init_hidden):

        init_h = paddle.reshape(init_hidden,
                                shape=[self.num_layers, -1, self.hidden_size])

        x_emb = self.embedding(input)

        x_emb = paddle.reshape(x_emb,
                               shape=[-1, self.num_steps, self.hidden_size])
        if self.dropout is not None and self.dropout > 0.0:
            x_emb = paddle.nn.functional.dropout(x_emb,
                                                 p=self.dropout,
                                                 mode='upscale_in_train')
        rnn_out, last_hidden = self.simple_gru_rnn(x_emb, init_h)

        projection = paddle.matmul(x=rnn_out, y=self.softmax_weight)
        projection = paddle.add(x=projection, y=self.softmax_bias)
        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        pre_2d = paddle.reshape(projection, shape=[-1, self.vocab_size])
        label_2d = paddle.reshape(label, shape=[-1, 1])
        acc = paddle.metric.accuracy(input=pre_2d, label=label_2d, k=20)
        loss = paddle.reshape(loss, shape=[-1, self.num_steps])
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)

        return loss, last_hidden, acc
Example #12
def _layer_dot(inputs, node):
    """
    dot product, e.g: [2, 1, 128] * ( expand([1, 128, 1])->[2, 128, 1] )
    """
    input_re = paddle.unsqueeze(inputs, axis=[2])
    dot_res = paddle.matmul(node, input_re)
    return dot_res
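A usage sketch of _layer_dot; the concrete sizes below are assumptions chosen to match the shape hint in its docstring:

import paddle

inputs = paddle.rand([2, 128])       # [batch, emb_size]
node = paddle.rand([2, 1, 128])      # one node embedding per sample
dot_res = _layer_dot(inputs, node)   # [2, 1, 1]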
Example #13
    def hierarchical_self_supervision(self, em, adj):
        def row_shuffle(embedding):
            return embedding[paddle.randperm(paddle.shape(embedding)[0])]

        def row_column_shuffle(embedding):
            embedding = paddle.transpose(embedding, perm=[1, 0])
            corrupted_embedding = paddle.transpose(embedding[paddle.randperm(
                paddle.shape(embedding)[0])],
                                                   perm=[1, 0])
            return corrupted_embedding[paddle.randperm(
                paddle.shape(corrupted_embedding)[0])]

        def score(x1, x2):
            return paddle.sum(paddle.multiply(x1, x2), axis=1)

        user_embeddings = em
        edge_embeddings = paddle.matmul(adj, user_embeddings)

        # Local MIN
        pos = score(user_embeddings, edge_embeddings)
        neg1 = score(row_shuffle(user_embeddings), edge_embeddings)
        neg2 = score(row_column_shuffle(edge_embeddings), user_embeddings)
        local_loss = paddle.sum(-paddle.log(F.sigmoid(pos - neg1)) -
                                paddle.log(F.sigmoid(neg1 - neg2)))

        # Global MIN
        graph = paddle.mean(edge_embeddings, axis=0)
        pos = score(edge_embeddings, graph)
        neg1 = score(row_column_shuffle(edge_embeddings), graph)
        global_loss = paddle.sum(-paddle.log(F.sigmoid(pos - neg1)))

        return global_loss + local_loss
Example #14
 def channel_attention(self, *channel_embeddings):
     """
         channel_embeddings_1: (num_user, emb_size)
         attention_mat: (emb_size, emb_size)
         attention: (1, emb_size)
     """
     weights = []
     for embedding in channel_embeddings:
          # matmul: (num_user, emb_size) @ (emb_size, emb_size) -> (num_user, emb_size)
          # elementwise-multiply by attention (1, emb_size), then sum over axis 1 -> (num_user,)
         weights.append(
             paddle.sum(
                 paddle.multiply(
                     paddle.matmul(embedding,
                                   self.weights["attention_mat"]),
                     self.weights["attention"]), 1))
     t = paddle.stack(weights)
     # (num_user, channel_num)
     score = F.softmax(paddle.transpose(t, perm=[1, 0]))
     mixed_embeddings = 0.0
     for i in range(len(weights)):
          # transpose to (emb_size, num_user), scale each user column by the i-th channel score,
          # then transpose back -> (num_user, emb_size)
         mixed_embeddings += paddle.transpose(paddle.multiply(
             paddle.transpose(channel_embeddings[i], perm=[1, 0]),
             paddle.transpose(score, perm=[1, 0])[i]),
                                              perm=[1, 0])
     return mixed_embeddings, score
Example #15
    def forward(self, input, target=None):
        """
        anchor and positive(should include label)
        """
        features = input["features"]
        reg_lambda = self.reg_lambda
        batch_size = features.shape[0]
        fea_dim = features.shape[1]
        num_class = batch_size // 2

        #reshape
        out_feas = paddle.reshape(features, shape=[-1, 2, fea_dim])
        anc_feas, pos_feas = paddle.split(out_feas, num_or_sections=2, axis=1)
        anc_feas = paddle.squeeze(anc_feas, axis=1)
        pos_feas = paddle.squeeze(pos_feas, axis=1)

        #get simi matrix
        similarity_matrix = paddle.matmul(
            anc_feas, pos_feas, transpose_y=True)  #get similarity matrix
        sparse_labels = paddle.arange(0, num_class, dtype='int64')
        xentloss = paddle.nn.CrossEntropyLoss()(
            similarity_matrix, sparse_labels)  #by default: mean

        #l2 norm
        reg = paddle.mean(paddle.sum(paddle.square(features), axis=1))
        l2loss = 0.5 * reg_lambda * reg
        return {"npairsloss": xentloss + l2loss}
Example #16
    def train_iter(self, *inputs, **kwargs):
        img_q, img_k = inputs

        # compute query features
        q = self.encoder_q(img_q)  # queries: NxC
        q = nn.functional.normalize(q, axis=1)

        # compute key features
        with paddle.no_grad():  # no gradient to keys
            self._momentum_update_key_encoder()  # update the key encoder

            # shuffle for making use of BN
            im_k, idx_unshuffle = self._batch_shuffle_ddp(img_k)

            k = self.encoder_k(im_k)  # keys: NxC
            k = nn.functional.normalize(k, axis=1)

            # undo shuffle
            k = self._batch_unshuffle_ddp(k, idx_unshuffle)

        # compute logits
        # FIXME: Einstein sum is more intuitive
        # positive logits: Nx1
        l_pos = paddle.sum(q * k, axis=1).unsqueeze(-1)
        # negative logits: NxK
        l_neg = paddle.matmul(q, self.queue.clone().detach())

        outputs = self.head(l_pos, l_neg)
        self._dequeue_and_enqueue(k)

        return outputs
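The two logit terms above differ only in where the keys come from: the paired positives or the memory queue (stored as C x K). A shape-only sketch with assumed sizes:

import paddle

q = paddle.rand([8, 128])          # queries, N x C (normalized in the model)
k = paddle.rand([8, 128])          # positive keys, N x C
queue = paddle.rand([128, 4096])   # negative keys, C x K
l_pos = paddle.sum(q * k, axis=1).unsqueeze(-1)   # [8, 1]
l_neg = paddle.matmul(q, queue)                   # [8, 4096]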
Example #17
    def forward(self, x):
        n, c, h, w = x.shape
        g_x = paddle.reshape(self.g(x), [n, self.inter_channels, -1])
        g_x = paddle.transpose(g_x, [0, 2, 1])

        if self.mode == 'gaussian':
            theta_x = paddle.reshape(x, [n, self.inter_channels, -1])
            theta_x = paddle.transpose(theta_x, [0, 2, 1])
            if self.sub_sample:
                phi_x = paddle.reshape(self.phi(x),
                                       [n, self.inter_channels, -1])
            else:
                phi_x = paddle.reshape(x, [n, self.in_channels, -1])

        elif self.mode == 'concatenation':
            theta_x = paddle.reshape(self.theta(x),
                                     [n, self.inter_channels, -1, 1])
            phi_x = paddle.reshape(self.phi(x), [n, self.inter_channels, 1, -1])

        else:
            theta_x = paddle.reshape(self.theta(x),
                                     [n, self.inter_channels, -1])
            theta_x = paddle.transpose(theta_x, [0, 2, 1])
            phi_x = paddle.reshape(self.phi(x), [n, self.inter_channels, -1])

        pairwise_func = getattr(self, self.mode)
        pairwise_weight = pairwise_func(theta_x, phi_x)
        y = paddle.matmul(pairwise_weight, g_x)
        y = paddle.transpose(y, [0, 2, 1])
        y = paddle.reshape(y, [n, self.inter_channels, h, w])

        output = x + self.conv_out(y)

        return output
Example #18
    def forward(self, inputs):
        input_emb = self.embedding(inputs[0])
        true_emb_w = self.embedding_w(inputs[1])
        true_emb_b = self.embedding_b(inputs[1])
        input_emb = paddle.squeeze(x=input_emb, axis=[1])
        true_emb_w = paddle.squeeze(x=true_emb_w, axis=[1])
        true_emb_b = paddle.squeeze(x=true_emb_b, axis=[1])

        neg_emb_w = self.embedding_w(inputs[2])
        neg_emb_b = self.embedding_b(inputs[2])

        neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, self.neg_num])

        true_logits = paddle.add(x=paddle.sum(x=paddle.multiply(x=input_emb,
                                                                y=true_emb_w),
                                              axis=1,
                                              keepdim=True),
                                 y=true_emb_b)

        input_emb_re = paddle.reshape(input_emb, shape=[-1, 1, self.emb_dim])
        neg_matmul = paddle.matmul(input_emb_re, neg_emb_w, transpose_y=True)
        neg_matmul_re = paddle.reshape(neg_matmul, shape=[-1, self.neg_num])
        neg_logits = paddle.add(x=neg_matmul_re, y=neg_emb_b_vec)

        return true_logits, neg_logits
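A small shape sketch of the negative-sampling matmul above (batch, embedding and negative-sample sizes are assumptions):

import paddle

B, D, neg_num = 4, 16, 5
input_emb = paddle.rand([B, D])
neg_emb_w = paddle.rand([B, neg_num, D])
input_emb_re = paddle.reshape(input_emb, shape=[-1, 1, D])              # [B, 1, D]
neg_matmul = paddle.matmul(input_emb_re, neg_emb_w, transpose_y=True)   # [B, 1, neg_num]
neg_logits = paddle.reshape(neg_matmul, shape=[-1, neg_num])            # [B, neg_num]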
Example #19
 def define_layer(self, input):
     x = fluid.data(name="x", shape=self.x_shape)
     y = fluid.data(name="y", shape=self.y_shape)
     self.input = x
     self.y = y
     out = paddle.matmul(x, y)
     self.output = out
Example #20
 def model(self, x, w, bias, opt):
     paddle.seed(0)
     place = paddle.CPUPlace()
     if paddle.device.is_compiled_with_cuda():
         place = paddle.CUDAPlace(0)
     exe = paddle.static.Executor(place)
     main = paddle.static.Program()
     startup = paddle.static.Program()
     with paddle.static.program_guard(main, startup):
         input_x = paddle.static.data('x', x.shape, dtype=x.dtype)
         input_x.stop_gradient = False
         params_w = paddle.static.create_parameter(shape=w.shape,
                                                   dtype=w.dtype,
                                                   is_bias=False)
         params_bias = paddle.static.create_parameter(shape=bias.shape,
                                                      dtype=bias.dtype,
                                                      is_bias=True)
         y = paddle.tanh(paddle.matmul(input_x, params_w) + params_bias)
         loss = paddle.norm(y, p=2)
         opt = opt
         _, grads = opt.minimize(loss)
         if prim_enabled():
             prim2orig(main.block(0))
     exe.run(startup)
     grads = exe.run(main,
                     feed={
                         'x': x,
                         'w': w,
                         'bias': bias
                     },
                     fetch_list=grads)
     return grads
Example #21
def build_program():
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()

    with paddle.static.program_guard(main_program, startup_program):
        with paddle.static.device_guard('cpu'):
            data = paddle.ones([4, 64], dtype='float32', name='data')

        # data -> [memcpy_h2d] -> data' -> [matmul] -> out ->[add] -> add_out
        with paddle.static.device_guard('gpu'):
            weight = paddle.randn([64, 64], name='weight')  # gpu
            matmul_out = paddle.matmul(data, weight, name='matmul_out')  # gpu
            bias = paddle.ones([4, 64], dtype='float32', name='bias')
            add_out = paddle.add(matmul_out, bias, name='add_out')

        # add_out -> [memcpy_d2h] -> add_out' -> [sub] -> sub_out -> [tanh] -> tanh_out
        with paddle.static.device_guard('cpu'):
            sub_out = paddle.subtract(add_out, data, name='sub_out')
            tanh_out = paddle.tanh(sub_out, name='tanh_out')

        with paddle.static.device_guard('gpu'):
            bias_1 = paddle.add(bias, sub_out, name='bias_1')
            out_before = paddle.tanh(bias_1, name='out_before')
            out_last = paddle.subtract(tanh_out, data, name='out_last')

            out = paddle.add(out_before, out_last, name='out')
            mean = paddle.mean(out, name='mean_out')

    return main_program, startup_program, [mean]
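A usage sketch for build_program: compile-time construction, then execution with a static-graph executor (assumes a CUDA build, since the program places several ops under device_guard('gpu')):

import paddle

paddle.enable_static()
main_program, startup_program, fetch_list = build_program()
exe = paddle.static.Executor(paddle.CUDAPlace(0))
exe.run(startup_program)
mean_out, = exe.run(main_program, fetch_list=fetch_list)
print(mean_out)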
Example #22
    def encode_box3d(self, rotys, dims, locs):
        """
        construct 3d bounding box for each object.
        Args:
            rotys: rotation in shape N
            dims: dimensions of objects
            locs: locations of objects

        Returns:

        """
        if len(rotys.shape) == 2:
            rotys = rotys.flatten()
        if len(dims.shape) == 3:
            dims = paddle.reshape(dims, (-1, 3))
        if len(locs.shape) == 3:
            locs = paddle.reshape(locs, (-1, 3))

        N = rotys.shape[0]
        ry = self.rad_to_matrix(rotys, N)

        # if test:
        #     dims.register_hook(lambda grad: print('dims grad', grad.sum()))
        # dims = paddle.reshape(dims, (-1, 1)).tile([1, 8])

        # dims[::3, :4] = 0.5 * dims[::3, :4]
        # dims[1::3, :4] = 0.
        # dims[2::3, :4] = 0.5 * dims[2::3, :4]

        # dims[::3, 4:] = -0.5 * dims[::3, 4:]
        # dims[1::3, 4:] = -dims[1::3, 4:]
        # dims[2::3, 4:] = -0.5 * dims[2::3, 4:]

        dim_left_1 = (0.5 * dims[:, 0]).unsqueeze(-1)
        dim_left_2 = paddle.zeros([dims.shape[0], 1]).astype(
            "float32")  #(paddle.zeros_like(dims[:, 1])).unsqueeze(-1)
        dim_left_3 = (0.5 * dims[:, 2]).unsqueeze(-1)
        dim_left = paddle.concat([dim_left_1, dim_left_2, dim_left_3], axis=1)
        dim_left = paddle.reshape(dim_left, (-1, 1)).tile([1, 4])

        dim_right_1 = (-0.5 * dims[:, 0]).unsqueeze(-1)
        dim_right_2 = (-dims[:, 1]).unsqueeze(-1)
        dim_right_3 = (-0.5 * dims[:, 2]).unsqueeze(-1)
        dim_right = paddle.concat([dim_right_1, dim_right_2, dim_right_3],
                                  axis=1)
        dim_right = paddle.reshape(dim_right, (-1, 1)).tile([1, 4])

        dims = paddle.concat([dim_left, dim_right], axis=1)

        index = paddle.to_tensor([[4, 0, 1, 2, 3, 5, 6, 7],
                                  [4, 5, 0, 1, 6, 7, 2, 3],
                                  [4, 5, 6, 0, 1, 2, 3, 7]]).tile([N, 1])

        box_3d_object = gather_op(dims, 1, index)

        box_3d = paddle.matmul(ry, paddle.reshape(box_3d_object, (N, 3, -1)))
        # box_3d += locs.unsqueeze(-1).repeat(1, 1, 8)
        box_3d += locs.unsqueeze(-1).tile((1, 1, 8))

        return box_3d
Example #23
    def forward(self, embedding, targets):
        if isinstance(embedding, dict):
            embedding = embedding['features']
        # Normalize embedding features
        embedding = F.normalize(embedding, axis=1)
        dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)

        N = dist_mat.shape[0]
        is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
        is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')

        # Mask scores related to itself
        is_pos = is_pos - paddle.eye(N, N)

        s_p = dist_mat * is_pos
        s_n = dist_mat * is_neg

        logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
        logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 -
                                                                     is_neg)

        loss = F.softplus(
            paddle.logsumexp(logit_p, axis=1) +
            paddle.logsumexp(logit_n, axis=1)).mean()

        return {"PairwiseCosface": loss}
Example #24
def matmul(name, x1, x2, x_transpose=False, y_transpose=False):
    import paddle as pdpd

    pdpd.enable_static()
    with pdpd.static.program_guard(pdpd.static.Program(),
                                   pdpd.static.Program()):
        node_x1 = pdpd.static.data(name='x1', shape=x1.shape, dtype=x1.dtype)
        node_x2 = pdpd.static.data(name='x2', shape=x2.shape, dtype=x2.dtype)
        result = pdpd.matmul(node_x1, node_x2, x_transpose, y_transpose)
        #result = pdpd.static.nn.batch_norm(mul_node, use_global_stats=True)

        cpu = pdpd.static.cpu_places(1)
        exe = pdpd.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(pdpd.static.default_startup_program())

        outs = exe.run(feed={'x1': x1, 'x2': x2}, fetch_list=[result])
        saveModel(name,
                  exe,
                  feedkeys=['x1', 'x2'],
                  fetchlist=[result],
                  inputs=[x1, x2],
                  outputs=[outs[0]],
                  target_dir=sys.argv[1])

    return outs[0]
Example #25
    def forward(self,
                query_input_ids,
                pos_title_input_ids,
                neg_title_input_ids,
                is_prediction=False,
                query_token_type_ids=None,
                query_position_ids=None,
                query_attention_mask=None,
                pos_title_token_type_ids=None,
                pos_title_position_ids=None,
                pos_title_attention_mask=None,
                neg_title_token_type_ids=None,
                neg_title_position_ids=None,
                neg_title_attention_mask=None):
        query_cls_embedding = self.get_pooled_embedding(
            query_input_ids, query_token_type_ids, query_position_ids,
            query_attention_mask)

        pos_title_cls_embedding = self.get_pooled_embedding(
            pos_title_input_ids, pos_title_token_type_ids,
            pos_title_position_ids, pos_title_attention_mask)

        neg_title_cls_embedding = self.get_pooled_embedding(
            neg_title_input_ids, neg_title_token_type_ids,
            neg_title_position_ids, neg_title_attention_mask)

        all_title_cls_embedding = paddle.concat(
            x=[pos_title_cls_embedding, neg_title_cls_embedding], axis=0)

        if is_prediction:
            logits = paddle.dot(query_cls_embedding, pos_title_cls_embedding)
            outputs = {
                "probs": logits,
                "q_rep": query_cls_embedding,
                "p_rep": pos_title_cls_embedding
            }
            return outputs

        if self.use_cross_batch:
            tensor_list = []
            paddle.distributed.all_gather(tensor_list, all_title_cls_embedding)
            all_title_cls_embedding = paddle.concat(x=tensor_list, axis=0)

        # multiply
        logits = paddle.matmul(query_cls_embedding,
                               all_title_cls_embedding,
                               transpose_y=True)

        batch_size = query_cls_embedding.shape[0]

        labels = paddle.arange(batch_size * self.rank * 2,
                               batch_size * (self.rank * 2 + 1),
                               dtype='int64')
        labels = paddle.reshape(labels, shape=[-1, 1])

        accuracy = paddle.metric.accuracy(input=logits, label=labels)
        loss = F.cross_entropy(input=logits, label=labels)
        outputs = {"loss": loss, "accuracy": accuracy}

        return outputs
Example #26
    def forward(self, x):
        x_shape = paddle.shape(x)
        x = x.flatten(2)
        mu = paddle.tile(self.mu, [x_shape[0], 1, 1])

        with paddle.no_grad():
            for i in range(self.stage_num):
                x_t = paddle.transpose(x, [0, 2, 1])
                z = paddle.bmm(x_t, mu)
                z = F.softmax(z, axis=2)
                z_ = F.normalize(z, axis=1, p=1)
                mu = paddle.bmm(x, z_)
                mu = F.normalize(mu, axis=1, p=2)

        z_t = paddle.transpose(z, [0, 2, 1])
        x = paddle.matmul(mu, z_t)
        x = paddle.reshape(x, [0, self.c, x_shape[2], x_shape[3]])

        if self.training:
            mu = paddle.mean(mu, 0, keepdim=True)
            mu = F.normalize(mu, axis=1, p=2)
            mu = self.mu * (1 - self.momentum) + mu * self.momentum
            if paddle.distributed.get_world_size() > 1:
                mu = paddle.distributed.all_reduce(mu)
                mu /= paddle.distributed.get_world_size()
            self.mu = mu

        return x
Example #27
def einsum4x4(equation, x, y):
    """
    Only works for 4D x 4D.
    """
    idx_x, idx_y, idx_z = re.split(",|->", equation)
    # Compute repeated index
    repeated_idx = list(set(idx_x + idx_y) - set(idx_z))

    unique_idx_x = list(set(idx_x) - set(idx_y))
    unique_idx_y = list(set(idx_y) - set(idx_x))
    common_idx = list(set(idx_x) & set(idx_y) - set(repeated_idx))

    new_idx_x = common_idx + unique_idx_x + repeated_idx
    new_idx_y = common_idx + unique_idx_y + repeated_idx
    new_idx_z = common_idx + unique_idx_x + unique_idx_y

    perm_x = [idx_x.index(i) for i in new_idx_x]
    perm_y = [idx_y.index(i) for i in new_idx_y]
    perm_z = [new_idx_z.index(i) for i in idx_z]

    x = paddle.transpose(x, perm=perm_x)
    y = paddle.transpose(y, perm=perm_y)
    z = paddle.matmul(x=x, y=y, transpose_y=True)
    z = paddle.transpose(z, perm=perm_z)
    return z
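A usage sketch of einsum4x4 for an attention-style contraction; with the assumed 4-D shapes below it matches a direct paddle.matmul with transpose_y=True (the function itself needs re and paddle imported in its module):

import paddle

q = paddle.rand([2, 4, 8, 16])
k = paddle.rand([2, 4, 8, 16])
scores = einsum4x4("bhld,bhmd->bhlm", q, k)          # [2, 4, 8, 8]
reference = paddle.matmul(q, k, transpose_y=True)    # same values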
Example #28
 def forward(self, x):
     # NOTE: manually trigger `__iter__` logic.
     params = list(self.params.__iter__())
     out = paddle.matmul(x, params[0])
     out = paddle.add(out, params[1])
     out = paddle.tanh(out)
     return out
Example #29
    def forward(self, input, label, init_hidden, init_cell):

        init_h = paddle.reshape(init_hidden,
                                shape=[self.num_layers, -1, self.hidden_size])

        init_c = paddle.reshape(init_cell,
                                shape=[self.num_layers, -1, self.hidden_size])

        x_emb = self.embedding(input)

        x_emb = paddle.reshape(x_emb,
                               shape=[-1, self.num_steps, self.hidden_size])
        if self.dropout is not None and self.dropout > 0.0:
            x_emb = paddle.nn.functional.dropout(
                x_emb,
                p=self.dropout,
                mode='upscale_in_train')
        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(
            x_emb, init_h, init_c)

        projection = paddle.matmul(x=rnn_out, y=self.softmax_weight)
        projection = paddle.add(x=projection, y=self.softmax_bias)

        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = paddle.reshape(loss, shape=[-1, self.num_steps])
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)

        return loss, last_hidden, last_cell
Example #30
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    encoder_bsz, encoder_seqlen = encoder_inputs.shape[:2]
    attn_bias = paddle.reshape(
        paddle.arange(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
    decoder_bias = paddle.cast(
        (paddle.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
        'float32')  #[1, decoderlen, decoderlen]
    encoder_bias = paddle.unsqueeze(
        paddle.cast(paddle.ones_like(encoder_inputs), 'float32'),
        [1])  #[bsz, 1, encoderlen]
    encoder_bias = paddle.expand(encoder_bias,
                                 [encoder_bsz, decoder_seqlen, encoder_seqlen
                                  ])  #[bsz,decoderlen, encoderlen]
    decoder_bias = paddle.expand(decoder_bias,
                                 [decoder_bsz, decoder_seqlen, decoder_seqlen
                                  ])  #[bsz, decoderlen, decoderlen]
    if step > 0:
        bias = paddle.concat([
            encoder_bias,
            paddle.ones([decoder_bsz, decoder_seqlen, step], 'float32'),
            decoder_bias
        ], -1)
    else:
        bias = paddle.concat([encoder_bias, decoder_bias], -1)
    return bias
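A usage sketch for gen_bias with small dummy inputs (shapes are assumptions): the result concatenates an all-ones encoder mask with a causal (lower-triangular) decoder mask along the last axis:

import paddle

encoder_inputs = paddle.ones([2, 5], dtype='int64')   # [bsz, encoder_seqlen]
decoder_inputs = paddle.ones([2, 3], dtype='int64')   # [bsz, decoder_seqlen]
bias = gen_bias(encoder_inputs, decoder_inputs, step=0)
print(bias.shape)   # [2, 3, 8]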