Example #1
def _neg_loss(pred, gt):
    '''Modified focal loss. Exactly the same as CornerNet.
    Runs faster and costs a little bit more memory.

    Arguments:
      pred (batch x c x h x w)
      gt (batch x c x h x w)
    '''
    pos_inds = gt.__eq__(1).astype('float32')
    neg_inds = gt.__lt__(1).astype('float32')

    neg_weights = nd.power(1 - gt, 4)

    loss = 0

    pos_loss = nd.log(pred) * nd.power(1 - pred, 2) * pos_inds
    neg_loss = nd.log(1 - pred) * nd.power(pred, 2) * neg_weights * neg_inds

    num_pos = pos_inds.astype('float32').sum()
    pos_loss = pos_loss.sum()
    neg_loss = neg_loss.sum()

    if num_pos == 0:
        loss = loss - neg_loss
    else:
        loss = loss - (pos_loss + neg_loss) / num_pos
    return loss
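A minimal usage sketch (toy shapes and values assumed here, not taken from the example itself):

from mxnet import nd

pred = nd.random.uniform(low=1e-4, high=1 - 1e-4, shape=(2, 3, 4, 4))  # sigmoid heatmap
gt = nd.zeros((2, 3, 4, 4))
gt[0, 0, 1, 1] = 1.0  # a single positive location; everything else counts as negative
print(_neg_loss(pred, gt))  # scalar NDArray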
 def implement_1(self, x, label):
     '''
     implementation following the paper
     '''
     #  weight normalize
     with x.context:
         w = self.weight.data()
     w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))
     #  cos_theta = x'w/|x|. note: |w| = 1
     x_norm = nd.power(x, 2)
     x_norm = nd.sum(x_norm, axis=1)
     x_norm = nd.sqrt(x_norm)
     cos_theta = nd.dot(x, w_norm, transpose_b=True)
     cos_theta = cos_theta / x_norm.reshape((-1, 1))
     cos_theta = nd.clip(cos_theta, -1, 1)
     #  cos_m_theta = cos(m * theta)
     cos_m_theta = self.margin_cos[self.margin](cos_theta)
     #  k
     with mx.autograd.pause():
         theta = nd.arccos(cos_theta)
         k = nd.floor(self.margin * theta / math.pi)
     #  i=j is phi_theta and i!=j is cos_theta
     phi_theta = ((-1)**k) * cos_m_theta - 2 * k
     x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
     x_norm_cos_theta = x_norm.reshape((-1, 1)) * cos_theta
     #  i=j index
     with mx.autograd.pause():
         index = nd.one_hot(label, x_norm_phi_theta.shape[1])
     #  output
     with mx.autograd.pause():
         lamb = self.__get_lambda()
     output = x_norm_cos_theta * 1.0
     output = output - x_norm_cos_theta * index / (1 + lamb)
     output = output + x_norm_phi_theta * index / (1 + lamb)
     return output
Example #3
def euclidean_dist(x, y):
    m, n = x.shape[0], y.shape[0]
    xx = nd.power(x, 2).sum(axis=1, keepdims=True).broadcast_to((m, n))
    yy = nd.power(y, 2).sum(axis=1, keepdims=True).broadcast_to((n, m)).T
    dist = xx + yy
    dist = dist - 2 * nd.dot(x, y.T)
    dist = dist.clip(a_min=1e-12, a_max=1e12).sqrt()
    return dist
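A quick check of this helper (toy points assumed):

from mxnet import nd

x = nd.array([[0, 0], [3, 4]])   # (m=2, d=2)
y = nd.array([[0, 0], [0, 4]])   # (n=2, d=2)
print(euclidean_dist(x, y))      # approximately [[0, 4], [5, 3]]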
Example #4
 def forward(self, X):
     self.linear_item = nd.dot(X, self.w.data())
     self.interaction_item = nd.sum(
         nd.power(nd.dot(X, self.latent_vec.data()), 2) -
         nd.dot(nd.power(X, 2), nd.power(self.latent_vec.data(), 2)),
         axis=1,
         keepdims=True)
     self.y_hat = self.linear_item + self.interaction_item + self.b.data()
     return self.y_hat
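The interaction term above uses the factorization-machine identity; a standalone sketch of that identity with plain nd ops (toy shapes and the conventional 0.5 factor are assumed here, not taken from the example):

from mxnet import nd

X = nd.random.uniform(shape=(4, 6))   # 4 samples, 6 features (toy sizes)
V = nd.random.normal(shape=(6, 3))    # latent vectors with 3 factors
# pairwise-interaction identity:
# sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [ (X V)_f^2 - (X^2 V^2)_f ]
interaction = 0.5 * nd.sum(
    nd.power(nd.dot(X, V), 2) - nd.dot(nd.power(X, 2), nd.power(V, 2)),
    axis=1, keepdims=True)
print(interaction.shape)  # (4, 1)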
Example #5
    def forward(self, adj, feat):
        r"""Compute (Dense) Graph Convolution layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply Graph Convolution on. When
            applied to a unidirectional bipartite graph, ``adj`` should be of shape
            :math:`(N_{out}, N_{in})`; when applied to a homogeneous graph, ``adj``
            should be of shape :math:`(N, N)`. In both cases, a row represents a
            destination node while a column represents a source node.
        feat : mxnet.NDArray
            The input feature.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        adj = adj.astype(feat.dtype).as_in_context(feat.context)
        src_degrees = nd.clip(adj.sum(axis=0), a_min=1, a_max=float('inf'))
        dst_degrees = nd.clip(adj.sum(axis=1), a_min=1, a_max=float('inf'))
        feat_src = feat

        if self._norm == 'both':
            norm_src = nd.power(src_degrees, -0.5)
            shp_src = norm_src.shape + (1, ) * (feat.ndim - 1)
            norm_src = norm_src.reshape(shp_src).as_in_context(feat.context)
            feat_src = feat_src * norm_src

        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            feat_src = nd.dot(feat_src, self.weight.data(feat_src.context))
            rst = nd.dot(adj, feat_src)
        else:
            # aggregate first then mult W
            rst = nd.dot(adj, feat_src)
            rst = nd.dot(rst, self.weight.data(feat_src.context))

        if self._norm != 'none':
            if self._norm == 'both':
                norm_dst = nd.power(dst_degrees, -0.5)
            else:  # right
                norm_dst = 1.0 / dst_degrees
            shp_dst = norm_dst.shape + (1, ) * (feat.ndim - 1)
            norm_dst = norm_dst.reshape(shp_dst).as_in_context(feat.context)
            rst = rst * norm_dst

        if self.bias is not None:
            rst = rst + self.bias.data(feat.context)

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
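With ``_norm == 'both'`` the layer applies the usual symmetric normalization; a tiny plain-nd sketch on an assumed 3-node adjacency matrix:

from mxnet import nd

adj = nd.array([[0, 1, 1],
                [1, 0, 0],
                [1, 0, 0]])
feat = nd.ones((3, 2))
deg = nd.clip(adj.sum(axis=1), a_min=1, a_max=float('inf'))
norm = nd.power(deg, -0.5).reshape((-1, 1))
out = nd.dot(adj, feat * norm) * norm   # D^{-1/2} A D^{-1/2} X
print(out)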
Example #6
def GoodFitting():  # Good fitting: third-order polynomial
    n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5
    features = nd.random.normal(shape=(n_train + n_test, 1))
    poly_features = nd.concat(features, nd.power(features, 2),
                              nd.power(features, 3))
    labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
              + true_w[2] * poly_features[:, 2] + true_b)
    labels += nd.random.normal(scale=0.1, shape=labels.shape)
    fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :],
                 labels[:n_train], labels[n_train:])
Example #7
def box_ciou(b1, b2):
    """
    输入为:
    ----------
    b1: NDarray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: NDarray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    返回为:
    -------
    ciou: NDarray, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    # top-left and bottom-right corners of the predicted boxes
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    # top-left and bottom-right corners of the ground-truth boxes
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # IoU between the predicted and ground-truth boxes
    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins,
                              nd.zeros_like(intersect_maxes))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / nd.clip(union_area, a_min=1e-6, a_max=float('inf'))

    # squared distance between the box centers
    center_distance = nd.sum(nd.power((b1_xy - b2_xy), 2), axis=-1)

    # top-left and bottom-right corners of the smallest enclosing box
    enclose_mins = nd.minimum(b1_mins, b2_mins)
    enclose_maxes = nd.maximum(b1_maxes, b2_maxes)
    enclose_wh = nd.maximum(enclose_maxes - enclose_mins,
                            nd.zeros_like(intersect_maxes))
    # squared diagonal of the enclosing box
    enclose_diagonal = nd.sum(nd.power(enclose_wh, 2), axis=-1)
    ciou = iou - 1.0 * center_distance / nd.clip(enclose_diagonal,
                                                 a_min=1e-6,
                                                 a_max=float('inf'))

    v = (4 / (math.pi**2)) * nd.power(
        (nd.arctan(b1_wh[..., 0] / nd.clip(b1_wh[..., 1], a_min=1e-6, a_max=float('inf'))) -
         nd.arctan(b2_wh[..., 0] / nd.clip(b2_wh[..., 1], a_min=1e-6, a_max=float('inf')))), 2)
    alpha = v / nd.clip(1.0 - iou + v, a_min=1e-6, a_max=float('inf'))
    ciou = ciou - alpha * v
    return ciou
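A quick sanity check on an assumed pair of identical boxes in normalized xywh form:

import math  # required by box_ciou above
from mxnet import nd

b1 = nd.array([0.5, 0.5, 0.2, 0.2]).reshape((1, 1, 1, 1, 4))
b2 = b1.copy()
print(box_ciou(b1, b2))  # identical boxes -> CIoU of 1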
Example #8
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     if not self._from_sigmoid:
         max_val = F.relu(-pred)
         loss = pred - pred * label + max_val + F.log(F.exp(-max_val) + F.exp(-pred - max_val))
     else:
         p = 1 / (1 + F.exp(-pred))  # sigmoid
         # batch_ratios is assumed to be defined elsewhere
         weights = F.exp(label + (1 - label * 2) * batch_ratios)
         gamma = 2
         w_p, w_n = F.power(1. - p, gamma), F.power(p, gamma)
         loss = - (w_p * F.log(p + 1e-12) * label + w_n * F.log(1. - p + 1e-12) * (1. - label))
         loss *= weights
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #9
def euclidean_dist(x, y):
    """
    Args:
      x: pytorch Variable, with shape [m, d]
      y: pytorch Variable, with shape [n, d]
    Returns:
      dist: pytorch Variable, with shape [m, n]
    """
    m, n = x.shape[0], y.shape[0]
    xx = nd.power(x, 2).sum(axis=1, keepdims=True).broadcast_to((m, n))
    yy = nd.power(y, 2).sum(axis=1, keepdims=True).broadcast_to((n, m)).T
    dist = xx + yy
    dist = dist - 2 * nd.dot(x, y.T)
    dist = dist.clip(a_min=1e-12, a_max=1e12).sqrt()  # for numerical stability
    return dist
Example #10
def _not_faster_neg_loss(pred, gt):
    pos_inds = gt.__eq__(1).astype('float32')
    neg_inds = gt.__lt__(1).astype('float32')
    num_pos = pos_inds.astype('float32').sum()
    neg_weights = nd.power(1 - gt, 4)

    loss = 0
    trans_pred = pred * neg_inds + (1 - pred) * pos_inds
    weight = neg_weights * neg_inds + pos_inds
    all_loss = nd.log(1 - trans_pred) * nd.power(trans_pred, 2) * weight
    all_loss = all_loss.sum()

    if num_pos > 0:
        all_loss /= num_pos
    loss -= all_loss
    return loss
def train_model(model, train_xs, train_ys):
    with device_ctx:
        # Convert to ndarray
        train_xs, train_ys = nd.array(train_xs), nd.array(train_ys)

        # Prepare the train dataset and model
        batch_size = 100
        train_dataset = data.ArrayDataset(train_xs, train_ys)
        train_data_iter = data.DataLoader(train_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

        # model = nn.Sequential()
        # model.add(nn.Dense(1, activation=None))
        # # model.add(nn.Dense(1))

        model.initialize(init.Normal(sigma=0.1))
        # model.initialize(init.Xavier())

        loss_f = loss.L2Loss()

        trainer = gluon.Trainer(model.collect_params(), 'sgd',
                                {'learning_rate': 0.05})

        # Train the model
        num_epochs = 25
        for epoch in range(1, num_epochs + 1):
            l = None
            for X, y in train_data_iter:
                with autograd.record():
                    # l = loss_f(model(X), y)
                    l = nd.power(model(X) - y, 2)
                l.backward()
                trainer.step(batch_size)

            # l = loss_f(model(train_xs), train_ys)
            l = nd.power(model(train_xs) - train_ys, 2)
            mse = np.sum(
                np.power(
                    train_ys.asnumpy().reshape(-1, 1) -
                    model(train_xs).asnumpy().reshape(-1, 1),
                    2)) / len(train_xs)
            if epoch % 5 == 0:
                print('epoch %d, loss: %.4f, mse: %.4f' %
                      (epoch, l.mean().asscalar(), mse))
Example #12
def position_encoding_init(max_length, dim):
    X = nd.arange(0, max_length).reshape(
        (-1, 1)) / nd.power(10000,
                            nd.arange(0, dim, 2) / dim)
    position_weight = nd.zeros((max_length, dim))

    position_weight[:, 0::2] = nd.sin(X)
    position_weight[:, 1::2] = nd.cos(X)
    return position_weight
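For example (dimensions assumed here):

from mxnet import nd

pe = position_encoding_init(max_length=50, dim=16)
print(pe.shape)  # (50, 16)
print(pe[0])     # first position: zeros at even indices (sin), ones at odd indices (cos)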
Example #13
def load_data_polynomial(true_w, true_b, num_train=5000, num_test=1000):
    """
    """
    features = nd.normal(shape=(num_train + num_test, 1))
    poly_features = [nd.power(features, i) for i in range(1, len(true_w) + 1)]
    poly_features = nd.concat(*poly_features)
    labels = nd.dot(poly_features, true_w) + true_b
    labels += nd.random.normal(scale=0.1)
    return features, poly_features, labels
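Called with the same coefficients used in the other polynomial examples (values assumed for illustration):

from mxnet import nd

features, poly_features, labels = load_data_polynomial([1.2, -3.4, 5.6], 5.0)
print(features.shape, poly_features.shape, labels.shape)  # (6000, 1) (6000, 3) (6000,)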
Example #14
 def __init__(self, units, dropout, max_len=1000):
     super(PositionalEncoding, self).__init__()
     self.dropout = nn.Dropout(dropout)
     # Create a long enough P; max_len acts as the maximum sequence length T
     # P: (1, max_len, D)
     self.P = nd.zeros((1, max_len, units))
     # X: (max_len, D/2)
     X = nd.arange(0, max_len).reshape((-1,1)) / nd.power(10000, nd.arange(0, units, 2) / units)
     self.P[:, :, 0::2] = nd.sin(X)  # even columns (0, 2, 4, ...) get sin
     self.P[:, :, 1::2] = nd.cos(X)  # odd columns (1, 3, 5, ...) get cos
 def _sum_n_square(self):
     """Helper function for paramater regularisation
     """
     sum_of_square = 0
     pdict = self.layer.params
     with autograd.record():
         for param in pdict:
             sum_of_square = sum_of_square + nd.sum(
                 nd.power(pdict[param].data(), 2))
     return sum_of_square
Example #16
 def __init__(self, units, dropout, max_len=1000):
     super(PositionalEncoding, self).__init__()
     self.dropout = nn.Dropout(dropout)
     # Create a long enough P
     self.P = nd.zeros((1, max_len, units))
     X = nd.arange(0, max_len).reshape(
         (-1, 1)) / nd.power(10000,
                             nd.arange(0, units, 2) / units)
     self.P[:, :, 0::2] = nd.sin(X)
     self.P[:, :, 1::2] = nd.cos(X)
 def implement_0(self, x, label):
     '''
     following the SphereFace Caffe implementation
     '''
     #  weight normalize
     with x.context:
         w = self.weight.data()
     with mx.autograd.pause():
         w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape(
             (-1, 1))
         w[:] = w_norm
     #  x_norm = |x|
     x_norm = nd.power(x, 2)
     x_norm = nd.sum(x_norm, axis=1)
     x_norm = nd.sqrt(x_norm)
     #  cos_theta = x'w/|x|. note: |w| = 1
     cos_theta = nd.dot(x, w, transpose_b=True)
     cos_theta = cos_theta / x_norm.reshape((-1, 1))
     #  cos_theta_quadratic & cos_theta_quartic
     cos_theta_quadratic = cos_theta**2
     cos_theta_quartic = cos_theta**4
     with mx.autograd.pause():
         #  sign_0 = sign(cos_theta)
         sign_0 = nd.sign(cos_theta)
         #  sign_3 = sign_0 * sign(2 * cos_theta_quadratic_ - 1)
         sign_3 = sign_0 * nd.sign(2 * cos_theta_quadratic - 1)
         #  sign_4 = 2 * sign_0 + sign_3 - 3
         sign_4 = 2 * sign_0 + sign_3 - 3
     #  phi_theta = (sign_3 * (8 * cos_theta_quartic - 8 * cos_theta_quadratic + 1) + sign_4)
     phi_theta = sign_3 * (8 * cos_theta_quartic - 8 * cos_theta_quadratic +
                           1) + sign_4
     x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
     #  i=j index
     with mx.autograd.pause():
         index = nd.one_hot(label, x_norm_phi_theta.shape[1])
     #  output
     with mx.autograd.pause():
         lamb = self.__get_lambda()  # 10
     output = nd.dot(x, w, transpose_b=True)
     output2 = output * (1.0 - index) + x_norm_phi_theta * index
     output3 = (output2 + lamb * nd.dot(x, w, transpose_b=True)) / (1 +
                                                                    lamb)
     return output3
Example #18
def predict(yolo: Yolo, x, threshold=0.5):
    """
    returns label, C, location
    :param yolo:
    :return:
    """
    assert len(x) == 1, "Only One image for now"
    ypre = yolo(x)
    label, preds, location = deal_output(ypre,
                                         yolo.s,
                                         b=yolo.b,
                                         c=yolo.class_num)
    indexs = []
    for i, c in enumerate(preds[0]):
        if c > threshold:
            indexs.append(i)
    class_names = []
    C_list = []
    bos_list = []
    for index in indexs:
        label_index = int(index / 2)
        location_offect = int(index % 2)
        class_index = nd.argmax(label[0][label_index], axis=0)
        C = preds[0][index]
        locat = location[0][label_index][location_offect]
        C_list.append(C.asscalar())
        # translate the class name
        label_name = yolo.class_names
        text = label_name[int(class_index.asscalar())]
        class_names.append(text)
        # translate the box location
        x, y, w, h = locat
        w, h = nd.power(w, 2), nd.power(h, 2)
        ceil = 1 / 4
        row = int(label_index / 4)
        columns = label_index % 4
        x_center = columns * ceil + x
        y_center = row * ceil + y
        x_min, y_min, x_max, y_max = x_center - 0.5 * w, y_center - 0.5 * h, x_center + 0.5 * w, y_center + 0.5 * h
        box = nd.concatenate([x_min, y_min, x_max, y_max], axis=0) * 256
        bos_list.append(box.asnumpy())
    return class_names, C_list, bos_list
Example #19
def normal():
    """
    Each element is randomly sampled from a normal distribution with mean 0 and
    standard deviation 1; a.norm() equals nd.sqrt(nd.power(a, 2).sum())
    :return:
    """
    n = nd.normal(0, 1, shape=(2, 2))
    logger.info(n)

    a = nd.array([1, 2, 3, 4])
    print(a.norm())
    print(nd.sqrt(nd.power(a, 2).sum()))
Example #20
    def scale_and_bound(self, sample, log_prob, mean):
        action_bounded = sample.tanh()  # bound action
        action_scaled = action_bounded * self.action_scale + self.action_bias  # scale action

        mean_bounded = mean.tanh(
        ) * self.action_scale + self.action_bias  # bound and scale mean

        log_prob_bounded = log_prob - (self.action_scale *
                                       (1 - nd.power(action_bounded, 2)) +
                                       EPSILON).log()

        return action_scaled, log_prob_bounded, mean_bounded
    def _update_params(self, accumulated_grads):
        # scale gradients by lot size, add noise, and update the parameters
        for param_name, param in self._params.items():
            # average the clipped gradients and then add noise to each averaged gradient
            param_grad_update = (accumulated_grads[param_name] / self._hyperparams['lot_size']) + \
                                mx.random.normal(0, self._hyperparams['sigma'], param.shape, ctx=self._model_ctx)

            # update biased first moment estimate
            self._m[param_name] = self._hyperparams['beta_1'] * self._m[param_name] + (1 - self._hyperparams['beta_1']) * param_grad_update

            # update biased second raw moment estimate
            self._v[param_name] = self._hyperparams['beta_2'] * self._v[param_name] + (1 - self._hyperparams['beta_2']) * nd.square(param_grad_update)

            # compute bias-corrected first moment estimate
            m_hat = self._m[param_name] / (1 - nd.power(self._hyperparams['beta_1'], self._step + 1))

            # compute bias-corrected second raw moment estimate
            v_hat = self._v[param_name] / (1 - nd.power(self._hyperparams['beta_2'], self._step + 1))

            # update params with ADAM
            param[:] = param - self._hyperparams['lr'] * m_hat / (nd.sqrt(v_hat) + 1e-8)
Example #22
    def positional(x):
        batch_size, length, model_dim = x.shape
        # (length, 1)
        pos = nd.arange(length).expand_dims(1)

        # (1, model_dim/2),  10000^(2i/model_dim)
        div = nd.power(10000, nd.arange(model_dim / 2) * 2 / model_dim)

        out = nd.zeros((length, model_dim))

        out[:, 0::2] = nd.sin(pos / div)
        out[:, 1::2] = nd.cos(pos / div)

        return nd.broadcast_axis(out.expand_dims(0), axis=0, size=batch_size)
Example #23
    def power(self, tensor_in_1, tensor_in_2):
        """
        Result of first array elements raised to powers from second array,
        element-wise with broadcasting.

        Args:
            tensor_in_1 (Tensor): Tensor object
            tensor_in_2 (Tensor): Tensor object

        Returns:
            MXNet NDArray: First array elements raised to powers from second array.
        """
        tensor_in_1 = self.astensor(tensor_in_1)
        tensor_in_2 = self.astensor(tensor_in_2)
        return nd.power(tensor_in_1, tensor_in_2)
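nd.power itself follows the usual MXNet broadcasting rules; a standalone illustration (values assumed):

from mxnet import nd

base = nd.array([[1, 2, 3], [4, 5, 6]])
print(nd.power(base, 2))               # element-wise square
print(nd.power(base, nd.array([2])))   # broadcast exponent gives the same result
print(nd.power(2, base))               # scalar base raised to array exponents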
Example #24
    def forward(self, adj, feat):
        r"""Compute (Dense) Graph Convolution layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply Graph Convolution on,
            should be of shape :math:`(N, N)`, where a row represents the destination
            and a column represents the source.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        adj = adj.astype(feat.dtype).as_in_context(feat.context)
        if self._norm:
            in_degrees = adj.sum(axis=1)
            norm = nd.power(in_degrees, -0.5)
            shp = norm.shape + (1,) * (feat.ndim - 1)
            norm = norm.reshape(shp).as_in_context(feat.context)
            feat = feat * norm

        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            feat = nd.dot(feat, self.weight.data(feat.context))
            rst = nd.dot(adj, feat)
        else:
            # aggregate first then mult W
            rst = nd.dot(adj, feat)
            rst = nd.dot(rst, self.weight.data(feat.context))

        if self._norm:
            rst = rst * norm

        if self.bias is not None:
            rst = rst + self.bias.data(feat.context)

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
Example #25
    def forward(self, graph, feat):
        r"""Compute Simplifying Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.

        Notes
        -----
        If ``cache`` is set to True, ``feat`` and ``graph`` should not change during
        training, or you will get wrong results.
        """
        graph = graph.local_var()
        if self._cached_h is not None:
            feat = self._cached_h
        else:
            # compute normalization
            degs = nd.clip(graph.in_degrees().astype(feat.dtype), 1,
                           float('inf'))
            norm = nd.power(degs, -0.5).expand_dims(1)
            norm = norm.as_in_context(feat.context)
            # compute (D^-1 A D)^k X
            for _ in range(self._k):
                feat = feat * norm
                graph.ndata['h'] = feat
                graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
                feat = graph.ndata.pop('h')
                feat = feat * norm

            if self.norm is not None:
                feat = self.norm(feat)

            # cache feature
            if self._cached:
                self._cached_h = feat
        return self.fc(feat)
Example #26
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import autograd, gluon, nd
from mxnet.gluon import data as gdata, loss as gloss, nn

# y = 1.2x - 3.4x^2 + 5.6x^3 + 5 + ε

n_train = 100
n_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5

features = nd.random.normal(shape=(n_train + n_test, 1))
poly_features = nd.concat(features, nd.power(features, 2),
                          nd.power(features, 3))

labels = true_w[0] * poly_features[:, 0] + true_w[
    1] * poly_features[:, 1] + true_w[2] * poly_features[:, 2] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

from IPython.display import set_matplotlib_formats


def semilogy(x_vals,
             y_vals,
             x_label,
             y_label,
             x2_vals=None,
             y2_vals=None,
             legend=None,
Example #27
 def sum_squared_error(self, yhat, y):
     return nd.nansum(nd.power(y - yhat, 2), axis=0, exclude=True)
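Outside the class, the same reduction can be checked directly (toy tensors assumed):

from mxnet import nd

y = nd.array([[1.0, 2.0], [3.0, 4.0]])
yhat = nd.array([[1.0, 1.0], [2.0, 2.0]])
# nansum over every axis except the batch axis (axis=0)
print(nd.nansum(nd.power(y - yhat, 2), axis=0, exclude=True))  # [1., 5.]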
Example #28
    def forward(self, graph, feat):
        r"""

        Description
        -----------
        Compute Simplifying Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise DGLError
            since no message will be passed to those nodes. This will cause invalid output.
            The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.

        Note
        ----
        If ``cache`` is set to True, ``feat`` and ``graph`` should not change during
        training, or you will get wrong results.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if graph.in_degrees().min() == 0:
                    raise DGLError(
                        'There are 0-in-degree nodes in the graph, '
                        'output for those nodes will be invalid. '
                        'This is harmful for some applications, '
                        'causing silent performance regression. '
                        'Adding self-loop on the input graph by '
                        'calling `g = dgl.add_self_loop(g)` will resolve '
                        'the issue. Setting ``allow_zero_in_degree`` '
                        'to be `True` when constructing this module will '
                        'suppress the check and let the code run.')

            if self._cached_h is not None:
                feat = self._cached_h
            else:
                # compute normalization
                degs = nd.clip(graph.in_degrees().astype(feat.dtype), 1,
                               float('inf'))
                norm = nd.power(degs, -0.5).expand_dims(1)
                norm = norm.as_in_context(feat.context)
                # compute (D^-1 A D)^k X
                for _ in range(self._k):
                    feat = feat * norm
                    graph.ndata['h'] = feat
                    graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
                    feat = graph.ndata.pop('h')
                    feat = feat * norm

                if self.norm is not None:
                    feat = self.norm(feat)

                # cache feature
                if self._cached:
                    self._cached_h = feat
            return self.fc(feat)
Example #29
def total_variation_loss(x):
    """ regularize convolutional masks (not currently in use) """
    a = nd.square(x[:, :, :-1, :-1] - x[:, :, 1:, :-1])
    b = nd.square(x[:, :, :-1, :-1] - x[:, :, :-1, 1:])
    return nd.sum(nd.mean(nd.power(a + b, 1.25), axis=(2, 3)))
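A quick call on an assumed NCHW tensor:

from mxnet import nd

masks = nd.random.uniform(shape=(2, 1, 8, 8))  # batch of 2 single-channel 8x8 masks
print(total_variation_loss(masks))              # scalar NDArray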
Example #30
 def hybrid_forward(self, F, x, a, b):
     mean = x.mean(axis = -1) # batch * _in_seq_len
     _mean = nd.repeat(mean.expand_dims(axis = -1), repeats = x.shape[-1], axis = -1) # batch * _in_seq_len * embedding_dim
     std = nd.sqrt(nd.sum(nd.power((x - _mean), 2), axis = -1) / x.shape[-1]) # batch * _in_seq_len
     _std = nd.repeat(std.expand_dims(axis = -1), repeats = x.shape[-1], axis = -1) # batch * _in_seq_len * embedding_dim
     return F.elemwise_div(F.multiply((x - _mean), a), (_std  + self.eps)) + b