Example #1
def calc_potential(exe, params, label_name, noise_precision, prior_precision):
    exe.copy_params_from(params)
    exe.forward(is_train=False)
    ret = 0.0
    ret += (nd.norm(
        exe.outputs[0] - exe.arg_dict[label_name]).asscalar() ** 2) / 2.0 * noise_precision
    for v in params.values():
        ret += (nd.norm(v).asscalar() ** 2) / 2.0 * prior_precision
    return ret
Example #2
def calc_potential(exe, params, label_name, noise_precision, prior_precision):
    exe.copy_params_from(params)
    exe.forward(is_train=False)
    ret = 0.0
    ret += (nd.norm(exe.outputs[0] - exe.arg_dict[label_name]).asscalar()**
            2) / 2.0 * noise_precision
    for v in params.values():
        ret += (nd.norm(v).asscalar()**2) / 2.0 * prior_precision
    return ret
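
Both snippets above evaluate the same unnormalized negative log posterior used by SGLD-style Bayesian samplers (Example #19 below uses the same setup): a squared-error likelihood term scaled by noise_precision plus a Gaussian prior term scaled by prior_precision. A minimal, executor-free sketch of the same quantity, with hypothetical arrays standing in for exe.outputs[0], exe.arg_dict[label_name] and params.values():

from mxnet import nd

# Hypothetical stand-ins for the executor output, the labels and the parameters.
pred = nd.random.normal(shape=(32, 1))
label = nd.random.normal(shape=(32, 1))
weights = [nd.random.normal(shape=(10, 5)), nd.random.normal(shape=(5,))]
noise_precision, prior_precision = 1.0, 0.1

potential = (nd.norm(pred - label).asscalar() ** 2) / 2.0 * noise_precision
potential += sum((nd.norm(w).asscalar() ** 2) / 2.0 * prior_precision
                 for w in weights)
print(potential)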
Example #3
def batched_l2_dist(a, b):
    a_squared = nd.power(nd.norm(a, axis=-1), 2)
    b_squared = nd.power(nd.norm(b, axis=-1), 2)

    squared_res = nd.add(nd.linalg_gemm(
        a, nd.transpose(b, axes=(0, 2, 1)), nd.broadcast_axes(nd.expand_dims(b_squared, axis=-2), axis=1, size=a.shape[1]), alpha=-2
    ), nd.expand_dims(a_squared, axis=-1))
    res = nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max))
    return res
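
batched_l2_dist expands ||a - b||^2 as ||a||^2 - 2*a.b + ||b||^2 using a single linalg_gemm call per batch and clips before the square root for numerical safety. A small self-check under assumed shapes, comparing against a direct broadcast computation:

import numpy as np
from mxnet import nd

a = nd.random.normal(shape=(2, 5, 3))   # (batch, n, dim), shapes chosen arbitrarily
b = nd.random.normal(shape=(2, 7, 3))   # (batch, m, dim)
dist = batched_l2_dist(a, b)            # (batch, n, m)
# Reference: explicit pairwise differences, then an L2 norm over the last axis.
ref = nd.norm(a.expand_dims(axis=2) - b.expand_dims(axis=1), axis=-1)
print(nd.max(nd.abs(dist - ref)).asscalar())  # expect a value close to 0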
Example #4
def extended_jaccard_dist(x, y, pw=False):
    score = dot_dist(x, y, pw)

    x = nd.norm(x, ord=2, axis=-1)**2
    y = nd.norm(y, ord=2, axis=-1)**2
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return score / (x + y - score)
Example #5
def cosine_dist(x, y, pw=False):
    score = dot_dist(x, y, pw)

    x = nd.norm(x, ord=2, axis=-1)
    y = nd.norm(y, ord=2, axis=-1)
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return score / (x * y)
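
Both extended_jaccard_dist and cosine_dist rely on a dot_dist helper that is not part of this listing. A hypothetical sketch of that helper, inferred from how it is called above (this is an assumption, not the original implementation):

from mxnet import nd

def dot_dist(x, y, pw=False):
    # pw=False: full (n, m) score matrix between every row of x and every row of y;
    # pw=True: elementwise scores for rows that are already paired.
    if pw is False:
        return nd.dot(x, y, transpose_b=True)
    return nd.sum(x * y, axis=-1)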
Example #6
def debug_norm_all(self, debug_gnorm=True):
    if debug_gnorm:
        for k, v, grad_v in zip(self._param_names,
                                self._exec_group.param_arrays,
                                self._exec_group.grad_arrays):
            logging.debug("%s: v-norm: %g, g-norm: %g" % (k, nd.norm(
                v[0]).asnumpy()[0], nd.norm(grad_v[0]).asnumpy()[0]))
    else:
        for k, v in zip(self._param_names, self._exec_group.param_arrays):
            logging.debug("%s: v-norm: %g" %
                          (k, nd.norm(v[0]).asnumpy()[0]))
Example #7
def cal_my_acc(test_files, target_files):
    '''
    this method is deprecated
    :param test_files:
    :param target_files:
    :return:
    '''
    mTransform = MTransform()
    normalize = transforms.Normalize(mean=0.5, std=0.5)
    transform = transforms.Compose([
        # transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    model = sphere_net.SphereNet20()
    model.load_params("log_bn_dy/spherenet.model", ctx=mx.gpu())
    correct = 0
    total = 0
    target_emb = {}
    for target_file in target_files:
        target_image = transform(nd.array(
            Image.open(target_file))).as_in_context(mx.gpu())
        target_image = nd.expand_dims(target_image, axis=0)
        target_label = ''.join(target_file.split('/')[-1].split('.')[:-1])
        target_out = model(target_image)
        target_emb[target_label] = target_out
    test_emb = {}
    for test_file in test_files:
        test_image = Image.open(test_file)
        test_image = nd.expand_dims(transform(nd.array(test_image)),
                                    axis=0).as_in_context(mx.gpu())
        test_label = ''.join(test_file.split('/')[-1].split('.')[:-1])
        test_out = model(test_image)
        max_s = mx.nd.zeros(1, ctx=mx.gpu())
        max_label = ''
        sims = {}
        for target_label, target_out in target_emb.items():
            similarity = nd.sum(test_out * target_out) / \
                         (nd.norm(test_out) * nd.norm(target_out))
            sims[target_label] = similarity.asscalar()
            if max_s < similarity:
                max_s = similarity
                max_label = target_label
        if ''.join(max_label.split('_')[:-1]) == ''.join(
                test_label.split('_')[:-1]):
            correct += 1
        else:
            print(test_label, max_s.asscalar(), max_label)
        total += 1
        test_emb[test_label] = test_out
        # print correct, total, float(correct)/total

    return float(correct) / total, test_emb, target_emb
Example #8
def f(a):
    b = a * 2
    print('a', a)
    print('nd.norm(a).asscalar()', nd.norm(a).asscalar())
    print('nd.norm(b).asscalar()', nd.norm(b).asscalar())
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
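
This control-flow function is the usual autograd demonstration: the output is the input scaled by a data-dependent constant, so the gradient equals c / a. A minimal usage sketch (the input shape is arbitrary):

from mxnet import autograd, nd

a = nd.random.normal(shape=(1,))
a.attach_grad()
with autograd.record():
    c = f(a)
c.backward()
print(a.grad == c / a)  # expect [1.]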
Example #9
def _realize_parameters(sym,
                        params,
                        graph,
                        inputs_ext,
                        target_bits={},
                        params_sim={}):
    logger = logging.getLogger('log.calib.realize.parameters')
    name = sym.attr('name')
    attr = sym.list_attr()
    if 'precision' not in attr or name in inputs_ext:
        return sym, params
    target_bit = int(attr['precision'])
    data = params[name]
    params[name] = sim.int_realize(data, target_bit, logger=logger)
    # calculate error
    error = params[name].astype('float32') - data
    error_rate = error / data
    if nd.sum(error).asscalar() == 0:
        rate = 0
    else:
        rate = nd.norm(error_rate).asscalar() / np.product(data.shape)
    if rate > 0.001:
        logger.warn("realize parameter %-60s avg error=%10.9f shape=%s", name,
                    rate, data.shape)
    else:
        logger.debug("realize parameter %-60s avg error=%10.9f shape=%s", name,
                     rate, data.shape)
    return sym, params
Example #10
def _realize_parameters(sym, params, graph, inputs_ext, precs):
    logger = logging.getLogger('log.realize.parameters')
    name, op_name = sym.attr('name'), sym.attr('op_name')
    attr = sym.list_attr()
    if op_name != 'null':
        return sym, params
    if name in inputs_ext:
        attr['precision'] = str(precs[name][out_key])
        return mx.sym.var(name, attr=attr), params
    prec = precs[name][out_key]
    data = params[name]
    params[name] = sim.int_realize(data, prec, logger=logger)
    # calculate error
    error = params[name].astype('float32') - data
    if nd.sum(error).asscalar() == 0:
        rate = 0
    else:
        rate = nd.norm(error / data).asscalar() / np.product(data.shape)
    if rate > 0.001:
        logger.warn("realize parameter %-60s avg error=%10.9f shape=%s", name,
                    rate, data.shape)
    else:
        logger.debug("realize parameter %-60s avg error=%10.9f shape=%s", name,
                     rate, data.shape)
    attr['precision'] = str(prec)
    node = mx.sym.var(name, attr=attr)
    return node, params
Example #11
    def infer(self, head_emb, rel_emb, tail_emb):
        head_emb = head_emb.expand_dims(axis=1)
        rel_emb = rel_emb.expand_dims(axis=0)
        score = (head_emb + rel_emb).expand_dims(
            axis=2) - tail_emb.expand_dims(axis=0).expand_dims(axis=0)

        return self.gamma - nd.norm(score, ord=self.dist_ord, axis=-1)
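
The infer method above broadcasts every (head, relation) pair against every tail and scores them with a TransE-style margin minus distance. A shape-level sketch with hypothetical stand-ins for self.gamma and self.dist_ord:

from mxnet import nd

gamma, dist_ord = 12.0, 1                     # assumed hyperparameters
head_emb = nd.random.normal(shape=(4, 16))    # (num_heads, dim)
rel_emb = nd.random.normal(shape=(3, 16))     # (num_rels, dim)
tail_emb = nd.random.normal(shape=(5, 16))    # (num_tails, dim)
score = (head_emb.expand_dims(axis=1) + rel_emb.expand_dims(axis=0)).expand_dims(axis=2) \
        - tail_emb.expand_dims(axis=0).expand_dims(axis=0)
print((gamma - nd.norm(score, ord=dist_ord, axis=-1)).shape)  # expect (4, 3, 5)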
Example #12
def my_loss(data, nc, ns, nq):
    data = data.astype('float64')
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0),
                              axis=2)**2

    weight = nd.zeros((nc * nq, nc), ctx=data.context, dtype='float64')
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
    weight2 = 1 - weight

    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.sum(temp1, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label

    # Note: the code below is unreachable because of the early return above.
    loss1 = nd.sum(data_center_dis * weight)

    temp = nd.sum(nd.exp(-data_center_dis), axis=1)
    loss2 = nd.sum(nd.log(temp))

    if np.isnan(loss1.asscalar()) or np.isnan(loss2.asscalar()):
        raise StopIteration

    return (loss1 + loss2) / (nc * nq), label
Example #13
def proto_loss(embedding, nc, ns, nq):
    embedding = embedding.astype('float64')
    cls_data = nd.reshape(embedding[0:nc * ns], (nc, ns, -1))
    cls_data.attach_grad()
    cls_center = nd.mean(cls_data, axis=1)
    data_center_dis = nd.norm(embedding[nc*ns:].expand_dims(axis=1) - cls_center.expand_dims(axis=0),
                              axis=2) ** 2

    # print(nd.max(data_center_dis).asscalar())


    weight = nd.zeros((nc*nq, nc), ctx=embedding.context, dtype='float64')
    pick_vec = nd.zeros((nc*nq), ctx=embedding.context)
    for i in range(0, nc):
        weight[i*nq:i*nq+nq, i] = 1
        pick_vec[i*nq:i*nq+nq] = i
    """
    temp = nd.SoftmaxOutput(-data_center_dis, label)
    temp = nd.log(temp) * weight
    temp = nd.sum(-temp, axis=1)
    predict = nd.argmin(data_center_dis, axis=1)
    return -temp * nd.log(temp), predict
    """

    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.pick(temp1, index=pick_vec, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label
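
proto_loss expects the embeddings laid out as nc * ns support vectors followed by nc * nq query vectors along axis 0, and returns the mean negative log-softmax of the negative squared distances to the class centers. An illustrative call with assumed episode sizes:

from mxnet import nd

nc, ns, nq, dim = 3, 5, 2, 16                       # hypothetical sizes
embedding = nd.random.normal(shape=(nc * ns + nc * nq, dim))
loss, label = proto_loss(embedding, nc, ns, nq)
print(loss.asscalar(), label.shape)                 # scalar loss, (nc * nq,) predictions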
Example #14
def _get_opt(out, lambd):
    absmax = out.abs().max().asscalar()
    if lambd is None:
        return absmax
    mean = nd.mean(out).asscalar()
    sqrt_n = math.sqrt(np.product(out.shape))
    std = nd.norm(out - mean).asscalar() / sqrt_n
    alpha = abs(mean) + lambd * std

    #  pos_out = nd.abs(out)
    #  pos_mean = nd.mean(pos_out).asscalar()
    #  pos_std = nd.norm(pos_out - pos_mean).asscalar() / sqrt_n
    #  pos_alpha = abs(pos_mean) + lambd * pos_std

    opt = absmax
    if alpha < 0.95 * absmax:
        print("mean, std = [", mean, std, "]", "alpha=", alpha, "absmax=",
              absmax)
        opt = alpha
    #  if opt > 30:
    #  print ("mean, std = [", mean, std, "]", "alpha=", alpha,
    #  "absmax=", absmax)
    #  print ("ABS mean, std = [", pos_mean, pos_std, "]",
    #  "alpha=", pos_alpha, "absmax=", absmax)
    return opt
Example #15
    def compute_retrospective_loss(self, observed_arr, encoded_arr,
                                   decoded_arr, re_encoded_arr):
        '''
        Compute retrospective loss.

        Returns:
            `mx.nd.NDArray` of losses, one value per batch sample
            (the mean of the clipped delta over all non-batch axes).

        '''
        if self.__output_neuron_count == self.__hidden_neuron_count:
            target_arr = nd.broadcast_sub(
                encoded_arr, nd.expand_dims(observed_arr.mean(axis=2), axis=2))
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))
        else:
            # For each batch, draw a samples from the Uniform distribution.
            if self.__output_neuron_count > self.__hidden_neuron_count:
                all_dim_arr = np.arange(self.__output_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__hidden_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr,
                    nd.expand_dims(observed_arr[:, :,
                                                choiced_dim_arr].mean(axis=2),
                                   axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr[:, :, choiced_dim_arr] - target_arr,
                             2))
            else:
                all_dim_arr = np.arange(self.__hidden_neuron_count)
                np.random.shuffle(all_dim_arr)
                choiced_dim_arr = all_dim_arr[:self.__output_neuron_count]
                target_arr = nd.broadcast_sub(
                    encoded_arr[:, :, choiced_dim_arr],
                    nd.expand_dims(observed_arr.mean(axis=2), axis=2))
                summary_delta_arr = nd.sqrt(
                    nd.power(decoded_arr - target_arr, 2))

        match_delta_arr = None
        for i in range(self.__batch_size):
            arr = nd.sqrt(
                nd.power(encoded_arr[i, -1] - re_encoded_arr[i, -1], 2))
            if match_delta_arr is None:
                match_delta_arr = nd.expand_dims(arr, axis=0)
            else:
                match_delta_arr = nd.concat(match_delta_arr,
                                            nd.expand_dims(arr, axis=0),
                                            dim=0)

        delta_arr = summary_delta_arr + nd.expand_dims(
            self.__retrospective_lambda * match_delta_arr, axis=1)
        v = nd.norm(delta_arr)
        if v > self.__grad_clip_threshold:
            delta_arr = delta_arr * self.__grad_clip_threshold / v

        loss = nd.mean(delta_arr, axis=0, exclude=True)

        return loss
Example #16
def edge_func(self, edges):
    head = edges.src['emb']
    tail = edges.dst['emb']
    rel = edges.data['emb']
    score = head + rel - tail
    return {
        'score': self.gamma - nd.norm(score, ord=self.dist_ord, axis=-1)
    }
Example #17
def fn(heads, relations, tails, num_chunks, chunk_size,
       neg_sample_size):
    hidden_dim = heads.shape[1]
    heads = heads + relations
    heads = heads.reshape(num_chunks, chunk_size, 1, hidden_dim)
    tails = tails.reshape(num_chunks, 1, neg_sample_size,
                          hidden_dim)
    return gamma - nd.norm(heads - tails, ord=1, axis=-1)
Example #18
def fn(heads, relations, tails, num_chunks, chunk_size,
       neg_sample_size):
    relations = relations.reshape(num_chunks, -1,
                                  self.relation_dim)
    heads = heads - relations
    heads = heads.reshape(num_chunks, -1, 1, self.relation_dim)
    score = heads - tails
    return gamma - nd.norm(score, ord=1, axis=-1)
Example #19
def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
         data_inputs=None,
         learning_rate=None,
         lr_scheduler=None, prior_precision=1,
         out_grad_f=None,
         initializer=None,
         minibatch_size=100, thin_interval=100, burn_in_iter_num=1000, task='classification',
         dev=mx.gpu()):
    if out_grad_f is None:
        label_key = list(set(data_inputs.keys()) - set(['data']))[0]
    exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
    optimizer = mx.optimizer.create('sgld', learning_rate=learning_rate,
                                    rescale_grad=X.shape[0] / minibatch_size,
                                    lr_scheduler=lr_scheduler,
                                    wd=prior_precision)
    updater = mx.optimizer.get_updater(optimizer)
    sample_pool = []
    start = time.time()
    for i in range(total_iter_num):
        indices = numpy.random.randint(X.shape[0], size=minibatch_size)
        X_batch = X[indices]
        Y_batch = Y[indices]
        exe.arg_dict['data'][:] = X_batch
        if out_grad_f is None:
            exe.arg_dict[label_key][:] = Y_batch
            exe.forward(is_train=True)
            exe.backward()
        else:
            exe.forward(is_train=True)
            exe.backward(out_grad_f(exe.outputs, nd.array(Y_batch, ctx=dev)))
        for k in params:
            updater(k, params_grad[k], params[k])
            print(k, nd.norm(params_grad[k]).asnumpy())
        if i < burn_in_iter_num:
            continue
        else:
            if 0 == (i - burn_in_iter_num) % thin_interval:
                if optimizer.lr_scheduler is not None:
                    lr = optimizer.lr_scheduler(optimizer.num_update)
                else:
                    lr = learning_rate
                sample_pool.append([lr, copy_param(exe)])
        if (i + 1) % 100000 == 0:
            end = time.time()
            if task == 'classification':
                print "Current Iter Num: %d" % (i + 1), "Time Spent: %f" % (end - start)
                test_correct, test_total, test_acc = \
                    sample_test_acc(exe, sample_pool=sample_pool, X=X_test, Y=Y_test, label_num=10,
                                    minibatch_size=minibatch_size)
                print "Test %d/%d=%f" % (test_correct, test_total, test_acc)
            else:
                print "Current Iter Num: %d" % (i + 1), "Time Spent: %f" % (end - start), "MSE:",
                print sample_test_regression(exe=exe, sample_pool=sample_pool,
                                             X=X_test,
                                             Y=Y_test, minibatch_size=minibatch_size,
                                             save_path='regression_SGLD.txt')
            start = time.time()
    return exe, sample_pool
Example #20
def f(a):
    b = a * 2
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
Example #21
def calculate_norm(x, y):
    assert x.shape == y.shape
    ndims = np.product(x.shape)
    x = nd.reshape(x, shape=(ndims, ))
    y = nd.reshape(y, shape=(ndims, ))
    res = x - y
    nx = nd.norm(x)
    ny = nd.norm(y)
    nr = nd.norm(res)
    print("saving...")
    f = "/home/ryt/data/cmp_"
    names = ["nx", "ny", "nr"]
    objs = [nx, ny, nr]
    for obj in objs:
        print(type(obj), obj.shape)
    for i in range(3):
        nd.save(f + names[i], objs[i])
    print('success')
Example #22
def norm_clipping(params_grad, threshold):
    assert isinstance(params_grad, dict)
    norm_val = numpy.sqrt(sum([nd.norm(grad).asnumpy()[0]**2 for grad in params_grad.values()]))
    # print('grad norm: %g' % norm_val)
    ratio = 1.0
    if norm_val > threshold:
        ratio = threshold / norm_val
        for grad in params_grad.values():
            grad *= ratio
    return norm_val
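
A short usage sketch for norm_clipping (the parameter names and shapes are hypothetical): the gradients are rescaled in place whenever their global L2 norm exceeds the threshold, and the pre-clipping norm is returned.

import numpy
from mxnet import nd

params_grad = {'weight': nd.random.normal(shape=(3, 4)),
               'bias': nd.random.normal(shape=(4,))}
norm_before = norm_clipping(params_grad, threshold=1.0)
print(norm_before, nd.norm(params_grad['weight']).asscalar())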
Example #23
def norm_clipping(params_grad, threshold):
    assert isinstance(params_grad, dict)
    norm_val = numpy.sqrt(sum([nd.norm(grad).asnumpy()[0]**2 for grad in params_grad.values()]))
    # print('grad norm: %g' % norm_val)
    ratio = 1.0
    if norm_val > threshold:
        ratio = threshold / norm_val
        for grad in params_grad.values():
            grad *= ratio
    return norm_val
Example #24
def f(a):
    b = a * 2
    # scalar value of the L2 norm of b
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    # scalar value of the sum over b's elements
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
Example #25
def _get_opt(out, lambd):
    absmax = out.abs().max().asscalar()
    if lambd is None:
        return absmax
    mean = nd.mean(out).asscalar()
    std = nd.norm(out - mean).asscalar() / math.sqrt(np.product(out.shape))
    alpha = abs(mean) + lambd * std
    if alpha < 0.95 * absmax:
        print("[", mean, std, "]", alpha, absmax)
        return alpha
    return absmax
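
_get_opt picks a clipping threshold for quantization calibration: the tensor's absolute maximum, unless |mean| + lambd * std is noticeably tighter (below 95% of the maximum). An illustrative call on a roughly Gaussian tensor (the size and lambd value are arbitrary):

import math
import numpy as np
from mxnet import nd

out = nd.random.normal(loc=0, scale=1, shape=(10000,))
print(_get_opt(out, lambd=None))  # falls back to the absolute maximum
print(_get_opt(out, lambd=3))     # roughly |mean| + 3 * std when that is tighter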
Example #26
def nd_global_norm(t_list):
    """Computes the global norm of multiple tensors.

    Given a tuple or list of tensors t_list, this operation returns the global norm of the elements
     in all tensors in t_list. The global norm is computed as:

    ``global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))``

    Any entries in t_list that are of type None are ignored.

    Parameters
    ----------
    t_list: list or tuple
        The NDArray list

    Returns
    -------
    ret: NDArray
        The global norm. The shape of the NDArray will be (1,)

    Examples
    --------
    >>> x = mx.nd.ones((2, 3))
    >>> y = mx.nd.ones((5, 6))
    >>> z = mx.nd.ones((4, 2, 3))
    >>> print(nd_global_norm([x, y, z]).asscalar())
    7.74597
    >>> xnone = None
    >>> ret = nd_global_norm([x, y, z, xnone])
    >>> print(ret.asscalar())
    7.74597
    """
    ret = None
    for arr in t_list:
        if arr is not None:
            if ret is None:
                ret = nd.square(nd.norm(arr))
            else:
                ret += nd.square(nd.norm(arr))
    ret = nd.sqrt(ret)
    return ret
Example #27
def f(a):
    b = a * 2
    i = 0
    while nd.norm(b).asscalar() < 1000:
        i += 1
        print(i)
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        print('100')
        c = 100 * b
    return c
Example #28
def norm_clipping(params_grad, threshold):
    norm_val = 0.0
    for i in range(len(params_grad[0])):
        norm_val += np.sqrt(
            sum([nd.norm(grads[i]).asnumpy()[0]**2 for grads in params_grad]))
    norm_val /= float(len(params_grad[0]))

    if norm_val > threshold:
        ratio = threshold / float(norm_val)
        for grads in params_grad:
            for grad in grads:
                grad[:] *= ratio

    return norm_val
Example #29
def predict(net, data_loader, ctx):
    label = []
    acc = 0
    for data, cls_id in data_loader:
        data = data.as_in_context(ctx)
        out = net(data)
        min_dis = math.inf
        p_key = None
        for key in net.cls_center:
            cur_dis = nd.norm(net.cls_center[key] - out)
            if cur_dis.asscalar() < min_dis:
                min_dis = cur_dis.asscalar()
                p_key = key
        if p_key == cls_id.asscalar():
            acc += 1
        label.append(p_key)

    return label, acc / len(label)
Example #30
    def get_opt(self, raw_ft, out, **kwargs):
        logger = kwargs.get("logger", logging.getLogger("optimize"))
        hist_ft = kwargs.get("hist_ft", None)
        name = kwargs.get("name", _NULL_NAME)

        if isinstance(raw_ft, AFeature):
            # hyperparameter 'lambd' for fine tuning
            absmax = raw_ft.get()
            if self.lambd is not None:
                mean = nd.mean(out).asscalar()
                sqrt_n = math.sqrt(np.product(out.shape))
                std = nd.norm(out - mean).asscalar() / sqrt_n
                alpha = abs(mean) + self.lambd * std
                absmax = alpha if alpha < 0.95 * absmax else absmax
            if hist_ft is None:
                p = logger.debug if absmax < 30 else logger.warn
                p("collect symbol %-40s, out_shape=%-20s, opt: (%s)", name,
                  out.shape, absmax)
                opt = AFeature(absmax)
            else:
                opt = AFeature(max(absmax, hist_ft.get()))
        elif isinstance(raw_ft, MMFeature):
            minv, maxv = raw_ft.get()
            if hist_ft is None:
                opt = MMFeature(minv, maxv)
            else:
                hminv, hmaxv = hist_ft.get()
                opt = MMFeature(min(minv, hminv), max(maxv, hmaxv))
        elif isinstance(raw_ft, ALFeature):
            if hist_ft is None:
                opt = raw_ft
            else:
                absmax_list = raw_ft.get()
                habsmax_list = hist_ft.get()
                nabsmax_list = [ \
                    max(absmax_list[i], habsmax_list[i]) \
                    for i in range(len(absmax_list))
                ]
                opt = ALFeature(nabsmax_list)
        else:
            raise TypeError("Unsupported feature type: %s for HVOptimizor" %
                            type(raw_ft))
        return opt
Example #31
def get_global_norm_val(self):
    """Get the overall gradient norm ||W||_2
    Parameters
    ----------
    net : mx.mod.Module
    Returns
    -------
    norm_val : float
    """
    assert self.binded and self.params_initialized
    # TODO The code in the following will cause the estimated norm to be different for multiple gpus
    norm_val = 0.0
    for i in range(len(self._exec_group.grad_arrays[0])):
        norm_val += np.sqrt(
            sum([
                nd.norm(grads[i]).asnumpy()[0]**2
                for grads in self._exec_group.grad_arrays
            ]))
    norm_val /= float(len(self._exec_group.grad_arrays[0]))
    return norm_val
Example #32
def l1_dist(x, y, pw=False):
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return -nd.norm(x-y, ord=1, axis=-1)
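
l1_dist returns negative L1 distances, either as a full pairwise matrix or elementwise for rows that are already paired. A quick shape check with assumed sizes:

from mxnet import nd

x = nd.random.normal(shape=(4, 8))
y = nd.random.normal(shape=(6, 8))
print(l1_dist(x, y).shape)           # pairwise: expect (4, 6)
print(l1_dist(x, x, pw=True).shape)  # elementwise: expect (4,)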
Example #33
def gradient_penalty(gradient):
    gradient = gradient.reshape(gradient.shape[0], -1)
    gradient_norm = nd.norm(gradient, ord=2, axis=1)
    penalty = nd.mean((gradient_norm - 1) ** 2)
    return penalty
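
gradient_penalty flattens each sample's gradient and penalizes deviation of its L2 norm from 1, as in WGAN-GP style training. An illustrative call (the gradient tensor here is random and merely stands in for one obtained via autograd):

from mxnet import nd

gradient = nd.random.normal(shape=(8, 3, 32, 32))  # (batch, channels, H, W), hypothetical
print(gradient_penalty(gradient).asscalar())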