def calc_potential(exe, params, label_name, noise_precision, prior_precision):
    exe.copy_params_from(params)
    exe.forward(is_train=False)
    ret = 0.0
    ret += (nd.norm(exe.outputs[0] - exe.arg_dict[label_name]).asscalar() ** 2) \
           / 2.0 * noise_precision
    for v in params.values():
        ret += (nd.norm(v).asscalar() ** 2) / 2.0 * prior_precision
    return ret
def batched_l2_dist(a, b):
    a_squared = nd.power(nd.norm(a, axis=-1), 2)
    b_squared = nd.power(nd.norm(b, axis=-1), 2)

    squared_res = nd.add(nd.linalg_gemm(
        a, nd.transpose(b, axes=(0, 2, 1)),
        nd.broadcast_axes(nd.expand_dims(b_squared, axis=-2), axis=1,
                          size=a.shape[1]), alpha=-2
    ), nd.expand_dims(a_squared, axis=-1))
    res = nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max))
    return res
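A usage sketch for batched_l2_dist (not from the original source; the shapes, imports, and variable names below are illustrative):

from mxnet import nd

# hypothetical inputs: a batch of 2 sets of 3 and 4 vectors in 5 dimensions
a = nd.random.normal(shape=(2, 3, 5))
b = nd.random.normal(shape=(2, 4, 5))
dist = batched_l2_dist(a, b)
print(dist.shape)  # (2, 3, 4); dist[k, i, j] == ||a[k, i] - b[k, j]||_2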
def extended_jaccard_dist(x, y, pw=False):
    score = dot_dist(x, y, pw)
    x = nd.norm(x, ord=2, axis=-1) ** 2
    y = nd.norm(y, ord=2, axis=-1) ** 2
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return score / (x + y - score)
def cosine_dist(x, y, pw=False):
    score = dot_dist(x, y, pw)
    x = nd.norm(x, ord=2, axis=-1)
    y = nd.norm(y, ord=2, axis=-1)
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return score / (x * y)
def debug_norm_all(self, debug_gnorm=True):
    if debug_gnorm:
        for k, v, grad_v in zip(self._param_names,
                                self._exec_group.param_arrays,
                                self._exec_group.grad_arrays):
            logging.debug("%s: v-norm: %g, g-norm: %g"
                          % (k, nd.norm(v[0]).asnumpy()[0],
                             nd.norm(grad_v[0]).asnumpy()[0]))
    else:
        for k, v in zip(self._param_names, self._exec_group.param_arrays):
            logging.debug("%s: v-norm: %g" % (k, nd.norm(v[0]).asnumpy()[0]))
def cal_my_acc(test_files, target_files):
    '''
    This method is deprecated.
    :param test_files:
    :param target_files:
    :return:
    '''
    mTransform = MTransform()
    normalize = transforms.Normalize(mean=0.5, std=0.5)
    transform = transforms.Compose([
        # transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    model = sphere_net.SphereNet20()
    model.load_params("log_bn_dy/spherenet.model", ctx=mx.gpu())
    correct = 0
    total = 0

    # embed every target (gallery) image once
    target_emb = {}
    for target_file in target_files:
        target_image = transform(nd.array(Image.open(target_file))).as_in_context(mx.gpu())
        target_image = nd.expand_dims(target_image, axis=0)
        target_label = ''.join(target_file.split('/')[-1].split('.')[:-1])
        target_out = model(target_image)
        target_emb[target_label] = target_out

    # embed each test image and match it to the most similar target by cosine similarity
    test_emb = {}
    for test_file in test_files:
        test_image = Image.open(test_file)
        test_image = nd.expand_dims(transform(nd.array(test_image)),
                                    axis=0).as_in_context(mx.gpu())
        test_label = ''.join(test_file.split('/')[-1].split('.')[:-1])
        test_out = model(test_image)
        max_s = mx.nd.zeros(1, ctx=mx.gpu())
        max_label = ''
        sims = {}
        for target_label, target_out in target_emb.items():
            similarity = nd.sum(test_out * target_out) / \
                         (nd.norm(test_out) * nd.norm(target_out))
            sims[target_label] = similarity.asscalar()
            if max_s < similarity:
                max_s = similarity
                max_label = target_label
        if ''.join(max_label.split('_')[:-1]) == ''.join(test_label.split('_')[:-1]):
            correct += 1
        else:
            print(test_label, max_s.asscalar(), max_label)
        total += 1
        test_emb[test_label] = test_out
    # print(correct, total, float(correct) / total)
    return float(correct) / total, test_emb, target_emb
def f(a):
    b = a * 2
    print('a', a)
    print('nd.norm(a).asscalar()', nd.norm(a).asscalar())
    print('nd.norm(b).asscalar()', nd.norm(b).asscalar())
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
def _realize_parameters(sym, params, graph, inputs_ext,
                        target_bits={}, params_sim={}):
    logger = logging.getLogger('log.calib.realize.parameters')
    name = sym.attr('name')
    attr = sym.list_attr()
    if 'precision' not in attr or name in inputs_ext:
        return sym, params
    target_bit = int(attr['precision'])
    data = params[name]
    params[name] = sim.int_realize(data, target_bit, logger=logger)
    # calculate error
    error = params[name].astype('float32') - data
    error_rate = error / data
    if nd.sum(error).asscalar() == 0:
        rate = 0
    else:
        rate = nd.norm(error_rate).asscalar() / np.product(data.shape)
    if rate > 0.001:
        logger.warn("realize parameter %-60s avg error=%10.9f shape=%s",
                    name, rate, data.shape)
    else:
        logger.debug("realize parameter %-60s avg error=%10.9f shape=%s",
                     name, rate, data.shape)
    return sym, params
def _realize_parameters(sym, params, graph, inputs_ext, precs):
    logger = logging.getLogger('log.realize.parameters')
    name, op_name = sym.attr('name'), sym.attr('op_name')
    attr = sym.list_attr()
    if op_name != 'null':
        return sym, params
    if name in inputs_ext:
        attr['precision'] = str(precs[name][out_key])
        return mx.sym.var(name, attr=attr), params
    prec = precs[name][out_key]
    data = params[name]
    params[name] = sim.int_realize(data, prec, logger=logger)
    # calculate error
    error = params[name].astype('float32') - data
    if nd.sum(error).asscalar() == 0:
        rate = 0
    else:
        rate = nd.norm(error / data).asscalar() / np.product(data.shape)
    if rate > 0.001:
        logger.warn("realize parameter %-60s avg error=%10.9f shape=%s",
                    name, rate, data.shape)
    else:
        logger.debug("realize parameter %-60s avg error=%10.9f shape=%s",
                     name, rate, data.shape)
    attr['precision'] = str(prec)
    node = mx.sym.var(name, attr=attr)
    return node, params
def infer(self, head_emb, rel_emb, tail_emb):
    head_emb = head_emb.expand_dims(axis=1)
    rel_emb = rel_emb.expand_dims(axis=0)
    score = (head_emb + rel_emb).expand_dims(axis=2) - \
            tail_emb.expand_dims(axis=0).expand_dims(axis=0)

    return self.gamma - nd.norm(score, ord=self.dist_ord, axis=-1)
def my_loss(data, nc, ns, nq):
    data = data.astype('float64')
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0), axis=2) ** 2

    weight = nd.zeros((nc * nq, nc), ctx=data.context, dtype='float64')
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
    weight2 = 1 - weight

    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.sum(temp1, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label

    # The code below is unreachable because of the early return above;
    # it is kept from an earlier formulation of the loss.
    loss1 = nd.sum(data_center_dis * weight)
    temp = nd.sum(nd.exp(-data_center_dis), axis=1)
    loss2 = nd.sum(nd.log(temp))
    if loss1 is np.nan or loss2 is np.nan:
        raise StopIteration
    return (loss1 + loss2) / (nc * nq), label
def proto_loss(embedding, nc, ns, nq):
    embedding = embedding.astype('float64')
    cls_data = nd.reshape(embedding[0:nc * ns], (nc, ns, -1))
    cls_data.attach_grad()
    cls_center = nd.mean(cls_data, axis=1)
    data_center_dis = nd.norm(embedding[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0), axis=2) ** 2
    # print(nd.max(data_center_dis).asscalar())

    weight = nd.zeros((nc * nq, nc), ctx=embedding.context, dtype='float64')
    pick_vec = nd.zeros((nc * nq), ctx=embedding.context)
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
        pick_vec[i * nq:i * nq + nq] = i

    """
    temp = nd.SoftmaxOutput(-data_center_dis, label)
    temp = nd.log(temp) * weight
    temp = nd.sum(-temp, axis=1)
    predict = nd.argmin(data_center_dis, axis=1)
    return -temp * nd.log(temp), predict
    """
    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.pick(temp1, index=pick_vec, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label
def _get_opt(out, lambd):
    absmax = out.abs().max().asscalar()
    if lambd is None:
        return absmax

    mean = nd.mean(out).asscalar()
    sqrt_n = math.sqrt(np.product(out.shape))
    std = nd.norm(out - mean).asscalar() / sqrt_n
    alpha = abs(mean) + lambd * std

    # pos_out = nd.abs(out)
    # pos_mean = nd.mean(pos_out).asscalar()
    # pos_std = nd.norm(pos_out - pos_mean).asscalar() / sqrt_n
    # pos_alpha = abs(pos_mean) + lambd * pos_std

    opt = absmax
    if alpha < 0.95 * absmax:
        print("mean, std = [", mean, std, "]",
              "alpha=", alpha, "absmax=", absmax)
        opt = alpha
    # if opt > 30:
    #     print("mean, std = [", mean, std, "]", "alpha=", alpha,
    #           "absmax=", absmax)
    #     print("ABS mean, std = [", pos_mean, pos_std, "]",
    #           "alpha=", pos_alpha, "absmax=", absmax)
    return opt
def compute_retrospective_loss(self, observed_arr, encoded_arr,
                               decoded_arr, re_encoded_arr):
    '''
    Compute retrospective loss.

    Returns:
        `mxnet.ndarray.NDArray` of losses, one value per sample in the batch.
    '''
    if self.__output_neuron_count == self.__hidden_neuron_count:
        target_arr = nd.broadcast_sub(
            encoded_arr,
            nd.expand_dims(observed_arr.mean(axis=2), axis=2)
        )
        summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))
    else:
        # For each batch, draw samples from the Uniform distribution.
        if self.__output_neuron_count > self.__hidden_neuron_count:
            all_dim_arr = np.arange(self.__output_neuron_count)
            np.random.shuffle(all_dim_arr)
            choiced_dim_arr = all_dim_arr[:self.__hidden_neuron_count]
            target_arr = nd.broadcast_sub(
                encoded_arr,
                nd.expand_dims(observed_arr[:, :, choiced_dim_arr].mean(axis=2), axis=2)
            )
            summary_delta_arr = nd.sqrt(
                nd.power(decoded_arr[:, :, choiced_dim_arr] - target_arr, 2)
            )
        else:
            all_dim_arr = np.arange(self.__hidden_neuron_count)
            np.random.shuffle(all_dim_arr)
            choiced_dim_arr = all_dim_arr[:self.__output_neuron_count]
            target_arr = nd.broadcast_sub(
                encoded_arr[:, :, choiced_dim_arr],
                nd.expand_dims(observed_arr.mean(axis=2), axis=2)
            )
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))

    match_delta_arr = None
    for i in range(self.__batch_size):
        arr = nd.sqrt(nd.power(encoded_arr[i, -1] - re_encoded_arr[i, -1], 2))
        if match_delta_arr is None:
            match_delta_arr = nd.expand_dims(arr, axis=0)
        else:
            match_delta_arr = nd.concat(
                match_delta_arr,
                nd.expand_dims(arr, axis=0),
                dim=0
            )

    delta_arr = summary_delta_arr + nd.expand_dims(
        self.__retrospective_lambda * match_delta_arr, axis=1)

    # clip the delta by its global L2 norm
    v = nd.norm(delta_arr)
    if v > self.__grad_clip_threshold:
        delta_arr = delta_arr * self.__grad_clip_threshold / v

    loss = nd.mean(delta_arr, axis=0, exclude=True)
    return loss
def edge_func(self, edges):
    head = edges.src['emb']
    tail = edges.dst['emb']
    rel = edges.data['emb']
    score = head + rel - tail
    return {'score': self.gamma - nd.norm(score, ord=self.dist_ord, axis=-1)}
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
    hidden_dim = heads.shape[1]
    heads = heads + relations
    heads = heads.reshape(num_chunks, chunk_size, 1, hidden_dim)
    tails = tails.reshape(num_chunks, 1, neg_sample_size, hidden_dim)
    return gamma - nd.norm(heads - tails, ord=1, axis=-1)
def fn(heads, relations, tails, num_chunks, chunk_size, neg_sample_size):
    relations = relations.reshape(num_chunks, -1, self.relation_dim)
    heads = heads - relations
    heads = heads.reshape(num_chunks, -1, 1, self.relation_dim)
    score = heads - tails
    return gamma - nd.norm(score, ord=1, axis=-1)
def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
         data_inputs=None,
         learning_rate=None,
         lr_scheduler=None, prior_precision=1,
         out_grad_f=None,
         initializer=None,
         minibatch_size=100, thin_interval=100, burn_in_iter_num=1000,
         task='classification',
         dev=mx.gpu()):
    if out_grad_f is None:
        label_key = list(set(data_inputs.keys()) - set(['data']))[0]
    exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=learning_rate,
                                    rescale_grad=X.shape[0] / minibatch_size,
                                    lr_scheduler=lr_scheduler,
                                    wd=prior_precision)
    updater = mx.optimizer.get_updater(optimizer)
    sample_pool = []
    start = time.time()
    for i in range(total_iter_num):
        indices = numpy.random.randint(X.shape[0], size=minibatch_size)
        X_batch = X[indices]
        Y_batch = Y[indices]
        exe.arg_dict['data'][:] = X_batch
        if out_grad_f is None:
            exe.arg_dict[label_key][:] = Y_batch
            exe.forward(is_train=True)
            exe.backward()
        else:
            exe.forward(is_train=True)
            exe.backward(out_grad_f(exe.outputs, nd.array(Y_batch, ctx=dev)))
        for k in params:
            updater(k, params_grad[k], params[k])
            print(k, nd.norm(params_grad[k]).asnumpy())
        if i < burn_in_iter_num:
            continue
        else:
            if 0 == (i - burn_in_iter_num) % thin_interval:
                if optimizer.lr_scheduler is not None:
                    lr = optimizer.lr_scheduler(optimizer.num_update)
                else:
                    lr = learning_rate
                sample_pool.append([lr, copy_param(exe)])
        if (i + 1) % 100000 == 0:
            end = time.time()
            if task == 'classification':
                print("Current Iter Num: %d" % (i + 1),
                      "Time Spent: %f" % (end - start))
                test_correct, test_total, test_acc = \
                    sample_test_acc(exe, sample_pool=sample_pool, X=X_test, Y=Y_test,
                                    label_num=10, minibatch_size=minibatch_size)
                print("Test %d/%d=%f" % (test_correct, test_total, test_acc))
            else:
                print("Current Iter Num: %d" % (i + 1),
                      "Time Spent: %f" % (end - start), "MSE:", end=' ')
                print(sample_test_regression(exe=exe, sample_pool=sample_pool,
                                             X=X_test, Y=Y_test,
                                             minibatch_size=minibatch_size,
                                             save_path='regression_SGLD.txt'))
            start = time.time()
    return exe, sample_pool
def f(a):
    b = a * 2
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
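A sketch of how this control-flow example is typically exercised with autograd (assuming the standard MXNet autograd workflow; the input shape is illustrative). Because f effectively multiplies its input by a data-dependent constant, the gradient of the output with respect to a equals c / a:

from mxnet import nd, autograd

a = nd.random.normal(shape=(1,))
a.attach_grad()
with autograd.record():
    c = f(a)
c.backward()
print(a.grad == c / a)  # [1.]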
def calculate_norm(x, y):
    assert x.shape == y.shape
    ndims = np.product(x.shape)
    x = nd.reshape(x, shape=(ndims,))
    y = nd.reshape(y, shape=(ndims,))
    res = x - y
    nx = nd.norm(x)
    ny = nd.norm(y)
    nr = nd.norm(res)
    print("saving...")
    f = "/home/ryt/data/cmp_"
    names = ["nx", "ny", "nr"]
    objs = [nx, ny, nr]
    for obj in objs:
        print(type(obj), obj.shape)
    for i in range(3):
        nd.save(f + names[i], objs[i])
    print('success')
def norm_clipping(params_grad, threshold):
    assert isinstance(params_grad, dict)
    norm_val = numpy.sqrt(
        sum([nd.norm(grad).asnumpy()[0] ** 2 for grad in params_grad.values()]))
    # print('grad norm: %g' % norm_val)
    ratio = 1.0
    if norm_val > threshold:
        ratio = threshold / norm_val
        for grad in params_grad.values():
            grad *= ratio
    return norm_val
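A hedged usage sketch for the dict-based norm_clipping above (the parameter names and shapes are hypothetical). The gradients are rescaled in place whenever their global L2 norm exceeds the threshold, and the pre-clipping norm is returned:

import numpy
from mxnet import nd

params_grad = {
    'fc_weight': nd.random.normal(shape=(16, 8)),  # made-up gradient arrays
    'fc_bias': nd.random.normal(shape=(16,)),
}
pre_clip_norm = norm_clipping(params_grad, threshold=1.0)
post_clip_norm = numpy.sqrt(
    sum(nd.norm(g).asscalar() ** 2 for g in params_grad.values()))
print(pre_clip_norm, post_clip_norm)  # post_clip_norm is at most ~1.0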
def f(a):
    b = a * 2
    # scalar value of the L2 norm of b
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    # scalar value of the sum over b's axes
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
def _get_opt(out, lambd):
    absmax = out.abs().max().asscalar()
    if lambd is None:
        return absmax

    mean = nd.mean(out).asscalar()
    std = nd.norm(out - mean).asscalar() / math.sqrt(np.product(out.shape))
    alpha = abs(mean) + lambd * std
    if alpha < 0.95 * absmax:
        print("[", mean, std, "]", alpha, absmax)
        return alpha
    return absmax
def nd_global_norm(t_list):
    """Computes the global norm of multiple tensors.

    Given a tuple or list of tensors t_list, this operation returns the
    global norm of the elements in all tensors in t_list. The global norm
    is computed as:

        ``global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))``

    Any entries in t_list that are of type None are ignored.

    Parameters
    ----------
    t_list : list or tuple
        The NDArray list

    Returns
    -------
    ret : NDArray
        The global norm. The shape of the NDArray will be (1,)

    Examples
    --------
    >>> x = mx.nd.ones((2, 3))
    >>> y = mx.nd.ones((5, 6))
    >>> z = mx.nd.ones((4, 2, 3))
    >>> print(nd_global_norm([x, y, z]).asscalar())
    7.74597
    >>> xnone = None
    >>> ret = nd_global_norm([x, y, z, xnone])
    >>> print(ret.asscalar())
    7.74597
    """
    ret = None
    for arr in t_list:
        if arr is not None:
            if ret is None:
                ret = nd.square(nd.norm(arr))
            else:
                ret += nd.square(nd.norm(arr))
    ret = nd.sqrt(ret)
    return ret
def f(a):
    b = a * 2
    i = 0
    while nd.norm(b).asscalar() < 1000:
        i += 1
        print(i)
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        print('100')
        c = 100 * b
    return c
def norm_clipping(params_grad, threshold):
    norm_val = 0.0
    for i in range(len(params_grad[0])):
        norm_val += np.sqrt(
            sum([nd.norm(grads[i]).asnumpy()[0] ** 2 for grads in params_grad]))
    norm_val /= float(len(params_grad[0]))

    if norm_val > threshold:
        ratio = threshold / float(norm_val)
        for grads in params_grad:
            for grad in grads:
                grad[:] *= ratio
    return norm_val
def predict(net, data_loader, ctx):
    label = []
    acc = 0
    for data, cls_id in data_loader:
        data = data.as_in_context(ctx)
        out = net(data)
        min_dis = math.inf
        p_key = None
        for key in net.cls_center:
            cur_dis = nd.norm(net.cls_center[key] - out)
            if cur_dis.asscalar() < min_dis:
                min_dis = cur_dis.asscalar()
                p_key = key
        if p_key == cls_id.asscalar():
            acc += 1
        label.append(p_key)
    return label, acc / len(label)
def get_opt(self, raw_ft, out, **kwargs):
    logger = kwargs.get("logger", logging.getLogger("optimize"))
    hist_ft = kwargs.get("hist_ft", None)
    name = kwargs.get("name", _NULL_NAME)

    if isinstance(raw_ft, AFeature):
        # hyperparameter 'lambd' for fine tuning
        absmax = raw_ft.get()
        if self.lambd is not None:
            mean = nd.mean(out).asscalar()
            sqrt_n = math.sqrt(np.product(out.shape))
            std = nd.norm(out - mean).asscalar() / sqrt_n
            alpha = abs(mean) + self.lambd * std
            absmax = alpha if alpha < 0.95 * absmax else absmax
        if hist_ft is None:
            p = logger.debug if absmax < 30 else logger.warn
            p("collect symbol %-40s, out_shape=%-20s, opt: (%s)",
              name, out.shape, absmax)
            opt = AFeature(absmax)
        else:
            opt = AFeature(max(absmax, hist_ft.get()))
    elif isinstance(raw_ft, MMFeature):
        minv, maxv = raw_ft.get()
        if hist_ft is None:
            opt = MMFeature(minv, maxv)
        else:
            hminv, hmaxv = hist_ft.get()
            opt = MMFeature(min(minv, hminv), max(maxv, hmaxv))
    elif isinstance(raw_ft, ALFeature):
        if hist_ft is None:
            opt = raw_ft
        else:
            absmax_list = raw_ft.get()
            habsmax_list = hist_ft.get()
            nabsmax_list = [
                max(absmax_list[i], habsmax_list[i])
                for i in range(len(absmax_list))
            ]
            opt = ALFeature(nabsmax_list)
    else:
        raise TypeError("Unsupported feature type: %s for HVOptimizor"
                        % type(raw_ft))
    return opt
def get_global_norm_val(self):
    """Get the overall gradient norm ||W||_2

    Parameters
    ----------
    net : mx.mod.Module

    Returns
    -------
    norm_val : float
    """
    assert self.binded and self.params_initialized
    # TODO The code in the following will cause the estimated norm to be
    #      different for multiple gpus
    norm_val = 0.0
    for i in range(len(self._exec_group.grad_arrays[0])):
        norm_val += np.sqrt(
            sum([nd.norm(grads[i]).asnumpy()[0] ** 2
                 for grads in self._exec_group.grad_arrays]))
    norm_val /= float(len(self._exec_group.grad_arrays[0]))
    return norm_val
def l1_dist(x, y, pw=False):
    if pw is False:
        x = x.expand_dims(axis=1)
        y = y.expand_dims(axis=0)

    return -nd.norm(x - y, ord=1, axis=-1)
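An illustrative call to l1_dist (the shapes below are made up). With pw=False the inputs are broadcast to all pairs; with pw=True they are compared row by row and must have matching shapes. Scores are negated distances, so larger means closer:

from mxnet import nd

x = nd.random.normal(shape=(3, 8))
y = nd.random.normal(shape=(4, 8))
print(l1_dist(x, y).shape)     # (3, 4): one score per (x_i, y_j) pair
print(l1_dist(x, x, pw=True))  # (3,): all zeros, each row compared with itself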
def gradient_penalty(gradient):
    gradient = gradient.reshape(gradient.shape[0], -1)
    gradient_norm = nd.norm(gradient, ord=2, axis=1)
    penalty = nd.mean((gradient_norm - 1) ** 2)
    return penalty
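A minimal, hypothetical smoke test for gradient_penalty: the batch of "gradients" is random rather than produced by autograd, which is enough to exercise the flattening, the per-sample L2 norm, and the mean squared deviation from 1:

from mxnet import nd

fake_grad = nd.random.normal(shape=(16, 3, 32, 32))  # stand-in for real gradients
print(gradient_penalty(fake_grad).asscalar())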