def house_prise():
    """
    Estimate house prices with linear regression.
    :return:
    """
    # features
    X = nd.array([[120, 2], [100, 1], [130, 3]])
    logger.info(nd.norm(X, axis=0))
    # target values
    lables = nd.array([130, 98, 140])
    logger.info(nd.norm(lables, axis=0))
    # weights and bias
    w = nd.random.normal(scale=0.01, shape=(2, 1))
    b = nd.zeros(shape=(1, ))
    w.attach_grad()
    b.attach_grad()
    for i in range(5):
        for x, y in data_iter(10, X, lables):
            with autograd.record():
                l = squared_loss(linreg(x, w, b), y)
            logger.info(l.mean().asnumpy())
            l.backward()
            sgd([w, b], 0.02, 10)
    logger.info(w)
    logger.info(b)
def test_norm():
    a = np.array(np.full((1, LARGE_X), 3))
    b = np.array(np.full((1, LARGE_X), 4))
    c = nd.array(np.concatenate((a, b), axis=0))
    d = nd.norm(c, ord=2, axis=0)
    e = nd.norm(c, ord=1, axis=0)
    assert d.shape[0] == LARGE_X
    assert e.shape[0] == LARGE_X
    assert d[-1] == 5
    assert e[-1] == 7
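# A minimal, self-contained sketch of the same check with small arrays; LARGE_X
# and the surrounding test harness above come from the original test setup and
# are not reproduced here.  Stacking rows of 3s and 4s gives column-wise norms
# of sqrt(3^2 + 4^2) = 5 (L2) and |3| + |4| = 7 (L1).
import numpy as np
from mxnet import nd

a = np.full((1, 5), 3)
b = np.full((1, 5), 4)
c = nd.array(np.concatenate((a, b), axis=0))   # shape (2, 5)
print(nd.norm(c, ord=2, axis=0))               # every entry is 5
print(nd.norm(c, ord=1, axis=0))               # every entry is 7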
def fltrust(epoch, gradients, net, lr, f, byz):
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    # let the malicious clients (first f clients) perform the byzantine attack
    param_list = byz(epoch, param_list, net, lr, f)
    n = len(param_list) - 1  # -1 so as to not include the gradient of the server model

    # use the last gradient (server update) as the trusted source
    #print(nd.array(param_list[-1]).shape)
    baseline = nd.array(param_list[-1]).squeeze()
    #print(baseline.shape)
    cos_sim = []
    new_param_list = []

    #print(param_list[0].shape)
    print(nd.norm(baseline))

    # compute cos similarity
    for each_param_list in param_list:
        each_param_array = nd.array(each_param_list).squeeze()
        cos_sim.append(
            nd.dot(baseline, each_param_array) / (nd.norm(baseline) + 1e-9) /
            (nd.norm(each_param_array) + 1e-9))

    cos_sim = nd.stack(*cos_sim)[:-1]
    #print(cos_sim)
    cos_sim = nd.maximum(cos_sim, 0)  # relu
    cos_sim = nd.minimum(cos_sim, 1)
    #print(cos_sim)
    normalized_weights = cos_sim / (nd.sum(cos_sim) + 1e-9)  # weighted trust score
    #print(normalized_weights)

    # normalize the magnitudes and weight by the trust score
    for i in range(n):
        new_param_list.append(param_list[i] * normalized_weights[i] /
                              (nd.norm(param_list[i]) + 1e-9) * nd.norm(baseline))
        #print(normalized_weights[i] / (nd.norm(param_list[i]) + 1e-9) * nd.norm(baseline))
        #print("normalized weights: " + str(normalized_weights[i]))
        #print("baseline: " + str(nd.norm(baseline)))

    # update the global model
    global_update = nd.sum(nd.concat(*new_param_list, dim=1), axis=-1)
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        #print(global_update[idx:(idx+param.data().size)])
        param.set_data(param.data() - lr * global_update[idx:(
            idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
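# A minimal, standalone sketch (not part of the function above) of the trust-score
# weighting that fltrust applies: ReLU-clipped cosine similarity against the trusted
# server update, normalized into weights, and used to rescale each client update to
# the server update's magnitude.  The toy 2-D "gradients" here are illustrative only.
from mxnet import nd

client_updates = [nd.array([1.0, 0.0]), nd.array([-1.0, 0.0])]
server_update = nd.array([0.8, 0.6])            # plays the role of the baseline

scores = [max(float(nd.dot(server_update, g).asscalar()) /
              (float(nd.norm(server_update).asscalar()) *
               float(nd.norm(g).asscalar()) + 1e-9), 0.0)
          for g in client_updates]               # ReLU of cosine similarity
weights = [s / (sum(scores) + 1e-9) for s in scores]   # normalized trust scores
aggregate = sum(w * g * float(nd.norm(server_update).asscalar()) /
                (float(nd.norm(g).asscalar()) + 1e-9)
                for w, g in zip(weights, client_updates))
print(weights, aggregate)                        # the negatively aligned client gets zero weight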
def E(self, chi):
    al_id = chi[0:100]
    al_exp = chi[100:179]
    al_alb = chi[179:279]
    [s, pitch, yaw, roll] = chi[279:283, 0]
    t = chi[283:286]
    r = chi[286:]
    gamma = nd.reshape(r, (3, 9)).T
    lmks_2d = self.lmks['2d']
    lmks_3d_ind = self.lmks['3d']
    R = self.rot_mat(pitch, yaw, roll)

    # p = self.p_mu + self.A_id@al_id + self.A_exp@al_exp
    # b = self.b_mu + self.A_alb@al_alb
    # self.vertex = np.reshape(p, (no_of_ver, 3))
    # self.albedo = np.reshape(b, (no_of_ver, 3))
    # p,b = self.cal_ver_alb(al_id, al_exp, al_alb)
    self.cal_ver_alb(al_id, al_exp, al_alb)
    s = 150 / nd.max(self.vertex)
    q_world = s * nd.linalg.gemm2(R, self.vertex.T) + t
    # q_depth = [0, 0, 1]@[email protected]
    q_image = self.world_to_image(q_world.T)
    # tri_ind_info, bary_wts_info = rasterize_triangles(q_image, tri_mesh_data, h, w)
    # return tri_ind_info,bary_wts_info,albedo
    I_rend = self.render_color_image(q_image, self.albedo, gamma)
    self.I_rend = I_rend

    w_l = 10
    w_r = 5e-5
    E_con = (1 / self.no_of_face_pxls) * np.linalg.norm(
        I_rend - self.I_in)**2  # number of face pixels is approximately 28241
    E_lan = (1 / self.no_of_lmks) * np.linalg.norm(
        lmks_2d - q_image[lmks_3d_ind[0, :], :2])**2  # 68 landmarks
    E_reg = np.linalg.norm(al_id / self.std_id)**2 + np.linalg.norm(
        al_alb / self.std_alb)**2 + np.linalg.norm(al_exp / self.std_exp)**2

    # Gauss-Newton minimizes a sum of squares of residuals, and E (the objective
    # function) is that sum of squares.  For the Jacobian we only need the
    # residuals themselves, not their squares.
    E_con_r = np.sqrt(1 / self.no_of_face_pxls) * nd.norm(I_rend - self.I_in)
    E_lan_r = np.sqrt(w_l / self.no_of_lmks) * nd.norm(
        lmks_2d - q_image[lmks_3d_ind[0, :], :2], axis=1)
    E_reg_r = np.sqrt(w_r) * nd.concat(al_id / self.std_id,
                                       al_alb / self.std_alb,
                                       al_exp / self.std_exp,
                                       dim=0)
    return nd.concat(E_con_r, E_lan_r, E_reg_r[:, 0], dim=0)
def faba(epoch, gradients, net, lr, byz, f=0):
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, f, lr, np.arange(len(param_list)))
    faba_client_list = np.arange(len(param_list))
    dist = np.zeros(len(param_list))
    G0 = nd.mean(nd.concat(*param_list, dim=1), axis=-1, keepdims=1)
    for i in range(f):
        for j in range(len(param_list)):
            dist[j] = (nd.norm(G0 - param_list[j]) *
                       (faba_client_list[j] >= 0)).asscalar()
        client = int(np.argmax(dist))
        faba_client_list[client] = -1
        dist[client] = 0
        G0 = (G0 * (len(param_list) - i) - param_list[client]) / (
            len(param_list) - i - 1)
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(
            param.data() -
            lr * G0[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    del param_list
    del dist
    del G0
    return -np.sort(-faba_client_list)
def add_split(x, leaf, p_tau):
    center = leaf.parent['node'].center.data()
    radius = leaf.parent['node'].radius.data()
    tau = p_tau + nd.random.exponential(radius**-1)
    while 1:
        s = nd.random.normal(shape=(2, x.shape[-1]))
        s = s / nd.norm(s, axis=-1, keepdims=True)
        r = nd.random.uniform(low=nd.array([0]), high=radius)
        r = r * nd.random.uniform()**(1 / 3)
        if nd.sign(s[0][-1]) > 0:
            weight = s[0]
            bias = nd.dot(s[0], -1 * r * (s[1] + center))
            y = nd.sign(nd.dot(x, weight) + bias)
            if nd.abs(nd.sum(y)) != len(y):
                break
    split = Split(weight=weight,
                  bias=bias,
                  sharpness=3 / radius,
                  tau=tau,
                  decision=leaf.parent['decision'],
                  side=leaf.parent['side'])
    tree.splits.add(split)
    leaf.parent['node'].child['decision'] = split
    leaf.parent['decision'] = split
def get_dis(data, mean, dis_method='iou'):
    if dis_method == 'iou':
        # data = bs*(w, h) ndarray
        # mean = 1*(w, h) ndarray
        # |--------|-----|
        # | inters |     |
        # |--------|     | h
        # |              |
        # |--------------|
        #        w
        data_w, data_h = data.split(num_outputs=2, axis=-1)
        mean_w, mean_h = mean
        inters_w = nd.minimum(data_w, mean_w)
        inters_h = nd.minimum(data_h, mean_h)
        inters = inters_w * inters_h
        data_area = data_w * data_h
        mean_area = mean_w * mean_h
        ious = inters / (data_area + mean_area - inters)
        distance = 1 / ious
    elif dis_method == 'L2':
        vec = data - mean
        distance = nd.norm(vec, ord=2, axis=-1).reshape((-1, 1))
    return distance
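# A small worked example of the IoU distance above (get_dis is assumed to be in
# scope): one box of size (4, 6) against a mean box of size (2, 3).
from mxnet import nd

data = nd.array([[4.0, 6.0]])                     # bs x (w, h)
mean = (nd.array([2.0]), nd.array([3.0]))         # (mean_w, mean_h)
print(get_dis(data, mean, dis_method='iou'))
# intersection = 2*3 = 6, union = 24 + 6 - 6 = 24, IoU = 0.25, distance = 1/0.25 = 4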
def test_periodic_kernel_compute(
    x1, x2, amplitude, length_scale, frequency
) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    x1 = x1.reshape(batch_size, history_length_1, num_features)
    x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = frequency.reshape(batch_size, 1, 1)
    periodic = PeriodicKernel(amplitude, length_scale, frequency)

    exact = nd.zeros((batch_size, history_length_1, history_length_2))
    for i in range(history_length_1):
        for j in range(history_length_2):
            val = (
                2
                * (
                    nd.sin(frequency * math.pi * (x1[:, i, :] - x2[:, j, :]))
                    / length_scale
                )
                ** 2
            )
            exact[:, i, j] = (amplitude * nd.exp(-val)).reshape(-1)
    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(res - exact) < tol
def test_radial_basis_function_kernel(
    x1, x2, amplitude, length_scale, exact
) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    rbf = RBFKernel(amplitude, length_scale)

    exact = amplitude * nd.exp(-0.5 * exact / length_scale ** 2)
    res = rbf.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def test_periodic_kernel(x1, x2, amplitude, length_scale, exact) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = 1 / 24 * nd.ones_like(length_scale)
    periodic = PeriodicKernel(amplitude, length_scale, frequency)

    exact = amplitude * nd.exp(
        -2 * nd.sin(frequency * math.pi * nd.sqrt(exact))**2 / length_scale**2)
    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def mmd_loss(x, y, ctx_model, t=0.1, kernel='diffusion'):
    '''
    Computes the MMD loss with the information diffusion kernel.
    :param x: batch_size x latent dimension
    :param y:
    :param t:
    :return:
    '''
    eps = 1e-6
    n, d = x.shape
    if kernel == 'tv':
        sum_xx = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(i + 1, n):
                sum_xx = sum_xx + nd.norm(x[i] - x[j], ord=1)
        sum_xx = sum_xx / (n * (n - 1))

        sum_yy = nd.zeros(1, ctx=ctx_model)
        for i in range(y.shape[0]):
            for j in range(i + 1, y.shape[0]):
                sum_yy = sum_yy + nd.norm(y[i] - y[j], ord=1)
        sum_yy = sum_yy / (y.shape[0] * (y.shape[0] - 1))

        sum_xy = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(y.shape[0]):
                sum_xy = sum_xy + nd.norm(x[i] - y[j], ord=1)
        sum_xy = sum_xy / (n * y.shape[0])  # normalize the cross term
    else:
        qx = nd.sqrt(nd.clip(x, eps, 1))
        qy = nd.sqrt(nd.clip(y, eps, 1))
        xx = nd.dot(qx, qx, transpose_b=True)
        yy = nd.dot(qy, qy, transpose_b=True)
        xy = nd.dot(qx, qy, transpose_b=True)

        def diffusion_kernel(a, tmpt, dim):
            # return (4 * np.pi * tmpt)**(-dim / 2) * nd.exp(- nd.square(nd.arccos(a)) / tmpt)
            return nd.exp(-nd.square(nd.arccos(a)) / tmpt)

        off_diag = 1 - nd.eye(n, ctx=ctx_model)
        k_xx = diffusion_kernel(nd.clip(xx, 0, 1 - eps), t, d - 1)
        k_yy = diffusion_kernel(nd.clip(yy, 0, 1 - eps), t, d - 1)
        k_xy = diffusion_kernel(nd.clip(xy, 0, 1 - eps), t, d - 1)
        sum_xx = (k_xx * off_diag).sum() / (n * (n - 1))
        sum_yy = (k_yy * off_diag).sum() / (n * (n - 1))
        sum_xy = 2 * k_xy.sum() / (n * n)
    return sum_xx + sum_yy - sum_xy
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))           # fake bias
    wfake = wfake / nd.norm(wfake)                # rescale to unit length

    # make some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp[0]
            Y[i] = 1 if margin.asscalar() > 0 else -1
            i += 1
    return X, Y
def read_data(graph_path: str, features_path: str, apply_kernel: bool) -> Graph:
    vertex_map: Dict[str, Vertex] = {}
    class_map: Dict[str, int] = {}
    vertices: List[Vertex] = []
    features: List[List[float]] = []

    # Read features
    with open(features_path) as fin:
        for line in fin:
            # The format is "id feature* class"
            data = line.rstrip().split()
            clazz = class_map.setdefault(data[-1], len(class_map))
            features_raw = [float(x) for x in data[1:-1]]
            features.append(features_raw)
            vertex = Vertex(None, [], clazz, len(vertices))
            vertices.append(vertex)
            vertex_map[data[0]] = vertex

    if apply_kernel:
        def sqr(x):
            return x * x

        n = len(features)
        kernels = [[] for _ in range(n)]
        # feature_indices = random.choices(range(len(features[0])), k=100)
        nd_features = [nd.array(arr, ctx=data_ctx) for arr in features]
        # indices = random.choices(range(n), k=100)
        indices = range(n)
        for u in range(n):
            for v in indices:
                dif = nd_features[u] - nd_features[v]
                norm = float(nd.norm(dif).asscalar())
                res = math.exp(-0.5 * sqr(norm))
                # res = math.exp(-0.5 * norm)
                # print(norm, )
                kernels[u].append(100 * res)
            # print(kernels[u])
        features = kernels

    for v in vertices:
        v.features = nd.array(features[v.id], ctx=data_ctx).reshape(-1, 1)

    # Read graph
    with open(graph_path) as fin:
        for line in fin:
            (u, v) = [vertex_map[x] for x in line.rstrip().split()]
            u.neighbors.append(v)
            v.neighbors.append(u)

    num_features = len(vertices[0].features)
    for v in vertices:
        v.neighbors.append(v)
        v.degree = len(v.neighbors)
        assert len(v.features) == num_features
    return Graph(vertices, num_features, len(class_map))
def normalize(x, axis=-1):
    """Normalize to unit length along the specified axis.
    Args:
        x: mxnet NDArray
    Returns:
        x: mxnet NDArray, same shape as the input
    """
    x = 1. * x / (nd.norm(x, axis=axis, keepdims=True) + 1e-12)
    return x
def f(a):
    b = a * 2
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        c = b
    else:
        c = 100 * b
    return c
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))           # fake bias
    wfake = wfake / nd.norm(wfake)                # rescale to unit length

    # make some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 2 * (margin > 0) - 1
            i += 1
    return X, Y
def get_fake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))
    bfake = nd.random_normal(shape=(1))
    wfake = wfake / nd.norm(wfake)
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while i < samples:
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) and (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 1 if margin.asscalar() > 0 else -1
            i += 1
    return X, Y
def local_krum(param_list, f):
    k = len(param_list) - f - 2
    dist = mx.nd.zeros((len(param_list), len(param_list)))
    for i in range(0, len(param_list)):
        for j in range(0, i):
            dist[i][j] = nd.norm(param_list[i] - param_list[j])
            dist[j][i] = dist[i][j]
    sorted_dist = mx.nd.sort(dist)
    sum_dist = mx.nd.sum(sorted_dist[:, :k + 1], axis=1)
    model_selected = mx.nd.argmin(sum_dist).asscalar().astype(int)
    return model_selected
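# A hypothetical toy run of local_krum (assumed to be in scope): three updates
# clustered near [1, 1] plus one outlier, with f = 1.  Krum scores each update by
# the summed distance to its closest neighbors, so it should pick one of the
# clustered updates and never the outlier.
from mxnet import nd

updates = [nd.array([1.0, 1.0]), nd.array([1.1, 0.9]),
           nd.array([0.9, 1.1]), nd.array([10.0, -10.0])]
print(local_krum(updates, f=1))   # expected: 0, 1, or 2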
def lambda_max(epoch, v, net, f, lr):  # (m, c, params, global_param):
    if (f == 0):
        return 0.0
    m = len(v)
    dist = mx.nd.zeros((m, m))
    for i in range(0, m):
        for j in range(0, i):
            dist[i][j] = nd.norm(v[i] - v[j]) * lr
            dist[j][i] = dist[i][j]
    sorted_benign_dist = mx.nd.sort(dist[f:, f:])
    sum_benign_dist = mx.nd.sum(sorted_benign_dist[:, :(m - f - 1)], axis=1)
    min_distance = mx.nd.min(sum_benign_dist).asscalar()

    dist_global = mx.nd.zeros(m - f)
    for i in range(f, m):
        dist_global[i - f] = nd.norm(v[i]) * lr
    max_global_dist = mx.nd.max(dist_global).asscalar()

    scale = 1.0 / (len(v[0]))
    return (math.sqrt(scale) / (m - 2 * f - 1)) * min_distance + \
        math.sqrt(scale) * max_global_dist
def generate_weighted_disp_masks(self, flow, _LARGE_DISP):
    flow_mag = nd.norm(flow, axis=1, keepdims=True)
    flow_mag = nd.broadcast_div(flow_mag, _LARGE_DISP)
    flow_mag = nd.broadcast_minimum(flow_mag, nd.ones((1), ctx=flow_mag.context))
    small_disp_masks = 1.0 - flow_mag
    large_disp_masks = flow_mag
    stacked_mask = nd.concat(small_disp_masks, large_disp_masks, dim=1)
    return stacked_mask
def main():
    x = nd.arange(20)
    A = x.reshape(shape=(5, 4))
    print(A)
    print('A[2, 3] = ', A[2, 3])
    print('A[2, :] = ', A[2, :])
    print('A[:, 3] = ', A[:, 3])
    print('A.T = ', A.T)

    X = nd.arange(24).reshape(shape=(2, 3, 4))
    print(X)

    u = nd.array([1, 2, 4, 8])
    v = nd.ones_like(u) * 2
    print('u + v = ', u + v)
    print('u - v = ', u - v)
    print('u * v = ', u * v)
    print('u / v = ', u / v)

    B = nd.ones_like(A) * 3
    print('B = ', B)
    print('A + B = ', A + B)
    print('A * B = ', A * B)

    a = 2
    x = nd.ones(3)
    y = nd.zeros(3)
    print(x.shape)
    print(y.shape)
    print((a * x).shape)
    print((a * x + y).shape)

    print(nd.sum(u))
    print(nd.sum(A))
    print(nd.mean(A))
    print(nd.sum(A) / A.size)

    print(nd.dot(u, v))
    print(nd.sum(u * v))
    print(nd.dot(A, u))

    A = nd.ones(shape=(3, 4))
    B = nd.ones(shape=(4, 5))
    print(nd.dot(A, B))

    # L2 norm
    print(nd.norm(u))
    # L1 norm
    print(nd.sum(nd.abs(u)))
def retrain_enc(self, l2_alpha=0.1):
    docs = self.data.get_documents(key='train')
    with autograd.record():
        ### reconstruction phase ###
        y_onehot_u = self.Enc(docs)
        y_onehot_u_softmax = nd.softmax(y_onehot_u)
        x_reconstruction_u = self.Dec(y_onehot_u_softmax)

        logits = nd.log_softmax(x_reconstruction_u)
        loss_reconstruction = nd.mean(nd.sum(-docs * logits, axis=1))
        loss_reconstruction = loss_reconstruction + \
            l2_alpha * nd.mean(nd.norm(y_onehot_u, ord=1, axis=1))
    loss_reconstruction.backward()
    self.optimizer_enc.step(1)
    return loss_reconstruction.asscalar()
def sample(self, v0, min_steps=1, max_steps=100):
    # (v0, h0) -> (v1, h1) -> (v2, h2) -> ... -> (vt, ht)
    # (vc0, hc0) -> (vc1, hc1) -> ... -> (vct, hct)
    # Init: (v0, h0) = (vc0, hc0)
    # Iter: (v1, h1, vc0, hc0) -> (v2, h2, vc1, hc1) -> ...
    # Stop: (vt, ht) = (vct, hct)
    vc = v0
    hc = self.sample_h_given_v(vc)
    v = self.sample_v_given_h(hc)
    h = self.sample_h_given_v(v)
    discarded = 0
    vhist = [v]
    vchist = []
    for i in range(max_steps):
        vc, hc, v, h, disc = self.max_coup(vc, hc, v, h, max_try=10)
        discarded += disc
        vhist.append(v)
        vchist.append(vc)
        if i >= min_steps - 1 and nd.norm(v - vc).asscalar() == 0 and \
                nd.norm(h - hc).asscalar() == 0:
            break
    return nd.stack(*vhist), nd.stack(*vchist), discarded
def autogradV2():
    a = nd.random_normal(shape=3)
    a.attach_grad()
    with autograd.record():
        b = a * 2
        while (nd.norm(b) < 1000).asscalar():
            b = b * 2
        if (mx.nd.sum(b) > 0).asscalar():
            c = b
        else:
            c = 100 * b
    head_gradient = nd.array([0.01, 1.0, .1])
    c.backward(head_gradient)
    print(a.grad)
def multiply_norms(gradients, f):
    euclidean_distance = []
    for i, x in enumerate(gradients):
        norms = [nd.norm(p) for p in x]
        norm_product = 1
        for each in norms:
            norm_product *= float(each.asnumpy()[0])
        euclidean_distance.append((i, norm_product))
    # euclidean_distance = sorted(euclidean_distance, key=lambda x: x[1], reverse=True)
    # output = []
    # for i in range(f, len(gradients)):
    #     output.append(gradients[euclidean_distance[i][0]])
    output = [
        gradients[x[0]] for x in sorted(
            euclidean_distance, key=lambda x: x[1], reverse=True)[f:]
    ]
    return output
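# A hypothetical toy call of multiply_norms (assumed to be in scope): each
# "gradient" is a list of per-layer NDArrays, and the client whose product of
# layer norms is largest is dropped when f = 1.
from mxnet import nd

grads = [
    [nd.array([1.0, 0.0]), nd.array([2.0])],   # product of layer norms: 1 * 2 = 2
    [nd.array([3.0, 4.0]), nd.array([10.0])],  # product of layer norms: 5 * 10 = 50 (dropped)
]
kept = multiply_norms(grads, f=1)
print(len(kept))   # 1 -- only the small-norm gradient survives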
def cgc_by_layer(gradients, f):
    layer_list = []
    for layer in range(len(gradients[0])):
        grads = [x[layer] for x in gradients]
        norms = [nd.norm(p) for p in grads]
        euclidean_distance = [(i, norms[i]) for i in range(len(grads))]
        layer_output = [
            grads[x[0]] for x in sorted(
                euclidean_distance, key=lambda x: x[1], reverse=True)[f:]
        ]
        layer_list.append(layer_output)
    output = []
    for i in range(len(gradients) - f):
        grad = []
        for layer in range(len(gradients[0])):
            grad.append(layer_list[layer][i])
        output.append(grad)
    return output
def kmeans(in_dataSet, k, e):
    numSamples, dim = in_dataSet.shape
    dataSet = nd.zeros((numSamples, dim + 1), ctx=ctx)
    dataSet[:, 0:dim] = in_dataSet
    centroids = initCentroids(dataSet, k)
    for _ in range(e):
        # assignment step: label each sample with its nearest centroid
        for i in range(numSamples):
            minDist = 100000.0
            minIndex = 0
            for j in range(k):
                distance = nd.norm(centroids[j, :] - dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j
            dataSet[i][-1] = minIndex
        # update step: recompute each centroid from its assigned points
        for j in range(k):
            outdataSet = dataSet.asnumpy()
            pointsInCluster = outdataSet[outdataSet[:, -1] == j, :-1]
            centroids[j, :-1] = np.mean(pointsInCluster, axis=0)
    return dataSet
def normalize(x, p=2, axis=1, eps=1e-12):
    r"""Performs :math:`L_p` normalization of the input over the specified axis.

    For a tensor :attr:`x` of sizes :math:`(n_0, ..., n_{axis}, ..., n_k)`, each
    :math:`n_{axis}`-element vector :math:`v` along dimension :attr:`axis` is
    transformed as

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.

    With the default arguments it uses the Euclidean norm over vectors along
    dimension :math:`1` for normalization.

    Args:
        x: input ndarray of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        axis (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12
    """
    denom = nd.clip(nd.norm(x, ord=p, axis=axis, keepdims=True), eps, float('inf'))
    return x / denom
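# Example usage of the normalize function above (a minimal sketch): scale each
# row of a (2, 3) array to unit L2 norm.
from mxnet import nd

x = nd.array([[3.0, 4.0, 0.0], [0.0, 0.0, 2.0]])
print(normalize(x))                    # rows become [0.6, 0.8, 0.0] and [0.0, 0.0, 1.0]
print(nd.norm(normalize(x), axis=1))   # both row norms are ~1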
def extract_feat(ids, data, model):
    ctx = try_gpu()
    if len(ctx) > 1:
        _data_list = gluon.utils.split_and_load(data=data, ctx_list=ctx)
        _id_list = gluon.utils.split_and_load(data=ids, ctx_list=ctx)
    else:
        _data_list = [data.as_in_context(ctx[0])]
        _id_list = [ids.as_in_context(ctx[0])]
    # print(_id_list)
    data_list = []
    id_list = []
    for _ids, _data in zip(_id_list, _data_list):
        feats = model(_data)
        feats = map(lambda x: x / nd.norm(x), feats[:, :, 0, 0])
        feats = [v.expand_dims(axis=0) for v in feats]
        feats = nd.concatenate(feats)
        id_list.append(_ids)
        data_list.append(feats)
    id_list = nd.concatenate(id_list)
    data_list = nd.concatenate(data_list)
    return id_list, data_list
def bulyan(epoch, gradients, net, lr, byz, f=0):
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, f, lr, np.arange(len(param_list)))
    k = len(param_list) - f - 2

    dist = mx.nd.zeros((len(param_list), len(param_list)))
    for i in range(0, len(param_list)):
        for j in range(0, i):
            dist[i][j] = nd.norm(param_list[i] - param_list[j])
            dist[j][i] = dist[i][j]
    sorted_dist = mx.nd.sort(dist)
    sum_dist = mx.nd.sum(sorted_dist[:, :k + 1], axis=1)

    bulyan_list = []
    bul_client_list = np.ones(len(param_list)) * (-1)
    for i in range(len(param_list) - 2 * f):
        chosen = int(nd.argmin(sum_dist).asscalar())
        sum_dist[chosen] = 10**8
        bul_client_list[i] = chosen
        bulyan_list.append(param_list[chosen])
        for j in range(len(sum_dist)):
            sum_dist[j] = sum_dist[j] - dist[j][chosen]

    sorted_array = nd.sort(nd.concat(*bulyan_list, dim=1), axis=-1)
    trim_nd = nd.mean(sorted_array[:, f:(len(bulyan_list) - f)],
                      axis=-1, keepdims=1)

    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(
            param.data() -
            lr * trim_nd[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    return trim_nd, bul_client_list
# gluonloss = gluon.loss.L2Loss()
gluonloss = gluon.loss.KLDivLoss(from_logits=False)
smoothed_loss = "null"
for e in range(epochs):
    # This loss function produces the one already done if it is the best,
    # or else the closest improvement
    for i, (data, labels) in enumerate(train_data):
        data = data.as_in_context(model_ctx)
        labels = labels.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
        scores = make_batch_scores(output, labels, symbols)
        avgscore = nd.mean(scores, axis=(1, 2), keepdims=True)
        # print(avgscore)
        scores -= avgscore
        scores /= nd.norm(scores)
        with autograd.record():
            loss = gluonloss(output, scores)
        if (i % 100 == 0):
            print(i)
            print("data")
            print(data)
            print("output")
            print(output.argmax(axis=2))
        loss.backward()
        trainer.step(data.shape[0])
        if (smoothed_loss == "null"):
            smoothed_loss = nd.mean(loss).asscalar()
        else:
            smoothed_loss = smoothed_loss * smoothing + \
                (1 - smoothing) * nd.mean(loss).asscalar()
        # print(nd.mean(loss).asscalar())