def add_split(x, leaf, p_tau):
    center = leaf.parent['node'].center.data()
    radius = leaf.parent['node'].radius.data()
    tau = p_tau + nd.random.exponential(radius**-1)
    # rejection-sample a hyperplane until it actually separates the points in x
    while True:
        s = nd.random.normal(shape=(2, x.shape[-1]))
        s = s / nd.norm(s, axis=-1, keepdims=True)
        r = nd.random.uniform(low=nd.array([0]), high=radius)
        r = r * nd.random.uniform()**(1 / 3)  # cube root: uniform radius in a 3-d ball
        if nd.sign(s[0][-1]) > 0:  # fix the orientation of the normal vector
            weight = s[0]
            # plane with normal s[0] through the point center + r * s[1]
            bias = nd.dot(s[0], -(r * s[1] + center))
            y = nd.sign(nd.dot(x, weight) + bias)
            if nd.abs(nd.sum(y)) != len(y):  # both sides are non-empty
                break
    split = Split(weight=weight, bias=bias, sharpness=3 / radius, tau=tau,
                  decision=leaf.parent['decision'], side=leaf.parent['side'])
    tree.splits.add(split)
    leaf.parent['node'].child['decision'] = split
    leaf.parent['decision'] = split
def forward(self, x_i, x_j, t_i, t_j):
    s_i = self.scorer(x_i)
    s_j = self.scorer(x_j)
    s_diff = s_i - s_j
    if self.loss_type == 'hinge':
        loss = nd.relu(1.0 - s_diff * nd.sign(t_i - t_j))
    else:  # more loss_types can be defined here
        loss = nd.sign(t_j - t_i) * s_diff / 2. + nd.log(1 + nd.exp(-s_diff))
    # loss = nd.mean(loss, axis=0)
    return loss
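# Hedged usage sketch for the pairwise ranking loss above. The scorer, the
# loss_type attribute, and the wrapping Block are assumptions; here the two
# branches are evaluated directly on hypothetical score/relevance tensors.
import mxnet.ndarray as nd

s_i = nd.array([2.0, 0.5])   # hypothetical scores for items i
s_j = nd.array([1.0, 1.5])   # hypothetical scores for items j
t_i = nd.array([1.0, 0.0])   # relevance labels for items i
t_j = nd.array([0.0, 1.0])   # relevance labels for items j

s_diff = s_i - s_j
hinge_loss = nd.relu(1.0 - s_diff * nd.sign(t_i - t_j))
ranknet_loss = nd.sign(t_j - t_i) * s_diff / 2. + nd.log(1 + nd.exp(-s_diff))
print(hinge_loss.asnumpy(), ranknet_loss.asnumpy())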
def predict_auc(net, dataloader, length):
    num_correct = 0.0
    num_total = length
    prediclist = []
    labellist = []
    for i, (a, label) in enumerate(dataloader):
        a = a.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        output = net(a)
        prediction = (nd.sign(output) + 1) / 2  # map {-1, +1} outputs to {0, 1}
        logi = logistic(output)  # `logistic` (elementwise sigmoid) is defined elsewhere
        prediclist.append(logi.asnumpy())
        labellist.append(label.asnumpy())
        num_correct += nd.sum(prediction == label)
    print("Accuracy: %0.3f (%s/%s)" % (num_correct.asscalar() / num_total,
                                       num_correct.asscalar(), num_total))
    from sklearn.metrics import roc_curve, auc
    ytrue = [item[0] for batch in labellist for item in batch]
    ypred = [item[0] for batch in prediclist for item in batch]
    fpr, tpr, thresholds = roc_curve(ytrue, ypred, pos_label=1)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (1, roc_auc))
    print('AUC: %.5f' % roc_auc)
def get_final_preds(batch_heatmaps, center, scale):
    coords, maxvals = get_max_pred(batch_heatmaps)
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    # post-processing
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = nd.concat(hm[py][px + 1] - hm[py][px - 1],
                                 hm[py + 1][px] - hm[py - 1][px],
                                 dim=0)
                coords[n][p] += nd.sign(diff) * .25
    preds = nd.zeros_like(coords)
    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])
    return preds, maxvals
def implement_1(self, x, label):
    '''following the SphereFace paper'''
    # weight normalization
    with x.context:
        w = self.weight.data()
        w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))
    # cos_theta = x'w / |x|;  note: |w| = 1
    x_norm = nd.power(x, 2)
    x_norm = nd.sum(x_norm, axis=1)
    x_norm = nd.sqrt(x_norm)
    cos_theta = nd.dot(x, w_norm, transpose_b=True)
    cos_theta = cos_theta / x_norm.reshape((-1, 1))
    cos_theta = nd.clip(cos_theta, -1, 1)
    # cos_m_theta = cos(m * theta)
    cos_m_theta = self.margin_cos[self.margin](cos_theta)
    # k = floor(m * theta / pi), as in the paper
    with mx.autograd.pause():
        theta = nd.arccos(cos_theta)
        k = nd.floor(self.margin * theta / math.pi)
    # i == j uses phi_theta; i != j keeps cos_theta
    phi_theta = ((-1)**k) * cos_m_theta - 2 * k
    x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
    x_norm_cos_theta = x_norm.reshape((-1, 1)) * cos_theta
    # i == j index
    with mx.autograd.pause():
        index = nd.one_hot(label, x_norm_phi_theta.shape[1])
    # output
    with mx.autograd.pause():
        lamb = self.__get_lambda()
    output = x_norm_cos_theta * 1.0
    output = output - x_norm_cos_theta * index / (1 + lamb)
    output = output + x_norm_phi_theta * index / (1 + lamb)
    return output
def tesselate(x, leaf, p_tau):
    if len(x) < 2:
        return
    add_split(x, leaf, p_tau)
    split = leaf.parent['decision']
    node = leaf.parent['node']
    side = nd.sign(split.split(x))
    order = nd.argsort(side, axis=None)
    x = x[order, :]
    side = side[order, :]
    # index of the first point on the positive side of the split
    orderside = nd.argsort(side, axis=0) * side
    cutpt = nd.argsort(orderside, axis=None, dtype='int32')[0].asscalar() + 1
    x_l = x[0:cutpt]
    x_r = x[cutpt:None]
    leaf.parent['side'] = 0
    new_leaf = Leaf(layer=tree.layer_initializer(), node=leaf.parent['node'],
                    decision=leaf.parent['decision'], side=1)
    tree.leaves.add(new_leaf)
    add_node(x_l, leaf)
    add_node(x_r, new_leaf)
    node.child['left'] = leaf.parent['node']
    node.child['right'] = new_leaf.parent['node']
    tesselate(x_l, leaf, split.tau.data())
    tesselate(x_r, new_leaf, split.tau.data())
def heatmap_to_coord_alpha_pose(hms, boxes):
    hm_h = hms.shape[2]
    hm_w = hms.shape[3]
    coords, maxvals = get_max_pred(hms)
    if boxes.shape[1] == 1:
        pt1 = mx.nd.array(boxes[:, 0, (0, 1)], dtype=hms.dtype)
        pt2 = mx.nd.array(boxes[:, 0, (2, 3)], dtype=hms.dtype)
    else:
        assert boxes.shape[1] == 4
        pt1 = mx.nd.array(boxes[:, (0, 1)], dtype=hms.dtype)
        pt2 = mx.nd.array(boxes[:, (2, 3)], dtype=hms.dtype)
    # post-processing
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = hms[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if 1 < px < hm_w - 1 and 1 < py < hm_h - 1:
                diff = nd.concat(hm[py][px + 1] - hm[py][px - 1],
                                 hm[py + 1][px] - hm[py - 1][px],
                                 dim=0)
                coords[n][p] += nd.sign(diff) * .25
    preds = nd.zeros_like(coords)
    for i in range(hms.shape[0]):
        for j in range(hms.shape[1]):
            preds[i][j] = transformBoxInvert(coords[i][j], pt1[i], pt2[i],
                                             hm_h, hm_w)
    return preds, maxvals
def get_final_preds(batch_heatmaps, center, scale):
    from gluoncv.data.transforms.pose import get_max_pred
    coords, maxvals = get_max_pred(batch_heatmaps)
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    # post-processing
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = nd.concat(hm[py][px + 1] - hm[py][px - 1],
                                 hm[py + 1][px] - hm[py - 1][px],
                                 dim=0)
                coords[n][p] += nd.sign(diff) * .25
    preds = nd.zeros_like(coords)
    # Transform back
    for i in range(coords.shape[0]):
        w_ratio = coords[i][:, 0] / heatmap_width
        h_ratio = coords[i][:, 1] / heatmap_height
        preds[i][:, 0] = scale[i][0] * 2 * w_ratio + center[i][0] - scale[i][0]
        preds[i][:, 1] = scale[i][1] * 2 * h_ratio + center[i][1] - scale[i][1]
    return preds, maxvals
def implement_0(self, x, label):
    '''following the SphereFace Caffe code'''
    # weight normalization
    with x.context:
        w = self.weight.data()
        with mx.autograd.pause():
            w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))
            w[:] = w_norm
    # x_norm = |x|
    x_norm = nd.power(x, 2)
    x_norm = nd.sum(x_norm, axis=1)
    x_norm = nd.sqrt(x_norm)
    # cos_theta = x'w / |x|;  note: |w| = 1
    cos_theta = nd.dot(x, w, transpose_b=True)
    cos_theta = cos_theta / x_norm.reshape((-1, 1))
    # cos_theta_quadratic & cos_theta_quartic
    cos_theta_quadratic = cos_theta**2
    cos_theta_quartic = cos_theta**4
    with mx.autograd.pause():
        # sign_0 = sign(cos_theta)
        sign_0 = nd.sign(cos_theta)
        # sign_3 = sign_0 * sign(2 * cos_theta_quadratic - 1)
        sign_3 = sign_0 * nd.sign(2 * cos_theta_quadratic - 1)
        # sign_4 = 2 * sign_0 + sign_3 - 3
        sign_4 = 2 * sign_0 + sign_3 - 3
    # phi_theta = sign_3 * (8 * cos_theta_quartic - 8 * cos_theta_quadratic + 1) + sign_4
    phi_theta = sign_3 * (8 * cos_theta_quartic - 8 * cos_theta_quadratic + 1) + sign_4
    x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
    # i == j index
    with mx.autograd.pause():
        index = nd.one_hot(label, x_norm_phi_theta.shape[1])
    # output
    with mx.autograd.pause():
        lamb = self.__get_lambda()  # 10
    output = nd.dot(x, w, transpose_b=True)
    output2 = output * (1.0 - index) + x_norm_phi_theta * index
    output3 = (output2 + lamb * nd.dot(x, w, transpose_b=True)) / (1 + lamb)
    return output3
def SGD(params, lr):
    # w_mask is a module-level list with one entry per parameter
    lambdaval = .01  # L1 penalty strength
    for idx, param in enumerate(params):
        if idx % 2 == 0:  # weights at even indices; biases at odd indices
            if isinstance(w_mask[idx], list):
                # unmasked layer: SGD step with an L1-style penalty term
                param[:] = param - lr * (param.grad + lambdaval * nd.sign(param.grad))
            else:
                # masked layer: plain SGD step, then zero out pruned weights
                param[:] = (param - lr * param.grad) * w_mask[idx]
        else:
            param[:] = param - lr * param.grad
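# Minimal sketch of the masked SGD step above. The w_mask convention (a plain
# list entry meaning "no mask", an NDArray entry zeroing pruned weights) is an
# assumption inferred from the isinstance check; the toy loss is illustrative.
import mxnet as mx
from mxnet import nd, autograd

w = nd.array([[1.0, -2.0], [0.5, 0.0]])
b = nd.zeros(2)
for p in (w, b):
    p.attach_grad()
w_mask = [[], None]  # entry 0: no mask for w; entry 1 is unused (bias branch)

with autograd.record():
    loss = nd.sum(nd.dot(nd.ones((1, 2)), w) + b)
loss.backward()
SGD([w, b], lr=0.1)  # the function defined above
print(w.asnumpy(), b.asnumpy())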
def trim_attack(epoch, v, net, lr, f):
    # local model poisoning attack against Trimmed-mean
    vi_shape = v[0].shape
    v_tran = nd.concat(*v, dim=1)
    maximum_dim = nd.max(v_tran, axis=1).reshape(vi_shape)
    minimum_dim = nd.min(v_tran, axis=1).reshape(vi_shape)
    direction = nd.sign(nd.sum(nd.concat(*v, dim=1), axis=-1, keepdims=True))
    directed_dim = (direction > 0) * minimum_dim + (direction < 0) * maximum_dim
    # let the malicious clients (first f clients) perform the attack
    for i in range(f):
        random_12 = 1. + nd.random.uniform(shape=vi_shape)
        v[i] = directed_dim * ((direction * directed_dim > 0) / random_12 +
                               (direction * directed_dim < 0) * random_12)
    return v
def forward(self, x=0):
    if mx.autograd.is_training():
        # sample from the (hard) concrete distribution
        u = nd.random.uniform(0, 1)
        s = nd.log(u) - nd.log(1 - u) + self._qz_loga.data()
        if self._temperature == 0:
            s = nd.sign(s)
        else:
            s = nd.sigmoid(s / self._temperature)
    else:
        s = nd.sigmoid(self._qz_loga.data())
    # stretch to [limit_lo, limit_hi], then hard-clip to [0, 1]
    s = s * (self._limit_hi - self._limit_lo) + self._limit_lo
    return nd.minimum(1, nd.maximum(s, 0))
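# Standalone sketch of the hard-concrete gate computed in forward() above.
# The qz_loga values and the (limit_lo, limit_hi, temperature) hyperparameters
# are illustrative assumptions, not values taken from the original module.
import mxnet.ndarray as nd

qz_loga = nd.array([0.0, 2.0, -2.0])  # hypothetical log-alpha parameters
limit_lo, limit_hi, temperature = -0.1, 1.1, 2.0 / 3.0

u = nd.random.uniform(0, 1, shape=qz_loga.shape)
s = nd.log(u) - nd.log(1 - u) + qz_loga   # concrete sample (training path)
s = nd.sigmoid(s / temperature)
s = s * (limit_hi - limit_lo) + limit_lo  # stretch ...
gate = nd.minimum(1, nd.maximum(s, 0))    # ... then hard-clip to [0, 1]
print(gate.asnumpy())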
def full_trim(epoch, v, net, f, lr, active, max_flip=1.0):
    # apply the full-knowledge trimmed-mean attack
    vi_shape = v[0].shape
    v_tran = nd.concat(*v, dim=1)
    maximum_dim = nd.max(v_tran, axis=1).reshape(vi_shape)
    minimum_dim = nd.min(v_tran, axis=1).reshape(vi_shape)
    direction = nd.sign(nd.sum(nd.concat(*v, dim=1), axis=-1, keepdims=True))
    directed_dim = (direction > 0) * minimum_dim + (direction < 0) * maximum_dim
    for i in range(f):
        random_12 = 1 + nd.random.uniform(shape=vi_shape)
        if active[0] < f:
            v[i] = directed_dim * ((direction * directed_dim > 0) / random_12 +
                                   (direction * directed_dim < 0) * random_12)
    return v
def full_krum(epoch, v, net, f, lr, active, max_flip=1.0):
    if f == 0:
        return v
    e = 0.00001 / len(v[0])
    avg_grads = nd.sum(nd.concat(*v, dim=1), axis=-1, keepdims=True)
    direction = nd.sign(avg_grads)
    topk = nd.argsort(nd.abs(avg_grads).reshape(-1))
    n_flips = int(max_flip * len(v[0]))
    current_f = len(np.where(np.where(active < f)[0] < f)[0])
    l_max = lambda_max(epoch, v, net, current_f, lr)
    l = find_lambda(l_max, v, direction, len(v), current_f, lr, topk, max_flip)
    print(l)
    if l > 0 and active[0] < f:
        v[0][topk[-n_flips:]] = -(direction[topk[-n_flips:]] * l) / lr
    for i in range(1, f):
        if active[i] < f:
            v[i] = mx.nd.random.uniform(v[0] - e, v[0] + e)
    return v
def partial_trim(epoch, v, net, f):
    # apply partial knowledge trimmed mean attack
    vi_shape = v[0].shape
    # first compute the distribution parameters
    all_grads = nd.concat(*v, dim=1)
    adv_grads = all_grads[:, :f]
    e_mu = nd.mean(adv_grads, axis=1)  # mean
    e_sigma = nd.sqrt(nd.sum(nd.square(nd.subtract(adv_grads, e_mu.reshape(-1, 1))),
                             axis=1) / f)  # standard deviation
    for i in range(f):
        # apply attack to compromised worker devices with randomness
        v[i] = (e_mu - nd.multiply(e_sigma, nd.sign(e_mu)) *
                (3. + nd.random.uniform(shape=e_sigma.shape))).reshape(vi_shape)
    return v
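# Toy run of partial_trim above: five one-column "gradients", the first two
# compromised (f=2). The epoch and net arguments are unused by the function,
# so None stands in for both here; the shapes are illustrative.
import mxnet.ndarray as nd

v = [nd.random.normal(shape=(4, 1)) for _ in range(5)]
v = partial_trim(epoch=None, v=v, net=None, f=2)
print(nd.concat(*v[:2], dim=1).asnumpy())  # the two poisoned updates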
def _shard(split, x, l_fn, r_fn):
    splitsortorder = nd.argsort(split, axis=None)
    reorderedx = x[splitsortorder, :]
    reorderedsplit = split[splitsortorder]
    if reorderedsplit[0] > 0:
        r_fn(reorderedx)
    elif reorderedsplit[-1] < 0:
        l_fn(reorderedx)
    else:
        splitpt = nd.argsort(reorderedsplit, axis=0) * nd.sign(reorderedsplit)
        splitpt = nd.argsort(splitpt, axis=None)[0] + 1
        lx = nd.slice_axis(reorderedx, 0, 0, int(splitpt.asscalar()))
        rx = nd.slice_axis(reorderedx, 0, int(splitpt.asscalar()), None)
        l_fn(lx)
        r_fn(rx)
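# Tiny demonstration of _shard above: each point carries a signed split value
# and is routed to the left or right callback. The data and the print-only
# callbacks are illustrative assumptions.
import mxnet.ndarray as nd

x = nd.array([[0., 0.], [1., 1.], [2., 2.]])
split = nd.array([-1., 1., -1.])  # hypothetical signed side of each point
_shard(split, x,
       lambda lx: print('left ', lx.shape),
       lambda rx: print('right', rx.shape))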
def hybrid_forward(self, F, x, weight, bias=None):
    Cout, Cin, k1, k2 = weight.shape
    # keep the top-3 weights (by magnitude) per filter; damp the rest to 0.1
    wmask = nd.topk(nd.abs(weight).reshape(Cout, Cin, -1), k=3, ret_typ='mask')
    wmask = (wmask == 0) * 0.1 + wmask
    wmask = wmask.reshape(Cout, Cin, k1, k2).as_in_context(x.context)
    # add the magnitude of the reindexed weights, preserving each weight's sign
    temp_weight = weight.reshape((Cout, Cin, -1))
    new_weight = temp_weight[:, :, self.reidx].reshape(Cout, Cin, k1, k2)
    new_weight = weight + nd.sign(weight) * nd.abs(new_weight)
    return super(new_location_conv, self).hybrid_forward(F, x, new_weight * wmask, bias)
def evaluate_accuracy(data_iterator, net, ctx, loss_fun, num_classes):
    """Evaluate the accuracy and average loss of `net` over a data iterator
    (either train or test data), using the given loss function."""
    acc = mx.metric.Accuracy()
    loss_avg = 0.
    for i, (data, labels) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        output = net(data)
        loss = loss_fun(output, labels)
        if num_classes == 2:
            preds = (nd.sign(output) + 1) / 2  # map {-1, +1} outputs to {0, 1}
            preds = preds.reshape(-1)
        else:
            preds = nd.argmax(output, axis=1)
        acc.update(preds=preds, labels=labels)
        # running average of the per-batch losses
        loss_avg = loss_avg * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1)
    return acc.get()[1], loss_avg
def recurse(x, node, p_tau):
    n = 1
    mean = node.center.data()
    var = (0.5 * node.radius.data())**2
    N = x.shape[0]
    x_mean = nd.mean(x, axis=0)
    x_var = (N**-1) * nd.sum((x - x_mean)**2, axis=0)
    # merge the node's (mean, var) with the batch statistics
    z_mean = (n * mean + N * x_mean) / (n + N)
    z_var = ((n * (mean**2 + var) + N * (x_mean**2 + x_var)) / (n + N)) - z_mean**2
    z_radius = 2 * (nd.max(z_var)**0.5)
    node.center.set_data(z_mean)
    node.radius.set_data(z_radius)
    if node.child['decision'] is None:
        leaf = next(l for l in tree.leaves if l.parent['node'] == node)
        tesselate(x, leaf, p_tau)
        return
    E = nd.random.exponential(z_radius**-1)
    node.child['decision'].tau.set_data(p_tau + E)
    split = node.child['decision']
    side = nd.sign(split.split(x))
    order = nd.argsort(side, axis=None)
    x = x[order, :]
    side = side[order, :]
    if side[0] > 0:
        recurse(x, node.child['right'], split.tau.data())
    elif side[-1] < 0:
        recurse(x, node.child['left'], split.tau.data())
    else:
        orderside = nd.argsort(side, axis=0) * side
        cutpt = nd.argsort(orderside, axis=None, dtype='int32')[0].asscalar() + 1
        x_l = x[0:cutpt]
        x_r = x[cutpt:None]
        recurse(x_l, node.child['left'], split.tau.data())
        recurse(x_r, node.child['right'], split.tau.data())
def _recurse(node, path=nd.zeros_like(splt[:, 0]),
             prob=nd.ones_like(splt[:, 0]),
             remain=nd.zeros_like(splt[:, 0])):
    children = self._structure[node]
    i_node = next(key for key, value in self._weightlayer._children.items()
                  if value == node._box)
    i_node = int(i_node)
    # calculate the embedd matrix
    embedd[i_node] = node()
    # calculate the router matrix
    if node._box._parent is not None:
        i = next(key for key, value in self._routerlayer._children.items()
                 if value == node._box._parent._decision)
        i = int(i)
        direction = self._structure[node._box._parent][node]
        # path += splt[:, i] * direction - 1
        path = path + splt[:, i] * direction - 1
        router[:, i_node] = path + 0.5
    # calculate the weight matrix
    if node._box._parent is not None and children is not None:
        i_parent = next(key for key, value in self._weightlayer._children.items()
                        if value == node._box._parent._box)
        i_parent = int(i_parent)
        # prob *= (1 - psep[:, i_parent])
        prob = prob * (1 - psep[:, i_parent])
    if children is None:
        w = 1 - remain
    else:
        w = psep[:, i_node] * prob
    # remain += w
    remain = remain + w
    weight[:, i_node] = w
    # calculate the partial router matrix
    path_mat = nd.zeros_like(psep)
    pie = nd.maximum(nd.sign(path + 1), 0)
    cur_node = node
    cur_path = path + 0
    while True:
        i_cur_node = next(key for key, value in self._weightlayer._children.items()
                          if value == cur_node._box)
        i_cur_node = int(i_cur_node)
        frac = nd.maximum(cur_path + 0.5, -0.5) + 0.5
        path_mat[:, i_cur_node] = frac * pie
        # pie -= frac * pie
        pie = pie - frac * pie
        if cur_node._box._parent is not None:
            cur_i = next(key for key, value in self._routerlayer._children.items()
                         if value == cur_node._box._parent._decision)
            cur_i = int(cur_i)
            cur_direction = self._structure[cur_node._box._parent][cur_node]
            # cur_path -= splt[:, cur_i] * cur_direction - 1
            cur_path = cur_path - (splt[:, cur_i] * cur_direction - 1)
            cur_node = cur_node._box._parent
        else:
            router_mat[:, i_node, :] = path_mat
            break
    if children is not None:
        left = next(key for key, value in children.items() if value == -1)
        right = next(key for key, value in children.items() if value == 1)
        _recurse(left, path + 0, prob + 0, remain + 0)
        _recurse(right, path + 0, prob + 0, remain + 0)
    return (router, router_mat, weight, embedd)
def _recurse(node, path=nd.zeros_like(splt[:, 0]),
             prob=nd.ones_like(splt[:, 0]),
             remain=nd.zeros_like(splt[:, 0])):
    children = self._structure[node]
    i_node = next(key for key, value in self._weightlayer._children.items()
                  if value == node._box)
    i_node = int(i_node)
    # calculate the embedd matrix
    embedd[i_node] = node()
    # calculate the router matrix
    if node._box._parent is not None:
        i = next(key for key, value in self._routerlayer._children.items()
                 if value == node._box._parent._decision)
        i = int(i)
        direction = self._structure[node._box._parent][node]
        path = path + splt[:, i] * direction - 1
        # router[:, i_node] = path + 0.5
        router[i_node] = path + 0.5
        # prevent routing decay
        # path = nd.minimum(0, nd.sign(path + 1))
    # calculate the weight matrix
    if node._box._parent is not None and children is not None:
        i_parent = next(key for key, value in self._weightlayer._children.items()
                        if value == node._box._parent._box)
        i_parent = int(i_parent)
        prob = prob * (1 - psep[:, i_parent])
    if children is None:
        w = 1 - remain
    else:
        w = psep[:, i_node] * prob
    remain = remain + w
    # weight[:, i_node] = w
    weight[i_node] = w
    # calculate the partial router matrix
    # path_mat_t = nd.zeros_like(psep)
    path_mat = {}
    pie = nd.maximum(nd.sign(path + 1), 0)
    cur_node = node
    cur_path = path + 0
    while True:
        i_cur_node = next(key for key, value in self._weightlayer._children.items()
                          if value == cur_node._box)
        i_cur_node = int(i_cur_node)
        frac = nd.maximum(cur_path + 0.5, -0.5) + 0.5
        # path_mat_t[:, i_cur_node] = frac * pie
        path_mat[i_cur_node] = frac * pie
        pie = pie - frac * pie
        if cur_node._box._parent is not None:
            cur_i = next(key for key, value in self._routerlayer._children.items()
                         if value == cur_node._box._parent._decision)
            cur_i = int(cur_i)
            cur_direction = self._structure[cur_node._box._parent][cur_node]
            cur_path = cur_path - (splt[:, cur_i] * cur_direction - 1)
            cur_node = cur_node._box._parent
        else:
            # router_mat_t[:, i_node, :] = path_mat_t
            n_node = len(self._weightlayer)
            router_mat[i_node] = nd.stack(
                *[path_mat[key] if key in path_mat else nd.zeros_like(splt[:, 0])
                  for key in range(n_node)],
                axis=-1)
            break
    if children is not None:
        left = next(key for key, value in children.items() if value == -1)
        right = next(key for key, value in children.items() if value == 1)
        _recurse(left, path + 0, prob + 0, remain + 0)
        _recurse(right, path + 0, prob + 0, remain + 0)
    return (router, router_mat, weight, embedd)
def trim(epoch, gradients, net, lr, byz, old_direction, active, blacklist, susp,
         f=0, cmax=0, utrg=0.0, udet=0.50, urem=3):
    # flatten each worker's gradients into a single column vector
    param_list = [nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0)
                  for x in gradients]
    # let the byzantine workers craft their updates
    param_list = byz(epoch, param_list, net, f, lr, active)
    flip_local = nd.zeros(len(param_list))
    flip_new = nd.zeros(len(param_list))
    penalty = 1.0 - cmax / len(param_list)
    reward = 1.0 - penalty
    # flip-score: number of coordinates whose sign flipped w.r.t. old_direction
    for i in range(len(param_list)):
        direction = nd.sign(param_list[i])
        flip_local[i] = 0.5 * (mx.nd.sum(direction.reshape(-1) *
                                         (direction.reshape(-1) -
                                          old_direction.reshape(-1)))).asscalar()
    argsorted = nd.argsort(flip_local)
    if cmax > 0:
        susp[argsorted[:-cmax]] = susp[argsorted[:-cmax]] + reward
        susp[argsorted[-cmax:]] = susp[argsorted[-cmax:]] - penalty
    argsorted = nd.argsort(susp)
    # aggregate with softmax weights over the suspicion scores
    weights = nd.exp(susp) / nd.sum(nd.exp(susp))
    matrix = nd.concat(*[ii for ii in param_list], dim=1)
    trim_nd = nd.linalg.gemm2(matrix, weights.reshape(-1, 1))
    global_direction = nd.sign(trim_nd)
    gfs = 0.5 * (mx.nd.sum(global_direction.reshape(-1) *
                           (global_direction.reshape(-1) -
                            old_direction.reshape(-1)))).asscalar()
    # apply the aggregated update to the model parameters
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(param.data() -
                       lr * trim_nd[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    return trim_nd, global_direction, cmax, gfs, flip_local, flip_new
cumulative_loss += nd.sum(loss).asscalar()
print("Epoch %s, loss: %s" % (e, cumulative_loss))
loss_sequence.append(cumulative_loss)

# plot the convergence of the estimated loss function
import matplotlib.pyplot as plt

plt.figure(num=None, figsize=(8, 6))
plt.plot(loss_sequence)
# adding some bells and whistles to the plot
plt.grid(True, which="both")
plt.xlabel('epoch', fontsize=14)
plt.ylabel('average loss', fontsize=14)
plt.show()

num_correct = 0.0
num_total = len(Xtest)
for i, (data, label) in enumerate(test_data):
    data = data.as_in_context(model_ctx)
    label = label.as_in_context(model_ctx)
    output = net(data)
    prediction = (nd.sign(output) + 1) / 2  # map {-1, +1} outputs to {0, 1}
    num_correct += nd.sum(prediction == label)
print("Accuracy: %0.3f (%s/%s)" % (num_correct.asscalar() / num_total,
                                   num_correct.asscalar(), num_total))
def krum(epoch, gradients, net, lr, byz, old_direction, active, blacklist, susp,
         f=0, cmax=0, utrg=0, udet=0.50, urem=3, max_flip=1.0):
    param_list = [nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0)
                  for x in gradients]
    param_list = byz(epoch, param_list, net, f, lr, active, max_flip)
    flip_local = nd.zeros(len(param_list))
    penalty = 1.0 - cmax / len(param_list)
    reward = 1.0 - penalty
    # flip-score of each local update w.r.t. the previous global direction
    for i in range(len(param_list)):
        direction = nd.sign(param_list[i])
        flip_local[i] = 0.5 * (mx.nd.sum(direction.reshape(-1) *
                                         (direction.reshape(-1) -
                                          old_direction.reshape(-1)))).asscalar()
    argsorted = nd.argsort(flip_local)
    susp[argsorted[:-cmax]] = susp[argsorted[:-cmax]] - reward
    susp[argsorted[-cmax:]] = susp[argsorted[-cmax:]] + penalty
    # keep the len(param_list) - cmax least suspicious updates
    new_list = []
    argsorted = nd.argsort(susp)
    for i in range(len(param_list) - cmax):
        new_list.append(param_list[int(argsorted[i].asscalar())])
    # Krum: select the update closest to its k nearest neighbours
    k = len(new_list) - 0 - 2
    dist = mx.nd.zeros((len(new_list), len(new_list)))
    for i in range(0, len(new_list)):
        for j in range(0, i):
            dist[i][j] = nd.norm(new_list[i] - new_list[j])
            dist[j][i] = dist[i][j]
    sorted_dist = mx.nd.sort(dist)
    sum_dist = mx.nd.sum(sorted_dist[:, :k + 1], axis=1)
    model_selected = argsorted[mx.nd.argmin(sum_dist).asscalar().astype(
        int)].asscalar().astype(int)
    global_direction = nd.sign(param_list[model_selected])
    gfs = 0.5 * (mx.nd.sum(global_direction.reshape(-1) *
                           (global_direction.reshape(-1) -
                            old_direction.reshape(-1)))).asscalar()
    # apply the selected update to the model parameters
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(param.data() - lr * param_list[model_selected][idx:(
            idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    return model_selected, global_direction, cmax, gfs, flip_local, 1.0
def poly_kernels(self, x: NDArray, y: NDArray):
    # signed quadratic kernel: sign(<x, y>) * <x, y>^2
    prod = nd.dot(x, y)
    return nd.sign(prod) * nd.abs(prod)**2
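# Quick check of poly_kernels above: for two vectors, the result is the signed
# square of their inner product. The self argument is unused, so None stands
# in for it here; the inputs are illustrative.
import mxnet.ndarray as nd

x = nd.array([1.0, -2.0])
y = nd.array([0.5, 1.0])
print(poly_kernels(None, x, y).asnumpy())  # <x, y> = -1.5  ->  -2.25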
def median(epoch, gradients, net, lr, byz, old_direction, active, blacklist, susp,
           f=0, cmax=0, utrg=0.0, udet=0.50, urem=3):
    param_list = [nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0)
                  for x in gradients]
    param_list = byz(epoch, param_list, net, f, lr, active)
    flip_local = nd.zeros(len(param_list))
    penalty = 1.0 - cmax / len(param_list)
    reward = 1.0 - penalty
    # flip-score of each local update w.r.t. the previous global direction
    for i in range(len(param_list)):
        direction = nd.sign(param_list[i])
        flip_local[i] = 0.5 * (mx.nd.sum(direction.reshape(-1) *
                                         (direction.reshape(-1) -
                                          old_direction.reshape(-1)))).asscalar()
    argsorted = nd.argsort(flip_local)
    susp[argsorted[:-cmax]] = susp[argsorted[:-cmax]] - reward
    susp[argsorted[-cmax:]] = susp[argsorted[-cmax:]] + penalty
    # keep the len(param_list) - cmax least suspicious updates
    new_list = []
    argsorted = nd.argsort(susp)
    for i in range(len(param_list) - cmax):
        new_list.append(param_list[int(argsorted[i].asscalar())])
    # coordinate-wise median of the remaining updates
    sorted_array = nd.sort(nd.concat(*new_list, dim=1), axis=-1)
    if len(new_list) % 2 == 1:
        trim_nd = sorted_array[:, int(len(new_list) / 2)]
    else:
        trim_nd = (sorted_array[:, int(len(new_list) / 2) - 1] +
                   sorted_array[:, int(len(new_list) / 2)]) / 2
    global_direction = nd.sign(trim_nd)
    gfs = 0.5 * (mx.nd.sum(global_direction.reshape(-1) *
                           (global_direction.reshape(-1) -
                            old_direction.reshape(-1)))).asscalar()
    # apply the aggregated update to the model parameters
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(param.data() -
                       lr * trim_nd[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    return trim_nd, global_direction, cmax, gfs, flip_local
batch = traindata.makebatch(params['symbols_in_batch'])
print(batch.shape)
batch = nd.array(batch).as_in_context(model_ctx)
data = batch[1]
labels = batch[0]
with autograd.record():
    output = net(data)
cpuoutput = output.as_in_context(data_ctx)
cpulabels = labels.as_in_context(data_ctx)
alignedlabels = alignbatch(cpuoutput, cpulabels).as_in_context(model_ctx)
with autograd.record():
    # padding positions (label 0) get zero weight in the loss
    mask = nd.sign(alignedlabels)  # *(1-blank_weight)+blank_weight
    mask = mask.reshape((mask.shape[0], mask.shape[1], 1))
    loss = loss_fn(output, alignedlabels, mask)
count += 1
if count % 10 == 0:
    printvalid()
loss.backward()
trainer.step(data.shape[0])
# exponentially smoothed training loss
if smoothed_loss == "null":
    smoothed_loss = nd.mean(loss).asscalar()
else:
    smoothed_loss = smoothed_loss * smoothing + (1 - smoothing) * nd.mean(loss).asscalar()