def test_jitter_synthetic(
    jitter_method, float_type, ctx=mx.Context('cpu')
) -> None:
    # Initialize problem parameters
    batch_size = 1
    prediction_length = 50
    context_length = 5
    num_samples = 3

    # Initialize test data to generate the Gaussian Process from
    lb = -5
    ub = 5
    dx = (ub - lb) / (prediction_length - 1)
    x_test = nd.arange(lb, ub + dx, dx, ctx=ctx, dtype=float_type).reshape(
        -1, 1
    )
    x_test = nd.tile(x_test, reps=(batch_size, 1, 1))

    # Define the GP hyperparameters
    amplitude = nd.ones((batch_size, 1, 1), ctx=ctx, dtype=float_type)
    length_scale = math.sqrt(0.4) * nd.ones_like(amplitude)
    sigma = math.sqrt(1e-5) * nd.ones_like(amplitude)

    # Instantiate the desired kernel object and compute the kernel matrix
    rbf_kernel = RBFKernel(amplitude, length_scale)

    # Generate samples from a zero-mean Gaussian process with an RBF kernel
    gp = GaussianProcess(
        sigma=sigma,
        kernel=rbf_kernel,
        prediction_length=prediction_length,
        context_length=context_length,
        num_samples=num_samples,
        ctx=ctx,
        float_type=float_type,
        jitter_method=jitter_method,
        sample_noise=False,  # Returns samples without noise
    )

    # Generate a training set on a subset of the interval using the sine function
    x_train = nd.array([-4, -3, -2, -1, 1], ctx=ctx, dtype=float_type).reshape(
        context_length, 1
    )
    x_train = nd.tile(x_train, reps=(batch_size, 1, 1))
    y_train = nd.sin(x_train.squeeze(axis=2))

    # Predict with the exact GP predictive mean and covariance, using the same fixed hyperparameters
    samples, predictive_mean, predictive_std = gp.exact_inference(
        x_train, y_train, x_test
    )
    assert (
        np.sum(np.isnan(samples.asnumpy())) == 0
    ), 'NaNs in predictive samples!'
def get_gen_loss(gen, disc, loss_fn, batch_size, z_dim, ctx):
    z = nd.random.randn(batch_size, z_dim, ctx=ctx)
    xhat = gen(z)
    y_pred = disc(xhat)
    y_true = nd.ones_like(y_pred)
    loss = loss_fn(y_pred, y_true)
    return loss
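A minimal usage sketch for the generator step above, assuming `gen` and `disc` are gluon Blocks, `loss_fn` is a sigmoid binary cross-entropy loss, and `trainer_g` is a hypothetical gluon Trainer over the generator's parameters:

# Hypothetical training-step sketch; only get_gen_loss comes from the code above.
from mxnet import autograd, gluon

loss_fn = gluon.loss.SigmoidBinaryCrossEntropyLoss()
with autograd.record():
    gen_loss = get_gen_loss(gen, disc, loss_fn, batch_size=64, z_dim=100, ctx=ctx)
gen_loss.backward()
trainer_g.step(64)  # trainer_g is assumed to wrap gen.collect_params()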
def format_groundtruth(self, labels, XYWH):
    # Generate targets online from the given labels
    B, H, W, boxNum, _ = XYWH.shape
    boxMask = nd.zeros((B, H, W, boxNum, 1), ctx=XYWH.context)
    boxCls = nd.ones_like(boxMask, ctx=XYWH.context) * (-1)  # -1 indicates an ignored item
    boxObj = nd.zeros((B, H, W, boxNum, 1), ctx=XYWH.context)
    boxXYWH = nd.zeros((B, H, W, boxNum, 4), ctx=XYWH.context)
    for b in range(B):
        label = labels[b].asnumpy()
        validLabel = label[np.where(label[:, 1] > -0.5)[0], :]
        np.random.shuffle(validLabel)  # shuffle to add randomness
        for l in validLabel:
            cls, x0, y0, x1, y1 = l  # standard label format
            w, h = x1 - x0, y1 - y0
            indx, indy = int(x0 * W), int(y0 * H)  # unlike the paper, the top-left corner determines the cell
            ious = []
            pws, phs = [1 / 16.0, 1 / 16.0], [1 / 16.0, 2 * 1 / 16.0]
            # Comparison between the anchors and the object bbox (resized to the last layer),
            # so the anchors stand for a size estimate of the target in the last layer?
            # Update: it now uses ratios.
            for pw, ph in zip(pws, phs):
                intersect = np.minimum(pw, w) * np.minimum(ph, h)
                ious.append(intersect / (pw * ph + w * h - intersect))
            bestBoxInd = int(np.argmax(ious))
            boxMask[b, indy, indx, bestBoxInd, :] = 1.0  # select the cell that estimates this object
            boxCls[b, indy, indx, bestBoxInd, :] = cls  # target class id
            boxObj[b, indy, indx, bestBoxInd, :] = 1.0  # target objectness
            tx, ty = x0 * W - indx, y0 * H - indy  # xy is the offset from the cell's top-left corner (not the image)
            # For the loss, the size targets are transformed;
            # update: log replaces sqrt (this fails if you use log(w) instead of log(1 + w))
            tw, th = np.log(1 + w), np.log(1 + h)
            boxXYWH[b, indy, indx, bestBoxInd, :] = nd.array([tx, ty, tw, th])
    return boxMask, boxCls, boxObj, boxXYWH
def test_detach_updated_grad():
    x = nd.ones((2, 2))
    dx = nd.zeros_like(x)
    y = nd.ones_like(x)
    dy = nd.zeros_like(x)
    mark_variables([x, y], [dx, dy])
    assert x._fresh_grad == False
    assert y._fresh_grad == False

    with train_section():
        x2 = x + 2
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 1).all()
    assert x._fresh_grad == True
    assert y._fresh_grad == True

    dx[:] = 0
    x._fresh_grad = False
    y._fresh_grad = False
    assert x._fresh_grad == False
    assert y._fresh_grad == False
    with train_section():
        x2 = x + 2
        x2 = x2.detach()
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 0).all()
    assert y._fresh_grad == True
    assert x._fresh_grad == False
def test_detach_updated_grad():
    x = nd.ones((2, 2))
    dx = nd.zeros_like(x)
    y = nd.ones_like(x)
    dy = nd.zeros_like(x)
    mark_variables([x, y], [dx, dy])
    assert x._fresh_grad == False
    assert y._fresh_grad == False

    with record():
        x2 = x + 2
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 1).all()
    assert x._fresh_grad == True
    assert y._fresh_grad == True

    dx[:] = 0
    x._fresh_grad = False
    y._fresh_grad = False
    assert x._fresh_grad == False
    assert y._fresh_grad == False
    with record():
        x2 = x + 2
        x2 = x2.detach()
        y2 = x2 + y
        y2.backward()
    assert (dx.asnumpy() == 0).all()
    assert y._fresh_grad == True
    assert x._fresh_grad == False
def get_gradient(crit, real, fake, epsilon):
    mixed_images = epsilon * real + (1 - epsilon) * fake
    mixed_images.attach_grad()
    # with autograd.record():
    mixed_scores = crit(mixed_images)
    grad = autograd.grad(mixed_scores, [mixed_images],
                         retain_graph=True, create_graph=True,
                         head_grads=nd.ones_like(mixed_scores))[0]
    return grad
def unsorted_1d_segment_mean(input, seg_id, n_segs, dim):
    # TODO: support other dimensions
    assert dim == 0, 'MXNet only supports segment mean on first dimension'

    n_ones = nd.ones_like(seg_id).astype(input.dtype)
    w = unsorted_1d_segment_sum(n_ones, seg_id, n_segs, 0)
    w = nd.clip(w, a_min=1, a_max=np.inf)
    y = unsorted_1d_segment_sum(input, seg_id, n_segs, dim)
    y = y / w.reshape((-1,) + (1,) * (y.ndim - 1))
    return y
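A small worked example of what unsorted_1d_segment_mean is expected to return, assuming unsorted_1d_segment_sum (defined elsewhere in the same module) scatter-adds rows by segment id:

# Illustrative values only: rows 0 and 2 fall into segment 0, row 1 into segment 1.
x = nd.array([[1., 2.], [10., 20.], [5., 6.]])
seg = nd.array([0, 1, 0])
# unsorted_1d_segment_mean(x, seg, n_segs=2, dim=0) should give
#   segment 0 -> mean of rows 0 and 2 = [3., 4.]
#   segment 1 -> row 1 alone          = [10., 20.]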
def update(self, index, weight, grad, state):
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]

    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)

        r1 = weight.norm()
        if not self.bias_correction:
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat

        r2 = g.norm()

        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r

        # update weight
        g *= lr
        weight[:] -= g
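A quick illustration of the trust-ratio guard above: for a finite non-zero ratio, `1 - ratio / ratio` is 0, while a zero or NaN ratio yields NaN, which `where` treats as true, so those cases fall back to a trust ratio of 1. This is the behaviour the original comment describes; the values below are illustrative only:

ratio = nd.array([0.5, 0.0, float('nan')])
nan_or_zero = 1 - ratio / ratio                        # [0., nan, nan]
r = nd.where(nan_or_zero, nd.ones_like(ratio), ratio)
# expected r: [0.5, 1.0, 1.0] -- degenerate ratios use a trust ratio of 1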
def yolo2_target(scores, output, labels, anchors, ignore_label=-1, thresh=0.5):
    """
    Generate the YOLO2 training targets.

    YOLO2 matches ground-truth objects in a somewhat unusual way: the comparison is done
    independently inside each grid cell rather than against a global preset.
    We use one trick here: a sample_weight (per-element weight) matrix that adjusts the
    weighting inside the loss function. The weight matrix also lets us control which
    elements are masked out, which matters a lot in object detection, because most
    background regions do not need a predicted box.
    The network's prediction has shape (32, 16, 16, 2, 5), while the labels (the ground
    truth) have shape (32, 1, 5); the 5 entries are one class label (0) plus the top-left
    and bottom-right corners as coordinates relative to the whole image.
    The regression targets of the model:
    """
    b, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    """
    scores is passed in only for its shape and context:
    scores = nd.slice_axis(outputs, begin=1, end=2, axis=-1)
    boxes = nd.slice_axis(outputs, begin=2, end=6, axis=-1)
    gt_boxes = nd.slice_axis(labels, begin=1, end=5, axis=-1)
    """
    target_score = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((b, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    for b in range(output.shape[0]):
        # find the best match for each ground-truth box
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multiple ground truths could match the same anchor; the kept (last) match is then random
        np.random.shuffle(valid_label)
        for l in valid_label:
            gx, gy, gw, gh = (l[1] + l[3]) / 2, (l[2] + l[4]) / 2, l[3] - l[1], l[4] - l[2]
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x, best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
            # print('ind_y', ind_y, 'ind_x', ind_x, 'best_match', best_match, 't', tx, ty, tw, th,
            #       'ovp', ovps[best_match], 'gt', gx, gy, gw/w, gh/h,
            #       'anchor', anchors[best_match, 0], anchors[best_match, 1])
    return target_id, target_score, target_box, sample_weight
def forward(self, is_train, req, in_data, out_data, aux):
    arm_cls_preds = in_data[0]
    odm_cls_target = in_data[1]
    odm_loc_target_mask = in_data[2]

    arm_cls_preds = nd.softmax(data=arm_cls_preds)
    arm_cls_preds_classes = nd.split(data=arm_cls_preds, axis=1, num_outputs=2)
    # arm_cls_preds_bg shape: (batch, h*w*num_anchors[:layers]), the negative (background) class [0]
    arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_classes[0], shape=(0, -1))
    prob_temp = nd.ones_like(arm_cls_preds_bg) * 0.99
    cond1 = arm_cls_preds_bg >= prob_temp  # > 0.99 idx is 1
    # print('negative cond1 ------- :', heapq.nlargest(2, arm_cls_preds_bg[0]))
    temp1 = nd.ones_like(odm_cls_target) * (-1)  ### TODO: does 0 or -1 denote background?
    # If the ARM classifies an anchor as negative with confidence above 0.99, remove it from
    # the ODM anchor labels (replace with -1); the negative anchor becomes background.
    odm_cls_target_mask = nd.where(condition=cond1, x=temp1, y=odm_cls_target)

    # apply filtering to odm_loc_target_mask
    # odm_loc_target_mask shape: (batch, num_anchors, 4)
    arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_bg, shape=(0, -1, 1))  # (batch, h*w*num_anchors[:layers], 1)
    # (batch, h*w*num_anchors[:layers], 4)
    odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask, shape=(0, -1, 4))
    odm_loc_target_mask = odm_loc_target_mask[:, :, 0]  # (batch, h*w*num_anchors[:layers])
    # Take the first column of every row in the whole batch, collapsing the four identical
    # labels [0 0 0 0] / [1 1 1 1] into [0] / [1].
    odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask, shape=(0, -1, 1))  # (batch, h*w*num_anchors[:layers], 1)
    loc_temp = nd.ones_like(odm_loc_target_mask) * 0.99
    cond2 = arm_cls_preds_bg >= loc_temp
    temp2 = nd.zeros_like(odm_loc_target_mask)  # use 0
    # If the ARM's negative confidence exceeds 0.99, zero out the corresponding ODM mask entry.
    # In effect the ARM classification overrides the IoU-based assignment: anchors that the ARM
    # calls negative with confidence > 0.99 are treated as background regardless of the IoU result.
    odm_loc_target_bg_mask = nd.where(cond2, temp2, odm_loc_target_mask)
    odm_loc_target_bg_mask = nd.concat(*[odm_loc_target_bg_mask] * 4, dim=2)  # restore the original dimensions
    odm_loc_target_bg_mask = nd.reshape(odm_loc_target_bg_mask, shape=(0, -1))

    for ind, val in enumerate([odm_cls_target_mask, odm_loc_target_bg_mask]):
        self.assign(out_data[ind], req[ind], val)
def get_disc_loss(gen, disc, loss_fn, X, batch_size, z_dim, ctx):
    # loss from real images
    y_pred_real = disc(X).reshape(X.shape[0], -1)
    y_true_real = nd.ones_like(y_pred_real)
    loss_real = loss_fn(y_pred_real, y_true_real)

    # loss from fake images
    z = nd.random.randn(batch_size, z_dim, 1, 1, ctx=ctx)
    xhat = gen(z).detach()
    y_pred_fake = disc(xhat).reshape(X.shape[0], -1)
    y_true_fake = nd.zeros_like(y_pred_fake)
    loss_fake = loss_fn(y_pred_fake, y_true_fake)

    # total discriminator loss
    loss = 0.5 * (loss_real + loss_fake)
    return loss
def get_crit_loss(gen, crit, real, batch_size, z_dim, ctx):
    z = nd.random.randn(batch_size, z_dim, 1, 1, ctx=ctx)
    fake = gen(z).detach()
    y_pred_fake = crit(fake).reshape(real.shape[0], -1)
    y_pred_real = crit(real).reshape(real.shape[0], -1)

    epsilon = np.random.rand(len(real), 1, 1, 1)
    epsilon = nd.array(epsilon, ctx=ctx)
    # grad = get_gradient(crit, X, Xhat.detach(), epsilon)
    mixed_images = epsilon * real + (1 - epsilon) * fake
    mixed_images.attach_grad()
    # with autograd.record():
    mixed_scores = crit(mixed_images)
    grad = autograd.grad(mixed_scores, [mixed_images],
                         retain_graph=True, create_graph=True,
                         head_grads=nd.ones_like(mixed_scores))[0]
    gp = gradient_penalty(grad)
    crit_loss = crit_loss_fn(y_pred_fake, y_pred_real, gp, C_LAMBDA)
    return crit_loss
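gradient_penalty and crit_loss_fn are referenced above but not shown. A minimal sketch under the standard WGAN-GP formulation might look like the following; the bodies are assumptions, only the names and call signatures come from the code above:

def gradient_penalty(grad):
    # Penalize the deviation of each sample's gradient norm from 1 (standard WGAN-GP penalty).
    grad = grad.reshape(grad.shape[0], -1)
    grad_norm = nd.sqrt(nd.sum(nd.square(grad), axis=1) + 1e-12)
    return nd.mean(nd.square(grad_norm - 1))

def crit_loss_fn(y_pred_fake, y_pred_real, gp, c_lambda):
    # The critic maximizes real minus fake scores, so the loss is the negative, plus the penalty.
    return nd.mean(y_pred_fake) - nd.mean(y_pred_real) + c_lambda * gp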
def yolo2_target(scores, output, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate training targets given predictions and labels."""
    b, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    #scores = nd.slice_axis(outputs, begin=1, end=2, axis=-1)
    #boxes = nd.slice_axis(outputs, begin=2, end=6, axis=-1)
    gt_boxes = nd.slice_axis(labels, begin=1, end=5, axis=-1)
    target_score = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((b, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    for b in range(output.shape[0]):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multi gt could possibly match to one anchor, we keep the last match randomly
        np.random.shuffle(valid_label)
        for l in valid_label:
            gx, gy, gw, gh = (l[1] + l[3]) / 2, (l[2] + l[4]) / 2, l[3] - l[1], l[4] - l[2]
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x, best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
            # print('ind_y', ind_y, 'ind_x', ind_x, 'best_match', best_match, 't', tx, ty, tw, th,
            #       'ovp', ovps[best_match], 'gt', gx, gy, gw/w, gh/h,
            #       'anchor', anchors[best_match, 0], anchors[best_match, 1])
    return target_id, target_score, target_box, sample_weight
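A small worked example of the target encoding above, with illustrative numbers only:

# grid w = h = 16, ground-truth corners (0.2, 0.3) and (0.4, 0.5)
#   gx, gy, gw, gh = 0.3, 0.4, 0.2, 0.2              # box center and size, relative to the image
#   ind_x, ind_y   = int(0.3 * 16), int(0.4 * 16) = 4, 6
#   tx, ty         = 0.3 * 16 - 4, 0.4 * 16 - 6  = 0.8, 0.4   # offset inside the matched cell
#   gw * w, gh * h = 3.2, 3.2                         # box size in grid units
#   with a best-matching anchor of (3.0, 4.0):
#   tw, th         = log(3.2 / 3.0), log(3.2 / 4.0) ≈ 0.065, -0.223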
def parse_groundtruth_for_target(labels, box_per_cell, xywh):
    B, H, W, A, _ = xywh.shape
    _, maxObjNum, _ = labels.shape
    #pdb.set_trace()
    boxMask = nd.zeros((B, H, W, A, 1), ctx=xywh.context)
    boxCls = nd.ones_like(boxMask, ctx=xywh.context) * (-1)  # default negative label
    boxObject = nd.zeros((B, H, W, A, 1), ctx=xywh.context)
    boxXYWH = nd.zeros((B, H, W, A, 4), ctx=xywh.context)
    for b in range(B):
        label = labels[b].asnumpy()
        validLabel = label[np.where(label[:, 1] > -0.5)[0], :]
        #pdb.set_trace()
        np.random.shuffle(validLabel)
        for l in validLabel:
            cls, x0, y0, x1, y1 = l
            w = x1 - x0
            h = y1 - y0
            # find the best box for this object
            indx, indy = int(x0 * W), int(y0 * H)  # position
            pws, phs = xywh[b, indy, indx, :, -2], xywh[b, indy, indx, :, -1]
            ious = []
            pws = pws.asnumpy()
            phs = phs.asnumpy()
            pws, phs = [1, 1], [1, 1]
            for pw, ph in zip(pws, phs):
                intersect = np.minimum(pw, w * W) * np.minimum(ph, h * H)
                ious.append(intersect / (pw * ph + w * h - intersect))
            #pdb.set_trace()
            bestbox = int(np.argmax(ious))
            boxMask[b, indy, indx, bestbox, :] = 1.0
            boxCls[b, indy, indx, bestbox, :] = cls
            boxObject[b, indy, indx, bestbox, :] = 1.0  # ious[bestbox]
            tx = x0 * W - indx
            ty = y0 * H - indy
            tw, th = math.sqrt(w), math.sqrt(h)  # predict sqrt(w), sqrt(h)
            #pdb.set_trace()
            boxXYWH[b, indy, indx, bestbox, :] = nd.array([tx, ty, tw, th])
    return boxMask, boxCls, boxObject, boxXYWH
def main():
    # Initialize problem parameters
    batch_size = 1
    prediction_length = 50
    context_length = 5
    axis = [-5, 5, -3, 3]
    float_type = np.float64
    ctx = mx.Context("gpu")
    num_samples = 3
    ts_idx = 0

    # Initialize test data to generate the Gaussian Process from
    lb = -5
    ub = 5
    dx = (ub - lb) / (prediction_length - 1)
    x_test = nd.arange(lb, ub + dx, dx, ctx=ctx, dtype=float_type).reshape(-1, 1)
    x_test = nd.tile(x_test, reps=(batch_size, 1, 1))

    # Define the GP hyperparameters
    amplitude = nd.ones((batch_size, 1, 1), ctx=ctx, dtype=float_type)
    length_scale = math.sqrt(0.4) * nd.ones_like(amplitude)
    sigma = math.sqrt(1e-5) * nd.ones_like(amplitude)

    # Instantiate the desired kernel object and compute the kernel matrix
    rbf_kernel = RBFKernel(amplitude, length_scale)

    # Generate samples from a zero-mean Gaussian process with an RBF kernel and plot them
    gp = GaussianProcess(
        sigma=sigma,
        kernel=rbf_kernel,
        prediction_length=prediction_length,
        context_length=context_length,
        num_samples=num_samples,
        ctx=ctx,
        float_type=float_type,
        sample_noise=False,  # Returns samples without noise
    )
    mean = nd.zeros((batch_size, prediction_length), ctx=ctx, dtype=float_type)
    covariance = rbf_kernel.kernel_matrix(x_test, x_test)
    gp.plot(x_test=x_test, samples=gp.sample(mean, covariance), ts_idx=ts_idx)

    # Generate a training set on a subset of the interval using the sine function
    x_train = nd.array([-4, -3, -2, -1, 1], ctx=ctx, dtype=float_type).reshape(
        context_length, 1)
    x_train = nd.tile(x_train, reps=(batch_size, 1, 1))
    y_train = nd.sin(x_train.squeeze(axis=2))

    # Predict with the exact GP predictive mean and covariance, using the same fixed hyperparameters
    samples, predictive_mean, predictive_std = gp.exact_inference(
        x_train, y_train, x_test)
    assert (np.sum(np.isnan(
        samples.asnumpy())) == 0), "NaNs in predictive samples!"
    gp.plot(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        ts_idx=ts_idx,
        mean=predictive_mean,
        std=predictive_std,
        samples=samples,
        axis=axis,
    )
def inference_g(self, observed_arr):
    '''
    Inference with generator.

    Args:
        observed_arr:       `mxnet.ndarray` of observed data points.

    Returns:
        Tuple data.
        - re-parametric data.
        - encoded data points.
        - re-encoded data points.
    '''
    generated_arr, encoded_arr, re_encoded_arr = super().inference_g(observed_arr)

    if autograd.is_recording():
        limit = self.long_term_seq_len
        seq_len = self.noise_sampler.seq_len
        self.noise_sampler.seq_len = limit
        long_term_observed_arr = self.noise_sampler.draw()

        observed_mean_arr = nd.expand_dims(nd.mean(long_term_observed_arr, axis=1), axis=1)
        sum_arr = None
        for seq in range(2, long_term_observed_arr.shape[1]):
            add_arr = nd.sum(long_term_observed_arr[:, :seq] - observed_mean_arr, axis=1)
            if sum_arr is None:
                sum_arr = nd.expand_dims(add_arr, axis=0)
            else:
                sum_arr = nd.concat(
                    sum_arr,
                    nd.expand_dims(add_arr, axis=0),
                    dim=0
                )
        max_arr = nd.max(sum_arr, axis=0)
        min_arr = nd.min(sum_arr, axis=0)

        diff_arr = long_term_observed_arr - observed_mean_arr
        std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
        R_S_arr = (max_arr - min_arr) / std_arr
        len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_observed_arr.shape[1] / 2)
        observed_H_arr = nd.log(R_S_arr) / len_arr

        self.noise_sampler.seq_len = seq_len

        g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
        g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
        o_min_arr = nd.expand_dims(observed_arr.min(axis=1), axis=1)
        o_max_arr = nd.expand_dims(observed_arr.max(axis=1), axis=1)

        _observed_arr = generated_arr

        long_term_generated_arr = None
        for i in range(limit):
            generated_arr, _, _ = super().inference_g(_observed_arr)

            g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
            g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
            o_min_arr = nd.expand_dims(_observed_arr.min(axis=1), axis=1)
            o_max_arr = nd.expand_dims(_observed_arr.max(axis=1), axis=1)
            generated_arr = (generated_arr - g_min_arr) / (g_max_arr - g_min_arr)
            generated_arr = (o_max_arr - o_min_arr) * generated_arr
            generated_arr = o_min_arr + generated_arr

            if self.condition_sampler is not None:
                self.condition_sampler.output_shape = generated_arr.shape
                noise_arr = self.condition_sampler.generate()
                generated_arr += noise_arr

            if long_term_generated_arr is None:
                long_term_generated_arr = generated_arr
            else:
                long_term_generated_arr = nd.concat(
                    long_term_generated_arr,
                    generated_arr,
                    dim=1
                )

            _observed_arr = generated_arr

        generated_mean_arr = nd.expand_dims(nd.mean(long_term_generated_arr, axis=1), axis=1)
        sum_arr = None
        for seq in range(2, long_term_generated_arr.shape[1]):
            add_arr = nd.sum(long_term_generated_arr[:, :seq] - generated_mean_arr, axis=1)
            if sum_arr is None:
                sum_arr = nd.expand_dims(add_arr, axis=0)
            else:
                sum_arr = nd.concat(
                    sum_arr,
                    nd.expand_dims(add_arr, axis=0),
                    dim=0
                )
        max_arr = nd.max(sum_arr, axis=0)
        min_arr = nd.min(sum_arr, axis=0)

        diff_arr = long_term_generated_arr - generated_mean_arr
        std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
        R_S_arr = (max_arr - min_arr) / std_arr
        len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_generated_arr.shape[1] / 2)
        generated_H_arr = nd.log(R_S_arr) / len_arr

        multi_fractal_loss = nd.abs(generated_H_arr - observed_H_arr)
        multi_fractal_loss = nd.mean(multi_fractal_loss, axis=0, exclude=True)
        multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
        multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)

        generated_arr = generated_arr + multi_fractal_loss

    return generated_arr, encoded_arr, re_encoded_arr
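The blocks above appear to estimate a Hurst exponent via rescaled-range (R/S) analysis for both the observed and the generated long-term sequences; in the notation of the code, for a series of length n this is roughly:

# H = log(R / S) / log(n / 2)
# where R = max_k S_k - min_k S_k, with S_k the cumulative sum of deviations from the mean
# over the first k steps, and S the standard deviation of the series.
# The extra term then penalizes the gap between the observed and generated Hurst exponents.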
def forward(self, inputs, loss=None, training=True, commtype='average', topo='FUC'):
    assert len(inputs) == self.slots + 1
    # if self.non_local_mode:
    #     return self.forward_non_local(inputs, loss, training)
    # if self.message_embedding:
    #     return self.forward_message_embedding(inputs, loss, training)

    local_drop_vec = nd.ones_like(inputs[0])
    local_drop_vec = self.local_dropout_op(local_drop_vec)
    for i in range(self.slots):
        inputs[i] = inputs[i] * local_drop_vec
    inputs[-1] = self.global_dropout_op(inputs[-1])

    # if topo == 'FC':
    #     comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    # elif topo == 'FUC':
    #     comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
    # elif topo == 'Master':
    #     comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    #     for i in range(self.slots):
    #         for j in range(self.slots):
    #             comm_rate[i][j] = 0

    # if self.use_comm and self.topo_learning_mode:
    #     proba = nd.sigmoid(self.topo.data())
    #     if random.random() < 1e-2:
    #         print '---------------------------------------------'
    #         print proba.asnumpy()
    #         print '---------------------------------------------'
    #     u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
    #     comm_rate = nd.sigmoid(10. * (
    #         nd.log(proba) - nd.log(1. - proba) +
    #         nd.log(u_vec) - nd.log(1. - u_vec)
    #     ))
    #     if loss is not None:
    #         loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

    results = []
    for i in range(self.slots):
        results.append(self.local_share_trans.forward(inputs[i]))
    results.append(self.local_share_trans.forward(inputs[-1]))

    # if self.use_comm:
    #     if self.topo_learning_mode:
    #         assert self.concrete_share_rate is False
    #         for i in range(self.slots):
    #             tmp = nd.zeros_like(results[i])
    #             norm = nd.zeros_like(comm_rate[0][0])
    #             for j in range(self.slots):
    #                 if i != j:
    #                     tmp = tmp + self.local2local_share_comm(inputs[j], training=training) * comm_rate[j][i]
    #                     norm = norm + comm_rate[j][i]
    #             # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
    #             tmp = tmp + self.global2local_comm(inputs[-1], training=training) * comm_rate[-1][i]
    #             norm = norm + comm_rate[-1][i]
    #             if nd.sum(norm) > 1e-5:
    #                 results[i] = results[i] + tmp / norm
    #         tmp = nd.zeros_like(results[-1])
    #         norm = nd.zeros_like(comm_rate[0][0])
    #         for j in range(self.slots):
    #             tmp = tmp + self.local2global_comm(inputs[j], training=training) * comm_rate[j][-1]
    #             norm = norm + comm_rate[j][-1]
    #         if nd.sum(norm) > 1e-5:
    #             results[-1] = results[-1] + tmp / norm
    #     else:
    #         if commtype == 'average':
    #             for i in range(self.slots):
    #                 tmp = nd.zeros_like(results[i])
    #                 norm = nd.zeros_like(comm_rate[0][0])
    #                 for j in range(self.slots):
    #                     if i != j:
    #                         tmp = tmp + self.local2local_share_comm.forward(nd.concat(*[inputs[i], inputs[j]], dim=1), training=training) * comm_rate[j][i]
    #                         norm = norm + comm_rate[j][i]
    #                 # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
    #                 tmp = tmp + self.global2local_comm.forward(nd.concat(*[inputs[i], inputs[-1]], dim=1), training=training) * comm_rate[-1][i]
    #                 norm = norm + comm_rate[-1][i]
    #                 if nd.sum(norm) > 1e-5:
    #                     results[i] = results[i] + tmp / norm
    #             tmp = nd.zeros_like(results[-1])
    #             norm = nd.zeros_like(comm_rate[0][0])
    #             for j in range(self.slots):
    #                 tmp = tmp + self.local2global_comm.forward(nd.concat(*[inputs[j], inputs[-1]], dim=1), training=training) * comm_rate[j][-1]
    #                 norm = norm + comm_rate[j][-1]
    #             if nd.sum(norm) > 1e-5:
    #                 results[-1] = results[-1] + tmp / norm
    #         elif commtype == 'maxpooling':
    #             for i in range(self.slots):
    #                 tmp = []
    #                 for j in range(self.slots):
    #                     if j != i:
    #                         tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
    #                 tmp.append(self.global2local_comm.forward(inputs[-1], training=training))
    #                 for k in range(len(tmp)):
    #                     tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
    #                 tmp = nd.concat(*tmp, dim=1)
    #                 maxcomm = nd.max(tmp, axis=1)
    #                 results[i] = results[i] + maxcomm
    #             tmp = []
    #             for i in range(self.slots):
    #                 tmp.append(self.local2global_comm.forward(inputs[i], training=training))
    #             for k in range(len(tmp)):
    #                 tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
    #             tmp = nd.concat(*tmp, dim=1)
    #             maxcomm = nd.max(tmp, axis=1)
    #             results[-1] = results[-1] + maxcomm

    # if self.block_mode:
    #     assert self.local_in_units == self.local_units
    #     assert self.global_in_units == self.global_units
    #     for i in range(self.slots):
    #         results[i] = self.yz_weight_local(results[i], training=training) + inputs[i]
    #     results[-1] = self.yz_weight_global(results[-1], training=training) + inputs[-1]

    return results
def forward(self, inputs, loss=None, training=True, commtype='average', topo='FC'):
    assert len(inputs) == self.slots + 1

    local_drop_vec = nd.ones_like(inputs[0])
    local_drop_vec = self.local_dropout_op(local_drop_vec)
    for i in range(self.slots):
        inputs[i] = inputs[i] * local_drop_vec
    inputs[-1] = self.global_dropout_op(inputs[-1])

    if topo == 'FC':
        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    elif topo == 'FUC':
        comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
    elif topo == 'Master':
        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        for i in range(self.slots):
            for j in range(self.slots):
                comm_rate[i][j] = 0

    if self.use_comm and self.topo_learning_mode:
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print('---------------------------------------------')
            print(proba.asnumpy())
            print('---------------------------------------------')
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

    results = []
    for i in range(self.slots):
        results.append(self.local_share_trans.forward(inputs[i], training=training))
    results.append(self.global_trans.forward(inputs[-1], training=training))

    if commtype == 'average':
        for i in range(self.slots):
            tmp = nd.zeros_like(results[i])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                if i != j:
                    tmp = tmp + self.local2local_share_comm.forward(nd.concat(inputs[j], dim=1), training=training) * comm_rate[j][i]
                    norm = norm + comm_rate[j][i]
            # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
            tmp = tmp + self.global2local_comm.forward(nd.concat(inputs[-1], dim=1), training=training) * \
                comm_rate[-1][i]
            norm = norm + comm_rate[-1][i]
            if nd.sum(norm) > 1e-5:
                results[i] = results[i] + tmp / norm
        tmp = nd.zeros_like(results[-1])
        norm = nd.zeros_like(comm_rate[0][0])
        for j in range(self.slots):
            tmp = tmp + self.local2global_comm.forward(nd.concat(inputs[j], dim=1), training=training) * \
                comm_rate[j][-1]
            norm = norm + comm_rate[j][-1]
        if nd.sum(norm) > 1e-5:
            results[-1] = results[-1] + tmp / norm
    elif commtype == 'maxpooling':
        for i in range(self.slots):
            tmp = []
            for j in range(self.slots):
                if j != i:
                    tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
            tmp.append(self.global2local_comm.forward(inputs[-1], training=training))
            for k in range(len(tmp)):
                tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
            tmp = nd.concat(*tmp, dim=1)
            maxcomm = nd.max(tmp, axis=1)
            results[i] = results[i] + maxcomm
        tmp = []
        for i in range(self.slots):
            tmp.append(self.local2global_comm.forward(inputs[i], training=training))
        for k in range(len(tmp)):
            tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
        tmp = nd.concat(*tmp, dim=1)
        maxcomm = nd.max(tmp, axis=1)
        results[-1] = results[-1] + maxcomm

    return results
def forward(self, inputs, loss=None):
    assert len(inputs) == self.slots + 1
    if self.non_local_mode:
        return self.forward_multidims(inputs, loss)
    if self.message_embedding:
        return self.forward_message_embedding(inputs, loss)

    local_drop_vec = nd.ones_like(inputs[0])
    local_drop_vec = self.local_dropout_op(local_drop_vec)
    for i in range(self.slots):
        inputs[i] = inputs[i] * local_drop_vec
    inputs[-1] = self.global_dropout_op(inputs[-1])

    # local_share_vec = []
    # local_private_vec = []
    # if self.concrete_share_rate:
    #     raise ValueError('no share_private!!!')
    #     for i in range(self.slots):
    #         proba = nd.sigmoid(data=self.share_rate[i].data())
    #         proba = nd.broadcast_axis(data=proba, axis=(0, 1), size=inputs[0].shape)
    #         u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=inputs[0].shape, ctx=CTX)
    #         local_share_vec.append(nd.sigmoid(10. * (
    #             nd.log(proba) - nd.log(1. - proba) +
    #             nd.log(u_vec) - nd.log(1. - u_vec)
    #         )))
    #         local_private_vec.append(1. - local_share_vec[i])
    #         # print 'proba:', proba
    #         # print 'dropout_regularizer:', self.dropout_regularizer
    #         if loss is not None:
    #             loss.append(
    #                 self.dropout_regularizer * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    #     if random.random() < 0.01:
    #         for i in range(self.slots):
    #             proba = nd.sigmoid(data=self.share_rate[i].data())
    #             print proba.asnumpy(),
    #         print ''
    # else:
    #     local_share_vec = [nd.ones_like(inputs[0]), ] * self.slots
    #     local_private_vec = [nd.zeros_like(inputs[0]), ] * self.slots
    # local_share_vec = (1. - self.private_rate) * nd.Dropout(
    #     nd.ones(shape=(inputs[0].shape[0], self.local_units)), p=self.private_rate, mode='always')
    # local_private_vec = 1. - local_share_vec

    comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    if self.use_comm and self.topo_learning_mode:
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print('---------------------------------------------')
            print(proba.asnumpy())
            print('---------------------------------------------')
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

    results = []
    for i in range(self.slots):
        results.append(self.local_share_trans(inputs[i]))
    results.append(self.global_trans(inputs[-1]))

    if self.use_comm:
        if self.topo_learning_mode:
            assert self.concrete_share_rate is False
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    if i != j:
                        tmp = tmp + self.local2local_share_comm(inputs[j]) * comm_rate[j][i]
                        norm = norm + comm_rate[j][i]
                # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                tmp = tmp + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                norm = norm + comm_rate[-1][i]
                if nd.sum(norm) > 1e-5:
                    results[i] = results[i] + tmp / norm
            tmp = nd.zeros_like(results[-1])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                tmp = tmp + self.local2global_comm(inputs[j]) * comm_rate[j][-1]
                norm = norm + comm_rate[j][-1]
            if nd.sum(norm) > 1e-5:
                results[-1] = results[-1] + tmp / norm
        else:
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                for j in range(self.slots):
                    if j != i:
                        tmp = tmp + self.local2local_share_comm(inputs[j])
                tmp = tmp + self.global2local_comm(inputs[-1])
                results[i] = results[i] + (tmp / float(self.slots))
            tmp = nd.zeros_like(results[-1])
            for i in range(self.slots):
                tmp = tmp + self.local2global_comm(inputs[i])
            results[-1] = results[-1] + (tmp / float(self.slots))

    if self.block_mode:
        assert self.local_in_units == self.local_units
        assert self.global_in_units == self.global_units
        for i in range(self.slots):
            results[i] = self.yz_weight_local(results[i]) + inputs[i]
        results[-1] = self.yz_weight_global(results[-1]) + inputs[-1]

    return results