def compute_retrospective_loss(self, observed_arr, encoded_arr, decoded_arr, re_encoded_arr):
    '''
    Compute retrospective loss.

    The loss is the sum of two absolute-difference terms:

    - a summary term between `decoded_arr` and
      `encoded_arr - mean(observed_arr, axis=2)`, and
    - a match term between the last step (index `-1` on axis 1) of
      `encoded_arr` and `re_encoded_arr`, weighted by the retrospective
      lambda.

    When the output and hidden neuron counts differ, a random subset of the
    larger feature axis is drawn so that both sides have the same size.

    Args:
        observed_arr:       `mxnet.ndarray` of observed data points.
        encoded_arr:        `mxnet.ndarray` of encoded data points.
        decoded_arr:        `mxnet.ndarray` of decoded data points.
        re_encoded_arr:     `mxnet.ndarray` of re-encoded data points.

        All arrays are indexed along three axes here; the meaning of each
        axis is presumably (batch, sequence, feature) -- TODO confirm.

    Returns:
        `mxnet.ndarray` of losses: the mean of the (norm-clipped) delta
        over every axis except axis 0.
    '''
    output_dim = self.__output_neuron_count
    hidden_dim = self.__hidden_neuron_count

    if output_dim == hidden_dim:
        observed_part, encoded_part, decoded_part = observed_arr, encoded_arr, decoded_arr
    elif output_dim > hidden_dim:
        # Draw `hidden_dim` output features uniformly without replacement.
        all_dim_arr = np.arange(output_dim)
        np.random.shuffle(all_dim_arr)
        choiced_dim_arr = all_dim_arr[:hidden_dim]
        observed_part = observed_arr[:, :, choiced_dim_arr]
        encoded_part = encoded_arr
        decoded_part = decoded_arr[:, :, choiced_dim_arr]
    else:
        # Draw `output_dim` hidden features uniformly without replacement.
        all_dim_arr = np.arange(hidden_dim)
        np.random.shuffle(all_dim_arr)
        choiced_dim_arr = all_dim_arr[:output_dim]
        observed_part = observed_arr
        encoded_part = encoded_arr[:, :, choiced_dim_arr]
        decoded_part = decoded_arr

    target_arr = nd.broadcast_sub(
        encoded_part,
        nd.expand_dims(observed_part.mean(axis=2), axis=2)
    )
    # sqrt(x ** 2) is kept (rather than abs) so the recorded graph is unchanged.
    summary_delta_arr = nd.sqrt(nd.power(decoded_part - target_arr, 2))

    # One slice over the batch axis replaces the former per-sample loop that
    # re-concatenated the growing result on every iteration.
    last_encoded_arr = encoded_arr[:self.__batch_size, -1]
    last_re_encoded_arr = re_encoded_arr[:self.__batch_size, -1]
    match_delta_arr = nd.sqrt(nd.power(last_encoded_arr - last_re_encoded_arr, 2))

    delta_arr = summary_delta_arr + nd.expand_dims(
        self.__retrospective_lambda * match_delta_arr,
        axis=1
    )

    # Rescale the whole delta when its norm exceeds the clipping threshold.
    v = nd.norm(delta_arr)
    if v > self.__grad_clip_threshold:
        delta_arr = delta_arr * self.__grad_clip_threshold / v

    loss = nd.mean(delta_arr, axis=0, exclude=True)
    return loss
def batched_l2_dist(a, b):
    """Return the pairwise L2 distances between the rows of `a` and `b`.

    Uses the expansion ``|a|^2 - 2 * a.b + |b|^2`` evaluated with one batched
    GEMM call. `b` is transposed over axes ``(0, 2, 1)``, so both inputs are
    treated as batches of matrices; presumably `a` is (batch, n, dim) and
    `b` is (batch, m, dim) -- TODO confirm against callers.

    The squared distances are clipped to ``[1e-30, float32 max]`` before the
    square root is taken.
    """
    sq_norm_a = nd.power(nd.norm(a, axis=-1), 2)
    sq_norm_b = nd.power(nd.norm(b, axis=-1), 2)

    b_transposed = nd.transpose(b, axes=(0, 2, 1))
    # Repeat |b|^2 along axis 1 so it can act as the additive GEMM operand.
    sq_norm_b_tiled = nd.broadcast_axes(
        nd.expand_dims(sq_norm_b, axis=-2),
        axis=1,
        size=a.shape[1],
    )
    # -2 * a @ b^T + |b|^2
    cross_plus_b = nd.linalg_gemm(a, b_transposed, sq_norm_b_tiled, alpha=-2)
    sq_dist = nd.add(cross_plus_b, nd.expand_dims(sq_norm_a, axis=-1))

    upper_bound = np.finfo(np.float32).max
    return nd.sqrt(nd.clip(sq_dist, 1e-30, upper_bound))
def calc_cos_mt(self, cos_t):
    """Evaluate the margin polynomial in `cos_t`.

    Computes::

        sum_{p=0}^{margin // 2} (-1)^p * c_map[2p]
                                 * cos_t^(margin - 2p) * (1 - cos_t^2)^p

    which is presumably the multiple-angle expansion of ``cos(margin * t)``
    with `self.c_map` holding the binomial coefficients -- verify against
    where `c_map` is built.

    Args:
        cos_t: cosine values; anything supporting `*`, `-` and `**`
            (for an NDArray, `**` dispatches to the same power op that
            `nd.power` uses).

    Returns:
        The polynomial value, with the same type/shape as `cos_t`.
    """
    cos_mt = 0
    sin2_t = 1 - cos_t * cos_t
    sign = 1
    # Floor division: `range` rejects the float that `/` yields on Python 3.
    for p in range(self._margin // 2 + 1):
        term = self.c_map[2 * p] * cos_t ** (self._margin - 2 * p) * sin2_t ** p
        cos_mt = cos_mt + sign * term
        sign = -sign
    return cos_mt
def generateData():
    """Generate noise-free samples of a cubic polynomial.

    Draws 200 standard-normal inputs and evaluates
    ``y = 1.2*x - 3.4*x^2 + 5.6*x^3 + 5.0``.

    Returns:
        Tuple ``(x, X, y)``: the raw inputs, the concatenated features
        ``[x, x^2, x^3]`` and the labels.
    """
    train_count = 100
    test_count = 100
    w_linear, w_square, w_cubic = [1.2, -3.4, 5.6]
    bias = 5.0

    x = nd.random_normal(shape=(train_count + test_count, 1))
    X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))
    y = (
        w_linear * X[:, 0]
        + w_square * X[:, 1]
        + w_cubic * X[:, 2]
        + bias
    )
    return x, X, y
def update(self, index, weight, grad, state):
    """Apply one optimizer step to `weight` in place.

    Args:
        index: key used to look up the learning rate, weight decay and
            update count for this parameter.
        weight: NDArray holding the parameter; updated in place.
        grad: NDArray holding the gradient; rescaled in place.
        state: pair ``(mean, var)`` of running moment NDArrays; both are
            updated in place.
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]

    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        # Exponential moving averages of the gradient and the squared
        # gradient, written into the state arrays in place.
        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)

        r1 = weight.norm()
        if not self.bias_correction:
            # Without bias correction the weight norm is clamped to
            # [lower_bound, upper_bound].
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            # `_eps_after_sqrt` selects whether epsilon is added after or
            # before the square root; both paths reuse var_hat's buffer.
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat

        r2 = g.norm()

        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        # Fall back to a ratio of 1 wherever the flag above is set
        # (presumably `where` treats NaN as a true condition -- TODO confirm).
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r

        # update weight
        g *= lr
        weight[:] -= g
def generateData():
    """Generate a noisy cubic-polynomial dataset split into train and test.

    Draws 200 standard-normal inputs, builds the features ``[x, x^2, x^3]``
    and the labels ``y = 1.2*x - 3.4*x^2 + 5.6*x^3 + 5.0`` plus Gaussian
    noise scaled by 0.01.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The first `num_train`
        rows form the training split and the remaining rows form the test
        split, so the two splits are disjoint.
    """
    num_train = 100
    num_test = 100
    true_w = [1.2, -3.4, 5.6]
    true_b = 5.0

    x = nd.random_normal(shape=(num_train + num_test, 1))
    X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))
    y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
    y += 0.01 * nd.random_normal(shape=y.shape)

    # The test split starts where the training split ends; slicing with
    # `[:num_test]` would hand back the training rows as "test" data.
    X_train, X_test = X[:num_train], X[num_train:]
    y_train, y_test = y[:num_train], y[num_train:]
    return X_train, X_test, y_train, y_test
def embedding(self, observed_arr):
    '''
    Embedding.

    In default, this method does the positional encoding: a sinusoidal
    signal, scaled by `self.embedding_weignt`, is added to the input.
    The input shape is also recorded on `self.batch_size`, `self.seq_len`
    and `self.depth_dim`.

    Args:
        observed_arr:       `mxnet.ndarray` of observed data points.
                            Its shape is unpacked into exactly three axes.

    Returns:
        `mxnet.ndarray` of embedded data points. The input itself is
        returned when `self.embedding_flag` is `False`.
    '''
    batch_size, seq_len, depth_dim = observed_arr.shape
    self.batch_size = batch_size
    self.seq_len = seq_len
    self.depth_dim = depth_dim

    if self.embedding_flag is False:
        return observed_arr

    feature_idx_arr = nd.arange(depth_dim)

    # Feature indices rounded down to the nearest even number, one row per step.
    even_idx_arr = (feature_idx_arr / 2).astype(int) * 2
    scale_arr = nd.tile(nd.expand_dims(even_idx_arr, 0), [seq_len, 1])
    # NOTE(review): scale_arr is integer-typed at this point; confirm that
    # this division yields the intended fractional exponents.
    scale_arr = scale_arr / depth_dim
    scale_arr = nd.power(10000.0, scale_arr).astype(np.float32)

    # Odd feature indices get a pi/2 phase shift; even ones get none.
    odd_shift_arr = (feature_idx_arr % 2) * np.pi / 2
    phase_arr = nd.tile(nd.expand_dims(odd_shift_arr, 0), [seq_len, 1])

    step_arr = nd.tile(nd.expand_dims(nd.arange(seq_len), 1), [1, depth_dim])

    wave_arr = nd.sin(step_arr / scale_arr + phase_arr)
    encoding_arr = nd.tile(nd.expand_dims(wave_arr, 0), [batch_size, 1, 1])
    encoding_arr = encoding_arr.as_in_context(observed_arr.context)

    return observed_arr + (encoding_arr * self.embedding_weignt)
def update(self, index, weight, grad, state):
    """Apply one optimizer step to `weight` in place.

    Args:
        index: key used to look up the learning rate, weight decay and
            update count for this parameter.
        weight: NDArray holding the parameter; updated in place.
        grad: NDArray holding the gradient; rescaled in place.
        state: pair ``(mean, var)`` of running moment NDArrays; both are
            overwritten in place.
    """
    assert isinstance(weight, NDArray)
    assert isinstance(grad, NDArray)
    self._update_count(index)
    step_size = self._get_lr(index)
    decay = self._get_wd(index)
    step_count = self._index_update_count[index]

    # Rescale, then optionally clip, the incoming gradient.
    grad *= self.rescale_grad
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    # Refresh the running first and second moments.
    first_moment, second_moment = state
    first_moment[:] = self.beta1 * first_moment + (1. - self.beta1) * grad
    second_moment[:] = self.beta2 * second_moment + (1. - self.beta2) * square(grad)

    weight_norm = weight.norm()
    if self.bias_correction:
        # Bias-corrected moments; epsilon is added inside the square root.
        corrected_first = first_moment / (1. - power(self.beta1, step_count))
        corrected_second = second_moment / (1. - power(self.beta2, step_count))
        direction = corrected_first / sqrt(corrected_second + self.epsilon) + decay * weight
    else:
        # Uncorrected moments; the weight norm is clamped to the bounds.
        weight_norm = minimum(maximum(weight_norm, self.lower_bound), self.upper_bound)
        direction = first_moment / (sqrt(second_moment) + self.epsilon) + decay * weight

    direction_norm = direction.norm()

    # Trust ratio: falls back to 1 when either norm is zero.
    if weight_norm == 0. or direction_norm == 0.:
        trust_ratio = 1.
    else:
        trust_ratio = weight_norm / direction_norm
    step_size *= trust_ratio

    weight[:] -= step_size * direction
def loss_function(recon_x, x, mu, logvar):
    """Return the summed reconstruction loss plus the KL-divergence term.

    Args:
        recon_x: generated images.
        x: original images.
        mu: latent mean.
        logvar: latent log variance.

    Returns:
        ``sum(reconstruction_function(recon_x, x)) + KLD`` where
        ``KLD = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    reconstruction = nd.sum(reconstruction_function(recon_x, x))

    # Element-wise 1 + logvar - (mu^2 + exp(logvar)), summed and scaled by -0.5.
    kld_terms = (nd.power(mu, 2) + nd.exp(logvar)) * (-1) + 1 + logvar
    kl_divergence = nd.sum(kld_terms) * (-0.5)

    return reconstruction + kl_divergence
def lab_to_rgb(lab, ctx=None):
    """Convert a Lab image to sRGB.

    The image is flattened to rows of three channels, mapped
    Lab -> fx/fy/fz -> XYZ -> linear RGB (clipped to [0, 1]) -> sRGB, and
    reshaped back to the shape of the checked input.

    Args:
        lab: Lab image; validated with `__check_image`.
        ctx: MXNet context to run in. Required despite the `None` default.

    Returns:
        NDArray of sRGB values with the same shape as the checked `lab`.

    Raises:
        ValueError: if `ctx` or `lab` is `None`.
    """
    if ctx is None:
        raise ValueError("ctx can not be None")
    if lab is None:
        raise ValueError("lab can not be None")

    with mx.Context(ctx):
        lab = __check_image(lab)
        pixels = lab.reshape([-1, 3])

        # Lab -> (fx, fy, fz); rows are the l, a, b contributions.
        to_fxfyfz = nd.array([
            [1 / 116.0, 1 / 116.0, 1 / 116.0],
            [1 / 500.0, 0.0, 0.0],
            [0.0, 0.0, -1 / 200.0],
        ], ctx=ctx)
        l_offset = nd.array([16.0, 0.0, 0.0], ctx=ctx)
        fxfyfz = nd.dot(pixels + l_offset, to_fxfyfz)

        # (fx, fy, fz) -> XYZ: linear below the threshold, cubic above it.
        epsilon = 6 / 29
        below = fxfyfz <= epsilon
        above = fxfyfz > epsilon
        linear_part = (3 * epsilon ** 2 * (fxfyfz - 4 / 29)) * below
        cubic_part = (fxfyfz ** 3) * above
        xyz = linear_part + cubic_part
        xyz = nd.multiply(xyz, nd.array([0.950456, 1.0, 1.088754]))

        # XYZ -> linear RGB; rows are the x, y, z contributions.
        to_rgb = nd.array([
            [3.2404542, -0.9692660, 0.0556434],
            [-1.5371385, 1.8760108, -0.2040259],
            [-0.4985314, 0.0415560, 1.0572252],
        ])
        rgb = nd.dot(xyz, to_rgb)
        nd.clip(rgb, 0.0, 1.0, out=rgb)

        # Linear RGB -> sRGB transfer curve.
        below = rgb <= 0.0031308
        above = rgb > 0.0031308
        low_part = nd.multiply(nd.multiply(rgb, 12.92), below)
        high_part = nd.multiply(
            nd.multiply(nd.power(rgb, (1 / 2.4)), 1.055) - 0.055,
            above,
        )
        srgb = low_part + high_part

        return srgb.reshape(lab.shape)
def on_forward(state):
    """Update the running metrics after a forward pass.

    In training mode the loss metric and the accuracy metric(s) are updated.
    Otherwise the output is optionally L2-normalised along its last axis
    (written back to ``state['output']``) and passed to the re-id metric.

    Args:
        state: dict with the keys 'train', 'sample', 'output' and, in
            training mode, 'loss'.
    """
    if not state['train']:
        _, cam, label, ds = state['sample']
        if cfg.feature_norm:
            squared = ndarray.power(state['output'], 2)
            norm = ndarray.sqrt(ndarray.sum(squared, axis=-1, keepdims=True))
            state['output'] = state['output'] / norm
        reid_metric.update(state['output'], cam, label, ds)
        return

    _, label = state['sample']
    loss_metric.update(None, state['loss'])

    if cfg.partnum is None:
        train_accuracy_metric.update(preds=state['output'], labels=label)
        return

    # One accuracy metric per output when `cfg.partnum` is set.
    for part_metric, part_output in zip(train_accuracy_metrics, state['output']):
        part_metric.update(preds=part_output, labels=label)
from mxnet import ndarray as nd from mxnet import autograd from mxnet import gluon import matplotlib as mpl import matplotlib.pyplot as plt mpl.rcParams['figure.dpi'] = 120 num_train = 100 num_test = 100 true_w = [1.2, -3.4, 5.6] true_b = 5.0 x0 = nd.random.normal(shape=(num_train + num_test, 1)) x = nd.concat(x0, nd.power(x0, 2), nd.power(x0, 3)) y = true_w[0] * x[:, 0] + true_w[1] * x[:, 1] + true_w[2] * x[:, 2] + true_b y += 0.1 * nd.random.normal(shape=y.shape) def train(x_train, x_test, y_train, y_test): net = gluon.nn.Sequential() with net.name_scope(): net.add(gluon.nn.Dense(1)) net.initialize() learn_rate = 0.01 epochs = 100 batch_size = min(10, y_train.shape[0]) dateset_train = gluon.data.ArrayDataset(x_train, y_train) data_iter_train = gluon.data.DataLoader(dateset_train,
def train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2, trainerEn, trainerDe, trainerD, trainerD2, lambda1, batch_size, expname, append=True, useAE = False):
    """Adversarially train an encoder/decoder pair with two discriminators.

    `netD` scores images (decoder outputs, optionally concatenated with the
    input) and `netD2` scores latent codes. Every epoch appends one line of
    averaged losses/accuracies to ``<expname>_trainloss.txt``. Every tenth
    epoch it saves the four networks under ``checkpoints/``, appends a
    validation MSE line to ``<expname>_validtest.txt`` and writes image
    grids under ``outputs/``.

    Args:
        pool_size: size handed to `imagePool.ImagePool`; the pool itself is
            not used afterwards in this function.
        epochs: number of passes over `train_data`.
        train_data, val_data: resettable iterators whose batches expose
            `data[0]` (input) and `data[1]` (target).
        ctx: MXNet context used for every array created here.
        netEn, netDe, netD, netD2: encoder, decoder, image discriminator and
            latent discriminator.
        trainerEn, trainerDe, trainerD, trainerD2: matching trainers.
        lambda1: weight of the reconstruction loss.
        batch_size: used for the shape of `mu` and for the speed log.
        expname: prefix for all files written.
        append: when true, `netD` sees input and output concatenated on
            axis 1; otherwise only the output.
        useAE: not referenced in this function.

    Returns:
        List ``[loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2,
        loss_rec_G2, acc2_rec]`` of per-iteration histories.
    """
    # Truncate (or create) both log files so this run starts from empty logs.
    tp_file = open(expname + "_trainloss.txt", "w")
    tp_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    # Despite its name, the reconstruction loss is an L2 loss.
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.CustomMetric(facc)
    metricMSE = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    acc2_rec = []
    loss_rec_D2 = []
    loss_rec_G2 = []
    # Step size for the manual update of `sigma` below.
    lr = 0.002
    #mu = nd.random_normal(loc=0, scale=1, shape=(batch_size/2,64,1,1), ctx=ctx)
    # NOTE(review): `batch_size/2` is a float on Python 3; confirm the shape
    # is accepted there (this looks like Python 2 integer division).
    mu = nd.random.uniform(low= -1, high=1, shape=(batch_size/2,64,1,1),ctx=ctx)
    #mu = nd.zeros((batch_size/2,64,1,1),ctx=ctx)
    sigma = nd.ones((64,1,1),ctx=ctx)
    mu.attach_grad()
    sigma.attach_grad()
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            fake_latent= netEn(real_in)
            #real_latent = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx)
            # "Real" latent codes: standard-normal noise scaled by sigma^2.
            real_latent = nd.multiply(nd.power(sigma,2),nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx)) #nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
            fake_out = netDe(fake_latent)
            fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                output2 = netD2(fake_latent)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                fake_latent_label = nd.zeros(output2.shape, ctx=ctx)
                # Only referenced by commented-out / disabled code below.
                noiseshape = (fake_latent.shape[0]/2,fake_latent.shape[1],fake_latent.shape[2],fake_latent.shape[3])
                eps2 = nd.multiply(nd.power(sigma,2),nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx))
                #eps2 = nd.random_normal(loc=0, scale=sigma.asscalar(), shape=fake_latent.shape, ctx=ctx) #
                #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
                # Images decoded from sampled noise act as the "fake" images.
                rec_output = netD(netDe(eps2))
                errD_fake = GAN_loss(rec_output, fake_label)
                # Computed but not part of `errD` (see the disabled line below).
                errD_fake2 = GAN_loss(output, fake_label)
                errD2_fake = GAN_loss(output2, fake_latent_label)
                metric.update([fake_label, ], [output, ])
                metric2.update([fake_latent_label, ], [output2, ])
                real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out
                output = netD(real_concat)
                output2 = netD2(real_latent)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD2_real = GAN_loss(output2, real_latent_label)
                #errD = (errD_real + 0.5*(errD_fake+errD_fake2)) * 0.5
                errD = (errD_real + errD_fake) * 0.5
                errD2 = (errD2_real + errD2_fake) * 0.5
                # Both discriminators are back-propagated through one summed loss.
                totalerrD = errD+errD2
                totalerrD.backward()
                #errD2.backward()
                metric.update([real_label, ], [output, ])
                metric2.update([real_latent_label, ], [output2, ])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                sh = fake_latent.shape
                eps2 = nd.multiply(nd.power(sigma,2),nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx))
                #eps2 = nd.random_normal(loc=0, scale=sigma.asscalar(), shape=fake_latent.shape, ctx=ctx) #
                #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx)
                rec_output = netD(netDe(eps2))
                fake_latent= (netEn(real_in))
                output2 = netD2(fake_latent)
                fake_out = netDe(fake_latent)
                fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errG2 = GAN_loss(rec_output, real_label)
                errR = L1_loss(real_out, fake_out) * lambda1
                # Latent adversarial loss (weighted by 10) + image adversarial
                # loss + reconstruction loss + mean(sigma^2).
                errG = 10.0*GAN_loss(output2, real_latent_label)+errG2+errR+nd.mean(nd.power(sigma,2))
                errG.backward()
            # After epoch 50, sigma is updated by a manual gradient step.
            if epoch>50:
                sigma -= lr / sigma.shape[0] * sigma.grad
                print(sigma)
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G2.append(nd.mean(errG2).asscalar())
            # The logged generator loss excludes the errG2 and errR parts.
            loss_rec_G.append(nd.mean(nd.mean(errG)).asscalar()-nd.mean(errG2).asscalar()-nd.mean(errR).asscalar())
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            loss_rec_D2.append(nd.mean(errD2).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)
            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                #print(errD)
                logging.info('discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, binary training acc = %f , D2 acc = %f, reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(),nd.mean(errD2).asscalar(), nd.mean(errG-errG2-errR).asscalar(),nd.mean(errG2).asscalar(), acc,acc2,nd.mean(errR).asscalar() ,iter, epoch))
            iter = iter + 1
            btic = time.time()
        name, acc = metric.get()
        _, acc2 = metric2.get()
        # Append this epoch's summary, computed from the last batch's losses.
        tp_file = open(expname + "_trainloss.txt", "a")
        tp_file.write(str(nd.mean(errG2).asscalar()) + " " + str( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str( nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) +" "+str(acc) + " " + str(acc2)+"\n")
        tp_file.close()
        metric.reset()
        metric2.reset()
        train_data.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch%10 ==0:# and epoch>0:
            # NOTE(review): this handle is replaced by a second open() below
            # without being closed first -- looks like a leaked file handle.
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D.params"
            netD.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D2.params"
            netD2.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_De.params"
            netDe.save_params(filename)
            # Input / target / reconstruction strips from the last training batch.
            fake_img1 = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            # Accumulate the reconstruction MSE over the validation set.
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent= netEn(real_in)
                y = netDe(fake_latent)
                fake_out = y
                metricMSE.update([fake_out, ], [real_out, ])
            _, acc2 = metricMSE.get()
            text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metricMSE.reset()
            # 3x3 grid of images decoded from the last sampled noise.
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0],images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3],images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6],images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_fakes_'+str(epoch)+'.png')
            text_file.close()
            # Do 10 iterations of sampler update
            # Training strips next to strips from the last validation batch.
            fake_img1T = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1,fake_img2, fake_img3,fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_'+str(epoch)+'.png')
            '''if epoch > 100: for ep2 in range(10): with autograd.record(): #eps = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx) eps2 = nd.random_normal(loc=0, scale=0.02, shape=noiseshape, ctx=ctx) eps2 = nd.tanh(eps2*sigma+mu) eps2 = nd.concat(eps,eps2,dim=0) rec_output = netD(netDe(eps2)) fake_label = nd.zeros(rec_output.shape, ctx=ctx) errGS = GAN_loss(rec_output, fake_label) errGS.backward() mu -= lr / mu.shape[0] * mu.grad sigma -= lr / sigma.shape[0] * sigma.grad print('mu ' + str(mu[0,0,0,0].asnumpy())+ ' sigma '+ str(sigma[0,0,0,0].asnumpy())) '''
            # Same noise grid again; the string above is a disabled sampler
            # update, so nothing changes between the two grids.
            images = netDe(eps2)
            fake_img1T = nd.concat(images[0],images[1], images[2], dim=1)
            fake_img2T = nd.concat(images[3],images[4], images[5], dim=1)
            fake_img3T = nd.concat(images[6],images[7], images[8], dim=1)
            fake_img = nd.concat(fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_fakespost_'+str(epoch)+'.png')
    return([loss_rec_D,loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec])
#过拟合:机器学习模型的训练误差远小于其在测试数据集上的误差。 ## 一二次多项式拟合为例子 #y=1.2x−3.4x^2+5.6x^3+5.0+noise from mxnet import ndarray as nd from mxnet import autograd from mxnet import gluon num_train = 100 num_test = 100 true_w = [1.2, -3.4, 5.6] true_b = 5.0 x = nd.random.normal(shape=(num_train + num_test, 1))#随机 X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))#x x^2 x^3 y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b y += .1 * nd.random.normal(shape=y.shape)#加入噪声 print('x:', x[:5], 'X:', X[:5], 'y:', y[:5]) ### 训练 import matplotlib as mpl#画图 mpl.rcParams['figure.dpi']= 120#分辨率 import matplotlib.pyplot as plt#画图 def train(X_train, X_test, y_train, y_test): # 线性回归模型 net = gluon.nn.Sequential() with net.name_scope():
def forward(self, output1, output2, label):
    """Return the contrastive loss between two embeddings.

    Computes ``mean((1 - label) * d^2 + label * (margin - d)^2)`` where `d`
    is the Euclidean norm of ``output1 - output2``.

    NOTE(review): the squared differences are summed over all axes, so `d`
    is a single value for the whole input rather than one per sample --
    confirm this is intended.
    """
    difference = nd.subtract(output1, output2)
    distance = nd.sqrt(nd.sum(nd.power(difference, 2)))

    # Term weighted by (1 - label): the squared distance itself.
    close_term = nd.subtract(1, label) * nd.power(distance, 2)
    # Term weighted by label: squared gap between the margin and the distance.
    far_term = (label) * nd.power(nd.subtract(self.margin, distance), 2)

    return nd.mean(nd.add(close_term, far_term))
def forward(self, x, *args):
    """Standardise `x` using its global mean and standard deviation.

    The mean and the (population) standard deviation are taken over every
    element of `x`. Extra positional arguments are ignored.
    """
    centered = x - x.mean()
    std = nd.sqrt(nd.mean(nd.power(centered, 2)))
    return centered / std
def inference_g(self, observed_arr):
    '''
    Inference with generator.

    Outside of autograd recording this simply forwards the parent result.
    While recording, it additionally:

    1. draws a long sequence from `self.noise_sampler` (temporarily setting
       its `seq_len` to `self.long_term_seq_len`),
    2. feeds the generator its own output `self.long_term_seq_len` times,
       rescaling every output to the min/max range of its input and
       concatenating the results along axis 1,
    3. computes a rescaled-range exponent (named `H` in this code) for both
       sequences, and
    4. adds the mean absolute difference of the two exponents to the
       generated data of the last iteration.

    Args:
        observed_arr:   `mxnet.ndarray` of observed data points.

    Returns:
        Tuple data.
        - generated data (with the penalty added while recording).
        - encoded data points.
        - re-encoded data points.
    '''
    def rescaled_range_exponent(series_arr):
        # log(R/S) / log(T/2), where R is the range of the cumulative
        # deviations from the mean along axis 1, S is the standard
        # deviation along axis 1 and T is the length of axis 1.
        mean_arr = nd.expand_dims(nd.mean(series_arr, axis=1), axis=1)
        sum_arr = None
        for seq in range(2, series_arr.shape[1]):
            add_arr = nd.sum(series_arr[:, :seq] - mean_arr, axis=1)
            if sum_arr is None:
                sum_arr = nd.expand_dims(add_arr, axis=0)
            else:
                sum_arr = nd.concat(
                    sum_arr,
                    nd.expand_dims(add_arr, axis=0),
                    dim=0
                )
        max_arr = nd.max(sum_arr, axis=0)
        min_arr = nd.min(sum_arr, axis=0)

        diff_arr = series_arr - mean_arr
        std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
        R_S_arr = (max_arr - min_arr) / std_arr
        len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(series_arr.shape[1] / 2)
        return nd.log(R_S_arr) / len_arr

    generated_arr, encoded_arr, re_encoded_arr = super().inference_g(observed_arr)

    if not autograd.is_recording():
        return generated_arr, encoded_arr, re_encoded_arr

    limit = self.long_term_seq_len

    # Borrow the sampler for one long draw and always restore its length,
    # even if the draw fails.
    seq_len = self.noise_sampler.seq_len
    self.noise_sampler.seq_len = limit
    try:
        long_term_observed_arr = self.noise_sampler.draw()
    finally:
        self.noise_sampler.seq_len = seq_len

    observed_H_arr = rescaled_range_exponent(long_term_observed_arr)

    # Roll the generator forward on its own output.
    _observed_arr = generated_arr
    long_term_generated_arr = None
    for i in range(limit):
        generated_arr, _, _ = super().inference_g(_observed_arr)

        g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
        g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
        o_min_arr = nd.expand_dims(_observed_arr.min(axis=1), axis=1)
        o_max_arr = nd.expand_dims(_observed_arr.max(axis=1), axis=1)

        # Min-max rescale the output into the value range of its input.
        generated_arr = (generated_arr - g_min_arr) / (g_max_arr - g_min_arr)
        generated_arr = (o_max_arr - o_min_arr) * generated_arr
        generated_arr = o_min_arr + generated_arr

        if self.condition_sampler is not None:
            self.condition_sampler.output_shape = generated_arr.shape
            noise_arr = self.condition_sampler.generate()
            generated_arr += noise_arr

        if long_term_generated_arr is None:
            long_term_generated_arr = generated_arr
        else:
            long_term_generated_arr = nd.concat(
                long_term_generated_arr,
                generated_arr,
                dim=1
            )

        _observed_arr = generated_arr

    generated_H_arr = rescaled_range_exponent(long_term_generated_arr)

    multi_fractal_loss = nd.abs(generated_H_arr - observed_H_arr)
    multi_fractal_loss = nd.mean(multi_fractal_loss, axis=0, exclude=True)
    # Two trailing axes so the penalty broadcasts against generated_arr.
    multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
    multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)

    generated_arr = generated_arr + multi_fractal_loss

    return generated_arr, encoded_arr, re_encoded_arr
#过拟合:机器学习模型的训练误差远小于其在测试数据集上的误差。 ## 一二次多项式拟合为例子 #y=1.2x−3.4x^2+5.6x^3+5.0+noise from mxnet import ndarray as nd from mxnet import autograd from mxnet import gluon num_train = 100 num_test = 100 true_w = [1.2, -3.4, 5.6] true_b = 5.0 x = nd.random.normal(shape=(num_train + num_test, 1)) #随机 X = nd.concat(x, nd.power(x, 2), nd.power(x, 3)) #x x^2 x^3 y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b y += .1 * nd.random.normal(shape=y.shape) #加入噪声 print('x:', x[:5], 'X:', X[:5], 'y:', y[:5]) ### 训练 import matplotlib as mpl #画图 mpl.rcParams['figure.dpi'] = 120 #分辨率 import matplotlib.pyplot as plt #画图 def train(X_train, X_test, y_train, y_test): # 线性回归模型 net = gluon.nn.Sequential() with net.name_scope():
# -*- coding: utf-8 -*- from mxnet import ndarray as nd from mxnet import autograd from mxnet import gluon import matplotlib as mpl mpl.rcParams['figure.dpi']= 120 import matplotlib.pyplot as plt num_train = 100 num_test = 100 true_w = [1.2, -3.4, 5.6] true_b = 5.0 x = nd.random.normal(shape=(num_train + num_test, 1)) X = nd.concat(x, nd.power(x, 2), nd.power(x, 3)) # power(x,2)表示x中所有元素2次方 # y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b y = true_w[0] * X[:, 0] + true_b y += .1 * nd.random.normal(shape=y.shape) y_train, y_test = y[:num_train], y[num_train:] # matplotlib inline import matplotlib as mpl mpl.rcParams['figure.dpi']= 120 import matplotlib.pyplot as plt # def test(net, X, y): # return square_loss(net(X), y).mean().asscalar() def train(X_train, X_test, y_train, y_test):
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd

# Synthetic dataset: 100 training and 100 test samples of
# y = 1.2*x - 3.4*x^2 + 5.6*x^3 + 5.0 + noise.
num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0

x = nd.random.normal(shape=(num_train + num_test, 1))
# Polynomial features [x, x^2, x^3].
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
# Add Gaussian noise scaled by 0.1 to the labels.
y += 0.1 * nd.random.normal(shape=y.shape)
# The first num_train labels are for training, the rest for testing.
y_train, y_test = y[:num_train], y[num_train:]

print(x[:5])
print('----------------')
print(X[:5])
print('----------------')
print(y[:5])

# 3. Define the training and testing steps
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt


def test(net, X, y):
    """Return the mean L2 loss of `net` on `(X, y)` as a Python scalar."""
    square_loss = gluon.loss.L2Loss()
    # asscalar() converts the array to a scalar; it only works on an array
    # that holds a single element.
    return square_loss(net(X), y).mean().asscalar()
from mxnet import gluon from mxnet import ndarray as nd from mxnet import autograd as ag import matplotlib.pyplot as plt #%% [markdown] # 在训练数据集和测试数据集中,给定样本特征 x ,我们使用如下的三阶多项式函数来生成该样本的标签: # $$ # y=1.2x−3.4x^2+5.6x^3+5+ϵ, # $$ # 其中噪声项 ϵ 服从均值为0、标准差为0.1的正态分布。训练数据集和测试数据集的样本数都设为100。 #%% n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5 features = nd.random.normal(shape=(n_train + n_test, 1)) poly_features = nd.concat(features, nd.power(features, 2), nd.power(features, 3)) labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1] + true_w[2] * poly_features[:, 2] + true_b) labels += nd.random.normal(scale=0.1, shape=labels.shape) #%% def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None, legend=None): plt.xlabel(x_label)