def Pre_zero(ZERO_DET = ZERO_DET, size = (2,8192), fs = 8192, fmin = 20, fmax = 4000): (D, *N) = size # N is a list low_f_max = fmin high_f_min = fmax # Interpolation freqs = fftfreq(N[-1], 1./fs) asd_zero = np.interp(freqs[(freqs>=ZERO_DET[:,0].min())&(freqs<=high_f_min)], ZERO_DET[:,0], ZERO_DET[:,1]) shiftsize = int(low_f_max - ZERO_DET[:,0].min()) xf = fftfreq(N[-1], 1./fs) xf_noise = xf[xf>=0] slc, slc_, slc__ = (xf_noise >= low_f_max)&(xf_noise<=high_f_min), (xf_noise < low_f_max), (xf_noise > high_f_min) if ctx == mx.gpu(): asd_zero = nd.array(asd_zero, ctx = ctx, dtype='float64') asd_pos = nd.square(asd_zero)[shiftsize * N[-1]//8192:] asd_neg = nd.square(asd_zero)[shiftsize * N[-1]//8192:][::-1] elif ctx == mx.cpu(): asd_pos = np.square(asd_zero)[shiftsize:] asd_neg = np.square(asd_zero)[shiftsize:][::-1] else: raise assert slc_.argmin() == slc.argmax() low_f = slc_.argmin() high_f = slc[slc.argmax():].argmin()+slc.argmax() high_f_ = N[-1]//2 - slc__.argmax() assert asd_pos.shape[0] == high_f - low_f # print(asd_neg) return (asd_pos, asd_neg, low_f, high_f, high_f_, size, fs, fmin, fmax)
def update(self, index, weight, grad, state):
    """Apply one AdaAlter optimizer step to ``weight`` in place.

    Args:
        index: parameter index used for lr/wd lookup and update counting.
        weight: parameter NDArray, updated in place.
        grad: gradient NDArray (dense or row_sparse).
        state: (history, cache_history) accumulators — ``history`` is the
            AdaGrad-style squared-gradient accumulator; ``cache_history``
            buffers local squared gradients between full synchronizations.
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    is_sparse = grad.stype == 'row_sparse'
    history = state[0]
    cache_history = state[1]
    # if self._full_sync:
    #     print("full sync")
    # else:
    #     print("local sgd")
    if is_sparse:
        kwargs = {
            'epsilon': self.float_stable_eps,
            'rescale_grad': self.rescale_grad
        }
        if self.clip_gradient:
            kwargs['clip_gradient'] = self.clip_gradient
        if self._full_sync:
            # Fully-synchronized step: accumulate into the shared history.
            sparse.adaalter_update(weight, grad, history, out=weight, lr=lr, wd=wd, **kwargs)
        else:
            # Local (unsynchronized) step: accumulate into cache_history
            # until the next full sync.
            sparse.local_adaalter_update(weight, grad, history, cache_history, out=weight, lr=lr, wd=wd, **kwargs)
        # raise NotImplementedError('AdaAlter has not been implemented for sparse nd')
    else:
        grad[:] = grad * self.rescale_grad
        if self.clip_gradient is not None:
            grad[:] = clip(grad, -self.clip_gradient, self.clip_gradient)
        # The step uses `history` from *before* this iteration's squared
        # gradient is accumulated (accumulation happens below) — that
        # deferred accumulation is the AdaAlter variant of AdaGrad.
        div = grad / sqrt(history + self.float_stable_eps)
        weight[:] += (div + weight * wd) * -lr
        if self._full_sync:
            history[:] += square(grad)
        else:
            cache_history[:] += square(grad)
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
    """Run one Hamiltonian Monte Carlo trajectory and accept/reject it.

    Args:
        exe: executor whose forward/backward yields the potential's gradients.
        exe_params: dict of parameter NDArrays (current position).
        exe_grads: dict of gradient NDArrays matching exe_params.
        label_key: key of the label used by calc_potential.
        noise_precision: precision of the Gaussian likelihood.
        prior_precision: precision of the Gaussian prior.
        L: number of leapfrog steps.
        eps: leapfrog step size.

    Returns:
        (params, accepted): the accepted parameter dict and 1/0 flag.
    """
    init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
    end_momentums = {k: v.copyto(v.context) for k, v in init_momentums.items()}
    init_potential = calc_potential(exe, init_params, label_key, noise_precision, prior_precision)
    # 0. Calculate Initial Energy and Kinetic
    init_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0 for momentum in init_momentums.values()]).asscalar()
    # 1. Make a half step for momentum at the beginning
    exe.copy_params_from(end_params)
    exe.forward(is_train=True)
    exe.backward()
    for k, v in exe_grads.items():
        v.wait_to_read()
    for k, momentum in end_momentums.items():
        momentum[:] = momentum - (eps / 2) * exe_grads[k]
    # 2. Alternate full steps for position and momentum
    for i in range(L):
        # 2.1 Full step for position
        for k, param in exe_params.items():
            param[:] = param + eps * end_momentums[k]
        # 2.2 Full step for the momentum, except at the end of trajectory we
        #     perform a half step
        exe.forward(is_train=True)
        exe.backward()
        for v in exe_grads.values():
            v.wait_to_read()
        if i != L - 1:
            for k, momentum in end_momentums.items():
                momentum[:] = momentum - eps * exe_grads[k]
        else:
            for k, momentum in end_momentums.items():
                # We should reverse the sign of the momentum at the end
                momentum[:] = -(momentum - eps / 2.0 * exe_grads[k])
    copy_param(exe, end_params)
    # 3. Calculate acceptance ratio and accept/reject the move
    end_potential = calc_potential(exe, end_params, label_key, noise_precision, prior_precision)
    end_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0 for momentum in end_momentums.values()]).asscalar()
    # print init_potential, init_kinetic, end_potential, end_kinetic
    r = numpy.random.rand(1)
    # Metropolis acceptance test on the total-energy change.
    if r < numpy.exp(-(end_potential + end_kinetic) + (init_potential + init_kinetic)):
        exe.copy_params_from(end_params)
        return end_params, 1
    else:
        exe.copy_params_from(init_params)
        return init_params, 0
def inference_g(self, observed_arr):
    '''
    Inference with generator.

    Args:
        observed_arr:       `mxnet.ndarray` of observed data points.

    Returns:
        Tuple data.
        - re-parametric data.
        - encoded data points.
        - re-encoded data points.
    '''
    encoded_arr = self.model.encoder(observed_arr)
    decoded_arr = self.model.decoder(encoded_arr)
    re_encoded_arr = self.re_encoder_model(decoded_arr)
    # Anomaly score: exp of the mean squared encode/re-encode discrepancy,
    # used below as the scale of the re-parameterization noise.
    anomaly_arr = nd.square(encoded_arr - re_encoded_arr)
    anomaly_arr = nd.expand_dims(nd.exp(anomaly_arr.mean(axis=1)), axis=1)
    mean_arr = nd.expand_dims(decoded_arr.mean(axis=1), axis=1)
    gauss_arr = nd.random.normal_like(data=observed_arr, loc=0, scale=3.0)
    # Re-parameterization trick: mean + noise * scale.
    re_param_arr = mean_arr + (gauss_arr * anomaly_arr)
    # NOTE(review): the standard Gaussian KL term is
    # -0.5 * (1 + log(var) - mean^2 - var); this uses `- mean_arr + anomaly_arr`
    # instead — confirm whether that deviation is intentional.
    kl_arr = -0.5 * (1 + nd.log(anomaly_arr) - mean_arr + anomaly_arr)
    re_param_arr = re_param_arr + kl_arr
    return re_param_arr, encoded_arr, re_encoded_arr
def train(self, inputs, action, sampled_q):
    """One gradient step on the Q-network using the Huber (smooth-L1) TD loss.

    Args:
        inputs: batch of states (array-like; converted to an NDArray on CTX).
        action: one-hot encoded actions taken for each state.
        sampled_q: target Q-values for the taken actions.
    """
    # Deep-copy first so the caller's buffers are never mutated.
    inputs = copy.deepcopy(inputs)
    action = copy.deepcopy(action)
    sampled_q = copy.deepcopy(sampled_q)
    inputs = nd.array(inputs, ctx=CTX)
    action = nd.array(action, ctx=CTX)
    sampled_q = nd.array(sampled_q, ctx=CTX)
    sampled_q = sampled_q.reshape(shape=(sampled_q.shape[0],))
    with mx.autograd.record():
        loss_vec = []
        outputs = self.qnet(inputs, loss_vec)
        # Start from any auxiliary losses collected by the network.
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # print 'loss_dropout:', loss
        # TD error of the Q-value for the chosen action vs. the target.
        td_error = nd.sum(data=outputs * action, axis=1) - sampled_q
        for i in range(self.minibatch_size):
            # Huber loss: quadratic within [-1, 1], linear outside.
            if nd.abs(td_error[i]) < 1.0:
                loss = loss + 0.5 * nd.square(td_error[i])
            else:
                loss = loss + nd.abs(td_error[i]) - 0.5
        # print loss
    loss.backward()
    self.trainer.step(batch_size=self.minibatch_size, ignore_stale_grad=True)
def goodness_of_function_optimizer_function(self):
    """Apply one RMSProp step to every tracked parameter (in place)."""
    decay = self.__gamma
    step_size = self.__learning_rate
    for weight, running_sq in zip(self.__params, self.__sqrs):
        grad = weight.grad / self.__batch_size
        # Exponential moving average of squared gradients — note this is an
        # assignment, not an in-place `+=`.
        running_sq[:] = decay * running_sq + (1. - decay) * nd.square(grad)
        weight[:] -= step_size * grad / nd.sqrt(running_sq + self.__eps_stable)
def adagrad(params, sqrs, lr, batch_size):
    """AdaGrad step: accumulate squared gradients, scale each update.

    `params` and `sqrs` are parallel lists of parameter NDArrays and their
    squared-gradient accumulators; both are modified in place.
    """
    stability = 1e-7
    for weight, accum in zip(params, sqrs):
        grad = weight.grad / batch_size
        accum[:] = accum + nd.square(grad)
        weight[:] -= lr * grad / nd.sqrt(stability + accum)
def forward(self, feat):
    """L2-normalize `feat` along `self.axis`, then apply the learned
    per-channel scale (NCHW layout)."""
    norm_sq = nd.sum(nd.square(feat), axis=self.axis, keepdims=True)
    # rsqrt with a floor of self.epsilon guards against division by zero.
    inv_len = nd.rsqrt(nd.maximum(norm_sq, self.epsilon))
    normalized = nd.multiply(feat, inv_len)
    # NCHW -> NHWC so the scale broadcasts over the channel axis, then back.
    nhwc = normalized.transpose([0, 2, 3, 1])
    scaled = nd.multiply(nhwc, self.scale.data())
    return scaled.transpose([0, 3, 1, 2])
def rmsprop(params, sqrs, lr, gamma, batch_size):
    """RMSProp step: EMA of squared gradients with decay `gamma`.

    Updates each parameter and its accumulator in place.
    """
    stability = 1e-8
    for weight, avg_sq in zip(params, sqrs):
        grad = weight.grad / batch_size
        avg_sq[:] = gamma * avg_sq + (1. - gamma) * nd.square(grad)
        weight[:] -= lr * grad / nd.sqrt(avg_sq + stability)
def adagrad(params, sqrs, lr, batch_size):
    """AdaGrad update; accumulators and parameters are modified in place."""
    stable = 1e-7
    for weight, history in zip(params, sqrs):
        grad = weight.grad / batch_size
        history[:] += nd.square(grad)
        step = lr * grad / nd.sqrt(history + stable)
        weight[:] -= step
def observe_reward_value(
    self,
    state_arr,
    action_arr,
    meta_data_arr=None,
):
    '''
    Compute the reward value.

    Args:
        state_arr:      Tensor of state.
        action_arr:     Tensor of action.
        meta_data_arr:  Meta data of actions (unused here).

    Returns:
        Reward value: inverse of the per-sample MSE between flattened state
        and action, shape (batch, 1); zeros when no state is given.
    '''
    if state_arr is not None:
        # BUG FIX: `nd.square` was called with two positional arguments,
        # which makes MXNet treat the second one as the `out` parameter —
        # it overwrote action_arr with square(state_arr) instead of
        # computing the squared difference. Subtract first, then square.
        mse_arr = nd.mean(
            nd.square(
                nd.flatten(state_arr) - nd.flatten(action_arr)
            ),
            axis=0,
            exclude=True
        )
        reward_value_arr = 1 / mse_arr
        reward_value_arr = nd.expand_dims(reward_value_arr, axis=1)
    else:
        reward_value_arr = nd.zeros((
            action_arr.shape[0],
            1
        ), ctx=action_arr.context)
    return reward_value_arr
def loss_fe_fn(data, label):
    """Perceptual feature loss: sum of squared differences between the
    feature maps of `data` and `label`, reduced over C/H/W per sample."""
    feats_pred = loss_fe_forward(data)
    feats_ref = loss_fe_forward(label)
    loss = nd.zeros(shape=data.shape[0], ctx=data.context, dtype=data.dtype)
    for fp, fr in zip(feats_pred, feats_ref):
        loss = loss + nd.sum(nd.square(fp - fr), axis=[1, 2, 3])
    return loss
def adadelta(params, sqrs, deltas, roh, batch_size):
    """AdaDelta step (learning-rate-free adaptive update).

    `sqrs` holds the running average of squared gradients, `deltas` the
    running average of squared updates; all state is modified in place.
    (`roh` is the decay factor — name kept for keyword compatibility.)
    """
    stability = 1e-5
    for weight, sq_avg, delta_avg in zip(params, sqrs, deltas):
        grad = weight.grad / batch_size
        sq_avg[:] = roh * sq_avg + (1. - roh) * nd.square(grad)
        update = nd.sqrt(delta_avg + stability) / nd.sqrt(sq_avg + stability) * grad
        delta_avg[:] = roh * delta_avg + (1. - roh) * update * update
        weight[:] -= update
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Element-wise absolute error, computed as sqrt((pred - label)^2).

    Returns the unreduced per-element loss tensor; `sample_weight` is
    accepted for API compatibility but unused.
    """
    residual = pred - label
    return F.sqrt(F.square(residual))
def adadelta(params, sqrs, deltas, batch_size, rho):
    """AdaDelta update; every accumulator and parameter changes in place."""
    smoothing = 1e-5
    for weight, grad_avg, step_avg in zip(params, sqrs, deltas):
        grad = weight.grad / batch_size
        grad_avg[:] = rho * grad_avg + (1. - rho) * nd.square(grad)
        # Ratio of RMS(previous steps) to RMS(gradients) rescales the gradient.
        step = nd.sqrt(step_avg + smoothing) / nd.sqrt(grad_avg + smoothing) * grad
        step_avg[:] = rho * step_avg + (1. - rho) * step * step
        weight[:] -= step
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    """Gradients of the heteroscedastic regression distillation loss with
    respect to the student's predicted mean and log-variance."""
    pred_mean, pred_logvar = student_outputs[0], student_outputs[1]
    inv_var = nd.exp(-pred_logvar)
    residual = pred_mean - teacher_pred
    grad_mean = inv_var * residual
    grad_var = (1 - inv_var * (nd.square(residual) + 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    """Backward pass of a Gaussian-policy sampling op (REINFORCE-style).

    Writes score-weighted log-likelihood gradients w.r.t. the Gaussian mean
    (in_grad[0]) and variance (in_grad[1]); the variance gradient also
    carries an entropy-regularization term.
    """
    mean = in_data[0]
    var = in_data[1]
    # The sampled action either comes from this op's own output or is fed
    # in explicitly as a fourth input, depending on configuration.
    if self.implicit_backward:
        action = out_data[0]
    else:
        action = in_data[3]
    score = in_data[2]
    grad_mu = in_grad[0]
    grad_var = in_grad[1]
    # d/d(mu) of -log N(action; mu, var), weighted by score and grad_scale.
    self.assign(
        grad_mu, req[0],
        -(action - mean) * score.reshape(
            (score.shape[0], 1)) * self.grad_scale / var)
    # d/d(var) of the same, minus the entropy-regularization gradient.
    self.assign(
        grad_var, req[1],
        self.grad_scale * ((-nd.square(action - mean) / (2.0 * nd.square(var)) + 1.0 / (2.0 * var)) * score.reshape((score.shape[0], 1)) - numpy.float32(self.entropy_regularization) / (2.0 * var)))
def goodness_of_function_optimizer_function(self):
    """Apply one bias-corrected Adam step to all tracked parameters."""
    b1, b2 = self.__beta1, self.__beta2
    for weight, moment, sq_moment in zip(self.__params, self.__vs, self.__sqrs):
        grad = weight.grad / self.__batch_size
        moment[:] = b1 * moment + (1 - b1) * grad
        sq_moment[:] = b2 * sq_moment + (1 - b2) * nd.square(grad)
        # Bias correction uses the step counter self.__t.
        m_hat = moment / (1 - b1**self.__t)
        s_hat = sq_moment / (1 - b2**self.__t)
        weight[:] -= self.__learning_rate * m_hat / nd.sqrt(s_hat + self.__eps_stable)
def hybrid_forward(self, F, input_logits, target_logits, sample_weight=None):
    """Consistency loss: mean squared error between the two softmax
    distributions, averaged over every axis except the batch axis."""
    p_input = F.softmax(input_logits, axis=1)
    p_target = F.softmax(target_logits, axis=1)
    sq_diff = F.square(p_input - p_target)
    return F.mean(sq_diff, axis=self._batch_axis, exclude=True)
def nd_global_norm(t_list):
    """Compute the global L2 norm of multiple tensors.

    ``global_norm = sqrt(sum(l2norm(t)**2 for t in t_list))``; entries of
    ``t_list`` that are None are skipped.

    Parameters
    ----------
    t_list: list or tuple
        The NDArray list

    Returns
    -------
    ret: NDArray
        The global norm; an NDArray of shape (1,).

    Examples
    --------
    >>> x = mx.nd.ones((2, 3))
    >>> y = mx.nd.ones((5, 6))
    >>> z = mx.nd.ones((4, 2, 3))
    >>> print(nd_global_norm([x, y, z]).asscalar())
    7.74597
    >>> xnone = None
    >>> ret = nd_global_norm([x, y, z, xnone])
    >>> print(ret.asscalar())
    7.74597
    """
    total = None
    for tensor in t_list:
        if tensor is None:
            continue
        sq_norm = nd.square(nd.norm(tensor))
        total = sq_norm if total is None else total + sq_norm
    return nd.sqrt(total)
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """One actor-critic gradient step; returns the gradients instead of
    applying them.

    Args:
        s_batch: batch of states.
        a_batch_one_hot: one-hot actions taken.
        V_trace: V-trace value targets for the critic.
        advantage: advantage estimates for the actor term.

    Returns:
        (grads_list, batch_size): gradients of all non-batchnorm parameters
        and the batch size used (for later rescaling/aggregation).
    """
    batch_size = s_batch.shape[0]
    # Per-sample weight: rarer actions in the batch get a larger weight
    # (squared inverse frequency), then softmax-normalized below.
    action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
    action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2
    # Deep-copy so the caller's buffers are not mutated.
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))
    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()
    with mx.autograd.record():
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
        # Start from any auxiliary losses collected during the forward pass.
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # print 'loss_dropout:', loss
        # Log-probability of the taken action (epsilon for stability).
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
        entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
        entropy_loss = - entropy
        top_decision_entropy_loss = - top_decision_entropy
        # Policy-gradient actor loss and squared-error critic loss, both
        # weighted by the per-sample action_bp_rate.
        actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0)
        criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
        # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        loss = actorloss + 0.3*criticloss + 0.001*entropy_loss
        # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
    loss.backward()
    # CTname = threading.currentThread().getName()
    # print(CTname + ' actorloss : '+str(actorloss))
    # print(CTname + ' criticloss : '+str(criticloss))
    # print(CTname + ' entropy_loss : '+str(entropy_loss))
    # Collect gradients for every parameter except batchnorm statistics.
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        if name.find('batchnorm') < 0:
            # grads_list.append(mx.nd.array(value.grad().asnumpy()))
            grads_list.append(value.grad())
    return grads_list, batch_size
def adam(params, vs, sqrs, batch_size, lr, t):
    """Adam step with bias correction at step `t`; vs/sqrs updated in place."""
    stability = 1e-5
    b1, b2 = 0.9, 0.999
    for weight, moment, sq_moment in zip(params, vs, sqrs):
        grad = weight.grad / batch_size
        moment[:] = b1 * moment + (1 - b1) * grad
        sq_moment[:] = b2 * sq_moment + (1 - b2) * nd.square(grad)
        m_hat = moment / (1 - b1**t)
        s_hat = sq_moment / (1 - b2**t)
        # Note: epsilon sits inside the sqrt in this variant.
        weight[:] -= lr * m_hat / nd.sqrt(s_hat + stability)
def update(self, index, weight, grad, state):
    """Apply one LAMB optimizer step to `weight` in place.

    Args:
        index: parameter index for lr/wd lookup and update counting.
        weight: parameter NDArray.
        grad: gradient NDArray.
        state: (mean, var) first/second moment accumulators, updated in place.
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]
    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)
        # Biased first/second moment estimates, updated in place.
        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)
        r1 = weight.norm()
        if not self.bias_correction:
            # Clamp the weight norm into [lower_bound, upper_bound].
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            # Epsilon is added before or after the sqrt depending on config.
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat
        r2 = g.norm()
        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r
        # update weight
        g *= lr
        weight[:] -= g
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    """Gradient of the regression distillation objective w.r.t. the student's
    mean and log-variance heads (returned in that order)."""
    mean_out = student_outputs[0]
    logvar_out = student_outputs[1]
    precision = nd.exp(-logvar_out)
    diff = mean_out - teacher_pred
    grad_mean = precision * diff
    grad_var = (1 - precision * (nd.square(diff) + 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
def adam(params, lr, vals, sqrs, iter, batch_size, beta1=0.9, beta2=0.999):
    """Adam step with bias correction at step `iter`.

    Note: in this variant epsilon is added *outside* the sqrt.
    """
    stability = 1e-8
    for weight, moment, sq_moment in zip(params, vals, sqrs):
        grad = weight.grad / batch_size
        moment[:] = beta1 * moment + (1 - beta1) * grad
        sq_moment[:] = beta2 * sq_moment + (1 - beta2) * nd.square(grad)
        m_hat = moment / (1. - beta1**iter)
        s_hat = sq_moment / (1. - beta2**iter)
        weight[:] -= lr * m_hat / (nd.sqrt(s_hat) + stability)
def BN(X,gamma,beta,momentum=0.9,eps=1e-5,scope_name="",is_training=True):
    """Batch normalization (from scratch) for 2D (N,C) or 4D (N,C,H,W) input.

    Training mode normalizes with batch statistics; inference mode uses the
    module-level moving averages MOVING_MEANS/MOVING_VARS keyed by scope_name
    (except on the very first epoch when no saved parameters exist —
    NOTE(review): that check relies on the globals `path1` and `epoch`,
    assumed to be defined at module level).

    Args:
        X: input NDArray, rank 2 or 4.
        gamma, beta: learnable scale and shift.
        momentum: moving-average decay factor.
        eps: numerical-stability epsilon.
        scope_name: key for the moving-average dictionaries.
        is_training: use batch statistics when True.

    Returns:
        The normalized, scaled and shifted NDArray (same shape as X).
    """
    if len(X.shape)==2 :
        mean = nd.mean(X,axis=0)
        variance = nd.mean(nd.square(X-mean),axis=0)
        if is_training:
            Normalized_X=(X-mean)/nd.sqrt(variance+eps)
        elif is_training==False and not os.path.exists(path1) and epoch==0: #not param
            Normalized_X = (X - mean) / nd.sqrt(variance + eps)
        else:
            # BUG FIX: the parentheses previously divided only the moving
            # mean by sqrt(var+eps) — i.e. X - mean/sqrt(var+eps) — instead
            # of normalizing (X - mean), as the 4D branch below does.
            Normalized_X = (X - MOVING_MEANS[scope_name]) / nd.sqrt(MOVING_VARS[scope_name]+eps)
        out=gamma*Normalized_X+beta
    #pay attention that when it comes to (2D) CNN , We normalize batch_size * height * width over each channel, so that gamma and beta have the lengths the same as channel_count ,
    #referenced by http://gluon.mxnet.io/chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.html
    elif len(X.shape)==4:
        N , C , H , W = X.shape
        mean = nd.mean(X , axis=(0,2,3)) #normalize batch_size * height * width over each channel
        variance = nd.mean(nd.square(X-mean.reshape((1,C,1,1))),axis=(0,2,3))
        if is_training:
            Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
        elif is_training == False and not os.path.exists(path1) and epoch==0: # load param , when epoch=0
            Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
        else:
            Normalized_X = (X - MOVING_MEANS[scope_name].reshape((1, C, 1, 1))) / nd.sqrt(MOVING_VARS[scope_name].reshape((1, C, 1, 1)) + eps)
        out=gamma.reshape((1,C,1,1))*Normalized_X+beta.reshape((1,C,1,1))
    # Update (or initialize) the moving-average statistics for this scope.
    if scope_name not in MOVING_MEANS and scope_name not in MOVING_VARS:
        MOVING_MEANS[scope_name] = mean
        MOVING_VARS[scope_name] = variance
    else:
        MOVING_MEANS[scope_name] = MOVING_MEANS[scope_name] * momentum + mean * (1.0 - momentum)
        MOVING_VARS[scope_name] = MOVING_VARS[scope_name] * momentum + variance * (1.0 - momentum)
    return out
def forward(self, is_train, req, in_data, out_data, aux):
    """Forward pass of a center-loss custom op.

    Outputs the scalar loss ||x - c_y||^2 / (2 * batch) in out_data[0] and
    the per-sample difference to the class centers in out_data[1] (reused
    by backward).
    """
    data_input = in_data[0]
    batch_size = data_input.shape[0]
    label_input = in_data[1]
    center_input = in_data[2]
    # Map labels to rows of the center matrix via the class-index lookup.
    label_index = self.class_index[label_input]
    batch_center = center_input[label_index]
    batch_diff = data_input - batch_center
    loss = nd.sum(nd.square(batch_diff)) / batch_size / 2
    self.assign(out_data[0], req[0], loss)
    # NOTE(review): req[0] is reused for the second output; presumably this
    # should be req[1] — confirm against the op's declared outputs.
    self.assign(out_data[1], req[0], batch_diff)
def tinyimagenet200_iterator(args, logger):
    """Build train/validation iterators for Tiny-ImageNet-200.

    Estimates per-channel mean/std from 10% of the training set, then
    constructs an augmented training iterator and a normalized validation
    iterator from the record files referenced by `args`.

    Args:
        args: namespace with name, num_threads, batch_size, data_shape,
            train_rec/train_idx and test_rec/test_idx paths.
        logger: logger passed through to the training iterator.

    Returns:
        (train_iter, val_iter, classes, train_size)
    """
    assert (args.name == 'tinyimagenet200')
    os.environ['MXNET_CPU_WORKER_NTHREADS'] = '%d' % args.num_threads
    train_size = 100000
    classes = 200
    data_shape = (3, 64, 64) if len(args.data_shape) == 0 else tuple(args.data_shape)
    batch_size = args.batch_size
    # compute mean and std based on 10% of the training data
    # BUG FIX: use integer division — under Python 3 `train_size / 10` is a
    # float, but ImageIter's batch_size must be an int.
    stats_iter = mx.image.ImageIter(train_size // 10, data_shape, path_imgrec=args.train_rec, path_imgidx=args.train_idx, shuffle=True)
    sample = stats_iter.next().data[0].transpose(axes=(1, 0, 2, 3)).reshape(
        (3, -1))
    mean_rgb = sample.mean(axis=-1, keepdims=True)
    std_rgb = nd.sqrt(
        nd.mean(nd.square(sample - mean_rgb), axis=-1, keepdims=True))
    mean_rgb, std_rgb = mean_rgb.reshape((-1, )).asnumpy(), std_rgb.reshape(
        (-1, )).asnumpy()
    train_aug = get_train_aug(data_shape, mean_rgb, std_rgb, args)
    train_iter = MultiSequenceImageIter(args.train_rec,
                                        args.train_idx,
                                        data_shape=data_shape,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        aug_list=train_aug,
                                        logger=logger)
    val_aug = mx.image.CreateAugmenter(data_shape=data_shape,
                                       mean=mean_rgb,
                                       std=std_rgb)
    # val_aug = mx.image.CreateAugmenter(data_shape=data_shape, mean=True, std=True)
    val_iter = mx.image.ImageIter(batch_size,
                                  data_shape,
                                  path_imgrec=args.test_rec,
                                  path_imgidx=args.test_idx,
                                  shuffle=False,
                                  aug_list=val_aug)
    # train_aug = get_train_aug(data_shape, args)
    # train_iter = MultiSequenceImageIter(args.train_rec, args.train_idx, data_shape, batch_size, shuffle=True, aug_list=train_aug, logger=logger)
    # val_aug = mx.image.CreateAugmenter(data_shape=data_shape, mean=True, std=True)
    # val_iter = MultiSequenceImageIter(args.test_rec, args.test_idx, data_shape, batch_size, shuffle=False, aug_list=val_aug, logger=logger)
    return train_iter, val_iter, classes, train_size
def var(array,W=_W,B=None,square=0,sqrt=0,V=False,order='NCHW',sizz=0):
    """Windowed weighted variance-style reduction over the last two axes.

    NOTE(review): this function carries many debug prints and a `W.eval()`
    call in the verbose path (MXNet NDArrays have no `eval`; presumably a
    leftover from another framework — confirm). Statement grouping under the
    `if V:` guards was reconstructed from collapsed source — verify.

    Args:
        array: input tensor; assumed NCHW-like — TODO confirm.
        W: weight tensor (defaults to the module-level _W).
        B: optional bias, broadcast to W's leading dims; zeros when None.
        square: if truthy, square the centered values.
        sqrt: if truthy, take sqrt of the reduced output.
        V: verbosity flag (V==2 prints extra intermediates).
        order: layout tag (unused in the body).
        sizz: 1 to normalize the mean by sum(W), else by the window size.

    Returns:
        Reduced tensor of shape (N, W_out, H, W) after swapaxes.
    """
    arrs=array.shape
    ashp=W.shape
    # Reduction axes: (-2,-1) for the mean, (-2,-1,-3) for the final sum.
    xi=(-2,-1)
    x2=(-2,-1,-3)
    sb=(ashp[1],1,1)
    WV=ashp[-2:]
    print(sb)
    # Window element count, tiled per channel, used as the mean denominator.
    mnc=mnd.tile(mnd.reshape(mnd.array([WV[0]*WV[1]]), shape=(1,1,1)),ashp[1])
    print(mnc)
    if V:
        print(W.eval())
    print(arrs,ashp)
    mul=(mnd.broadcast_mul(array,W))
    if V:
        print('Wsamp',W[-1,-1])
        print('array*w',mul[0,-1])
    size=mnd.sum(W,axis=xi,keepdims=True)#shape=(outputs, channel)
    if V:
        print("sizesamp",size.shape,size)
    if B is None:
        B=mnd.zeros(W.shape[0:2],dtype=np.float32)#channel
    # Pad B with trailing singleton dims so it broadcasts against W's rank.
    B=mnd.reshape(B,(*B.shape,*[1 for _ in range(len(ashp)-len(B.shape))]))
    if sizz==1:
        mean=mnd.sum(mul,axis=xi,keepdims=True)/size
    else:
        mean=mnd.sum(mul,axis=xi,keepdims=True)/mnc
    if V:
        print("meansamp",mean[0,-1])
    if square:
        i=mnd.square(mnd.broadcast_add(mnd.broadcast_minus(mul,mean),B))
    else:
        i=mnd.broadcast_add(mnd.broadcast_minus(mul,mean),B)
    di=i/size
    if V==2:
        print("i",i,"i")
        print("di",di,"di")
    if V:
        print('isamp',i.shape,i[-1,-1,])
    out=mnd.sum(mnd.broadcast_add(i,B),axis=x2)
    #out=np.rollaxis(np.sum(i+B,axis=x2),-1,1)
    #print(out.shape)
    if sqrt:
        out=mnd.sqrt(out)
    out=mnd.swapaxes(out, 3, 1)
    #print(out.shape,(arrs[0],ashp[0],arrs[1],arrs[2]))
    assert out.shape==(arrs[0],ashp[0],arrs[1],arrs[2])
    return(out)
def forward(self):
    """One k-means iteration: assign every point to its nearest centroid,
    then recompute each centroid as the mean of its assigned points.

    Returns:
        (clustering, centroid): per-point cluster ids and updated centroids.
    """
    # 2-step: pairwise squared Euclidean distances via broadcasting —
    # dataset expanded to (1, n, d), centroids to (k, 1, d).
    diff = nd.subtract(nd.expand_dims(self.dataset,axis=0),nd.expand_dims(self.centroid,axis=1))
    sqr = nd.square(diff)
    distance = nd.sum(sqr,axis=2)
    clustering = nd.argmin(distance,axis=0)
    # 3-step
    '''
    Because mxnet's nd.where did not return the location. I wrote the np.where function.
    '''
    # Recompute each centroid in place as the mean of its member points;
    # membership indices come from numpy (nd.where lacks index output).
    for j in range(self.centroid_numbers):
        self.centroid[j][:]=nd.mean(nd.take(self.dataset,nd.array(np.reshape(np.where(np.equal(clustering.asnumpy(), j)), (-1,)), ctx=self.ctx),axis=0),axis=0)
    return clustering , self.centroid
def hybrid_forward(self, F, fts, ys, ftt, yt):
    """
    Semantic Alignment Loss
    :param F: Function
    :param fts: features for the source domain [M, K]
    :param ys: label for the source domain [M]
    :param ftt: features for the target domain [N, K]
    :param yt: label for the target domain [N]
    :return: per-target-sample triplet-style loss
        relu(max intra-class dist - min inter-class dist + margin)
    """
    if self._fn:
        # Normalize ft
        fts = F.L2Normalization(fts, mode='instance')
        ftt = F.L2Normalization(ftt, mode='instance')
    # Broadcast both feature sets to (N_tgt, M_src, K) so dists[i, j] is the
    # squared distance between target i and source j.
    fts_rpt = F.broadcast_to(fts.expand_dims(axis=0), shape=(self._bs_tgt, self._bs_src, self._embed_size))
    ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1), shape=(self._bs_tgt, self._bs_src, self._embed_size))
    dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)
    yt_rpt = F.broadcast_to(yt.expand_dims(axis=1), shape=(self._bs_tgt, self._bs_src)).astype('int32')
    ys_rpt = F.broadcast_to(ys.expand_dims(axis=0), shape=(self._bs_tgt, self._bs_src)).astype('int32')
    # Same-class / different-class masks over all target-source pairs.
    y_same = F.equal(yt_rpt, ys_rpt).astype('float32')
    y_diff = F.not_equal(yt_rpt, ys_rpt).astype('float32')
    intra_cls_dists = dists * y_same
    inter_cls_dists = dists * y_diff
    # Replace same-class entries with the row max so they can never be
    # selected as the minimum inter-class distance.
    max_dists = F.max(dists, axis=1, keepdims=True)
    max_dists = F.broadcast_to(max_dists, shape=(self._bs_tgt, self._bs_src))
    revised_inter_cls_dists = F.where(y_same, max_dists, inter_cls_dists)
    max_intra_cls_dist = F.max(intra_cls_dists, axis=1)
    min_inter_cls_dist = F.min(revised_inter_cls_dists, axis=1)
    # Hinge: push nearest other-class sample beyond the farthest same-class
    # sample by at least the margin.
    loss = F.relu(max_intra_cls_dist - min_inter_cls_dist + self._margin)
    return loss
def student_loss(student_mean, student_var, teacher_pred, teacher_noise_precision):
    """Heteroscedastic Gaussian NLL of the teacher's prediction under the
    student's (mean, log-variance) output; returns the first scalar."""
    residual_sq = nd.square(teacher_pred - student_mean)
    nll = 0.5 * (student_var + nd.exp(-student_var) * (residual_sq + 1 / teacher_noise_precision))
    return nll.asnumpy()[0]
def main():
    """Train a DQN agent on an Atari ROM (Nature-DQN hyperparameters).

    Parses CLI arguments, builds the Q-network and target network, then runs
    epsilon-greedy episodes, updating the Q-network from replay-memory
    samples and periodically syncing the target network.
    """
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', action='store_true',
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', action='store_true',
                        help='Use Double DQN only if specified')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    parser.add_argument('--optimizer', required=False, type=str, default="adagrad",
                        help='type of optimizer')
    args = parser.parse_args()
    # Default output directory is derived from the ROM name and learning rate.
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-lr%g' % (rom_name, args.lr)
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84
    ctx = parse_ctx(args.ctx)
    q_ctx = mx.Context(*ctx[0])
    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)
    ##RUN NATURE
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99
    # Linear epsilon annealing from eps_start down to eps_min over 1M steps.
    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - eps_min) / 1000000
    eps_curr = eps_start
    # freeze_interval is measured in *updates*, which happen every
    # update_interval environment steps.
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)
    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    dqn_sym = dqn_sym_nature(action_num)
    qnet = Base(data_shapes=data_shapes, sym_gen=dqn_sym, name='QNet',
                initializer=DQNInitializer(factor_type="in"), ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)
    use_easgd = False
    optimizer = mx.optimizer.create(name=args.optimizer, learning_rate=args.lr, eps=args.eps,
                                    clip_gradient=args.clip_gradient, rescale_grad=1.0, wd=args.wd)
    updater = mx.optimizer.get_updater(optimizer)
    qnet.print_stat()
    target_qnet.print_stat()
    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(is_train=False, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    # Replay memory not warm yet: act uniformly at random.
                    action = npy_rng.randint(action_num)
                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1
                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)
                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                            nd.argmax_channel(target_qval)) \
                            * (1.0 - terminate_flags) * discount
                    else:
                        # Double DQN: argmax from the online net, value from
                        # the target net.
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        qval = qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                            nd.argmax_channel(qval)) \
                            * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(is_train=True, data=states, dqn_action=actions,
                                           dqn_reward=target_rewards)
                    qnet.backward()
                    qnet.update(updater=updater)
                    # 3.3 Calculate Loss (Huber-style: quadratic within the
                    #     clip range, linear outside)
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = 0.5 * nd.sum(nd.square(quadratic_part)).asnumpy()[0] + \
                        nd.sum(diff - quadratic_part).asnumpy()[0]
                    episode_loss += loss
                    # 3.4 Update the target network every freeze_interval
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                   game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                     episode_action_step)
            if episode % 100 == 0:
                logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                     % (epoch, fps, epoch_reward / float(episode), episode))
def student_grad(student_mean, student_var, teacher_pred, teacher_noise_precision):
    """Gradients of the distillation objective w.r.t. the student's mean and
    log-variance outputs, returned as [grad_mean, grad_var]."""
    inv_var = nd.exp(-student_var)
    residual = student_mean - teacher_pred
    grad_mean = inv_var * residual
    grad_var = (1 - inv_var * (nd.square(residual) + 1 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]