def yolo2_decoder(x, num_class, anchor_scales):
    """Decode YOLOv2 convolution output into detection boxes.

    Splits the channel dimension into per-anchor predictions and converts
    them into rows of (class_id, score, xmin, ymin, xmax, ymax).
    """
    stride = num_class + 5
    # (B, C, H, W) -> (B, H, W, C) -> (B, H, W, anchors, stride)
    feat = x.transpose((0, 2, 3, 1)).reshape((0, 0, 0, -1, stride))

    # per-anchor channel layout: [x, y, w, h, objectness, classes...]
    raw_xy = feat.slice_axis(begin=0, end=2, axis=-1)
    raw_wh = feat.slice_axis(begin=2, end=4, axis=-1)
    raw_score = feat.slice_axis(begin=4, end=5, axis=-1)
    cls_pred = feat.slice_axis(begin=5, end=stride, axis=-1)

    xy = nd.sigmoid(raw_xy)
    cx, cy = transform_center(xy)
    bw, bh = transform_size(raw_wh, anchor_scales)
    score = nd.sigmoid(raw_score)
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # corner-format box, clipped to the unit image square
    half_w, half_h = bw / 2, bh / 2
    left = nd.clip(cx - half_w, 0, 1)
    top = nd.clip(cy - half_h, 0, 1)
    right = nd.clip(cx + half_w, 0, 1)
    bottom = nd.clip(cy + half_h, 0, 1)

    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, raw_wh, dim=4)
def yolo2_forward(x, num_class, anchor_scales):
    """Transpose/reshape/organize convolution outputs.

    Per-anchor channel layout is [class scores..., objectness, x, y, w, h].
    Returns (output, cls_pred, score, xywh) where each output row is
    (class_id, score, left, top, right, bottom) in image-relative coords.
    """
    stride = num_class + 5
    # (B, C, H, W) -> (B, H, W, anchors, stride); 4th dim is the anchor index
    pred = x.transpose((0, 2, 3, 1)).reshape((0, 0, 0, -1, stride))

    # split the last axis: classes | objectness | xy | wh
    cls_pred = pred.slice_axis(begin=0, end=num_class, axis=-1)
    score_pred = pred.slice_axis(begin=num_class, end=num_class + 1, axis=-1)
    xy_pred = pred.slice_axis(begin=num_class + 1, end=num_class + 3, axis=-1)
    wh = pred.slice_axis(begin=num_class + 3, end=num_class + 5, axis=-1)

    score = nd.sigmoid(score_pred)
    xy = nd.sigmoid(xy_pred)  # cell-relative center in (0, 1)

    # convert center/size predictions to image-relative coordinates
    cx, cy = transform_center(xy)
    bw, bh = transform_size(wh, anchor_scales)

    # most probable class per anchor
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # corner-format box, clipped to the unit image square
    left = nd.clip(cx - bw / 2, 0, 1)
    top = nd.clip(cy - bh / 2, 0, 1)
    right = nd.clip(cx + bw / 2, 0, 1)
    bottom = nd.clip(cy + bh / 2, 0, 1)

    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, wh, dim=4)
def plot_img(losses_log):
    """Log cycle-GAN A-side and B-side image panels to the summary writer."""
    def _panel(keys):
        # stack one sample per stage, then map from [-1, 1] to [0, 1]
        imgs = nd.concatenate([losses_log[k][0][0:1] for k in keys])
        return nd.clip(imgs * 0.5 + 0.5, 0, 1)

    sw.add_image(tag='A', image=_panel(['real_A', 'fake_B', 'rec_A', 'idt_A']))
    sw.add_image(tag='B', image=_panel(['real_B', 'fake_A', 'rec_B', 'idt_B']))
def plot_img(losses_log):
    """Log the first four LR / HR / generated-HR images to the summary writer."""
    for tag in ('lr_img', 'hr_img', 'hr_img_fake'):
        batch = nd.concatenate(losses_log[tag])[0:4]
        sw.add_image(tag=tag, image=nd.clip(batch, 0, 1))
def get_rmse_log(net, X_train, y_train):
    """Root mean squared error between log(prediction) and log(truth)."""
    n = X_train.shape[0]
    # clamp predictions to >= 1 so the logarithm is well defined
    preds = nd.clip(net(X_train), 1, float('inf'))
    sse = nd.sum(square_loss(nd.log(preds), nd.log(y_train))).asscalar()
    return np.sqrt(2 * sse / n)
def update(self, index, weight, grad, state):
    """Apply one AdaAlter (AdaGrad-variant) step to `weight` in place.

    Parameters
    ----------
    index : int
        Key into per-parameter learning rate / weight decay.
    weight : NDArray
        Parameter to update (modified in place).
    grad : NDArray
        Gradient of the objective w.r.t. `weight`.
    state : NDArray
        Accumulated squared-gradient history (modified in place).
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    is_sparse = grad.stype == 'row_sparse'
    history = state  # accumulated squared gradients
    if is_sparse:
        # sparse path: delegate to the fused sparse.adaalter_update kernel
        kwargs = {
            'epsilon': self.float_stable_eps,
            'rescale_grad': self.rescale_grad
        }
        if self.clip_gradient:
            kwargs['clip_gradient'] = self.clip_gradient
        sparse.adaalter_update(weight, grad, history, out=weight,
                               lr=lr, wd=wd, **kwargs)
        # raise NotImplementedError('AdaAlter has not been implemented for sparse nd')
    else:
        grad = grad * self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)
        # step uses the *previous* history; history is accumulated afterwards
        # (this ordering distinguishes AdaAlter from plain AdaGrad)
        div = grad / sqrt(history + self.float_stable_eps)
        weight[:] += (div + weight * wd) * -lr
        history[:] += square(grad)
def get_srme_log(net, X_train, Y_train):
    """RMSE between the logs of (clipped) predictions and targets."""
    # truncate net(X) into [1, +inf) so nd.log stays finite
    clipped = nd.clip(net(X_train), 1, float('inf'))
    total = nd.sum(square_loss(nd.log(clipped), nd.log(Y_train))).asscalar()
    return np.sqrt(2 * total / X_train.shape[0])
def update(self, index, weight, grad, state):
    """Apply one Nadam (Nesterov-accelerated Adam) step to `weight` in place.

    Parameters
    ----------
    index : int
        Key into per-parameter learning rate / weight decay.
    weight : NDArray
        Parameter to update (modified in place).
    grad : NDArray
        Gradient of the objective w.r.t. `weight`.
    state : (NDArray, NDArray)
        First- and second-moment buffers (modified in place).
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]

    # preprocess grad: rescale, then add weight decay.
    # BUG FIX: the previous in-place form `grad *= self.rescale_grad + wd * weight`
    # multiplied the gradient by (rescale_grad + wd * weight) due to operator
    # precedence (and clobbered the caller's gradient buffer); the intended
    # update is grad * rescale_grad + wd * weight.
    grad = grad * self.rescale_grad + wd * weight
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    # warming momentum schedule
    momentum_t = self.beta1 * (1. - 0.5 * (pow(0.96, t * self.schedule_decay)))
    momentum_t_1 = self.beta1 * (1. - 0.5 * (pow(0.96, (t + 1) * self.schedule_decay)))
    self.m_schedule = self.m_schedule * momentum_t
    m_schedule_next = self.m_schedule * momentum_t_1

    # update biased first (m_t) and second (v_t) moment estimates
    m_t, v_t = state
    m_t[:] = self.beta1 * m_t + (1. - self.beta1) * grad
    v_t[:] = self.beta2 * v_t + (1. - self.beta2) * grad * grad

    # bias-corrected estimates with Nesterov look-ahead on the momentum
    grad_prime = grad / (1. - self.m_schedule)
    m_t_prime = m_t / (1. - m_schedule_next)
    v_t_prime = v_t / (1. - pow(self.beta2, t))
    m_t_bar = (1. - momentum_t) * grad_prime + momentum_t_1 * m_t_prime

    # update weight
    weight[:] -= lr * m_t_bar / (sqrt(v_t_prime) + self.epsilon)
def get_rmse_log(net, X_train, y_train):
    """RMSE in log space between clipped predictions and the ground truth."""
    clipped = nd.clip(net(X_train), 1, float('inf'))
    log_losses = square_loss(nd.log(clipped), nd.log(y_train))
    total = nd.sum(log_losses).asscalar()
    return np.sqrt(2 * total / X_train.shape[0])
def update(self, index, weight, grad, state):
    """Update the parameters.

    Parameters
    ----------
    index : int
        An unique integer key used to index the parameters

    weight : NDArray
        weight ndarray

    grad : NDArray
        grad ndarray

    state : NDArray or other objects returned by init_state
        The auxiliary state used in optimization.
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    # NOTE: this optimizer's _get_lr returns a (lr, momentum) pair
    (lr, momentum) = self._get_lr(index)
    wd = self._get_wd(index)
    self._update_count(index)

    grad = grad * self.rescale_grad
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    if state is not None:  # explicit None check (state is an NDArray buffer)
        # mom = momentum * mom - lr * (1 - momentum) * (grad + wd * weight)
        mom = state
        mom[:] *= momentum
        mom[:] += -lr * (1.0 - momentum) * (grad + wd * weight)
        weight[:] += mom
    else:
        assert self.momentum == 0.0
        # BUG FIX: use the per-index `wd` (which includes wd multipliers)
        # instead of the raw global `self.wd`, matching the momentum branch
        weight[:] += -lr * (grad + wd * weight)
def update(self, index, weight, grad, state):
    """Update the parameters.

    Parameters
    ----------
    index : int
        An unique integer key used to index the parameters

    weight : NDArray
        weight ndarray

    grad : NDArray
        grad ndarray

    state : NDArray or other objects returned by init_state
        The auxiliary state used in optimization.
    """
    assert(isinstance(weight, NDArray))
    assert(isinstance(grad, NDArray))
    # NOTE: this optimizer's _get_lr returns a (lr, momentum) pair
    (lr, momentum) = self._get_lr(index)
    wd = self._get_wd(index)
    self._update_count(index)

    grad = grad * self.rescale_grad
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    if state is not None:  # explicit None check (state is an NDArray buffer)
        # mom = momentum * mom - lr * (1 - momentum) * (grad + wd * weight)
        mom = state
        mom[:] *= momentum
        mom[:] += -lr * (1.0 - momentum) * (grad + wd * weight)
        weight[:] += mom
    else:
        assert self.momentum == 0.0
        # BUG FIX: use the per-index `wd` (which includes wd multipliers)
        # instead of the raw global `self.wd`, matching the momentum branch
        weight[:] += -lr * (grad + wd * weight)
def _realize_layer(sym, params, graph, inputs_ext, runtime):
    """Lower a simulated quantization op into concrete `cvm`/`tvm` realize ops.

    Non-simulate ops pass through unchanged. For simulate ops, the float
    scale is decomposed as frac * 2**sb and realized either as a pure shift
    (frac == 1) or as a broadcast multiply by an integer scale plus shifts.

    :param sym: the symbol being lowered.
    :param params: dict of parameter NDArrays (new scale constants added here).
    :param graph: name -> symbol mapping (new variables registered here).
    :param inputs_ext: unused in this function; kept for caller's interface.
    :param runtime: either 'cvm' or 'tvm', selects the realize helper.
    :return: (lowered symbol, params)
    """
    logger = logging.getLogger('log.realize.layer')
    name = sym.attr('name')
    childs, attr = sym_iter(sym.get_children()), sym.list_attr()
    if not _is_simulate_op(sym):
        return sym, params

    # attrs are stringified numbers; eval assumes a trusted, internally
    # generated graph
    X, scale = childs[0], eval(attr['scale'])
    in_prec, out_prec = eval(attr['in_prec']), eval(attr['out_prec'])
    # scale = frac * 2**sb
    frac, sb = sim.extract_float(scale)

    assert runtime in ['cvm', 'tvm']
    _realize_func = _realize_cvm if runtime == 'cvm' else _realize_tvm

    def cal_bit(A_bit, B_bit, sb):
        # Split the available 32-bit budget between operand A and scale B,
        # returning per-operand shifts, target bit widths, and the final
        # output shift Y_sb.
        max_bit = 32
        total_bit = A_bit + B_bit
        excess_bit = (total_bit - max_bit) // 2 if total_bit > max_bit else 0
        A_target_bit = A_bit - excess_bit
        B_target_bit = min(B_bit - excess_bit, 32 - A_target_bit)
        A_sb, B_sb = A_bit - A_target_bit, B_bit - B_target_bit
        Y_sb = (-sb) - A_sb - B_sb
        return A_sb, A_target_bit, B_sb, B_target_bit, Y_sb

    if scale == 1:
        # identity scale: only clamp to the output precision
        node = _realize_func(X, 0, out_prec, params, graph)
        logger.debug("layer %-40s skip prec=%s", name, out_prec)
    elif frac == 1:
        # power-of-two scale: a single shift suffices
        node = _realize_func(X, -sb, out_prec, params, graph)
        logger.debug("layer %-40s X(%s >> %s) prec=%s",
                     name, in_prec, -sb, out_prec)
    else:
        # general scale: shift X, multiply by integer scale B, shift result
        B_bit = math.ceil(math.log2(frac)) + 1
        A_sb, A_tb, B_sb, B_tb, Y_sb = cal_bit(in_prec, B_bit, sb)
        X = _realize_func(X, A_sb, A_tb, params, graph)
        B_name = name + '_scale'
        params[B_name] = nd.array([round(frac / (2**B_sb))])
        B_range = 2**(B_tb - 1) - 1
        params[B_name] = nd.clip(params[B_name], a_min=-B_range, a_max=B_range)
        attr = {'precision': str(B_tb)}
        B = graph[B_name] = mx.sym.var(B_name, shape=(1, ), attr=attr)
        node = mx.sym.broadcast_mul(X, B)
        node = _realize_func(node, Y_sb, out_prec, params, graph)
        # NOTE(review): this format string was garbled in the source; the
        # argument list implies the layout below — confirm against history
        logger.debug("layer %-40s Y(INT%s >> %s) X(%s >> %s) B(%s vs. %s %s >> %s)",
                     name, out_prec, Y_sb, in_prec, A_sb, scale, frac, sb, B_sb)
    # retained (disabled) per-layer precision override from the original
    # if childs[0].attr('name') in [
    #         'yolov30_yolooutputv30_tile0',
    #         'yolov30_yolooutputv31_tile0',
    #         'yolov30_yolooutputv32_tile0',
    #         'yolov30_yolooutputv30_expand_dims0',
    #         'yolov30_yolooutputv31_expand_dims0',
    #         'yolov30_yolooutputv32_expand_dims0',
    # ]:
    #     sb = out_prec - 16
    #     node = _realize_func(node, sb, 16, params, graph)
    return node, params
def get_a(self, item):
    """Return `a` for `item`: an embedding lookup (optionally clipped to
    `a_range`) when `self.a` is an Embedding, else the plain attribute."""
    if not isinstance(self.a, gluon.nn.Embedding):
        return self.a
    value = squeeze(self.a(as_array(item)))
    if self.a_range is None:
        return value
    return nd.clip(value, *self.a_range)
def Prediction(test_data, network, ctx):
    """Predict the next lotto numbers: round the net output and clamp to [0, 45]."""
    for data, label in test_data:
        features = data.as_in_context(ctx)
        preds = mx.nd.round(network(features))
        preds = nd.clip(data=preds, a_min=0, a_max=45)
        print(preds.asnumpy()[0])
def get_c(self, item):
    """Return `c` for `item`, clipped to `c_range` when `self.c` is an Embedding."""
    if isinstance(self.c, gluon.nn.Embedding):
        looked_up = squeeze(self.c(as_array(item)))
        if self.c_range is not None:
            looked_up = nd.clip(looked_up, *self.c_range)
        return looked_up
    return self.c
def contrast_aug(self, src, x):
    """Randomly scale image contrast by a factor drawn from [1-x, 1+x].

    Mutates `src` in place and returns it clipped to [0, 255].
    """
    alpha = 1.0 + random.uniform(-x, x)
    luminance_coef = np.array([[[0.299, 0.587, 0.114]]])
    gray = src * luminance_coef
    # mean gray level scaled to compensate for the contrast change
    gray = (3.0 * (1.0 - alpha) / gray.size) * nd.sum(gray)
    src *= alpha
    src += gray
    return nd.clip(src, 0, 255)
def save_image(data, file, normalize=True, img_range=None):
    """Normalize `data` into `img_range`, convert to uint8 HWC, and save to `file`."""
    if img_range is None:
        img_range = [min(data), max(data)]
    scaled = normalize_image(data, img_range[0], img_range[1])
    # round to nearest by adding 0.5 before truncation, then clamp to byte range
    pixels = nd.clip(scaled * 255 + 0.5, 0, 255).asnumpy().astype(np.uint8)
    Image.fromarray(np.transpose(pixels, (1, 2, 0))).save(file)
def batched_l2_dist(a, b):
    """Batched pairwise Euclidean distances via ||a||^2 - 2 a.b + ||b||^2."""
    sq_a = nd.power(nd.norm(a, axis=-1), 2)
    sq_b = nd.power(nd.norm(b, axis=-1), 2)
    # gemm computes -2 * a @ b.T accumulated onto the broadcast ||b||^2 term
    cross = nd.linalg_gemm(
        a,
        nd.transpose(b, axes=(0, 2, 1)),
        nd.broadcast_axes(nd.expand_dims(sq_b, axis=-2), axis=1, size=a.shape[1]),
        alpha=-2
    )
    squared = nd.add(cross, nd.expand_dims(sq_a, axis=-1))
    # clip away tiny negatives caused by floating-point cancellation
    return nd.sqrt(nd.clip(squared, 1e-30, np.finfo(np.float32).max))
def backward(self, out_grad, in_data, out_data, in_grad):
    """Backward pass of the DQN loss custom op.

    Writes the clipped TD error (Q(s, a) - target reward, clamped to
    [-1, 1]) into the gradient only at the chosen-action index; all other
    entries stay zero.
    """
    x = out_data[0]      # predicted Q-values
    action = in_data[1]  # chosen action indices
    reward = in_data[2]  # target rewards
    dx = in_grad[0]      # gradient buffer, written in place
    dx[:] = 0
    dx[:] = nd.fill_element_0index(dx,
                                   nd.clip(nd.choose_element_0index(x, action) - reward, -1, 1),
                                   action)
def W_decay(self, lr):
    """Regularize self.W in place: L2 when L_coeff > 0, L1 when L_coeff < 0,
    otherwise clamp weights to a fixed boundary."""
    if self.L_coeff > 0:
        # L2 decay: shrink proportionally to the weight itself
        self.W -= self.L_coeff * lr * self.W
    elif self.L_coeff < 0:
        # L1 decay: L_coeff is negative, so this subtracts |L_coeff| * lr * sign(W)
        self.W += self.L_coeff * lr * nd.sign(self.W)
    else:
        # no decay configured: just keep weights inside [-10, 10]
        self.W = nd.clip(self.W, -10.0, 10.0)
def hybrid_forward(self, F, x, weight):
    """Cosine similarity between L2-normalized inputs and class weights."""
    xn = F.L2Normalization(x, mode='instance', name='x_n')
    wn = F.L2Normalization(weight, mode='instance', name='w_n')
    similarity = F.FullyConnected(xn, wn, no_bias=True,
                                  num_hidden=self._units, name='cos_theta')
    # numerical safety: keep cosine values strictly inside [-1, 1]
    return F.clip(similarity, a_min=-1, a_max=1)
def unsorted_1d_segment_mean(input, seg_id, n_segs, dim):
    """Segment mean along the first dimension: per-segment sum / segment size."""
    # TODO: support other dimensions
    assert dim == 0, 'MXNet only supports segment mean on first dimension'
    ones = nd.ones_like(seg_id).astype(input.dtype)
    counts = unsorted_1d_segment_sum(ones, seg_id, n_segs, 0)
    # empty segments would divide by zero; treat them as size 1
    counts = nd.clip(counts, a_min=1, a_max=np.inf)
    sums = unsorted_1d_segment_sum(input, seg_id, n_segs, dim)
    return sums / counts.reshape((-1, ) + (1, ) * (sums.ndim - 1))
def lab_to_rgb(lab, ctx=None):
    """Convert a CIE-LAB image tensor to sRGB values in [0, 1].

    :param lab: LAB image tensor (validated/normalized by __check_image).
    :param ctx: MXNet context to run the conversion on; required.
    :return: sRGB tensor with the same shape as the input.
    :raises ValueError: if `ctx` or `lab` is None.
    """
    if ctx is None:
        raise ValueError("ctx can not be None")

    if lab is None:
        raise ValueError("lab can not be None")

    with mx.Context(ctx):
        lab = __check_image(lab)
        lab_pixels = lab.reshape([-1, 3])
        # LAB -> (fx, fy, fz) intermediate of the inverse LAB nonlinearity
        lab_to_fxfyfz = nd.array([
            #   fx      fy        fz
            [1 / 116.0, 1 / 116.0, 1 / 116.0],  # l
            [1 / 500.0, 0.0, 0.0],  # a
            [0.0, 0.0, -1 / 200.0],  # b
        ], ctx=ctx)
        fxfyfz_pixels = nd.dot(lab_pixels + nd.array([16.0, 0.0, 0.0], ctx=ctx), lab_to_fxfyfz)

        # convert to xyz: linear segment below epsilon, cubic above
        epsilon = 6 / 29
        linear_mask = fxfyfz_pixels <= epsilon
        exponential_mask = fxfyfz_pixels > epsilon
        xyz_pixels = (3 * epsilon ** 2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
        # scale by the reference-white constants used by the forward transform
        xyz_pixels = nd.multiply(xyz_pixels, nd.array([0.950456, 1.0, 1.088754]))

        # XYZ -> linear RGB matrix
        xyz_to_rgb = nd.array([
            #    r           g          b
            [3.2404542, -0.9692660, 0.0556434],  # x
            [-1.5371385, 1.8760108, -0.2040259],  # y
            [-0.4985314, 0.0415560, 1.0572252],  # z
        ])
        rgb_pixels = nd.dot(xyz_pixels, xyz_to_rgb)
        # clamp out-of-gamut values in place before gamma encoding
        nd.clip(rgb_pixels, 0.0, 1.0, out=rgb_pixels)

        # linear RGB -> sRGB gamma curve (linear below 0.0031308, power above)
        linear_mask = rgb_pixels <= 0.0031308
        exponential_mask = rgb_pixels > 0.0031308
        step1 = nd.multiply(nd.multiply(rgb_pixels, 12.92), linear_mask)
        step2 = nd.multiply(nd.multiply(nd.power(rgb_pixels, (1 / 2.4)), 1.055) - 0.055, exponential_mask)
        srgb_pixels = step1 + step2

        return srgb_pixels.reshape(lab.shape)
def run_demo(eval_args):
    """Run real-time neural style transfer on webcam frames until 'q' is pressed.

    :param eval_args: parsed arguments providing `cuda`, `styleImage`, `size`,
        `ngf`, and `model` (checkpoint path).
    """
    ## load parameters
    #content_image = 'images/content/xjtlu.jpg'
    #style_image = 'images/styles/starry_night.jpg'
    #eval_args = program_args(content_image,content_image,style_image,128,128,0)
    #eval_args = camero_args(style_image)
    if eval_args.cuda == 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu()
    ## Change the content and style image using Style Loader
    #content_image = utils.tensor_load_rgbimage(eval_args.contentImage, ctx, size=eval_args.size, keep_asp=True)
    style_image = utils.tensor_load_rgbimage(eval_args.styleImage, ctx, size=eval_args.size)
    style_image = utils.preprocess_batch(style_image)
    # load the trained transformer network and fix its style target
    style_model = net.Net(ngf=eval_args.ngf)
    style_model.load_parameters(eval_args.model, ctx=ctx)
    style_model.set_target(style_image)
    cam = cv2.VideoCapture(0)  # default webcam
    while True:
        ## read frame
        ret, frame = cam.read()
        # read content image (cimg)
        #cimg = img.copy()
        #img = np.array(img).transpose(2, 0, 1)
        content_img = load_image(frame, ctx, eval_args.size)
        output = style_model(content_img)
        tensor = output[0]
        #(b, g, r) = F.split(tensor, num_outputs=3, axis=0)
        #tensor = F.concat(r, g, b, dim=0)
        # clamp to displayable range and convert CHW float -> HWC uint8
        img = F.clip(tensor, 0, 255).asnumpy()
        img = img.transpose(1, 2, 0).astype('uint8')
        img = Image.fromarray(img)
        # resize the stylized output back to camera resolution for side-by-side view
        image = np.array(
            img.resize((frame.shape[1], frame.shape[0]), Image.ANTIALIAS))
        #print(frame.shape,image.shape)
        numpy_horizontal = np.hstack((frame, image))
        #cv2.imshow("Content Window",frame)
        #cv2.imshow("Style Window",grey)
        cv2.imshow("Test Window Shape", numpy_horizontal)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cam.release()
    cv2.destroyAllWindows()
def generate(self, v_q: nd.NDArray, x_context: nd.NDArray, v_context: nd.NDArray,
             include_intermediate: bool = False, **kwargs) -> Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from model. See Algorithm S3 in paper.

    :param v_q: Query view camera info.
    :param x_context: Context frames.
    :param v_context: Context camera info.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :return: n x *image_shape array of generated samples. If include_intermediate is True,
        then steps x n x *image_shape.
    """
    # canvas (reconstruction) and generator-LSTM hidden/cell state, zero-initialized
    u = nd.zeros((self._batch_size, *self._upsample_output_shape), ctx=self._ctx)  # canvas (reconstruction)
    h_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
    c_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)

    # reshape camera information so we can concat it to image data
    v_q = nd.broadcast_to(
        nd.expand_dims(nd.expand_dims(v_q, axis=-1), axis=-1),
        (0, 0, *self._downsample_output_shape[1:]))

    outs = []  # sample(s) over time

    # scene representation aggregated from the context frames/views
    r = self._representation_nn(x_context, v_context)
    for i in range(self._num_steps):
        # snapshot the canvas before this step's refinement
        outs.append(self._out_layer(u))
        # Eq. S11: sample latent z from the prior conditioned on h_dec
        p = self._p_layer(h_dec)
        p = nd.reshape(p, (self._batch_size, -1))
        z = self._latent_layer(p)
        gen_z = nd.reshape(z, (self._batch_size, self._num_latent_maps,
                               *self._downsample_output_shape[1:]))
        # advance the generator RNN on [z, query view, representation]
        _, (h_dec, c_dec) = self._gen_rnn(nd.concat(gen_z, v_q, r, dim=1),
                                          [h_dec, c_dec])
        # additively refine the canvas
        u = u + self._upsample_nn(h_dec)

    outs.append(self._out_layer(u))
    if include_intermediate:
        samples = nd.stack(*outs, axis=0)
    else:
        samples = outs[-1]

    # outputs are pixel intensities; clamp into [0, 1]
    return nd.clip(samples, a_min=0.0, a_max=1.0)
def update(self, index, weight, grad, state):
    """Apply one LAMB step to `weight` in place.

    Computes an Adam-style direction, then rescales the learning rate by
    the layer-wise trust ratio ||w|| / ||g|| (with bounds applied when
    bias correction is disabled).

    Parameters
    ----------
    index : int
        Key into per-parameter learning rate / weight decay.
    weight : NDArray
        Parameter to update (modified in place).
    grad : NDArray
        Gradient of the objective w.r.t. `weight` (modified in place).
    state : (NDArray, NDArray)
        First- and second-moment buffers (modified in place).
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]
    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        # running first/second moment estimates
        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)

        r1 = weight.norm()  # numerator of the trust ratio
        if not self.bias_correction:
            # bound the weight norm to keep the trust ratio in a sane range
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat

        r2 = g.norm()  # denominator of the trust ratio

        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        # fall back to ratio 1 when the trust ratio is degenerate
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r

        # update weight
        g *= lr
        weight[:] -= g
def update(self, index, weight, grad, state):
    """Single AdaGrad step with optional gradient clipping.

    Accumulates squared gradients into `state` and scales the step by the
    inverse root of that history.
    """
    assert (isinstance(weight, NDArray))
    assert (isinstance(grad, NDArray))
    lr = self._get_lr(index)
    self._update_count(index)

    g = grad * self.rescale_grad
    if self.clip_gradient is not None:
        g = clip(g, -self.clip_gradient, self.clip_gradient)

    history = state
    history[:] += g * g
    adjusted = g / sqrt(history + self.float_stable_eps)
    weight[:] -= lr * (adjusted + self.wd * weight)
def parse_net_output(Y, numClass, box_per_cell):
    """Convert raw YOLO conv output into class scores, objectness, and boxes.

    :param Y: network output, (batch, channels, H, W).
    :param numClass: number of object classes.
    :param box_per_cell: anchors predicted per grid cell.
    :return: (predCls, predObject, XYWH) where XYWH holds clipped
        (x, y, w, h) boxes in image-relative coordinates.
    """
    pred = nd.transpose(Y, (0, 2, 3, 1))
    # add one dim for boxes: (..., box_per_cell, numClass + 5)
    pred = pred.reshape((0, 0, 0, box_per_cell, numClass + 5))
    predCls = nd.slice_axis(pred, begin=0, end=numClass, axis=-1)
    predObject = nd.slice_axis(pred, begin=numClass, end=numClass + 1, axis=-1)
    # predObject = nd.sigmoid(predObject)
    predXY = nd.slice_axis(pred, begin=numClass + 1, end=numClass + 3, axis=-1)
    predWH = nd.slice_axis(pred, begin=numClass + 3, end=numClass + 5, axis=-1)
    # predXY = nd.sigmoid(predXY)
    x, y = convert_xy(predXY)
    w, h = convert_wh(predWH)
    w = nd.clip(w, 0, 1)
    h = nd.clip(h, 0, 1)
    # clip corners to the unit image square
    x0 = nd.clip(x, 0, 1)
    y0 = nd.clip(y, 0, 1)
    x1 = nd.clip(x0 + w, 0, 1)
    # BUG FIX: was np.clip — inconsistent with the NDArray pipeline and the
    # seven surrounding nd.clip calls; use nd.clip so the result stays an NDArray
    y1 = nd.clip(y0 + h, 0, 1)
    x = x0
    y = y0
    w = x1 - x0
    h = y1 - y0
    XYWH = nd.concat(x, y, w, h, dim=-1)
    return predCls, predObject, XYWH
def plot_images(images, path, ncols, nrows):
    """Save a grid of images (normalized from [-1, 1] to uint8) to `path`."""
    fig = plt.figure(figsize=(25, 25))
    grid = ImageGrid(
        fig, 111,  # similar to subplot(111)
        nrows_ncols=(nrows, ncols),
        axes_pad=0.1,  # pad between axes, in inches
    )
    for axis, image in zip(grid, images):
        scaled = normalize_image(image, -1, 1)
        pixels = nd.clip(scaled * 255 + 0.5, 0, 255).asnumpy().astype(np.uint8)
        axis.imshow(np.transpose(pixels, (1, 2, 0)))
        axis.axis('off')
    plt.savefig(path, bbox_inches='tight')
def step(self, indices, weights, grads, states):
    """Perform an optimization step using gradients and states.

    Adam-style update with the bias correction folded into the learning
    rate; gradients of parameters with more than 3 dims are mean-centered
    across all axes except 0 before the update.

    Parameters
    ----------
    indices : list of int
        List of unique indices of the parameters into the individual learning rates
        and weight decays. Learning rates and weight decay may be set via `set_lr_mult()`
        and `set_wd_mult()`, respectively.
    weights : list of NDArray
        List of parameters to be updated.
    grads : list of NDArray
        List of gradients of the objective with respect to this parameter.
    states : List of any obj
        List of state returned by `create_state()`.
    """
    for index, weight, grad, state in zip(indices, weights, grads, states):
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)
        t = self._index_update_count[index]

        # center high-rank (conv-like) gradients over all axes except 0
        # NOTE(review): presumably a gradient-centralization trick — confirm
        if len(list(grad.shape_array())) > 3:
            grad -= grad.mean(axis=0, exclude=True, keepdims=True)

        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)
        grad += wd * weight

        # fold Adam bias correction into the effective step size
        coef1 = 1. - self.beta1**t
        coef2 = 1. - self.beta2**t
        lr *= math.sqrt(coef2) / coef1

        # update mean and var (first/second moment running averages)
        mean, var = state
        mean[:] *= self.beta1
        mean[:] += (1. - self.beta1) * grad
        var[:] *= self.beta2
        var[:] += (1. - self.beta2) * square(grad)

        # update weight
        d = mean / (sqrt(var) + self.epsilon)
        weight[:] -= lr * d
def step(self, indices, weights, grads, states):
    """Perform an optimization step using gradients and states.

    SGD with Nesterov-style momentum; gradients of parameters with more
    than 3 dims are mean-centered across all axes except 0 first.

    Parameters
    ----------
    indices : list of int
        List of unique indices of the parameters into the individual learning rates
        and weight decays. Learning rates and weight decay may be set via `set_lr_mult()`
        and `set_wd_mult()`, respectively.
    weights : list of NDArray
        List of parameters to be updated.
    grads : list of NDArray
        List of gradients of the objective with respect to this parameter.
    states : List of any obj
        List of state returned by `create_state()`.
    """
    for index, weight, grad, state in zip(indices, weights, grads, states):
        # center high-rank (conv-like) gradients over all axes except 0
        # NOTE(review): presumably a gradient-centralization trick — confirm
        if len(list(grad.shape_array())) > 3:
            grad -= grad.mean(axis=0, exclude=True, keepdims=True)
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)

        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)
        grad += wd * weight

        # update mom
        mom = state
        if mom is not None:
            mom[:] *= self.momentum
            mom[:] -= lr * grad
            # Nesterov look-ahead: step combines updated momentum and raw grad
            d = self.momentum * mom - lr * grad
        else:
            d = -lr * grad

        # update weight
        weight[:] += d
def imagenet_clamp_batch(batch, low, high):
    """Clamp an ImageNet-mean-subtracted batch channel-wise to [low, high].

    Mean offsets are the standard ImageNet BGR channel means.
    BUG FIX: F.clip returns a new array rather than operating in place, so
    the original version discarded all three results and left `batch`
    unchanged; the clipped slices must be assigned back.
    """
    batch[:, 0, :, :] = F.clip(batch[:, 0, :, :], low - 123.680, high - 123.680)
    batch[:, 1, :, :] = F.clip(batch[:, 1, :, :], low - 116.779, high - 116.779)
    batch[:, 2, :, :] = F.clip(batch[:, 2, :, :], low - 103.939, high - 103.939)
def main():
    """Train a DQN agent on an Atari ROM (Nature-DQN setup, optional Double-Q).

    Parses CLI arguments, builds the game environment, Q-network and target
    network, then runs epsilon-greedy episodes with replay-memory updates.
    """
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', action='store_true',
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', action='store_true',
                        help='Use Double DQN only if specified')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    parser.add_argument('--optimizer', required=False, type=str, default="adagrad",
                        help='type of optimizer')
    args = parser.parse_args()

    # default output directory derived from ROM name and learning rate
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-lr%g' % (rom_name, args.lr)
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4  # stacked frames fed to the network
    rows = 84
    cols = 84

    ctx = parse_ctx(args.ctx)
    q_ctx = mx.Context(*ctx[0])
    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)

    ##RUN NATURE
    # hyper-parameters from the Nature DQN paper
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99
    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - eps_min) / 1000000  # linear epsilon annealing
    eps_curr = eps_start
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    # online Q-network and its periodically-synced target copy
    dqn_sym = dqn_sym_nature(action_num)
    qnet = Base(data_shapes=data_shapes, sym_gen=dqn_sym, name='QNet',
                initializer=DQNInitializer(factor_type="in"), ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)
    use_easgd = False
    optimizer = mx.optimizer.create(name=args.optimizer, learning_rate=args.lr, eps=args.eps,
                                    clip_gradient=args.clip_gradient,
                                    rescale_grad=1.0, wd=args.wd)
    updater = mx.optimizer.get_updater(optimizer)
    qnet.print_stat()
    target_qnet.print_stat()

    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(is_train=False, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    # replay memory not ready yet: act randomly
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        # Double DQN: select action with the online net, evaluate with the target net
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        qval = qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(is_train=True,
                                           data=states,
                                           dqn_action=actions,
                                           dqn_reward=target_rewards)
                    qnet.backward()
                    qnet.update(updater=updater)

                    # 3.3 Calculate Loss (Huber-style: quadratic inside [-1, 1], linear outside)
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = 0.5 * nd.sum(nd.square(quadratic_part)).asnumpy()[0] +\
                           nd.sum(diff - quadratic_part).asnumpy()[0]
                    episode_loss += loss

                    # 3.3 Update the target network every freeze_interval
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                       % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                          game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                     episode_action_step)
            if episode % 100 == 0:
                logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                     % (epoch, fps, epoch_reward / float(episode), episode))
def tensor_save_rgbimage(img, filename, cuda=False):
    """Save a CHW float tensor as an RGB image file."""
    pixels = F.clip(img, 0, 255).asnumpy()
    # CHW -> HWC uint8 for PIL
    pixels = pixels.transpose(1, 2, 0).astype('uint8')
    Image.fromarray(pixels).save(filename)
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    # predictions are floored at 1 so log never sees non-positive values
    clipped_preds = nd.clip(net(X_train), 1, float('inf'))
    loss_sum = nd.sum(square_loss(nd.log(clipped_preds), nd.log(y_train))).asscalar()
    return np.sqrt(2 * loss_sum / X_train.shape[0])