# Assumed imports for the snippets below; the original files import these at module level.
import math
import numpy as np
import jittor as jt
from jittor import init, ops


def linear(x, n):
    w = jt.make_var([n, x.shape[-1]], init=lambda *a: init.invariant_uniform(*a))
    w = w.reindex([w.shape[1], w.shape[0]], ["i1", "i0"])
    bound = 1.0 / math.sqrt(w.shape[0])
    b = jt.make_var([n], init=lambda *a: init.uniform(*a, -bound, bound))
    return jt.matmul(x, w) + b
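# Hedged usage sketch (not from the original source): assumes a Jittor version that still
# provides jt.make_var; the shapes below are illustrative only.
def _linear_demo():
    x = jt.random([4, 16])          # batch of 4 samples, 16 input features
    y = linear(x, 10)               # weight is reindexed to [16, 10]; y.shape == [4, 10]
    return y.data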
def conv(x, in_planes, out_planes, kernel_size, padding, stride=1, init_method=None):
    Kw = kernel_size
    Kh = kernel_size
    _C = in_planes
    Kc = out_planes
    N, C, H, W = x.shape
    assert C == _C
    if init_method is None:
        w = jt.make_var(
            [Kc, _C, Kh, Kw],
            init=lambda *a: init.relu_invariant_gauss(*a, mode="fan_out"))
    else:
        w = jt.make_var([Kc, _C, Kh, Kw], init=init_method)
    xx = x.reindex(
        [
            N, Kc, C,
            (H + padding * 2 - kernel_size) // stride + 1,
            (W + padding * 2 - kernel_size) // stride + 1,
            Kh, Kw
        ],
        [
            'i0',  # Nid
            'i2',  # Cid
            f'i3*{stride}-{padding}+i5',  # Hid+Khid
            f'i4*{stride}-{padding}+i6',  # Wid+KWid
        ])
    ww = w.broadcast(xx.shape, [0, 3, 4])
    yy = xx * ww
    y = yy.sum([2, 5, 6])  # C, Kh, Kw
    return y
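# Hedged usage sketch for the reindex-based conv above; the input sizes are assumptions
# chosen so the output spatial size is easy to verify by hand.
def _conv_demo():
    x = jt.random([2, 3, 32, 32])                   # NCHW input
    y = conv(x, 3, 8, kernel_size=3, padding=1)     # (32 + 2*1 - 3) // 1 + 1 == 32
    return y.data                                   # y.shape == [2, 8, 32, 32]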
def batch_norm(x):
    # get_init_var is an initializer helper defined elsewhere in the original test module.
    xmean = jt.mean(x, dims=[0, 2, 3], keepdims=1)
    x2mean = jt.mean(x * x, dims=[0, 2, 3], keepdims=1)
    norm_x = (x - xmean.broadcast_var(x)) / (
        jt.sqrt(x2mean - xmean * xmean + jt.float32(1e-5)).broadcast_var(x))
    w = jt.make_var([x.shape[1]], init=get_init_var)
    b = jt.make_var([x.shape[1]], init=get_init_var)
    w = w.broadcast([1, w.shape[0], 1, 1], [0, 2, 3])
    b = b.broadcast([1, b.shape[0], 1, 1], [0, 2, 3])
    return norm_x * w + b
def conv_nchw(x, in_planes, out_planes, kernel_size, padding, stride=1,
              dilation=1, groups=1, init_method=None, w_=None):
    N, C, H, W = x.shape
    Kh, Kw = kernel_size, kernel_size
    G = groups
    CpG = C // G  # channels per group
    padding = (padding, padding)
    dilation = (dilation, dilation)
    stride = (stride, stride)
    assert C == in_planes
    oc = out_planes
    oh = (H + padding[0] * 2 - Kh * dilation[0] + dilation[0] - 1) // stride[0] + 1
    ow = (W + padding[1] * 2 - Kw * dilation[1] + dilation[1] - 1) // stride[1] + 1
    if w_ is None:
        if init_method is None:
            w = jt.make_var(
                [oc, C // G, Kh, Kw],
                init=lambda *a: init.relu_invariant_gauss(*a, mode="fan_out"))
        else:
            w = jt.make_var([oc, C // G, Kh, Kw], init=init_method)
    else:
        w = w_
    xx = x.reindex(
        [N, G, oc // G, CpG, oh, ow, Kh, Kw],
        [
            'i0',  # Nid
            f'i1*{CpG}+i3',  # Gid
            f'i4*{stride[0]}-{padding[0]}+i6*{dilation[0]}',  # Hid+Khid
            f'i5*{stride[1]}-{padding[1]}+i7*{dilation[1]}',  # Wid+KWid
        ])
    # w: [oc, CpG, Kh, Kw]
    ww = w.reindex([N, G, oc // G, CpG, oh, ow, Kh, Kw],
                   [f'i1*{oc//G}+i2', 'i3', 'i6', 'i7'])
    yy = xx * ww
    y = yy.reindex_reduce('add', [N, oc, oh, ow],
                          ['i0', f'i1*{oc//G}+i2', 'i4', 'i5'])
    return y
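# Hedged sketch of a grouped call to conv_nchw above; the shapes are assumptions for illustration.
def _grouped_conv_demo():
    x = jt.random([2, 8, 16, 16])                   # 8 input channels
    y = conv_nchw(x, 8, 16, kernel_size=3, padding=1, groups=4)
    # Each of the 4 groups convolves 2 input channels into 4 output channels;
    # oh == ow == (16 + 2*1 - 3*1 + 1 - 1) // 1 + 1 == 16, so y.shape == [2, 16, 16, 16].
    return y.data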
def adam(model, loss, lr=3e-4, betas=[0.9, 0.999], eps=1e-8):
    ps = jt.find_vars(model)
    gs = jt.grad(loss, ps)
    with jt.var_scope('_'.join([model, 'adam']), unique=True):
        adam_step = jt.make_var([1], init=jt.zeros)
        adam_step += 1
        for p, g in zip(ps, gs):
            m = jt.make_var(p.shape, init=jt.zeros)
            v = jt.make_var(p.shape, init=jt.zeros)
            m.assign(betas[0] * m + (1 - betas[0]) * g)
            v.assign(betas[1] * v + (1 - betas[1]) * g * g)
            step_size = lr * jt.sqrt(1 - betas[1] ** adam_step) / (1 - betas[0] ** adam_step)
            p -= m * step_size / (jt.sqrt(v) + eps)
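# Hedged training-step sketch for the adam routine above. It assumes jt.find_vars takes the
# model's variable-scope name as a string (which is why the optimizer state scope is built with
# '_'.join([model, 'adam'])); the scope name, shapes, and loss below are illustrative only.
def _adam_demo_step(batch_x, batch_y):
    with jt.var_scope('mlp', unique=True):
        pred = linear(batch_x, 10)        # parameters are created under the 'mlp' scope
    diff = pred - batch_y
    loss = (diff * diff).mean()
    adam('mlp', loss, lr=1e-3)            # updates every var found under the 'mlp' scope
    return loss.data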
def __init__(self, n_classes):
    super(SSD300, self).__init__()
    self.n_classes = n_classes
    self.base = VGGBase()
    self.aux_convs = AuxiliaryConvolutions()
    self.pred_convs = PredictionConvolutions(n_classes)
    self.rescale_factors = jt.make_var([1, 512, 1, 1], init=jt.zeros)
    init.constant_(self.rescale_factors, 20)
    self.priors_cxcy = self.create_prior_boxes()
def test5(self):
    with jt.flag_scope(use_cuda=1):
        f32 = jt.float32
        np.random.seed(0)
        jt.set_seed(3)
        x = f32(np.random.rand(1, 1))
        w = jt.make_var(
            [x.shape[-1], 10],
            init=lambda *a: (jt.random(*a) - f32(0.5)) / f32(x.shape[-1]) ** f32(0.5))
        jt.nn.matmul(x, w).data
def batch_norm(x, is_train, eps=1e-5, momentum=0.1):
    w = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))
    b = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
    running_mean = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
    running_var = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))

    w = w.broadcast(x, [0, 2, 3])
    b = b.broadcast(x, [0, 2, 3])
    if is_train:
        xmean = jt.mean(x, dims=[0, 2, 3], keepdims=1)
        x2mean = jt.mean(x * x, dims=[0, 2, 3], keepdims=1)
        xvar = x2mean - xmean * xmean
        norm_x = (x - xmean) / jt.sqrt(xvar + eps)
        running_mean += (xmean.sum([0, 2, 3]) - running_mean) * momentum
        running_var += (xvar.sum([0, 2, 3]) - running_var) * momentum
    else:
        running_mean = running_mean.broadcast(x, [0, 2, 3])
        running_var = running_var.broadcast(x, [0, 2, 3])
        norm_x = (x - running_mean) / jt.sqrt(running_var + eps)
    return norm_x * w + b
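# Hedged usage sketch for the running-stats batch_norm above; the activation shape is an assumption.
def _batch_norm_demo():
    x = jt.random([8, 16, 14, 14])            # NCHW activations
    y_train = batch_norm(x, is_train=True)    # normalize with batch stats, update running stats
    y_eval = batch_norm(x, is_train=False)    # normalize with the stored running stats
    return y_train.data, y_eval.data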
def conv_nhwc(x, in_planes, out_planes, kernel_size, padding, stride=1,
              dilation=1, init_method=None, w_=None):
    Kw = kernel_size
    Kh = kernel_size
    _C = in_planes
    Kc = out_planes
    N, H, W, C = x.shape
    assert C == _C
    if w_ is None:
        if init_method is None:
            w = jt.make_var(
                [Kc, _C, Kh, Kw],
                init=lambda *a: init.relu_invariant_gauss(*a, mode="fan_out"))
        else:
            w = jt.make_var([Kc, _C, Kh, Kw], init=init_method)
    else:
        w = w_
    oh = (H - Kh * dilation + dilation - 1 + padding * 2) // stride + 1
    ow = (W - Kw * dilation + dilation - 1 + padding * 2) // stride + 1
    xx = x.reindex(
        [N, Kc, C, oh, ow, Kh, Kw],
        [
            'i0',  # Nid
            f'i3*{stride}-{padding}+i5*{dilation}',  # Hid+Khid
            f'i4*{stride}-{padding}+i6*{dilation}',  # Wid+KWid
            'i2',  # Cid
        ])
    ww = w.broadcast(xx.shape, [0, 3, 4])
    yy = xx * ww
    y = yy.sum([2, 5, 6])  # C, Kh, Kw
    return y
def test_get_var_unique(self):
    # expect_error is a test helper from the original test module.
    jt.clean()
    x = jt.make_var([1], init=ops.random)
    y = jt.make_var([1], init=ops.random)
    z = jt.make_var([1], init=ops.random)
    assert x.name() == "var_0"
    assert y.name() == "var_1", y.name()
    assert z.name() == "var_2"
    x = jt.make_var([1], name="x", unique=True, init=ops.random)
    y = jt.make_var([1], name="y", unique=True, init=ops.random)
    z = jt.make_var([1], name="z", unique=True, init=ops.random)
    assert x.name() == "x"
    assert y.name() == "y"
    assert z.name() == "z"
    expect_error(
        lambda: jt.make_var([2], name="x", unique=True, init=ops.random))
    jt.clean()
def conv(x, in_planes, out_planes, kernel_size, padding, stride=1):
    Kw = kernel_size
    Kh = kernel_size
    _C = in_planes
    Kc = out_planes
    N, C, H, W = x.shape
    assert C == _C
    w = jt.make_var([Kc, _C, Kh, Kw], init=get_init_var)
    xx = x.reindex(
        [
            N, Kc, C,
            (H + padding * 2 - kernel_size) // stride + 1,
            (W + padding * 2 - kernel_size) // stride + 1,
            Kh, Kw
        ],
        [
            'i0',  # Nid
            'i2',  # Cid
            f'i3*{stride}-{padding}+i5',  # Hid+Khid
            f'i4*{stride}-{padding}+i6',  # Wid+KWid
        ])
    ww = w.broadcast(xx.shape, [0, 3, 4])
    yy = xx * ww
    y = yy.sum([2, 5, 6])  # reduce over C, Kh, Kw
    return y
def linear(x, n):
    # f32 is assumed to be jt.float32, as defined in test5 above.
    w = jt.make_var(
        [x.shape[-1], n],
        init=lambda *a: (jt.random(*a) - f32(0.5)) / f32(x.shape[-1]) ** f32(0.5))
    b = jt.make_var([n], init=lambda *a: jt.random(*a) - f32(0.5))
    return jt.matmul(x, w) + b
def linear(x, n):
    w = jt.make_var([x.shape[-1], n], init=ops.random)
    return jt.matmul(x, w)
def test_get_var_init(self):
    jt.clean()
    assert (jt.make_var(init=[1, 2, 3]).data == [1, 2, 3]).all()
    assert (jt.make_var(shape=[3], init=np.zeros).data == [0, 0, 0]).all()
    assert (jt.make_var(init=jt.array([1, 2, 3])).data == [1, 2, 3]).all()
    jt.clean()