def test_dnn_conv_merge():
    """Compare cuDNN conv graphs compiled with and without the merge opts.

    The same three outputs (forward, grad wrt kernel, grad wrt image) are
    compiled twice: once with ``mode_with_gpu`` and once with the
    ``local_dnn_conv*_alpha_merge`` / ``*_output_merge`` optimizations
    excluded.  The graph structure must differ as asserted below and the
    numerical results must agree.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    img = T.ftensor4()
    kern = T.ftensor4()
    out = T.ftensor4()

    batch, chans, n_filters = 1, 4, 3
    img_h, img_w = 5, 8
    kern_h, kern_w = 2, 6

    def rand_f32(*shape):
        # Uniform random float32 array of the requested shape.
        return numpy.random.random(shape).astype("float32")

    img_val = rand_f32(batch, chans, img_h, img_w)
    kern_val = rand_f32(n_filters, chans, kern_h, kern_w)
    out_val = rand_f32(batch, n_filters,
                       img_h - kern_h + 1, img_w - kern_w + 1)

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype="float32")

    if cuda.dnn.version() == -1:
        # Can't merge alpha with cudnn v1
        fr, wr, ir = conv + out, kern + gw, img + gi
    else:
        fr, wr, ir = lr * (conv + out), kern + lr * gw, img + lr * gi

    symbolic_inputs = [img, kern, out]
    symbolic_outputs = [fr, wr, ir]
    dnn_ops = (dnn.GpuDnnConv, dnn.GpuDnnConvGradW, dnn.GpuDnnConvGradI)

    f1 = theano.function(symbolic_inputs, symbolic_outputs,
                         mode=mode_with_gpu)
    for result, op_cls in zip(f1.maker.fgraph.outputs, dnn_ops):
        assert isinstance(result.owner.inputs[0].owner.op, op_cls)

    mode = mode_with_gpu
    for opt_name in ("local_dnn_conv_alpha_merge",
                     "local_dnn_convw_alpha_merge",
                     "local_dnn_convi_alpha_merge",
                     "local_dnn_conv_output_merge",
                     "local_dnn_convw_output_merge",
                     "local_dnn_convi_output_merge"):
        mode = mode.excluding(opt_name)

    f2 = theano.function(symbolic_inputs, symbolic_outputs, mode=mode)
    for result, op_cls in zip(f2.maker.fgraph.outputs, dnn_ops):
        assert not isinstance(result.owner.inputs[0].owner.op, op_cls)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)
    for merged, unmerged in zip(out_f1, out_f2):
        utt.assert_allclose(merged, unmerged)
def test_conv_gradi(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnConvGradI``.

    Only the 'valid' border mode with unit subsampling is exercised, for
    both the 'conv' and 'cross' convolution modes.
    """
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4("img")
    kerns = T.ftensor4("kerns")
    out = T.ftensor4("out")

    img_val = numpy.asarray(numpy.random.rand(3, 4, 5, 6), dtype="float32")
    kern_vals = numpy.asarray(numpy.random.rand(3, 4, 5, 6), dtype="float32")

    # Should this work for 'full'?
    configurations = product(["valid"], [(1, 1)], ["conv", "cross"])
    for border_mode, subsample, conv_mode in configurations:
        temp_kerns = kerns.dimshuffle(1, 0, 2, 3)

        img_shp = img_val.shape
        kern_shp = kern_vals.shape
        shape = (
            img_shp[0],
            kern_shp[1],
            img_shp[2] + kern_shp[2] - 1,
            img_shp[3] + kern_shp[3] - 1,
        )
        out_vals = numpy.zeros(shape, dtype="float32")

        desc_op = dnn.GpuDnnConvDesc(border_mode=border_mode,
                                     subsample=subsample,
                                     conv_mode=conv_mode)
        desc = desc_op(out.shape, temp_kerns.shape)
        conv_grad_i = dnn.GpuDnnConvGradI()(temp_kerns, img, out, desc)

        self._compile_and_check(
            [temp_kerns, img, out],
            [conv_grad_i],
            [kern_vals, img_val, out_vals],
            dnn.GpuDnnConvGradI,
        )
def test_dnn_conv_merge():
    """Check that identical cuDNN convolution nodes get merged.

    For the forward op and for both gradient ops, the same op is built
    twice on the same inputs; after compilation with ``mode_with_gpu``
    exactly one node of that op type must remain in the graph.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc = dnn.GpuDnnConvDesc(border_mode='valid')(kern.shape)

    def count_nodes(fn, op_cls):
        # Number of apply nodes in the compiled graph whose op is `op_cls`.
        topo = fn.maker.fgraph.toposort()
        return sum(1 for n in topo if isinstance(n.op, op_cls))

    # Test forward op
    o1 = dnn.dnn_conv(img, kern)
    o2 = dnn.dnn_conv(img, kern)
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    # Execute once so the merged graph is actually run; the numerical
    # results themselves are not inspected.
    f(numpy.random.rand(*img_shp).astype('float32'),
      numpy.random.rand(*kern_shp).astype('float32'))
    assert count_nodes(f, dnn.GpuDnnConv) == 1

    # Test grad w op, then grad i op (compiled only, never executed)
    for grad_op_cls in (dnn.GpuDnnConvGradW, dnn.GpuDnnConvGradI):
        o1 = grad_op_cls()(img, kern, out, desc)
        o2 = grad_op_cls()(img, kern, out, desc)
        f = theano.function([img, kern, out], [o1, o2], mode=mode_with_gpu)
        assert count_nodes(f, grad_op_cls) == 1
def test_conv_gradw(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnConvGradW``.

    Iterates over border modes ('valid', 'full') and convolution modes
    ('conv', 'cross') with unit subsampling only.
    """
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4("img")
    kerns = T.ftensor4("kerns")
    out = T.ftensor4("out")
    img_val = numpy.asarray(numpy.random.rand(2, 5, 6, 8), dtype="float32")
    kern_vals = numpy.asarray(numpy.random.rand(2, 1, 5, 6), dtype="float32")

    # NOTE: the original pre-loop `out_vals = zeros((3, 3, 1, 1))` was dead
    # code; it was always overwritten inside the loop before being used.

    # TODO: strides besides (1, 1)
    for border_mode, subsample, conv_mode in product(
            ["valid", "full"], [(1, 1)], ["conv", "cross"]):
        temp_img = img.dimshuffle(1, 0, 2, 3)
        temp_kerns = kerns
        if conv_mode == "conv":
            # Flip the two trailing (spatial) axes for true convolution.
            temp_kerns = temp_kerns[:, :, ::-1, ::-1]
        temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)

        shape = (
            kern_vals.shape[1],
            img_val.shape[1],
            img_val.shape[2] - kern_vals.shape[2] + 1,
            img_val.shape[3] - kern_vals.shape[3] + 1,
        )
        out_vals = numpy.zeros(shape, dtype="float32")

        desc = dnn.GpuDnnConvDesc(border_mode=border_mode,
                                  subsample=subsample,
                                  conv_mode=conv_mode)(
            temp_img.shape, out.shape
        )
        conv_grad_w = dnn.GpuDnnConvGradW()(temp_img, temp_kerns, out, desc)

        self._compile_and_check(
            [temp_img, temp_kerns, out],
            [conv_grad_w],
            [img_val, kern_vals, out_vals],
            dnn.GpuDnnConvGradW,
        )
def test_conv(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnConv``.

    Iterates over border modes ('valid', 'full'), subsamplings
    ((1, 1), (2, 2)) and convolution modes ('conv', 'cross').
    """
    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype='float32'
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype='float32'
    )

    for border_mode, subsample, conv_mode in product(
            ['valid', 'full'],
            [(1, 1), (2, 2)],
            ['conv', 'cross']):
        desc = dnn.GpuDnnConvDesc(
            border_mode=border_mode,
            subsample=subsample,
            conv_mode=conv_mode
        )(img.shape, kerns.shape)
        # Build the op on the symbolic variables.  The previous version
        # passed the numeric arrays (img_val, kern_vals), so the graph was
        # built on constants and did not depend on the inputs that
        # `_compile_and_check` feeds in.
        conv = dnn.GpuDnnConv()(img, kerns, desc)
        self._compile_and_check(
            [img, kerns],
            [conv],
            [img_val, kern_vals],
            dnn.GpuDnnConv
        )
def test_upsample_pool():
    """Check ``upsample_pool`` with pool size and stride (1, 2).

    Two cases are run: a single channel, and two channels at once.
    """
    pool_size = (1, 2)
    pool_stride = (1, 2)

    def compile_upsample():
        # Fresh symbolic graph and compiled function for each case.
        out = T.ftensor4()
        inputs = T.ftensor4()
        actual_in = upsample_pool(out, inputs, pool_size, pool_stride)
        return theano.function([out, inputs], actual_in)

    # Each case: (pooled output, original input, expected upsampled result)
    cases = [
        # single channel
        ([[[[5, 4, 1]]]],
         [[[[3, 0, 8, 4, 5, 6]]]],
         [[[[5., 0., 4., 0., 0., 1.]]]]),
        # pooling across several channels
        ([[[[-3, 5, 2]], [[5, 4, 1]]]],
         [[[[2, 1, 3, 4, 1, -7]], [[3, 0, 8, 4, 5, 6]]]],
         [[[[-3., 0., 0., 5., 2., 0.]], [[5., 0., 4., 0., 0., 1.]]]]),
    ]

    for pooled, original, expected in cases:
        upsample_pool_fn = compile_upsample()
        output = np.float32(pooled)
        inputs = np.float32(original)
        upsampled = upsample_pool_fn(output, inputs)
        assert np.allclose(expected, upsampled)
def create_back_conv_z_b_fn(min_in, max_in):
    """Compile and return a function wrapping ``relevance_conv_z_b``.

    The returned function takes ``(out_relevances, inputs, weights)``, all
    float32 4-tensors, and returns the input relevances.  ``min_in`` and
    ``max_in`` are forwarded unchanged to ``relevance_conv_z_b``.
    """
    inputs = T.ftensor4()
    weights = T.ftensor4()
    out_relevances = T.ftensor4()
    in_relevances = relevance_conv_z_b(
        out_relevances, inputs, weights, min_in, max_in)
    return theano.function(
        [out_relevances, inputs, weights], in_relevances)
def create_back_conv_z_plus_fn():
    """Compile and return a function wrapping ``relevance_conv_z_plus``.

    The returned function takes ``(out_relevances, inputs, weights)``, all
    float32 4-tensors, and returns the input relevances.
    """
    inputs = T.ftensor4()
    weights = T.ftensor4()
    out_relevances = T.ftensor4()
    in_relevances = relevance_conv_z_plus(out_relevances, inputs, weights)
    return theano.function(
        [out_relevances, inputs, weights], in_relevances)
def test_logical_shapes(self):
    """Compile and run a 'full' conv2d with logical shapes for strides 1-3.

    For each stride the compiled graph must contain a ``GpuConv`` node, and
    the function is executed once on random data.
    """
    seed_rng()

    kshp = (10, 2, 10, 10)
    featshp = (3, 10, 11, 11)
    kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])

    for stride in range(1, 4):
        a = tensor.ftensor4()
        A = tensor.ftensor4()

        # Need to transpose first two dimensions of kernel, and reverse
        # index kernel image dims (for correlation)
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

        featshp_logical = (featshp[0], featshp[1],
                           featshp[2] * stride, featshp[3] * stride)

        image_estimate = tensor.nnet.conv2d(
            a, kernel_rotated,
            border_mode='full',
            image_shape=featshp,
            filter_shape=kshp_rotated,
            imshp_logical=featshp_logical[1:],
            kshp_logical=kshp[2:])

        func = theano.function([a, A], image_estimate, mode=theano_mode)
        topo = func.maker.fgraph.toposort()
        assert any(isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                   for node in topo)

        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")

        func(a_in, A_in)
def test_grad_types(self):
    """Check that gradients of the AbstractConv ops keep their input types.

    This only tests the behaviour of the AbstractConv Ops themselves, not
    their optimizations.  Every CPU/GPU combination of the two operands is
    tried for the forward conv, gradWeights and gradInputs.
    """
    cpu_input = tensor.ftensor4()
    cpu_filters = tensor.ftensor4()
    cpu_topgrad = tensor.ftensor4()
    gpu_input = gpu_ftensor4()
    gpu_filters = gpu_ftensor4()
    gpu_topgrad = gpu_ftensor4()

    out_shape = tensor.lvector()

    input_vars = (cpu_input, gpu_input)
    filter_vars = (cpu_filters, gpu_filters)
    topgrad_vars = (cpu_topgrad, gpu_topgrad)

    # Check the gradient of the forward conv2d
    for inp, flt in itertools.product(input_vars, filter_vars):
        output = conv.conv2d(inp, flt)
        grad_input, grad_filters = theano.grad(output.sum(),
                                               wrt=(inp, flt))
        assert grad_input.type == inp.type, (
            grad_input, grad_input.type, inp, inp.type)
        assert grad_filters.type == flt.type, (
            grad_filters, grad_filters.type, flt, flt.type)

    # Check the gradient of gradweight
    for inp, top in itertools.product(input_vars, topgrad_vars):
        grad_filters = conv.AbstractConv2d_gradWeights()(
            inp, top, out_shape)
        grad_input, grad_topgrad = theano.grad(grad_filters.sum(),
                                               wrt=(inp, top))
        assert grad_input.type == inp.type, (
            grad_input, grad_input.type, inp, inp.type)
        assert grad_topgrad.type == top.type, (
            grad_topgrad, grad_topgrad.type, top, top.type)

    # Check the gradient of gradinputs
    for flt, top in itertools.product(filter_vars, topgrad_vars):
        grad_input = conv.AbstractConv2d_gradInputs()(
            flt, top, out_shape)
        grad_filters, grad_topgrad = theano.grad(grad_input.sum(),
                                                 wrt=(flt, top))
        assert grad_filters.type == flt.type, (
            grad_filters, grad_filters.type, flt, flt.type)
        assert grad_topgrad.type == top.type, (
            grad_topgrad, grad_topgrad.type, top, top.type)
def test_infer_shape(self):
    """Run ``_compile_and_check`` on ``images2neibs`` for several setups.

    Each entry pairs an image shape with the (neib_shape, mode)
    combinations checked on an all-ones float32 image of that shape.
    """
    cases = [
        ((100, 40, 6, 3), [((2, 1), 'valid'), ((2, 3), 'valid')]),
        ((100, 40, 5, 4), [((2, 1), 'ignore_borders')]),
        ((100, 40, 5, 3), [((2, 3), 'ignore_borders')]),
        ((100, 40, 6, 7), [((2, 2), 'ignore_borders')]),
        ((100, 40, 5, 10), [((3, 3), 'wrap_centered')]),
    ]

    for shape, neib_configs in cases:
        images = numpy.ones(shape).astype('float32')
        # One symbolic input per image shape, shared by its configurations.
        x = T.ftensor4()
        for neib_shape, mode in neib_configs:
            neibs = images2neibs(x, neib_shape=neib_shape, mode=mode)
            self._compile_and_check([x], [neibs], [images], Images2Neibs)
def test_conv_gradw(self, border_mode, conv_mode):
    """Delegate to ``_test_conv_gradw`` with float32 4-tensors, random
    data of shapes (2, 5, 6, 8) and (2, 1, 5, 6), and subsample (1, 1)."""
    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    out = T.ftensor4('out')
    img_val = numpy.random.rand(2, 5, 6, 8)
    kern_vals = numpy.random.rand(2, 1, 5, 6)
    subsample = (1, 1)
    self._test_conv_gradw(img, kerns, out, img_val, kern_vals,
                          border_mode, conv_mode, subsample)
def test_dnn_conv_border_mode():
    """Build ``dnn.dnn_conv`` with int, tuple and string border modes.

    The graphs are only constructed, never compiled or run; the test
    passes as long as construction does not raise.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    img = T.ftensor4()
    kern = T.ftensor4()

    for border_mode in (1, (2, 3), 'full', 'valid'):
        dnn.dnn_conv(img, kern, border_mode=border_mode)
def test_dnn_conv_border_mode():
    """Build ``dnn.dnn_conv`` with int, tuple and string border modes.

    The graphs are only constructed, never compiled or run; the test
    passes as long as construction does not raise.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4()
    kern = T.ftensor4()

    for border_mode in (1, (2, 3), "full", "valid"):
        dnn.dnn_conv(img, kern, border_mode=border_mode)
def test_local_lift_abstractconv_gpu_shape():
    """Compile ``AbstractConv2d_gradWeights`` with a symbolic shape vector.

    ``on_opt_error`` is set to "raise" for the duration of the test so an
    optimization failure surfaces as an exception; the previous value is
    always restored.
    """
    saved_on_opt_error = theano.config.on_opt_error
    try:
        theano.config.on_opt_error = "raise"
        s = tensor.ivector()
        a = tensor.ftensor4()
        b = tensor.ftensor4()
        grad_weights_op = tensor.nnet.abstract_conv.AbstractConv2d_gradWeights()
        c = grad_weights_op(a, b, s)
        theano.function([s, a, b], c, mode=mode_with_gpu)
    finally:
        theano.config.on_opt_error = saved_on_opt_error
def test_conv_no_bias(self):
    """Compile a bias-free conv2d with and without MKL and dump the graphs.

    Three PNG files are written through ``pydotprint``: the unoptimized
    graph and the graphs compiled with ``mode_with_mkl`` and
    ``mode_without_mkl``.  Nothing is asserted.
    """
    def dump_graph(obj, outfile):
        # Render `obj` (variable or compiled function) to `outfile`.
        theano.printing.pydotprint(obj, outfile=outfile,
                                   var_with_name_simple=True)

    images = T.ftensor4('inputs')
    weights = T.ftensor4('weights')

    convOut = conv2d(images, weights,
                     input_shape=(12, 3, 256, 256),
                     filter_shape=(12, 3, 3, 3),
                     filter_flip=False)
    dump_graph(convOut, "Conv_before_opt.png")

    fopt = theano.function(inputs=[images, weights], outputs=convOut,
                           mode=mode_with_mkl)
    dump_graph(fopt, "Conv_OPT_after_opt.png")

    fori = theano.function(inputs=[images, weights], outputs=convOut,
                           mode=mode_without_mkl)
    dump_graph(fori, "Conv_Original_after_opt.png")
def setup(self, bottom, top):
    """Compile the forward and backward Theano functions of the layer.

    The loss is the negated mean of ``sum(labels * log(probs))`` divided
    by ``sum(labels)``, both sums taken over axes (1, 2, 3).  The backward
    function returns the gradient of that loss with respect to ``probs``.

    Raises:
        Exception: if ``bottom`` does not hold exactly two inputs.
    """
    if len(bottom) != 2:
        raise Exception("The layer needs two inputs!")

    probs = T.ftensor4()
    labels = T.ftensor4()
    reduce_axes = (1, 2, 3)

    count = T.sum(labels, axis=reduce_axes, keepdims=True)
    label_log_prob = T.sum(labels * T.log(probs),
                           axis=reduce_axes, keepdims=True)
    loss_balanced = -T.mean(label_log_prob / count)

    self.forward_theano = theano.function([probs, labels], loss_balanced)
    grad_probs = T.grad(loss_balanced, probs)
    self.backward_theano = theano.function([probs, labels], grad_probs)
def test_tensor4_roc_auc_scores():
    """Compare the symbolic ROC AUC scores with the reference scores.

    Random binary targets and random predictions of shape
    (20, 30, 40, 50) are scored by
    ``tmetrics.classification.last_axis_roc_auc_scores`` (reference) and
    by the compiled ``tmetrics.classification.roc_auc_scores`` graph; the
    two must agree, treating NaNs as equal.
    """
    true = np.random.binomial(n=1, p=.5, size=(20, 30, 40, 50)).astype('float32')
    predicted = np.random.random((20, 30, 40, 50)).astype('float32')
    yt, yp = T.ftensor4('yt'), T.ftensor4('yp')
    refscore = tmetrics.classification.last_axis_roc_auc_scores(true, predicted)
    roc_auc_scores = tmetrics.classification.roc_auc_scores(yt, yp)
    f = theano.function([yt, yp], roc_auc_scores)
    score = f(true, predicted)
    # Parenthesised single-argument form: prints the same under Python 2's
    # print statement and Python 3's print function.
    print('refscore')
    print(refscore)
    print('score')
    print(score)
    assert np.allclose(refscore, score, equal_nan=True)
def test_conv(self, algo, border_mode, conv_mode):
    """Delegate to ``_test_conv`` for the given algo and modes.

    The test is skipped for the 'winograd' algo when
    ``dnn.version(raises=False)`` is below 5000.
    """
    winograd_unsupported = (algo == 'winograd' and
                            dnn.version(raises=False) < 5000)
    if winograd_unsupported:
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    out = T.ftensor4('out')
    img_val = numpy.random.rand(7, 2, 8, 4)
    kern_vals = numpy.random.rand(8, 2, 4, 3)
    subsamples = [(1, 1), (2, 2)]

    self._test_conv(img, kerns, out, img_val, kern_vals,
                    border_mode, conv_mode, subsamples, algo)
def test_local_abstractconv_gemm():
    """Compile and run a 'half' border-mode conv2d on the GPU mode.

    Tested here because only the optimization is under test.
    This tests gh-4036.
    """
    input_shape = (1, 32, 32, 32)
    filter_shape = (32, 32, 3, 3)

    image = tensor.ftensor4()
    W = tensor.ftensor4()
    conv = tensor.nnet.conv2d(image, W,
                              input_shape=input_shape,
                              filter_shape=filter_shape,
                              border_mode='half')
    f = theano.function([image, W], [conv], mode=mode_with_gpu)

    image_val = numpy.random.rand(*input_shape).astype('float32')
    W_val = numpy.random.rand(*filter_shape).astype('float32')
    f(image_val, W_val)
def setup(self, bottom, top):
    """Compile the forward and backward Theano functions of the layer.

    With ``probs_smooth = exp(probs_smooth_log)``, the loss is the mean of
    ``sum(probs_smooth * log(probs_smooth / probs), axis=1)``.  The
    backward function returns the gradients with respect to both
    ``probs`` and ``probs_smooth_log``.

    Raises:
        Exception: if ``bottom`` does not hold exactly two inputs.
    """
    if len(bottom) != 2:
        raise Exception("The layer needs two inputs!")

    probs = T.ftensor4()
    probs_smooth_log = T.ftensor4()
    probs_smooth = T.exp(probs_smooth_log)

    divergence = T.sum(probs_smooth * T.log(probs_smooth / probs), axis=1)
    loss = T.mean(divergence)

    self.forward_theano = theano.function([probs, probs_smooth_log], loss)
    gradients = T.grad(loss, [probs, probs_smooth_log])
    self.backward_theano = theano.function([probs, probs_smooth_log],
                                           gradients)
def test_conv_a_b():
    """Check ``relevance_conv_a_b`` (a=2, b=1, with bias) on a tiny input."""
    inputs = T.ftensor4()
    weights = T.ftensor4()
    relevances = T.ftensor4()
    bias = T.fvector()

    in_rel = relevance_conv_a_b(inputs, weights, relevances,
                                a=2, b=1, bias=bias)
    in_rel_fn = theano.function([inputs, weights, relevances, bias], in_rel)

    input_val = np.array([[[[-1, -2, 3]]]], dtype=np.float32)
    # The weights are passed flipped along the two trailing axes.
    weight_val = np.array([[[[1, -1]]]], dtype=np.float32)[:, :, ::-1, ::-1]
    relevance_val = np.array([[[[4, 2]]]], dtype=np.float32)
    bias_val = np.array([0], dtype=np.float32)

    in_relevance = in_rel_fn(input_val, weight_val, relevance_val, bias_val)

    expected = [[[[-4, 2*4 - 4/5.0, -6/5.0]]]]
    assert np.allclose(expected, in_relevance)
def test_conv_with_bias(self):
    """Compile conv2d plus a broadcast bias with and without MKL.

    Three PNG files are written through ``pydotprint``: the unoptimized
    graph and the graphs compiled with ``mode_with_mkl`` and
    ``mode_without_mkl``.  Nothing is asserted.
    """
    def dump_graph(obj, outfile):
        # Render `obj` (variable or compiled function) to `outfile`.
        theano.printing.pydotprint(obj, outfile=outfile,
                                   var_with_name_simple=True)

    images = T.ftensor4('inputs')
    weights = T.ftensor4('weights')
    bias = T.vector('bias')

    convOut = conv2d(images, weights,
                     input_shape=(12, 3, 256, 256),
                     filter_shape=(12, 3, 3, 3),
                     filter_flip=False)
    # Broadcast the bias vector along every axis except axis 1.
    convOutBias = convOut + bias.dimshuffle('x', 0, 'x', 'x')
    dump_graph(convOutBias, "ConvBias_before_opt.png")

    fopt = theano.function(inputs=[images, weights, bias],
                           outputs=convOutBias, mode=mode_with_mkl)
    dump_graph(fopt, "ConvBias_OPT_after_opt.png")

    fori = theano.function(inputs=[images, weights, bias],
                           outputs=convOutBias, mode=mode_without_mkl)
    dump_graph(fori, "ConvBias_Original_after_opt.png")
def test_dnn_conv_inplace():
    """Check that in-place conv ops work when GpuAllocEmpty gets merged.

    For the forward op and both gradient ops, two ops that differ only in
    ``conv_mode`` ('conv' vs 'cross') are compiled together.  Both must
    remain in the graph, both must be in-place, and there must be two
    ``GpuAllocEmpty`` nodes.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='conv')(
        kern.shape)
    desc2 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='cross')(
        kern.shape)

    def check_inplace(fn, op_cls):
        # Two in-place `op_cls` nodes and two GpuAllocEmpty nodes expected.
        topo = fn.maker.fgraph.toposort()
        convs = [n for n in topo if isinstance(n.op, op_cls)]
        assert len(convs) == 2
        assert all(node.op.inplace for node in convs)
        assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2

    # Test forward op
    o1 = dnn.dnn_conv(img, kern, conv_mode='conv')
    o2 = dnn.dnn_conv(img, kern, conv_mode='cross')
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    # Execute once so the in-place graph is actually run; the numerical
    # results themselves are not inspected.
    f(numpy.random.rand(*img_shp).astype('float32'),
      numpy.random.rand(*kern_shp).astype('float32'))
    check_inplace(f, dnn.GpuDnnConv)

    # Test grad w op, then grad i op (compiled only, never executed).
    # The shared output buffer takes the dtype and shape of `like`.
    for grad_op_cls, like in ((dnn.GpuDnnConvGradW, kern),
                              (dnn.GpuDnnConvGradI, img)):
        out = GpuAllocEmpty(like.dtype, test_ctx_name)(*like.shape)
        o1 = grad_op_cls()(img, kern, out, desc1)
        o2 = grad_op_cls()(img, kern, out, desc2)
        f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
        check_inplace(f, grad_op_cls)
def test_conv_gradw(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnConvGradW``.

    Iterates over border modes ('valid', 'full') and convolution modes
    ('conv', 'cross') with unit subsampling only.
    """
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    out = T.ftensor4('out')
    img_val = numpy.asarray(
        numpy.random.rand(2, 5, 6, 8),
        dtype='float32'
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(2, 1, 5, 6),
        dtype='float32'
    )

    # NOTE: the original pre-loop `out_vals = zeros((3, 3, 1, 1))` was dead
    # code; it was always overwritten inside the loop before being used.

    for border_mode, subsample, conv_mode in product(
            ['valid', 'full'],
            [(1, 1)],  # TODO: strides besides (1, 1)
            ['conv', 'cross']):
        temp_img = img.dimshuffle(1, 0, 2, 3)
        temp_kerns = kerns
        if conv_mode == 'conv':
            # Flip the two trailing (spatial) axes for true convolution.
            temp_kerns = temp_kerns[:, :, ::-1, ::-1]
        temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)

        shape = (
            kern_vals.shape[1],
            img_val.shape[1],
            img_val.shape[2] - kern_vals.shape[2] + 1,
            img_val.shape[3] - kern_vals.shape[3] + 1
        )
        out_vals = numpy.zeros(shape, dtype='float32')

        desc = dnn.GpuDnnConvDesc(
            border_mode=border_mode,
            subsample=subsample,
            conv_mode=conv_mode
        )(temp_img.shape, out.shape)
        conv_grad_w = dnn.GpuDnnConvGradW()(
            temp_img,
            temp_kerns,
            out,
            desc,
        )
        self._compile_and_check(
            [temp_img, temp_kerns, out],
            [conv_grad_w],
            [img_val, kern_vals, out_vals],
            dnn.GpuDnnConvGradW
        )
def make_node(self, x, x2, x3, x4, x5):
    """Build the Apply node for this op.

    The five inputs are converted to tensor variables.  Four outputs are
    declared; the types of the first two depend on ``prm.att_doc`` and
    ``prm.compute_emb``, the last two are always a float matrix and an
    int vector.
    """
    # Check that the theano version has support for __props__.
    # The attribute name below looks like a typo, but it is actually the
    # way to detect that the theano version is recent enough to support
    # the use of __props__.
    assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."

    inputs = [tensor.as_tensor_variable(v) for v in (x, x2, x3, x4, x5)]

    if prm.att_doc:
        make_td = tensor.itensor4 if prm.compute_emb else tensor.ftensor4
        make_tm = tensor.ftensor3
    else:
        make_td = tensor.itensor3 if prm.compute_emb else tensor.ftensor3
        make_tm = tensor.fmatrix

    td = make_td().type()
    tm = make_tm().type()
    outputs = [td, tm, tensor.fmatrix().type(), tensor.ivector().type()]

    return theano.Apply(self, inputs, outputs)
def test_pool(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnPool``.

    Every combination of window size, stride and pooling mode is checked
    with zero padding.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor4('img')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype='float32'
    )

    modes = ['max', 'average_inc_pad']
    # 'average_exc_pad' is disabled for versions < 4004
    if not dnn.version(raises=False) < 4004:
        modes = modes + ['average_exc_pad']

    sizes = [(1, 1), (2, 2), (3, 3)]
    strides = [(1, 1), (2, 2), (3, 3)]

    for window, stride, mode in product(sizes, strides, modes):
        pooled = dnn.GpuDnnPool(mode=mode)(img, window, stride, (0, 0))
        self._compile_and_check(
            [img],
            [pooled],
            [img_val],
            dnn.GpuDnnPool
        )
def test_softmax(self):
    """Run ``_compile_and_check`` on ``dnn.GpuDnnSoftmax`` and its grad.

    The forward op ('accurate', 'channel') is checked first, then the
    gradient of its mean with respect to the input.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    t = T.ftensor4('t')
    rand_tensor = numpy.asarray(
        numpy.random.rand(5, 4, 3, 2),
        dtype='float32'
    )

    forward = dnn.GpuDnnSoftmax('accurate', 'channel')(t)
    self._compile_and_check(
        [t],
        [forward],
        [rand_tensor],
        dnn.GpuDnnSoftmax
    )

    # A fresh softmax node is built for the gradient check.
    mean_softmax = dnn.GpuDnnSoftmax('accurate', 'channel')(t).mean()
    backward = T.grad(mean_softmax, t)
    self._compile_and_check(
        [t],
        [backward],
        [rand_tensor],
        dnn.GpuDnnSoftmaxGrad
    )
def test_dnn_tag():
    """
    Test that if cudnn isn't avail we crash and that if it is avail, we use it.
    """
    x = T.ftensor4()
    old = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    sio = StringIO()
    handler = logging.StreamHandler(sio)
    test_logger_name = 'theano.compile.tests.test_dnn'
    logging.getLogger(test_logger_name).addHandler(handler)
    # Silence original handler when intentionally generating warning messages
    logging.getLogger('theano').removeHandler(theano.logging_default_handler)

    # Stays None when compilation fails, i.e. when cudnn is unavailable.
    f = None
    try:
        f = theano.function(
            [x],
            pool_2d(x, ds=(2, 2), ignore_border=True),
            mode=mode_with_gpu.including("cudnn"))
    except (AssertionError, RuntimeError):
        assert not dnn.dnn_available(test_ctx_name)
    finally:
        theano.config.on_opt_error = old
        logging.getLogger(test_logger_name).removeHandler(handler)
        logging.getLogger('theano').addHandler(theano.logging_default_handler)

    if f is not None:
        assert dnn.dnn_available(test_ctx_name)
        topo = f.maker.fgraph.toposort()
        assert any([isinstance(n.op, dnn.GpuDnnPool) for n in topo])
def __init__(self, config=None, defaults=defaults, inputs_hook=None, hiddens_hook=None, params_hook=None,
             use_data_layer=None, rand_crop=None, batch_size=None):
    """Initialize the AlexNet model.

    All constructor arguments are forwarded by keyword to the parent
    class initializer.  After that the symbolic input variables are
    created and ``build_computation_graph`` is called.

    Using ``inputs_hook``, ``hiddens_hook`` or ``params_hook`` only logs
    an error; they are not implemented for this model.
    """
    # Snapshot the arguments before any other local is created.  `self`
    # is dropped with an equality-safe lookup (the old code compared the
    # name with `is not`, i.e. by identity).  `__class__` is dropped too:
    # on Python 3 it appears in locals() for any method that references
    # `super`.
    init_args = dict(locals())
    init_args.pop('self', None)
    init_args.pop('__class__', None)

    # combine everything by passing to Model's init
    super(AlexNet, self).__init__(**init_args)
    # configs can now be accessed through self dictionary

    if self.inputs_hook or self.hiddens_hook or self.params_hook:
        log.error("Inputs_hook, hiddens_hook, and params_hook not implemented yet for AlexNet!")

    self.flag_datalayer = self.use_data_layer

    ####################
    # Theano variables #
    ####################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    self.x = T.ftensor4('x')
    self.y = T.lvector('y')
    self.rand = T.fvector('rand')

    ##########
    # params #
    ##########
    self.params = []

    # make the network!
    self.build_computation_graph()
def setup(self):
    """
    Set up the model to train.

    Creates the symbolic input variables, defines the nested ``_build``
    helper that assembles the loss/output graph for a given configuration,
    and compiles the Theano functions stored on ``self``: ``train_fn``,
    ``eval_fn``, ``debug_test_fn``, ``fuzzy_test_fn`` and ``snap_test_fn``.
    Also records ``self.info_keys`` and ``self.eval_info_keys``, the keys of
    the extra info values returned after the loss by the train/eval
    functions.
    """
    # input_words: shape (n_batch, n_sentence, sentence_len)
    input_words = T.itensor3()
    n_batch, n_sentences, sentence_len = input_words.shape
    # query_words: shape (n_batch, query_len)
    query_words = T.imatrix()
    # correct_output: shape (n_batch, ?, num_output_words)
    correct_output = T.ftensor3()
    # graph_num_new_nodes: shape(n_batch, n_sentence)
    graph_num_new_nodes = T.imatrix()
    # graph_new_node_strengths: shape(n_batch, n_sentence, new_nodes_per_iter)
    graph_new_node_strengths = T.ftensor3()
    # graph_new_node_ids: shape(n_batch, n_sentence, new_nodes_per_iter, num_node_ids)
    graph_new_node_ids = T.ftensor4()
    # graph_new_edges: shape(n_batch, n_sentence, pad_graph_size, pad_graph_size, num_edge_types)
    graph_new_edges = T.TensorType('floatX', (False, ) * 5)()

    def _build(with_correct_graph, snap_to_best, using_dropout,
               evaluate_accuracy):
        """Build one variant of the model graph.

        with_correct_graph: feed the supervised graph inputs into the scan,
            compute node/edge losses and substitute the correct graph at
            each step.
        snap_to_best: replace predicted strengths/ids/edges/outputs with
            their "best" snapped versions (only used on the branches that
            are not driven by the correct graph, plus the final output).
        using_dropout: create per-iteration dropout masks and pass them
            through the scan.
        evaluate_accuracy: additionally compute per-example graph accuracy
            (only has an effect together with with_correct_graph).

        Returns (full_loss, final_output, full_flat_gstates,
        graph_accurate_list, max_seq_len, info).
        """
        info = {}
        # Process each sentence, flattened to (?, sentence_len)
        flat_input_words = input_words.reshape([-1, sentence_len])
        flat_input_reprs, flat_ref_matrices = self.input_transformer.process(
            flat_input_words)
        # flat_input_reprs of shape (?, input_repr_size)
        # flat_ref_matrices of shape (?, num_node_ids, input_repr_size)
        input_reprs = flat_input_reprs.reshape(
            [n_batch, n_sentences, self.input_repr_size])
        ref_matrices = flat_ref_matrices.reshape([
            n_batch, n_sentences, self.num_node_ids, self.input_repr_size
        ])
        query_repr, query_ref_matrix = self.input_transformer.process(
            query_words)

        # Collect the dropout masks used inside the scan. The order of the
        # extend() calls mirrors the order in which _iter_fn consumes them.
        if using_dropout:
            iter_dropouts = []
            states_mask = util.make_dropout_mask(
                (self.node_state_size, ), self.dropout_keep, self.srng)
            if self.nodes_mutable:
                iter_dropouts.extend(
                    self.node_state_updater.dropout_masks(
                        self.srng, states_mask))
            if len(self.word_node_mapping) > 0:
                iter_dropouts.extend(
                    self.direct_reference_updater.dropout_masks(
                        self.srng, states_mask))
            if self.intermediate_propagate != 0:
                iter_dropouts.extend(
                    self.intermediate_propagator.dropout_masks(
                        self.srng, states_mask))
            if self.dynamic_nodes:
                iter_dropouts.extend(
                    self.new_node_adder.dropout_masks(self.srng))
            iter_dropouts.extend(
                self.edge_state_updater.dropout_masks(self.srng))
        else:
            iter_dropouts = []
            states_mask = None

        def _iter_fn(input_repr,
                     ref_matrix,
                     gstate,
                     correct_num_new_nodes=None,
                     correct_new_strengths=None,
                     correct_new_node_ids=None,
                     correct_edges=None,
                     dropout_masks=None):
            """Apply one sentence's worth of updates to the graph state.

            Returns (gstate, node_loss, edge_loss, overall_accuracy) when
            with_correct_graph is set, otherwise just the new gstate.
            """
            # If necessary, update node state
            if self.nodes_mutable:
                gstate, dropout_masks = self.node_state_updater.process(
                    gstate, input_repr, dropout_masks)
            if len(self.word_node_mapping) > 0:
                gstate, dropout_masks = self.direct_reference_updater.process(
                    gstate, ref_matrix, dropout_masks)
            # If necessary, propagate node state
            if self.intermediate_propagate != 0:
                gstate, dropout_masks = self.intermediate_propagator.process_multiple(
                    gstate, self.intermediate_propagate, dropout_masks)
            node_loss = None
            node_accuracy = None
            # Propose and vote on new nodes
            if self.dynamic_nodes:
                new_strengths, new_ids, dropout_masks = self.new_node_adder.get_candidates(
                    gstate, input_repr, self.new_nodes_per_iter,
                    dropout_masks)
                # new_strengths and correct_new_strengths are of shape (n_batch, new_nodes_per_iter)
                # new_ids and correct_new_node_ids are of shape (n_batch, new_nodes_per_iter, num_node_ids)
                if with_correct_graph:
                    # Score the proposal against every ordering of the
                    # correct new nodes.
                    perm_idxs = np.array(
                        list(
                            itertools.permutations(
                                range(self.new_nodes_per_iter))))
                    permuted_correct_str = correct_new_strengths[:, perm_idxs]
                    permuted_correct_ids = correct_new_node_ids[:, perm_idxs]
                    # due to advanced indexing, we should have shape (n_batch, permutation, new_nodes_per_iter, num_node_ids)
                    ext_new_str = T.shape_padaxis(new_strengths, 1)
                    ext_new_ids = T.shape_padaxis(new_ids, 1)
                    strength_ll = permuted_correct_str * T.log(
                        ext_new_str + util.EPSILON) + (1 - permuted_correct_str) * T.log(
                            1 - ext_new_str + util.EPSILON)
                    ids_ll = permuted_correct_ids * T.log(ext_new_ids + util.EPSILON)
                    reduced_perm_lls = T.sum(strength_ll, axis=2) + T.sum(
                        ids_ll, axis=[2, 3])
                    if self.best_node_match_only:
                        node_loss = -T.max(reduced_perm_lls, 1)
                    else:
                        full_ll = util.reduce_log_sum(reduced_perm_lls, 1)
                        # Note that some of these permutations are identical, since we likely did not add the maximum
                        # amount of nodes. Thus we will have added repeated elements here.
                        # We have log(x+x+...+x) = log(kx), where k is the repetition factor and x is the probability we want
                        # log(kx) = log(k) + log(x)
                        # Our repetition factor k is given by (new_nodes_per_iter - correct_num_new_nodes)!
                        # Recall that n! = gamma(n+1)
                        # so log(x) = log(kx) - log(gamma(k+1))
                        log_rep_factor = T.gammaln(
                            T.cast(
                                self.new_nodes_per_iter - correct_num_new_nodes + 1,
                                'floatX'))
                        scaled_ll = full_ll - log_rep_factor
                        node_loss = -scaled_ll
                    if evaluate_accuracy:
                        best_match_idx = T.argmax(reduced_perm_lls, 1)
                        # should be of shape (n_batch), indexing the best permutation
                        best_correct_str = permuted_correct_str[
                            T.arange(n_batch), best_match_idx]
                        best_correct_ids = permuted_correct_ids[
                            T.arange(n_batch), best_match_idx]
                        snapped_strengths = util.independent_best(
                            new_strengths)
                        snapped_ids = util.categorical_best(
                            new_ids) * T.shape_padright(snapped_strengths)
                        close_strengths = T.all(
                            T.isclose(best_correct_str, snapped_strengths), (1))
                        close_ids = T.all(
                            T.isclose(best_correct_ids, snapped_ids), (1, 2))
                        node_accuracy = T.and_(close_strengths, close_ids)
                    # now substitute in the correct nodes
                    gstate = gstate.with_additional_nodes(
                        correct_new_strengths, correct_new_node_ids)
                elif snap_to_best:
                    snapped_strengths = util.independent_best(
                        new_strengths)
                    snapped_ids = util.categorical_best(new_ids)
                    gstate = gstate.with_additional_nodes(
                        snapped_strengths, snapped_ids)
                else:
                    gstate = gstate.with_additional_nodes(
                        new_strengths, new_ids)
            # Update edge state
            gstate, dropout_masks = self.edge_state_updater.process(
                gstate, input_repr, dropout_masks)
            if with_correct_graph:
                # Crop the (padded) correct edges to the current node count.
                cropped_correct_edges = correct_edges[:, :gstate.n_nodes, :
                                                      gstate.n_nodes, :]
                edge_lls = cropped_correct_edges * T.log(
                    gstate.edge_strengths + util.EPSILON) + (1 - cropped_correct_edges) * T.log(
                        1 - gstate.edge_strengths + util.EPSILON)
                # edge_lls currently penalizes for edges connected to nodes that do not exist
                # we do not want it to do this, so we mask it with node strengths
                mask_src = util.shape_padaxes(gstate.node_strengths, [2, 3])
                mask_dest = util.shape_padaxes(gstate.node_strengths, [1, 3])
                masked_edge_lls = edge_lls * mask_src * mask_dest
                edge_loss = -T.sum(masked_edge_lls, axis=[1, 2, 3])
                if evaluate_accuracy:
                    snapped_edges = util.independent_best(
                        gstate.edge_strengths)
                    close_edges = T.isclose(cropped_correct_edges, snapped_edges)
                    ok_mask = 1 - T.cast(
                        mask_src * mask_dest, 'int8'
                    )  # its OK for things not to match if node strengths are NOT both 1
                    edge_accuracy = T.all(T.or_(close_edges, ok_mask), (1, 2, 3))
                    overall_accuracy = edge_accuracy if node_accuracy is None else T.and_(
                        node_accuracy, edge_accuracy)
                else:
                    overall_accuracy = None
                # Continue from the correct edges rather than the predicted ones.
                gstate = gstate.with_updates(
                    edge_strengths=cropped_correct_edges)
                return gstate, node_loss, edge_loss, overall_accuracy
            elif snap_to_best:
                snapped_edges = util.independent_best(
                    gstate.edge_strengths)
                gstate = gstate.with_updates(edge_strengths=snapped_edges)
                return gstate
            else:
                return gstate

        # Scan over each sentence
        def _scan_fn(
                input_repr, *stuff
        ):  # (input_repr, [ref_matrix?], [*correct_graph_stuff?], [dropout_masks?], *flat_graph_state, pad_graph_size)
            """Scan step: unpack the flat scan arguments, run _iter_fn, and
            re-flatten the resulting state (plus any losses/accuracy)."""
            stuff = list(stuff)
            if len(self.word_node_mapping) > 0:
                ref_matrix = stuff[0]
                stuff = stuff[1:]
            else:
                ref_matrix = None
            if with_correct_graph:
                c_num_new_nodes, c_new_strengths, c_new_node_ids, c_edges = stuff[:
                                                                                  4]
                stuff = stuff[4:]
            if using_dropout:
                dropout_masks = stuff[:len(iter_dropouts)]
                stuff = stuff[len(iter_dropouts):]
            else:
                dropout_masks = None
            # Whatever remains is the flattened graph state followed by the
            # pad_graph_size non-sequence.
            flat_graph_state = stuff[:-1]
            pad_graph_size = stuff[-1]
            gstate = GraphState.unflatten_from_const_size(flat_graph_state)
            if with_correct_graph:
                gstate, node_loss, edge_loss, overall_accuracy = _iter_fn(
                    input_repr,
                    ref_matrix,
                    gstate,
                    c_num_new_nodes,
                    c_new_strengths,
                    c_new_node_ids,
                    c_edges,
                    dropout_masks=dropout_masks)
            else:
                gstate = _iter_fn(input_repr,
                                  ref_matrix,
                                  gstate,
                                  dropout_masks=dropout_masks)
            retvals = gstate.flatten_to_const_size(pad_graph_size)
            # Extra outputs are appended in the order
            # [node_loss?], edge_loss, [overall_accuracy?].
            if with_correct_graph:
                if self.dynamic_nodes:
                    retvals.append(node_loss)
                retvals.append(edge_loss)
                if evaluate_accuracy:
                    retvals.append(overall_accuracy)
            return retvals

        if self.dynamic_nodes:
            initial_gstate = GraphState.create_empty(
                n_batch, self.num_node_ids, self.node_state_size,
                self.num_edge_types)
        else:
            initial_gstate = GraphState.create_full_unique(
                n_batch, self.num_node_ids, self.node_state_size,
                self.num_edge_types)
        # Account for all nodes, plus the extra padding node to prevent GPU unpleasantness
        if self.dynamic_nodes:
            pad_graph_size = n_sentences * self.new_nodes_per_iter + 1
        else:
            pad_graph_size = self.num_node_ids
        outputs_info = initial_gstate.flatten_to_const_size(pad_graph_size)
        # Move the sentence axis first so scan iterates over sentences.
        prepped_input = input_reprs.dimshuffle([1, 0, 2])
        sequences = [prepped_input]
        if len(self.word_node_mapping) > 0:
            sequences.append(ref_matrices.dimshuffle([1, 0, 2, 3]))
        if with_correct_graph:
            sequences.append(graph_num_new_nodes.swapaxes(0, 1))
            sequences.append(graph_new_node_strengths.swapaxes(0, 1))
            sequences.append(graph_new_node_ids.swapaxes(0, 1))
            sequences.append(graph_new_edges.swapaxes(0, 1))
            # One non-recurrent (None) output slot per extra value that
            # _scan_fn appends: node loss, accuracy, and edge loss.
            if self.dynamic_nodes:
                outputs_info.extend([None])
            if evaluate_accuracy:
                outputs_info.extend([None])
            outputs_info.extend([None])
        if using_dropout:
            sequences.extend(iter_dropouts)
        all_scan_out, _ = theano.scan(_scan_fn,
                                      sequences=sequences,
                                      outputs_info=outputs_info,
                                      non_sequences=[pad_graph_size])
        graph_accurate_list = None
        # Peel the extra outputs off the end of the scan results, in the
        # reverse of the order in which _scan_fn appended them.
        if with_correct_graph:
            if evaluate_accuracy:
                full_graph_accuracy = all_scan_out[-1]
                all_scan_out = all_scan_out[:-1]
                graph_accurate_list = T.all(full_graph_accuracy, 0)
                info["graph_accuracy"] = T.sum(graph_accurate_list,
                                               dtype='floatX') / T.cast(
                                                   n_batch, 'floatX')
            if self.dynamic_nodes:
                all_flat_gstates = all_scan_out[:-2]
                node_loss, edge_loss = all_scan_out[-2:]
                reduced_node_loss = T.sum(node_loss) / T.cast(
                    n_batch, 'floatX')
                reduced_edge_loss = T.sum(edge_loss) / T.cast(
                    n_batch, 'floatX')
                avg_graph_loss = (reduced_node_loss + reduced_edge_loss) / T.cast(
                    input_words.shape[1], 'floatX')
                info["node_loss"] = reduced_node_loss
                info["edge_loss"] = reduced_edge_loss
            else:
                all_flat_gstates = all_scan_out[:-1]
                edge_loss = all_scan_out[-1]
                reduced_edge_loss = T.sum(edge_loss) / T.cast(
                    n_batch, 'floatX')
                avg_graph_loss = reduced_edge_loss / T.cast(
                    input_words.shape[1], 'floatX')
                info["edge_loss"] = reduced_edge_loss
        else:
            all_flat_gstates = all_scan_out
        if self.sequence_representation:
            # Each part of all_flat_gstates is of shape (n_sentences, n_batch, ...)
            # except for the last one, which we handle separately
            # Swap to (n_batch, n_sentences, ...)
            # Then flatten to (n_batch*n_sentences, ...) for further processing
            final_flat_gstate = [
                x.swapaxes(0, 1).reshape(T.concatenate([[-1], x.shape[2:]]),
                                         ndim=(x.ndim - 1))
                for x in all_flat_gstates[:-1]
            ]
            # As for the last one, we need to get a single scalar value. The last one will be the biggest
            # so we will take that. Note that this will introduce a bunch of zero-nodes, but thats
            # OK and we can process that later. (We REQUIRE that padding in graph_state makes zero strength
            # nodes here!)
            final_flat_gstate.append(all_flat_gstates[-1][-1])
            # We also need to repeat query_repr and query_ref_matrix so that they broadcast together
            query_repr = T.extra_ops.repeat(query_repr, n_sentences, 0)
            query_ref_matrix = T.extra_ops.repeat(query_ref_matrix,
                                                  n_sentences, 0)
        else:
            # Extract last timestep
            final_flat_gstate = [x[-1] for x in all_flat_gstates]
        final_gstate = GraphState.unflatten_from_const_size(
            final_flat_gstate)
        if self.train_with_query:
            if self.wipe_node_state:
                final_gstate = final_gstate.with_updates(
                    node_states=T.zeros_like(final_gstate.node_states))
            # NOTE(review): these query-time dropout masks are created
            # regardless of using_dropout (states_mask is None in that
            # case) - confirm that the dropout_masks helpers handle this.
            qnsu_dropout_masks = self.query_node_state_updater.dropout_masks(
                self.srng, states_mask)
            query_gstate, _ = self.query_node_state_updater.process(
                final_gstate, query_repr, qnsu_dropout_masks)
            if len(self.word_node_mapping) > 0:
                qdru_dropout_masks = self.query_direct_reference_updater.dropout_masks(
                    self.srng, states_mask)
                query_gstate, _ = self.query_direct_reference_updater.process(
                    query_gstate, query_ref_matrix, qdru_dropout_masks)
            fp_dropout_masks = self.final_propagator.dropout_masks(
                self.srng, states_mask)
            propagated_gstate, _ = self.final_propagator.process_multiple(
                query_gstate, self.final_propagate, fp_dropout_masks)
            agg_dropout_masks = self.aggregator.dropout_masks(self.srng)
            aggregated_repr, _ = self.aggregator.process(
                propagated_gstate,
                agg_dropout_masks)  # shape (n_batch, output_repr_size)
            if self.sequence_representation:
                # aggregated_repr is of shape (n_batch*n_sentences, repr_width)
                # We want to split back to timesteps: (n_batch, n_sentences, repr_width)
                agg_repr_seq = aggregated_repr.reshape(
                    [n_batch, n_sentences, -1])
                # Now collapse it to a summary representation
                aggsum_dropout_masks = self.aggregate_summarizer.dropout_masks(
                    self.srng)
                aggregated_repr, _ = self.aggregate_summarizer.process(
                    agg_repr_seq, aggsum_dropout_masks)
                # At this point aggregated_repr is (n_batch, repr_width) as desired
            max_seq_len = correct_output.shape[1]
            if self.output_format == ModelOutputFormat.sequence:
                final_output = self.output_processor.process(
                    aggregated_repr,
                    max_seq_len)  # shape (n_batch, ?, num_output_words)
            else:
                final_output = self.output_processor.process(
                    aggregated_repr)
            if snap_to_best:
                final_output = self.output_processor.snap_to_best(
                    final_output)
            if self.output_format == ModelOutputFormat.subset:
                elemwise_loss = T.nnet.binary_crossentropy(
                    final_output, correct_output)
                query_loss = T.sum(elemwise_loss)
            else:
                flat_final_output = final_output.reshape(
                    [-1, self.num_output_words])
                flat_correct_output = correct_output.reshape(
                    [-1, self.num_output_words])
                timewise_loss = T.nnet.categorical_crossentropy(
                    flat_final_output, flat_correct_output)
                query_loss = T.sum(timewise_loss)
            query_loss = query_loss / T.cast(n_batch, 'floatX')
            info["query_loss"] = query_loss
        else:
            final_output = T.zeros([])
        # Total loss = graph loss (if supervised) + query loss (if used).
        full_loss = np.array(0.0, np.float32)
        if with_correct_graph:
            full_loss = full_loss + avg_graph_loss
        if self.train_with_query:
            full_loss = full_loss + query_loss
        if self.train_with_query:
            # Give the query/propagated states a timestep axis so they can
            # be concatenated after the per-sentence states along axis 1.
            adjusted_query_gstates = [
                x.reshape(T.concatenate([[n_batch, n_sentences], x.shape[1:]]),
                          ndim=(x.ndim + 1))
                if self.sequence_representation else T.shape_padaxis(x, 1)
                for x in query_gstate.flatten()
            ]
            adjusted_prop_gstates = [
                x.reshape(T.concatenate([[n_batch, n_sentences], x.shape[1:]]),
                          ndim=(x.ndim + 1))
                if self.sequence_representation else T.shape_padaxis(x, 1)
                for x in propagated_gstate.flatten()
            ]
            full_flat_gstates = [
                T.concatenate([a.swapaxes(0, 1), b, c], 1)
                for a, b, c in zip(all_flat_gstates[:-1],
                                   adjusted_query_gstates,
                                   adjusted_prop_gstates)
            ]
        else:
            full_flat_gstates = [
                a.swapaxes(0, 1) for a in all_flat_gstates[:-1]
            ]
            # No query branch ran, so max_seq_len was never derived from
            # correct_output; return a fresh scalar placeholder instead.
            max_seq_len = T.iscalar()
        return full_loss, final_output, full_flat_gstates, graph_accurate_list, max_seq_len, info

    # Training graph: dropout on, no accuracy evaluation.
    train_loss, _, _, _, _, train_info = _build(self.train_with_graph,
                                                False, True, False)
    adam_updates = Adam(train_loss, self.params, lr=self.learning_rate_var)
    self.info_keys = list(train_info.keys())
    print("Compiling...")
    optimizer = theano.compile.predefined_optimizers[
        'fast_run' if self.check_mode == 'debug' else theano.config.optimizer]
    optimizer = optimizer.excluding(
        "scanOp_pushout_output", "remove_constants_and_unused_inputs_scan")
    # Pick the compilation mode from self.check_mode ('nan', 'debug', or
    # anything else for a plain Mode).
    if self.check_mode == 'nan':
        mode = NanGuardMode(optimizer=optimizer,
                            nan_is_error=True,
                            inf_is_error=True,
                            big_is_error=True)
    elif self.check_mode == 'debug':
        mode = DebugMode(optimizer=optimizer,
                         check_isfinite=False,
                         check_py_code=False,
                         stability_patience=1)
        theano.tensor.TensorType.filter_checks_isfinite = False
    else:
        mode = theano.Mode(optimizer=optimizer)
    # train_fn returns [loss] + info values and applies the Adam updates.
    self.train_fn = theano.function([
        input_words, query_words, correct_output, graph_num_new_nodes,
        graph_new_node_strengths, graph_new_node_ids, graph_new_edges
    ], [train_loss] + list(train_info.values()),
                                    updates=adam_updates,
                                    allow_input_downcast=True,
                                    on_unused_input='ignore',
                                    mode=mode)
    # Evaluation graph: no dropout, accuracy evaluation on.
    eval_loss, _, full_flat_gstates, graph_accurate_list, _, eval_info = _build(
        self.train_with_graph, False, False, True)
    self.eval_info_keys = list(eval_info.keys())
    self.eval_fn = theano.function([
        input_words, query_words, correct_output, graph_num_new_nodes,
        graph_new_node_strengths, graph_new_node_ids, graph_new_edges
    ], [eval_loss, graph_accurate_list] + list(eval_info.values()),
                                   allow_input_downcast=True,
                                   on_unused_input='ignore',
                                   mode=mode)
    # debug_test_fn exposes the graph states of the evaluation graph.
    self.debug_test_fn = theano.function([
        input_words, query_words, correct_output, graph_num_new_nodes,
        graph_new_node_strengths, graph_new_node_ids, graph_new_edges
    ],
                                         full_flat_gstates,
                                         allow_input_downcast=True,
                                         on_unused_input='ignore',
                                         mode=mode)
    # Test graph without correct-graph supervision and without snapping.
    test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
        False, False, False, False)
    self.fuzzy_test_fn = theano.function(
        [input_words, query_words] +
        ([max_seq_len]
         if self.output_format == ModelOutputFormat.sequence else []),
        [final_output] + full_flat_gstates,
        allow_input_downcast=True,
        on_unused_input='ignore',
        mode=mode)
    # Same as above, but with snap_to_best enabled.
    test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
        False, True, False, False)
    self.snap_test_fn = theano.function(
        [input_words, query_words] +
        ([max_seq_len]
         if self.output_format == ModelOutputFormat.sequence else []),
        [final_output] + full_flat_gstates,
        allow_input_downcast=True,
        on_unused_input='ignore',
        mode=mode)
def __init__(self, trainset, testset, testDataset2, num_user, num_item, dim,
             reg, lr, prefix):
    """Build the attention-based pairwise ranking model and compile it.

    Stores the datasets and hyper-parameters on ``self``, wires up the
    user/video embedding layers and the two attention layers, defines a
    pairwise log-sigmoid loss over (user, item i, item j) triples with an
    L2 regulariser, and compiles ``self.train_model`` and
    ``self.test_model``.
    """
    # --- plain attributes -------------------------------------------------
    self.trainset = trainset
    self.testset = testset
    self.testDataset2 = testDataset2
    self.reg = numpy.float32(reg)
    self.lr = numpy.float32(lr)
    self.num_item = num_item
    self.video_features = self.trainset.video_features

    T.config.compute_test_value = 'warn'

    # --- symbolic inputs --------------------------------------------------
    u = T.ivector('u')  # [num_sample,]
    iv = T.ivector('iv')  # [num_sample,]
    jv = T.ivector('jv')  # [num_sample,]
    mask_frame = T.itensor3('mask_frame')  # [num_sample, num_video, num_frame]
    mask = T.imatrix('mask')  # [num_sample, num_video]
    feat = T.ftensor4('feat')

    # Small test values attached to some of the inputs.
    u.tag.test_value = np.asarray([0, 1, 2], dtype='int32')
    iv.tag.test_value = np.asarray([4, 5, 2], dtype='int32')
    jv.tag.test_value = np.asarray([1, 3, 0], dtype='int32')
    mask.tag.test_value = np.asarray([[1, 1, 0], [1, 0, 0], [1, 1, 1]],
                                     dtype='int32')

    # --- layers -----------------------------------------------------------
    rng = np.random
    user_table = UsrEmblayer(rng, num_user, dim, 'usremblayer', prefix)
    video_table = VidEmblayer(rng, num_item, dim, 'videmblayer', prefix)

    user_emb = GetuEmbLayer(u, user_table.output, 'uemb', prefix)
    pos_item_emb = GetvEmbLayer(iv, video_table.output, 'v1emb', prefix)
    neg_item_emb = GetvEmbLayer(jv, video_table.output, 'v2emb', prefix)

    feat_attention = AttentionLayer_Feat(rng, 1000, user_emb.output, feat,
                                         dim, dim, mask_frame,
                                         'attentionlayer_feat', prefix)
    item_attention = AttentionLayer_Item(rng, user_emb.output,
                                         feat_attention.output, dim, dim,
                                         mask, 'attentionlayer_item', prefix)

    layers = [user_table, video_table, feat_attention, item_attention]
    u_vec = user_emb.output + item_attention.output
    self.layers = layers

    # --- scores and loss --------------------------------------------------
    y_ui = T.dot(u_vec, pos_item_emb.output.T).diagonal()
    y_uj = T.dot(u_vec, neg_item_emb.output.T).diagonal()

    self.params = []
    loss = -T.sum(T.log(T.nnet.sigmoid(y_ui - y_uj)))
    for layer in layers:
        self.params += layer.params  # [U,V,W_Tran,Wu,Wv,b,c]

    # L2 penalty on the looked-up embeddings, then on every parameter after
    # the first two entries of self.params.
    regularizer = self.reg * ((user_emb.output**2).sum() +
                              (pos_item_emb.output**2).sum() +
                              (neg_item_emb.output**2).sum())
    for extra_param in self.params[2:]:
        regularizer += self.reg * (extra_param**2).sum()
    loss = regularizer + loss

    # Plain gradient-descent update for every parameter.
    updates = []
    for param in self.params:
        updates.append((param, param - self.lr * T.grad(loss, param)))

    # --- compiled functions -----------------------------------------------
    self.train_model = theano.function(
        inputs=[u, iv, jv, mask_frame, mask, feat],
        outputs=loss,
        updates=updates)

    test_outputs = [
        u_vec, video_table.output, item_attention.atten, feat_attention.atten
    ]
    self.test_model = theano.function(inputs=[u, mask_frame, mask, feat],
                                      outputs=test_outputs)
def __init__(self, config, testMode):
    """Build the first ``config['useLayers']`` layers of AlexNet.

    Parameters
    ----------
    config : mapping
        Read keys: ``batch_size``, ``lib_conv``, ``useLayers``,
        ``initWeights``, ``prob_drop``, ``mean_file`` and, when
        ``initWeights`` is truthy, ``weightsDir`` and ``weightFileTag``.
    testMode : bool
        When true, the dropout layers after the fully connected layers are
        bypassed and the fully connected layer feeds the next layer
        directly.

    After construction ``self.forwardFunction`` maps an input batch (and
    optionally a mean image, defaulting to the tiled contents of
    ``config['mean_file']``) to the output of the last built layer
    (``y_pred`` when the softmax layer is included).
    """
    self.config = config
    batch_size = config['batch_size']
    lib_conv = config['lib_conv']
    useLayers = config['useLayers']
    # Whether to initialise the network from stored weights (requires
    # layers.py to accept initial weights).
    initWeights = config['initWeights']
    if initWeights:
        weightsDir = config['weightsDir']
        weightFileTag = config['weightFileTag']
    else:
        # These names are passed to every layer below, so they must be
        # bound even when no weights are loaded (previously a NameError).
        # NOTE(review): assumes the layers ignore them when initWeights is
        # false and that config is dict-like - confirm against layers.py.
        weightsDir = config.get('weightsDir')
        weightFileTag = config.get('weightFileTag', '')
    prob_drop = config['prob_drop']

    # ##################### BUILD NETWORK ##########################
    x = T.ftensor4('x')
    mean = T.ftensor4('mean')

    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def _register(layer):
        # Record a trainable layer and collect its parameters.
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)

    def _weight_files(*stems):
        # Weight file names are '<stem><weightFileTag>'.
        return [stem + weightFileTag for stem in stems]

    if useLayers >= 1:
        convpool_layer1 = ConvPoolLayer(input=x - mean,
                                        image_shape=(3, None, None, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=_weight_files('W_0', 'b_0'))
        _register(convpool_layer1)

    if useLayers >= 2:
        # Spatial dims are left as None so they follow conv1's output.
        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, None, None, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1, padsize=2, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=_weight_files(
                                            'W0_1', 'W1_1', 'b0_1', 'b1_1'))
        _register(convpool_layer2)

    if useLayers >= 3:
        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, None, None, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1, padsize=1, group=1,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=_weight_files('W_2', 'b_2'))
        _register(convpool_layer3)

    if useLayers >= 4:
        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=_weight_files(
                                            'W0_3', 'W1_3', 'b0_3', 'b1_3'))
        _register(convpool_layer4)

    if useLayers >= 5:
        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=_weight_files(
                                            'W0_4', 'W1_4', 'b0_4', 'b1_4'))
        _register(convpool_layer5)

    if useLayers >= 6:
        # Bring the batch axis first, then flatten everything else.
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096,
                            initWeights=initWeights, weightsDir=weightsDir,
                            weightFiles=_weight_files('W_5', 'b_5'))
        _register(fc_layer6)
        if testMode:
            dropout_layer6 = fc_layer6
        else:
            dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096,
                                          n_out=4096, prob_drop=prob_drop)

    if useLayers >= 7:
        fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096,
                            n_out=4096, initWeights=initWeights,
                            weightsDir=weightsDir,
                            weightFiles=_weight_files('W_6', 'b_6'))
        _register(fc_layer7)
        if testMode:
            # Fixed: this used to rebind dropout_layer6, leaving
            # dropout_layer7 undefined for the softmax layer in test mode.
            dropout_layer7 = fc_layer7
        else:
            dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096,
                                          n_out=4096, prob_drop=prob_drop)

    if useLayers >= 8:
        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output,
                                      n_in=4096, n_out=1000,
                                      initWeights=initWeights,
                                      weightsDir=weightsDir,
                                      weightFiles=_weight_files('W_7', 'b_7'))
        _register(softmax_layer8)

    # #################### NETWORK BUILT #######################
    self.output = self.layers[useLayers - 1]
    self.params = params
    self.x = x
    self.mean = mean
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.useLayers = useLayers
    self.outLayer = self.layers[useLayers - 1]

    # x is 4d with `batch_size` images on the last axis, while the stored
    # mean has no batch axis: add one and tile it so the subtraction works.
    meanVal = np.load(config['mean_file'])
    meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
    meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
    self.meanVal = meanVal

    if useLayers >= 8:
        # if last layer is softmax, then its output is y_pred
        finalOut = self.outLayer.y_pred
    else:
        finalOut = self.outLayer.output

    self.forwardFunction = theano.function(
        [self.x, In(self.mean, value=meanVal)], [finalOut])
def __init__(self, model_network=None, gamma=0.99, learning_method="rmsprop", batch_size=32, input_size=None,
             learning_params=None, dnn_type=True, clip_delta=0, scale=255., double_q=False,
             prioritized_exp_replay=False, heads_num=1, action_num=0):
    """Build a (multi-head) DQN learner and compile its Theano functions.

    Two ``Model`` instances are created from ``model_network``: the online
    network ``Q_model`` and the target network ``Q_prime_model``. The
    network output is treated as ``heads_num`` consecutive blocks of
    ``action_num`` Q-values, one block per head.

    Compiled functions (all read their inputs from the ``*_shared``
    buffers):

    * ``train_model()`` - returns the concatenated per-head TD errors and
      applies one learning step to ``Q_model``.
    * ``pred_score()`` - returns all Q-values for ``x_shared``.
    * ``update_target_params()`` - copies ``Q_model`` parameters into
      ``Q_prime_model``; called once at the end of construction.

    Notes
    -----
    * ``input_size`` must be a list; its first entry is replaced by
      ``batch_size`` for the shared buffers.
    * ``double_q`` is only supported for a single head; with more heads
      a ``NotImplementedError`` is raised (previously this path failed
      with a ``NameError``).
    * ``prioritized_exp_replay`` is accepted but not used here.
    """
    x = T.ftensor4()
    next_x = T.ftensor4()
    a = T.ivector()
    r = T.fvector()
    terminal = T.ivector()

    self.heads_num = heads_num
    self.action_num = action_num

    # Shared buffers through which minibatches are fed to the functions.
    frames_shape = tuple([batch_size] + input_size[1:])
    self.x_shared = theano.shared(np.zeros(frames_shape, dtype='float32'))
    self.next_x_shared = theano.shared(
        np.zeros(frames_shape, dtype='float32'))
    self.a_shared = theano.shared(np.zeros((batch_size), dtype='int32'))
    self.terminal_shared = theano.shared(
        np.zeros((batch_size), dtype='int32'))
    self.r_shared = theano.shared(np.zeros((batch_size), dtype='float32'))

    self.Q_model = Model(model_network, input_size=input_size,
                         dnn_type=dnn_type)
    self.Q_prime_model = Model(model_network, input_size=input_size,
                               dnn_type=dnn_type)

    # 1 for non-terminal transitions, 0 for terminal ones.
    not_terminal = T.ones_like(terminal) - terminal

    if double_q:
        if heads_num != 1:
            raise NotImplementedError(
                "double_q is only supported with heads_num == 1")
        # Actions are chosen by the online network and evaluated by the
        # target network.
        alt_actions = T.argmax(self.Q_model.apply(next_x / scale), axis=1)
        alt_actions = theano.gradient.disconnected_grad(alt_actions)
        target_q = self.Q_prime_model.apply(next_x / scale)
        y_concat = r + not_terminal * gamma * target_q[
            T.arange(alt_actions.shape[0]), alt_actions]
    else:
        q_stack = self.Q_prime_model.apply(next_x / scale)
        # Head k owns columns [k * action_num, (k + 1) * action_num).
        q_list = [
            q_stack[T.arange(a.shape[0]),
                    k * self.action_num:(k + 1) * self.action_num]
            for k in range(self.heads_num)
        ]
        y_list = [
            r + not_terminal * gamma * T.max(q_list[k], axis=1)
            for k in range(self.heads_num)
        ]
        y_concat = theano.tensor.concatenate(y_list, axis=0)

    all_q_vals = self.Q_model.apply(x / scale)
    # Q-value of the taken action in each head. The column offset of head
    # k is k * action_num, matching the slicing used for the targets
    # (this used to be k * heads_num, which selects the wrong columns
    # whenever heads_num > 1).
    q_vals_list = [
        all_q_vals[T.arange(a.shape[0]), a + k * self.action_num]
        for k in range(self.heads_num)
    ]
    q_vals_concat = theano.tensor.concatenate(q_vals_list, axis=0)

    td_errors = y_concat - q_vals_concat

    if clip_delta > 0:
        # If we simply take the squared clipped diff as our loss,
        # then the gradient will be zero whenever the diff exceeds
        # the clip bounds. To avoid this, we extend the loss
        # linearly past the clip point to keep the gradient constant
        # in that regime.
        #
        # This is equivalent to declaring d loss/d q_vals to be
        # equal to the clipped diff, then backpropagating from
        # there, which is what the DeepMind implementation does.
        quadratic_part = T.minimum(abs(td_errors), clip_delta)
        linear_part = abs(td_errors) - quadratic_part
        cost = 0.5 * quadratic_part**2 + clip_delta * linear_part
    else:
        cost = 0.5 * td_errors**2
    cost = T.sum(cost)

    print(self.Q_model.params)

    # learning_params defaults to None, which cannot be **-expanded.
    self.learning_method = self.Q_model.get_learning_method(
        learning_method, **(learning_params or {}))
    grads = T.grad(cost, self.Q_model.params)
    param_updates = self.learning_method.apply(self.Q_model.params, grads)

    # Target network update: copy every online parameter across.
    target_updates = OrderedDict()
    for t, b in zip(self.Q_prime_model.params, self.Q_model.params):
        target_updates[t] = b

    givens = {
        x: self.x_shared,
        a: self.a_shared,
        r: self.r_shared,
        terminal: self.terminal_shared,
        next_x: self.next_x_shared
    }

    print("building")
    self.train_model = theano.function([], td_errors,
                                       updates=param_updates,
                                       givens=givens)
    print("compiled train_model (1/3)")
    self.pred_score = theano.function([], all_q_vals,
                                      givens={x: self.x_shared})
    print("compiled pred_score (2/3)")
    self.update_target_params = theano.function([], [],
                                                updates=target_updates)
    print("compiled update_target_params (3/3)")
    self.update_target_params()
    print("updated target params")
import theano
from theano import tensor, config
from blocks.bricks import (
    BatchNormalization,
    Rectifier,
    Linear,
    Softmax,
    MLP,
    BatchNormalizedMLP,
    FeedforwardSequence,
)
from blocks.bricks.conv import (
    Convolutional,
    ConvolutionalSequence,
    Flattener,
    MaxPooling,
)
from blocks.initialization import IsotropicGaussian, Uniform, Constant
from blocks.select import Selector
from blocks.graph import ComputationGraph, apply_dropout
from blocks.filter import VariableFilter
from blocks.roles import OUTPUT
import numpy
from elementary_blocks_simple import VGG, top_direction_block, StructuredCost

# Module-level symbolic inputs (4D float32 tensors).
images = tensor.ftensor4('images')
labels = tensor.ftensor4('labels')


def build_model(images, labels):
    """Instantiate and initialise the VGG + top-direction-block pipeline.

    Each brick has its initialization config pushed and is initialised
    before the next one is created; the two ``apply`` methods are then
    chained in a ``FeedforwardSequence`` which is initialised the same way.
    """

    def _prepare(brick):
        # Push the initialization config down, then initialise parameters.
        brick.push_initialization_config()
        brick.initialize()
        return brick

    feature_net = _prepare(VGG(layer='conv4_4'))
    direction_head = _prepare(top_direction_block())

    # Construct feedforward sequence
    pipeline = _prepare(
        FeedforwardSequence([feature_net.apply, direction_head.apply]))
def test_dnn_conv_alpha_output_merge():
    """Compare cuDNN conv graphs with and without the alpha/output merges.

    The same three expressions (forward conv, weight gradient, input
    gradient, each combined with a scale and/or an added tensor) are
    compiled twice: once with the default GPU mode and once with the six
    ``local_dnn_conv*_{alpha,output}_merge`` optimizations excluded. The
    first compilation must expose the cuDNN op directly under each output,
    the second must not, and both must produce matching values.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img, kern, out = T.ftensor4(), T.ftensor4(), T.ftensor4()

    # batch, channels, filters, image height/width, kernel height/width
    b, c, f = 1, 4, 3
    ih, iw = 5, 8
    kh, kw = 2, 6

    # NOTE: the three arrays are drawn in this order on purpose so that the
    # global numpy random stream is consumed exactly as before.
    img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
    kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
    out_val = numpy.random.random(
        (b, f, ih - kh + 1, iw - kw + 1)).astype('float32')

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype='float32')
    fr = lr * (conv + out)
    wr = kern + lr * gw
    ir = img + lr * gi

    inputs = [img, kern, out]
    results = [fr, wr, ir]
    # Op expected under output i once the merges have been applied.
    expected_ops = (dnn.GpuDnnConv, dnn.GpuDnnConvGradW, dnn.GpuDnnConvGradI)

    def producer_op(fn, idx):
        # Op of the node feeding the first input of output `idx`.
        return fn.maker.fgraph.outputs[idx].owner.inputs[0].owner.op

    f1 = theano.function(inputs, results, mode=mode_with_gpu)
    for idx, op_cls in enumerate(expected_ops):
        assert isinstance(producer_op(f1, idx), op_cls)

    mode = mode_with_gpu
    for opt_name in ('local_dnn_conv_alpha_merge',
                     'local_dnn_convw_alpha_merge',
                     'local_dnn_convi_alpha_merge',
                     'local_dnn_conv_output_merge',
                     'local_dnn_convw_output_merge',
                     'local_dnn_convi_output_merge'):
        mode = mode.excluding(opt_name)

    f2 = theano.function(inputs, results, mode=mode)
    for idx, op_cls in enumerate(expected_ops):
        assert not isinstance(producer_op(f2, idx), op_cls)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)
    for merged, unmerged in zip(out_f1, out_f2):
        utt.assert_allclose(merged, unmerged)
def random_epoch_train_begining(learning_rate=0.05,
                                weight_decay=0.001,
                                nkerns=[20, 50],
                                n_epochs=200,
                                batch_size=500,
                                dataset='mnist.pkl.gz',
                                name_given='test'):
    """Train a two-stage conv/pool network with one random shift per epoch.

    The network is two ``LeNetConvPoolLayer`` stages, a tanh ``HiddenLayer``
    with 500 units and a 10-way ``LogisticRegression``.  At the start of
    every epoch the train, validation and test images are all translated by
    the same pre-drawn (horizontal, vertical) shift via
    ``theano_translation_updating`` and copied into shared buffers that the
    compiled functions read from.

    Side effects: writes ``rand_trans_x.npy`` / ``rand_trans_y.npy`` (the
    shift tables), ``<name_given>.pkl`` (pickled layers) and
    ``<name_given>.mat`` (validation error per epoch, in percent) to the
    current directory and prints progress to stdout.

    :param learning_rate: SGD step size.
    :param weight_decay: coefficient of the L2 term added to each gradient.
    :param nkerns: number of kernels in the first and second conv stage
        (only indexed, never mutated).
    :param n_epochs: maximum number of epochs.
    :param batch_size: minibatch size used for the graph and all slices.
    :param dataset: dataset path handed to ``loaddata_mnist``.
    :param name_given: base name of the output ``.pkl`` / ``.mat`` files.
    :returns: None.
    """
    name = name_given
    rng = numpy.random.RandomState(23455)

    datasets = loaddata_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    # Images are handled as (n, 1, 28, 28) from here on.
    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    def make_buffer(symbolic_set):
        """Return a zero shared buffer shaped like the set, plus a view of it
        with axis 1 marked broadcastable."""
        buf = theano.shared(numpy.zeros(symbolic_set.shape.eval(),
                                        dtype=theano.config.floatX),
                            borrow=True)
        return buf, T.Rebroadcast((1, True))(buf)

    temp_train_set_x, temp_train_set_xx = make_buffer(train_set_x)
    temp_valid_set_x, temp_valid_set_xx = make_buffer(valid_set_x)
    temp_test_set_x, temp_test_set_xx = make_buffer(test_set_x)

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    # Helper functions that overwrite a buffer with freshly translated data.
    replace_train = theano.function([dummy], temp_train_set_x,
                                    updates=[(temp_train_set_x, dummy)])
    replace_valid = theano.function([dummy], temp_valid_set_x,
                                    updates=[(temp_valid_set_x, dummy)])
    replace_test = theano.function([dummy], temp_test_set_x,
                                   updates=[(temp_test_set_x, dummy)])

    print('... loading the model')

    layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer (tanh activation)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)
    updates = [(param_i,
                param_i - learning_rate * (grad_i + weight_decay * param_i))
               for param_i, grad_i in zip(params, grads)]

    patience_increase = 2
    improvement_threshold = 0.995

    start_time = timeit.default_timer()

    # One (horizontal, vertical) shift in [-10, 10] per epoch.  The table
    # keeps its historical minimum length of 200 and grows with n_epochs so
    # that indexing by epoch can never run off the end.
    n_shifts = max(n_epochs, 200)
    rand_trans_x = numpy.random.randint(-10, 11, n_shifts)
    rand_trans_y = numpy.random.randint(-10, 11, n_shifts)
    numpy.save('rand_trans_x.npy', rand_trans_x)
    numpy.save('rand_trans_y.npy', rand_trans_y)
    error_line = numpy.zeros(n_epochs)

    # Minibatch slice bounds.  These must follow batch_size: the graph input
    # is reshaped to (batch_size, 1, 28, 28) and the batch counts above are
    # derived from batch_size as well.
    batch_start = index * batch_size
    batch_end = (index + 1) * batch_size

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_test_set_xx[batch_start:batch_end],
            y: test_set_y[batch_start:batch_end]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_valid_set_xx[batch_start:batch_end],
            y: valid_set_y[batch_start:batch_end]
        })

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            layer0.input: temp_train_set_xx[batch_start:batch_end],
            y: train_set_y[batch_start:batch_end]
        })

    print('... training')

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.

    patience = 20000
    validation_frequency = min(n_train_batches, patience // 2)

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        horizontal = rand_trans_x[epoch]
        vertical = rand_trans_y[epoch]

        # Apply this epoch's shift to all three sets and refresh the buffers.
        tran_test_set_x = theano_translation_updating(
            test_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_valid_set_x = theano_translation_updating(
            valid_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_train_set_x = theano_translation_updating(
            train_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        replace_test(tran_test_set_x)
        replace_valid(tran_valid_set_x)
        replace_train(tran_train_set_x)

        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('Horizontal Shift:', horizontal, 'Vertical Shift:',
                      vertical)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                error_line[epoch - 1] = this_validation_loss

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                                improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    with open(name + '.pkl', 'wb') as f:
        pickle.dump([layer0, layer1, layer2_input, layer2, layer3], f)

    # NOTE(review): this slice drops the entry of the last epoch that ran --
    # presumably because an early stop can leave that entry unset; confirm.
    error_line = error_line[0:epoch - 1] * 100
    scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line})

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
def test_pseudo_grad(self):
    """Train a ``CNN`` on MNIST with ``pseudograd`` updates and plot the loss.

    Builds the symbolic loss (mean ``categorical_accuracy`` plus a small
    regularisation term), compiles a training function with the
    ``pseudograd`` updates, then runs 2**10 random minibatches of 32 samples
    and plots the recorded losses.

    The ``mnist`` loader module and the MNIST files are fetched with
    ``wget`` on first use (network access and a POSIX shell are required).
    """
    import subprocess as sb

    cnn = CNN()
    X = T.ftensor4('X')
    y = T.fmatrix('y')
    predictions = cnn(X)

    # Single-argument form prints the same text under Python 2 and 3.
    print(cnn.description())

    loss = T.mean(objectives.categorical_accuracy(predictions, y))
    loss += 1.0e-5 * cnn.reg()

    upd = pseudograd(loss,
                     cnn.params(learnable=True),
                     temperature=1.0e+1,
                     learning_rate=1.0e-2)

    train = theano.function([X, y], loss, updates=upd)

    # Fetch the loader module only when it is really missing.
    try:
        import mnist
    except ImportError:
        sb.check_call(
            'wget -q -nc https://raw.githubusercontent.com/amitgroup/amitgroup/master/amitgroup/io/mnist.py',
            shell=True)
        import mnist

    def load_split(split):
        """Load one MNIST split as (float32 NCHW images, one-hot labels)."""
        images, labels = mnist.load_mnist(dataset=split, path='mnist/')
        return (images.reshape(-1, 1, 28, 28).astype('float32'),
                onehot(labels, 10))

    try:
        X, y = load_split('training')
        X_test, y_test = load_split('testing')
    except Exception:
        # Best effort: whatever went wrong while reading the files, download
        # them and retry exactly once.  A failure of the retry propagates.
        sb.check_call("""
            mkdir -p mnist && {
              cd mnist;
              wget -q -nc http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz &&
              wget -q -nc http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz &&
              wget -q -nc http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz &&
              wget -q -nc http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz &&
              gunzip *.gz
            }
            """, shell=True)
        X, y = load_split('training')
        X_test, y_test = load_split('testing')

    n_batches = 2**10
    losses = np.zeros(shape=(n_batches))

    for i, indx in enumerate(
            BatchStreams.random_batch_stream(X.shape[0],
                                             batch_size=32,
                                             n_batches=n_batches)):
        losses[i] = train(X[indx], y[indx])

    plt.figure()
    plt.plot(losses)
    plt.show()

    # NOTE(review): unconditional failure kept from the original --
    # presumably so the test runner displays the captured output; confirm.
    assert False
def RelationStackMaker(chips, params, graph=False, weighted=False, batched=False): if batched: emb_input = T.itensor3('emb_input') entities_tv = [ T.fmatrix('enidx_' + str(i)).astype(theano.config.floatX) for i in range(params['num_entity']) ] sample_weights = T.fvector('sample_weight') if graph: if weighted: masks = T.ftensor4('child_mask') else: masks = T.ftensor3('child_mask') else: masks = T.fmatrix('batch_mask') else: emb_input = T.imatrix('emb_input') entities_tv = [ T.fvector('enidx_' + str(i)).astype(theano.config.floatX) for i in range(params['num_entity']) ] sample_weights = T.fvector('sample_weight') if graph: if weighted: masks = T.ftensor3('child_mask') else: masks = T.fmatrix('child_mask') else: masks = None #print masks, type(masks), masks.ndim current_chip = Start(params['voc_size'], emb_input) print '\n', 'Building Stack now', '\n', 'Start: ', params[ 'voc_size'], 'out_tv dim:', current_chip.output_tv.ndim instantiated_chips = stackLayers(chips, current_chip, params, entity_size=params['num_entity']) regularizable_params = computeLayers(instantiated_chips, current_chip, params, entities_input=entities_tv, mask=masks, sample_weights=sample_weights) ### Debug use: Get the attention co-efficiency and visualize. 
### for c in instantiated_chips: if c[1].endswith('Entity_Att'): assert hasattr(c[0], 'att_wt_arry') assert hasattr(c[0], 'entity_tvs') attention_weights = c[0].att_wt_arry entity_tvs = c[0].entity_tvs current_chip = instantiated_chips[-1][0] if current_chip.output_tv.ndim == 2: pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv, axis=1) else: pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv) #, axis=1) gold_y = (current_chip.gold_y if hasattr(current_chip, 'gold_y') else None) # Show all parameters that would be needed in this system params_needed = calculate_params_needed(instantiated_chips) print "Parameters Needed", params_needed for k in params_needed: assert k in params, k print k, params[k] assert hasattr(current_chip, 'score') cost = current_chip.score #/ params['nsentences'] cost_arr = [cost] for layer in instantiated_chips[:-1]: if hasattr(layer[0], 'score'): print layer[1] cost += params['cost_coef'] * layer[0].score cost_arr.append(params['cost_coef'] * layer[0].score) grads = T.grad(cost, wrt=regularizable_params) #[params[k] for k in params if (hasattr(params[k], 'is_regularizable') and params[k].is_regularizable)]) print 'Regularizable parameters:' for k, v in params.items(): if hasattr(v, 'is_regularizable'): print k, v, v.is_regularizable if graph or batched: #return (emb_input, masks, entities_tv, attention_weights, entity_tvs, gold_y, pred_y, cost, grads, regularizable_params) return (emb_input, masks, entities_tv, sample_weights, gold_y, pred_y, cost, grads, regularizable_params) else: return (emb_input, entities_tv, sample_weights, gold_y, pred_y, cost, grads, regularizable_params, sample_weights)
def __init__(self, class_size, architecture, n_hidden_neurons=30, conv_type="class"):
    """
    Initialization of Classification neural network.

    Allocates all weight tensors, builds the symbolic model for the chosen
    problem type / architecture and compiles ``self.train`` (one RMSprop
    step, returns the cost) and ``self.predict_`` (model output).

    :param class_size: Number of output classes for neural network.
    :param architecture: architecture of neural network, one of "3c2f",
                         "2c1f" or "1c2f" (only used when ``conv_type`` is
                         "class").
    :param n_hidden_neurons: Number of hidden neurons in every hidden layer
                             in neural network architecture.
    :param conv_type: "class" for classification and "reg" for regression.
    :raises ValueError: if ``conv_type`` or, for classification,
                        ``architecture`` is not one of the supported values.
    """
    self.class_size = class_size
    self.n_hidden_neurons = n_hidden_neurons
    self.n_kernels = 32
    self.k = 6
    self.final_image_size = 2816  # TODO: Manual fast fix change to a better solution!

    X = T.ftensor4()
    Y = T.fmatrix()

    # All weights are allocated up front; each architecture uses a subset.
    self.w_h = nnet.init_weights((self.n_kernels, 1, 1, self.k * 4))
    self.w_h2 = nnet.init_weights(
        (self.n_kernels * 2, self.n_kernels, 1, self.k))
    self.w_h3 = nnet.init_weights(
        (self.n_kernels * 4, self.n_kernels * 2, 1, self.k))
    self.w_h4 = nnet.init_weights(
        (self.final_image_size, self.n_hidden_neurons))
    self.w_h5 = nnet.init_weights(
        (self.n_hidden_neurons, self.n_hidden_neurons))
    self.w_o = nnet.init_weights((self.n_hidden_neurons, self.class_size))

    if conv_type == "reg":
        self.noise_py_x = nnet.conv_model_reg(X, self.w_h, self.w_h2,
                                              self.w_h3, self.w_h4,
                                              self.w_h5, self.w_o, 0., 0.)
        self.py_x = nnet.conv_model_reg(X, self.w_h, self.w_h2, self.w_h3,
                                        self.w_h4, self.w_h5, self.w_o, 0.,
                                        0.)
        self.cost = nnet.rmse(self.noise_py_x, Y)
        self.params = [
            self.w_h, self.w_h2, self.w_h3, self.w_h4, self.w_h5, self.w_o
        ]  # 3c2f
    elif conv_type == "class":
        if architecture == "3c2f":
            self.noise_py_x = nnet.conv_model(X, self.w_h, self.w_h2,
                                              self.w_h3, self.w_h4,
                                              self.w_h5, self.w_o, 0, 0)
            self.py_x = nnet.conv_model(X, self.w_h, self.w_h2, self.w_h3,
                                        self.w_h4, self.w_h5, self.w_o, 0.,
                                        0.)
            self.params = [
                self.w_h, self.w_h2, self.w_h3, self.w_h4, self.w_h5,
                self.w_o
            ]
        elif architecture == "2c1f":
            self.noise_py_x = nnet.conv_model2(X, self.w_h, self.w_h2,
                                               self.w_h4, self.w_o, 0.0, 0.0)
            self.py_x = nnet.conv_model2(X, self.w_h, self.w_h2, self.w_h4,
                                         self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_h2, self.w_h4, self.w_o]
        elif architecture == "1c2f":
            self.noise_py_x = nnet.conv_model3(X, self.w_h, self.w_h4,
                                               self.w_h5, self.w_o, 0.0, 0.0)
            self.py_x = nnet.conv_model3(X, self.w_h, self.w_h4, self.w_h5,
                                         self.w_o, 0., 0.)
            # w_h5 is an input of conv_model3, so it has to be trained too.
            self.params = [self.w_h, self.w_h4, self.w_h5, self.w_o]
        else:
            raise ValueError(
                "unsupported architecture %r (expected '3c2f', '2c1f' or "
                "'1c2f')" % (architecture,))

        # Classification cost; kept inside this branch so the regression
        # RMSE cost set above is never overwritten.
        self.cost = T.mean(
            T.nnet.categorical_crossentropy(self.noise_py_x, Y))
    else:
        raise ValueError(
            "unsupported conv_type %r (expected 'class' or 'reg')" %
            (conv_type,))

    updates = nnet.RMSprop(self.cost, self.params, lr=0.001)
    self.train = theano.function(inputs=[X, Y],
                                 outputs=self.cost,
                                 updates=updates,
                                 allow_input_downcast=True)
    self.predict_ = theano.function(inputs=[X],
                                    outputs=self.py_x,
                                    allow_input_downcast=True)
def train_net(self, train, train_targets, valid, valid_targets, init_learning_rate=3 * 1e-5, batch_size=256, n_units_1=128, n_units_2=128, n_units_3=128, num_epochs=140):
    """Build a three-stage conv net, train it with Adam and track progress.

    Each stage is a batch-normalised 5x5 ``Conv2DLayer`` (``n_units_1`` /
    ``n_units_2`` / ``n_units_3`` filters, "same" padding, ReLU) followed by
    a 3x3 max-pool with stride 2; a softmax ``DenseLayer`` with
    ``self.num_classes`` units sits on top.  Minibatches come from
    ``self.iterate_minibatches``.

    Returns four arrays of length ``num_epochs``:
    ``(learning_curve, cost, train_loss, valid_loss)`` where
    ``learning_curve`` is one minus the mean validation accuracy, ``cost``
    is the wall-clock time in seconds since the call started, and the two
    losses are per-epoch means over the minibatches.
    """
    start_time = time.time()

    input_var = T.ftensor4('inputs')
    target_var = T.ivector('targets')

    # Build net
    network = lasagne.layers.InputLayer(shape=(None, train.shape[1],
                                               train.shape[2],
                                               train.shape[3]),
                                        input_var=input_var)

    for num_filters in (n_units_1, n_units_2, n_units_3):
        conv = lasagne.layers.Conv2DLayer(
            network,
            num_filters=num_filters,
            filter_size=(5, 5),
            pad="same",
            stride=1,
            W=lasagne.init.HeNormal(),
            b=lasagne.init.Constant(val=0.0),
            nonlinearity=lasagne.nonlinearities.rectify)
        normalised = lasagne.layers.batch_norm(conv)
        network = lasagne.layers.MaxPool2DLayer(normalised,
                                                pool_size=3,
                                                stride=2)

    network = lasagne.layers.DenseLayer(
        network,
        num_units=self.num_classes,
        nonlinearity=lasagne.nonlinearities.softmax)

    # Define Theano functions
    params = lasagne.layers.get_all_params(network, trainable=True)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var).mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    learning_rate = theano.shared(np.float32(init_learning_rate))
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)

    train_fn = theano.function([input_var, target_var],
                               loss,
                               updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    print("Starting training...")

    learning_curve = np.zeros([num_epochs])
    cost = np.zeros([num_epochs])
    train_loss = np.zeros([num_epochs])
    valid_loss = np.zeros([num_epochs])

    for e in range(num_epochs):
        epoch_start_time = time.time()

        # One full pass over the (shuffled) training data.
        batch_losses = [
            train_fn(inputs, targets)
            for inputs, targets in self.iterate_minibatches(
                train, train_targets, batch_size, shuffle=True)
        ]
        train_err = sum(batch_losses)
        train_batches = len(batch_losses)

        # One full pass over the validation data.
        val_results = [
            val_fn(inputs, targets)
            for inputs, targets in self.iterate_minibatches(
                valid, valid_targets, batch_size, shuffle=False)
        ]
        val_err = sum(err for err, _ in val_results)
        val_acc = sum(acc for _, acc in val_results)
        val_batches = len(val_results)

        print("Epoch {} of {} took {:.3f}s".format(
            e + 1, num_epochs,
            time.time() - epoch_start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

        learning_curve[e] = 1 - val_acc / val_batches
        cost[e] = time.time() - start_time
        train_loss[e] = train_err / train_batches
        valid_loss[e] = val_err / val_batches

    return learning_curve, cost, train_loss, valid_loss
import theano
from confusionmatrix import ConfusionMatrix
from lasagne.objectives import *
from lasagne.updates import *
import theano.tensor as T
from theano.tensor import *
from theano.tensor.signal import downsample
import lasagne
import numpy as np
import try_DP as DP
from theano.tensor import nnet
import lasagne.layers.dnn

# 5-D float32 tensor type with no broadcastable dimension.
dtensor5 = TensorType('float32', (False,)*5)

# Three symbolic 4-D image inputs, named after the XY, XZ and YZ planes.
input_var = T.ftensor4('XY')
input2_var = T.ftensor4('XZ')
input3_var = T.ftensor4('YZ')
target_var = T.matrix('Y_train')
x1 = T.matrix('x1')
x2 = T.matrix('x2')
x3 = T.matrix('x3')

# Side length of the square, single-channel input patches (see InputLayer).
PS = 29

# Build Neural Network:
# Conv Net XY Plane
# NOTE: `input` shadows the Python builtin of the same name from here on.
input = lasagne.layers.InputLayer((None, 1, PS, PS), input_var=input_var)
l_conv_1 = lasagne.layers.dnn.Conv2DDNNLayer(input, 20, (9,9))
l_maxpool_1 = lasagne.layers.dnn.Pool2DDNNLayer(l_conv_1, (3,3))
def main5(): docs = T.ftensor4("docs") dsnv = T.fvector("dsn") swnm = T.fmatrix("swn") dw = T.fmatrix("dw") sw = T.fmatrix("sw") def localConv(doc, dsn, swnv, dww, sww): # t = T.arange(docSentenceSize) # ccc = docs[t.nonzero()] t = T.arange(dsn).nonzero() t = (T.arange(10000) < dsn).nonzero() # print t # t=T.arange(dsn) docSub = doc[t] p = printing.Print('docSub') docSub = p(docSub) swnvSub = swnv[t] def sentenceConv(sen, wn, sww): t = (T.arange(10000) < wn).nonzero() senSub = sen[t] convRes = theano.tensor.signal.conv.conv2d(senSub, sww) sentence_pool = theano.tensor.signal.downsample.max_pool_2d( convRes, (100000, 1)).flatten(1) return sentence_pool sentenceLayer, _ = theano.scan( fn=lambda sen, wn, sww: sentenceConv(sen, wn, sww), non_sequences=[sww], sequences=[docSub, swnvSub]) convRes = theano.tensor.signal.conv.conv2d(sentenceLayer, dww) sentence_pool = theano.tensor.signal.downsample.max_pool_2d( convRes, (100000, 1)).flatten(1) return sentence_pool res, _ = theano.scan(fn=lambda doc, dsn, swnv, dww, sww: localConv( doc, dsn, swnv, dww, sww), non_sequences=[dw, sw], sequences=[docs, dsnv, swnm]) # p = printing.Print('res') # res = p(res) cost = res.sum() g = T.grad(cost, [dw, sw]) f = theano.function([docs, dsnv, swnm, dw, sw], g) d = [[[[2, 2, 3, 4], [1, 2, 3, 4], [3, 1, 2, 3], [6, 4, 2, 1], [0, 0, 0, 0]], [[4, 3, 2, 1], [4, 6, 9, 2], [6, 6, 3, 1], [2, 5, 2, 9], [3, 2, 1, 7]]], [[[9, 8, 7, 6], [5, 4, 3, 2], [1, 9, 8, 7], [6, 5, 4, 3], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]] docSentenceCount = [2, 1] sentenceWordCount = [[4, 5], [4, 0]] docF = [[1, 1]] senF = [[1, 2], [1, 2]] print f(d, docSentenceCount, sentenceWordCount, docF, senF) print "All finished!"
def CCD(Number_Conv_Layer = 4 ,
        Number_Conv_feature = [32,32,64,128] ,
        Kernel_size = [3,3,5,5],
        Activation_Conv = [relu,relu,relu, relu],#[T.tanh,T.tanh,T.tanh, T.tanh],#
        pooling_size = [3,3,3,3],
        stride = [2,2,2,2],
        Number_Hidden_Layer = 3 ,
        Number_Hidden_feature = [256, 324, 128] ,
        Activation_Hidden = [relu,relu,relu ],#[T.tanh,T.tanh,T.tanh ],#
        learning_rate = 0.01,
        momentum = 0.9,
        batch_size = 50,
        input_size = 256,
        Number_Classes = 2,
        n_epochs = 300,
        N_train_example = 22500,
        N_test_example = 2500 ,
        color = True,
        ):
    """Build, train and evaluate a conv net on the data from ``load_color()``.

    The network is ``Number_Conv_Layer`` ``LeNetConvPoolLayer`` stages,
    ``Number_Hidden_Layer`` ``HiddenLayer`` stages and a
    ``LogisticRegression`` output.  The cost is the negative log-likelihood
    plus 0.001 times the sum of absolute values of ``Params[8]``,
    ``Params[10]`` and ``Params[12]``; updates come from
    ``gradient_updates_momentum``.

    Data layout used by the loops below: the first ``N_train_example``
    samples of ``X_t`` / ``y_t`` are the training set, the next
    ``N_test_example`` samples are the test set.  Every minibatch is divided
    by its own maximum before being fed to the network.

    After every epoch the test and train error and NLL are computed and
    printed; on every 50th epoch (and when ``epoch == 70``) both error
    curves are plotted and saved to ``./MSE_test<epoch>.png``.

    Nothing is returned.

    NOTE(review): this is Python 2 code (``print`` statements, ``xrange``);
    ``N_train_example /batch_size`` relies on integer division there.
    NOTE(review): the zero-padding sizes 127/126 and 29/28 below are
    hard-coded -- presumably they match the default ``input_size``,
    ``Kernel_size``, ``pooling_size`` and ``stride`` only; confirm before
    changing any of those arguments.
    """
    print '... building the model'
    # Number of input channels.
    if color:
        c = 3
    else:
        c =1
    # Unseeded generator: weight initialisation differs from run to run.
    rng = np.random.RandomState( )
    x = T.ftensor4('x')
    layer_input_size = (batch_size, c, input_size, input_size)
    Next_layer_input = x.reshape(layer_input_size)
    Filter_shape = (Number_Conv_feature[0], c,Kernel_size[0], Kernel_size[0] )
    Conv_layer =[]
    Params =[]
    for i in range(Number_Conv_Layer):
        Conv_layer.append([])
        Conv_layer[i]= LeNetConvPoolLayer(
            rng,
            input = Next_layer_input ,
            image_shape= layer_input_size,
            filter_shape= Filter_shape,
            poolsize=(pooling_size[i], pooling_size[i]),
            activation =Activation_Conv[i] ,
            stride =( stride[i], stride[i]))
        # The outputs of stage 0 and stage 2 are embedded in a zero tensor
        # that is one row/column larger than the written region.
        if i ==0:
            zeros = T.zeros([batch_size,Number_Conv_feature[0] , 127,127 ], dtype='float32')
            Conv_layer[0].output = T.set_subtensor(zeros[:,:,:126,:126], Conv_layer[0].output)
        if i ==2:
            zeros = T.zeros([batch_size,Number_Conv_feature[i] , 29, 29 ], dtype='float32')
            Conv_layer[i].output = T.set_subtensor(zeros[:,:,:28,:28], Conv_layer[i].output)
        # Filter shape for the next stage (there is none after the last one).
        if Number_Conv_Layer-1 != i:
            Filter_shape = (Number_Conv_feature[i+1], Filter_shape[0],Kernel_size[i+1], Kernel_size[i+1] )
        # Shape after this stage; both spatial extents are derived from
        # layer_input_size[2], i.e. square feature maps are assumed.
        layer_input_size = (batch_size, Number_Conv_feature[i],1+int( np.floor(((layer_input_size[2] - Kernel_size[i] + 1)-pooling_size[i]+1)/(1.0*stride[i]))), 1+int(np.floor(((layer_input_size[2] - Kernel_size[i] + 1)-pooling_size[i]+1)/(1.0*stride[i]))))
        Next_layer_input = Conv_layer[i].output
        Params += Conv_layer[i].params
    # Flatten the last feature maps to (batch_size, features).
    Next_layer_input = Next_layer_input.flatten(2)
    layer_input_size = (batch_size, layer_input_size[1]*layer_input_size[2]*layer_input_size[3])
    Hidden_layer =[]
    for i in range(Number_Hidden_Layer):
        Hidden_layer.append([])
        Hidden_layer[i]= HiddenLayer(
            rng=rng,
            input=Next_layer_input,
            n_in= layer_input_size[1],
            n_out= Number_Hidden_feature[i],
            activation=Activation_Hidden[i]
        )
        layer_input_size = (batch_size,Number_Hidden_feature[i] )
        Next_layer_input = Hidden_layer[i].output
        Params += Hidden_layer[i].params
    X_t, y_t = load_color()
    y = T.ivector('y')
    logRegressionLayer = LogisticRegression(
        input=Next_layer_input,
        n_in=layer_input_size[1],
        n_out=Number_Classes
    )
    Params += logRegressionLayer.params
    Reg = 0
    Reg_cov = 0
    # L1 penalty on three selected entries of Params.
    # NOTE(review): the indices are positions in the concatenated parameter
    # list -- presumably the hidden/output weight matrices for the default
    # layer counts; confirm.
    ind_param =[8,10,12]
    for pp in ind_param:
        Reg += T.sum(abs(Params[pp]))
    cost = logRegressionLayer.negative_log_likelihood( y) + 0.001*Reg
    #gparams = T.grad(cost, Params )
    # The learning rate is a symbolic input so it can be changed per call.
    learning_r = T.fscalar()
    updates =gradient_updates_momentum(cost, Params , learning_r , momentum )
    Train_function = theano.function(
        inputs=[x,y, learning_r ],
        outputs=cost,
        updates=updates
    )
    Get_Error = theano.function(
        [x,y],
        logRegressionLayer.errors(y)
    )
    Get_NLL = theano.function(
        [x,y],
        cost
    )
    print '... training'
    n_train_batches = N_train_example /batch_size
    n_test_batches = N_test_example /batch_size
    Test_list_scores = []
    Train_list_scores = []
    # Shape every evaluation minibatch is reshaped to.
    layer_input_size = (batch_size, c, input_size, input_size)
    for epoch in range( n_epochs):
        print '--- epoch: ', epoch
        minibatch_avg_cost_total = 0.0
        for minibatch_index in xrange(n_train_batches):
            # srng is created but not used afterwards in this function.
            srng = np.random.RandomState(rng.randint(999999))
            # Minibatch scaled by its own maximum value.
            train_set_x= np.array(X_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size], dtype='float32') /np.array(X_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size], dtype='float32').max()
            train_set_y= np.array(y_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] , dtype='int32')
            minibatch_avg_cost = Train_function(train_set_x,train_set_y, learning_rate)
            minibatch_avg_cost_total += minibatch_avg_cost
        # Error rate and NLL on the test part (samples after N_train_example).
        test_losses = [Get_Error(
            np.array(X_t[N_train_example + i * batch_size:N_train_example + (i + 1) * batch_size].reshape(layer_input_size), dtype='float32')/np.array(X_t[N_train_example + i * batch_size:N_train_example + (i + 1) * batch_size], dtype='float32').max(),
            np.array(y_t[N_train_example+ i * batch_size:N_train_example + (i + 1) * batch_size] , dtype='int32')
        ) for i in xrange(n_test_batches) ]
        test_NLL = [Get_NLL(
            np.array(X_t[N_train_example + i * batch_size:N_train_example + (i + 1) * batch_size].reshape(layer_input_size), dtype='float32')/np.array(X_t[N_train_example + i * batch_size:N_train_example + (i + 1) * batch_size] , dtype='float32').max(),
            np.array(y_t[N_train_example+ i * batch_size:N_train_example + (i + 1) * batch_size] , dtype='int32')
        ) for i in xrange(n_test_batches) ]
        this_test_loss = np.mean(test_losses)
        #learning_rate *= 0.995
        """if learning_rate> 0.0005: learning_rate *= 0.95 else: learning_rate = 0.0005"""
        Test_list_scores.append(float(this_test_loss))
        print '........................Test error:' ,this_test_loss
        # Same two metrics on the training part.
        train_loss = [Get_Error(
            np.array(X_t[ i * batch_size: (i + 1) * batch_size].reshape(layer_input_size), dtype='float32')/np.array(X_t[ i * batch_size: (i + 1) * batch_size], dtype='float32').max(),
            np.array(y_t[ i * batch_size: (i + 1) * batch_size] , dtype='int32')
        ) for i in xrange(n_train_batches) ]
        train_NLL = [Get_NLL(
            np.array(X_t[ i * batch_size: (i + 1) * batch_size].reshape(layer_input_size), dtype='float32')/np.array(X_t[ i * batch_size: (i + 1) * batch_size] , dtype='float32').max(),
            np.array(y_t[ i * batch_size: (i + 1) * batch_size] , dtype='int32')
        ) for i in xrange(n_train_batches) ]
        this_train_loss = np.mean(train_loss)
        print this_train_loss
        print 'NLL ...... ' , np.mean(train_NLL ), np.mean(test_NLL)
        Train_list_scores.append(float(this_train_loss))
        # Periodically save a plot of both error curves.
        if (epoch+1)%50 ==0 or epoch==70:
            plt.figure(1)
            plt.plot(np.arange(len(Test_list_scores)), np.array(Test_list_scores) , label= 'test error')
            plt.hold(True)
            plt.plot(np.arange(len(Train_list_scores)), np.array(Train_list_scores) , label= 'train error')
            plt.legend(loc='upper left')
            plt.hold(False)
            plt.savefig('./MSE_test' +str(epoch)+'.png')
def build_model(self):
    """Construct the VGGNet-11 graph and the training-related shared state.

    Layer chain built below: eight 3x3 ``Conv`` layers interleaved with five
    2x2 max ``Pool`` layers, a ``Flatten``, two ``FC`` (4096) + ``Dropout``
    pairs and a ``Softmax`` output.  ``conv_3x3`` and ``pool_2x2`` are
    rebound at every step, so the statement order defines the network.

    Reads from ``self.config``: ``input_width``, ``input_height``,
    ``batch_size``, ``n_softmax_out``, ``learning_rate``, ``momentum``,
    ``weight_decay`` and ``file_batch_size``; also reads ``self.verbose``.

    Sets the symbolic inputs (``self.x``, ``self.y``), the output layer and
    its output, the filtered layer list, ``self.params`` /
    ``self.weight_types``, the learning-rate and momentum settings, the
    shared data buffers and two lists of zero-initialised velocity buffers.
    The compiled-function slots (``train``, ``val``, ``inference``,
    ``get_vel``, ``descent_vel``) are only reset to ``None`` here.
    """
    print 'VGGNet_11 (shallow) 3/19'

    self.name = 'vggnet'

    # input shape in c01b
    self.channels = 3  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = self.config['input_width']  # '0' single scale training 224
    self.input_height = self.config['input_height']  # '1' single scale training 224
    self.batch_size = self.config['batch_size']  # 'b'
    b = self.batch_size

    # output dimension
    self.n_softmax_out = self.config['n_softmax_out']

    # start graph construction from scratch
    self.x = T.ftensor4('x')
    self.y = T.lvector('y')

    x_shuffled = self.x.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # These three locals are not used further down in this method.
    layers = []
    params = []
    weight_types = []  # for distinguishing w and b later

    # bc01 from now on

    conv_3x3 = Conv(
        input=x_shuffled,
        input_shape=(b, self.channels, self.input_width,
                     self.input_height),  # (b, 3, 224, 224)
        convstride=1,
        padsize=1,
        W=Normal((64, self.channels, 3, 3), std=0.3),  # bc01
        b=Constant((64, ), val=0.2),
        printinfo=self.verbose
        #output_shape = (b, 64, 224, 224)
    )

    pool_2x2 = Pool(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 64, 224, 224)
        poolsize=2,
        poolstride=2,
        poolpad=0,
        mode='max',
        printinfo=self.verbose
        #output_shape = (b, 64, 112, 112)
    )

    conv_3x3 = Conv(
        input=pool_2x2,
        #input_shape=pool_2x2.output_shape, # (b, 64, 112, 112)
        convstride=1,
        padsize=1,
        W=Normal((128, pool_2x2.output_shape[1], 3, 3), std=0.1),  # bc01
        b=Constant((128, ), val=0.02),
        printinfo=self.verbose
        #output_shape = (b, 128, 112, 112)
    )

    pool_2x2 = Pool(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 128, 112, 112)
        poolsize=2,
        poolstride=2,
        poolpad=0,
        mode='max',
        printinfo=self.verbose
        #output_shape = (b, 128, 56, 56)
    )

    conv_3x3 = Conv(
        input=pool_2x2,
        #input_shape=pool_2x2.output_shape, # (b, 128, 56, 56)
        convstride=1,
        padsize=1,
        W=Normal((256, pool_2x2.output_shape[1], 3, 3), std=0.05),  # bc01
        b=Constant((256, ), val=0.02),
        printinfo=self.verbose
        #output_shape = (b, 256, 56, 56)
    )

    conv_3x3 = Conv(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 256, 56, 56)
        convstride=1,
        padsize=1,
        W=Normal((256, conv_3x3.output_shape[1], 3, 3), std=0.05),  # bc01
        b=Constant((256, ), val=0.01),
        printinfo=self.verbose
        #output_shape = (b, 256, 56, 56)
    )

    pool_2x2 = Pool(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 256, 56, 56)
        poolsize=2,
        poolstride=2,
        poolpad=0,
        mode='max',
        printinfo=self.verbose
        #output_shape = (b, 256, 28, 28)
    )

    conv_3x3 = Conv(
        input=pool_2x2,
        #input_shape=pool_2x2.output_shape, # (b, 256, 28, 28)
        convstride=1,
        padsize=1,
        W=Normal((512, pool_2x2.output_shape[1], 3, 3), std=0.05),  # bc01
        b=Constant((512, ), val=0.02),
        printinfo=self.verbose
        #output_shape = (b, 512, 28, 28)
    )

    conv_3x3 = Conv(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 512, 28, 28)
        convstride=1,
        padsize=1,
        W=Normal((512, conv_3x3.output_shape[1], 3, 3), std=0.01),  # bc01
        b=Constant((512, ), val=0.01),
        printinfo=self.verbose
        #output_shape = (b, 512, 28, 28)
    )

    pool_2x2 = Pool(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 512, 28, 28)
        poolsize=2,
        poolstride=2,
        poolpad=0,
        mode='max',
        printinfo=self.verbose
        #output_shape = (b, 512, 14, 14)
    )

    conv_3x3 = Conv(
        input=pool_2x2,
        #input_shape=pool_2x2.output_shape, # (b, 512, 14, 14)
        convstride=1,
        padsize=1,
        W=Normal((512, pool_2x2.output_shape[1], 3, 3), std=0.005),  # bc01
        b=Constant((512, )),
        printinfo=self.verbose
        #output_shape = (b, 512, 14, 14)
    )

    conv_3x3 = Conv(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 512, 14, 14)
        convstride=1,
        padsize=1,
        W=Normal((512, conv_3x3.output_shape[1], 3, 3), std=0.005),  # bc01
        b=Constant((512, )),
        printinfo=self.verbose
        #output_shape = (b, 512, 14, 14)
    )

    pool_2x2 = Pool(
        input=conv_3x3,
        #input_shape=conv_3x3.output_shape, # (b, 512, 14, 14)
        poolsize=2,
        poolstride=2,
        poolpad=0,
        mode='max',
        printinfo=self.verbose
        #output_shape = (b, 512, 7, 7)
    )

    flatten = Flatten(
        input=pool_2x2,  #5
        #input_shape = pool_2x2.output_shape, # (b, 512, 7, 7)
        axis=2,  # expand dimensions after the first dimension
        printinfo=self.verbose
        #output_shape = (b, 25088)
    )

    fc_4096 = FC(input=flatten,
                 n_out=4096,
                 W=Normal((flatten.output_shape[1], 4096), std=0.001),
                 b=Constant((4096, ), val=0.01),
                 printinfo=self.verbose
                 #input_shape = flatten.output_shape # (b, 25088)
                 )

    dropout = Dropout(input=fc_4096,
                      n_out=fc_4096.output_shape[1],
                      prob_drop=0.5,
                      printinfo=self.verbose
                      #input_shape = fc_4096.output_shape # (b, 4096)
                      )

    fc_4096 = FC(input=dropout,
                 n_out=4096,
                 W=Normal((dropout.output_shape[1], 4096), std=0.005),
                 b=Constant((4096, ), val=0.01),
                 printinfo=self.verbose
                 #input_shape = dropout.output_shape # (b, 4096)
                 )

    dropout = Dropout(input=fc_4096,
                      n_out=fc_4096.output_shape[1],
                      prob_drop=0.5,
                      printinfo=self.verbose
                      #input_shape = fc_4096.output_shape # (b, 4096)
                      )

    softmax = Softmax(input=dropout,
                      n_out=self.n_softmax_out,
                      W=Normal(
                          (dropout.output_shape[1], self.n_softmax_out),
                          std=0.005),
                      b=Constant((self.n_softmax_out, ), val=0),
                      printinfo=self.verbose
                      #input_shape = dropout.output_shape # (b, 4096)
                      )

    self.output_layer = softmax

    self.output = self.output_layer.output

    # Collect the layers reachable from the output, then drop those whose
    # name is one of the four listed below before gathering parameters.
    self.layers = get_layers(lastlayer=self.output_layer)
    self.layers = [layer for layer in self.layers \
        if layer.name not in ['LRN\t','Pool\t','Flatten\t','Dropout'+ str(0.5)]]

    self.params, self.weight_types = get_params(self.layers)

    # training related
    self.base_lr = np.float32(self.config['learning_rate'])
    self.shared_lr = theano.shared(self.base_lr)
    self.step_idx = 0
    self.mu = self.config['momentum']  # def: 0.9 # momentum
    self.eta = self.config['weight_decay']  #0.0002 # weight decay

    # Zero-filled data buffers; the image buffer is laid out as c01b with
    # file_batch_size samples on the last axis.
    self.shared_x = theano.shared(np.zeros(
        (3, self.input_width, self.input_height,
         self.config['file_batch_size']),
        dtype=theano.config.floatX),
                                  borrow=True)

    self.shared_y = theano.shared(np.zeros(
        (self.config['file_batch_size'], ), dtype=int),
                                  borrow=True)

    # shared variable for storing momentum before exchanging momentum(delta w)
    self.vels = [
        theano.shared(param_i.get_value() * 0.)
        for param_i in self.params
    ]

    # shared variable for accepting momentum during exchanging momentum(delta w)
    self.vels2 = [
        theano.shared(param_i.get_value() * 0.)
        for param_i in self.params
    ]

    # Compiled functions are not built here.
    self.train = None
    self.val = None
    self.inference = None
    self.get_vel = None
    self.descent_vel = None
def setUp(self):
    """Create the symbolic ``ftensor4`` variables shared by the tests."""
    new_var = tensor.ftensor4
    # Created left to right: input, filters, then the top gradient.
    self.input, self.filters, self.topgrad = new_var(), new_var(), new_var()
def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
    """Build the symbolic graph and compile the Theano functions.

    The graph is: 1-D convolution -> batch norm -> activation -> max pool ->
    dropout -> flatten -> variational gaussian layer -> hidden layer ->
    dropout -> 1-D deconvolution, truncated to the input length.

    Args:
        dim_z: Size handed to the variational and hidden layers and used
            for the ``generative_z`` shared variable of shape (1, dim_z).
        x_train: Training data; stored unchanged on the instance.
        x_test: Test data; stored unchanged on the instance.
        diff: Optional extra data; stored unchanged as ``self.diff``.
        magic: Width handed to the variational and hidden layers. The
            hidden activation is reshaped to
            (batch, in_filters[-1], int(magic / in_filters[-1]), 1).
            NOTE(review): presumably the flattened encoder size -- confirm.
    """
    # ------------------------------ settings ------------------------------
    self.x_train = x_train
    self.x_test = x_test
    self.diff = diff
    self.batch_size = 100.
    self.learning_rate = theano.shared(np.float32(0.0008))
    self.momentum = 0.3
    self.performance = {"train": [], "test": []}
    self.inpt = T.ftensor4(name='input')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.activation = relu
    self.generative = False
    self.out_distribution = False
    self.in_filters = [64, 64, 64]
    self.filter_lengths = [10., 10., 10.]
    self.params = []
    self.magic = magic
    # Symbolic dropout rate; every compiled function substitutes the
    # shared ``dropout_prob`` for it through ``givens``.
    self.dropout_symbolic = T.fscalar()
    self.dropout_prob = theano.shared(np.float32(0.0))

    # ------------------------------- layers -------------------------------
    # Layer 1: convolution, batch norm, activation, pooling, dropout.
    self.conv1 = one_d_conv_layer(self.inpt,
                                  self.in_filters[0],
                                  1,
                                  self.filter_lengths[0],
                                  param_names=["W1", 'b1'])
    self.params.extend(self.conv1.params)
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    pooled = ds.max_pool_2d(self.nl1, [3, 1], st=[2, 1],
                            ignore_border=False)
    self.maxpool1 = pooled.astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)

    # Layer 2: flatten to a matrix.
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Variational layer.
    self.latent_layer = variational_gauss_layer(self.flattened,
                                                self.magic, dim_z)
    self.params.extend(self.latent_layer.params)
    self.latent_out = self.latent_layer.output

    # Hidden layer, reshaped back to a 4-D tensor for the deconvolution.
    self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params.extend(self.hidden_layer.params)
    last_filters = self.in_filters[-1]
    hidden_maps = self.activation(self.hidden_layer.output).reshape(
        (self.inpt.shape[0], last_filters,
         int(self.magic / last_filters), 1))
    self.hid_out = dropout(hidden_maps, self.dropout_symbolic)

    # Deconvolution 1.
    self.deconv1 = one_d_deconv_layer(self.hid_out,
                                      1,
                                      self.in_filters[2],
                                      self.filter_lengths[2],
                                      pool=2.,
                                      param_names=["W3", 'b3'],
                                      distribution=False)
    self.params.extend(self.deconv1.params)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1

    if self.out_distribution:
        self.trunk_sigma = self.last_layer.log_sigma[
            :, :, :self.inpt.shape[2], :]
    # Must be defined unconditionally: the functions compiled below use it
    # while ``out_distribution`` is False.
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    # ------------------------------ functions -----------------------------
    def compile_from_input(outputs, extra_givens=()):
        # Compile input -> outputs with the dropout rate substituted.
        givens = [[self.dropout_symbolic, self.dropout_prob]]
        givens.extend(extra_givens)
        return theano.function([self.inpt], outputs, givens=givens)

    self.get_latent_states = compile_from_input(self.latent_out)
    # The original compiled ``self.output`` twice with identical arguments;
    # one compilation is enough.
    self.output = compile_from_input(self.trunc_output)
    self.get_flattened = compile_from_input(self.flattened)
    # Decodes from ``generative_z`` by replacing the latent output.
    self.generate_from_z = compile_from_input(
        self.trunc_output,
        extra_givens=[[self.latent_out, self.generative_z]])

    self.cost = self.MSE()
    self.mse = self.MSE()
    self.derivatives = T.grad(self.cost, self.params)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)
    self.train_model = theano.function(
        inputs=[self.inpt, self.df],
        outputs=self.cost,
        updates=self.updates,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
def test_pooling():
    """Check cuDNN pooling (forward and gradient) against reference code.

    For each pooling mode and padding, the GPU-optimised graph must contain
    ``GpuDnnPool`` / ``GpuDnnPoolGrad`` and agree numerically with the
    reference implementation compiled without the GPU.

    Raises:
        SkipTest: when cuDNN is not available.
    """
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    x = T.ftensor4()

    # Loop-invariant: a copy of the CPU mode with the finiteness check off.
    mode_without_gpu2 = mode_without_gpu.including()
    mode_without_gpu2.check_isfinite = False

    # NOTE(review): the original listed (1, 0) twice, which only repeated
    # an identical case; possibly (0, 1) was intended -- confirm.
    pads = ((0, 0), (1, 0), (2, 3), (3, 2))
    for mode, pad in product(('max', 'average_inc_pad', 'average_exc_pad'),
                             pads):
        func = T.max if mode == 'max' else T.mean
        if pad != (0, 0) and func is T.mean:
            continue

        for ws in (4, 2, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if pad[0] > stride or pad[1] > stride:
                    # Not implemented
                    continue
                # We will check that the opt introduced it.
                out1 = max_pool_2d(x, (ws, ws),
                                   st=(stride, stride),
                                   ignore_border=True,
                                   padding=pad, mode=mode)
                out2 = pool_2d_i2n(x, ds=(ws, ws),
                                   strides=(stride, stride),
                                   pad=pad,
                                   pool_function=func)

                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any(isinstance(node.op, dnn.GpuDnnPool)
                           for node in f1.maker.fgraph.apply_nodes)

                f2 = theano.function([x], out2, mode=mode_without_gpu2)
                assert not any(isinstance(node.op, dnn.GpuDnnPool)
                               for node in f2.maker.fgraph.apply_nodes)

                for shp in [(1, 10, 100, 100),
                            (1, 3, 99, 99),
                            (32, 1, 147, 197)]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    utt.assert_allclose(f1(data), f2(data))

        # Test the grad with a fixed 2x2 window and stride 2. Dedicated
        # names keep the loop variables above from being clobbered.
        grad_ws = 2
        grad_stride = 2
        for shp in [(1, 1, 2, 2), (1, 1, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10

            if pad[0] > grad_stride or pad[1] > grad_stride:
                # Not implemented
                continue

            # This tests the CPU grad + opt + GPU implementation
            def cpu_graph_pool(inp):
                return max_pool_2d(inp, (grad_ws, grad_ws),
                                   ignore_border=True,
                                   padding=pad, mode=mode)

            utt.verify_grad(cpu_graph_pool, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            fg = theano.function([x],
                                 theano.grad(cpu_graph_pool(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any(isinstance(node.op, dnn.GpuDnnPoolGrad)
                       for node in fg.maker.fgraph.toposort())

            # Test the GPU grad + GPU implementation
            def gpu_graph_pool(inp):
                return dnn.dnn_pool(inp,
                                    ws=(grad_ws, grad_ws),
                                    stride=(grad_stride, grad_stride),
                                    pad=pad,
                                    mode=mode)

            utt.verify_grad(gpu_graph_pool, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that we get the good op.
            fg = theano.function([x],
                                 theano.grad(gpu_graph_pool(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any(isinstance(node.op, dnn.GpuDnnPoolGrad)
                       for node in fg.maker.fgraph.toposort())
            g_out = fg(data)

            # Compare against the CPU result
            out = max_pool_2d(x, (grad_ws, grad_ws),
                              padding=pad,
                              ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
            expected_grad_op = (MaxPoolGrad if mode == 'max'
                                else AveragePoolGrad)
            assert any(isinstance(node.op, expected_grad_op)
                       for node in fc.maker.fgraph.toposort())
            c_out = fc(data)
            utt.assert_allclose(c_out, g_out)
def build_model_L(in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1):
    """Return a one-layer network: an input layer feeding a linear
    ``TransposedConv2DLayer``.

    ``pad`` is forwarded as the layer's ``crop`` argument. ``dilation`` and
    ``num_groups`` are accepted but not used by this builder.
    """
    # Symbolic input, laid out as (B, C, H, W).
    net_input = InputLayer(
        shape=(None, in_channel, None, None),
        input_var=tensor.ftensor4('x'),
        name='input0',
    )
    return TransposedConv2DLayer(
        net_input,
        num_filters=out_channel,
        filter_size=kernel_size,
        stride=stride,
        crop=pad,
        nonlinearity=LACT.linear,
        name='tconv0',
    )
# --------------------------
# Load MNIST
# ntrain = number of samples in a randomly chosen subset
# (used to reproduce Fig. 5 in the paper)
# --------------------------
parser = argparse.ArgumentParser()
parser.add_argument('--ntrain', nargs=1, type=int)
parser.add_argument('--epochs', nargs=1, type=float)
args = parser.parse_args()

# NOTE(review): with nargs=1 argparse stores a one-element list (or None
# when the flag is absent), so ``args.ntrain`` is not a bare int -- confirm
# that ``mnist`` expects this.
trX, teX, trY, teY = mnist(args.ntrain, onehot=True)

# Reshape the images to (N, 1, 28, 28).
trX, teX = trX.reshape(-1, 1, 28, 28), teX.reshape(-1, 1, 28, 28)

# Symbolic inputs, targets and scalar hyper-parameters.
X, Y = T.ftensor4(), T.fmatrix()
lr, epochs = T.scalar(), T.scalar()

# -------------------------
# Init Basis and Alphas
# -------------------------
bases_L1, sigma_L1 = 10, 1.5
bases_L2, sigma_L2 = 6, 1
bases_L3, sigma_L3 = 6, 1
def __init__(self,
             K,
             conv_layer_sizes,
             hidden_layer_sizes,
             gamma,
             max_experiences=500000,
             min_experiences=50000,
             batch_sz=32):
    """Build the conv + dense network and compile train/predict functions.

    Args:
        K: size of the final layer's output; stored as ``self.K``.
        conv_layer_sizes: iterable of
            (num_output_filters, filter_size, stride) triples.
        hidden_layer_sizes: iterable of hidden layer widths.
        gamma: stored unchanged as ``self.gamma``.
        max_experiences: stored unchanged on the instance.
        min_experiences: stored unchanged on the instance.
        batch_sz: stored unchanged on the instance.
    """
    self.K = K

    # Optimiser constants, kept as float32.
    lr = np.float32(2.5e-4)
    mu = np.float32(0)
    decay = np.float32(0.99)

    # Symbolic inputs and targets.
    X = T.ftensor4('X')
    G = T.fvector('G')
    actions = T.ivector('actions')

    # Convolutional stack; the first layer takes 4 input channels.
    self.conv_layers = []
    in_filters = 4
    for out_filters, filter_size, conv_stride in conv_layer_sizes:
        self.conv_layers.append(
            ConvLayer(in_filters, out_filters, filter_size, conv_stride))
        in_filters = out_filters

    # Forward pass through the conv stack on inputs scaled by 1/255.
    conv_features = X / 255.0
    for conv in self.conv_layers:
        conv_features = conv.forward(conv_features)
    conv_out = conv_features.flatten(ndim=2)

    # Find the flattened feature size by running one random input through.
    probe_fn = theano.function(inputs=[X],
                               outputs=conv_out,
                               allow_input_downcast=True)
    probe = probe_fn(np.random.randn(1, 4, IM_HEIGHT, IM_WIDTH))
    flat_size = probe.shape[1]

    # Fully connected layers followed by a final layer of width K with an
    # identity activation.
    widths = [flat_size] + list(hidden_layer_sizes)
    self.layers = [
        HiddenLayer(n_in, n_out)
        for n_in, n_out in zip(widths[:-1], widths[1:])
    ]
    self.layers.append(HiddenLayer(widths[-1], K, lambda x: x))

    # Collect params for copy.
    self.params = []
    for layer in self.conv_layers + self.layers:
        self.params.extend(layer.params)

    # Per-parameter optimiser state: squared-gradient caches start at 0.1,
    # velocities at zero.
    caches = [
        theano.shared(np.ones_like(p.get_value()) * 0.1)
        for p in self.params
    ]
    velocities = [theano.shared(p.get_value() * 0) for p in self.params]

    # Final output and cost: mean squared error between G and the value
    # of the action actually taken.
    dense_out = conv_out
    for dense in self.layers:
        dense_out = dense.forward(dense_out)
    Y_hat = dense_out
    row_idx = T.arange(actions.shape[0])
    selected_action_values = Y_hat[row_idx, actions]
    cost = T.mean((G - selected_action_values)**2)

    # Update rules, in the same order as before: caches, params, velocities.
    grads = T.grad(cost, self.params)
    one = np.float32(1)
    cache_updates = [
        (c, decay * c + (one - decay) * g * g)
        for c, g in zip(caches, grads)
    ]
    param_updates = [(p, p + v) for p, v in zip(self.params, velocities)]
    velocity_updates = [
        (v, mu * v - lr * g / T.sqrt(c))
        for v, c, g in zip(velocities, caches, grads)
    ]
    updates = cache_updates + param_updates + velocity_updates

    # Compile functions.
    self.train_op = theano.function(inputs=[X, G, actions],
                                    updates=updates,
                                    allow_input_downcast=True)
    self.predict_op = theano.function(inputs=[X],
                                      outputs=Y_hat,
                                      allow_input_downcast=True)

    # Replay memory.
    self.experience = []
    self.max_experiences = max_experiences
    self.min_experiences = min_experiences
    self.batch_sz = batch_sz
    self.gamma = gamma
test_Tstamp = 1
pat = readPatMS.new(1, 1)

# Count the patches extracted from one cropped slice of the data.
reference_slice = pat.data[0, 20:160, 17:192, 0]
num_patches = np.shape(
    image.extract_patches_2d(reference_slice, patch_size))[0]

# Zero-filled placeholders for patches and labels, mirrored into Theano
# shared variables.
train_patches = np.zeros(
    (num_patches, num_channels, patch_size[0], patch_size[1]))
trpatches_truth = np.zeros((num_patches,))
shared_data = theano.shared(
    numpy.asarray(train_patches, dtype=theano.config.floatX),
    borrow=True)
shared_truth = theano.shared(
    numpy.asarray(trpatches_truth, dtype='int32'),
    borrow=True)

rng = numpy.random.RandomState(23455)

# Define Theano tensors
nz = T.lscalar()
x = T.ftensor4('x')
y = T.ivector('y')

######################
# BUILD ACTUAL MODEL #
######################
print('... building the model')

# Swap the first two axes of the input before the first layer.
layer0input = x.dimshuffle(1, 0, 2, 3)
layer0 = ConvPoolLayer(
    rng,
    input=layer0input,
    image_shape=(num_patches, num_channels, 19, 19),
    filter_shape=(nkerns[0], num_channels, 5, 5),
    poolsize=(2, 2),
)
def test_pooling():
    """Check cuDNN pooling (forward and gradient) against reference code.

    The forward pass is compared with ``pool_2d_i2n``; the gradient is
    checked with ``verify_grad`` and, for max pooling, against the CPU
    gradient.

    Raises:
        SkipTest: when cuDNN is not available.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    for func in (T.max, T.mean):
        pool_mode = 'max' if func is T.max else "average"

        for ws in (2, 4, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if ws == stride and func is T.max:
                    # We will check that the opt introduced it.
                    out1 = max_pool_2d(x, (ws, ws), ignore_border=True)
                else:
                    out1 = cuda.dnn.dnn_pool(x,
                                             ws=(ws, ws),
                                             stride=(stride, stride),
                                             mode=pool_mode)
                out2 = pool_2d_i2n(x,
                                   ds=(ws, ws),
                                   strides=(stride, stride),
                                   pool_function=func)

                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any(isinstance(node.op, cuda.dnn.GpuDnnPool)
                           for node in f1.maker.fgraph.apply_nodes)

                f2 = theano.function([x], out2, mode=mode_with_gpu)
                assert not any(isinstance(node.op, cuda.dnn.GpuDnnPool)
                               for node in f2.maker.fgraph.apply_nodes)

                for shp in [(1, 10, 100, 100),
                            (1, 3, 99, 99),
                            (32, 1, 147, 197)]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    a = f1(data).__array__()
                    b = f2(data).__array__()
                    assert numpy.allclose(
                        a, b, atol=numpy.finfo(numpy.float32).eps)

        # Test the grad with a fixed 2x2 window and stride 2.
        # The original wrote ``strides = 2`` (never read), so the cuDNN
        # graph below picked up the stale loop value of ``stride`` while the
        # CPU reference pooled with stride 2.
        grad_ws = 2
        grad_stride = 2
        for shp in [(1, 1, 2, 2), (1, 1, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10

            # This tests the CPU grad + opt + GPU implementation
            def cpu_graph_pool(inp):
                return max_pool_2d(inp, (grad_ws, grad_ws),
                                   ignore_border=True)

            theano.tests.unittest_tools.verify_grad(
                cpu_graph_pool, [data],
                cast_to_output_type=False,
                mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            grad_fn = theano.function(
                [x],
                theano.grad(cpu_graph_pool(x).sum(), x),
                mode=mode_with_gpu)
            assert any(isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                       for node in grad_fn.maker.fgraph.toposort())

            # Test the GPU grad + GPU implementation
            def gpu_graph_pool(inp):
                return cuda.dnn.dnn_pool(
                    inp,
                    ws=(grad_ws, grad_ws),
                    stride=(grad_stride, grad_stride),
                    mode=pool_mode)

            theano.tests.unittest_tools.verify_grad(
                gpu_graph_pool, [data],
                cast_to_output_type=False,
                mode=mode_with_gpu)
            # Confirm that we get the good op.
            grad_fn = theano.function(
                [x],
                theano.grad(gpu_graph_pool(x).sum(), x),
                mode=mode_with_gpu)
            assert any(isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                       for node in grad_fn.maker.fgraph.toposort())
            g_out = grad_fn(data)

            if func is T.max:
                # Compare against the CPU result
                out = max_pool_2d(x, (grad_ws, grad_ws),
                                  ignore_border=True)
                cpu_grad_fn = theano.function(
                    [x], theano.grad(out.sum(), x),
                    mode=mode_without_gpu)
                assert any(isinstance(node.op, DownsampleFactorMaxGrad)
                           for node in cpu_grad_fn.maker.fgraph.toposort())
                c_out = cpu_grad_fn(data)
                assert numpy.allclose(c_out, g_out)
# NOTE: the order of the star-imports below matters -- a later star-import
# overrides same-named symbols from an earlier one -- so do not reorder.
import theano
from confusionmatrix import ConfusionMatrix
from lasagne.objectives import *
from lasagne.updates import *
import theano.tensor as T
from theano.tensor import *
from theano.tensor.signal import pool
import lasagne
import numpy as np
import DP1 as DP
from theano.tensor import nnet
import lasagne.layers.dnn

# 5-D tensor type with an all-False broadcastable pattern. Despite the "d"
# prefix in the name, the dtype is float32.
dtensor5 = TensorType('float32', (False, ) * 5)

# Symbolic network input (4-D float tensor) and integer target vector.
input_var = T.ftensor4('XY')
target_var = T.ivector('Y_train')
x1 = T.matrix('x1')

# PS is used as both spatial dimensions of the input layer below.
PS = 15
P2 = 3

# Build Neural Network:
# Conv Net XY Plane
# NOTE: ``input`` shadows the Python builtin of the same name.
# Input shape is (batch, 15, PS, PS) with an unconstrained batch size.
input = lasagne.layers.InputLayer((None, 15, PS, PS), input_var=input_var)
# Three cuDNN conv layers of 20 filters, 3x3 each, with a 2x2 pooling layer
# after the first one.
l_conv_1 = lasagne.layers.dnn.Conv2DDNNLayer(input, 20, (3, 3))
l_maxpool_1 = lasagne.layers.dnn.Pool2DDNNLayer(l_conv_1, (2, 2))
l_conv_2 = lasagne.layers.dnn.Conv2DDNNLayer(l_maxpool_1, 20, (3, 3))
l_conv_3 = lasagne.layers.dnn.Conv2DDNNLayer(l_conv_2, 20, (3, 3))
def __init__(self, config):
    """Build the AlexNet graph and the shared variables used for training.

    ``config`` is read for the keys 'verbose', 'batch_size',
    'use_data_layer', 'lib_conv', 'n_softmax_out', 'rand_crop',
    'input_width', 'input_height', 'file_batch_size', 'learning_rate',
    'momentum' and 'weight_decay'.
    """
    self.config = config
    self.verbose = self.config['verbose']
    self.name = 'alexnet'
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']
    n_softmax_out = config['n_softmax_out']

    # ##################### BUILD NETWORK ##########################
    # Symbolic variables for the data; 'rand' is a random array used for
    # random cropping/mirroring of data.
    x = T.ftensor4('x')
    y = T.lvector('y')
    rand = T.fvector('rand')
    lr = T.scalar('lr')

    if self.verbose:
        print('AlexNet 2/16')

    self.layers = []
    params = []
    weight_types = []

    def register(layer):
        # Record a trainable layer together with its params/weight types.
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        features = data_layer.output
    else:
        features = x

    # Keyword arguments of the five conv/pool layers, in network order.
    conv_specs = [
        dict(image_shape=(3, 227, 227, batch_size),
             filter_shape=(3, 11, 11, 96),
             convstride=4, padsize=0, group=1,
             poolsize=3, poolstride=2,
             bias_init=0.0, lrn=True),
        dict(image_shape=(96, 27, 27, batch_size),
             filter_shape=(96, 5, 5, 256),
             convstride=1, padsize=2, group=2,
             poolsize=3, poolstride=2,
             bias_init=0.1, lrn=True),
        dict(image_shape=(256, 13, 13, batch_size),
             filter_shape=(256, 3, 3, 384),
             convstride=1, padsize=1, group=1,
             poolsize=1, poolstride=0,
             bias_init=0.0, lrn=False),
        dict(image_shape=(384, 13, 13, batch_size),
             filter_shape=(384, 3, 3, 384),
             convstride=1, padsize=1, group=2,
             poolsize=1, poolstride=0,
             bias_init=0.1, lrn=False),
        dict(image_shape=(384, 13, 13, batch_size),
             filter_shape=(384, 3, 3, 256),
             convstride=1, padsize=1, group=2,
             poolsize=3, poolstride=2,
             bias_init=0.0, lrn=False),
    ]
    for spec in conv_specs:
        conv_layer = register(ConvPoolLayer(input=features,
                                            lib_conv=lib_conv,
                                            verbose=self.verbose,
                                            **spec))
        features = conv_layer.output

    # Move the last axis first, then flatten to a matrix for the dense part.
    fc_layer6_input = T.flatten(features.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = register(FCLayer(input=fc_layer6_input,
                                 n_in=9216,
                                 n_out=4096,
                                 verbose=self.verbose))
    # Dropout layers are not registered in self.layers / params.
    dropout_layer6 = DropoutLayer(fc_layer6.output,
                                  n_in=4096,
                                  n_out=4096,
                                  verbose=self.verbose)

    fc_layer7 = register(FCLayer(input=dropout_layer6.output,
                                 n_in=4096,
                                 n_out=4096,
                                 verbose=self.verbose))
    dropout_layer7 = DropoutLayer(fc_layer7.output,
                                  n_in=4096,
                                  n_out=4096,
                                  verbose=self.verbose)

    softmax_layer8 = register(SoftmaxLayer(input=dropout_layer7.output,
                                           n_in=4096,
                                           n_out=n_softmax_out,
                                           verbose=self.verbose))

    # #################### NETWORK BUILT #######################
    self.p_y_given_x = softmax_layer8.p_y_given_x
    self.y_pred = softmax_layer8.y_pred
    self.output = self.p_y_given_x

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.error = softmax_layer8.errors(y)
    # Top-x error with x capped at the number of classes when below 5.
    top_x = n_softmax_out if n_softmax_out < 5 else 5
    self.error_top_5 = softmax_layer8.errors_top_x(y, top_x)

    self.params = params

    # inputs
    self.x = x
    self.y = y
    self.rand = rand
    self.lr = lr

    # Shared buffers sized for loading a large file batch.
    file_batch = config['file_batch_size']
    self.shared_x = theano.shared(
        np.zeros((3, config['input_width'], config['input_height'],
                  file_batch),
                 dtype=theano.config.floatX),
        borrow=True)
    self.shared_y = theano.shared(np.zeros((file_batch,), dtype=int),
                                  borrow=True)
    self.shared_lr = theano.shared(np.float32(config['learning_rate']))

    # training related
    self.base_lr = np.float32(config['learning_rate'])
    self.step_idx = 0
    self.mu = config['momentum']  # def: 0.9 # momentum
    self.eta = config['weight_decay']  # 0.0002 # weight decay
    self.weight_types = weight_types
    self.batch_size = batch_size

    self.grads = T.grad(self.cost, self.params)

    # Symbolic sub-batch index selecting one batch_size-wide window of the
    # shared buffers.
    subb_ind = T.iscalar('subb')
    self.subb_ind = subb_ind
    start = subb_ind * self.batch_size
    stop = (subb_ind + 1) * self.batch_size
    self.shared_x_slice = self.shared_x[:, :, :, start:stop]
    self.shared_y_slice = self.shared_y[start:stop]
def main():
    """Train the Lasagne model, save its weights and plot the curves.

    Loads the data from 'data/train.csv', trains for up to 45 iterations
    (Ctrl-C stops early but still saves), writes the parameter values to
    'data/weights.pklz' and the loss/accuracy plot to
    'data/training_plot.png'.
    """
    # load the training and validation data sets
    train_X, test_X, train_y, test_y = load_data_cv('data/train.csv')

    X = T.ftensor4()
    Y = T.fmatrix()

    # set up theano expressions for the network output; the validation
    # output is requested with deterministic=True
    output_layer = lasagne_model()
    output_train = lasagne.layers.get_output(output_layer, X)
    output_valid = lasagne.layers.get_output(output_layer, X,
                                             deterministic=True)

    # set up the loss that we aim to minimize
    loss_train = T.mean(T.nnet.categorical_crossentropy(output_train, Y))
    loss_valid = T.mean(T.nnet.categorical_crossentropy(output_valid, Y))

    # class prediction on the deterministic output
    pred_valid = T.argmax(output_valid, axis=1)

    # get parameters from the network and set up sgd with nesterov momentum
    params = lasagne.layers.get_all_params(output_layer)
    updates = nesterov_momentum(loss_train, params,
                                learning_rate=0.003, momentum=0.9)

    # set up training and prediction functions
    train = theano.function(inputs=[X, Y],
                            outputs=loss_train,
                            updates=updates,
                            allow_input_downcast=True)
    valid = theano.function(inputs=[X, Y],
                            outputs=loss_valid,
                            allow_input_downcast=True)
    predict_valid = theano.function(inputs=[X],
                                    outputs=pred_valid,
                                    allow_input_downcast=True)

    # loop over the training function, recording the metrics of each pass
    train_eval = []
    valid_eval = []
    valid_acc = []
    try:
        for i in range(45):
            train_loss = batch_iterator(train_X, train_y, BATCHSIZE, train)
            train_eval.append(train_loss)
            valid_loss = valid(test_X, test_y)
            valid_eval.append(valid_loss)
            acc = np.mean(
                np.argmax(test_y, axis=1) == predict_valid(test_X))
            valid_acc.append(acc)
            # Single-argument form prints the same text as the old print
            # statement and is valid on both Python 2 and Python 3.
            print('iter: %s | Tloss: %s | Vloss: %s | valid acc: %s'
                  % (i, train_loss, valid_loss, acc))
    except KeyboardInterrupt:
        # Deliberate: an interrupt ends training but still saves and plots.
        pass

    # save weights; the context manager closes the file even if the dump
    # raises
    all_params = helper.get_all_param_values(output_layer)
    with gzip.open('data/weights.pklz', 'wb') as weights_file:
        pickle.dump(all_params, weights_file)

    # plot loss and accuracy
    train_eval = np.array(train_eval)
    valid_eval = np.array(valid_eval)
    valid_acc = np.array(valid_acc)
    sns.set_style("whitegrid")
    pyplot.plot(train_eval, linewidth=3, label='train loss')
    pyplot.plot(valid_eval, linewidth=3, label='valid loss')
    pyplot.legend(loc=2)
    # accuracy goes on a second y-axis
    pyplot.twinx()
    pyplot.plot(valid_acc, linewidth=3, label='valid accuracy', color='r')
    pyplot.grid()
    pyplot.ylim([.9, 1])
    pyplot.legend(loc=1)
    pyplot.savefig('data/training_plot.png')
def setUp(self):
    """Create the symbolic ``ftensor4`` variables and a constant array
    shared by the tests."""
    new_var = tensor.ftensor4
    # Created left to right: input, filters, then the top gradient.
    self.input, self.filters, self.topgrad = new_var(), new_var(), new_var()
    # All-zero float32 array of shape (3, 5, 7, 11).
    self.constant_tensor = numpy.zeros(shape=(3, 5, 7, 11), dtype='float32')
# gpu id gpu_id = 1 # create the mem recorder object mem_recorder = gpu_memory_recorder(gpu_id=gpu_id, process_id=current_process_id, log_dir=log_dir, log_filename=mem_usage_filename, recording_interval=interval) # start recording mem_recorder.start_recording() # write some theano code x = T.ftensor4() y = T.ftensor4() z = 2 * x + y f = theano.function([x, y], z.mean()) # do some computation for i in xrange(100): a = np.random.sample((i, 100, 100, 100)).astype('float32') b = np.random.sample((i, 100, 100, 100)).astype('float32') c = f(a, b) # we can generate the chart at any point after we started recording. # the 50 means it will use only the last 50 data points when
def initializeModel(self):
    ''' define your deep learning model

    Creates the weights, builds the symbolic model through ``self.model``,
    sets ``self.cost`` and ``self.paramUpdates``, optionally loads a saved
    model, and compiles ``self.train`` (only when ``self.mode == "Train"``),
    ``self.predict``, ``self.predictProb`` and ``self.getFirstLayerOutput``.
    '''
    print('defining model')
    X = T.ftensor4()
    Y = T.fmatrix()

    # initialize your weights, kernels
    # format: n kernels, n channels, kernel_w x kernel_h
    # 20 kernels over 3 input channels, 5 x 5 each
    w1 = self.init_weights((20, 3, 5, 5),
                           weightType='Xavier',
                           caffeLayerName='conv1')
    # 50 kernels over 20 channels, 5 x 5 each
    w2 = self.init_weights((50, 20, 5, 5),
                           weightType='Xavier',
                           caffeLayerName='conv2')
    # fully connected layers on the flattened features
    w4 = self.init_weights((7200, 1000), weightType='Xavier')
    w5 = self.init_weights((1000, 500), weightType='Xavier')
    w_output = self.init_weights((500, 2), weightType='Xavier')

    # define your deep model
    if self.dropout_params is None:
        # no dropout params were supplied, so set the defaults manually
        self.dropout_params = {'conv': 0.1, 'fc': 0.2}
    # Single-argument prints produce the same text as the old print
    # statements (including the double space) on Python 2 and Python 3.
    print('initializing with dropout_params:  %s %s'
          % (self.dropout_params['conv'], self.dropout_params['fc']))

    # Only the last two results of the model are used below.
    model_outputs = self.model(X, w1, w2, w4, w5, w_output,
                               p_drop_conv=self.dropout_params['conv'],
                               p_drop_hidden=self.dropout_params['fc'])
    (noise_l1, noise_l2, noise_l3, noise_l4, noise_l5,
     noise_py_x, convOut1) = model_outputs

    # get your label from the predicted probabilities
    y_x = T.argmax(noise_py_x, axis=1)

    self.learning_rate = 0.0001
    self.params = [w1, w2, w4, w5, w_output]

    # NOTE(review): both norms are computed but never added to the cost;
    # the calls are kept in case the helpers have side effects -- confirm.
    L1_norm = self.getL1Norm(self.params)
    L2_norm = self.getL2Norm(self.params)

    # mean categorical cross entropy (no regularization term is applied)
    self.cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    self.paramUpdates = self.RMSprop(self.cost, self.params,
                                     lr=self.learning_rate)

    if self.modelToLoad is not None:
        self.loadThisModel(self.modelToLoad)

    print('compiling functions')
    print('current learning rate:  %s' % (self.learning_rate,))
    start_compilation_time = time.clock()

    if self.mode == "Train":
        print('compiling train function startin at  %s'
              % (strftime("%Y-%m-%d %H:%M:%S"),))
        self.train = theano.function(inputs=[X, Y],
                                     outputs=self.cost,
                                     updates=self.paramUpdates,
                                     allow_input_downcast=True)

    print('compiling predict function')
    self.predict = theano.function(inputs=[X],
                                   outputs=y_x,
                                   allow_input_downcast=True)

    print('compiling predictProb function')
    self.predictProb = theano.function(inputs=[X],
                                       outputs=noise_py_x,
                                       allow_input_downcast=True)
    # The timer stops here, so compiling getFirstLayerOutput below is not
    # part of the reported time.
    end_compilation_time = time.clock()

    self.getFirstLayerOutput = theano.function(inputs=[X],
                                               outputs=convOut1)
    print('compiled the functions, ended at  %s'
          % (strftime("%Y-%m-%d %H:%M:%S"),))
    print('time takent compile the functions:  %s'
          % (end_compilation_time - start_compilation_time,))