import argparse, time

import numpy as np

import cgt
from cgt import nn

# (fmt_row and fetch_dataset, used in main() below, come from the demos'
# helper module, which is not shown in this excerpt.)


def make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch):
    # inputs[0] is the external input; then one (prev_c, prev_h) pair of
    # (size_batch, size_mem) matrices per layer.
    inputs = [cgt.matrix(fixed_shape=(size_batch, size_input))]
    for _ in xrange(2 * n_layers):
        inputs.append(cgt.matrix(fixed_shape=(size_batch, size_mem)))
    outputs = []
    for i_layer in xrange(n_layers):
        # +1 offsets past the external input at inputs[0]; states are stored
        # (c, h) per layer, matching the order they are appended to outputs,
        # so one step's state outputs can be fed straight back in.
        prev_c = inputs[i_layer * 2 + 1]
        prev_h = inputs[i_layer * 2 + 2]
        if i_layer == 0:
            x = inputs[0]
            size_x = size_input
        else:
            x = outputs[(i_layer - 1) * 2 + 1]  # previous layer's next_h
            size_x = size_mem
        # all four gate pre-activations, computed by two affine maps
        input_sums = nn.Affine(size_x, 4 * size_mem)(x) \
            + nn.Affine(size_mem, 4 * size_mem)(prev_h)
        sigmoid_chunk = cgt.sigmoid(input_sums[:, 0:3 * size_mem])
        in_gate = sigmoid_chunk[:, 0:size_mem]
        forget_gate = sigmoid_chunk[:, size_mem:2 * size_mem]
        out_gate = sigmoid_chunk[:, 2 * size_mem:3 * size_mem]
        in_transform = cgt.tanh(input_sums[:, 3 * size_mem:4 * size_mem])
        next_c = forget_gate * prev_c + in_gate * in_transform
        next_h = out_gate * cgt.tanh(next_c)
        outputs.append(next_c)
        outputs.append(next_h)
    category_activations = nn.Affine(size_mem, size_output)(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
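# --- Usage sketch (not in the original source) -------------------------------
# One way to unroll the LSTM module over time, assuming nn.Module exposes an
# expand() method that applies the module to fresh symbolic inputs (as the cgt
# char-rnn demo does); the function name and sizes here are illustrative.
# Per layer the state inputs and outputs are ordered (c, h), so each step's
# state outputs feed directly into the next step.
def unroll_lstm_sketch(size_input, size_mem, n_layers, size_output, size_batch, n_unroll):
    network = make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch)
    xs = [cgt.matrix(fixed_shape=(size_batch, size_input)) for _ in xrange(n_unroll)]
    init_hiddens = [cgt.matrix(fixed_shape=(size_batch, size_mem))
                    for _ in xrange(2 * n_layers)]
    cur_hiddens = init_hiddens
    logprobs_per_step = []
    for t in xrange(n_unroll):
        outputs = network.expand([xs[t]] + cur_hiddens)
        cur_hiddens, logprobs = list(outputs[:-1]), outputs[-1]
        logprobs_per_step.append(logprobs)
    return xs, init_hiddens, logprobs_per_step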
def make_deep_gru(size_input, size_mem, n_layers, size_output, size_batch):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        update_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2u")(x)
            + nn.Affine(size_mem, size_mem, name="h2u")(prev_h))
        reset_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2r")(x)
            + nn.Affine(size_mem, size_mem, name="h2r")(prev_h))
        gated_hidden = reset_gate * prev_h
        p2 = nn.Affine(size_mem, size_mem)(gated_hidden)
        p1 = nn.Affine(size_x, size_mem)(x)
        hidden_target = cgt.tanh(p1 + p2)
        next_h = (1.0 - update_gate) * prev_h + update_gate * hidden_target
        outputs.append(next_h)
    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
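# For reference, the function above implements the standard GRU update
# (Cho et al., 2014); nn.Affine also adds bias terms, omitted here:
#   u_t      = sigmoid(W_xu x_t + W_hu h_{t-1})          update gate ("i2u"/"h2u")
#   r_t      = sigmoid(W_xr x_t + W_hr h_{t-1})          reset gate  ("i2r"/"h2r")
#   htilde_t = tanh(W_xh x_t + W_hh (r_t * h_{t-1}))     candidate state
#   h_t      = (1 - u_t) * h_{t-1} + u_t * htilde_t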
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--epochs", type=int, default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape=Xshape)
    y = cgt.vector("y", fixed_shape=(batchsize,), dtype='i4')

    # three conv / relu / pool stages, then a linear classifier
    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5, 5), pad=(2, 2),
                                  weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3, 3), stride=(2, 2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5, 5), pad=(2, 2),
                                  weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3, 3), stride=(2, 2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5, 5), pad=(2, 2),
                                  weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3, 3), stride=(2, 2))
    relu3 = nn.rectify(pool3)

    d0, d1, d2, d3 = relu3.shape
    flatlayer = relu3.reshape([d0, d1 * d2 * d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start + batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time() - tstart
            if start > batchsize * 5: break  # debug limiter: only a few minibatches per epoch
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile:
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
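# --- Sketch of the rmsprop_updates helper (not shown in this excerpt) --------
# main() references rmsprop_updates without defining it. A minimal version
# following the standard RMSProp recipe would look like this; the helper in
# the original codebase may differ in its details.
def rmsprop_updates(cost, params, stepsize=1e-3, rho=0.9, epsilon=1e-6):
    grads = cgt.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # running average of squared gradients, initialized to zero
        acc = cgt.shared(p.op.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * cgt.square(g)
        g_scaled = g / cgt.sqrt(acc_new + epsilon)
        updates.append((acc, acc_new))
        updates.append((p, p - stepsize * g_scaled))
    return updates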
def make_deep_rrnn(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        # learned unit vectors defining the hidden-to-hidden rotation
        # (a product of 2*k_h Householder reflections)
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        # input-dependent unit vectors for the input-to-hidden rotation
        r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
        r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
        r_norm = cgt.norm(r_non, axis=2, keepdims=True)
        r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")

        prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
        inters_in = [prev_h_3]
        colon = slice(None, None, None)

        # apply the 2*k_in input-dependent reflections: v <- v - 2 r (r^T v)
        for i in xrange(2 * k_in):
            inter_in = inters_in[-1]
            r_cur = cgt.subtensor(r, [colon, i, colon])
            r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
            r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
            ref_cur = cgt.batched_matmul(
                r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
            inter_out = inter_in - 2 * ref_cur
            inters_in.append(inter_out)

        h_in_rot = cgt.reshape(inters_in[-1], (size_batch, size_mem))
        inters_h = [h_in_rot]

        # apply the 2*k_h learned reflections
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)

        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
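# --- Why reflections? (illustrative check, not in the original source) -------
# Each update v <- v - 2 r (r^T v) above, with r a unit vector, is a
# Householder reflection: an orthogonal map, so it preserves the norm of the
# hidden state exactly. Composing an even number of reflections (2*k_in and
# 2*k_h above) yields a rotation, which is why the recurrence can neither blow
# up nor shrink the state's magnitude. A quick numpy sanity check:
def check_householder_preserves_norm(dim=5, n_reflections=4):
    rng = np.random.RandomState(0)
    v = rng.randn(dim)
    norm_before = np.linalg.norm(v)
    for _ in xrange(n_reflections):  # even count => net rotation
        r = rng.randn(dim)
        r /= np.linalg.norm(r)
        v = v - 2 * r * r.dot(v)
    assert np.allclose(np.linalg.norm(v), norm_before)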
def build_fc_return_loss(X, y):
    """Build fully connected network and return loss"""
    np.random.seed(0)
    h1 = nn.rectify(nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=0.1))(X))
    h2 = nn.rectify(nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=0.1))(h1))
    logprobs = nn.logsoftmax(nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=0.1))(h2))
    neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
    loss = neglogliks.mean()
    return loss
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem

        # learned unit vectors defining the hidden-to-hidden rotation
        # (a product of 2*k_h Householder reflections)
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        # additive ReLU input path, combined with the rescaled previous state
        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)
        prev_h_scaled = nn.scale_mag(prev_h)
        h_in_added = prev_h_scaled + add_in_relu

        # apply the 2*k_h learned reflections: v <- v - 2 r (r^T v)
        inters_h = [h_in_added]
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)

        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
def build_convnet_return_loss(X, y):
    np.random.seed(0)
    conv1 = nn.rectify(
        nn.SpatialConvolution(1, 32, kernelshape=(3, 3), pad=(0, 0),
                              weight_init=nn.IIDGaussian(std=0.1))(X))
    pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
    conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3, 3), pad=(0, 0),
                              weight_init=nn.IIDGaussian(std=0.1))(pool1))
    pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
    d0, d1, d2, d3 = pool2.shape
    flatlayer = pool2.reshape([d0, d1 * d2 * d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
    loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
    return loss
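# --- Usage sketch (not in the original source) -------------------------------
# Compiling the convnet loss into callable functions, assuming MNIST-shaped
# input (batchsize x 1 x 28 x 28); the function name, batchsize, and label
# dtype here are illustrative.
def compile_convnet_sketch(batchsize=64):
    X = cgt.tensor4("X", fixed_shape=(batchsize, 1, 28, 28))
    y = cgt.vector("y", dtype='i8', fixed_shape=(batchsize,))
    loss = build_convnet_return_loss(X, y)
    params = nn.get_parameters(loss)
    f_loss = cgt.function([X, y], loss)
    f_grad = cgt.function([X, y], cgt.grad(loss, params))
    return f_loss, f_grad, params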