def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    # One cgt.matrix() for the external input plus one per layer's previous hidden state.
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        # Rows of xform_h are normalized to unit length; each row defines a Householder reflection.
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)
        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            # inter_out = inter_in - 2 * (inter_in . r) r : reflect each row across
            # the hyperplane orthogonal to the unit vector r.
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)

        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)
    return nn.Module(inputs, outputs)
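# Illustration (not part of the original code): the inner loop above applies
# Householder reflections, h - 2*(h . r)*r, with unit rows r of xform_h. A minimal
# numpy sketch with a random batch checks that one such reflection preserves norms.
import numpy as np
_rng = np.random.RandomState(0)
_h = _rng.randn(4, 8)                               # (batch, size_mem)
_r = _rng.randn(8)
_r /= np.linalg.norm(_r)                            # unit reflection vector
_h_ref = _h - 2 * np.outer(_h.dot(_r), _r)          # h - 2 (h . r) r
assert np.allclose(np.linalg.norm(_h, axis=1), np.linalg.norm(_h_ref, axis=1))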
def tinyconv_model(X, w, w2, p_drop):
    l1 = nn.conv2d(X, w, kernelshape=(3, 3), pad=(1, 1), stride=(3, 3))
    l1a = nn.dropout(l1, p_drop)
    # NOTE: l1a (the dropout output) is not used below; as written, dropout has no
    # effect on the returned predictions.
    batchsize, channels, rows, cols = l1.shape
    l1flat = cgt.reshape(l1, [batchsize, channels * rows * cols])
    pyx = nn.softmax(l1flat.dot(w2))
    return l1, pyx
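# A hedged usage sketch for tinyconv_model; the input size (1x28x28), filter count,
# dropout rate, and variable names below are illustrative assumptions, not values
# from the original code.
import cgt
import numpy.random as nr
X_tc = cgt.tensor4("X_tc", fixed_shape=(None, 1, 28, 28))
w_tc = cgt.shared(0.01 * nr.randn(8, 1, 3, 3).astype(cgt.floatX), name="w_tc")
# With a 3x3 kernel, pad 1, stride 3, a 28x28 input yields a 10x10 feature map.
w2_tc = cgt.shared(0.01 * nr.randn(8 * 10 * 10, 10).astype(cgt.floatX), name="w2_tc")
l1_tc, pyx_tc = tinyconv_model(X_tc, w_tc, w2_tc, 0.2)
predict_tc = cgt.function([X_tc], pyx_tc)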
def reshape(x, shp):
    from ..utils import wrap_into_tuple
    import operator
    shp = wrap_into_tuple(shp)
    neg_indices = [(idx, shp_slice) for idx, shp_slice in enumerate(shp)
                   if shp_slice == -1]
    if len(neg_indices) > 1:
        raise ValueError('At most one reshaped dimension can be -1')
    elif len(neg_indices) == 1:
        # Infer the -1 dimension so the total number of elements is preserved.
        idx, shp_slice = neg_indices[0]
        neg_size = reduce(operator.mul, x.shape, 1) // \
            reduce(operator.mul, shp[:idx] + shp[idx + 1:], 1)
        shp = tuple(shp[:idx] + (neg_size,) + shp[idx + 1:])
        return cgt.reshape(x, shp)
    else:
        return cgt.reshape(x, shp)
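# Worked example of the -1 inference above (plain Python, independent of cgt):
# reshaping a tensor with 6 * 4 = 24 elements to (-1, 8) infers 24 // 8 = 3, i.e. (3, 8).
import operator
_total = reduce(operator.mul, (6, 4), 1)               # 24 elements
_inferred = _total // reduce(operator.mul, (8,), 1)    # 24 // 8 = 3
assert _inferred == 3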
def denseLayer(nn_input, num_units, activation=rectify, w_init=XavierNormal(),
               bias_init=Constant(0)):
    """
    Batch by feature input.
    """
    if len(nn_input.shape) > 2:
        nn_input = cgt.reshape(
            nn_input,
            [nn_input.shape[0], reduce(lambda x, y: x * y, nn_input.shape[1:])])
    feature_dims = cgt.infer_shape(nn_input)[1]
    return activation(Affine(feature_dims, num_units, weight_init=w_init,
                             bias_init=bias_init)(nn_input))
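# A hedged usage sketch for denseLayer; it assumes rectify, XavierNormal, Constant,
# and Affine are imported from cgt.nn (as the defaults above suggest) and that the
# input's feature dimension is fixed so cgt.infer_shape can resolve it.
import cgt
X_dl = cgt.matrix("X_dl", fixed_shape=(None, 128))
h_dl = denseLayer(X_dl, 64)          # 128 features in, 64 ReLU units out
f_dl = cgt.function([X_dl], h_dl)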
def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3, 3), pad=(1, 1)))
    l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2, 2))
    l1 = nn.dropout(l1, p_drop_conv)

    l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3, 3), pad=(1, 1)))
    l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2, 2))
    l2 = nn.dropout(l2, p_drop_conv)

    l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3, 3), pad=(1, 1)))
    l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2, 2))
    batchsize, channels, rows, cols = l3b.shape
    l3 = cgt.reshape(l3b, [batchsize, channels * rows * cols])
    l3 = nn.dropout(l3, p_drop_conv)

    l4 = nn.rectify(cgt.dot(l3, w4))
    l4 = nn.dropout(l4, p_drop_hidden)

    pyx = nn.softmax(cgt.dot(l4, w_o))
    return pyx
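# A hedged sketch of weight shapes that make convnet_model well-formed for 28x28
# single-channel inputs (an assumed MNIST-style setting; the filter counts and the
# 625-unit hidden layer are illustrative, not taken from the original). Each block
# halves the spatial size via 2x2 pooling, 28 -> 14 -> 7 -> 3, so the flattened
# third block has 128 * 3 * 3 features.
import cgt
import numpy.random as nr
def _shared_randn(*shape):
    return cgt.shared(0.01 * nr.randn(*shape).astype(cgt.floatX))
X_cn = cgt.tensor4("X_cn", fixed_shape=(None, 1, 28, 28))
pyx_cn = convnet_model(X_cn,
                       _shared_randn(32, 1, 3, 3),
                       _shared_randn(64, 32, 3, 3),
                       _shared_randn(128, 64, 3, 3),
                       _shared_randn(128 * 3 * 3, 625),
                       _shared_randn(625, 10),
                       0.2, 0.5)
predict_cn = cgt.function([X_cn], pyx_cn)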
elif layer.type == "Pooling":
    param = layer.pooling_param
    X = inputs[0]
    pool_type = {param.MAX: "max", param.AVE: "mean"}[param.pool]
    height_in, width_in = infer_shape(X)[2:4]
    kernel = (param.kernel_size, param.kernel_size) if param.HasField("kernel_size") \
        else (param.kernel_h, param.kernel_w)
    stride = (param.stride, param.stride) if param.HasField("stride") \
        else (param.stride_h, param.stride_w)
    pad = (param.pad, param.pad) if param.HasField("pad") \
        else (param.pad_h, param.pad_w)
    output = [nn.pool(pool_type, X, stride, kernel, pad)]
elif layer.type == "InnerProduct":
    X = inputs[0]
    if X.ndim == 4:
        # Flatten (N, C, H, W) feature maps to (N, C*H*W) before the fully connected layer.
        X = cgt.reshape(X, [X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]])
    param = layer.inner_product_param
    nchanin = infer_shape(X)[1]
    Wshape = (param.num_output, nchanin)
    Wname = layer.param[0].name or layer.name + ":W"
    Wval = np.empty(Wshape, dtype=cgt.floatX)
    W = name2node[Wname] = cgt.shared(Wval, name=Wname, fixed_shape_mask="all")
    bshape = (1, param.num_output)
    bname = layer.param[1].name or layer.name + ":b"
    bval = np.empty(bshape, dtype=cgt.floatX)
    b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
    yname = layer.top[0]
    output = [cgt.broadcast("+", X.dot(W), b, "xx,1x")]
elif layer.type == "ReLU":
    output = [nn.rectify(inputs[0])]
elif layer.type == "Softmax":
def stack(tensors, axis=0):
    if axis != 0:
        raise ValueError('only axis=0 is supported under cgt')
    # Give each tensor a new leading axis of length 1, then concatenate along it.
    return cgt.concatenate([cgt.reshape(x, [1] + x.shape) for x in tensors], axis=0)
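# A hedged usage sketch for stack: joining two matrices along a new leading axis
# (the shapes and names are illustrative assumptions).
import cgt
a_st = cgt.matrix("a_st", fixed_shape=(5, 3))
b_st = cgt.matrix("b_st", fixed_shape=(5, 3))
s_st = stack([a_st, b_st])           # symbolic tensor of shape (2, 5, 3)
f_st = cgt.function([a_st, b_st], s_st)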
import cgt
from cgt import nn, utils
import numpy as np, numpy.random as nr
from numpy.linalg import norm
from param_collection import ParamCollection

k_in = 1
size_x = 3
size_mem = 4
size_batch = 4

x = cgt.matrix(fixed_shape=(size_batch, size_x))
prev_h = cgt.matrix(fixed_shape=(size_batch, size_mem))

# Predict 2*k_in direction vectors from the input and normalize each to unit length.
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")

prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]
for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    # Remove the component of the hidden state along r_cur, per batch element:
    # inter_out = h - (h . r) r
    ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    inter_out = inter_in - ref_cur
    inters.append(inter_out)
h = inters[-1]
r_nn = nn.Module([x], [h])
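# Plain-numpy check of one step of the loop above (illustration only, reusing the
# numpy import and sizes from this script): subtracting (h . r) r with unit rows r
# leaves each row orthogonal to its r.
_rng2 = np.random.RandomState(0)
_h2 = _rng2.randn(size_batch, size_mem)
_r2 = _rng2.randn(size_batch, size_mem)
_r2 /= np.linalg.norm(_r2, axis=1, keepdims=True)
_out2 = _h2 - (_h2 * _r2).sum(axis=1, keepdims=True) * _r2
assert np.allclose((_out2 * _r2).sum(axis=1), 0)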