def build_model():
    net = {}
    net['input'] = InputLayer((1, 3, IMAGE_W, IMAGE_W))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2, mode='average_exc_pad')
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2, mode='average_exc_pad')
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_4'], 2, mode='average_exc_pad')
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_4'], 2, mode='average_exc_pad')
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_4'], 2, mode='average_exc_pad')
    return net
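# A minimal usage sketch (assumes Lasagne/Theano, that the InputLayer/ConvLayer/
# PoolLayer aliases and IMAGE_W used by build_model are in scope, and that
# pretrained VGG-19 weights have already been loaded into `net`): pull the
# activations of a named layer, e.g. conv4_2 as a content representation.
import theano
import theano.tensor as T
import lasagne

net = build_model()
photo = T.tensor4('photo')
content_features = lasagne.layers.get_output(net['conv4_2'], photo, deterministic=True)
content_fn = theano.function([photo], content_features)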
def mutate_pool_unit(self, unit, eta):
    # Mutate the kernel size (encoded in log2 space) and the pool type.
    ksize = np.log2(unit.kernel_width)
    pool_type = unit.kernel_type
    new_ksize = self.pm(self.pool_kernel_size_range[0], self.pool_kernel_size_range[-1], ksize, eta)
    new_ksize = int(np.power(2, new_ksize))
    new_pool_type = self.pm(0, 1, pool_type, eta)
    pool_layer = PoolLayer(kernel_size=[new_ksize, new_ksize], pool_type=new_pool_type)
    return pool_layer
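# The `pm` helper above is not shown in this snippet; a sketch of what it is
# assumed to be follows: Deb's polynomial mutation of a value x within
# [low, up] with distribution index eta, clamped to the bounds at the end.
# The actual implementation in the original codebase may differ.
import numpy as np

def pm(low, up, x, eta):
    delta1 = (x - low) / (up - low)
    delta2 = (up - x) / (up - low)
    rand = np.random.random()
    mut_pow = 1.0 / (eta + 1.0)
    if rand < 0.5:
        xy = 1.0 - delta1
        val = 2.0 * rand + (1.0 - 2.0 * rand) * xy ** (eta + 1.0)
        delta_q = val ** mut_pow - 1.0
    else:
        xy = 1.0 - delta2
        val = 2.0 * (1.0 - rand) + 2.0 * (rand - 0.5) * xy ** (eta + 1.0)
        delta_q = 1.0 - val ** mut_pow
    x = x + delta_q * (up - low)
    return min(max(x, low), up)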
def pool_layer(self, dtype, op, N, C, D=1, H=1, W=1, J=1, T=1, R=1, S=1,
               pad_j=0, pad_d=0, pad_h=0, pad_w=0,
               str_j=None, str_d=None, str_h=None, str_w=None):
    """
    Create a new PoolLayer parameter object.
    This is then passed as an argument to all pooling kernels.

    op: max, avg, l2 pooling
    N: Number of images in mini-batch
    C: Number of input feature maps
    D: Depth of input image
    H: Height of input image
    W: Width of input image
    J: Size of feature map pooling window (maxout n_pieces)
    T: Depth of pooling window
    R: Height of pooling window
    S: Width of pooling window

    padding: amount of zero-padding around the given image or feature map edge
    strides: factor to step the window by in a given direction (overlap allowed)

    Leave spatial dimensions at 1 to allow feature map pooling in the fc layers.
    """
    # Default to non-overlapping windows: stride equals window size.
    if str_j is None: str_j = J
    if str_d is None: str_d = T
    if str_h is None: str_h = R
    if str_w is None: str_w = S

    return PoolLayer(self, dtype, op, N, C, D, H, W, J, T, R, S,
                     pad_j, pad_d, pad_h, pad_w,
                     str_j, str_d, str_h, str_w)
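# Hedged usage example: with a backend object `ng` exposing this factory, a
# 2x2 non-overlapping max pool over 64 maps of 32x32 inputs for a 128-image
# minibatch would look like
#
#   pool = ng.pool_layer(np.float32, "max", N=128, C=64, H=32, W=32, R=2, S=2)
#
# Strides default to the window size (str_h=R, str_w=S), so windows tile the
# input without overlap unless strides are passed explicitly.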
def __init__(self, layers):
    self._network = []
    for layer in layers:
        layer_type = layer.pop("type")
        if layer_type == "data":
            # This is a data layer.
            new_layer = DataLayer(**layer)
        elif layer_type == "conv":
            new_layer = ConvLayer(**layer)
        elif layer_type == "pool":
            new_layer = PoolLayer(**layer)
        elif layer_type == "dense":
            new_layer = DenseLayer(**layer)
        elif layer_type == "relu":
            new_layer = ReLULayer()
        elif layer_type == "loss":
            new_layer = LossLayer(**layer)
        else:
            raise NotImplementedError(
                "Layer type: {0} not found".format(layer_type))
        self._network.append(new_layer)
    self.initialize()
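# Illustrative spec list for this constructor (the keyword names inside each
# dict are hypothetical, not taken from the original codebase; each dict
# carries a "type" plus that layer's keyword arguments):
layer_specs = [
    {"type": "data", "shape": (1, 28, 28)},
    {"type": "conv", "num_filters": 32, "filter_size": 3},
    {"type": "relu"},
    {"type": "pool", "pool_size": 2},
    {"type": "dense", "n_units": 10},
    {"type": "loss", "loss": "softmax"},
]
# network = Network(layer_specs)  # enclosing class name assumed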
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    num_seq = config['num_seq']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    img_scale_x = config['img_scale_x']
    img_scale_y = config['img_scale_y']
    reg_scale_x = config['reg_scale_x']
    reg_scale_y = config['reg_scale_y']
    use_noise = T.fscalar('use_noise')
    input_dim = config['input_dim']
    print('... building the model')

    self.layers = []
    params = []
    weight_types = []
    x_temporal = T.ftensor4('x')

    conv1_temporal = ConvPoolLayer(input=x_temporal, image_shape=(input_dim, img_scale_x, img_scale_y, batch_size), filter_shape=(input_dim, 7, 7, 64), convstride=2, padsize=3, group=1, poolsize=3, poolstride=2, bias_init=0.0, lrn=False, Bn=True, lib_conv=lib_conv, caffe_style=True, poolpadsize=(1, 1))
    self.layers.append(conv1_temporal)
    conv_temporal_2_reduce = ConvPoolLayer(input=conv1_temporal.output, image_shape=(64, 56, 56, batch_size), filter_shape=(64, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=0, bias_init=0.0, lrn=False, Bn=True, lib_conv=lib_conv)
    self.layers.append(conv_temporal_2_reduce)
    convpool_temporal_2 = ConvPoolLayer(input=conv_temporal_2_reduce.output, image_shape=(64, 56, 56, batch_size), filter_shape=(64, 3, 3, 192), convstride=1, padsize=1, group=1, poolsize=3, poolstride=2, bias_init=0.0, lrn=False, Bn=True, lib_conv=lib_conv, caffe_style=True, poolpadsize=(1, 1))
    self.layers.append(convpool_temporal_2)

    # ---- 3a ----
    inception_temporal_3a_1x1 = ConvPoolLayer(input=convpool_temporal_2.output, image_shape=(192, 28, 28, batch_size), filter_shape=(192, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_1x1)
    inception_temporal_3a_3x3_reduce = ConvPoolLayer(input=convpool_temporal_2.output, image_shape=(192, 28, 28, batch_size), filter_shape=(192, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_3x3_reduce)
    inception_temporal_3a_3x3 = ConvPoolLayer(input=inception_temporal_3a_3x3_reduce.output, image_shape=(64, 28, 28, batch_size), filter_shape=(64, 3, 3, 64), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_3x3)
    inception_temporal_3a_double_3x3_reduce = ConvPoolLayer(input=convpool_temporal_2.output, image_shape=(192, 28, 28, batch_size), filter_shape=(192, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_double_3x3_reduce)
    inception_temporal_3a_double_3x3_1 = ConvPoolLayer(input=inception_temporal_3a_double_3x3_reduce.output, image_shape=(64, 28, 28, batch_size), filter_shape=(64, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_double_3x3_1)
    inception_temporal_3a_double_3x3_2 = ConvPoolLayer(input=inception_temporal_3a_double_3x3_1.output, image_shape=(96, 28, 28, batch_size), filter_shape=(96, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_double_3x3_2)
    inception_temporal_3a_pool = PoolLayer(input=convpool_temporal_2.output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_3a_pool_proj = ConvPoolLayer(input=inception_temporal_3a_pool.output, image_shape=(192, 28, 28, batch_size), filter_shape=(192, 1, 1, 32), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3a_pool_proj)
    inception_temporal_3a_output = T.concatenate([inception_temporal_3a_1x1.output, inception_temporal_3a_3x3.output, inception_temporal_3a_double_3x3_2.output, inception_temporal_3a_pool_proj.output], axis=0)

    # ---- 3b ----
    inception_temporal_3b_1x1 = ConvPoolLayer(input=inception_temporal_3a_output, image_shape=(256, 28, 28, batch_size), filter_shape=(256, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_1x1)
    inception_temporal_3b_3x3_reduce = ConvPoolLayer(input=inception_temporal_3a_output, image_shape=(256, 28, 28, batch_size), filter_shape=(256, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_3x3_reduce)
    inception_temporal_3b_3x3 = ConvPoolLayer(input=inception_temporal_3b_3x3_reduce.output, image_shape=(64, 28, 28, batch_size), filter_shape=(64, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_3x3)
    inception_temporal_3b_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_3a_output, image_shape=(256, 28, 28, batch_size), filter_shape=(256, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_double_3x3_reduce)
    inception_temporal_3b_double_3x3_1 = ConvPoolLayer(input=inception_temporal_3b_double_3x3_reduce.output, image_shape=(64, 28, 28, batch_size), filter_shape=(64, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_double_3x3_1)
    inception_temporal_3b_double_3x3_2 = ConvPoolLayer(input=inception_temporal_3b_double_3x3_1.output, image_shape=(96, 28, 28, batch_size), filter_shape=(96, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_double_3x3_2)
    inception_temporal_3b_pool = PoolLayer(input=inception_temporal_3a_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_3b_pool_proj = ConvPoolLayer(input=inception_temporal_3b_pool.output, image_shape=(256, 28, 28, batch_size), filter_shape=(256, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3b_pool_proj)
    inception_temporal_3b_output = T.concatenate([inception_temporal_3b_1x1.output, inception_temporal_3b_3x3.output, inception_temporal_3b_double_3x3_2.output, inception_temporal_3b_pool_proj.output], axis=0)

    # ---- 3c ----
    inception_temporal_3c_3x3_reduce = ConvPoolLayer(input=inception_temporal_3b_output, image_shape=(320, 28, 28, batch_size), filter_shape=(320, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3c_3x3_reduce)
    inception_temporal_3c_3x3 = ConvPoolLayer(input=inception_temporal_3c_3x3_reduce.output, image_shape=(128, 28, 28, batch_size), filter_shape=(128, 3, 3, 160), convstride=2, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3c_3x3)
    inception_temporal_3c_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_3b_output, image_shape=(320, 28, 28, batch_size), filter_shape=(320, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3c_double_3x3_reduce)
    inception_temporal_3c_double_3x3_1 = ConvPoolLayer(input=inception_temporal_3c_double_3x3_reduce.output, image_shape=(64, 28, 28, batch_size), filter_shape=(64, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3c_double_3x3_1)
    inception_temporal_3c_double_3x3_2 = ConvPoolLayer(input=inception_temporal_3c_double_3x3_1.output, image_shape=(96, 28, 28, batch_size), filter_shape=(96, 3, 3, 96), convstride=2, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_3c_double_3x3_2)
    inception_temporal_3c_pool = PoolLayer(input=inception_temporal_3b_output, poolsize=3, poolstride=2, lib_conv=lib_conv, caffe_style=True, poolpad=1)
    inception_temporal_3c_output = T.concatenate([inception_temporal_3c_3x3.output, inception_temporal_3c_double_3x3_2.output, inception_temporal_3c_pool.output], axis=0)

    # ---- 4a ----
    inception_temporal_4a_1x1 = ConvPoolLayer(input=inception_temporal_3c_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 224), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_1x1)
    inception_temporal_4a_3x3_reduce = ConvPoolLayer(input=inception_temporal_3c_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 64), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_3x3_reduce)
    inception_temporal_4a_3x3 = ConvPoolLayer(input=inception_temporal_4a_3x3_reduce.output, image_shape=(64, 14, 14, batch_size), filter_shape=(64, 3, 3, 96), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_3x3)
    inception_temporal_4a_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_3c_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 96), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_double_3x3_reduce)
    inception_temporal_4a_double_3x3_1 = ConvPoolLayer(input=inception_temporal_4a_double_3x3_reduce.output, image_shape=(96, 14, 14, batch_size), filter_shape=(96, 3, 3, 128), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_double_3x3_1)
    inception_temporal_4a_double_3x3_2 = ConvPoolLayer(input=inception_temporal_4a_double_3x3_1.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 128), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_double_3x3_2)
    inception_temporal_4a_pool = PoolLayer(input=inception_temporal_3c_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_4a_pool_proj = ConvPoolLayer(input=inception_temporal_4a_pool.output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4a_pool_proj)
    inception_temporal_4a_output = T.concatenate([inception_temporal_4a_1x1.output, inception_temporal_4a_3x3.output, inception_temporal_4a_double_3x3_2.output, inception_temporal_4a_pool_proj.output], axis=0)

    # ---- 4b ----
    inception_temporal_4b_1x1 = ConvPoolLayer(input=inception_temporal_4a_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 192), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_1x1)
    inception_temporal_4b_3x3_reduce = ConvPoolLayer(input=inception_temporal_4a_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 96), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_3x3_reduce)
    inception_temporal_4b_3x3 = ConvPoolLayer(input=inception_temporal_4b_3x3_reduce.output, image_shape=(96, 14, 14, batch_size), filter_shape=(96, 3, 3, 128), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_3x3)
    inception_temporal_4b_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_4a_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 96), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_double_3x3_reduce)
    inception_temporal_4b_double_3x3_1 = ConvPoolLayer(input=inception_temporal_4b_double_3x3_reduce.output, image_shape=(96, 14, 14, batch_size), filter_shape=(96, 3, 3, 128), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_double_3x3_1)
    inception_temporal_4b_double_3x3_2 = ConvPoolLayer(input=inception_temporal_4b_double_3x3_1.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 128), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_double_3x3_2)
    inception_temporal_4b_pool = PoolLayer(input=inception_temporal_4a_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_4b_pool_proj = ConvPoolLayer(input=inception_temporal_4b_pool.output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4b_pool_proj)
    inception_temporal_4b_output = T.concatenate([inception_temporal_4b_1x1.output, inception_temporal_4b_3x3.output, inception_temporal_4b_double_3x3_2.output, inception_temporal_4b_pool_proj.output], axis=0)

    # ---- 4c ----
    inception_temporal_4c_1x1 = ConvPoolLayer(input=inception_temporal_4b_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 160), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_1x1)
    inception_temporal_4c_3x3_reduce = ConvPoolLayer(input=inception_temporal_4b_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_3x3_reduce)
    inception_temporal_4c_3x3 = ConvPoolLayer(input=inception_temporal_4c_3x3_reduce.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 160), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_3x3)
    inception_temporal_4c_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_4b_output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_double_3x3_reduce)
    inception_temporal_4c_double_3x3_1 = ConvPoolLayer(input=inception_temporal_4c_double_3x3_reduce.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 160), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_double_3x3_1)
    inception_temporal_4c_double_3x3_2 = ConvPoolLayer(input=inception_temporal_4c_double_3x3_1.output, image_shape=(160, 14, 14, batch_size), filter_shape=(160, 3, 3, 160), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_double_3x3_2)
    inception_temporal_4c_pool = PoolLayer(input=inception_temporal_4b_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_4c_pool_proj = ConvPoolLayer(input=inception_temporal_4c_pool.output, image_shape=(576, 14, 14, batch_size), filter_shape=(576, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4c_pool_proj)
    inception_temporal_4c_output = T.concatenate([inception_temporal_4c_1x1.output, inception_temporal_4c_3x3.output, inception_temporal_4c_double_3x3_2.output, inception_temporal_4c_pool_proj.output], axis=0)

    # ---- 4d ----
    inception_temporal_4d_1x1 = ConvPoolLayer(input=inception_temporal_4c_output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 96), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_1x1)
    inception_temporal_4d_3x3_reduce = ConvPoolLayer(input=inception_temporal_4c_output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_3x3_reduce)
    inception_temporal_4d_3x3 = ConvPoolLayer(input=inception_temporal_4d_3x3_reduce.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 192), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_3x3)
    inception_temporal_4d_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_4c_output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 160), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_double_3x3_reduce)
    inception_temporal_4d_double_3x3_1 = ConvPoolLayer(input=inception_temporal_4d_double_3x3_reduce.output, image_shape=(160, 14, 14, batch_size), filter_shape=(160, 3, 3, 192), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_double_3x3_1)
    inception_temporal_4d_double_3x3_2 = ConvPoolLayer(input=inception_temporal_4d_double_3x3_1.output, image_shape=(192, 14, 14, batch_size), filter_shape=(192, 3, 3, 192), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_double_3x3_2)
    inception_temporal_4d_pool = PoolLayer(input=inception_temporal_4c_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_4d_pool_proj = ConvPoolLayer(input=inception_temporal_4d_pool.output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4d_pool_proj)
    inception_temporal_4d_output = T.concatenate([inception_temporal_4d_1x1.output, inception_temporal_4d_3x3.output, inception_temporal_4d_double_3x3_2.output, inception_temporal_4d_pool_proj.output], axis=0)

    # ---- 4e ----
    inception_temporal_4e_3x3_reduce = ConvPoolLayer(input=inception_temporal_4d_output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4e_3x3_reduce)
    inception_temporal_4e_3x3 = ConvPoolLayer(input=inception_temporal_4e_3x3_reduce.output, image_shape=(128, 14, 14, batch_size), filter_shape=(128, 3, 3, 192), convstride=2, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4e_3x3)
    inception_temporal_4e_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_4d_output, image_shape=(608, 14, 14, batch_size), filter_shape=(608, 1, 1, 192), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4e_double_3x3_reduce)
    inception_temporal_4e_double_3x3_1 = ConvPoolLayer(input=inception_temporal_4e_double_3x3_reduce.output, image_shape=(192, 14, 14, batch_size), filter_shape=(192, 3, 3, 256), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4e_double_3x3_1)
    inception_temporal_4e_double_3x3_2 = ConvPoolLayer(input=inception_temporal_4e_double_3x3_1.output, image_shape=(256, 14, 14, batch_size), filter_shape=(256, 3, 3, 256), convstride=2, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_4e_double_3x3_2)
    inception_temporal_4e_pool = PoolLayer(input=inception_temporal_4d_output, poolsize=3, poolstride=2, lib_conv=lib_conv, caffe_style=True, poolpad=1)
    inception_temporal_4e_output = T.concatenate([inception_temporal_4e_3x3.output, inception_temporal_4e_double_3x3_2.output, inception_temporal_4e_pool.output], axis=0)

    # ---- 5a ----
    inception_temporal_5a_1x1 = ConvPoolLayer(input=inception_temporal_4e_output, image_shape=(1056, 7, 7, batch_size), filter_shape=(1056, 1, 1, 352), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_1x1)
    inception_temporal_5a_3x3_reduce = ConvPoolLayer(input=inception_temporal_4e_output, image_shape=(1056, 7, 7, batch_size), filter_shape=(1056, 1, 1, 192), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_3x3_reduce)
    inception_temporal_5a_3x3 = ConvPoolLayer(input=inception_temporal_5a_3x3_reduce.output, image_shape=(192, 7, 7, batch_size), filter_shape=(192, 3, 3, 320), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_3x3)
    inception_temporal_5a_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_4e_output, image_shape=(1056, 7, 7, batch_size), filter_shape=(1056, 1, 1, 160), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_double_3x3_reduce)
    inception_temporal_5a_double_3x3_1 = ConvPoolLayer(input=inception_temporal_5a_double_3x3_reduce.output, image_shape=(160, 7, 7, batch_size), filter_shape=(160, 3, 3, 224), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_double_3x3_1)
    inception_temporal_5a_double_3x3_2 = ConvPoolLayer(input=inception_temporal_5a_double_3x3_1.output, image_shape=(224, 7, 7, batch_size), filter_shape=(224, 3, 3, 224), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_double_3x3_2)
    inception_temporal_5a_pool = PoolLayer(input=inception_temporal_4e_output, poolsize=3, poolstride=1, poolpad=1, poolmode='average_inc_pad', lib_conv=lib_conv)
    inception_temporal_5a_pool_proj = ConvPoolLayer(input=inception_temporal_5a_pool.output, image_shape=(1056, 7, 7, batch_size), filter_shape=(1056, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5a_pool_proj)
    inception_temporal_5a_output = T.concatenate([inception_temporal_5a_1x1.output, inception_temporal_5a_3x3.output, inception_temporal_5a_double_3x3_2.output, inception_temporal_5a_pool_proj.output], axis=0)
    inception_temporal_5a_output_1 = inception_temporal_5a_output

    # ---- 5b ----
    inception_temporal_5b_1x1 = ConvPoolLayer(input=inception_temporal_5a_output, image_shape=(1024, 7, 7, batch_size), filter_shape=(1024, 1, 1, 352), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_1x1)
    inception_temporal_5b_3x3_reduce = ConvPoolLayer(input=inception_temporal_5a_output, image_shape=(1024, 7, 7, batch_size), filter_shape=(1024, 1, 1, 192), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_3x3_reduce)
    inception_temporal_5b_3x3 = ConvPoolLayer(input=inception_temporal_5b_3x3_reduce.output, image_shape=(192, 7, 7, batch_size), filter_shape=(192, 3, 3, 320), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_3x3)
    inception_temporal_5b_double_3x3_reduce = ConvPoolLayer(input=inception_temporal_5a_output, image_shape=(1024, 7, 7, batch_size), filter_shape=(1024, 1, 1, 192), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_double_3x3_reduce)
    inception_temporal_5b_double_3x3_1 = ConvPoolLayer(input=inception_temporal_5b_double_3x3_reduce.output, image_shape=(192, 7, 7, batch_size), filter_shape=(192, 3, 3, 224), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_double_3x3_1)
    inception_temporal_5b_double_3x3_2 = ConvPoolLayer(input=inception_temporal_5b_double_3x3_1.output, image_shape=(224, 7, 7, batch_size), filter_shape=(224, 3, 3, 224), convstride=1, padsize=1, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_double_3x3_2)
    inception_temporal_5b_pool = PoolLayer(input=inception_temporal_5a_output, poolsize=3, poolstride=1, poolpad=1, lib_conv=lib_conv)
    inception_temporal_5b_pool_proj = ConvPoolLayer(input=inception_temporal_5b_pool.output, image_shape=(1024, 7, 7, batch_size), filter_shape=(1024, 1, 1, 128), convstride=1, padsize=0, group=1, poolsize=1, poolstride=1, bias_init=0.0, lib_conv=lib_conv, Bn=True)
    self.layers.append(inception_temporal_5b_pool_proj)
    # params += inception_temporal_5b_pool_proj.params
    # weight_types += inception_temporal_5b_pool_proj.weight_type

    dummy_fea = T.zeros([1024, 1, num_seq, batch_size / num_seq])
    pool5_fea_tmp = T.reshape(inception_temporal_5a_output_1, [1024, reg_scale_x * reg_scale_y, num_seq, batch_size / num_seq])
    pool5_fea_tmp = T.concatenate([pool5_fea_tmp, dummy_fea], axis=1)
    pool5_fea_tmp = pool5_fea_tmp.dimshuffle(1, 3, 2, 0)
    self.fea_tmp = pool5_fea_tmp
    # self.fea_lstm_tmp = pool5_fea_tmp

    self.params = params
    self.x_temporal = x_temporal
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.num_seq = num_seq
    self.use_noise = use_noise
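# Note on the concatenations above: image_shape is (channels, rows, cols,
# batch), the cuda-convnet-style "c01b" layout, so T.concatenate(..., axis=0)
# stacks the branch outputs along the channel axis, exactly like the filter
# concatenation of an Inception module.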
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]

    X = T.matrix(name="input", dtype=dtype)   # batch of sequences of vectors
    Y = T.matrix(name="output", dtype=dtype)  # batch of sequences of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "same"
    cnn_batch_size = batch_size
    pool_size = (2, 2)

    # Layer 1: conv + pool + dropout
    filter_shape = (128, 1, 10, 10)
    input_shape = (cnn_batch_size, 1, 144, 176)  # (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

    # Layer 2: conv + pool
    subsample = (1, 1)
    filter_shape = (256, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

    # Layer 3: conv + pool
    filter_shape = (256, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

    # Layer 4: conv + pool
    filter_shape = (128, p3.output_shape[1], 3, 3)
    c4 = ConvLayer(rng, p3.output, filter_shape, p3.output_shape, border_mode, subsample, activation=nn.relu)
    p4 = PoolLayer(c4.output, pool_size=pool_size, input_shape=c4.output_shape)

    # Layer 5: hidden
    n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
    x_flat = p4.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

    # Layer 6: output
    lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
    self.output = lreg.y_pred

    self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params
    cost = get_err_fn(self, cost_function, Y)

    # L2 weight decay over every parameter tensor. The original indexed
    # param[0] and param[1], which only sums the first two rows of each
    # tensor; summing the full tensor matches the sibling models here.
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += T.sum(param ** 2)
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
    self.n_param = count_params(self.params)
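# Illustrative hyper-parameter dict for the constructors in this family
# (values are placeholders; the keys match the lookups above):
params = {"lr": 1e-3, "batch_size": 16, "seq_length": 20,
          "n_hidden": 512, "n_output": 42}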
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    n_lstm = params['n_hidden']
    n_out = params['n_output']
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]

    X = T.tensor3()  # batch of sequences of vectors
    Y = T.tensor3()  # batch of sequences of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "valid"
    cnn_batch_size = batch_size * sequence_length
    pool_size = (2, 2)

    # Layer 1: conv + pool + dropout
    filter_shape = (64, 1, 9, 9)
    input_shape = (cnn_batch_size, 1, 120, 60)  # (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

    # Layer 2: conv + pool
    filter_shape = (128, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

    # Layer 3: conv + pool
    filter_shape = (128, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

    # Layer 4: hidden
    n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
    x_flat = p3.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
    n_in = 1024
    rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

    # Layer 5: LSTM
    self.n_in = n_in
    self.n_lstm = n_lstm
    self.n_out = n_out
    self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
    self.b_y = init_bias(self.n_out, rng=rng, sample='zero')
    layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
    self.params = layer1.params
    self.params.append(self.W_hy)
    self.params.append(self.b_y)

    def step_lstm(x_t, h_tm1, c_tm1):
        [h_t, c_t, y_t] = layer1.run(x_t, h_tm1, c_tm1)
        y = T.dot(y_t, self.W_hy) + self.b_y
        return [h_t, c_t, y]

    H = T.matrix(name="H", dtype=dtype)  # initial hidden state
    C = T.matrix(name="C", dtype=dtype)  # initial cell state
    [h_t, c_t, y_vals], _ = theano.scan(fn=step_lstm, sequences=[rnn_input.dimshuffle(1, 0, 2)], outputs_info=[H, C, None])
    self.output = y_vals.dimshuffle(1, 0, 2)

    self.params = c1.params + c2.params + c3.params + h1.params + self.params
    cost = get_err_fn(self, cost_function, Y)
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += T.sum(param ** 2)
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train, H, C], outputs=[cost, h_t[-1], c_t[-1]], updates=_optimizer.getUpdates(), allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train, H, C], outputs=[self.output, h_t[-1], c_t[-1]], allow_input_downcast=True)
    self.n_param = count_params(self.params)
print("\nclass_label:\n", class_label[0:4], ", shape:", class_label.shape) print("\nclass_dictionary:\n", class_dictionary) # Prepocess images prepocessed_images = apply_zeropadding(file_path) print("prepocessed_images.shape:", prepocessed_images.shape) prepocessed_images = np.transpose(prepocessed_images, (0,3,1,2)) print("prepocessed_images.shape after transpose:", prepocessed_images.shape) # Train-test split 90%-10% X_train, X_test, y_train, y_test = train_test_split(prepocessed_images, class_label, test_size=0.1) cnn = MyCNN ( ConvLayer(filter_size=3,num_filter=3,num_channel=3), DetectorLayer(), PoolLayer(filter_size=3,stride_size=4,mode="Max"), ConvLayer(filter_size=3,num_filter=3,num_channel=3), DetectorLayer(), PoolLayer(filter_size=3,stride_size=1,mode="Max"), FlattenLayer(), DenseLayer(n_units=100, activation='relu'), DenseLayer(n_units=10, activation='relu'), DenseLayer(n_units=1, activation='sigmoid'), ) cnn.fit( features=X_train, target=y_train, batch_size=5, epochs=5, learning_rate=0.1
def create_a_pool(self, kernel_size, stride):
    pool = PoolLayer(kernel=kernel_size, stride=stride)
    return pool
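# Hedged usage example (argument shapes assumed to match PoolLayer's
# expectations in this codebase): a 2x2 window moved 2 cells at a time.
#
#   pool = self.create_a_pool(kernel_size=[2, 2], stride=2)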
def __init__(self, x, input_shape):
    n_convfilter = [16, 32, 64, 64, 64, 64]
    n_fc_filters = [1024]
    n_deconvfilter = [64, 64, 64, 16, 8, 2]
    self.x = x

    # To define weights, define the network structure first.
    x_ = InputLayer(input_shape)
    conv1a = ConvLayer(x_, (n_convfilter[0], 7, 7))
    conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
    pool1 = PoolLayer(conv1b)
    print('Conv1a = ConvLayer(x, (%s, 7, 7) => input_shape %s, output_shape %s)' % (n_convfilter[0], conv1a._input_shape, conv1a._output_shape))
    print('Conv1b = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[0], conv1b._input_shape, conv1b._output_shape))
    print('pool1 => input_shape %s, output_shape %s)' % (pool1._input_shape, pool1._output_shape))

    conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
    conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
    conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
    pool2 = PoolLayer(conv2c)
    print('Conv2a = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[1], conv2a._input_shape, conv2a._output_shape))
    print('Conv2b = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[1], conv2b._input_shape, conv2b._output_shape))

    conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
    conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
    conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
    pool3 = PoolLayer(conv3b)
    print('Conv3a = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[2], conv3a._input_shape, conv3a._output_shape))
    print('Conv3b = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[2], conv3b._input_shape, conv3b._output_shape))
    print('Conv3c = ConvLayer(x, (%s, 1, 1) => input_shape %s, output_shape %s)' % (n_convfilter[1], conv3c._input_shape, conv3c._output_shape))
    print('pool3 => input_shape %s, output_shape %s)' % (pool3._input_shape, pool3._output_shape))

    conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
    conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
    pool4 = PoolLayer(conv4b)

    conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
    conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
    conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
    pool5 = PoolLayer(conv5b)

    conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
    conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
    pool6 = PoolLayer(conv6b)
    print('Conv6a = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[5], conv6a._input_shape, conv6a._output_shape))
    print('Conv6b = ConvLayer(x, (%s, 3, 3) => input_shape %s, output_shape %s)' % (n_convfilter[5], conv6b._input_shape, conv6b._output_shape))
    print('pool6 => input_shape %s, output_shape %s)' % (pool6._input_shape, pool6._output_shape))

    flat6 = FlattenLayer(pool6)
    print('flat6 => input_shape %s, output_shape %s)' % (flat6._input_shape, flat6._output_shape))
    fc7 = TensorProductLayer(flat6, n_fc_filters[0])
    print('fc7 => input_shape %s, output_shape %s)' % (fc7._input_shape, fc7._output_shape))

    # Set the size to be 64x4x4x4
    # s_shape_1d = (cfg.batch, n_deconvfilter[0])
    s_shape_1d = (cfg.batch, n_fc_filters[0])
    self.prev_s = InputLayer(s_shape_1d)
    # view_features_shape = (cfg.batch, n_fc_filters[0], cfg.CONST.N_VIEWS)

    self.t_x_s_update = FCConv1DLayer(self.prev_s, fc7, n_fc_filters[0], isTrainable=True)
    self.t_x_s_reset = FCConv1DLayer(self.prev_s, fc7, n_fc_filters[0], isTrainable=True)
    self.reset_gate = SigmoidLayer(self.t_x_s_reset)
    # The original referenced the undefined name `prev_s` here; the attribute
    # self.prev_s is what is in scope.
    self.rs = EltwiseMultiplyLayer(self.reset_gate, self.prev_s)
    self.t_x_rs = FCConv1DLayer(self.rs, fc7, n_fc_filters[0], isTrainable=True)

    def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
        # Scan function cannot use a compiled function.
        input_ = InputLayer(input_shape, x_curr)
        conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
        rect1a_ = LeakyReLU(conv1a_)
        conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
        rect1_ = LeakyReLU(conv1b_)
        pool1_ = PoolLayer(rect1_)

        conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
        rect2a_ = LeakyReLU(conv2a_)
        conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
        rect2_ = LeakyReLU(conv2b_)
        conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
        res2_ = AddLayer(conv2c_, rect2_)
        pool2_ = PoolLayer(res2_)

        conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
        rect3a_ = LeakyReLU(conv3a_)
        conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
        rect3_ = LeakyReLU(conv3b_)
        conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
        res3_ = AddLayer(conv3c_, rect3_)
        pool3_ = PoolLayer(res3_)

        conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
        rect4a_ = LeakyReLU(conv4a_)
        conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
        rect4_ = LeakyReLU(conv4b_)
        pool4_ = PoolLayer(rect4_)

        conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
        rect5a_ = LeakyReLU(conv5a_)
        conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
        rect5_ = LeakyReLU(conv5b_)
        conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
        res5_ = AddLayer(conv5c_, rect5_)
        pool5_ = PoolLayer(res5_)

        conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
        rect6a_ = LeakyReLU(conv6a_)
        conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
        rect6_ = LeakyReLU(conv6b_)
        res6_ = AddLayer(pool5_, rect6_)
        pool6_ = PoolLayer(res6_)

        flat6_ = FlattenLayer(pool6_)
        fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
        rect7_ = LeakyReLU(fc7_)

        prev_s_ = InputLayer(s_shape_1d, prev_s_tensor)
        t_x_s_update_ = FCConv1DLayer(prev_s_, rect7_, n_fc_filters[0], params=self.t_x_s_update.params, isTrainable=True)
        t_x_s_reset_ = FCConv1DLayer(prev_s_, rect7_, n_fc_filters[0], params=self.t_x_s_reset.params, isTrainable=True)
        update_gate_ = SigmoidLayer(t_x_s_update_)
        comp_update_gate_ = ComplementLayer(update_gate_)
        reset_gate_ = SigmoidLayer(t_x_s_reset_)
        rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
        t_x_rs_ = FCConv1DLayer(rs_, rect7_, n_fc_filters[0], params=self.t_x_rs.params, isTrainable=True)
        tanh_t_x_rs_ = TanhLayer(t_x_rs_)
        gru_out_ = AddLayer(EltwiseMultiplyLayer(update_gate_, prev_s_), EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))
        return gru_out_.output, update_gate_.output

    time_features, _ = theano.scan(
        recurrence,
        sequences=[self.x],  # along with the images, feed in the index of the current frame
        outputs_info=[
            tensor.zeros_like(np.zeros(s_shape_1d), dtype=theano.config.floatX),
            tensor.zeros_like(np.zeros(s_shape_1d), dtype=theano.config.floatX)
        ])
    time_all = time_features[0]
    time_last = time_all[-1]
    self.features = time_last
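# The recurrence above is a GRU update expressed with layer objects; in
# equation form it mirrors r = sigmoid(Wr x + Ur s), u = sigmoid(Wu x + Uu s),
# h~ = tanh(Wh x + Uh (r * s)), s' = u * s + (1 - u) * h~, with FCConv1DLayer
# supplying the paired x/s transforms and ComplementLayer providing (1 - u).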
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    n_lstm = params['n_hidden']
    n_out = params['n_output']
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]

    X = T.tensor3()  # batch of sequences of vectors
    Y = T.tensor3()  # batch of sequences of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "valid"
    cnn_batch_size = batch_size * sequence_length
    pool_size = (2, 2)

    # Layer 1: conv + pool + dropout
    filter_shape = (64, 1, 9, 9)
    input_shape = (cnn_batch_size, 1, 120, 60)  # (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1)
    retain_prob = 1. - p_1
    test_output = p1.output * retain_prob
    d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

    # Layer 2: conv + pool
    filter_shape = (128, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, d1_output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

    # Layer 3: conv + pool
    filter_shape = (128, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

    # Layer 4: hidden
    n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
    x_flat = p3.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
    n_in = 1024
    rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

    # Layer 5: GRU
    self.n_in = n_in
    self.n_lstm = n_lstm
    self.n_out = n_out
    self.W_xr = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xr', sample='glorot')
    self.W_hr = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hr', sample='glorot')
    self.b_r = init_bias(self.n_lstm, rng=rng, sample='zero')
    self.W_xz = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xz', sample='glorot')
    self.W_hz = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hz', sample='glorot')
    self.b_z = init_bias(self.n_lstm, rng=rng, sample='zero')
    self.W_xh = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xh', sample='glorot')
    self.W_hh = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hh', sample='glorot')
    self.b_h = init_bias(self.n_lstm, rng=rng, sample='zero')
    self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
    self.b_y = init_bias(self.n_out, rng=rng, sample='zero')
    self.params = [self.W_xr, self.W_hr, self.b_r, self.W_xz, self.W_hz, self.b_z, self.W_xh, self.W_hh, self.b_h, self.W_hy, self.b_y]

    def step_lstm(x_t, h_tm1):
        # GRU step: reset gate r_t, update gate z_t, candidate state h_t.
        r_t = T.nnet.sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
        z_t = T.nnet.sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
        h_t = T.tanh(T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) + self.b_h)
        hh_t = z_t * h_t + (1 - z_t) * h_tm1
        y_t = T.dot(hh_t, self.W_hy) + self.b_y
        return [hh_t, y_t]

    h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state

    # dimshuffle(1, 0, 2): (batch_size, sequence_length, n_in) -> (sequence_length, batch_size, n_in)
    [h_vals, y_vals], _ = theano.scan(fn=step_lstm, sequences=rnn_input.dimshuffle(1, 0, 2), outputs_info=[h0, None])
    self.output = y_vals.dimshuffle(1, 0, 2)

    self.params = c1.params + c2.params + c3.params + h1.params + self.params
    cost = get_err_fn(self, cost_function, Y)
    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
    self.n_param = count_params(self.params)
def add_a_random_pool_layer(self):
    s1 = self.init_kernel_size()
    kernel_size = s1, s1
    pool_type = np.random.random(size=1)
    pool_layer = PoolLayer(kernel_size=kernel_size, pool_type=pool_type[0])
    return pool_layer
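# Sketch of the `init_kernel_size` helper assumed above (hypothetical: it
# draws a power of two so the layer matches mutate_pool_unit's log2
# encoding; the original implementation may differ):
def init_kernel_size(self):
    log_size = np.random.randint(self.pool_kernel_size_range[0], self.pool_kernel_size_range[-1] + 1)
    return int(np.power(2, log_size))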
""" Pooling layer """ height = 5 width = 6 channels = 4 kernel_size = 3 stride = 1 X = np.random.rand(batch_size, height, width, channels) pool_torch = nn.MaxPool2d(kernel_size, stride) X_tensor = torch.tensor(X, requires_grad=True) y_tensor = pool_torch(X_tensor.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) loss_torch = y_tensor[:, :3, :3, 0].sum() loss_torch.backward() pool = PoolLayer("maximum", kernel_size, stride, 0) X_dict = OrderedDict() X_dict["height"] = height X_dict["width"] = width X_dict["channels"] = channels X_dict["data"] = X.reshape(batch_size, -1) y_dict = pool.forward(X_dict) y_grad = OrderedDict() y_grad["grad"] = np.zeros(y_tensor.size()) y_grad["grad"][:, :3, :3, 0] = 1 y_grad["grad"] = y_grad["grad"].reshape(batch_size, -1) X_grad = pool.backward(y_grad) print("Pytorch pooling layer output:") print(y_tensor.reshape(batch_size, -1)) print()
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    n_lstm = params['n_hidden']
    n_out = params['n_output']
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]

    X = T.tensor3()  # batch of sequences of vectors
    Y = T.tensor3()  # batch of sequences of vectors
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "valid"
    cnn_batch_size = batch_size * sequence_length
    pool_size = (2, 2)

    # Layer 1: conv + pool + dropout
    filter_shape = (64, 1, 9, 9)
    input_shape = (cnn_batch_size, 1, 120, 60)  # (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)
    retain_prob = 1. - p_1
    test_output = p1.output * retain_prob
    d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

    # Layer 2: conv + pool
    filter_shape = (128, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, d1_output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

    # Layer 3: conv + pool
    filter_shape = (128, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

    # Layer 4: hidden
    n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
    x_flat = p3.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
    n_in = 1024
    rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

    # Layer 5: stacked LSTMs with dropout after the first layer
    self.n_in = n_in
    self.n_lstm = n_lstm
    self.n_out = n_out
    self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
    self.b_y = init_bias(self.n_out, rng=rng, sample='zero')
    layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
    layer2 = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
    layer3 = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)
    self.params = layer1.params + layer2.params + layer3.params
    self.params.append(self.W_hy)
    self.params.append(self.b_y)

    def step_lstm(x_t, mask, h_tm1_1, c_tm1_1, h_tm1_2, c_tm1_2, h_tm1_3, c_tm1_3):
        [h_t_1, c_t_1, y_t_1] = layer1.run(x_t, h_tm1_1, c_tm1_1)
        dl1 = DropoutLayer(rng, input=y_t_1, prob=0.5, is_train=is_train, mask=mask)
        [h_t_2, c_t_2, y_t_2] = layer2.run(dl1.output, h_tm1_2, c_tm1_2)
        [h_t_3, c_t_3, y_t_3] = layer3.run(y_t_2, h_tm1_3, c_tm1_3)
        y = T.dot(y_t_3, self.W_hy) + self.b_y
        return [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y]

    h0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state
    c0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial cell state
    h0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state
    c0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial cell state
    h0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state
    c0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial cell state

    mask_shape = (sequence_length, batch_size, self.n_lstm)
    p_1 = 0.5
    mask = rng.binomial(size=mask_shape, p=p_1, dtype=X.dtype)

    # dimshuffle(1, 0, 2): (batch_size, sequence_length, n_in) -> (sequence_length, batch_size, n_in)
    [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y_vals], _ = theano.scan(fn=step_lstm, sequences=[rnn_input.dimshuffle(1, 0, 2), mask], outputs_info=[h0_1, c0_1, h0_2, c0_2, h0_3, c0_3, None])
    self.output = y_vals.dimshuffle(1, 0, 2)

    self.params = c1.params + c2.params + c3.params + h1.params + self.params
    cost = get_err_fn(self, cost_function, Y)
    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
    self.n_param = count_params(self.params)