# Imports used by the constructors below; conv3d2d and maxpool3d are assumed
# project-local modules (conv3d is also available as
# theano.tensor.nnet.conv3d2d.conv3d).
from numpy import prod, sqrt, zeros
from numpy.random import RandomState

import theano.tensor as T
from theano import shared, config
from theano._asarray import _asarray     # Theano's dtype-cast helper
from conv3d2d import conv3d              # assumed local copy of Theano's conv3d
from maxpool3d import max_pool_3d        # assumed local 3D max-pooling helper

floatX = config.floatX


def __init__(self, input, pool_shape, method="max"):
    """
    method: "max", "avg", "L2", "L4", ...
    """
    # store the constructor arguments as attributes, then drop the
    # self-reference introduced by locals()
    self.__dict__.update(locals())
    del self.self

    if method == "max":
        # pool over the last three axes: (frames, height, width)
        out = max_pool_3d(input, pool_shape)
    else:
        # "avg", "L2", "L4", ... are not implemented
        raise NotImplementedError()

    self.output = out
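# Usage sketch for the pooling constructor above (hypothetical names, not from
# the original source; "PoolLayer" stands for whichever class owns that
# __init__).  It pools a 5D activation tensor of shape
# (batch, maps, frames, height, width) over non-overlapping 2x2x2 windows:
#
#   x = T.TensorType(floatX, (False,) * 5)('x')   # (batch, maps, frames, h, w)
#   pool = PoolLayer(x, pool_shape=(2, 2, 2))     # method defaults to "max"
#   y = pool.output                               # last three axes roughly halved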
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             pool_shape, batch_size, layer_name="Conv", rng=RandomState(1234),
             borrow=True, W=None, b=None):
    """
    video_shape: (frames, height, width)
    kernel_shape: (frames, height, width)
    W_shape: (out, kern_frames, in, kern_height, kern_width)
             -- the filter layout conv3d expects
    """
    n_fr, h, w = video_shape          # input video dimensions
    n_fr_k, h_k, w_k = kernel_shape   # kernel dimensions

    # init W
    if W is not None:
        W_val = W
    else:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 2. * sqrt(1. / fan_in)
        # layout must match the filters_shape passed to conv3d below
        W_shape = (n_out_maps, n_fr_k, n_in_maps, h_k, w_k)
        W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),
                         dtype=floatX)
    self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
    self.params = [self.W]

    # init bias: one scalar per output feature map
    if b is not None:
        b_val = b
    else:
        b_val = zeros((n_out_maps,), dtype=floatX)
    self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
    self.params.append(self.b)

    # 3D convolution; conv3d wants signals as (batch, frames, in_maps, h, w),
    # so swap the maps and frames axes before the call and swap them back after
    out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W,
        signals_shape=(batch_size, n_fr, n_in_maps, h, w),
        filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # max-pool over (frames, height, width), add the bias and squash
    pooled_out = max_pool_3d(out, pool_shape, ignore_border=True)
    pooled_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    self.output = T.tanh(pooled_out)
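# Usage sketch for the convolution constructor above (hypothetical names and
# shapes; "ConvLayer" stands for the owning class): 16-frame 64x64 RGB clips,
# 8 output maps, 5x7x7 kernels, 2x2x2 pooling.
#
#   x = T.TensorType(floatX, (False,) * 5)('x')   # (batch, maps, frames, h, w)
#   conv = ConvLayer(x, n_in_maps=3, n_out_maps=8,
#                    kernel_shape=(5, 7, 7), video_shape=(16, 64, 64),
#                    pool_shape=(2, 2, 2), batch_size=32)
#
# The 'valid' convolution leaves (16-5+1, 64-7+1, 64-7+1) = (12, 58, 58) per
# output map; 2x2x2 pooling with ignore_border=True then gives (6, 29, 29),
# so conv.output has symbolic shape (32, 8, 6, 29, 29).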