Ejemplo n.º 1
0
                    rng=rng,
                    W_scale=net.W_scale,
                    b_scale=net.b_scale,
                    activation=activation))
    out = T.concatenate([layers[-1].output, layers[-2].output], axis=1)

# out = normalize(out)

if use.drop: out = DropoutLayer(out, rng=rng, p=drop.p_hidden).output

# softmax layer
layers.append(
    LogRegr(out,
            rng=rng,
            activation=activation,
            n_in=net.hidden,
            W_scale=net.W_scale,
            b_scale=net.b_scale,
            n_out=20))
"""
layers[-1] : softmax layer
layers[-2] : hidden layer (video if late fusion)
layers[-3] : hidden layer (trajectory, only if late fusion)
"""

# cost function
cost = layers[-1].negative_log_likelihood(y)

if reg.L1_vid > 0 or reg.L2_vid > 0:
    # L1 and L2 regularization
    L1 = T.abs_(layers[-2].W).sum() + T.abs_(layers[-1].W).sum()
                rng=tr.rng,
                W_scale=net.W_scale[-1],
                b_scale=net.b_scale[-1],
                activation=net.activation))
out = layers[-1].output

if tr.inspect: insp.append(T.mean(out))
if use.drop: out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output

insp = T.stack(insp)

# softmax layer
layers.append(
    LogRegr(out,
            rng=tr.rng,
            n_in=net.hidden,
            W_scale=net.W_scale[-1],
            b_scale=net.b_scale[-1],
            n_out=net.n_class))
# number of inputs for MLP = (# maps last stage)*(# convnets)*(resulting video shape) + trajectory size
print 'MLP:', video_cnn.n_in_MLP, "->", net.hidden_penultimate, "+", net.hidden_traj, '->', \
   net.hidden, '->', net.hidden, '->', net.n_class, ""

# cost function
cost = layers[-1].negative_log_likelihood(y)

# function computing the number of errors
errors = layers[-1].errors(y)

# gradient descent
# parameter list
for layer in video_cnn.layers:
Ejemplo n.º 3
0
                    rng=rng,
                    activation=activation))
    n_in_MLP = net.maps[-1] * net.n_convnets * prod(video_shapes[-1])
    layers.append(
        HiddenLayer(vid_,
                    n_in=n_in_MLP,
                    n_out=net.hidden_vid,
                    rng=rng,
                    activation=activation))
    out = T.concatenate([layers[-1].output, layers[-2].output], axis=1)

if use.drop: out = DropoutLayer(out, rng=rng, p=drop.p_hidden).output

# softmax layer
layers.append(
    LogRegr(out, rng=rng, activation=activation, n_in=net.hidden, n_out=20))
"""
layers[-1] : softmax layer
layers[-2] : hidden layer (video if late fusion)
layers[-3] : hidden layer (trajectory, only if late fusion)
"""

# cost function
cost = layers[-1].negative_log_likelihood(y)

if reg.L1_vid > 0 or reg.L2_vid > 0:
    # L1 and L2 regularization
    L1 = T.abs_(layers[-2].W).sum() + T.abs_(layers[-1].W).sum()
    L2 = (layers[-2].W**2).sum() + (layers[-1].W**2).sum()

    cost += reg.L1_vid * L1 + reg.L2_vid * L2
Ejemplo n.º 4
0
                    n_in=n_in_MLP,
                    n_out=net.hidden_vid,
                    rng=rng,
                    W_scale=net.W_scale[-2],
                    b_scale=net.b_scale,
                    activation=activation))
    out = T.concatenate([layers[-1].output, layers[-2].output], axis=1)

# out = normalize(out)
if use.drop: out = DropoutLayer(out, rng=rng, p=drop.p_hidden).output
# softmax layer
layers.append(
    LogRegr(out,
            rng=rng,
            activation=activation,
            n_in=net.hidden,
            W_scale=net.W_scale[-1],
            b_scale=net.b_scale,
            n_out=net.n_class))

#-------------------LATE LATE FUSION---------------------------------------------
# vid_out = layers[-1].p_y_given_x

# n_hidden = 200

# traj_ = t.flatten(2)
# layers.append(HiddenLayer(traj_, n_in=traj_size, n_out=n_hidden, rng=rng,
#         W_scale=0.01, b_scale=net.b_scale, activation=activation))
# traj_out = layers[-1].output

# traj_out = DropoutLayer(traj_out, rng=rng, p=drop.p_hidden).output
    out = layers[-1].output


if tr.inspect: insp = T.stack(insp[0],insp[1],insp[2],insp[3],insp[4],insp[5], T.mean(out))
else: insp =  T.stack(0,0)
# out = normalize(out)
if use.drop: out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output
#maxout
if use.maxout:
    out = maxout(out, (batch.micro,net.hidden))
    net.hidden /= 2

# softmax layer
if use.load:
    Ws, bs = load_params(use) # This is test, wudi added this!
    layers.append(LogRegr(out, W = Ws, b = bs, rng=tr.rng, activation=lin, n_in=net.hidden, 
        W_scale=net.W_scale[-1], b_scale=net.b_scale[-1], n_out=net.n_class))
else:
    layers.append(LogRegr(out, rng=tr.rng, activation=lin, n_in=net.hidden, 
        W_scale=net.W_scale[-1], b_scale=net.b_scale[-1], n_out=net.n_class))


"""
layers[-1] : softmax layer
layers[-2] : hidden layer (video if late fusion)
layers[-3] : hidden layer (trajectory, only if late fusion)
"""
# cost function
cost = layers[-1].negative_log_likelihood(y)

if reg.L1_vid > 0 or reg.L2_vid > 0:
    # L1 and L2 regularization
Ejemplo n.º 6
0
    out = layers[-1].output
else: # late fusion
    n_in_MLP -= net.maps[-1]*net.n_convnets*prod(video_shapes[-1])
    layers.append(HiddenLayer(traj_, n_in=n_in_MLP, n_out=net.hidden_traj, rng=rng, 
        W_scale=net.W_scale[-2], b_scale=net.b_scale, activation=activation))
    n_in_MLP = net.maps[-1]*net.n_convnets*prod(video_shapes[-1])
    layers.append(HiddenLayer(vid_, n_in=n_in_MLP, n_out=net.hidden_vid, rng=rng, 
        W_scale=net.W_scale[-2], b_scale=net.b_scale, activation=activation))
    out = T.concatenate([layers[-1].output, layers[-2].output], axis=1)

# out = normalize(out)

if use.drop: out = DropoutLayer(out, rng=rng, p=drop.p_hidden).output

# softmax layer
layers.append(LogRegr(out, rng=rng, activation=activation, n_in=net.hidden, 
    W_scale=net.W_scale[-1], b_scale=net.b_scale, n_out=20))


#-------------------LATE LATE FUSION---------------------------------------------
# vid_out = layers[-1].p_y_given_x

# n_hidden = 200

# traj_ = t.flatten(2)
# layers.append(HiddenLayer(traj_, n_in=traj_size, n_out=n_hidden, rng=rng, 
#         W_scale=0.01, b_scale=net.b_scale, activation=activation))
# traj_out = layers[-1].output

# traj_out = DropoutLayer(traj_out, rng=rng, p=drop.p_hidden).output

# layers.append(LogRegr(traj_out, rng=rng, activation=activation, n_in=n_hidden, 
Ejemplo n.º 7
0
def build(cpu):
    """Rebuild the trained 3D-ConvNet gesture classifier from saved weights
    and compile a Theano prediction function.

    Loads all layer parameters from the pickled archive ``params.zip``,
    reconstructs the two-stream (gray + depth) convolutional network plus the
    hidden and softmax layers with those fixed weights, and compiles an
    evaluation function over a shared input buffer.

    Parameters
    ----------
    cpu : bool
        True when running on CPU. In that case the convolution kernels are
        flipped (cv2.flip with flipCode=-1, i.e. around both axes) —
        presumably to compensate for a correlation-vs-convolution difference
        between the CPU and fast GPU conv implementations (TODO confirm).
        Also disables ``use.fast_conv``.

    Returns
    -------
    (eval_model, x_) : tuple
        eval_model : compiled Theano function taking no arguments; reads the
            video batch from the shared variable ``x_`` and returns
            ``[p_y_given_x]``, the class-probability matrix of the softmax.
        x_ : Theano shared variable of shape ``in_shape`` — caller writes the
            input video into it before calling ``eval_model``.

    Notes
    -----
    Python 2 code (print statements, xrange). Relies on module-level names
    not visible here: config, load, shared, array, empty, prod, random, cv2,
    relu, TensorType, T, function, and the layer classes NormLayer,
    ConvLayer, PoolLayer, HiddenLayer, LogRegr.
    """

    # constants
    floatX = config.floatX
    enum = enumerate

    # Load the pickled parameter list; order is fixed by the training code:
    # 3 conv stages x 2 convnets x (W, b), then hidden (Wh, bh) and
    # softmax (Ws, bs) parameters.
    file = gzip.GzipFile("params.zip", 'rb')
    params = load(file)
    file.close()
    print params
    # W[stage][convnet], b[stage][convnet] for 3 stages x 2 convnets
    W = [[None,None],[None,None],[None,None]]
    b = [[None,None],[None,None],[None,None]]
    W[0][0],b[0][0],W[0][1],b[0][1],W[1][0],b[1][0],W[1][1],b[1][1],W[2][0],b[2][0],W[2][1],b[2][1],Wh,bh,Ws,bs = params

    #-----------------------------FLIP KERNEL------------------------------------------
    # On CPU, flip every 2D kernel slice around both axes (flipCode=-1) and
    # re-wrap as shared variables. NOTE(review): presumably converts kernels
    # between correlation and convolution conventions of the two conv
    # backends — confirm against the training-side ConvLayer implementation.
    if cpu:
        W = array(W)
        W_new = [[None,None],[None,None],[None,None]]
        for i in range(W.shape[0]):
            for j in range(W.shape[1]):
                w = W[i,j].get_value()
                print w.shape, w.dtype
                # w is indexed [out_map, in_map, frame, h, w]; flip each
                # frame's 2D kernel in place
                for k in range(w.shape[0]):
                    for l in range(w.shape[1]):
                        for m in range(w.shape[2]):
                            w[k,l,m] = cv2.flip(w[k,l,m],-1)
                W_new[i][j] = shared(array(w, dtype=floatX), borrow=True)
        W = W_new
    #-----------------------------FLIP KERNEL------------------------------------------


    rng = random.RandomState(1337) # this will make sure results are always the same
    batch_size = 1

    in_shape = (1,2,2,32,64,64) # (batchsize, maps, frames, w, h) input video shapes 
    traj_shape = (batch_size,3,32) # (batchsize, input shape of the trajectory

    # hyper parameters
    # ------------------------------------------------------------------------------
    # These class-as-namespace blocks mirror the training configuration and
    # must match the settings the loaded params were trained with.

    # use techniques/methods
    class use:
        drop = True # dropout
        depth = True # use depth map as input
        aug = False # data augmentation
        load = False # load params.p file
        traj = False # trajectory
        trajconv = False # convolutions on trajectory
        valid2 = False
        fast_conv = not cpu
        norm_div = False

        norm = True # normalization layer
        mom = True # momentum

    # regularization
    class reg:
        L1_traj = .0 # degree/amount of regularization
        L2_traj = .0 # 1: only L1, 0: only L2
        L1_vid = .0 # degree/amount of regularization
        L2_vid = .0 # 1: only L1, 0: only L2

    class trajconv:
        append = False # append convolutions result to original traject
        filter_size = 5
        layers = 3 # number of convolution layers
        res_shape = traj_shape[-1]-layers*(filter_size-1)

    class net:
        shared_stages = [] # stages where weights are shared
        shared_convnets = [] # convnets that share weights ith beighbouring convnet
        n_convnets = 2 # number of convolutional networks in the architecture
        maps = [2,16,32,64] # feature maps in each convolutional network
        # maps = [2,5,25,25] # feature maps in each convolutional network
        kernels = [(1,7,7), (1,8,8), (1,6,6)] # convolution kernel shapes
        pools = [(2,2,2), (2,2,2), (2,2,2)] # pool/subsampling shapes
        hidden_traj = 200 # hidden units in MLP
        hidden_vid = 300 # hidden units in MLP
        W_scale = 0.01
        b_scale = 0.1
        norm_method = "lcn" # normalisation method: lcn = local contrast normalisation
        pool_method = "max" # maxpool
        fusion = "early" # early or late fusion
        hidden = hidden_traj+hidden_vid if fusion=="late" else 500 # hidden units in MLP
        n_class = 21

    activation = relu
    n_stages = len(net.kernels)
    # video_shapes[i] = (frames, h, w) entering stage i; seeded with the
    # input video dimensions from in_shape
    video_shapes = [in_shape[-3:]]

    def _shared(val, borrow=True):
        # wrap a value as a Theano shared variable in the configured float dtype
        return shared(array(val, dtype=floatX), borrow=borrow)

    def ndtensor(n): return TensorType(floatX, (False,)*n) # n-dimensional tensor

    # propagate the shape through each conv (valid: v-k+1) + pool (/p) stage
    for i in xrange(n_stages):
        k,p,v = array(net.kernels[i]), array(net.pools[i]), array(video_shapes[i])
        conv_s = tuple(v-k+1)
        video_shapes.append(tuple((v-k+1)/p))
    # flattened size feeding the MLP: last-stage maps x convnets x final video shape
    n_in_MLP = net.maps[-1]*net.n_convnets*prod(video_shapes[-1]) 


    def conv_args(stage, i):
        """ ConvLayer arguments, i: stage index """
        # NOTE: here `stage` is the stage index and `i` selects the convnet
        # (gray/depth stream) whose pretrained W/b are injected.
        args = {
            'batch_size':1, 
            'activation':activation, 
            'rng':rng,
            'n_in_maps':net.maps[stage],
            'n_out_maps':net.maps[stage+1], 
            'kernel_shape':net.kernels[stage], 
            'video_shape':video_shapes[stage],
            "fast_conv":use.fast_conv,
            "layer_name":"Conv"+str(stage),
            "W_scale":net.W_scale,
            "b_scale":net.b_scale,
            "stride":1,
            "W":W[stage][i],
            "b":b[stage][i]
        }
        return args

    # print conv_args(0,0)
    x = ndtensor(len(in_shape))(name = 'x') # video input


    def var_norm(_x,imgs=True,axis=[-3,-2,-1]):
        # zero-mean / unit-std normalisation; std clamped at 1e-4 to avoid
        # division by ~0 on flat inputs
        if imgs:
            return (_x-T.mean(_x,axis=axis,keepdims=True))/T.maximum(1e-4,T.std(_x,axis=axis,keepdims=True))
        return (_x-T.mean(_x))/T.maximum(1e-4,T.std(_x))
    def std_norm(_x,axis=[-3,-2,-1]):
        # divide by (clamped) std only, without mean-centering
        return _x/T.maximum(1e-4,T.std(_x,axis=axis,keepdims=True))

    # two streams: x[:,0] and x[:,1] (presumably the two camera/body inputs
    # matching n_convnets=2 — confirm against the data layout)
    out = [x[:,0], x[:,1]]

    for stage in xrange(n_stages):
        for i in xrange(len(out)): # for each convnet of the stage
            if stage==0: 
                # first stage: gray channel (0) gets LCN + std norm,
                # depth channel (1) gets mean/std norm, then re-concatenate
                gray_norm = NormLayer(out[i][:,0:1], method="lcn",use_divisor=False).output
                gray_norm = std_norm(gray_norm)
                depth_norm = var_norm(out[i][:,1:])
                out[i]  = T.concatenate([gray_norm,depth_norm],axis=1)
            else:
                out[i] = NormLayer(out[i], method="lcn",use_divisor=False).output
                out[i] = std_norm(out[i])
            out[i] = ConvLayer(out[i], **conv_args(stage, i)).output
            out[i] = PoolLayer(out[i], net.pools[stage], method=net.pool_method).output

    # flatten both streams and concatenate for the fully-connected part
    out = [out[i].flatten(2) for i in range(len(out))]
    out = T.concatenate(out, axis=1)

    #hidden layer
    out = HiddenLayer(out, 
            W = Wh,
            b = bh,
            n_in=n_in_MLP, 
            n_out=net.hidden, 
            rng=rng, 
            activation=activation).output

    logreg = LogRegr(out, 
        W = Ws,
        b = bs,
        rng=rng, 
        activation=activation, 
        n_in=net.hidden, 
        n_out=net.n_class)

    pred = logreg.p_y_given_x

    # shared input buffer the caller fills before each evaluation
    x_ = _shared(empty(in_shape))

    print "compiling..."
    eval_model = function([], [pred], 
        givens={x:x_},
        on_unused_input='ignore')
    print "compiling done"

    return eval_model, x_
Ejemplo n.º 8
0
#     # net.hidden /= 2
#----EXTRA LAYER----------------------------------------------------------------

# softmax layer
args = {
    'activation': lin,
    'rng': rng,
    "W_scale": net.W_scale[-1],
    "b_scale": net.b_scale[-1],
    "n_in": net.hidden,
    "n_out": net.n_class
}
if use.load:
    args["W"], args["b"] = load_params()

layers.append(LogRegr(out, **args))
"""
layers[-1] : softmax layer
layers[-2] : hidden layer (video if late fusion)
layers[-3] : hidden layer (trajectory, only if late fusion)


"""

# cost function
cost = layers[-1].negative_log_likelihood(y)

if reg.L1_vid > 0 or reg.L2_vid > 0:
    # L1 and L2 regularization
    L1 = T.abs_(layers[-2].W).sum() + T.abs_(layers[-1].W).sum()
    L2 = (layers[-2].W**2).sum() + (layers[-1].W**2).sum()