Ejemplo n.º 1
0
Archivo: compare.py Proyecto: zenna/ig
def learn_to_move(nprims=200, nbatch=50, width=224, height=224):
    """Build and compile the training function for a pairwise "move" network.

    The symbolic graph built here does the following:

    1. Renders ``shape_params`` with ``symbolic_render`` and regroups the
       ``nbatch`` rendered images into ``nbatch // 2`` two-channel images.
    2. Feeds those pairs through a conv net whose final layer emits a single
       tanh unit per pair.
    3. Differentiates the batch-averaged network output with respect to the
       shape parameters and takes one gradient step (step size 1.0) on the
       parameters that render to channel 0 of every pair.
    4. Re-renders the moved parameters and penalises the squared pixel
       difference to the untouched channel-1 image (``loss1``), plus a
       Gaussian term on the squared size of the parameter step (``loss2``).
    5. Trains the network weights with Adam on ``loss1 + loss2``.

    Args:
        nprims: Number of primitives per scene.
        nbatch: Number of rendered scenes per minibatch; must be even because
            scenes are consumed in pairs.
        width: First spatial dimension used for rendering and the input layer.
        height: Second spatial dimension used for rendering and the input layer.

    Returns:
        A tuple ``(netcost, output_layer)``. ``netcost`` is the compiled
        function taking ``(fragCoords, shape_params)`` and returning, in order:
        ``loss, loss1, loss2, param_diff, summed_op, delta_shape, res2,
        last-layer gradient, unchanged_img, changed_img, res_reshape2``.
        ``output_layer`` is the final Lasagne layer of the network.

    Raises:
        AssertionError: If ``nbatch`` is odd.
    """
    assert nbatch % 2 == 0, "Minibatch must be even in size"
    params_per_prim = 3
    # Floor division keeps every shape an integer regardless of whether the
    # interpreter uses classic or true division for ``/``.
    half_batch = nbatch // 2

    # Render the input shapes
    fragCoords = T.tensor3('fragCoords')
    shape_params = T.tensor3("scenes")
    res, scan_updates = symbolic_render(nprims, shape_params, fragCoords, width, height)

    # Move the last axis of the render to the front and insert a channel axis.
    res_reshape = res.dimshuffle([2, 'x', 0, 1])

    # Split batch in half and give each image two channels
    res_reshape_split = T.reshape(res_reshape, (half_batch, 2, width, height))

    # Both images of a pair enter the same conv net as separate channels.
    net = {}
    net['input'] = InputLayer((half_batch, 2, width, height), input_var=res_reshape_split)
    net['conv1'] = ConvLayer(net['input'], num_filters=96, filter_size=7, stride=2)
    net['norm1'] = NormLayer(net['conv1'], alpha=0.0001)  # caffe has alpha = alpha * pool_size
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3, ignore_border=False)
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2, ignore_border=False)
    net['conv3'] = ConvLayer(net['pool2'], num_filters=512, filter_size=3, pad=1)
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3, pad=1)
    net['conv5'] = ConvLayer(net['conv4'], num_filters=512, filter_size=3, pad=1)
    net['pool5'] = PoolLayer(net['conv5'], pool_size=3, stride=3, ignore_border=False)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=1, nonlinearity=lasagne.nonlinearities.tanh)
    output_layer = net['fc7']
    output = lasagne.layers.get_output(output_layer)

    # First half move: step the channel-0 parameters along the gradient.
    learning_rate = 1.0
    split_shape = (nprims, half_batch, 2, params_per_prim)
    shape_params_split = T.reshape(shape_params, split_shape)
    first_half_params = shape_params_split[:, :, 0, :]

    # Get partial derivatives of the network output with respect to the shape
    # parameters and move half of them. The split must select the parameters
    # that render to the first channel of each image (not the parameters that
    # render the first half of all images), hence the same reshape as above.
    summed_op = T.sum(output) / nbatch

    delta_shape = T.grad(summed_op, shape_params)
    delta_shape_split = T.reshape(delta_shape, split_shape)
    first_half_delta = delta_shape_split[:, :, 0, :]
    new_first_half = first_half_params - learning_rate * first_half_delta

    # Then render this half again to produce new images (width, height, nbatch/2)
    res2, scan_updates2 = symbolic_render(nprims, new_first_half, fragCoords, width, height)
    res_reshape2 = res2.dimshuffle([2, 0, 1])

    # Channel 1 is left untouched; channel 0 is the one whose parameters moved.
    unchanged_img = res_reshape_split[:, 1, :, :]
    changed_img = res_reshape_split[:, 0, :, :]

    # loss1: mean squared pixel difference, floored at eps per pixel.
    eps = 1e-9
    diff = T.maximum(eps, (unchanged_img - res_reshape2)**2)
    loss1 = T.sum(diff) / (half_batch * width * height)

    # loss2 is to force change and avoid plateaus: it depends on the squared
    # magnitude of the parameter step through ``gauss``.
    param_diff = T.sum(first_half_delta**2) / nbatch
    loss2 = -gauss(param_diff, mu=10.0, sigma=100.0) * 600
    loss = loss1 + loss2

    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    # Alternatives that were tried: nesterov_momentum(loss, params,
    # learning_rate=1.0, momentum=1.0) and rmsprop(loss, params).
    network_updates = lasagne.updates.adam(loss, params)

    # Merge updates. Network updates must not collide with the scan updates.
    for k, v in network_updates.items():
        assert k not in scan_updates
        scan_updates[k] = v

    # FIXME: updates from the second render silently overwrite existing keys;
    # the collision assert is disabled here. Assign per key on purpose so the
    # overwrite semantics stay exactly as they were.
    for k, v in scan_updates2.items():
        scan_updates[k] = v

    # Gradient of the loss w.r.t. the second-to-last parameter of the network,
    # exposed as a diagnostic output.
    all_params = lasagne.layers.get_all_params(output_layer)
    last_layer_params = T.grad(loss, all_params[-2])
    print("Compiling Loss Function")
    netcost = function(
        [fragCoords, shape_params],
        [loss, loss1, loss2, param_diff, summed_op, delta_shape, res2,
         last_layer_params, unchanged_img, changed_img, res_reshape2],
        updates=scan_updates,
        mode=curr_mode,
    )
    return netcost, output_layer
Ejemplo n.º 2
0
def second_order(nprims=200, nbatch=50, width=224, height=224):
    """Build an image-to-shape-parameters network trained through the renderer.

    A conv net maps a single-channel input image to ``nprims * 3`` shape
    parameters. Those parameters are rendered with ``symbolic_render`` and the
    loss is the mean squared pixel difference to the input image (``loss1``)
    plus a negative zero-mean Gaussian of the mean squared deviation of the
    parameters from their batch mean (``loss2``). The network is trained with
    SGD with Nesterov momentum.

    Args:
        nprims: Number of primitives per scene.
        nbatch: Number of images per minibatch.
        width: First spatial dimension of the input and rendered images.
        height: Second spatial dimension of the input and rendered images.

    Returns:
        A tuple ``(render, netcost, output_layer)``. ``render`` is the result
        of ``make_render(nprims, width, height)``. ``netcost`` is the compiled
        function taking ``(fragCoords, img)`` and returning, in order:
        ``loss, grad, res_reshape, shape_params, diff, loss1, loss2`` where
        ``grad`` is the loss gradient w.r.t. the first trainable parameter.
        ``output_layer`` is the final Lasagne layer of the network.
    """
    params_per_prim = 3
    nshape_params = nprims * params_per_prim

    img = T.tensor4("input image")
    net = {}
    net['input'] = InputLayer((nbatch, 1, width, height), input_var=img)
    net['conv1'] = ConvLayer(net['input'], num_filters=96, filter_size=7, stride=2)
    net['norm1'] = NormLayer(net['conv1'], alpha=0.0001)  # caffe has alpha = alpha * pool_size
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3, ignore_border=False)
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2, ignore_border=False)
    net['conv3'] = ConvLayer(net['pool2'], num_filters=512, filter_size=3, pad=1)
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3, pad=1)
    net['conv5'] = ConvLayer(net['conv4'], num_filters=512, filter_size=3, pad=1)
    net['pool5'] = PoolLayer(net['conv5'], pool_size=3, stride=3, ignore_border=False)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=nshape_params)
    output_layer = net['fc7']
    output = lasagne.layers.get_output(output_layer)
    scaled_output = output - 1

    # Render these parameters.
    # NOTE(review): the dense output is presumably (nbatch, nprims * 3); a
    # plain reshape to (nprims, nbatch, 3) would then interleave batch items
    # across primitives - confirm whether a dimshuffle is intended here.
    shape_params = T.reshape(scaled_output, (nprims, nbatch, params_per_prim))
    fragCoords = T.tensor3('fragCoords')
    print("Symbolic Render")
    res, scan_updates = symbolic_render(nprims, shape_params, fragCoords, width, height)
    res_reshape = res.dimshuffle([2, 'x', 0, 1])

    # loss1: simply pixel distance, floored at eps per pixel.
    eps = 1e-9
    diff = T.maximum(eps, (res_reshape - img)**2)
    loss1 = T.sum(diff) / (width * height * nbatch)

    # Mean squared deviation of each parameter from its mean across the batch.
    mean_shape = T.mean(shape_params, axis=1)
    mean_shape = T.reshape(mean_shape, (nprims, 1, params_per_prim))
    diff2 = T.maximum(eps, (mean_shape - shape_params)**2)
    spread = T.sum(diff2) / (nprims * params_per_prim * nbatch)

    # loss2: negative zero-mean Gaussian density (sigma = 4) evaluated at the
    # spread, so it is most negative when the spread is zero.
    sigma = 4
    a = 1 / (sigma * np.sqrt(2 * np.pi))
    loss2 = -a * T.exp((-spread**2) / (2 * sigma**2))

    loss = loss2 + loss1
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step: SGD with Nesterov momentum.
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    network_updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Merge updates. Network updates must not collide with the scan updates.
    for k, v in network_updates.items():
        assert k not in scan_updates
        scan_updates[k] = v

    print("Compiling Loss Function")
    grad = T.grad(loss, params[0])
    netcost = function(
        [fragCoords, img],
        [loss, grad, res_reshape, shape_params, diff, loss1, loss2],
        updates=scan_updates,
        mode=curr_mode,
    )

    # Generate render function to make data.
    # NOTE(review): the generated coordinates are not used in this function;
    # the call is kept in case gen_fragcoords has side effects - confirm and
    # remove if it is pure.
    exfragcoords = gen_fragcoords(width, height)
    print("Compiling Renderer")
    render = make_render(nprims, width, height)
    return render, netcost, output_layer