Example #1
import pickle
import sys

import lasagne as nn
import theano
import theano.tensor as T

# architecture_string is a project-local helper.
dump_path = sys.argv[1]
model_data = pickle.load(open(dump_path, 'rb'))

# Setting some vars for easier ref.
chunk_size = model_data['chunk_size'] * 2
batch_size = model_data['batch_size']

l_out = model_data['l_out']
l_ins = model_data['l_ins']

# Print some basic stuff about the model.
num_params = nn.layers.count_params(l_out)
print("\n\t\tNumber of parameters: %d" % num_params)

model_arch = architecture_string(model_data['l_out'])

print(model_arch)

# Set up Theano stuff to compute output.
output = nn.layers.get_output(l_out, deterministic=True)
input_ndims = [len(nn.layers.get_output_shape(l_in))
               for l_in in l_ins]
xs_shared = [nn.utils.shared_empty(dim=ndim)
             for ndim in input_ndims]
idx = T.lscalar('idx')

givens = {}
for l_in, x_shared in zip(l_ins, xs_shared):
    givens[l_in.input_var] = x_shared[
        idx * batch_size:(idx + 1) * batch_size]
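
# A minimal completion sketch: the snippet above stops after filling
# `givens`, so this compile step is an assumption (including the name
# `compute_output`), not part of the original source.
compute_output = theano.function([idx], output, givens=givens,
                                 on_unused_input='ignore')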
Example #2
from time import gmtime, strftime

import lasagne as nn
import numpy as np
import theano
import theano.tensor as T

# model, l_out, l_ins and LEARNING_RATE_SCHEDULE come from the
# surrounding training script. Fall back to the schedule when the
# model config doesn't have the lr_decay attributes.
if lr_decay:
    lr_init = model.lr_init
    lr_final = model.lr_final
else:
    lr_init = LEARNING_RATE_SCHEDULE[1]

np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

model_id = strftime("%Y_%m_%d_%H%M%S", gmtime())

dump_path = 'dumps/' + model_id + '_' + model.config_name + '.pkl'

# Print the architecture of the model.
model_arch = architecture_string(l_out)
print(model_arch)

num_params = nn.layers.count_params(l_out, trainable=True)
print "\n\t\tNumber of trainable parameters: %d\n" % num_params
print "\t\tModel id: %s\n" % model_id
print "\t\tModel name: %s\n" % model.config_name

input_ndims = [len(nn.layers.get_output_shape(l_in)) for l_in in l_ins]
xs_shared = [nn.utils.shared_empty(dim=ndim) for ndim in input_ndims]
y_shared = nn.utils.shared_empty(dim=2)

idx = T.lscalar('idx')

obj = model.build_objective(l_out)
train_loss = obj.get_loss()
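
# Sketch of the step that would typically follow: building parameter
# updates and (as in Example #1) a `givens`-batched training function.
# The nesterov_momentum choice below is an assumption, not from the
# original source.
learning_rate = theano.shared(np.float32(lr_init))
params = nn.layers.get_all_params(l_out, trainable=True)
updates = nn.updates.nesterov_momentum(train_loss, params,
                                       learning_rate, momentum=0.9)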
Example #5
import os

import lasagne
import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import aggregate

# build_cnn, get_klloss, get_mtresults and architecture_string are
# project-local helpers.
def main(data,
         cls_num,
         dim,
         pos_ratio,
         w=None,
         increase=True,
         n=2,
         num_epochs=500,
         strategy='momentum',
         model=None,
         thres=None,
         lb_num=None):

    x_train = data['training']
    x_val = data['val']

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    w_var = T.vector('w')

    num_boxes_train = len(x_train['image_ids'])
    pos_idx = (x_train['boxes'][:, -1] == 1).nonzero()[0]
    print(num_boxes_train, 'pos:{}'.format(pos_idx.shape[0]))
    # Create neural network model
    print("Building model and compiling functions...")
    cls_network, kl_network = build_cnn(cls_num, input_var, n, dim)

    epsilon = 1e-35
    margin = 0.5
    batchsize = target_var.shape[0]

    cls_prediction = lasagne.layers.get_output(cls_network)
    # cls_prediction = T.clip(cls_prediction, 1e-7, 1-(1e-7))
    kl_prediction = lasagne.layers.get_output(kl_network)

    # add weight decay
    all_layers = lasagne.layers.get_all_layers([cls_network, kl_network])
    l2_penalty = lasagne.regularization.regularize_layer_params(
        all_layers, lasagne.regularization.l2) * 0.0005

    cls_loss = lasagne.objectives.categorical_crossentropy(
        cls_prediction, target_var)
    cls_loss = aggregate(cls_loss, w_var)

    kl_prediction = kl_prediction + epsilon

    kl_loss = get_klloss(batchsize, margin, kl_prediction, target_var)
    loss = l2_penalty + 0 * kl_loss + cls_loss  # kl_loss disabled (zero weight)

    lr = 0.1
    sh_lr = theano.shared(lasagne.utils.floatX(lr))
    params = lasagne.layers.get_all_params([cls_network, kl_network],
                                           trainable=True)
    if strategy == 'momentum':
        updates = lasagne.updates.momentum(loss,
                                           params,
                                           learning_rate=sh_lr,
                                           momentum=0.9)  # nesterov_momentum is an alternative
    else:
        updates = lasagne.updates.adam(loss, params, learning_rate=1e-4)

    train_fn = theano.function([input_var, target_var, w_var],
                               [loss, cls_prediction, kl_prediction],
                               updates=updates,
                               allow_input_downcast=True)

    # Create a loss expression for validation/testing
    test_cls_prediction = lasagne.layers.get_output(cls_network,
                                                    deterministic=True)
    test_cls_prediction = T.clip(test_cls_prediction, 1e-7, 1 - 1e-7)
    test_cls_loss = lasagne.objectives.categorical_crossentropy(
        test_cls_prediction, target_var)
    test_cls_loss = test_cls_loss.mean()

    test_kl_prediction = lasagne.layers.get_output(kl_network,
                                                   deterministic=True)
    test_kl_prediction = test_kl_prediction + epsilon
    test_kl_loss = get_klloss(batchsize, margin, test_kl_prediction,
                              target_var)

    test_loss = l2_penalty + test_kl_loss + test_cls_loss

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(
        [input_var, target_var],
        [test_loss, test_cls_prediction, test_kl_prediction],
        allow_input_downcast=True)

    print(
        "number of parameters in model: %d" %
        lasagne.layers.count_params([cls_network, kl_network], trainable=True))
    print(architecture_string([cls_network, kl_network]))
    if model is not None and os.path.exists(model):
        # load network weights from model file
        with np.load(model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        with np.load('weighted_indexes_{}'.format(model)) as f:
            indexes = f['arr_0']
        lasagne.layers.set_all_param_values([cls_network, kl_network],
                                            param_values)
    else:
        # launch the training loop
        print("Starting training...")
        for epoch in np.arange(num_epochs):
            print("Epoch {} of {}:".format(epoch + 1, num_epochs))

            cls_preds, kl_preds, my_hat, indexes, cm = get_mtresults(
                x_train, 'train', 'train', train_fn, cls_num, dim,
                pos_ratio, True, w=w, batchsize=128)

            cls_preds, kl_preds, my_hat, indexes, cm = get_mtresults(
                x_val, 'val', 'train', val_fn, cls_num, dim,
                pos_ratio, indexes=indexes)

            if (epoch + 1) in (30, 50, 70):
                new_lr = sh_lr.get_value() * 0.1
                print("New LR: " + str(new_lr))
                sh_lr.set_value(lasagne.utils.floatX(new_lr))
        # Dump the network weights to a file.
        if model is not None:
            np.savez(
                model,
                *lasagne.layers.get_all_param_values([cls_network,
                                                      kl_network]))
            np.savez('weighted_indexes_{}'.format(model), indexes)

    return input_var, cls_network, kl_network, val_fn, indexes
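
# Hypothetical usage sketch for main(). The `data` layout is inferred
# from the accesses above (each split holds 'image_ids' and 'boxes',
# class label in the last column of 'boxes'); all values here are
# synthetic placeholders, and the project-local helpers (build_cnn,
# get_mtresults, ...) must be importable for this to run.
rng = np.random.RandomState(0)
boxes = np.hstack([rng.rand(16, 4), rng.randint(0, 2, (16, 1))])
data = {'training': {'image_ids': np.arange(16), 'boxes': boxes},
        'val': {'image_ids': np.arange(4), 'boxes': boxes[:4]}}
input_var, cls_net, kl_net, val_fn, indexes = main(
    data, cls_num=2, dim=224, pos_ratio=0.25, num_epochs=1,
    strategy='momentum', model=None)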
Example #6
import os
import sys

import lasagne
import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import aggregate

# build_alexnet, get_mtresults and architecture_string are
# project-local helpers.
def main(data, cls_num, dim, pos_ratio, w=None, increase=True, n=2,
         num_epochs=500, strategy='momentum', last_model=None,
         model=None, thres=None, lb_num=None):

    x_train = data['training']
    x_val = data['val']

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    w_var = T.vector('w')

    neg_idx = (x_train['boxes'][:, -1] == 0).nonzero()[0]
    pos_idx = (x_train['boxes'][:, -1] == 1).nonzero()[0]
    print('neg:{}'.format(neg_idx.shape[0]), 'pos:{}'.format(pos_idx.shape[0]))
    # Create neural network model
    print("Building model and compiling functions...")
    network = build_alexnet(cls_num, input_var, dim)
    fc8 = network['fc8']
    print(architecture_string(fc8))

    batchsize = target_var.shape[0]

    prediction = lasagne.layers.get_output(fc8)

    # add weight decay
    all_layers = lasagne.layers.get_all_layers(fc8)
    l2_penalty = lasagne.regularization.regularize_layer_params(
                    all_layers, lasagne.regularization.l2) * 0.0005

    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    #loss = loss.mean()
    loss = aggregate(loss, w_var)
    loss = loss + l2_penalty

    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var), dtype=theano.config.floatX)

    lr = 0.001
    sh_lr = theano.shared(lasagne.utils.floatX(lr))
    params = lasagne.layers.get_all_params(fc8, trainable=True)
    if strategy == 'momentum':
        updates = lasagne.updates.momentum(loss, params,
                                           learning_rate=sh_lr,
                                           momentum=0.9)
    else:
        updates = lasagne.updates.adam(loss, params, learning_rate=1e-4)

    train_fn = theano.function([input_var, target_var, w_var], [loss, acc, prediction],
                               updates=updates, allow_input_downcast=True)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(fc8, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                    dtype=theano.config.floatX)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc,
        test_prediction], allow_input_downcast=True)

    print("number of parameters in model: %d" % lasagne.layers.count_params(fc8, trainable=True))
    # print(architecture_string(fc8))

    if model is not None and os.path.exists(model):
        # load network weights from model file
        with np.load(model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        #with np.load('weighted_indexes_{}'.format(model)) as f:
        #    indexes = f['arr_0']
        lasagne.layers.set_all_param_values(fc8, param_values)
    else:
        if last_model is None:
            # First round of training: copy the weights from the Caffe
            # reference model.

            caffe_root = '/home/dl/caffe/'
            sys.path.insert(0, caffe_root + 'python')
            import caffe

            caffe.set_mode_gpu()
            caffe_net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
                caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
                caffe.TEST)

            layers_caffe = dict(zip(list(caffe_net._layer_names),
                                    caffe_net.layers))
            # Kernels are flipped on both spatial axes because Caffe
            # computes cross-correlation while Theano's conv2d computes
            # true convolution. The grouped AlexNet layers (conv2,
            # conv4, conv5) are split into two parts in this network.
            for name, layer in network.items():
                try:
                    if name == 'conv2':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv2_part1'].W.set_value(W[0:128, :, :, :])
                        network['conv2_part1'].b.set_value(b[0:128])
                        network['conv2_part2'].W.set_value(W[128:, :, :, :])
                        network['conv2_part2'].b.set_value(b[128:])
                    elif name == 'conv4':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv4_part1'].W.set_value(W[0:192, :, :, :])
                        network['conv4_part1'].b.set_value(b[0:192])
                        network['conv4_part2'].W.set_value(W[192:, :, :, :])
                        network['conv4_part2'].b.set_value(b[192:])
                    elif name == 'conv5':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv5_part1'].W.set_value(W[0:128, :, :, :])
                        network['conv5_part1'].b.set_value(b[0:128])
                        network['conv5_part2'].W.set_value(W[128:, :, :, :])
                        network['conv5_part2'].b.set_value(b[128:])
                    elif name == 'fc6' or name == 'fc7':
                        # Caffe stores dense weights as (out, in);
                        # Lasagne expects (in, out), hence the transpose.
                        layer.W.set_value(
                            np.transpose(layers_caffe[name].blobs[0].data))
                        layer.b.set_value(layers_caffe[name].blobs[1].data)
                    elif name != 'fc8':
                        layer.W.set_value(
                            layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1])
                        layer.b.set_value(layers_caffe[name].blobs[1].data)
                except (AttributeError, KeyError):
                    # Skip layers without weights or without a Caffe
                    # counterpart.
                    continue
            print(architecture_string(fc8))
        elif os.path.exists(last_model):
            with np.load(last_model) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            #with np.load('weighted_indexes_{}'.format(last_model)) as f:
            #    indexes = f['arr_0']
            lasagne.layers.set_all_param_values(fc8, param_values)

        print("Starting training...")
        for epoch in np.arange(num_epochs):
            print("Epoch {} of {}:".format(epoch + 1, num_epochs))

            preds = get_mtresults(x_train, 'train', 'train', train_fn,
                                  cls_num, dim, pos_ratio, True, w=w,
                                  batchsize=128)

            preds = get_mtresults(x_val, 'val', 'train', val_fn,
                                  cls_num, dim, pos_ratio)

            if (epoch + 1) in (10, 20, 30):
                new_lr = sh_lr.get_value() * 0.1
                print("New LR: " + str(new_lr))
                sh_lr.set_value(lasagne.utils.floatX(new_lr))
        # Dump the network weights to a file.
        if model is not None:
            np.savez(model, *lasagne.layers.get_all_param_values(fc8))
            #np.savez('weighted_indexes_{}'.format(model), indexes)

    return input_var, fc8, val_fn
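
# Note on the repeated `[:, :, ::-1, ::-1]` above: Caffe implements
# "convolution" as cross-correlation, while Theano's conv2d performs a
# true convolution, so imported kernels must be flipped on both spatial
# axes. A self-contained illustration with synthetic data:
W = np.arange(2 * 3 * 5 * 5).reshape(2, 3, 5, 5)
W_flipped = W[:, :, ::-1, ::-1]                  # flip height and width
assert W_flipped.shape == W.shape                # shape unchanged
assert (W_flipped[:, :, ::-1, ::-1] == W).all()  # double flip = identity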