Example #1
import sys
import time

import lasagne
import theano
import theano.tensor as T


# hd (the data-loading helpers), build_cnn, losses, get_learning_rate,
# save_errors, save_model, theTime and distillation_generator are assumed
# to be supplied by the surrounding project.
def main(train_file,
         logit_folder,
         val_file,
         savename,
         num_epochs=500,
         margin=25,
         base=0.01,
         mb_size=50,
         momentum=0.9,
         temp=1,
         loss_type='VPPD',
         preproc=True,
         hw=0.1,
         synsets=None,
         modelFile='./myModel.pkl'):
    print('Using temperature: %f' % (temp, ))
    print('Loss type: %s' % (loss_type, ))
    print('Model file: %s' % (modelFile, ))
    print("Loading data...")
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)
    # Variables
    input_var = T.tensor4('inputs')
    soft_target = T.fmatrix('soft_target')
    hard_target = T.ivector('hard_target')
    learning_rate = T.fscalar('learning_rate')
    im_shape = (227, 227)
    print("Building model and compiling functions...")
    network = build_cnn(im_shape, temp, input_var=input_var)
    # Losses and updates
    soft_prediction, hard_prediction = lasagne.layers.get_output(
        network, deterministic=False)
    _, test_prediction = lasagne.layers.get_output(network, deterministic=True)
    loss = losses(soft_prediction, hard_prediction, soft_target, hard_target,
                  temp, hw, loss_type)
    params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=learning_rate,
                                                momentum=momentum)
    train_acc = T.mean(T.eq(T.argmax(soft_prediction, axis=1),
                            T.argmax(soft_target, axis=1)),
                       dtype=theano.config.floatX)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), hard_target),
                      dtype=theano.config.floatX)
    # Theano functions
    train_fn = theano.function(
        [input_var, soft_target, hard_target, learning_rate],
        [loss, train_acc],
        updates=updates)
    val_fn = theano.function([input_var, hard_target], test_acc)
    print("Starting training...")
    # We iterate over epochs:
    start_time = time.time()
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        epoch_start = time.time()
        # Numeric learning rate for this epoch, fed to the compiled train_fn.
        learning_rate = get_learning_rate(epoch, margin, base)
        train_err = 0
        train_batches = 0
        running_error = []
        t_acc = 0
        running_acc = []
        trdlg = distillation_generator(tr_addresses,
                                       logit_folder,
                                       im_shape,
                                       mb_size,
                                       temp=temp,
                                       preproc=preproc,
                                       shuffle=True,
                                       synsets=synsets)
        for batch in hd.threaded_gen(trdlg, num_cached=500):
            inputs, soft, hard = batch
            local_train_err, acc = train_fn(inputs, soft, hard, learning_rate)
            train_err += local_train_err
            t_acc += acc
            running_error.append(local_train_err)
            running_acc.append(acc)
            h, m, s = theTime(start_time)
            train_batches += 1
            if train_batches % 257 == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []
                running_acc = []
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        print('')
        val_acc = 0
        val_batches = 0
        vldlg = hd.data_and_label_generator(vl_addresses,
                                            vl_labels,
                                            im_shape,
                                            mb_size,
                                            shuffle=False,
                                            preproc=False)
        running_val_acc = []
        for batch in hd.threaded_gen(vldlg, num_cached=50):
            inputs, targets = batch
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        running_val_acc.append(val_acc / val_batches)
        save_errors(savename, running_val_acc, err_type='val_acc')
        print('')
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  train loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  valid acc:\t\t{:.6f}".format(val_acc / val_batches * 100.))
        save_model(network, modelFile)
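All three examples call get_learning_rate and theTime without defining them. Below is a minimal sketch of both, assuming a step-decay schedule (dividing the base rate by 10 every `margin` epochs) and an elapsed-time formatter; the signatures come from the call sites above, but the bodies are guesses:

import time

def get_learning_rate(epoch, margin, base):
    # Assumed step decay: divide the base rate by 10 every `margin` epochs.
    return base * (0.1 ** (epoch // margin))

def theTime(start_time):
    # Wall-clock time elapsed since start_time, as (hours, minutes, seconds).
    elapsed = int(time.time() - start_time)
    return elapsed // 3600, (elapsed % 3600) // 60, elapsed % 60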
Example #2
# Same imports and project helpers as Example #1; this variant also relies on
# hd.data_target_generator and a regularization() bridge term.
def main(train_file, logit_folder, val_file, savename, num_epochs=500,
         margin=25, base=0.01, mb_size=50, momentum=0.9, synsets=None,
         preproc=False, loss_type='crossentropy', bridgecoeff=1.):
    print("Loading data...")
    print('Loss type: %s' % (loss_type,))
    print('Bridge coeff: %s' % (bridgecoeff,))
    print('Save name: %s' % (savename,))
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)
    # Variables
    input_var = T.tensor4('inputs')
    soft_target = T.fmatrix('soft_target')
    hard_target = T.ivector('hard_target')
    learning_rate = T.fscalar('learning_rate')
    temp = T.fscalar('temp')  # declared but not used in this variant
    im_shape = (227, 227)
    max_norm = 3.87
    t = 10.
    print("Building model and compiling functions...")
    network = build_cnn(im_shape, input_var=input_var)
    # Losses and updates
    prediction = lasagne.layers.get_output(network, deterministic=False)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    loss = losses(prediction, soft_target, loss_type)
    loss += bridgecoeff*regularization(prediction, t)
    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1),
                        T.argmax(soft_target, axis=1)), dtype=theano.config.floatX)
    params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=learning_rate,
                                                momentum=momentum)
    # Max-norm constraint: rescale each updated weight matrix W so its norm
    # stays below max_norm. The constraint has to be applied to the update
    # expressions; rebinding the loop variable would be a no-op.
    for param in params:
        if param.name == 'W':
            updates[param] = lasagne.updates.norm_constraint(updates[param],
                                                             max_norm,
                                                             epsilon=1e-3)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), hard_target),
                      dtype=theano.config.floatX)
    # Theano functions
    train_fn = theano.function([input_var, soft_target, learning_rate],
                               [loss, train_acc], updates=updates)
    val_fn = theano.function([input_var, hard_target], test_acc)
    print("Starting training...")
    # We iterate over epochs:
    start_time = time.time()
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        epoch_start = time.time()
        learning_rate = get_learning_rate(epoch, margin, base)
        train_err = 0
        train_batches = 0
        running_error = []
        t_acc = 0
        running_acc = []
        trdlg = hd.data_target_generator(tr_addresses, logit_folder,
                                         im_shape, mb_size, preproc=preproc,
                                         shuffle=True, synsets=synsets)
        for batch in hd.threaded_gen(trdlg, num_cached=500):
            inputs, soft = batch
            local_train_err, acc = train_fn(inputs, soft, learning_rate)
            train_err += local_train_err
            t_acc += acc
            running_error.append(local_train_err)
            running_acc.append(acc)
            h, m, s = theTime(start_time)
            train_batches += 1
            if train_batches % 257 == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []
                running_acc = []
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        print('')
        val_acc = 0
        val_batches = 0
        vldlg = hd.data_and_label_generator(vl_addresses, vl_labels, im_shape,
                                            mb_size)
        running_val_acc = []
        for batch in hd.threaded_gen(vldlg, num_cached=50):
            inputs, targets = batch
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        running_val_acc.append(val_acc/val_batches)
        save_errors(savename, running_val_acc, err_type='val_acc')
        print('')
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  train loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  valid acc:\t\t{:.6f}".format(val_acc / val_batches * 100.))
Example #3
# Same imports and project helpers as Example #1; this variant additionally
# uses numpy (imported as np), threaded_gen, and a clipped_nesterov_momentum
# update rule (a sketch of which follows the example).
def main(train_file, val_file, savename, synmap_file, num_epochs=500, alpha=0.1,
         margin=25, base=0.01, mb_size=50, momentum=0.9, synsets=None):
    print("Loading data...")
    print('Alpha: %f' % (alpha,))
    print('Save name: %s' % (savename,))
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)
    synmap = hd.get_synmap(synmap_file)
    tr_labels = hd.map_labels(tr_labels, synmap)
    vl_labels = hd.map_labels(vl_labels, synmap)
    N = len(tr_addresses)
    print('Num training examples: %i' % (N,))
    print('Alpha/N: %e' % (alpha/N,))
    # Variables
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learning_rate = T.fscalar('learning_rate')
    im_shape = (227, 227)
    max_grad = 1.
    print("Building model and compiling functions...")
    network = build_cnn(im_shape, input_var=input_var)
    # Losses and updates
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + regularization(prediction, alpha/N).mean()
    # Collect the trainable parameters for the update rule; `deterministic`
    # is a get_output() flag, not a parameter tag.
    params = lasagne.layers.get_all_params(network, trainable=True)
    #updates = lasagne.updates.nesterov_momentum(loss, params,
    #                                learning_rate=learning_rate,
    #                                momentum=momentum)
    updates = clipped_nesterov_momentum(loss, params, learning_rate, max_grad,
                                        momentum=momentum)
    # Validation and testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                       dtype=theano.config.floatX)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    # Theano functions
    train_fn = theano.function([input_var, target_var, learning_rate],
                               [loss, train_acc], updates=updates)
    val_fn = theano.function([input_var, target_var], test_acc)
    print("Starting training...")
    # We iterate over epochs:
    start_time = time.time()
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        epoch_start = time.time()
        learning_rate = get_learning_rate(epoch, margin, base)
        train_err = 0
        train_batches = 0
        running_error = []
        running_acc = []
        acc = 0.
        trdlg = hd.data_and_label_generator(tr_addresses, tr_labels, im_shape,
                                            mb_size, shuffle=True, preproc=True)
        for batch in threaded_gen(trdlg, num_cached=500):
            inputs, targets = batch
            local_train_err, local_train_acc = train_fn(inputs, targets,
                                                        learning_rate)
            train_err += local_train_err
            acc += local_train_acc
            train_batches += 1
            if np.isnan(local_train_err):
                sys.exit('NaN in training loss; aborting.')
            running_error.append(local_train_err)
            running_acc.append(local_train_acc)
            if train_batches % 257 == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []; running_acc = []
            h, m, s = theTime(start_time)
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        print('')
        val_acc = 0
        val_batches = 0
        running_val_acc = []
        vldlg = hd.data_and_label_generator(vl_addresses, vl_labels, im_shape,
                                            mb_size, shuffle=False, preproc=False)
        for batch in threaded_gen(vldlg, num_cached=50):
            inputs, targets = batch
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        running_val_acc.append(val_acc/val_batches)
        save_errors(savename, running_val_acc, err_type='val_acc')
        print('')
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  train loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  valid acc:\t\t{:.6f}".format(val_acc / val_batches * 100.))