import time

import numpy as np
import theano
import theano.tensor as T
import lasagne
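
# NOTE: the two helpers below are NOT part of the original snippet; they are
# minimal sketches of the project-specific utilities the training code
# appears to assume, inferred from how the yielded batches are indexed. The
# network builders (build_mlp, build_custom_mlp, classifier_layer) are
# likewise project-specific and assumed to be defined elsewhere.


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Sketch: yield arrays of row indices, one batch at a time. With
    # shuffle=False the indices are contiguous ranges, which the test loops
    # below rely on when writing predictions back by position.
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(indices) - batchsize + 1, batchsize):
        yield indices[start:start + batchsize]


def one_hot_encode_features(features, n_values):
    # Sketch: encode a batch of integer codes (or iterables of codes, for
    # multi-hot features such as description tokens) as a dense
    # (batch, n_values) float32 indicator matrix.
    encoded = np.zeros((len(features), n_values), dtype='float32')
    for row, feat in enumerate(features):
        cols = [int(feat)] if np.isscalar(feat) else [int(f) for f in feat]
        encoded[row, cols] = 1.0
    return encoded
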
def train_simple_model(model='custom_mlp',
                       data=None,
                       n_values=None,
                       num_epochs=5,
                       desc_n_values=5000,
                       depth=10,
                       width=256,
                       drop_in=0.2,
                       drop_hid=0.5,
                       batch_size=32,
                       learning_rate=0.01,
                       valid_freq=100,
                       save_path='../results/',
                       saveto='test_mlp.npz',
                       reload_model=None,
                       num_targets=1):

    train, valid, test = data

    layer_shape = desc_n_values + n_values[
        'brands']  #CG TODO: add image dimensions here

    # Prepare Theano variables for inputs and target
    input_var = T.matrix('inputs', dtype='float32')

    target_var = []
    for i in range(num_targets):
        target_var.append(T.vector('target_%s' % i, dtype='int64'))

    # Create the neural network model (depending on the `model` argument)
    start_time = time.time()
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, layer_shape, n_values['y_1'])
    elif model == 'custom_mlp':
        # One softmax output head per target; assumes the targets are keyed
        # 'y_1'..'y_N' in n_values.
        network = build_custom_mlp(
            input_var, depth, width, drop_in, drop_hid, layer_shape,
            [[n_values['y_%d' % (t + 1)]] for t in range(num_targets)])
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = []
    for n in network:
        prediction.append(lasagne.layers.get_output(n))

    # Sum the per-target cross-entropy losses (one output head per target):
    loss = 0
    for p, t in zip(prediction, target_var):
        loss = loss + lasagne.objectives.categorical_crossentropy(p, t)
    loss = loss.mean()

    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta,
    params = []
    for i, n in enumerate(network):
        if i == 0:
            p = lasagne.layers.get_all_params(n, trainable=True)
        else:
            # The heads share the body; only add each extra head's own W, b.
            p = lasagne.layers.get_all_params(n, trainable=True)[-2:]
        params += p
    updates = lasagne.updates.adadelta(loss, params,
                                       learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = []
    test_loss = []
    test_acc = []
    preds = []
    for n, t in zip(network, target_var):
        p = lasagne.layers.get_output(n, deterministic=True)
        test_prediction.append(p)
        l = lasagne.objectives.categorical_crossentropy(p, t)
        test_loss.append(l.mean())

        # As a bonus, also create an expression for the classification accuracy:
        acc = T.mean(T.eq(T.argmax(p, axis=1), t), dtype=theano.config.floatX)
        test_acc.append(acc)
        preds.append(theano.function([input_var], p))

    val_fn = []
    train_fn = theano.function([input_var] + target_var, loss, updates=updates)
    for t, l, a in zip(target_var, test_loss, test_acc):
        val_fn.append(theano.function([input_var, t], [l, a]))

    history_train_errs = []
    history_valid_errs = []
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    training_start = time.time()
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        t_idx = 0
        for batch in iterate_minibatches(train[0],
                                         train[1],
                                         batch_size,
                                         shuffle=True):
            t_idx += 1
            inputs = [[train[0][idx] for idx in batch],
                      [train[1][idx] for idx in batch]]
            target = [[train[t + 2][idx] for idx in batch]
                      for t in range(num_targets)]
            #CG TODO: iterate over image vectors
            desc = one_hot_encode_features(inputs[0],
                                           n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1],
                                             n_values=n_values['brands'])
            inputs = np.hstack(
                (desc, brands))  #CG TODO: hstack with images as well
            train_err += train_fn(inputs, *target)
            train_batches += 1

            if t_idx % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, target[i])
                    err.append(e)
                    acc.append(a)
                history_train_errs.append([err, acc])
                np.savez(save_path + saveto,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         *lasagne.layers.get_all_param_values(network))

        # And a full pass over the validation data:
        val_err = np.zeros(num_targets)
        val_acc = np.zeros(num_targets)
        val_batches = 0
        for batch in iterate_minibatches(valid[0],
                                         valid[1],
                                         batch_size,
                                         shuffle=False):
            inputs = [[valid[0][idx] for idx in batch],
                      [valid[1][idx] for idx in batch]]
            target = [[valid[t + 2][idx] for idx in batch]
                      for t in range(num_targets)]

            desc = one_hot_encode_features(inputs[0],
                                           n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1],
                                             n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))

            err = []
            acc = []
            for i in range(num_targets):
                e, a = val_fn[i](inputs, target[i])
                val_err[i] += e
                val_acc[i] += a
                err.append(e)
                acc.append(a)
            val_batches += 1

            if val_batches % valid_freq == 0:
                history_valid_errs.append([err, acc])
                print('saving...')
                np.savez(save_path + saveto,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         *lasagne.layers.get_all_param_values(network))

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        max_train = np.max(train_err / train_batches)
        min_train = np.min(train_err / train_batches)
        max_val = np.max(val_err / val_batches)
        min_val = np.min(val_err / val_batches)
        avg_val_acc = np.mean(val_acc / val_batches)
        print("  max training loss:\t\t{:.6f}".format(max_train))
        print("  min training loss:\t\t{:.6f}".format(min_train))
        print("  max validation loss:\t\t{:.6f}".format(max_val))
        print("  min validation loss:\t\t{:.6f}".format(min_val))
        print("  avg validation accuracy:\t\t{:.2f} %".format(avg_val_acc *
                                                              100))

    end_time = time.time()
    # Average seconds per epoch over the whole run:
    print("The code ran for %d epochs, at %f sec/epoch" %
          (num_epochs, (end_time - training_start) / (1. * num_epochs)))

    # After training, we compute and print the test error:
    test_err = np.zeros(num_targets)
    test_acc = np.zeros(num_targets)
    test_batches = 0
    test_preds = []
    for i in range(num_targets):
        test_preds.append(np.zeros(len(test[i + 2])))
    for batch in iterate_minibatches(test[0],
                                     test[2],
                                     batch_size,
                                     shuffle=False):
        inputs = [[test[0][idx] for idx in batch],
                  [test[1][idx] for idx in batch]]
        target = [[test[t + 2][idx] for idx in batch]
                  for t in range(num_targets)]
        desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
        brands = one_hot_encode_features(inputs[1],
                                         n_values=n_values['brands'])
        inputs = np.hstack((desc, brands))

        for i in range(num_targets):
            e, a = val_fn[i](inputs, target[i])
            pred_prob = preds[i](inputs)
            pred = pred_prob.argmax(axis=1)
            # batches are contiguous index ranges here (shuffle=False)
            test_preds[i][batch[0]:batch[-1] + 1] = pred
            test_err[i] += e
            test_acc[i] += a
        test_batches += 1

    max_err = np.max(test_err / test_batches)
    min_err = np.min(test_err / test_batches)
    avg_acc = np.mean(test_acc / test_batches)
    max_acc = np.max(test_acc / test_batches)
    min_acc = np.min(test_acc / test_batches)
    print("Final results:")
    print("  max test loss:\t\t\t{:.6f}".format(max_err))
    print("  min test loss:\t\t\t{:.6f}".format(min_err))
    print("  avg test accuracy:\t\t{:.2f} %".format(avg_acc * 100))
    print("  max test accuracy:\t\t{:.2f} %".format(max_acc * 100))
    print("  min test accuracy:\t\t{:.2f} %".format(min_acc * 100))

    param_values = lasagne.layers.get_all_param_values(network)
    # Optionally, you could now dump the network weights to a file like this:
    np.savez(save_path + saveto,
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             predictions=test_preds,
             *param_values)

    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)

    return param_values, test_preds
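
# A hedged usage sketch (not in the original source). The expected data
# layout is inferred from the indexing above: each split is a sequence whose
# first two entries are the input features (descriptions, brands) and whose
# remaining entries are the integer targets y_1..y_3. All sizes below are
# hypothetical.
#
# n_values = {'desc': 5000, 'brands': 1200, 'y_1': 20, 'y_2': 100, 'y_3': 500}
# data = (train, valid, test)  # each split: [desc, brands, y_1, y_2, y_3]
# param_values, test_preds = train_simple_model(
#     model='custom_mlp', data=data, n_values=n_values,
#     num_epochs=5, num_targets=3)
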
def train_model(
        model='custom_mlp',
        mode='simple',  # select either simple or dependent models
        data=None,
        n_values=None,
        num_epochs=5,
        desc_n_values=5000,
        depth=3,
        width=256,
        drop_in=0.5,
        drop_hid=0.5,
        batch_size=32,
        learning_rate=0.01,
        valid_freq=100,
        save_path='../results/',
        saveto='test_mlp.npz',
        reload_model=None,
        shared_params=None,
        cat=1,
        prev_predictions=None):
    '''
    args:
        model: 'mlp', 'custom_mlp', 'classifier_layer', or forthcoming
            'image_model'/'multi-modal'
        mode: 'simple' or 'dependent' (whether this category's model
            conditions on the previous category's outputs)
        data: (train, valid, test) tuple
        n_values: dict mapping each feature/target name to its number of
            distinct values
        num_epochs: number of epochs before quitting
        desc_n_values: width of the one-hot description encoding
        depth: how many layers in the network
        width: units in each hidden layer
        drop_in: dropout rate for the input layer
        drop_hid: dropout rate in hidden layers
        batch_size: how many examples to run at a time
        learning_rate: learning rate of the optimizer
        valid_freq: how often (in batches) to validate and checkpoint
        save_path: where to save the resulting model
        saveto: name of the file to save to
        reload_model: path to a saved .npz model to reload, if any
        shared_params: parameter values to initialize the network with, if any
        cat: which category level to train (1 is the top level)
        prev_predictions: test-set predictions from the previous category's
            model, used as inputs at test time when cat != 1
    '''

    train, valid, test = data

    layer_shape = desc_n_values + n_values[
        'brands']  #CG TODO: add image dimensions here

    # Prepare Theano variables for inputs and target
    input_var = T.matrix('inputs', dtype='float32')
    target_var = T.ivector('target')

    n_val_keys = list(n_values.keys())  # list() so it is indexable (Python 3)

    if cat != 1:
        prev_cat_var = T.matrix('prev_inputs', dtype='float32')
        classifier_layer_shape = width + n_values[n_val_keys[cat]]
        train_prev_cat = np.array(train[cat], dtype='float32')
        valid_prev_cat = np.array(valid[cat], dtype='float32')
        test_prev_cat = np.array(prev_predictions, dtype='float32')

    # Create the neural network model (depending on the `model` argument)
    start_time = time.time()
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, layer_shape, n_values['y_1'])
    elif model == 'custom_mlp':
        network = build_custom_mlp(input_var, depth, width, drop_in, drop_hid,
                                   layer_shape, n_values['y_1'])
    elif model == 'classifier_layer':
        network = build_custom_mlp(input_var, depth, width, drop_in, drop_hid,
                                   layer_shape, n_values['y_1'])
        if reload_model is not None:
            with np.load(reload_model) as f:
                # The final checkpoint stores 7 named arrays (train/valid/test
                # errors, the two histories, layers, predictions) alongside
                # the positional parameter arrays; skip those 7 here.
                param_values = [
                    f['arr_%d' % i] for i in range(len(f.files) - 7)
                ]
            lasagne.layers.set_all_param_values(network, param_values)
        if shared_params is not None:
            lasagne.layers.set_all_param_values(network, shared_params)
        network = classifier_layer(network,
                                   prev_cat_var,
                                   n_values[n_val_keys[cat + 1]],
                                   layer_shape=classifier_layer_shape)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta,
    # but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adadelta(loss, params,
                                       learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()

    # As a bonus, also create an expression for the classification accuracy:
    # TODO: separate accuracy for the three
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    if cat != 1:
        train_fn = theano.function([input_var, prev_cat_var, target_var],
                                   loss,
                                   updates=updates)
        # Compile a second function computing the validation loss and accuracy:
        val_fn = theano.function([input_var, prev_cat_var, target_var],
                                 [test_loss, test_acc])
        preds = theano.function([input_var, prev_cat_var], test_prediction)
    else:
        train_fn = theano.function([input_var, target_var],
                                   loss,
                                   updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 [test_loss, test_acc])
        preds = theano.function([input_var], test_prediction)

    history_train_errs = []
    history_valid_errs = []
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    training_start = time.time()
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        t_idx = 0
        for batch in iterate_minibatches(train[0],
                                         train[1],
                                         batch_size,
                                         shuffle=True):
            t_idx += 1
            inputs = [[train[0][idx] for idx in batch],
                      [train[1][idx] for idx in batch]]
            # train[cat + 1] holds this category's targets (cat == 1 -> train[2])
            target = [train[cat + 1][idx] for idx in batch]
            if cat != 1:
                prev_inputs = [train_prev_cat[idx] for idx in batch]
                prev_inputs = one_hot_encode_features(
                    prev_inputs, n_values=n_values[n_val_keys[cat]])
            desc = one_hot_encode_features(inputs[0],
                                           n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1],
                                             n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))
            if cat != 1:
                train_err += train_fn(inputs, prev_inputs, target)
            else:
                train_err += train_fn(inputs, target)
            train_batches += 1

            if t_idx % valid_freq == 0:
                if cat != 1:
                    err, acc = val_fn(inputs, prev_inputs, target)
                else:
                    err, acc = val_fn(inputs, target)
                history_train_errs.append([err, acc])
                np.savez(save_path + saveto,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         layers=lasagne.layers.get_all_layers(network),
                         *lasagne.layers.get_all_param_values(network))

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(valid[0],
                                         valid[1],
                                         batch_size,
                                         shuffle=False):
            inputs = [[valid[0][idx] for idx in batch],
                      [valid[1][idx] for idx in batch]]
            target = [valid[cat + 1][idx] for idx in batch]
            #add image vectors here
            if cat != 1:
                prev_inputs = [valid_prev_cat[idx] for idx in batch]
                prev_inputs = one_hot_encode_features(
                    prev_inputs, n_values=n_values[n_val_keys[cat]])
            desc = one_hot_encode_features(inputs[0],
                                           n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1],
                                             n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))  #hstack image vectors
            if cat != 1:
                err, acc = val_fn(inputs, prev_inputs, target)
            else:
                err, acc = val_fn(inputs, target)
            val_err += err
            val_acc += acc
            val_batches += 1

            if val_batches % valid_freq == 0:
                history_valid_errs.append([err, acc])
                print('saving...')
                np.savez(save_path + saveto,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         *lasagne.layers.get_all_param_values(network))

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    end_time = time.time()
    # Average seconds per epoch over the whole run:
    print("The code ran for %d epochs, at %f sec/epoch" %
          (num_epochs, (end_time - training_start) / (1. * num_epochs)))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_preds = np.zeros(len(test[0]))
    for batch in iterate_minibatches(test[0],
                                     test[2],
                                     batch_size,
                                     shuffle=False):
        inputs = [[test[0][idx] for idx in batch],
                  [test[1][idx] for idx in batch]]
        target = [test[cat + 1][idx] for idx in batch]
        desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
        brands = one_hot_encode_features(inputs[1],
                                         n_values=n_values['brands'])
        if cat != 1:
            prev_inputs = [test_prev_cat[idx] for idx in batch]
            prev_inputs = one_hot_encode_features(
                prev_inputs, n_values=n_values[n_val_keys[cat]])
        inputs = np.hstack((desc, brands))
        if cat != 1:
            err, acc = val_fn(inputs, prev_inputs, target)
            pred_prob = preds(inputs, prev_inputs)
        else:
            err, acc = val_fn(inputs, target)
            pred_prob = preds(inputs)
        pred = pred_prob.argmax(axis=1)
        # batches are contiguous index ranges here (shuffle=False)
        test_preds[batch[0]:batch[-1] + 1] = pred
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    np.savez(save_path + saveto,
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             layers=lasagne.layers.get_all_layers(network),
             predictions=test_preds,
             *lasagne.layers.get_all_param_values(network))

    param_values = lasagne.layers.get_all_param_values(network)
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)

    return param_values, test_preds
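
# A hedged usage sketch (not in the original source) of the dependent chain
# this function supports: train the top-level category first, then feed its
# returned parameter values and test-set predictions into the next level via
# shared_params and prev_predictions, using the 'classifier_layer' model.
# File names and the category index are illustrative.
#
# params_1, preds_1 = train_model(model='custom_mlp', data=data,
#                                 n_values=n_values, cat=1,
#                                 saveto='cat1_mlp.npz')
# params_2, preds_2 = train_model(model='classifier_layer', data=data,
#                                 n_values=n_values, cat=2,
#                                 shared_params=params_1,
#                                 prev_predictions=preds_1,
#                                 saveto='cat2_mlp.npz')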