def train_simple_model(model='custom_mlp',
                       data=None,
                       n_values=None,
                       num_epochs=5,
                       desc_n_values=5000,
                       depth=10,
                       width=256,
                       drop_in=0.2,
                       drop_hid=0.5,
                       batch_size=32,
                       learning_rate=0.01,
                       valid_freq=100,
                       save_path='../results/',
                       saveto='test_mlp.npz',
                       reload_model=None,
                       num_targets=1):
    train, valid, test = data
    layer_shape = desc_n_values + n_values['brands']  # CG TODO: add image dimensions here

    # Prepare Theano variables for the inputs and the targets
    input_var = T.matrix('inputs', dtype='float32')
    target_var = []
    for i in range(num_targets):
        target_var.append(T.vector('target_%s' % i, dtype='int64'))
    n_val_keys = list(n_values.keys())

    # Create neural network model (depending on first command line parameter)
    start_time = time.time()
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, layer_shape, n_values['y_1'])
    elif model == 'custom_mlp':
        # depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        # network = build_custom_mlp(input_var, int(depth), int(width),
        #                            float(drop_in), float(drop_hid))
        network = build_custom_mlp(
            input_var, depth, width, drop_in, drop_hid, layer_shape,
            [[n_values['y_1']], [n_values['y_2']], [n_values['y_3']]])
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss).
    # NOTE: the loss and train_fn below are wired for exactly three targets
    # (y_1, y_2, y_3), so num_targets should be 3 for this model.
    prediction = []
    for n in network:
        prediction.append(lasagne.layers.get_output(n))
    # for p, t in zip(prediction, target_var):
    #     loss += lasagne.objectives.categorical_crossentropy(p, t)
    loss = (lasagne.objectives.categorical_crossentropy(prediction[0], target_var[0]) +
            lasagne.objectives.categorical_crossentropy(prediction[1], target_var[1]) +
            lasagne.objectives.categorical_crossentropy(prediction[2], target_var[2]))
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta. The shared
    # trunk is collected once (i == 0); for the remaining output layers only
    # their own softmax weights and biases are appended.
    params = []
    for i, n in enumerate(network):
        if i == 0:
            p = lasagne.layers.get_all_params(n, trainable=True)
        else:
            p = lasagne.layers.get_all_params(n, trainable=True)[-2:]
        params += p
    updates = lasagne.updates.adadelta(loss, params, learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = []
    test_loss = []
    test_acc = []
    preds = []
    for n, t in zip(network, target_var):
        p = lasagne.layers.get_output(n, deterministic=True)
        test_prediction.append(p)
        l = lasagne.objectives.categorical_crossentropy(p, t)
        test_loss.append(l.mean())
        # As a bonus, also create an expression for the classification accuracy:
        acc = T.mean(T.eq(T.argmax(p, axis=1), t), dtype=theano.config.floatX)
        test_acc.append(acc)
        preds.append(theano.function([input_var], p))

    inputs = []
    val_fn = []
    train_fn = theano.function([input_var] + target_var, loss, updates=updates)
    for t, l, a in zip(target_var, test_loss, test_acc):
        val_fn.append(theano.function([input_var, t], [l, a]))

    history_train_errs = []
    history_valid_errs = []

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        epoch_start_time = time.time()
        t_idx = 0
        for batch in iterate_minibatches(train[0], train[1], batch_size, shuffle=True):
            t_idx += 1
            inputs = [[train[0][idx] for idx in batch],
                      [train[1][idx] for idx in batch]]
            target = [[train[2][idx] for idx in batch],
                      [train[3][idx] for idx in batch],
                      [train[4][idx] for idx in batch]]
            # CG TODO: iterate over image vectors
            desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))  # CG TODO: hstack with images as well
            train_err += train_fn(inputs, target[0], target[1], target[2])
            train_batches += 1

            if t_idx % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, target[i])
                    err.append(e)
                    acc.append(a)
                history_train_errs.append([err, acc])
                np.savez(save_path + saveto, *params,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs)

        # And a full pass over the validation data:
        val_err = np.zeros(num_targets)
        val_acc = np.zeros(num_targets)
        val_batches = 0
        for batch in iterate_minibatches(valid[0], valid[1], batch_size, shuffle=False):
            inputs = [[valid[0][idx] for idx in batch],
                      [valid[1][idx] for idx in batch]]
            target = [[valid[2][idx] for idx in batch],
                      [valid[3][idx] for idx in batch],
                      [valid[4][idx] for idx in batch]]
            desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))
            for i in range(num_targets):
                e, a = val_fn[i](inputs, target[i])
                val_err[i] += e
                val_acc[i] += a
            val_batches += 1
            params = lasagne.layers.get_all_params(network)

            if t_idx % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, target[i])
                    err.append(e)
                    acc.append(a)
                history_train_errs.append([err, acc])
                print('saving...')
                np.savez(save_path + saveto, *params,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - epoch_start_time))
        max_train = np.max(train_err / train_batches)
        min_train = np.min(train_err / train_batches)
        max_val = np.max(val_err / val_batches)
        min_val = np.min(val_err / val_batches)
        avg_val_acc = np.mean(val_acc / val_batches)
        print("  max training loss:\t\t{:.6f}".format(max_train))
        print("  min training loss:\t\t{:.6f}".format(min_train))
        print("  max validation loss:\t\t{:.6f}".format(max_val))
        print("  min validation loss:\t\t{:.6f}".format(min_val))
        print("  avg validation accuracy:\t\t{:.2f} %".format(avg_val_acc * 100))

    end_time = time.time()
    print("The code ran for %d epochs, with %f sec/epoch" % (
        num_epochs, (end_time - start_time) / (1. * num_epochs)))

    # After training, we compute and print the test error:
    test_err = np.zeros(num_targets)
    test_acc = np.zeros(num_targets)
    test_batches = 0
    test_preds = []
    for i in range(num_targets):
        test_preds.append(np.zeros(len(test[i + 2])))
    for batch in iterate_minibatches(test[0], test[2], batch_size, shuffle=False):
        inputs = [[test[0][idx] for idx in batch],
                  [test[1][idx] for idx in batch]]
        target = [[test[2][idx] for idx in batch],
                  [test[3][idx] for idx in batch],
                  [test[4][idx] for idx in batch]]
        desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
        brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
        inputs = np.hstack((desc, brands))
        for i in range(num_targets):
            e, a = val_fn[i](inputs, target[i])
            pred_prob = preds[i](inputs)
            pred = pred_prob.argmax(axis=1)
            test_preds[i][batch[0]:batch[-1] + 1] = pred
            test_err[i] += e
            test_acc[i] += a
        test_batches += 1

    max_err = np.max(test_err / test_batches)
    min_err = np.min(test_err / test_batches)
    avg_acc = np.mean(test_acc / test_batches)
    max_acc = np.max(test_acc / test_batches)
    min_acc = np.min(test_acc / test_batches)
    print("Final results:")
    print("  max test loss:\t\t\t{:.6f}".format(max_err))
    print("  min test loss:\t\t\t{:.6f}".format(min_err))
    print("  avg test accuracy:\t\t{:.2f} %".format(avg_acc * 100))
    print("  max test accuracy:\t\t{:.2f} %".format(max_acc * 100))
    print("  min test accuracy:\t\t{:.2f} %".format(min_acc * 100))

    params = lasagne.layers.get_all_params(network)
    # Optionally, you could now dump the network weights to a file like this:
    np.savez(save_path + saveto, *params,
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             predictions=test_preds)
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
    return params, test_preds
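# Example call (a minimal sketch, not executed here): `load_data` is a
# hypothetical loader that would return ((train, valid, test), n_values) with
# each split laid out as [descriptions, brands, y_1, y_2, y_3] and `n_values`
# mapping 'desc', 'brands', 'y_1', 'y_2' and 'y_3' to the number of distinct
# values of each feature/target. The loss in train_simple_model is wired for
# three targets, so num_targets should be 3.
#
#   data, n_values = load_data()
#   params, test_preds = train_simple_model(model='custom_mlp',
#                                           data=data,
#                                           n_values=n_values,
#                                           num_targets=3,
#                                           num_epochs=5)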
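# The training loops above hstack one-hot blocks for the description and brand
# ids into a single float32 input matrix. The helper below is an illustrative
# sketch of the encoding that one_hot_encode_features is assumed to perform;
# it is not used by the training code, relies on numpy already being imported
# as np at the top of this module, and only documents the expected shape
# (n_examples x n_values).
def _one_hot_sketch(ids, n_values):
    """Return a float32 one-hot matrix for a list of integer ids (sketch only)."""
    ids = np.asarray(ids, dtype='int64')
    out = np.zeros((len(ids), n_values), dtype='float32')
    out[np.arange(len(ids)), ids] = 1.0
    return out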
def train_model(model='custom_mlp',
                mode='simple',  # select either simple or dependent models
                data=None,
                n_values=None,
                num_epochs=5,
                desc_n_values=5000,
                depth=3,
                width=256,
                drop_in=0.5,
                drop_hid=0.5,
                batch_size=32,
                learning_rate=0.01,
                valid_freq=100,
                save_path='../results/',
                saveto='test_mlp.npz',
                reload_model=None,
                shared_params=None,
                cat=1,
                prev_predictions=None):
    '''
    args:
        model: 'mlp', 'custom_mlp', 'classifier_layer', or forthcoming
            'image_model', 'multi-modal'
        mode: select either simple or dependent models
        data: (train, valid, test) data
        n_values: dict with the number of distinct values per feature/target
        num_epochs: number of epochs before quitting
        desc_n_values: dimensionality of the one-hot encoded description features
        depth: how many layers in the network
        width: units in each hidden layer
        drop_in: dropout rate for the input layer
        drop_hid: dropout rate in the hidden layers
        batch_size: how many examples to run at a time
        learning_rate: learning rate for the adadelta updates
        valid_freq: how often (in minibatches) to validate
        save_path: where to save the resulting model
        saveto: name of the file to save to
        reload_model: path to a saved .npz model to reload, if any
        shared_params: parameter values from a previously trained model
        cat: index of the category level being trained
        prev_predictions: test-set predictions for the previous category level
    '''
    train, valid, test = data
    layer_shape = desc_n_values + n_values['brands']  # CG TODO: add image dimensions here

    # Prepare Theano variables for the inputs and the target
    input_var = T.matrix('inputs', dtype='float32')
    target_var = T.ivector('target')
    n_val_keys = list(n_values.keys())
    if cat != 1:
        prev_cat_var = T.matrix('prev_inputs', dtype='float32')
        classifier_layer_shape = width + n_values[n_val_keys[cat]]
        train_prev_cat = np.array(train[cat], dtype='float32')
        valid_prev_cat = np.array(valid[cat], dtype='float32')
        test_prev_cat = np.array(prev_predictions, dtype='float32')

    # Create neural network model (depending on first command line parameter)
    start_time = time.time()
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, layer_shape, n_values['y_1'])
    elif model == 'custom_mlp':
        # depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        # network = build_custom_mlp(input_var, int(depth), int(width),
        #                            float(drop_in), float(drop_hid))
        network = build_custom_mlp(input_var, depth, width, drop_in, drop_hid,
                                   layer_shape, n_values['y_1'])
    elif model == 'classifier_layer':
        network = build_custom_mlp(input_var, depth, width, drop_in, drop_hid,
                                   layer_shape, n_values['y_1'])
        if reload_model is not None:
            with np.load(reload_model) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files) - 7)]
            lasagne.layers.set_all_param_values(network, param_values)
        if shared_params is not None:
            lasagne.layers.set_all_param_values(network, shared_params)
        network = classifier_layer(network, prev_cat_var,
                                   n_values[n_val_keys[cat + 1]],
                                   layer_shape=classifier_layer_shape)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta,
    # but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adadelta(loss, params, learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    # TODO: separate accuracy for the three category levels
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    if cat != 1:
        train_fn = theano.function([input_var, prev_cat_var, target_var], loss,
                                   updates=updates)
        # Compile a second function computing the validation loss and accuracy:
        val_fn = theano.function([input_var, prev_cat_var, target_var],
                                 [test_loss, test_acc])
        preds = theano.function([input_var, prev_cat_var], test_prediction)
    else:
        train_fn = theano.function([input_var, target_var], loss, updates=updates)
        val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
        preds = theano.function([input_var], test_prediction)

    history_train_errs = []
    history_valid_errs = []

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        epoch_start_time = time.time()
        t_idx = 0
        for batch in iterate_minibatches(train[0], train[1], batch_size, shuffle=True):
            t_idx += 1
            inputs = [[train[0][idx] for idx in batch],
                      [train[1][idx] for idx in batch]]
            target = [train[2][idx] for idx in batch]
            if cat != 1:
                prev_inputs = [train_prev_cat[idx] for idx in batch]
                target = [train[cat + 1][idx] for idx in batch]
                prev_inputs = one_hot_encode_features(
                    prev_inputs, n_values=n_values[n_val_keys[cat]])
            desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))
            if cat != 1:
                train_err += train_fn(inputs, prev_inputs, target)
            else:
                train_err += train_fn(inputs, target)
            train_batches += 1

            if t_idx % valid_freq == 0:
                if cat != 1:
                    err, acc = val_fn(inputs, prev_inputs, target)
                else:
                    err, acc = val_fn(inputs, target)
                history_train_errs.append([err, acc])
                np.savez(save_path + saveto,
                         *lasagne.layers.get_all_param_values(network),
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         layers=lasagne.layers.get_all_layers(network))

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(valid[0], valid[1], batch_size, shuffle=False):
            inputs = [[valid[0][idx] for idx in batch],
                      [valid[1][idx] for idx in batch]]
            target = [valid[2][idx] for idx in batch]
            # add image vectors here
            if cat != 1:
                prev_inputs = [valid_prev_cat[idx] for idx in batch]
                target = [valid[cat + 1][idx] for idx in batch]
                prev_inputs = one_hot_encode_features(
                    prev_inputs, n_values=n_values[n_val_keys[cat]])
            desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
            brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
            inputs = np.hstack((desc, brands))  # hstack image vectors
            if cat != 1:
                err, acc = val_fn(inputs, prev_inputs, target)
            else:
                err, acc = val_fn(inputs, target)
            val_err += err
            val_acc += acc
            val_batches += 1

            if t_idx % valid_freq == 0:
                if cat != 1:
                    err, acc = val_fn(inputs, prev_inputs, target)
                else:
                    err, acc = val_fn(inputs, target)
                history_train_errs.append([err, acc])
                print('saving...')
                np.savez(save_path + saveto,
                         *lasagne.layers.get_all_param_values(network),
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - epoch_start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

    end_time = time.time()
    print("The code ran for %d epochs, with %f sec/epoch" % (
        num_epochs, (end_time - start_time) / (1. * num_epochs)))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_preds = np.zeros(len(test[0]))
    for batch in iterate_minibatches(test[0], test[2], batch_size, shuffle=False):
        inputs = [[test[0][idx] for idx in batch],
                  [test[1][idx] for idx in batch]]
        target = [test[2][idx] for idx in batch]
        desc = one_hot_encode_features(inputs[0], n_values=n_values['desc'])
        brands = one_hot_encode_features(inputs[1], n_values=n_values['brands'])
        if cat != 1:
            prev_inputs = [test_prev_cat[idx] for idx in batch]
            target = [test[cat + 1][idx] for idx in batch]
            prev_inputs = one_hot_encode_features(
                prev_inputs, n_values=n_values[n_val_keys[cat]])
        inputs = np.hstack((desc, brands))
        if cat != 1:
            err, acc = val_fn(inputs, prev_inputs, target)
            pred_prob = preds(inputs, prev_inputs)
        else:
            err, acc = val_fn(inputs, target)
            pred_prob = preds(inputs)
        pred = pred_prob.argmax(axis=1)
        test_preds[batch[0]:batch[-1] + 1] = pred
        test_err += err
        test_acc += acc
        test_batches += 1

    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    np.savez(save_path + saveto,
             *lasagne.layers.get_all_param_values(network),
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             layers=lasagne.layers.get_all_layers(network),
             predictions=test_preds)
    param_values = lasagne.layers.get_all_param_values(network)
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
    return param_values, test_preds
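# Example of chaining the dependent models (a minimal sketch, not executed
# here, using the same hypothetical load_data as above): the parameter values
# and test predictions returned for one category level can seed the
# 'classifier_layer' model for the next level via shared_params and
# prev_predictions.
#
#   data, n_values = load_data()
#   params_1, preds_1 = train_model(model='custom_mlp', data=data,
#                                   n_values=n_values, cat=1)
#   params_2, preds_2 = train_model(model='classifier_layer', data=data,
#                                   n_values=n_values, cat=2,
#                                   shared_params=params_1,
#                                   prev_predictions=preds_1)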