import numpy as np

from pybrain.optimization import CMAES, SNES
from sklearn.metrics import log_loss


def trainNetwork(data, n_classes, buildNet, file, seed, max_evaluations, num_samples):
    # Trains the network with SNES, using the summed multi-class hinge loss
    # over a random batch of num_samples training examples as the fitness
    # (lower is better).
    X_train = data["X_train"]
    y_train = data["y_train"]

    def objF(params):
        nn = buildNet(X_train.shape[1], n_classes)
        nn._setParameters(np.array(params))
        # Seed with the current learning step so every individual in a
        # generation is evaluated on the same batch, then restore the global
        # RNG state so the optimizer's own randomness is unaffected.
        random_state = np.random.get_state()
        np.random.seed(l.numLearningSteps)
        sampled_data = np.random.choice(len(X_train), num_samples, replace=False)
        np.random.set_state(random_state)
        cur_data = X_train[sampled_data]
        cur_label = y_train[sampled_data]
        cum_loss = 0
        for example, cor in zip(cur_data, cur_label):
            result = nn.activate(example)
            # Multi-class hinge loss: penalize every wrong-class output that
            # comes within a margin of 1 of the correct class output.
            loss_sum = 0
            for q, out in enumerate(result):
                if q != cor:
                    loss_sum += max(0, out - result[int(cor)] + 1)
            # Alternative accuracy-based fitness:
            # guess = np.argmax(result)
            # if guess == cor:
            #     cum_loss += 1
            cum_loss += loss_sum
            nn.reset()
        return cum_loss

    # Build a net once to obtain the initial random parameters
    n = buildNet(X_train.shape[1], n_classes)
    learned = n.params
    # Log the untrained network's performance as a baseline
    testNetwork(data, n_classes, learned, buildNet, 0, file, seed)
    l = SNES(objF, learned, verbose=False)
    # l.batchSize = batch_size
    batch_size = l.batchSize  # use SNES's default population size
    l.maxEvaluations = max_evaluations
    l.minimize = True
    for i in xrange(max_evaluations / batch_size):
        result = l.learn(additionalLearningSteps=1)
        learned = result[0]
        # The evaluation count passed on is the number of training examples
        # seen so far: num_samples per individual, batch_size per generation.
        testNetwork(data, n_classes, learned, buildNet,
                    num_samples * (i + 1) * batch_size, file, seed)
    return learned
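# Hypothetical usage sketch (not part of the original module). It assumes
# PyBrain's buildNetwork as the network factory and a testNetwork helper
# defined elsewhere in the repo with the signature used above; make_net,
# X, y and results_file are illustrative names.
#
#     from pybrain.tools.shortcuts import buildNetwork
#
#     def make_net(n_inputs, n_outputs):
#         return buildNetwork(n_inputs, 10, n_outputs)
#
#     data = {"X_train": X, "y_train": y}
#     params = trainNetwork(data, n_classes=3, buildNet=make_net,
#                           file=results_file, seed=0,
#                           max_evaluations=2000, num_samples=50)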
def train_network(X_train, y_train, X_validate, y_validate, X_test, y_test,
                  test_split=0, validate_split=0):
    # Relies on module-level globals: seed, buildNet, n_classes, batch_size,
    # max_evaluations, cmaes, and the output file handle f.
    file_start = "%d\t%d\t%d" % (seed, test_split, validate_split)
    n = buildNet(X_train.shape[1], n_classes)
    learned = n.params
    population_size = get_population_size(learned, cmaes)
    evaluations_per_generation = population_size * batch_size
    num_generations = max_evaluations / evaluations_per_generation + 1

    # Pre-compute one batch of indices per generation, each sampled with the
    # same class ratios as the full training set.
    from sklearn.cross_validation import StratifiedShuffleSplit
    sss = StratifiedShuffleSplit(y_train.reshape(-1), num_generations,
                                 train_size=batch_size, random_state=seed)
    train_indices = [batch_index for (batch_index, _) in sss]

    def objF(params):
        nn = buildNet(X_train.shape[1], n_classes)
        nn._setParameters(np.array(params))
        # All individuals of a generation are scored on the same batch,
        # selected by the optimizer's current learning step.
        cur_data = X_train[train_indices[l.numLearningSteps]]
        cur_label = y_train[train_indices[l.numLearningSteps]]
        results = []
        for example, cor in zip(cur_data, cur_label):
            results.append(nn.activate(example))
            nn.reset()
        loss = log_loss(cur_label, results)
        return loss

    # Log the untrained network's performance as a baseline
    test_network(X_validate, y_validate, learned, 0, file_start, "val")
    test_network(X_test, y_test, learned, 0, file_start, "test")

    l = SNES(objF, learned, verbose=False)
    if cmaes:
        l = CMAES(objF, learned, verbose=False)
    l.minimize = True
    l.maxEvaluations = num_generations * population_size

    for generation in xrange(num_generations):
        result = l.learn(additionalLearningSteps=1)
        learned = result[0]
        train_evaluations = (generation + 1) * evaluations_per_generation
        test_network(X_train[train_indices[generation]],
                     y_train[train_indices[generation]],
                     learned, train_evaluations, file_start, "train")
        test_network(X_validate, y_validate, learned, train_evaluations,
                     file_start, "val")
        test_network(X_test, y_test, learned, train_evaluations,
                     file_start, "test")
        if generation % 100 == 0:
            f.flush()
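# get_population_size is not defined in this file; a minimal sketch of a
# plausible implementation (an assumption, not the original) is below. Both
# SNES and CMA-ES conventionally default to a population of
# 4 + floor(3 * ln(d)) for d parameters, which matches what PyBrain picks
# when batchSize is left unset.
#
#     from math import floor, log
#
#     def get_population_size(params, use_cmaes):
#         return 4 + int(floor(3 * log(len(params))))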