def main():
    """Train a 784-200-100-10 network on the MNIST data and report accuracy.

    Loads the data with ``load_mnist``, fits the network on the normalized
    training images with one-hot targets, prints train and test accuracy,
    and finally plots the learning curve.
    """
    train_images, train_labels, test_images, test_labels = load_mnist()

    # Training inputs and one-hot encoded targets.
    X = normalize(train_images)
    label_size = len(np.unique(train_labels))
    y = one_hot_vector(train_labels, label_size)
    print("Total training example:", X.shape[0])

    # (units, extra keyword arguments) for each layer, in stacking order.
    # The first layer is created without an activation function.
    layer_specs = [
        (784, {}),
        (200, {"activation_fn": relu}),
        (100, {"activation_fn": relu}),
        (10, {"activation_fn": softmax}),
    ]
    nn = NN(epoch=20, batch_size=256)
    for units, layer_kwargs in layer_specs:
        nn.add_layer(Layer(units, **layer_kwargs))

    nn.fit(X, y)
    print("Train Accuracy is:", nn.accuracy(X, y))

    # The test set goes through the same preprocessing as the training set.
    X_test = normalize(test_images)
    Y_test = one_hot_vector(test_labels, label_size)
    print("Test Accuracy is:", nn.accuracy(X_test, Y_test))

    nn.plot_learning_curve()
def main():
    """Generate a trained model that will allow us to play the games.

    Each iteration plays ``GAMES_PER_ITERATION`` self-play games (through
    ``selfPlayCondor`` when the first command-line argument is ``condor``,
    otherwise through ``selfPlaySingle``), appends the wall-clock time of
    that self-play phase to a CSV file under ``results/``, trains the
    network on a random subset of the accumulated examples, and then plays
    ``H2H_GAMES`` head-to-head games to decide whether the trained network
    replaces the current best one. The best network is saved under
    ``./models/`` when it is replaced and once more after the last
    iteration.
    """
    # Pass 'condor' as the first CLI argument to run self-play on condor.
    use_condor = len(sys.argv) > 1 and sys.argv[1] == 'condor'

    # Setup variables
    curr_nn = NN(INPUT_SHAPE)
    # NOTE(review): best_nn is bound to the very same object as curr_nn, so
    # curr_nn.fit() below also changes best_nn and the head-to-head stage
    # pits the network against itself. Confirm whether a copy was intended.
    best_nn = curr_nn
    game = Connect4()
    training_examples = np.array([])
    policies = np.array([])
    values = np.array([])

    # The result file and the model file share the same run description.
    run_tag = '{}_games_per_iter_{}_workers_{}_games_per_task'.format(
        GAMES_PER_ITERATION, WORKERS, GAMES_PER_TASK)
    results_path = 'results/{}_{}.csv'.format(
        'workqueue' if use_condor else 'single', run_tag)
    model_path = './models/{}.h5'.format(run_tag)

    # Time per X games played in 1 iteration. The context manager makes sure
    # the timings are flushed and the handle is closed even if a later
    # iteration raises.
    with open(results_path, "a") as result_file:
        for it in range(ITERATIONS):
            print('ITERATION: {}/{}\nSelf playing {} games'.format(
                it + 1, ITERATIONS, GAMES_PER_ITERATION))

            # Self play section: take the time to play X games.
            ti = time.time()
            if use_condor:
                new_examples, ps, vs = selfPlayCondor(
                    game, best_nn, CPUCT, GAMES_PER_ITERATION,
                    GAMES_PER_TASK)
            else:
                new_examples, ps, vs = selfPlaySingle(
                    game, best_nn, CPUCT, GAMES_PER_ITERATION)
            tf = time.time()

            # Record time
            result_file.write('{}\n'.format(tf - ti))

            # Don't let the arrays get bigger than MAX_SIZE_OF_DATASET:
            # drop the oldest rows to make room for the new ones.
            overflow = (len(training_examples) + len(new_examples)
                        - MAX_SIZE_OF_DATASET)
            if overflow > 0:
                training_examples = training_examples[overflow:]
                policies = policies[overflow:]
                values = values[overflow:]

            if len(training_examples) != 0:
                training_examples = np.append(
                    training_examples, new_examples, axis=0)
                policies = np.append(policies, ps, axis=0)
                values = np.append(values, vs, axis=0)
            else:
                # Nothing stored yet (or everything was dropped above):
                # the new batch becomes the whole dataset.
                training_examples = new_examples
                policies = ps
                values = vs

            # Train network on a random subset, sampled without replacement.
            print("Training...")
            size = min(len(training_examples), TRAINING_SIZE)
            indices = np.random.choice(
                range(len(training_examples)), size=size, replace=False)
            ex_subset = training_examples[indices, :]
            p_subset = policies[indices]
            v_subset = values[indices]
            curr_nn.fit(ex_subset, [p_subset, v_subset], EPOCHS, BATCH_SIZE)

            # Have the curr_net and best_net play to survive
            print("Head to head...")
            wins = 0
            for _ in range(H2H_GAMES):
                if playGameNN1VsNN2(game, best_nn, curr_nn, CPUCT) > 0:
                    wins += 1

            if wins / float(H2H_GAMES) >= NET_THRESHOLD:
                best_nn = curr_nn
                best_nn.save_model(model_path)

    # Save the best model
    best_nn.save_model(model_path)
# Training loop: sample sessions, keep the elite ones according to
# select_elites, and fit the agent on one-hot encoded elite actions.
percentile = 50
log = []  # only referenced by the currently disabled show_progress call

for i in range(50):
    # Generate new sessions.
    print(i)
    sessions = [generate_session() for _ in range(n_sessions)]

    # Transpose the per-session tuples into three arrays.
    batch_states, batch_actions, batch_rewards = (
        np.array(column) for column in zip(*sessions)
    )

    elite_states, elite_actions = select_elites(
        batch_states, batch_actions, batch_rewards, percentile)
    elite_states = np.array(elite_states).reshape(-1, n_state)
    elite_actions = np.array(elite_actions)

    if elite_states.shape[0] != 0:
        # One-hot encode every elite action before fitting.
        elite_actions_fit = []
        for action in elite_actions:
            print(action)
            one_hot = np.zeros(n_actions)
            one_hot[int(action)] = 1
            elite_actions_fit.append(one_hot)
        elite_actions = elite_actions_fit
        agent.fit(elite_states, elite_actions)

    # show_progress(batch_rewards, log, percentile,
    #               reward_range=[0, np.max(batch_rewards)])
    mean_reward = np.mean(batch_rewards)
    threshold = np.percentile(batch_rewards, percentile)
    print("mean reward = %.3f, threshold=%.3f" % (mean_reward, threshold))