import numpy as np


def main():
    train_images, train_labels, test_images, test_labels = load_mnist()
    X = normalize(train_images)
    label_size = len(np.unique(train_labels))
    y = one_hot_vector(train_labels, label_size)

    print("Total training example:", X.shape[0])

    nn = NN(epoch=20, batch_size=256)

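    # 784-200-100-10 fully connected net: flattened 28x28 input, two ReLU
    # hidden layers, and a softmax output over the 10 digit classes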
    nn.add_layer(Layer(784))
    nn.add_layer(Layer(200, activation_fn=relu))
    nn.add_layer(Layer(100, activation_fn=relu))
    nn.add_layer(Layer(10, activation_fn=softmax))

    nn.fit(X, y)


    print("Train Accuracy is:", nn.accuracy(X, y))

    X_test = normalize(test_images)
    Y_test = one_hot_vector(test_labels, label_size)
    print("Test Accuracy is:", nn.accuracy(X_test, Y_test))

    nn.plot_learning_curve()
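
# The example above assumes `normalize` and `one_hot_vector` helpers that are
# not shown; a minimal sketch, assuming the images arrive as uint8 arrays and
# the labels as integer class ids (both names come from the call sites above):
def normalize(images):
    # Flatten to (N, features) and scale pixel values into [0, 1]
    images = images.reshape(images.shape[0], -1)
    return images.astype(np.float32) / 255.0


def one_hot_vector(labels, label_size):
    # Row i is all zeros except a 1 at column labels[i]
    encoded = np.zeros((len(labels), label_size))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded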
Example #2
import sys
import time

import numpy as np


def main():
    '''
    This function should generate a trained model that will allow us
    to play the games.
    '''
    ## Pass 'condor' on the command line to train on Condor
    use_condor = len(sys.argv) > 1 and sys.argv[1] == 'condor'

    ## Setup variables
    curr_nn = NN(INPUT_SHAPE)
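    # NOTE: this binds best_nn to the same object as curr_nn (an alias, not a
    # copy), so training curr_nn also updates best_nn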
    best_nn = curr_nn
    game = Connect4()
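    # Self-play replay buffers (start empty, grown each iteration)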
    training_examples = np.array([])
    policies = np.array([])
    values = np.array([])

    ## Log the wall-clock time to play GAMES_PER_ITERATION games per iteration
    prefix = 'workqueue' if use_condor else 'single'
    result_file = open(
        'results/{}_{}_games_per_iter_{}_workers_{}_games_per_task.csv'.format(
            prefix, GAMES_PER_ITERATION, WORKERS, GAMES_PER_TASK), "a")

    for it in range(ITERATIONS):
        print('ITERATION: {}/{}\nSelf playing {} games'.format(
            it + 1, ITERATIONS, GAMES_PER_ITERATION))

        ## Self play section
        ti = time.time()  # Start timing this iteration's self-play
        if use_condor:
            new_examples, ps, vs = selfPlayCondor(game, best_nn, CPUCT,
                                                  GAMES_PER_ITERATION,
                                                  GAMES_PER_TASK)
        else:
            new_examples, ps, vs = selfPlaySingle(game, best_nn, CPUCT,
                                                  GAMES_PER_ITERATION)

        tf = time.time()  # Record time
        result_file.write('{}\n'.format(tf - ti))

        # Don't let the training arrays grow beyond MAX_SIZE_OF_DATASET
        if len(training_examples) + len(new_examples) > MAX_SIZE_OF_DATASET:
            r = len(training_examples) + len(
                new_examples) - MAX_SIZE_OF_DATASET
            training_examples = training_examples[r:]
            policies = policies[r:]
            values = values[r:]

        if len(training_examples) != 0:
            training_examples = np.append(training_examples,
                                          new_examples,
                                          axis=0)
            policies = np.append(policies, ps, axis=0)
            values = np.append(values, vs, axis=0)
        else:
            training_examples = new_examples
            policies = ps
            values = vs

        ## Train network
        print("Training...")
        size = min(len(training_examples), TRAINING_SIZE)
        indices = np.random.choice(range(len(training_examples)),
                                   size=size,
                                   replace=False)
        ex_subset = training_examples[indices, :]
        p_subset = policies[indices]
        v_subset = values[indices]
        curr_nn.fit(ex_subset, [p_subset, v_subset], EPOCHS, BATCH_SIZE)

        ## Play curr_nn against best_nn head to head; the winner survives
        print("Head to head...")
        wins = 0
        for g in range(H2H_GAMES):
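            # Assumed: a positive return value means curr_nn (NN2) won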
            if playGameNN1VsNN2(game, best_nn, curr_nn, CPUCT) > 0:
                wins += 1

        if wins / float(H2H_GAMES) >= NET_THRESHOLD:
            best_nn = curr_nn
            best_nn.save_model(
                './models/{}_games_per_iter_{}_workers_{}_games_per_task.h5'.
                format(GAMES_PER_ITERATION, WORKERS, GAMES_PER_TASK))

    ## Save the best model and close the timing log
    best_nn.save_model(
        './models/{}_games_per_iter_{}_workers_{}_games_per_task.h5'.format(
            GAMES_PER_ITERATION, WORKERS, GAMES_PER_TASK))
    result_file.close()
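
# The buffer trimming above keeps a sliding window over the newest examples;
# a toy run of the same logic with a hypothetical cap of 5:
import numpy as np

MAX_SIZE = 5                          # hypothetical cap for illustration
buffer = np.arange(4)                 # existing examples: [0 1 2 3]
new = np.array([10, 11, 12])          # three new examples arrive

if len(buffer) + len(new) > MAX_SIZE:
    r = len(buffer) + len(new) - MAX_SIZE
    buffer = buffer[r:]               # drop the r oldest entries

buffer = np.append(buffer, new, axis=0)
print(buffer)                         # [ 2  3 10 11 12]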
Example #3
import numpy as np

percentile = 50  # reward percentile that defines the elite sessions
log = []

for i in range(50):
    # Generate a new batch of sessions
    print('iteration', i)
    sessions = [generate_session() for _ in range(n_sessions)]

    batch_states, batch_actions, batch_rewards = map(np.array, zip(*sessions))

    elite_states, elite_actions = select_elites(batch_states, batch_actions,
                                                batch_rewards, percentile)
    elite_states = np.array(elite_states).reshape(-1, n_state)
    elite_actions = np.array(elite_actions)

    if elite_states.shape[0] != 0:
        # One-hot encode the elite actions before fitting the agent
        elite_actions = np.eye(n_actions)[elite_actions.astype(int)]
        agent.fit(elite_states, elite_actions)

    # show_progress(batch_rewards, log, percentile, reward_range=[0, np.max(batch_rewards)])
    mean_reward, threshold = np.mean(batch_rewards), np.percentile(
        batch_rewards, percentile)
    print("mean reward = %.3f, threshold=%.3f" % (mean_reward, threshold))