Example #1
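A DQN-style training loop for a multi-agent pursuit game: each epoch collects rollouts either from a scripted baseline (game.get_simulation_history) or from the learned policy with epsilon-greedy exploration, soft-trains on every rollout, and periodically hard-trains on an experience-replay buffer. The snippet assumes roughly the imports below; the project-specific module paths (Game, MultiAgentCNN, TfSimulator) are guesses and may differ:

from time import time
import os

import numpy as np
import matplotlib.pyplot as plt

# Project-specific modules; the exact import paths are assumptions
from game import Game
from multi_agent_cnn import MultiAgentCNN
from tf_simulator import TfSimulator
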
def main():
    game = Game()
    env_size = 4
    m = MultiAgentCNN("pursuers", env_size)
    # target_m = MultiAgentCNN("target")
    ml_simulator = TfSimulator(4, m)  #, target_m)

    epochs = 100
    sim_per_epoch = 20
    steps_per_run = 25

    memory_size = 20000

    subsample = 2

    epsilon = 1.2
    base_cap = 1

    saver = 1000

    run_dir = '{}_{}_{}_{}'.format(time(), epochs, sim_per_epoch,
                                   steps_per_run)
    print('Saving to {}'.format(run_dir))
    #os.mkdir('animations/'+run_dir)

    decay = (1 - 6e-5)
    decayer = 1.

    loss_history = []

    memory_actions = []
    memory_states = []
    memory_rewards = []
    memory_end_states = []

    total_steps_performed = 0

    # Run epochs until roughly 4M total steps have been accumulated (see the break below)

    for radius in range(1, 4):
        for ep in range(epochs):
            decayer = 1.
            if total_steps_performed > 4000000:
                break
            states = []
            rewards = []
            actions = []
            end_states = []

            for sim in range(sim_per_epoch):
                # print("\tRun {}".format(sim))
                n_bots = (sim % 3 + 2) % len(ml_simulator.env.bots)

                # Randomly initialise the target position
                pos = ml_simulator.env.rand_position()
                ml_simulator.env.rand_initialise_within_radius(
                    radius, n_bots, pos, bot_on_radius=True)

                decayer *= decay
                base_cap0 = base_cap * decayer

                if len(memory_rewards) > memory_size / 2:
                    epsilon = epsilon * decay

                # Baseline vs Model: with probability base_cap0 take a scripted
                # baseline rollout, otherwise roll out the learned policy
                if np.random.uniform(0, 1) < base_cap0:
                    s, r, a, e_s = game.get_simulation_history(
                        bot=0, N=steps_per_run, subsample=subsample)
                else:
                    s, r, a, e_s = ml_simulator.run_simulation(
                        0, steps_per_run, subsample=subsample, epsilon=epsilon)

                if s is None or r is None or a is None:
                    continue

                s = np.asarray(s)
                r = np.asarray(r)
                a = np.asarray(a)
                e_s = np.asarray(e_s)

                # Soft training
                try:
                    statesTrain = np.concatenate(s, axis=0)
                    rewardsTrain = np.concatenate(r, axis=0)
                    actionsTrain = np.concatenate(a, axis=0)
                    endStateTrain = np.concatenate(e_s, axis=0)

                    if statesTrain.shape[1:] != (env_size, env_size, 3):
                        statesTrain = statesTrain.transpose((0, 2, 3, 1))
                    if endStateTrain.shape[1:] != (env_size, env_size, 3):
                        endStateTrain = endStateTrain.transpose((0, 2, 3, 1))

                    ml_simulator.model.train(statesTrain, rewardsTrain,
                                             actionsTrain, endStateTrain)

                    # Expand memory DB (keep this rollout for the epoch-level update)
                    states += [s]
                    rewards += [r]
                    actions += [a]
                    end_states += [e_s]
                except Exception as e:
                    # print("Error:", e)
                    continue

            try:
                _ = np.concatenate(states, axis=0)
                _ = np.concatenate(rewards, axis=0)
                _ = np.concatenate(actions, axis=0)
            except Exception as e:
                print("Concat error:", e)
                continue

            n = len(rewards)
            total_steps_performed += n

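            # Experience replay buffer: once it exceeds memory_size, drop the
            # oldest 2*n rollouts before appending the n new ones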
            if len(memory_rewards) > memory_size:
                memory_rewards = memory_rewards[n * 2:] + rewards
                memory_actions = memory_actions[n * 2:] + actions
                memory_states = memory_states[n * 2:] + states
                memory_end_states = memory_end_states[n * 2:] + end_states
            else:
                memory_rewards = memory_rewards + rewards
                memory_actions = memory_actions + actions
                memory_states = memory_states + states
                memory_end_states = memory_end_states + end_states

                print("\t~ Building memory DB", len(memory_rewards))

            try:
                statesTrain = np.concatenate(memory_states, axis=0)
                rewardsTrain = np.concatenate(memory_rewards, axis=0)
                actionsTrain = np.concatenate(memory_actions, axis=0)
                endStateTrain = np.concatenate(memory_end_states, axis=0)
            except Exception as e:
                print("Concat error:", e)
                continue

            # Transpose until the batches are in NHWC layout
            # (batch, height, width, channels=3), as expected by the CNN
            env_shape = ml_simulator.env.verticeMatrix.shape
            while (statesTrain.shape[1] != env_shape[0]
                   or statesTrain.shape[2] != env_shape[1]
                   or statesTrain.shape[3] != 3):
                statesTrain = statesTrain.transpose((0, 2, 3, 1))

            while (endStateTrain.shape[1] != env_shape[0]
                   or endStateTrain.shape[2] != env_shape[1]
                   or endStateTrain.shape[3] != 3):
                endStateTrain = endStateTrain.transpose((0, 2, 3, 1))

            # Hard training
            save = (total_steps_performed > saver)
            if save:
                saver += 2500

            print("\t- Memory Training...")
            loss = ml_simulator.model.train(statesTrain,
                                            rewardsTrain,
                                            actionsTrain,
                                            endStateTrain,
                                            save=save)
            print('\tLoss: {}'.format(loss))
            if save:
                ml_simulator.run_and_plot(run_dir, ep, radius)
            loss_history.append(loss)

    m.save()
    ml_simulator.run_and_plot(run_dir, 'FINAL')

    plt.plot(np.arange(len(loss_history)), loss_history)
    plt.title("Loss over epochs")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()
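
If Example #1 is run as a standalone script (with the imports assumed above), the usual entry-point guard would be:

if __name__ == '__main__':
    main()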
Example #2
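A curriculum variant of Example #1: the target is initialised within a radius that grows from 1 to 24, with 50 epochs per radius; soft training runs every epoch and hard replay training once per radius. The same assumed imports apply, plus a plot_history helper whose import path (and possible implementation, sketched after the code) is also an assumption:

from time import time
import os

import numpy as np

# Project-specific modules; the exact import paths are assumptions
from game import Game
from multi_agent_cnn import MultiAgentCNN
from tf_simulator import TfSimulator
from plotting import plot_history
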
def main():
    game = Game()
    env_size = 4
    m = MultiAgentCNN("pursuers", env_size)
    # target_m = MultiAgentCNN("target")
    ml_simulator = TfSimulator(4, m)  #, target_m)

    eps_per_radius = 50
    sim_per_epoch = 25
    steps_per_run = 15
    radius_top = 25

    memory_size = 10000

    subsample = 2
    epsilon = 1.
    base_cap = 0.0

    save_top = 2500
    total_steps_performed = 0

    run_dir = '{}_{}_{}'.format(time(), sim_per_epoch, steps_per_run)
    print('Saving to {}'.format(run_dir))
    os.mkdir('animations/' + run_dir)

    decay = (1 - 5e-5)

    loss_history = []

    memory_actions = []
    memory_states = []
    memory_rewards = []
    memory_end_states = []

    for radius in range(1, radius_top):
        for ep in range(eps_per_radius):
            print("EPOCH {} ---- Radius {}".format(ep, radius))

            states = []
            rewards = []
            actions = []
            end_states = []

            for sim in range(sim_per_epoch):
                n_bots = (sim % (len(ml_simulator.env.bots) - 1) + 1)
                pos = ml_simulator.env.rand_position()
                ml_simulator.env.rand_initialise_within_radius(
                    radius, n_bots, pos, bot_on_radius=True)

                base_cap *= decay
                if len(memory_rewards) > memory_size * 0.9:
                    epsilon *= decay

                if np.random.uniform(0, 1) < base_cap:
                    s, r, a, e_s = game.get_simulation_history(bot=0,
                                                               N=steps_per_run,
                                                               subsample=0)
                else:
                    s, r, a, e_s = ml_simulator.run_simulation(
                        0, steps_per_run, subsample=subsample, epsilon=epsilon)

                if s is None or r is None or a is None:
                    continue

                s = np.asarray(s)
                r = np.asarray(r)
                a = np.asarray(a)
                e_s = np.asarray(e_s)

                if s.shape[1:] != (env_size, env_size, 3):
                    s = s.transpose((0, 2, 3, 1))
                if e_s.shape[1:] != (env_size, env_size, 3):
                    e_s = e_s.transpose((0, 2, 3, 1))

                states.append(s)
                rewards.append(r)
                actions.append(a)
                end_states.append(e_s)

            # Soft train
            try:
                s_train = np.concatenate(states, axis=0)
                r_train = np.concatenate(rewards, axis=0)
                a_train = np.concatenate(actions, axis=0)
                e_s_train = np.concatenate(end_states, axis=0)
            except Exception as e:
                print("Soft", e)
                continue
            ml_simulator.model.train(s_train, r_train, a_train, e_s_train)

            n = len(r_train)
            total_steps_performed += n

            if len(memory_rewards) > memory_size:
                memory_rewards = memory_rewards[n * 2:] + rewards
                memory_actions = memory_actions[n * 2:] + actions
                memory_states = memory_states[n * 2:] + states
                memory_end_states = memory_end_states[n * 2:] + end_states
            else:
                memory_rewards = memory_rewards + rewards
                memory_actions = memory_actions + actions
                memory_states = memory_states + states
                memory_end_states = memory_end_states + end_states

                print("\t~ Building memory DB", len(memory_rewards))

        # Hard Training
        try:
            statesTrain = np.concatenate(memory_states, axis=0)
            rewardsTrain = np.concatenate(memory_rewards, axis=0)
            actionsTrain = np.concatenate(memory_actions, axis=0)
            endStateTrain = np.concatenate(memory_end_states, axis=0)
        except Exception as e:
            print("Concat error:", e)
            continue

        # Transpose until the batches are in NHWC layout
        # (batch, height, width, channels=3), as expected by the CNN
        env_shape = ml_simulator.env.verticeMatrix.shape
        while (statesTrain.shape[1] != env_shape[0]
               or statesTrain.shape[2] != env_shape[1]
               or statesTrain.shape[3] != 3):
            statesTrain = statesTrain.transpose((0, 2, 3, 1))

        while (endStateTrain.shape[1] != env_shape[0]
               or endStateTrain.shape[2] != env_shape[1]
               or endStateTrain.shape[3] != 3):
            endStateTrain = endStateTrain.transpose((0, 2, 3, 1))

        save = (total_steps_performed > save_top)
        print("\t- Memory Training...")
        loss = ml_simulator.model.train(statesTrain,
                                        rewardsTrain,
                                        actionsTrain,
                                        endStateTrain,
                                        save=save)
        print('\tLoss: {}'.format(loss))
        loss_history.append(loss)

        if save:
            save_top += 2500
            plot_history(loss_history)
            ml_simulator.run_and_plot(run_dir, radius, radius)

    m.save()
    ml_simulator.run_and_plot(run_dir, 'FINAL')

    plot_history(loss_history)
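
plot_history is a project helper not shown in this example; a minimal sketch of what it might do, assuming it simply draws the loss curve recorded after each replay-training pass:

import numpy as np
import matplotlib.pyplot as plt


def plot_history(loss_history):
    # Hypothetical helper: plot one loss value per hard-training pass
    plt.plot(np.arange(len(loss_history)), loss_history)
    plt.title("Loss over epochs")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()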