Example #1
def train():

    MODEL_NAME = "diamond9_input5"
    MODEL_NAME_save = "diamond9_input5"

    FOLDER = "Best_Dojos9"

    MODEL_PATH_SAVE = "./Models/Tensorflow/" + FOLDER + "/" + MODEL_NAME_save + "/" + MODEL_NAME_save + ".ckpt"

    LOGDIR = "./Logs/" + FOLDER + "/" + MODEL_NAME_save + "_2"

    USE_SAVED_MODEL_FILE = False

    GRID_SIZE = 8
    LOCAL_GRID_SIZE = 9
    MAP_NUMBER = 0
    RANDOMIZE_MAPS = False

    # MAP_PATH = "./Maps/Grid{}/map{}.txt".format(GRID_SIZE, MAP_NUMBER)
    MAP_PATH = None

    print("\n ---- Training the Deep Neural Network ----- \n")

    RENDER_TO_SCREEN = False
    # RENDER_TO_SCREEN = True

    env = Environment(wrap=False,
                      grid_size=GRID_SIZE,
                      local_size=LOCAL_GRID_SIZE,
                      rate=80,
                      max_time=50,
                      food_count=10,
                      obstacle_count=0,
                      lava_count=0,
                      zombie_count=0,
                      history=0,
                      action_space=5,
                      map_path=MAP_PATH)

    if RENDER_TO_SCREEN:
        env.prerender()

    model = Network(local_size=LOCAL_GRID_SIZE,
                    name=MODEL_NAME,
                    load=False,
                    path="./Models/Tensorflow/" + FOLDER + "/")

    brain = Brain(epsilon=0.1, action_space=env.number_of_actions())

    model.setup(brain)

    score = tf.placeholder(tf.float32, [])
    avg_t = tf.placeholder(tf.float32, [])
    epsilon = tf.placeholder(tf.float32, [])
    avg_r = tf.placeholder(tf.float32, [])

    tf.summary.scalar('error', tf.squeeze(model.error))
    tf.summary.scalar('score', score)
    tf.summary.scalar('average time', avg_t)
    tf.summary.scalar('epsilon', epsilon)
    tf.summary.scalar('avg reward', avg_r)

    avg_time = 0
    avg_score = 0
    avg_error = 0
    avg_reward = 0
    cumulative_reward = 0

    # Number of episodes
    print_episode = 100
    total_episodes = 10000

    saver = tf.train.Saver()

    # Initialising all variables (weights and biases)
    init = tf.global_variables_initializer()

    # Adds a summary graph of the error over time
    merged_summary = tf.summary.merge_all()

    # TensorBoard capabilities
    writer = tf.summary.FileWriter(LOGDIR)

    # Limit this process to ~10% of the available GPU memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)

    # Begin session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        if USE_SAVED_MODEL_FILE:
            saver.restore(sess, MODEL_PATH_SAVE)
            print("Model restored.")
        else:
            sess.run(init)

        # for episode in range(50):
        # 	state, info = env.reset()
        # 	done = False

        # 	if RENDER_TO_SCREEN:
        # 		env.render()

        # 	while not done:
        # 		action = brain.choose_action(state, sess, model)

        # 		new_state, reward, done, info = env.step(action)

        # 		brain.store_transition(state, action, reward, done, new_state)

        # 		state = new_state

        # 		if RENDER_TO_SCREEN:
        # 			env.render()

        # print("\nREPLAY MEMORY INITIALISED")
        # print(brain.memCntr)

        writer.add_graph(sess.graph)

        start_time = time.time()

        print("")

        for episode in range(total_episodes):

            if RANDOMIZE_MAPS:
                MAP_PATH = "./Maps/Grid10/map{}.txt".format(
                    np.random.randint(10))
                env.set_map(MAP_PATH)

            state, info = env.reset()
            done = False

            # brain.linear_epsilon_decay(total_episodes, episode, start=0.4, end=0.05, percentage=0.8)

            # brain.linear_alpha_decay(total_episodes, episode)

            if RENDER_TO_SCREEN:
                env.render()

            while not done:

                action = brain.choose_action(state, sess, model)
                # print(action)

                # Update environment by performing action
                new_state, reward, done, info = env.step(action)
                # print(new_state)

                brain.store_transition(state, action, reward, done, new_state)

                # e, Q_vector = brain.train_batch(4, model, sess)

                e, Q_vector = brain.train(model, sess)

                state = new_state

                cumulative_reward += reward

                if RENDER_TO_SCREEN:
                    env.render()

                if done:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    avg_error += e
                    avg_reward += cumulative_reward
                    cumulative_reward = 0

            if (episode % print_episode == 0
                    and episode != 0) or (episode == total_episodes - 1):

                current_time = math.floor(time.time() - start_time)
                print(
                    "Ep:",
                    episode,
                    "\tavg t: {0:.3f}".format(avg_time / print_episode),
                    "\tavg score: {0:.3f}".format(avg_score / print_episode),
                    "\tErr {0:.3f}".format(avg_error / print_episode),
                    "\tavg_reward {0:.3f}".format(
                        avg_reward / print_episode),  # avg cumulative reward
                    "\tepsilon {0:.3f}".format(brain.EPSILON),
                    end="")
                print_readable_time(current_time)

                # Save the model's weights and biases to .npz file
                model.save(sess, name=MODEL_NAME_save)
                # save_path = saver.save(sess, MODEL_PATH_SAVE)

                s = sess.run(merged_summary,
                             feed_dict={
                                 model.input: state,
                                 model.actions: Q_vector,
                                 score: avg_score / print_episode,
                                 avg_t: avg_time / print_episode,
                                 epsilon: brain.EPSILON,
                                 avg_r: avg_reward / print_episode
                             })
                writer.add_summary(s, episode)

                avg_time = 0
                avg_score = 0
                avg_error = 0
                avg_reward = 0

        model.save(sess, verbose=True, name=MODEL_NAME_save)

        # save_path = saver.save(sess, MODEL_PATH_SAVE)
        # print("Model saved in path: %s" % save_path)

        writer.close()
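
The Brain and Network classes themselves are not shown in these examples. As a rough orientation, here is a minimal sketch of what the epsilon-greedy selection in brain.choose_action(state, sess, model) could look like, assuming the network exposes model.input and model.q_values as hinted by the commented-out line in Example #3 (the class body below is an assumption, not the original code):

import numpy as np

class Brain:
    def __init__(self, epsilon, action_space):
        self.EPSILON = epsilon
        self.action_space = action_space

    def choose_action(self, state, sess, model):
        # Explore with probability EPSILON, otherwise exploit the current Q estimates
        if np.random.rand() < self.EPSILON:
            return np.random.randint(self.action_space)
        q_values = sess.run(model.q_values, feed_dict={model.input: state})
        return int(np.argmax(q_values))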
Example #2
			if RENDER_TO_SCREEN:
				env.render()

			while not done:

				action = brain.choose_action(state, sess, model)
				# print(action)

				# Update environment by performing action
				new_state, reward, done, info = env.step(action)
				# print(new_state[3], reward)

				brain.store_transition(state, action, reward, done, new_state)
				
				e, Q_vector = brain.train(model, sess)

				state = new_state

				cumulative_reward += reward

				if RENDER_TO_SCREEN:
					env.render()

				if done:
					avg_time += info["time"]
					avg_score += info["score"]
					avg_error += e
					avg_reward += cumulative_reward 
					cumulative_reward = 0
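
The per-step call e, Q_vector = brain.train(model, sess) is likewise not defined on this page. Below is a minimal sketch of a single-transition Q-learning update consistent with the feed_dict used for the summary run in Example #1 (model.input and model.actions); model.optimizer, the gamma default and the internal memory list are assumptions for illustration only:

import numpy as np

def train(self, model, sess, gamma=0.99):
    # Train on the most recently stored transition (the real Brain may sample
    # a minibatch instead, cf. the commented-out train_batch call in Example #1)
    state, action, reward, done, new_state = self.memory[-1]

    # Current Q estimates and bootstrapped target from the next state
    Q_vector = sess.run(model.q_values, feed_dict={model.input: state})
    Q_next = sess.run(model.q_values, feed_dict={model.input: new_state})

    target = reward if done else reward + gamma * np.max(Q_next)
    Q_vector[0, action] = target  # assumes a batch dimension of 1

    # One gradient step towards the TD target
    _, error = sess.run([model.optimizer, model.error],
                        feed_dict={model.input: state, model.actions: Q_vector})
    return error, Q_vector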
Example #3
def train():

    MODEL_NAME = "diamond_local15_maps"

    MODEL_PATH_SAVE = "./Models/Tensorflow/Maps/" + MODEL_NAME + "/" + MODEL_NAME + ".ckpt"

    LOGDIR = "./Logs/" + MODEL_NAME

    USE_SAVED_MODEL_FILE = False

    GRID_SIZE = 10
    LOCAL_GRID_SIZE = 15
    MAP_NUMBER = 0
    RANDOMIZE_MAPS = True

    # MAP_PATH = "./Maps/Grid{}/map{}.txt".format(GRID_SIZE, MAP_NUMBER)
    MAP_PATH = None

    print("\n ---- Training the Deep Neural Network ----- \n")

    # RENDER_TO_SCREEN = False
    RENDER_TO_SCREEN = True

    env = Environment(wrap=False,
                      grid_size=GRID_SIZE,
                      local_size=LOCAL_GRID_SIZE,
                      rate=80,
                      max_time=50,
                      food_count=3,
                      obstacle_count=1,
                      lava_count=1,
                      zombie_count=0,
                      action_space=5,
                      map_path=MAP_PATH)

    if RENDER_TO_SCREEN:
        env.prerender()

    model = Network(local_size=LOCAL_GRID_SIZE,
                    name=MODEL_NAME,
                    load=False,
                    path="./Models/Tensorflow/Maps/")

    brain = Brain(epsilon=0.05, action_space=env.number_of_actions())

    model.setup(brain)

    tf.summary.scalar('error', tf.squeeze(model.error))

    avg_time = 0
    avg_score = 0
    avg_error = 0

    # Number of episodes
    print_episode = 1000
    total_episodes = 100000

    saver = tf.train.Saver()

    # Initialising all variables (weights and biases)
    init = tf.global_variables_initializer()

    # Adds a summary graph of the error over time
    merged_summary = tf.summary.merge_all()

    # TensorBoard capabilities
    # writer = tf.summary.FileWriter(LOGDIR)

    # Limit this process to ~30% of the available GPU memory (~4GB on a 12GB card)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)

    # Begin session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        if USE_SAVED_MODEL_FILE:
            saver.restore(sess, MODEL_PATH_SAVE)
            print("Model restored.")

        sess.run(init)

        # writer.add_graph(sess.graph)

        start_time = time.time()

        print("")

        for episode in range(total_episodes):

            if RANDOMIZE_MAPS:
                # Pick one of the pre-made maps at random (map encoding: 0 = lava, 1 = obstacle)
                MAP_PATH = "./Maps/Grid10/map{}.txt".format(
                    np.random.randint(10))
                env.set_map(MAP_PATH)

            state, info = env.reset()
            done = False

            brain.linear_epsilon_decay(total_episodes,
                                       episode,
                                       start=0.5,
                                       end=0.05,
                                       percentage=0.6)

            # brain.linear_alpha_decay(total_episodes, episode)

            if RENDER_TO_SCREEN:
                env.render()

            while not done:

                # Retrieve the Q values from the NN in vector form
                # Q_vector = sess.run(model.q_values, feed_dict={model.input: state})

                action = brain.choose_action(state, sess, model)

                # print(action)

                # Update environment by performing action
                new_state, reward, done, info = env.step(action)

                # print(new_state)

                brain.store_transition(state, action, reward, done, new_state)

                e = brain.train(model, sess)

                state = new_state

                if RENDER_TO_SCREEN:
                    env.render()

                if done:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    avg_error += e

            if (episode % print_episode == 0
                    and episode != 0) or (episode == total_episodes - 1):

                current_time = math.floor(time.time() - start_time)
                print("Ep:",
                      episode,
                      "\tavg t: {0:.3f}".format(avg_time / print_episode),
                      "\tavg score: {0:.3f}".format(avg_score / print_episode),
                      "\tErr {0:.3f}".format(avg_error / print_episode),
                      "\tepsilon {0:.3f}".format(brain.EPSILON),
                      end="")
                print_readable_time(current_time)

                avg_time = 0
                avg_score = 0
                avg_error = 0

                # Save the model's weights and biases to .npz file
                model.save(sess)
                save_path = saver.save(sess, MODEL_PATH_SAVE)

                # s = sess.run(merged_summary, feed_dict={model.input: state, model.actions: Q_vector})
                # writer.add_summary(s, episode)

        model.save(sess, verbose=True)

        save_path = saver.save(sess, MODEL_PATH_SAVE)
        print("Model saved in path: %s" % save_path)