Example #1
def play(difficulty):
    """
	Play the Snake game, feel free to change the difficulty

	Default difficulty: Medium
	"""
    d = 150  # default rate ("medium"); lower rates make the game faster and harder

    if difficulty == "easy":
        d = 200
        print("\nDifficulty set to <easy>\n")
    elif difficulty == "medium":
        d = 150
        print("\nDifficulty set to <medium>\n")
    elif difficulty == "hard":
        d = 100
        print("\nDifficulty set to <hard>\n")
    elif difficulty == "insane":
        d = 80
        print("\nDifficulty set to <insane>\n")
    else:
        print(
            "\nYou did not enter a valid difficulty!\nDifficulty set to <medium>\n"
        )

    env = Environment(wrap=False,
                      grid_size=10,
                      rate=d,
                      tail=True,
                      obstacles=False)

    env.play()
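
The if/elif chain above just maps a difficulty name to a tick rate. A minimal alternative sketch, assuming the same Environment class used throughout these snippets; the dict, the play_alt name, and the fallback value are illustrative:

RATES = {"easy": 200, "medium": 150, "hard": 100, "insane": 80}

def play_alt(difficulty):
    # Fall back to medium (rate 150) for unrecognised difficulty names
    d = RATES.get(difficulty, 150)
    env = Environment(wrap=False, grid_size=10, rate=d, tail=True, obstacles=False)
    env.play()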
Example #2
def play(difficulty=150):

    # The tick rate comes straight from the difficulty argument
    # (see the rate mapping in Example #1)
    env = Environment(wrap=False,
                      grid_size=10,
                      rate=difficulty,
                      tail=True,
                      obstacles=False)

    env.play()
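
A usage sketch for the variant above; the call values are illustrative (per Example #1's mapping, rate 100 corresponds to "hard"):

play(100)  # faster tick, harder game
play()     # defaults to rate 150, i.e. "medium"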
Example #3
import time

import pandas as pd


def watch(log_file_number):

	csv_file_path = "./Data/Logs/log_file{}.csv".format(log_file_number)

	RENDER_TO_SCREEN = True

	# rate should be 0 when not rendering, else it will lengthen training time unnecessarily
	env = Environment(wrap = False, grid_size = 10, rate = 80, tail = True, obstacles = False)

	if RENDER_TO_SCREEN:
		env.prerender()
	
	# No arrow on top of the snake's head
	env.noArrow()

	avg_time = 0
	avg_score = 0

	episode = 0

	df = pd.read_csv(csv_file_path)
	# print(df)

	GAME_OVER = False
	GAME_ON = True

	t = 3

	time_stamps = df["TIME_STAMP"].values

	snake_Xs = df["SNAKE_X"].values
	snake_Ys = df["SNAKE_Y"].values

	food_Xs = df["FOOD_X"].values
	food_Ys = df["FOOD_Y"].values

	my_actions = df["INPUT_BUTTON"].values

	my_index = 0

	while GAME_ON:

		# needs to be changed to ignore the SCALE
		state, info = env.irl_reset(snake_Xs[my_index], snake_Ys[my_index], food_Xs[my_index], food_Ys[my_index])
		# last_action = 3
		last_action = my_actions[my_index]

		# print(state)

		my_index = my_index + 1 #BUG: TODO: FIX MY_INDEX

		if RENDER_TO_SCREEN:
			env.countdown = True
		else:
			env.countdown = False

		while not GAME_OVER:

			try:
				if RENDER_TO_SCREEN:
					env.render()

				action = last_action

				# print(self.time)
				if my_index < len(my_actions):

					# Skip ahead to the last action recorded with this timestamp,
					# so only the final key pressed during a "tick" is replayed
					last_action_pressed = False
					while not last_action_pressed:
						if my_index + 1 < len(my_actions) and time_stamps[my_index] == time_stamps[my_index + 1]:
							my_index = my_index + 1
						else:
							last_action_pressed = True

					# print(time_stamps[my_index])
					if env.time == time_stamps[my_index]:
						action = my_actions[my_index]
						last_action = action
						if action != -1:
							my_index = my_index + 1

				if env.countdown:
					text = env.font.render(str(t), True, (255, 255, 255))
					env.display.blit(text, (60, 30))
					env.pg.display.update()
					time.sleep(0.2)
					t = t - 1
					if t == 0:
						t = 3
						env.countdown = False
				else:
					new_state, reward, GAME_OVER, info = env.step(action, action_space = 4)

					if reward > 0:
						env.food.irl_make(food_Xs[my_index], food_Ys[my_index], env.SCALE)
						# new_state = env.get_state()

				if GAME_OVER:
					avg_time += info["time"]
					avg_score += info["score"]
					my_index = my_index + 1

			except KeyboardInterrupt:
				raise

		while GAME_OVER:

			if my_index >= len(my_actions) - 1:
				# End of the log; stop the outer loop as well
				GAME_ON = False
				break
			else:
				GAME_OVER = False
				episode = episode + 1
				avg_time = 0
				avg_score = 0

	# print("Total Episodes: ", episode+1)
	env.end()
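
The replay loop must collapse several key presses that land on the same tick. A standalone sketch of that dedup step, assuming time_stamps is a parallel array of per-input tick values as in the log files; the function name is illustrative:

def last_action_in_tick(time_stamps, index):
    # Advance to the last log entry that shares the timestamp at `index`
    while index + 1 < len(time_stamps) and time_stamps[index] == time_stamps[index + 1]:
        index += 1
    return index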
Example #4
def play():

    env = Environment(wrap=True, grid_size=10, rate=100, tail=False)

    env.play()
Example #5
import time

import numpy as np

# Qmatrix, state_index and Q_textfile_path_save are assumed to be defined
# elsewhere at module level in this project.


def train(s):

    RENDER_TO_SCREEN = False

    # rate should be 0 when not rendering, else it will lengthen training time unnecessarily
    env = Environment(wrap=False, grid_size=8, rate=0, max_time=50)

    Q = Qmatrix(2, env)  # 0 - zeros, 1 - random, 2 - textfile

    alpha = 0.15  # Learning rate, i.e. which fraction of the Q values should be updated
    gamma = 0.99  # Discount factor, i.e. to which extent the algorithm considers possible future rewards
    epsilon = 0.1  # Probability to choose random action instead of best action

    epsilon_function = True
    epsilon_start = 0.5
    epsilon_end = 0.05
    epsilon_percentage = 0.5  # in decimal

    # Test for an epsilon linear function:
    # y = mx + c
    # y = -(0.9 / (20% of total episodes)) * x + 1
    # if epsilon <= 0.1, make epsilon = 0.1
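    # Worked example of the decay used below, with this function's values
    # (epsilon_start=0.5, epsilon_end=0.05, epsilon_percentage=0.5,
    # total_episodes=2000): slope m = -(0.5 - 0.05) / (0.5 * 2000) = -0.00045, so
    #   epsilon(0)    = 0.5
    #   epsilon(500)  = 0.5 - 0.00045 * 500  = 0.275
    #   epsilon(1000) = 0.5 - 0.00045 * 1000 = 0.05 (clamped thereafter)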

    avg_time = 0
    avg_score = 0

    print_episode = 100
    total_episodes = 2000

    start_time = time.time()

    for episode in range(total_episodes):
        # Reset the environment
        # state, info = env.reset()
        done = False
        first_action = True
        action_count = 0

        # Epsilon linear function
        if epsilon_function:
            epsilon = epsilon_start - (epsilon_start - epsilon_end) * episode / (
                epsilon_percentage * total_episodes)
            if epsilon < epsilon_end:
                epsilon = epsilon_end

        while not done:

            # Testing with try except in loop
            # Might not be the best implementation of training and ensuring saving Q to a .txt file
            try:
                # print("waiting for recv...")
                # data = input("Send (q to Quit): ")
                # s.send(str.encode("p\n"))
                r = s.recv(1024)
                if r:  # recv() returns b"" once the peer closes the socket
                    x = r.decode("utf-8")
                    # print(x) #to skip line
                    x_cleaned = x[3:-1]  #Need to find a better implementation
                    a = x_cleaned.split(", ")
                    # print("Cleaned msg: ", a) #raw bytes received

                    if a[0] != "close":

                        if a[0] == "done":
                            done = True
                            # print("\nEpisode done, #", episode)

                            # Needs to change to incorporate dead states
                            state = np.zeros(4)
                            reward = int(a[1])
                            # print("reward = ", reward, "\n")

                        else:
                            state = np.zeros(4)
                            for i in range(4):
                                state[i] = float(a[i])

                            # print("State = ", state)

                            reward = int(a[4])
                            # print("reward = ", reward, "\n")

                            if np.random.rand() <= epsilon:
                                action = np.random.randint(0, 4)
                            else:
                                action = np.argmax(Q[state_index(state)])

                            # print("Action = ", action)

                            s.send(str.encode(str(action) + "\n"))

                            if first_action:
                                action_count = action_count + 1

                                if action_count >= 2:
                                    first_action = False

                            # Q[env.state_index(state), action] += alpha * (reward + gamma * np.max(Q[env.state_index(new_state)]) - Q[env.state_index(state), action])

                        # TRAINING PART
                        if not first_action:

                            if reward == 10:
                                avg_score = avg_score + 1

                            Q[state_index(prev_state),
                              prev_action] += alpha * (
                                  reward +
                                  gamma * np.max(Q[state_index(state)]) -
                                  Q[state_index(prev_state), prev_action])

                        # Save the previous state/action pair; skip on "done",
                        # when no new action has been chosen this message
                        if not done:
                            prev_state = state
                            prev_action = action

                    else:
                        s.close()
                        print("Socket has been closed")
                        done = True  # stop the loop; the socket is gone

            except KeyboardInterrupt:
                # Save the Q matrix before exiting on Ctrl+C
                np.savetxt(Q_textfile_path_save,
                           Q.astype(float),
                           fmt='%f',
                           delimiter=" ")
                print("Saved Q matrix to text file")

                s.close()
                print("Socket has been closed")
                raise

        if (episode % print_episode == 0
                and episode != 0) or (episode == total_episodes - 1):
            current_time = time.time() - start_time
            print(
                "Episode:", episode, "\ttime:", avg_time / print_episode,
                "\tscore:", avg_score / print_episode, "\tepsilon:", epsilon,
                "\ttime {0:.0f}:{1:02.0f}".format(current_time // 60,
                                                  current_time % 60))
            np.savetxt(Q_textfile_path_save,
                       Q.astype(float),
                       fmt='%f',
                       delimiter=" ")
            avg_time = 0
            avg_score = 0

    # This doesn't need to be here
    # np.savetxt(Q_textfile_path_save, Q.astype(np.float), fmt='%f', delimiter = " ")
    print("Simulation finished. \nSaved Q matrix to text file at:",
          Q_textfile_path_save)
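
The update inside train() is the standard tabular Q-learning rule. A self-contained sketch of just that step, assuming integer state indices; the array sizes and the q_update name are illustrative:

import numpy as np

def q_update(Q, s, a, reward, s_next, alpha=0.15, gamma=0.99):
    # Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
    Q[s, a] += alpha * (reward + gamma * np.max(Q[s_next]) - Q[s, a])
    return Q

Q = np.zeros((256, 4))        # e.g. 256 discrete states, 4 actions
Q = q_update(Q, 3, 1, 10, 7)  # one transition with reward 10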
Example #6
import numpy as np

# Qmatrix, state_index and brute_force_scoring are assumed to be defined
# elsewhere at module level in this project.


def run(s):

    RENDER_TO_SCREEN = False

    env = Environment(wrap=False,
                      grid_size=8,
                      rate=80,
                      max_time=100,
                      tail=False)

    Q = Qmatrix(2, env)  # 0 - zeros, 1 - random, 2 - textfile

    # Minimise the overfitting during testing
    epsilon = 0.005

    # Testing for a certain amount of episodes
    for episode in range(1000):
        # state, info = env.reset()
        done = False

        score = 0

        prev_state = np.zeros(4)
        prev_state[2] = -1

        while not done:

            try:
                # print("waiting for recv...")
                # data = input("Send (q to Quit): ")
                # s.send(str.encode("p\n"))
                r = s.recv(1024)
                if r:  # recv() returns b"" once the peer closes the socket
                    x = r.decode("utf-8")
                    # print(x) #to skip line
                    x_cleaned = x[3:-1]  #Need to find a better implementation
                    a = x_cleaned.split(", ")

                    if a[0] != "close":

                        if a[0] == "done":
                            done = True
                            print("Episode done")

                        else:
                            state = np.zeros(4)
                            for i in range(4):
                                # Parse, then truncate to an integer grid coordinate
                                state[i] = int(float(a[i]))

                            # print(state)

                            if np.random.rand() <= epsilon:
                                action = np.random.randint(0, 4)
                            else:
                                action = np.argmax(Q[state_index(state)])

                            # print("Action = ", action)

                            s.send(str.encode(str(action) + "\n"))

                            score = brute_force_scoring(
                                state, prev_state, score)

                            prev_state = state

                    else:
                        s.close()
                        print("Socket has been closed")
                        done = True  # stop the loop; the socket is gone

            # To force close the connection
            except KeyboardInterrupt:
                s.close()
                print("Socket has been closed")
                raise

        # Print every episode (raise the modulus to print less often)
        print("Episode:", episode, "   Score:", score)
Example #7
import time

import numpy as np
import pandas as pd


def watch(file_number):
    """
    Watch the games in a specific log file.

    Default file: last log file
    """
    number_path = "./Data/log_file_number.txt"
    log_file_number_txt = np.loadtxt(number_path, dtype='int')

    if file_number >= log_file_number_txt or file_number < 0:
        print(
            "\nYou did not enter a valid log file number!\nReplaying last log file (file number: {})\n"
            .format(log_file_number_txt - 1))
        file_number = log_file_number_txt - 1
    else:
        print("\nReplaying Log file {}\n".format(file_number))

    csv_file_path = "./Data/Logs/log_file{}.csv".format(file_number)

    RENDER_TO_SCREEN = True

    # Create environment
    env = Environment(wrap=False,
                      grid_size=10,
                      rate=90,
                      tail=True,
                      obstacles=False)

    if RENDER_TO_SCREEN:
        env.prerender()

    # No arrow on top of snakes head
    env.noArrow()

    avg_time = 0
    avg_score = 0

    episode = 0

    df = pd.read_csv(csv_file_path)
    # print(df)

    GAME_OVER = False
    GAME_ON = True

    t = 3

    time_stamps = df["TIME_STAMP"].values

    snake_Xs = df["SNAKE_X"].values
    snake_Ys = df["SNAKE_Y"].values

    food_Xs = df["FOOD_X"].values
    food_Ys = df["FOOD_Y"].values

    my_actions = df["INPUT_BUTTON"].values

    my_index = 0

    while GAME_ON:

        # needs to be changed to ignore the SCALE
        state, info = env.irl_reset(snake_Xs[my_index], snake_Ys[my_index],
                                    food_Xs[my_index], food_Ys[my_index])
        # last_action = 3
        last_action = my_actions[my_index]

        # print(state)

        my_index = my_index + 1  #BUG: TODO: FIX MY_INDEX

        if RENDER_TO_SCREEN:
            env.countdown = True
        else:
            env.countdown = False

        while not GAME_OVER:

            try:
                if RENDER_TO_SCREEN:
                    env.render()

                action = last_action

                # print(self.time)
                if my_index < len(my_actions):

                    # Skip ahead to the last action recorded with this
                    # timestamp, so only the final key pressed during a
                    # "tick" is replayed
                    last_action_pressed = False
                    while not last_action_pressed:
                        if my_index + 1 < len(my_actions) and time_stamps[
                                my_index] == time_stamps[my_index + 1]:
                            my_index = my_index + 1
                        else:
                            last_action_pressed = True

                    # print(time_stamps[my_index])
                    if env.time == time_stamps[my_index]:
                        action = my_actions[my_index]
                        last_action = action
                        if action != -1:
                            my_index = my_index + 1

                if env.countdown:
                    text = env.font.render(str(t), True, (255, 255, 255))
                    env.display.blit(text, (60, 30))
                    env.pg.display.update()
                    time.sleep(0.4)
                    t = t - 1
                    if t == 0:
                        t = 3
                        env.countdown = False
                else:
                    new_state, reward, GAME_OVER, info = env.step(
                        action, action_space=4)

                    if reward > 0:
                        env.food.irl_make(food_Xs[my_index], food_Ys[my_index],
                                          env.SCALE)
                        # new_state = env.get_state()

                if GAME_OVER:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    my_index = my_index + 1

            except KeyboardInterrupt:
                raise

        while GAME_OVER:

            if my_index >= len(my_actions) - 1:
                # End of the log; stop the outer loop as well
                GAME_ON = False
                break
            else:
                GAME_OVER = False
                episode = episode + 1
                avg_time = 0
                avg_score = 0

    # print("Total Episodes: ", episode+1)
    env.end()
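
Both watch() variants assume the same CSV log schema. A minimal sketch of reading one log, with column names taken from the code above; the file path is illustrative:

import pandas as pd

df = pd.read_csv("./Data/Logs/log_file0.csv")
time_stamps = df["TIME_STAMP"].values  # tick at which each input occurred
snake_xy = df[["SNAKE_X", "SNAKE_Y"]].values
food_xy = df[["FOOD_X", "FOOD_Y"]].values
actions = df["INPUT_BUTTON"].values    # -1 appears to mean "no new button pressed"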