Example no. 1
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0.5  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        self.epsilon_decay_value = (self.epsilon) / (END_EPSILON_DECAYING -
                                                     START_EPSILON_DECAYING)

    #TO DO
    def get_state(self, game):
        drone = game.drone
        state = [drone.x, drone.y]
        return np.array(state, dtype=int)

    # Random Moves: tradeoff exploration / exploitation
    def get_action(self, state, episode):

        if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
            self.epsilon -= self.epsilon_decay_value

        final_move = [0, 0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 3)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move

    #Storing Memory
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    #TO DO
    def train_long_memory(self):

        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    # Updating Q Values
    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
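Example no. 1 above, like the snippets that follow, refers to module-level imports, constants, and helper classes (MAX_MEMORY, BATCH_SIZE, LR, Linear_QNet, QTrainer, the game-specific Point/Direction helpers, etc.) that are defined elsewhere in each project. As a reading aid only, here is a minimal sketch of what those shared pieces typically look like; the concrete constant values and the class bodies are assumptions, not taken from the original sources.

# Minimal sketch of the shared names the examples assume (illustrative values only).
import random
from collections import deque

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

MAX_MEMORY = 100_000  # replay-buffer capacity
BATCH_SIZE = 1_000    # transitions per long-memory update
LR = 0.001            # learning rate


class Linear_QNet(nn.Module):
    # Two-layer fully connected network: state -> one Q-value per action.
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        return self.linear2(F.relu(self.linear1(x)))


class QTrainer:
    # One Q-learning step: Q(s, a) <- r + gamma * max_a' Q(s', a') for non-terminal s'.
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)
        if state.dim() == 1:  # single transition -> add a batch dimension
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            done = (done,)

        pred = self.model(state)  # current Q-values
        target = pred.clone()     # targets changed only at the action actually taken
        for idx in range(len(done)):
            q_new = reward[idx]
            if not done[idx]:
                q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = q_new

        self.optimizer.zero_grad()
        self.criterion(target, pred).backward()
        self.optimizer.step()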
Example no. 2
class Agent:
    def __init__(self):
        self.epsilon = 0.999
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, LR, self.gamma)
        self.epsilon_decay_value = 0.998

    def get_state(self, game):
        # drone = game.drone
        # [game.drone_x, game.drone_y, game.man_x, game.man_y]
        state = [game.drone_x, game.drone_y]

        return np.array(state, dtype=int)

    def get_action(self, state, episode):
        self.epsilon *= self.epsilon_decay_value

        if np.random.random() < self.epsilon:
            # take random action
            move = np.random.randint(0, 4)
            return move
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            return move

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
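Example no. 2 decays exploration multiplicatively on every action selection (not once per game), so after n calls epsilon is roughly 0.999 * 0.998^n and falls below 0.05 after about 1,500 actions. A quick check of that arithmetic (illustrative only, not part of the original code):

# How fast the multiplicative decay of Example no. 2 shrinks epsilon (illustrative).
epsilon, steps = 0.999, 0
while epsilon >= 0.05:
    epsilon *= 0.998
    steps += 1
print(steps)  # ~1496 action selections before epsilon drops below 0.05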
Example no. 3
class Agent:
    def __init__(self):
        self.memory = deque(maxlen=MAX_MEM)
        self.n_games: int = 0
        self.epsilon = 0
        self.gamma = 0.9
        self.model = Q_Net(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        for state, action, reward, next_state, done in mini_sample:
            self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
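Example no. 3 above, like many of the snippets below, uses the linear schedule epsilon = 80 - n_games compared against random.randint(0, 200). Since randint(0, 200) has 201 equally likely outcomes, the probability of a random move starts at 80/201 (about 0.40) and reaches zero once 80 games have been played. A small helper that makes the schedule explicit (an illustration, not part of the original code):

# Exploration probability implied by "epsilon = 80 - n_games" vs. randint(0, 200).
def explore_probability(n_games: int) -> float:
    epsilon = 80 - n_games
    return max(0, epsilon) / 201  # randint(0, 200) yields 201 equally likely integers

# explore_probability(0) ~= 0.398, explore_probability(40) ~= 0.199, explore_probability(80) == 0.0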
Example no. 4
class Agent:
    def __init__(self):
        self.number_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.8  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = LinearQNet(11, 256, 3)
        self.trainer = QTrainer(self.model, learning_rate=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]

        point_left = Point(head.x - BLOCK_SIZE, head.y)
        point_right = Point(head.x + BLOCK_SIZE, head.y)
        point_up = Point(head.x, head.y - BLOCK_SIZE)
        point_down = Point(head.x, head.y + BLOCK_SIZE)

        direction_left = game.direction == Direction.LEFT
        direction_right = game.direction == Direction.RIGHT
        direction_up = game.direction == Direction.UP
        direction_down = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (direction_right and game.is_collision(point_right))
            or (direction_left and game.is_collision(point_left))
            or (direction_up and game.is_collision(point_up))
            or (direction_down and game.is_collision(point_down)),

            # Danger right
            (direction_up and game.is_collision(point_right))
            or (direction_down and game.is_collision(point_left))
            or (direction_left and game.is_collision(point_up))
            or (direction_right and game.is_collision(point_down)),

            # Danger left
            (direction_down and game.is_collision(point_right))
            or (direction_up and game.is_collision(point_left))
            or (direction_right and game.is_collision(point_up))
            or (direction_left and game.is_collision(point_down)),

            # Move direction
            direction_left,
            direction_right,
            direction_up,
            direction_down,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]
        return np.array(state, dtype=int)

    def get_action(self, state):
        # random moves: tradeoff between exploration / exploitation
        self.epsilon = 80 - self.number_of_games / 10
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            initial_state = torch.tensor(state, dtype=torch.float)
            prediction = self.model(initial_state)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            sample = random.sample(self.memory, BATCH_SIZE)
        else:
            sample = self.memory
        states, actions, rewards, next_states, game_overs = zip(*sample)
        self.trainer.train_step(states, actions, rewards, next_states,
                                game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)
Example no. 5
class Agent:
    # Agent class. The agent is the intermediary between the model and the environment (the game).
    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
            print(self.n_games)

        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    # Initialization. First it loads the saved results, sets a few constants and loads the neural network from the file 'model.pth'.
    # If you are running this program for the first time, change lines 25-27 to "self.n_games = 0" and delete line 33.

    def get_state(self, game):
        # Function with which the agent obtains information about the environment.
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN
        # Definitions of the variables used below.
        state = [

            # Danger ahead?
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right?
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left?
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Direction of the snake.
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Relative position of the food.
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y

        ]

        return np.array(state, dtype=int)

    # Returns the data to the agent.

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        # Replay training function. (After the real game, the model replays the game once more.)
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    # Real-time training function.

    def get_action(self, state):
        self.epsilon = 500 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 500) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 6
class Agent:

	def __init__(self):
		self.n_games = 0
		self.epsilon = 0	# randomness
		self.gamma = 0.9		# discount rate
		self.memory = deque(maxlen = max_memory)
		self.model = Linear_QNet(11, 256, 3)
		PATH = './model/model.pth'
		if os.path.exists(PATH):
			self.model.load_state_dict(torch.load(PATH))
			# self.model.eval()
			print('Pretrained = True')

		self.trainer = QTrainer(self.model, lr = lr, gamma = self.gamma)

	def get_state(self, game):
		head = game.snake[0]
		point_l = Point(head.x - 20, head.y)
		point_r = Point(head.x + 20, head.y)
		point_u = Point(head.x, head.y - 20)
		point_d = Point(head.x, head.y + 20)

		dir_l = game.direction == Direction.LEFT
		dir_r = game.direction == Direction.RIGHT
		dir_u = game.direction == Direction.UP
		dir_d = game.direction == Direction.DOWN

		state = [
		    # Danger straight
		    (dir_r and game.is_collision(point_r)) or 
		    (dir_l and game.is_collision(point_l)) or 
		    (dir_u and game.is_collision(point_u)) or 
		    (dir_d and game.is_collision(point_d)),

		    # Danger right
		    (dir_u and game.is_collision(point_r)) or 
		    (dir_d and game.is_collision(point_l)) or 
		    (dir_l and game.is_collision(point_u)) or 
		    (dir_r and game.is_collision(point_d)),

		    # Danger left
		    (dir_d and game.is_collision(point_r)) or 
		    (dir_u and game.is_collision(point_l)) or 
		    (dir_r and game.is_collision(point_u)) or 
		    (dir_l and game.is_collision(point_d)),
		    
		    # Move direction
		    dir_l,
		    dir_r,
		    dir_u,
		    dir_d,
		    
		    # Food location 
		    game.food.x < game.head.x,  # food left
		    game.food.x > game.head.x,  # food right
		    game.food.y < game.head.y,  # food up
		    game.food.y > game.head.y  # food down
		    ]

		return np.array(state, dtype=int)

	def remember(self, state, action, reward, next_state, done):
		self.memory.append((state, action, reward, next_state, done))

	def train_long_memory(self):
		if len(self.memory) > batch_size:
			mini_sample = random.sample(self.memory, batch_size) # list of tuples of size = 1000
		else:
			mini_sample = self.memory

		states, actions, rewards, next_states, dones = zip(*mini_sample)

		self.trainer.train_step(states, actions, rewards, next_states, dones)

	def train_short_memory(self, state, action, reward, next_state, done):
		self.trainer.train_step(state, action, reward, next_state, done)

	def get_action(self, state):
		# random moves: tradeoff exploration / exploitation
		self.epsilon = 80 - self.n_games
		final_move = [0, 0, 0]

		if random.randint(0, 200) < self.epsilon:
			move = random.randint(0, 2)
			final_move[move] = 1
		else:
			state0 = torch.tensor(state, dtype = torch.float)
			prediction = self.model(state0)
			move = torch.argmax(prediction).item()
			final_move[move] = 1

		return final_move
Example no. 7
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(4, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.ship.center
        dir_l = game.ship.moving_left == True
        dir_r = game.ship.moving_right == True
        dir_s = game.ship.moving_left == False and game.ship.moving_right == False
        alienlen10 = len(game.aliens) < 10
        alienlen5 = len(game.aliens) < 5

        state = [
            head,
            # alienlen10,
            # alienlen5,
            dir_l,
            dir_r,
            dir_s,
            # game.ship.rect.left == 0,
            # game.ship.rect.right == game.ship.screen_rect.right,
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        #for state, action, reward, nexrt_state, done in mini_sample:
        #    self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 8
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  #randomness
        self.gamma = 0.9  #discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  #popleft()
        self.model = Linear_QNet(11, 256,
                                 3)  # input_layer=11, hidden=256, output=3
        self.model.load_state_dict(torch.load('./optimized_model/model.pth'))
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        BLOCK_SIZE = 20

        #Points to check danger
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            #For straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            #Danger Right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            #Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            #Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            #Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY IS REACHED

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  #list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

        #for state, action,reward, next_state, done in mini_sample:
        #    self.trainer.train_step(state, action,reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves : tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(
                0, 200
        ) < self.epsilon and False:  # This was the original; we made small changes to it
            # if random.randint(0, 200) < 20 and self.n_games < 90:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model.forward(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
Example no. 9
class Agent:
    def __init__(self):
        self.n_games = 0
        self.n_revise = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.statusGame = []
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def addStatus(self, Snake2, Score2, food2, frame_iteration, direction,
                  old_record):
        Snake = []
        food = [food2.x, food2.y]
        for itemSnake2 in Snake2:
            item = [itemSnake2.x, itemSnake2.y]
            Snake.append(item)
        self.statusGame.append(
            [Snake, Score2, food, frame_iteration, direction, old_record])

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        #for state, action, reward, nexrt_state, done in mini_sample:
        #    self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 10
class Agent:
    def __init__(self, args, model):
        self.parameters_file = args.parameters_file
        self.args = args
        self.parameters = yaml.load(open(self.parameters_file, 'r'),
                                    Loader=yaml.FullLoader)
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=self.parameters["max_memory"])  # popleft()
        self.model = model
        self.trainer = QTrainer(self.model,
                                lr=self.parameters["lr"],
                                gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger is straight if
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # Danger is right if
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger is left if
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)  # converting to 0 or 1

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > self.parameters["batch_size"]:
            mini_sample = random.sample(
                self.memory, self.parameters["batch_size"])  # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def predict(self, state):
        state_tensor = torch.tensor(state, dtype=torch.float)
        prediction = self.model(state_tensor)  # moves depending on the model
        move = torch.argmax(prediction).item()
        return move

    def get_action(self, state):
        move = 0
        final_move = [0, 0, 0]
        # random moves: tradeoff exploration / exploitation
        if self.args.use_trained == True:
            move = self.predict(state)
        else:
            self.epsilon = 100 - self.n_games
            if random.randint(0, 200) < self.epsilon:
                move = random.randint(0, 2)
            else:
                move = self.predict(state)

        final_move[move] = 1

        return final_move
Example no. 11
class Agent:
  # Initialize agent's parameters
  def __init__(self):
    self.n_games = 0
    self.epsilon = 0    # randomness
    self.gamma = 0.9    # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)
    self.model = Linear_QNet(11, 258, 3)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
  
  # 11 values represent the state of the game
  def get_state(self, game):
    head = game.snake[0]

    # Clockwise directions and angles
    cw_dirs = [
      Direction.RIGHT == game.direction, 
      Direction.DOWN == game.direction,
      Direction.LEFT == game.direction,
      Direction.UP == game.direction
      ]
    cw_angs = np.array([0, np.pi/2, np.pi, -np.pi/2])

    # Position - in front: 0, on right: 1, on left: -1; BLOCK_SIZE = 20
    getPoint = lambda pos: Point(
      head.x + 20*np.cos(cw_angs[(cw_dirs.index(True)+pos) % 4]),
      head.y + 20*np.sin(cw_angs[(cw_dirs.index(True)+pos) % 4]))

    state = [
      # Danger
      game.is_collision(getPoint(0)),
      game.is_collision(getPoint(1)),
      game.is_collision(getPoint(-1)),

      # Move direction
      cw_dirs[2],
      cw_dirs[0],
      cw_dirs[3],
      cw_dirs[1],

      # Food location
      game.food.x < head.x,
      game.food.x > head.x,
      game.food.y < head.y,
      game.food.y > head.y
    ]

    return np.array(state, dtype=int)

  # Add information of one frame iteration (when play step happens) to memory
  def remember(self, state, action, reward, next_state, done):
    self.memory.append((state, action, reward, next_state, done))

  # Train the model with information based on one full game
  def train_long_memory(self):
    if len(self.memory) > BATCH_SIZE:
      mini_sample = random.sample(self.memory, BATCH_SIZE)    # List of tuples
    else:
      mini_sample = self.memory

    states, actions, rewards, next_states, dones = zip(*mini_sample)
    self.trainer.train_step(states, actions, rewards, next_states, dones)

  # Train the model with information based on one frame iteration
  def train_short_memory(self, state, action, reward, next_state, done):
    self.trainer.train_step(state, action, reward, next_state, done)

  def get_action(self, state):
    # Random moves: tradeoff exploration / exploitation
    self.epsilon = 80 - self.n_games
    final_move = [0, 0, 0]

    # The bigger the epsilon, the more likely randint is lower
    if random.randint(0, 200) < self.epsilon:
      move = random.randint(0, 2)
      final_move[move] = 1
    else:
      state0 = torch.tensor(state, dtype=torch.float)
      prediction = self.model(state0)
      move = torch.argmax(prediction).item()
      final_move[move] = 1

    return final_move
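The getPoint lambda in Example no. 11's get_state replaces the usual explicit danger checks: it rotates the clockwise angle list by a relative offset (0 = straight, 1 = right, -1 = left) and steps one block from the head in that direction. A small worked check, assuming the same 20-pixel block and screen coordinates with the y-axis pointing down (illustrative, not from the original source):

# Worked check of the clockwise-angle lookup used by getPoint (illustrative).
import numpy as np

cw_angs = np.array([0, np.pi / 2, np.pi, -np.pi / 2])  # RIGHT, DOWN, LEFT, UP
head_x, head_y = 100, 100
facing = 0  # index into cw_dirs: snake currently moving RIGHT
for pos, label in [(0, "straight"), (1, "right"), (-1, "left")]:
    ang = cw_angs[(facing + pos) % 4]
    point = (round(head_x + 20 * np.cos(ang)), round(head_y + 20 * np.sin(ang)))
    print(label, point)  # straight (120, 100), right (100, 120), left (100, 80)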
Example no. 12
class Agent:
    def __init__(self):
        self.num_games = 0
        self.epsilon = 0  # to control the randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # pop left
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, env):
        head = env.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = env.snake_direction == Direction.LEFT
        dir_r = env.snake_direction == Direction.RIGHT
        dir_u = env.snake_direction == Direction.UP
        dir_d = env.snake_direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and env.is_collision(point_r))
            or (dir_l and env.is_collision(point_l))
            or (dir_u and env.is_collision(point_u))
            or (dir_d and env.is_collision(point_d)),

            # Danger right
            (dir_u and env.is_collision(point_r))
            or (dir_d and env.is_collision(point_l))
            or (dir_l and env.is_collision(point_u))
            or (dir_r and env.is_collision(point_d)),

            # Danger left
            (dir_d and env.is_collision(point_r))
            or (dir_u and env.is_collision(point_l))
            or (dir_r and env.is_collision(point_u))
            or (dir_l and env.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            env.food.x < env.head_position.x,  # food left
            env.food.x > env.head_position.x,  # food right
            env.food.y < env.head_position.y,  # food up
            env.food.y > env.head_position.y,  # food down
        ]

        return np.array(state, dtype=int)

    def store_data(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        # grab one thousand samples from the memory
        if len(self.memory) > BATCH_SIZE:
            batch_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            batch_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*batch_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: trade-off between exploration and exploitation
        self.epsilon = 80 - self.num_games
        move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move_idx = random.randint(0, 2)
            move[move_idx] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move_idx = torch.argmax(prediction).item()
            move[move_idx] = 1

        return move
Example no. 13
class Agent:
    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  #  discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  #  Food left
            game.food.x > game.head.x,  #  Food right
            game.food.y < game.head.y,  #  Food up
            game.food.y > game.head.y,  #  Food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, game_overs = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states,
                                game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)

    def get_action(self, state):
        self.epsilon = 80 - self.no_of_games
        action = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            action[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            action[move] = 1

        return action
Example no. 14
class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0 # randomness
        self.gamma = 0.9 # discount rate
        self.memory = deque(maxlen=MAX_MEMORY) # popleft()
        self.model = Linear_QNet(11, 256, 3)    # input, hidden layer, output
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)


    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)
        
        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger ahead
            (dir_r and game.is_collision(point_r)) or 
            (dir_l and game.is_collision(point_l)) or 
            (dir_u and game.is_collision(point_u)) or 
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or 
            (dir_d and game.is_collision(point_l)) or 
            (dir_l and game.is_collision(point_u)) or 
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or 
            (dir_u and game.is_collision(point_l)) or 
            (dir_r and game.is_collision(point_u)) or 
            (dir_l and game.is_collision(point_d)),
            
            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,
            
            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
            ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done)) # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE) # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        #for state, action, reward, nexrt_state, done in mini_sample:
        #    self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0,0,0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 15
class Agent:
    def __init__(self, agent_cfg) -> None:
        self.n_games = 0
        self.agent_cfg = agent_cfg
        self.epsilon = agent_cfg.epsilon  # randomness
        self.random_until = agent_cfg.random_until
        self.memory = deque(maxlen=agent_cfg.max_memory_size)
        self.model = LinearQNet(agent_cfg.model)
        self.trainer = QTrainer(self.model, agent_cfg.lr, agent_cfg.gamma)

    def get_state(self, game):
        head = game.snake[0]
        last_actions = self.get_previous_actions(self.agent_cfg.state.lookback)
        point_l = Point(head.x - game.block_size, head.y)
        point_r = Point(head.x + game.block_size, head.y)
        point_u = Point(head.x, head.y - game.block_size)
        point_d = Point(head.x, head.y + game.block_size)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),
            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),
            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),
            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,
            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y,  # food down,
            # distance from food
            (game.food.x - game.head.x) / game.w,
            (game.food.y - game.head.y) / game.h
        ]
        state += [direction for action in last_actions for direction in action]
        assert len(state) == self.model.input_size
        return np.array(state, dtype=int)

    def get_previous_actions(self, n: int) -> List[List[int]]:
        """
        Get a list of the previous integer encoded actions
        """
        default_action = [0 for _ in range(self.model.output_size)]
        actions = [default_action for _ in range(n)]
        for i in range(min(n, len(self.memory))):
            step = self.memory[-(i + 1)]
            step_action = step[1]
            actions[i] = step_action
        return actions

    def remember(self, state, action, reward, next_state, gameover):
        self.memory.append((state, action, reward, next_state, gameover))

    def sample_from_memory(self, n_samples: int):
        N = len(self.memory)
        if self.agent_cfg.batch.method == 'linear':
            samples = np.random.triangular(0, N, N, n_samples)
            idxs = samples.astype(int)
            batch = [self.memory[idx] for idx in idxs]
        elif self.agent_cfg.batch.method == 'uniform':
            batch = random.sample(self.memory, n_samples)
        else:
            raise ValueError('Invalid choice for `agent.batch.method`')
        return batch

    def train_long_memory(self):
        if len(self.memory) > self.agent_cfg.batch.size:
            mini_batch = self.sample_from_memory(
                self.agent_cfg.batch.size)  # list of tuples
        else:
            mini_batch = self.memory

        states, actions, rewards, next_states, gameovers = zip(*mini_batch)
        self.trainer.train_step(states, actions, rewards, next_states,
                                gameovers)

    def train_short_memory(self, state, action, reward, next_state, gameover):
        self.trainer.train_step(state, action, reward, next_state, gameover)

    def get_action(self, state):
        # random moves: exploration exploitation tradeoff
        # self.epsilon = 80 - self.n_games
        action = [0 for _ in range(self.model.output_size)]
        rand_action_thresh = self.epsilon - (self.n_games * self.epsilon /
                                             self.random_until)
        if random.random() < rand_action_thresh:
            move = random.randint(0, len(action) - 1)
        else:
            prediction = self.model(torch.tensor(state, dtype=torch.float))
            move = int(prediction.argmax().item())
        action[move] = 1
        return action
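The 'linear' batch method in Example no. 15 draws replay indices from np.random.triangular(0, N, N, n_samples), a triangular distribution whose density rises linearly toward N, so recent transitions are sampled more often than old ones (about 75% of draws land in the newer half of the buffer). A quick illustration of that bias (assumed, not part of the original project):

# Sampling bias of np.random.triangular(0, N, N, size) toward recent memory (illustrative).
import numpy as np

N = 10_000                                  # pretend buffer length
idxs = np.random.triangular(0, N, N, 100_000).astype(int)
print((idxs >= N // 2).mean())              # ~0.75: the newer half is sampled three times as often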
Example no. 16
class Agent:
    """
    Agent class
    agent running and the snake
    """
    def __init__(self, game, pars=dict()):
        """
        (Agent, Snake, dict()) -> None
        Initialize everything
        get everything that is passed from 
        json file to modify attributes and train model
        """
        self.n_games = 0
        self.epsilon = pars.get('eps', EPSILON)
        self.eps = pars.get('eps', EPSILON)
        self.gamma = pars.get('gamma', GAMMA)  # discount rate
        self.eps_range = pars.get('eps_range', EPS_RANGE)
        print(self.epsilon, self.eps)
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(len(game.get_state()),
                                 pars.get('hidden_size', HIDDEN_SIZE),
                                 OUTPUT_SIZE)
        self.trainer = QTrainer(self.model,
                                lr=pars.get('lr', LR),
                                gamma=self.gamma)

        self.game = game

    def remember(self, *args):
        """
        (Agent, (float, float, float, float, bool)) -> None
        state: current state
        action: current actions
        reward: current immediate rewards
        next_state: get the next state
        done: terminal state point
        append all this attributes to the queue: memory
        do this every frame
        """
        state, action, reward, next_state, done = args
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        """
        (Agent) -> None
        train after every game is finished
        """
        # get memory
        # if memory is above a certain BATCH SIZE then
        # randomly sample BATCH_SIZE memory
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory

        # get all states actions, rewards, etc...
        # and train the step using QTrainer
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, *args):
        """
        (Agent, (float, float, float, float, bool)) -> None
        state: current state
        action: current actions
        reward: current immediate rewards
        next_state: get the next state
        done: terminal state point
        train agent every game frame
        """
        state, action, reward, next_state, done = args
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        """
        (Agent, float) -> np.array(dtype=int): (1, 3)
        get an action either from the policy or randomly
        """
        # tradeoff exploration / exploitation based on epsilon and eps_range
        self.epsilon = self.eps - self.n_games
        final_move = [0, 0, 0]
        # check if should move randomly
        if is_random_move(self.epsilon, self.eps_range):
            # if so then randomly turn one of the bits
            # to go right left or straight
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            # else get the best move from the
            # NN by taking its argmax and setting
            # its bits
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 17
class Agent:
    def __init__(self):
        self.n_games = 0
        self.n_state = 14
        self.frame_to_read = 1
        self.epsilon = 0.4
        self.gamma = 0.8
        self.memory = deque(maxlen=MAX_MEM)
        self.states = deque(maxlen=self.frame_to_read)
        for _ in range(self.frame_to_read):
            self.states.append([0 for _ in range(self.n_state)])
        self.trainer = QTrainer(self.n_state * self.frame_to_read, LR,
                                self.n_state * self.frame_to_read, [256, 256],
                                3, self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        wall_u = 1 / abs(game.head.y + 21)
        wall_d = 1 / abs(game.h - game.head.y + 1)
        wall_l = 1 / abs(game.head.x + 21)
        wall_r = 1 / abs(game.w - game.head.x + 1)

        food_u = 1 if game.food.y < game.head.y else 0
        food_d = 1 if game.food.y > game.head.y else 0
        food_l = 1 if game.food.x < game.head.x else 0
        food_r = 1 if game.food.x > game.head.x else 0

        self_s = float('inf')
        self_l = float('inf')
        self_r = float('inf')
        for i, b in enumerate(game.snake):
            if i == 0: continue
            if dir_l:
                if b.x <= game.head.x and b.y == game.head.y:
                    self_s = min(self_s, game.head.x - b.x)
                if b.y <= game.head.y and b.x == game.head.x:
                    self_r = min(self_r, game.head.y - b.y)
                if b.y >= game.head.y and b.x == game.head.x:
                    self_l = min(self_l, b.y - game.head.y)
            if dir_r:
                if b.x >= game.head.x and b.y == game.head.y:
                    self_s = min(self_s, b.x - game.head.x)
                if b.y >= game.head.y and b.x == game.head.x:
                    self_r = min(self_r, b.y - game.head.y)
                if b.y <= game.head.y and b.x == game.head.x:
                    self_l = min(self_l, game.head.y - b.y)
            if dir_u:
                if b.y <= game.head.y and b.x == game.head.x:
                    self_s = min(self_s, game.head.y - b.y)
                if b.x >= game.head.x and b.y == game.head.y:
                    self_r = min(self_r, b.x - game.head.x)
                if b.x <= game.head.x and b.y == game.head.y:
                    self_l = min(self_l, game.head.x - b.x)
            if dir_d:
                if b.y >= game.head.y and b.x == game.head.x:
                    self_s = min(self_s, b.y - game.head.y)
                if b.x <= game.head.x and b.y == game.head.y:
                    self_r = min(self_r, game.head.x - b.x)
                if b.x >= game.head.x and b.y == game.head.y:
                    self_l = min(self_l, b.x - game.head.x)

        self_s = 1 / (self_s + 1)
        self_r = 1 / (self_r + 1)
        self_l = 1 / (self_l + 1)

        state = [
            # danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # to body coefficient
            self_s,
            self_r,
            self_l,

            # move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # food loc
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y,
        ]
        print(state)
        self.states.append(state)

        return np.array(list(self.states), dtype=np.float32).reshape(
            (-1, self.n_state * self.frame_to_read))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            minibatch = random.sample(self.memory, BATCH_SIZE)
        else:
            minibatch = self.memory

        states, actions, rewards, next_states, dones = zip(*minibatch)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        print('reward =', reward)
        self.trainer.train_step((state, ), (action, ), (reward, ),
                                (next_state, ), (done, ))

    def get_action(self, state):
        # random move
        final_move = [0, 0, 0]
        if random.random() < self.epsilon:
            print('random behavior')
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            prediction = self.trainer.model.predict(state)
            move = np.argmax(prediction)
            final_move[move] = 1

        return final_move
Example no. 18
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  #discount rate
        self.model = Linear_Qnet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        self.memory = deque(maxlen=MAX_MEMEORY)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # danger straight
            (dir_r and game.is_collison(point_r))
            or (dir_l and game.is_collison(point_l))
            or (dir_u and game.is_collison(point_u))
            or (dir_d and game.is_collison(point_d)),

            # danger right
            (dir_u and game.is_collison(point_r))
            or (dir_d and game.is_collison(point_l))
            or (dir_l and game.is_collison(point_u))
            or (dir_r and game.is_collison(point_d)),

            # danger left
            (dir_d and game.is_collison(point_r))
            or (dir_u and game.is_collison(point_l))
            or (dir_r and game.is_collison(point_u))
            or (dir_l and game.is_collison(point_d)),

            # move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            #food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  #list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        #random moves
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
Example no. 19
class Agent:
    def __init__(self):
        self.n_game = 0
        self.epsilon = 0  # Randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        # for n,p in self.model.named_parameters():
        #     print(p.device,'',n)
        # self.model.to('cuda')
        # for n,p in self.model.named_parameters():
        #     print(p.device,'',n)
        # TODO: model,trainer

    # state (11 Values)
    #[ danger straight, danger right, danger left,
    #
    # direction left, direction right,
    # direction up, direction down
    #
    # food left,food right,
    # food up, food down]
    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger Straight
            (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d))
            or (dir_l and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_r)),

            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move Direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            #Food Location
            game.food.x < game.head.x,  # food is in left
            game.food.x > game.head.x,  # food is in right
            game.food.y < game.head.y,  # food is up
            game.food.y > game.head.y  # food is down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if memory exceed

    def train_long_memory(self):
        if (len(self.memory) > BATCH_SIZE):
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_game
        final_move = [0, 0, 0]
        if (random.randint(0, 200) < self.epsilon):
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)  # prediction by model
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move