class Agent:
    # The Agent class. The agent is the intermediary between the model and the environment (the game).

    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
        print(self.n_games)
        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        # Initialization. It first loads the saved results, sets a few constants and
        # loads the neural network from the file 'model.pth'.
        # If you are running this program for the first time, change lines 25-27 to
        # "self.n_games = 0" and delete line 33.

    def get_state(self, game):
        # Function through which the agent obtains information about the environment.
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN
        # Definitions of the variables used below.

        state = [
            # Danger straight ahead?
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger to the right?
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger to the left?
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Snake direction.
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Relative position of the food.
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y
        ]
        return np.array(state, dtype=int)  # Returns the data to the agent.

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        # Replay-training function. (After the real game, the model replays the game once more.)
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
        # Function for real-time training.

    def get_action(self, state):
        self.epsilon = 500 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 500) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
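Both this listing and the variant that follows instantiate Linear_QNet and QTrainer, which are defined elsewhere in the project and are not shown in this section. As a rough orientation only, here is a minimal sketch of what such a model and trainer could look like, assuming a single hidden layer (matching the Linear_QNet(11, 256, 3) call above) and a one-step Q-learning update; the project's actual definitions may differ, and the body of train_step below is inferred from how it is called in the Agent class, not taken from the source.

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


class Linear_QNet(nn.Module):
    # Feed-forward network: 11 state inputs -> hidden layer -> 3 action values.
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.linear1(x))
        return self.linear2(x)


class QTrainer:
    # One-step Q-learning update: Q_new = r + gamma * max_a Q(next_state, a).
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)

        if len(state.shape) == 1:
            # Single sample from train_short_memory: add a batch dimension.
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done,)

        pred = self.model(state)            # predicted Q-values for each action
        target = pred.clone().detach()
        with torch.no_grad():
            for idx in range(len(done)):
                q_new = reward[idx]
                if not done[idx]:
                    q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
                # Overwrite the Q-value of the action that was actually taken.
                target[idx][torch.argmax(action[idx]).item()] = q_new

        self.optimizer.zero_grad()
        loss = self.criterion(pred, target)
        loss.backward()
        self.optimizer.step()

The detach() and no_grad() around the target computation keep gradients flowing only through the predicted Q-values, which is the usual convention for this kind of update.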
class Agent:
    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # Food left
            game.food.x > game.head.x,  # Food right
            game.food.y < game.head.y,  # Food up
            game.food.y > game.head.y,  # Food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, game_overs = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)

    def get_action(self, state):
        self.epsilon = 80 - self.no_of_games
        action = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            action[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            action[move] = 1
        return action
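Neither listing shows the loop that actually drives the agent. Below is a minimal usage sketch wired against the second (English) variant of the class; SnakeGameAI, its play_step(action) method returning (reward, game_over, score), and its reset() method are assumptions about the game module rather than code shown in this section.

def train():
    agent = Agent()
    game = SnakeGameAI()
    record = 0
    while True:
        state_old = agent.get_state(game)
        action = agent.get_action(state_old)

        reward, game_over, score = game.play_step(action)
        state_new = agent.get_state(game)

        # Learn from the single step that was just played.
        agent.train_short_memory(state_old, action, reward, state_new, game_over)
        # Store the transition for experience replay.
        agent.remember(state_old, action, reward, state_new, game_over)

        if game_over:
            # Replay a batch of stored transitions, then start a new game.
            game.reset()
            agent.no_of_games += 1
            agent.train_long_memory()
            record = max(record, score)
            print('Game', agent.no_of_games, 'Score', score, 'Record', record)


if __name__ == '__main__':
    train()

The order of calls matters: train_short_memory learns from the latest transition immediately, remember stores it, and train_long_memory replays a random batch of up to BATCH_SIZE stored transitions at the end of each game.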