def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(11, 256, 3)  # input layer: 11, hidden: 256, output: 3
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
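Every variant above and below constructs a Linear_QNet and a QTrainer, but neither class is defined in this section. The sketch below is a minimal, assumed implementation in the style these snippets expect (a two-layer feed-forward network plus a single Bellman-target gradient step); the actual definitions in each repository may differ.

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


class Linear_QNet(nn.Module):
    # Assumed model: input -> hidden -> output with a ReLU in between.
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        return self.linear2(torch.relu(self.linear1(x)))

    def save(self, file_name='model.pth'):
        # Saves only the weights; reload them with load_state_dict().
        os.makedirs('./model', exist_ok=True)
        torch.save(self.state_dict(), os.path.join('./model', file_name))


class QTrainer:
    # Assumed trainer: one optimizer step on the MSE between predicted and Bellman-target Q values.
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)
        if state.dim() == 1:
            # Single sample: add a batch dimension.
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            done = (done,)

        pred = self.model(state)        # Q(s, .) for every sample in the batch
        target = pred.detach().clone()  # Bellman targets, built without tracking gradients
        with torch.no_grad():
            next_q = self.model(next_state).max(dim=1).values
        for idx in range(len(done)):
            q_new = reward[idx]
            if not done[idx]:
                q_new = reward[idx] + self.gamma * next_q[idx]
            target[idx][torch.argmax(action[idx]).item()] = q_new

        self.optimizer.zero_grad()
        loss = self.criterion(pred, target)
        loss.backward()
        self.optimizer.step()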
def main():
    parameters = yaml.load(open(args.parameters_file, 'r'), Loader=yaml.FullLoader)
    model = Linear_QNet(11, 256, 3)
    if args.use_trained:
        model.load_state_dict(torch.load(parameters["model_path"]))

    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent(args, model)
    game = SnakeGameAI()

    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                if args.save_model:
                    agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
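main() above pulls its switches from a module-level args object and a YAML parameters file, neither of which is shown. A plausible companion setup, assuming argparse with the flag names used in the snippet (the defaults and the YAML layout are guesses):

import argparse
import yaml

parser = argparse.ArgumentParser(description='Snake DQN training / replay')
parser.add_argument('--parameters_file', default='parameters.yaml',
                    help='YAML file holding at least a model_path entry')
parser.add_argument('--use_trained', action='store_true',
                    help='load the weights at model_path before starting')
parser.add_argument('--save_model', action='store_true',
                    help='save the model whenever a new record score is reached')
args = parser.parse_args()

# parameters.yaml can be as small as:
#   model_path: ./model/model.pth

if __name__ == '__main__':
    main()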
def __init__(self):
    with open('games.txt', 'r') as f:
        self.n_games = int(f.read())
    print(self.n_games)
    self.epsilon = 0
    self.gamma = 0.9
    self.memory = deque(maxlen=MAX_MEMORY)
    self.model = Linear_QNet(11, 256, 3)
    #self.model.load_state_dict(torch.load('model/model.pth'))
    self.model.eval()
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self, use_checkpoint=False):
    self.no_of_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)
    self.model = Linear_QNet(11, 256, 3)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
    if use_checkpoint:
        checkpoint = torch.load("./model/model.pth")
        self.model.load_state_dict(checkpoint)
        self.model.eval()
def __init__(self, filename='model.pth'):
    file_name = os.path.join(
        '/Users/kevin/GitHub/lunarip/aiexplore/rls500/model', filename)
    if os.path.exists(file_name):
        self.model = torch.load(file_name)
        print("loaded")
    else:
        self.model = Linear_QNet(len(state_info), 16,
                                 len(onehot_action))  # first param is the length of the state array
    for param_tensor in self.model.state_dict():
        print(param_tensor, "\t", self.model.state_dict()[param_tensor].size())
        print(param_tensor, "\t", self.model.state_dict()[param_tensor])
def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=max_memory)
    self.model = Linear_QNet(11, 256, 3)
    PATH = './model/model.pth'
    if os.path.exists(PATH):
        self.model.load_state_dict(torch.load(PATH))
        # self.model.eval()
        print('Pretrained = True')
    self.trainer = QTrainer(self.model, lr=lr, gamma=self.gamma)
def __init__(self):
    self.numberOfGames = 0
    self.epsilon = 0  # controls randomness
    self.gamma = 0.9  # discount rate, <1
    # will popleft if there is too much in memory
    self.memory = deque(maxlen=maxMemory)
    self.model = Linear_QNet(11, 256, 3)
    if os.path.isfile('./model/model.pth'):
        model_folder_path = './model/model.pth'
        self.model.load_state_dict(torch.load(model_folder_path))
    self.trainer = QTrainer(self.model, lr=learningRate, gamma=self.gamma)
def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(4, 256, 4)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self):
    self.n_games = 0
    self.epsilon = 0  # for random
    self.gamma = 0.5  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(11, 256, 3)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self):
    self.num_games = 0
    self.epsilon = 0  # to control the randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(11, 256, 3)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
def __init__(self):
    self.epsilon = 0.999
    self.gamma = 0.9
    self.memory = deque(maxlen=MAX_MEMORY)
    self.model = Linear_QNet(2, 256, 4)
    self.trainer = QTrainer(self.model, LR, self.gamma)
    self.epsilon_decay_value = 0.998
def __init__(self):
    self.n_games = 0
    self.epsilon = 0.5  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(2, 256, 4)
    self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
    self.epsilon_decay_value = self.epsilon / (END_EPSILON_DECAYING - START_EPSILON_DECAYING)
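The two variants directly above compute an epsilon-decay value but do not show where it is applied. One common pattern, sketched here as an assumption (the constant names follow the snippet, the floor value is illustrative), is to shrink epsilon once per finished game within the decay window:

MIN_EPSILON = 0.01  # illustrative floor so the agent never stops exploring entirely

def decay_epsilon(agent):
    # Call once per finished game, e.g. right after agent.n_games += 1.
    if START_EPSILON_DECAYING <= agent.n_games <= END_EPSILON_DECAYING:
        agent.epsilon = max(MIN_EPSILON, agent.epsilon - agent.epsilon_decay_value)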
class Agent:
    def __init__(self, filename='model.pth'):
        file_name = os.path.join(
            '/Users/kevin/GitHub/lunarip/aiexplore/rls500/model', filename)
        if os.path.exists(file_name):
            self.model = torch.load(file_name)
            print("loaded")
        else:
            self.model = Linear_QNet(len(state_info), 16,
                                     len(onehot_action))  # first param is the length of the state array
        for param_tensor in self.model.state_dict():
            print(param_tensor, "\t", self.model.state_dict()[param_tensor].size())
            print(param_tensor, "\t", self.model.state_dict()[param_tensor])

    def get_action(self, state):
        state0 = torch.tensor(state, dtype=torch.float32)
        prediction = self.model(state0)
        move = int(torch.argmax(prediction).item())
        final_move = int_onehot[move]
        action = onehot_action[tuple(final_move)]
        # print(move, final_move, action)
        return action
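This Agent turns the network's argmax index into a one-hot vector and then into a game action through two lookup tables (int_onehot and onehot_action) defined elsewhere in that repository, alongside a state_info array that fixes the input size. The tables below are purely hypothetical and only illustrate the shape get_action expects:

# Hypothetical mappings for a four-action game; the real tables live next to
# state_info in the repository this snippet came from.
int_onehot = {
    0: (1, 0, 0, 0),
    1: (0, 1, 0, 0),
    2: (0, 0, 1, 0),
    3: (0, 0, 0, 1),
}
onehot_action = {
    (1, 0, 0, 0): 'up',
    (0, 1, 0, 0): 'down',
    (0, 0, 1, 0): 'left',
    (0, 0, 0, 1): 'right',
}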
def __init__(self, game, pars=dict()):
    """
    (Agent, Snake, dict()) -> None

    Initialize everything. Get everything that is passed from the json file
    to modify attributes and train the model.
    """
    self.n_games = 0
    self.epsilon = pars.get('eps', EPSILON)
    self.eps = pars.get('eps', EPSILON)
    self.gamma = pars.get('gamma', GAMMA)  # discount rate
    self.eps_range = pars.get('eps_range', EPS_RANGE)
    print(self.epsilon, self.eps)
    self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
    self.model = Linear_QNet(len(game.get_state()),
                             pars.get('hidden_size', HIDDEN_SIZE),
                             OUTPUT_SIZE)
    self.trainer = QTrainer(self.model, lr=pars.get('lr', LR), gamma=self.gamma)
    self.game = game
class Agent:
    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l, dir_r, dir_u, dir_d,

            # Food location
            game.food.x < game.head.x,  # Food left
            game.food.x > game.head.x,  # Food right
            game.food.y < game.head.y,  # Food up
            game.food.y > game.head.y,  # Food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, game_overs = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)

    def get_action(self, state):
        self.epsilon = 80 - self.no_of_games
        action = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            action[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            action[move] = 1
        return action
class Agent:
    # The Agent class. The agent mediates between the model and the environment (the game).

    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
        print(self.n_games)
        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        # Initialization. First it loads the saved results, sets a few constants, and loads
        # the neural network from the file 'model.pth'.
        # If you are running this program for the first time, change lines 25-27 to
        # "self.n_games = 0" and delete line 33.

    def get_state(self, game):
        # Function through which the agent obtains information about the environment.
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN
        # Definitions of the variables used below.

        state = [
            # Danger ahead?
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger to the right?
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger to the left?
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Direction of the snake.
            dir_l, dir_r, dir_u, dir_d,

            # Relative position of the food.
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y
        ]
        return np.array(state, dtype=int)  # Returns the data to the agent.

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        # Function for replay training. (After a real game, the model replays the game once more.)
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)
        # Function for real-time training.

    def get_action(self, state):
        self.epsilon = 500 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 500) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=max_memory)
        self.model = Linear_QNet(11, 256, 3)
        PATH = './model/model.pth'
        if os.path.exists(PATH):
            self.model.load_state_dict(torch.load(PATH))
            # self.model.eval()
            print('Pretrained = True')
        self.trainer = QTrainer(self.model, lr=lr, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l, dir_r, dir_u, dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > batch_size:
            mini_sample = random.sample(self.memory, batch_size)  # list of tuples of size = 1000
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
class Agent:
    def __init__(self):
        self.numberOfGames = 0
        self.epsilon = 0  # controls randomness
        self.gamma = 0.9  # discount rate, <1
        # will popleft if there is too much in memory
        self.memory = deque(maxlen=maxMemory)
        self.model = Linear_QNet(11, 256, 3)
        if os.path.isfile('./model/model.pth'):
            model_folder_path = './model/model.pth'
            self.model.load_state_dict(torch.load(model_folder_path))
        self.trainer = QTrainer(self.model, lr=learningRate, gamma=self.gamma)

    def getState(self, game):
        head = game.snake[0]

        # Clockwise directions and angles
        cw_dirs = [
            Direction.RIGHT == game.direction,
            Direction.DOWN == game.direction,
            Direction.LEFT == game.direction,
            Direction.UP == game.direction
        ]
        cw_angs = np.array([0, np.pi / 2, np.pi, -np.pi / 2])

        # Position - in front: 0, on right: 1, on left: -1; BLOCK_SIZE = 20
        def getPoint(pos):
            return Point(
                head.x + 20 * np.cos(cw_angs[(cw_dirs.index(True) + pos) % 4]),
                head.y + 20 * np.sin(cw_angs[(cw_dirs.index(True) + pos) % 4]))

        state = [
            # Danger
            game.is_collision(getPoint(0)),
            game.is_collision(getPoint(1)),
            game.is_collision(getPoint(-1)),

            # Move direction
            cw_dirs[2], cw_dirs[0], cw_dirs[3], cw_dirs[1],

            # Food location
            game.food.x < head.x,
            game.food.x > head.x,
            game.food.y < head.y,
            game.food.y > head.y
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def trainLongMemory(self):
        if len(self.memory) > batchSize:
            # list of tuples from the memory
            miniSample = random.sample(self.memory, batchSize)
        else:
            miniSample = self.memory
        states, actions, rewards, next_states, game_over = zip(*miniSample)
        self.trainer.trainStep(states, actions, rewards, next_states, game_over)

    def trainShortMemory(self, state, action, reward, next_state, game_over):
        self.trainer.trainStep(state, action, reward, next_state, game_over)

    def getAction(self, state):
        # exploitation / exploration
        self.epsilon = 80 - self.numberOfGames
        final_move = [0, 0, 0]
        if random.randint(-2, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move
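The angle-based getPoint helper above packs the neighbour lookup into one trig expression, which is easy to misread. The standalone check below (an illustration, not part of the original code) prints which cell each relative position resolves to when the snake faces RIGHT; note that screen coordinates grow downward, so the snake's right is one block down.

import numpy as np

cw_angs = np.array([0, np.pi / 2, np.pi, -np.pi / 2])  # RIGHT, DOWN, LEFT, UP
head = (100, 100)
facing = 0  # index into [RIGHT, DOWN, LEFT, UP]; the snake currently faces RIGHT

for label, pos in [('straight', 0), ('right', 1), ('left', -1)]:
    ang = cw_angs[(facing + pos) % 4]
    point = (int(round(head[0] + 20 * np.cos(ang))),
             int(round(head[1] + 20 * np.sin(ang))))
    print(label, point)
# straight (120, 100)   right (100, 120)   left (100, 80)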
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)  # input layer: 11, hidden: 256, output: 3
        self.model.load_state_dict(torch.load('./optimized_model/model.pth'))
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        BLOCK_SIZE = 20

        # Points to check danger
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l, dir_r, dir_u, dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y   # food down
        ]
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        # for state, action, reward, next_state, done in mini_sample:
        #     self.trainer.train_step(state, action, reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon and False:  # this was the original; we made small changes to it
            # if random.randint(0, 200) < 20 and self.n_games < 90:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model.forward(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move