def update_frame(x):
    global state, score, high_score, last_move, bot_mode, down_press
    if bot_mode:
        # a = policy(Variable(torch.from_numpy(state).type(torch.FloatTensor)))
        # _, ac = a.max(0)
        # action = ac.item()
        a = policy(Variable(torch.from_numpy(state).type(torch.FloatTensor)))
        # a = F.softmax(a, dim=-1)
        c = Categorical(a)
        action = c.sample()
        # action = train.select_action(state).item()
        state, reward, done = game.step(action)
        last_move = action
    else:
        state, reward, done = game.step(0)
        if down_press:
            game.active_piece, game.grid, _ = game.move_down(game.active_piece, game.grid)
            last_move = 4
    score += reward
    if done:
        game.reset()
        high_score = max(high_score, score)
        score = 0
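# A minimal, self-contained sketch of the Categorical sampling pattern used in
# update_frame() above. The network sizes, `n_features`, and `n_actions` are
# illustrative placeholders, not values from this project.
import torch
import torch.nn as nn
from torch.distributions import Categorical

n_features, n_actions = 200, 5
policy_net = nn.Sequential(nn.Linear(n_features, 64), nn.ReLU(),
                           nn.Linear(64, n_actions))

features = torch.randn(n_features)    # stand-in for the flattened game state
logits = policy_net(features)         # raw, unnormalised scores
dist = Categorical(logits=logits)     # softmax is applied internally
action = dist.sample()                # 0-dim tensor; .item() gives an int
log_prob = dist.log_prob(action)      # typically stored for the policy update

# Note: Categorical(probs=x) expects non-negative values that it renormalises;
# feeding raw network outputs as probabilities (as the snippet above does with
# its softmax line commented out) only works if the network already ends in a
# non-negative activation such as softmax.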
async def stopBJ(ctx):
    """Stop an active game of blackjack & reset everything."""
    global game
    game.reset()
    game.set_game_state("STOPPED")
    await ctx.send("Awwww....how sad. If you'd like to start a game, use __!startBJ__")
def init():
    global state, speed, last_update
    # set the initial time for the first update with the current time
    last_update = time.time()
    game.score = 0
    game.reset()
    graphics.drawGame()
    state = 1
    try:
        speed = config.game_speed[parser.args.speed]
    except KeyError:
        # fall back to medium speed if the requested speed is not configured
        speed = config.game_speed['m']
    livesIn = parser.args.lives
    if 1 <= livesIn <= 5:
        game.lives = livesIn
        game.livesMax = livesIn
    elif livesIn > 5:
        game.lives = 5
        game.livesMax = 5
    else:
        game.lives = 1
        game.livesMax = 1
def setup():
    if game.setuplock == 0:
        game.setuplock = 1
        game.movelock = 1
        game.placelock = 1
        game.reset()
        game.pu()
        game.clear()
        game.pensize(5)
        game.edge = 80
        game.sety(0)
        game.setx(0)
        game.title("PyTacToe")
        game.speed(0)
        game.plansza()
        game.speed(2)
        game.sety(0)
        game.setx(0)
        game.x = 1
        game.y = 1
        game.last = 2
        print("x - 1")
        print("o - 2")
        game.f00 = game.f01 = game.f02 = game.f10 = game.f11 = game.f12 = game.f20 = game.f21 = game.f22 = "dummy"
        game.setuplock = 0
        game.movelock = 0
        game.placelock = 0
def main():
    viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT, topLeft=Point(400, 80))
    game = CandySeller(viewPort)
    while True:
        game.run()
        # Re-initialise the game state.
        game.reset()
def main():
    viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT)
    game = SheriffQuest(viewPort)
    while True:
        game.run()
        # Re-initialise the game state.
        game.reset()
def main(): print("Starting Jimmy Pixel...") viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT, topLeft=Point(400, 80)) game = JimmyPixel(viewPort) print("Created Jimmy Pixel game...") while True: game.run() # Re-initialised the game state. game.reset()
def run():
    try:
        parser.init()
        stage.init()
        graphics.init()
        theme.init()
        game.reset()
        gameloop.start()
    except KeyboardInterrupt:
        exit()
def on_key_press(symbol, modifiers): if symbol == key.LEFT: game.batE.status = "PRESS" elif symbol == key.RIGHT: game.batD.status = "PRESS" elif symbol == key.SPACE: game.molaS = 'PRESS' if game.status == "GAME OVER": if symbol == key.ENTER: game.reset()
def init_random_exp_memory(self, size):
    if size > self.memory_size:
        size = self.memory_size
    game = self.get_game()
    self.exp_memory.add(game.get_state(), 0, 0, 0)
    for i in range(size):
        random_action = np.random.randint(0, self.num_actions)
        reward, is_terminal = game.execute_action(random_action)
        state = game.get_state()
        self.exp_memory.add(state, random_action, reward, is_terminal)
        if is_terminal:
            game.reset()
            self.exp_memory.add(game.get_state(), 0, 0, 0)
async def handleGame(data):
    if (data['text'] == 'join' and game.game_on == False):
        playerNum = game.add_player(data['sender'])
        data['text'] = f'join{str(playerNum)}'
        await notify_public_message(data)
    elif (data['text'] == 'join' and game.game_on == True):
        data = game.add_player(data['sender'])
        await notify_client(data)
    elif (data['text'] == 'ready'):
        data['text'] = 'start'
        data['sender'] = 'game'
        await notify_public_message(data)
        cards = game.deal_3(1)
        await notify_client(cards)
        cards = game.deal_3(2)
        await notify_client(cards)
    elif (data['text'] == 'move'):
        is_bomb = False
        if (len(data['data']) > 3):
            is_bomb = True
        data = game.process_message(data)
        await notify_public_message(data)
        if (not is_bomb):
            player = game.players.index(game.whos_turn) + 1
            cards = game.deal_1(player)
            await notify_client(cards)
        win_state = game.check_win_state()
        if (win_state[0]):
            print('win')
            data = game.create_win_message(win_state[1])
            await notify_public_message(data)
            data = game.reset()
            await notify_public_message(data)
        else:
            game.switch_turn()
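# The handler above leaves delivery to notify_public_message / notify_client.
# As a rough sketch only (the real helpers, the shape of the `clients`
# registry, and the 'receiver' routing key are assumptions, not taken from
# this project), broadcasting over the `websockets` library could look like:
import json

clients = {}  # connection id -> websocket, maintained by register/unregister handlers


async def notify_public_message(data):
    # Broadcast a JSON-encoded message to every connected client.
    message = json.dumps(data)
    for ws in clients.values():
        await ws.send(message)


async def notify_client(data):
    # Deliver a message to a single player, assuming a 'receiver' key
    # identifies the target connection id.
    ws = clients.get(data.get('receiver'))
    if ws is not None:
        await ws.send(json.dumps(data))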
def on_key_press(symbol, modifiers): if symbol == key.LEFT: game.batE.status = "PRESS" elif symbol == key.RIGHT: game.batD.status = "PRESS" elif symbol == key.SPACE: game.game_start_time = game.time if game.status == "GAME OVER": if symbol == key.ENTER: game.reset() if game.status == "REINICIAR": if symbol == key.ENTER: game.reset()
def init_exp_memory(self, size):
    if size > self.memory_size:
        size = self.memory_size
    game = self.get_game()
    self.exp_memory.add(game.get_state(), 0, 0, 0)
    for i in range(size):
        action = 0
        if np.random.rand() < self.epsilon:
            action = np.random.randint(0, self.num_actions)
        else:
            action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        state = game.get_state()
        self.exp_memory.add(state, action, reward, is_terminal)
        if is_terminal:
            game.reset()
            self.exp_memory.add(game.get_state(), 0, 0, 0)
def run():
    try:
        # Init the game
        parser.init()
        # Check for editor
        if (parser.args.editor):
            os.system("/usr/share/make-snake/snake-editor/__main__.py")
            sys.exit(0)
        graphics.init()
        theme.init()
        stage.init()
        game.reset()
        # Start the game
        gameloop.start()
    except KeyboardInterrupt:
        exit()
async def unregister(cid):
    clients.pop(cid, None)
    await notify_clients()
    if cid in game.players:
        data = game.reset()
        await notify_public_message(data)
        data['tag'] = 'public'
        data['text'] = 'a player quit, game reset'
        await notify_public_message(data)
def eval(self, num_steps):
    game = self.get_game()
    total_score = 0.0
    current_score = 0.0
    num_games = 1.0
    max_score = 0.0
    for i in range(num_steps):
        action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        reward = self.renormalize_reward(reward)
        current_score += reward
        total_score += reward
        if is_terminal:
            game.reset()
            if i < (num_steps - 1):
                num_games += 1
            if current_score > max_score:
                max_score = current_score
            current_score = 0
    average = total_score / num_games
    return total_score, num_games, average, max_score
def find_max_games(self, num_steps, path, score_threshold):
    image_id = 0
    game = self.get_game()
    frames = []
    frames.append((np.copy(game.get_state()), 0.0))
    max_game_score = 0
    current_game_score = 0.0
    for i in range(num_steps):
        if i % (num_steps // 10) == 0:
            print("At step {}".format(i))
        action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        reward = self.renormalize_reward(reward)
        current_game_score += reward
        frames.append((np.copy(game.get_state()), current_game_score))
        if is_terminal:
            game.reset()
            if current_game_score > max_game_score:
                max_game_score = current_game_score
            if current_game_score > score_threshold:
                print("Saving images...")
                for frame in frames:
                    self.save_image(frame[0], path, image_id, 0, 0, 0, score=frame[1])
                    image_id += 1
            frames = []
            frames.append((np.copy(game.get_state()), 0.0))
            current_game_score = 0.0
    print("Max score: {}".format(max_game_score))
def eval_with_images(self, num_steps, path):
    image_id = 0
    game = self.get_game()
    self.save_image(game.get_state(), path, image_id, 0, 0, 0, 0.0)
    total_score = 0
    games_finished = 0
    max_game_score = 0
    current_game_score = 0.0
    for i in range(num_steps):
        image_id += 1
        action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        reward = self.renormalize_reward(reward)
        total_score += reward
        current_game_score += reward
        self.save_image(game.get_state(), path, image_id, action, reward,
                        is_terminal, score=current_game_score)
        if is_terminal:
            game.reset()
            games_finished += 1
            if current_game_score > max_game_score:
                max_game_score = current_game_score
            current_game_score = 0.0
            self.save_image(game.get_state(), path, image_id, action, reward,
                            is_terminal, score=current_game_score)
    print("Max score: {}".format(max_game_score))
def train(num_episodes, save_rate=0, starting_episode=0):
    global f
    import time
    if starting_episode > 0:
        model = 'models/tetris_policy_' + str(starting_episode) + '.pth'
        policy.load_state_dict(torch.load(model))
    start_time = time.time()
    total_time = 0
    running_reward = 1
    episode = starting_episode
    while episode != num_episodes:
        state = game.reset()  # Reset environment and record the starting state
        f = True
        game_reward = 0
        for _ in range(max_time):
            action = select_action(state)
            f = False
            # Step through environment using chosen action
            state, reward, done = game.step(action.item())
            # Save reward
            policy.reward_episode.append(reward)
            game_reward += reward
            if done:
                break
        # Used to determine when the environment is solved.
        running_reward = (running_reward * 0.99) + (game_reward * 0.01)
        update_policy()
        if episode % 50 == 0:
            cur_time = time.time()
            total_time += cur_time - start_time
            start_time = cur_time
            print('Episode {}\tLast reward: {:5d}\tAverage reward: {:.2f}\tTime: {:.2f}'
                  .format(episode, game_reward, running_reward, total_time))
        if save_rate != 0 and (episode + 1) % save_rate == 0:
            PATH = 'models/tetris_policy_' + str(episode + 1) + '.pth'
            torch.save(policy.state_dict(), PATH)
        episode += 1
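# The training loop above appends per-step rewards to policy.reward_episode and
# then calls update_policy(), which is defined elsewhere in the project. For
# reference only, a generic REINFORCE-style update (discounted returns weighted
# by stored log-probabilities) looks roughly like this; `optimizer` and
# `log_probs` are assumed inputs, not this project's attributes.
import torch


def reinforce_update(optimizer, log_probs, rewards, gamma=0.99):
    # Compute discounted returns, working backwards through the episode.
    returns, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.insert(0, running)
    returns = torch.tensor(returns)
    # Normalise returns to reduce gradient variance.
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)
    # Policy-gradient loss: negative log-probability weighted by return.
    loss = -(torch.stack(log_probs) * returns).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()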
# (inner step loop of the training script; i, step, the nets and the
# experience buffer are defined in the enclosing episode loop)
while True:
    step += 1
    x = random()
    if (x < explore_rate):
        action = rdi(0, game.actions)
    else:
        current_frames = np.array([
            np.concatenate((exp.get_last(),
                            [gp.transform(game.current_state, height, width)]))
        ])
        action = net.choose_action(current_frames,
                                   i % play_interval == play_interval - 1)[0]
    for j in range(1):  # + (i % play_interval != play_interval - 1)
        exp.update()
        state = game.next_frame(action, False)
        exp_batch = exp.get(xp_nb_batch)
        if (i % play_interval == play_interval - 1):
            print "--"
            gp.show_rgb(state[2])
        else:
            if (exp_batch[0].shape[0] > 0):
                net.learn(exp_batch[0], exp_batch[1], (state[0] == 0))
    if (state[0] == 0):
        game.reset()
        exp.reset()
        break
print "step =", step
if (i % save_interval == save_interval - 1):
    net.save("version01")
    print "Model saved"
def reset():
    game.reset()
    graphics.drawGame()
def task_three():
    print("Task Three")
    game.file_exists()
    game.start()
    game.reset()
# (body of the outer self-play loop; counters such as game_nr, new_ai_win and
# new_ai_loose are initialised outside this fragment)
start_move_time = time.time()
while game.winner is None:
    if move_nr % 2 == 1:
        move = alpha_beta.rootAlphaBeta(game, 3, player1)
        color = player1.color
    else:
        move = alpha_beta.rootAlphaBeta(game, 2, player2)
        color = player2.color
    valid_move = game.doMove(color, move)
    if valid_move:
        move_nr += 1
        # player = game.getOtherPlayer(player)
        end_move_time = time.time()
        print(game.getPlayerString(color), move, " - Time used:",
              end_move_time - start_move_time)
        start_move_time = time.time()
        # print(game.getCorrectBoardArray())
end_time = time.time() - start_time
print(game)
print("Moves:", len(game.moves), end_time)
game_nr += 1
if game.winner == game.WHITE:
    new_ai_win += 1
else:
    new_ai_loose += 1
game.reset()
print(new_ai_win, new_ai_loose)
print(time.time() - super_start_time)
async def startBJ(ctx):
    """Start a game of blackjack (Note: will reset an ongoing game!)"""
    global game
    game.reset(4)
    await ctx.send("Starting Blackjack! Who'd like to play?\nRespond with the __!playing__ command to join.\n__!out__ will get you out of the game.")
print "Playing randomly with prob", epsilon_greedy ep_index = ep_index + 1 if ep_index > 1000: if avg_nonzero_reward > -0.1: print "Min reward over last 500 is", avg_nonzero_reward, "> -0.1, so finished training" return tf_sess, tf_output_layer tf_sess, tf_output_layer = qlearning() # Now observe the game with the learned parameters game = Game() game.set_render_or_not(True) last_nonzero_rewards = [] current_state = game.reset() print "Obs", current_state for i in range(1000): action = compute_action(tf_sess, tf_output_layer, 1.0, current_state, 0.0) #action = np.random.randint(NUM_ACTIONS) obs, reward, terminal = game.step_environment(action) current_state = obs last_nonzero_rewards.append(reward) last_nonzero_rewards = last_nonzero_rewards[-500:] print obs, reward, terminal print "Average rewards:", np.mean(last_nonzero_rewards) if terminal: #print "Resetting" current_state = game.reset() #print "Obs", current_state
if __name__ == "__main__":
    state_size = cf.stateSize()
    print(state_size)
    action_size = cf.actionSize()
    print(action_size)
    playerOne = DQNAgent(state_size, action_size)
    playerTwo = DQNAgent(state_size, action_size)
    playerOne.load("./save/cfOne2000.h5")
    playerTwo.load("./save/cfTwo2000.h5")
    done = 0
    batch_size = 42

    for e in range(EPISODES):
        cf.reset()
        state = cf.board
        state = np.reshape(state, [1, state_size])
        time = 0  # time is used just to count frames as a measurement of how long the ai lasted
        while True:
            sleep(5)
            time += 1
            actionOne = playerOne.act(state)
            moveOne = cf.dropTile(actionOne, 1)
            cf.render()
            print("/////////////////")
            currentBoard = np.reshape(cf.board, [1, state_size])
            actionTwo = playerTwo.act(currentBoard)
            moveTwo = cf.dropTile(actionTwo, -1)
            cf.render()
            print("/////////////////")
def post(self):
    game.reset()
    return jsonify(reset=True)
def run_main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_dir', type=str, help='Training directory')
    parser.add_argument('--tmp_dir', type=str, default='/tmp',
                        help='Temporary directory to store model checkpoint for restoration process')
    parser.add_argument('--game', type=str, default='Breakout-v0', help='Game name')
    parser.add_argument('--dump_model', action='store_true',
                        help='Dump model into checkpoint/graph and exit')
    parser.add_argument('--remote_addr', default='localhost:5001', type=str,
                        help='Remote service address to connect to for inference')
    parser.add_argument('--logfile', type=str, help='Logfile')
    parser.add_argument('--player_id', default=0, type=int,
                        help='Player ID used to index history entries')
    parser.add_argument('--num_episodes', default=10000, type=int,
                        help='Number of episodes to run')
    FLAGS = parser.parse_args()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['NVIDIA_VISIBLE_DEVICES'] = ''
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    tf.logging.set_verbosity(tf.logging.ERROR)

    logging.basicConfig(filename=FLAGS.logfile,
                        filemode='a',
                        level=logging.INFO,
                        format='%(asctime)s.%(msecs)03d: %(message)s',
                        datefmt='%d/%m/%y %H:%M:%S')

    config = {
        'game': FLAGS.game,
        'tmp_dir': FLAGS.tmp_dir,
        'state_stack_size': 1,
        'remote_addr': FLAGS.remote_addr,
        'train_dir': FLAGS.train_dir,
        'input_map_shape': [84, 84, 1],
        'input_params_shape': [4],
        'owner_id': FLAGS.player_id,
        'env_id': 0,
    }

    if FLAGS.dump_model:
        env = gym.make(config['game'])
        config['num_actions'] = env.action_space.n

        import model
        m = model.create_model(config)
        m.save_checkpoint()
        exit(0)

    game = GameWrapper(config)

    episode = 0
    episode_rewards = []
    while FLAGS.num_episodes < 0 or episode < FLAGS.num_episodes:
        game.prev_st = game.reset()
        game.prev_model_st = halite_model_pb2.State(
            state=game.prev_st.state.tobytes(),
            params=game.prev_st.params.tobytes())

        done = False
        rewards = []
        while not done:
            done = game.loop_body()
            rewards.append(game.prev_reward)

        er = np.sum(rewards)
        episode_rewards.append(er)
        if len(episode_rewards) > 100:
            episode_rewards = episode_rewards[1:]

        logging.info(
            '{}: last train_step: {}, steps: {}, episode reward: {}, mean episode reward: {:.1f}, std: {:.1f}'
            .format(episode, game.train_step, len(rewards), er,
                    np.mean(episode_rewards), np.std(episode_rewards)))
        episode += 1
def qlearning():
    tf.reset_default_graph()
    tf_sess = tf.Session()

    tf_output_layer, l2_reg = create_network((1 + NUM_PLAYERS) * 2, 20, NUM_ACTIONS)

    tf_action = tf.placeholder("float", [None, NUM_ACTIONS], name='action')
    tf_target = tf.placeholder("float", [None], name='target')
    tf_q_for_action = tf.reduce_sum(tf.mul(tf_output_layer, tf_action),
                                    reduction_indices=1)

    with tf.name_scope('cost'):
        #reg_losses = [tf.nn.l2_loss(tf.get_variable('layer1/weights'))]
        tf_cost = tf.reduce_mean(tf.square(tf_target - tf_q_for_action)) + \
            l2_reg * L2_REG  #+ L2_REG * sum(reg_losses)
        tf.scalar_summary('cost', tf_cost)
        tf.scalar_summary('l2_reg', l2_reg)
        #tf.scalar_summary('reg_loss', sum(reg_losses))

    with tf.name_scope('avg_reward'):
        tf_rewards = tf.placeholder("float", [None], name='rewards')
        tf_avg_reward = tf.reduce_mean(tf_rewards)
        tf.scalar_summary('avg_reward', tf_avg_reward)

    with tf.name_scope('train_op'):
        tf_train_operation = \
            tf.train.AdamOptimizer(INITIAL_LEARNING_RATE).minimize(tf_cost)

    merged = tf.merge_all_summaries()

    # Give this run of the program an identifier
    identifier = str(time.gmtime()[0:5])
    identifier = identifier.replace('(', '').replace(')', '')
    identifier = identifier.replace(' ', '-').replace(',', '')
    train_writer = tf.train.SummaryWriter('train-' + identifier, tf_sess.graph)

    tf_sess.run(tf.initialize_all_variables())

    epsilon_greedy = INITIAL_EPSILON_GREEDY
    transitions = deque()
    episode_lengths = []
    ep_index = 0
    loss = None

    game = Game()
    game.set_render_or_not(False)
    current_state = game.reset()
    keep_prob = 0.5
    last_nonzero_rewards = []
    t_step = 0

    # Record transitions
    while True:
        # Run an episode
        action = compute_action(tf_sess, tf_output_layer, keep_prob,
                                current_state, epsilon_greedy)
        obs, reward, terminal = game.step_environment(action)
        #print "Observation", obs
        next_state = obs

        last_nonzero_rewards.append(reward)
        last_nonzero_rewards = last_nonzero_rewards[-500:]

        transitions.append({
            'state': current_state,
            'next_state': next_state,
            'action': action,
            'reward': reward,
            'terminal': terminal
        })

        if terminal:
            current_state = game.reset()
        else:
            current_state = next_state

        if len(transitions) > MINI_BATCH_SIZE:
            summary = train(tf_sess, tf_train_operation, tf_output_layer,
                            merged, transitions, last_nonzero_rewards[-500:],
                            KEEP_PROB)
            if t_step % 100 == 0:
                train_writer.add_summary(summary, t_step)
            t_step = t_step + 1

        epsilon_greedy = epsilon_greedy - \
            (INITIAL_EPSILON_GREEDY - FINAL_EPSILON_GREEDY) / float(EPSILON_STEPS)
        epsilon_greedy = max(FINAL_EPSILON_GREEDY, epsilon_greedy)

        avg_nonzero_reward = np.mean(last_nonzero_rewards)
        if (ep_index % 100) == 0:
            print "Average nonzero reward", avg_nonzero_reward
            print "Playing randomly with prob", epsilon_greedy
        ep_index = ep_index + 1
        if ep_index > 1000:
            if avg_nonzero_reward > -0.1:
                print "Min reward over last 500 is", avg_nonzero_reward, "> -0.1, so finished training"
                return tf_sess, tf_output_layer
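# The train() helper called above (not shown here) fits tf_output_layer to the
# Q-learning targets fed through tf_target. For reference, the standard target
# is r + gamma * max_a' Q(s', a'), with terminal transitions bootstrapping to
# just r. The sketch below is a generic illustration of that computation, not
# the project's code; GAMMA and the q_next argument are assumptions.
import numpy as np

GAMMA = 0.99


def build_targets(batch, q_next):
    """batch: transition dicts as stored in `transitions` above;
    q_next: array of shape [len(batch), NUM_ACTIONS] with Q-values of each next_state."""
    targets = np.empty(len(batch))
    for i, tr in enumerate(batch):
        if tr['terminal']:
            targets[i] = tr['reward']
        else:
            targets[i] = tr['reward'] + GAMMA * np.max(q_next[i])
    return targets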
def train(self):
    if self.model_loaded:
        self.init_exp_memory(self.exp_memory_start_size)
    else:
        self.init_random_exp_memory(self.exp_memory_start_size)

    total_reward = 0.0
    games_played = 1

    game = self.get_game()
    self.exp_memory.add(game.get_state(), 0, 0, 0)

    while self.curr_step < self.max_steps:
        #play one game step according to epsilon-greedy policy
        action = 0
        if np.random.rand() < self.epsilon:
            action = np.random.randint(0, self.num_actions)
        else:
            action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        self.exp_memory.add(game.get_state(), action, reward, is_terminal)
        if is_terminal:
            game.reset()
            self.exp_memory.add(game.get_state(), 0, 0, 0)
            games_played += 1
        total_reward += self.renormalize_reward(reward)

        #compute next epsilon
        self.epsilon = np.maximum(self.epsilon_min, self.epsilon - self.epsilon_step)
        self.memory_beta = np.minimum(self.memory_beta_end,
                                      self.memory_beta + self.memory_beta_step)

        if self.curr_step % self.update_freq == 0:
            #sample a batch of transitions from experience memory
            s, a, r, s2, t, indices, p_values = self.exp_memory.sample(self.batch_size)

            #output tensorboard summaries
            write_summary = False
            if (self.tensorboard_log_freq > 0) and (
                    self.curr_step % self.tensorboard_log_freq == 0):
                write_summary = True

            #beta is divided by 2 here because squared error loss squares beta
            _, _, td = self.qlearner.train_step(s, a, r, s2, t, p_values,
                                                self.memory_beta / 2.0,
                                                write_summary=write_summary)
            self.exp_memory.update_p(indices, td)

        #update target network
        if self.target_network_update_mode == "soft":
            if self.curr_step % self.update_freq == 0:
                self.qlearner.update_target_network()
        else:
            if self.curr_step % self.target_network_update_freq == 0:
                self.qlearner.update_target_network()

        #output current training status
        if self.curr_step % self.output_freq == 0:
            average_reward = total_reward / games_played
            total_reward = 0
            games_played = 1
            print("step: {} epsilon: {} average reward per game: {}".format(
                self.curr_step, self.epsilon, average_reward))

        #evaluate current target network and save model if average score per game has improved
        if (self.curr_step % self.eval_freq == 0):
            score, num_games, average, max_score = self.eval(self.eval_steps)
            print("Evaluating model with {} steps:".format(self.eval_steps))
            print("Total score: {} Games: {} Average: {} Max: {}".format(
                score, num_games, average, max_score))
            if average >= self.best_average_score:
                print("Improved average score")
                print("Saving model...")
                self.save()
                self.best_average_score = average

            #add average score to tensorboard
            summary = tf.Summary()
            summary.value.add(tag='average_score', simple_value=average)
            summary.value.add(tag='max_score', simple_value=max_score)
            self.qlearner.summary_writer.add_summary(summary, self.curr_step)

        self.curr_step += 1
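# The loop above anneals self.memory_beta and passes it into
# qlearner.train_step alongside the sampled priorities, which matches the
# importance-sampling correction of prioritized experience replay
# (w_i = (N * P(i)) ** -beta, normalised by the largest weight). The helper
# below is a generic illustration of that formula, not the project's
# exp_memory or train_step implementation.
import numpy as np


def importance_weights(sample_probs, memory_size, beta):
    # sample_probs: P(i) for each sampled transition; beta in [0, 1].
    weights = (memory_size * np.asarray(sample_probs, dtype=np.float64)) ** (-beta)
    return weights / weights.max()  # scale so the largest weight is 1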
# ======================================================================== #
# Initialization                                                           #
# ======================================================================== #

# which model to run
# the full list of models is contained in the models folder
# training the network will generate models
model = 'models/tetris_policy_470000.pth'

# Loading the model and resetting the game state
policy = train.Policy()
policy.load_state_dict(torch.load(model))
state = game.reset()
score = 0
high_score = 0
last_move = 0

# Window constants
GAME_WIDTH = 100
UI_WIDTH = 150
GAME_HEIGHT = 180
SQUARE_WIDTH = 10
SQUARE_HEIGHT = 10
SCREEN_TITLE = "Tetris"
FONT_SIZE = 11
KEY_SIZE = 15