def __init__(self, model, memory_size, iterations_num, batch_size, game_board_size=20, random_start=True):
    """
    Wire the agent to its model, game engine, replay memory and dataset.

    model            - object stored as ``self.model``
    memory_size      - forwarded to ``Memory``
    iterations_num   - forwarded to ``RLDataset``
    batch_size       - forwarded to ``RLDataset``
    game_board_size  - first argument given to ``Engine``
    random_start     - second argument given to ``Engine``
    """
    self.model = model

    # Environment first: the initial state below is read from it.
    engine = Engine(game_board_size, random_start)
    self.game_engine = engine

    # The dataset is built on top of the very same memory object.
    memory = Memory(memory_size)
    self.replay_memory = memory
    self.dataset = RLDataset(memory, iterations_num, batch_size)

    # Remember the starting state of the freshly created engine.
    self.game_state = self.get_game_state()
def __init__(self, **kwargs):
    """
    Set up keyboard handling, the game engine and the drawing constants.

    ``score_label`` is taken out of ``kwargs`` (default ``None``) before the
    remaining keyword arguments are forwarded to the base initializer.
    """
    self.score_label = kwargs.pop('score_label', None)
    super(SnakeGame, self).__init__(**kwargs)

    # Keyboard: grab it and route key presses to our handler.
    self._keyboard = Window.request_keyboard(self._keyboard_closed, self)
    self._keyboard.bind(on_key_down=self._on_keyboard_down)

    # Game logic and timing.
    self.engine = Engine(board_size=20)
    self.round_time = .05
    self.model_path = './models/model.ptl'

    # Drawing geometry: every cell occupies block_size + 1 units.
    self.block_size = 10
    cell_pitch = self.block_size + 1
    self.board_length = cell_pitch * self.engine.board_size

    # Both the current and the queued direction start from the engine's one.
    start_direction = self.engine.direction
    self.game_direction = start_direction
    self.game_next_direction = self.game_direction
import shutil
import numpy as np
from collections import namedtuple
from itertools import count
from copy import deepcopy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

from game import Engine

# Board dimensions handed to the shared game engine.
width, height = 10, 20
engine = Engine(width, height)

# Short aliases for the torch tensor types used throughout the module.
FloatTensor = torch.FloatTensor
LongTensor = torch.LongTensor
ByteTensor = torch.ByteTensor

# One step of experience: (state, action, next_state, reward).
Transition = namedtuple(
    'Transition',
    ('state', 'action', 'next_state', 'reward'),
)

"""
Replay Memory

Transition - a named tuple representing a single transition in
    our environment
ReplayMemory - a cyclic buffer of bounded size that holds the
    transitions observed recently. It also implements a ``.sample()``
    method for selecting a random batch of transitions for training.
"""
f = tkinter.LabelFrame(top, height=200, width=200) f.grid(row=r, column=c) l = tkinter.Label(f, textvariable=labels[r][c], width=10, height=5, bg='snow3') l.pack() frames[r][c] = l start = time.time() printed = False while True: new_state = engine.get_next() if new_state != None: state = new_state else: if not printed: print(time.time() - start) printed = True for r in range(4): for c in range(4): labels[r][c].set(str(state[r][c]) if state[r][c] != 0 else "") frames[r][c].configure(bg=color_from_num(state[r][c])) top.update_idletasks() top.update() run(Engine())
def update(gameEngine: game.Engine):
    """Refresh the screen so that pending changes become visible."""
    gameEngine.update_screen()
def move(gameEngine: game.Engine, key: int):
    """
    Entry point for movement: move the player and the camera for ``key``,
    then show the result on screen.
    """
    gameEngine.move_player_and_camera(key)
    # Redraw right away so the new position is visible.
    update(gameEngine)
import sys
import os
import torch
import time
from game import Engine
from dqn import DQN, ReplayMemory, Transition
from torch.autograd import Variable

# Short aliases for the torch tensor types used below.
FloatTensor = torch.FloatTensor
LongTensor = torch.LongTensor

# Board dimensions handed to the shared game engine.
width, height = 10, 20
engine = Engine(width, height)


def load_model(filename):
    """
    Create a DQN and load into it the 'state_dict' entry of the
    checkpoint stored at ``filename``.
    """
    net = DQN()
    saved = torch.load(filename)
    weights = saved['state_dict']
    net.load_state_dict(weights)
    return net


def run(model):
    """
    Clear the engine, then repeatedly query ``model`` on the current state,
    picking the highest-valued action and printing the raw model output.
    """
    # engine.clear() yields the board; two leading axes are added in front.
    board = engine.clear()
    state = FloatTensor(board[None, None, :, :])
    score = 0
    while True:
        # Index of the maximum along dim 1, reshaped to (1, 1).
        outputs = model(Variable(state, volatile=True).type(FloatTensor)).data
        action = outputs.max(1)[1].view(1, 1).type(LongTensor)
        print(model(Variable(state, volatile=True).type(FloatTensor)).data)
import pygame

pygame.init()
size = (900, 600)

# Imported only after pygame.init(), exactly as in the original ordering.
from game import Engine

# Open the window, hand it to the engine, and shut pygame down once
# run() returns.
screen = pygame.display.set_mode(size)
game_engine = Engine(screen)
game_engine.run()
pygame.quit()
def say(self): if not self.reusable: self.active = False return self.script() def script(self): pass class ConvoEngine: """ interprets conversation graph, ConvoNode attributes, conditions and scripts """ engine = Engine() d = defaultdict(set) d['0'].add('A') d['A'].add('B') d['A'].add('C') d['B'].add('A') d['C'] convos = dict(A=ConvoNode("Hello", True, True, True), B=ConvoNode("Hi", True, False, False), C=ConvoNode("Bye", True, True, True)) # TODO: show conversation option based on value of variable # TODO: after conversation tree of side conversation is finished, go back to main branch def convo():
class Agent:
    """
    Reinforcement learning agent interacting with the game environment

    The agent owns a game engine, a replay memory and a dataset built on
    that memory. ``self.game_state`` always holds the state the next call
    to ``move`` will act on.
    """

    def __init__(self, model, memory_size, iterations_num, batch_size, game_board_size=20, random_start=True):
        """
        model            - callable mapping a state tensor to action scores;
                           must expose ``actions_num`` (read by ``move``)
        memory_size      - forwarded to ``Memory``
        iterations_num   - forwarded to ``RLDataset``
        batch_size       - forwarded to ``RLDataset``
        game_board_size  - first argument given to ``Engine``
        random_start     - second argument given to ``Engine``
        """
        self.model = model
        self.game_engine = Engine(game_board_size, random_start)
        self.replay_memory = Memory(memory_size)
        self.dataset = RLDataset(self.replay_memory, iterations_num, batch_size)
        self.game_state = self.get_game_state()

    def get_dataset(self):
        """
        Returns dataset based on replay memory
        """
        return self.dataset

    def get_game_state(self):
        """
        Return the engine's current state as a tensor with a leading
        axis of size 1 added in front.
        """
        state = self.game_engine.get_game_state()
        return torch.from_numpy(state).unsqueeze(0)

    def play_full_game(self, max_moves=1000):
        """
        Play one greedy game (at most <max_moves> moves) and return the
        points the engine reports at the end.

        The engine is reset before and after the game. Because the engine
        is shared with ``move``, ``self.game_state`` is refreshed after the
        final reset so the next ``move`` does not act on, or store, a state
        that no longer matches the engine.
        """
        self.game_engine.reset()
        for _ in range(max_moves):
            # Pure inference: no gradients are needed here.
            with torch.no_grad():
                out = self.model(self.get_game_state())
            action_idx = torch.argmax(out).item()
            self.game_engine.next_round(self.translate_action(action_idx))
            if not self.game_engine.alive:
                break

        points = self.game_engine.points
        self.game_engine.reset()
        # Keep the cached state in sync with the freshly reset engine.
        self.game_state = self.get_game_state()
        return points

    def move(self, epsilon):
        """
        Make single interaction with the game environment

        With probability <epsilon> a random action is taken, otherwise the
        action with the highest model output. The experience
        ``(state, one_hot_action, reward, new_state, terminal)`` is appended
        to the replay memory; ``one_hot_action`` has shape
        ``(1, actions_num)``. After a terminal step the engine is reset and
        the cached state is read again.
        """
        actions_num = self.model.actions_num

        if random.random() <= epsilon:
            action_idx = random.randint(0, actions_num - 1)
        else:
            # Pure inference: no gradients are needed here.
            with torch.no_grad():
                model_output = self.model(self.game_state)
            action_idx = torch.argmax(model_output).item()

        # One-hot encoding of the chosen action with a leading axis.
        action = torch.zeros(actions_num)
        action[action_idx] = 1
        action = action.unsqueeze(0)

        direction = self.translate_action(action_idx)
        reward, terminal = self.game_engine.next_round(direction)
        new_state = self.get_game_state()

        exp = (self.game_state, action, reward, new_state, terminal)
        self.replay_memory.append(exp)

        if terminal:
            # Start over; the stored experience keeps the terminal state.
            self.game_engine.reset()
            new_state = self.get_game_state()

        self.game_state = new_state

    def translate_action(self, action):
        """
        Translate action to new direction
        Actions:
        0 - turn left
        1 - go straight
        2 - turn right

        The result is the engine's current direction shifted by
        ``action - 1`` and wrapped into 0..3.
        """
        direction = self.game_engine.direction
        new_direction = (direction + action - 1) % 4
        return new_direction

    def warmup(self, num):
        """
        Make <num> of random moves
        """
        for _ in range(num):
            self.move(1)
class SnakeGame(Widget):
    """
    Widget that runs a snake game on an ``Engine`` and draws it on its canvas.

    The game can be advanced either from keyboard input (``update``) or by
    a loaded model (``update_with_model``). Directions are integers indexing
    ``['up', 'right', 'down', 'left']``.
    """

    # Keys that steer the snake, mapped to the direction name they request.
    _KEY_DIRECTIONS = {'w': 'up', 's': 'down', 'a': 'left', 'd': 'right'}

    def __init__(self, **kwargs):
        """
        ``score_label`` (optional, default ``None``) is taken out of
        ``kwargs`` before the rest is forwarded to the base initializer.
        """
        self.score_label = kwargs.pop('score_label', None)
        super(SnakeGame, self).__init__(**kwargs)
        self._keyboard = Window.request_keyboard(self._keyboard_closed, self)
        self._keyboard.bind(on_key_down=self._on_keyboard_down)
        self.engine = Engine(board_size=20)
        self.round_time = .05
        self.model_path = './models/model.ptl'
        self.block_size = 10
        # Every cell occupies block_size + 1 units (block plus a 1-unit gap).
        self.board_length = (self.block_size + 1) * self.engine.board_size
        self.game_direction = self.engine.direction
        self.game_next_direction = self.game_direction

    def change_snake_direction(self, new_direction):
        """
        Queue ``new_direction`` ('up', 'right', 'down' or 'left') for the
        next round, unless it is the exact opposite of the current one.
        Raises ``ValueError`` for any other name.
        """
        directions = ['up', 'right', 'down', 'left']
        new_d = directions.index(new_direction)
        # Opposite directions are two steps apart in the list.
        if (self.game_direction + 2) % 4 != new_d:
            self.game_next_direction = new_d

    def update(self, dt):
        """
        Advance the game by one round using the queued direction, then
        redraw the board and the score. ``dt`` is not used.
        """
        if self.engine.alive:
            self.game_direction = self.game_next_direction
            self.engine.next_round(self.game_direction)
            self.draw_board()
            self.update_score()

    def init_ai(self):
        """Load the model from ``self.model_path`` and freeze it."""
        self.model = SnakeNet.load_from_checkpoint(self.model_path)
        self.model.freeze()

    def update_with_model(self, dt):
        """
        Advance the game by one round using the model's choice, then redraw
        the board and the score. ``init_ai`` must have been called first.
        ``dt`` is not used.
        """
        if self.engine.alive:
            state = self.engine.get_game_state()
            output = self.model(torch.from_numpy(state).unsqueeze(0))
            # Output index 0/1/2 becomes a relative turn of -1/0/+1.
            action = torch.argmax(output).item() - 1
            self.game_direction = (self.game_direction + action) % 4
            self.engine.next_round(self.game_direction)
            self.draw_board()
            self.update_score()

    def update_score(self):
        """
        Show the engine's points on the score label.

        Does nothing when no ``score_label`` was supplied, instead of
        failing on the ``None`` default.
        """
        if self.score_label is None:
            return
        score = self.engine.points
        self.score_label.text = f'Points: {int(score)}'

    def draw_board(self):
        """
        Clear the canvas and draw the border, the snake and the fruit,
        with the board centred inside the widget.
        """
        self.canvas.clear()
        with self.canvas:
            border_width = 5
            side = self.board_length
            self.padding_x = (self.width - side) // 2
            self.padding_y = (self.height - side) // 2
            left, bottom = self.padding_x, self.padding_y

            # Four border strips: left, bottom, right, top.
            Rectangle(pos=(left - border_width, bottom), size=(border_width, side))
            Rectangle(pos=(left, bottom - border_width), size=(side, border_width))
            Rectangle(pos=(left + side, bottom), size=(border_width, side))
            Rectangle(pos=(left, bottom + side), size=(side, border_width))

            step = self.block_size + 1
            block = (self.block_size, self.block_size)

            # Snake segments.
            Color(.59, .91, .12)
            for x, y in self.engine.snake:
                Rectangle(pos=(left + x * step, bottom + y * step), size=block)

            # Fruit.
            x, y = self.engine.fruit
            Color(.93, .83, .05)
            Rectangle(pos=(left + x * step, bottom + y * step), size=block)

    def _keyboard_closed(self):
        """Unbind the key handler and drop the keyboard reference."""
        self._keyboard.unbind(on_key_down=self._on_keyboard_down)
        self._keyboard = None

    def _on_keyboard_down(self, keyboard, keycode, text, modifiers):
        """
        Handle a key press: w/s/a/d steer the snake, r restarts the game.
        ``keycode[1]`` is the key name; other keys are ignored.
        """
        key = keycode[1]
        if key in self._KEY_DIRECTIONS:
            self.change_snake_direction(self._KEY_DIRECTIONS[key])
        elif key == 'r':
            self.engine.reset()
            # Re-sync with the engine, as in __init__, so the next round
            # does not reuse the direction from the previous game.
            self.game_direction = self.engine.direction
            self.game_next_direction = self.game_direction