Example #1
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)  # 11 inputs, 256 hidden, 3 outputs
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
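Most of these snippets construct a Linear_QNet(11, 256, 3) and a QTrainer without showing those classes. Below is a minimal, hedged sketch of what Linear_QNet might look like, assuming the common two-layer PyTorch implementation these Snake agents usually rely on; the save() helper and the './model' folder are assumptions, not taken from the examples.

import os
import torch
import torch.nn as nn
import torch.nn.functional as F

class Linear_QNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        return self.linear2(x)  # raw Q-values, one entry per action

    def save(self, file_name='model.pth'):
        # assumed convention: checkpoints go into a local ./model folder
        folder = './model'
        os.makedirs(folder, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(folder, file_name))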
Example #2
def main():

    with open(args.parameters_file, 'r') as f:
        parameters = yaml.load(f, Loader=yaml.FullLoader)

    model = Linear_QNet(11, 256, 3)

    if args.use_trained:
        model.load_state_dict(torch.load(parameters["model_path"]))

    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent(args, model)
    game = SnakeGameAI()

    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new,
                                 done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                if args.save_model:
                    agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)
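The loop above delegates the actual learning to QTrainer via train_short_memory and train_long_memory. A hedged sketch of a train_step that would fit these calls, assuming the standard DQN-style target Q_new = reward + gamma * max(Q(next_state)); the implementation details are assumptions, not code from the example.

import torch
import torch.nn as nn
import torch.optim as optim

class QTrainer:
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)

        if len(state.shape) == 1:
            # single transition: add a batch dimension
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done, )

        pred = self.model(state)  # predicted Q-values for the current states
        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = Q_new

        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()
        self.optimizer.step()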
Example #3
    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
            print(self.n_games)

        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
Example #4
    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  #  discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()
Example #5
    def __init__(self, filename='model.pth'):
        file_name = os.path.join(
            '/Users/kevin/GitHub/lunarip/aiexplore/rls500/model', filename)
        if os.path.exists(file_name):
            self.model = torch.load(file_name)
            print("loaded")
        else:
            self.model = Linear_QNet(len(state_info), 16, len(
                onehot_action))  # first param is the length of the state array
        for param_tensor in self.model.state_dict():
            print(param_tensor, "\t",
                  self.model.state_dict()[param_tensor].size())
            print(param_tensor, "\t", self.model.state_dict()[param_tensor])
Example #6
	def __init__(self):
		self.n_games = 0
		self.epsilon = 0	# randomness
		self.gamma = 0.9		# discount rate
		self.memory = deque(maxlen = max_memory)
		self.model = Linear_QNet(11, 256, 3)
		PATH = './model/model.pth'
		if os.path.exists(PATH):
			self.model.load_state_dict(torch.load(PATH))
			# self.model.eval()
			print('Pretrained = True')

		self.trainer = QTrainer(self.model, lr = lr, gamma = self.gamma)
Example #7
    def __init__(self):
        self.numberOfGames = 0
        self.epsilon = 0  # controls randomness
        self.gamma = 0.9  # discount rate, <1

        # will popleft if there is too much in memory
        self.memory = deque(maxlen=maxMemory)

        self.model = Linear_QNet(11, 256, 3)

        if os.path.isfile('./model/model.pth'):
            model_folder_path = './model/model.pth'
            self.model.load_state_dict(torch.load(model_folder_path))

        self.trainer = QTrainer(self.model, lr=learningRate, gamma=self.gamma)
Example #8
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(4, 256, 4)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
Example #9
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # for random
        self.gamma = 0.5  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = Qtrainer(self.model, lr=LR, gamma=self.gamma)
Example #10
    def __init__(self):
        self.num_games = 0
        self.epsilon = 0  # to control the randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
Example #11
    def __init__(self):
        self.epsilion = 0.999
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, LR, self.gamma)
        self.epsilion_decay_value = 0.998
Example #12
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0.5  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(2, 256, 4)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
        self.epsilon_decay_value = (self.epsilon) / (END_EPSILON_DECAYING -
                                                     START_EPSILON_DECAYING)
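The decay step computed above is presumably subtracted from epsilon once per episode inside the decay window, in the style popularized by tabular Q-learning tutorials. A hedged usage sketch; the episode loop and constant values here are assumptions, not taken from the example.

# assumed constants, purely for illustration
START_EPSILON_DECAYING = 1
END_EPSILON_DECAYING = 5_000

for episode in range(10_000):
    # ... play one game / episode with the agent here ...
    if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
        agent.epsilon -= agent.epsilon_decay_value  # linear decay toward 0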
Example #13
class Agent:
    def __init__(self, filename='model.pth'):
        file_name = os.path.join(
            '/Users/kevin/GitHub/lunarip/aiexplore/rls500/model', filename)
        if os.path.exists(file_name):
            self.model = torch.load(file_name)
            print("loaded")
        else:
            self.model = Linear_QNet(len(state_info), 16, len(
                onehot_action))  # first param is the length of the state array
        for param_tensor in self.model.state_dict():
            print(param_tensor, "\t",
                  self.model.state_dict()[param_tensor].size())
            print(param_tensor, "\t", self.model.state_dict()[param_tensor])

    def get_action(self, state):
        state0 = torch.tensor(state, dtype=torch.float32)
        prediction = self.model(state0)
        move = int(torch.argmax(prediction).item())
        final_move = int_onehot[move]
        action = onehot_action[tuple(final_move)]
        #print(move,final_move,action)
        return action
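The int_onehot and onehot_action lookups used in get_action above are not included in the snippet. Purely hypothetical definitions, just to show the shape of the mapping (the real keys, values, and action names in that project may differ):

# hypothetical mappings: model output index -> one-hot move -> concrete action
int_onehot = {
    0: [1, 0, 0, 0],
    1: [0, 1, 0, 0],
    2: [0, 0, 1, 0],
    3: [0, 0, 0, 1],
}
onehot_action = {
    (1, 0, 0, 0): 'up',
    (0, 1, 0, 0): 'down',
    (0, 0, 1, 0): 'left',
    (0, 0, 0, 1): 'right',
}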
Example #14
    def __init__(self, game, pars=dict()):
        """
        (Agent, Snake, dict()) -> None
        Initialize everything
        get everything that is passed from 
        json file to modify attributes and train model
        """
        self.n_games = 0
        self.epsilon = pars.get('eps', EPSILON)
        self.eps = pars.get('eps', EPSILON)
        self.gamma = pars.get('gamma', GAMMA)  # discount rate
        self.eps_range = pars.get('eps_range', EPS_RANGE)
        print(self.epsilon, self.eps)
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(len(game.get_state()),
                                 pars.get('hidden_size', HIDDEN_SIZE),
                                 OUTPUT_SIZE)
        self.trainer = QTrainer(self.model,
                                lr=pars.get('lr', LR),
                                gamma=self.gamma)

        self.game = game
Example #15
class Agent:
    def __init__(self, use_checkpoint=False):
        self.no_of_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  #  discount rate
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

        if use_checkpoint:
            checkpoint = torch.load("./model/model.pth")
            self.model.load_state_dict(checkpoint)
            self.model.eval()

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  #  Food left
            game.food.x > game.head.x,  #  Food right
            game.food.y < game.head.y,  #  Food up
            game.food.y > game.head.y,  #  Food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, game_overs = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states,
                                game_overs)

    def train_short_memory(self, state, action, reward, next_state, game_over):
        self.trainer.train_step(state, action, reward, next_state, game_over)

    def get_action(self, state):
        self.epsilon = 80 - self.no_of_games
        action = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            action[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            action[move] = 1

        return action
Example #16
class Agent:
    # The Agent class. The agent is the intermediary between the model and the environment (the game).
    def __init__(self):
        with open('games.txt', 'r') as f:
            self.n_games = int(f.read())
            print(self.n_games)

        self.epsilon = 0
        self.gamma = 0.9
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear_QNet(11, 256, 3)
        #self.model.load_state_dict(torch.load('model/model.pth'))
        self.model.eval()
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    # Initialization. It first loads the saved results, sets a few constants, and loads the neural network from the file 'model.pth'.
    # If you are running this program for the first time, change lines 25-27 to "self.n_games = 0" and delete line 33.

    def get_state(self, game):
        # Function with which the agent obtains information about the environment.
        head = game.snake[0]
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN
        # Definitions of the variables used below.
        state = [

            # Danger ahead?
            (dir_r and game.is_collision(point_r)) or
            (dir_l and game.is_collision(point_l)) or
            (dir_u and game.is_collision(point_u)) or
            (dir_d and game.is_collision(point_d)),

            # Danger to the right?
            (dir_u and game.is_collision(point_r)) or
            (dir_d and game.is_collision(point_l)) or
            (dir_l and game.is_collision(point_u)) or
            (dir_r and game.is_collision(point_d)),

            # Danger to the left?
            (dir_d and game.is_collision(point_r)) or
            (dir_u and game.is_collision(point_l)) or
            (dir_r and game.is_collision(point_u)) or
            (dir_l and game.is_collision(point_d)),

            # Direction of the snake.
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Relative position of the food.
            game.food.x < game.head.x,
            game.food.x > game.head.x,
            game.food.y < game.head.y,
            game.food.y > game.head.y

        ]

        return np.array(state, dtype=int)

    # Returns the data to the agent.

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory
        # Replay-training function. (After a real game, the model replays the game once more.)
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    # Function for real-time (short-memory) training.

    def get_action(self, state):
        self.epsilon = 500 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 500) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
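Example #16 reads the running game count back from games.txt at start-up; the corresponding write is not shown. A one-line sketch of how the training loop might persist it after each game (an assumption, not part of the snippet above):

with open('games.txt', 'w') as f:
    f.write(str(agent.n_games))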
Example #17
class Agent:

	def __init__(self):
		self.n_games = 0
		self.epsilon = 0	# randomness
		self.gamma = 0.9		# discount rate
		self.memory = deque(maxlen = max_memory)
		self.model = Linear_QNet(11, 256, 3)
		PATH = './model/model.pth'
		if os.path.exists(PATH):
			self.model.load_state_dict(torch.load(PATH))
			# self.model.eval()
			print('Pretrained = True')

		self.trainer = QTrainer(self.model, lr = lr, gamma = self.gamma)

	def get_state(self, game):
		head = game.snake[0]
		point_l = Point(head.x - 20, head.y)
		point_r = Point(head.x + 20, head.y)
		point_u = Point(head.x, head.y - 20)
		point_d = Point(head.x, head.y + 20)

		dir_l = game.direction == Direction.LEFT
		dir_r = game.direction == Direction.RIGHT
		dir_u = game.direction == Direction.UP
		dir_d = game.direction == Direction.DOWN

		state = [
		    # Danger straight
		    (dir_r and game.is_collision(point_r)) or 
		    (dir_l and game.is_collision(point_l)) or 
		    (dir_u and game.is_collision(point_u)) or 
		    (dir_d and game.is_collision(point_d)),

		    # Danger right
		    (dir_u and game.is_collision(point_r)) or 
		    (dir_d and game.is_collision(point_l)) or 
		    (dir_l and game.is_collision(point_u)) or 
		    (dir_r and game.is_collision(point_d)),

		    # Danger left
		    (dir_d and game.is_collision(point_r)) or 
		    (dir_u and game.is_collision(point_l)) or 
		    (dir_r and game.is_collision(point_u)) or 
		    (dir_l and game.is_collision(point_d)),
		    
		    # Move direction
		    dir_l,
		    dir_r,
		    dir_u,
		    dir_d,
		    
		    # Food location 
		    game.food.x < game.head.x,  # food left
		    game.food.x > game.head.x,  # food right
		    game.food.y < game.head.y,  # food up
		    game.food.y > game.head.y  # food down
		    ]

		return np.array(state, dtype=int)

	def remember(self, state, action, reward, next_state, done):
		self.memory.append((state, action, reward, next_state, done))

	def train_long_memory(self):
		if len(self.memory) > batch_size:
			mini_sample = random.sample(self.memory, batch_size) # list of tuples of size = 1000
		else:
			mini_sample = self.memory

		states, actions, rewards, next_states, dones = zip(*mini_sample)

		self.trainer.train_step(states, actions, rewards, next_states, dones)

	def train_short_memory(self, state, action, reward, next_state, done):
		self.trainer.train_step(state, action, reward, next_state, done)

	def get_action(self, state):
		# random moves: tradeoff exploration / exploitation
		self.epsilon = 80 - self.n_games
		final_move = [0, 0, 0]

		if random.randint(0, 200) < self.epsilon:
			move = random.randint(0, 2)
			final_move[move] = 1
		else:
			state0 = torch.tensor(state, dtype = torch.float)
			prediction = self.model(state0)
			move = torch.argmax(prediction).item()
			final_move[move] = 1

		return final_move
Example #18
class Agent:
    def __init__(self):
        self.numberOfGames = 0
        self.epsilon = 0  # controls randomness
        self.gamma = 0.9  # discount rate, <1

        # will popleft if there is too much in memory
        self.memory = deque(maxlen=maxMemory)

        self.model = Linear_QNet(11, 256, 3)

        if os.path.isfile('./model/model.pth'):
            model_folder_path = './model/model.pth'
            self.model.load_state_dict(torch.load(model_folder_path))

        self.trainer = QTrainer(self.model, lr=learningRate, gamma=self.gamma)

    def getState(self, game):
        head = game.snake[0]

        # Clockwise directions and angles
        cw_dirs = [
            Direction.RIGHT == game.direction,
            Direction.DOWN == game.direction, Direction.LEFT == game.direction,
            Direction.UP == game.direction
        ]
        cw_angs = np.array([0, np.pi / 2, np.pi, -np.pi / 2])

        # Position - in front: 0, on right: 1, on left: -1; BLOCK_SIZE = 20
        def getPoint(pos):
            return Point(
                head.x + 20 * np.cos(cw_angs[(cw_dirs.index(True) + pos) % 4]),
                head.y + 20 * np.sin(cw_angs[(cw_dirs.index(True) + pos) % 4]))

        state = [
            # Danger
            game.is_collision(getPoint(0)),
            game.is_collision(getPoint(1)),
            game.is_collision(getPoint(-1)),

            # Move direction
            cw_dirs[2],
            cw_dirs[0],
            cw_dirs[3],
            cw_dirs[1],

            # Food location
            game.food.x < head.x,
            game.food.x > head.x,
            game.food.y < head.y,
            game.food.y > head.y
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def trainLongMemory(self):
        if len(self.memory) > batchSize:
            # list of tuples from the memory
            miniSample = random.sample(self.memory, batchSize)
        else:
            miniSample = self.memory

        states, actions, rewards, next_states, game_over = zip(*miniSample)
        self.trainer.trainStep(states, actions, rewards, next_states,
                               game_over)

    def trainShortMemory(self, state, action, reward, next_state, game_over):
        self.trainer.trainStep(state, action, reward, next_state, game_over)

    def getAction(self, state):
        # exploitation / exploration
        self.epsilon = 80 - self.numberOfGames
        final_move = [0, 0, 0]
        if random.randint(-2, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1

        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
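Example #18 replaces the usual four explicit neighbour points with a clockwise-angle lookup in getPoint. A small standalone check of that mapping, with the Point type and a RIGHT heading assumed purely for illustration (BLOCK_SIZE of 20, y axis growing downward as in the game):

import numpy as np
from collections import namedtuple

Point = namedtuple('Point', 'x y')
head = Point(100, 100)

cw_dirs = [True, False, False, False]  # heading RIGHT
cw_angs = np.array([0, np.pi / 2, np.pi, -np.pi / 2])

def getPoint(pos):
    ang = cw_angs[(cw_dirs.index(True) + pos) % 4]
    return Point(head.x + 20 * np.cos(ang), head.y + 20 * np.sin(ang))

print(getPoint(0))   # straight ahead: (120.0, 100.0)
print(getPoint(1))   # right of the heading: (~100.0, 120.0), i.e. downward on screen
print(getPoint(-1))  # left of the heading: (~100.0, 80.0), i.e. upward on screen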
Example #19
class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  #randomness
        self.gamma = 0.9  #discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  #popleft()
        self.model = Linear_QNet(11, 256, 3)  # 11 inputs, 256 hidden, 3 outputs
        self.model.load_state_dict(torch.load('./optimized_model/model.pth'))
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        BLOCK_SIZE = 20

        #Points to check danger
        point_l = Point(head.x - BLOCK_SIZE, head.y)
        point_r = Point(head.x + BLOCK_SIZE, head.y)
        point_u = Point(head.x, head.y - BLOCK_SIZE)
        point_d = Point(head.x, head.y + BLOCK_SIZE)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            #For straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            #Danger Right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            #Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            #Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            #Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state,
                            done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory,
                                        BATCH_SIZE)  #list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

        #for state, action,reward, next_state, done in mini_sample:
        #    self.trainer.train_step(state, action,reward, next_state, done)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves : tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(
                0, 200
        ) < self.epsilon and False:  # original exploration check; 'and False' disables random moves
            # alternative that was tried: if random.randint(0, 200) < 20 and self.n_games < 90:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1
        return final_move