# NOTE(review): this chunk arrived collapsed onto one physical line; reformatted
# to conventional Python. `env`, `games`, `exploration_rate`, `render`, `MLP`,
# `normalize`, `random`, and `np` are defined elsewhere in the file.

# long_mem = []
mlp = MLP([4, 16, 16, 2])

# TRAINING
for i in range(games):
    state = env.reset()
    done = False
    score = 0
    short_mem = []  # (state, action) pairs collected during this episode

    # Play a game
    while not done:
        # Epsilon-greedy action selection: with probability
        # `exploration_rate` take a random action (binary action space),
        # otherwise take the network's highest-scoring action.
        if random.random() < exploration_rate:
            action = random.randint(0, 1)
        else:
            actions = mlp.frontprop(normalize(np.array(state)))
            action = np.argmax(actions)
            # action = np.argmin(actions)
        short_mem.append((state, action))
        # Old-style Gym step API: 4-tuple return -- presumably Gym < 0.26;
        # TODO confirm against the env in use.
        state, reward, done, _ = env.step(action)
        if render:
            env.render()
        score += 1

    # If win then learn short mem
    # NOTE(review): 500 looks like the max episode length (CartPole-v1's cap);
    # a full-length episode is treated as a "win" -- confirm against the env.
    if score == 500:
        for state, action in short_mem:
            # Train toward a one-hot target for the action actually taken.
            mlp.backprop(normalize(np.array(state)),
                         np.array([1, 0] if action == 0 else [0, 1]))
        mlp.fit()
        # long_mem.append((state, action))
    # If lose, don't
    else:
        # NOTE(review): the source chunk is truncated at this `else:`; its
        # comment indicates a no-op branch, so it is closed with `pass` --
        # verify against the full file.
        pass