Ejemplo n.º 1
0
            # --- per-step bookkeeping (inside the innermost, per-step loop) ---
            # Accumulate `r` into the running total for the current episode.
            # presumably `r` is the reward of the step just taken -- its
            # assignment is outside this excerpt; TODO confirm.
            ep_r += r
            # Only call optimize_model() once the miner's memory counter has
            # exceeded the configured capacity.
            if miner.memory_counter > opt.memory_capacity:
                miner.optimize_model()
                # Print the episode summary when the game reports a non-zero
                # status. Because this print is nested under the memory check
                # above, nothing is printed for episodes that end before the
                # counter exceeds the capacity.
                # NOTE(review): this queries game.get_status() while the break
                # below tests the local `status`; confirm the two always agree.
                if game.get_status() != 0:
                    print('Ep: ', epoch,
                          '| Ep_r: ', round(ep_r, 2))

            # A non-zero status leaves the per-step loop.
            # presumably non-zero means the episode is over (win or fail) --
            # TODO confirm against the game's status codes.
            if status != 0:
                break

            # Carry a copy of `s_` forward as `s` for the next iteration.
            s = s_.copy()

        # --- per-epoch bookkeeping (after the per-step loop has ended) ---
        # Add this episode's total to the accumulator for the current
        # evaluation window.
        critic_r += ep_r
        # Every opt.n_critic epochs: report, record, reset and plot.
        if (epoch+1) % opt.n_critic == 0:
            print('=====evaluation=====')
            # NOTE(review): prints `epoch`, whereas the trigger above is
            # computed from `epoch+1`.
            print('Epochs:', epoch)
            print('win number:', win_num)
            print('fail number:', fail_num)
            # NOTE(review): raises ZeroDivisionError if both counters are 0;
            # whether that can happen depends on code outside this excerpt.
            print('win rate:', win_num / (win_num + fail_num))
            print('total reward:', critic_r)
            # Record this window's totals for plotting.
            avg_rewards.append(critic_r)
            success.append(win_num)
            # Reset the window counters so the next window starts from zero.
            win_num = 0
            fail_num = 0
            critic_r = 0
            # Plot both histories; the output file names are hard-coded.
            # NOTE(review): `success` holds raw win counts per window, not a
            # ratio, although the plot is labelled 'Accuracy'.
            plot_durations(success, 'Accuracy','batch_size16_rmsprop_longdirect.png' )
            plot_durations(avg_rewards, 'Critic Reward','batch_size16_rmsprop_longreward.png' )

    # After the epoch loop has finished, save the miner's parameters.
    miner.save_params()