ep_r += r if miner.memory_counter > opt.memory_capacity: miner.optimize_model() if game.get_status() != 0: print('Ep: ', epoch, '| Ep_r: ', round(ep_r, 2)) if status != 0: break s = s_.copy() critic_r += ep_r if (epoch+1) % opt.n_critic == 0: print('=====evaluation=====') print('Epochs:', epoch) print('win number:', win_num) print('fail number:', fail_num) print('win rate:', win_num / (win_num + fail_num)) print('total reward:', critic_r) avg_rewards.append(critic_r) success.append(win_num) win_num = 0 fail_num = 0 critic_r = 0 plot_durations(success, 'Accuracy','batch_size16_rmsprop_longdirect.png' ) plot_durations(avg_rewards, 'Critic Reward','batch_size16_rmsprop_longreward.png' ) miner.save_params()