Ejemplo n.º 1
0
                win_num += 1
                r = 1 #reward if won
            elif status == -1:
                fail_num += 1
                r = -1#reward if loss
            elif progress.sum() != 0:
                r = 0.9 #some progress is made
            else:
                r = -0.3 #nothing
            # We wanted to reward "YOLO bombing", since it is a proven strategy when the agent is stuck.

            miner.store_transition(s, a, r, s_)

            ep_r += r
            if miner.memory_counter > opt.memory_capacity:
                miner.optimize_model()
                if game.get_status() != 0:
                    print('Ep: ', epoch,
                          '| Ep_r: ', round(ep_r, 2))

            if status != 0:
                break

            s = s_.copy()

        critic_r += ep_r
        if (epoch+1) % opt.n_critic == 0:
            print('=====evaluation=====')
            print('Epochs:', epoch)
            print('win number:', win_num)
            print('fail number:', fail_num)