Example #1
0
    def train(config, checkpoint_dir=None):
        """Train PPO on 'BomberMan-v0' forever, exporting policies periodically.

        Builds a ``PPOTrainer``, restores two checkpoints from hard-coded
        paths, and then loops over ``trainer.train()`` without ever returning.
        Every 200th iteration the four policies are exported below
        ``./model-<iteration>/`` unless that directory already exists.

        Args:
            config: Passed straight to ``PPOTrainer`` as its ``config``.
            checkpoint_dir: Not used by this function.
        """
        trainer = PPOTrainer(config=config, env='BomberMan-v0')

        # Capture 'policy_01' weights as they are right after construction,
        # i.e. before any checkpoint has been restored.
        fresh_main_weights = trainer.get_policy('policy_01').get_weights()
        trainer.restore(
            'C:\\Users\\Florian\\ray_results\\PPO_BomberMan-v0_2021-03-25_08-56-43eo23nmho\\checkpoint_002360\\checkpoint-2360'
        )

        def reset_main_policy(worker):
            # Put the pre-restore 'policy_01' weights back on this worker.
            worker.get_policy('policy_01').set_weights(fresh_main_weights)

        trainer.workers.foreach_worker(reset_main_policy)

        # NOTE(review): this second restore runs after the weight reset above;
        # presumably it only affects the kill policy -- confirm it does not
        # overwrite 'policy_01' again.
        trainer.restore('.\\kill-policy-0\\checkpoint')
        # NOTE(review): called without arguments; confirm the installed RLlib
        # version's `import_model` signature allows that.
        trainer.import_model()

        # (sub-directory, policy id) pairs, exported in this order.
        exports = (
            ('main', 'policy_01'),
            ('collect', 'policy_collect'),
            ('destroy', 'policy_destroy'),
            ('kill', 'policy_kill'),
        )

        step = 0
        while True:
            step += 1
            trainer.train()

            if step % 200 != 0:
                continue

            export_root = f'./model-{step}'
            if os.path.exists(export_root):
                print("model already saved")
                continue

            for subdir, policy_id in exports:
                trainer.export_policy_model(f'{export_root}/{subdir}',
                                            policy_id)
Example #2
0
    def train(config, checkpoint_dir=None):
        """Train PPO on 'BomberMan-v0' forever, exporting the model periodically.

        Builds a ``PPOTrainer`` and loops over ``trainer.train()`` without ever
        returning. On iterations 1, 251, 501, ... it checks whether
        ``./model-<iteration>-ckpt`` exists: if not, the model is exported in
        'h5' format to ``./model-<iteration>``; otherwise the model is imported
        from ``./model-<iteration>``.

        Args:
            config: Passed straight to ``PPOTrainer`` as its ``config``.
            checkpoint_dir: Not used by this function.
        """
        trainer = PPOTrainer(config=config, env='BomberMan-v0')

        step = 0
        while True:
            step += 1
            trainer.train()

            if step % 250 != 1:
                continue

            model_path = f'./model-{step}'
            # NOTE(review): the existence check uses the '-ckpt' suffixed path
            # while export/import use the un-suffixed one -- confirm this
            # mismatch is intentional.
            if os.path.exists(f'./model-{step}-ckpt'):
                trainer.import_model(model_path)
                print("model already saved")
            else:
                trainer.export_model('h5', model_path)