Code example #1
File: 1.train.py  Project: liusida/thesis-bodies
    # The snippet begins mid-call; the opening line is reconstructed here
    # (policy string assumed, matching the full call in train_simple.py below).
    model = PPO("MlpPolicy", venv,
                verbose=1,
                tensorboard_log=str(common.output_data_folder /
                                    "tensorboard" / saved_model_filename),
                seed=common.seed,
                **hyperparams)

    # Optionally warm-start the policy with weights from a previous run
    if len(args.initialize_weights_from) > 0:
        try:
            load_model = PPO.load(args.initialize_weights_from)
            load_weights = load_model.policy.state_dict()
            model.policy.load_state_dict(load_weights)
            print(f"Weights loaded from {args.initialize_weights_from}")
        except Exception:
            print("Failed to initialize weights.")
            raise  # re-raise the original error rather than a bare Exception

    try:
        model.learn(total_timesteps=args.train_steps, callback=all_callbacks)
    except KeyboardInterrupt:
        # allow manual interruption; the model is still saved below
        pass
    model.save(str(common.output_data_folder / "models" /
                   saved_model_filename))

    if args.vec_normalize:
        # Important: save the running averages; the agent needs the same
        # normalization statistics at test time.
        model.get_vec_normalize_env().save(
            str(common.output_data_folder / "models" /
                f"{saved_model_filename}.vnorm.pkl"))

    venv.close()
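
The "Important" comment above is the crux: observations were normalized with running statistics accumulated during training, so the same statistics must be restored before the saved agent is evaluated. Below is a minimal sketch of the matching test-time load, assuming stable-baselines3's VecNormalize.load API; make_env and the placeholder filename are hypothetical stand-ins for the project's own helpers:

    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

    # Rebuild one evaluation env (make_env is a stand-in for the project's
    # own env factory, not part of the snippet above).
    venv = DummyVecEnv([make_env])

    # Restore the saved normalization statistics and freeze them.
    venv = VecNormalize.load("models/<saved_model_filename>.vnorm.pkl", venv)
    venv.training = False      # do not update the running averages
    venv.norm_reward = False   # report raw rewards during evaluation

    model = PPO.load("models/<saved_model_filename>", env=venv)
    obs = venv.reset()
    for _ in range(1000):
        action, _ = model.predict(obs, deterministic=True)
        obs, rewards, dones, infos = venv.step(action)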
Code example #2
File: train_simple.py  Project: liusida/thesis-bodies
    else:
        body = args.body_id
        print(body)
        env = DummyVecEnv([utils.make_env(rank=i, seed=utils.seed, render=args.render,
                                          robot_body=body, body_info=0)
                           for i in range(train_num_envs)])
        save_filename = f"model-ant-single-{body}"

    env = VecNormalize(env, **normalize_kwargs)

    keys_remove = ["normalize", "n_envs", "n_timesteps", "policy"]
    for key in keys_remove:  # drop keys that PPO's constructor does not accept
        del hyperparams[key]

    eval_env = DummyVecEnv([utils.make_env(rank=0, seed=utils.seed+1, render=False, robot_body=2, body_info=0)])
    eval_env = VecNormalize(eval_env, norm_reward=False, **normalize_kwargs)
    eval_callback = EvalCallback(
        eval_env=eval_env,
        n_eval_episodes=3,
        eval_freq=10_000,  # counted in vectorized steps, so implicitly multiplied by train_num_envs (16) in timesteps
        deterministic=True,
    )
    # eval_callback = None

    model = PPO('MlpPolicy', env, verbose=1,
                tensorboard_log=f"{folder}/tb/{save_filename}",
                seed=utils.seed, **hyperparams)

    model.learn(total_timesteps=total_timesteps, callback=eval_callback)
    model.save(f"{folder}/{save_filename}")
    # Important: save the running averages; the agent needs the same
    # normalization statistics at test time.
    model.get_vec_normalize_env().save(f"{folder}/{save_filename}-vecnormalize.pkl")

    env.close()
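
One subtlety from the eval_freq comment above: EvalCallback counts calls to the vectorized env, and each call advances train_num_envs timesteps, so the effective interval in timesteps is eval_freq * train_num_envs. A small sketch of pinning the interval to a timestep budget instead (the budget value is illustrative):

    from stable_baselines3.common.callbacks import EvalCallback

    train_num_envs = 16
    eval_every_timesteps = 160_000  # illustrative budget

    # Divide the timestep budget by the number of parallel envs so the
    # callback fires roughly every eval_every_timesteps timesteps.
    eval_freq = max(eval_every_timesteps // train_num_envs, 1)

    eval_callback = EvalCallback(
        eval_env=eval_env,  # the normalized eval env built as above
        n_eval_episodes=3,
        eval_freq=eval_freq,
        deterministic=True,
    )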