Example #1
        raise ValueError(f"Invalid Policy: {args.policy}!")

    if args.save_model and not os.path.exists("./models"):
        os.makedirs("./models")

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        if not os.path.exists(f"./models/{policy_file}"):
            raise FileNotFoundError(
                f"The loading model path of `./models/{policy_file}` does not exist!")
        policy.load(f"./models/{policy_file}")

    # Setup loggers
    logger_kwargs = setup_logger_kwargs(args.exp_name,
                                        args.seed,
                                        datestamp=False)
    logger = EpochLogger(**logger_kwargs)

    # Sync params across processes
    sync_params(policy)

    # Set up experience buffer
    local_steps_per_epoch = int(args.steps_per_epoch / num_procs())
    _replay_buffer = replay_buffer.VPGBuffer(state_dim, action_dim,
                                             local_steps_per_epoch,
                                             args.discount, args.lam,
                                             is_discrete)

    state, done = env.reset(), False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0
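
A note on the construction above: passing `args.discount` and `args.lam` into the buffer suggests GAE-style advantage estimation. Below is a minimal, self-contained sketch of the discounted-cumulative-sum primitive such buffers typically build on; the helper name and the GAE arithmetic are illustrative assumptions, not the confirmed internals of `replay_buffer.VPGBuffer`.

import numpy as np

def discount_cumsum(x, gamma):
    # out[t] = x[t] + gamma * x[t+1] + gamma^2 * x[t+2] + ...
    out = np.zeros_like(x, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + gamma * running
        out[t] = running
    return out

# GAE(lambda) advantages from per-step rewards and value estimates.
rewards = np.array([1.0, 0.0, 1.0, 0.0])
values = np.array([0.5, 0.4, 0.6, 0.3, 0.0])  # last entry bootstraps a cut-off trajectory
gamma, lam = 0.99, 0.95
deltas = rewards + gamma * values[1:] - values[:-1]  # one-step TD errors
advantages = discount_cumsum(deltas, gamma * lam)
print(advantages)  # ~[1.445, 0.584, 0.415, -0.300]
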
Example #2
File: main.py Project: LQNew/LWDRLD
        raise ValueError(f"Invalid Policy: {args.policy}!")

    if args.save_model and not os.path.exists("./models"):
        os.makedirs("./models")

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        if not os.path.exists(f"./models/{policy_file}"):
            raise FileNotFoundError(
                f"The loading model path of `./models/{policy_file}` does not exist!")
        policy.load(f"./models/{policy_file}")

    # Setup loggers
    logger_kwargs = setup_logger_kwargs(args.exp_name,
                                        args.seed,
                                        datestamp=False)
    logger = EpochLogger(**logger_kwargs)

    _replay_buffer = replay_buffer.ReplayBuffer(int(args.buffer_size))

    print("Collecting experience...")
    epinfobuf = deque(maxlen=100)  # rolling window of the last 100 episodes' stats for reward reporting
    start_time = time.time()  # track wall-clock training time

    states = np.array(env.reset())  # reset the vectorized env; returns one state per env (`num_envs` total)

    step = 0
    for t in range(1, int(args.max_timesteps) // int(args.num_envs) + 1):
        actions = policy.select_action(states, eps_schedule.value)
        next_states, rewards, dones, infos = env.step(actions)  # take actions and observe next states
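
The loop above feeds a uniform replay buffer. For reference, here is a minimal, self-contained sketch of that data structure; the `add`/`sample` method names are illustrative assumptions, not necessarily LWDRLD's exact `ReplayBuffer` API.

import random
from collections import deque

class UniformReplayBuffer:
    def __init__(self, capacity):
        # deque drops the oldest transition once capacity is reached
        self.storage = deque(maxlen=int(capacity))

    def add(self, state, action, reward, next_state, done):
        self.storage.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # uniform sampling without replacement within the batch
        batch = random.sample(self.storage, batch_size)
        states, actions, rewards, next_states, dones = map(list, zip(*batch))
        return states, actions, rewards, next_states, dones

buf = UniformReplayBuffer(capacity=1000)
buf.add(0, 1, 1.0, 1, False)
buf.add(1, 0, 0.0, 2, True)
states, actions, rewards, next_states, dones = buf.sample(2)
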