Code Example #1
    # INIT ENV: build the wrapped environment and query its observation/action spaces
    env, observation_space, action_space = build_env_wrapper(
        params["env_name"], env_type=params["env_type"])

    # LOGGING: TensorBoard writer for this run
    writer = SummaryWriter(comment="-" + params["run_name"] + "-basic")

    # NETWORK: online Q-network and its target copy
    net = dqn_mlp_net.Network(observation_space,
                              action_space,
                              hidden_layer_size=64).to(device)
    tgt_net = agents.TargetNetwork(net)

    # AGENT: epsilon-greedy action selection with a decaying epsilon
    selector = actions.EpsilonGreedyActionSelector(
        epsilon=params["epsilon_start"])
    epsilon_tracker = logger.EpsilonTracker(selector, params)
    agent = agents.DQNAgent(net, selector, device=device)

    # RUNNER: experience source, replay buffer and optimizer
    exp_source = runner.RunnerSourceFirstLast(env,
                                              agent,
                                              gamma=params["gamma"],
                                              steps_count=1)
    buffer = ExperienceReplayBuffer(exp_source,
                                    buffer_size=params["replay_size"])
    optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])

    # bookkeeping for the training loop
    frame_idx = 0
    done = False
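
Code Example #1 stops just before the training loop, so the loss that loop would minimize is not shown. Below is a self-contained sketch of the standard one-step DQN objective in plain PyTorch, which is what such a loop typically computes from a sampled batch. The function name, tensor arguments, and the assumption that `net` and `tgt_net` are plain callable Q-network modules are illustrative and not part of the original listing.

    import torch
    import torch.nn as nn

    def calc_dqn_loss(states_v, actions_v, rewards_v, dones_mask,
                      next_states_v, net, tgt_net, gamma):
        # Q(s, a) of the actions actually taken in the batch
        q_values = net(states_v).gather(
            1, actions_v.unsqueeze(-1)).squeeze(-1)

        # bootstrapped target r + gamma * max_a' Q_tgt(s', a'),
        # with the bootstrap term zeroed on terminal transitions
        with torch.no_grad():
            next_q = tgt_net(next_states_v).max(1)[0]
            next_q[dones_mask] = 0.0
            target = rewards_v + gamma * next_q

        return nn.MSELoss()(q_values, target)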
Code Example #2
    # create the Q-network sized from the environment's observation and action spaces
    net = models.SimpleFFDQN(env.get_obs_len(), env.get_action_space_size())

    # load previously saved network weights if requested
    if RL_options['load_net']:
        with open(
                os.path.join(RL_options['net_saved_path'],
                             RL_options['net_file']), "rb") as f:
            checkpoint = torch.load(f)
        net.load_state_dict(checkpoint['state_dict'])

    # move the network to the GPU and build the epsilon-greedy agent
    net.to(torch.device("cuda"))
    selector = actions.EpsilonGreedyActionSelector(RL_options['epsilon_start'])
    agent = agents.DQNAgent(net, selector)
    # agent = agents.Supervised_DQNAgent(net, selector, sample_sheet, assistance_ratio=0.2)
    # experience source and replay buffer
    exp_source = experience.ExperienceSourceFirstLast(
        env,
        agent,
        RL_options['gamma'],
        steps_count=RL_options['reward_steps'])
    buffer = experience.ExperienceReplayBuffer(exp_source,
                                               RL_options['replay_size'])

    # create optimizer
    optimizer = optim.Adam(net.parameters(), lr=RL_options['lr'])

    # create the net pre-processor over the online and target networks
    net_processor = common.netPreprocessor(net, agent.target_model)
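
The loading branch in Code Example #2 expects a checkpoint dictionary with a 'state_dict' key. A matching save helper could look like the sketch below; save_checkpoint is a hypothetical name and not part of the original code.

    import os
    import torch

    def save_checkpoint(net, saved_path, net_file):
        # store the weights under the 'state_dict' key that the
        # loading code above reads back
        os.makedirs(saved_path, exist_ok=True)
        with open(os.path.join(saved_path, net_file), "wb") as f:
            torch.save({'state_dict': net.state_dict()}, f)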