Code Example #1
File: train_dqn.py   Project: youjp/Hierarchical-DQN
def make_agent(agent_type, env, num_clusters, use_extra_travel_penalty,
               use_extra_bit, use_controller_dqn, use_intrinsic_timeout,
               use_memory, memory_size, pretrain_controller):
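    """Build either a flat DQN agent or a hierarchical DQN (h-DQN) agent."""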
    if agent_type == 'dqn':
        return dqn.DqnAgent(state_dims=[2],
                            num_actions=2)  # env.action_space.n
    elif agent_type == 'h_dqn':
        meta_controller_state_fn, check_subgoal_fn, num_subgoals, subgoals = clustering.get_cluster_fn(
            n_clusters=num_clusters, extra_bit=use_extra_bit)

        return hierarchical_dqn.HierarchicalDqnAgent(
            state_sizes=[num_subgoals, [2]],
            agent_types=['tabular', 'network'],
            subgoals=subgoals,
            num_subgoals=num_subgoals,
            num_primitive_actions=2,  # env.action_space.n
            meta_controller_state_fn=meta_controller_state_fn,
            check_subgoal_fn=check_subgoal_fn,
            use_extra_travel_penalty=use_extra_travel_penalty,
            use_extra_bit_for_subgoal_center=use_extra_bit,
            use_controller_dqn=use_controller_dqn,
            use_intrinsic_timeout=use_intrinsic_timeout,
            use_memory=use_memory,
            memory_size=memory_size,
            pretrain_controller=pretrain_controller)
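For orientation, here is a minimal sketch of how this make_agent variant might be called from a training script. The environment name and every flag value below are illustrative assumptions, not values taken from the project.

import gym

env = gym.make('MountainCar-v0')  # assumed environment; the project may use a different one
agent = make_agent(agent_type='h_dqn',
                   env=env,
                   num_clusters=4,
                   use_extra_travel_penalty=False,
                   use_extra_bit=False,
                   use_controller_dqn=False,
                   use_intrinsic_timeout=False,
                   use_memory=False,
                   memory_size=10000,
                   pretrain_controller=False)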
Code Example #2
def make_agent(agent_type, env):
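    """Build either a flat DQN agent or an h-DQN agent with a fixed 4-cluster subgoal partition."""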
    if agent_type == 'dqn':
        return dqn.DqnAgent(state_dims=[2],
                            num_actions=2)  # env.action_space.n
    elif agent_type == 'h_dqn':
        meta_controller_state_fn, check_subgoal_fn, num_subgoals, subgoals = clustering.get_cluster_fn(
            n_clusters=4, extra_bit=False)

        return hierarchical_dqn.HierarchicalDqnAgent(
            state_sizes=[[num_subgoals], 2],
            subgoals=subgoals,
            num_subgoals=num_subgoals,
            num_primitive_actions=2,  # env.action_space.n
            meta_controller_state_fn=meta_controller_state_fn,
            check_subgoal_fn=check_subgoal_fn)
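The clustering.get_cluster_fn helper itself is not included in these excerpts. Based only on how the call sites above unpack its return value, a hypothetical k-means-based sketch of such a helper could look like the following; every internal detail here is an assumption, not the project's actual implementation.

import numpy as np
from sklearn.cluster import KMeans

def get_cluster_fn(n_clusters, extra_bit, states=None):
    # Hypothetical sketch: partition visited 2-D states into subgoal clusters and
    # return the 4-tuple the call sites above expect:
    # (meta_controller_state_fn, check_subgoal_fn, num_subgoals, subgoals).
    # The extra_bit handling of the real project is omitted here.
    if states is None:
        states = np.random.uniform(-1.0, 1.0, size=(1000, 2))  # placeholder data
    kmeans = KMeans(n_clusters=n_clusters).fit(states)
    subgoals = kmeans.cluster_centers_

    def meta_controller_state_fn(state):
        # One-hot encoding of the cluster the current state falls into.
        idx = kmeans.predict(np.asarray(state).reshape(1, -1))[0]
        one_hot = np.zeros(n_clusters)
        one_hot[idx] = 1.0
        return one_hot

    def check_subgoal_fn(state, subgoal_index):
        # A subgoal counts as reached when the state lies in that cluster.
        idx = kmeans.predict(np.asarray(state).reshape(1, -1))[0]
        return idx == subgoal_index

    return meta_controller_state_fn, check_subgoal_fn, n_clusters, subgoals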
Code Example #3
def make_agent(agent_type, env, load=True):
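    """Build either a flat DQN agent or an h-DQN agent with hand-specified subgoals; `load` is forwarded to the hierarchical agent."""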
    if agent_type == 'dqn':
        return dqn.DqnAgent(state_dims=[2], num_actions=env.action_space.n)
    elif agent_type == 'h_dqn':
        meta_controller_state_fn, check_subgoal_fn, num_subgoals = None, check_subgoal, 2

        # subgoals = [[-.7, -.2], [-1, 0], [.5, .2], [1, 0]]
        # clustering.get_cluster_fn(n_clusters=num_clusters, extra_bit=use_extra_bit)

        return hierarchical_dqn.HierarchicalDqnAgent(
            state_sizes=env.observation_space.shape,
            subgoals=subgoals,
            num_subgoals=num_subgoals,
            num_primitive_actions=env.action_space.n,
            meta_controller_state_fn=meta_controller_state_fn,
            check_subgoal_fn=check_subgoal_fn,
            load=load)
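This variant references check_subgoal and subgoals, which are presumably defined elsewhere in the file. As a hypothetical illustration only (the positions and tolerance below are guesses, not the project's values), module-level definitions of that shape might look like:

subgoals = [[-1.0, 0.0], [0.5, 0.0]]  # two (position, velocity) targets, matching num_subgoals = 2

def check_subgoal(state, subgoal_index):
    # Treat a subgoal as reached when the position component is close to the
    # target position; velocity is ignored in this sketch.
    target_position = subgoals[subgoal_index][0]
    return abs(state[0] - target_position) < 0.05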
Code Example #4
File: trainMain.py   Project: Ivehui/DQN
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    env = gym.make(pms.gameName)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/DQN-' + pms.gameName
    env.monitor.start(outdir, force=True, seed=0)

    # This declaration must go *after* the monitor call, since the
    # monitor's seeding creates a new action_space instance with the
    # appropriate pseudorandom number generator.
    agent = dqn.DqnAgent(env.action_space)
    tran = Tran(max_size=pms.bufferSize)
    caffe.set_mode_gpu()

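    # Target image size and (frameChannel, height, width) buffers for the current and next stacked observations.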
    imageDim = np.array((pms.frameHeight, pms.frameWidth))
    curFrame = np.zeros((pms.frameChannel, pms.frameHeight, pms.frameWidth))
    nextFrame = np.zeros((pms.frameChannel, pms.frameHeight, pms.frameWidth))

    testStep = 0
    update_step = 0

    for i in range(pms.episodeCount):
        rgbImage = env.reset()
        # env.render()
        done = False
        for j in range(pms.frameChannel):
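Note that env.monitor.start(...) is the built-in Monitor API of early gym releases; in later gym versions the equivalent setup uses the gym.wrappers.Monitor wrapper. A rough modern counterpart of the setup above (same output directory, other details assumed) would be:

import gym
from gym import wrappers

env = gym.make(pms.gameName)
env = wrappers.Monitor(env, outdir, force=True)  # replaces env.monitor.start(outdir, force=True, seed=0)
env.seed(0)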
Code Example #5
File: testMain.py   Project: Ivehui/DQN
    logger.setLevel(logging.INFO)

    env = gym.make(pms.gameName)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/DQN-Test-' + pms.gameName
    env.monitor.start(outdir, force=True, seed=0)

    # This declaration must go *after* the monitor call, since the
    # monitor's seeding creates a new action_space instance with the
    # appropriate pseudorandom number generator.
    model = './models/action_iter_2000000.caffemodel'
    agent = dqn.DqnAgent(env.action_space, model=model)
    caffe.set_mode_gpu()

    imageDim = np.array((pms.frameHeight, pms.frameWidth))
    curFrame = np.zeros((pms.frameChannel, pms.frameHeight, pms.frameWidth))
    nextFrame = np.zeros((pms.frameChannel, pms.frameHeight, pms.frameWidth))

    testStep = 0
    update_step = 0

    for i in range(pms.episodeTestCount):
        rgbImage = env.reset()
        env.render()
        done = False
        rewardSum = 0
        while not done:
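The excerpt ends at the start of the per-episode loop. For reference only, here is a self-contained sketch of a generic gym evaluation loop of the same shape; the real script would preprocess frames and query the trained DqnAgent for actions rather than sampling randomly, and the environment name is an assumption.

import gym

env = gym.make('Breakout-v0')  # assumed game; testMain.py uses pms.gameName
for episode in range(3):
    observation = env.reset()
    done = False
    reward_sum = 0.0
    while not done:
        action = env.action_space.sample()  # placeholder for the agent's greedy action
        observation, reward, done, info = env.step(action)
        reward_sum += reward
    print('episode %d finished, total reward %.1f' % (episode, reward_sum))
env.close()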