コード例 #1
0
ファイル: q-ifddk.py プロジェクト: mcmellawatt/rlpy
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Assemble and return the Experiment for this setup: a Q_Learning agent
    using an iFDDK representation on the 'large_state.txt' GridWorld map.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the Experiment instance built from the options below
    """
    # Hyper-parameters; the trace decay is handed to both the
    # representation and the agent.
    trace_decay = 0.3
    threshold = 1.
    step_size = 0.11
    n0 = 100

    # Domain: GridWorld loaded from the default map directory, noise=0.3.
    map_file = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    world = GridWorld(map_file, noise=0.3)

    # Representation: iFDDK seeded with an independent discretization.
    base_rep = IndependentDiscretization(world)
    features = iFDDK(
        world, threshold, base_rep,
        sparsify=True, useCache=True, lazy=True,
        lambda_=trace_decay)

    # Policy: epsilon-greedy with a decaying epsilon starting at 0.9.
    explorer = eGreedyDecay(features, epsilonInit=0.9)

    # Agent: Q-Learning with the "boyan" learn-rate decay mode.
    learner = Q_Learning(
        explorer, features,
        discount_factor=world.discount_factor,
        lambda_=trace_decay,
        initial_learn_rate=step_size,
        learn_rate_decay_mode="boyan",
        boyan_N0=n0)

    # Every entry is forwarded to Experiment as a keyword argument.
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 10000000,
        "num_policy_checks": 50,
        "domain": world,
        "agent": learner,
    }
    return Experiment(**settings)
コード例 #2
0
ファイル: q-ifddk.py プロジェクト: okkhoy/rlpy
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Build the Experiment for this file: Q_Learning over an iFDDK
    representation on the '4x5.txt' GridWorld map with a constant
    epsilon-greedy policy.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    # Domain
    grid_map = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    env = GridWorld(grid_map, noise=0.3)

    # Representation (the same lambda_ value is reused by the agent below)
    eligibility = 0.3
    features = iFDDK(
        env,
        1.,
        IndependentDiscretization(env),
        sparsify=True,
        useCache=True,
        lazy=True,
        lambda_=eligibility)

    # Policy
    behaviour = eGreedy(features, epsilon=0.1)

    # Agent
    learner = Q_Learning(
        behaviour,
        features,
        discount_factor=env.discount_factor,
        lambda_=eligibility,
        initial_learn_rate=0.11,
        learn_rate_decay_mode="boyan",
        boyan_N0=100)

    # Experiment options, passed through as keyword arguments.
    options = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 10,
        "domain": env,
        "agent": learner,
    }
    return Experiment(**options)
コード例 #3
0
ファイル: gridworld.py プロジェクト: kngwyu/rlpy-experiments
def select_agent(name: Optional[str], _seed: int) -> Agent:
    """Build the agent identified by ``name`` for the module-level DOMAIN.

    Recognized names:

    * ``None`` or ``'lspi'`` -- LSPI with an epsilon-greedy policy over a
      Tabular representation.
    * ``'nac'`` -- NaturalActorCritic with a GibbsPolicy over a Tabular
      representation.
    * ``'tabular-q'`` -- Q_Learning over a Tabular representation.
    * ``'ifddk-q'`` -- Q_Learning over an iFDDK representation that starts
      from an IndependentDiscretization.

    Each representation is constructed only in the branch that uses it, so
    an unrecognized name fails before any representation is built.

    :param name: agent identifier; ``None`` selects the LSPI default.
    :param _seed: accepted for interface compatibility; not used here.
    :return: the constructed agent.
    :raises NotImplementedError: if ``name`` is not one of the names above.
    """
    if name is None or name == 'lspi':
        tabular = Tabular(DOMAIN, discretization=20)
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)

    if name == 'nac':
        tabular = Tabular(DOMAIN, discretization=20)
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)

    # The two Q-Learning variants share every hyperparameter and differ only
    # in the representation they learn over.
    lambda_ = 0.3
    if name == 'tabular-q':
        representation = Tabular(DOMAIN, discretization=20)
    elif name == 'ifddk-q':
        representation = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
    else:
        # Same exception type as before, now naming the offending value.
        raise NotImplementedError(
            "Unknown agent name: {!r} (expected None, 'lspi', 'nac', "
            "'tabular-q' or 'ifddk-q')".format(name)
        )

    return Q_Learning(
        eGreedy(representation, epsilon=0.1),
        representation,
        discount_factor=DOMAIN.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=0.11,
        learn_rate_decay_mode='boyan',
        boyan_N0=100,
    )