# NOTE: the imports below are an assumption based on the classic rlpy package
# layout used by its bundled examples; adjust them to your rlpy version.
import os

from rlpy.Agents import Q_Learning
from rlpy.Domains import GridWorld
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedyDecay
from rlpy.Representations import IndependentDiscretization, iFDDK


def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50
    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation: iFDDK feature discovery on top of an independent
    # per-dimension discretization
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=True, useCache=True, lazy=True,
                           lambda_=lambda_)

    # Policy: epsilon-greedy with decaying epsilon, starting at 0.9
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent: Q-learning with eligibility traces and Boyan learn-rate decay
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
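
# Usage sketch (an addition, not part of the original file): rlpy's bundled
# examples typically end with a __main__ block like this. The run() flags
# follow rlpy's Experiment.run signature; treat them as an assumption if your
# rlpy version differs.
if __name__ == "__main__":
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,     # skip step-by-step domain rendering
                   visualize_learning=False,  # skip representation/policy plots
                   visualize_performance=0)   # no rendering during policy checks
    experiment.plot()
    experiment.save()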
# Imports again assume the classic rlpy package layout (not in the original
# snippet); this variant uses a fixed-epsilon eGreedy policy.
import os

from rlpy.Agents import Q_Learning
from rlpy.Domains import GridWorld
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import IndependentDiscretization, iFDDK


def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation: iFDDK feature discovery on top of an independent
    # per-dimension discretization
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=True, useCache=True, lazy=True,
                           lambda_=lambda_)

    # Policy: epsilon-greedy with fixed epsilon = 0.1
    policy = eGreedy(representation, epsilon=0.1)

    # Agent: Q-learning with eligibility traces and Boyan learn-rate decay
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
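
# A hedged sketch (not in the original): since exp_id seeds the random number
# generators, independent runs of the same setup are usually produced by
# looping over seeds and saving each run's results under the same path.
if __name__ == "__main__":
    for seed in range(1, 6):
        experiment = make_experiment(exp_id=seed, path="./Results/Temp")
        experiment.run(visualize_steps=False,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()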
# Imports are an assumption (classic rlpy package layout); DOMAIN and
# MAX_STEPS are module-level constants assumed to be defined elsewhere in
# this file.
from typing import Optional

from rlpy.Agents import LSPI, NaturalActorCritic, Q_Learning
from rlpy.Agents.Agent import Agent
from rlpy.Experiments import Experiment
from rlpy.Policies import GibbsPolicy, eGreedy
from rlpy.Representations import IndependentDiscretization, Tabular, iFDDK


def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        # Least-Squares Policy Iteration over a tabular representation
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        # Natural Actor-Critic with a Gibbs (softmax) policy
        return NaturalActorCritic(GibbsPolicy(tabular), tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        # Q-learning directly on the tabular representation
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        # Q-learning with iFDDK feature discovery
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError(f"unknown agent name: {name}")
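
# A hypothetical driver (an assumption, mirroring the make_experiment files
# above): it wires the selected agent into an Experiment using the same
# options those examples pass via opt. DOMAIN and MAX_STEPS are the
# module-level constants referenced by select_agent.
if __name__ == "__main__":
    agent = select_agent("ifddk-q", _seed=1)
    experiment = Experiment(
        agent=agent,
        domain=DOMAIN,
        exp_id=1,
        max_steps=MAX_STEPS,
        num_policy_checks=10,
        path="./Results/Temp",
    )
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()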