def setup_experiment(model_class, exp_type, seed):
    """Assemble an ``Experiment`` for ``model_class`` and load it.

    Args:
        model_class: One of ``QLearning``, ``Sarsa`` or ``ExpectedSarsa``.
        exp_type: Forwarded to ``Experiment`` as ``experiment_type``.
        seed: Forwarded to ``Experiment`` as ``seed``.

    Returns:
        The ``Experiment`` instance, after its ``load()`` method was called.

    Raises:
        ValueError: If ``model_class`` is not one of the supported classes.
    """
    supported_models = (QLearning, Sarsa, ExpectedSarsa)
    if model_class not in supported_models:
        raise ValueError("Unknown model : %s" % (model_class, ))

    # Only the learning rate depends on the model; the discount factor is
    # 0.85 for every supported model.
    alpha = 0.85 if model_class == QLearning else 0.15
    gamma = 0.85
    epsilon = 0.15
    train_episodes = 1000000

    value_function = DictActionValueFunction(0)
    learning_rate = StaticLearningRate(alpha)
    discount_factor = StaticDiscountFactor(gamma)

    snake_params = SnakeParameters()
    env = SnakeEnvironment(snake_params)
    policy = EpsilonGreedyPolicy(env, epsilon)

    experiment = Experiment(
        env=env,
        model_class=model_class,
        experiment_type=exp_type,
        seed=seed,
        epsilon=epsilon,
        train_episodes=train_episodes,
        value_function=value_function,
        learning_rate=learning_rate,
        discount_factor=discount_factor,
        policy=policy,
    )
    experiment.load()
    return experiment
def run_sarsa():
    """Run the snake app with an agent driven by a pickled Sarsa value function.

    The value function is read from ``../models/sarsa_<file_str>.p`` (relative
    to the current working directory), where ``<file_str>`` comes from
    ``SnakeParameters().file_str``. The agent uses
    ``EpsilonGreedyPolicy(env, 0)``.

    Raises:
        OSError: If the model file cannot be opened.
    """
    params = SnakeParameters()
    env = SnakeEnvironment(params)

    model_path = "../models/sarsa_%s.p" % params.file_str
    # The context manager closes the file handle even if unpickling fails.
    # NOTE: unpickling can execute arbitrary code; only load model files that
    # were produced by a trusted training run.
    with open(model_path, "rb") as model_file:
        value_function = pickle.load(model_file)

    agent = SnakeAgent(policy=EpsilonGreedyPolicy(env, 0),
                       action_value_function=value_function)
    start_app(env, agent, params)
def train():
    """Train and store one Sarsa model per entry of ``get_snake_parameters()``.

    For each parameter set an environment and an epsilon-greedy policy are
    created, a fresh ``ExperimentParameters`` is filled in, and the result is
    passed to ``train_and_store_model``. Models are written below
    ``../../../models/sarsa/params/<seed>``.
    """
    for snake_params in get_snake_parameters():
        environment = SnakeEnvironment(snake_params)
        snake_params.policy = EpsilonGreedyPolicy(environment,
                                                  snake_params.epsilon)

        experiment = ExperimentParameters()
        experiment.env = environment
        experiment.model_class = Sarsa
        experiment.model_params = snake_params
        experiment.seed = get_parameters_seed()
        # The output directory is keyed by the seed assigned just above.
        experiment.model_output_dir = (
            "../../../models/sarsa/params/%i" % experiment.seed)

        train_and_store_model(experiment)
def train_sarsa():
    """Train a Sarsa learner on the snake environment and pickle its Q values.

    All hyper-parameters (value function, learning rate, discount factor,
    epsilon, number of episodes) are read from a default ``SnakeParameters``
    instance. After training, ``learner.Q`` is written to
    ``../models/sarsa_<file_str>.p`` and the elapsed time is printed.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    start = timer()

    params = SnakeParameters()
    env = SnakeEnvironment(params)
    policy = EpsilonGreedyPolicy(env, params.epsilon)
    learner = Sarsa(action_value_function=params.value_function,
                    policy=policy,
                    learning_rate=params.learning_rate,
                    discount_factor=params.discount_factor)
    learner.train(env, params.train_episodes)

    model_path = "../models/sarsa_%s.p" % params.file_str
    # Use a context manager so the file is flushed and closed before the
    # function returns, instead of relying on garbage collection.
    with open(model_path, "wb") as model_file:
        pickle.dump(learner.Q, model_file)

    print("Elapsed time:", timer() - start)
def test_state():
    """Print each state representation of a fresh environment and its hash.

    For every state class, an instance is built from the same environment,
    then the instance and ``hash(instance)`` are printed, in that order.
    """
    env = SnakeEnvironment(SnakeParameters())

    state_classes = (
        BoardState,
        DirectionalState,
        SnakeFoodState,
        DirectionalDistanceState,
    )
    for state_class in state_classes:
        state = state_class(env)
        print(state)
        print(hash(state))
def analyze_test():
    """Configure a Sarsa experiment with a greedy policy and analyze it.

    Uses a discount factor of 0.95 and a learning rate of 0.15, then hands
    the filled-in ``ExperimentParameters`` to ``analyze_models_test``. Model
    and image directories live below ``../../../models/sarsa/reward/<seed>``
    and ``../../../images/sarsa/reward/<seed>``.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.95)
    snake_params.learning_rate = StaticLearningRate(0.15)

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = GreedyPolicy(environment)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = snake_params

    # NOTE(review): ``seed`` is never assigned in this function, so both
    # paths use whatever value ExperimentParameters() provides - confirm
    # that this default is the intended seed.
    experiment.model_output_dir = (
        "../../../models/sarsa/reward/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/sarsa/reward/%i" % experiment.seed)

    analyze_models_test(experiment)
def train():
    """Train Sarsa models once for every seed from ``get_state_seeds()``.

    A single environment, policy and ``ExperimentParameters`` object are set
    up with discount factor 0.95, learning rate 0.15 and the reward from
    ``get_state_reward()``; only the seed and the output directory
    (``../../../models/sarsa/state/<seed>``) change between runs.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.95)
    snake_params.learning_rate = StaticLearningRate(0.15)
    snake_params.reward = get_state_reward()

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = EpsilonGreedyPolicy(environment,
                                              snake_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = snake_params

    for current_seed in get_state_seeds():
        # The same experiment object is reused; seed and directory are
        # overwritten before each training run.
        experiment.seed = current_seed
        experiment.model_output_dir = (
            "../../../models/sarsa/state/%i" % experiment.seed)
        train_models(experiment)
def train():
    """Train Q-learning models for every reward seed and reward state.

    One environment, policy and ``ExperimentParameters`` object are created
    with discount factor 0.85 and learning rate 0.85. For each seed from
    ``get_reward_seeds()`` the output directory is set to
    ``../../../models/qlearning/reward/<seed>``, and ``train_models`` is then
    called once per entry of ``get_reward_states()``.
    """
    snake_params = SnakeParameters()
    snake_params.discount_factor = StaticDiscountFactor(0.85)
    snake_params.learning_rate = StaticLearningRate(0.85)

    environment = SnakeEnvironment(snake_params)
    snake_params.policy = EpsilonGreedyPolicy(environment,
                                              snake_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = QLearning
    experiment.model_params = snake_params

    for current_seed in get_reward_seeds():
        experiment.seed = current_seed
        experiment.model_output_dir = (
            "../../../models/qlearning/reward/%i" % experiment.seed)

        for reward_state in get_reward_states():
            # Only the state on the shared model parameters changes between
            # the runs of one seed.
            experiment.model_params.state = reward_state
            train_models(experiment)
def main():
    """Start the snake app with a ``SnakePlayer`` on a default environment."""
    snake_params = SnakeParameters()
    environment = SnakeEnvironment(snake_params)
    start_app(environment, SnakePlayer(), snake_params)
# NOTE(review): the five slots below take ``self`` and use double-underscore
# names, so they presumably belong to a class whose header is not visible in
# this view - re-indent them to the class body's level when restoring the
# file layout. Each slot currently only prints a label.
@pyqtSlot()
def __on_push_run(self):
    """Slot that prints "Run Agent"."""
    print("Run Agent")


@pyqtSlot()
def __on_push_train(self):
    """Slot that prints "Train Agent"."""
    print("Train Agent")


@pyqtSlot()
def __on_push_reward_stat(self):
    """Slot that prints "Reward Stat"."""
    print("Reward Stat")


@pyqtSlot()
def __on_push_action_stat(self):
    """Slot that prints "Action Stat"."""
    print("Action Stat")


@pyqtSlot()
def __on_push_exploration_exploitation_stat(self):
    """Slot that prints "Exploration/Exploitation Stat"."""
    print("Exploration/Exploitation Stat")


# Script entry point: build the environment and player, create the
# application object and window, show the window, then call ``exec_()``.
if __name__ == '__main__':
    params = SnakeParameters()
    env = SnakeEnvironment(params)
    player = SnakePlayer()
    # The list passed here takes the place of an argv list; "SnakeBot" is its
    # only entry.
    app = QApplication(["SnakeBot"])
    window = Window(env, player, params)
    window.show()
    app.exec_()