Ejemplo n.º 1
0
def setup_experiment(model_class, exp_type, seed):
    """Build an ``Experiment`` for the given model class and load it.

    The learning rate is 0.85 for ``QLearning`` and 0.15 for the other
    supported models. The discount factor (0.85), epsilon (0.15) and the
    number of training episodes (1,000,000) are the same for every model.

    Args:
        model_class: One of ``QLearning``, ``Sarsa`` or ``ExpectedSarsa``.
        exp_type: Forwarded to ``Experiment`` as ``experiment_type``.
        seed: Forwarded to ``Experiment`` as ``seed``.

    Returns:
        The ``Experiment`` instance, after ``load()`` has been called on it.

    Raises:
        ValueError: If ``model_class`` is not one of the supported models.
    """
    supported_models = (QLearning, Sarsa, ExpectedSarsa)
    if model_class not in supported_models:
        raise ValueError("Unknown model : %s" % (model_class, ))

    # Only the step size depends on the chosen algorithm.
    alpha = 0.85 if model_class == QLearning else 0.15
    gamma = 0.85
    exploration_rate = 0.15

    q_values = DictActionValueFunction(0)
    alpha_schedule = StaticLearningRate(alpha)
    gamma_schedule = StaticDiscountFactor(gamma)

    environment = SnakeEnvironment(SnakeParameters())
    behaviour_policy = EpsilonGreedyPolicy(environment, exploration_rate)

    settings = dict(
        env=environment,
        model_class=model_class,
        experiment_type=exp_type,
        seed=seed,
        epsilon=exploration_rate,
        train_episodes=1000000,
        value_function=q_values,
        learning_rate=alpha_schedule,
        discount_factor=gamma_schedule,
        policy=behaviour_policy,
    )
    experiment = Experiment(**settings)
    experiment.load()
    return experiment
Ejemplo n.º 2
0
Archivo: main.py Proyecto: raholm/DMP
def run_sarsa():
    """Run the snake application with an agent driven by stored Sarsa values.

    The action-value function is unpickled from
    ``../models/sarsa_<params.file_str>.p`` and handed to a ``SnakeAgent``
    whose policy is ``EpsilonGreedyPolicy`` with an epsilon of 0.

    Raises:
        OSError: If the model file cannot be opened (raised by ``open``).
    """
    params = SnakeParameters()
    env = SnakeEnvironment(params)

    # NOTE: unpickling can execute arbitrary code, so only load model files
    # that were produced by this project.
    model_path = "../models/sarsa_%s.p" % params.file_str
    # The context manager closes the file handle even if unpickling fails.
    with open(model_path, "rb") as model_file:
        value_function = pickle.load(model_file)

    agent = SnakeAgent(policy=EpsilonGreedyPolicy(env, 0),
                       action_value_function=value_function)

    start_app(env, agent, params)
Ejemplo n.º 3
0
Archivo: sarsa.py Proyecto: raholm/DMP
def train():
    """Train and store one Sarsa model per snake parameter set.

    For every parameter object yielded by ``get_snake_parameters()`` an
    environment and an epsilon-greedy policy are created, the experiment is
    configured with the seed from ``get_parameters_seed()``, and the result
    is handed to ``train_and_store_model``.
    """
    for model_params in get_snake_parameters():
        environment = SnakeEnvironment(model_params)
        model_params.policy = EpsilonGreedyPolicy(environment,
                                                  model_params.epsilon)

        experiment = ExperimentParameters()
        experiment.env = environment
        experiment.model_class = Sarsa
        experiment.model_params = model_params
        experiment.seed = get_parameters_seed()
        # The output directory is keyed by the seed stored on the experiment.
        experiment.model_output_dir = (
            "../../../models/sarsa/params/%i" % experiment.seed)

        train_and_store_model(experiment)
Ejemplo n.º 4
0
Archivo: main.py Proyecto: raholm/DMP
def train_sarsa():
    """Train a Sarsa learner on the snake environment and pickle its Q values.

    The learner is configured from a default ``SnakeParameters`` instance,
    trained for ``params.train_episodes`` episodes, and its ``Q`` attribute
    is written to ``../models/sarsa_<params.file_str>.p``. The elapsed time
    is printed at the end.

    Raises:
        OSError: If the model file cannot be opened for writing.
    """
    start = timer()

    params = SnakeParameters()
    env = SnakeEnvironment(params)
    policy = EpsilonGreedyPolicy(env, params.epsilon)

    learner = Sarsa(action_value_function=params.value_function,
                    policy=policy,
                    learning_rate=params.learning_rate,
                    discount_factor=params.discount_factor)

    learner.train(env, params.train_episodes)

    # Fetch the values before opening the file so a failure here does not
    # leave a truncated model file behind.
    q_values = learner.Q
    model_path = "../models/sarsa_%s.p" % params.file_str
    # The context manager guarantees the file is flushed and closed.
    with open(model_path, "wb") as model_file:
        pickle.dump(q_values, model_file)

    print("Elapsed time:", timer() - start)
Ejemplo n.º 5
0
Archivo: main.py Proyecto: raholm/DMP
def test_state():
    """Print every state representation of a fresh environment and its hash.

    Each state class is instantiated with the same environment; the state
    itself is printed first, followed by ``hash(state)``.
    """
    environment = SnakeEnvironment(SnakeParameters())

    state_classes = (
        BoardState,
        DirectionalState,
        SnakeFoodState,
        DirectionalDistanceState,
    )
    for state_class in state_classes:
        current = state_class(environment)
        print(current)
        print(hash(current))
Ejemplo n.º 6
0
Archivo: sarsa.py Proyecto: raholm/DMP
def analyze_test():
    """Configure a Sarsa experiment with a greedy policy and analyze it.

    Uses a static discount factor of 0.95 and a static learning rate of
    0.15, points the model and image output directories at the
    seed-specific ``reward`` folders, and calls ``analyze_models_test``.
    """
    model_params = SnakeParameters()
    model_params.discount_factor = StaticDiscountFactor(0.95)
    model_params.learning_rate = StaticLearningRate(0.15)

    environment = SnakeEnvironment(model_params)
    model_params.policy = GreedyPolicy(environment)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = model_params

    # NOTE(review): the seed is read here without being assigned in this
    # function; presumably ExperimentParameters supplies a default - confirm.
    experiment.model_output_dir = (
        "../../../models/sarsa/reward/%i" % experiment.seed)
    experiment.image_output_dir = (
        "../../../images/sarsa/reward/%i" % experiment.seed)

    analyze_models_test(experiment)
Ejemplo n.º 7
0
Archivo: sarsa.py Proyecto: raholm/DMP
def train():
    """Train Sarsa models once per seed returned by ``get_state_seeds()``.

    The model parameters use a static discount factor of 0.95, a static
    learning rate of 0.15 and the reward from ``get_state_reward()``. The
    same experiment object is reused for every seed; only its seed and
    output directory change between calls to ``train_models``.
    """
    model_params = SnakeParameters()
    model_params.discount_factor = StaticDiscountFactor(0.95)
    model_params.learning_rate = StaticLearningRate(0.15)
    model_params.reward = get_state_reward()

    environment = SnakeEnvironment(model_params)
    model_params.policy = EpsilonGreedyPolicy(environment,
                                              model_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = Sarsa
    experiment.model_params = model_params

    for current_seed in get_state_seeds():
        experiment.seed = current_seed
        # Models for each seed go into their own directory.
        experiment.model_output_dir = (
            "../../../models/sarsa/state/%i" % experiment.seed)

        train_models(experiment)
Ejemplo n.º 8
0
def train():
    """Train QLearning models for every reward seed and reward state.

    The model parameters use a static discount factor and a static learning
    rate of 0.85 each. For every seed from ``get_reward_seeds()`` the output
    directory is set, and ``train_models`` is then called once per state
    from ``get_reward_states()``.
    """
    model_params = SnakeParameters()
    model_params.discount_factor = StaticDiscountFactor(0.85)
    model_params.learning_rate = StaticLearningRate(0.85)

    environment = SnakeEnvironment(model_params)
    model_params.policy = EpsilonGreedyPolicy(environment,
                                              model_params.epsilon)

    experiment = ExperimentParameters()
    experiment.env = environment
    experiment.model_class = QLearning
    experiment.model_params = model_params

    for current_seed in get_reward_seeds():
        experiment.seed = current_seed
        experiment.model_output_dir = (
            "../../../models/qlearning/reward/%i" % experiment.seed)

        for reward_state in get_reward_states():
            # The state is set through the experiment's model parameters so
            # the object that train_models receives carries it.
            experiment.model_params.state = reward_state
            train_models(experiment)
Ejemplo n.º 9
0
Archivo: main.py Proyecto: raholm/DMP
def main():
    """Start the snake application with a ``SnakePlayer`` as the player."""
    snake_params = SnakeParameters()
    environment = SnakeEnvironment(snake_params)
    start_app(environment, SnakePlayer(), snake_params)
Ejemplo n.º 10
0
    @pyqtSlot()
    def __on_push_run(self):
        """Slot that currently only prints "Run Agent".

        NOTE(review): presumably connected to a "run" push button - confirm
        against the signal wiring, which is not visible here.
        """
        print("Run Agent")

    @pyqtSlot()
    def __on_push_train(self):
        """Slot that currently only prints "Train Agent".

        NOTE(review): presumably connected to a "train" push button - confirm
        against the signal wiring, which is not visible here.
        """
        print("Train Agent")

    @pyqtSlot()
    def __on_push_reward_stat(self):
        """Slot that currently only prints "Reward Stat".

        NOTE(review): presumably connected to a reward-statistics push
        button - confirm against the signal wiring, which is not visible here.
        """
        print("Reward Stat")

    @pyqtSlot()
    def __on_push_action_stat(self):
        """Slot that currently only prints "Action Stat".

        NOTE(review): presumably connected to an action-statistics push
        button - confirm against the signal wiring, which is not visible here.
        """
        print("Action Stat")

    @pyqtSlot()
    def __on_push_exploration_exploitation_stat(self):
        """Slot that currently only prints "Exploration/Exploitation Stat".

        NOTE(review): presumably connected to an exploration/exploitation
        statistics push button - confirm against the signal wiring, which is
        not visible here.
        """
        print("Exploration/Exploitation Stat")


if __name__ == '__main__':
    # Script entry point: build the snake environment and a player, then
    # display them in a Window inside a Qt application.
    params = SnakeParameters()
    env = SnakeEnvironment(params)
    player = SnakePlayer()

    # The list passed to QApplication takes the place of the command-line
    # arguments; its only entry is the application name.
    app = QApplication(["SnakeBot"])
    # Keep `window` bound to a name so the object stays referenced while the
    # event loop runs.
    window = Window(env, player, params)
    window.show()
    # Start the Qt event loop.
    app.exec_()