Example #1
 success_threshold: float = 30.0
 experiment_high_reward_selfplay: TicTacToeExperiment = TicTacToeExperiment(
     "experiment_high_reward", success_threshold,
     environment_selfplay_high_reward, dddqn_curriculum_agent_second,
     interface_high_reward_selfplay)
 # Define experiments data
 testing_episodes: int = 100
 test_cycles: int = 10
 training_episodes: int = 1000
 validation_episodes: int = 100
 max_training_episodes: int = 25000
 episode_length_max: int = 20
 # Run curriculum experiments for low reward
 saved_metagraph_paths: list = run_experiment(
     experiment_low_reward_fixed, training_episodes, max_training_episodes,
     episode_length_max, validation_episodes, testing_episodes, test_cycles,
     render_during_training, render_during_validation, render_during_test,
     workspace_path, __file__, logger, None, experiment_iterations_number)
 for metagraph_path in saved_metagraph_paths:
     run_experiment(experiment_low_reward_selfplay, training_episodes,
                    max_training_episodes, episode_length_max,
                    validation_episodes, testing_episodes, test_cycles,
                    render_during_training, render_during_validation, True,
                    workspace_path, __file__, logger, metagraph_path)
 # Run curriculum experiments for high reward
 saved_metagraph_paths: list = run_experiment(
     experiment_high_reward_fixed, training_episodes, max_training_episodes,
     episode_length_max, validation_episodes, testing_episodes, test_cycles,
     render_during_training, render_during_validation, render_during_test,
     workspace_path, __file__, logger, None, experiment_iterations_number)
 for metagraph_path in saved_metagraph_paths:
     run_experiment(experiment_high_reward_selfplay, training_episodes,
                    max_training_episodes, episode_length_max,
                    validation_episodes, testing_episodes, test_cycles,
                    render_during_training, render_during_validation, True,
                    workspace_path, __file__, logger, metagraph_path)
Example #2
    testing_episodes: int = 100
    test_cycles: int = 10
    training_episodes: int = 100
    validation_episodes: int = 100
    max_training_episodes: int = 35000
    episode_length_max: int = 100
    plot_sample_density: int = 10
    # Run experiment
    intro: str = "Data:\n" \
                 "\nVanilla Policy Gradient with GAE buffer" \
                 "\nThree dense layer with 4096 neurons each using xavier initialization" \
                 "\nLearning rate policy: 0.0003" \
                 "\nLearning rate advantage: 0.0001" \
                 "\nDiscount factor: 0.99" \
                 "\nValue steps per update: 10" \
                 "\nLambda parameter: 0.95" \
                 "\nUpdates per training volley: 2" \
                 "\nSuccess threshold: 0.35 average total reward on the validation set episodes" \
                 "\nEpisodic: yes" \
                 "\nEpisode length: 100" \
                 "\nMax allowed steps for episode: 100" \
                 "\nSeed states range [0, 0]" \
                 "\nAcceptance value: none" \
                 "\nThreshold value: 0.2\n"
    run_experiment(experiment, training_episodes, max_training_episodes,
                   episode_length_max, validation_episodes, testing_episodes,
                   test_cycles, render_during_training,
                   render_during_validation, render_during_test,
                   workspace_path, __file__, logger, None,
                   experiment_iterations_number, intro, plot_sample_density)
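
The calls in these examples pass every argument positionally. A plausible layout for run_experiment, inferred only from those calls, is sketched below; the parameter names after the experiment object are guesses and may not match the library's actual signature.

    def run_experiment(experiment, training_episodes, max_training_episodes,
                       episode_length_max, validation_episodes, testing_episodes,
                       test_cycles, render_during_training,
                       render_during_validation, render_during_test,
                       workspace_path, file_path, logger,
                       restore_metagraph_path=None, iterations_number=1,
                       intro=None, plot_sample_density=1):
        # Hypothetical stub: trains, validates and tests the given experiment,
        # returning the paths of any saved metagraphs (Example #1 iterates over
        # the returned list to seed its self-play runs).
        ...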
Example #3
     environment_high_reward, dddqn_epsilon_greedy_agent,
     interface_high_reward)
 experiment_boltzmann_high_reward: TicTacToeExperiment = TicTacToeExperiment(
     "b_experiment_high_reward", success_threshold, environment_high_reward,
     dddqn_boltzmann_agent, interface_high_reward)
 # Define experiments data
 testing_episodes: int = 100
 test_cycles: int = 10
 training_episodes: int = 1000
 validation_episodes: int = 100
 max_training_episodes: int = 50000
 episode_length_max: int = 20
 # Run epsilon greedy experiment for low reward
 run_experiment(experiment_egreedy_low_reward, training_episodes,
                max_training_episodes, episode_length_max,
                validation_episodes, testing_episodes, test_cycles,
                render_during_training, render_during_validation,
                render_during_test, workspace_path, __file__, logger, None,
                experiment_iterations_number)
 # Run boltzmann experiment for low reward
 run_experiment(experiment_boltzmann_low_reward, training_episodes,
                max_training_episodes, episode_length_max,
                validation_episodes, testing_episodes, test_cycles,
                render_during_training, render_during_validation,
                render_during_test, workspace_path, __file__, logger, None,
                experiment_iterations_number)
 # Run epsilon greedy experiment for high reward
 run_experiment(experiment_egreedy_high_reward, training_episodes,
                max_training_episodes, episode_length_max,
                validation_episodes, testing_episodes, test_cycles,
                render_during_training, render_during_validation,
                render_during_test, workspace_path, __file__, logger, None,
                experiment_iterations_number)