def run_trial(planning_horizon, world_size=None, episodes=None):
    """Run one Monte-Carlo learning trial and collect per-episode returns.

    Parameters
    ----------
    planning_horizon : int
        Horizon handed to the ``Planner`` used by this trial.
    world_size : int, optional
        Number of blocks in the generated blocks world. Defaults to the
        module-level ``blocks_world_size`` (keeps existing call sites
        working unchanged).
    episodes : int, optional
        Number of learning episodes. Defaults to the module-level
        ``number_of_episodes``.

    Returns
    -------
    pandas.DataFrame
        Columns ``episode`` (0..n-1) and ``observed_returns`` (the
        per-episode returns recorded by ``MonteCarlo``).
    """
    # Fall back to the module-level experiment configuration when the
    # caller does not override it — backward compatible with the original
    # global-reading behavior.
    if world_size is None:
        world_size = blocks_world_size
    if episodes is None:
        episodes = number_of_episodes

    builder = BlocksWorldBuilder(world_size)
    control = SimpleMonteCarloControl()
    trial_planner = Planner(planning_horizon)
    mc = MonteCarlo(
        builder,
        trial_planner,
        control=control,
        # Episode cap scales with world size (original heuristic: 2x).
        max_episode_length=world_size * 2,
        planning_factor=0,
        plan_on_empty_policy=True,
        exploring_starts=True,
        exploring_factor=0,
    )
    mc.learn_policy(
        number_episodes=episodes,
        show_progress_bar=True,
        evaluate_return_ratio=False,
    )

    return pd.DataFrame({
        'episode': range(len(mc.returns)),
        'observed_returns': mc.returns,
    })
import os
import sys

# Make sure the path of the framework is included in the import path.
sys.path.insert(
    0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from tests import test_policy
from MonteCarlo import MonteCarlo
from mdp import BlocksWorldBuilder
from control import SimpleMonteCarloControl, SgdMonteCarloControl
from planner import Planner
from matplotlib import pyplot as plt

# Experiment setup: a 7-block world with a planning horizon of 5, driven
# by plain (non-SGD) Monte-Carlo control.
mdp_builder = BlocksWorldBuilder(blocks_world_size=7)
planner = Planner(planning_horizon=5)
ctrl = SimpleMonteCarloControl()

# Pure model-free learning (planning_factor=0) with exploring starts;
# episodes are capped at 14 steps (twice the world size).
mc = MonteCarlo(mdp_builder, planner,
                control=ctrl,
                max_episode_length=14,
                planning_factor=0,
                plan_on_empty_policy=True,
                exploring_starts=True,
                exploring_factor=0.0)

# Train for 150 episodes and keep the resulting greedy policy.
learned_policy = mc.learn_policy(number_episodes=150, show_progress_bar=True)