Example No. 1
0
    # Sample the simulation to build a model of its transition dynamics.
    # (RW is presumably a random-walk simulation created in the enclosing
    #  scope -- confirm against the surrounding function.)
    get_sim = Model( RW, build_initial_model=True )

    get_sim.collect_transition_data( num_det_calls=100, num_stoic_calls=10000 )

    # Print the simulation's state-hash layout.
    RW.layout.s_hash_print()

    #get_sim.num_calls_layout_print()
    #get_sim.min_num_calls_layout_print()
    
    # Build a baseline environment shaped like the simulation's state layout.
    env = EnvBaseline( s_hash_rowL=RW.s_hash_rowL, 
                       x_axis_label=RW.x_axis_label, 
                       y_axis_label=RW.y_axis_label )
                       
    # Copy the sampled transition data from the model into the environment.
    get_sim.add_all_data_to_an_environment( env )

    # Solve the environment with dynamic-programming value iteration
    # (discounted, gamma=0.9; stops at max_iter or when change < err_delta).
    policy, state_value = dp_value_iteration( env, do_summ_print=True, fmt_V='%.3f', fmt_R='%.1f',
                                              max_iter=1000, err_delta=0.0001, 
                                              gamma=0.9, iteration_prints=10)
                                  
    # Save a diagram image of the resulting policy.
    policy.save_diagram( RW, inp_colorD=None, save_name='dp_rw1000_policy',
                         show_arrows=False, scale=0.5, h_over_w=0.8,
                         show_terminal_labels=False)

    # NOTE(review): start_time is presumably set earlier in the enclosing scope -- confirm.
    print( 'Total Time =',time.time() - start_time )

    # Persist environment, state values, and policy for later reuse.
    pickle_esp.save_to_pickle_file( fname='dp_soln_to_randwalk_1000', 
                                    env=env, state_values=state_value, policy=policy)



Example No. 2
0
                    show_start_policy=True,
                    max_iter=1000,
                    err_delta=0.0001,
                    gamma=0.9)

# Color map for the policy diagram: each state label gets a matplotlib
# color code; states of equal absolute value share the same color.
# Insertion order matches the original literal (5..0, then -5..-1).
diag_colorD = dict(zip(
    ['5', '4', '3', '2', '1', '0', '-5', '-4', '-3', '-2', '-1'],
    ['r', 'g', 'b', 'c', 'y', 'w', 'r',  'g',  'b',  'c',  'y'],
))

# Save a diagram image of the policy, colored with diag_colorD.
# (policy/env are presumably produced by the DP call above this fragment --
#  confirm against the full example.)
policy.save_diagram(env,
                    inp_colorD=diag_colorD,
                    save_name='car_rental_var_rtn',
                    show_arrows=False,
                    scale=0.25,
                    h_over_w=0.8)

# Persist environment, state values, and policy for later reuse.
pickle_esp.save_to_pickle_file(fname='dp_car_rental_PI',
                               env=env,
                               state_values=state_value,
                               policy=policy)
Example No. 3
0
from introrl.utils import pickle_esp

# Evaluate an equiprobable random policy on the 1000-state random walk:
# sample the simulation to learn a model, pour the model into an
# EnvBaseline, run iterative policy evaluation, then pickle the result.
RW = RandomWalk_1000Simulation()

model = Model(RW, build_initial_model=True)
model.collect_transition_data(num_det_calls=100, num_stoic_calls=10000)
print('Model Built')

# Build an EnvBaseline from the simulation and load the sampled data into it.
env = EnvBaseline(
    s_hash_rowL=RW.s_hash_rowL,
    x_axis_label=RW.x_axis_label,
    y_axis_label=RW.y_axis_label,
)
model.add_all_data_to_an_environment(env)

# Start from V(s) = 0 everywhere and an equiprobable random policy.
state_value = StateValues(env)
state_value.init_Vs_to_zero()

policy = Policy(environment=env)
policy.intialize_policy_to_equiprobable(env=env)  # "intialize" [sic] -- introrl API spelling

# Iterative policy evaluation (undiscounted, gamma=1.0).
dp_policy_evaluation(
    policy,
    state_value,
    do_summ_print=True,
    max_iter=1000,
    err_delta=0.0001,
    gamma=1.0,
)

pickle_esp.save_to_pickle_file(
    fname='random_walk_1000_PI_eval',
    env=env,
    state_values=state_value,
    policy=policy,
)
Example No. 4
0
    # Build a baseline environment shaped like the simulation's state layout.
    # (CR is presumably a car-rental simulation created in the enclosing
    #  scope -- confirm against the surrounding function.)
    env = EnvBaseline(s_hash_rowL=CR.s_hash_rowL,
                      x_axis_label=CR.x_axis_label,
                      y_axis_label=CR.y_axis_label)

    # Copy the sampled transition data from the model into the environment.
    get_sim.add_all_data_to_an_environment(env)

    #env.save_to_pickle_file('car_rental')
    #print('Saved env to *.env_pickle file')

    print('built environment')
    print('_' * 55)

    #env.summ_print()
    # Solve the environment with dynamic-programming value iteration
    # (discounted, gamma=0.9; stops at max_iter or when change < err_delta).
    policy, state_value = dp_value_iteration(env,
                                             do_summ_print=True,
                                             fmt_V='%.1f',
                                             fmt_R='%.1f',
                                             max_iter=1000,
                                             err_delta=0.0001,
                                             gamma=0.9,
                                             iteration_prints=10)

    # NOTE(review): start_time is presumably set earlier in the enclosing scope -- confirm.
    print('Total Time =', time.time() - start_time)

    #env.save_to_pickle_file('car_rental')

    # Persist environment, state values, and policy for later reuse.
    pickle_esp.save_to_pickle_file(fname='car_rental_sim_to_env_const_rtn',
                                   env=env,
                                   state_values=state_value,
                                   policy=policy)
Example No. 5
0
# Copy the sampled transition data from the model into the environment.
# (get_sim and env are presumably created earlier, outside this fragment --
#  confirm against the full example.)
get_sim.add_all_data_to_an_environment(env)

print('built environment')
print('_' * 55)

#env.summ_print()
# Solve the environment with dynamic-programming value iteration
# (discounted, gamma=0.9; stops at max_iter or when change < err_delta).
policy, state_value = dp_value_iteration(env,
                                         do_summ_print=True,
                                         fmt_V='%.1f',
                                         fmt_R='%.1f',
                                         max_iter=1000,
                                         err_delta=0.0001,
                                         gamma=0.9,
                                         iteration_prints=10)

# Save a diagram image of the resulting policy.
# (BJ is presumably a blackjack simulation, given the save_name -- confirm.)
policy.save_diagram(BJ,
                    inp_colorD=None,
                    save_name='dp_blackjack_policy',
                    show_arrows=False,
                    scale=0.5,
                    h_over_w=0.8,
                    show_terminal_labels=False)

# NOTE(review): start_time is presumably set earlier, outside this fragment -- confirm.
print('Total Time =', time.time() - start_time)

# Persist environment, state values, and policy for later reuse.
pickle_esp.save_to_pickle_file(fname='dp_soln_to_blackjack',
                               env=env,
                               state_values=state_value,
                               policy=policy)