Beispiel #1
0
def update():
    """Rebuild the grid world from the JSON payload in the request body.

    The payload must contain the keys ``size``, ``state_rewards_list``,
    ``blocked_states_list``, ``discount``, ``started``, ``values`` and
    ``policy``.  When ``started`` is truthy the MDP is constructed with the
    client's ``values`` and ``policy``; otherwise it is constructed without
    them (presumably falling back to MDP's own defaults -- confirm against
    the MDP class).

    Returns:
        A JSON string with the keys ``table`` (output of make_grid_world),
        ``values`` and ``policy`` (both converted with ``.tolist()``).

    Raises:
        KeyError: if any of the expected payload keys is missing.
    """
    data = json.loads(request.data)
    size = data['size']
    state_rewards_list = data['state_rewards_list']
    # JSON arrays decode to lists, which are unhashable; convert the
    # coordinates to tuples so they can be used as dict keys.
    state_rewards_dict = {tuple(k): v for k, v in state_rewards_list}
    blocked_states_list = [tuple(s) for s in data['blocked_states_list']]
    discount = data['discount']
    started = data['started']

    # Parsed unconditionally so a payload missing either key is rejected
    # regardless of the ``started`` flag.
    values = np.array(data['values'])
    policy = np.array(data['policy'])

    mdp_args = (state_rewards_dict, blocked_states_list, discount, size)
    if started:
        # Continue from the state the client already holds.
        mdp = MDP(*(mdp_args + (values, policy)))
    else:
        mdp = MDP(*mdp_args)

    table = make_grid_world(mdp.states, mdp.get_total_rewards(), mdp.policy,
                            mdp.blocked_states_list)

    return json.dumps({'table': table, 'values': mdp.values.tolist(),
                       'policy': mdp.policy.tolist()})
Beispiel #2
0
def policy_iteration_example():
    """Render ``policy_iteration_example.html`` with staged grid snapshots.

    A 5x5 MDP is created with a seeded random initial policy.  A grid is
    captured with make_grid_world after each step of the walkthrough
    (evaluate_policy_values, policy_evaluation, policy_improvement and
    finally policy_iteration), and all snapshots plus the MDP parameters
    are handed to the template.
    """
    size = 5
    state_rewards_dict = {(3, 3): 1, (0, 0): 1}
    blocked_states_list = [(2, 3), (2, 4), (2, 2)]
    discount = 0.9

    # Fixed seed: the same initial policy is drawn on every call.
    np.random.seed(443209)
    initial_policy = np.random.randint(0, 4, size=size ** 2)
    mdp = MDP(state_rewards_dict, blocked_states_list,
              discount, size=size, policy=initial_policy)

    def snapshot(total_rewards=False, **grid_options):
        # Reads the MDP at call time, so each call reflects the current step.
        return make_grid_world(
            mdp.states,
            mdp.get_total_rewards() if total_rewards else mdp.values,
            mdp.policy,
            mdp.blocked_states_list,
            **grid_options
        )

    context = {}

    # Stage 1: the freshly constructed MDP.
    context['value_table1'] = snapshot()
    context['policy_table1'] = snapshot(show_policy=True)

    # Intermediate: a single evaluate_policy_values() assignment.
    mdp.values = mdp.evaluate_policy_values()
    context['intermediate_table'] = snapshot()

    # Stages 2 and 3: policy_evaluation() followed by policy_improvement().
    for stage in ('2', '3'):
        mdp.policy_evaluation()
        context['value_table' + stage] = snapshot()
        mdp.policy_improvement()
        context['policy_table' + stage] = snapshot(show_policy=True)

    # Stage 4: after policy_iteration(), shown with the total rewards.
    mdp.policy_iteration()
    context['value_table4'] = snapshot(total_rewards=True)
    context['policy_table4'] = snapshot(total_rewards=True, show_policy=True)

    # Tuples become lists so the template receives plain nested lists.
    context['size'] = size
    context['state_rewards_list'] = [
        [list(cell), reward] for cell, reward in state_rewards_dict.items()
    ]
    context['blocked_states_list'] = [list(cell) for cell in blocked_states_list]
    context['discount'] = discount
    context['values'] = mdp.values.tolist()
    context['policy'] = mdp.policy.tolist()

    return render_template("policy_iteration_example.html", **context)
Beispiel #3
0
def value_iteration():
    """Render ``value_iteration.html`` with staged grid snapshots.

    Two MDPs are built.  A 5x5 one supplies the snapshots ``table1`` to
    ``table5``, ``value_table6`` and ``policy_table6``.  A 10x10 one
    supplies ``table`` together with the ``size``, ``state_rewards_list``,
    ``blocked_states_list``, ``discount``, ``values`` and ``policy``
    passed to the template.
    """
    def draw(model, total_rewards=False, **grid_options):
        # Reads the model at call time, so each call reflects its current state.
        return make_grid_world(
            model.states,
            model.get_total_rewards() if total_rewards else model.values,
            model.policy,
            model.blocked_states_list,
            **grid_options
        )

    context = {}

    # --- 5x5 walkthrough world -------------------------------------------
    demo_mdp = MDP({(3, 3): 1, (0, 0): 2}, [(2, 3), (2, 4), (2, 2)],
                   0.9, size=5)
    context['table1'] = draw(demo_mdp)

    # Three successive evaluate_values() assignments, one snapshot after each.
    for table_name in ('table2', 'table3', 'table4'):
        demo_mdp.values = demo_mdp.evaluate_values()
        context[table_name] = draw(demo_mdp)

    demo_mdp.value_iteration()
    context['table5'] = draw(demo_mdp)
    context['value_table6'] = draw(demo_mdp, total_rewards=True)
    context['policy_table6'] = draw(demo_mdp, total_rewards=True,
                                    show_policy=True)

    # --- 10x10 world whose parameters are passed to the template ----------
    size = 10
    state_rewards_dict = {(6, 6): 1, (0, 0): 1}
    blocked_states_list = [
        (2, 3), (1, 3), (0, 3),
        (4, 8), (5, 8), (6, 8),
        (5, 2), (6, 2), (7, 2), (8, 2), (8, 3), (8, 4),
    ]
    discount = 0.9
    mdp = MDP(state_rewards_dict, blocked_states_list,
              discount, size=size)

    context['table'] = draw(mdp, total_rewards=True)
    context['size'] = size
    # Tuples become lists so the template receives plain nested lists.
    context['state_rewards_list'] = [
        [list(cell), reward] for cell, reward in state_rewards_dict.items()
    ]
    context['blocked_states_list'] = [list(cell) for cell in blocked_states_list]
    context['discount'] = discount
    context['values'] = mdp.values.tolist()
    context['policy'] = mdp.policy.tolist()

    return render_template("value_iteration.html", **context)
Beispiel #4
0
def policy_iteration_step():
    """Apply one evaluate_policy_values() update to the client's MDP state.

    The request body is JSON with the keys ``size``, ``state_rewards_list``,
    ``blocked_states_list``, ``discount``, ``values`` and ``policy``.  An
    MDP is rebuilt from them, its values are replaced by the result of
    ``evaluate_policy_values()``, and the outcome is returned as a JSON
    string with the keys ``table``, ``values`` and ``policy``.
    """
    payload = json.loads(request.data)
    size = payload['size']

    # JSON arrays decode to lists; coordinates are converted to tuples so
    # they can serve as dict keys.
    rewards_by_state = {}
    for cell, reward in payload['state_rewards_list']:
        rewards_by_state[tuple(cell)] = reward

    blocked_cells = list(map(tuple, payload['blocked_states_list']))
    discount = payload['discount']

    current_values = np.array(payload['values'])
    current_policy = np.array(payload['policy'])

    mdp = MDP(rewards_by_state, blocked_cells,
              discount, size, current_values, current_policy)
    mdp.values = mdp.evaluate_policy_values()

    grid = make_grid_world(mdp.states, mdp.values, mdp.policy,
                           mdp.blocked_states_list)

    response = {
        'table': grid,
        'values': mdp.values.tolist(),
        'policy': mdp.policy.tolist(),
    }
    return json.dumps(response)
Beispiel #5
0
def index():
    """Render ``index.html`` with a 10x10 grid world.

    Builds an MDP from hard-coded rewards, blocked cells and discount, then
    passes the grid from make_grid_world and the MDP parameters to the
    template.
    """
    size = 10
    rewards_by_state = {(6, 6): 1, (0, 0): 1}
    blocked_cells = [
        (2, 3), (1, 3), (0, 3),
        (4, 8), (5, 8), (6, 8),
        (5, 2), (6, 2), (7, 2), (8, 2), (8, 3), (8, 4),
    ]
    discount = 0.9

    mdp = MDP(rewards_by_state, blocked_cells,
              discount, size=size)

    grid = make_grid_world(mdp.states, mdp.get_total_rewards(), mdp.policy,
                           mdp.blocked_states_list)

    # Tuples become lists so the template receives plain nested lists.
    context = {
        'table': grid,
        'size': size,
        'state_rewards_list': [
            [list(cell), reward] for cell, reward in rewards_by_state.items()
        ],
        'blocked_states_list': [list(cell) for cell in blocked_cells],
        'discount': discount,
        'values': mdp.values.tolist(),
        'policy': mdp.policy.tolist(),
    }
    return render_template("index.html", **context)