Beispiel #1
0
def get_Model_from_COO(coo_filename,
                       rs_filename,
                       filename='Transition_dict',
                       n_actions=1,
                       nt=None,
                       dt=None,
                       F=None,
                       startpos=None,
                       endpos=None):
    """Turn COO / reward data into a transition dictionary and save it.

    The grid is created with ``setup_grid``, an empty dictionary is prepared
    with ``initialise_dict`` and filled by ``convert_COO_to_dict``; the result
    is handed to ``write_files`` together with the grid parameters, the
    grid's reward structure and the build time.

    Args:
        coo_filename: Forwarded unchanged to ``convert_COO_to_dict``
            (presumably identifies the COO data -- confirm against that
            helper).
        rs_filename: Forwarded unchanged to ``convert_COO_to_dict``
            (presumably identifies the reward data -- confirm against that
            helper).
        filename: Prefix of the output name; ``str(n_actions) + 'a'`` is
            appended to it.
        n_actions: Passed to ``setup_grid`` as ``num_actions``.
        nt, dt, F, startpos, endpos: Accepted but not used by this function.

    Returns:
        None. If the output location already exists, nothing is built and
        the function returns right after printing a notice.

    Side effects:
        Rebinds the module globals ``base_path`` and ``save_path`` (always)
        and ``state_list`` (only when the model is actually built).
    """
    global state_list, base_path, save_path

    print("Building Model")
    clock_start = time.time()

    # Only the grid object and the two parameter records are used below, but
    # the call is still required to yield exactly eleven values.
    (g, _xs, _ys, _X, _Y, _Vx_rzns, _Vy_rzns, _num_rzns, _path_mat, params,
     param_str) = setup_grid(num_actions=n_actions)

    # Name of the pickle output holding the transition probabilities.
    filename += str(n_actions) + 'a'
    base_path = join(getcwd(), 'DP/Trans_matxs/')
    save_path = base_path + filename
    if exists(save_path):
        # Never overwrite an existing model.
        print("Folder Already Exists !!")
        return

    # Build the probability transition dictionary.
    state_list = g.ac_state_space()
    empty_dict = initialise_dict(g)
    transition_dict = convert_COO_to_dict(empty_dict, g, coo_filename,
                                          rs_filename)
    build_time = time.time() - clock_start

    # Persist the dictionary along with its metadata.
    data = (params, param_str, g.reward_structure, build_time)
    write_files(transition_dict, filename, data)
    total_time = time.time() - clock_start

    # Command-line summary.
    print("Dictionary saved !")
    print("Build Time = ", build_time / 60, " mins")
    print("Total TIme = ", total_time / 60, "mins")
Beispiel #2
0
        print("In Runner: Executing QL Finished !!")


# # use argparse to run the code from the command line
# parser = argparse.ArgumentParser(description='Take parameters as input args.')
# # parser.add_argument('num_passes', type=int, help='number of passes for learning data from trajectories')
# # parser.add_argument('QL_Iters', type=int, help='number of QL iters in refinement phase')
# # parser.add_argument('eps0_list', metavar='eps0_list', type=float, nargs='+',help='eps0_list')
# # parser.add_argument('eps_dec_method', type=int, help='1= dec to 0.05 eps0; 2= dec to 0.5eps0')
# # parser.add_argument('N_inc', type=float, help='increment parameter for Nsa')
# parser.add_argument('dt_size', type=int, help='training data size 0-5000')

# args = parser.parse_args()

# Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters, init_Q, with_guidance = QL_params
model_file = 'DG_model_2500_train_id_list_1_3D_60nT_a16'

# Build the 16-action grid once; the raw result is kept under its own name
# and then unpacked into its eleven components.
setup_grid_params = setup_grid(num_actions=16)
(g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params,
 param_str) = setup_grid_params

# Parameters for QL
# Training data size: a single entry, the length of train_id_list.
Training_traj_size_list = [len(train_id_list)]

# Alternative sweep: ALPHA_list = [0.05, 0.5, 1]
ALPHA_list = [0.05]

# Alternative sweeps:
# esp0_list = [0.33, 0.66, 1]
# esp0_list = args.eps0_list
esp0_list = [0.1]
Beispiel #3
0
# print("mean= ", summ/cnt)
# print("cnt = ", cnt)
# print("pfail or badcount% = ", cnt/n)

import time
import numpy as np
from utils.custom_functions import picklePolicy, calc_mean_and_std, read_pickled_File, read_pickled_File
import pickle
from utils.plot_functions import plot_exact_trajectory_set, plot_learned_policy, plot_exact_trajectory_set_DP
from definition import ROOT_DIR
import math
from utils.setup_grid import setup_grid
from os.path import join

# Build the 16-action grid; the call must yield exactly eleven values.
(g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, params,
 param_str) = setup_grid(num_actions=16)

# Directory of the experiment case (relative to the project root). The
# pickled policy is read from it and the same path is passed on as the
# output location.
# rel_path = 'Experiments/106/DP'
rel_path = 'Experiments/26/DP'
exp_num_case_dir = join(ROOT_DIR, rel_path)
output_path = exp_num_case_dir

# Ids 2500..2549 are handed to the trajectory plotter below.
test_id_list = list(range(2500, 2550))

policy = read_pickled_File(exp_num_case_dir + '/policy')

(t_list_all, t_list_reached, G_list,
 bad_count_tuple) = plot_exact_trajectory_set_DP(
     g,
     policy,
     X,
     Y,
     vel_field_data,
     test_id_list,
     output_path,
     fname='Test_Traj_set' + '_')
badcount = bad_count_tuple[0]

# Plot Policy
print("plot policy")
Beispiel #4
0
def build_sparse_transition_model(filename='Transition_dict',
                                  n_actions=1,
                                  nt=None,
                                  dt=None,
                                  F=None,
                                  startpos=None,
                                  endpos=None,
                                  Test_grid=False):
    """Build a transition-probability dictionary on the GPU and save it.

    Steps: create the grid with ``setup_grid``, pack the scalar settings into
    a float32 ``params`` array, allocate device buffers and copy the inputs
    over, call ``build_sparse_transition_model_at_T`` once per timestep,
    merge the per-timestep results, convert them with
    ``convert_COO_to_dict`` and store the dictionary with ``write_files``.

    Args:
        filename: Prefix of the output name; a ``2D_a`` / ``3D_<nT>nT_a`` tag
            and ``n_actions`` are appended.
        n_actions: Passed to ``setup_grid`` as ``num_actions``.
        nt: Passed to ``setup_grid``.
        dt, F: Ignored as arguments; both names are rebound from
            ``setup_params`` inside the function.
        startpos, endpos: Not used by this function.
        Test_grid: Passed to ``setup_grid``.

    Returns:
        None. Returns early (after the grid has been set up) when
        ``save_path`` already exists.

    Side effects:
        Rebinds the module globals ``state_list``, ``base_path`` and
        ``save_path``.
    """

    global state_list
    global base_path
    global save_path

    print("Building Sparse Model")
    # NOTE(review): this timestamp is overwritten before the device
    # allocations further down, so it never contributes to build_time.
    t1 = time.time()
    #setup grid
    g, xs, ys, X, Y, Vx_rzns, Vy_rzns, num_rzns, path_mat, setup_params, setup_param_str = setup_grid(
        num_actions=n_actions, nt=nt, Test_grid=Test_grid)
    # The bare string below is a no-op expression statement; it preserves an
    # older set of hard-coded values for reference only.
    """
    # Prepare Data
    nT = 1  # total no. of time steps
    is_stationary = 1  # 0 is false. any other number is true. is_stationry = 0 (false) means that flow is NOT stationary
    #  and S2 will be indexed by T+1. if is_stationary = x (true), then S2 is indexed by 0, same as S1.
    # list_size = 10     #predefined size of list for each S2
    gsize = 100  # size of grid along 1 direction. ASSUMING square grid.
    num_actions = 16
    nrzns = 5000
    bDimx = 1000
    F = 1
    dt = 1
    r_outbound = -100
    r_terminal = 100
    T = 0  # time index of vrzns
    i_term = 20  # terminal state indices
    j_term = 50
    """
    # Reference from setup_grid:
    # setup_params = [num_actions, nt, dt, F, startpos, endpos]
    nT = setup_params[1]  # total no. of time steps TODO: check default value
    is_stationary = 1  # 0 is false, any other number is true. is_stationary = 0 (false) means that the flow is NOT stationary
    #  and S2 will be indexed by T+1. If is_stationary = x (true), then S2 is indexed by 0, same as S1.
    # list_size = 10     # predefined size of list for each S2
    # if nt > 1:
    #     is_stationary = 0
    gsize = g.ni  # size of grid along 1 direction. ASSUMING square grid.
    num_actions = setup_params[0]
    nrzns = num_rzns
    # bDimx is capped at 1000 and otherwise equals the realisation count.
    bDimx = nrzns  # for small test cases
    if nrzns >= 1000:
        bDimx = 1000  # for large problems
    dt = setup_params[2]
    F = setup_params[3]
    r_outbound = g.r_outbound
    r_terminal = g.r_terminal
    i_term = g.endpos[0]  # terminal state indices
    j_term = g.endpos[1]
    dummyT = 0  # placeholder stored in params[7]

    # name of the output pickle file containing transition probabilities in dictionary format
    if nT > 1:
        prefix = '3D_' + str(nT) + 'nT_a'
    else:
        prefix = '2D_a'
    filename = filename + prefix + str(n_actions)  #TODO: change filename
    base_path = join(ROOT_DIR, 'DP/Trans_matxs/')
    save_path = base_path + filename
    # Do not rebuild a model that has already been saved.
    if exists(save_path):
        print("Folder Already Exists !!\n")
        return

    # All scalar settings travel as one float32 array; the positions below
    # are the layout that gets copied to the device.
    params = np.array([
        gsize, num_actions, nrzns, F, dt, r_outbound, r_terminal, dummyT,
        i_term, j_term, nT, is_stationary
    ]).astype(np.float32)
    st_sp_size = (gsize**2) * nT  # size of total state space
    # NOTE(review): this flag is never read; the call in the loop passes a
    # literal False instead.
    save_file_for_each_a = False

    # cpu initialisations.
    # dummy initialisations, used only to size the gpu buffers
    vxrzns = np.zeros((nrzns, gsize, gsize), dtype=np.float32)
    vyrzns = np.zeros((nrzns, gsize, gsize), dtype=np.float32)
    results = -1 * np.ones(((gsize**2) * nrzns), dtype=np.float32)
    sumR_sa = np.zeros(st_sp_size).astype(np.float32)
    Tdummy = np.zeros(2, dtype=np.float32)

    #  informational initialisations
    # NOTE(review): linspace includes its endpoint by default, so the first
    # and last angle are 0 and 2*pi -- confirm that this is intended.
    ac_angles = np.linspace(0, 2 * pi, num_actions, dtype=np.float32)
    ac_angle = ac_angles[0].astype(np.float32)
    # xs = np.arange(gsize, dtype=np.float32)
    # ys = np.arange(gsize, dtype=np.float32)
    xs = xs.astype(np.float32)
    ys = ys.astype(np.float32)
    print("params: \n", params, "\n\n")

    # Start of the interval reported as build_time.
    t1 = time.time()
    # allocates memory on gpu. vxrzns and vyrzns need to be allocated just once and will be overwritten for each timestep
    vxrzns_gpu = cuda.mem_alloc(vxrzns.nbytes)
    vyrzns_gpu = cuda.mem_alloc(vyrzns.nbytes)
    # NOTE(review): ac_angles_gpu is never written to or passed on, and
    # ac_angle_gpu is filled below but not passed on either.
    ac_angles_gpu = cuda.mem_alloc(ac_angles.nbytes)
    ac_angle_gpu = cuda.mem_alloc(ac_angle.nbytes)
    xs_gpu = cuda.mem_alloc(xs.nbytes)
    ys_gpu = cuda.mem_alloc(ys.nbytes)
    params_gpu = cuda.mem_alloc(params.nbytes)
    T_gpu = cuda.mem_alloc(Tdummy.nbytes)

    # copies the host arrays into the allocated memory on the gpu
    cuda.memcpy_htod(ac_angle_gpu, ac_angle)
    cuda.memcpy_htod(xs_gpu, xs)
    cuda.memcpy_htod(ys_gpu, ys)
    cuda.memcpy_htod(params_gpu, params)

    for T in range(nT):
        print("Computing data for timestep, T = ", T, '\n')
        # params[7] = T
        # cuda.memcpy_htod(params_gpu, params)
        Tdummy[0] = T  # current timestep, sent to the device below
        # Load Velocities
        # vxrzns = np.zeros((nrzns, gsize, gsize), dtype = np.float32)
        # # expecting to see probs of 0.5 in stream area
        # for i in range(int(nrzns/2)):
        #     vxrzns[i,int(gsize/2 -1):int(gsize/2 +1),:] = 1
        # vyrzns = np.zeros((nrzns, gsize, gsize), dtype = np.float32)
        # vxrzns = np.load('/home/rohit/Documents/Research/ICRA_2020/DDDAS_2D_Highway/Input_data_files/Velx_5K_rlzns.npy')
        # vyrzns = np.load('/home/rohit/Documents/Research/ICRA_2020/DDDAS_2D_Highway/Input_data_files/Vely_5K_rlzns.npy')
        # The same Vx_rzns / Vy_rzns arrays are converted and uploaded on
        # every iteration; nothing here selects a slice for timestep T.
        vxrzns = Vx_rzns
        vyrzns = Vy_rzns
        vxrzns = vxrzns.astype(np.float32)
        vyrzns = vyrzns.astype(np.float32)
        # Tdummy is already float32, so this only makes a fresh copy.
        Tdummy = Tdummy.astype(np.float32)

        # TODO: sanity check on dimensions: compare loaded matrix shape with gsize, numrzns

        # copy loaded velocities to gpu
        cuda.memcpy_htod(vxrzns_gpu, vxrzns)
        cuda.memcpy_htod(vyrzns_gpu, vyrzns)
        cuda.memcpy_htod(T_gpu, Tdummy)

        # Returns two values that are treated below as per-action lists
        # (see the comment about full_coo_list / full_Rs_list).
        coo_list_a, Rs_list_a = build_sparse_transition_model_at_T(
            T,
            T_gpu,
            vxrzns_gpu,
            vyrzns_gpu,
            params,
            bDimx,
            params_gpu,
            xs_gpu,
            ys_gpu,
            ac_angles,
            results,
            sumR_sa,
            save_file_for_each_a=False)

        # print("R_s_a0 \n", Rs_list_a[0][0:200])

        # TODO: end loop over timesteps here and concatenate COOs and R_sas over timesteps for each action
        # full_coo_list and full_Rs_list are lists with each element containing coo and R_s for an action of the same index
        # The first timestep seeds the accumulators; later ones are merged in.
        if T > 0:
            full_coo_list_a, full_Rs_list_a = concatenate_results_across_time(
                coo_list_a, Rs_list_a, full_coo_list_a, full_Rs_list_a)
            # TODO: finish concatenate...() function
        else:
            full_coo_list_a = coo_list_a
            full_Rs_list_a = Rs_list_a

    t2 = time.time()
    build_time = t2 - t1

    #save data to file
    # data = setup_params, setup_param_str, g.reward_structure, build_time
    # write_files(full_coo_list_a, filename + '_COO', data)
    # pickleFile(full_coo_list_a, save_path + '/' + filename + '_COO')
    # pickleFile(full_Rs_list_a, save_path + '/' + filename + '_Rsa')
    # print("Pickled sparse files !")

    #build probability transition dictionary
    # NOTE(review): if nT is smaller than 1 the loop body never runs and the
    # full_* names used below are unbound.
    state_list = g.ac_state_space()
    init_transition_dict = initialise_dict(g)
    transition_dict = convert_COO_to_dict(init_transition_dict, g,
                                          full_coo_list_a, full_Rs_list_a)

    #save dictionary to file
    data = setup_params, setup_param_str, g.reward_structure, build_time
    write_files(transition_dict, filename, data)


# t1 = time.time()
# build_sparse_transition_model(filename = 'Highway_', n_actions = 16, nt = 1)
# t2 = time.time()
# print("Time for 1 timestep = ", t2 - t1, " secs")
def build_sparse_transition_model(filename='Transition_dict',
                                  n_actions=16,
                                  all_Yi=None,
                                  wanted_nrzns=5000,
                                  nt=None):
    """Build the time-dependent transition dictionary on the GPU and save it.

    Steps: create the grid with ``setup_grid``, upload the velocity-field
    arrays and ``all_Yi`` to the device, call
    ``build_sparse_transition_model_at_T`` once per timestep, merge the
    per-timestep results with ``concatenate_results_across_time``, convert
    them with ``convert_COO_to_dict`` and write the dictionary plus the raw
    COO / reward lists below ``save_path``.

    Args:
        filename: Prefix of the output name; a ``2D_a`` / ``3D_<nT>nT_a`` tag
            and ``n_actions`` are appended.
        n_actions: Passed to ``setup_grid`` as ``num_actions``.
        all_Yi: Array whose ``shape`` has exactly three entries (presumably
            timesteps x realisations x modes -- confirm with the caller).
            It replaces the last element of the velocity data returned by
            ``setup_grid``. Required, despite the ``None`` default.
        wanted_nrzns: Realisation count stored in ``params`` and used to size
            the result buffer.
        nt: Optional number of timesteps to build; honoured only when it
            does not exceed the timestep count reported by ``setup_grid``.

    Returns:
        None. Returns early (after the grid has been set up) when
        ``save_path`` already exists.

    Raises:
        ValueError: If ``all_Yi`` is ``None`` or the resolved number of
            timesteps is smaller than 1.

    Side effects:
        Rebinds the module globals ``state_list``, ``base_path`` and
        ``save_path``.
    """
    global state_list
    global base_path
    global save_path

    # Fail fast with a clear message instead of an AttributeError on
    # ``None.shape`` after the (potentially slow) grid setup.
    if all_Yi is None:
        raise ValueError("all_Yi must be provided; got None")

    print("Building Sparse Model")
    #setup grid
    print("input to build_sparse_trans_model:\n")
    print("n_actions", n_actions)

    # setup_grid must yield exactly eleven values; only some are needed here.
    (g, xs, ys, _X, _Y, vel_field_data, _nmodes, _num_rzns, _path_mat,
     setup_params, setup_param_str) = setup_grid(num_actions=n_actions)

    print("xs: ", xs)
    print("ys", ys)

    # ------- IMPORTANT ----------
    # The last element of vel_field_data is dropped on purpose: the all_Yi
    # argument is used in its place.
    all_u_mat, all_v_mat, all_ui_mat, all_vi_mat, _ = vel_field_data

    # nmodes is taken from all_Yi, not from setup_grid.
    check_nt, _check_nrzns, nmodes = all_Yi.shape

    all_u_mat = all_u_mat.astype(np.float32)
    all_v_mat = all_v_mat.astype(np.float32)
    all_ui_mat = all_ui_mat.astype(np.float32)
    all_vi_mat = all_vi_mat.astype(np.float32)
    all_Yi = all_Yi.astype(np.float32)

    # Reference from setup_grid:
    # setup_params = [num_actions, nt, dt, F, startpos, endpos]
    nT = setup_params[1]  # total no. of time steps TODO: check default value
    print("****CHECK: ", nt, nT, check_nt)
    # If the caller's nt fits within nT from the parameter file, build the
    # model for nt timesteps only.
    if nt is not None and nt <= nT:
        nT = nt
    # 0 means the flow is NOT stationary and S2 is indexed by T+1; any other
    # value means stationary, with S2 indexed by 0 like S1.
    is_stationary = 0
    gsize = g.ni  # size of grid along 1 direction. ASSUMING square grid.
    num_actions = setup_params[0]
    nrzns = wanted_nrzns
    # Equal to nrzns for small test cases, capped at 1000 for large problems.
    bDimx = min(nrzns, 1000)
    dt = setup_params[2]
    F = setup_params[3]
    r_outbound = g.r_outbound
    r_terminal = g.r_terminal
    i_term = g.endpos[0]  # terminal state indices
    j_term = g.endpos[1]

    # Name of the output pickle holding the transition probabilities.
    if nT > 1:
        prefix = '3D_' + str(nT) + 'nT_a'
    else:
        prefix = '2D_a'
    filename = filename + prefix + str(n_actions)  #TODO: change filename
    base_path = join(ROOT_DIR, 'DP/Trans_matxs_3D/')
    save_path = base_path + filename
    # Do not rebuild a model that has already been saved.
    if exists(save_path):
        print("Folder Already Exists !!\n")
        return

    # Without at least one timestep the loop below would never produce the
    # per-action lists that the conversion step needs.
    if nT < 1:
        raise ValueError(
            "number of timesteps must be at least 1, got {}".format(nT))

    # TODO: remove z from params. It is only padding used for checks.
    z = -9999
    params = np.array([
        gsize, num_actions, nrzns, F, dt, r_outbound, r_terminal, nmodes,
        i_term, j_term, nT, is_stationary
    ] + [z] * 20).astype(np.float32)
    st_sp_size = (gsize**2)  # size of spatial state space
    print("check stsp_size", gsize, nT, st_sp_size)
    save_file_for_each_a = False

    print("params")
    print("gsize ", params[0], "\n", "num_actions ", params[1], "\n", "nrzns ",
          params[2], "\n", "F ", params[3], "\n", "dt ", params[4], "\n",
          "r_outbound ", params[5], "\n", "r_terminal ", params[6], "\n",
          "nmodes ", params[7], "\n", "i_term ", params[8], "\n", "j_term ",
          params[9], "\n", "nT", params[10], "\n", "is_stationary ",
          params[11], "")

    # Host-side buffers handed to the per-timestep builder.
    results = -1 * np.ones(((gsize**2) * nrzns), dtype=np.float32)
    sumR_sa = np.zeros(st_sp_size).astype(np.float32)
    Tdummy = np.zeros(2, dtype=np.float32)

    # Action headings; endpoint=False keeps 2*pi from duplicating angle 0.
    ac_angles = np.linspace(0,
                            2 * pi,
                            num_actions,
                            endpoint=False,
                            dtype=np.float32)
    print("action angles:\n", ac_angles)

    xs = xs.astype(np.float32)
    ys = ys.astype(np.float32)
    print("params: \n", params, "\n\n")

    # Start of the interval reported as build_time.
    t1 = time.time()

    # Upload the inputs once; they are reused for every timestep.
    vel_data_gpu = [
        _upload_to_gpu(host_array)
        for host_array in (all_u_mat, all_v_mat, all_ui_mat, all_vi_mat,
                           all_Yi)
    ]
    xs_gpu = _upload_to_gpu(xs)
    ys_gpu = _upload_to_gpu(ys)
    params_gpu = _upload_to_gpu(params)
    # The timestep buffer is refreshed inside the loop.
    T_gpu = cuda.mem_alloc(Tdummy.nbytes)

    full_coo_list_a = None
    full_Rs_list_a = None
    for T in range(nT):
        print("*** Computing data for timestep, T = ", T, '\n')
        # TODO: sanity check on dimensions: compare loaded matrix shape with gsize, numrzns
        Tdummy[0] = T
        cuda.memcpy_htod(T_gpu, Tdummy)

        print("pre func")

        coo_list_a, Rs_list_a = build_sparse_transition_model_at_T(
            T,
            T_gpu,
            vel_data_gpu,
            params,
            bDimx,
            params_gpu,
            xs_gpu,
            ys_gpu,
            ac_angles,
            results,
            sumR_sa,
            save_file_for_each_a=save_file_for_each_a)

        print("post func")

        # full_coo_list_a / full_Rs_list_a hold one entry per action; the
        # first timestep seeds them and later timesteps are merged in.
        if T > 0:
            full_coo_list_a, full_Rs_list_a = concatenate_results_across_time(
                coo_list_a, Rs_list_a, full_coo_list_a, full_Rs_list_a)
            # TODO: finish concatenate...() function
        else:
            full_coo_list_a = coo_list_a
            full_Rs_list_a = Rs_list_a

    build_time = time.time() - t1
    print("build_time ", build_time)

    # Build the probability transition dictionary.
    state_list = g.ac_state_space()
    init_transition_dict = initialise_dict(g)
    transition_dict = convert_COO_to_dict(init_transition_dict, g,
                                          full_coo_list_a, full_Rs_list_a)
    print("conversion COO to dict done")

    # Save the dictionary, then the raw per-action lists next to it.
    data = setup_params, setup_param_str, g.reward_structure, build_time
    write_files(transition_dict, filename, data)
    pickleFile(full_coo_list_a, save_path + '/' + filename + '_COO')
    pickleFile(full_Rs_list_a, save_path + '/' + filename + '_Rsa')


def _upload_to_gpu(host_array):
    """Allocate a device buffer sized for host_array, copy it over, return it."""
    device_buffer = cuda.mem_alloc(host_array.nbytes)
    cuda.memcpy_htod(device_buffer, host_array)
    return device_buffer
Beispiel #6
0
from utils.setup_grid import setup_grid
# Build the grid with setup_grid's default arguments; the call must yield
# exactly eleven values.
(g, xs, ys, X, Y, vel_field_data, nmodes, nrzns, paths, params,
 param_str) = setup_grid()

# Dump both X and Y, one after the other, then the shape of the first
# entry of paths.
print(X, Y, sep="\n")
print(paths[0, 0].shape)