Example #1
def run_Experiment(DP=None, QL=None):
    """
    Runs experiment using DP, QL or both.
    Creates new directory automatically
    Save result summary to summary file
    :param DP: [prob_file(just name of file, not path), output_path]
    :param QL: [, .....]
    :return:
    """

    # Path information
    output_path, exp_num = create_new_dir()  #dirs Exp/1, Exp/2, ...
    DP_path = join(output_path, 'DP')  #dirs Exp/1/DP
    QL_path = join(output_path, 'QL')  #dirs Exp/1/QL
    print("************  Exp ", exp_num, "************ \n")

    # Exp_summary_data
    method = get_method_str(DP, QL)
    exp_summary = [str(exp_num), method]

    # Run DP
    if DP is not None:
        print("In Runner: Executing DP !!")

        prob_file = DP[0]
        createFolder(DP_path)
        # output_params = [V_so, mean, variance, bad_count]
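        # Note: setup_grid_params, output_file and threshold are not defined
        # in this function; they are assumed to come from module scope.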
        output_params = run_DP(setup_grid_params,
                               prob_file,
                               output_file,
                               DP_path,
                               threshold=threshold)
        """CHANGE ARGUMENT if return order of setup_grid() is changed"""
        input_params = setup_grid_params[9].copy()
        input_params.append(prob_file)

        exp_summary = append_params_to_summary(exp_summary, input_params,
                                               output_params)
        append_summary_to_summaryFile('Experiments/Exp_summary.csv',
                                      exp_summary)
        print("In Runner: Executing DP Finished!!")

    # Run QL
    if QL is not None:
        print("In Runner: Executing QL !!")

        QL_params = QL
        createFolder(QL_path)
        output_parameters_all_cases = run_QL(setup_grid_params, QL_params,
                                             QL_path, exp_num)
        # run_QL(setup_grid_params, QL_params, QL_path)

        print("In Runner: Executing QL Finished !!")
Example #2
def write_files(transition_dict, filename, data):
    """
    Pickles dictionary contaniing model details.
    Writes parameters to file.
    Writes parameters to summary file
    :param transition_dict:
    :param filename:
    :return:
    """

    summary_file = base_path + 'model_summary.csv'
    params, param_str, reward_structure, build_time = data

    createFolder(save_path)

    # save transition_probs. Pickle it.
    with open(save_path + '/' + filename + '.p', 'wb') as fp:
        pickle.dump(transition_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)

    with open(save_path + '/' + filename + '_params.txt', 'w') as f:
        for i in range(len(param_str)):
            f.write(param_str[i] + ':' + '    ' + str(params[i]) + "\n")
        f.write("Reward Structure: " + str(reward_structure) + "\n")
        f.write("Build Time: " + str(build_time))
Example #3
def run_QL(setup_grid_params, QL_params, QL_path):

    exp = QL_path

    #Parameters
    # Training_traj_size_list = [1000, 2000, 3000, 4000, 5000]
    # ALPHA_list = [0.2, 0.35, 0.5, 0.75]
    # esp0_list = [0.25, 0.5, 0.75]

    # with_guidance = True
    # Training_traj_size_list = [5000]
    # ALPHA_list = [0.5]
    # esp0_list = [0.5]
    # QL_Iters = int(1000)
    #
    # init_Q = -1000000
    #     stream_speed = 0.2

    Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters, init_Q, with_guidance, method, num_passes = QL_params

    #Read data from files
    g, xs, ys, X, Y, Vx_rzns, Vy_rzns, num_rzns, paths, params, param_str = setup_grid_params
    print("In TQLearn: ", len(params), params)
    num_actions, nt, dt, F, startpos, endpos = params

    total_cases = len(Training_traj_size_list) * len(ALPHA_list) * len(
        esp0_list)
    str_Params = [
        'with_guidance', 'Training_traj_size_list', 'ALPHA_list', 'esp0_list',
        'QL_Iters', 'num_actions', 'init_Q', 'dt', 'F'
    ]
    Params = [
        with_guidance, Training_traj_size_list, ALPHA_list, esp0_list,
        QL_Iters, num_actions, init_Q, dt, F
    ]
    Param_filename = exp + '/Params.txt'
    outputfile = open(Param_filename, 'w+')
    for i in range(len(Params)):
        print(str_Params[i] + ':  ', Params[i], file=outputfile)
    outputfile.close()

    #Create Sub-directories for different hyper parameters
    for eps_0 in esp0_list:
        for ALPHA in ALPHA_list:
            for dt_size in Training_traj_size_list:
                directory = exp + '/dt_size_' + str(dt_size) + '/ALPHA_' + str(
                    ALPHA) + '/eps_0_' + str(eps_0)
                createFolder(directory)

    case = 0
    start = time.time()
    for eps_0 in esp0_list:
        for ALPHA in ALPHA_list:
            for dt_size in Training_traj_size_list:

                dir_path = exp + '/dt_size_' + str(dt_size) + '/ALPHA_' + str(
                    ALPHA) + '/eps_0_' + str(eps_0) + '/'
                case += 1
                print("*******  CASE: ", case, '/', total_cases, '*******')
                print("with_guidance= ", with_guidance)
                print('eps_0 = ', eps_0)
                print('ALPHA =', ALPHA)
                print('dt_size = ', dt_size)

                #Reset Variables and environment
                if with_guidance:
                    Q, N = initialise_guided_Q_N(g, init_Q, init_Q / 2, 1)
                else:
                    Q, N = initialise_Q_N(g, init_Q, 1)
                g.set_state(g.start_state)

                #Learn Policy From Trajectory Data
                if dt_size != 0:
                    Q, policy, max_delQ_list_1 = Learn_policy_from_data(
                        paths,
                        g,
                        Q,
                        N,
                        Vx_rzns,
                        Vy_rzns,
                        num_of_paths=dt_size,
                        num_actions=num_actions,
                        ALPHA=ALPHA,
                        method=method,
                        num_passes=num_passes)
                    plot_max_Qvalues(Q, policy, X, Y)
                    #Save policy
                    Policy_path = dir_path + 'Policy_01'

                    #Plot Policy
                    # Fig_policy_path = dir_path+'Fig_'+ 'Policy01'+'.png'
                    label_data = [F, ALPHA, init_Q, QL_Iters]
                    QL_params = policy, Q, init_Q, label_data, dir_path
                    plot_learned_policy(g, QL_params=QL_params)
                    # plot_all_policies(g, Q, policy, init_Q, label_data, full_file_path= Fig_policy_path )

                    writePolicytoFile(policy, Policy_path)
                    plot_max_delQs(max_delQ_list_1,
                                   filename=dir_path + 'delQplot1')
                else:
                    if with_guidance:
                        policy = initialise_policy_from_initQ(Q)
                    else:
                        policy = initialise_policy(g)

                #Times and Trajectories based on data and/or guidance
                t_list1, G0_list1, bad_count1 = plot_exact_trajectory_set(
                    g,
                    policy,
                    X,
                    Y,
                    Vx_rzns,
                    Vy_rzns,
                    exp,
                    fname=dir_path + 'Trajectories_before_exp')

                #Learn from Experience
                Q, policy, max_delQ_list_2 = Q_learning_Iters(Q,
                                                              N,
                                                              g,
                                                              policy,
                                                              Vx_rzns,
                                                              Vy_rzns,
                                                              alpha=ALPHA,
                                                              QIters=QL_Iters,
                                                              eps_0=eps_0)

                #save Updated Policy
                Policy_path = dir_path + 'Policy_02'
                # Fig_policy_path = dir_path + 'Fig_' + 'Policy02' + '.png'

                writePolicytoFile(policy, Policy_path)
                # plot_learned_policy(g, Q, policy, init_Q, label_data, Iters_after_update=QL_Iters, full_file_path= Fig_policy_path )

                # plots after Experience
                plot_max_delQs(max_delQ_list_2,
                               filename=dir_path + 'delQplot2')
                t_list2, G0_list2, bad_count2 = plot_exact_trajectory_set(
                    g,
                    policy,
                    X,
                    Y,
                    Vx_rzns,
                    Vy_rzns,
                    exp,
                    fname=dir_path + 'Trajectories_after_exp')

                #Results to be printed
                # avg_time1 = np.mean(t_list1)
                # std_time1 = np.std(t_list1)
                # avg_G01 = np.mean(G0_list1)
                # avg_time2 = np.mean(t_list2)
                # std_time2 = np.std(t_list2)
                # avg_G02 = np.mean(G0_list2)
                avg_time1, std_time1, _, _ = calc_mean_and_std(t_list1)
                avg_G01, _, _, _ = calc_mean_and_std(G0_list1)
                avg_time2, std_time2, _, _ = calc_mean_and_std(t_list2)
                avg_G02, _, _, _ = calc_mean_and_std(G0_list2)

                if QL_Iters != 0:
                    bad_count1 = (bad_count1,
                                  str(bad_count1 * 100 / dt_size) + '%')
                    bad_count2 = (bad_count2,
                                  str(bad_count2 * 100 / dt_size) + '%')

                #Print results to file
                str_Results1 = [
                    'avg_time1', 'std_time1', 'bad_count1', 'avg_G01'
                ]
                Results1 = [avg_time1, std_time1, bad_count1, avg_G01]
                str_Results2 = [
                    'avg_time2', 'std_time2', 'bad_count2', 'avg_G02'
                ]
                Results2 = [avg_time2, std_time2, bad_count2, avg_G02]

                Result_filename = dir_path + 'Results.txt'
                outputfile = open(Result_filename, 'w+')
                print("Before Experince ", file=outputfile)
                for i in range(len(Results1)):
                    print(str_Results1[i] + ':  ',
                          Results1[i],
                          file=outputfile)

                print(end="\n" * 3, file=outputfile)
                print("After Experince ", file=outputfile)
                for i in range(len(Results2)):
                    print(str_Results2[i] + ':  ',
                          Results2[i],
                          file=outputfile)

                print(end="\n" * 3, file=outputfile)
                print("Parameters: ", file=outputfile)
                for i in range(len(Params)):
                    print(str_Params[i] + ':  ', Params[i], file=outputfile)
                outputfile.close()

                #Print out times to file
                TrajTimes_filename = dir_path + 'TrajTimes1.txt'
                outputfile = open(TrajTimes_filename, 'w+')
                print(t_list1, file=outputfile)
                outputfile.close()

                Returns_filename = dir_path + 'G0list1.txt'
                outputfile = open(Returns_filename, 'w+')
                print(G0_list1, file=outputfile)
                outputfile.close()

                TrajTimes_filename = dir_path + 'TrajTimes2.txt'
                outputfile = open(TrajTimes_filename, 'w+')
                print(t_list2, file=outputfile)
                outputfile.close()

                Returns_filename = dir_path + 'G0list2.txt'
                outputfile = open(Returns_filename, 'w+')
                print(G0_list2, file=outputfile)
                outputfile.close()

                end = time.time()
                time_taken = round(end - start, 2)
                # Terminal print
                print('time_taken= ', time_taken, 's', end="\n" * 3)

                start = end  # reset the timer so time_taken covers one case
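
# --- Refactoring sketch (assumption, not in the original): the case directory
# path above is assembled twice with string concatenation; a small helper
# keeps the two constructions in sync. Purely illustrative.
from os.path import join

def case_dir(exp, dt_size, ALPHA, eps_0):
    """Return the sub-directory used for one (dt_size, ALPHA, eps_0) case."""
    return join(exp, 'dt_size_' + str(dt_size),
                'ALPHA_' + str(ALPHA), 'eps_0_' + str(eps_0))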
Example #4
def plot_all_policies(g,
                      Q,
                      policy,
                      init_Q,
                      label_data,
                      showfig=False,
                      Iters_after_update=None,
                      full_file_path=None):

    createFolder(full_file_path)

    # set grid
    fig1 = plt.figure(figsize=(10, 10))
    ax1 = fig1.add_subplot(1, 1, 1)

    F, stream_speed, ALPHA, initq, QIters = label_data

    ax1.text(0.1, 9, 'F=(%s)' % F, fontsize=12)
    ax1.text(0.1, 8, 'ALPHA=(%s)' % ALPHA, fontsize=12)
    ax1.text(0.1, 7, 'initq=(%s)' % initq, fontsize=12)
    ax1.text(0.1, 6, 'QIters=(%s)' % QIters, fontsize=12)

    minor_xticks = np.arange(g.xs[0] - 0.5 * g.dj, g.xs[-1] + 2 * g.dj, g.dj)
    minor_yticks = np.arange(g.ys[0] - 0.5 * g.di, g.ys[-1] + 2 * g.di, g.di)

    major_xticks = np.arange(g.xs[0], g.xs[-1] + 2 * g.dj, 5 * g.dj)
    major_yticks = np.arange(g.ys[0], g.ys[-1] + 2 * g.di, 5 * g.di)

    ax1.set_xticks(minor_xticks, minor=True)
    ax1.set_yticks(minor_yticks, minor=True)
    ax1.set_xticks(major_xticks)
    ax1.set_yticks(major_yticks)

    ax1.grid(which='major', color='#CCCCCC', linestyle='')
    ax1.grid(which='minor', color='#CCCCCC', linestyle='--')
    ax1.scatter(g.xs[g.start_state[2]],
                g.ys[g.ni - 1 - g.start_state[1]],
                c='g')
    ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')

    xtr = []
    ytr = []
    ax_list = []
    ay_list = []

    for s in Q.keys():
        t, i, j = s
        for a in Q[s].keys():
            if Q[s][a] != init_Q and a == policy[s]:
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                # print("test", s, a_policy)
                ax, ay = action_to_quiver(a)
                ax_list.append(ax)
                ay_list.append(ay)
                # print(i,j,g.xs[j], g.ys[g.ni - 1 - i], ax, ay)

    plt.quiver(xtr, ytr, ax_list, ay_list)
    filename = full_file_path + '/policy@t'
    fig1.savefig(filename, dpi=300)
    if showfig:
        plt.show()
    return
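
# --- Sketch (assumption, not in the original): a minimal action_to_quiver(),
# assuming an action is a heading angle in radians. The real helper used above
# may differ; this only illustrates the expected (dx, dy) output for plt.quiver.
import math

def action_to_quiver(a):
    """Map a heading-angle action to unit (dx, dy) arrow components."""
    return math.cos(a), math.sin(a)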
Example #5
def run_QL(setup_grid_params, QL_params, QL_path, exp_num):

    exp = QL_path

    Training_traj_size_list, ALPHA_list, esp0_list, QL_Iters_multiplier_list, init_Q, with_guidance, method, num_passes_list, eps_dec_method, N_inc = QL_params

    #Read data from files
    #setup_params (from setup_grid.py)= [num_actions, nt, dt, F, startpos, endpos]
    g, xs, ys, X, Y, vel_field_data, nmodes, useful_num_rzns, paths, setup_params, setup_param_str = setup_grid_params
    print("In TQLearn: ", len(setup_params), setup_params)
    num_actions, nt, dt, F, startpos, endpos = setup_params

    #print QL Parameters to file
    total_cases = len(Training_traj_size_list) * len(ALPHA_list) * len(
        esp0_list) * len(num_passes_list) * len(QL_Iters_multiplier_list)
    str_Params = [
        'with_guidance', 'Training_traj_size_list', 'ALPHA_list', 'esp0_list',
        'QL_Iters_multiplier_list', 'num_actions', 'init_Q', 'dt', 'F'
    ]
    Params = [
        with_guidance, Training_traj_size_list, ALPHA_list, esp0_list,
        QL_Iters_multiplier_list, num_actions, init_Q, dt, F
    ]
    Param_filename = exp + '/Params.txt'
    outputfile = open(Param_filename, 'w+')
    for i in range(len(Params)):
        print(str_Params[i] + ':  ', Params[i], file=outputfile)
    outputfile.close()

    #Create Sub-directories for different hyper parameters
    for num_passes in num_passes_list:
        for QL_Iters_x in QL_Iters_multiplier_list:
            for eps_0 in esp0_list:
                for ALPHA in ALPHA_list:
                    for dt_size in Training_traj_size_list:
                        directory = exp + '/num_passes_' + str(
                            num_passes) + '/QL_Iter_x' + str(
                                QL_Iters_x) + '/dt_size_' + str(
                                    dt_size) + '/ALPHA_' + str(
                                        ALPHA) + '/eps_0_' + str(eps_0)
                        createFolder(directory)

    case = 0  # initialise case counter. Each case is one combination of num_passes, QL_Iters_x, eps_0, ALPHA and dt_size
    output_parameters_all_cases = []  # output params of run_QL for every case

    t_start_RUN_QL = time.time()
    query_state = (58, 20, 41)
    for num_passes in num_passes_list:
        for QL_Iters_x in QL_Iters_multiplier_list:
            for eps_0 in esp0_list:
                for ALPHA in ALPHA_list:
                    for dt_size in Training_traj_size_list:

                        # test_size = useful_num_rzns - dt_size   # number of trajectories to be used for testing
                        t_start_case = time.time()
                        dir_path = exp + '/num_passes_' + str(
                            num_passes) + '/QL_Iter_x' + str(
                                QL_Iters_x) + '/dt_size_' + str(
                                    dt_size) + '/ALPHA_' + str(
                                        ALPHA) + '/eps_0_' + str(eps_0) + '/'
                        case += 1
                        QL_Iters = QL_Iters_x * dt_size
                        print("*******  CASE: ", case, '/', total_cases,
                              '*******')
                        print("num_passes= ", num_passes)
                        print("QL_Iters_x= ", QL_Iters_x)
                        print("with_guidance= ", with_guidance)
                        print('eps_0 = ', eps_0)
                        print('ALPHA =', ALPHA)
                        print('dt_size = ', dt_size)
                        print("N_inc= ", N_inc)
                        print("num_actions= ", num_actions)

                        # get respective indices of trajectories for training and testing:
                        train_id_list, test_id_list, train_id_set, test_id_set, goodlist = get_rzn_ids_for_training_and_testing(
                        )

                        print("$$$$ check in TQ : train_id_list",
                              train_id_list[0:20])
                        # print("test_size= ", test_size)
                        print("len_goodlist \n", len(goodlist))

                        # Reset variables and environment
                        # (Re)initialise Q and N based on the with_guidance parameter
                        # HCparams
                        if with_guidance:
                            Q, N = initialise_guided_Q_N(
                                g, init_Q, init_Q / 2,
                                1)  #(g, init_Qval, guiding_Qval,  init_Nval)
                        else:
                            Q, N = initialise_Q_N(
                                g, init_Q, 1)  #(g, init_Qval, init_Nval)

                        g.set_state(g.start_state)
                        print("Q and N intialised!")

                        print("$$$$ CHECK Q[g.start_state]= ",
                              Q[g.start_state])
                        print_sorted_Qs_kvs(g, Q, query_state)
                        # Learn Policy From Trajectory Data
                        # If trajectory data is given, learn from it. Otherwise just initialise a policy and go to the refinement step; the latter becomes model-free QL.
                        if dt_size != 0:
                            # for n_intrleave in range(Num_interleaves):
                            # Q, N, policy, max_delQ_list_1 = Learn_policy_from_data(paths, g, Q, N, vel_field_data, nmodes, train_id_list, N_inc, num_actions =num_actions, ALPHA=ALPHA, method = method, num_passes = num_passes//Num_interleaves)
                            # Q, N, policy, max_delQ_list_2 = Q_learning_Iters(Q, N, g, policy, vel_field_data, nmodes, train_id_list, N_inc, alpha=ALPHA, QIters=QL_Iters//Num_interleaves,
                            # eps_0=eps_0, eps_dec_method = eps_dec_method)
                            Q, N, policy, max_delQ_list_1 = Learn_policy_from_data(
                                paths,
                                g,
                                Q,
                                N,
                                vel_field_data,
                                nmodes,
                                train_id_list,
                                N_inc,
                                num_actions=num_actions,
                                ALPHA=ALPHA,
                                method=method,
                                num_passes=num_passes)

                            print("Learned Policy from data")

                            #Save policy
                            Policy_path = dir_path + 'Policy_01'
                            picklePolicy(policy, Policy_path)
                            print("Policy written to file")

                            # plot_max_Qvalues(Q, policy, X, Y, fpath = dir_path, fname = 'max_Qvalues', showfig = True)
                            print("Plotted max Qvals")

                            #Plot Policy
                            # Fig_policy_path = dir_path+'Fig_'+ 'Policy01'+'.png'
                            label_data = [F, ALPHA, init_Q, QL_Iters]
                            QL_params_plot = policy, Q, init_Q, label_data, dir_path, 'pol_plot_1'
                            plot_learned_policy(g, QL_params=QL_params_plot)
                            # plot_all_policies(g, Q, policy, init_Q, label_data, full_file_path= Fig_policy_path )

                            #plot max_delQ
                            plot_max_delQs(max_delQ_list_1,
                                           filename=dir_path + 'delQplot1')
                            print("plotted learned policy and max_delQs")

                            print_sorted_Qs_kvs(g, Q, query_state)

                        else:
                            if with_guidance:
                                policy = initialise_policy_from_initQ(Q)
                            else:
                                policy = initialise_policy(g)

                        # Times and Trajectories based on data and/or guidance
                        t_list1, G0_list1, bad_count1 = plot_exact_trajectory_set(
                            g,
                            policy,
                            X,
                            Y,
                            vel_field_data,
                            nmodes,
                            train_id_set,
                            test_id_set,
                            goodlist,
                            fpath=dir_path,
                            fname='Trajectories_before_exp')
                        print("plotted exacte trajectory set")

                        # Policy Refinement Step: Learn from Experience
                        # NOTE: the refinement call below is currently commented out
                        # Q, N, policy, max_delQ_list_2 = Q_learning_Iters(Q, N, g, policy, vel_field_data, nmodes, train_id_list, N_inc, alpha=ALPHA, QIters=QL_Iters,
                        #                             eps_0=eps_0, eps_dec_method = eps_dec_method)

                        print("Policy refined")
                        #save Updated Policy
                        Policy_path = dir_path + 'Policy_02'
                        # Fig_policy_path = dir_path + 'Fig_' + 'Policy02' + '.png'

                        picklePolicy(policy, Policy_path)
                        QL_params_plot = policy, Q, init_Q, label_data, dir_path, 'pol_plot_2'
                        plot_learned_policy(g, QL_params=QL_params_plot)
                        print("Refined policy written to file")

                        # plots after Experience
                        # plot_max_delQs(max_delQ_list_2, filename= dir_path + 'delQplot2' )
                        t_list2, G0_list2, bad_count2 = plot_exact_trajectory_set(
                            g,
                            policy,
                            X,
                            Y,
                            vel_field_data,
                            nmodes,
                            train_id_set,
                            test_id_set,
                            goodlist,
                            fpath=dir_path,
                            fname='Trajectories_after_exp')
                        t_list3, G0_list3, bad_count3 = plot_and_return_exact_trajectory_set_train_data(
                            g,
                            policy,
                            X,
                            Y,
                            vel_field_data,
                            nmodes,
                            train_id_list,
                            fpath=dir_path,
                            fname='Train_Trajectories_after_exp')
                        t_list4, G0_list4, bad_count4 = plot_and_return_exact_trajectory_set_train_data(
                            g,
                            policy,
                            X,
                            Y,
                            vel_field_data,
                            nmodes,
                            test_id_list,
                            fpath=dir_path,
                            fname='Test_Trajectories_after_exp')
                        print(
                            "plotted max delQs and exact traj set AFTER REFINEMENT"
                        )

                        picklePolicy(Q, dir_path + 'Q2')
                        picklePolicy(N, dir_path + 'N2')

                        print_sorted_Qs_kvs(g, Q, query_state)

                        #Results to be printed
                        # avg_time1 = np.mean(t_list1)
                        # std_time1 = np.std(t_list1)
                        # avg_G01 = np.mean(G0_list1)
                        # avg_time2 = np.mean(t_list2)
                        # std_time2 = np.std(t_list2)
                        # avg_G02 = np.mean(G0_list2)
                        avg_time1, std_time1, cnt1, none_cnt1, none_cnt_perc1 = calc_mean_and_std_train_test(
                            t_list1, train_id_set, test_id_set)
                        avg_G01, _, _, _, _ = calc_mean_and_std_train_test(
                            G0_list1, train_id_set, test_id_set)

                        avg_time2, std_time2, cnt2, none_cnt2, none_cnt_perc2 = calc_mean_and_std_train_test(
                            t_list2, train_id_set, test_id_set)
                        avg_G02, _, _, _, _ = calc_mean_and_std_train_test(
                            G0_list2, train_id_set, test_id_set)

                        overall_bad_count1 = 'dummy_init'
                        overall_bad_count2 = 'dummy_init'
                        if QL_Iters != 0:
                            overall_bad_count1 = (
                                bad_count1,
                                str(bad_count1 * 100 / dt_size) + '%')
                            overall_bad_count2 = (
                                bad_count2,
                                str(bad_count2 * 100 / dt_size) + '%')

                        t_end_case = time.time()
                        case_runtime = round((t_end_case - t_start_case) / 60,
                                             2)  #mins

                        #Print results to file
                        picklePolicy(train_id_list, dir_path + 'train_id_list')
                        picklePolicy(test_id_list, dir_path + 'test_id_list')

                        str_Results1 = [
                            'avg_time1', 'std_time1', 'overall_bad_count1',
                            'avg_G01'
                        ]
                        Results1 = [
                            avg_time1, std_time1, overall_bad_count1, avg_G01
                        ]
                        str_Results2 = [
                            'avg_time2', 'std_time2', 'overall_bad_count2',
                            'avg_G02'
                        ]
                        Results2 = [
                            avg_time2, std_time2, overall_bad_count2, avg_G02
                        ]

                        Result_filename = dir_path + 'Results.txt'
                        outputfile = open(Result_filename, 'w+')
                        print("Before Experince ", file=outputfile)
                        for i in range(len(Results1)):
                            print(str_Results1[i] + ':  ',
                                  Results1[i],
                                  file=outputfile)

                        print(end="\n" * 3, file=outputfile)
                        print("After Experince ", file=outputfile)
                        for i in range(len(Results2)):
                            print(str_Results2[i] + ':  ',
                                  Results2[i],
                                  file=outputfile)

                        print(end="\n" * 3, file=outputfile)
                        print("Parameters: ", file=outputfile)
                        for i in range(len(Params)):
                            print(str_Params[i] + ':  ',
                                  Params[i],
                                  file=outputfile)
                        outputfile.close()

                        #Print out times to file
                        TrajTimes_filename = dir_path + 'TrajTimes1.txt'
                        outputfile = open(TrajTimes_filename, 'w+')
                        print(t_list1, file=outputfile)
                        outputfile.close()

                        Returns_filename = dir_path + 'G0list1.txt'
                        outputfile = open(Returns_filename, 'w+')
                        print(G0_list1, file=outputfile)
                        outputfile.close()

                        TrajTimes_filename = dir_path + 'TrajTimes2.txt'
                        outputfile = open(TrajTimes_filename, 'w+')
                        print(t_list2, file=outputfile)
                        outputfile.close()

                        Returns_filename = dir_path + 'G0list2.txt'
                        outputfile = open(Returns_filename, 'w+')
                        print(G0_list2, file=outputfile)
                        outputfile.close()

                        output_paramaters_ith_case = [
                            exp_num,
                            method,
                            num_actions,
                            nt,
                            dt,
                            F,
                            startpos,
                            endpos,
                            eps_0,
                            ALPHA,
                            eps_dec_method,
                            N_inc,
                            dt_size,
                            with_guidance,
                            init_Q,
                            num_passes,
                            QL_Iters,
                            avg_time1[0],
                            std_time1[0],
                            avg_G01[0],
                            none_cnt1[0],
                            cnt1[0],
                            none_cnt_perc1[0],  #train stats
                            avg_time2[0],
                            std_time2[0],
                            avg_G02[0],
                            none_cnt2[0],
                            cnt2[0],
                            none_cnt_perc2[0],  #train stats
                            avg_time1[1],
                            std_time1[1],
                            avg_G01[1],
                            none_cnt1[1],
                            cnt1[1],
                            none_cnt_perc1[1],  #test stats
                            avg_time2[1],
                            std_time2[1],
                            avg_G02[1],
                            none_cnt2[1],
                            cnt2[1],
                            none_cnt_perc2[1],  # test stats
                            overall_bad_count1,
                            overall_bad_count2,
                            case_runtime
                        ]
                        # Exp No	Method	Num_actions	nt	dt	F	start_pos	end_pos	Eps_0	ALPHA	dt_size_(train_size)	V[start_pos]	Mean_Time_over_5k	Variance_Over_5K	Bad Count	DP_comput_time	Mean_Glist
                        # collected for the return value; the summary row is also appended to the CSV directly below
                        output_parameters_all_cases.append(
                            output_paramaters_ith_case)
                        print("output_paramaters_ith_case\n")
                        print(output_paramaters_ith_case)
                        append_summary_to_summaryFile(
                            join(ROOT_DIR, 'Experiments/Exp_summary_QL.csv'),
                            output_paramaters_ith_case)
                        picklePolicy(output_paramaters_ith_case,
                                     join(dir_path, 'output_paramaters'))
                        RUN_QL_elpased_time = round(
                            (time.time() - t_start_RUN_QL) / 60, 2)
                        #Terminal Print
                        print('Case_runtime= ', case_runtime)
                        print('RUN_QL_elpased_time= ',
                              RUN_QL_elpased_time,
                              ' mins',
                              end="\n" * 3)

    t_end_RUN_QL = time.time()
    RUN_QL_runtime = round((t_end_RUN_QL - t_start_RUN_QL) / 60, 2)
    print("RUN_QL_runtime: ", RUN_QL_runtime, " mins")

    return output_parameters_all_cases
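
# --- Sketch (assumption, not in the original): a minimal
# append_summary_to_summaryFile(), writing one case's output parameters as a
# CSV row. The actual helper used above may differ.
import csv

def append_summary_to_summaryFile(summary_file, row):
    """Append a single summary row to the experiment summary CSV."""
    with open(summary_file, 'a', newline='') as f:
        csv.writer(f).writerow(row)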
Example #6
def plot_learned_policy(g,
                        DP_params=None,
                        QL_params=None,
                        vel_field_data=None,
                        showfig=False):
    """
    Plots learned policy
    :param g: grid object
    :param DP_params: [policy, filepath]
    :param QL_params: [policy, Q, init_Q, label_data, filepath]  - details mentioned below
    :param showfig: whether you want to see fig during execution
    :return:
    """
    """
    QL_params:
    :param Q: Leared Q against which policy is plotted. This is needed just for a check in the QL case. TO plot policy only at those states which have been updated
    :param policy: Learned policy.
    :param init_Q: initial value for Q. Just like Q, required only for the QL policy plot
    :param label_data: Labels to put on fig. Currently requiered only for QL
    """
    # TODO: check QL part for this DG3
    # full_file_path = ROOT_DIR
    if DP_params is None and QL_params is None:
        print("Nothing to plot! Enter either DP or QL params !")
        return

    # set grid
    fig1 = plt.figure(figsize=(10, 10))
    ax1 = fig1.add_subplot(1, 1, 1)

    minor_xticks = np.arange(g.xs[0] - 0.5 * g.dj, g.xs[-1] + 2 * g.dj, g.dj)
    minor_yticks = np.arange(g.ys[0] - 0.5 * g.di, g.ys[-1] + 2 * g.di, g.di)

    major_xticks = np.arange(g.xs[0], g.xs[-1] + 2 * g.dj, 5 * g.dj)
    major_yticks = np.arange(g.ys[0], g.ys[-1] + 2 * g.di, 5 * g.di)

    ax1.set_xticks(minor_xticks, minor=True)
    ax1.set_yticks(minor_yticks, minor=True)
    ax1.set_xticks(major_xticks)
    ax1.set_yticks(major_yticks)

    ax1.grid(which='major', color='#CCCCCC', linestyle='')
    ax1.grid(which='minor', color='#CCCCCC', linestyle='--')
    xtr = []
    ytr = []
    ax_list = []
    ay_list = []

    if QL_params is not None:
        policy, Q, init_Q, label_data, full_file_path, fname = QL_params
        F, ALPHA, initq, QIters = label_data
        ax1.text(0.1, 9, 'F=(%s)' % F, fontsize=12)
        ax1.text(0.1, 8, 'ALPHA=(%s)' % ALPHA, fontsize=12)
        ax1.text(0.1, 7, 'initq=(%s)' % initq, fontsize=12)
        ax1.text(0.1, 6, 'QIters=(%s)' % QIters, fontsize=12)
        for s in Q.keys():
            t, i, j = s
            # for a in Q[s].keys():
            # if s[t]%2==0: # to print policy at time t = 0
            a = policy[s]
            if not (Q[s][a] == init_Q / 2 or Q[s][a]
                    == init_Q):  # to plot policy of only updated states
                # t, i, j = s
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                # print("test", s, a_policy)
                ax, ay = action_to_quiver(a)
                ax_list.append(ax)
                ay_list.append(ay)
                # print(i,j,g.xs[j], g.ys[g.ni - 1 - i], ax, ay)

        plt.quiver(xtr, ytr, ax_list, ay_list)
        ax1.scatter(g.xs[g.start_state[2]],
                    g.ys[g.ni - 1 - g.start_state[1]],
                    c='g')
        ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')

        fig1.savefig(full_file_path + fname + '.png', dpi=150)
        if showfig:
            plt.show()
        plt.cla()
        plt.close(fig1)

    if DP_params is not None:
        policy, full_file_path = DP_params
        policy_plot_folder = createFolder(join(full_file_path, 'policy_plots'))

        for tt in range(g.nt - 1):
            ax_list = []
            ay_list = []
            vnetx_list = []
            vnety_list = []
            xtr = []
            ytr = []

            for s in g.ac_state_space(time=tt):
                a = policy[s]
                t, i, j = s
                xtr.append(g.xs[j])
                ytr.append(g.ys[g.ni - 1 - i])
                # print("test", s, a_policy)
                ax, ay = action_to_quiver(a)
                # if you enter vel_field_data, then the "net" "mean" vector will be plotted.
                if vel_field_data is not None:
                    vx = vel_field_data[0][t, i, j]
                    vy = vel_field_data[1][t, i, j]
                    vnetx = ax + vx
                    vnety = ay + vy
                    vnetx_list.append(vnetx)
                    vnety_list.append(vnety)
                ax_list.append(ax)
                ay_list.append(ay)

            plt.quiver(xtr, ytr, ax_list, ay_list)
            ax1.scatter(g.xs[g.start_state[2]],
                        g.ys[g.ni - 1 - g.start_state[1]],
                        c='g')
            ax1.scatter(g.xs[g.endpos[1]], g.ys[g.ni - 1 - g.endpos[0]], c='r')
            if showfig:
                plt.show()
            fig1.savefig(full_file_path + '/policy_plots/policy_plot_t' +
                         str(tt),
                         dpi=150)
            plt.clf()
            fig1.clf()

            if vel_field_data is not None:
                plt.quiver(xtr, ytr, vnetx_list, vnety_list)
                ax1.scatter(g.xs[g.start_state[2]],
                            g.ys[g.ni - 1 - g.start_state[1]],
                            c='g')
                ax1.scatter(g.xs[g.endpos[1]],
                            g.ys[g.ni - 1 - g.endpos[0]],
                            c='r')
                fig1.savefig(full_file_path + '/policy_plots/vnet_plot_t' +
                             str(tt),
                             dpi=150)
                plt.clf()
                fig1.clf()

    return
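
# --- Sketch (assumption, not in the original): the (i, j) -> plot-coordinate
# conversion repeated throughout the plotting code above, factored into a
# helper. Assumes the grid object exposes xs, ys and ni as used above.
def grid_to_xy(g, i, j):
    """Convert grid indices (row i, column j) to plotting coordinates.
    Row i is flipped because row 0 corresponds to the top of the grid."""
    return g.xs[j], g.ys[g.ni - 1 - i]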