Example 1
def weak_scaling_flow():
    """
    Weak scaling analysis.
    """

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    # batch_folder = check_relative_path("data/scaling_output")
    base_figure_folder = check_relative_path("figures/")
    base_figure_folder = os.path.join(base_figure_folder, "weak_scaling")
    check_folder(base_figure_folder, verbose=verbose)
    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Builds the correct file list
    # weak_scaling_files = filter(
    #     lambda _f: "weak_scaling" in _f,
    #     os.listdir(batch_folder))

    # Assumed location of the timing JSON (see scaling_analysis below for
    # the full path setup)
    datapath = check_relative_path("data/scaling_output/run_times.json")
    with open(datapath, "r") as f:
        weak_scaling_times = json.load(f)["runs"]
    weak_scaling_times = filter(
        lambda _f: "weak_scaling" in _f["runname"], weak_scaling_times)

    # Splits into gen, io, flow
    gen_weak_scaling = filter(
        lambda _f: "gen" in _f["runname"], weak_scaling_times)
    io_weak_scaling = filter(
        lambda _f: "io" in _f["runname"], weak_scaling_times)
    flow_weak_scaling = filter(
        lambda _f: "flow" in _f["runname"], weak_scaling_times)
Example 2
def distribution_analysis():
    """Analysis for different SU3 epsilon matrix generation values."""
    default_params = get_default_parameters(data_batch_folder="temp")

    verbose = True
    use_pickle = False
    pickle_name = "distribution_analysis_data_pickle.pkl"

    ########## Distribution analysis ##########
    dist_eps = [0.05, 0.10, 0.20, 0.24, 0.30, 0.40, 0.60]

    def create_dist_batch_set(default_parameters, eps):
        def clean_str(s):
            return str("%-2.2f" % s).replace(".", "")

        dist_data_beta60_analysis = copy.deepcopy(default_parameters)

        # Ensuring that the distribution runs folder exist
        dist_data_beta60_analysis["batch_folder"] = (
            "../data/distribution_tests/distribution_runs")
        if not os.path.isdir(dist_data_beta60_analysis["batch_folder"]):
            dist_data_beta60_analysis["batch_folder"] = \
                os.path.join("..", dist_data_beta60_analysis["batch_folder"])

        dist_data_beta60_analysis["batch_name"] = \
            "distribution_test_eps{0:s}".format(clean_str(eps))
        dist_data_beta60_analysis["beta"] = 6.0
        dist_data_beta60_analysis["num_bins_per_int"] = 16
        dist_data_beta60_analysis["bin_range"] = [-2.1, 2.1]
        dist_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
        dist_data_beta60_analysis["NCfgs"] = get_num_observables(
            dist_data_beta60_analysis["batch_folder"],
            dist_data_beta60_analysis["batch_name"])
        dist_data_beta60_analysis["obs_file"] = "6_6.00"  # 6^3x12, beta=6.0
        dist_data_beta60_analysis["N"] = 6
        dist_data_beta60_analysis["NT"] = 12
        dist_data_beta60_analysis["color"] = "#377eb8"
        return dist_data_beta60_analysis

    dist_param_list = [
        create_dist_batch_set(default_params, _eps) for _eps in dist_eps
    ]

    # dist_param_list = dist_param_list[:2]

    # exit("not performing the regular pre-analysis.")

    # Submitting distribution analysis
    for analysis_parameters in dist_param_list:
        pre_analysis.pre_analysis(analysis_parameters)

    # Use post_analysis data for further analysis.
    data = {}
    for eps, param in zip(dist_eps, dist_param_list):
        print "Loading data for eps={0:.2f}".format(eps)
        data[eps] = post_analysis.PostAnalysisDataReader(
            [param], observables_to_load=param["observables"])

    # Plot topc
    distribution_plotter(data,
                         "topc",
                         r"$\sqrt{8t_f}$",
                         r"$\langle Q \rangle$",
                         verbose=verbose)

    # Plot topsus
    distribution_plotter(data,
                         "topsus",
                         r"$\sqrt{8t_f}$",
                         r"$\chi(\langle Q^2 \rangle)$",
                         verbose=verbose)

    # Plot plaq
    distribution_plotter(data,
                         "plaq",
                         r"$\sqrt{8t_f}$",
                         r"$\langle P \rangle$",
                         verbose=verbose)
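As a quick check of the batch-name convention above: clean_str drops the decimal point from epsilon, so the batches are named distribution_test_eps005 through distribution_test_eps060. A standalone reproduction of the nested helper (hypothetical, for illustration only):

def clean_str(s):
    return ("%-2.2f" % s).replace(".", "")

for eps in [0.05, 0.10, 0.20, 0.24, 0.30, 0.40, 0.60]:
    print("distribution_test_eps" + clean_str(eps))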
Example 3
def beta645_L32_analysis():
    from pre_analysis.pre_analyser import pre_analysis
    from post_analysis.post_analyser import post_analysis
    from default_analysis_params import get_default_parameters
    from tools.folderreadingtools import get_num_observables
    import copy
    import os

    # TODO: load b645 32^4 data
    # TODO: pre-analyse b645 32^4 data
    # TODO: compare b645 32^4 and b645 48^3*96 data in post analysis

    #### Different batches
    data_batch_folder = "../GluonAction/data9"
    data_batch_folder = "../GluonAction/data10"

    default_params = get_default_parameters(
        data_batch_folder=data_batch_folder)
    # obs_exlusions = ["plaq", "energy", "topc", "topc2", "topc4", "topcr", "topcMC", "topsus"]
    # obs_exlusions = ["energy", "topsus", "topsust", "topsuste", "topsusMC", "topsusqtq0"]

    # observables = observables_euclidean_time
    # observables = ["topsus", "topsust", "topsuste", "topsusMC", "topsusqtq0"]
    # observables = ["topsusMC"]
    # observables = ["topcr", "qtq0eff"]
    # observables = ["topcte"]
    # observables = ["topcr", "topsus"]
    # observables = ["topsust", "topsuste", "topsusqtq0"]
    # observables = ["topcrMC"]
    # observables = ["qtq0eff", "qtq0e"] + ["topsus", "topsust", "topsuste", "topsusMC", "topsusqtq0"]
    # observables = ["qtq0eff", "qtq0e"] + ["topsust", "topsuste", "topsusMC", "topsusqtq0"]
    # observables = ["topsus", "topsust", "topsuste", "topsusMC", "topsusqtq0"]
    # observables = ["topcr"]
    # exit("CHECK TOPCR!! @ 34 in main_analysis.py")
    # observables = ["topsuste"]
    # observables = ["qtq0effMC"]
    # observables = ["energy"]
    # observables = ["w_t_energy"]
    # observables = ["plaq", "energy", "topc", "topct"]

    # observables += ["energy"]
    # default_params["observables"] = observables

    #### Post analysis parameters
    line_fit_interval_points = 20
    # topsus_fit_targets = [0.3,0.4,0.5,0.58]
    # topsus_fit_targets = [0.3, 0.4, 0.5, 0.6] # tf = sqrt(8*t0)
    topsus_fit_targets = [0.6]
    energy_fit_target = 0.3

    #### Different batches
    # data_batch_folder = "../GluonAction/data8"
    # data_batch_folder = "../GluonAction/data10"
    # data_batch_folder = "../GluonAction/DataGiovanni"
    # data_batch_folder = "../data/topc_modes_8x16"

    # Method of continuum extrapolation.
    # Options: plateau, plateau_mean, nearest, interpolate, bootstrap
    extrapolation_methods = [
        "plateau", "plateau_mean", "nearest", "interpolate", "bootstrap"
    ]
    extrapolation_methods = ["plateau"]
    extrapolation_methods = ["bootstrap"]
    plot_continuum_fit = False

    # Topcr reference value. Options: [float], t0beta, article, t0
    topcr_t0 = "t0beta"

    # Number of different sectors we will analyse in euclidean time
    default_params["numsplits_eucl"] = 4
    intervals_eucl = [None, None, None, None]

    # Number of different sectors we will analyse in monte carlo time
    default_params["MC_time_splits"] = 4
    # MC_intervals = [[0, 1000], [500, 1000], [500, 1000], [175, 250]]
    MC_intervals = [None, None, None, None]

    # Extraction point in flow time a*t_f for q0 in qtq0
    q0_flow_times = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]  # [fermi]

    # Flow time indexes in percent to plot qtq0 in euclidean time at
    euclidean_time_percents = [0, 0.25, 0.50, 0.75, 1.00]
    # euclidean_time_percents = [0]

    # Data types to be looked at in the post-analysis.
    post_analysis_data_type = ["bootstrap", "jackknife", "unanalyzed"]
    post_analysis_data_type = ["bootstrap"]

    ########## Main analysis ##########
    databeta60 = copy.deepcopy(default_params)
    databeta60["batch_name"] = "beta60"
    databeta60["beta"] = 6.0
    databeta60["topc_y_limits"] = [-9, 9]
    databeta60["topc2_y_limits"] = [-81, 81]
    databeta60["NCfgs"] = get_num_observables(databeta60["batch_folder"],
                                              databeta60["batch_name"])
    databeta60["obs_file"] = "24_6.00"
    databeta60["MCInt"] = MC_intervals[0]
    databeta60["N"] = 24
    databeta60["NT"] = 2 * databeta60["N"]
    databeta60["color"] = "#e41a1c"

    databeta61 = copy.deepcopy(default_params)
    databeta61["batch_name"] = "beta61"
    databeta61["beta"] = 6.1
    databeta61["topc_y_limits"] = [-12, 12]
    databeta61["topc2_y_limits"] = [-144, 144]
    databeta61["NCfgs"] = get_num_observables(databeta61["batch_folder"],
                                              databeta61["batch_name"])
    databeta61["obs_file"] = "28_6.10"
    databeta61["MCInt"] = MC_intervals[1]
    databeta61["N"] = 28
    databeta61["NT"] = 2 * databeta61["N"]
    databeta61["color"] = "#377eb8"

    databeta62 = copy.deepcopy(default_params)
    databeta62["batch_name"] = "beta62"
    databeta62["beta"] = 6.2
    databeta62["topc_y_limits"] = [-12, 12]
    databeta62["topc2_y_limits"] = [-196, 196]
    databeta62["NCfgs"] = get_num_observables(databeta62["batch_folder"],
                                              databeta62["batch_name"])
    databeta62["obs_file"] = "32_6.20"
    databeta62["MCInt"] = MC_intervals[2]
    databeta62["N"] = 32
    databeta62["NT"] = 2 * databeta62["N"]
    databeta62["color"] = "#4daf4a"

    databeta645 = copy.deepcopy(default_params)
    databeta645["flow_epsilon"] = 0.02
    databeta645["batch_name"] = "beta645"
    databeta645["beta"] = 6.45
    databeta645["topc_y_limits"] = [-15, 15]
    databeta645["topc2_y_limits"] = [-300, 300]
    databeta645["NCfgs"] = get_num_observables(databeta645["batch_folder"],
                                               databeta645["batch_name"])
    databeta645["obs_file"] = "48_6.45"
    databeta645["MCInt"] = MC_intervals[3]
    databeta645["N"] = 48
    databeta645["NT"] = 2 * databeta645["N"]
    databeta645["color"] = "#984ea3"

    # Adding relevant batches to args
    analysis_parameter_list = [databeta60, databeta61, databeta62, databeta645]
    # analysis_parameter_list = [databeta60, databeta61, databeta62]
    # analysis_parameter_list = [databeta61, databeta62]
    # analysis_parameter_list = [databeta62]
    # analysis_parameter_list = [databeta645]

    section_seperator = "=" * 160
    print section_seperator
    print "Observables to be analysed: %s" % ", ".join(
        default_params["observables"])
    print section_seperator + "\n"

    #### Submitting main analysis
    for analysis_parameters in analysis_parameter_list:
        pre_analysis(analysis_parameters)

    if analysis_parameter_list[0]["MCInt"] is not None:
        assert all(
            len(plist["MCInt"]) == len(analysis_parameter_list[0]["MCInt"])
            for plist in analysis_parameter_list), \
            "unequal amount of MC intervals"

    #### Submitting post-analysis data
    if len(analysis_parameter_list) >= 3:
        post_analysis(analysis_parameter_list,
                      default_params["observables"],
                      topsus_fit_targets,
                      line_fit_interval_points,
                      energy_fit_target,
                      q0_flow_times,
                      euclidean_time_percents,
                      extrapolation_methods=extrapolation_methods,
                      plot_continuum_fit=plot_continuum_fit,
                      post_analysis_data_type=post_analysis_data_type,
                      figures_folder=default_params["figures_folder"],
                      gif_params=default_params["gif"],
                      verbose=default_params["verbose"])
    else:
        msg = "Need at least 3 different beta values to run post analysis"
        msg += "(%d given)." % len(analysis_parameter_list)
        print msg
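The four databeta blocks above differ only in a handful of fields; a hypothetical factory along these lines could build them (a sketch only, not part of the original script; both topc limits are passed explicitly since they do not follow a single formula across the batches):

def make_beta_batch(base_params, name, beta, topc_lim, topc2_lim,
                    obs_file, N, mc_interval, color):
    import copy
    from tools.folderreadingtools import get_num_observables
    params = copy.deepcopy(base_params)
    params["batch_name"] = name
    params["beta"] = beta
    params["topc_y_limits"] = [-topc_lim, topc_lim]
    params["topc2_y_limits"] = [-topc2_lim, topc2_lim]
    params["NCfgs"] = get_num_observables(params["batch_folder"], name)
    params["obs_file"] = obs_file
    params["MCInt"] = mc_interval
    params["N"] = N
    params["NT"] = 2 * N
    params["color"] = color
    return params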
Example 4
def topc_modes_analysis():
    """Analysis for different lattice sizes and their topological charges."""
    default_params = get_default_parameters(data_batch_folder="temp")

    run_pre_analysis = False
    verbose = True

    data_path = "../data/"
    if not os.path.isdir(data_path):
        data_path = "../" + data_path

    ########## Smaug data 8x16 analysis ##########
    smaug8x16_data_beta60_analysis = copy.deepcopy(default_params)
    smaug8x16_data_beta60_analysis["batch_folder"] = data_path
    smaug8x16_data_beta60_analysis["batch_name"] = "beta60_8x16_run"
    smaug8x16_data_beta60_analysis["beta"] = 6.0
    smaug8x16_data_beta60_analysis["topc_y_limits"] = [-2, 2]
    smaug8x16_data_beta60_analysis["num_bins_per_int"] = 32
    smaug8x16_data_beta60_analysis["bin_range"] = [-2.5, 2.5]
    smaug8x16_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug8x16_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug8x16_data_beta60_analysis["batch_folder"],
        smaug8x16_data_beta60_analysis["batch_name"])
    smaug8x16_data_beta60_analysis["obs_file"] = "8_6.00"
    smaug8x16_data_beta60_analysis["N"] = 8
    smaug8x16_data_beta60_analysis["NT"] = 16
    smaug8x16_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 12x24 analysis ##########
    smaug12x24_data_beta60_analysis = copy.deepcopy(default_params)
    smaug12x24_data_beta60_analysis["batch_folder"] = data_path
    smaug12x24_data_beta60_analysis["batch_name"] = "beta60_12x24_run"
    smaug12x24_data_beta60_analysis["beta"] = 6.0
    smaug12x24_data_beta60_analysis["topc_y_limits"] = [-4, 4]
    smaug12x24_data_beta60_analysis["num_bins_per_int"] = 16
    smaug12x24_data_beta60_analysis["bin_range"] = [-4.5, 4.5]
    smaug12x24_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug12x24_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug12x24_data_beta60_analysis["batch_folder"],
        smaug12x24_data_beta60_analysis["batch_name"])
    smaug12x24_data_beta60_analysis["obs_file"] = "12_6.00"
    smaug12x24_data_beta60_analysis["N"] = 12
    smaug12x24_data_beta60_analysis["NT"] = 24
    smaug12x24_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 16x32 analysis ##########
    smaug16x32_data_beta61_analysis = copy.deepcopy(default_params)
    smaug16x32_data_beta61_analysis["batch_folder"] = data_path
    smaug16x32_data_beta61_analysis["batch_name"] = "beta61_16x32_run"
    smaug16x32_data_beta61_analysis["beta"] = 6.1
    smaug16x32_data_beta61_analysis["topc_y_limits"] = [-8, 8]
    smaug16x32_data_beta61_analysis["num_bins_per_int"] = 16
    smaug16x32_data_beta61_analysis["bin_range"] = [-7.5, 7.5]
    smaug16x32_data_beta61_analysis["hist_flow_times"] = [0, 250, 600]
    smaug16x32_data_beta61_analysis["NCfgs"] = get_num_observables(
        smaug16x32_data_beta61_analysis["batch_folder"],
        smaug16x32_data_beta61_analysis["batch_name"])
    smaug16x32_data_beta61_analysis["obs_file"] = "16_6.10"
    smaug16x32_data_beta61_analysis["N"] = 16
    smaug16x32_data_beta61_analysis["NT"] = 32
    smaug16x32_data_beta61_analysis["color"] = "#377eb8"

    param_list = [
        smaug8x16_data_beta60_analysis, smaug12x24_data_beta60_analysis,
        smaug16x32_data_beta61_analysis
    ]

    if run_pre_analysis:
        # Submitting analysis
        for analysis_parameters in param_list:
            pre_analysis(analysis_parameters)

    # Loads topc data
    data = []
    # N_val = [24, 24, 28]
    for i, param in enumerate(param_list):
        print "Loading data for: {}".format(param["batch_name"])
        # fpath = os.path.join(param["batch_folder"], param["batch_name"],
        #                      "{0:d}_{1:.2f}.npy".format(N_val[i],
        #                                                 param["beta"]))
        data_, p = get_data_parameters(param)
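        # get_data_parameters presumably returns observables keyed by name;
        # "obs" is assumed to be (configurations, flow times), so the
        # transpose makes data[i]["data"][t_f] the charge of every
        # configuration at flow time t_f.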
        data.append({
            "data": data_("topc")["obs"].T,
            "beta": param["beta"],
            "N": param["N"]
        })

        # print data_("topc")["obs"].shape

    # Flow time to plots
    flow_times = [0, 250, 600]

    # Histogram plotting
    xlim = 7.5
    NBins = np.arange(-xlim, xlim, 0.05)
    for t_f in flow_times:
        # Adds unanalyzed data
        fig, axes = plt.subplots(3, 1, sharey=False, sharex=True)
        for i, ax in enumerate(axes):
            lab = r"${0:d}^3\times{1:d}$, $\beta={2:.2f}$".format(
                data[i]["N"], data[i]["N"] * 2, data[i]["beta"])

            weights = np.ones_like(data[i]["data"][t_f])
            weights /= len(data[i]["data"][t_f])
            ax.hist(data[i]["data"][t_f],
                    bins=NBins,
                    label=lab,
                    weights=weights)
            ax.legend(loc="upper right")
            ax.grid(True)
            ax.set_xlim(-xlim, xlim)

            if i == 1:
                ax.set_ylabel(r"$Hits$")
            elif i == 2:
                ax.set_xlabel(r"$Q$")

        # Sets up figure
        figpath = "figures/topc_modes_analysis"
        if not os.path.isdir(figpath):
            figpath = "../" + figpath
        check_folder(figpath, verbose=verbose)
        figpath = os.path.join(figpath, "topc_modes_tf{}.pdf".format(t_f))
        fig.savefig(figpath)
        print "Figure saved at {0:s}".format(figpath)
        plt.close(fig)
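The weights array above makes each histogram's bar heights sum to one, so the panels show the fraction of configurations per topological charge bin rather than raw counts. A minimal self-contained illustration of the same trick:

import numpy as np
import matplotlib.pyplot as plt

q = np.random.randn(1000)  # stand-in for a topological charge sample
fig, ax = plt.subplots()
weights = np.ones_like(q) / len(q)  # bar heights now sum to 1
ax.hist(q, bins=50, weights=weights)
plt.close(fig)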
Example 5
def lattice_updates_analysis():

    run_pre_analysis = False
    verbose = False
    N_corr = [200, 400, 600]
    N_updates = [10, 20, 30]
    param_list = []
    beta = 6.0
    figure_folder = "../figures/lattice_updates"
    output_folder = "../data/lattice_update_data"
    ############ Sets up the different N_up/N_corr analyses ##########

    # Sets up Slurm output files
    if not os.path.isdir(output_folder):
        output_folder = os.path.join("..", output_folder)
    output_file_path = os.path.join(output_folder, "output_files")

    # Sets up empty nested dictionary
    output_files = {icorr: {iup: {} for iup in N_updates} for icorr in N_corr}

    # Retrieves standard parameters
    default_params = get_default_parameters(data_batch_folder="temp",
                                            verbose=verbose)

    # Loops through different corr lengths and link update sizes
    for icorr in N_corr:
        for iup in N_updates:

            # Loops over files in directory
            for of in os.listdir(output_file_path):
                _matches = re.findall(r"NUP([\d]+)_NCORR([\d]+)", of)
                if not _matches:
                    continue
                _NUp, _NCorr = list(map(int, _matches[0]))

                # If icorr and iup matches files, we add it to the dictionary
                # we created earlier.
                if icorr == _NCorr and iup == _NUp:
                    output_files[icorr][iup] = {
                        "NUp": iup,
                        "NCorr": icorr,
                        "output_path": os.path.join(output_file_path, of)
                    }
                    break

    # Sets up parameter list for analysis
    for icorr in N_corr:
        for iup in N_updates:
            _params = copy.deepcopy(default_params)
            _params["batch_folder"] = output_folder
            _params["batch_name"] = \
                "B60_NUP{0:d}_NCORR{1:d}".format(iup, icorr)
            _params["NCfgs"] = get_num_observables(_params["batch_folder"],
                                                   _params["batch_name"])
            _params["beta"] = beta
            _params["color"] = "#377eb8"
            _params["N"] = 16
            _params["NT"] = _params["N"] * 2
            _params["observables"] = ["plaq", "energy", "topc"]
            _params.update(output_files[icorr][iup])
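            # read_run_time is assumed to return a nested table of timings:
            # the last entry's last element is taken as the total runtime and
            # the first entry's first element as the link-update time.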
            _times = read_run_time(_params["output_path"])
            _params["total_runtime"] = _times[-1][-1]
            _params["update_time"] = _times[0][0]
            param_list.append(_params)

    if run_pre_analysis:
        # Submitting distribution analysis
        for analysis_parameters in param_list:
            pre_analysis(analysis_parameters)

    print("Success: pre analysis done.")

    # Use post_analysis data for further analysis.
    data = {icorr: {iup: {} for iup in N_updates} for icorr in N_corr}
    for i, _params in enumerate(param_list):
        print "Loading data for NCorr={0:d} NUp={1:d}".format(
            _params["NCorr"], _params["NUp"])
        data[_params["NCorr"]][_params["NUp"]] = {
            "data":
            post_analysis.PostAnalysisDataReader(
                [_params],
                observables_to_load=_params["observables"],
                verbose=verbose),
            "params":
            _params,
        }

    print("Success: post analysis data retrieved.")

    X_corr, Y_up = np.meshgrid(N_corr, N_updates)

    # Sets up time grid
    Z_total_runtimes = np.zeros((3, 3))
    Z_update_times = np.zeros((3, 3))
    X_flow = np.zeros((3, 3, 251))
    Z_autocorr = np.zeros((3, 3, 251))
    Z_autocorr_error = np.zeros((3, 3, 251))
    for i, icorr in enumerate(N_corr):
        for j, iup in enumerate(N_updates):

            Z_total_runtimes[i, j] = \
                data[icorr][iup]["params"]["total_runtime"]
            Z_update_times[i, j] = \
                data[icorr][iup]["params"]["update_time"]

            _tmp = data[icorr][iup]["data"]["topc"][beta]
            Z_autocorr[i, j] = \
                _tmp["with_autocorr"]["autocorr"]["tau_int"]
            Z_autocorr_error[i, j] = \
                _tmp["with_autocorr"]["autocorr"]["tau_int_err"]

            X_flow[i, j] = _tmp["with_autocorr"]["unanalyzed"]["x"]
            X_flow[i, j] *= data[icorr][iup]["data"].flow_epsilon[6.0]
            X_flow[i, j] = np.sqrt(8 * X_flow[i, j])
            X_flow[i, j] *= data[icorr][iup]["data"].lattice_sizes[6.0][0]

    # Plots update and total run-times on grid
    heatmap_plotter(N_corr,
                    N_updates,
                    Z_total_runtimes,
                    "topc_total_runtime.pdf",
                    xlabel=r"$N_\mathrm{corr}$",
                    ylabel=r"$N_\mathrm{up}$",
                    cbartitle=r"$t_\mathrm{total}$",
                    figure_folder=figure_folder)

    heatmap_plotter(N_corr,
                    N_updates,
                    Z_update_times,
                    "topc_update_runtime.pdf",
                    xlabel=r"$N_\mathrm{corr}$",
                    ylabel=r"$N_\mathrm{up}$",
                    cbartitle=r"$t_\mathrm{update}$",
                    figure_folder=figure_folder)

    # Plots final autocorrelations on grid
    flow_times = [0, 100, 250]
    for tf in flow_times:
        heatmap_plotter(N_corr,
                        N_updates,
                        Z_autocorr[:, :, tf],
                        "topc_autocorr_tau{0:d}.pdf".format(tf),
                        xlabel=r"$N_\mathrm{corr}$",
                        ylabel=r"$N_\mathrm{up}$",
                        cbartitle=r"$\tau_\mathrm{int}$",
                        figure_folder=figure_folder)

        heatmap_plotter(N_corr,
                        N_updates,
                        Z_autocorr_error[:, :, tf],
                        "topc_autocorr_err_tau{0:d}.pdf".format(tf),
                        xlabel=r"$N_\mathrm{corr}$",
                        ylabel=r"$N_\mathrm{up}$",
                        cbartitle=r"$\tau_\mathrm{int}$",
                        figure_folder=figure_folder)

    # Plots all of the 9 autocorrs in single figure
    plot9_figures(X_flow,
                  X_corr,
                  Y_up,
                  Z_autocorr,
                  Z_autocorr_error,
                  "topc_autocorr.pdf",
                  xlabel=r"$\sqrt{8t_{f}}$",
                  ylabel=r"$\tau_\mathrm{int}$",
                  figure_folder=figure_folder,
                  mark_interval=10)
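heatmap_plotter and plot9_figures come from the repository's plotting tools. A minimal sketch of what a heatmap helper with this call signature might do (signature and behavior inferred from the call sites above, not the actual implementation):

import os
import numpy as np
import matplotlib.pyplot as plt

def heatmap_plotter(x, y, z, figure_name, xlabel="", ylabel="",
                    cbartitle="", figure_folder="."):
    """Plots z on a len(x) by len(y) grid and saves the figure."""
    fig, ax = plt.subplots()
    im = ax.imshow(z.T, origin="lower", aspect="auto")
    ax.set_xticks(np.arange(len(x)))
    ax.set_xticklabels(x)
    ax.set_yticks(np.arange(len(y)))
    ax.set_yticklabels(y)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.colorbar(im, ax=ax).set_label(cbartitle)
    fig.savefig(os.path.join(figure_folder, figure_name))
    plt.close(fig)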
Example 6
def main_analysis(run_pre_analysis=True,
                  run_post_analysis=True,
                  only_generate_data=False,
                  observables=None,
                  post_analysis_data_type=["bootstrap"]):
    from pre_analysis.pre_analyser import pre_analysis
    from post_analysis.post_analyser import post_analysis
    from default_analysis_params import get_default_parameters
    from tools.folderreadingtools import get_num_observables
    import copy
    import os

    # Different batches
    # data_batch_folder = "../GluonAction/data8"
    data_batch_folder = "../GluonAction/data11"
    # data_batch_folder = "../GluonAction/DataGiovanni"
    # data_batch_folder = "../data/topc_modes_8x16"
    data_batch_folder = "../data/data11"

    obs_exlusions = ["w_t_energy", "energy", "topcMC", "topsusMC", "qtq0effMC"]
    default_params = get_default_parameters(
        data_batch_folder=data_batch_folder, obs_exlusions=obs_exlusions)

    if observables is not None:
        default_params["observables"] = observables

    # Post analysis parameters
    line_fit_interval_points = 20
    # topsus_fit_targets = [0.3,0.4,0.5,0.58]
    # topsus_fit_targets = [0.3, 0.4, 0.5, 0.6] # tf = sqrt(8*t0)
    topsus_fit_targets = [0.6, "t0", "w0", "t0cont", "w0cont"]
    energy_fit_target = 0.3

    # Will try to use pickled reference scale instead
    use_pickled_reference_scale = True

    # Method of continuum extrapolation.
    # Options: plateau, plateau_mean, nearest, interpolate, bootstrap
    extrapolation_methods = [
        "plateau", "plateau_mean", "nearest", "interpolate", "bootstrap"
    ]
    # extrapolation_methods = ["plateau"]
    extrapolation_methods = ["bootstrap"]
    plot_continuum_fit = False

    # Number of different sectors we will analyse in euclidean time
    default_params["numsplits_eucl"] = 4
    intervals_eucl = [None, None, None, None]

    # Number of different sectors we will analyse in monte carlo time
    default_params["MC_time_splits"] = 4
    # MC_intervals = [[0, 1000], [500, 1000], [500, 1000], [175, 250]]
    # MC_intervals = [[0, 1000], [0, 1000], [0, 2000], [0, 125]]
    MC_intervals = [None, None, None, None]

    # Extraction point in flow time a*t_f for q0 in qtq0
    q0_flow_times = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]  # [fermi]

    # Flow time indexes in percent to plot qtq0 in euclidean time at
    euclidean_time_percents = [0, 0.25, 0.50, 0.75, 1.00]
    # euclidean_time_percents = [0]

    # Blocking
    default_params["blocking_analysis"] = True

    # Check to only generate data for post-analysis
    default_params["only_generate_data"] = only_generate_data

    ########## Main analysis ##########
    databeta60 = copy.deepcopy(default_params)
    databeta60["batch_name"] = "beta60"
    databeta60["ensemble_name"] = r"$A$"
    databeta60["beta"] = 6.0
    databeta60["block_size"] = 10  # None
    databeta60["topc_y_limits"] = [-9, 9]
    databeta60["topc2_y_limits"] = [-81, 81]
    databeta60["NCfgs"] = get_num_observables(databeta60["batch_folder"],
                                              databeta60["batch_name"])
    databeta60["obs_file"] = "24_6.00"
    databeta60["MCInt"] = MC_intervals[0]
    databeta60["N"] = 24
    databeta60["NT"] = 2 * databeta60["N"]
    databeta60["color"] = "#e41a1c"

    databeta61 = copy.deepcopy(default_params)
    databeta61["batch_name"] = "beta61"
    databeta61["ensemble_name"] = r"$B$"
    databeta61["beta"] = 6.1
    databeta61["block_size"] = 10  # None
    databeta61["topc_y_limits"] = [-12, 12]
    databeta61["topc2_y_limits"] = [-144, 144]
    databeta61["NCfgs"] = get_num_observables(databeta61["batch_folder"],
                                              databeta61["batch_name"])
    databeta61["obs_file"] = "28_6.10"
    databeta61["MCInt"] = MC_intervals[1]
    databeta61["N"] = 28
    databeta61["NT"] = 2 * databeta61["N"]
    databeta61["color"] = "#377eb8"

    databeta62 = copy.deepcopy(default_params)
    databeta62["batch_name"] = "beta62"
    databeta62["ensemble_name"] = r"$C$"
    databeta62["beta"] = 6.2
    databeta62["block_size"] = 10  # None
    databeta62["topc_y_limits"] = [-12, 12]
    databeta62["topc2_y_limits"] = [-196, 196]
    databeta62["NCfgs"] = get_num_observables(databeta62["batch_folder"],
                                              databeta62["batch_name"])
    databeta62["obs_file"] = "32_6.20"
    databeta62["MCInt"] = MC_intervals[2]
    databeta62["N"] = 32
    databeta62["NT"] = 2 * databeta62["N"]
    databeta62["color"] = "#4daf4a"

    databeta645 = copy.deepcopy(default_params)
    databeta645["flow_epsilon"] = 0.02
    databeta645["batch_name"] = "beta645"
    databeta645["ensemble_name"] = r"$D_2$"
    databeta645["beta"] = 6.45
    databeta645["block_size"] = 25  # None
    databeta645["topc_y_limits"] = [-15, 15]
    databeta645["topc2_y_limits"] = [-300, 300]
    databeta645["NCfgs"] = get_num_observables(databeta645["batch_folder"],
                                               databeta645["batch_name"])
    databeta645["obs_file"] = "48_6.45"
    databeta645["MCInt"] = MC_intervals[3]
    databeta645["N"] = 48
    databeta645["NT"] = 2 * databeta645["N"]
    databeta645["color"] = "#984ea3"

    # Adding relevant batches to args
    analysis_parameter_list = [databeta60, databeta61, databeta62, databeta645]

    section_seperator = "=" * 160
    print section_seperator
    print "Observables to be analysed: %s" % ", ".join(
        default_params["observables"])
    print section_seperator + "\n"

    # Submitting main analysis
    if run_pre_analysis:
        for analysis_parameters in analysis_parameter_list:
            pre_analysis(analysis_parameters)

    if analysis_parameter_list[0]["MCInt"] is not None:
        assert all(
            len(plist["MCInt"]) == len(analysis_parameter_list[0]["MCInt"])
            for plist in analysis_parameter_list), \
            "unequal amount of MC intervals"

    # Submitting post-analysis data
    if run_post_analysis:
        if len(analysis_parameter_list) >= 3:
            post_analysis(analysis_parameter_list,
                          default_params["observables"],
                          topsus_fit_targets,
                          line_fit_interval_points,
                          energy_fit_target,
                          q0_flow_times,
                          euclidean_time_percents,
                          extrapolation_methods=extrapolation_methods,
                          plot_continuum_fit=plot_continuum_fit,
                          post_analysis_data_type=post_analysis_data_type,
                          figures_folder=default_params["figures_folder"],
                          gif_params=default_params["gif"],
                          verbose=default_params["verbose"])
        else:
            msg = "Need at least 3 different beta values to run post analysis"
            msg += "(%d given)." % len(analysis_parameter_list)
            print msg
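A hypothetical invocation, using only the keyword arguments defined in the signature above:

if __name__ == "__main__":
    main_analysis(run_pre_analysis=True, run_post_analysis=False,
                  observables=["plaq", "topc"])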
Example 7
def thermalization_analysis():
    """Runs the thermalization analysis."""

    verbose = True
    run_pre_analysis = True
    mark_every = 50
    mc_cutoff = -1  # Skip every 100 points with 2000 therm-steps!!
    batch_folder = check_relative_path("data/thermalization_data")
    base_figure_folder = check_relative_path("figures/")
    base_figure_folder = os.path.join(base_figure_folder,
                                      "thermalization_analysis")
    check_folder(base_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    ############ COLD START #############
    cold_beta60_params = copy.deepcopy(default_params)
    cold_beta60_params["batch_folder"] = batch_folder
    cold_beta60_params["batch_name"] = "B60_THERM_COLD"
    cold_beta60_params["load_binary_file"] = False
    cold_beta60_params["beta"] = 6.0
    cold_beta60_params["topc_y_limits"] = [-2, 2]
    cold_beta60_params["num_bins_per_int"] = 32
    cold_beta60_params["bin_range"] = [-2.5, 2.5]
    cold_beta60_params["hist_flow_times"] = [0, 250, 600]
    cold_beta60_params["NCfgs"] = get_num_observables(
        cold_beta60_params["batch_folder"], cold_beta60_params["batch_name"])
    cold_beta60_params["obs_file"] = "8_6.00"
    cold_beta60_params["N"] = 8
    cold_beta60_params["NT"] = 16
    cold_beta60_params["color"] = "#377eb8"

    ########## HOT RND START ############
    hot_rnd_beta60_params = copy.deepcopy(default_params)
    hot_rnd_beta60_params["batch_folder"] = batch_folder
    hot_rnd_beta60_params["batch_name"] = "B60_THERM_HOT_RND"

    ########## HOT RST START ############
    hot_rst_beta60_params = copy.deepcopy(default_params)
    hot_rst_beta60_params["batch_folder"] = batch_folder
    hot_rst_beta60_params["batch_name"] = "B60_THERM_HOT_RST"

    if run_pre_analysis:
        # Loads the observables for the three start conditions
        cold_data = load_observable(cold_beta60_params)
        hot_rnd_data = load_observable(hot_rnd_beta60_params)
        hot_rst_data = load_observable(hot_rst_beta60_params)

    # # Loads post analysis data
    # cold_data = post_analysis.PostAnalysisDataReader(
    #     [cold_beta60_params],
    #     observables_to_load=cold_beta60_params["observables"],
    #     verbose=verbose)

    # hot_rnd_data = post_analysis.PostAnalysisDataReader(
    #     [hot_rnd_beta60_params],
    #     observables_to_load=hot_rnd_beta60_params["observables"],
    #     verbose=verbose)

    # hot_rst_data = post_analysis.PostAnalysisDataReader(
    #     [hot_rst_beta60_params],
    #     observables_to_load=hot_rst_beta60_params["observables"],
    #     verbose=verbose)

    # TODO: plot thermalizations for the 3 different observables

    plot_types = ["default", "loglog", "logx", "logy"]

    y_labels = [[r"$P$", r"$Q$", r"$E$"],
                [
                    r"$\frac{|P - \langle P \rangle|}{\langle P \rangle}$",
                    r"$\frac{|Q - \langle Q \rangle|}{\langle Q \rangle}$",
                    r"$\frac{|E - \langle E \rangle|}{\langle E \rangle}$"
                ],
                [
                    r"$|P - \langle P \rangle|$", r"$|Q - \langle Q \rangle|$",
                    r"$|E - \langle E \rangle|$"
                ]]
    # y_labels[i_dr] = [r"$\langle P \rangle$", r"$\langle P \rangle$",
    #             r"$\langle P \rangle$"]

    subplot_rows = [1, 3]

    # Limits to be put on plot
    x_limits = [[] for i in range(3)]
    y_limits = [[], [], []]

    data_representations = ["default", "relerr", "abserr"]

    obs_list = cold_data["obs"].keys()

    # Normalizes the energy once, before the plotting loops
    if "energy" in obs_list:
        correction_factor = -1.0 / 64
        cold_data["obs"]["energy"] *= correction_factor
        hot_rnd_data["obs"]["energy"] *= correction_factor
        hot_rst_data["obs"]["energy"] *= correction_factor

    x_label = r"$t_\mathrm{MC}$"

    for i_dr, dr in enumerate(data_representations):
        for pt in plot_types:
            for i_obs, obs in enumerate(obs_list):
                for plot_rows in subplot_rows:

                    # Sets up figure folder for observable
                    figure_folder = os.path.join(base_figure_folder, obs)
                    check_folder(figure_folder, verbose=verbose)

                    # Sets up plot type folder
                    figure_folder = os.path.join(figure_folder, pt)
                    check_folder(figure_folder, verbose=verbose)

                    if obs == "energy":
                        correction_factor = -1.0 / 64
                        cold_data["obs"][obs] *= correction_factor
                        hot_rnd_data["obs"][obs] *= correction_factor
                        hot_rst_data["obs"][obs] *= correction_factor

                    # Retrieves data and makes modifications
                    _cold_data = modify_data(cold_data["obs"][obs][:mc_cutoff],
                                             dr)
                    _hot_rnd_data = modify_data(
                        hot_rnd_data["obs"][obs][:mc_cutoff], dr)
                    _hot_rst_data = modify_data(
                        hot_rst_data["obs"][obs][:mc_cutoff], dr)

                    # Creates figure name
                    figure_name = "{0:s}_{1:s}_{2:s}_{3:d}plotrows.pdf".format(
                        obs, pt, dr, plot_rows)

                    plot_data_array(
                        [np.arange(_cold_data.shape[0]) for i in range(3)],
                        [_cold_data, _hot_rnd_data, _hot_rst_data],
                        ["Cold start", "Hot start", r"Hot start, $RST$"],
                        x_label,
                        y_labels[i_dr][i_obs],
                        figure_name,
                        figure_folder,
                        plot_type=pt,
                        x_limits=x_limits[i_obs],
                        y_limits=y_limits[i_obs],
                        mark_every=mark_every,
                        subplot_rows=plot_rows)
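modify_data is a repository helper; a minimal sketch consistent with the three data representations and the y-axis labels above ("default" raw values, "relerr" relative deviation from the mean, "abserr" absolute deviation), though the actual implementation may differ:

import numpy as np

def modify_data(y, representation):
    y = np.asarray(y)
    if representation == "default":
        return y
    elif representation == "relerr":
        return np.abs(y - y.mean()) / y.mean()
    elif representation == "abserr":
        return np.abs(y - y.mean())
    else:
        raise ValueError("Unknown representation: %s" % representation)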
Example 8
def topc_modes_analysis():
    """Analysis for different lattice sizes and their topological charges."""
    default_params = get_default_parameters(data_batch_folder="temp")
    default_params["blocking_analysis"] = True

    default_params["observables"] = ["plaq", "topc", "topc2", "topc4", "topcr",
                                     "topsus", "topsusqtq0", "qtq0e",
                                     "qtq0eff", "topcMC"]
    default_params["observables"] = ["topc2", "topc4", "topcr"]

    # Check to only generate data for post-analysis
    default_params["only_generate_data"] = False

    run_pre_analysis = False
    run_post_analysis = False

    # run_pre_analysis = True
    # run_post_analysis = True
    default_params["verbose"] = True

    ########## Post analysis parameters ##########
    line_fit_interval_points = 20
    topsus_fit_targets = [0.5, 0.6]
    energy_fit_target = 0.3
    q0_flow_times = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]  # [fermi]
    euclidean_time_percents = [0, 0.25, 0.50, 0.75, 1.00]
    extrapolation_methods = ["bootstrap"]
    plot_continuum_fit = False
    post_analysis_data_type = ["bootstrap"]
    figures_folder = "figures/topc_modes_analysis"

    data_path = "../data/"
    if not os.path.isdir(data_path):
        data_path = "../" + data_path

    ########## Smaug data 8x16 analysis ##########
    smaug8x16_data_beta60_analysis = copy.deepcopy(default_params)
    smaug8x16_data_beta60_analysis["batch_folder"] = data_path
    smaug8x16_data_beta60_analysis["batch_name"] = "beta60_8x16_run"
    smaug8x16_data_beta60_analysis["ensemble_name"] = r"$E$"
    smaug8x16_data_beta60_analysis["beta"] = 6.0
    smaug8x16_data_beta60_analysis["block_size"] = 10  # None
    smaug8x16_data_beta60_analysis["topc_y_limits"] = [-2, 2]
    smaug8x16_data_beta60_analysis["num_bins_per_int"] = 32
    smaug8x16_data_beta60_analysis["bin_range"] = [-2.5, 2.5]
    smaug8x16_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug8x16_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug8x16_data_beta60_analysis["batch_folder"],
        smaug8x16_data_beta60_analysis["batch_name"])
    smaug8x16_data_beta60_analysis["obs_file"] = "8_6.00"
    smaug8x16_data_beta60_analysis["N"] = 8
    smaug8x16_data_beta60_analysis["NT"] = 16
    smaug8x16_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 12x24 analysis ##########
    smaug12x24_data_beta60_analysis = copy.deepcopy(default_params)
    smaug12x24_data_beta60_analysis["batch_folder"] = data_path
    smaug12x24_data_beta60_analysis["batch_name"] = "beta60_12x24_run"
    smaug12x24_data_beta60_analysis["ensemble_name"] = r"$F$"
    smaug12x24_data_beta60_analysis["beta"] = 6.0
    smaug12x24_data_beta60_analysis["block_size"] = 10  # None
    smaug12x24_data_beta60_analysis["topc_y_limits"] = [-4, 4]
    smaug12x24_data_beta60_analysis["num_bins_per_int"] = 16
    smaug12x24_data_beta60_analysis["bin_range"] = [-4.5, 4.5]
    smaug12x24_data_beta60_analysis["hist_flow_times"] = [0, 100, 600]
    smaug12x24_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug12x24_data_beta60_analysis["batch_folder"],
        smaug12x24_data_beta60_analysis["batch_name"])
    smaug12x24_data_beta60_analysis["obs_file"] = "12_6.00"
    smaug12x24_data_beta60_analysis["N"] = 12
    smaug12x24_data_beta60_analysis["NT"] = 24
    smaug12x24_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 16x32 analysis ##########
    smaug16x32_data_beta61_analysis = copy.deepcopy(default_params)
    smaug16x32_data_beta61_analysis["batch_folder"] = data_path
    smaug16x32_data_beta61_analysis["batch_name"] = "beta61_16x32_run"
    smaug16x32_data_beta61_analysis["ensemble_name"] = r"$G$"
    smaug16x32_data_beta61_analysis["beta"] = 6.1
    smaug16x32_data_beta61_analysis["block_size"] = 10  # None
    smaug16x32_data_beta61_analysis["topc_y_limits"] = [-8, 8]
    smaug16x32_data_beta61_analysis["num_bins_per_int"] = 16
    smaug16x32_data_beta61_analysis["bin_range"] = [-7.5, 7.5]
    smaug16x32_data_beta61_analysis["hist_flow_times"] = [0, 100, 400]
    smaug16x32_data_beta61_analysis["NCfgs"] = get_num_observables(
        smaug16x32_data_beta61_analysis["batch_folder"],
        smaug16x32_data_beta61_analysis["batch_name"])
    smaug16x32_data_beta61_analysis["obs_file"] = "16_6.10"
    smaug16x32_data_beta61_analysis["N"] = 16
    smaug16x32_data_beta61_analysis["NT"] = 32
    smaug16x32_data_beta61_analysis["color"] = "#377eb8"

    param_list = [
        smaug8x16_data_beta60_analysis,
        smaug12x24_data_beta60_analysis,
        smaug16x32_data_beta61_analysis]

    if run_pre_analysis:
        # Submitting analysis
        for analysis_parameters in param_list:
            pre_analysis(analysis_parameters)

    # Submitting post-analysis data
    if run_post_analysis:
        if len(param_list) >= 3:
            post_analysis(param_list,
                          default_params["observables"],
                          topsus_fit_targets, line_fit_interval_points,
                          energy_fit_target,
                          q0_flow_times, euclidean_time_percents,
                          extrapolation_methods=extrapolation_methods,
                          plot_continuum_fit=plot_continuum_fit,
                          post_analysis_data_type=post_analysis_data_type,
                          figures_folder=figures_folder, # "figures/topc_modes_analysis"
                          verbose=default_params["verbose"])
        else:
            msg = "Need at least 3 different beta values to run post analysis"
            msg += "(%d given)." % len(analysis_parameter_list)
            print msg

    # Loads topc data
    data = []
    # N_val = [24, 24, 28]
    for i, param in enumerate(param_list):
        print "Loading data for: {}".format(param["batch_name"])
        data_, p = get_data_parameters(param)
        data.append({"data": data_("topc")["obs"].T,
                     "beta": param["beta"],
                     "N": param["N"],
                     "ensemble_name": param["ensemble_name"]})

    # Flow time to plots
    flow_times = [0, 25, 50, 100, 150, 250, 450, 600]

    # Histogram plotting
    xlim = 7.5
    NBins = np.arange(-xlim, xlim, 0.05)
    for t_f in flow_times:
        # Adds unanalyzed data
        fig, axes = plt.subplots(len(param_list), 1,
                                 sharey=False, sharex=True)
        axes = np.atleast_1d(axes)
        for i, ax in enumerate(axes):
            # lab = r"${0:d}^3\times{1:d}$, $\beta={2:.2f}$".format(
            #     data[i]["N"], data[i]["N"]*2, data[i]["beta"])
            lab = data[i]["ensemble_name"]

            weights = np.ones_like(data[i]["data"][t_f])
            weights /= len(data[i]["data"][t_f])
            ax.hist(data[i]["data"][t_f], bins=NBins,
                    label=lab, weights=weights)
            ax.legend(loc="upper right")
            ax.grid(True)
            ax.set_xlim(-xlim, xlim)

            if i == len(axes) // 2:
                ax.set_ylabel(r"Hits (normalized)")
            elif i == len(axes) - 1:
                ax.set_xlabel(r"$Q$")

        # Sets up figure
        figpath = figures_folder
        if not os.path.isdir(figpath):
            figpath = "../" + figpath
        check_folder(figpath, verbose=default_params["verbose"])
        figpath = os.path.join(figpath, "topc_modes_tf{}.pdf".format(t_f))
        fig.savefig(figpath)
        print "Figure saved at {0:s}".format(figpath)
        plt.close(fig)
Example 9
def scaling_analysis():
    """
    Scaling analysis.
    """

    # TODO: complete load slurm output function
    # TODO: add line fitting procedure to plot_scaling function
    # TODO: double check what I am plotting makes sense(see pdf)

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    json_file = "run_times_updated.json"
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "data/scaling_output/"), json_file)

    slurm_output_folder = check_relative_path("../data/scaling_output")

    # Extract times from slurm files and put into json file
    if not os.path.isfile(datapath):
        print "No {} found. Loading slurm data.".format(json_file)
        load_slurm_folder(slurm_output_folder)

    # Basic figure setup
    base_figure_folder = check_relative_path("figures")
    base_figure_folder = os.path.join(base_figure_folder, "scaling")
    check_folder(base_figure_folder, verbose=verbose)

    # Strong scaling folder setup
    strong_scaling_figure_folder = os.path.join(base_figure_folder, "strong")
    check_folder(strong_scaling_figure_folder, verbose=verbose)

    # Weak scaling folder setup
    weak_scaling_figure_folder = os.path.join(base_figure_folder, "weak")
    check_folder(weak_scaling_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Loads scaling times and splits into weak and strong
    with open(datapath, "r") as f:
        scaling_times = json.load(f)["runs"]
    strong_scaling_times = filter_scalings(scaling_times, "strong_scaling_np")
    weak_scaling_times = filter_scalings(scaling_times, "weak_scaling_np")

    # Splits strong scaling into gen, io, flow
    gen_strong_scaling = filter_scalings(strong_scaling_times, "gen")
    io_strong_scaling = filter_scalings(strong_scaling_times, "io")
    flow_strong_scaling = filter_scalings(strong_scaling_times, "flow")

    # Splits weak scaling into gen, io, flow
    gen_weak_scaling = filter_scalings(weak_scaling_times, "gen")
    io_weak_scaling = filter_scalings(weak_scaling_times, "io")
    flow_weak_scaling = filter_scalings(weak_scaling_times, "flow")

    # Adds number of processors to strong scaling
    gen_strong_scaling = add_numprocs(gen_strong_scaling)
    io_strong_scaling = add_numprocs(io_strong_scaling)
    flow_strong_scaling = add_numprocs(flow_strong_scaling)

    # Adds number of processors to weak scaling
    gen_weak_scaling = add_numprocs(gen_weak_scaling)
    io_weak_scaling = add_numprocs(io_weak_scaling)
    flow_weak_scaling = add_numprocs(flow_weak_scaling)

    scalings = [
        gen_strong_scaling, io_strong_scaling, flow_strong_scaling,
        gen_weak_scaling, io_weak_scaling, flow_weak_scaling
    ]

    for time_type in ["update_time", "time"]:

        # Loops over scaling values in scalings
        for sv in scalings:
            x = [i["NP"] for i in sv]
            y = [i[time_type] for i in sv]

            # Sets up filename and folder name
            _scaling = list(set([i["runname"].split("_")[0] for i in sv]))
            _sc_part = list(set([i["runname"].split("_")[-1] for i in sv]))
            assert len(_scaling) == 1, \
                "incorrect sv type list: {}".format(_scaling)
            assert len(_sc_part) == 1, \
                "incorrect sv part list length: {}".format(_sc_part)
            _sc_part = _sc_part[0]
            _scaling = _scaling[0]
            figure_name = "{}_{}_{}.pdf".format(_scaling, _sc_part, time_type)

            if _sc_part != "gen" and time_type == "update_time":
                print "Skipping {}".format(figure_name)
                continue

            # Sets correct figure folder
            if _scaling == "strong":
                figure_folder = strong_scaling_figure_folder
            elif _scaling == "weak":
                figure_folder = weak_scaling_figure_folder
            else:
                raise ValueError("Scaling type not recognized for"
                                 " folder: {}".format(_scaling))

            plot_scaling(x, y, _sc_part.capitalize(), r"$N_p$",
                         r"$t_\mathrm{%s}$" % time_type.replace(
                             "_", r"\ ").capitalize(),
                         figure_folder, figure_name)
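add_numprocs attaches the processor count "NP" used by the plots; a minimal sketch assuming run names embed it as an "np<count>" token (e.g. "weak_scaling_np512_gen"; the exact naming convention is an assumption):

import re

def add_numprocs(runs):
    for run in runs:
        match = re.search(r"np(\d+)", run["runname"])
        if match:
            run["NP"] = int(match.group(1))
    return runs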
Example 10
def scaling_analysis():
    """
    Scaling analysis.
    """

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    json_file = "run_times_tmp.json"
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "data/scaling_output/"), json_file)
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "LatticeAnalyser"), json_file)

    slurm_output_folder = check_relative_path("../data/scaling_output")

    slurm_json_output_path = os.path.split(datapath)[0]
    slurm_json_output_path = os.path.join(slurm_json_output_path,
                                          "slurm_output_data.json")

    # Comment this out to use old file
    if os.path.isfile(slurm_json_output_path):
        datapath = slurm_json_output_path

    # Extract times from slurm files and put into json file
    if not os.path.isfile(datapath):
        print "No {} found. Loading slurm data.".format(json_file)
        load_slurm_folder(slurm_output_folder, slurm_json_output_path)
        datapath = slurm_json_output_path

    # Basic figure setup
    base_figure_folder = check_relative_path("figures")
    base_figure_folder = os.path.join(base_figure_folder, "scaling")
    check_folder(base_figure_folder, verbose=verbose)

    # Strong scaling folder setup
    strong_scaling_figure_folder = os.path.join(base_figure_folder, "strong")
    check_folder(strong_scaling_figure_folder, verbose=verbose)

    # Weak scaling folder setup
    weak_scaling_figure_folder = os.path.join(base_figure_folder, "weak")
    check_folder(weak_scaling_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Loads scaling times and splits into weak and strong
    with open(datapath, "r") as f:
        scaling_times = json.load(f)["runs"]
    strong_scaling_times = filter_scalings(scaling_times, "strong_scaling_np")
    weak_scaling_times = filter_scalings(scaling_times, "weak_scaling_np")

    # Splits strong scaling into gen, io, flow
    gen_strong_scaling = filter_scalings(strong_scaling_times, "gen")
    io_strong_scaling = filter_scalings(strong_scaling_times, "io")
    flow_strong_scaling = filter_scalings(strong_scaling_times, "flow")

    # Splits weak scaling into gen, io, flow
    gen_weak_scaling = filter_scalings(weak_scaling_times, "gen")
    gen_weak_scaling = filter_duplicates(gen_weak_scaling)
    io_weak_scaling = filter_scalings(weak_scaling_times, "io")
    flow_weak_scaling = filter_scalings(weak_scaling_times, "flow")

    # Adds number of processors to strong scaling
    gen_strong_scaling = add_numprocs(gen_strong_scaling)
    io_strong_scaling = add_numprocs(io_strong_scaling)
    flow_strong_scaling = add_numprocs(flow_strong_scaling)

    # Adds number of processors to weak scaling
    gen_weak_scaling = add_numprocs(gen_weak_scaling)
    io_weak_scaling = add_numprocs(io_weak_scaling)
    flow_weak_scaling = add_numprocs(flow_weak_scaling)

    scalings = [
        gen_strong_scaling, io_strong_scaling, flow_strong_scaling,
        gen_weak_scaling, io_weak_scaling, flow_weak_scaling
    ]

    times_to_scan = ["update_time", "time"]
    times_to_scan = ["time"]

    # For speedup and retrieving parallelizability fraction.
    min_procs = 8

    strong_scaling_list = []
    weak_scaling_list = []

    for time_type in times_to_scan:

        # Loops over scaling values in scalings
        for sv in scalings:
            x = [i["NP"] for i in sv]
            y = [i[time_type] for i in sv]

            # Sets up filename and folder name
            _scaling = list(set([i["runname"].split("_")[0] for i in sv]))
            _sc_part = list(set([i["runname"].split("_")[-1] for i in sv]))
            assert len(_scaling) == 1, \
                "incorrect sv type list: {}".format(_scaling)
            assert len(_sc_part) == 1, \
                "incorrect sv part list length: {}".format(_sc_part)
            _sc_part = _sc_part[0]
            _scaling = _scaling[0]
            figure_name = "{}_{}_{}.pdf".format(_scaling, _sc_part, time_type)

            if _sc_part != "gen" and time_type == "update_time":
                print "Skipping {}".format(figure_name)
                continue

            # Sets correct figure folder
            if _scaling == "strong":
                _loc = "upper right"
                figure_folder = strong_scaling_figure_folder
            elif _scaling == "weak":
                _loc = "upper left"
                figure_folder = weak_scaling_figure_folder
            else:
                raise ValueError("Scaling type not recognized for"
                                 " folder: {}".format(_scaling))

            if _sc_part == "io":
                _label = r"Input/Output"
            elif _sc_part == "gen":
                _label = r"Configuration generation"
            else:
                _label = _sc_part.capitalize()

            _xlabel = r"$N_p$"
            if time_type == "time":
                _time_type = _sc_part

            if _sc_part == "io":
                _ylabel = r"$t_\mathrm{IO}$[s]"
            else:
                _ylabel = r"$t_\mathrm{%s}$[s]" % _time_type.replace(
                    "_", r"\ ").capitalize()

            # Sets speedup labels (the ratio is dimensionless)
            if _sc_part == "io":
                _ylabel_speedup = (
                    r"$t_{\mathrm{IO},N_{p=%s}}/t_{\mathrm{IO},N_p}$"
                    % min_procs)
            else:
                _tmp = _time_type.replace("_", r"\ ").capitalize()
                _ylabel_speedup = (
                    r"$t_{\mathrm{%s},N_{p=%s}}/t_{\mathrm{%s},N_p}$"
                    % (_tmp, min_procs, _tmp))

            _tmp_dict = {
                "sc": _sc_part,
                "x": np.asarray(x),
                "y": np.asarray(y),
                "label": _label,
                "xlabel": _xlabel,
                "ylabel": _ylabel,
                "ylabel_speedup": _ylabel_speedup,
                "figure_folder": figure_folder,
                "figure_name": figure_name,
                "loc": _loc,
            }

            if _scaling == "strong":
                strong_scaling_list.append(_tmp_dict)
            else:
                weak_scaling_list.append(_tmp_dict)

            plot_scaling(x,
                         y,
                         _label,
                         _xlabel,
                         _ylabel,
                         figure_folder,
                         figure_name,
                         loc=_loc)

    plot_all_scalings(strong_scaling_list, "strong")
    plot_all_scalings(weak_scaling_list, "weak")
    plot_speedup(strong_scaling_list, "strong")
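The min_procs baseline above is what the speedup plots divide by. For the "parallelizability fraction" mentioned in the comment, Amdahl's law S = 1 / ((1 - p) + p/n) can be inverted for the parallel fraction p; a hypothetical worked example, not the repository's plot_speedup implementation:

def amdahl_parallel_fraction(speedup, n):
    """Inverts Amdahl's law, with n the factor by which the processor
    count was increased for the measured speedup."""
    return (1.0 - 1.0 / speedup) / (1.0 - 1.0 / n)

# E.g. a 6x speedup from 16x more processors implies p ~ 0.889
print(amdahl_parallel_fraction(6.0, 16.0))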