예제 #1
0
def collect_training_data(kernel_folders, kernel_folder_pattern):
    """
    Collect raw training data from the autotuning log files.

    For every folder in `kernel_folders`, the (m, n, k) triplet is parsed from
    the folder name with `kernel_folder_pattern`, the folder's log data is
    loaded through `read_log_file`, and one CSV file per algorithm found in
    that data is written into the folder. Files that already exist are left
    untouched.

    :param kernel_folders: sequence of folder paths to process
    :param kernel_folder_pattern: compiled regular expression whose first three
        groups capture m, n and k in a folder path
    """
    total = len(kernel_folders)

    for position, kernel_folder in enumerate(kernel_folders, start=1):
        print("\nProcess folder {} ({}/{:,})".format(kernel_folder, position, total))

        # Each folder holds data for exactly one (m, n, k), possibly for
        # several algorithms
        groups = kernel_folder_pattern.search(kernel_folder).groups()
        m, n, k = int(groups[0]), int(groups[1]), int(groups[2])

        # Gather everything the log files of this folder contain
        data = read_log_file(kernel_folder, m, n, k)

        # One raw CSV file per algorithm
        for name_algo, kernel_algo in kernel_algorithm.items():

            # Algorithm not applicable to this (m, n, k): nothing to write
            if name_algo not in data["algorithm"].values:
                continue

            csv_name = "".join(
                ("raw_training_data_", to_string(m, n, k), "_", name_algo, ".csv")
            )
            raw_parameters_file_name = os.path.join(kernel_folder, csv_name)

            # Never overwrite a file collected by a previous run
            if os.path.exists(raw_parameters_file_name):
                print("\tFound csv file:", raw_parameters_file_name, ", skipping ...")
                continue

            # Keep the rows of this algorithm and only its launch parameters
            # plus the measured performance
            rows_of_algo = data[data["algorithm"] == name_algo]
            columns = kernel_algo.launch_parameters + ["perf (Gflop/s)"]
            rows_of_algo[columns].to_csv(raw_parameters_file_name, index=False)
            print("\tWrote", raw_parameters_file_name)
예제 #2
0
def get_predive_model_performances(y_true, y_pred, x_mnk, max_performances_ref,
                                   max_performances_algo):
    """
    Evaluate, per (m, n, k), the performance of the parameter set the model picks.

    For each distinct value in `x_mnk["mnk"]`, the row with the highest
    predicted value in `y_pred` is selected and the corresponding entry of
    `y_true` is taken as the performance actually obtained.

    :param y_true: pandas object (indexed with `.iloc`) holding the true values
    :param y_pred: predicted values, indexable with a list of row positions
    :param x_mnk: DataFrame with an "mnk" column identifying each row's triplet
    :param max_performances_ref: mapping from `to_string(mnk)` to the factor the
        selected true value is multiplied by
    :param max_performances_algo: mapping from (m, n, k) tuple to the divisor
        used for the re-scaled result
    :return: tuple of two dicts keyed by (m, n, k):
        (selected value * reference factor, that product / per-algorithm divisor)
    """
    chosen_true_by_mnk = {}

    for mnk_string in x_mnk["mnk"].unique():

        # Row positions belonging to this triplet
        rows = np.where(x_mnk == mnk_string)[0].tolist()
        assert len(rows) > 0, "idx_mnk is empty"

        # Position (within those rows) of the best prediction, and the true
        # value found at that same position; 'scaled' between 0 and 1
        best = np.argmax(y_pred[rows])
        true_of_rows = y_true.iloc[rows]
        m, n, k = to_tuple(mnk_string)
        chosen_true_by_mnk[(m, n, k)] = true_of_rows.iloc[best].values.item()

    # Undo the scaling using the reference performances
    predictive_model_perf = {
        mnk: perf_scaled * max_performances_ref[to_string(mnk)]
        for mnk, perf_scaled in chosen_true_by_mnk.items()
    }

    # Re-scale performances by algorithm for a fair comparison
    predictive_model_perf_scaled = {
        mnk: perf / max_performances_algo[mnk]
        for mnk, perf in predictive_model_perf.items()
    }

    return predictive_model_perf, predictive_model_perf_scaled
예제 #3
0
def plot_choice_goodness(
    m,
    n,
    k,
    baseline_performances,
    max_performances,
    y_true,
    y_pred,
    train,
    pp,
    scaled=True,
):
    """
    Plot the measured performance profile of all parameter sets of one
    (m, n, k)-triplet, annotated with three horizontal lines: the maximum
    measured performance, the performance of the parameter set with the highest
    predicted value ("chosen"), and the baseline performance.

    The figure is saved through `pp.savefig()` and closed afterwards so that
    repeated calls do not accumulate open pyplot figures.

    :param m, n, k: triplet the data belongs to (used for the title and lookups)
    :param baseline_performances: mapping from `to_string(m, n, k)` to the
        baseline performance
    :param max_performances: mapping from "mxnxk" string to the maximum
        performance, used to scale the baseline when `scaled` is True
    :param y_true: array of measured performances (must provide `.flatten()`)
    :param y_pred: array of predicted performances (must provide `.tolist()`)
    :param train: True if the data is training data, False for test data
    :param pp: object providing `savefig()`, presumably a matplotlib PdfPages
        instance -- TODO confirm against caller
    :param scaled: if True, values are multiplied by 100 and shown as percentages
    """
    # Sort in ascending performances
    data_mnk = pd.DataFrame()
    if scaled:
        data_mnk["perf_true"] = (100 * y_true.flatten()).tolist()
        data_mnk["perf_pred"] = (100 * y_pred).tolist()
    else:
        data_mnk["perf_true"] = y_true.flatten().tolist()
        data_mnk["perf_pred"] = y_pred.tolist()
    data_mnk.sort_values(by="perf_true", inplace=True)

    # Compute every annotated value before a figure exists, so that a failing
    # lookup cannot leave a half-drawn figure behind
    # autotuning
    perf_autotuned_algo = data_mnk["perf_true"].max()

    # chosen: measured performance of the best-predicted parameter set
    # (idxmax returns an index label, which survives the sort above)
    idx_perf_chosen = data_mnk["perf_pred"].idxmax()
    perf_chosen = data_mnk["perf_true"][idx_perf_chosen]

    # baseline
    if scaled:
        # baseline = per algo, scale it to 0-1
        perf_baseline = (100 * baseline_performances[to_string(m, n, k)] /
                         max_performances["{}x{}x{}".format(m, n, k)])
    else:
        perf_baseline = baseline_performances[to_string(m, n, k)]

    marker_size = 1
    par_set_ids = range(len(data_mnk.index.values))
    dataset_label = "train" if train else "test"  # avoid shadowing builtin `type`
    x = [0, len(y_true)]
    y = np.array([1, 1])
    perf_num = "{:2.2f}"

    # Plot
    fig = plt.figure()
    try:
        plt.plot(
            par_set_ids,
            data_mnk["perf_true"],
            "b.",
            markersize=marker_size,
            label="measured performances",
        )
        plt.xlabel("Parameter set id")
        # NOTE(review): this label is also used when scaled=False -- confirm
        # whether a different unit label is wanted in that case
        plt.ylabel("Performance scaled [%]")
        plt.title("Performance profile of parameter sets for " +
                  str((m, n, k)) + "-triplet (" + dataset_label + ")")

        # Annotate
        plt.plot(
            x,
            perf_autotuned_algo * y,
            "k-",
            label="max (for this algo): " +
            perf_num.format(perf_autotuned_algo),
        )
        plt.plot(x,
                 perf_chosen * y,
                 "r-",
                 label="chosen: " + perf_num.format(perf_chosen))
        plt.plot(x,
                 perf_baseline * y,
                 "g-",
                 label="baseline: " + perf_num.format(perf_baseline))

        plt.legend(loc="lower right")
        pp.savefig()
    finally:
        # pyplot keeps every figure it created alive until it is closed
        plt.close(fig)
예제 #4
0
def main(tunedir, arch):
    """
    This script is part of the workflow for predictive modelling of optimal libcusmm parameters.
    For more details, see predict.md.

    After downloading raw data from the dedicated repository, use this script to
    - Compute derived training data and write it to a CSV file
    - Record maximum and baseline performances of (m,n,k)-triplets in JSON files

    :param tunedir: folder containing the `raw_training_data_<algo>.csv` files;
        all output files are written into this folder as well
    :param arch: GPU architecture identifier; "sm_<arch>" selects the entry read
        from kernels/gpu_properties.json
    """
    # ===============================================================================
    # Read GPU properties and autotuning properties
    with open("kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)["sm_" + str(arch)]
    with open("kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)

    # ===============================================================================
    # Loop over algorithms
    max_performances_per_mnk = dict()
    baseline_performances_per_algo_per_mnk = {
        "tiny": dict(),
        "small": dict(),
        "medium": dict(),
        "largeDB1": dict(),
        "largeDB2": dict(),
    }
    chunk_size = 10000  # Number of rows of CSV file to process at a time
    derived_training_data_filename_base = "training_data_{}_{}.csv"

    for name_algo in kernel_algorithm:

        raw_training_data_filename = os.path.join(
            tunedir, "raw_training_data_{}.csv".format(name_algo))
        print("\nReading from {}".format(raw_training_data_filename))

        # Read CSV and loop over chunks
        chunks = pd.read_csv(raw_training_data_filename, chunksize=chunk_size)
        for chunk_index, data_chunk in enumerate(chunks):

            # Print progress (1-based, right-aligned)
            print("Read chunk {:>5}".format(chunk_index + 1))

            # Get max_performance_per_mnk
            # The same (m, n, k) can show up in several chunks and for several
            # algorithms: keep the running maximum instead of the last value
            max_performances = get_max_performances_per_mnk(data_chunk)
            for mnk_string, perf in zip(to_string(*max_performances.keys()),
                                        max_performances.values()):
                previous = max_performances_per_mnk.get(mnk_string)
                if previous is None or perf > previous:
                    max_performances_per_mnk[mnk_string] = perf

            # Get baseline_per_mnk
            # setdefault: accept algorithms missing from the initial dict
            baseline_performances_algo = get_baseline_performances_per_mnk(
                data_chunk, name_algo, gpu_properties, autotuning_properties)
            baseline_performances_per_algo_per_mnk.setdefault(
                name_algo, dict()).update(
                    dict(
                        zip(
                            to_string(*baseline_performances_algo.keys()),
                            baseline_performances_algo.values(),
                        )))

            # Compute derived parameters
            data_chunk["algorithm"] = [name_algo] * len(
                data_chunk.index)  # add 'algorithm' column manually
            parameter_sets = PredictiveParameters(data_chunk, gpu_properties,
                                                  autotuning_properties,
                                                  max_performances)
            pars_to_get = derived_parameters["common"] + derived_parameters[
                name_algo]
            new_data = parameter_sets.get_features(pars_to_get)

            # Write derived parameters (chunk files are numbered from 0)
            derived_training_data_filename = os.path.join(
                tunedir,
                derived_training_data_filename_base.format(
                    name_algo, chunk_index))
            new_data[pars_to_get].to_csv(derived_training_data_filename,
                                         index=False)
            print("\tWrote", derived_training_data_filename)

    # ===============================================================================
    print("\nRead all raw and computed all derived data")

    # Print header lines & merge instructions
    # File names in the printed commands are relative because the first
    # command changes into tunedir; the files themselves are accessed through
    # tunedir so that this works from any working directory.
    print("\n$ # Merge instructions:")
    print("$ cd {}".format(tunedir))
    for name_algo in kernel_algorithm:

        # Copy the header line of the first chunk into the merged file
        derived_training_data_filename_chunk = derived_training_data_filename_base.format(
            name_algo, 0)
        with open(os.path.join(tunedir, derived_training_data_filename_chunk),
                  "r") as f:
            header_line = f.readline()
        derived_training_data_filename = "training_data_{}.csv".format(
            name_algo)
        with open(os.path.join(tunedir, derived_training_data_filename),
                  "w") as f:
            f.write(header_line)
        print("$ # Wrote header line to {}".format(
            derived_training_data_filename))

        # Print merge instructions
        print("$ # Append training data chunks to {} by running:".format(
            derived_training_data_filename))
        derived_training_data_filename_wildcard = derived_training_data_filename_base.format(
            name_algo, "*")
        print("$ tail -n +2 -q {to_merge} >> {training_data_file}".format(
            to_merge=derived_training_data_filename_wildcard,
            training_data_file=derived_training_data_filename,
        ))

    # Print max performances
    max_performances_per_mnk_file = os.path.join(tunedir,
                                                 "max_performances.json")
    with open(max_performances_per_mnk_file, "w") as f:
        json.dump(max_performances_per_mnk, f)
    print("\nWrote maximum performances to:\n", max_performances_per_mnk_file)

    # Print baseline
    baseline_performances_per_algo_per_mnk_file = os.path.join(
        tunedir, "baseline_performances_by_algo.json")
    with open(baseline_performances_per_algo_per_mnk_file, "w") as f:
        json.dump(baseline_performances_per_algo_per_mnk, f)
    print(
        "\nWrote baseline performances to:\n",
        baseline_performances_per_algo_per_mnk_file,
    )
예제 #5
0
def print_merging_commands(kernel_folders, kernel_folder_pattern, tunedir):
    """
    Print commands to execute in order to merge CSV files

    For every algorithm in `kernel_algorithm` and for both data types (raw and
    derived), print the shell commands that build the merged training data
    file: a `head` command creating it from the header line of an existing
    per-(m, n, k) file when the merged file does not exist yet, followed by a
    `tail` command appending the data of all per-(m, n, k) files.

    :param kernel_folders: folder paths searched for an existing per-(m, n, k)
        file to take the header line from
    :param kernel_folder_pattern: compiled regular expression whose first three
        groups capture m, n and k in a folder path
    :param tunedir: folder containing the `tune_*` folders
    """
    for algorithm in kernel_algorithm:
        for data_type in ("raw_", ""):

            data_type_name = ("raw" if data_type == "raw_" else
                              "for predictive modelling")
            print(
                "\n$ # Merge instructions for algorithm",
                algorithm,
                "(",
                data_type_name,
                ")",
            )
            training_data_file = "{data_type}training_data_{algorithm}.csv".format(
                data_type=data_type, algorithm=algorithm)

            if os.path.exists(training_data_file):
                print("$ # Found {}, append new training data to this file:".
                      format(training_data_file))

            else:

                # Find an (m, n, k) for this algorithm to get its header line
                for kernel_folder in kernel_folders:

                    # Find (m, n, k)
                    match = kernel_folder_pattern.search(
                        kernel_folder).groups()
                    m = int(match[0])
                    n = int(match[1])
                    k = int(match[2])

                    file_name = os.path.join(
                        kernel_folder,
                        "{data_type}training_data_{mnk}_{algorithm}.csv".
                        format(
                            data_type=data_type,
                            mnk=to_string(m, n, k),
                            algorithm=algorithm,
                        ),
                    )
                    if os.path.exists(file_name):
                        print("$ head -1 {base_file} > {training_data_file}".
                              format(
                                  base_file=file_name,
                                  training_data_file=training_data_file,
                              ))
                        break
                else:
                    # No folder provides a file for this algorithm and data
                    # type: there is nothing to merge
                    print(
                        "None: did not find any existing files for algorithm",
                        algorithm,
                        "and data",
                        data_type_name,
                    )
                    continue

            # Join the path components instead of concatenating them, so that
            # the glob is correct whether or not tunedir ends with a separator
            to_merge = os.path.join(
                tunedir,
                "tune_*",
                "{data_type}training_data_*_{algorithm}.csv".format(
                    data_type=data_type, algorithm=algorithm),
            )
            print("$ tail -n +2 -q {to_merge} >> {training_data_file}".format(
                to_merge=to_merge,
                training_data_file=training_data_file,
            ))
예제 #6
0
def collect_training_data(
    kernel_folders,
    kernel_folder_pattern,
    gpu_properties,
    autotuning_properties,
    max_performances_per_mnk,
    baseline_performances_per_algo_per_mnk,
):
    """
    Collect training data from log files resulting of autotuning

    For every folder, the log data is read, the maximum and baseline
    performances are recorded in the dictionaries passed in, and two CSV files
    per algorithm found in the data are written into the folder: the raw launch
    parameters and the derived parameters. Existing CSV files are not
    overwritten.

    :param kernel_folders: sequence of folder paths to process
    :param kernel_folder_pattern: compiled regular expression whose first three
        groups capture m, n and k in a folder path
    :param gpu_properties: GPU properties, forwarded to the baseline and
        derived-parameter computations
    :param autotuning_properties: autotuning properties, forwarded likewise
    :param max_performances_per_mnk: dict updated in place with the maximum
        performances found in each folder
    :param baseline_performances_per_algo_per_mnk: dict of dicts keyed by
        algorithm name, updated in place with the baseline performances
    """

    n_kernels = len(kernel_folders)

    # For each folder:
    for i, kernel_folder in enumerate(kernel_folders):

        print("\nProcess folder {} ({}/{:,})".format(kernel_folder, i + 1,
                                                     n_kernels))

        # Find (m, n, k)
        match = kernel_folder_pattern.search(kernel_folder).groups()
        m = int(match[0])
        n = int(match[1])
        k = int(match[2])

        # ===============================================================================
        # Collect info from log files
        data = read_log_file(kernel_folder, m, n, k)

        # Collect max performances per (m, n, k)
        max_performances = get_max_performances_per_mnk(data)
        max_performances_per_mnk.update(
            dict(
                zip(to_string(*max_performances.keys()),
                    max_performances.values())))

        # ===============================================================================
        # Write parameters to CSV
        for name_algo, kernel_algo in kernel_algorithm.items():

            # if applicable to this mnk
            if name_algo not in data["algorithm"].values:
                continue

            # Get the data corresponding to this algorithm
            data_algo = data[data["algorithm"] == name_algo]

            # Collect baseline performances per algo, per (m, n, k)
            # setdefault: accept a dict that has no entry for this algorithm yet
            baseline_performances_algo = get_baseline_performances_per_mnk(
                data_algo, name_algo, gpu_properties, autotuning_properties)
            baseline_performances_per_algo_per_mnk.setdefault(
                name_algo, dict()).update(
                    dict(
                        zip(
                            to_string(*baseline_performances_algo.keys()),
                            baseline_performances_algo.values(),
                        )))

            # Does collected csv file exist already?
            mnk_string = to_string(m, n, k)
            raw_parameters_file_name = os.path.join(
                kernel_folder,
                "raw_training_data_" + mnk_string + "_" + name_algo + ".csv",
            )
            derived_parameters_file_name = os.path.join(
                kernel_folder,
                "training_data_" + mnk_string + "_" + name_algo + ".csv",
            )

            if os.path.exists(raw_parameters_file_name):
                print("\tFound csv file:", raw_parameters_file_name,
                      ", skipping ...")

            else:

                # Write raw parameters
                pars_to_get = kernel_algo.launch_parameters + [
                    "perf (Gflop/s)"
                ]
                data_algo[pars_to_get].to_csv(raw_parameters_file_name,
                                              index=False)
                print("\tWrote", raw_parameters_file_name)

            if os.path.exists(derived_parameters_file_name):
                print(
                    "\tFound csv file:",
                    derived_parameters_file_name,
                    ", skipping ...",
                )

            else:
                # Compute derived parameters
                parameter_sets = PredictiveParameters(
                    data_algo,
                    gpu_properties,
                    autotuning_properties,
                    max_performances,
                )
                pars_to_get = (derived_parameters["common"] +
                               derived_parameters[name_algo])
                new_df = parameter_sets.get_features(pars_to_get)

                # Write derived parameters
                # The derived columns live in the frame returned by
                # get_features; DataFrame.merge returns a new frame and never
                # modifies data_algo, so the columns are taken from new_df
                new_df[pars_to_get].to_csv(derived_parameters_file_name,
                                           index=False)
                print("\tWrote", derived_parameters_file_name)