Ejemplo n.º 1
0
def main(params, njobs, baseline, paths_to_models, chunk_size):
    """
    Update parameter file with new optimal parameter predictions given newly trained decision trees

    The parameter file is read, every autotuned kernel found in it is kept
    as is, and a parameter set is computed for every remaining
    (m, n, k)-triplet. The file is then rewritten in place.

    :param params: path to the JSON parameter file to read and overwrite;
                   its basename must be a key of `gpu_architectures`
    :param njobs: forwarded to `get_optimal_kernels` (not used if `baseline`)
    :param baseline: if truthy, compute parameter sets with
                     `get_baseline_kernels` instead of `get_optimal_kernels`
    :param paths_to_models: forwarded to `get_optimal_kernels`
                            (not used if `baseline`)
    :param chunk_size: forwarded to `get_optimal_kernels`
                       (not used if `baseline`)
    """
    # ===============================================================================
    # Load GPU and autotuning properties
    params_basename = os.path.basename(params)
    assert (params_basename in gpu_architectures
            ), "Cannot find compute version for file " + str(params)
    arch_code = gpu_architectures[params_basename]
    with open("../kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)[arch_code]
    with open("../kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)

    # Load autotuned kernel parameters
    # (the loop variable is deliberately not called `params`: it must never
    # clobber the path argument, which is needed again when writing)
    with open(params) as f:
        all_kernels = [
            params_dict_to_kernel(**kernel_params)
            for kernel_params in json.load(f)
        ]
    print("libsmm_acc: Found %d existing parameter sets." % len(all_kernels))
    autotuned_kernels = {(k.m, k.n, k.k): k
                         for k in all_kernels if k.autotuned}

    # ===============================================================================
    # Construct the list of (m,n,k)-triplets for which parameter sets should be made available to libsmm_acc
    mnks = set(combinations(list(range(4, 46)))) | set(autotuned_kernels)

    # ===============================================================================
    # Compute parameter sets
    # Autotuned kernels are kept unchanged; everything else gets predicted.
    kernels_to_print = dict(autotuned_kernels)
    mnks_to_predict = [mnk for mnk in mnks if mnk not in autotuned_kernels]

    if baseline:
        kernels = get_baseline_kernels(mnks_to_predict, gpu_properties,
                                       autotuning_properties)
    else:
        kernels = get_optimal_kernels(
            mnks_to_predict,
            njobs,
            chunk_size,
            paths_to_models,
            gpu_properties,
            autotuning_properties,
            1,  # NOTE(review): meaning of this positional argument is not visible here
        )

    kernels_to_print.update(kernels)

    # ===============================================================================
    # Write to file
    # Serialize *before* opening the file in "w" mode: opening truncates the
    # file, so a serialization error must not be able to wipe existing data.
    s = json.dumps([
        kernels_to_print[mnk].as_dict_for_parameters_json
        for mnk in sorted(kernels_to_print)
    ])
    # One parameter set per line, brackets on their own lines
    s = s.replace("}, ", "},\n")
    s = s.replace("[", "[\n")
    s = s.replace("]", "\n]")
    with open(params, "w") as f:
        f.write(s)
    print("Wrote new predicted parameters to file", params)
Ejemplo n.º 2
0
def main(
    param_fn,
    compiler,
    cpus_per_node,
    max_num_nodes,
    blocksizes,
    blocks_from_param_file,
    tune_dir,
):
    """
    Generate autotuning directories for the requested (m, n, k) triplets

    For every requested triplet that is not already handled by an autotuned
    kernel in `param_fn` and has no existing directory under `tune_dir`,
    a directory "tune_<m>x<n>x<k>/" is created and filled by
    `gen_benchmark`, `gen_jobfile` and `gen_makefile`.

    :param param_fn: path to the JSON file of existing kernel parameters;
                     its basename must be a key of `gpu_architectures`
    :param compiler: forwarded to the `gen_*` helpers
    :param cpus_per_node: forwarded to `gen_jobfile`
    :param max_num_nodes: forwarded to `gen_jobfile`
    :param blocksizes: if `blocks_from_param_file` is truthy, path to a JSON
                       parameter file whose autotuned kernels define the
                       triplets; otherwise a collection of distinct block
                       sizes passed to `combinations` (not modified)
    :param blocks_from_param_file: selects how `blocksizes` is interpreted
    :param tune_dir: directory in which the per-triplet directories are made
    """
    # Read existing parameters
    param_basename = os.path.basename(param_fn)
    assert (param_basename in gpu_architectures
            ), "Cannot find GPU architecture for file " + param_basename
    arch_code = gpu_architectures[param_basename]
    with open("../kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)[arch_code]
    with open("../kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)
    with open(param_fn) as f:
        all_kernels = [
            params_dict_to_kernel(**params) for params in json.load(f)
        ]
    print("Reading parameters from %s" % param_fn)
    autotuned_kernels = [k for k in all_kernels if k.autotuned]
    num_predicted = len(all_kernels) - len(autotuned_kernels)
    print(
        "libsmm_acc: found %d existing parameter sets, of which %d are autotuned and %d are predicted."
        % (len(all_kernels), len(autotuned_kernels), num_predicted))

    # Get blocksizes to be autotuned
    if blocks_from_param_file:  # open and read file
        with open(blocksizes) as f:
            all_kernels_ref = [
                params_dict_to_kernel(**params) for params in json.load(f)
            ]
        print("Reading parameters to autotune from %s" % blocksizes)
        triples = [(k.m, k.n, k.k) for k in all_kernels_ref if k.autotuned]
    else:
        assert len(set(blocksizes)) == len(blocksizes), \
            "blocksizes must not contain duplicates"
        # Get (m, n, k) triplets to be autotuned
        # Sort a copy so the caller's sequence is left untouched (and any
        # iterable with a length, not only lists, is accepted).
        triples = combinations(*sorted(blocksizes))
    print("Requested to autotune %d triplets" % len(triples))

    for (m, n, k) in triples:
        # Skip triplets already covered by an autotuned parameter set
        if any(kern.can_handle(m, n, k) for kern in autotuned_kernels):
            print(
                "Found existing autotuned parameter set for %dx%dx%d, skipping."
                % (m, n, k))
            continue

        # Skip triplets whose directory already exists
        outdir = os.path.join(tune_dir, "tune_%dx%dx%d/" % (m, n, k))
        if os.path.exists(outdir):
            print("Directory %s exists already, skipping." % outdir)
            continue
        os.mkdir(outdir)
        gen_benchmark(outdir, gpu_properties, autotuning_properties, compiler,
                      m, n, k)
        gen_jobfile(outdir, compiler, m, n, k, cpus_per_node, max_num_nodes)
        gen_makefile(outdir, compiler, arch_code)