def main(param_fn, param_new="parameters.json", new_file="parameters.new.json"):
    """
    Merge the kernel parameter sets of `param_new` into those of `param_fn` and
    write the result to `new_file`. On a collision of (m, n, k) triplets, the
    entry from `param_new` wins.

    :param param_fn: path to the JSON file with the existing kernel parameters
    :param param_new: path to the JSON file with the new kernel parameters
                      (default kept for backward compatibility with the old
                      hard-coded filename)
    :param new_file: path of the merged JSON file to write
                     (default kept for backward compatibility)
    """
    # Read new kernel parameters
    with open(param_new) as f:
        new_kernels = [params_dict_to_kernel(**params) for params in json.load(f)]

    # Read old kernel parameters
    with open(param_fn) as f:
        old_kernels = [params_dict_to_kernel(**params) for params in json.load(f)]

    # Merge the two parameter lists: index both by (m, n, k), new entries win
    print("Merging", param_new, "with", param_fn)
    kernels_dict = {(k.m, k.n, k.k): k for k in old_kernels}
    new_kernels_dict = {(k.m, k.n, k.k): k for k in new_kernels}
    kernels_dict.update(new_kernels_dict)

    # Write merged kernel parameters to the output file
    with open(new_file, "w") as f:
        s = json.dumps(
            [
                kernels_dict[kernel].as_dict_for_parameters_json
                for kernel in sorted(kernels_dict.keys())
            ]
        )
        # Poor-man's pretty printing: one parameter-set dictionary per line
        s = s.replace("}, ", "},\n")
        s = s.replace("[", "[\n")
        s = s.replace("]", "\n]")
        f.write(s)
    print("Wrote", new_file)
def find_optimal_kernel(
    mnk, algo, tree, tree_features, gpu_properties, autotuning_properties
):
    """
    Find the optimal kernel parameter set for a given (m, n, k) and a given algorithm

    :return: optimal_kernels: dictionary, keys: (m, n, k),
                              values: Kernel object describing best parameters
    """
    m, n, k = mnk

    # Enumerate the promising parameter combinations for this triplet and algorithm
    candidates = pd.DataFrame(
        kernel_algorithm[algo].promising_parameters(
            m, n, k, gpu_properties, autotuning_properties
        )
    )
    candidates["algorithm"] = [algo] * len(candidates.index)  # add "algorithm" column

    if candidates.empty:
        # Nothing to predict for this triplet/algorithm
        return dict()

    # Derive the predictor features from the raw parameters
    feature_sets = PredictiveParameters(
        candidates, gpu_properties, autotuning_properties, None
    )
    predictors = feature_sets.get_features(tree_features)
    if algo == "medium":
        # The "medium" tree expects generic feature names f0, f1, ...
        # (zip truncates to the shorter sequence, so the +1 is harmless)
        renaming = dict(
            zip(
                predictors.columns,
                ["f{}".format(i) for i in range(0, len(predictors.columns) + 1)],
            )
        )
        predictors = predictors.rename(columns=renaming)

    # Predict the performance of every candidate parameter set
    scored = feature_sets.params
    scored["perf"] = tree.predict(predictors)
    del predictors

    # Keep only the best-scoring candidate
    best = max(scored.to_dict("records"), key=lambda rec: rec["perf"])
    del scored

    return {(m, n, k): params_dict_to_kernel(**best, source="predicted")}
def main(params, njobs, baseline, paths_to_models, chunk_size):
    """
    Update parameter file with new optimal parameter predictions given newly trained decision trees
    """
    # ===============================================================================
    # Load GPU and autotuning properties
    assert (
        os.path.basename(params) in gpu_architectures.keys()
    ), "Cannot find compute version for file " + str(params)
    arch_code = gpu_architectures[os.path.basename(params)]
    with open("../kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)[arch_code]
    with open("../kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)

    # Load autotuned kernel parameters, indexed by their (m, n, k) triplet
    with open(params) as f:
        all_kernels = [params_dict_to_kernel(**p) for p in json.load(f)]
    print("libsmm_acc: Found %d existing parameter sets." % len(all_kernels))
    autotuned_kernels = {(k.m, k.n, k.k): k for k in all_kernels if k.autotuned}

    # ===============================================================================
    # Construct the list of (m,n,k)-triplets for which parameter sets should be
    # made available to libcusmm
    mnks = set(combinations(list(range(4, 46)))) | set(autotuned_kernels.keys())

    # ===============================================================================
    # Compute parameter sets: keep the autotuned ones, predict the remaining ones
    mnks_to_predict = list()
    kernels_to_print = dict()
    for mnk in mnks:
        if mnk in autotuned_kernels.keys():
            kernels_to_print[mnk] = autotuned_kernels[mnk]
        else:
            mnks_to_predict.append(mnk)

    if baseline:
        kernels = get_baseline_kernels(
            mnks_to_predict, gpu_properties, autotuning_properties
        )
    else:
        kernels = get_optimal_kernels(
            mnks_to_predict,
            njobs,
            chunk_size,
            paths_to_models,
            gpu_properties,
            autotuning_properties,
            1,
        )

    kernels_to_print.update(kernels)

    # ===============================================================================
    # Write to file
    with open(params, "w") as f:
        s = json.dumps(
            [
                kernels_to_print[kernel].as_dict_for_parameters_json
                for kernel in sorted(kernels_to_print.keys())
            ]
        )
        # Poor-man's pretty printing: one parameter-set dictionary per line
        s = s.replace("}, ", "},\n")
        s = s.replace("[", "[\n")
        s = s.replace("]", "\n]")
        f.write(s)
    print("Wrote new predicted parameters to file", params)
def main(
    param_fn,
    compiler,
    cpus_per_node,
    max_num_nodes,
    blocksizes,
    blocks_from_param_file,
    tune_dir,
):
    """
    Set up an autotuning directory (benchmark, jobfile, makefile) for every
    requested (m, n, k) triplet that has neither an existing autotuned
    parameter set nor an existing tuning directory.
    """
    # Read existing parameters
    assert (
        os.path.basename(param_fn) in gpu_architectures.keys()
    ), "Cannot find GPU architecture for file " + os.path.basename(param_fn)
    arch_code = gpu_architectures[os.path.basename(param_fn)]
    with open("../kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)[arch_code]
    with open("../kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)
    with open(param_fn) as f:
        all_kernels = [params_dict_to_kernel(**p) for p in json.load(f)]
    print("Reading parameters from %s" % param_fn)

    autotuned_kernels = [k for k in all_kernels if k.autotuned]
    predicted_kernels = [k for k in all_kernels if not k.autotuned]
    print(
        "libsmm_acc: found %d existing parameter sets, of which %d are autotuned and %d are predicted."
        % (len(all_kernels), len(autotuned_kernels), len(predicted_kernels))
    )

    # Get blocksizes to be autotuned
    if blocks_from_param_file:
        # `blocksizes` is a parameter file: autotune the triplets it marks autotuned
        with open(blocksizes) as f:
            all_kernels_ref = [params_dict_to_kernel(**p) for p in json.load(f)]
        print("Reading parameters to autotune from %s" % blocksizes)
        triples = [(k.m, k.n, k.k) for k in all_kernels_ref if k.autotuned]
    else:
        # `blocksizes` is a list of block dimensions: autotune all combinations
        assert len(set(blocksizes)) == len(blocksizes)
        blocksizes.sort()
        triples = combinations(*blocksizes)
    print("Requested to autotune %d triplets" % len(triples))

    for m, n, k in triples:
        # Skip triplets already covered by an autotuned parameter set
        existing = [kern for kern in autotuned_kernels if kern.can_handle(m, n, k)]
        if existing:
            print(
                "Found existing autotuned parameter set for %dx%dx%d, skipping."
                % (m, n, k)
            )
            continue

        # Skip triplets whose tuning directory already exists
        outdir = os.path.join(tune_dir, "tune_%dx%dx%d/" % (m, n, k))
        if os.path.exists(outdir):
            print("Directory %s exists already, skipping." % outdir)
            continue

        os.mkdir(outdir)
        gen_benchmark(outdir, gpu_properties, autotuning_properties, compiler, m, n, k)
        gen_jobfile(outdir, compiler, m, n, k, cpus_per_node, max_num_nodes)
        gen_makefile(outdir, compiler, arch_code)