def write_latex_table(self, latex_module):
    """Append a one-line description of the shared command arguments.

    Nothing is appended when there are no argument sets.  When the argument
    sets differ from each other (ignoring the sweep keys) the shared
    arguments are reported as being held constant; otherwise the line is
    reported as the command itself.

    Args:
        latex_module: Object with an ``append`` method that receives the
            generated text.
    """
    if len(self.argument_sets) == 0:
        return

    diff = cr.ArgumentSetDifference(self.argument_sets,
                                    ignore_keys=self._get_sweep_keys())
    comparing = len(diff.get_differences()) > 0
    if comparing:
        prefix = 'For all runs, ``'
        suffix = "'' is held constant."
    else:
        prefix = 'Command: '
        suffix = ''

    # Only the arguments common to every argument set are listed.
    shared_args = self.argument_sets[0].get_args(
        require_keys=diff.get_similarities())
    latex_module.append(prefix + ' '.join(shared_args) + suffix)
def plot(self, run_configurations, axes):
    """Plot measured bandwidth against the swept argument on ``axes``.

    The x value of each point is the value of the first argument that
    differs between the argument sets (``'DEFAULT'`` when it is unset).
    The y values are the results of ``collect_timing`` for every argument
    set, sorted per metric and concatenated per run-configuration group.
    For recognised functions a horizontal reference line is drawn at the
    theoretical peak bandwidth.

    Args:
        run_configurations: Indexable collection of run configurations that
            provides ``group_by_label()``.
        axes: Axes object to draw on (``plot``, ``scatter``, ``xaxis`` ...).

    Returns:
        ``True`` once the plot has been drawn, ``None`` when there are no
        argument sets or no metrics.

    Raises:
        ValueError: If a sorted entry holds more than one argument set.
    """
    if len(self.argument_sets) == 0:
        return

    # No sort applied, but labels provided
    sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
    argument_diff = cr.ArgumentSetDifference(
        self.argument_sets, ignore_keys=self._get_sweep_keys())
    differences = argument_diff.get_differences()
    xLabel = list(differences)

    # One x value per argument set, taken from the first differing key only.
    test = []
    for _, argument_sets in sorted_argument_sets.items():
        argument_set = argument_sets[0]
        precision = argument_set.get("compute_type").get_value()
        function = argument_set.get("function").get_value()
        for key in differences:
            argument = argument_set.get(key)
            test.append(
                argument.get_value() if argument.is_set() else 'DEFAULT')
            break

    grouped_run_configurations = run_configurations.group_by_label()
    metric_labels = list(
        self.argument_sets[0].collect_timing(run_configurations[0]))
    if len(metric_labels) == 0:
        return

    # loop over independent outputs
    y_scatter_by_group = OrderedDict()
    for group_label, run_configuration_group in grouped_run_configurations.items():
        y_scatter_by_group[group_label] = []
        # loop over argument sets that differ other than the swept variable(s)
        for _, partial_argument_sets in sorted_argument_sets.items():
            if len(partial_argument_sets) != 1:
                raise ValueError(
                    'Assumed that sorting argument sets with no keys has a single element per sort.'
                )
            argument_set = partial_argument_sets[0]
            # One array of y values for each metric
            y_list_by_metric = OrderedDict()
            # loop over number of coarse grain runs and concatenate results
            for run_configuration in run_configuration_group:
                results = argument_set.collect_timing(run_configuration)
                for metric_label in results:
                    y_list_by_metric.setdefault(metric_label, []).extend(
                        results[metric_label])
            for _, y_list in y_list_by_metric.items():
                y_scatter_by_group[group_label].extend(sorted(y_list))

    # Reference: MI-100 theoretical memory bandwidth by default
    # (radeon 7 would be 1000; it is not used by this plot).
    tmb_MI100 = 1200

    # NOTE(review): the peak does not depend on the run configuration, yet
    # the reference line is still drawn once per run configuration as before.
    for _, run_configuration_group in grouped_run_configurations.items():
        for _ in run_configuration_group:
            precisionBits = int(re.search(r'\d+', precision).group())
            # Every recognised case uses the same peak value, so the former
            # if/elif chain (and the try/except around a plain assignment,
            # which could never fail) collapses into one condition.
            # TODO better logic to decide memory bound vs compute bound
            has_peak = (function in ('trsm', 'gemm')
                        or (function in ('copy', 'swap')
                            and precisionBits == 32)
                        or (self.flops and self.mem))
            if not has_peak:
                continue
            theoMax = round(tmb_MI100)
            axes.plot((test[0], test[-1]), (theoMax, theoMax),
                      label="Theoretical Peak Performance: " + str(theoMax) +
                      "GB/s")

    for group_label in y_scatter_by_group:
        axes.scatter(test, y_scatter_by_group[group_label], color='black')
        axes.plot(test, y_scatter_by_group[group_label], '-ok')

    axes.xaxis.set_minor_locator(AutoMinorLocator())
    axes.yaxis.set_minor_locator(AutoMinorLocator())
    axes.set_ylabel('Bandwidth (GB/s)')
    axes.set_xlabel('='.join(xLabel))
    return True
def plot(self, run_configurations, figure, axes, cuda, compare):
    """Plot measured performance as a 2D curve or, optionally, a 3D surface.

    In the normal (2D) mode the x value of each point is the value of the
    first argument that differs between the argument sets and a horizontal
    line marks the theoretical peak derived from the card clocks.  When
    ``user_args.surface_plot`` is set, the values of the ``m`` and ``n``
    arguments are collected instead and the results are drawn with
    ``plot_surface``, saved as a PDF in the documentation directory and
    shown.

    Changes compared with the previous revision:
      * an unknown compute type yields an empty prefix instead of ``None``
        (which made the label concatenation raise ``TypeError``);
      * the surface title has the missing space before ``Performance``;
      * the 2D scatter/axis labelling is skipped in surface mode, where the
        2D x list is empty;
      * only ``Exception`` is caught around the flops/mem equations and the
        cause is printed.

    Args:
        run_configurations: Indexable collection of run configurations that
            provides ``group_by_label()``.
        figure: Figure owning ``axes`` (used for the title and colour bar).
        axes: Axes object to draw on.
        cuda: Unused by this plot; kept for interface compatibility.
        compare: Unused by this plot; kept for interface compatibility.

    Returns:
        ``True`` once the plot has been drawn, ``None`` when there are no
        argument sets or no metrics.

    Raises:
        ValueError: If a sorted entry holds more than one argument set.
    """

    def get_function_prefix(compute_type):
        """Return the precision prefix for ``compute_type`` ('' if unknown)."""
        # 'bf16_r' has to be tested before 'f16_r', which it contains.
        known_prefixes = (('32_r', 's'), ('64_r', 'd'), ('32_c', 'c'),
                          ('64_c', 'z'), ('bf16_r', 'bf'), ('f16_r', 'h'))
        for marker, prefix in known_prefixes:
            if marker in compute_type:
                return prefix
        print('Error - Cannot detect precision preFix: ' + compute_type)
        return ''

    def value_or_default(argument):
        """Return the argument's value, or 'DEFAULT' when it is not set."""
        return argument.get_value() if argument.is_set() else 'DEFAULT'

    def as_square_grid(values):
        """Reshape a flat sequence into a square 2D array."""
        flat = np.array(values)
        side = int(math.sqrt(flat.size))
        return np.reshape(flat, (side, side))

    if len(self.argument_sets) == 0:
        return

    # No sort applied, but labels provided
    sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
    argument_diff = cr.ArgumentSetDifference(
        self.argument_sets, ignore_keys=self._get_sweep_keys())
    differences = argument_diff.get_differences()
    xLabel = list(differences)

    # NOTE(review): `user_args` is read as a module-level name while the
    # output directory below comes from `self.user_args` -- confirm that
    # both refer to the same parsed arguments.
    test = []    # x values of the 2D plot
    test_x = []  # 'm' values of the surface plot
    test_y = []  # 'n' values of the surface plot
    for _, argument_sets in sorted_argument_sets.items():
        argument_set = argument_sets[0]
        precision = argument_set.get("compute_type").get_value()
        function = argument_set.get("function").get_value()
        for key in differences:
            argument = argument_set.get(key)
            if user_args.surface_plot:
                if key == 'm':
                    test_x.append(value_or_default(argument))
                elif key == 'n':
                    test_y.append(value_or_default(argument))
            else:
                # 2D mode only uses the first differing key.
                test.append(value_or_default(argument))
                break

    grouped_run_configurations = run_configurations.group_by_label()
    metric_labels = list(
        self.argument_sets[0].collect_timing(run_configurations[0]))
    if len(metric_labels) == 0:
        return

    # loop over independent outputs
    y_scatter_by_group = OrderedDict()
    for group_label, run_configuration_group in grouped_run_configurations.items():
        y_scatter_by_group[group_label] = []
        # loop over argument sets that differ other than the swept variable(s)
        for _, partial_argument_sets in sorted_argument_sets.items():
            if len(partial_argument_sets) != 1:
                raise ValueError(
                    'Assumed that sorting argument sets with no keys has a single element per sort.'
                )
            argument_set = partial_argument_sets[0]
            # One array of y values for each metric
            y_list_by_metric = OrderedDict()
            # loop over number of coarse grain runs and concatenate results
            for run_configuration in run_configuration_group:
                results = argument_set.collect_timing(run_configuration)
                for metric_label in results:
                    y_list_by_metric.setdefault(metric_label, []).extend(
                        results[metric_label])
            for _, y_list in y_list_by_metric.items():
                y_scatter_by_group[group_label].extend(sorted(y_list))

    for group_label, run_configuration_group in grouped_run_configurations.items():
        for run_configuration in run_configuration_group:
            # Clock strings look like "<number>Mhz"; keep the numeric part.
            card_specs = run_configuration.load_specifications()['Card0']
            mclk = card_specs["Start mclk"].split("Mhz")[0]
            sclk = card_specs["Start sclk"].split("Mhz")[0]

            theoMax = 0
            precisionBits = int(re.search(r'\d+', precision).group())
            if function == 'gemm' and precisionBits == 32:  # xdlops
                theoMax = float(sclk) / 1000.00 * 256 * 120
            elif function == 'trsm' or function == 'gemm':
                # TODO better logic to decide memory bound vs compute bound
                # scaling to appropriate precision
                theoMax = (float(sclk) / 1000.00 * 128 * 120 * 32.00 /
                           precisionBits)
            elif self.flops and self.mem:
                try:
                    # `n` and `m` look unused but are read by the evaluated
                    # equations, so they must stay locals of this function.
                    n = 100000
                    m = 100000
                    # NOTE(review): eval() executes arbitrary code; the
                    # equations must come from a trusted configuration.
                    flops = eval(self.flops)
                    mem = eval(self.mem)
                    theoMax = (float(mclk) / float(mem) * flops * 32 /
                               precisionBits / 4)
                except Exception as err:
                    print("flops and mem equations produce errors: " +
                          str(err))

            if user_args.surface_plot:
                # plot a 3D surface like in the example
                # mplot3d/surface3d_demo
                X = as_square_grid(test_x)
                Y = as_square_grid(test_y)
                Z = as_square_grid(y_scatter_by_group[group_label])
                plot_title = (get_function_prefix(precision) + function +
                              ' Performance')

                axes.legend()
                figure.suptitle(plot_title, fontsize=14, fontweight='bold')
                axes.set_xlabel('m == lda',
                                fontsize='large',
                                fontweight='bold',
                                labelpad=9)
                axes.set_ylabel('n',
                                fontsize='large',
                                fontweight='bold',
                                labelpad=9)
                axes.zaxis.set_rotate_label(False)
                axes.set_zlabel(
                    metric_labels[0]
                    if len(metric_labels) == 1 else 'Time (s)',
                    fontsize='large',
                    fontweight='bold',
                    rotation=0,
                    labelpad=36)
                surf = axes.plot_surface(X,
                                         Y,
                                         Z,
                                         rstride=1,
                                         cstride=1,
                                         cmap=cm.coolwarm,
                                         linewidth=0,
                                         antialiased=False)
                figure.colorbar(surf, shrink=0.5, aspect=10)
                plt.savefig(
                    os.path.join(self.user_args.documentation_directory,
                                 plot_title + '_auto_plot.pdf'))
                plt.show()
            elif theoMax:
                # Normal 2d plot
                theoMax = round(theoMax)
                axes.plot((test[0], test[-1]), (theoMax, theoMax),
                          label="Theoretical Peak Performance: " +
                          str(theoMax) + " GFLOP/s")

    if not user_args.surface_plot:
        series_label = (get_function_prefix(precision) + function +
                        ' Performance')
        for group_label in y_scatter_by_group:
            axes.scatter(test,
                         y_scatter_by_group[group_label],
                         color='#000000')
            axes.plot(test,
                      y_scatter_by_group[group_label],
                      '-ok',
                      color='#000000',
                      label=series_label)

        axes.xaxis.set_minor_locator(AutoMinorLocator())
        axes.yaxis.set_minor_locator(AutoMinorLocator())
        axes.set_ylabel(
            metric_labels[0] if len(metric_labels) == 1 else 'Time (s)')
        axes.set_xlabel('='.join(xLabel))
    return True
def plot(self, run_configurations, axes, cuda, compare):
    """Plot measured bandwidth, optionally next to a comparison run.

    The x value of each point is the value of the first argument that
    differs between the argument sets.  The primary results come from
    ``collect_timing``; when ``compare`` is true a second series is built
    from ``collect_timing_compare``.  Reference lines mark the theoretical
    peak bandwidth for recognised functions.

    The clock specifications that used to be loaded here were never read by
    this plot, so they are no longer loaded; the ``try``/bare ``except``
    around constant assignments has been removed as well.

    Args:
        run_configurations: Indexable collection of run configurations that
            provides ``group_by_label()``.
        axes: Axes object to draw on.
        cuda: When true the primary series is drawn and labelled as the
            V-100 series instead of the MI-100 series.
        compare: When true the comparison results are drawn as an
            additional V-100 series.

    Returns:
        ``True`` once the plot has been drawn, ``None`` when there are no
        argument sets or no metrics.

    Raises:
        ValueError: If a sorted entry holds more than one argument set.
    """
    amd_color = '#ED1C24'
    cuda_color = '#76B900'

    def draw_series(values_by_group, color, label):
        """Draw one scatter + line pair per group."""
        for group_label in y_scatter_by_group:
            axes.scatter(test,
                         values_by_group[group_label],
                         color=color,
                         label=label)
            axes.plot(test, values_by_group[group_label], '-ok', color=color)

    if len(self.argument_sets) == 0:
        return

    # No sort applied, but labels provided
    sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
    argument_diff = cr.ArgumentSetDifference(
        self.argument_sets, ignore_keys=self._get_sweep_keys())
    differences = argument_diff.get_differences()
    xLabel = list(differences)

    # One x value per argument set, taken from the first differing key only.
    test = []
    for _, argument_sets in sorted_argument_sets.items():
        argument_set = argument_sets[0]
        precision = argument_set.get("compute_type").get_value()
        function = argument_set.get("function").get_value()
        for key in differences:
            argument = argument_set.get(key)
            test.append(
                argument.get_value() if argument.is_set() else 'DEFAULT')
            break

    grouped_run_configurations = run_configurations.group_by_label()
    metric_labels = list(
        self.argument_sets[0].collect_timing(run_configurations[0]))
    if len(metric_labels) == 0:
        return

    # loop over independent outputs
    y_scatter_by_group = OrderedDict()
    # for comparison runs
    y_scatter_by_group2 = OrderedDict()
    for group_label, run_configuration_group in grouped_run_configurations.items():
        print(group_label)
        y_scatter_by_group[group_label] = []
        y_scatter_by_group2[group_label] = []
        # loop over argument sets that differ other than the swept variable(s)
        for _, partial_argument_sets in sorted_argument_sets.items():
            if len(partial_argument_sets) != 1:
                raise ValueError(
                    'Assumed that sorting argument sets with no keys has a single element per sort.'
                )
            argument_set = partial_argument_sets[0]
            # One array of y values for each metric
            y_list_by_metric = OrderedDict()
            # For comparison runs
            y_list_by_metric2 = OrderedDict()
            # loop over number of coarse grain runs and concatenate results
            for run_configuration in run_configuration_group:
                results = argument_set.collect_timing(run_configuration)
                for metric_label in results:
                    y_list_by_metric.setdefault(metric_label, []).extend(
                        results[metric_label])
                if compare:
                    results2 = argument_set.collect_timing_compare(
                        run_configuration)
                    for metric_label in results2:
                        y_list_by_metric2.setdefault(metric_label, []).extend(
                            results2[metric_label])
            for _, y_list in y_list_by_metric.items():
                y_scatter_by_group[group_label].extend(sorted(y_list))
            if compare:
                for _, y_list in y_list_by_metric2.items():
                    y_scatter_by_group2[group_label].extend(sorted(y_list))

    # Reference theoretical memory bandwidths by default
    # (MI-100 would be 1200; it is not used by this plot).
    tmb_radeon7 = 1000
    tmb_V100 = 900

    # NOTE(review): the AMD reference line is labelled "MI-100" but uses the
    # radeon 7 bandwidth -- confirm which one is intended.
    for _, run_configuration_group in grouped_run_configurations.items():
        for _ in run_configuration_group:
            precisionBits = int(re.search(r'\d+', precision).group())
            # Every recognised case uses the same pair of peak values, so
            # the former if/elif chain collapses into one condition.
            # TODO better logic to decide memory bound vs compute bound
            has_peak = (function in ('trsm', 'gemm')
                        or (function in ('copy', 'swap')
                            and precisionBits == 32)
                        or (self.flops and self.mem))
            if not has_peak:
                continue

            theoMax = tmb_radeon7
            print(theoMax)
            theoMax = round(theoMax)
            x_co = (test[0], test[-1])
            if not cuda:
                axes.plot(x_co, (theoMax, theoMax),
                          color=amd_color,
                          label="Theoretical Peak Performance MI-100: " +
                          str(theoMax) + " GB/s")
            if compare or cuda:
                theoMax_cuda = round(tmb_V100)
                axes.plot(x_co, (theoMax_cuda, theoMax_cuda),
                          color=cuda_color,
                          label="Theoretical Peak Performance V-100: " +
                          str(theoMax_cuda) + " GB/s")

    if not cuda:
        draw_series(y_scatter_by_group, amd_color, 'MI-100 Performance')
    else:
        draw_series(y_scatter_by_group, cuda_color, 'V-100 Performance')

    # if compare - already plotted AMD above
    if compare:
        draw_series(y_scatter_by_group2, cuda_color, "V-100 Performance")

    axes.xaxis.set_minor_locator(AutoMinorLocator())
    axes.yaxis.set_minor_locator(AutoMinorLocator())
    axes.set_ylabel(
        metric_labels[0] if len(metric_labels) == 1 else 'Time (s)')
    axes.set_xlabel('='.join(xLabel))
    return True
def plot(self, run_configurations, axes, cuda, compare):
    """Plot efficiency (measured result / theoretical peak) on ``axes``.

    The x value of each point is the value of the first argument that
    differs between the argument sets.  For every run configuration the
    theoretical peak is derived from the card clocks and each measured
    value is divided by it.  When ``compare`` is true the comparison
    results are divided by the peak derived from the comparison
    specifications.

    Changes compared with the previous revision:
      * a curve whose theoretical peak is zero (unknown function, failing
        equations, or no CUDA formula) is skipped with a message instead of
        raising ``ZeroDivisionError``;
      * ``m`` is defined for the flops/mem equations, as in the other plots;
      * only ``Exception`` is caught around the equations and the cause is
        printed.

    Args:
        run_configurations: Indexable collection of run configurations that
            provides ``group_by_label()``.
        axes: Axes object to draw on.
        cuda: When true the clocks are read with the CUDA key names and the
            primary series is drawn as the V-100 series.
        compare: When true the comparison results are drawn as an
            additional V-100 series.

    Returns:
        ``True`` once the plot has been drawn, ``None`` when there are no
        argument sets or no metrics.

    Raises:
        ValueError: If a sorted entry holds more than one argument set.
    """

    def plot_efficiency(values, peak, color, label):
        """Draw values / peak, or report why the curve cannot be drawn."""
        if not peak:
            print("Skipping " + label +
                  " efficiency curve: no theoretical peak available")
            return
        axes.plot(test, [value / peak for value in values],
                  color=color,
                  label=label)

    if len(self.argument_sets) == 0:
        return

    # No sort applied, but labels provided
    sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
    argument_diff = cr.ArgumentSetDifference(
        self.argument_sets, ignore_keys=self._get_sweep_keys())
    differences = argument_diff.get_differences()
    xLabel = list(differences)

    # One x value per argument set, taken from the first differing key only.
    test = []
    for _, argument_sets in sorted_argument_sets.items():
        argument_set = argument_sets[0]
        precision = argument_set.get("compute_type").get_value()
        function = argument_set.get("function").get_value()
        for key in differences:
            argument = argument_set.get(key)
            test.append(
                argument.get_value() if argument.is_set() else 'DEFAULT')
            break

    grouped_run_configurations = run_configurations.group_by_label()
    metric_labels = list(
        self.argument_sets[0].collect_timing(run_configurations[0]))
    if len(metric_labels) == 0:
        return

    # loop over independent outputs
    y_scatter_by_group = OrderedDict()
    # For comparison runs
    y_scatter_by_group2 = OrderedDict()
    for group_label, run_configuration_group in grouped_run_configurations.items():
        y_scatter_by_group[group_label] = []
        if compare:
            y_scatter_by_group2[group_label] = []
        # loop over argument sets that differ other than the swept variable(s)
        for _, partial_argument_sets in sorted_argument_sets.items():
            if len(partial_argument_sets) != 1:
                raise ValueError(
                    'Assumed that sorting argument sets with no keys has a single element per sort.'
                )
            argument_set = partial_argument_sets[0]
            # One array of y values for each metric
            y_list_by_metric = OrderedDict()
            # For comparison runs
            y_list_by_metric2 = OrderedDict()
            # loop over number of coarse grain runs and concatenate results
            for run_configuration in run_configuration_group:
                results = argument_set.collect_timing(run_configuration)
                for metric_label in results:
                    y_list_by_metric.setdefault(metric_label, []).extend(
                        results[metric_label])
                if compare:
                    results2 = argument_set.collect_timing_compare(
                        run_configuration)
                    for metric_label in results2:
                        y_list_by_metric2.setdefault(metric_label, []).extend(
                            results2[metric_label])
            for _, y_list in y_list_by_metric.items():
                y_scatter_by_group[group_label].extend(sorted(y_list))
            if compare:
                for _, y_list in y_list_by_metric2.items():
                    y_scatter_by_group2[group_label].extend(sorted(y_list))

    mhz_str_cuda = "MHz"
    sys_clk_str_cuda = "sm"
    if cuda:
        mhz_str = mhz_str_cuda
        mem_clk_str = "memory"
        sys_clk_str = sys_clk_str_cuda
    else:
        mhz_str = "Mhz"
        mem_clk_str = "mclk"
        sys_clk_str = "sclk"

    for group_label, run_configuration_group in grouped_run_configurations.items():
        for run_configuration in run_configuration_group:
            # Reference: MI-100 clocks by default
            # mclk = 1200.0
            # sclk = 1087.0
            card_specs = run_configuration.load_specifications()['Card0']
            mclk = card_specs["Start " + mem_clk_str].split(mhz_str)[0]
            sclk = card_specs["Start " + sys_clk_str].split(mhz_str)[0]

            # Reference: V-100 clock by default
            # sclk_cuda = 1530.0
            sclk_cuda = 0
            if compare:
                compare_specs = run_configuration.load_specifications_compare(
                )['Card0']
                sclk_cuda = compare_specs["Start " + sys_clk_str_cuda].split(
                    mhz_str_cuda)[0]
            elif cuda:
                sclk_cuda = card_specs["Start " + sys_clk_str_cuda].split(
                    mhz_str_cuda)[0]

            theoMax = 0
            theoMax_cuda = 0
            precisionBits = int(re.search(r'\d+', precision).group())
            if function == 'gemm' and precisionBits == 32:  # xdlops
                theoMax = float(sclk) / 1000.00 * 256 * 120
                theoMax_cuda = float(sclk_cuda) / 1000.00 * 128 * 80
            elif function == 'trsm' or function == 'gemm':
                # TODO better logic to decide memory bound vs compute bound
                # scaling to appropriate precision
                theoMax = (float(sclk) / 1000.00 * 128 * 120 * 32.00 /
                           precisionBits)
                theoMax_cuda = (float(sclk_cuda) / 1000.00 * 128 * 80 *
                                32.00 / precisionBits)
            elif self.flops and self.mem:
                # TODO: cuda here
                try:
                    # `n` and `m` look unused but are read by the evaluated
                    # equations, so they must stay locals of this function.
                    n = 100000
                    m = 100000
                    # NOTE(review): eval() executes arbitrary code; the
                    # equations must come from a trusted configuration.
                    flops = eval(self.flops)
                    mem = eval(self.mem)
                    theoMax = (float(mclk) / float(mem) * flops * 32 /
                               precisionBits / 4)
                except Exception as err:
                    print("flops and mem equations produce errors: " +
                          str(err))

            # Comparing efficiency
            if not cuda:
                plot_efficiency(y_scatter_by_group[group_label], theoMax,
                                '#ED1C24', "MI-100")
            else:
                plot_efficiency(y_scatter_by_group[group_label],
                                theoMax_cuda, '#76B900', "V-100")

            # Already plotted AMD, use second list for CUDA results
            if compare:
                plot_efficiency(y_scatter_by_group2[group_label],
                                theoMax_cuda, '#76B900', "V-100")

    axes.grid(True, which='major')
    axes.grid(True, which='minor')
    axes.yaxis.set_minor_locator(AutoMinorLocator(2))
    axes.set_ylim([0, 1])
    axes.set_ylabel('Efficiency')
    axes.set_xlabel('='.join(xLabel))
    return True
def plot(self, run_configurations, axes, cuda, compare):
    """Plot measured performance against the swept argument on ``axes``.

    The x value of each point is the value of the first argument that
    differs between the argument sets (``'DEFAULT'`` when it is unset).
    The y values are the results of ``collect_timing``.  For every run
    configuration a horizontal line marks the theoretical peak derived from
    the card clocks, when one can be computed.

    Changes compared with the previous revision:
      * an unknown compute type yields an empty prefix instead of ``None``
        (which made the label concatenation raise ``TypeError``);
      * only ``Exception`` is caught around the flops/mem equations and the
        cause is printed;
      * the equations are evaluated once and the unused colour iterator is
        gone.

    Args:
        run_configurations: Indexable collection of run configurations that
            provides ``group_by_label()``.
        axes: Axes object to draw on.
        cuda: Unused by this plot; kept for interface compatibility.
        compare: Unused by this plot; kept for interface compatibility.

    Returns:
        ``True`` once the plot has been drawn, ``None`` when there are no
        argument sets or no metrics.

    Raises:
        ValueError: If a sorted entry holds more than one argument set.
    """

    def get_function_prefix(compute_type):
        """Return the precision prefix for ``compute_type`` ('' if unknown)."""
        # 'bf16_r' has to be tested before 'f16_r', which it contains.
        known_prefixes = (('32_r', 's'), ('64_r', 'd'), ('32_c', 'c'),
                          ('64_c', 'z'), ('bf16_r', 'bf'), ('f16_r', 'h'))
        for marker, prefix in known_prefixes:
            if marker in compute_type:
                return prefix
        print('Error - Cannot detect precision preFix: ' + compute_type)
        return ''

    if len(self.argument_sets) == 0:
        return

    # No sort applied, but labels provided
    sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
    argument_diff = cr.ArgumentSetDifference(
        self.argument_sets, ignore_keys=self._get_sweep_keys())
    differences = argument_diff.get_differences()
    xLabel = list(differences)

    # One x value per argument set, taken from the first differing key only.
    test = []
    for _, argument_sets in sorted_argument_sets.items():
        argument_set = argument_sets[0]
        precision = argument_set.get("compute_type").get_value()
        function = argument_set.get("function").get_value()
        for key in differences:
            argument = argument_set.get(key)
            test.append(
                argument.get_value() if argument.is_set() else 'DEFAULT')
            break

    grouped_run_configurations = run_configurations.group_by_label()
    metric_labels = list(
        self.argument_sets[0].collect_timing(run_configurations[0]))
    if len(metric_labels) == 0:
        return

    # loop over independent outputs
    y_scatter_by_group = OrderedDict()
    for group_label, run_configuration_group in grouped_run_configurations.items():
        y_scatter_by_group[group_label] = []
        # loop over argument sets that differ other than the swept variable(s)
        for _, partial_argument_sets in sorted_argument_sets.items():
            if len(partial_argument_sets) != 1:
                raise ValueError(
                    'Assumed that sorting argument sets with no keys has a single element per sort.'
                )
            argument_set = partial_argument_sets[0]
            # One array of y values for each metric
            y_list_by_metric = OrderedDict()
            # loop over number of coarse grain runs and concatenate results
            for run_configuration in run_configuration_group:
                results = argument_set.collect_timing(run_configuration)
                for metric_label in results:
                    y_list_by_metric.setdefault(metric_label, []).extend(
                        results[metric_label])
            for _, y_list in y_list_by_metric.items():
                y_scatter_by_group[group_label].extend(sorted(y_list))

    for _, run_configuration_group in grouped_run_configurations.items():
        for run_configuration in run_configuration_group:
            # Clock strings look like "<number>Mhz"; keep the numeric part.
            card_specs = run_configuration.load_specifications()['Card0']
            mclk = card_specs["Start mclk"].split("Mhz")[0]
            sclk = card_specs["Start sclk"].split("Mhz")[0]

            theoMax = 0
            precisionBits = int(re.search(r'\d+', precision).group())
            if function == 'gemm' and precisionBits == 32:  # xdlops
                theoMax = float(sclk) / 1000.00 * 256 * 120
            elif function == 'trsm' or function == 'gemm':
                # TODO better logic to decide memory bound vs compute bound
                # scaling to appropriate precision
                theoMax = (float(sclk) / 1000.00 * 128 * 120 * 32.00 /
                           precisionBits)
            elif self.flops and self.mem:
                try:
                    # `n` and `m` look unused but are read by the evaluated
                    # equations, so they must stay locals of this function.
                    n = 100000
                    m = 100000
                    # NOTE(review): eval() executes arbitrary code; the
                    # equations must come from a trusted configuration.
                    flops = eval(self.flops)
                    mem = eval(self.mem)
                    theoMax = (float(mclk) / float(mem) * flops * 32 /
                               precisionBits / 4)
                except Exception as err:
                    print("flops and mem equations produce errors: " +
                          str(err))

            if theoMax:
                theoMax = round(theoMax)
                axes.plot((test[0], test[-1]), (theoMax, theoMax),
                          label="Theoretical Peak Performance: " +
                          str(theoMax) + " GFLOP/s")

    series_label = get_function_prefix(precision) + function + ' Performance'
    for group_label in y_scatter_by_group:
        axes.scatter(test, y_scatter_by_group[group_label], color='#000000')
        axes.plot(test,
                  y_scatter_by_group[group_label],
                  '-ok',
                  color='#000000',
                  label=series_label)

    axes.xaxis.set_minor_locator(AutoMinorLocator())
    axes.yaxis.set_minor_locator(AutoMinorLocator())
    axes.set_ylabel(
        metric_labels[0] if len(metric_labels) == 1 else 'Time (s)')
    axes.set_xlabel('='.join(xLabel))
    return True