예제 #1
0
 def write_latex_table(self, latex_module):
     """Append a one-line description of the shared arguments to ``latex_module``.

     Nothing is appended when ``self.argument_sets`` is empty. Otherwise the
     arguments of the first argument set, restricted to the keys reported by
     ``get_similarities()``, are joined with spaces. If the argument sets
     differ in anything besides the sweep keys, the text is wrapped as
     "For all runs, ``...'' is held constant."; if not, it is prefixed with
     "Command: ".
     """
     if len(self.argument_sets) == 0:
         return

     diff = cr.ArgumentSetDifference(self.argument_sets,
                                     ignore_keys=self._get_sweep_keys())
     has_differences = len(diff.get_differences()) > 0
     shared_args = self.argument_sets[0].get_args(
         require_keys=diff.get_similarities())
     command = ' '.join(shared_args)

     if has_differences:
         sentence = 'For all runs, ``' + command + "'' is held constant."
     else:
         sentence = 'Command: ' + command
     latex_module.append(sentence)
예제 #2
0
    def plot(self, run_configurations, axes):
        """Plot measured bandwidth against the first differing argument.

        Every run-configuration group contributes one black scatter series
        with a connecting line. When the benchmarked function has a reference
        bandwidth, one horizontal peak line is added as well.

        The peak depends only on the function name, the precision and
        ``self.flops``/``self.mem``, so it is drawn a single time rather than
        once per run configuration (which stacked identical lines and
        repeated the same legend label).

        Args:
            run_configurations: Indexable collection providing
                ``group_by_label()``; its elements are handed to
                ``collect_timing``.
            axes: Axes object that receives the ``plot``/``scatter`` calls.

        Returns:
            ``True`` after drawing, or ``None`` when there are no argument
            sets or the first run configuration yields no metrics.
        """
        if len(self.argument_sets) == 0:
            return

        # No sort applied, but labels provided
        sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
        argument_diff = cr.ArgumentSetDifference(
            self.argument_sets, ignore_keys=self._get_sweep_keys())
        differences = argument_diff.get_differences()
        x_label_parts = list(differences)

        x_values = []
        for _, argument_sets in sorted_argument_sets.items():
            argument_set = argument_sets[0]
            # The values of the last argument set are the ones used below.
            precision = argument_set.get("compute_type").get_value()
            function = argument_set.get("function").get_value()
            # Only the first differing key supplies the x coordinate.
            for key in differences:
                argument = argument_set.get(key)
                x_values.append(
                    argument.get_value() if argument.is_set() else 'DEFAULT')
                break

        grouped_run_configurations = run_configurations.group_by_label()

        metric_labels = list(self.argument_sets[0].collect_timing(
            run_configurations[0]))
        if len(metric_labels) == 0:
            return

        # loop over independent outputs
        y_scatter_by_group = OrderedDict()
        for group_label, run_configuration_group in grouped_run_configurations.items(
        ):
            group_values = y_scatter_by_group[group_label] = []
            # loop over argument sets that differ other than the swept variable(s)
            for _, partial_argument_sets in sorted_argument_sets.items():
                if len(partial_argument_sets) != 1:
                    raise ValueError(
                        'Assumed that sorting argument sets with no keys has a single element per sort.'
                    )
                argument_set = partial_argument_sets[0]
                # One list of y values for each metric
                y_list_by_metric = OrderedDict()
                # loop over number of coarse grain runs and concatenate results
                for run_configuration in run_configuration_group:
                    results = argument_set.collect_timing(run_configuration)
                    for metric_label in results:
                        y_list_by_metric.setdefault(metric_label, []).extend(
                            results[metric_label])
                for y_list in y_list_by_metric.values():
                    group_values.extend(sorted(y_list))

        # Reference: MI-100 theoretical memory bandwidth by default
        tmb_MI100 = 1200
        precision_bits = int(re.search(r'\d+', precision).group())
        # TODO better logic to decide memory bound vs compute bound
        has_reference_peak = (
            function in ('gemm', 'trsm')
            or (function in ('copy', 'swap') and precision_bits == 32)
            or (self.flops and self.mem))
        # Without x values there is no extent to draw the line across.
        if has_reference_peak and x_values:
            axes.plot((x_values[0], x_values[-1]),
                      (tmb_MI100, tmb_MI100),
                      label="Theoretical Peak Performance: " +
                      str(tmb_MI100) + "GB/s")

        for group_values in y_scatter_by_group.values():
            axes.scatter(x_values, group_values, color='black')
            axes.plot(x_values, group_values, '-ok')

        axes.xaxis.set_minor_locator(AutoMinorLocator())
        axes.yaxis.set_minor_locator(AutoMinorLocator())

        axes.set_ylabel('Bandwidth (GB/s)')
        axes.set_xlabel('='.join(x_label_parts))
        return True
    def plot(self, run_configurations, figure, axes, cuda, compare):
        """Plot measured performance as a 2D curve or a 3D surface.

        With ``user_args.surface_plot`` set, the values of the differing keys
        ``'m'`` and ``'n'`` become the x/y grid and the measurements of the
        last run-configuration group become the surface height; the figure is
        saved as a PDF in ``self.user_args.documentation_directory`` and
        shown. Otherwise each group is drawn as a black scatter/line series
        over the first differing key, together with a horizontal theoretical
        peak line when a peak could be computed.

        Fixes relative to the previous version:
          * an unknown compute type no longer ends in a ``TypeError`` from
            concatenating ``None``; an empty prefix is used after the error
            message is printed;
          * the surface title has the space before "Performance" that the
            saved file name already had;
          * the peak line is skipped when there are no x values, and the peak
            defaults to 0 when no run configuration is visited;
          * only ``Exception`` subclasses from the peak equations are
            swallowed, and each equation is evaluated once.

        Args:
            run_configurations: Indexable collection providing
                ``group_by_label()``; elements provide
                ``load_specifications()`` and are handed to ``collect_timing``.
            figure: Figure object; used only in surface mode (title, colorbar).
            axes: Axes object that receives the drawing calls.
            cuda: Accepted for interface compatibility; not read here.
            compare: Accepted for interface compatibility; not read here.

        Returns:
            ``True`` after drawing, or ``None`` when there are no argument
            sets or the first run configuration yields no metrics.
        """
        def get_function_prefix(compute_type):
            """Return the precision prefix for ``compute_type`` ('' if unknown)."""
            # Order matters: 'bf16_r' has to be tested before 'f16_r', which
            # is a substring of it.
            known_prefixes = (('32_r', 's'), ('64_r', 'd'), ('32_c', 'c'),
                              ('64_c', 'z'), ('bf16_r', 'bf'), ('f16_r', 'h'))
            for marker, prefix in known_prefixes:
                if marker in compute_type:
                    return prefix
            print('Error - Cannot detect precision preFix: ' + compute_type)
            # An empty prefix keeps the title/label concatenations working.
            return ''

        def as_square_grid(values):
            """Reshape a flat sequence into a square 2D array."""
            grid = np.array(values)
            side = int(math.sqrt(grid.size))
            return np.reshape(grid, (side, side))

        if len(self.argument_sets) == 0:
            return

        # No sort applied, but labels provided
        sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
        argument_diff = cr.ArgumentSetDifference(
            self.argument_sets, ignore_keys=self._get_sweep_keys())
        differences = argument_diff.get_differences()
        x_label_parts = list(differences)

        x_values = []  # 2D mode: value of the first differing key
        surface_x = []  # surface mode: values of key 'm'
        surface_y = []  # surface mode: values of key 'n'
        for _, argument_sets in sorted_argument_sets.items():
            argument_set = argument_sets[0]
            # The values of the last argument set are the ones used below.
            precision = argument_set.get("compute_type").get_value()
            function = argument_set.get("function").get_value()
            for key in differences:
                argument = argument_set.get(key)
                value = argument.get_value() if argument.is_set(
                ) else 'DEFAULT'
                if user_args.surface_plot:
                    if key == 'm':
                        surface_x.append(value)
                    elif key == 'n':
                        surface_y.append(value)
                else:
                    # Only the first differing key supplies the x coordinate.
                    x_values.append(value)
                    break

        grouped_run_configurations = run_configurations.group_by_label()

        metric_labels = list(self.argument_sets[0].collect_timing(
            run_configurations[0]))
        if len(metric_labels) == 0:
            return
        value_axis_label = metric_labels[0] if len(
            metric_labels) == 1 else 'Time (s)'

        # loop over independent outputs
        y_scatter_by_group = OrderedDict()
        for group_label, run_configuration_group in grouped_run_configurations.items(
        ):
            group_values = y_scatter_by_group[group_label] = []
            # loop over argument sets that differ other than the swept variable(s)
            for _, partial_argument_sets in sorted_argument_sets.items():
                if len(partial_argument_sets) != 1:
                    raise ValueError(
                        'Assumed that sorting argument sets with no keys has a single element per sort.'
                    )
                argument_set = partial_argument_sets[0]
                # One list of y values for each metric
                y_list_by_metric = OrderedDict()
                # loop over number of coarse grain runs and concatenate results
                for run_configuration in run_configuration_group:
                    results = argument_set.collect_timing(run_configuration)
                    for metric_label in results:
                        y_list_by_metric.setdefault(metric_label, []).extend(
                            results[metric_label])
                for y_list in y_list_by_metric.values():
                    group_values.extend(sorted(y_list))

        # The peak is recomputed per run configuration; the value from the
        # last one visited is the one that gets plotted.
        theoMax = 0
        mhz_str = "Mhz"
        for _, run_configuration_group in grouped_run_configurations.items():
            for run_configuration in run_configuration_group:
                card_specs = run_configuration.load_specifications()['Card0']
                mclk = card_specs["Start mclk"].split(mhz_str)[0]
                sclk = card_specs["Start sclk"].split(mhz_str)[0]
                theoMax = 0
                precisionBits = int(re.search(r'\d+', precision).group())
                if function == 'gemm' and precisionBits == 32:  #xdlops
                    theoMax = float(sclk) / 1000.00 * 256 * 120
                elif function == 'trsm' or function == 'gemm':
                    #TODO better logic to decide memory bound vs compute bound
                    # scaling to appropriate precision
                    theoMax = float(
                        sclk) / 1000.00 * 128 * 120 * 32.00 / precisionBits
                elif self.flops and self.mem:
                    try:
                        # n and m stay in local scope for the eval()'d
                        # expressions, which presumably refer to them.
                        n = 100000
                        m = 100000
                        # NOTE(review): eval() runs arbitrary code; self.flops
                        # and self.mem must never come from untrusted input.
                        flops = eval(self.flops)
                        mem = eval(self.mem)
                        theoMax = float(mclk) / float(
                            mem) * flops * 32 / precisionBits / 4
                    except Exception:
                        print("flops and mem equations produce errors")

        plot_title = get_function_prefix(precision) + function + ' Performance'

        if user_args.surface_plot:
            # plot a 3D surface like in the example mplot3d/surface3d_demo
            X = as_square_grid(surface_x)
            Y = as_square_grid(surface_y)
            # The surface height comes from the last group's measurements.
            surface_group = list(y_scatter_by_group)[-1]
            Z = as_square_grid(y_scatter_by_group[surface_group])
            axes.legend()
            figure.suptitle(plot_title, fontsize=14, fontweight='bold')
            axes.set_xlabel('m == lda',
                            fontsize='large',
                            fontweight='bold',
                            labelpad=9)
            axes.set_ylabel('n',
                            fontsize='large',
                            fontweight='bold',
                            labelpad=9)
            axes.zaxis.set_rotate_label(False)
            axes.set_zlabel(value_axis_label,
                            fontsize='large',
                            fontweight='bold',
                            rotation=0,
                            labelpad=36)
            surf = axes.plot_surface(X,
                                     Y,
                                     Z,
                                     rstride=1,
                                     cstride=1,
                                     cmap=cm.coolwarm,
                                     linewidth=0,
                                     antialiased=False)
            figure.colorbar(surf, shrink=0.5, aspect=10)
            plt.savefig(
                os.path.join(self.user_args.documentation_directory,
                             plot_title + '_auto_plot.pdf'))
            plt.show()

        else:  # Normal 2d plot
            # Without x values there is no extent to draw the line across.
            if theoMax and x_values:
                theoMax = round(theoMax)
                axes.plot((x_values[0], x_values[-1]),
                          (theoMax, theoMax),
                          label="Theoretical Peak Performance: " +
                          str(theoMax) + " GFLOP/s")

            for group_values in y_scatter_by_group.values():
                axes.scatter(x_values, group_values, color='#000000')
                axes.plot(
                    x_values,
                    group_values,
                    '-ok',
                    color='#000000',
                    label=plot_title,
                )

            axes.xaxis.set_minor_locator(AutoMinorLocator())
            axes.yaxis.set_minor_locator(AutoMinorLocator())

            axes.set_ylabel(value_axis_label)
            axes.set_xlabel('='.join(x_label_parts))
        return True
예제 #4
0
    def plot(self, run_configurations, axes, cuda, compare):
        """Plot measured results with reference bandwidth lines per vendor.

        The primary measurements (``collect_timing``) are drawn in red and
        labelled MI-100, or in green and labelled V-100 when ``cuda`` is set.
        With ``compare`` set, the measurements from ``collect_timing_compare``
        are added as a second, green V-100 series. Horizontal reference lines
        use the hard-coded bandwidth constants below.

        Fixes relative to the previous version:
          * the V-100 reference line is only drawn when a reference peak
            applies, instead of being drawn at 0 GB/s otherwise;
          * reference lines are drawn once instead of once per run
            configuration, which stacked identical lines and legend labels;
          * the clock specification that was loaded but never used is no
            longer read, so missing specification data cannot abort the plot;
          * reference lines are skipped when there are no x values.

        Args:
            run_configurations: Indexable collection providing
                ``group_by_label()``; its elements are handed to
                ``collect_timing`` / ``collect_timing_compare``.
            axes: Axes object that receives the ``plot``/``scatter`` calls.
            cuda: When truthy, the primary series is styled as V-100.
            compare: When truthy, the comparison series is collected and drawn.

        Returns:
            ``True`` after drawing, or ``None`` when there are no argument
            sets or the first run configuration yields no metrics.
        """
        if len(self.argument_sets) == 0:
            return

        # No sort applied, but labels provided
        sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
        argument_diff = cr.ArgumentSetDifference(
            self.argument_sets, ignore_keys=self._get_sweep_keys())
        differences = argument_diff.get_differences()
        x_label_parts = list(differences)

        x_values = []
        for _, argument_sets in sorted_argument_sets.items():
            argument_set = argument_sets[0]
            # The values of the last argument set are the ones used below.
            precision = argument_set.get("compute_type").get_value()
            function = argument_set.get("function").get_value()
            # Only the first differing key supplies the x coordinate.
            for key in differences:
                argument = argument_set.get(key)
                x_values.append(
                    argument.get_value() if argument.is_set() else 'DEFAULT')
                break

        grouped_run_configurations = run_configurations.group_by_label()

        metric_labels = list(self.argument_sets[0].collect_timing(
            run_configurations[0]))
        if len(metric_labels) == 0:
            return

        # loop over independent outputs
        y_scatter_by_group = OrderedDict()
        # for comparison runs
        y_scatter_by_group2 = OrderedDict()
        for group_label, run_configuration_group in grouped_run_configurations.items(
        ):
            print(group_label)
            group_values = y_scatter_by_group[group_label] = []
            group_values2 = y_scatter_by_group2[group_label] = []
            # loop over argument sets that differ other than the swept variable(s)
            for _, partial_argument_sets in sorted_argument_sets.items():
                if len(partial_argument_sets) != 1:
                    raise ValueError(
                        'Assumed that sorting argument sets with no keys has a single element per sort.'
                    )
                argument_set = partial_argument_sets[0]
                # One list of y values for each metric
                y_list_by_metric = OrderedDict()
                y_list_by_metric2 = OrderedDict()  # For comparison runs
                # loop over number of coarse grain runs and concatenate results
                for run_configuration in run_configuration_group:
                    results = argument_set.collect_timing(run_configuration)
                    for metric_label in results:
                        y_list_by_metric.setdefault(metric_label, []).extend(
                            results[metric_label])
                    if compare:
                        results2 = argument_set.collect_timing_compare(
                            run_configuration)
                        for metric_label in results2:
                            y_list_by_metric2.setdefault(
                                metric_label,
                                []).extend(results2[metric_label])
                for y_list in y_list_by_metric.values():
                    group_values.extend(sorted(y_list))
                if compare:
                    for y_list in y_list_by_metric2.values():
                        group_values2.extend(sorted(y_list))

        # Reference: radeon 7 theoretical memory bandwidth by default
        tmb_radeon7 = 1000
        # Reference: Volta V100 theoretical memory bandwidth by default
        tmb_V100 = 900

        precision_bits = int(re.search(r'\d+', precision).group())
        # TODO better logic to decide memory bound vs compute bound
        # TODO: Add calculation for theoMax_cuda
        has_reference_peak = (
            function in ('gemm', 'trsm')
            or (function in ('copy', 'swap') and precision_bits == 32)
            or (self.flops and self.mem))
        if has_reference_peak:
            print(tmb_radeon7)
            # Without x values there is no extent to draw the lines across.
            if x_values:
                x_extent = (x_values[0], x_values[-1])
                if not cuda:
                    # NOTE(review): the legend says MI-100 but the value is
                    # the radeon 7 reference constant; confirm which is meant.
                    axes.plot(
                        x_extent, (tmb_radeon7, tmb_radeon7),
                        color='#ED1C24',
                        label="Theoretical Peak Performance MI-100: " +
                        str(tmb_radeon7) + " GB/s")
                if compare or cuda:
                    axes.plot(x_extent, (tmb_V100, tmb_V100),
                              color='#76B900',
                              label="Theoretical Peak Performance V-100: " +
                              str(tmb_V100) + " GB/s")

        # Primary series: styled by the platform the measurements came from.
        if cuda:
            primary_color, primary_label = '#76B900', 'V-100 Performance'
        else:
            primary_color, primary_label = '#ED1C24', 'MI-100 Performance'
        for group_values in y_scatter_by_group.values():
            axes.scatter(x_values,
                         group_values,
                         color=primary_color,
                         label=primary_label)
            axes.plot(x_values, group_values, '-ok', color=primary_color)

        # if compare - already plotted AMD above
        if compare:
            for group_label in y_scatter_by_group:
                axes.scatter(x_values,
                             y_scatter_by_group2[group_label],
                             color='#76B900',
                             label="V-100 Performance")
                axes.plot(x_values,
                          y_scatter_by_group2[group_label],
                          '-ok',
                          color='#76B900')

        axes.xaxis.set_minor_locator(AutoMinorLocator())
        axes.yaxis.set_minor_locator(AutoMinorLocator())

        axes.set_ylabel(metric_labels[0] if len(metric_labels) ==
                        1 else 'Time (s)')
        axes.set_xlabel('='.join(x_label_parts))
        return True
예제 #5
0
    def plot(self, run_configurations, axes, cuda, compare):
        """Plot efficiency (measurement divided by theoretical peak).

        For every run configuration the theoretical peak is derived from the
        clocks in its specifications, and the group's measurements divided by
        that peak are drawn: red "MI-100" for the primary data, or green
        "V-100" when ``cuda`` is set. With ``compare`` set, the comparison
        measurements are added as a green "V-100" curve.

        Fixes relative to the previous version:
          * a peak of 0 (function not matched, equations failed, or no CUDA
            formula for the flops/mem case) no longer raises
            ``ZeroDivisionError``; that curve is skipped with a message;
          * only ``Exception`` subclasses from the peak equations are
            swallowed, and each equation is evaluated once;
          * grid and axis-limit setup runs once instead of per run
            configuration.

        Args:
            run_configurations: Indexable collection providing
                ``group_by_label()``; elements provide
                ``load_specifications()`` (and
                ``load_specifications_compare()`` when ``compare`` is set).
            axes: Axes object that receives the drawing calls.
            cuda: When truthy, the CUDA specification keys and peak are used
                for the primary data.
            compare: When truthy, comparison data is collected and drawn.

        Returns:
            ``True`` after drawing, or ``None`` when there are no argument
            sets or the first run configuration yields no metrics.
        """
        if len(self.argument_sets) == 0:
            return

        # No sort applied, but labels provided
        sorted_argument_sets = self.sort_argument_sets(isolate_keys=[])
        argument_diff = cr.ArgumentSetDifference(
            self.argument_sets, ignore_keys=self._get_sweep_keys())
        differences = argument_diff.get_differences()
        x_label_parts = list(differences)

        x_values = []
        for _, argument_sets in sorted_argument_sets.items():
            argument_set = argument_sets[0]
            # The values of the last argument set are the ones used below.
            precision = argument_set.get("compute_type").get_value()
            function = argument_set.get("function").get_value()
            # Only the first differing key supplies the x coordinate.
            for key in differences:
                argument = argument_set.get(key)
                x_values.append(
                    argument.get_value() if argument.is_set() else 'DEFAULT')
                break

        def plot_efficiency(measurements, peak, color, label):
            """Draw measurements / peak, or report why the curve is skipped."""
            if not peak:
                print("Theoretical peak for " + label +
                      " is unavailable; efficiency curve skipped")
                return
            axes.plot(x_values, [value / peak for value in measurements],
                      color=color,
                      label=label)

        grouped_run_configurations = run_configurations.group_by_label()

        metric_labels = list(self.argument_sets[0].collect_timing(
            run_configurations[0]))
        if len(metric_labels) == 0:
            return

        # loop over independent outputs
        y_scatter_by_group = OrderedDict()
        # For comparison runs
        y_scatter_by_group2 = OrderedDict()
        for group_label, run_configuration_group in grouped_run_configurations.items(
        ):
            group_values = y_scatter_by_group[group_label] = []
            if compare:
                y_scatter_by_group2[group_label] = []
            # loop over argument sets that differ other than the swept variable(s)
            for _, partial_argument_sets in sorted_argument_sets.items():
                if len(partial_argument_sets) != 1:
                    raise ValueError(
                        'Assumed that sorting argument sets with no keys has a single element per sort.'
                    )
                argument_set = partial_argument_sets[0]
                # One list of y values for each metric
                y_list_by_metric = OrderedDict()
                y_list_by_metric2 = OrderedDict()  # For comparison runs
                # loop over number of coarse grain runs and concatenate results
                for run_configuration in run_configuration_group:
                    results = argument_set.collect_timing(run_configuration)
                    for metric_label in results:
                        y_list_by_metric.setdefault(metric_label, []).extend(
                            results[metric_label])
                    if compare:
                        results2 = argument_set.collect_timing_compare(
                            run_configuration)
                        for metric_label in results2:
                            y_list_by_metric2.setdefault(
                                metric_label,
                                []).extend(results2[metric_label])
                for y_list in y_list_by_metric.values():
                    group_values.extend(sorted(y_list))
                if compare:
                    for y_list in y_list_by_metric2.values():
                        y_scatter_by_group2[group_label].extend(
                            sorted(y_list))

        # Specification keys and clock unit suffixes differ per platform.
        mhz_str_cuda = "MHz"
        sys_clk_str_cuda = "sm"
        if cuda:
            mhz_str, mem_clk_str, sys_clk_str = mhz_str_cuda, "memory", sys_clk_str_cuda
        else:
            mhz_str, mem_clk_str, sys_clk_str = "Mhz", "mclk", "sclk"

        for group_label, run_configuration_group in grouped_run_configurations.items(
        ):
            for run_configuration in run_configuration_group:
                card_specs = run_configuration.load_specifications()['Card0']
                # Reference: MI-100 clocks by default
                # mclk = 1200.0
                # sclk = 1087.0
                mclk = card_specs["Start " + mem_clk_str].split(mhz_str)[0]
                sclk = card_specs["Start " + sys_clk_str].split(mhz_str)[0]

                # Reference: V-100 clock by default
                # sclk_cuda = 1530.0
                sclk_cuda = 0
                if compare:
                    sclk_cuda = run_configuration.load_specifications_compare(
                    )['Card0']["Start " +
                               sys_clk_str_cuda].split(mhz_str_cuda)[0]
                elif cuda:
                    sclk_cuda = card_specs[
                        "Start " + sys_clk_str_cuda].split(mhz_str_cuda)[0]

                theoMax = 0
                theoMax_cuda = 0
                precisionBits = int(re.search(r'\d+', precision).group())
                if function == 'gemm' and precisionBits == 32:  #xdlops
                    theoMax = float(sclk) / 1000.00 * 256 * 120
                    theoMax_cuda = float(sclk_cuda) / 1000.00 * 128 * 80
                elif function == 'trsm' or function == 'gemm':
                    #TODO better logic to decide memory bound vs compute bound
                    # scaling to appropriate precision
                    theoMax = float(
                        sclk) / 1000.00 * 128 * 120 * 32.00 / precisionBits
                    theoMax_cuda = float(
                        sclk_cuda) / 1000.00 * 128 * 80 * 32.00 / precisionBits
                elif self.flops and self.mem:
                    # TODO: cuda here
                    try:
                        # n stays in local scope for the eval()'d expressions,
                        # which presumably refer to it.
                        n = 100000
                        # NOTE(review): eval() runs arbitrary code; self.flops
                        # and self.mem must never come from untrusted input.
                        flops = eval(self.flops)
                        mem = eval(self.mem)
                        theoMax = float(mclk) / float(
                            mem) * flops * 32 / precisionBits / 4
                    except Exception:
                        print("flops and mem equations produce errors")

                # Comparing efficiency
                if not cuda:
                    plot_efficiency(y_scatter_by_group[group_label], theoMax,
                                    '#ED1C24', "MI-100")
                else:
                    plot_efficiency(y_scatter_by_group[group_label],
                                    theoMax_cuda, '#76B900', "V-100")

                # Already plotted AMD, use second list for CUDA results
                if compare:
                    plot_efficiency(y_scatter_by_group2[group_label],
                                    theoMax_cuda, '#76B900', "V-100")

        axes.grid(True, which='major')
        axes.grid(True, which='minor')
        axes.yaxis.set_minor_locator(AutoMinorLocator(2))
        axes.set_ylim([0, 1])

        axes.set_ylabel('Efficiency')
        axes.set_xlabel('='.join(x_label_parts))
        return True
예제 #6
0
    def plot(self, run_configurations, axes, cuda, compare):
        """Plot measured results against the first differing argument.

        The x values are, for every entry returned by
        ``self.sort_argument_sets(isolate_keys=[])``, the value of the first
        key reported by ``ArgumentSetDifference.get_differences()`` (or the
        string ``'DEFAULT'`` when that argument is not set). The y values
        are, per run-configuration group, the sorted timing results gathered
        with ``collect_timing`` for each argument set.

        For every run configuration a theoretical peak is derived from the
        ``'Start sclk'`` / ``'Start mclk'`` entries of ``'Card0'`` in the
        loaded specifications and, when non-zero, drawn as a horizontal line.

        Args:
            run_configurations: Indexable collection providing
                ``group_by_label()``.
            axes: Matplotlib-style axes object that receives the plot calls.
            cuda: Accepted for interface compatibility; not read here.
            compare: Accepted for interface compatibility; not read here.

        Returns:
            ``True`` once the plot has been drawn, or ``None`` when there are
            no argument sets or the first run configuration yields no timing
            metrics.

        Raises:
            ValueError: If an entry of the sorted argument sets does not
                contain exactly one argument set.
        """
        # Ordered so that 'bf16_r' is tested before its substring 'f16_r'.
        prefix_by_marker = (
            ('32_r', 's'),
            ('64_r', 'd'),
            ('32_c', 'c'),
            ('64_c', 'z'),
            ('bf16_r', 'bf'),
            ('f16_r', 'h'),
        )

        def get_function_prefix(compute_type):
            """Return the precision prefix of compute_type ('' if unknown)."""
            for marker, prefix in prefix_by_marker:
                if marker in compute_type:
                    return prefix
            print('Error - Cannot detect precision preFix: ' + compute_type)
            # Return an empty prefix so the legend label can still be built;
            # an implicit None would make the string concatenation fail.
            return ''

        if len(self.argument_sets) == 0:
            return

        sorted_argument_sets = self.sort_argument_sets(
            isolate_keys=[])  # No sort applied, but labels provided
        argument_diff = cr.ArgumentSetDifference(
            self.argument_sets, ignore_keys=self._get_sweep_keys())
        x_label_keys = list(argument_diff.get_differences())

        # Only the first differing key supplies the x values. `precision` and
        # `function` end up holding the values of the last argument set.
        x_values = []
        for _set_hash, argument_sets in sorted_argument_sets.items():
            argument_set = argument_sets[0]
            precision = argument_set.get("compute_type").get_value()
            function = argument_set.get("function").get_value()
            if x_label_keys:
                argument = argument_set.get(x_label_keys[0])
                x_values.append(
                    argument.get_value() if argument.is_set() else 'DEFAULT')

        grouped_run_configurations = run_configurations.group_by_label()

        metric_labels = list(self.argument_sets[0].collect_timing(
            run_configurations[0]))
        if not metric_labels:
            return

        # Collect the y values of every run-configuration group.
        y_scatter_by_group = OrderedDict()
        for group_label, run_configuration_group in (
                grouped_run_configurations.items()):
            group_values = y_scatter_by_group[group_label] = []
            # loop over argument sets that differ other than the swept
            # variable(s)
            for _subset_label, partial_argument_sets in (
                    sorted_argument_sets.items()):
                if len(partial_argument_sets) != 1:
                    raise ValueError(
                        'Assumed that sorting argument sets with no keys has a single element per sort.'
                    )
                argument_set = partial_argument_sets[0]
                # One list of y values for each metric, concatenated over
                # the coarse grain runs.
                y_list_by_metric = OrderedDict()
                for run_configuration in run_configuration_group:
                    results = argument_set.collect_timing(run_configuration)
                    for metric_label in results:
                        y_list_by_metric.setdefault(metric_label, []).extend(
                            results[metric_label])
                for y_list in y_list_by_metric.values():
                    group_values.extend(sorted(y_list))

        # NOTE: the local names n, m, mclk, sclk, precision, function and
        # precisionBits are kept as-is because eval() below resolves names
        # against this method's locals.
        precisionBits = int(re.search(r'\d+', precision).group())
        for _group_label, run_configuration_group in (
                grouped_run_configurations.items()):
            for run_configuration in run_configuration_group:
                # Load the specifications once per run configuration.
                card = run_configuration.load_specifications()['Card0']
                mclk = card["Start mclk"].split("Mhz")[0]
                sclk = card["Start sclk"].split("Mhz")[0]

                # NOTE(review): the constants below are presumably per-clock
                # throughput times compute-unit count, giving GFLOP/s to
                # match the legend label -- confirm against the hardware.
                theoMax = 0
                if function == 'gemm' and precisionBits == 32:  # xdlops
                    theoMax = float(sclk) / 1000.00 * 256 * 120
                elif function in ('trsm', 'gemm'):
                    # TODO better logic to decide memory bound vs compute
                    # bound
                    # scaling to appropriate precision
                    theoMax = (float(sclk) / 1000.00 * 128 * 120 * 32.00 /
                               precisionBits)
                elif self.flops and self.mem:
                    # SECURITY: self.flops / self.mem are evaluated as Python
                    # expressions; they must never come from untrusted input.
                    try:
                        n = 100000
                        m = 100000
                        flops = eval(self.flops)
                        mem = eval(self.mem)
                        theoMax = (float(mclk) / float(mem) * flops * 32 /
                                   precisionBits / 4)
                    except Exception:
                        # Best effort: a broken equation only disables the
                        # theoretical-peak line. KeyboardInterrupt and
                        # SystemExit are deliberately not swallowed.
                        print("flops and mem equations produce errors")

                if theoMax:
                    theoMax = round(theoMax)
                    axes.plot((x_values[0], x_values[-1]),
                              (theoMax, theoMax),
                              label="Theoretical Peak Performance: " +
                              str(theoMax) + " GFLOP/s")

        series_label = get_function_prefix(precision) + function + ' Performance'
        for y_values in y_scatter_by_group.values():
            axes.scatter(x_values, y_values, color='#000000')
            # The colour is given only through the keyword: combining it with
            # a colour in the format string ('-ok') makes matplotlib warn
            # about a redundant definition.
            axes.plot(x_values,
                      y_values,
                      '-o',
                      color='#000000',
                      label=series_label)

        axes.xaxis.set_minor_locator(AutoMinorLocator())
        axes.yaxis.set_minor_locator(AutoMinorLocator())

        axes.set_ylabel(
            metric_labels[0] if len(metric_labels) == 1 else 'Time (s)')
        axes.set_xlabel('='.join(x_label_keys))
        return True