예제 #1
0
def mpip_perf_patterns(obj, reg):
    '''More perf_patterns for the tool

    .. code-block::

      -----------------------------------
      @--- MPI Time (seconds) -----------
      -----------------------------------
      Task    AppTime    MPITime     MPI%
         0        8.6      0.121     1.40 <-- min
         1        8.6      0.157     1.82
         2        8.6       5.92    68.84 <-- max
         *       25.8        6.2    24.02 <---

      => NonMPI= AppTime - MPITime

    Typical performance reporting:

    .. code-block::

      * mpip_avg_app_time: 8.6 s  (= 25.8/3mpi)
      * mpip_avg_mpi_time: 2.07 s (=  6.2/3mpi)
      * %mpip_avg_mpi_time: 24.02 %
      * %max/%min
      * %mpip_avg_non_mpi_time: 75.98 %
    '''
    # rpt = os.path.join(obj.stagedir, obj.rpt_file_txt)
    rpt = sn.extractsingle(r'^mpiP: Storing mpiP output in \[(?P<rpt>.*)\]',
                           obj.stdout, 'rpt', str)
    regex_star = r'^\s+\*\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+(?P<pct>\S+)$'
    regex_minmax = (r'^\s+(?P<mpirk>\S+)\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+'
                    r'(?P<pct>\S+)$')
    if reg == 1:
        # mpip_avg_mpi_time
        result = sn.round(
            sn.extractsingle(regex_star, rpt, 'mpit', float) / obj.num_tasks,
            2)
    elif reg == 2:
        # mpip_avg_app_time
        result = sn.round(
            sn.extractsingle(regex_star, rpt, 'appt', float) / obj.num_tasks,
            2)
    elif reg == 3:
        # %mpip_avg_mpi_time
        result = sn.extractsingle(regex_star, rpt, 'pct', float)
    elif reg == 4:
        # %nonmpi
        mpi_pct = sn.extractsingle(regex_star, rpt, 'pct', float)
        result = sn.round(100 - mpi_pct, 2)
    elif reg == 5:
        # %mpip_avg_mpi_time_max
        result = sn.max(sn.extractall(regex_minmax, rpt, 'pct', float))
    elif reg == 6:
        # %mpip_avg_mpi_time_min
        result = sn.min(sn.extractall(regex_minmax, rpt, 'pct', float))
    else:
        raise ValueError('unknown region id in mpip_perf_patterns')

    return result
예제 #2
0
def seconds_elaps(self):
    '''Reports elapsed time in seconds using the internal timer from the code

    .. code-block::

      === Total time for iteration(0) 3.61153s
      reports: * Elapsed: 3.6115 s
    '''
    regex = r'^=== Total time for iteration\(\d+\)\s+(?P<sec>\d+\D\d+)s'
    res = sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
    if res > 0:
        return sn.round(
            sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
    else:
        return 1
예제 #3
0
def scorep_inclusivepct_energy(obj):
    '''Reports % of elapsed time (inclusive) for MomentumAndEnergy function
    (small scale job)

    .. code-block::

      > sqpatch_048mpi_001omp_125n_10steps_1000000cycles/rpt.exclusive
      0.0193958 (0.0009252%) sqpatch.exe
      1.39647 (0.06661%)       + main
      ...
      714.135 (34.063%)   |   + ...
               *******
        _ZN6sphexa3sph31computeMomentumAndEnergyIADImplIdNS_13 ...
        ParticlesDataIdEEEEvRKNS_4TaskERT0_
      0.205453 (0.0098%)  |   +
        _ZN6sphexa3sph15computeTimestepIdNS0_21TimestepPress2ndOrderIdNS_13 ...
        ParticlesDataIdEEEES4_EEvRKSt6vectorINS_4TaskESaIS7_EERT1_
      201.685 (9.62%)     |   |   + MPI_Allreduce
    '''
    # regex = r'^\d+.\d+ \((?P<pct>\d+.\d+).*computeMomentumAndEnergy'
    # return sn.extractsingle(regex, obj.rpt_inclusive, 'pct', float)
    regex = r'^\d+.\d+\s+\((?P<pct>\d+.\d+).*momentumAndEnergyIAD'
    try:
        result = sn.round(
            sn.sum(sn.extractall(regex, obj.rpt_inclusive, 'pct', float)), 2)
    except Exception as e:
        printer.error(f'scorep_inclusivepct_energy failed: {e}')
        result = 0

    return result
예제 #4
0
    def patrun_walltime_and_memory(self):
        '''This table shows total wall clock time for the ranks with the
        maximum, mean, and minimum time, as well as the average across ranks.

        .. code-block::

          Table 10:  Wall Clock Time, Memory High Water Mark

             Process |   Process | PE=[mmm]
                Time |     HiMem |
                     | (MiBytes) |

           11.389914 |      76.3 | Total    <-- avgt
          |--------------------------------
          | 11.398188 |      57.7 | pe.24   <-- maxt
          | 11.389955 |      98.9 | pe.34
          | 11.365630 |      54.0 | pe.93   <-- mint
          |================================

        Typical output:
          * patrun_wallt_max: 11.3982 s
          * patrun_wallt_avg: 11.3899 s
          * patrun_wallt_min: 11.3656 s
          * patrun_mem_max: 57.7 MiBytes
          * patrun_mem_min: 54.0 MiBytes
        '''
        # TODO: bug avg mem?
        res = {}
        # --- avg
        regex = (r'^Table \d+:  Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n)\s+(?P<proct>\S+)\s+\|\s+(?P<mem>\S+)'
                 r' \| Total$')
        res['patrun_wallt_avg'] = sn.extractsingle(regex, self.stdout, 'proct',
                                                   float)
        res['patrun_mem_avg'] = sn.extractsingle(regex, self.stdout, 'mem',
                                                 float)
        # --- max
        regex = (r'^Table \d+:  Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n){2}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
                 r'\s+\|\s(?P<pe>\S+)$')
        res['patrun_wallt_max'] = sn.extractsingle(regex, self.stdout, 'proct',
                                                   float)
        res['patrun_mem_max'] = sn.extractsingle(regex, self.stdout, 'mem',
                                                 float)
        res['patrun_mem_max_pe'] = sn.extractsingle(regex, self.stdout, 'pe',
                                                    float)
        # --- min
        regex = (r'^Table \d+:  Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n){4}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
                 r'\s+\|\s(?P<pe>\S+)$')
        res['patrun_wallt_min'] = sn.extractsingle(regex, self.stdout, 'proct',
                                                   float)
        res['patrun_mem_min'] = sn.extractsingle(regex, self.stdout, 'mem',
                                                 float)
        res['patrun_mem_min_pe'] = sn.extractsingle(regex, self.stdout, 'pe',
                                                    float)
        for kk, vv in res.items():
            if not isinstance(vv, str):
                res[kk] = sn.round(vv, 4)
        self.patrun_perf_d = res
예제 #5
0
def nsys_report_computeIAD_pct(self):
    '''Reports ``CUDA Kernel`` Time (%) for computeIAD measured by
    the tool and averaged over compute nodes

    .. code-block::

      > job.stdout
      # CUDA Kernel Statistics (nanoseconds)
      #
      # Time(%)      Total Time   Instances         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    49.7        69968829           6      11661471.5        11507063
      #    26.4        37101887           6       6183647.8         6047175
      #    ****
      #    24.0        33719758          24       1404989.9         1371531
      # ...
      #         Maximum  Name
      #  --------------  ------------------
      #        11827539  computeMomentumAndEnergyIAD
      #         6678078  computeIAD
      #         1459594  density
      '''
    # new regex:
    regex = (r'^\s+(?P<pctg>\S+).*::computeIAD<')
    result = sn.round(sn.avg(sn.extractall(regex, self.stdout, 'pctg', float)),
                      1)
    return result
예제 #6
0
def nsys_report_cudaMemcpy_pct(self):
    '''Reports ``CUDA API`` Time (%) for cudaMemcpy measured by the tool and
    averaged over compute nodes

    .. code-block::

      > job.stdout

      # CUDA API Statistics (nanoseconds)
      #
      # Time(%)      Total Time       Calls         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    44.9       309427138         378        818590.3            9709
      #    ****
      #    40.6       279978449           2     139989224.5           24173
      #     9.5        65562201         308        212864.3             738
      #     4.9        33820196         306        110523.5            2812
      #     0.1          704223          36         19561.8            9305
      # ....
      #         Maximum  Name
      #  --------------  ------------------
      #        11665852  cudaMemcpy
      #       279954276  cudaMemcpyToSymbol
      #         3382747  cudaFree
      #          591094  cudaMalloc
      #           34042  cudaLaunch
    '''
    regex = r'^\s+(?P<pctg>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+cudaMemcpy\s+$'
    result = sn.round(sn.avg(sn.extractall(regex, self.stdout, 'pctg', float)),
                      1)
    return result
예제 #7
0
def nsys_report_DtoH_KiB(self):
    '''Reports ``[CUDA memcpy DtoH]`` Memory Operation (KiB) measured by the
    tool and averaged over compute nodes

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (KiB)
      #
      #             Total      Operations            Average            Minimum
      # -----------------  --------------  -----------------  -----------------
      #         1530313.0             296             5170.0              0.055
      #           16500.0              84              196.4             62.500
      #           *******
      # ...
      #            Maximum  Name
      #  -----------------  -------------------
      #            81250.0  [CUDA memcpy HtoD]
      #              250.0  [CUDA memcpy DtoH]
    '''
    regex = (r'^\s+(?P<KiB>\d+.\d+)\s+\d+\s+\S+\s+\S+\s+\S+\s+'
             r'\[CUDA memcpy DtoH\]\s+$')
    result = sn.round(sn.avg(sn.extractall(regex, self.stdout, 'KiB', float)),
                      1)
    return result
예제 #8
0
    def set_mpip_perf_patterns(self):
        '''More perf_patterns for the tool

    .. code-block::

      -----------------------------------
      @--- MPI Time (seconds) -----------
      -----------------------------------
      Task    AppTime    MPITime     MPI%
         0        8.6      0.121     1.40 <-- min
         1        8.6      0.157     1.82
         2        8.6       5.92    68.84 <-- max
         *       25.8        6.2    24.02 <---

      => NonMPI= AppTime - MPITime

    Typical performance reporting:

    .. code-block::

      * mpip_avg_app_time: 8.6 s  (= 25.8/3mpi)
      * mpip_avg_mpi_time: 2.07 s (=  6.2/3mpi)
      * %mpip_avg_mpi_time: 24.02 %
      * %mpip_avg_non_mpi_time: 75.98 %
        '''
        regex_star = r'^\s+\*\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+(?P<pct>\S+)$'
        app_t = sn.extractsingle(regex_star, self.rpt, 'appt', float)
        mpi_t = sn.extractsingle(regex_star, self.rpt, 'mpit', float)
        mpi_pct = sn.extractsingle(regex_star, self.rpt, 'pct', float)
        nonmpi_pct = sn.round(100 - mpi_pct, 2)
        # min/max
        regex = (r'^\s+(?P<mpirk>\S+)\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+'
                 r'(?P<pct>\S+)$')
        mpi_pct_max = sn.max(sn.extractall(regex, self.rpt, 'pct', float))
        mpi_pct_min = sn.min(sn.extractall(regex, self.rpt, 'pct', float))
        perf_pattern = {
            'mpip_avg_app_time': sn.round(app_t / self.num_tasks, 2),
            'mpip_avg_mpi_time': sn.round(mpi_t / self.num_tasks, 2),
            '%mpip_avg_mpi_time': mpi_pct,
            '%mpip_avg_mpi_time_max': mpi_pct_max,
            '%mpip_avg_mpi_time_min': mpi_pct_min,
            '%mpip_avg_non_mpi_time': nonmpi_pct,
        }
        if self.perf_patterns:
            self.perf_patterns = {**self.perf_patterns, **perf_pattern}
        else:
            self.perf_patterns = perf_pattern
 def density_ns(self):
     regex = self.set_regex('density')
     rpt = os.path.join(self.stagedir,
                        self.metric_file.replace(".txt", ".csv"))
     begin_ns = sn.extractall(regex, rpt, 'begin', int)
     end_ns = sn.extractall(regex, rpt, 'end', int)
     ns_list = [zz[1] - zz[0] for zz in zip(begin_ns, end_ns)]
     return sn.round(sn.avg(ns_list), 0)
예제 #10
0
파일: nsimd.py 프로젝트: smoors/reframe
 def speedup(self):
     # comparing fast and slow timings:
     slow = sn.extractsingle(r'^std_sqrt/f64\s+(?P<slow>\d+) ns',
                             self.stdout, 'slow', int)
     fast = sn.extractsingle(r'^nsimd_cpu_sqrt/f64\s+(?P<fast>\d+) ns',
                             self.stdout, 'fast', int)
     sp = sn.round(slow / fast, 3)
     return sp
예제 #11
0
def seconds_iad(self):
    '''Reports `IAD` time in seconds using the internal timer from the code

    .. code-block::

      # IAD: 0.626564s
      reports: * IAD: 0.6284 s
    '''
    regex = r'^# IAD:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #12
0
def seconds_denst(self):
    '''Reports `Density` time in seconds using the internal timer from the code

    .. code-block::

      # Density: 0.296224s
      reports: * Density: 0.296 s
    '''
    regex = r'^# Density:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
 def rocprof_stats_computeMomentumAndEnergyIAD(self):
     """
     "Name","Calls","TotalDurationNs","AverageNs","Percentage"
     "findNeighborsKernel",192,2334058542,12156554,47.20607754469413
     "computeMomentumAndEnergyIAD",64,1796800226,28075003,36.34008713774581
     """
     # regex = '^\"(\S+)\",.*,(\S+)\d$'
     regex = r'^\"computeMomentumAndEnergyIAD\",.*,(\S+)\d$'
     rpt = os.path.join(self.stagedir, self.rpt_file)
     return sn.round(sn.extractsingle(regex, rpt, 1, float), 1)
예제 #14
0
def seconds_domaindistrib(self):
    '''Reports `domain::distribute` time in seconds using the internal timer
    from the code

    .. code-block::

      # domain::distribute: 0.0983208s
      reports: * domain_distribute: 0.0983 s
    '''
    regex = r'^# domain::distribute:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #15
0
def seconds_step(self):
    '''Reports `Timestep` time in seconds using the internal timer from the
    code

    .. code-block::

      # Timestep: 0.621583s
      reports: * Timestep: 0.6215 s
    '''
    regex = r'^# Timestep:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #16
0
def seconds_state(self):
    '''Reports `EquationOfState` time in seconds using the internal timer from
    the code

    .. code-block::

      # EquationOfState: 0.00244751s
      reports: * EquationOfState: 0.0024 s
    '''
    regex = r'^# EquationOfState:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #17
0
def seconds_updat(self):
    '''Reports `UpdateQuantities` time in seconds using the internal timer from
    the code

    .. code-block::

      # UpdateQuantities: 0.00498222s
      reports: * UpdateQuantities: 0.0049 s
    '''
    regex = r'^# UpdateQuantities:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #18
0
def seconds_consv(self):
    '''Reports `EnergyConservation` time in seconds using the internal timer
    from the code

    .. code-block::

      # EnergyConservation: 0.00137127s
      reports: * EnergyConservation: 0.0013 s
    '''
    regex = r'^# EnergyConservation:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #19
0
def seconds_neigh(self):
    '''Reports `FindNeighbors` time in seconds using the internal timer from
    the code

    .. code-block::

      # FindNeighbors: 0.354712s
      reports: * FindNeighbors: 0.3547 s
    '''
    regex = r'^# FindNeighbors:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #20
0
def seconds_smoothinglength(self):
    '''Reports `UpdateSmoothingLength` time in seconds using the internal timer
    from the code

    .. code-block::

      # UpdateSmoothingLength: 0.00321161s
      reports: * SmoothingLength: 0.0032 s
    '''
    regex = r'^# UpdateSmoothingLength:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #21
0
def seconds_tree(self):
    '''Reports `domain:BuildTree` time in seconds using the internal timer
    from the code

    .. code-block::

      # domain::buildTree: 0.084004s
      reports: * BuildTree: 0 s
    '''
    regex = r'^# domain::buildTree:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #22
0
def rpt_tracestats_omp(obj):
    '''Reports OpenMP statistics by reading the trace.stat file:
    - omp_ibarrier_wait: OMP Wait at Implicit Barrier (sec) in Cube GUI
    - omp_lock_contention_critical: OMP Critical Contention (sec) in Cube GUI
    Each column (Count Mean Median Minimum Maximum Sum Variance Quartil25 and
    Quartil75) is read, only Sum is reported here.
    '''
    regex_omp_ibarrier = r'^omp_ibarrier_wait\s+(\S+\s+){5}(?P<Sum>\S+)'
    regex_omp_lock = r'^omp_lock_contention_critical\s+(\S+\s+){5}(?P<Sum>\S+)'
    #
    omp_ibarrier = sn.extractsingle(regex_omp_ibarrier, obj.stat_rpt, 'Sum',
                                    float)
    omp_lock = sn.extractsingle(regex_omp_lock, obj.stat_rpt, 'Sum', float)
    #
    n_procs = obj.compute_node * obj.num_tasks_per_node * obj.omp_threads
    res_d = {
        'omp_ibarrier': sn.round(omp_ibarrier / n_procs, 4),
        'omp_lock': sn.round(omp_lock / n_procs, 4),
    }
    return res_d
예제 #23
0
def seconds_updateTasks(self):
    '''Reports `updateTasks` time in seconds using the internal timer from the
    code

    .. code-block::

      # updateTasks: 0.000900428s
      reports: ...
    '''
    regex = r'^# updateTasks:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #24
0
def ipc_rk0(obj):
    '''Reports the ``IPC`` (instructions per cycle) for rank 0
    '''
    regex1 = (r'^METRIC\s+0\s+.*Values: \(\"PAPI_TOT_INS\" <0>; UINT64;'
              r'\s+(?P<ins>\d+)\)')
    tot_ins_rk0 = sn.extractall(regex1, obj.rpt_otf2, 'ins', float)
    regex2 = (r'^METRIC\s+0\s+.*Values:.*\(\"PAPI_TOT_CYC\" <1>; UINT64;'
              r'\s+(?P<cyc>\d+)\)')
    tot_cyc_rk0 = sn.extractall(regex2, obj.rpt_otf2, 'cyc', float)
    ipc = [a / b for a, b in zip(tot_ins_rk0, tot_cyc_rk0)]
    return sn.round(max(ipc), 6)
예제 #25
0
def seconds_energ(self):
    '''Reports `MomentumEnergyIAD` time in seconds using the internal timer
    from the code

    .. code-block::

       # MomentumEnergyIAD: 1.05951s
       reports: * MomentumEnergyIAD: 1.0595 s

    '''
    regex = r'^# MomentumEnergyIAD:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #26
0
def vtune_logical_core_utilization(self):
    '''Reports the minimum ``Physical Core Utilization`` (%) measured by the
    tool

    .. code-block::

      Effective Logical Core Utilization: 96.0% (23.028 out of 24)
      Effective Logical Core Utilization: 95.9% (23.007 out of 24)
      Effective Logical Core Utilization: 95.5% (22.911 out of 24)
    '''
    regex = r'^\s+Effective Logical Core Utilization: (?P<pct>\S+)%'
    return sn.round(sn.min(sn.extractall(regex, self.stdout, 'pct', float)), 4)
예제 #27
0
def vtune_physical_core_utilization(self):
    '''Reports the minimum ``Physical Core Utilization`` (%) measured by the
    tool

    .. code-block::

      Effective Physical Core Utilization: 96.3% (11.554 out of 12)
      Effective Physical Core Utilization: 96.1% (11.534 out of 12)
      Effective Physical Core Utilization: 95.9% (11.512 out of 12)
    '''
    regex = r'^Effective Physical Core Utilization: (?P<pct>\S+)%'
    return sn.round(sn.min(sn.extractall(regex, self.stdout, 'pct', float)), 4)
예제 #28
0
def seconds_halos(self):
    '''Reports `mpi::synchronizeHalos` time in seconds using the internal timer
    from the code

    .. code-block::

      # mpi::synchronizeHalos: 0.0341479s
      # mpi::synchronizeHalos: 0.0770191s
      # mpi::synchronizeHalos: 0.344856s
      reports: * mpi_synchronizeHalos: 0.4560 s
    '''
    regex = r'^# mpi::synchronizeHalos:\s+(?P<sec>.*)s'
    return sn.round(sn.sum(sn.extractall(regex, self.stdout, 'sec', float)), 4)
예제 #29
0
    def set_vtune_perf_patterns_rpt(self):
        '''More perf_patterns for the tool

        Typical performance reporting:

        .. literalinclude::
          ../../reframechecks/intel/intel_vtune.res
          :lines: 117-127

        '''
        regex_l = r'^vtune: Using result path .(?P<paths>\S+).$'
        paths_l = sn.extractall(regex_l, self.dir_rpt, 'paths')
        regex = (r'(?P<funcname>.*);(?P<cput>\d+.\d+);(?P<cput_efft>\S+);'
                 r'(?P<cput_spint>\S+);(?P<cput_overhead>\S+)')
        res = {}
        res2 = {}
        for ii in range(self.vtune_paths):
            rpt = paths_l[ii] + '.csv'  # rpt.nid00034.csv
            kk = 'vtune_cput_cn%s' % ii
            res[kk] = sn.round(sn.sum(sn.extractall(regex, rpt, 'cput',
                                                    float)), 2)
            kk = 'vtune_cput_cn%s_efft' % ii
            res2[kk] = sn.round(sn.sum(sn.extractall(regex, rpt, 'cput_efft',
                                                     float)), 2)
            kk = 'vtune_cput_cn%s_spint' % ii
            res2[kk] = sn.round(sn.sum(sn.extractall(regex, rpt, 'cput_spint',
                                                     float)), 2)
            kk = '%svtune_cput_cn%s_efft' % ('%', ii)
            res[kk] = sn.round(res2['vtune_cput_cn%s_efft' % ii] /
                               res['vtune_cput_cn%s' % ii] * 100, 1)
            kk = '%svtune_cput_cn%s_spint' % ('%', ii)
            res[kk] = sn.round(res2['vtune_cput_cn%s_spint' % ii] /
                               res['vtune_cput_cn%s' % ii] * 100, 1)
        if self.perf_patterns:
            self.perf_patterns = {**self.perf_patterns, **res}
        else:
            self.perf_patterns = res
예제 #30
0
def nvprof_report_DtoH_pct(self):
    '''Reports ``[CUDA memcpy DtoH]`` Time(%) measured by the tool and averaged
    over compute nodes

    .. code-block::

      > job.stdout (Name: [CUDA memcpy DtoH])
      # Time(%)    Time   Calls       Avg       Min      Max
      # 2.80%  1.3194ms      44  29.986us  29.855us 30.528us [CUDA memcpy DtoH]
      # 1.34%  1.7667ms      44  40.152us  39.519us 41.887us [CUDA memcpy DtoH]
      # ^^^^
    '''
    regex = r'^\s+\s+(?P<pctg>\S+)%.*\[CUDA memcpy DtoH\]$'
    result = sn.round(sn.avg(sn.extractall(regex, self.stdout, 'pctg', float)),
                      1)
    return result