Exemple #1
0
    def run(self):
        """Build (or reuse) coverage data and select invocation triples.

        The method works in three stages:

        1. Unless a 'coverage' model already exists and no rebuild was
           requested, instrument the source files with the ``cover*``
           plugins, write them to the coverage directory, and run ``make``
           there to produce raw data (or reuse the existing raw data).
        2. Merge the per-rank raw data, add the ``summary``, ``file``,
           ``block`` and ``invoke`` sections to the 'coverage' model, and
           write an annotated ``<name>.coverage`` copy of every invoked file.
        3. Read the model file back and choose (MPI rank, OpenMP thread,
           invocation) triples until enough conditional blocks are covered;
           the chosen triples are appended to
           ``Config.invocation['triples']``.

        Raises:
            kgutils.ProgramException: if no source object could be generated
                for a file that carries generation state.
            Exception: if the file/block/invoke sections of the model file
                cannot be parsed.
        """

        self.genfiles = []

        kgutils.logger.info('Starting KCover')

        model_realpath = os.path.realpath(
            '%s/%s' % (Config.path['outdir'], Config.path['model']))
        coverage_realpath = os.path.realpath(
            '%s/%s' % (Config.path['outdir'], Config.path['coverage']))

        if not os.path.exists(coverage_realpath):
            os.makedirs(coverage_realpath)

        # clear shared resources
        Config.used_srcfiles.clear()

        if not self.hasmodel(
                'coverage'
        ) or 'all' in Config.rebuild or 'coverage' in Config.rebuild:

            code_id = Config.model['types']['code']['id']
            data_coverage_path = '%s/__data__/%s' % (model_realpath, code_id)
            resource_path = '%s/__data__/__resource__/%s' % (model_realpath,
                                                             code_id)

            if os.path.exists(data_coverage_path) and len(
                    glob.glob('%s/*' % data_coverage_path)
            ) > 1 and Config.model['reuse_rawdata']:
                kgutils.logger.info('Reusing coverage raw data.')
            else:
                kgutils.logger.info('Generating coverage raw data.')

                # Start from empty data and resource directories.
                if os.path.exists(data_coverage_path):
                    shutil.rmtree(data_coverage_path)
                os.makedirs(data_coverage_path)

                if os.path.exists(resource_path):
                    shutil.rmtree(resource_path)
                os.makedirs(resource_path)

                # generate wrapper nodes
                for filepath, (srcobj, mods_used,
                               units_used) in Config.srcfiles.iteritems():
                    if hasattr(srcobj.tree, 'geninfo') and KGGenType.has_state(
                            srcobj.tree.geninfo):
                        sfile = gensobj(None, srcobj.tree, KERNEL_ID_0)
                        # Validate before touching attributes: assigning to
                        # a None result would raise AttributeError and hide
                        # the intended ProgramException.
                        if sfile is None:
                            raise kgutils.ProgramException(
                                'Kernel source file is not generated for %s.' %
                                filepath)
                        # Only the call-site file is written out below.
                        if filepath == Config.callsite['filepath']:
                            sfile.used4coverage = True
                        else:
                            sfile.used4coverage = False
                        self.genfiles.append((sfile, filepath))
                        Config.used_srcfiles[filepath] = (sfile, mods_used,
                                                          units_used)

                # process each nodes in the tree
                # Each phase is completed for every coverage plugin and
                # every file before the next phase starts.
                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('cover'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.created([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('cover'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.process([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('cover'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.finalize([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('cover'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.flatten(KERNEL_ID_0, [plugin_name])

                # generate source files from each node of the tree
                coverage_files = []
                for sfile, filepath in self.genfiles:
                    filename = os.path.basename(filepath)
                    if sfile.used4coverage:
                        set_indent('')
                        slines = sfile.tostring()
                        if slines is not None:
                            slines = kgutils.remove_multiblanklines(slines)
                            coverage_files.append(filename)
                            # Instrumented source.
                            with open('%s/%s' % (coverage_realpath, filename),
                                      'wb') as fd:
                                fd.write(slines)
                            # Preprocessed original; annotated later to
                            # produce the '.coverage' report.
                            with open(
                                    '%s/%s.kgen' %
                                (coverage_realpath, filename), 'wb') as ft:
                                ft.write('\n'.join(sfile.kgen_stmt.prep))

                self.gen_makefile()

                kgutils.logger.info(
                    'Instrumentation for coverage is generated at %s.' %
                    coverage_realpath)

                # TODO: wait until coverage data generation is completed
                # use -K option for bsub to wait for job completion

                # clean app
                if Config.cmd_clean['cmds']:
                    kgutils.run_shcmd(Config.cmd_clean['cmds'])
                if Config.state_switch['clean']:
                    kgutils.run_shcmd(Config.state_switch['clean'])

                # TEMP
                out, err, retcode = kgutils.run_shcmd('make',
                                                      cwd=coverage_realpath)
                if retcode != 0:
                    kgutils.logger.warn(
                        'Coverage raw data is not correctly generated.: %s' %
                        err)

            # NOTE(review): this gate also requires
            # Config.model['reuse_rawdata'], so freshly generated raw data
            # falls through to the else branch (and is deleted unless _DEBUG)
            # whenever that flag is false -- confirm this is intended.
            if os.path.exists(data_coverage_path) and len(
                    glob.glob('%s/*' % data_coverage_path)
            ) > 1 and Config.model['reuse_rawdata']:

                kgutils.logger.info('Generating model file: %s/%s' %
                                    (Config.path['outdir'], Config.modelfile))

                # 'files' is iterated below as file id -> path and 'lines'
                # as file id -> {line id -> line number}.
                files = None
                with open('%s/files' % data_coverage_path, 'r') as f:
                    files = json.load(f)

                lines = None
                with open('%s/lines' % data_coverage_path, 'r') as f:
                    lines = json.load(f)

                if not os.path.exists(
                        '%s/mpi' % data_coverage_path) or not os.path.exists(
                            '%s/openmp' % data_coverage_path):
                    kgutils.logger.error(
                        'Coverage raw data is not correct. Please rerun KGen after generating coverage raw data correctly.'
                    )
                else:
                    # Only the first line of each file is parsed; the values
                    # are not used further down, but a malformed file still
                    # raises ValueError here.
                    numranks = None
                    with open('%s/mpi' % data_coverage_path, 'r') as f:
                        for idx, line in enumerate(f.read().split('\n')):
                            if idx == 0: numranks = int(line)

                    numthreads = None
                    # NOTE: numthreads could be smaller than actual number of omp threads as it depends on code regions.
                    with open('%s/openmp' % data_coverage_path, 'r') as f:
                        for idx, line in enumerate(f.read().split('\n')):
                            if idx == 0: numthreads = int(line)

                    # collect data
                    kgutils.logger.info('Collecting raw data.')

                    usedfiles = []  # fid
                    usedlines = {}  # fid=[linenum, ...]
                    mpivisits = {}  # fileid:linenum:mpirank=visits
                    ompvisits = {}  # fileid:linenum:omptid=visits
                    invokes = {
                    }  # mpirank:omptid:invoke=[(fileid, linenum, numvisits), ... ]

                    # One all-digit sub-directory per MPI rank.
                    mpipaths = []
                    for item in os.listdir(data_coverage_path):
                        if item.isdigit() and os.path.isdir(
                                os.path.join(data_coverage_path, item)):
                            mpipaths.append((data_coverage_path, item))

                    nprocs = min(len(mpipaths),
                                 multiprocessing.cpu_count() * 1)

                    if nprocs == 0:
                        kgutils.logger.warn(
                            'No coverage data files are found.')
                    else:
                        # Ceiling division in pure integers. Under Python 2
                        # ``len(mpipaths) / nprocs`` floors before math.ceil
                        # sees it, which can yield more chunks than workers
                        # and silently drop the surplus in zip() below.
                        # Assumes chunks() yields consecutive slices of at
                        # most chunk_size items -- TODO confirm.
                        chunk_size = (len(mpipaths) + nprocs - 1) // nprocs
                        workload = list(chunks(mpipaths, chunk_size))
                        # Exactly one worker per chunk, so no chunk is
                        # dropped and no worker is left without input while
                        # the parent waits on its output queue.
                        nprocs = len(workload)

                        inqs = []
                        outqs = []
                        for _ in range(nprocs):
                            inqs.append(multiprocessing.Queue())
                            outqs.append(multiprocessing.Queue())

                        procs = []
                        for idx in range(nprocs):
                            proc = multiprocessing.Process(
                                target=readdatafiles,
                                args=(inqs[idx], outqs[idx]))
                            procs.append(proc)
                            proc.start()

                        for inq, chunk in zip(inqs, workload):
                            inq.put(chunk)

                        # Results are drained before join().
                        for outq in outqs:
                            invoke, usedfile, usedline, mpivisit, ompvisit = outq.get(
                            )
                            update(invokes, invoke)
                            for f in usedfile:
                                if f not in usedfiles:
                                    usedfiles.append(f)
                            update(usedlines, usedline)
                            update(mpivisits, mpivisit)
                            update(ompvisits, ompvisit)

                        for idx in range(nprocs):
                            procs[idx].join()

                    if len(invokes) == 0:
                        if not _DEBUG:
                            shutil.rmtree(data_coverage_path)
                        kgutils.logger.warn(
                            'Code coverage data is not collected.')
                    else:
                        kgutils.logger.info(
                            'Adding coverage data into the model file.')

                        # Best effort: any failure while building the model
                        # sections or the reports is logged, not raised.
                        try:
                            coverage_sections = [
                                'summary', 'file', 'block', 'invoke'
                            ]

                            self.addmodel('coverage', coverage_sections)

                            num_blocks = sum(
                                len(lmap) for lmap in lines.values())
                            num_used_blocks = sum(
                                len(lids) for lids in usedlines.values())

                            summary = []
                            summary.append(
                                ('number_of_files_having_condblocks',
                                 str(len(files))))
                            summary.append(('number_of_files_invoked',
                                            str(len(usedfiles))))
                            summary.append(('number_of_condblocks_exist',
                                            str(num_blocks)))
                            summary.append(('number_of_condblocks_invoked',
                                            str(num_used_blocks)))
                            self.addsection('coverage', 'summary', summary)

                            # file section
                            # <file number> = <path to file>
                            file_section = []
                            for fileid, filepath in files.items():
                                file_section.append(
                                    (str(fileid), '%s/%s.kgen\n' %
                                     (coverage_realpath,
                                      os.path.basename(filepath))))
                            file_section.append(
                                ('used_files', ', '.join(usedfiles)))
                            self.addsection('coverage', 'file', file_section)

                            # block section
                            # <file number> = <line number> ...
                            block_section = []
                            for fileid, lmap in lines.items():
                                block_section.append(
                                    (str(fileid), ', '.join(lmap.values())))

                            used_line_pairs = []
                            for fid, lids in usedlines.items():
                                for lid in lids:
                                    used_line_pairs.append((fid, lid))
                            block_section.append(('used_lines', ', '.join([
                                '%s:%s' % (fid, lines[fid][lid])
                                for fid, lid in used_line_pairs
                            ])))
                            self.addsection('coverage', 'block',
                                            block_section)

                            # invoke section
                            # <MPI rank> <OpenMP thread> <invocation order> =
                            #     <file number>:<line number>:<num of invocations> ...
                            invoke_section = []
                            for ranknum, threadnums in invokes.items():
                                for threadnum, invokenums in threadnums.items(
                                ):
                                    for invokenum, triples in invokenums.items(
                                    ):
                                        invoke_section.append(
                                            ('%s %s %s' % (ranknum, threadnum,
                                                           invokenum),
                                             ', '.join([
                                                 '%s:%s:%d' %
                                                 (fid, lines[fid][lid], nivks)
                                                 for fid, lid, nivks in triples
                                             ])))
                            self.addsection('coverage', 'invoke',
                                            invoke_section)

                            kgutils.logger.info(
                                '    ***** Within "%s" kernel *****:' %
                                Config.kernel['name'])
                            kgutils.logger.info(
                                '    * %d original source files have conditional blocks.'
                                % len(files))
                            kgutils.logger.info(
                                '    * %d original source files are invoked at least once.'
                                % len(usedfiles))
                            kgutils.logger.info(
                                '    * %d conditional blocks exist in the original source files.'
                                % num_blocks)
                            kgutils.logger.info(
                                '    * %d conditional blocks are executed at least once among all the conditional blocks.'
                                % num_used_blocks)

                            # Write '<name>.coverage': the '.kgen' text with
                            # a per-file banner and per-line visit counts.
                            for fid in usedfiles:
                                basefile = '%s/%s.kgen' % (coverage_realpath,
                                                           os.path.basename(
                                                               files[fid]))
                                with open(basefile, 'r') as fsrc:
                                    srclines = fsrc.readlines()

                                filesummary = [ \
                                    '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!', \
                                    '!! %d conditional blocks exist in this file'%len(lines[fid]), \
                                    '!! %d conditional blokcs are executed at least once among all the conditional blocks.'%len(usedlines[fid]), \
                                    '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' \
                                ]
                                # The banner is prepended to the first list
                                # entry so later indices stay aligned with
                                # the original line numbers.
                                srclines[0] = '%s\n%s\n' % (
                                    '\n'.join(filesummary), srclines[0])

                                for lid in usedlines[fid]:
                                    linevisit = [
                                        '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
                                    ]
                                    linevisit.append(
                                        '!! Total number of visits: %d' % sum([
                                            visits for rank, visits in
                                            mpivisits[fid][lid].items()
                                        ]))
                                    if Config.mpi['enabled']:
                                        linevisit.append('!! MPI rank(visits)      : %s' % ' '.join( \
                                            ['%s(%d)'%(r,mpivisits[fid][lid][r]) for r in sorted(mpivisits[fid][lid])]))
                                    if Config.openmp['enabled']:
                                        linevisit.append('!! OpenMP thread(visits) : %s' % ' '.join( \
                                            ['%s(%d)'%(t,ompvisits[fid][lid][t]) for t in sorted(ompvisits[fid][lid])]))
                                    linevisit.append(
                                        '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
                                    )

                                    # Line numbers are 1-based; the note is
                                    # appended after the source line.
                                    lineidx = int(lines[fid][lid]) - 1
                                    srclines[lineidx] = '%s%s\n' % (
                                        srclines[lineidx],
                                        '\n'.join(linevisit))

                                coveragefile = '%s/%s.coverage' % (
                                    coverage_realpath,
                                    os.path.basename(files[fid]))
                                with open(coveragefile, 'w') as fdst:
                                    fdst.write(''.join(srclines))
                        except Exception as e:
                            kgutils.logger.error(str(e))
            else:
                if not _DEBUG:
                    shutil.rmtree(data_coverage_path)
                kgutils.logger.info('failed to generate coverage information')

            out, err, retcode = kgutils.run_shcmd('make recover',
                                                  cwd=coverage_realpath)

            if Config.state_switch['clean']:
                kgutils.run_shcmd(Config.state_switch['clean'])
        else:  # check if coverage should be invoked
            kgutils.logger.info('Reusing KGen coverage file: %s/%s' %
                                (Config.path['outdir'], Config.modelfile))

        # check if coverage data exists in model file
        if not os.path.exists('%s/%s' %
                              (Config.path['outdir'], Config.modelfile)):
            kgutils.logger.warn('No coverage file is found.')
        else:
            # read ini file
            kgutils.logger.info('Reading %s/%s' %
                                (Config.path['outdir'], Config.modelfile))

            cfg = configparser.ConfigParser()
            # Keep option names case-sensitive.
            cfg.optionxform = str
            cfg.read('%s/%s' % (Config.path['outdir'], Config.modelfile))

            number_of_files_having_condblocks = int(
                cfg.get('coverage.summary',
                        'number_of_files_having_condblocks'))
            number_of_files_invoked = int(
                cfg.get('coverage.summary', 'number_of_files_invoked'))
            number_of_condblocks_exist = int(
                cfg.get('coverage.summary', 'number_of_condblocks_exist'))
            number_of_condblocks_invoked = int(
                cfg.get('coverage.summary', 'number_of_condblocks_invoked'))

            try:
                filemap = {}
                for opt in cfg.options('coverage.file'):
                    if opt.isdigit():
                        filemap[opt] = cfg.get('coverage.file', opt)

                # Values are written as a ', '-joined list, so split on
                # commas; a whitespace split would keep trailing commas.
                blockmap = {}
                for opt in cfg.options('coverage.block'):
                    if opt.isdigit():
                        blockmap[opt] = tuple(
                            linenum.strip() for linenum in cfg.get(
                                'coverage.block', opt).split(',')
                            if linenum.strip())

                # <MPI rank> < OpenMP Thread> <invocation order> =  <file number>:<line number>:<num invokes> ...
                # invokemap: invoke -> rank -> thread -> fileid -> linenum
                #            -> accumulated number of invocations
                invokemap = {}
                idx = 0
                for opt in cfg.options('coverage.invoke'):
                    idx += 1
                    ranknum, threadnum, invokenum = tuple(
                        num for num in opt.split())
                    optval = cfg.get('coverage.invoke', opt).split(',')
                    triples = tuple(triple.strip().split(':')
                                    for triple in optval)

                    invokenum = int(invokenum)
                    # Always bind the leaf dict for this key. Binding it only
                    # when newly created would reuse the previous option's
                    # dict (or raise NameError) for a repeated key.
                    threadnums = invokemap.setdefault(
                        invokenum, {}).setdefault(ranknum, {}).setdefault(
                            threadnum, {})

                    for fidstr, lnumstr, numinvokes in triples:
                        fileid = fidstr
                        linenum = lnumstr
                        if fileid not in threadnums:
                            threadnums[fileid] = {}
                        if linenum not in threadnums[fileid]:
                            threadnums[fileid][linenum] = 0
                        threadnums[fileid][linenum] += int(numinvokes)

                    if idx % 100000 == 0:
                        print('Processed %d items: %s' % (
                            idx, datetime.datetime.now().strftime(
                                "%I:%M%p on %B %d, %Y")))
            except Exception as e:
                raise Exception(
                    'Please check the format of coverage file: %s' % str(e))

            # Walk invocations in ascending order and stop once more than
            # THREASHOLD_NUM distinct (file, line, invoke) entries and at
            # least numD entries have been collected.
            THREASHOLD = Config.model['types']['code']['percentage'] / 100.0
            THREASHOLD_NUM = int(
                math.ceil(number_of_condblocks_invoked * THREASHOLD))
            # Only membership and size are needed, so a set avoids a linear
            # scan per candidate.
            collected = set()
            triples = {}
            numC = len(collected)
            numD = Config.model['types']['code']['ndata']
            for invokenum in sorted(invokemap.keys()):
                if numC > THREASHOLD_NUM and numC >= numD: break
                ranknums = invokemap[invokenum]
                for ranknum in ranknums.keys():
                    if numC > THREASHOLD_NUM and numC >= numD: break
                    threadnums = invokemap[invokenum][ranknum]
                    for threadnum in threadnums.keys():
                        # Triples newly selected for this
                        # (invoke, rank, thread), capped by maxnuminvokes.
                        invokecount = 0
                        if numC > THREASHOLD_NUM and numC >= numD: break
                        fileids = invokemap[invokenum][ranknum][threadnum]
                        for fileid in fileids.keys():
                            if numC > THREASHOLD_NUM and numC >= numD: break
                            if Config.data['maxnuminvokes'] and \
                                invokecount >= Config.data['maxnuminvokes']:
                                break
                            lnums = invokemap[invokenum][ranknum][threadnum][
                                fileid]
                            for lnum, numinvokes in lnums.items():
                                if numC > THREASHOLD_NUM and numC >= numD:
                                    break
                                if Config.data['maxnuminvokes'] and \
                                    invokecount >= Config.data['maxnuminvokes']:
                                    break
                                entry = (fileid, lnum, invokenum)
                                if entry not in collected:
                                    collected.add(entry)
                                    numC = len(collected)
                                    if (ranknum, threadnum,
                                            invokenum) not in triples:
                                        invokecount += 1
                                        triples[(ranknum, threadnum,
                                                 invokenum)] = None

            print('At least, %s of conditional blocks will be excuted by using following (MPI ranks, OpenMP Threads, Invokes) triples:' % '{:.1%}'.format(
                THREASHOLD))
            print(','.join(
                [':'.join([str(n) for n in t]) for t in triples.keys()]))

            # Each selected triple is stored as three (start, end) pairs
            # with start == end.
            for ranknum, threadnum, invokenum in triples.keys():
                Config.invocation['triples'].append( ( (str(ranknum), str(ranknum)), (str(threadnum), str(threadnum)), \
                    (str(invokenum), str(invokenum)) ) )
Exemple #2
0
    def run(self):

        self.genfiles = []

        kgutils.logger.info('Starting PAPI')

        model_realpath = os.path.realpath(
            '%s/%s' % (Config.path['outdir'], Config.path['model']))
        papi_realpath = os.path.realpath(
            '%s/%s' % (Config.path['outdir'], Config.path['papi']))

        if not os.path.exists(papi_realpath):
            os.makedirs(papi_realpath)

        # clear shared resources
        Config.used_srcfiles.clear()

        if not self.hasmodel(
                'papi') or 'all' in Config.rebuild or 'papi' in Config.rebuild:
            #if not os.path.exists('%s/%s'%(Config.path['outdir'], Config.modelfile)) or 'all' in Config.rebuild or 'coverage' in Config.rebuild:

            data_papi_path = '%s/__data__/%s' % (
                model_realpath, Config.model['types']['papi']['id'])
            if os.path.exists(data_papi_path) and len(
                    glob.glob(
                        '%s/*' %
                        data_papi_path)) > 0 and Config.model['reuse_rawdata']:
                kgutils.logger.info('Reusing papi raw data.')
            else:
                kgutils.logger.info('Generating papi counter raw data.')

                if os.path.exists(data_papi_path):
                    shutil.rmtree(data_papi_path)
                os.makedirs(data_papi_path)

                if os.path.exists(
                        '%s/__data__/__resource__/%s' %
                    (model_realpath, Config.model['types']['papi']['id'])):
                    shutil.rmtree(
                        '%s/__data__/__resource__/%s' %
                        (model_realpath, Config.model['types']['papi']['id']))
                os.makedirs(
                    '%s/__data__/__resource__/%s' %
                    (model_realpath, Config.model['types']['papi']['id']))

                # generate wrapper nodes
                for filepath, (srcobj, mods_used,
                               units_used) in Config.srcfiles.iteritems():
                    if hasattr(srcobj.tree, 'geninfo') and KGGenType.has_state(
                            srcobj.tree.geninfo):
                        sfile = gensobj(None, srcobj.tree, KERNEL_ID_0)
                        if filepath == Config.callsite['filepath']:
                            sfile.used4papi = True
                        else:
                            sfile.used4papi = False
                        if sfile is None:
                            raise kgutils.ProgramException(
                                'Kernel source file is not generated for %s.' %
                                filepath)
                        self.genfiles.append((sfile, filepath))
                        Config.used_srcfiles[filepath] = (sfile, mods_used,
                                                          units_used)

                # process each nodes in the tree
                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('papi'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.created([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('papi'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.process([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('papi'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.finalize([plugin_name])

                for plugin_name in event_register.keys():
                    if not plugin_name.startswith('papi'): continue

                    for sfile, filepath in self.genfiles:
                        sfile.flatten(KERNEL_ID_0, [plugin_name])

                # generate source files from each node of the tree
                papi_files = []
                for sfile, filepath in self.genfiles:
                    filename = os.path.basename(filepath)
                    if sfile.used4papi:
                        set_indent('')
                        slines = sfile.tostring()
                        if slines is not None:
                            slines = kgutils.remove_multiblanklines(slines)
                            papi_files.append(filename)
                            with open('%s/%s' % (papi_realpath, filename),
                                      'wb') as fd:
                                fd.write(slines)
                            with open('%s/%s.kgen' % (papi_realpath, filename),
                                      'wb') as ft:
                                ft.write('\n'.join(sfile.kgen_stmt.prep))

                self.gen_makefile()

                kgutils.logger.info(
                    'Instrumentation for papi is generated at %s.' %
                    papi_realpath)

                # TODO: wait until coverage data generation is completed
                # use -K option fir bsub to wait for job completion

                # clean app
                if Config.cmd_clean['cmds']:
                    kgutils.run_shcmd(Config.cmd_clean['cmds'])
                if Config.state_switch['clean']:
                    kgutils.run_shcmd(Config.state_switch['clean'])

                # TEMP
                out, err, retcode = kgutils.run_shcmd('make',
                                                      cwd=papi_realpath)
                if retcode != 0:
                    kgutils.logger.warn(
                        'Papi counter raw data is not correctly generated.: %s'
                        % err)

            if os.path.exists(data_papi_path) and len(
                    glob.glob(
                        '%s/*' %
                        data_papi_path)) > 0 and Config.model['reuse_rawdata']:

                kgutils.logger.info('Generating model file: %s/%s' %
                                    (Config.path['outdir'], Config.modelfile))

                # collect data
                papis = {
                }  # mpirank:omptid:invoke=[(fileid, linenum, numvisits), ... ]
                papimin = 1E100
                papimax = 0
                npapis = 0
                nexcluded_under = 0
                nexcluded_over = 0

                mpipaths = []
                for item in os.listdir(data_papi_path):
                    try:
                        mpirank, ompthread = item.split('.')
                        if mpirank.isdigit() and ompthread.isdigit():
                            mpipaths.append(
                                (data_papi_path, mpirank, ompthread))
                    except:
                        pass

                nprocs = min(len(mpipaths), multiprocessing.cpu_count() * 1)

                if nprocs == 0:
                    kgutils.logger.warn('No papi data files are found.')
                else:
                    workload = [
                        chunk for chunk in chunks(
                            mpipaths, int(math.ceil(len(mpipaths) / nprocs)))
                    ]
                    inqs = []
                    outqs = []
                    for _ in range(nprocs):
                        inqs.append(multiprocessing.Queue())
                        outqs.append(multiprocessing.Queue())

                    procs = []
                    for idx in range(nprocs):
                        proc = multiprocessing.Process(target=readdatafiles,
                                                       args=(inqs[idx],
                                                             outqs[idx]))
                        procs.append(proc)
                        proc.start()

                    for inq, chunk in zip(inqs, workload):
                        inq.put(chunk)

                    for outq in outqs:
                        papi, pmeta = outq.get()
                        update(papis, papi)
                        papimin = min(papimin, pmeta[0])
                        papimax = max(papimax, pmeta[1])
                        npapis += pmeta[2]
                        nexcluded_under += pmeta[3]
                        nexcluded_over += pmeta[4]

                    for idx in range(nprocs):
                        procs[idx].join()

                    kgutils.logger.info(
                        '# of excluded samples: under limit = %d, over limit = %d'
                        % (nexcluded_under, nexcluded_over))

                if len(papis) == 0:
                    if not _DEBUG:
                        shutil.rmtree(data_papi_path)
                    kgutils.logger.warn(
                        'Papi data collection is not right. Deleting corrupted data.'
                    )
                else:
                    try:
                        papi_sections = ['counters', 'summary']

                        self.addmodel('papi', papi_sections)

                        # papi section
                        papi = []
                        #fd.write('; <MPI rank> < OpenMP Thread> <invocation order> =  <file number>:<line number><num of invocations> ...\n')

                        for ranknum, threadnums in papis.items():
                            for threadnum, invokenums in threadnums.items():
                                for invokenum, pvalue in invokenums.items():
                                    papi.append(
                                        ('%s %s %s' %
                                         (ranknum, threadnum, invokenum),
                                         str(pvalue)))
                        self.addsection('papi', 'counters', papi)

                        summary = []
                        summary.append(('minimum_papicounter', str(papimin)))
                        summary.append(('maximum_papicounter', str(papimax)))
                        summary.append(('number_papicounters', str(npapis)))
                        self.addsection('papi', 'summary', summary)

                    except Exception as e:
                        kgutils.logger.error(str(e))
            else:
                if not _DEBUG:
                    shutil.rmtree(data_papi_path)
                kgutils.logger.info(
                    'failed to generate papi counter information')

            out, err, retcode = kgutils.run_shcmd('make recover',
                                                  cwd=papi_realpath)

            if Config.state_switch['clean']:
                kgutils.run_shcmd(Config.state_switch['clean'])
        else:  # check if coverage should be invoked
            kgutils.logger.info('Reusing Papi counter file: %s/%s' %
                                (Config.path['outdir'], Config.modelfile))

        # check if papi data exists in model file
        if not os.path.exists('%s/%s' %
                              (Config.path['outdir'], Config.modelfile)):
            kgutils.logger.warn('No papi counter file is found.')
        else:
            # read ini file
            kgutils.logger.info('Reading %s/%s' %
                                (Config.path['outdir'], Config.modelfile))

            cfg = configparser.ConfigParser()
            cfg.optionxform = str
            cfg.read('%s/%s' % (Config.path['outdir'], Config.modelfile))

            try:

                papimin = int(
                    cfg.get('papi.summary', 'minimum_papicounter').strip())
                papimax = int(
                    cfg.get('papi.summary', 'maximum_papicounter').strip())
                npapis = int(
                    cfg.get('papi.summary', 'number_papicounters').strip())
                papidiff = papimax - papimin

                # <MPI rank> < OpenMP Thread> <invocation order> =  <file number>:<line number>:<num papis> ...
                if papidiff == 0:
                    nbins = 1
                else:
                    nbins = max(
                        min(Config.model['types']['papi']['nbins'], npapis), 2)

                kgutils.logger.info('nbins = %d' % nbins)
                kgutils.logger.info('papimin = %d' % papimin)
                kgutils.logger.info('papimax = %d' % papimax)
                kgutils.logger.info('papidiff = %d' % papidiff)
                kgutils.logger.info('npapis = %d' % npapis)

                if nbins > 1:
                    papibins = [{} for _ in range(nbins)]
                    papicounts = [0 for _ in range(nbins)]
                else:
                    papibins = [{}]
                    papicounts = [0]

                idx = 0
                # TODO: convert to counters
                for opt in cfg.options('papi.counters'):
                    ranknum, threadnum, invokenum = tuple(
                        num for num in opt.split())
                    count = cfg.getint('papi.counters', opt)

                    if nbins > 1:
                        binnum = int(
                            math.floor(
                                (count - papimin) / papidiff * (nbins - 1)))
                    else:
                        binnum = 0

                    papicounts[binnum] += 1

                    invokenum = int(invokenum)
                    if invokenum not in papibins[binnum]:
                        papibins[binnum][invokenum] = {}
                    if ranknum not in papibins[binnum][invokenum]:
                        papibins[binnum][invokenum][ranknum] = {}
                    if threadnum not in papibins[binnum][invokenum][ranknum]:
                        papibins[binnum][invokenum][ranknum][threadnum] = count
                    else:
                        raise Exception('Dupulicated data: (%s, %s, %s, %d)' %
                                        (invokenum, ranknum, threadnum, count))

                    idx += 1

                    if idx % 100000 == 0:
                        print 'Processed %d items: %s' % (
                            idx, datetime.datetime.now().strftime(
                                "%I:%M%p on %B %d, %Y"))
            except Exception as e:
                raise Exception(
                    'Please check the format of papi counter file: %s' %
                    str(e))

            # types of representation
            # average, median, min/max, n-stratified, distribution
            # bins with histogram

            totalcount = sum(papicounts)
            countdist = [float(c) / float(totalcount) for c in papicounts]
            ndata = Config.model['types']['papi']['ndata']
            datacollect = [int(round(dist * ndata)) for dist in countdist]

            # TODO: convert to counters
            triples = []
            for binnum, papibin in enumerate(papibins):
                bin_triples = []
                range_begin = int(binnum * (papimax - papimin) / nbins +
                                  papimin) if binnum > 0 else papimin
                range_end = int((binnum + 1) * (papimax - papimin) / nbins +
                                papimin) if binnum < (nbins - 1) else None

                if range_end is None:
                    print 'From bin # %d [ %d ~ ] %f %% of %d'%(binnum, \
                        range_begin, countdist[binnum] * 100, totalcount)
                else:
                    print 'From bin # %d [ %d ~ %d ] %f %% of %d'%(binnum, \
                        range_begin, range_end, countdist[binnum] * 100, totalcount)

                for invokenum in sorted(papibin.keys()):
                    if len(bin_triples) >= datacollect[binnum]: break
                    # select datacollect[binnum] under this data tree, rank/thread/invoke
                    bininvokes = papibin[invokenum].keys()
                    random.shuffle(bininvokes)
                    for ranknum in bininvokes:
                        if len(bin_triples) >= datacollect[binnum]: break
                        binranks = papibin[invokenum][ranknum].keys()
                        random.shuffle(binranks)
                        for threadnum in binranks:
                            bin_triples.append((ranknum, threadnum, invokenum))
                            print '        invocation triple: %s:%s:%s' % (
                                ranknum, threadnum, invokenum)
                triples.extend(bin_triples)

            print 'Number of bins: %d' % nbins
            print 'Minimun papi count: %d' % papimin
            print 'Maximum papi count: %d' % papimax
            #print 'Selected invocation triples:'
            #print ','.join([ ':'.join([ str(n) for n in t ]) for t in triples])

            for ranknum, threadnum, invokenum in triples:
                Config.invocation['triples'].append( ( (str(ranknum), str(ranknum)), (str(threadnum), str(threadnum)), \
                    (str(invokenum), str(invokenum)) ) )
Exemple #3
0
    def run(self):
        """Generate kernel and state-instrumentation sources, then build/run the app.

        The kernel and state directories under ``Config.path['outdir']`` are
        created when missing.  Generation is performed when 'all' or 'extract'
        is listed in ``Config.rebuild``, when the state Makefile does not
        exist, or when no ``<outdir>/<kernel name>.*`` file is found.  In that
        case the method:

        1. creates the driver program tree and a kernel/state object pair for
           every source file whose tree has ``geninfo`` accepted by
           ``KGGenType.has_state``,
        2. runs the 'created', 'process', 'finalize' and 'flatten' stages for
           every registered plugin whose name starts with 'ext',
        3. writes the generated sources, the driver, the utility file (via
           ``generate_kgen_utils``), a copy of the ``TPROF`` file located next
           to this module, and both Makefiles,
        4. runs the configured clean commands, then ``make`` followed by
           ``make recover`` in the state directory.

        Side effects: resets ``self._trees`` and ``self.genfiles`` and clears
        and refills ``Config.used_srcfiles``.

        Raises:
            kgutils.ProgramException: if the kernel or state object could not
                be generated for a source file.
        """

        self._trees = []
        self.genfiles = []

        kgutils.logger.info('Starting KExtract')

        # clear shared resources
        Config.used_srcfiles.clear()

        outdir = Config.path['outdir']
        kernel_dir = '%s/%s' % (outdir, Config.path['kernel'])
        state_dir = '%s/%s' % (outdir, Config.path['state'])

        # create kernel directory
        if not os.path.exists(kernel_dir):
            os.makedirs(kernel_dir)

        # create state directory
        if not os.path.exists(state_dir):
            os.makedirs(state_dir)

        # generate kernel and instrumentation
        if 'all' in Config.rebuild or 'extract' in Config.rebuild or \
            not os.path.exists('%s/Makefile' % state_dir) or \
            not glob.glob('%s/%s.*' % (outdir, Config.kernel['name'])):

            # generate kgen_driver.f90 in kernel directory
            driver = create_rootnode(KERNEL_ID_0)
            self._trees.append(driver)
            program = create_programnode(driver, KERNEL_ID_0)
            program.name = Config.kernel_driver['name']
            append_program_in_root(driver, program)

            # generate instrumentation
            for filepath, (srcobj, mods_used,
                           units_used) in Config.srcfiles.iteritems():
                if not (hasattr(srcobj.tree, 'geninfo') and
                        KGGenType.has_state(srcobj.tree.geninfo)):
                    continue

                kfile = genkobj(None, srcobj.tree, KERNEL_ID_0)
                sfile = gensobj(None, srcobj.tree, KERNEL_ID_0)

                # Validate before touching sfile: accessing sfile.kgen_stmt
                # on a None result would raise AttributeError and hide the
                # intended ProgramException.
                if kfile is None or sfile is None:
                    raise kgutils.ProgramException(
                        'Kernel source file is not generated for %s.' %
                        filepath)

                sfile.kgen_stmt.used4genstate = False
                self.genfiles.append((kfile, sfile, filepath))
                Config.used_srcfiles[filepath] = (kfile, sfile, mods_used,
                                                  units_used)

            # process each nodes in the tree; every stage is completed for
            # all 'ext' plugins before the next stage starts
            for plugin_name in event_register.keys():
                if not plugin_name.startswith('ext'):
                    continue

                for kfile, sfile, filepath in self.genfiles:
                    kfile.created([plugin_name])
                    sfile.created([plugin_name])
                for tree in self._trees:
                    tree.created([plugin_name])

            for plugin_name in event_register.keys():
                if not plugin_name.startswith('ext'):
                    continue

                for kfile, sfile, filepath in self.genfiles:
                    kfile.process([plugin_name])
                    sfile.process([plugin_name])
                for tree in self._trees:
                    tree.process([plugin_name])

            for plugin_name in event_register.keys():
                if not plugin_name.startswith('ext'):
                    continue

                for kfile, sfile, filepath in self.genfiles:
                    kfile.finalize([plugin_name])
                    sfile.finalize([plugin_name])
                for tree in self._trees:
                    tree.finalize([plugin_name])

            for plugin_name in event_register.keys():
                if not plugin_name.startswith('ext'):
                    continue

                for kfile, sfile, filepath in self.genfiles:
                    kfile.flatten(KERNEL_ID_0, [plugin_name])
                    sfile.flatten(KERNEL_ID_0, [plugin_name])
                for tree in self._trees:
                    tree.flatten(KERNEL_ID_0, [plugin_name])

            # generate source files from each node of the tree
            # NOTE(review): kernel_files / state_files are only appended to in
            # this method and never read here -- confirm whether they are
            # still needed.
            kernel_files = []
            state_files = []
            for kfile, sfile, filepath in self.genfiles:
                filename = os.path.basename(filepath)
                set_indent('')
                klines = kfile.tostring()
                if klines is not None:
                    klines = kgutils.remove_multiblanklines(klines)
                    kernel_files.append(filename)
                    with open('%s/%s' % (kernel_dir, filename), 'wb') as fd:
                        fd.write(klines)

                # the state file is written only when used4genstate was
                # switched on after being reset to False above
                if sfile.kgen_stmt.used4genstate:
                    set_indent('')
                    slines = sfile.tostring()
                    if slines is not None:
                        slines = kgutils.remove_multiblanklines(slines)
                        state_files.append(filename)
                        with open('%s/%s' % (state_dir, filename),
                                  'wb') as fd:
                            fd.write(slines)

            # the driver file is always (re)created, even if nothing is
            # written into it
            driver_path = '%s/%s.f90' % (kernel_dir,
                                         Config.kernel_driver['name'])
            with open(driver_path, 'wb') as fd:
                set_indent('')
                lines = driver.tostring()
                if lines is not None:
                    lines = kgutils.remove_multiblanklines(lines)
                    fd.write(lines)
            kernel_files.append(Config.kernel['name'])

            kgutils.logger.info(
                'Kernel generation and instrumentation is completed.')

            # generate kgen_utils.f90 in kernel directory
            kernel_files.append(KGUTIL)
            self.generate_kgen_utils()

            # copy the TPROF file that sits next to this module
            shutil.copyfile(
                '%s/%s' % (os.path.dirname(os.path.realpath(__file__)), TPROF),
                '%s/%s' % (kernel_dir, TPROF))
            kernel_files.append(TPROF)

            self.generate_kernel_makefile()
            kernel_files.append('Makefile')

            self.generate_state_makefile()
            state_files.append('Makefile')

            kgutils.logger.info('Makefiles are generated')

            # TODO: wait until state data generation is completed
            # use -K option for bsub to wait for job completion

            # clean app
            if Config.cmd_clean['cmds']:
                kgutils.run_shcmd(Config.cmd_clean['cmds'])
            if Config.state_switch['clean']:
                kgutils.run_shcmd(Config.state_switch['clean'])

            # build and run app with state instrumentation
            kgutils.logger.info(
                'Application is being built/run with state generation instrumentation.'
            )
            out, err, retcode = kgutils.run_shcmd('make', cwd=state_dir)
            if retcode != 0:
                # report the failure instead of silently ignoring it; the
                # recover step below still runs
                kgutils.logger.warn(
                    'State data is not correctly generated.: %s' % err)

            kgutils.run_shcmd('make recover', cwd=state_dir)
            if Config.state_switch['clean']:
                kgutils.run_shcmd(Config.state_switch['clean'])

            kgutils.logger.info('Application built/run is finished.')