def parseBuildOutput(dArgs, bDebug=False):

    dFiles = {
        'returnCode': 'retcode.log',
        'buildTime': 'runtime.log',
        #'dmesg' : 'dmesg.log',
        #'stdout' : 'stdout.log',
        #'stderr' : 'stderr.log',
        'numObjects': 'numObjects.log'
    }

    dBuffer = {}

    for sFileType, sFileName in dFiles.iteritems():

        sFileName = os.path.join(dArgs['dirs']['output'], sFileName)

        if os.path.isfile(sFileName):

            with open(sFileName, 'r') as fBuilderFile:

                # read the log file and strip leading/trailing whitespace
                dBuffer[sFileType] = fBuilderFile.read().strip()

        else:

            dBuffer[sFileType] = ''

    if bDebug:

        debug('func: parseBuildOutput() dBuffer:', json.dumps(dBuffer,
                                                              indent=4))

    return dBuffer
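
# A minimal usage sketch for parseBuildOutput() -- the path below is
# hypothetical; only dArgs['dirs']['output'] is read, and a missing log file
# simply yields an empty string in the returned buffer:
#
#   dArgs = {'dirs': {'output': '/data/builder/container-0/output'}}
#   dBuffer = parseBuildOutput(dArgs, bDebug=True)
#   # e.g. {'returnCode': '0', 'buildTime': '73', 'numObjects': '125'}
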
def findProjects(sCorpusPath, dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-project-paths'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:

            del lDirs[:]

        if iLevel == 11:

            if dConfig['debug']: debug('func: findProjects()', 'projects-root:', sRoot, iLevel)
            
            qRedis.put(sRoot)
            
            iCount += 1

            if dConfig['debug'] and iCount >= 10: break

    printMsg('func: findProjects()', str(iCount), 'projects loaded into queue for processing')
def loadExistingBuilds(dConfig, sOS):

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])
    dMp.open()

    sExistingBuilds = RedisSet(name=dConfig['redis-already-built'],
                               namespace='set',
                               host=dConfig['redis-loc'],
                               port=dConfig['redis-port'])
    sExistingBuilds.flush()

    lProjectRows = dMp.select(sSelectClause='projectName',
                              sTable='builtWith_' + sOS,
                              bDebug=dConfig['debug'])

    dMp.close()

    # populate the redis set with the projects already built for this OS
    for tProjectRow in lProjectRows:

        (sProjectName, ) = tProjectRow
        sExistingBuilds.put(sProjectName)

    debug('func: loadExistingBuilds()',
          sOS + ' has ' + str(len(sExistingBuilds)) + ' built projects')
def main(argv):

    iForks = 10
    iStart = time.time()

    ### setup consumers

    lConsumerArgs = []

    # create a locking semaphore for mutex
    lock = multiprocessing.Lock()

    for iCtr in range(0, iForks):

        lConsumerArgs.append(("lock testing procId", iCtr))

    # create pool of workers -- one worker per requested fork
    oConsumerPool = multiprocessing.Pool(processes=iForks,
                                         initializer=initialize_lock,
                                         initargs=(lock, ))

    ### do work -- run the lock test in each worker
    oConsumerPool.map(test, lConsumerArgs)

    oConsumerPool.close()
    oConsumerPool.join()

    # processBuildTargets( (dSearchStrings[ dConfig['queueBuildType'] ], 0, dArgs, dConfig) )

    debug('func: main()', "all processes completed")

    iEnd = time.time()

    printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
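
# initialize_lock() is used as the Pool initializer above but is not defined
# in this snippet; a minimal sketch (assuming workers expect a module-level
# 'lock', as test(), startBuild(), and removeContainer() do below) would be:

def initialize_lock(oLock):

    # publish the shared multiprocessing.Lock as a module-level global so
    # that every pool worker can serialize access to shared resources
    global lock
    lock = oLock
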
def findProjects(qRedis, sCorpusPath, dConfig):

    lProjectPaths = []

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:

            del lDirs[:]

        if iLevel == 11 and "github" not in sRoot:

            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            if dConfig['redis']:
                qRedis.put(sRoot)

            else:

                lProjectPaths.append(sRoot)

            iCount += 1

            if dConfig['debug'] and iCount >= 10: break

    printMsg('func: findProjects()', str(iCount),
             'projects loaded into queue for processing')

    return lProjectPaths
def copySource(dArgs, bDebug=False):

    sCmd = 'rsync -a ' + dArgs['projectPath'] + '/ ' + dArgs['dirs'][
        'source'] + '/'

    if bDebug: debug('func: copySource() copy source for container:', sCmd)

    os.system(sCmd)
def removeContainer(dArgs, bDebug=False):

    sCmd = 'docker rm ' + dArgs['containerName']

    if bDebug:
        debug('func: removeContainer() removing container post build:', sCmd)

    os.system(sCmd)
def recordProjectName(dArgs, bDebug=False):

    # write the project name to a log file in the output directory
    sCmd = 'echo \"' + dArgs['projectName'] + '\" > ' + os.path.join(
        dArgs['dirs']['output'], 'projectName.log')

    if bDebug: debug('func: recordProjectName() recording project name:', sCmd)

    os.system(sCmd)
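
# A shell-free sketch of the same write (echo appends the same trailing
# newline) that avoids quoting problems in project names:
#
#   with open(os.path.join(dArgs['dirs']['output'], 'projectName.log'), 'w') as fLog:
#       fLog.write(dArgs['projectName'] + '\n')
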
def copySource(dArgs, bDebug=False):

    #sCmd = 'rsync -a ' + dArgs['projectPath'] + ' ' + dArgs['dirs']['source'] + '/'
    sCmd = 'tar xzf ' + dArgs[
        'projectPath'] + ' --exclude=\'.git\' --exclude=\'.svn\'  -C ' + dArgs[
            'dirs']['source'] + '/'

    if bDebug: debug('func: copySource() unpack source for container:', sCmd)

    os.system(sCmd)
def copyScripts(dArgs, bDebug):

    sCmd = 'rsync -a ' + dArgs['buildScripts'][
        dArgs['buildType']] + ' ' + dArgs['dirs']['scripts'] + '/' + dArgs[
            'script-name'] + ' && '
    sCmd += 'rsync -a ' + dArgs['buildScripts']['loader'] + ' ' + dArgs[
        'dirs']['scripts'] + '/'

    if bDebug: debug('func: copyScripts() copy script for container:', sCmd)

    os.system(sCmd)
def test(tTup):

    (sMsg, iProcId) = tTup

    for iCtr in range(0, 5):

        lock.acquire()

        time.sleep(1)
        debug('func: test():', sMsg, iProcId, 'msg #', iCtr)

        lock.release()
def pollBuild(dArgs, bDebug=False):

    sStatus = ''
    bStatus = False

    sDockerStatus = subprocess.check_output(['docker', 'ps', '-a'])

    if bDebug:
        debug('func: pollBuild() docker ps -a output:\n', sDockerStatus)

    for sLine in sDockerStatus.split('\n'):

        if bDebug:
            debug('func: pollBuild() parsed docker ps -a output:', sLine)

        if dArgs['containerName'] in sLine:

            sStatus = sLine

    if bDebug: debug('func: pollBuild() container building status:', sStatus)

    # the container counts as building unless docker reports it Exited; note
    # that an empty sStatus (container not listed at all) is also treated as
    # still building
    if 'Exited (' not in sStatus:

        bStatus = True

    if bDebug: debug('func: pollBuild() container building:', bStatus)

    return bStatus
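
# A sketch of how pollBuild() is presumably driven (the 10-second interval is
# an assumption, not taken from this snippet): poll until the container exits,
# then collect results and clean up.
#
#   while pollBuild(dArgs, bDebug=True):
#       time.sleep(10)
#   dBuffer = parseBuildOutput(dArgs)   # then tar up dirs, remove container
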
def writeBuildSummaries(dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-json'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    while 1:

        # get next project summary to process
        sProjectSummary = qRedis.get(block=True, timeout=30)

        if sProjectSummary:

            # do something with summary
            dProjectSummary = json.loads(sProjectSummary)

            #sBuildPath = os.path.relpath(dProjectSummary['sourcePath'], '/nfscorpus/nfscorpus')
            #sBuildPath = os.path.join('/nfsbuild/nfsbuild', sBuildPath)
            if "_8tof" in dProjectSummary['sourcePath']:
                sBuildPath = os.path.relpath(dProjectSummary['sourcePath'],
                                             '/data/corpus_8tof')
                sBuildPath = os.path.join('/data/builder_SAN/outputCyber',
                                          sBuildPath)
            if "_0to7" in dProjectSummary['sourcePath']:
                sBuildPath = os.path.relpath(dProjectSummary['sourcePath'],
                                             '/data/corpus_0to7')
                sBuildPath = os.path.join('/data/builder_SAN/outputCyber',
                                          sBuildPath)

            (sBuildPath, _) = os.path.split(sBuildPath)

            # ensure build directory exists
            sCmd = 'mkdir -p ' + sBuildPath
            if dConfig['debug']:
                debug('func: writeBuildSummaries() mkdir cmd:', sCmd)

            os.system(sCmd)

            sJsonPath = os.path.join(sBuildPath, 'build.json')
            if dConfig['debug']:
                debug('func: writeBuildSummaries() sJsonPath:', sJsonPath)

            with open(sJsonPath, 'w') as fJson:

                fJson.write(json.dumps(dProjectSummary, indent=4))

        else:

            break
def copyScripts(dArgs, bDebug):

    sCmd = ''

    for dTarget in dArgs['targets']:

        if dTarget['buildType'] not in dArgs['source-compilers'].keys():

            # accumulate one rsync per target type (+= so earlier targets are
            # not overwritten)
            sCmd += 'rsync -a ' + dArgs['buildScripts'][dTarget[
                'buildType']] + ' ' + dArgs['dirs']['scripts'] + '/ && '

    sCmd += 'rsync -a ' + dArgs['buildScripts']['loader'] + ' ' + dArgs[
        'dirs']['scripts'] + '/'

    if bDebug: debug('func: copyScripts() copy script for container:', sCmd)

    os.system(sCmd)
def removeContainer(dArgs, bDebug=False):

    sCmd = 'docker rm ' + dArgs['containerName']

    if bDebug:
        debug('func: removeContainer() removing container post build:', sCmd)

    # enter mutex protected region
    lock.acquire()

    os.system(sCmd)

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()
def findProjects(sCorpusPath, iForks, dConfig):

    lProjectPaths = []

    if dConfig['redis']:

        qRedis = RedisQueue(dConfig['redis-queue-name'],
                            namespace='queue',
                            host=dConfig['redis-loc'])

        # ensure redis queue is empty prior to starting consumers
        qRedis.flush()

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:

            del lDirs[:]

        if iLevel == 11:

            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            if dConfig['redis']:

                qRedis.put(sRoot)

            else:

                lProjectPaths.append(sRoot)

            iCount += 1

            if dConfig['debug'] and iCount >= 1: break

    printMsg('func: findProjects()', str(iCount),
             'projects loaded into queue for processing')

    return lProjectPaths
def postBuildStatusUpdates(dArgs, bjson, dConfig):

    dBuildArgs = {}

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    dBuildArgs['projectName'] = bjson['projectName']
    dBuildArgs['projectPath'] = bjson['sourcePath']
    dBuildArgs['buildTarPath'] = bjson['builds'][0]['buildTarPath']
    dBuildArgs['targets'] = bjson['builds'][0]['targets']
    #    dBuildArgs['builder'] = bjson['containerName']
    dBuildArgs['buildTime'] = bjson['builds'][0]['buildTime']
    dBuildArgs['version'] = bjson['builds'][0]['version']
    dBuildArgs['os'] = bjson['builds'][0]['os']
    dBuildArgs['numObjectsPreBuild'] = bjson['builds'][0]['numObjectsPreBuild']
    dBuildArgs['numObjectsPostBuild'] = bjson['builds'][0][
        'numObjectsPostBuild']
    dBuildArgs['numObjectsGenerated'] = bjson['builds'][0][
        'numObjectsGenerated']
    dBuildArgs['numSources'] = bjson['builds'][0]['numSources']
    dBuildArgs['returnCode'] = bjson['builds'][0]['targets'][0]['returnCode']

    #debug("BuildArgs: ", dBuildArgs)

    if dConfig['debug']:
        debug(
            'func: postBuildStatusUpdates() build args prepared for mysql ingestion'
        )

    # commit status to database
    dMp.open()
    dMp.insertIntoBuildStatusTargets(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.insertIntoBuildStatus(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.close()

    if dConfig['debug']:
        debug(
            'func: postBuildStatusUpdates() build status ingested into mysql')
def makeDirs(dArgs, bDebug=False):

    lCmds = []

    # initialize -- ensure old container directories aren't there
    # remove container directory
    sCmd = 'rm -rf ' + os.path.join(dArgs['containerPath'],
                                    dArgs['containerName'])
    lCmds.append(sCmd)

    for sDirKey, sDirName in dArgs['dirs'].iteritems():

        lCmds.append('mkdir -p ' + sDirName)

    if bDebug:
        debug('func: makeDirs() making dirs for container:',
              json.dumps(lCmds, indent=4))

    for sCmd in lCmds:

        os.system(sCmd)
def tarUpContainerDirs(dArgs, bDebug=False):

    # tar up container directory
    sCmd = 'cd ' + dArgs['containerPath'] + ' && tar -zcf ' + dArgs[
        'tarName'] + ' ' + dArgs['containerName'] + ' && '

    # make project-specific build directory if it does not exist
    sCmd += 'mkdir -p ' + dArgs['buildPath'] + ' && '

    # move tar to build directory
    sCmd += 'mv ' + os.path.join(
        dArgs['containerPath'],
        dArgs['tarName']) + ' ' + dArgs['buildPath'] + ' && '

    # remove container directory
    sCmd += 'rm -rf ' + os.path.join(dArgs['containerPath'],
                                     dArgs['containerName'])

    if bDebug:
        debug('func: tarUpContainerDirs() taring up container dirs:', sCmd)

    os.system(sCmd)
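
# The chained command above relies on '&&' to short-circuit on failure, but
# os.system() discards the exit status; a hedged subprocess-based sketch of
# the same call surfaces failures instead of silently continuing:
#
#   import subprocess
#   try:
#       subprocess.check_call(sCmd, shell=True)
#   except subprocess.CalledProcessError as e:
#       warning('func: tarUpContainerDirs() command failed, rc:', e.returncode)
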
def changePerms(tTup):

    (sProjectPath, dConfig) = tTup

    sLatestDir = os.path.join(sProjectPath, 'latest')

    if os.path.exists(sLatestDir):

        if os.path.isdir(sLatestDir):

            # project-path/latest exists as a directory

            if dConfig['debug']:
                debug('func: changePerms()',
                      'changing directory permissions on', sLatestDir)

            # change directory permissions to 555
            os.system('find ' + sLatestDir +
                      ' -type d -exec chmod 555 \'{}\' \;')

            if dConfig['debug']:
                debug('func: changePerms()', 'changing file permissions on',
                      sLatestDir)

            # change file permissions to 444
            os.system('find ' + sLatestDir +
                      ' -type f -exec chmod 444 \'{}\' \;')

        else:

            warning(
                'func changePerms() latest exists but is not a directory under path:',
                sLatestDir)

    else:

        warning('func changePerms() latest does not exist under path:',
                sProjectPath, 'at', sLatestDir)
def postBuildStatusUpdates(dArgs, dBuffer, dConfig):

    dBuildArgs = {}

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    lBuildTypes = dMp.getBuildTypes()
    for sBuildType in lBuildTypes:

        dBuildArgs[sBuildType] = False

    dBuildArgs['projectName'] = dArgs['projectName']
    dBuildArgs['projectPath'] = dArgs['projectPath']
    dBuildArgs['buildTarPath'] = os.path.join(dArgs['buildPath'],
                                              dArgs['tarName'])
    dBuildArgs['buildTargetPath'] = dArgs['buildTargetPath']
    dBuildArgs['builder'] = dArgs['containerName']
    dBuildArgs['buildTime'] = dBuffer['buildTime']
    #dBuildArgs['dmesg'] = dBuffer['dmesg']
    dBuildArgs['version'] = dArgs['version']
    dBuildArgs['os'] = dArgs['containerOS']
    dBuildArgs['numObjects'] = dBuffer['numObjects']
    dBuildArgs['returnCode'] = dBuffer['returnCode']
    ### troubleshoot serialization error
    #dBuildArgs['stdout'] = dBuffer['stdout']
    #dBuildArgs['stderr'] = dBuffer['stderr']

    dBuildArgs[dArgs['buildType']] = True

    if dConfig['debug']:
        debug(
            'func: postBuildStatusUpdates() build args prepared for es and mysql ingestion'
        )

    # commit status to elasticsearch

    oES = Elasticsearch(dConfig['es-instance-locs'])
    oES.index(index=dConfig['es-file-index-name'],
              doc_type=dConfig['es-file-index-type'],
              body=dBuildArgs,
              timeout="20m",
              request_timeout=600.)

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build status ingested into es')

    # commit status to database
    dMp.open()
    dMp.insertIntoBuildStatus(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.close()

    if dConfig['debug']:
        debug(
            'func: postBuildStatusUpdates() build status ingested into mysql')
def startBuild(dArgs, bDebug=False):

    #time.sleep( int(dArgs['containerId']) )

    sCmd = 'docker run -d -m=' + dArgs[
        'containerMem'] + ' --cpuset-cpus=' + dArgs['containerId']
    sCmd += ' --name ' + dArgs['containerName']
    sCmd += ' --ulimit nproc=2048:4096'
    '''
    VOLUME ["/buildArtifacts"]
    VOLUME ["/output"]
    VOLUME ["/scripts"]
    VOLUME ["/source"]
    '''

    sCmd += ' -v ' + dArgs['dirs']['buildArtifacts'] + ':/buildArtifacts'
    sCmd += ' -v ' + dArgs['dirs']['output'] + ':/output'
    sCmd += ' -v ' + dArgs['dirs']['scripts'] + ':/scripts'
    sCmd += ' -v ' + dArgs['dirs']['source'] + ':/source'
    sCmd += ' ' + dArgs['imageName']
    sCmd += ' /scripts/runBuild.sh'

    if bDebug: debug('func: startBuild() starting container:', sCmd)
    '''
    use locking semaphore for mutex
    noticing weird docker container spawning issues when containers are started simultaneously by multiple processes
    '''

    # enter mutex protected region
    lock.acquire()

    os.system(sCmd)

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()
def pollBuild(dArgs, bDebug=False):

    sStatus = ''
    bStatus = False

    # enter mutex protected region
    lock.acquire()

    sDockerStatus = subprocess.check_output(['docker', 'ps', '-a'])

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()

    if bDebug:
        debug('func: pollBuild() docker ps -a output:\n', sDockerStatus)

    for sLine in sDockerStatus.split('\n'):

        if bDebug:
            debug('func: pollBuild() parsed docker ps -a output:', sLine)

        if dArgs['containerName'] in sLine:

            sStatus = sLine

    if bDebug: debug('func: pollBuild() container building status:', sStatus)

    # the container counts as building unless docker reports it Exited; note
    # that an empty sStatus (container not listed at all) is also treated as
    # still building
    if 'Exited (' not in sStatus:

        bStatus = True

    if bDebug: debug('func: pollBuild() container building:', bStatus)

    return bStatus
def main(argv):

    # defaults
    bError = False

    dConfig = {}

    dConfig['debug'] = False

    dConfig['forks'] = 5

    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True

    dConfig['redis-queue-json'] = 'muse-json'
    dConfig['redis-set'] = 'muse-projects'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'f:d',
                                       ['forks=', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:

        if opt in ('-f', '--forks'):

            try:

                dConfig['forks'] = int(arg)

            except ValueError as e:

                bError = True

        elif opt in ('-d', '--debug'):

            dConfig['debug'] = True

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError: usage()
    else:

        iStart = time.time()

        # prepare redis queue for producer, flush queue before starting the producer
        qRedis = RedisQueue(dConfig['redis-queue-json'],
                            namespace='queue',
                            host=dConfig['redis-loc'],
                            port=dConfig['redis-port'])
        qRedis.flush()
        '''
        # multi-process approach
        # call producer process that populates redis queue with project path roots
        pProducer = multiprocessing.Process( target=createBuildSummaries, args=(dConfig) )
        pProducer.start()

        ### setup json writers
        lConsumerArgs = []

        for iCtr in range(0, dConfig['forks']):

            lConsumerArgs.append( (dConfig) )

        # create pool of workers 
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'])

        ### do work -- use pool of workers to search for each search string in muse-corpus-source es index
        oConsumerPool.map(writeBuildSummaries, lConsumerArgs)

        # wait for the producer to complete
        pProducer.join()

        # wait for the consumer pool to complete
        oConsumerPool.close()
        oConsumerPool.join()
        '''
        '''
        # single process approach:
        '''
        createBuildSummaries(dConfig)
        writeBuildSummaries(dConfig)

        if dConfig['debug']: debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def createBuildSummaries(dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-json'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    sLimitClause = ''
    if dConfig['debug']: sLimitClause = '10'

    dReturnCodeLookup = {
        'buildSuccess': 'success',
        'buildPartial': 'partial',
        'buildFail': 'fail'
    }

    sSelectClause = 'projectName,projectPath,buildTarPath,buildTime,version,os,numObjectsPreBuild,numObjectsPostBuild,numObjectsGenerated,numSources,buildTargetPath,configureBuildType,configureacBuildType,configureinBuildType,cmakeBuildType,makefileBuildType,antBuildType,mavenBuildType,returnCode'

    lTargetTypes = [
        'configureBuildType', 'configureacBuildType', 'configureinBuildType',
        'cmakeBuildType', 'makefileBuildType', 'antBuildType', 'mavenBuildType'
    ]

    dMp.open()

    iProjectCount = 0

    dProjects = {
        'success':
        RedisSet(dConfig['redis-set'] + '-success',
                 namespace='set',
                 host=dConfig['redis-loc'],
                 port=dConfig['redis-port']),
        'partial':
        RedisSet(dConfig['redis-set'] + '-partial',
                 namespace='set',
                 host=dConfig['redis-loc'],
                 port=dConfig['redis-port']),
        'fail':
        RedisSet(dConfig['redis-set'] + '-fail',
                 namespace='set',
                 host=dConfig['redis-loc'],
                 port=dConfig['redis-port'])
    }

    for sTable, sProjectBin in dReturnCodeLookup.iteritems():

        # empty redis set
        dProjects[sProjectBin].flush()

        lProjects = dMp.select(sSelectClause='projectName',
                               sTable=sTable,
                               sOrderByClause='projectName',
                               sLimitClause=sLimitClause,
                               bDebug=dConfig['debug'])

        # populate redis set with projects of each bin type
        for tProject in lProjects:

            (sProjectName, ) = tProject

            dProjects[sProjectBin].put(sProjectName)

    dProjectSummary = {}

    lTargetRows = dMp.select(sSelectClause=sSelectClause,
                             sTable='buildStatusWithTargets',
                             sOrderByClause='projectName,buildTarPath',
                             sLimitClause=sLimitClause,
                             bDebug=dConfig['debug'])

    for tTargetRow in lTargetRows:

        dTarget = {}

        (dTarget['projectName'], dTarget['projectPath'],
         dTarget['buildTarPath'], dTarget['buildTime'], dTarget['version'],
         dTarget['os'], dTarget['numObjectsPreBuild'],
         dTarget['numObjectsPostBuild'], dTarget['numObjectsGenerated'],
         dTarget['numSources'], dTarget['buildTargetPath'],
         dTarget['configureBuildType'], dTarget['configureacBuildType'],
         dTarget['configureinBuildType'], dTarget['cmakeBuildType'],
         dTarget['makefileBuildType'], dTarget['antBuildType'],
         dTarget['mavenBuildType'], dTarget['returnCode']) = tTargetRow

        if dProjectSummary:

            if dProjectSummary['projectName'] == dTarget['projectName']:

                try:

                    # look for an existing build entry with the same tar path;
                    # the generator's loop variable does not escape the
                    # expression, so the match must be captured explicitly
                    dBuild = (dBuild
                              for dBuild in dProjectSummary['builds']
                              if dBuild['buildTarPath'] ==
                              dTarget['buildTarPath']).next()

                except (StopIteration) as e:

                    dBuild = {
                        'buildTarPath': dTarget['buildTarPath'],
                        'buildTime': dTarget['buildTime'],
                        'version': dTarget['version'],
                        'os': dTarget['os'],
                        'numObjectsPreBuild': dTarget['numObjectsPreBuild'],
                        'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                        'numObjectsGenerated': dTarget['numObjectsGenerated'],
                        'numSources': dTarget['numSources'],
                        'targets': []
                    }

                    dProjectSummary['builds'].append(dBuild)

                dTargetSummary = {
                    'buildTargetPath': dTarget['buildTargetPath'],
                    'returnCode': dTarget['returnCode']
                }

                for sTargetType in lTargetTypes:

                    if dTarget[sTargetType] == 1:

                        dTargetSummary['target-type'] = sTargetType
                        break

                dBuild['targets'].append(dTargetSummary)

            else:

                if dConfig['debug']:
                    debug('func: createBuildSummaries() dProjectSummary:',
                          json.dumps(dProjectSummary, indent=4))
                qRedis.put(json.dumps(dProjectSummary))
                iProjectCount += 1
                dProjectSummary = {}

        if not dProjectSummary:

            # project specific build summary info

            dBuild = {
                'buildTarPath': dTarget['buildTarPath'],
                'buildTime': dTarget['buildTime'],
                'version': dTarget['version'],
                'os': dTarget['os'],
                'numObjectsPreBuild': dTarget['numObjectsPreBuild'],
                'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                'numObjectsGenerated': dTarget['numObjectsGenerated'],
                'numSources': dTarget['numSources'],
                'targets': []
            }

            dProjectSummary = {
                'projectName': dTarget['projectName'],
                'sourcePath': dTarget['projectPath'],
                'builds': [dBuild]
            }

            if dTarget['projectName'] in dProjects['success']:
                dProjectSummary['buildStatus'] = 'success'
            elif dTarget['projectName'] in dProjects['partial']:
                dProjectSummary['buildStatus'] = 'partial'
            elif dTarget['projectName'] in dProjects['fail']:
                dProjectSummary['buildStatus'] = 'fail'

            # target specific build summary info

            dTargetSummary = {
                'buildTargetPath': dTarget['buildTargetPath'],
                'returnCode': dTarget['returnCode']
            }

            for sTargetType in lTargetTypes:

                if dTarget[sTargetType] == 1:

                    dTargetSummary['target-type'] = sTargetType
                    break

            dBuild['targets'].append(dTargetSummary)

    if dProjectSummary:

        if dConfig['debug']:
            debug('func: createBuildSummaries() dProjectSummary:',
                  json.dumps(dProjectSummary, indent=4))
        qRedis.put(json.dumps(dProjectSummary))
        iProjectCount += 1

        dProjectSummary = {}

    dMp.close()

    printMsg('func: createBuildSummaries()', str(iProjectCount),
             'projects queued')
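
# For reference, a sketch of the summary shape createBuildSummaries() queues,
# reconstructed from the code above (field values are illustrative only):
#
#   {
#       "projectName": "exampleProject",
#       "sourcePath": "/data/corpus_8tof/e/x/exampleProject",
#       "buildStatus": "partial",
#       "builds": [
#           {
#               "buildTarPath": "...", "buildTime": "73", "version": "1",
#               "os": "ubuntu14", "numObjectsPreBuild": "0",
#               "numObjectsPostBuild": "125", "numObjectsGenerated": "125",
#               "numSources": "150",
#               "targets": [
#                   {"buildTargetPath": "...", "returnCode": "0",
#                    "target-type": "makefileBuildType"}
#               ]
#           }
#       ]
#   }
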
def main(argv):

    # defaults
    bError = False

    dConfig = {}

    dConfig['containerImage'] = 'musebuilder'
    #dConfig['containerPath'] = '/data/builder'
    dConfig['containerPath'] = '/data/builder_SAN/containers'

    dConfig['debug'] = False

    dConfig['elasticsearch'] = True
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']

    #dConfig['es-file-index-name'] = 'muse-corpus-source'
    dConfig['es-file-index-name'] = 'muse-corpus-build'
    dConfig['es-file-index-type'] = 'muse-project-build'

    dConfig['forks'] = 5

    dConfig['hostname'] = socket.gethostname().replace('.', '')

    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True

    dConfig['rebuild'] = False

    dConfig['redis-already-built'] = 'muse-already-built-'
    dConfig['redis-already-built-nate'] = 'NEWbuiltProjects'
    dConfig['redis-queue-to-build'] = 'muse-to-build'
    dConfig['redis-queue-building'] = 'muse-building'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    dArgs = {}

    # number of attempts with each to build targets to resolve dependencies
    dArgs['buildCycles'] = 2
    dArgs['containerMem'] = '2g'

    dArgs['buildScripts'] = {}
    dArgs['buildScripts']['root'] = '/managed/scripts'
    dArgs['buildScripts']['loader'] = os.path.join(
        dArgs['buildScripts']['root'], 'runBuild.sh')
    dArgs['buildScripts']['cmakeBuildType'] = os.path.join(
        dArgs['buildScripts']['root'], 'cmake.sh')
    dArgs['buildScripts']['configureBuildType'] = os.path.join(
        dArgs['buildScripts']['root'], 'configure.sh')
    dArgs['buildScripts']['configureacBuildType'] = os.path.join(
        dArgs['buildScripts']['root'], 'configureac.sh')
    dArgs['buildScripts']['configureinBuildType'] = os.path.join(
        dArgs['buildScripts']['root'], 'configurein.sh')
    dArgs['buildScripts']['makefileBuildType'] = os.path.join(
        dArgs['buildScripts']['root'], 'make.sh')

    dArgs['containerScripts'] = {}
    dArgs['containerScripts']['root'] = '/scripts'
    dArgs['containerScripts']['cmakeBuildType'] = os.path.join(
        dArgs['containerScripts']['root'], 'cmake.sh')
    dArgs['containerScripts']['configureBuildType'] = os.path.join(
        dArgs['containerScripts']['root'], 'configure.sh')
    dArgs['containerScripts']['configureacBuildType'] = os.path.join(
        dArgs['containerScripts']['root'], 'configureac.sh')
    dArgs['containerScripts']['configureinBuildType'] = os.path.join(
        dArgs['containerScripts']['root'], 'configurein.sh')
    dArgs['containerScripts']['makefileBuildType'] = os.path.join(
        dArgs['containerScripts']['root'], 'make.sh')

    dArgs['containerDirs'] = ['buildArtifacts', 'output', 'scripts', 'source']
    dArgs['containerOS'] = 'ubuntu14'
    dArgs['containerPath'] = dConfig['containerPath']

    dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs['containerOS']

    dArgs['script-name'] = 'build.sh'
    '''
    dArgs['build-targets'] = {
        'configure' : 'configureBuildType',
        'configure.ac' : 'configureacBuildType',
        'configure.in' : 'configureinBuildType',
        'CMakeLists.txt' : 'cmakeBuildType',
        'Makefile' : 'makefileBuildType'
        #'build.xml' : 'antBuildType', 
        #'pom.xml' : 'mavenBuildType'
    }
    '''

    dArgs['source-compilers'] = {'cBuildType': 'gcc', 'cppBuildType': 'g++'}
    '''
    dArgs['source-targets'] = {
        '.c' : 'cBuildType',
        '.cc' : 'cppBuildType',
        '.cpp' : 'cppBuildType',
        '.cxx' : 'cppBuildType',
        '.c++' : 'cppBuildType'
    }
    '''

    lSupportedOSs = ['fedora20', 'fedora21', 'ubuntu12', 'ubuntu14']

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'f:o:rdy',
        ['forks=', 'os=', 'rebuild', 'debug', 'debug-flags'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:

        if opt in ('-f', '--forks'):

            try:

                dConfig['forks'] = int(arg)

            except ValueError as e:

                bError = True

        elif opt in ('-o', '--os'):

            if arg in lSupportedOSs:

                dArgs['containerOS'] = arg
                dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs[
                    'containerOS']

            else:

                bError = True

        elif opt in ('-r', '--rebuild'):

            dConfig['rebuild'] = True

        elif opt in ('-d', '--debug'):

            dConfig['debug'] = True

        elif opt in ('-y', '--debug-flags'):

            dArgs['source-compilers'] = {
                'cBuildType': 'gcc -g3 -O0 -DDEBUG',
                'cppBuildType': 'g++ -g3 -O0 -DDEBUG'
            }

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError: usage()
    else:
        '''
        # pre-initialization -- if projects remained in building queue, put them back in queue-to-build
        qToBuildRedis = RedisQueue(name=dConfig['redis-queue-building'], name2=dConfig['redis-queue-to-build'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

        for iCtr in range(0, len(qToBuildRedis)):

            qToBuildRedis.getnpush()
        '''

        dConfig['redis-already-built'] = dConfig[
            'redis-already-built'] + dArgs['containerOS']

        sExistingBuilds = RedisSet(name=dConfig['redis-already-built'],
                                   namespace='set',
                                   host=dConfig['redis-loc'],
                                   port=dConfig['redis-port'])
        sExistingBuilds.flush()

        if not dConfig['rebuild']:

            loadExistingBuilds(dConfig, dArgs['containerOS'])

        iStart = time.time()

        ### setup consumers

        lConsumerArgs = []

        # create a locking semaphore for mutex
        lock = multiprocessing.Lock()

        for iCtr in range(0, dConfig['forks']):

            lConsumerArgs.append((iCtr, dArgs, dConfig))

        # create pool of workers -- one worker per requested fork
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'],
                                             initializer=initialize_lock,
                                             initargs=(lock, ))

        ### do work -- use pool of workers to process queued build targets
        if dConfig['debug']: print(lConsumerArgs)

        oConsumerPool.map(processBuildTargets, lConsumerArgs)

        oConsumerPool.close()
        oConsumerPool.join()

        # processBuildTargets( (0, dArgs, dConfig) )

        if dConfig['debug']: debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findProjects(sLanguage, dConfig):

    # setup elasticsearch client
    oES = Elasticsearch(dConfig['es-instance-locs'])

    lProjects = []

    iCtr = 0

    dQuery = {"query": {"match_all": {}}, "fields": [sLanguage]}

    if dConfig['debug']:
        debug('func: findProjects() dQuery:', json.dumps(dQuery))

    # scroll time set to 20 minutes, change as needed -- required for consistent results, the scroll token expires at the end of scroll time

    dResponse = oES.search(index=dConfig['es-project-index-name'],
                           doc_type=dConfig['es-project-index-type'],
                           body=json.dumps(dQuery),
                           search_type='scan',
                           scroll='20m',
                           timeout='20m',
                           lowercase_expanded_terms=False)
    sScrollId = dResponse['_scroll_id']

    if dConfig['debug']:
        debug('func: findProjects() (after initial search) dResponse: ',
              dResponse)

    if dConfig['debug']:
        debug('func: findProjects() search hits: ', dResponse['hits']['total'])

    #while not dResponse['timed_out'] and dResponse['hits']['hits']['total'] > 0:
    while 'timed_out' in dResponse and not dResponse[
            'timed_out'] and 'hits' in dResponse and 'total' in dResponse[
                'hits'] and dResponse['hits']['total'] > 0:

        dResponse = oES.scroll(scroll_id=sScrollId, scroll='20m')

        sScrollId = dResponse['_scroll_id']

        if ('hits'
                in dResponse['hits']) and (len(dResponse['hits']['hits']) > 0):

            if dConfig['debug']:
                debug('func: findProjects() scroll_id:', sScrollId,
                      'number of hits:', len(dResponse['hits']['hits']))

            if dConfig['debug'] and iCtr > 10: break

            for dHit in dResponse['hits']['hits']:

                iCtr += 1

                if dConfig['debug']:

                    debug('func: findProjects()', json.dumps(dHit, indent=4))

                    if iCtr > 100: break

                # found matches

                if 'fields' in dHit and sLanguage in dHit[
                        'fields'] and '_id' in dHit:

                    lProjects.append(dHit['_id'])

        else:

            break

    printMsg('func: findProjects() found ', str(iCtr),
             ' matching projects, spawned process exiting...')

    sLanguageFileName = './' + sLanguage.split('.')[1] + '.txt'

    printMsg('func: findProjects() file created: ', sLanguageFileName)

    with open(sLanguageFileName, 'w') as fLanguage:
        for sProject in sorted(lProjects):
            fLanguage.write(sProject + '\n')

    return lProjects
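
# The manual scan/scroll loop above can be expressed more compactly with the
# client's scan helper; a sketch assuming the same index settings (not a
# drop-in replacement -- hit counting and debug output are omitted):
#
#   from elasticsearch import helpers
#
#   for dHit in helpers.scan(oES,
#                            query=dQuery,
#                            index=dConfig['es-project-index-name'],
#                            doc_type=dConfig['es-project-index-type'],
#                            scroll='20m'):
#       if 'fields' in dHit and sLanguage in dHit['fields']:
#           lProjects.append(dHit['_id'])
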
def main(argv):

    # defaults
    sCorpusPath = '/data/corpus'

    dConfig = {}
    dConfig['debug'] = False
    dConfig['redis-queue-name'] = 'muse-project-paths-perms'
    dConfig['redis-loc'] = '38.100.20.212'
    dConfig['redis'] = False

    dConfig['time-stamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%dT%H:%M:%S')

    iForks = 10
    bError = False

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'c:f:rd',
        ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:

        if opt in ('-c', '--corpus-dir-path'):

            sCorpusPath = arg

        elif opt in ('-d', '--debug'):

            dConfig['debug'] = True

        elif opt in ('-r', '--redis'):

            dConfig['redis'] = True

        elif opt in ('-f', '--forks'):

            try:

                iForks = int(arg)

            except ValueError as e:

                bError = True

    if not os.path.isdir(sCorpusPath):

        bError = True

    if bError: usage()
    else:

        iStart = time.time()

        ### setup producer

        lProjectPaths = []

        if dConfig['redis']:

            # call producer process that populates redis queue with project path roots

            pProducer = multiprocessing.Process(target=findProjects,
                                                args=(sCorpusPath, iForks,
                                                      dConfig))
            pProducer.start()

        else:

            lProjectPaths = findProjects(sCorpusPath, iForks, dConfig)

        ### setup consumers
        lArgs = []

        # create pool of workers
        oPool = multiprocessing.Pool(processes=iForks)

        if dConfig['redis']:

            for i in range(0, iForks):

                lArgs.append(dConfig)

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(processProjects, lArgs)
            pProducer.join()

        else:

            for sPath in lProjectPaths:

                lArgs.append((sPath, dConfig))

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(findProjectFiles, lArgs)

        oPool.close()
        oPool.join()

        if dConfig['debug']: debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def main(argv):

    # defaults
    sCorpusPath = '/data/builder_SAN2/RAT'
    #    sCorpusPath = '/data/corpus_0to7'
    #    sCorpusPath = '/data/corpus_8tof'

    dConfig = {}
    dConfig['es-bulk-chunk-size'] = 500
    dConfig['debug'] = False
    # binding to muse2 doesn't work right now
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']
    dConfig['es-index-name'] = 'rat-corpus-source'
    dConfig['es-index-type'] = 'files'
    dConfig['redis-queue-name'] = 'rat-project-paths'
    dConfig['redis-loc'] = 'muse2-int'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = False

    dConfig['time-stamp'] = datetime.datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')

    iForks = 5
    bError = False

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'c:f:rd',
        ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:

        if opt in ('-c', '--corpus-dir-path'):

            sCorpusPath = arg

        elif opt in ('-d', '--debug'):

            dConfig['debug'] = True

        elif opt in ('-r', '--redis'):

            dConfig['redis'] = True

        elif opt in ('-f', '--forks'):

            try:

                iForks = int(arg)

            except ValueError as e:

                bError = True

    if not os.path.isdir(sCorpusPath):

        bError = True

    if bError: usage()
    else:

        iStart = time.time()

        #oES = createESIndex(dConfig)
        oES = Elasticsearch(dConfig['es-instance-locs'])

        ### setup producer

        lProjectPaths = []

        if dConfig['redis']:

            qRedis = RedisQueue(dConfig['redis-queue-name'],
                                namespace='queue',
                                host=dConfig['redis-loc'],
                                port=dConfig['redis-port'])

            # flush is disabled here -- uncomment to empty the redis queue
            # prior to starting consumers
            # qRedis.flush()

            # call producer process that populates redis queue with project path roots

            pProducer = multiprocessing.Process(target=findProjects,
                                                args=(qRedis, sCorpusPath,
                                                      dConfig))
            pProducer.start()

        else:

            lProjectPaths = findProjects(None, sCorpusPath, dConfig)

        ### setup consumers
        lArgs = []

        iForks = 1

        if dConfig['redis']:

            # create pool of workers
            oPool = multiprocessing.Pool(processes=iForks)

            for i in range(0, iForks):

                lArgs.append(dConfig)

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(processProjects, lArgs)
            pProducer.join()

            oPool.close()
            oPool.join()

        else:

            for sPath in lProjectPaths:

                findProjectFiles((sPath, oES, dConfig))

        if dConfig['debug']: debug('func: main()', "all processes completed")

        # es index was created with replication turned off for speed, turn on replicating shards
        turnReplicationOn(oES, dConfig)

        # refresh to make the documents available for search
        oES.indices.refresh(index=dConfig['es-index-name'])

        # and now we can count the documents
        printMsg('func: main()', 'number of documents in',
                 dConfig['es-index-name'], 'index: ',
                 oES.count(index=dConfig['es-index-name'])['count'])

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findProjectFiles(tTup):

    (sProjectPath, oES, dConfig) = tTup
    sProjectName = os.path.basename(sProjectPath)

    # create a fresh Elasticsearch client in this worker process; the client
    # passed in via tTup is replaced, since sharing one across forks is unsafe
    oES = Elasticsearch(dConfig['es-instance-locs'])

    lIgnoreDirs = ['.git', '.svn']

    lProjectFiles = []

    if dConfig['debug']:
        debug('func: findProjectFiles()', 'project-path:', sProjectPath,
              'project-name:', sProjectName)

    for sRoot, lDirs, lFiles in os.walk(sProjectPath):

        if len(lProjectFiles) > dConfig['es-bulk-chunk-size']:

            # ingest chunk into elasticsearch
            (iSuccess, lResponse) = helpers.bulk(client=oES,
                                                 actions=lProjectFiles,
                                                 timeout="20m",
                                                 request_timeout=120.)

            if iSuccess < dConfig['es-bulk-chunk-size']:
                warning('func: findProjectFiles() iSuccess:', iSuccess,
                        ' expected:', dConfig['es-bulk-chunk-size'])
                warning('func: findProjectFiles()', type(lResponse),
                        'returned by bulk api')
                warning('func: findProjectFiles()',
                        json.dumps(lResponse, indent=4),
                        'returned by bulk api')

            #del lProjectFiles[0 : len(lProjectFiles)]
            lProjectFiles = []

            if dConfig['debug']:
                debug('func: findProjectFiles()', str(len(lProjectFiles)),
                      'files loaded into elasticsearch')

        for sFile in lFiles:

            # make sure dProject is emptied each loop iteration
            dProject = {
                '_index': dConfig['es-index-name'],
                '_type': dConfig['es-index-type'],
                '_source': {
                    'project-path': sProjectPath,
                    'project-name': sProjectName,
                    'crawl-time': dConfig['time-stamp']
                }
            }

            sFilePath = os.path.join(sRoot, sFile)
            sRelPath = os.path.relpath(sFilePath, sProjectPath)

            sDecodedFile = ''
            sDecodedRelPath = ''
            sEncodedWith = ''

            # Look for the tar file with the src code
            if "_code.tgz" in sFilePath:
                global counter
                counter = counter + 1
                print(str(counter) + ': working on: ' + sFilePath)

                t = tarfile.open(sFilePath, 'r:*')

                # Iterate over the files in the tar file gz
                for tarinfo in t:
                    if tarinfo.isfile():
                        filename = tarinfo.name
                        if (".svn" not in filename and ".git" not in filename):

                            # make sure dProject is emptied each loop iteration
                            dProject = {
                                '_index': dConfig['es-index-name'],
                                '_type': dConfig['es-index-type'],
                                '_source': {
                                    'project-path': sProjectPath,
                                    'project-name': sProjectName,
                                    'crawl-time': dConfig['time-stamp']
                                }
                            }

                            # append file in tar to tar path
                            sFile = os.path.join(sFilePath, filename)
                            sRelPath = os.path.relpath(sFile, sProjectPath)

                            sDecodedFile = ''
                            sDecodedRelPath = ''
                            sEncodedWith = ''
                            try:

                                sDecodedFile = sFile.decode('utf-8')
                                sDecodedRelPath = sRelPath.decode('utf-8')
                                sEncodedWith = 'utf-8'

                            except (ValueError, UnicodeDecodeError) as e:

                                try:

                                    sDecodedFile = sFile.decode('latin-1')
                                    sDecodedRelPath = sRelPath.decode(
                                        'latin-1')
                                    sEncodedWith = 'latin-1'

                                except (ValueError, UnicodeDecodeError) as e:

                                    try:

                                        sDecodedFile = sFile.decode('utf-16')
                                        sDecodedRelPath = sRelPath.decode(
                                            'utf-16')
                                        sEncodedWith = 'utf-16'

                                    except (ValueError,
                                            UnicodeDecodeError) as e:

                                        warning(
                                            'func findProjectFiles():',
                                            'sProjectPath:',
                                            dProject['_source']
                                            ['project-path'], 'sProjectName:',
                                            dProject['_source']
                                            ['project-name'], 'sFile:', sFile,
                                            'sRelPath:', sRelPath,
                                            'utf-8, latin-1, and utf-16 decoding failed',
                                            'exception:', e)
                                        print("decode failed")
                                        sDecodedFile = ''
                                        sDecodedRelPath = ''
                                        sEncodedWith = ''

                            if sDecodedFile and sDecodedRelPath:
                                dProject['_source']['file'] = sDecodedFile
                                (_, sFileExt) = os.path.splitext(sDecodedFile)
                                if sFileExt:
                                    dProject['_source']['ext'] = sFileExt[
                                        1:].lower()
                                dProject['_source']['path'] = sDecodedRelPath

                                if dConfig['debug']:
                                    debug('func: findProjectFiles() dProject:',
                                          dProject, 'encoded with',
                                          sEncodedWith)

                                lProjectFiles.append(dProject)

                # close the tar file before moving on to the next candidate
                t.close()

        lDirs[:] = [sDir for sDir in lDirs if sDir not in lIgnoreDirs]

    # ingest any stragglers remaining into elasticsearch
    (iSuccess, lResponse) = helpers.bulk(client=oES,
                                         actions=lProjectFiles,
                                         timeout="20m",
                                         request_timeout=120.)

    if iSuccess < len(lProjectFiles):
        warning('func: findProjectFiles() iSuccess:', iSuccess, ' expected:',
                len(lProjectFiles))
        warning('func: findProjectFiles()', type(lResponse),
                'returned by bulk api')
        warning('func: findProjectFiles()', json.dumps(lResponse, indent=4),
                'returned by bulk api')

    # del lProjectFiles[0 : len(lProjectFiles)]
    lProjectFiles = []
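
# The nested try/except decode chain above repeats once per candidate
# encoding; a sketch of an equivalent helper (a hypothetical refactor, not
# part of the original) keeps the fallback order in one place:

def tryDecode(sBytes, lEncodings=('utf-8', 'latin-1', 'utf-16')):

    # return (decoded string, encoding name) for the first encoding that
    # succeeds, or ('', '') if every candidate fails
    for sEncoding in lEncodings:

        try:

            return (sBytes.decode(sEncoding), sEncoding)

        except (ValueError, UnicodeDecodeError):

            pass

    return ('', '')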