def parseBuildOutput(dArgs, bDebug=False):

    dFiles = {
        'returnCode': 'retcode.log',
        'buildTime': 'runtime.log',
        #'dmesg' : 'dmesg.log',
        #'stdout' : 'stdout.log',
        #'stderr' : 'stderr.log',
        'numObjects': 'numObjects.log'
    }

    dBuffer = {}

    for sFileType, sFileName in dFiles.iteritems():

        sFileName = os.path.join(dArgs['dirs']['output'], sFileName)

        if os.path.isfile(sFileName):
            with open(sFileName, 'r') as fBuilderFile:
                # get file input and trim unnecessary whitespace before/after
                dBuffer[sFileType] = (fBuilderFile.read()).strip()
        else:
            dBuffer[sFileType] = ''

    if bDebug:
        debug('func: parseBuildOutput() dBuffer:', json.dumps(dBuffer, indent=4))

    return dBuffer
def findProjects(sCorpusPath, dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-project-paths'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:
            del lDirs[:]

        if iLevel == 11:

            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            qRedis.put(sRoot)
            iCount += 1

            if dConfig['debug'] and iCount >= 10:
                break

    printMsg('func: findProjects()', str(iCount), 'projects loaded into queue for processing')
def loadExistingBuilds(dConfig, sOS):

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])
    dMp.open()

    sExistingBuilds = RedisSet(name=dConfig['redis-already-built'],
                               namespace='set',
                               host=dConfig['redis-loc'],
                               port=dConfig['redis-port'])
    sExistingBuilds.flush()

    lProjectRows = dMp.select(sSelectClause='projectName',
                              sTable='builtWith_' + sOS,
                              bDebug=dConfig['debug'])

    dMp.close()

    # populate the redis set with the names of already-built projects
    for tProjectRow in lProjectRows:
        (sProjectName, ) = tProjectRow
        sExistingBuilds.put(sProjectName)

    debug('func: loadExistingBuilds()', sOS + ' has ' + str(len(sExistingBuilds)) + ' built projects')
def main(argv):

    iForks = 10

    iStart = time.time()

    ### setup consumers
    lConsumerArgs = []

    # create a locking semaphore for mutex
    lock = multiprocessing.Lock()

    for iCtr in range(0, iForks):
        lConsumerArgs.append(("lock testing procId", iCtr))

    # create pool of workers -- one worker per consumer argument tuple
    oConsumerPool = multiprocessing.Pool(processes=iForks,
                                         initializer=initialize_lock,
                                         initargs=(lock, ))

    ### do work -- use pool of workers to run the lock test in parallel
    oConsumerPool.map(test, lConsumerArgs)

    oConsumerPool.close()
    oConsumerPool.join()

    # processBuildTargets( (dSearchStrings[ dConfig['queueBuildType'] ], 0, dArgs, dConfig) )

    debug('func: main()', "all processes completed")

    iEnd = time.time()

    printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findProjects(qRedis, sCorpusPath, dConfig):

    lProjectPaths = []
    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:
            del lDirs[:]

        if iLevel == 11 and "github" not in sRoot:

            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            if dConfig['redis']:
                qRedis.put(sRoot)
            else:
                lProjectPaths.append(sRoot)

            iCount += 1

            if dConfig['debug'] and iCount >= 10:
                break

    printMsg('func: findProjects()', str(iCount), 'projects loaded into queue for processing')

    return lProjectPaths
def copySource(dArgs, bDebug=False):

    sCmd = 'rsync -a ' + dArgs['projectPath'] + '/ ' + dArgs['dirs']['source'] + '/'

    if bDebug:
        debug('func: copySource() copy source for container:', sCmd)

    os.system(sCmd)
def removeContainer(dArgs, bDebug=False):

    sCmd = 'docker rm ' + dArgs['containerName']

    if bDebug:
        debug('func: removeContainer() removing container post build:', sCmd)

    os.system(sCmd)
def recordProjectName(dArgs, bDebug=False):

    # write the project name into the container's output directory
    sCmd = 'echo \"' + dArgs['projectName'] + '\" > ' + os.path.join(dArgs['dirs']['output'], 'projectName.log')

    if bDebug:
        debug('func: recordProjectName() recording project name:', sCmd)

    os.system(sCmd)
def copySource(dArgs, bDebug=False):

    #sCmd = 'rsync -a ' + dArgs['projectPath'] + ' ' + dArgs['dirs']['source'] + '/'
    sCmd = 'tar xzf ' + dArgs['projectPath'] + ' --exclude=\'.git\' --exclude=\'.svn\' -C ' + dArgs['dirs']['source'] + '/'

    if bDebug:
        debug('func: copySource() unpack source for container:', sCmd)

    os.system(sCmd)
def copyScripts(dArgs, bDebug):

    sCmd = 'rsync -a ' + dArgs['buildScripts'][dArgs['buildType']] + ' ' + dArgs['dirs']['scripts'] + '/' + dArgs['script-name'] + ' && '
    sCmd += 'rsync -a ' + dArgs['buildScripts']['loader'] + ' ' + dArgs['dirs']['scripts'] + '/'

    if bDebug:
        debug('func: copyScripts() copy script for container:', sCmd)

    os.system(sCmd)
def test(tTup):

    (sMsg, iProcId) = tTup

    for iCtr in range(0, 5):
        lock.acquire()
        time.sleep(1)
        debug('func: test():', sMsg, iProcId, 'msg #', iCtr)
        lock.release()
def pollBuild(dArgs, bDebug=False):

    sStatus = ''
    bStatus = False

    sDockerStatus = subprocess.check_output(['docker', 'ps', '-a'])

    if bDebug:
        debug('func: pollBuild() docker ps -a output:\n', sDockerStatus)

    for sLine in sDockerStatus.split('\n'):

        if bDebug:
            debug('func: pollBuild() parsed docker ps -a output:', sLine)

        if dArgs['containerName'] in sLine:
            sStatus = sLine

    if bDebug:
        debug('func: pollBuild() container building status:', sStatus)

    # container is considered to still be building unless its status line shows 'Exited ('
    if 'Exited (' not in sStatus:
        bStatus = True

    if bDebug:
        debug('func: pollBuild() container building:', bStatus)

    return bStatus
def writeBuildSummaries(dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-json'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    while 1:

        # get next project summary to process
        sProjectSummary = qRedis.get(block=True, timeout=30)

        if sProjectSummary:

            # do something with summary
            dProjectSummary = json.loads(sProjectSummary)

            #sBuildPath = os.path.relpath(dProjectSummary['sourcePath'], '/nfscorpus/nfscorpus')
            #sBuildPath = os.path.join('/nfsbuild/nfsbuild', sBuildPath)

            if "_8tof" in dProjectSummary['sourcePath']:
                sBuildPath = os.path.relpath(dProjectSummary['sourcePath'], '/data/corpus_8tof')
                sBuildPath = os.path.join('/data/builder_SAN/outputCyber', sBuildPath)

            if "_0to7" in dProjectSummary['sourcePath']:
                sBuildPath = os.path.relpath(dProjectSummary['sourcePath'], '/data/corpus_0to7')
                sBuildPath = os.path.join('/data/builder_SAN/outputCyber', sBuildPath)

            (sBuildPath, _) = os.path.split(sBuildPath)

            # ensure build directory exists
            sCmd = 'mkdir -p ' + sBuildPath

            if dConfig['debug']:
                debug('func: writeBuildSummaries() mkdir cmd:', sCmd)

            os.system(sCmd)

            sJsonPath = os.path.join(sBuildPath, 'build.json')

            if dConfig['debug']:
                debug('func: writeBuildSummaries() sJsonPath:', sJsonPath)

            with open(sJsonPath, 'w') as fJson:
                fJson.write(json.dumps(dProjectSummary, indent=4))

        else:
            break
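# Hedged sketch (illustration only, not called by the pipeline): writeBuildSummaries() above maps
# a summary's corpus source path onto the build-output tree before writing build.json. The
# standalone example below walks through that mapping for an invented corpus path; the
# '/data/corpus_8tof' and '/data/builder_SAN/outputCyber' roots are the constants used above,
# everything else is made up. It relies on the module's existing os import.
def _exampleSummaryOutputDir(sSourcePath='/data/corpus_8tof/ab/cd/exampleProject/exampleProject_code.tgz'):

    # strip the corpus root, leaving the project-relative portion of the path
    sBuildPath = os.path.relpath(sSourcePath, '/data/corpus_8tof')

    # re-root the relative path under the build output tree
    sBuildPath = os.path.join('/data/builder_SAN/outputCyber', sBuildPath)

    # drop the final path component so build.json lands in the enclosing directory
    (sBuildPath, _) = os.path.split(sBuildPath)

    # e.g. '/data/builder_SAN/outputCyber/ab/cd/exampleProject'
    return sBuildPath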
def copyScripts(dArgs, bDebug):

    sCmd = ''

    for dTarget in dArgs['targets']:

        if dTarget['buildType'] not in dArgs['source-compilers'].keys():
            sCmd = 'rsync -a ' + dArgs['buildScripts'][dTarget['buildType']] + ' ' + dArgs['dirs']['scripts'] + '/ && '

    sCmd += 'rsync -a ' + dArgs['buildScripts']['loader'] + ' ' + dArgs['dirs']['scripts'] + '/'

    if bDebug:
        debug('func: copyScripts() copy script for container:', sCmd)

    os.system(sCmd)
def removeContainer(dArgs, bDebug=False):

    sCmd = 'docker rm ' + dArgs['containerName']

    if bDebug:
        debug('func: removeContainer() removing container post build:', sCmd)

    # enter mutex protected region
    lock.acquire()

    os.system(sCmd)

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()
def findProjects(sCorpusPath, iForks, dConfig):

    lProjectPaths = []

    if dConfig['redis']:

        qRedis = RedisQueue(dConfig['redis-queue-name'],
                            namespace='queue',
                            host=dConfig['redis-loc'])

        # ensure redis queue is empty prior to starting consumers
        qRedis.flush()

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        iLevel = sRoot.count(os.sep)

        if iLevel >= 11:
            del lDirs[:]

        if iLevel == 11:

            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            if dConfig['redis']:
                qRedis.put(sRoot)
            else:
                lProjectPaths.append(sRoot)

            iCount += 1

            if dConfig['debug'] and iCount >= 1:
                break

    printMsg('func: findProjects()', str(iCount), 'projects loaded into queue for processing')

    return lProjectPaths
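# Hedged sketch (illustration only, not part of the pipeline): the findProjects() variants above
# all rely on the same os.walk idiom -- treat the number of path separators in the walk root as
# the directory depth, prune further descent with `del lDirs[:]` once the project level is
# reached, and collect the directories at exactly that level as project roots. The corpus layout
# above hard-codes level 11; the example below uses an assumed depth of 3 and relies on the
# module's existing os import.
def _exampleFindProjectRoots(sCorpusPath, iProjectLevel=3):

    lProjectRoots = []

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):

        # depth is approximated by the number of path separators in the absolute walk root
        iLevel = sRoot.count(os.sep)

        # clearing lDirs in place tells os.walk not to descend below the project level
        if iLevel >= iProjectLevel:
            del lDirs[:]

        # directories exactly at the project level are treated as project roots
        if iLevel == iProjectLevel:
            lProjectRoots.append(sRoot)

    return lProjectRoots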
def postBuildStatusUpdates(dArgs, bjson, dConfig):

    dBuildArgs = {}

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    dBuildArgs['projectName'] = bjson['projectName']
    dBuildArgs['projectPath'] = bjson['sourcePath']
    dBuildArgs['buildTarPath'] = bjson['builds'][0]['buildTarPath']
    dBuildArgs['targets'] = bjson['builds'][0]['targets']
    # dBuildArgs['builder'] = bjson['containerName']
    dBuildArgs['buildTime'] = bjson['builds'][0]['buildTime']
    dBuildArgs['version'] = bjson['builds'][0]['version']
    dBuildArgs['os'] = bjson['builds'][0]['os']
    dBuildArgs['numObjectsPreBuild'] = bjson['builds'][0]['numObjectsPreBuild']
    dBuildArgs['numObjectsPostBuild'] = bjson['builds'][0]['numObjectsPostBuild']
    dBuildArgs['numObjectsGenerated'] = bjson['builds'][0]['numObjectsGenerated']
    dBuildArgs['numSources'] = bjson['builds'][0]['numSources']
    dBuildArgs['returnCode'] = bjson['builds'][0]['targets'][0]['returnCode']

    #debug("BuildArgs: ", dBuildArgs)

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build args prepared for mysql ingestion')

    # commit status to database
    dMp.open()
    dMp.insertIntoBuildStatusTargets(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.insertIntoBuildStatus(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.close()

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build status ingested into mysql')
def makeDirs(dArgs, bDebug=False):

    lCmds = []

    # initialize -- ensure old container directories aren't there
    # remove container directory
    sCmd = 'rm -rf ' + os.path.join(dArgs['containerPath'], dArgs['containerName'])
    lCmds.append(sCmd)

    for sDirKey, sDirName in dArgs['dirs'].iteritems():
        lCmds.append('mkdir -p ' + sDirName)

    if bDebug:
        debug('func: makeDirs() making dirs for container:', json.dumps(lCmds, indent=4))

    for sCmd in lCmds:
        os.system(sCmd)
def tarUpContainerDirs(dArgs, bDebug=False):

    # tar up container directory
    sCmd = 'cd ' + dArgs['containerPath'] + ' && tar -zcf ' + dArgs['tarName'] + ' ' + dArgs['containerName'] + ' && '

    # make project-specific build directory if it does not exist
    sCmd += 'mkdir -p ' + dArgs['buildPath'] + ' && '

    # move tar to build directory
    sCmd += 'mv ' + os.path.join(dArgs['containerPath'], dArgs['tarName']) + ' ' + dArgs['buildPath'] + ' && '

    # remove container directory
    sCmd += 'rm -rf ' + os.path.join(dArgs['containerPath'], dArgs['containerName'])

    if bDebug:
        debug('func: tarUpContainerDirs() tarring up container dirs:', sCmd)

    os.system(sCmd)
def changePerms(tTup):

    (sProjectPath, dConfig) = tTup

    sLatestDir = os.path.join(sProjectPath, 'latest')

    if os.path.exists(sLatestDir):

        if os.path.isdir(sLatestDir):

            # project-path/latest exists as a directory

            if dConfig['debug']:
                debug('func: changePerms()', 'changing directory permissions on', sLatestDir)

            # change directory permissions to 555
            os.system('find ' + sLatestDir + ' -type d -exec chmod 555 \'{}\' \;')

            if dConfig['debug']:
                debug('func: changePerms()', 'changing file permissions on', sLatestDir)

            # change file permissions to 444
            os.system('find ' + sLatestDir + ' -type f -exec chmod 444 \'{}\' \;')

        else:
            warning('func changePerms() latest exists but is not a directory under path:', sLatestDir)

    else:
        warning('func changePerms() latest does not exist under path:', sProjectPath, 'at', sLatestDir)
def postBuildStatusUpdates(dArgs, dBuffer, dConfig):

    dBuildArgs = {}

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    lBuildTypes = dMp.getBuildTypes()

    for sBuildType in lBuildTypes:
        dBuildArgs[sBuildType] = False

    dBuildArgs['projectName'] = dArgs['projectName']
    dBuildArgs['projectPath'] = dArgs['projectPath']
    dBuildArgs['buildTarPath'] = os.path.join(dArgs['buildPath'], dArgs['tarName'])
    dBuildArgs['buildTargetPath'] = dArgs['buildTargetPath']
    dBuildArgs['builder'] = dArgs['containerName']
    dBuildArgs['buildTime'] = dBuffer['buildTime']
    #dBuildArgs['dmesg'] = dBuffer['dmesg']
    dBuildArgs['version'] = dArgs['version']
    dBuildArgs['os'] = dArgs['containerOS']
    dBuildArgs['numObjects'] = dBuffer['numObjects']
    dBuildArgs['returnCode'] = dBuffer['returnCode']

    ### troubleshoot serialization error
    #dBuildArgs['stdout'] = dBuffer['stdout']
    #dBuildArgs['stderr'] = dBuffer['stderr']

    dBuildArgs[dArgs['buildType']] = True

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build args prepared for es and mysql ingestion')

    # commit status to elasticsearch
    oES = Elasticsearch(dConfig['es-instance-locs'])

    oES.index(index=dConfig['es-file-index-name'],
              doc_type=dConfig['es-file-index-type'],
              body=dBuildArgs,
              timeout="20m",
              request_timeout=600.)

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build status ingested into es')

    # commit status to database
    dMp.open()
    dMp.insertIntoBuildStatus(dArgs=dBuildArgs, bDebug=dConfig['debug'])
    dMp.close()

    if dConfig['debug']:
        debug('func: postBuildStatusUpdates() build status ingested into mysql')
def startBuild(dArgs, bDebug=False):

    #time.sleep( int(dArgs['containerId']) )

    sCmd = 'docker run -d -m=' + dArgs['containerMem'] + ' --cpuset-cpus=' + dArgs['containerId']
    sCmd += ' --name ' + dArgs['containerName']
    sCmd += ' --ulimit nproc=2048:4096'

    '''
    VOLUME ["/buildArtifacts"]
    VOLUME ["/output"]
    VOLUME ["/scripts"]
    VOLUME ["/source"]
    '''

    sCmd += ' -v ' + dArgs['dirs']['buildArtifacts'] + ':/buildArtifacts'
    sCmd += ' -v ' + dArgs['dirs']['output'] + ':/output'
    sCmd += ' -v ' + dArgs['dirs']['scripts'] + ':/scripts'
    sCmd += ' -v ' + dArgs['dirs']['source'] + ':/source'

    sCmd += ' ' + dArgs['imageName']
    sCmd += ' /scripts/runBuild.sh'

    if bDebug:
        debug('func: startBuild() starting container:', sCmd)

    '''
    use locking semaphore for mutex
    noticing weird docker container spawning issues when containers are started
    simultaneously by multiple processes
    '''

    # enter mutex protected region
    lock.acquire()

    os.system(sCmd)

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()
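# Hedged sketch (illustration only, never called): rebuilds the same docker run command string
# that startBuild() assembles, from hypothetical sample values, so the final shape of the
# invocation is easy to see. The container name and host paths below are invented; only the
# image name pattern (containerImage + '-' + containerOS) and the flags mirror the code above.
def _exampleStartBuildCommand():

    dExample = {
        'containerMem': '2g',
        'containerId': '3',
        'containerName': 'musebuilder-ubuntu14-3',
        'imageName': 'musebuilder-ubuntu14',
        'dirs': {
            'buildArtifacts': '/data/builder_SAN/containers/musebuilder-ubuntu14-3/buildArtifacts',
            'output': '/data/builder_SAN/containers/musebuilder-ubuntu14-3/output',
            'scripts': '/data/builder_SAN/containers/musebuilder-ubuntu14-3/scripts',
            'source': '/data/builder_SAN/containers/musebuilder-ubuntu14-3/source',
        },
    }

    # detached container pinned to one CPU, with a hard memory cap and an nproc ulimit
    sCmd = 'docker run -d -m=' + dExample['containerMem'] + ' --cpuset-cpus=' + dExample['containerId']
    sCmd += ' --name ' + dExample['containerName']
    sCmd += ' --ulimit nproc=2048:4096'

    # each host-side container directory is bind-mounted to a fixed path inside the container
    for sDirKey in ('buildArtifacts', 'output', 'scripts', 'source'):
        sCmd += ' -v ' + dExample['dirs'][sDirKey] + ':/' + sDirKey

    # image name plus the in-container entry script staged by copyScripts()
    sCmd += ' ' + dExample['imageName'] + ' /scripts/runBuild.sh'

    return sCmd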
def pollBuild(dArgs, bDebug=False):

    sStatus = ''
    bStatus = False

    # enter mutex protected region
    lock.acquire()

    sDockerStatus = subprocess.check_output(['docker', 'ps', '-a'])

    # sleep for 2 seconds in protected region to serialize calls to docker daemon
    time.sleep(2)

    # exit mutex protected region
    lock.release()

    if bDebug:
        debug('func: pollBuild() docker ps -a output:\n', sDockerStatus)

    for sLine in sDockerStatus.split('\n'):

        if bDebug:
            debug('func: pollBuild() parsed docker ps -a output:', sLine)

        if dArgs['containerName'] in sLine:
            sStatus = sLine

    if bDebug:
        debug('func: pollBuild() container building status:', sStatus)

    # container is considered to still be building unless its status line shows 'Exited ('
    if 'Exited (' not in sStatus:
        bStatus = True

    if bDebug:
        debug('func: pollBuild() container building:', bStatus)

    return bStatus
def main(argv):

    # defaults
    bError = False

    dConfig = {}

    dConfig['debug'] = False
    dConfig['forks'] = 5

    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True

    dConfig['redis-queue-json'] = 'muse-json'
    dConfig['redis-set'] = 'muse-projects'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'f:d', ['forks=', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-f', '--forks'):
            try:
                dConfig['forks'] = int(arg)
            except ValueError as e:
                bError = True
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError:
        usage()
    else:

        iStart = time.time()

        # prepare redis queue for producer, flush queue before starting the producer
        qRedis = RedisQueue(dConfig['redis-queue-json'],
                            namespace='queue',
                            host=dConfig['redis-loc'],
                            port=dConfig['redis-port'])
        qRedis.flush()

        '''
        # multi-process approach

        # call producer process that populates redis queue with project path roots
        pProducer = multiprocessing.Process( target=createBuildSummaries, args=(dConfig) )
        pProducer.start()

        ### setup json writers
        lConsumerArgs = []

        for iCtr in range(0, dConfig['forks']):
            lConsumerArgs.append( (dConfig) )

        # create pool of workers
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'])

        ### do work -- use pool of workers to write build summaries pulled from the redis queue
        oConsumerPool.map(writeBuildSummaries, lConsumerArgs)

        # wait for the producer to complete
        pProducer.join()

        # wait for the consumer pool to complete
        oConsumerPool.close()
        oConsumerPool.join()
        '''

        '''
        # single process approach:
        '''
        createBuildSummaries(dConfig)
        writeBuildSummaries(dConfig)

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def createBuildSummaries(dConfig):

    qRedis = RedisQueue(dConfig['redis-queue-json'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    dMp = MuseProjectDB(db=dConfig['mysql-db'],
                        port=dConfig['mysql-port'],
                        user=dConfig['mysql-user'],
                        passwd=dConfig['mysql-passwd'],
                        loc=dConfig['mysql-loc'])

    sLimitClause = ''

    if dConfig['debug']:
        sLimitClause = '10'

    dReturnCodeLookup = {
        'buildSuccess': 'success',
        'buildPartial': 'partial',
        'buildFail': 'fail'
    }

    sSelectClause = 'projectName,projectPath,buildTarPath,buildTime,version,os,numObjectsPreBuild,numObjectsPostBuild,numObjectsGenerated,numSources,buildTargetPath,configureBuildType,configureacBuildType,configureinBuildType,cmakeBuildType,makefileBuildType,antBuildType,mavenBuildType,returnCode'

    lTargetTypes = [
        'configureBuildType', 'configureacBuildType', 'configureinBuildType',
        'cmakeBuildType', 'makefileBuildType', 'antBuildType', 'mavenBuildType'
    ]

    dMp.open()

    iProjectCount = 0

    dProjects = {
        'success': RedisSet(dConfig['redis-set'] + '-success',
                            namespace='set',
                            host=dConfig['redis-loc'],
                            port=dConfig['redis-port']),
        'partial': RedisSet(dConfig['redis-set'] + '-partial',
                            namespace='set',
                            host=dConfig['redis-loc'],
                            port=dConfig['redis-port']),
        'fail': RedisSet(dConfig['redis-set'] + '-fail',
                         namespace='set',
                         host=dConfig['redis-loc'],
                         port=dConfig['redis-port'])
    }

    for sTable, sProjectBin in dReturnCodeLookup.iteritems():

        # empty redis set
        dProjects[sProjectBin].flush()

        lProjects = dMp.select(sSelectClause='projectName',
                               sTable=sTable,
                               sOrderByClause='projectName',
                               sLimitClause=sLimitClause,
                               bDebug=dConfig['debug'])

        # populate redis set with projects of each bin type
        for tProject in lProjects:
            (sProjectName, ) = tProject
            dProjects[sProjectBin].put(sProjectName)

    dProjectSummary = {}

    lTargetRows = dMp.select(sSelectClause=sSelectClause,
                             sTable='buildStatusWithTargets',
                             sOrderByClause='projectName,buildTarPath',
                             sLimitClause=sLimitClause,
                             bDebug=dConfig['debug'])

    for tTargetRow in lTargetRows:

        dTarget = {}

        (dTarget['projectName'], dTarget['projectPath'], dTarget['buildTarPath'],
         dTarget['buildTime'], dTarget['version'], dTarget['os'],
         dTarget['numObjectsPreBuild'], dTarget['numObjectsPostBuild'],
         dTarget['numObjectsGenerated'], dTarget['numSources'],
         dTarget['buildTargetPath'], dTarget['configureBuildType'],
         dTarget['configureacBuildType'], dTarget['configureinBuildType'],
         dTarget['cmakeBuildType'], dTarget['makefileBuildType'],
         dTarget['antBuildType'], dTarget['mavenBuildType'],
         dTarget['returnCode']) = tTargetRow

        if dProjectSummary:

            if dProjectSummary['projectName'] == dTarget['projectName']:

                try:
                    # reuse the build entry that already covers this tarball, if one exists
                    dBuild = (dBuild for dBuild in dProjectSummary['builds']
                              if dBuild['buildTarPath'] == dTarget['buildTarPath']).next()
                except (StopIteration) as e:
                    dBuild = {
                        'buildTarPath': dTarget['buildTarPath'],
                        'buildTime': dTarget['buildTime'],
                        'version': dTarget['version'],
                        'os': dTarget['os'],
                        'numObjectsPreBuild': dTarget['numObjectsPreBuild'],
                        'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                        'numObjectsGenerated': dTarget['numObjectsGenerated'],
                        'numSources': dTarget['numSources'],
                        'targets': []
                    }
                    dProjectSummary['builds'].append(dBuild)

                dTargetSummary = {
                    'buildTargetPath': dTarget['buildTargetPath'],
                    'returnCode': dTarget['returnCode']
                }

                for sTargetType in lTargetTypes:
                    if dTarget[sTargetType] == 1:
                        dTargetSummary['target-type'] = sTargetType
                        break

                dBuild['targets'].append(dTargetSummary)

            else:

                if dConfig['debug']:
                    debug('func: createBuildSummaries() dProjectSummary:',
                          json.dumps(dProjectSummary, indent=4))

                qRedis.put(json.dumps(dProjectSummary))
                iProjectCount += 1
                dProjectSummary = {}

        if not dProjectSummary:

            # project specific build summary info
            dBuild = {
                'buildTarPath': dTarget['buildTarPath'],
                'buildTime': dTarget['buildTime'],
                'version': dTarget['version'],
                'os': dTarget['os'],
                'numObjectsPreBuild': dTarget['numObjectsPreBuild'],
                'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                'numObjectsGenerated': dTarget['numObjectsGenerated'],
                'numSources': dTarget['numSources'],
                'targets': []
            }

            dProjectSummary = {
                'projectName': dTarget['projectName'],
                'sourcePath': dTarget['projectPath'],
                'builds': [dBuild]
            }

            if dTarget['projectName'] in dProjects['success']:
                dProjectSummary['buildStatus'] = 'success'
            elif dTarget['projectName'] in dProjects['partial']:
                dProjectSummary['buildStatus'] = 'partial'
            elif dTarget['projectName'] in dProjects['fail']:
                dProjectSummary['buildStatus'] = 'fail'

            # target specific build summary info
            dTargetSummary = {
                'buildTargetPath': dTarget['buildTargetPath'],
                'returnCode': dTarget['returnCode']
            }

            for sTargetType in lTargetTypes:
                if dTarget[sTargetType] == 1:
                    dTargetSummary['target-type'] = sTargetType
                    break

            dBuild['targets'].append(dTargetSummary)

    if dProjectSummary:

        if dConfig['debug']:
            debug('func: createBuildSummaries() dProjectSummary:',
                  json.dumps(dProjectSummary, indent=4))

        qRedis.put(json.dumps(dProjectSummary))
        iProjectCount += 1
        dProjectSummary = {}

    dMp.close()

    printMsg('func: createBuildSummaries()', str(iProjectCount), 'projects queued')
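# Hedged sketch (illustration only): the summaries queued by createBuildSummaries() and consumed
# by writeBuildSummaries() are JSON documents shaped roughly like the dict below. Only the keys
# mirror the fields assembled above; every concrete value (names, paths, counts, return codes)
# is invented for illustration.
_EXAMPLE_PROJECT_SUMMARY = {
    'projectName': 'exampleProject',
    'sourcePath': '/data/corpus_8tof/ab/cd/exampleProject/exampleProject_code.tgz',
    'buildStatus': 'partial',
    'builds': [
        {
            'buildTarPath': '/data/builder_SAN/outputCyber/ab/cd/exampleProject/exampleProject.tgz',
            'buildTime': '42',
            'version': 'latest',
            'os': 'ubuntu14',
            'numObjectsPreBuild': 0,
            'numObjectsPostBuild': 17,
            'numObjectsGenerated': 17,
            'numSources': 25,
            'targets': [
                {
                    'buildTargetPath': 'latest/configure',
                    'returnCode': 0,
                    'target-type': 'configureBuildType'
                }
            ]
        }
    ]
}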
def main(argv):

    # defaults
    bError = False

    dConfig = {}

    dConfig['containerImage'] = 'musebuilder'
    #dConfig['containerPath'] = '/data/builder'
    dConfig['containerPath'] = '/data/builder_SAN/containers'
    dConfig['debug'] = False

    dConfig['elasticsearch'] = True
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']

    #dConfig['es-file-index-name'] = 'muse-corpus-source'
    dConfig['es-file-index-name'] = 'muse-corpus-build'
    dConfig['es-file-index-type'] = 'muse-project-build'

    dConfig['forks'] = 5
    dConfig['hostname'] = socket.gethostname().replace('.', '')

    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True

    dConfig['rebuild'] = False

    dConfig['redis-already-built'] = 'muse-already-built-'
    dConfig['redis-already-built-nate'] = 'NEWbuiltProjects'
    dConfig['redis-queue-to-build'] = 'muse-to-build'
    dConfig['redis-queue-building'] = 'muse-building'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    dArgs = {}

    # number of attempts with each build target to resolve dependencies
    dArgs['buildCycles'] = 2

    dArgs['containerMem'] = '2g'

    dArgs['buildScripts'] = {}
    dArgs['buildScripts']['root'] = '/managed/scripts'
    dArgs['buildScripts']['loader'] = os.path.join(dArgs['buildScripts']['root'], 'runBuild.sh')
    dArgs['buildScripts']['cmakeBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'cmake.sh')
    dArgs['buildScripts']['configureBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configure.sh')
    dArgs['buildScripts']['configureacBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configureac.sh')
    dArgs['buildScripts']['configureinBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configurein.sh')
    dArgs['buildScripts']['makefileBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'make.sh')

    dArgs['containerScripts'] = {}
    dArgs['containerScripts']['root'] = '/scripts'
    dArgs['containerScripts']['cmakeBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'cmake.sh')
    dArgs['containerScripts']['configureBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configure.sh')
    dArgs['containerScripts']['configureacBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configureac.sh')
    dArgs['containerScripts']['configureinBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configurein.sh')
    dArgs['containerScripts']['makefileBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'make.sh')

    dArgs['containerDirs'] = ['buildArtifacts', 'output', 'scripts', 'source']
    dArgs['containerOS'] = 'ubuntu14'
    dArgs['containerPath'] = dConfig['containerPath']
    dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs['containerOS']
    dArgs['script-name'] = 'build.sh'

    '''
    dArgs['build-targets'] = {
        'configure' : 'configureBuildType',
        'configure.ac' : 'configureacBuildType',
        'configure.in' : 'configureinBuildType',
        'CMakeLists.txt' : 'cmakeBuildType',
        'Makefile' : 'makefileBuildType'
        #'build.xml' : 'antBuildType',
        #'pom.xml' : 'mavenBuildType'
    }
    '''

    dArgs['source-compilers'] = {'cBuildType': 'gcc', 'cppBuildType': 'g++'}

    '''
    dArgs['source-targets'] = {
        '.c' : 'cBuildType',
        '.cc' : 'cppBuildType',
        '.cpp' : 'cppBuildType',
        '.cxx' : 'cppBuildType',
        '.c++' : 'cppBuildType'
    }
    '''

    lSupportedOSs = ['fedora20', 'fedora21', 'ubuntu12', 'ubuntu14']

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'f:o:rdy',
        ['forks=', 'os=', 'rebuild', 'debug', 'debug-flags'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-f', '--forks'):
            try:
                dConfig['forks'] = int(arg)
            except ValueError as e:
                bError = True
        elif opt in ('-o', '--os'):
            if arg in lSupportedOSs:
                dArgs['containerOS'] = arg
                dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs['containerOS']
            else:
                bError = True
        elif opt in ('-r', '--rebuild'):
            dConfig['rebuild'] = True
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif opt in ('-y', '--debug-flags'):
            dArgs['source-compilers'] = {
                'cBuildType': 'gcc -g3 -O0 -DDEBUG',
                'cppBuildType': 'g++ -g3 -O0 -DDEBUG'
            }

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError:
        usage()
    else:

        '''
        # pre-initialization -- if projects remained in building queue, put them back in queue-to-build
        qToBuildRedis = RedisQueue(name=dConfig['redis-queue-building'],
                                   name2=dConfig['redis-queue-to-build'],
                                   namespace='queue',
                                   host=dConfig['redis-loc'],
                                   port=dConfig['redis-port'])

        for iCtr in range(0, len(qToBuildRedis)):
            qToBuildRedis.getnpush()
        '''

        dConfig['redis-already-built'] = dConfig['redis-already-built'] + dArgs['containerOS']

        sExistingBuilds = RedisSet(name=dConfig['redis-already-built'],
                                   namespace='set',
                                   host=dConfig['redis-loc'],
                                   port=dConfig['redis-port'])
        sExistingBuilds.flush()

        if not dConfig['rebuild']:
            loadExistingBuilds(dConfig, dArgs['containerOS'])

        iStart = time.time()

        ### setup consumers
        lConsumerArgs = []

        # create a locking semaphore for mutex
        lock = multiprocessing.Lock()

        for iCtr in range(0, dConfig['forks']):
            lConsumerArgs.append((iCtr, dArgs, dConfig))

        # create pool of workers -- one build worker per fork
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'],
                                             initializer=initialize_lock,
                                             initargs=(lock, ))

        ### do work -- each worker pulls build targets from the queue and builds them in containers
        print(lConsumerArgs)
        oConsumerPool.map(processBuildTargets, lConsumerArgs)

        oConsumerPool.close()
        oConsumerPool.join()

        # processBuildTargets( (0, dArgs, dConfig) )

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findProjects(sLanguage, dConfig):

    # setup elasticsearch client
    oES = Elasticsearch(dConfig['es-instance-locs'])

    lProjects = []
    iCtr = 0

    dQuery = {"query": {"match_all": {}}, "fields": [sLanguage]}

    if dConfig['debug']:
        debug('func: findProjects() dQuery:', json.dumps(dQuery))

    # scroll time set to 20 minutes, change as needed -- required for consistent results, the scroll token expires at the end of scroll time
    dResponse = oES.search(index=dConfig['es-project-index-name'],
                           doc_type=dConfig['es-project-index-type'],
                           body=json.dumps(dQuery),
                           search_type='scan',
                           scroll='20m',
                           timeout='20m',
                           lowercase_expanded_terms=False)

    sScrollId = dResponse['_scroll_id']

    if dConfig['debug']:
        debug('func: findProjects() (after initial search) dResponse: ', dResponse)

    if dConfig['debug']:
        debug('func: findProjects() search hits: ', dResponse['hits']['total'])

    #while not dResponse['timed_out'] and dResponse['hits']['hits']['total'] > 0:
    while ('timed_out' in dResponse and not dResponse['timed_out'] and
           'hits' in dResponse and 'total' in dResponse['hits'] and
           dResponse['hits']['total'] > 0):

        dResponse = oES.scroll(scroll_id=sScrollId, scroll='20m')

        sScrollId = dResponse['_scroll_id']

        if ('hits' in dResponse['hits']) and (len(dResponse['hits']['hits']) > 0):

            if dConfig['debug']:
                debug('func: findProjects() scroll_id:', sScrollId,
                      'number of hits:', len(dResponse['hits']['hits']))

            if dConfig['debug'] and iCtr > 10:
                break

            for dHit in dResponse['hits']['hits']:

                iCtr += 1

                if dConfig['debug']:
                    debug('func: findProjects()', json.dumps(dHit, indent=4))

                if iCtr > 100:
                    break

                # found matches
                if 'fields' in dHit and sLanguage in dHit['fields'] and '_id' in dHit:
                    lProjects.append(dHit['_id'])

        else:
            break

    printMsg('func: findProjects() found ', str(iCtr), ' buildTargets, spawned process exiting...')

    sLanguageFileName = './' + sLanguage.split('.')[1] + '.txt'
    printMsg('func: findProjects() file created: ', sLanguageFileName)

    with open(sLanguageFileName, 'w') as fLanguage:
        for sProject in sorted(lProjects):
            fLanguage.write(sProject + '\n')

    return lProjects
def main(argv):

    # defaults
    sCorpusPath = '/data/corpus'

    dConfig = {}
    dConfig['debug'] = False

    dConfig['redis-queue-name'] = 'muse-project-paths-perms'
    dConfig['redis-loc'] = '38.100.20.212'
    dConfig['redis'] = False

    dConfig['time-stamp'] = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    iForks = 10
    bError = False

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'c:f:rd',
        ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-c', '--corpus-dir-path'):
            sCorpusPath = arg
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif opt in ('-r', '--redis'):
            dConfig['redis'] = True
        elif opt in ('-f', '--forks'):
            try:
                iForks = int(arg)
            except ValueError as e:
                bError = True

    if not os.path.isdir(sCorpusPath):
        bError = True

    if bError:
        usage()
    else:

        iStart = time.time()

        ### setup producer
        lProjectPaths = []

        if dConfig['redis']:
            # call producer process that populates redis queue with project path roots
            pProducer = multiprocessing.Process(target=findProjects,
                                                args=(sCorpusPath, iForks, dConfig))
            pProducer.start()
        else:
            lProjectPaths = findProjects(sCorpusPath, iForks, dConfig)

        ### setup consumers
        lArgs = []

        # create pool of workers
        oPool = multiprocessing.Pool(processes=iForks)

        if dConfig['redis']:

            for i in range(0, iForks):
                lArgs.append(dConfig)

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(processProjects, lArgs)

            pProducer.join()

        else:

            for sPath in lProjectPaths:
                lArgs.append((sPath, dConfig))

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(findProjectFiles, lArgs)

        oPool.close()
        oPool.join()

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def main(argv):

    # defaults
    sCorpusPath = '/data/builder_SAN2/RAT'
    # sCorpusPath = '/data/corpus_0to7'
    # sCorpusPath = '/data/corpus_8tof'

    dConfig = {}
    dConfig['es-bulk-chunk-size'] = 500
    dConfig['debug'] = False

    # binding to muse2 doesn't work right now
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']

    dConfig['es-index-name'] = 'rat-corpus-source'
    dConfig['es-index-type'] = 'files'

    dConfig['redis-queue-name'] = 'rat-project-paths'
    dConfig['redis-loc'] = 'muse2-int'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = False

    dConfig['time-stamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    iForks = 5
    bError = False

    ### command line argument handling
    options, remainder = getopt.getopt(
        sys.argv[1:], 'c:f:rd',
        ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-c', '--corpus-dir-path'):
            sCorpusPath = arg
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif opt in ('-r', '--redis'):
            dConfig['redis'] = True
        elif opt in ('-f', '--forks'):
            try:
                iForks = int(arg)
            except ValueError as e:
                bError = True

    if not os.path.isdir(sCorpusPath):
        bError = True

    if bError:
        usage()
    else:

        iStart = time.time()

        #oES = createESIndex(dConfig)
        oES = Elasticsearch(dConfig['es-instance-locs'])

        ### setup producer
        lProjectPaths = []

        if dConfig['redis']:

            qRedis = RedisQueue(dConfig['redis-queue-name'],
                                namespace='queue',
                                host=dConfig['redis-loc'],
                                port=dConfig['redis-port'])

            # ensure redis queue is empty prior to starting consumers
            # qRedis.flush()

            # call producer process that populates redis queue with project path roots
            pProducer = multiprocessing.Process(target=findProjects,
                                                args=(qRedis, sCorpusPath, dConfig))
            pProducer.start()

        else:
            lProjectPaths = findProjects(None, sCorpusPath, dConfig)

        ### setup consumers
        lArgs = []

        iForks = 1

        if dConfig['redis']:

            # create pool of workers
            oPool = multiprocessing.Pool(processes=iForks)

            for i in range(0, iForks):
                lArgs.append(dConfig)

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(processProjects, lArgs)

            pProducer.join()

            oPool.close()
            oPool.join()

        else:

            for sPath in lProjectPaths:
                findProjectFiles((sPath, oES, dConfig))

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        # es index was created with replication turned off for speed, turn on replicating shards
        turnReplicationOn(oES, dConfig)

        # refresh to make the documents available for search
        oES.indices.refresh(index=dConfig['es-index-name'])

        # and now we can count the documents
        printMsg('func: main()', 'number of documents in', dConfig['es-index-name'], 'index: ',
                 oES.count(index=dConfig['es-index-name'])['count'])

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findProjectFiles(tTup):

    (sProjectPath, oES, dConfig) = tTup

    sProjectName = os.path.basename(sProjectPath)

    oES = Elasticsearch(dConfig['es-instance-locs'])

    lIgnoreDirs = ['.git', '.svn']

    lProjectFiles = []

    if dConfig['debug']:
        debug('func: findProjectFiles()', 'project-path:', sProjectPath,
              'project-name:', sProjectName)

    for sRoot, lDirs, lFiles in os.walk(sProjectPath):

        if len(lProjectFiles) > dConfig['es-bulk-chunk-size']:

            # ingest chunk into elasticsearch
            (iSuccess, lResponse) = helpers.bulk(client=oES,
                                                 actions=lProjectFiles,
                                                 timeout="20m",
                                                 request_timeout=120.)

            if iSuccess < dConfig['es-bulk-chunk-size']:
                warning('func: findProjectFiles() iSuccess:', iSuccess,
                        ' expected:', dConfig['es-bulk-chunk-size'])
                warning('func: findProjectFiles()', type(lResponse), 'returned by bulk api')
                warning('func: findProjectFiles()', json.dumps(lResponse, indent=4),
                        'returned by bulk api')

            #del lProjectFiles[0 : len(lProjectFiles)]
            lProjectFiles = []

            if dConfig['debug']:
                debug('func: findProjectFiles()', str(len(lProjectFiles)),
                      'files loaded into elasticsearch')

        for sFile in lFiles:

            # make sure dProject is emptied each loop iteration
            dProject = {
                '_index': dConfig['es-index-name'],
                '_type': dConfig['es-index-type'],
                '_source': {
                    'project-path': sProjectPath,
                    'project-name': sProjectName,
                    'crawl-time': dConfig['time-stamp']
                }
            }

            sFilePath = os.path.join(sRoot, sFile)
            sRelPath = os.path.relpath(sFilePath, sProjectPath)

            sDecodedFile = ''
            sDecodedRelPath = ''
            sEncodedWith = ''

            # look for the tar file with the src code
            if "_code.tgz" in sFilePath:

                global counter
                counter = counter + 1
                print(str(counter) + ': working on: ' + sFilePath)

                t = tarfile.open(sFilePath, 'r:*')

                # iterate over the files in the tar file
                for tarinfo in t:

                    if tarinfo.isfile():

                        filename = tarinfo.name

                        if (".svn" not in filename and ".git" not in filename):

                            # make sure dProject is emptied each loop iteration
                            dProject = {
                                '_index': dConfig['es-index-name'],
                                '_type': dConfig['es-index-type'],
                                '_source': {
                                    'project-path': sProjectPath,
                                    'project-name': sProjectName,
                                    'crawl-time': dConfig['time-stamp']
                                }
                            }

                            # append file in tar to tar path
                            sFile = os.path.join(sFilePath, filename)
                            sRelPath = os.path.relpath(sFile, sProjectPath)

                            sDecodedFile = ''
                            sDecodedRelPath = ''
                            sEncodedWith = ''

                            try:
                                sDecodedFile = sFile.decode('utf-8')
                                sDecodedRelPath = sRelPath.decode('utf-8')
                                sEncodedWith = 'utf-8'
                            except (ValueError, UnicodeDecodeError) as e:
                                try:
                                    sDecodedFile = sFile.decode('latin-1')
                                    sDecodedRelPath = sRelPath.decode('latin-1')
                                    sEncodedWith = 'latin-1'
                                except (ValueError, UnicodeDecodeError) as e:
                                    try:
                                        sDecodedFile = sFile.decode('utf-16')
                                        sDecodedRelPath = sRelPath.decode('utf-16')
                                        sEncodedWith = 'utf-16'
                                    except (ValueError, UnicodeDecodeError) as e:
                                        warning('func findProjectFiles():',
                                                'sProjectPath:', dProject['_source']['project-path'],
                                                'sProjectName:', dProject['_source']['project-name'],
                                                'sFile:', sFile,
                                                'sRelPath:', sRelPath,
                                                'utf-8, latin-1, and utf-16 decoding failed',
                                                'exception:', e)
                                        print("decode failed")
                                        sDecodedFile = ''
                                        sDecodedRelPath = ''
                                        sEncodedWith = ''

                            if sDecodedFile and sDecodedRelPath:

                                dProject['_source']['file'] = sDecodedFile

                                (_, sFileExt) = os.path.splitext(sDecodedFile)
                                if sFileExt:
                                    dProject['_source']['ext'] = sFileExt[1:].lower()

                                dProject['_source']['path'] = sDecodedRelPath

                                if dConfig['debug']:
                                    debug('func: findProjectFiles() dProject:', dProject,
                                          'encoded with', sEncodedWith)

                                lProjectFiles.append(dProject)

        lDirs[:] = [sDir for sDir in lDirs if sDir not in lIgnoreDirs]

    # ingest any stragglers remaining into elasticsearch
    (iSuccess, lResponse) = helpers.bulk(client=oES,
                                         actions=lProjectFiles,
                                         timeout="20m",
                                         request_timeout=120.)

    if iSuccess < len(lProjectFiles):
        warning('func: findProjectFiles() iSuccess:', iSuccess,
                ' expected:', len(lProjectFiles))
        warning('func: findProjectFiles()', type(lResponse), 'returned by bulk api')
        warning('func: findProjectFiles()', json.dumps(lResponse, indent=4),
                'returned by bulk api')

    # del lProjectFiles[0 : len(lProjectFiles)]
    lProjectFiles = []