def main(argv):
    """Entry point: parse the -d/--debug flag, run findProjectsWrapper, report timing."""
    dConfig = {
        'debug': False,
        # alternate host subsets were toggled here during testing
        'es-instance-locs': ['muse1-int', 'muse2-int', 'muse3-int'],
        'es-project-index-name': 'corpuslite',
        'es-project-index-type': 'projects',
    }

    ### command line argument handling
    lOptions, _ = getopt.getopt(sys.argv[1:], 'd', ['debug'])

    for sOpt, _ in lOptions:
        if sOpt in ('-d', '--debug'):
            dConfig['debug'] = True

    iBegin = time.time()
    findProjectsWrapper(dConfig)
    printMsg('func: main()', 'execution time:', (time.time() - iBegin), 'seconds')
def getObjects(self):
    # Drain the redis queue, logging every item pulled off it.
    # NOTE(review): the loop condition is the queue object's truthiness --
    # presumably always true unless RedisQueue defines emptiness; confirm
    # against the RedisQueue implementation.
    while self.rq:
        printMsg(self.sConsumerName, ':item:', self.rq.get())
def findProjects(qRedis, sCorpusPath, dConfig):
    """Walk sCorpusPath for non-github project roots at directory depth 11.

    Roots are pushed onto qRedis when dConfig['redis'] is set, otherwise
    collected and returned as a list.
    """
    lFound = []
    iProjects = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):
        iDepth = sRoot.count(os.sep)

        # stop descending once we are at/below the project-root depth
        if iDepth >= 11:
            del lDirs[:]

        if iDepth == 11 and "github" not in sRoot:
            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iDepth)

            if dConfig['redis']:
                qRedis.put(sRoot)
            else:
                lFound.append(sRoot)

            iProjects += 1

            # in debug mode only sample a handful of projects
            if dConfig['debug'] and iProjects >= 10:
                break

    printMsg('func: findProjects()', str(iProjects), 'projects loaded into queue for processing')

    return lFound
def main(argv):
    """Spawn a pool of workers that exercise a shared multiprocessing lock."""
    iForks = 10
    iBegin = time.time()

    # create a locking semaphore for mutex
    oLock = multiprocessing.Lock()

    ### setup consumers
    lConsumerArgs = [("lock testing procId", iCtr) for iCtr in range(0, iForks)]

    # pool size matches the number of argument tuples to be processed
    oConsumerPool = multiprocessing.Pool(processes=iForks,
                                         initializer=initialize_lock,
                                         initargs=(oLock, ))

    ### do work -- each worker runs test() under the shared lock
    oConsumerPool.map(test, lConsumerArgs)
    oConsumerPool.close()
    oConsumerPool.join()

    debug('func: main()', "all processes completed")

    printMsg('func: main()', 'execution time:', (time.time() - iBegin), 'seconds')
def findProjects(sCorpusPath, dConfig):
    """Walk sCorpusPath and push every depth-11 project root onto a redis queue.

    Unlike the sibling variants, this version always queues (no list fallback)
    and returns nothing.
    """
    qRedis = RedisQueue(dConfig['redis-queue-project-paths'],
                        namespace='queue',
                        host=dConfig['redis-loc'],
                        port=dConfig['redis-port'])

    iCount = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):
        iLevel = sRoot.count(os.sep)

        # prune traversal below the project-root depth
        if iLevel >= 11:
            del lDirs[:]

        if iLevel == 11:
            # fix: debug() was previously invoked twice with identical
            # arguments for every project root; log once, guarded by the flag
            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iLevel)

            qRedis.put(sRoot)
            iCount += 1

            # in debug mode only sample the first few projects
            if dConfig['debug'] and iCount >= 10:
                break

    printMsg('func: findProjects()', str(iCount), 'projects loaded into queue for processing')
def getObjects(self):
    # Consume queue items, logging each one, until the 'done' sentinel arrives.
    while True:
        item = self.rq.get()
        printMsg(self.sConsumerName, ':item:', item)
        if item == 'done':
            break
def main(argv):
    """Dump same-type and multiple-build-type project lists to JSON files."""
    dMp = MuseProjectDB()
    dMp.open()

    (lSameType, lMultiType) = dMp.findMultipleBuildTypeProjects()

    printMsg('# of same-type projects: ', len(lSameType),
             '# of multiple-build-type projects:', len(lMultiType))

    with open('multipleSameTypeProjects.json', 'w') as fSameType:
        json.dump(lSameType, fSameType, indent=4)

    with open('multipleBuildTypeProjects.json', 'w') as fMultipleType:
        json.dump(lMultiType, fMultipleType, indent=4)

    dMp.close()
def findProjects(sCorpusPath, iForks, dConfig):
    """Locate depth-11 project roots under sCorpusPath.

    With dConfig['redis'] set, the redis queue is flushed and roots are queued;
    otherwise roots are returned as a list.
    """
    lFound = []
    qRedis = None

    if dConfig['redis']:
        qRedis = RedisQueue(dConfig['redis-queue-name'],
                            namespace='queue',
                            host=dConfig['redis-loc'])
        # ensure redis queue is empty prior to starting consumers
        qRedis.flush()

    iProjects = 0

    for sRoot, lDirs, lFiles in os.walk(sCorpusPath):
        iDepth = sRoot.count(os.sep)

        # stop descending below the project-root depth
        if iDepth >= 11:
            del lDirs[:]

        if iDepth == 11:
            if dConfig['debug']:
                debug('func: findProjects()', 'projects-root:', sRoot, iDepth)

            if dConfig['redis']:
                qRedis.put(sRoot)
            else:
                lFound.append(sRoot)

            iProjects += 1

            # debug mode: stop after the very first project
            if dConfig['debug'] and iProjects >= 1:
                break

    printMsg('func: findProjects()', str(iProjects), 'projects loaded into queue for processing')

    return lFound
def createBuildSummaries(dConfig):
    """Merge per-target build rows from mysql into per-project JSON summaries.

    Rows are read from the 'buildStatusWithTargets' view ordered by
    (projectName, buildTarPath); consecutive rows for the same project are
    folded into one summary dict, which is pushed onto a redis queue as JSON
    whenever the project name changes (and once more at the end).
    """
    # queue that receives one JSON summary per project
    qRedis = RedisQueue(dConfig['redis-queue-json'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

    dMp = MuseProjectDB(db=dConfig['mysql-db'], port=dConfig['mysql-port'], user=dConfig['mysql-user'], passwd=dConfig['mysql-passwd'], loc=dConfig['mysql-loc'])

    # in debug mode cap every select at 10 rows
    sLimitClause = ''
    if dConfig['debug']:
        sLimitClause = '10'

    # mysql table name -> redis bin suffix / buildStatus value
    dReturnCodeLookup = { 'buildSuccess': 'success', 'buildPartial': 'partial', 'buildFail': 'fail' }

    sSelectClause = 'projectName,projectPath,buildTarPath,buildTime,version,os,numObjectsPreBuild,numObjectsPostBuild,numObjectsGenerated,numSources,buildTargetPath,configureBuildType,configureacBuildType,configureinBuildType,cmakeBuildType,makefileBuildType,antBuildType,mavenBuildType,returnCode'

    # one-hot columns; the first equal to 1 names the target's build type
    lTargetTypes = [ 'configureBuildType', 'configureacBuildType', 'configureinBuildType', 'cmakeBuildType', 'makefileBuildType', 'antBuildType', 'mavenBuildType' ]

    dMp.open()

    iProjectCount = 0

    # redis sets used later for membership tests when stamping buildStatus
    dProjects = { 'success': RedisSet(dConfig['redis-set'] + '-success', namespace='set', host=dConfig['redis-loc'], port=dConfig['redis-port']),
                  'partial': RedisSet(dConfig['redis-set'] + '-partial', namespace='set', host=dConfig['redis-loc'], port=dConfig['redis-port']),
                  'fail': RedisSet(dConfig['redis-set'] + '-fail', namespace='set', host=dConfig['redis-loc'], port=dConfig['redis-port']) }

    for sTable, sProjectBin in dReturnCodeLookup.iteritems():

        # empty redis set
        dProjects[sProjectBin].flush()

        lProjects = dMp.select(sSelectClause='projectName', sTable=sTable, sOrderByClause='projectName', sLimitClause=sLimitClause, bDebug=dConfig['debug'])

        # populate redis set with projects of each bin type
        for tProject in lProjects:
            (sProjectName, ) = tProject
            dProjects[sProjectBin].put(sProjectName)

    # summary currently being accumulated; empty dict means "none in flight"
    dProjectSummary = {}

    lTargetRows = dMp.select(sSelectClause=sSelectClause, sTable='buildStatusWithTargets', sOrderByClause='projectName,buildTarPath', sLimitClause=sLimitClause, bDebug=dConfig['debug'])

    for tTargetRow in lTargetRows:

        # unpack the row positionally into a keyed dict
        dTarget = {}
        (dTarget['projectName'], dTarget['projectPath'], dTarget['buildTarPath'], dTarget['buildTime'], dTarget['version'], dTarget['os'],
         dTarget['numObjectsPreBuild'], dTarget['numObjectsPostBuild'], dTarget['numObjectsGenerated'], dTarget['numSources'],
         dTarget['buildTargetPath'], dTarget['configureBuildType'], dTarget['configureacBuildType'], dTarget['configureinBuildType'],
         dTarget['cmakeBuildType'], dTarget['makefileBuildType'], dTarget['antBuildType'], dTarget['mavenBuildType'], dTarget['returnCode']) = tTargetRow

        if dProjectSummary:

            if dProjectSummary['projectName'] == dTarget['projectName']:

                # probe for an existing build entry with this tar path; the
                # generator result is discarded, so on a hit dBuild keeps its
                # binding from a previous iteration.
                # NOTE(review): this relies on rows being ordered by
                # buildTarPath so the prior dBuild is the matching one --
                # confirm against the ORDER BY above.
                try:
                    (dBuild for dBuild in dProjectSummary['builds'] if dBuild['buildTarPath'] == dTarget['buildTarPath'] ).next()
                except (StopIteration) as e:
                    # no entry for this tarball yet -- start a new build record
                    dBuild = { 'buildTarPath': dTarget['buildTarPath'], 'buildTime': dTarget['buildTime'], 'version': dTarget['version'], 'os': dTarget['os'],
                               'numObjectsPreBuild': dTarget['numObjectsPreBuild'], 'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                               'numObjectsGenerated': dTarget['numObjectsGenerated'], 'numSources': dTarget['numSources'], 'targets': [] }
                    dProjectSummary['builds'].append(dBuild)

                # per-target summary: path, return code, and first matching type
                dTargetSummary = { 'buildTargetPath': dTarget['buildTargetPath'], 'returnCode': dTarget['returnCode'] }
                for sTargetType in lTargetTypes:
                    if dTarget[sTargetType] == 1:
                        dTargetSummary['target-type'] = sTargetType
                        break
                dBuild['targets'].append(dTargetSummary)

            else:
                # project changed -- flush the finished summary to redis
                if dConfig['debug']:
                    debug('func: createBuildSummaries() dProjectSummary:', json.dumps(dProjectSummary, indent=4))
                qRedis.put(json.dumps(dProjectSummary))
                iProjectCount += 1
                dProjectSummary = {}

        if not dProjectSummary:

            # project specific build summary info
            dBuild = { 'buildTarPath': dTarget['buildTarPath'], 'buildTime': dTarget['buildTime'], 'version': dTarget['version'], 'os': dTarget['os'],
                       'numObjectsPreBuild': dTarget['numObjectsPreBuild'], 'numObjectsPostBuild': dTarget['numObjectsPostBuild'],
                       'numObjectsGenerated': dTarget['numObjectsGenerated'], 'numSources': dTarget['numSources'], 'targets': [] }

            dProjectSummary = { 'projectName': dTarget['projectName'], 'sourcePath': dTarget['projectPath'], 'builds': [dBuild] }

            # stamp overall buildStatus from the redis bin sets filled above
            if dTarget['projectName'] in dProjects['success']:
                dProjectSummary['buildStatus'] = 'success'
            elif dTarget['projectName'] in dProjects['partial']:
                dProjectSummary['buildStatus'] = 'partial'
            elif dTarget['projectName'] in dProjects['fail']:
                dProjectSummary['buildStatus'] = 'fail'

            # target specific build summary info
            dTargetSummary = { 'buildTargetPath': dTarget['buildTargetPath'], 'returnCode': dTarget['returnCode'] }
            for sTargetType in lTargetTypes:
                if dTarget[sTargetType] == 1:
                    dTargetSummary['target-type'] = sTargetType
                    break
            dBuild['targets'].append(dTargetSummary)

    # flush the last in-flight summary
    if dProjectSummary:
        if dConfig['debug']:
            debug('func: createBuildSummaries() dProjectSummary:', json.dumps(dProjectSummary, indent=4))
        qRedis.put(json.dumps(dProjectSummary))
        iProjectCount += 1
        dProjectSummary = {}

    dMp.close()

    printMsg('func: createBuildSummaries()', str(iProjectCount), 'projects queued')
def main(argv):
    """Driver: flush the JSON queue, create build summaries, then write them.

    The multi-process producer/consumer variant is kept below as an inert
    triple-quoted block; only the single-process path is active.
    """
    # defaults
    bError = False

    dConfig = {}
    dConfig['debug'] = False
    dConfig['forks'] = 5
    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True
    dConfig['redis-queue-json'] = 'muse-json'
    dConfig['redis-set'] = 'muse-projects'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'f:d', ['forks=', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-f', '--forks'):
            try:
                dConfig['forks'] = int(arg)
            except ValueError as e:
                # non-integer fork count -> usage error
                bError = True
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError:
        usage()
    else:
        iStart = time.time()

        # prepare redis queue for producer, flush queue before starting the producer
        qRedis = RedisQueue(dConfig['redis-queue-json'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])
        qRedis.flush()

        '''
        # multi-process approach

        # call producer process that populates redis queue with project path roots
        pProducer = multiprocessing.Process( target=createBuildSummaries, args=(dConfig) )
        pProducer.start()

        ### setup json writers
        lConsumerArgs = []

        for iCtr in range(0, dConfig['forks']):
            lConsumerArgs.append( (dConfig) )

        # create pool of workers
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'])

        ### do work -- use pool of workers to search for each search string in muse-corpus-source es index
        oConsumerPool.map(writeBuildSummaries, lConsumerArgs)

        # wait for the producer to complete
        pProducer.join()

        # wait for the consumer pool to complete
        oConsumerPool.close()
        oConsumerPool.join()
        '''

        '''
        # single process approach:
        '''
        # produce summaries into the redis queue, then drain/write them
        createBuildSummaries(dConfig)
        writeBuildSummaries(dConfig)

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def main(argv):
    """Walk the corpus for project roots, then fan work out to a process pool.

    With -r/--redis a producer process feeds a redis queue drained by
    processProjects workers; otherwise paths are collected locally and handed
    to findProjectFiles workers.
    """
    # defaults
    sCorpusPath = '/data/corpus'
    dConfig = {
        'debug': False,
        'redis-queue-name': 'muse-project-paths-perms',
        'redis-loc': '38.100.20.212',
        'redis': False,
        'time-stamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
    }
    iForks = 10
    bError = False

    ### command line argument handling
    lOptions, _ = getopt.getopt(sys.argv[1:], 'c:f:rd',
                                ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    for sOpt, sArg in lOptions:
        if sOpt in ('-c', '--corpus-dir-path'):
            sCorpusPath = sArg
        elif sOpt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif sOpt in ('-r', '--redis'):
            dConfig['redis'] = True
        elif sOpt in ('-f', '--forks'):
            try:
                iForks = int(sArg)
            except ValueError:
                bError = True

    if not os.path.isdir(sCorpusPath):
        bError = True

    if bError:
        usage()
        return

    iBegin = time.time()

    ### setup producer
    lProjectPaths = []
    pProducer = None

    if dConfig['redis']:
        # producer process populates the redis queue with project path roots
        pProducer = multiprocessing.Process(target=findProjects,
                                            args=(sCorpusPath, iForks, dConfig))
        pProducer.start()
    else:
        lProjectPaths = findProjects(sCorpusPath, iForks, dConfig)

    ### setup consumers
    oPool = multiprocessing.Pool(processes=iForks)

    if dConfig['redis']:
        # each worker gets the same config and drains the shared queue
        lArgs = [dConfig] * iForks
        oPool.map(processProjects, lArgs)
        pProducer.join()
    else:
        lArgs = [(sPath, dConfig) for sPath in lProjectPaths]
        oPool.map(findProjectFiles, lArgs)

    oPool.close()
    oPool.join()

    if dConfig['debug']:
        debug('func: main()', "all processes completed")

    printMsg('func: main()', 'execution time:', (time.time() - iBegin), 'seconds')
def main(argv):
    """Driver: ingest RAT corpus file names into the rat-corpus-source ES index.

    Redis mode uses a producer process plus a worker pool; otherwise projects
    are processed serially in this process.
    """
    # defaults
    sCorpusPath = '/data/builder_SAN2/RAT'
    # sCorpusPath = '/data/corpus_0to7'
    # sCorpusPath = '/data/corpus_8tof'

    dConfig = {}
    dConfig['es-bulk-chunk-size'] = 500
    dConfig['debug'] = False

    # binding to muse2 doesn't work right now
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']

    dConfig['es-index-name'] = 'rat-corpus-source'
    dConfig['es-index-type'] = 'files'
    dConfig['redis-queue-name'] = 'rat-project-paths'
    dConfig['redis-loc'] = 'muse2-int'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = False
    dConfig['time-stamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    iForks = 5
    bError = False

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'c:f:rd', ['corpus-dir-path=', 'forks=', 'redis', 'debug'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-c', '--corpus-dir-path'):
            sCorpusPath = arg
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif opt in ('-r', '--redis'):
            dConfig['redis'] = True
        elif opt in ('-f', '--forks'):
            try:
                iForks = int(arg)
            except ValueError as e:
                bError = True

    if not os.path.isdir(sCorpusPath):
        bError = True

    if bError:
        usage()
    else:
        iStart = time.time()

        #oES = createESIndex(dConfig)
        oES = Elasticsearch(dConfig['es-instance-locs'])

        ### setup producer
        lProjectPaths = []

        if dConfig['redis']:
            qRedis = RedisQueue(dConfig['redis-queue-name'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

            # ensure redis queue is empty prior to starting consumers
            # qRedis.flush()

            # call producer process that populates redis queue with project path roots
            pProducer = multiprocessing.Process(target=findProjects, args=(qRedis, sCorpusPath, dConfig))
            pProducer.start()
        else:
            lProjectPaths = findProjects(None, sCorpusPath, dConfig)

        ### setup consumers
        lArgs = []

        # NOTE(review): the -f value is discarded here -- iForks is forced to 1
        # regardless of the command line; confirm this override is intentional.
        iForks = 1

        if dConfig['redis']:
            # create pool of workers
            oPool = multiprocessing.Pool(processes=iForks)

            for i in range(0, iForks):
                lArgs.append(dConfig)

            ### do work -- use pool of workers to descend into each project path recording/ingesting all file names
            oPool.map(processProjects, lArgs)

            pProducer.join()

            oPool.close()
            oPool.join()
        else:
            # serial path: ingest each project in this process
            for sPath in lProjectPaths:
                findProjectFiles((sPath, oES, dConfig))

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        # es index was created with replication turned off for speed, turn on replicating shards
        turnReplicationOn(oES, dConfig)

        # refresh to make the documents available for search
        oES.indices.refresh(index=dConfig['es-index-name'])

        # and now we can count the documents
        printMsg('func: main()', 'number of documents in', dConfig['es-index-name'], 'index: ',
                 oES.count(index=dConfig['es-index-name'])['count'])

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def indexSourceTargets(dConfig):
    """Drain ES queries from a redis queue and load matching source targets into mysql.

    Each queue entry is a JSON ES query; results are scrolled, mapped to
    per-file dicts, and bulk-inserted via MuseProjectDB. The outer loop exits
    when the queue get() times out.
    """
    # setup mysql client
    dMp = MuseProjectDB(db=dConfig['mysql-db'], port=dConfig['mysql-port'], user=dConfig['mysql-user'], passwd=dConfig['mysql-passwd'], loc=dConfig['mysql-loc'])
    dMp.open()

    # setup elasticsearch client
    oES = Elasticsearch(dConfig['es-instance-locs'])

    # setup source targets queue
    qRedis = RedisQueue(dConfig['redis-queue-source-targets'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

    # fix: iCtr was used in the bulk-insert progress message below without ever
    # being initialized or incremented (NameError); it now counts targets found
    iCtr = 0

    while 1:

        sQuery = qRedis.get(block=True, timeout=30)

        if sQuery:

            dQuery = json.loads(sQuery)

            if dConfig['debug']:
                debug('func: indexSourceTargets() dQuery:', json.dumps(dQuery))

            lSourceFiles = []

            # scroll time set to 10 minutes, change as needed -- required for
            # consistent results, the scroll token expires at the end of scroll time
            dResponse = oES.search(index=dConfig['es-file-index-name'], doc_type=dConfig['es-file-index-type'],
                                   body=json.dumps(dQuery), search_type='scan', scroll='20m', timeout='20m',
                                   lowercase_expanded_terms=False)

            sScrollId = dResponse['_scroll_id']

            if dConfig['debug']:
                debug('func: indexSourceTargets() (after initial search) dResponse: ', dResponse)

            if dConfig['debug']:
                debug('func: indexSourceTargets() search hits: ', dResponse['hits']['total'])

            #while not dResponse['timed_out'] and dResponse['hits']['hits']['total'] > 0:
            while 'timed_out' in dResponse and not dResponse['timed_out'] and 'hits' in dResponse and 'total' in dResponse['hits'] and dResponse['hits']['total'] > 0:

                dResponse = oES.scroll(scroll_id=sScrollId, scroll='20m')

                sScrollId = dResponse['_scroll_id']

                if ('hits' in dResponse['hits']) and (len(dResponse['hits']['hits']) > 0):

                    if dConfig['debug']:
                        debug('func: indexSourceTargets() scroll_id:', sScrollId, 'number of hits:', len(dResponse['hits']['hits']))

                    for dHit in dResponse['hits']['hits']:

                        # found matches
                        try:

                            if '_source' in dHit:

                                # debug('func: indexSourceTargets() dHit:', json.dumps(dHit['_source']) )

                                # NATE added, remove leading path from found built targets
                                mBuildTarget = dHit['_source']['file']
                                mBuildTarget = mBuildTarget.split('/')
                                dHit['_source']['file'] = mBuildTarget[len(mBuildTarget) - 1]

                                # start with every known source type flagged False
                                dProjectFound = {}
                                lSourceTypes = dMp.getSourceTypes()
                                for sSourceType in lSourceTypes:
                                    dProjectFound[sSourceType] = False

                                if 'file' in dHit['_source'] and dHit['_source']['file']:

                                    (sFileName, sFileExt) = os.path.splitext(dHit['_source']['file'])

                                    if sFileExt.lower() in dConfig['source-targets'].keys():
                                        # flag the matching source type (e.g. C vs C++)
                                        dProjectFound[dConfig['source-targets'][sFileExt.lower()]] = True
                                    else:
                                        warning('func indexSourceTargets() es returned an improper source target:', json.dumps(dHit['_source']))
                                        continue

                                if 'project-name' in dHit['_source'] and dHit['_source']['project-name']:
                                    dProjectFound['projectName'] = dHit['_source']['project-name']

                                if 'project-path' in dHit['_source'] and dHit['_source']['project-path']:
                                    dProjectFound['projectPath'] = dHit['_source']['project-path']

                                if 'path' in dHit['_source'] and dHit['_source']['path']:
                                    dProjectFound['buildTargetPath'] = verifyEncoding(dHit['_source']['path'])

                                # debug('func findSourceFileHelper()', json.dumps(dProjectFound))

                                lSourceFiles.append(dProjectFound)
                                iCtr += 1

                                # causing es reads to time out
                                if (len(lSourceFiles) > dConfig['mysql-bulk-statement-size']) and dConfig['mysql']:
                                    dMp.insertIntoSourceTargets(lTargets=lSourceFiles, bDebug=dConfig['debug'])
                                    printMsg('func indexSourceTargets() loaded', iCtr, 'source targets')
                                    lSourceFiles = []

                        except (UnicodeDecodeError, UnicodeEncodeError) as e:

                            warning('func indexSourceTargets() encountered exception:', e)
                            #warning('func indexSourceTargets() with string: ', dHit['_source']['path'])
                            warning('func indexSourceTargets() full _source payload: ', json.dumps(dHit['_source'], indent=4))

                else:
                    break

            # flush any remainder smaller than the bulk threshold
            if (len(lSourceFiles) > 0) and dConfig['mysql']:
                dMp.insertIntoSourceTargets(lTargets=lSourceFiles, bDebug=dConfig['debug'])
                lSourceFiles = []

        else:
            # queue timed out -- no more work
            break

    dMp.close()
def main(argv):
    """Driver: build queued projects inside docker-style containers via a worker pool.

    Configures container paths/scripts, parses flags (-f forks, -o os,
    -r rebuild, -d debug, -y debug compiler flags), primes the already-built
    redis set, then maps processBuildTargets across a multiprocessing pool.
    """
    # defaults
    bError = False

    dConfig = {}
    dConfig['containerImage'] = 'musebuilder'
    #dConfig['containerPath'] = '/data/builder'
    dConfig['containerPath'] = '/data/builder_SAN/containers'
    dConfig['debug'] = False
    dConfig['elasticsearch'] = True
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    #dConfig['es-instance-locs'] = ['muse2-int','muse3-int']
    #dConfig['es-instance-locs'] = ['muse3-int']
    #dConfig['es-file-index-name'] = 'muse-corpus-source'
    dConfig['es-file-index-name'] = 'muse-corpus-build'
    dConfig['es-file-index-type'] = 'muse-project-build'
    dConfig['forks'] = 5
    dConfig['hostname'] = socket.gethostname().replace('.', '')
    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True
    dConfig['rebuild'] = False
    dConfig['redis-already-built'] = 'muse-already-built-'
    dConfig['redis-already-built-nate'] = 'NEWbuiltProjects'
    dConfig['redis-queue-to-build'] = 'muse-to-build'
    dConfig['redis-queue-building'] = 'muse-building'
    dConfig['redis-loc'] = 'muse2-int'
    # dConfig['redis-port'] = '6379'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    dArgs = {}

    # number of attempts with each to build targets to resolve dependencies
    dArgs['buildCycles'] = 2

    dArgs['containerMem'] = '2g'

    # host-side build scripts
    dArgs['buildScripts'] = {}
    dArgs['buildScripts']['root'] = '/managed/scripts'
    dArgs['buildScripts']['loader'] = os.path.join(dArgs['buildScripts']['root'], 'runBuild.sh')
    dArgs['buildScripts']['cmakeBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'cmake.sh')
    dArgs['buildScripts']['configureBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configure.sh')
    dArgs['buildScripts']['configureacBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configureac.sh')
    dArgs['buildScripts']['configureinBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'configurein.sh')
    dArgs['buildScripts']['makefileBuildType'] = os.path.join(dArgs['buildScripts']['root'], 'make.sh')

    # same scripts as seen from inside the container
    dArgs['containerScripts'] = {}
    dArgs['containerScripts']['root'] = '/scripts'
    dArgs['containerScripts']['cmakeBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'cmake.sh')
    dArgs['containerScripts']['configureBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configure.sh')
    dArgs['containerScripts']['configureacBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configureac.sh')
    dArgs['containerScripts']['configureinBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'configurein.sh')
    dArgs['containerScripts']['makefileBuildType'] = os.path.join(dArgs['containerScripts']['root'], 'make.sh')

    dArgs['containerDirs'] = ['buildArtifacts', 'output', 'scripts', 'source']
    dArgs['containerOS'] = 'ubuntu14'
    dArgs['containerPath'] = dConfig['containerPath']
    dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs['containerOS']
    dArgs['script-name'] = 'build.sh'

    '''
    dArgs['build-targets'] = {
        'configure' : 'configureBuildType',
        'configure.ac' : 'configureacBuildType',
        'configure.in' : 'configureinBuildType',
        'CMakeLists.txt' : 'cmakeBuildType',
        'Makefile' : 'makefileBuildType'
        #'build.xml' : 'antBuildType',
        #'pom.xml' : 'mavenBuildType'
    }
    '''

    dArgs['source-compilers'] = {'cBuildType': 'gcc', 'cppBuildType': 'g++'}

    '''
    dArgs['source-targets'] = {
        '.c' : 'cBuildType',
        '.cc' : 'cppBuildType',
        '.cpp' : 'cppBuildType',
        '.cxx' : 'cppBuildType',
        '.c++' : 'cppBuildType'
    }
    '''

    lSupportedOSs = ['fedora20', 'fedora21', 'ubuntu12', 'ubuntu14']

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'f:o:rdy', ['forks=', 'os=', 'rebuild', 'debug', 'debug-flags'])

    # debug('func: main()', 'options:', options)
    # debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-f', '--forks'):
            try:
                dConfig['forks'] = int(arg)
            except ValueError as e:
                bError = True
        elif opt in ('-o', '--os'):
            if arg in lSupportedOSs:
                # switching OS also renames the image to match
                dArgs['containerOS'] = arg
                dArgs['imageName'] = dConfig['containerImage'] + '-' + dArgs['containerOS']
            else:
                bError = True
        elif opt in ('-r', '--rebuild'):
            dConfig['rebuild'] = True
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif opt in ('-y', '--debug-flags'):
            # compile with debug symbols and no optimization
            dArgs['source-compilers'] = {'cBuildType': 'gcc -g3 -O0 -DDEBUG', 'cppBuildType': 'g++ -g3 -O0 -DDEBUG'}

    debug('func: main()', 'dConfig:', json.dumps(dConfig, indent=4))

    if bError:
        usage()
    else:

        '''
        # pre-initialization -- if projects remained in building queue, put them back in queue-to-build
        qToBuildRedis = RedisQueue(name=dConfig['redis-queue-building'], name2=dConfig['redis-queue-to-build'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

        for iCtr in range(0, len(qToBuildRedis)):
            qToBuildRedis.getnpush()
        '''

        # the already-built set is per-OS: append the OS suffix to the key
        dConfig['redis-already-built'] = dConfig['redis-already-built'] + dArgs['containerOS']

        sExistingBuilds = RedisSet(name=dConfig['redis-already-built'], namespace='set', host=dConfig['redis-loc'], port=dConfig['redis-port'])
        sExistingBuilds.flush()

        if not dConfig['rebuild']:
            loadExistingBuilds(dConfig, dArgs['containerOS'])

        iStart = time.time()

        ### setup consumers
        lConsumerArgs = []

        # create a locking semaphore for mutex
        lock = multiprocessing.Lock()

        for iCtr in range(0, dConfig['forks']):
            lConsumerArgs.append((iCtr, dArgs, dConfig))

        # create pool of workers -- number of workers equals the number of search strings to be processed
        oConsumerPool = multiprocessing.Pool(processes=dConfig['forks'], initializer=initialize_lock, initargs=(lock, ))

        ### do work -- use pool of workers to search for each search string in muse-corpus-source es index
        print(lConsumerArgs)
        oConsumerPool.map(processBuildTargets, lConsumerArgs)

        oConsumerPool.close()
        oConsumerPool.join()

        # processBuildTargets( (0, dArgs, dConfig) )

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def main(argv):
    """Fan out ES searches for build files across a producer pool.

    The consumer stage exists but is commented out.
    """
    dConfig = {
        'es-bulk-chunk-size': 500,
        'debug': False,
        'forks': 5,
        # binding to muse2 doesn't work right now
        'es-instance-locs': ['38.100.20.211', '38.100.20.212'],
        'es-file-index-name': 'muse-corpus-source',
        'es-file-index-type': 'muse-project-files',
        'es-project-index-name': 'muse-corpus-projects',
        'es-project-index-type': 'muse-project-buildtype',
        'redis-queue-name': 'muse-%s-projects',
        'redis-loc': '38.100.20.212',
        'time-stamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
        'version': '1.0',
    }

    # sSearchStrings = ['configure','configure.ac','configure.in','Makefile','build.xml','pom.xml']
    # sSearchStrings = ['configure']
    sSearchStrings = ['configure', 'configure.ac', 'configure.in']

    bError = False

    ### command line argument handling
    lOptions, _ = getopt.getopt(sys.argv[1:], 'c:f:d',
                                ['corpus-dir-path=', 'forks=', 'debug'])

    for sOpt, sArg in lOptions:
        if sOpt in ('-d', '--debug'):
            dConfig['debug'] = True
        elif sOpt in ('-f', '--forks'):
            try:
                dConfig['forks'] = int(sArg)
            except ValueError:
                bError = True

    if bError:
        usage()
        return

    iBegin = time.time()

    ### setup producers -- one worker per search string
    lProducerArgs = [(sSearchString, dConfig) for sSearchString in sSearchStrings]

    oProducerPool = multiprocessing.Pool(processes=len(lProducerArgs))
    oProducerPool.map(findBuildFiles, lProducerArgs)
    oProducerPool.close()
    oProducerPool.join()

    ### setup consumers (stage currently disabled)
    lConsumerArgs = [dConfig for _ in range(0, dConfig['forks'])]

    ##oConsumerPool = multiprocessing.Pool(processes=iForks)
    ##oConsumerPool.map(findProjectFiles, lConsumerArgs)
    ##oConsumerPool.close()
    ##oConsumerPool.join()

    if dConfig['debug']:
        debug('func: main()', "all processes completed")

    printMsg('func: main()', 'execution time:', (time.time() - iBegin), 'seconds')
def queueUpSourceTargets(dConfig):
    """Group source build targets by project and queue one JSON doc per project.

    Targets are read from mysql (optionally restricted to un-built projects
    and/or a site), de-duplicated by leading directory path, and pushed onto
    the to-build redis queue when the project name changes.
    """
    # both backends are required: targets come from mysql, work goes to redis
    if dConfig['mysql'] and dConfig['redis']:

        dMp = MuseProjectDB(db=dConfig['mysql-db'], port=dConfig['mysql-port'], user=dConfig['mysql-user'], passwd=dConfig['mysql-passwd'], loc=dConfig['mysql-loc'])

        # setup to-build queue
        qRedis = RedisQueue(dConfig['redis-queue-to-build'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

        dMp.open()

        # get projects first to iterate through (makes it easier to build
        # project specific dictionaries), limit if in debug mode
        iProjectCount = 0
        iTargetCount = 0
        iMultiTargets = 0
        sLimitClause = ''
        if dConfig['debug']:
            sLimitClause = '10'

        lLeadingPaths = []
        dProject = {}
        dCodeDirLookup = {}

        # projectName -> codeDir, used when starting a fresh project dict
        lProjectRows = dMp.select(sSelectClause='projectName,codeDir', sTable='availableProjects', bDebug=dConfig['debug'])
        for tProjectRow in lProjectRows:
            (sProjectName, sCodeDir) = tProjectRow
            dCodeDirLookup[sProjectName] = sCodeDir

        # pick the source table based on the un-built-only and site filters
        lTargetRows = []
        if dConfig['unBuiltProjectsOnly']:
            if dConfig['queueSite']:
                lTargetRows = dMp.select(sSelectClause='projectName,projectPath,buildTargetPath', sTable='unBuiltSourceTargetsWithSite', sWhereClause='site=\'' + dConfig['queueSite'] + '\'', sOrderByClause='projectName', sLimitClause=sLimitClause, bDebug=dConfig['debug'])
            else:
                lTargetRows = dMp.select(sSelectClause='projectName,projectPath,buildTargetPath', sTable='unBuiltSourceTargets', sOrderByClause='projectName', sLimitClause=sLimitClause, bDebug=dConfig['debug'])
        else:
            if dConfig['queueSite']:
                lTargetRows = dMp.select(sSelectClause='projectName,projectPath,buildTargetPath', sTable='availableSourceTargetsWithSite', sWhereClause='site=\'' + dConfig['queueSite'] + '\'', sOrderByClause='projectName', sLimitClause=sLimitClause, bDebug=dConfig['debug'])
            else:
                lTargetRows = dMp.select(sSelectClause='projectName,projectPath,buildTargetPath', sTable='availableSourceTargets', sOrderByClause='projectName', sLimitClause=sLimitClause, bDebug=dConfig['debug'])

        dMp.close()

        for tTargetRow in lTargetRows:

            dTarget = {}
            (sProjectName, sProjectPath, dTarget['buildTargetPath'], ) = tTargetRow

            (_, sFileExt) = os.path.splitext(os.path.basename(dTarget['buildTargetPath']))

            if sFileExt:
                sFileExt = sFileExt.lower()

                if sFileExt in dConfig['source-targets'].keys():

                    # map extension (.c/.cpp/...) to a build type
                    dTarget['buildType'] = dConfig['source-targets'][sFileExt]

                    (sLeadingPath, sTarget) = os.path.split(dTarget['buildTargetPath'])

                    # NATE remove leading tarball from path
                    sLeadingPath = re.sub(r'[a-zA-Z_0-9-_]*.tgz/', "", sLeadingPath)
                    dTarget['buildTargetPath'] = os.path.join(sLeadingPath, sTarget)

                    # NATE added to grab code directory from buildTargetPath
                    # NOTE(review): codedir2 is only assigned when the path has
                    # more than one component -- if the first new-project row
                    # has a bare path, the reference below raises NameError;
                    # confirm input paths always contain a directory.
                    bPath = sLeadingPath.split('/')
                    if len(bPath) > 1:
                        codedir2 = bPath[0]

                    iTargetCount += 1

                    if 'projectName' in dProject:

                        if dProject['projectName'] != sProjectName:

                            # new project encountered, push old project onto queue
                            if dConfig['debug']:
                                debug('func: queueUpSourceTargets() queuing project:', json.dumps(dProject, indent=4))
                            qRedis.put(json.dumps(dProject))
                            iProjectCount += 1
                            if len(lLeadingPaths) > 1:
                                iMultiTargets += 1

                            dProject = { 'projectName': sProjectName,
                                         'projectPath': sProjectPath,
                                         'version': dConfig['version'],
                                         'targets': [ dTarget ],
                                         'codeDir': codedir2
                                         #'codeDir': dCodeDirLookup[sProjectName]
                                         }
                            lLeadingPaths = [ sLeadingPath ]

                        else:

                            # same project: only add targets from unseen directories
                            if sLeadingPath not in lLeadingPaths:
                                dProject['targets'].append(dTarget)
                                lLeadingPaths.append(sLeadingPath)
                            else:
                                # duplicate directory -- undo the count above
                                iTargetCount += -1
                                if dConfig['debug']:
                                    debug('func: queueUpSourceTargets() already encountered path:', sLeadingPath, 'not adding:', json.dumps(dTarget, indent=4))

                    else:
                        # very first project in the result set
                        dProject = { 'projectName': sProjectName,
                                     'projectPath': sProjectPath,
                                     'version': dConfig['version'],
                                     'targets': [ dTarget ],
                                     'codeDir': dCodeDirLookup[sProjectName] }
                        lLeadingPaths = [ sLeadingPath ]

                else:
                    warning('func: queueUpSourceTargets() unknown C/C++ file extension encountered:', sFileExt, 'file-path:', dTarget['buildTargetPath'], 'for project:', sProjectName)

            else:
                warning('func: queueUpSourceTargets() missing file extension encountered file-path:')
                #,dTarget['buildTargetPath'],'for project:', sProjectName)

        # queue the final in-flight project
        if dConfig['debug']:
            debug('func: queueUpSourceTargets() queuing project:', json.dumps(dProject, indent=4))
        qRedis.put(json.dumps(dProject))
        iProjectCount += 1
        if len(lLeadingPaths) > 1:
            iMultiTargets += 1

        printMsg('func: queueUpSourceTargets()', str(iProjectCount), 'projects queued', str(iTargetCount), 'targets queued', str(iMultiTargets), 'multi-target projects queued')
        printMsg('func: queueUpSourceTargets()', qRedis.size(), 'projects reported by redis')
def findProjects(sLanguage, dConfig):
    """Scan the ES project index for documents carrying the *sLanguage* field,
    collect their document ids, write them sorted to ./<suffix>.txt, and
    return the list.

    NOTE(review): a different findProjects(qRedis, sCorpusPath, dConfig) that
    walks the corpus on disk exists elsewhere in this project — confirm which
    definition the multiprocessing callers actually bind to.

    :param sLanguage: ES field name selecting the language, apparently dotted
                      (e.g. "language.c") — the file name below is built from
                      the text after the first '.'; TODO confirm format.
    :param dConfig:   config dict; reads 'es-instance-locs',
                      'es-project-index-name', 'es-project-index-type', 'debug'.
    :return: list of matching project ids (unsorted; the file copy is sorted).
    """
    # setup elasticsearch client
    oES = Elasticsearch(dConfig['es-instance-locs'])

    lProjects = []
    iCtr = 0

    # match every doc, but only fetch the sLanguage field (plus _id) per hit
    dQuery = {"query": {"match_all": {}}, "fields": [sLanguage]}

    if dConfig['debug']: debug('func: findProjects() dQuery:', json.dumps(dQuery))

    # scroll time set to 10 minutes, change as needed -- required for consistent
    # results, the scroll token expires at the end of scroll time
    # (search_type='scan' means this initial call returns no hits, only a scroll id)
    dResponse = oES.search(index=dConfig['es-project-index-name'],
                           doc_type=dConfig['es-project-index-type'],
                           body=json.dumps(dQuery),
                           search_type='scan',
                           scroll='20m',
                           timeout='20m',
                           lowercase_expanded_terms=False)

    sScrollId = dResponse['_scroll_id']

    if dConfig['debug']: debug('func: findProjects() (after initial search) dResponse: ', dResponse)

    if dConfig['debug']: debug('func: findProjects() search hits: ', dResponse['hits']['total'])

    #while not dResponse['timed_out'] and dResponse['hits']['hits']['total'] > 0:
    # defensive guard: only keep scrolling while the response is well-formed,
    # not timed out, and the query matched at least one document overall
    while 'timed_out' in dResponse and not dResponse['timed_out'] and 'hits' in dResponse and 'total' in dResponse['hits'] and dResponse['hits']['total'] > 0:

        dResponse = oES.scroll(scroll_id=sScrollId, scroll='20m')
        # scroll id must be refreshed on every round trip
        sScrollId = dResponse['_scroll_id']

        if ('hits' in dResponse['hits']) and (len(dResponse['hits']['hits']) > 0):

            if dConfig['debug']: debug('func: findProjects() scroll_id:', sScrollId, 'number of hits:', len(dResponse['hits']['hits']))

            # debug mode: stop scrolling after a handful of hits
            if dConfig['debug'] and iCtr > 10: break

            for dHit in dResponse['hits']['hits']:

                iCtr += 1

                if dConfig['debug']: debug('func: findProjects()', json.dumps(dHit, indent=4))

                # NOTE(review): this cap fires regardless of debug mode, so at
                # most ~100 ids are ever collected while the scroll keeps
                # running to exhaustion — possibly a debug leftover; confirm.
                if iCtr > 100: break

                # found matches
                if 'fields' in dHit and sLanguage in dHit['fields'] and '_id' in dHit:

                    lProjects.append(dHit['_id'])

        else:

            # empty batch: scroll is exhausted
            break

    printMsg('func: findProjects() found ', str(iCtr), ' buildTargets, spawned process exiting...')

    # derive output file name from the portion of the field name after the
    # first dot — assumes sLanguage contains a '.'; TODO confirm with callers
    sLanguageFileName = './' + sLanguage.split('.')[1] + '.txt'

    printMsg('func: findProjects() file created: ', sLanguageFileName)

    with open(sLanguageFileName, 'w') as fLanguage:

        for sProject in sorted(lProjects):

            fLanguage.write(sProject + '\n')

    return lProjects
def main(argv):
    """Entry point: parse CLI options, then run exactly one of three modes.

    Modes (mutually exclusive, first match wins):
      -p/--crawl-projects    : crawl sCorpusPath for projects (findProjects
                               producer + processProjects worker pool)
      -a/--analyze-projects  : find build/source targets and index them
      -q/--queue-projects    : push build/source targets onto redis queues

    Other options: -c corpus dir, -f fork count, -s queue site,
    -u unbuilt-only, -d debug.

    NOTE(review): getopt parses sys.argv[1:] rather than the *argv* parameter
    (same pattern as the other main() in this project) — left as-is so the
    calling convention is unchanged.
    """
    # defaults
    bError = False
    sCorpusPath = '/data/corpus_0to7'

    dConfig = {}

    dConfig['analyze-projects'] = False
    dConfig['crawl-projects'] = False
    dConfig['debug'] = False

    dConfig['es-bulk-chunk-size'] = 500
    dConfig['es-instance-locs'] = ['muse1-int', 'muse2-int', 'muse3-int']
    dConfig['es-file-index-name'] = 'muse-corpus-source-new'
    dConfig['es-file-index-type'] = 'files'
    dConfig['es-project-index-name'] = 'muse-projects'
    dConfig['es-project-index-type'] = 'projects'

    dConfig['forks'] = 5

    dConfig['mysql-db'] = 'muse'
    dConfig['mysql-user'] = '******'
    dConfig['mysql-passwd'] = 'muse'
    dConfig['mysql-loc'] = 'muse2-int'
    dConfig['mysql-port'] = 54321
    dConfig['mysql'] = True
    dConfig['mysql-bulk-statement-size'] = 100

    dConfig['queueUpFilesForBuilding'] = False
    dConfig['queueSite'] = ''

    dConfig['redis-queue-to-build'] = 'muse-to-build'
    dConfig['redis-queue-building'] = 'muse-building'
    dConfig['redis-queue-project-paths'] = 'muse-project-paths'
    dConfig['redis-queue-source-targets'] = 'muse-source-targets'
    dConfig['redis-loc'] = 'muse2-int'
    dConfig['redis-port'] = '12345'
    dConfig['redis'] = True

    dConfig['time-stamp'] = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    dConfig['unBuiltProjectsOnly'] = False
    dConfig['version'] = '1.0'

    # build-system marker files -> type tag + priority (1 = most preferred)
    dConfig['build-targets'] = {
        'configure': {'type': 'configureBuildType', 'ranking': 4},
        'configure.ac': {'type': 'configureacBuildType', 'ranking': 2},
        'configure.in': {'type': 'configureinBuildType', 'ranking': 3},
        'CMakeLists.txt': {'type': 'cmakeBuildType', 'ranking': 1},
        'Makefile': {'type': 'makefileBuildType', 'ranking': 5}
    }

    # C/C++ source extensions -> build type tag
    dConfig['source-targets'] = {
        '.c': 'cBuildType',
        '.cc': 'cppBuildType',
        '.cpp': 'cppBuildType',
        '.cxx': 'cppBuildType',
        '.c++': 'cppBuildType'
    }

    ### command line argument handling
    options, remainder = getopt.getopt(sys.argv[1:], 'c:f:apuqs:d',
                                       ['corpus-dir-path=', 'forks=', 'analyze-projects',
                                        'crawl-projects', 'unbuilt-projects-only',
                                        'queue-projects', 'queue-site=', 'debug'])

    debug('func: main()', 'options:', options)
    debug('func: main()', 'remainder:', remainder)

    for opt, arg in options:
        if opt in ('-c', '--corpus-dir-path'):
            sCorpusPath = arg
        elif opt in ('-f', '--forks'):
            # BUGFIX: getopt yields strings; multiprocessing.Pool(processes=...)
            # and range() below require an int
            dConfig['forks'] = int(arg)
        elif opt in ('-a', '--analyze-projects'):
            dConfig['analyze-projects'] = True
        elif opt in ('-p', '--crawl-projects'):
            dConfig['crawl-projects'] = True
        elif opt in ('-q', '--queue-projects'):
            dConfig['queueUpFilesForBuilding'] = True
        elif opt in ('-s', '--queue-site'):
            dConfig['queueSite'] = arg
        elif opt in ('-u', '--unbuilt-projects-only'):
            dConfig['unBuiltProjectsOnly'] = True
        elif opt in ('-d', '--debug'):
            dConfig['debug'] = True

    # crawl mode needs a real corpus directory on disk
    if dConfig['crawl-projects'] and not os.path.isdir(sCorpusPath):
        bError = True

    if bError:
        usage()
    else:
        iStart = time.time()

        ### setup producers
        if dConfig['crawl-projects']:
            # initialize projects table/queue
            initProjects(dConfig)

            # producer process populates mysql with project names from sCorpusPath
            # NOTE(review): another findProjects variant takes (qRedis,
            # sCorpusPath, dConfig) — confirm the 2-arg call matches the
            # definition in scope here.
            pfindProjects = multiprocessing.Process(target=findProjects,
                                                    args=(sCorpusPath, dConfig))
            pfindProjects.start()

            # pool of consumers, one dConfig per worker
            oProcessProjectsPool = multiprocessing.Pool(processes=dConfig['forks'])
            lArgs = [dConfig] * dConfig['forks']

            ### do work -- use pool of workers to index source targets
            oProcessProjectsPool.map(processProjects, lArgs)

            pfindProjects.join()
            oProcessProjectsPool.close()
            oProcessProjectsPool.join()

        elif dConfig['analyze-projects']:
            # initialize targets table/queue
            initTargets(dConfig)

            # build-target discovery runs to completion before source targets
            pBuildTargets = multiprocessing.Process(target=findBuildTargets,
                                                    args=(dConfig, ))
            pBuildTargets.start()
            pBuildTargets.join()

            pSourceTargets = multiprocessing.Process(target=findSourceTargets,
                                                     args=(dConfig, ))
            pSourceTargets.start()

            # pool of indexer workers, one dConfig per worker
            oSourceTargetIndexerPool = multiprocessing.Pool(processes=dConfig['forks'])
            lArgs = [dConfig] * dConfig['forks']

            ### do work -- use pool of workers to index source targets
            oSourceTargetIndexerPool.map(indexSourceTargets, lArgs)

            pSourceTargets.join()
            oSourceTargetIndexerPool.close()
            oSourceTargetIndexerPool.join()

        elif dConfig['queueUpFilesForBuilding']:
            initBuildQueues(dConfig=dConfig)
            queueUpBuildTargets(dConfig=dConfig)
            queueUpSourceTargets(dConfig=dConfig)

        if dConfig['debug']:
            debug('func: main()', "all processes completed")

        iEnd = time.time()

        printMsg('func: main()', 'execution time:', (iEnd - iStart), 'seconds')
def findBuildTargets(dConfig):
    """Scroll the ES file index for build-system marker files (configure,
    configure.ac/.in, CMakeLists.txt, Makefile) and bulk-insert one row per
    hit into the mysql buildTargets table.

    :param dConfig: config dict; reads the mysql-*, es-*, 'build-targets',
                    'mysql', 'mysql-bulk-statement-size' and 'debug' keys.
    :return: None (side effects: mysql inserts, log output).
    """
    # setup mysql client
    dMp = MuseProjectDB(db=dConfig['mysql-db'], port=dConfig['mysql-port'], user=dConfig['mysql-user'], passwd=dConfig['mysql-passwd'], loc=dConfig['mysql-loc'])
    dMp.open()

    # setup elasticsearch client (generous timeouts: the scan/scroll reads are slow)
    oES = Elasticsearch(dConfig['es-instance-locs'], timeout=180, max_retries=3, retry_on_timeout=True)

    # purge build targets queue -- considering if we need to split mysql ingestion
    # from elasticsearch queries... mysql may benefit from consumer pool inserting
    # statements concurrently
    # qRedis = RedisQueue(dConfig['redis-queue-build-targets'], namespace='queue', host=dConfig['redis-loc'], port=dConfig['redis-port'])

    lBuildFiles = []  # pending rows for the next bulk insert
    iCtr = 0          # total hits processed

    # must-clause 1: file name is one of the known build markers
    # must-clause 2: path under latest/* or content/*
    # must-clause 3: NOTE(review): wildcards "file.raw" against a *path* prefix
    #                ("/data/corpus_8tof/*") — looks like it was meant for the
    #                "path" field; confirm against the index mapping.
    dQuery = {
        "query": {
            "bool": {
                "must": [
                    {"bool": {"should": [
                        {"wildcard": {"file.raw": "*/configure.ac"}},
                        {"wildcard": {"file.raw": "*/configure.in"}},
                        {"wildcard": {"file.raw": "*/configure"}},
                        {"wildcard": {"file.raw": "*/CMakeLists.txt"}},
                        {"wildcard": {"file.raw": "*/Makefile"}}
                    ]}},
                    {"bool": {"should": [
                        {"match": {"path": "latest/*"}},
                        {"match": {"path": "content/*"}}
                    ]}},
                    {"wildcard": {"file.raw": "/data/corpus_8tof/*"}}
                ]
            }
        }
    }

    if dConfig['debug']: debug('func: findBuildFiles() dQuery:', json.dumps(dQuery))

    # scroll time set to 10 minutes, change as needed -- required for consistent
    # results, the scroll token expires at the end of scroll time
    dResponse = oES.search(index=dConfig['es-file-index-name'], doc_type=dConfig['es-file-index-type'], body=json.dumps(dQuery), search_type='scan', scroll='20m', timeout='20m', lowercase_expanded_terms=False, request_timeout=180,)

    sScrollId = dResponse['_scroll_id']

    if dConfig['debug']: debug('func: findBuildFiles() (after initial search) dResponse: ', dResponse)

    if dConfig['debug']: debug('func: findBuildFiles() search hits: ', dResponse['hits']['total'])

    # NOTE(review): duplicate of the line above but unconditional — likely a
    # debug leftover that always logs; confirm intent.
    debug('func: findBuildFiles() search hits: ', dResponse['hits']['total'])

    # keep scrolling while the response is well-formed, not timed out, and the
    # query matched at least one document overall
    while 'timed_out' in dResponse and not dResponse['timed_out'] and 'hits' in dResponse and 'total' in dResponse['hits'] and dResponse['hits']['total'] > 0:

        dResponse = oES.scroll(scroll_id=sScrollId, scroll='20m')
        # scroll id must be refreshed on every round trip
        sScrollId = dResponse['_scroll_id']

        if ('hits' in dResponse['hits']) and (len(dResponse['hits']['hits']) > 0):

            if dConfig['debug']: debug('func: findBuildFiles() scroll_id:', sScrollId, 'number of hits:', len(dResponse['hits']['hits']))

            # debug mode: stop after a handful of hits
            if dConfig['debug'] and iCtr > 10: break

            for dHit in dResponse['hits']['hits']:

                iCtr += 1

                if dConfig['debug'] and iCtr > 10: break

                # found matches
                try:

                    if '_source' in dHit:

                        # NATE added, remove leading path from found built targets
                        # (reduce 'file' to its basename so it can key into
                        # dConfig['build-targets'])
                        mBuildTarget = dHit['_source']['file'];
                        mBuildTarget = mBuildTarget.split('/')
                        dHit['_source']['file'] = mBuildTarget[len(mBuildTarget)-1]

                        dProjectFound = {}

                        # initialize all build target types to false
                        lBuildTypes = dMp.getBuildTypes()
                        for sBuildType in lBuildTypes:
                            dProjectFound[sBuildType] = False

                        # mark relevant build target type true
                        if 'file' in dHit['_source'] and dHit['_source']['file'] and dHit['_source']['file'] in dConfig['build-targets'].keys():

                            if dConfig['debug']: debug('func findBuildFiles() returned build target:', dHit['_source']['file'])

                            dProjectFound[dConfig['build-targets'][dHit['_source']['file']]['type']] = True
                            dProjectFound['ranking'] = dConfig['build-targets'][dHit['_source']['file']]['ranking']

                        else:

                            warning('func findBuildFiles() es returned an improper build target:', json.dumps(dHit['_source']))
                            continue

                        if 'project-name' in dHit['_source'] and dHit['_source']['project-name']:
                            dProjectFound['projectName'] = dHit['_source']['project-name']

                        if 'project-path' in dHit['_source'] and dHit['_source']['project-path']:
                            dProjectFound['projectPath'] = dHit['_source']['project-path']

                        if 'path' in dHit['_source'] and dHit['_source']['path']:
                            dProjectFound['buildTargetPath'] = verifyEncoding(dHit['_source']['path'])
                            dProjectFound['depth'] = depth(dProjectFound['buildTargetPath'])

                        # debug('func findBuildFiles()', json.dumps(dProjectFound))

                        lBuildFiles.append(dProjectFound)

                        # causing es reads to time out
                        # flush a bulk insert once the batch exceeds the
                        # configured statement size
                        if (len(lBuildFiles) > dConfig['mysql-bulk-statement-size']) and dConfig['mysql']:

                            dMp.insertIntoBuildTargets(lTargets=lBuildFiles, bDebug=dConfig['debug'])
                            printMsg('func findBuildFiles() loaded', iCtr, 'build targets')
                            lBuildFiles = []

                except (UnicodeDecodeError, UnicodeEncodeError) as e:

                    # skip hits whose path/file bytes cannot be (de)coded
                    warning('func findBuildFiles() encountered exception:', e)
                    #warning('func findBuildFiles() with string: ', dHit['_source']['path'])
                    warning('func findBuildFiles() full _source payload: ', json.dumps(dHit['_source'], indent=4))

        else:

            # empty batch: scroll is exhausted
            break

    # flush any remaining partial batch
    if (len(lBuildFiles) > 0) and dConfig['mysql']:

        dMp.insertIntoBuildTargets(lTargets=lBuildFiles, bDebug=dConfig['debug'])
        lBuildFiles = []

    dMp.close()