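# The classes below assume the following imports. The standard-library modules are
# certain from usage; the project-internal names are listed as comments because their
# real module paths are not shown in this listing and would be guesses.
import glob
import os
import shutil
import socket
import subprocess
import distutils.dir_util
import distutils.file_util
from distutils import dir_util
from multiprocessing import Pool

# Project-internal dependencies (import from their actual modules):
# DbUtils, PipelineLogger, QCHandler, SQLBuilder, Sorting, Conversion, Processing,
# Raw2MINCConverter, QSubJobHandler, QSubJob, Recursor, SortingObject, ProcessingItemObj,
# MongoDBManager, MongoScanXMLManager, StudyConfig, libpath,
# PipelineConfig (also used under the alias `pc`), and the per-study processors
# (ADNI_V1_T1, ADNI_V1_FMRI, ADNI_V1_AV45, ADNI_V1_FDG, ADNI_V1_AV1451,
#  ADNI_V2_AV45, ADNI_V2_FDG, ADNI_V2_AV1451, DIAN_V1_T1, DIAN_V1_FDG, DIAN_V1_PIB).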
class PipelineManager:
    def __init__(self, studyList, version):
        self.DBClient = DbUtils()
        self.studyList = [i.upper() for i in studyList]
        self.version = version
        self.recursorList = []
        self._getRecursorList(studyList)
        self.sortingDataList = []
        self.sqlBuilder = SQLBuilder()
        self.moveSortingObjListDict = {}
        self.toConvertObjListDict = {}
        self.sortingTable = Sorting()
        self.conversionTable = Conversion()
        self.raw2mincConverter = Raw2MINCConverter()
        self.pool = Pool(processes=12)  # worker pool for raw -> MINC conversion
        self.qsubJobHandler = QSubJobHandler()
        self.qsubJobHandler.start()
        self.convertedListDict = {}
        self.processingTable = Processing()
        self.toProcessListDict = {}
        self.pipelineHandler = PipelineHandler()
        self.QCHandler = QCHandler()
        self.MongoManager = MongoDBManager()
        self.MongoXMLManager = MongoScanXMLManager()
        self.MongoXMLManager.processXMLs()

    # Builds the list of Recursor objects based on the study list provided.
    def _getRecursorList(self, studyList):
        for study in studyList:
            if study == 'ADNI':
                self.recursorList.append(Recursor(study, StudyConfig.ADNIDownloadRoot))
            elif study == 'ADNI_OLD':
                self.recursorList.append(Recursor(study, StudyConfig.ADNIOLDDownloadRoot))
            elif study == 'DIAN':
                self.recursorList.append(Recursor(study, StudyConfig.DIANDownloadRoot))

    # Recurses through the download folders looking for new data.
    def recurseForNewData(self):
        for recursor in self.recursorList:
            self.sortingDataList.append(recursor.recurse())

    # Adds the new entries to the DB.
    def addNewDatatoDB(self):
        for sortingDataSet in self.sortingDataList:  # one data set per study-specific recursor
            sortingObjList = [SortingObject(o.getValuesDict()) for o in sortingDataSet]
            self.sortingTable.insertToTable(sortingObjList)

    # Gets the list of files that need to be moved to study- and subject-specific folders.
    def getUnmovedRawDataList(self):
        for study in self.studyList:
            self.moveSortingObjListDict[study] = self.sortingTable.getUnmovedFilesPerStudy(study)

    # Moves the downloaded raw files to the study- and subject-specific folders and
    # sets the moved flag in the sorting table.
    def moveRawData(self):
        def removeCommaIfThere(destFolder):
            PipelineLogger.log('manager', 'debug', 'Removing unsupported chars from file names...... :')
            for dpath, dnames, fnames in os.walk(destFolder):
                for f in fnames:
                    os.chdir(dpath)
                    if ',' in f:
                        os.rename(f, f.replace(',', ''))
            PipelineLogger.log('manager', 'debug', 'Removing unsupported chars from file names done ...:')

        def copyFile(sourceFolder, destFolder):
            try:
                PipelineLogger.log('manager', 'debug', 'Raw Data Copying : {0} -> {1}'.format(sourceFolder, destFolder))
                distutils.dir_util.copy_tree(sourceFolder, destFolder, update=True)
                PipelineLogger.log('manager', 'debug', 'Raw Data Copy Done...... : {0} -> {1}'.format(sourceFolder, destFolder))
                removeCommaIfThere(destFolder)
                return 1
            except Exception as exc:
                PipelineLogger.log('manager', 'error', 'Raw Data Move Error : {0} -> {1}'.format(sourceFolder, destFolder))
                PipelineLogger.log('manager', 'exception', exc)
                return 0

        for study in self.studyList:
            totalToMove = len(self.moveSortingObjListDict[study])
            PipelineLogger.log('manager', 'info', 'Moving started for study {0} - Total to be moved : {1}'.format(study, totalToMove))
            count = 1
            for sortingObj in self.moveSortingObjListDict[study]:
                PipelineLogger.log('manager', 'info', 'Moving {0}/{1} - {2}'.format(count, totalToMove, sortingObj.download_folder))
                copied = copyFile(sortingObj.download_folder, sortingObj.raw_folder)
                count += 1
                if copied:
                    self.conversionTable.insertFromSortingObj(sortingObj, self.version)
                    self.sortingTable.setMovedTrue(sortingObj)
                else:
                    PipelineLogger.log('manager', 'error', 'File Move Error : {0} -> {1}. Moving to next...'.format(sortingObj.download_folder, sortingObj.raw_folder))

    def getConversionList(self):
        for study in self.studyList:
            self.toConvertObjListDict[study] = self.conversionTable.gettoBeConvertedPerStudy(study)

    def convertRawData(self):
        def addToDB(result):
            if result['converted']:
                # Add to the corresponding table.
                self.conversionTable.setConvertedTrue(result['obj'])
            else:
                PipelineLogger.log('manager', 'error', 'File conversion Error : {0} -> {1}. Moving to next...'.format(result['obj'].raw_folder, result['obj'].converted_folder))
                self.conversionTable.setConvertedFailed(result['obj'])

        for study in self.studyList:
            totalToConv = len(self.toConvertObjListDict[study])
            PipelineLogger.log('manager', 'info', 'Conversion started for study {0} - Total to be converted : {1}'.format(study, totalToConv))
            results = []
            for convObj in self.toConvertObjListDict[study]:
                convertedResult = self.pool.apply_async(self.raw2mincConverter.convert2minc, args=(convObj,), callback=addToDB)
                results.append(convertedResult)
            for r in results:
                r.wait()

    def getConvertedList(self):
        for study in self.studyList:
            self.convertedListDict[study] = self.conversionTable.getConvertedListPerStudy(study)

    def refreshModalityTables(self):
        for study in self.studyList:
            for convertionObj in self.convertedListDict[study]:
                self.processingTable.insertFromConvertionObj(convertionObj)

    def getProcessList(self):
        for study in self.studyList:
            self.toProcessListDict[study] = self.processingTable.getToProcessListPerStudy(study)

    def fillPipelineTables(self):
        for study in self.studyList:
            for processingObj in self.toProcessListDict[study]:
                self.pipelineHandler.addToPipelineTable(processingObj)

    def checkExternalJobs(self, modality):
        PipelineLogger.log('manager', 'info', 'Checking on external jobs ... ###########')
        for study in self.studyList:
            self.pipelineHandler.checkExternalJobs(study, modality)

    def checkOnQCJobs(self, modality):
        PipelineLogger.log('manager', 'info', 'Checking on QC jobs ... ###########')
        for study in self.studyList:
            self.QCHandler.checkQCJobs(study, modality)

    def processModality(self, modality):
        PipelineLogger.log('manager', 'info', 'File processing started ... ###########')
        for study in self.studyList:
            self.pipelineHandler.process(study, modality)
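# A minimal driver sketch showing the intended call order of PipelineManager, inferred
# from the method names above. Arguments are illustrative; the real entry point for
# this pipeline is not shown in this listing.
def run_pipeline_example():
    manager = PipelineManager(['ADNI'], 'V1')  # hypothetical study list / version tag
    manager.recurseForNewData()       # scan the download roots for new scans
    manager.addNewDatatoDB()          # register new scans in the Sorting table
    manager.getUnmovedRawDataList()
    manager.moveRawData()             # copy raw data into study/subject folders
    manager.getConversionList()
    manager.convertRawData()          # raw -> MINC in the worker pool
    manager.getConvertedList()
    manager.refreshModalityTables()
    manager.getProcessList()
    manager.fillPipelineTables()
    manager.processModality('T1')     # dispatch per-study/version/modality processing
    manager.checkExternalJobs('T1')
    manager.checkOnQCJobs('T1')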
class ADNI_V1_T1:
    def __init__(self):
        self.DBClient = DbUtils()
        self.QCHandler = QCHandler()

    def process(self, processingItem):
        processingItemObj = ProcessingItemObj(processingItem)
        if processingItemObj.beast_skip and processingItemObj.manual_skip and not processingItemObj.civet:
            self.runCivet(processingItemObj, 'N')  # CIVET with no mask
        elif processingItemObj.manual_mask and not processingItemObj.manual_skip and not processingItemObj.civet:
            self.runCivet(processingItemObj, 'M')  # CIVET with the manual mask
        elif processingItemObj.beast_mask == 0 and not processingItemObj.beast_skip and processingItemObj.beast_qc == 0 and not processingItemObj.manual_mask:
            self.runBeast(processingItemObj)
        elif processingItemObj.beast_skip and not processingItemObj.manual_mask and not processingItemObj.manual_skip:
            PipelineLogger.log('manager', 'error', '$$$$$$$$$$$$$$$$$ Manual Mask Requested $$$$$$$$$$$$$$$$$$ - {0}'.format(processingItem))
        elif processingItemObj.beast_mask == 1 and not processingItemObj.beast_skip and processingItemObj.beast_qc == 1 and not processingItemObj.manual_mask and not processingItemObj.civet:
            self.runCivet(processingItemObj, 'B')  # CIVET with the BeAST mask
        elif processingItemObj.beast_mask == 1 and not processingItemObj.beast_skip and processingItemObj.beast_qc == 0 and not processingItemObj.manual_mask and not processingItemObj.manual_skip:
            self.requestQC(processingItemObj, 'beast')
        elif processingItemObj.civet == 1 and processingItemObj.civet_qc == 0:
            self.requestQC(processingItemObj, 'civet')
        else:
            if processingItemObj.civet_qc == -1:
                PipelineLogger.log('manager', 'error', 'Civet QC failed. Skipping. - {0}'.format(processingItem))
            PipelineLogger.log('manager', 'error', 'Error handling obj for processing - {0}'.format(processingItem))
        return 0

    def getScanType(self, processingItemObj):
        r = self.DBClient.executeAllResults(
            "SELECT SCAN_TYPE FROM Conversion WHERE STUDY = '{0}' AND RID = '{1}' "
            "AND SCAN_DATE = '{2}' AND S_IDENTIFIER = '{3}' "
            "AND I_IDENTIFIER = '{4}'".format(processingItemObj.study, processingItemObj.subject_rid,
                                              processingItemObj.scan_date, processingItemObj.s_identifier,
                                              processingItemObj.i_identifier))
        return r[0][0]

    def checkNative(self, processingItemObj):
        # Ensure a copy of the converted scan exists in {root_folder}/native; on failure,
        # reset the Conversion flags so the scan is converted again on the next pass.
        orig_ScanType = self.getScanType(processingItemObj)
        converted_file = '{0}/{1}_{2}{3}{4}{5}_{6}.mnc'.format(processingItemObj.converted_folder, processingItemObj.study,
                                                               processingItemObj.subject_rid,
                                                               processingItemObj.scan_date.replace('-', ''),
                                                               processingItemObj.s_identifier, processingItemObj.i_identifier,
                                                               orig_ScanType)
        nativeFolder = '{0}/native'.format(processingItemObj.root_folder)
        nativeFileName = '{0}/{1}_{2}{3}{4}{5}_{6}.mnc'.format(nativeFolder, processingItemObj.study,
                                                               processingItemObj.subject_rid,
                                                               processingItemObj.scan_date.replace('-', ''),
                                                               processingItemObj.s_identifier, processingItemObj.i_identifier,
                                                               processingItemObj.modality.lower())
        if not os.path.exists(nativeFileName):
            try:
                distutils.dir_util.mkpath(nativeFolder)
                shutil.copyfile(converted_file, nativeFileName)
            except Exception as e:
                PipelineLogger.log('manager', 'error', 'Error in creating folders or copying native file. \n {0}'.format(e))
                PipelineLogger.log('manager', 'error', 'Setting to restart conversion. \n {0}'.format(e))
                sql = "UPDATE Conversion SET CONVERTED = 0, SKIP = 0 WHERE S_IDENTIFIER = '{0}' AND I_IDENTIFIER = '{1}'".format(
                    processingItemObj.s_identifier, processingItemObj.i_identifier)
                self.DBClient.executeNoResult(sql)
                return None
        return nativeFileName

    def runBeast(self, processingItemObj):
        nativeFileName = self.checkNative(processingItemObj)
        if not nativeFileName:
            return 0
        beastFolder = '{0}/beast'.format(processingItemObj.root_folder)
        logDir = '{0}/logs'.format(processingItemObj.root_folder)
        PipelineLogger.log('manager', 'info', 'BeAST starting for {0}'.format(nativeFileName))
        PipelineLogger.log('manager', 'info', 'Current working folder : {0}'.format(os.getcwd()))
        try:
            distutils.dir_util.mkpath(logDir)
        except Exception as e:
            PipelineLogger.log('manager', 'error', 'Error in creating log folder \n {0}'.format(e))
            return 0

        id = '{0}{1}{2}{3}'.format(processingItemObj.subject_rid, processingItemObj.scan_date.replace('-', ''),
                                   processingItemObj.s_identifier, processingItemObj.i_identifier)
        beastCMD = 'source /opt/minc-toolkit/minc-toolkit-config.sh; Pipelines/ADNI_T1/ADNI_V1_T1_BeAST {0} {1} {2} {3} {4} {5}'.format(
            id, nativeFileName, beastFolder, logDir, socket.gethostname(), 50500)
        try:
            shutil.rmtree(beastFolder)
        except OSError:
            pass  # the folder may not exist yet
        try:
            distutils.dir_util.mkpath(beastFolder)
        except Exception as e:
            PipelineLogger.log('manager', 'error', 'Error in creating BeAST folder. \n {0}'.format(e))
            return 0

        PipelineLogger.log('manager', 'debug', 'Command : {0}'.format(beastCMD))
        os.chdir(pc.SourcePath)
        p = subprocess.Popen(beastCMD, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, executable='/bin/bash')
        out, err = p.communicate()
        PipelineLogger.log('manager', 'debug', 'Beast Log Output : \n{0}'.format(out.decode("utf-8")))
        PipelineLogger.log('manager', 'debug', 'Beast Log Err : \n{0}'.format(err.decode("utf-8")))
        QSubJobHandler.submittedJobs[id] = QSubJob(id, '02:00:00', processingItemObj, 'beast')
        return 1

    def runCivet(self, processingItemObj, maskStatus):
        nativeFileName = self.checkNative(processingItemObj)
        if not nativeFileName:
            return 0
        copyFolder = pc.T1TempDirForCIVETProcessing
        subjectFileName_base = '{0}_{1}{2}{3}{4}_{5}'.format(processingItemObj.study, processingItemObj.subject_rid,
                                                             processingItemObj.scan_date.replace('-', ''),
                                                             processingItemObj.s_identifier, processingItemObj.i_identifier,
                                                             processingItemObj.modality.lower())
        jobId = '{0}_{1}_{2}_{3}{4}{5}{6}_CIVETRUN'.format(processingItemObj.study, processingItemObj.modality,
                                                           processingItemObj.table_id, processingItemObj.subject_rid,
                                                           processingItemObj.scan_date.replace('-', ''),
                                                           processingItemObj.s_identifier, processingItemObj.i_identifier)
        checkJobPresentSql = "SELECT * FROM externalWaitingJobs WHERE JOB_ID = '{0}'".format(jobId)
        if len(self.DBClient.executeAllResults(checkJobPresentSql)) == 0:
            beastFileName = '{0}/beast/mask/{1}_skull_mask_native.mnc'.format(processingItemObj.root_folder, subjectFileName_base)
            beastMaskName_base = '{0}_{1}{2}{3}{4}_mask'.format(processingItemObj.study, processingItemObj.subject_rid,
                                                                processingItemObj.scan_date.replace('-', ''),
                                                                processingItemObj.s_identifier, processingItemObj.i_identifier)
            beastMaskName = '{0}/{1}.mnc'.format(copyFolder, beastMaskName_base)
            manualFileName = '{0}/manual/mask/{1}_skull_mask_native.mnc'.format(processingItemObj.root_folder, subjectFileName_base)
            manualMaskName_base = '{0}_{1}{2}{3}{4}_mask'.format(processingItemObj.study, processingItemObj.subject_rid,
                                                                 processingItemObj.scan_date.replace('-', ''),
                                                                 processingItemObj.s_identifier, processingItemObj.i_identifier)
            manualMaskName = '{0}/{1}.mnc'.format(copyFolder, manualMaskName_base)
            try:
                distutils.file_util.copy_file(nativeFileName, copyFolder)
                if maskStatus == 'B':
                    distutils.file_util.copy_file(beastFileName, beastMaskName)
                elif maskStatus == 'M':
                    distutils.file_util.copy_file(manualFileName, manualMaskName)
                elif maskStatus == 'N':
                    pass
                else:
                    PipelineLogger.log('manager', 'error',
                                       'Unknown mask status - {0} Entry : Processing ID - {1}, Table ID - {2}'.format(
                                           maskStatus, processingItemObj.processing_rid, processingItemObj.table_id))
                addExternalJobSQL = "INSERT INTO externalWaitingJobs VALUES ('{0}', '{1}', '{2}', NULL, NULL, NULL)".format(
                    jobId, '{0}_{1}_Pipeline'.format(processingItemObj.study, processingItemObj.modality), 'CIVET')
                self.DBClient.executeNoResult(addExternalJobSQL)
            except Exception as e:
                PipelineLogger.log('manager', 'error',
                                   'Error copying for CIVET input. Rolling back... - Processing Table ID -> {0} Table ID -> {1}'.format(
                                       processingItemObj.processing_rid, processingItemObj.table_id))
                PipelineLogger.log('manager', 'exception', e)
                nativeFileOnCopyFolder = '{0}/{1}'.format(copyFolder, os.path.basename(nativeFileName))
                if os.path.exists(nativeFileOnCopyFolder):
                    os.remove(nativeFileOnCopyFolder)
                if os.path.exists(beastMaskName):
                    os.remove(beastMaskName)
                if os.path.exists(manualMaskName):
                    os.remove(manualMaskName)

    def requestQC(self, processingItemObj, qctype):
        qcFieldDict = dict(civet='QC', beast='BEAST_QC')
        qcFolderDict = {'civet': '{0}/civet'.format(processingItemObj.root_folder),
                        'beast': '{0}/beast'.format(processingItemObj.root_folder)}
        tablename = '{0}_{1}_Pipeline'.format(processingItemObj.study, processingItemObj.modality)
        self.QCHandler.requestQC(processingItemObj.study, tablename, processingItemObj.table_id,
                                 qcFieldDict[qctype], qctype, qcFolderDict[qctype])
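# ProcessingItemObj is defined elsewhere in the project; ADNI_V1_T1 above only relies on
# the attributes listed in this stand-in sketch. Field meanings are inferred from usage
# (e.g. BEAST_MASK is set to 1/-1 by QSubJobStatusReporter below), not from the real class.
class _ProcessingItemObjSketch:
    processing_rid = None    # Processing.RECORD_ID
    table_id = None          # RECORD_ID in the {study}_{modality}_Pipeline table
    study = None             # e.g. 'ADNI'
    version = None           # e.g. 'V1'
    modality = None          # e.g. 'T1'
    subject_rid = None       # subject identifier
    scan_date = None         # 'YYYY-MM-DD'; dashes are stripped when building file names
    s_identifier = None      # series identifier
    i_identifier = None      # image identifier
    converted_folder = None  # location of the converted MINC file
    root_folder = None       # per-scan root holding native/, beast/, civet/, logs/
    beast_mask = None        # 0 = not run, 1 = mask built, -1 = BeAST failed
    beast_qc = None          # 0 = pending, 1 = passed
    beast_skip = None        # 1 = skip BeAST masking
    manual_mask = None       # 1 = a manual mask exists
    manual_skip = None       # 1 = skip manual masking
    civet = None             # 1 = CIVET run finished
    civet_qc = None          # 0 = pending, 1 = passed, -1 = failed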
class PipelineHandler:
    def __init__(self):
        self.processingPPDict = {'ADNI': {'V1': {'T1': ADNI_V1_T1(), 'FMRI': ADNI_V1_FMRI(), 'AV45': ADNI_V1_AV45(),
                                                 'FDG': ADNI_V1_FDG(), 'AV1451': ADNI_V1_AV1451()},
                                          'V2': {'T1': ADNI_V1_T1(), 'FMRI': ADNI_V1_FMRI(), 'AV45': ADNI_V2_AV45(),
                                                 'FDG': ADNI_V2_FDG(), 'AV1451': ADNI_V2_AV1451()}},
                                 'DIAN': {'V1': {'T1': DIAN_V1_T1(), 'FDG': DIAN_V1_FDG(), 'PIB': DIAN_V1_PIB()}}}
        self.DBClient = DbUtils()
        self.QCH = QCHandler()

    def checkExternalJobs(self, study, modality):
        getExtJobSql = "SELECT * FROM externalWaitingJobs WHERE JOB_ID LIKE '{0}_{1}_%'".format(study, modality)
        extJobs = self.DBClient.executeAllResults(getExtJobSql)
        for job in extJobs:
            # JOB_ID format: {study}_{modality}_{tableID}_{subjectScanID}_{jobType}
            jobType = job[0].split('_')[-1]
            reportTable = job[1]
            tableID = job[0].split('_')[2]
            reportField = job[2]
            subjectScanID = job[0].split('_')[3]
            success = 0
            if jobType == 'CIVETRUN':
                downloadDirs = glob.glob('{0}/{1}_{2}_*'.format(PipelineConfig.T1TempDirForCIVETDownload, study, subjectScanID))
                if downloadDirs:
                    getProccessRecSql = "SELECT * FROM Processing WHERE RECORD_ID IN (SELECT PROCESSING_TID FROM {0}_T1_Pipeline WHERE RECORD_ID = {1})".format(study, tableID)
                    processingEntry = self.DBClient.executeAllResults(getProccessRecSql)[0]
                    civetFolder = '{0}/civet'.format(processingEntry[8])  # column 8: per-scan root folder
                    if os.path.exists(civetFolder):
                        shutil.rmtree(civetFolder)
                    try:
                        PipelineLogger.log('manager', 'info', 'Copying - {0} -> {1}'.format(downloadDirs[0], civetFolder))
                        dir_util.copy_tree(downloadDirs[0], civetFolder)
                        success = 1
                    except Exception:
                        success = 0
                else:
                    continue  # CIVET output not downloaded yet; check again later
            else:
                PipelineLogger.log('manager', 'error', 'Unknown external job type - {0}'.format(jobType))
            if success:
                updateSQL = "UPDATE {0} SET {1} = 1 WHERE RECORD_ID = {2}".format(reportTable, reportField, tableID)
                self.DBClient.executeNoResult(updateSQL)
                if jobType == 'CIVETRUN':
                    finishSQL = "UPDATE {0} SET FINISHED = 1 WHERE RECORD_ID = {1}".format(reportTable, tableID)
                    self.DBClient.executeNoResult(finishSQL)
                    self.QCH.requestQC(study, reportTable, tableID, 'QC', 'civet', civetFolder)
                rmSql = "DELETE FROM externalWaitingJobs WHERE JOB_ID LIKE '{0}_{1}_{2}_{3}_%'".format(study, modality, tableID, subjectScanID)
                self.DBClient.executeNoResult(rmSql)

    def process(self, study, modality):
        # Set up the MINC toolkit environment for the child processes.
        os.environ['PATH'] = ':'.join(libpath.PATH)
        os.environ['LD_LIBRARY_PATH'] = ':'.join(libpath.LD_LIBRARY_PATH)
        os.environ['LD_LIBRARYN32_PATH'] = ':'.join(libpath.LD_LIBRARYN32_PATH)
        os.environ['PERL5LIB'] = ':'.join(libpath.PERL5LIB)
        os.environ['MNI_DATAPATH'] = ':'.join(libpath.MNI_DATAPATH)
        os.environ['ROOT'] = ';'.join(libpath.ROOT)
        os.environ['MINC_TOOLKIT_VERSION'] = libpath.MINC_TOOLKIT_VERSION
        os.environ['MINC_COMPRESS'] = libpath.MINC_COMPRESS
        os.environ['MINC_FORCE_V2'] = libpath.MINC_FORCE_V2
        toProcessinModalityPerStudy = self.DBClient.executeAllResults(
            "SELECT * FROM Processing INNER JOIN (SELECT * FROM {0}_{1}_Pipeline WHERE NOT (FINISHED OR SKIP)) as TMP "
            "ON Processing.RECORD_ID=TMP.PROCESSING_TID".format(study, modality))
        for processingItem in toProcessinModalityPerStudy:
            version = processingItem[10]
            # Dispatch to the processing class for the given study, version and modality.
            self.processingPPDict[study][version][modality].process(processingItem)
        return 0

    def addToPipelineTable(self, processingObj):
        study = processingObj.study
        version = processingObj.version
        modality = processingObj.modality
        r_id = processingObj.record_id
        addToTableDict = dict(
            T1="INSERT IGNORE INTO {0}_T1_Pipeline VALUES (NULL, {1}, \"{2}\", 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultT1config),
            AV45="INSERT IGNORE INTO {0}_AV45_Pipeline VALUES (NULL, {1}, \"{2}\", '{3}', 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultAV45config, ''),
            AV1451="INSERT IGNORE INTO {0}_AV1451_Pipeline VALUES (NULL, {1}, \"{2}\", '{3}', 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultAV1451config, ''),
            FDG="INSERT IGNORE INTO {0}_FDG_Pipeline VALUES (NULL, {1}, \"{2}\", '{3}', 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultFDGconfig, ''),
            # PIB currently reuses the FDG default config.
            PIB="INSERT IGNORE INTO {0}_PIB_Pipeline VALUES (NULL, {1}, \"{2}\", '{3}', 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultFDGconfig, ''),
            FMRI="INSERT IGNORE INTO {0}_FMRI_Pipeline VALUES (NULL, {1}, \"{2}\", '{3}', 0, 0, 0, NULL, NULL)".format(study, r_id, PipelineConfig.defaultFMRIconfig, 'NIAK_STH_COMESHERE'))
        self.DBClient.executeNoResult(addToTableDict[modality])
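# Worked example of the externalWaitingJobs JOB_ID convention shared by
# PipelineHandler.checkExternalJobs() above and ADNI_V1_T1.runCivet(). All concrete
# values here are hypothetical:
#
#   JOB_ID = 'ADNI_T1_42_123420110101S123I456_CIVETRUN'
#   parts = JOB_ID.split('_')
#   parts[0]  -> 'ADNI'                  study
#   parts[1]  -> 'T1'                    modality
#   parts[2]  -> '42'                    RECORD_ID in ADNI_T1_Pipeline
#   parts[3]  -> '123420110101S123I456'  subject RID + scan date + S/I identifiers
#   parts[-1] -> 'CIVETRUN'              external job type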
class QSubJobStatusReporter:
    def __init__(self):
        self.DBClient = DbUtils()
        self.QCHandler = QCHandler()

    def setStatus(self, job, status):
        # All job types update their {study}_{modality}_Pipeline row; the PET-style jobs
        # (av45, av1451, fdg, pib) share identical handling and are folded together.
        nestedJob = job.job
        table = '{0}_{1}_Pipeline'.format(nestedJob.study, nestedJob.modality)
        table_id = nestedJob.table_id
        setSql = None
        if job.jobType == 'beast':
            if status == 'Success':
                setSql = 'UPDATE {0} SET BEAST_MASK = 1 WHERE RECORD_ID = {1}'.format(table, table_id)
            elif status == 'Fail':
                setSql = 'UPDATE {0} SET BEAST_MASK = -1, BEAST_SKIP = 1 WHERE RECORD_ID = {1}'.format(table, table_id)
        elif job.jobType in ('av45', 'av1451', 'fdg', 'pib'):
            if status == 'Success':
                setSql = "UPDATE {0} SET FINISHED = 1, PROC_Failed = NULL WHERE RECORD_ID = {1}".format(table, table_id)
                self.requestQC(nestedJob, job.jobType)
            elif status == 'Fail':
                setSql = "UPDATE {0} SET PROC_Failed = 'Failed', SKIP = 1 WHERE RECORD_ID = {1}".format(table, table_id)
        if setSql:
            self.DBClient.executeNoResult(setSql)
            if status == 'Fail':
                PipelineLogger.log('manager', 'error',
                                   'QSUB job Status Failed: - {0} - Processing Table ID : {1} - Modality Table ID : {2}'.format(
                                       job.jobType, nestedJob.processing_rid, nestedJob.table_id))

    def requestQC(self, processingItemObj, qctype):
        qcFieldDict = dict(civet='QC', beast='BEAST_QC', av45='QC', fdg='QC', av1451='QC', pib='QC')
        qcFolderDict = {'civet': '{0}/civet'.format(processingItemObj.root_folder),
                        'beast': '{0}/beast'.format(processingItemObj.root_folder),
                        'av45': '{0}/processed'.format(processingItemObj.root_folder),
                        'av1451': '{0}/processed'.format(processingItemObj.root_folder),
                        'fdg': '{0}/processed'.format(processingItemObj.root_folder),
                        'pib': '{0}/processed'.format(processingItemObj.root_folder)}
        self.QCHandler.requestQC(processingItemObj.study,
                                 '{0}_{1}_Pipeline'.format(processingItemObj.study, processingItemObj.modality),
                                 processingItemObj.table_id, qcFieldDict[qctype], qctype, qcFolderDict[qctype])