# Driver fragment: pre-processing, run-directory setup and ProSol start-up.
#
# NOTE(review): the original line breaks and indentation of this fragment were
# lost, so the nesting of the statements below (in particular whether
# proSol.initialize(...) and the useALLInterpros branch sit inside the first
# `if`) cannot be determined from this line alone -- confirm against the
# unmangled file before re-indenting.
#
# What the statements do, in order:
#   * call preProcess.preProcess() and instantiate ProSol.ProSol() as proSol;
#   * when neither redoCalculations nor resumeALLInterprosCalc is set, take
#     today's date in ISO format (YYYY-MM-DD) as strDate;
#   * build currentRunDir under projectResultsDir with
#     NGS_Util.createDirectoryPath: strDate + "_" + runDirName when
#     len(runDirName) > 1, otherwise strDate alone
#     (NOTE(review): a one-character runDirName is ignored by the `> 1`
#     test -- confirm whether `> 0` was intended);
#   * pass currentRunDir to NGS_Util.createDirectory;
#   * call proSol.initialize(...) with the input files, blastEValue,
#     currentRunDir, the Pfam data locations and organismsToCompareList;
#   * if useALLInterpros is truthy call proSol.runForAllInterPros(); otherwise
#     open iprFileName as iprFile_fh and call
#     proSol.makeOrganismsSequenceIdsDict().  The file handle is neither read
#     nor closed within this fragment; presumably that happens in the code
#     that follows.
preProcess.preProcess() proSol = ProSol.ProSol() if not redoCalculations and not resumeALLInterprosCalc: strDate = str(datetime.date.isoformat(datetime.date.today())) if len(runDirName) > 1: currentRunDir = NGS_Util.createDirectoryPath( projectResultsDir, strDate + "_" + runDirName) else: currentRunDir = NGS_Util.createDirectoryPath( projectResultsDir, strDate) NGS_Util.createDirectory(currentRunDir) proSol.initialize(orgListFile, orgFastaDir, accession2speciesFile, species2accessionFile, interpro_xml, PfamA_hmm_file, blastEValue, currentRunDir, pfamDatfile, pfamDataDir, organismsToCompareList) if (useALLInterpros): proSol.runForAllInterPros() else: iprFile_fh = open(iprFileName) proSol.makeOrganismsSequenceIdsDict()
# --- Reference data: taxonomy and the GTG / nrdb40 database files ---------
# Everything here is only a path computation; nothing is created on disk.
taxonomy = NGS_Util.createFilePath(projectDataDir, "taxonomy")
nrdb40_fasta = NGS_Util.createFilePath(
    projectDataDir, "GTG_database/nrdb40_v2.fasta")
orgGTGDatabaseDir = NGS_Util.createDirectoryPath(
    projectDataDir, "GTG_database")
# Despite its name, CAA1Dir is built with createFilePath and points at the
# CAA1.index file itself.
CAA1Dir = NGS_Util.createFilePath(
    projectDataDir, "GTG_database/index/CAA1.index")
nids_up = NGS_Util.createFilePath(projectDataDir, "GTG_database/nids.up")

# nrdb40 artefacts that live in the BLAST dust / BLAST database directories.
nrdb40_dust = NGS_Util.createFilePath(orgBlastDustDir, "nrdb40_dust.asnb")
nrdb40_blast_db = NGS_Util.createFilePath(orgBlastDBDir, "nrdb40")

# --- Sequence BLAST output -------------------------------------------------
# Each result directory is handed to createDirectory right after its path
# is built.
orgBlastResDir = NGS_Util.createDirectoryPath(
    projectResultDir, "blast_results")
NGS_Util.createDirectory(orgBlastResDir)

jointBlastDir = NGS_Util.createDirectoryPath(
    projectResultDir, "blast_joint_results")
NGS_Util.createDirectory(jointBlastDir)

# --- Sequence GTG output ---------------------------------------------------
orgGTGBlastResDir = NGS_Util.createDirectoryPath(
    projectResultDir, "GTG_blast_results")
NGS_Util.createDirectory(orgGTGBlastResDir)

GTGBestHitsDir = NGS_Util.createDirectoryPath(
    projectResultDir, "GTG_best_hits")
NGS_Util.createDirectory(GTGBestHitsDir)

GTGKNNDir = NGS_Util.createDirectoryPath(projectResultDir, "GTG_knn")
NGS_Util.createDirectory(GTGKNNDir)

# --- Sequence IPR output ---------------------------------------------------
def initialize(self, seq_org_list, jointBlastDir, GTGFungiKNNDir,
               fungi_InterProScan_result, phylogeneticTreeFile,
               modelTrainingDir):
    """Store the model-training inputs and set up the training directories.

    Every argument is recorded on the instance under the same name.  The
    sub-directory paths "IPR_EC", "BlastPValues", "ECScores",
    "ProbabilityDensityScore", "Tree" and "Model" are derived from
    *modelTrainingDir* with NGS_Util.createDirectoryPath, and the training
    directory plus all six sub-directories are passed to
    NGS_Util.createDirectory.  If *phylogeneticTreeFile* exists it is copied
    to "<Tree dir>/tree"; self.phylogeneticTreeFile is then re-pointed at
    that copy and self.treeCPDS at "<Tree dir>/tree.cpds".

    Args:
        seq_org_list: stored as self.seq_org_list.
        jointBlastDir: stored as self.jointBlastDir.
        GTGFungiKNNDir: stored as self.GTGFungiKNNDir.
        fungi_InterProScan_result: stored as self.fungi_InterProScan_result.
        phylogeneticTreeFile: path of the tree file to copy into the
            training "Tree" directory when it exists.
        modelTrainingDir: root directory for all model-training output.

    Returns:
        None.  This method is best-effort: any Exception raised while
        setting up is reported with a traceback and NOT propagated, so
        attributes assigned after the failing step may be left unset.
    """
    try:
        self.seq_org_list = seq_org_list
        self.jointBlastDir = jointBlastDir
        self.GTGFungiKNNDir = GTGFungiKNNDir
        self.fungi_InterProScan_result = fungi_InterProScan_result
        self.phylogeneticTreeFile = phylogeneticTreeFile
        self.modelTrainingDir = modelTrainingDir

        trainingRoot = self.modelTrainingDir
        self.modelTraining_IPR_EC_Dir = NGS_Util.createDirectoryPath(trainingRoot, "IPR_EC")
        self.modelTrainingBlastPVDir = NGS_Util.createDirectoryPath(trainingRoot, "BlastPValues")
        self.modelTraining_EC_Scores_Dir = NGS_Util.createDirectoryPath(trainingRoot, "ECScores")
        self.modelTrainingProbabilityDensityScoreDir = NGS_Util.createDirectoryPath(trainingRoot, "ProbabilityDensityScore")
        self.modelTrainingTreeDir = NGS_Util.createDirectoryPath(trainingRoot, "Tree")
        self.modelTrainingModelDir = NGS_Util.createDirectoryPath(trainingRoot, "Model")

        # The root directory comes first so the sub-directories are created
        # after their parent.
        for directory in (
            trainingRoot,
            self.modelTraining_IPR_EC_Dir,
            self.modelTrainingBlastPVDir,
            self.modelTraining_EC_Scores_Dir,
            self.modelTrainingProbabilityDensityScoreDir,
            self.modelTrainingTreeDir,
            self.modelTrainingModelDir,
        ):
            NGS_Util.createDirectory(directory)

        # Copy the tree only when the source file is actually there.
        treeCopy = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree")
        if os.path.exists(self.phylogeneticTreeFile):
            NGS_Util.copyFile(self.phylogeneticTreeFile, treeCopy)

        # NOTE(review): the original indentation of this method was lost; the
        # two assignments below are treated as unconditional so that
        # self.treeCPDS is always defined -- confirm against the unmangled
        # file.
        self.phylogeneticTreeFile = treeCopy
        self.treeCPDS = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree.cpds")

    except Exception:
        # print_exc() writes the traceback itself and returns None; wrapping
        # it in `print` (as before) emitted an extra "None" line and is a
        # syntax error on Python 3.
        traceback.print_exc()