def copySequenceFiles(self, srcDataDir):
        """Copy each listed organism's ``.faa`` file into ``self.orgFastaDir``.

        Reads ``self.orgListFile`` line by line.  Every non-blank line must
        hold exactly two whitespace-separated fields,
        ``<organismNameID> <organismName>``.  When ``<organismName>.faa`` is
        not yet present in ``self.orgFastaDir``, the file of the same name is
        copied from ``srcDataDir`` via ``NGS_Util.copyFile``.

        Args:
            srcDataDir: directory that holds the source ``<organismName>.faa``
                files.

        Returns:
            None.  Any exception is caught and its traceback is printed, so a
            failure stops the remaining copies but does not propagate.
        """
        try:
            print("Copy Fasta Files from %s to %s" % (srcDataDir, self.orgFastaDir))

            # 'with' guarantees the list file is closed even if a copy fails.
            with open(self.orgListFile) as orgListFile_fh:
                for line in orgListFile_fh:
                    fields = line.split()
                    if not fields:
                        # Blank lines (e.g. a trailing newline) carry no
                        # organism; skip them instead of aborting the run.
                        continue

                    # Only the name is needed; the two-way unpack still
                    # rejects malformed lines with a ValueError.
                    _, organismName = fields
                    fastaName = organismName + ".faa"

                    if not os.path.exists(NGS_Util.createFilePath(self.orgFastaDir, fastaName)):
                        orgFasta = NGS_Util.createFilePath(srcDataDir, fastaName)
                        NGS_Util.copyFile(orgFasta, self.orgFastaDir)
                        print("Copied fasta file for %s" % (organismName))
                    else:
                        print("\tDoing nothing (files already copied) for %s" % (organismName))

        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in a print.
            traceback.print_exc()
    def getIPRScanScore(self, mode, clusterWaitSeconds=21600):
        """Run one phase of the cluster InterProScan workflow per organism.

        Reads ``self.orgListFile``; every non-blank line must hold exactly two
        whitespace-separated fields, ``<organismNameID> <organismName>``.
        Organisms whose ``<organismName>.faa.IPR.final.txt`` already exists in
        ``self.fungi_InterProScan_result`` are skipped.

        Args:
            mode: ``1`` calls ``self.runClusterIPRScan(organismName)`` and then
                sleeps for ``clusterWaitSeconds``.  ``2`` post-processes the
                cluster output (concatenate, convert XML to raw, derive
                ipr2go / ipr2ec / seqid2ec) and, when both the raw file and
                the seqid2ec file exist, copies them to
                ``<organismName>.faa.raw.txt`` and
                ``<organismName>.faa.IPR.final.txt``.  Any other value does
                nothing for the organism.
            clusterWaitSeconds: seconds to sleep after each mode-1 call;
                defaults to 21600 (6 hours), the previously hard-coded value.

        Returns:
            Always the empty string.  Exceptions are caught and their
            traceback printed; processing of remaining organisms stops.
        """
        try:
            print("getIPRScanScore")

            # 'with' guarantees the list file is closed even on failure.
            with open(self.orgListFile) as orgListFile_fh:
                for line in orgListFile_fh:
                    fields = line.split()
                    if not fields:
                        # Skip blank lines instead of aborting on them.
                        continue

                    # Two-way unpack still rejects malformed lines.
                    _, organismName = fields

                    organism_IPR_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")
                    if os.path.exists(organism_IPR_final):
                        continue

                    print("getIPRScanScore : " + organismName)

                    if mode == 1:
                        self.runClusterIPRScan(organismName)
                        time.sleep(clusterWaitSeconds)

                    elif mode == 2:
                        ipr_xml_file = self.concatenate_ClusterIPRScan_results(organismName)
                        ipr_raw_file = self.xmlIPRScanToRAWOutput(organismName, ipr_xml_file)

                        organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                        organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                        organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(organismName, organism_ipr2ec, ipr_raw_file)

                        # Publish results only when both artefacts were produced.
                        if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                            organism_raw_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.raw.txt")

                            NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                            NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in a print.
            traceback.print_exc()

        return ""
# Example #3
# 0
    def getIPRScanScore(self):
        """Run the InterProScan pipeline for every organism still lacking results.

        Reads ``self.orgListFile``.  Lines starting with ``#`` and blank lines
        are skipped; every other line must hold exactly two
        whitespace-separated fields, ``<organismNameID> <organismName>``.
        Organisms whose ``<organismName>.faa.IPR.final.txt`` already exists in
        ``self.fungi_InterProScan_result`` are skipped.

        For each remaining organism the method calls, in order,
        ``splitFiles``, ``rawIPRScan``, ``rawIPRScanToXMlOutput``,
        ``extract_ipr2go_based_on_xml``, ``map_ipr_to_specific_ecs`` and
        ``combine_iprscan_raw_result_with_ipr2ec``.  When both the raw file
        and the seqid2ec file exist they are copied to
        ``<organismName>.faa.raw.txt`` and ``<organismName>.faa.IPR.final.txt``.

        Returns:
            Always the empty string.  Exceptions are caught and their
            traceback printed; processing of remaining organisms stops.
        """
        try:
            # 'with' guarantees the list file is closed even on failure.
            with open(self.orgListFile) as orgListFile_fh:
                for line in orgListFile_fh:
                    if line.startswith("#"):
                        continue

                    fields = line.split()
                    if not fields:
                        # Skip blank lines instead of aborting on them.
                        continue

                    # Two-way unpack still rejects malformed lines.
                    _, organismName = fields

                    organism_IPR_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")
                    if os.path.exists(organism_IPR_final):
                        continue

                    print("getIPRScanScore : " + organismName)

                    org_ipr_split_dir = self.splitFiles(organismName)
                    ipr_raw_file = self.rawIPRScan(organismName, org_ipr_split_dir)
                    ipr_xml_file = self.rawIPRScanToXMlOutput(organismName, ipr_raw_file)

                    organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                    organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                    organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(organismName, organism_ipr2ec, ipr_raw_file)

                    # Publish results only when both artefacts were produced.
                    if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                        organism_raw_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.raw.txt")

                        NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                        NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in a print.
            traceback.print_exc()

        return ""
# Example #4
# 0
    def initialize(self, seq_org_list, jointBlastDir, GTGFungiKNNDir, fungi_InterProScan_result, phylogeneticTreeFile, modelTrainingDir):
        """Store the input locations and set up the model-training directory tree.

        The arguments are stored on ``self`` under the same names.  Six
        sub-directory paths are derived from ``modelTrainingDir`` with
        ``NGS_Util.createDirectoryPath`` ("IPR_EC", "BlastPValues",
        "ECScores", "ProbabilityDensityScore", "Tree", "Model"), and
        ``NGS_Util.createDirectory`` is called for ``modelTrainingDir`` and
        each of them.

        If ``phylogeneticTreeFile`` exists it is copied to a file named
        ``tree`` inside the "Tree" directory, and ``self.phylogeneticTreeFile``
        is repointed to that copy.  ``self.treeCPDS`` is set to the path of
        ``tree.cpds`` in the same directory.

        Args:
            seq_org_list: stored as ``self.seq_org_list``.
            jointBlastDir: stored as ``self.jointBlastDir``.
            GTGFungiKNNDir: stored as ``self.GTGFungiKNNDir``.
            fungi_InterProScan_result: stored as ``self.fungi_InterProScan_result``.
            phylogeneticTreeFile: path of the tree file to copy, if it exists.
            modelTrainingDir: root directory for all model-training output.

        Returns:
            None.  Exceptions are caught and their traceback printed, so
            attributes assigned after the failing statement remain unset.
        """
        try:
            self.seq_org_list = seq_org_list

            self.jointBlastDir = jointBlastDir
            self.GTGFungiKNNDir = GTGFungiKNNDir
            self.fungi_InterProScan_result = fungi_InterProScan_result

            self.phylogeneticTreeFile = phylogeneticTreeFile

            self.modelTrainingDir = modelTrainingDir
            self.modelTraining_IPR_EC_Dir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "IPR_EC")
            self.modelTrainingBlastPVDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "BlastPValues")
            self.modelTraining_EC_Scores_Dir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "ECScores")
            self.modelTrainingProbabilityDensityScoreDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "ProbabilityDensityScore")
            self.modelTrainingTreeDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "Tree")
            self.modelTrainingModelDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "Model")

            # Root first, then its children, in the original creation order.
            for directory in (
                self.modelTrainingDir,
                self.modelTraining_IPR_EC_Dir,
                self.modelTrainingBlastPVDir,
                self.modelTraining_EC_Scores_Dir,
                self.modelTrainingProbabilityDensityScoreDir,
                self.modelTrainingTreeDir,
                self.modelTrainingModelDir,
            ):
                NGS_Util.createDirectory(directory)

            if os.path.exists(self.phylogeneticTreeFile):
                # Work on a private copy of the tree inside the training area.
                localTreeFile = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree")
                NGS_Util.copyFile(self.phylogeneticTreeFile, localTreeFile)
                self.phylogeneticTreeFile = localTreeFile

            self.treeCPDS = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree.cpds")

        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in a print.
            traceback.print_exc()