def rotateLeucines(proj_path, proj_name, molec_name, leuList, modelCount):
    """Derive a new project (name postfixed with ROTL_STR) with rotated leucines.

    Returns True on error; returns None (falsy) on success.
    """
    # Run yasara head-less: text mode, no license splash.
    yasara.info.mode = "txt"
    yasara.info.licenseshown = 0
    #yasara.Console('Off')
    rotatedProjectName = '%s_%s' % (proj_name, ROTL_STR)
    outputLocation = '%s/%s.cing' % (proj_path, rotatedProjectName)
    # Start from a clean slate if a previous run left results behind.
    if os.path.exists(outputLocation):
        nTmessage("Removing previously existing directory: %s" % outputLocation)
        rmdir(outputLocation)
    copyProject(proj_path, proj_name, rotatedProjectName)
    if rotating(proj_path, rotatedProjectName, molec_name, leuList, modelCount):
        nTerror("Failed rotating")
        return True
    deleteDirs(proj_path, proj_name, molec_name)
    if changeCoordinates(proj_path, rotatedProjectName):
        nTerror("Failed changeCoordinates")
        return True
    nTmessage("Done with rotateLeucines")
class AllChecks(TestCase):
    """Unit tests for molecular image (molgrap/gif) generation."""
    # Class-level setup (runs once, at class-definition time): create a private
    # scratch directory and chdir into it so test artifacts do not collide with
    # other test modules.
    cingDirTmpTest = os.path.join(cingDirTmp, 'test_Molgrap')
    if os.path.exists(cingDirTmpTest):
        rmdir(cingDirTmpTest)
    # end if
    mkdirs(cingDirTmpTest)
    os.chdir(cingDirTmpTest)

    def testMolgrapRunFromPdbFile(self):
        """Init a project from a CYANA tgz and check gif imagery is produced."""
        # SETUP FIRST
        # entryId = "1ai0" # Most complex molecular system in any PDB NMR entry
        # entryId = "1a4d" # Small much studied PDB NMR entry
        # entryId = "1zwj" # X-ray entry of CESG interest.
        entryId = "1brv" # Small much studied PDB NMR entry
        # entryId = "2hgh_1model"
        # does it matter to import it just now?
        project = Project(entryId)
        # removeFromDisk presumably follows the error-code convention (truthy on
        # failure) -- TODO confirm against Project implementation.
        self.failIf(project.removeFromDisk())
        project = Project.open(entryId, status='new')
        cyanaFile = os.path.join(cingDirTestsData, "cyana", entryId + ".cyana.tgz")
        self.assertTrue(project.initCyana(cyanaFolder=cyanaFile))
        project.save()
        gifFileName = entryId + ".gif"
        pathGif = os.path.join(self.cingDirTmpTest, gifFileName)
        # export2gif returns falsy on success (asserted with assertFalse below).
        self.assertFalse(project.molecule.export2gif(pathGif, project=project))
        self.assertTrue(os.path.exists(pathGif))
        # A companion "pin-up" image is expected next to the main gif.
        pathMolGifPinup = pathGif[:-4] + '_pin.gif'
        self.assertTrue(os.path.exists(pathMolGifPinup))
        # Guard against silently getting the fallback placeholder image by
        # comparing file sizes with the known 'UnknownImage.gif'.
        pathGifDefault = os.path.join(cingPythonCingDir, 'PluginCode', DATA_STR, 'UnknownImage.gif')
        self.assertFalse( os.path.getsize(pathGif) == os.path.getsize(pathGifDefault))
        nTmessage("Created new molecular imagery at: %s" % self.cingDirTmpTest)
    # end def

    def _testMolgrapRunFromCcpnFile(self):
        """Same check but starting from a CCPN tgz; disabled (leading underscore)."""
        # entryId = "1cjg" # Nucleic acid entry.
        # NOTE(review): the "Nucleic acid entry" label looks copied from the 1cjg
        # line above; 1brv is described as a small protein entry elsewhere -- verify.
        entryId = "1brv"
        project = Project.open(entryId, status='new')
        self.assertTrue(project, 'Failed opening project: ' + entryId)
        ccpnFile = os.path.join(cingDirTestsData, "ccpn", entryId + ".tgz")
        self.assertTrue(project.initCcpn(ccpnFolder=ccpnFile))
        self.assertTrue(project.save())
        gifFileName = entryId + ".gif"
        pathGif = os.path.join(cingDirTmp, gifFileName)
        self.assertFalse(project.molecule.export2gif(pathGif, project=None))
        self.assertTrue(os.path.exists(pathGif))
        # Do not leave the old CCPN directory laying around since it might get added to by another test.
        if os.path.exists(entryId):
            # shutil.rmtree returns None, so the assert only guards against an exception.
            self.assertFalse(shutil.rmtree(entryId))
def doPylintOverall(pylintFileName='pylint.txt'): "Add the ones you don't want to pylint (perhaps you know they don't work yet)" namepattern = "*.py" pylintDir = os.path.join(cingDirTmp, 'pylint') # pylintFileName = os.path.join( pylintDir, 'pylint.log') if os.path.exists(pylintDir): rmdir(pylintDir) mkdirs(pylintDir) if os.path.exists(pylintFileName): os.unlink(pylintFileName) excludedModuleList = [ # enable exclusions for quick testing. # cingPythonDir + "/cing/core*", # cingPythonDir + "/cing/Database*", # cingPythonDir + "/cing/Libs*", # cingPythonDir + "/cing/NRG*", # cingPythonDir + "/cing/PluginCode*", # cingPythonDir + "/cing/Scripts*", # cingPythonDir + "/cing/STAR*", # cingPythonDir + "/cing/Talos*", # Normal set: cingPythonDir + "/cing/Database/CCPN*", cingPythonDir + "/cyana2cing*", cingPythonDir + "/pdbe2*", cingPythonDir + "/queen*", cingPythonDir + "/Refine*", cingPythonDir + "/UtilsAnalysis*", cingPythonDir + "/xplorcing*" ] startdir = cingPythonDir nameList = findFiles(namepattern, startdir, exclude=excludedModuleList) # enable next line(s) to do a couple of checks only. 
# nameList = ['/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_NmrStar.py', # '/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_ccpn.py', # '/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_ccpn_2.py'] nTdebug('Will unit check: ' + repr(nameList)) f = ForkOff( processes_max=cing.ncpus, max_time_to_wait=600, # on a very slow setup verbosity=2) job_list = [] for name in nameList: job_list.append((doPylintByName, (name, excludedModuleList))) done_list = f.forkoff_start(job_list, 0) nTmessage("Finished ids: %s", done_list) for name in nameList: mod_name = pathToModuleName(name) if mod_name in excludedModuleList: print "Skipping module: " + mod_name return pylintOutputFileName = os.path.join(pylintDir, mod_name + '.log') if not os.path.exists(pylintDir): nTerror("Failed to find pylint output: " + pylintOutputFileName) continue if appendTextFileToFile(pylintOutputFileName, pylintFileName): nTerror("Failed to appendTextFileToFile") # nTdebug("Done appending from: %s" % pylintOutputFileName) # end for doPylintOverallSummary(pylintFileName=pylintFileName) nTmessage("Done with pylint")
def main(entryId, *extraArgList):
    """inputDir may be a directory or a url. A url needs to start with http://.

    Validates one PDB/CCPN/CING entry with CING, optionally stores results to
    the DB and ships a tgz of the result. Returns True on error; returns None
    on success or when the entry is skipped/already done.
    """
    # --- Run configuration flags -------------------------------------------
    fastestTest = False # default: False
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doQueeny = True
    doTalos = True
    tgzCing = True # default: True # Create a tgz for the cing project. In case of a CING project input it will be overwritten.
    # NB leave this set to True or modify code below.
    removeCcpnDirectory = 1 # perhaps not so in the future.
    modelCount = None # default setting is None
    # ranges = None
    if fastestTest:
        modelCount = 2 # if this is more and there is only one model present it leads to an error message.
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doQueeny = False
        doTalos = False
    # Unconditional defaults (must be set on every path; used further below).
    forceRedo = True
    forceRetrieveInput = True
    nTmessage(header)
    nTmessage(getStartMessage())
    # --- Argument parsing ---------------------------------------------------
    # Sync below code with nrgCing#createToposTokens
    expectedArgumentList = """
        verbosity inputDir outputDir pdbConvention restraintsConvention
        archiveType projectType storeCING2db ranges filterTopViolations
        filterVasco singleCoreOperation
    """.split()
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTmessage("consider updating code to include all sequential parameters: %s" % str(expectedArgumentList))
        # Fewer arguments than expected is tolerated (trailing ones default);
        # more than expected is a hard error.
        if len(extraArgList) > expectedNumberOfArguments:
            nTerror("Got arguments: " + str(extraArgList))
            nTerror("Failed to get expected number of arguments: %d got %d" % (
                expectedNumberOfArguments, len(extraArgList)))
            nTerror("Expected arguments: %s" % expectedArgumentList)
            return True
        # end if
    # end if
    entryCodeChar2and3 = entryId[1:3] # middle two chars of the pdb code, used for sharding.
    cing.verbosity = int(extraArgList[IDX_VERBOSITY])
    inputDir = extraArgList[IDX_INPUT]
    outputDir = os.path.join(extraArgList[IDX_OUTPUT], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[IDX_PDB] #@UnusedVariable
    restraintsConvention = extraArgList[IDX_RESTRAINTS]
    archiveType = extraArgList[IDX_ARCHIVE] # Only used for deriving the input location not the output.
    projectType = extraArgList[IDX_PROJECT_TYPE]
    storeCING2db = stringMeansBooleanTrue(getDeepByKeysOrAttributes(extraArgList, IDX_STORE_DB))
    ranges = getDeepByKeysOrAttributes(extraArgList, IDX_RANGES)
    filterTopViolations = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_TOP)
    if filterTopViolations:
        filterTopViolations = int(filterTopViolations) # change '0' to 0
    filterVasco = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_VASCO)
    if filterVasco:
        filterVasco = int(filterVasco)
    else:
        filterVasco = 1 # Default should be True
    # end if
    singleCoreOperation = getDeepByKeysOrAttributes(extraArgList, IDX_SINGLE_CORE_OPERATION)
    if singleCoreOperation:
        singleCoreOperation = int(singleCoreOperation)
    else:
        singleCoreOperation = 0 # Default should be True
    # end if
    # Map the archive layout onto the concrete input directory.
    if archiveType == ARCHIVE_TYPE_FLAT:
        pass # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    # An '@' marks a user@host ssh-style destination.
    isRemoteOutputDir = False
    if '@' in outputDir:
        isRemoteOutputDir = True
    # end if
    # vc = vCing('.') # argument is a fake master_ssh_url not needed here.
    archive_id = getArchiveIdFromDirectoryName(outputDir)
    nTdebug("Using program arguments:")
    nTdebug("inputDir: %s" % inputDir)
    nTdebug("outputDir: %s" % outputDir)
    nTdebug("pdbConvention: %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType: %s" % archiveType)
    nTdebug("projectType: %s" % projectType)
    nTdebug("storeCING2db: %s" % storeCING2db)
    nTdebug("ranges: %s" % ranges)
    nTdebug("filterTopViolations: %s" % filterTopViolations)
    nTdebug("filterVasco: %s" % filterVasco)
    nTdebug("singleCoreOperation: %s" % singleCoreOperation)
    nTdebug("")
    nTdebug("Using derived settings:")
    nTdebug("modelCount: %s" % modelCount)
    nTdebug("isRemoteOutputDir: %s" % isRemoteOutputDir)
    nTdebug("archive_id: %s" % archive_id)
    # For NMR_REDO required as most efficient.
    if singleCoreOperation:
        setToSingleCoreOperation()
    # --- Decide whether the entry still needs doing -------------------------
    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if forceRedo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.
    # Work locally when the final destination is remote.
    if isRemoteOutputDir:
        os.chdir(cingDirTmp)
    else:
        os.chdir(outputDir)
    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.
    # --- Retrieve the input archive -----------------------------------------
    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId
    # nTdebug("fileNameTgz: %s" % fileNameTgz)
    allowedInputProtocolList = 'http file ssh'.split()
    inputProtocal = string.split(inputDir, ':')[0]
    if inputProtocal in allowedInputProtocolList:
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if forceRetrieveInput:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return
        # end if
    else:
        nTdebug("Entry not retrieved which might be normal in some situations.")
    # end if.
    # --- Initialize the project from the input ------------------------------
    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory argument..
        # fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
        # shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        # if tmpPdbFile:
        if True:
            nTdebug("Removing tmp: %s" % pdbFilePath)
            os.unlink(pdbFilePath)
        # end if
    if ranges is not None:
        project.molecule.setRanges(ranges)
    # end if
    if archive_id:
        project.molecule.setArchiveId(archive_id)
    # end if
    project.molecule.superpose(ranges=ranges)
    if filterTopViolations and not project.filterHighRestraintViol():
        nTerror("Failed to filterHighRestraintViol")
    ####> MAIN UTILITY HERE
    if 0: # DEFAULT 0
        project.save()
    if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck,
                        doWhatif=doWhatif, doWattos=doWattos, doQueeny=doQueeny,
                        doTalos=doTalos, filterVasco=filterVasco):
        nTerror("Failed to validate project read")
        return True
    # end if filterVasco
    # Write a single PDB file containing all models
    # according to IUPAC conventions
    project.export2PDB()
    project.save()
    # --- Optional DB storage -----------------------------------------------
    if storeCING2db and archive_id:
        # Does require:
        # from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        # archive_id = ARCHIVE_DEV_NRG_ID
        # if isProduction:
        #     archive_id = ARCHIVE_NRG_ID
        try:
            if doStoreCING2db(entryId, archive_id, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
        except:
            # Deliberate broad catch: DB storage must never kill the run.
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")
    # --- Cleanup and result shipping ----------------------------------------
    if projectType == PROJECT_TYPE_CCPN:
        # fileNameTgz = entryId + '.tgz'
        os.unlink(fileNameTgz) # temporary ccpn tgz
        if removeCcpnDirectory:
            rmdir(entryId) # ccpn dir may contain vasco info.
    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        nTdebug("cmd: %s" % cmd)
        # do_cmd(cmd)
        status, result = commands.getstatusoutput(cmd)
        if status:
            nTerror("Failed to tar status: %s with result %s" % (status, result))
            return True
        if isRemoteOutputDir:
            # Ship the tgz and only then drop the local copies.
            if putFileBySsh(tgzFileNameCing, outputDir, ntriesMax=2):
                nTerror("Failed to send File By Scp status: %s with result %s" % (status, result))
                nTerror("Maintaining results.")
                return True
            # end if
            nTmessage("Removing tgz result: %s" % tgzFileNameCing)
            os.remove(tgzFileNameCing)
            nTmessage("Removing cing dir itself: %s" % directoryNameCing)
            rmdir(directoryNameCing)
        else:
            # do NOT remove local copy
            pass
def getCingEntryInfo(self):
    """Returns True for error
    Will remove entry directories if they do not occur in NRG up to a maximum number as not to whip out every one in a single blow by accident.

    Scans the DATA_STR file tree (two-char shard dirs containing entry dirs)
    and classifies each entry into the self.entry_list_* NTlists, filling
    self.timeTakenDict along the way.
    """
    nTmessage("Get the entries tried, todo, crashed, and stopped in PDB-CING from file system.")
    # Fresh result containers; entry_list_pdb and entry_to_delete_count_max
    # are expected to be set on self already -- TODO confirm against caller.
    self.entry_list_obsolete = NTlist()
    self.entry_list_tried = NTlist()
    self.entry_list_crashed = NTlist()
    self.entry_list_stopped = NTlist() # mutely exclusive from entry_list_crashed
    self.entry_list_done = NTlist()
    self.entry_list_todo = NTlist()
    subDirList = os.listdir(DATA_STR)
    for subDir in subDirList:
        # Shard dirs are exactly two characters (the middle chars of the code).
        if len(subDir) != 2:
            if subDir != DS_STORE_STR:
                nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
            continue
        entryList = os.listdir(os.path.join(DATA_STR, subDir))
        for entryDir in entryList:
            entry_code = entryDir
            if not is_pdb_code(entry_code):
                if entry_code != DS_STORE_STR:
                    nTerror("String doesn't look like a pdb code: " + entry_code)
                continue
            # nTdebug("Working on: " + entry_code)
            entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
            # Obsolete entries no longer in the PDB, but cap the number deleted
            # per run to avoid wiping everything by accident.
            if not entry_code in self.entry_list_pdb:
                nTwarning("Found entry %s in PDB-CING-CING but not in PDB. Will be obsoleted in PDB-CING too" % entry_code)
                if len(self.entry_list_obsolete) < self.entry_to_delete_count_max:
                    rmdir(entrySubDir)
                    self.entry_list_obsolete.append(entry_code)
                else:
                    nTerror("Entry %s in PDB-CING not obsoleted since there were already removed: %s" % (
                        entry_code, self.entry_to_delete_count_max))
            # end if
            cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
            if not os.path.exists(cingDirEntry):
                nTmessage("Failed to find directory: %s" % cingDirEntry)
                continue
            # Look for last log file
            logList = glob(entrySubDir + '/log_validateEntry/*.log')
            if not logList:
                nTmessage("Failed to find any log file in directory: %s" % entrySubDir)
                continue
            # .cing directory and .log file present so it was tried to start but might not have finished
            self.entry_list_tried.append(entry_code)
            logLastFile = logList[-1]
            # nTdebug("Found logLastFile %s" % logLastFile)
            # set timeTaken = (` grep 'CING took :' $logFile | gawk '{print $(NF-1)}' `)
            # text = readTextFromFile(logLastFile)
            entryCrashed = False
            # Scan the last log: record the run time and detect a stack trace.
            for r in AwkLike(logLastFile):
                line = r.dollar[0]
                if entryCrashed:
                    # After a traceback is seen, echo the remaining log lines.
                    nTdebug(line)
                if line.startswith('CING took :'):
                    # nTdebug("Matched line: %s" % line)
                    timeTakenStr = r.dollar[r.NF - 1]
                    self.timeTakenDict[entry_code] = float(timeTakenStr)
                    # nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                if line.startswith('Traceback (most recent call last)'):
                    nTdebug("%s Matched line: %s" % (entry_code, line))
                    if entry_code in self.entry_list_crashed:
                        nTwarning("%s was already found before; not adding again." % entry_code)
                    else:
                        self.entry_list_crashed.append(entry_code)
                        entryCrashed = True
            if entryCrashed:
                continue # don't mark it as stopped anymore.
            # end for AwkLike
            # No 'CING took :' line means the run never finished normally.
            if not self.timeTakenDict.has_key(entry_code):
                # was stopped by time out or by user or by system (any other type of stop but stack trace)
                nTmessage("%s Since CING end message was not found assumed to have stopped" % entry_code)
                self.entry_list_stopped.append(entry_code)
                continue
            # Look for end statement from CING which shows it wasn't killed before it finished.
            indexFileEntry = os.path.join(cingDirEntry, "index.html")
            if not os.path.exists(indexFileEntry):
                nTmessage("%s Since index file %s was not found assumed to have stopped" % (entry_code, indexFileEntry))
                self.entry_list_stopped.append(entry_code)
                continue
            projectHtmlFile = os.path.join(cingDirEntry, entry_code, "HTML/index.html")
            if not os.path.exists(projectHtmlFile):
                nTmessage("%s Since project html file %s was not found assumed to have stopped" % (entry_code, projectHtmlFile))
                self.entry_list_stopped.append(entry_code)
                continue
            # Disabled check for the molecule image.
            if False: # Default is True
                molGifFile = os.path.join(cingDirEntry, entry_code, "HTML/mol.gif")
                if not os.path.exists(molGifFile):
                    nTmessage("%s Since mol.gif file %s was not found assumed to have stopped" % (entry_code, projectHtmlFile))
                    self.entry_list_stopped.append(entry_code)
                    continue
            self.entry_list_done.append(entry_code)
        # end for entryDir
    # end for subDir
    # --- Report -------------------------------------------------------------
    timeTakenList = NTlist() # local variable.
    timeTakenList.addList(self.timeTakenDict.values())
    nTmessage("Time taken by CING by statistics\n%s" % timeTakenList.statsFloat())
    if not self.entry_list_tried:
        nTerror("Failed to find entries that CING tried.")
    self.entry_list_todo.addList(self.entry_list_pdb)
    self.entry_list_todo = self.entry_list_todo.difference(self.entry_list_done)
    nTmessage("Found %s entries that CING tried (T)." % len(self.entry_list_tried))
    nTmessage("Found %s entries that CING crashed (C)." % len(self.entry_list_crashed))
    nTmessage("Found %s entries that CING stopped (S)." % len(self.entry_list_stopped))
    if not self.entry_list_done:
        nTerror("Failed to find entries that CING did.")
    nTmessage("Found %s entries that CING did (B=A-C-S)." % len(self.entry_list_done))
    nTmessage("Found %s entries todo (A-B)." % len(self.entry_list_todo))
    nTmessage("Found %s entries in PDB-CING made obsolete." % len(self.entry_list_obsolete))
def main(entryId, *extraArgList):
    """inputDir may be a directory or a url. A url needs to start with http://.

    CASD-NMR variant of the entry validation driver. Returns True on error;
    None on success or skip.
    """
    # NOTE(review): set to True although the comment says default False --
    # looks like a leftover debug setting; verify before production use.
    fastestTest = True # default: False
    # ranges=AUTO_STR # default is None retrieved from DBMS csv files.
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doTalos = True
    tgzCing = True # default: True # Create a tgz for the cing project. In case of a CING project input it will be overwritten.
    modelCount = None # default setting is None
    if fastestTest:
        modelCount = 3
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doTalos = False
    # Unconditional defaults (must be set on every path; used further below).
    force_redo = True
    force_retrieve_input = True
    nTmessage(cing.cingDefinitions.getHeaderString())
    nTmessage(cing.systemDefinitions.getStartMessage())
    # --- Argument parsing ---------------------------------------------------
    expectedArgumentList = [
        'inputDir', 'outputDir', 'pdbConvention', 'restraintsConvention',
        'archiveType', 'projectType', 'storeCING2db']
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTerror("Got arguments: " + repr(extraArgList))
        nTerror("Failed to get expected number of arguments: %d got %d" % (
            expectedNumberOfArguments, len(extraArgList)))
        nTerror("Expected arguments: %s" % expectedArgumentList)
        return True
    entryCodeChar2and3 = entryId[1:3] # middle two chars of the pdb code, used for sharding.
    inputDir = extraArgList[0]
    outputDir = os.path.join(extraArgList[1], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[2] #@UnusedVariable
    restraintsConvention = extraArgList[3]
    archiveType = extraArgList[4]
    projectType = extraArgList[5]
    storeCING2db = False
    # Redundant guard (count already validated above) kept as-is.
    if len(extraArgList) >= expectedNumberOfArguments:
        storeCING2db = extraArgList[6]
    # Map the archive layout onto the concrete input directory.
    if archiveType == ARCHIVE_TYPE_FLAT:
        pass # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    ranges = None
    # targetId = getTargetForFullEntryName(entryId)
    # if not targetId:
    #     nTerror("Failed to getTargetForFullEntryName for entryId: %s" % entryId)
    #     return True
    # ranges = getRangesForTarget(targetId)
    # if ranges == None:
    #     nTerror("Failed to getRangesForTarget for targetId: %s" % targetId)
    #     return True
    nTdebug("Using:")
    nTdebug("inputDir: %s" % inputDir)
    nTdebug("outputDir: %s" % outputDir)
    nTdebug("pdbConvention: %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType: %s" % archiveType)
    nTdebug("projectType: %s" % projectType)
    nTdebug("modelCount: %s" % modelCount)
    nTdebug("storeCING2db: %s" % storeCING2db)
    nTdebug("ranges: %s" % ranges)
    # --- Decide whether the entry still needs doing -------------------------
    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if force_redo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.
    os.chdir(outputDir)
    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.
    # --- Retrieve the input archive -----------------------------------------
    # extension = '.tgz'
    formatFileName = '%s.tgz'
    # fileNameTgz = entryId + '.tgz'
    if projectType == PROJECT_TYPE_CING:
        # fileNameTgz = entryId + '.cing.tgz'
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId
    # nTdebug("fileNameTgz: %s" % fileNameTgz)
    # if true will do retrieveTgzFromUrl.
    if inputDir.startswith("http") or inputDir.startswith("file"):
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if force_retrieve_input:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return
        # end if
    # end if.
    # retrieveTgzFromUrl(entryId, inputDir)
    # --- Initialize the project from the input ------------------------------
    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory argument..
        fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
        shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        # pdbFileFormats = [ entryId + ".pdb", "pdb" + entryId + ".ent.gz" ]
        # for pdbFileName in pdbFileFormats:
        # pdbFileName = "pdb" + entryId + ".ent.gz"
        # # pdbFilePath = os.path.join( inputDir, pdbFileName)
        # pdbFilePath = os.path.join(inputDir, pdbFileName)
        # if os.path.exists(pdbFilePath):
        #     break
        # tmpPdbFile = None
        # if pdbFileName.endswith('.gz'):
        pdbFilePath = entryId + ".pdb"
        # tmpPdbFile = pdbFilePath
        # if os.path.exists(pdbFilePath):
        #     os.unlink(pdbFilePath)
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        # if tmpPdbFile:
        if True:
            nTdebug("Removing tmp: %s" % pdbFilePath)
            os.unlink(pdbFilePath)
    # if inputDirOrg == inputDirCASD_NMR:
    # if True: # Default is False for this is specific to CASD-NMR
    #     nTmessage("Renaming molecule name to entry id: %s" % entryId)
    #     project.molecule.name = entryId # insufficient since all data is already initialized to disk.
    #     project.molecule.rename( entryId )
    #     project.save()
    # project.molecule.ranges = ranges # JFD: this doesn't seem to be set there exactly.
    project.molecule.superpose(ranges=ranges)
    if True:
        if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck,
                            doWhatif=doWhatif, doWattos=doWattos, doTalos=doTalos):
            nTerror("Failed to validate project read")
            return True
    # --- Optional DB storage -----------------------------------------------
    if storeCING2db:
        # Does require:
        # from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        try:
            if doStoreCING2db(entryId, ARCHIVE_CASP_ID, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
        except:
            # Deliberate broad catch: DB storage must never kill the run.
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")
    project.save()
    # --- Cleanup and result shipping ----------------------------------------
    if projectType == PROJECT_TYPE_CCPN:
        # fileNameTgz = entryId + '.tgz'
        # os.unlink(fileNameTgz) # temporary ccpn tgz
        rmdir(entryId) # temporary ccpn dir
    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        do_cmd(cmd)
def doPylintOverall(pylintFileName='pylint.txt'): "Add the ones you don't want to pylint (perhaps you know they don't work yet)" namepattern = "*.py" pylintDir = os.path.join( cingDirTmp, 'pylint' ) # pylintFileName = os.path.join( pylintDir, 'pylint.log') if os.path.exists( pylintDir ): rmdir( pylintDir ) mkdirs( pylintDir ) if os.path.exists( pylintFileName ): os.unlink(pylintFileName) excludedModuleList = [ # enable exclusions for quick testing. # cingPythonDir + "/cing/core*", # cingPythonDir + "/cing/Database*", # cingPythonDir + "/cing/Libs*", # cingPythonDir + "/cing/NRG*", # cingPythonDir + "/cing/PluginCode*", # cingPythonDir + "/cing/Scripts*", # cingPythonDir + "/cing/STAR*", # cingPythonDir + "/cing/Talos*", # Normal set: cingPythonDir + "/cing/Database/CCPN*", cingPythonDir + "/cyana2cing*", cingPythonDir + "/pdbe2*", cingPythonDir + "/queen*", cingPythonDir + "/Refine*", cingPythonDir + "/UtilsAnalysis*", cingPythonDir + "/xplorcing*" ] startdir = cingPythonDir nameList = findFiles(namepattern, startdir, exclude=excludedModuleList) # enable next line(s) to do a couple of checks only. 
# nameList = ['/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_NmrStar.py', # '/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_ccpn.py', # '/Users/jd/workspace35/cing/python/cing/PluginCode/test/test_ccpn_2.py'] nTdebug('Will unit check: ' + repr(nameList)) f = ForkOff( processes_max=cing.ncpus, max_time_to_wait=600, # on a very slow setup verbosity=2 ) job_list = [] for name in nameList: job_list.append( (doPylintByName, (name, excludedModuleList)) ) done_list = f.forkoff_start(job_list, 0) nTmessage("Finished ids: %s", done_list) for name in nameList: mod_name = pathToModuleName( name ) if mod_name in excludedModuleList: print "Skipping module: " + mod_name return pylintOutputFileName = os.path.join( pylintDir, mod_name + '.log') if not os.path.exists( pylintDir ): nTerror("Failed to find pylint output: " + pylintOutputFileName) continue if appendTextFileToFile( pylintOutputFileName, pylintFileName): nTerror("Failed to appendTextFileToFile") # nTdebug("Done appending from: %s" % pylintOutputFileName) # end for doPylintOverallSummary(pylintFileName=pylintFileName) nTmessage("Done with pylint")
def main(entryId, *extraArgList):
    """inputDir may be a directory or a url. A url needs to start with http://.

    Validate a single entry (PDB, CCPN or CING project input) with CING,
    optionally store the results in the database and/or ship a tgz of the
    project to a remote (user@host) output directory.
    Returns True on error; returns None on success or when the entry was
    already done and is skipped.
    """
    # --- Run-mode switches; fastestTest below overrides most of them. ---
    fastestTest = False # default: False
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doQueeny = True
    doTalos = True
    tgzCing = True # default: True # Create a tgz for the cing project. In case of a CING project input it will be overwritten.
    # NB leave this set to True or modify code below.
    removeCcpnDirectory = 1 # perhaps not so in the future.
    modelCount = None # default setting is None
    # ranges = None
    if fastestTest:
        modelCount = 2 # if this is more and there is only one model present it leads to an error message.
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doQueeny = False
        doTalos = False
    forceRedo = True
    forceRetrieveInput = True
    nTmessage(header)
    nTmessage(getStartMessage())
    # --- Positional argument parsing; order is fixed and indexed by the
    # IDX_* constants. ---
    # Sync below code with nrgCing#createToposTokens
    expectedArgumentList = """ verbosity inputDir outputDir pdbConvention restraintsConvention archiveType projectType storeCING2db ranges filterTopViolations filterVasco singleCoreOperation """.split()
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTmessage(
            "consider updating code to include all sequential parameters: %s"
            % str(expectedArgumentList))
        # Fewer arguments than expected is tolerated (trailing ones get
        # defaults below); more than expected is a hard error.
        if len(extraArgList) > expectedNumberOfArguments:
            nTerror("Got arguments: " + str(extraArgList))
            nTerror("Failed to get expected number of arguments: %d got %d" %
                    (expectedNumberOfArguments, len(extraArgList)))
            nTerror("Expected arguments: %s" % expectedArgumentList)
            return True
        # end if
    # end if
    entryCodeChar2and3 = entryId[1:3]
    cing.verbosity = int(extraArgList[IDX_VERBOSITY])
    inputDir = extraArgList[IDX_INPUT]
    outputDir = os.path.join(extraArgList[IDX_OUTPUT], DATA_STR,
                             entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[IDX_PDB] #@UnusedVariable
    restraintsConvention = extraArgList[IDX_RESTRAINTS]
    archiveType = extraArgList[IDX_ARCHIVE] # Only used for deriving the input location not the output.
    projectType = extraArgList[IDX_PROJECT_TYPE]
    storeCING2db = stringMeansBooleanTrue(
        getDeepByKeysOrAttributes(extraArgList, IDX_STORE_DB))
    ranges = getDeepByKeysOrAttributes(extraArgList, IDX_RANGES)
    filterTopViolations = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_TOP)
    if filterTopViolations:
        filterTopViolations = int(filterTopViolations) # change '0' to 0
    filterVasco = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_VASCO)
    if filterVasco:
        filterVasco = int(filterVasco)
    else:
        filterVasco = 1 # Default should be True
    # end if
    singleCoreOperation = getDeepByKeysOrAttributes(extraArgList, IDX_SINGLE_CORE_OPERATION)
    if singleCoreOperation:
        singleCoreOperation = int(singleCoreOperation)
    else:
        singleCoreOperation = 0 # default is off.
    # end if
    # Map archiveType onto the actual on-disk/URL layout of the input.
    if archiveType == ARCHIVE_TYPE_FLAT:
        pass
        # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    # An '@' in outputDir marks a user@host (scp-style) destination.
    isRemoteOutputDir = False
    if '@' in outputDir:
        isRemoteOutputDir = True
    # end if
    # vc = vCing('.') # argument is a fake master_ssh_url not needed here.
    archive_id = getArchiveIdFromDirectoryName(outputDir)
    nTdebug("Using program arguments:")
    nTdebug("inputDir: %s" % inputDir)
    nTdebug("outputDir: %s" % outputDir)
    nTdebug("pdbConvention: %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType: %s" % archiveType)
    nTdebug("projectType: %s" % projectType)
    nTdebug("storeCING2db: %s" % storeCING2db)
    nTdebug("ranges: %s" % ranges)
    nTdebug("filterTopViolations: %s" % filterTopViolations)
    nTdebug("filterVasco: %s" % filterVasco)
    nTdebug("singleCoreOperation: %s" % singleCoreOperation)
    nTdebug("")
    nTdebug("Using derived settings:")
    nTdebug("modelCount: %s" % modelCount)
    nTdebug("isRemoteOutputDir: %s" % isRemoteOutputDir)
    nTdebug("archive_id: %s" % archive_id)
    # For NMR_REDO required as most efficient.
    if singleCoreOperation:
        setToSingleCoreOperation()
    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if forceRedo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            # Only skip the entry when a previous run finished completely,
            # i.e. left the main index file behind.
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.
    # Work locally in tmp when the results will be shipped by ssh later.
    if isRemoteOutputDir:
        os.chdir(cingDirTmp)
    else:
        os.chdir(outputDir)
    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.
    # --- Derive the input file name from the project type and fetch it. ---
    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId
    # nTdebug("fileNameTgz: %s" % fileNameTgz)
    allowedInputProtocolList = 'http file ssh'.split()
    inputProtocal = string.split(inputDir, ':')[0]
    if inputProtocal in allowedInputProtocolList:
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if forceRetrieveInput:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType,
                               formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return
        # end if
    else:
        nTdebug(
            "Entry not retrieved which might be normal in some situations.")
    # end if.
    # --- Open/initialize the project from the fetched input. ---
    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory argument..
        # fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
        # shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        # if tmpPdbFile:
        if True:
            nTdebug("Removing tmp: %s" % pdbFilePath)
            os.unlink(pdbFilePath)
        # end if
    if ranges is not None:
        project.molecule.setRanges(ranges)
    # end if
    if archive_id:
        project.molecule.setArchiveId(archive_id)
    # end if
    project.molecule.superpose(ranges=ranges)
    if filterTopViolations and not project.filterHighRestraintViol():
        nTerror("Failed to filterHighRestraintViol")
    ####> MAIN UTILITY HERE
    if 0: # DEFAULT 0
        project.save()
    if project.validate(htmlOnly=htmlOnly, ranges=ranges,
                        doProcheck=doProcheck, doWhatif=doWhatif,
                        doWattos=doWattos, doQueeny=doQueeny, doTalos=doTalos,
                        filterVasco=filterVasco):
        nTerror("Failed to validate project read")
        return True
    # end if filterVasco
    # Write a single PDB file containing all models
    # according to IUPAC conventions
    project.export2PDB()
    project.save()
    if storeCING2db and archive_id:
        # Does require:
        #from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        # archive_id = ARCHIVE_DEV_NRG_ID
        # if isProduction:
        #     archive_id = ARCHIVE_NRG_ID
        # NOTE(review): the bare except below is deliberate best-effort --
        # per the comment above, DB storage must never crash the run.
        try:
            if doStoreCING2db(entryId, archive_id, project=project):
                nTerror(
                    "Failed to store CING project's data to DB but continuing."
                )
        except:
            nTtracebackError()
            nTerror(
                "Failed to store CING project's data due to above traceback error."
            )
    # --- Clean up temporary CCPN input and package/ship the results. ---
    if projectType == PROJECT_TYPE_CCPN:
        # fileNameTgz = entryId + '.tgz'
        os.unlink(fileNameTgz) # temporary ccpn tgz
        if removeCcpnDirectory:
            rmdir(entryId) # ccpn dir may contain vasco info.
    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        nTdebug("cmd: %s" % cmd)
        # do_cmd(cmd)
        status, result = commands.getstatusoutput(cmd)
        if status:
            nTerror("Failed to tar status: %s with result %s" % (status, result))
            return True
        if isRemoteOutputDir:
            if putFileBySsh(tgzFileNameCing, outputDir, ntriesMax=2):
                nTerror(
                    "Failed to send File By Scp status: %s with result %s"
                    % (status, result))
                nTerror("Maintaining results.")
                return True
            # end if
            # Shipped successfully; the local copies are no longer needed.
            nTmessage("Removing tgz result: %s" % tgzFileNameCing)
            os.remove(tgzFileNameCing)
            nTmessage("Removing cing dir itself: %s" % directoryNameCing)
            rmdir(directoryNameCing)
        else:
            # do NOT remove local copy
            pass
def getCingEntryInfo(self):
    """Returns True for error

    Scan the PDB-CING file system (DATA_STR/<2-char subdir>/<entry>) and
    classify every entry into the NTlist attributes set below:
    obsolete, tried, crashed, stopped, done and todo.
    Will remove entry directories if they do not occur in NRG up to a maximum
    number as not to whip out every one in a single blow by accident.
    """
    nTmessage("Get the entries tried, todo, crashed, and stopped in PDB-CING from file system.")
    self.entry_list_obsolete = NTlist()
    self.entry_list_tried = NTlist()
    self.entry_list_crashed = NTlist()
    self.entry_list_stopped = NTlist() # mutely exclusive from entry_list_crashed
    self.entry_list_done = NTlist()
    self.entry_list_todo = NTlist()
    subDirList = os.listdir(DATA_STR)
    for subDir in subDirList:
        # Only the two-character hash directories (e.g. 'br' for 1brv) count.
        if len(subDir) != 2:
            if subDir != DS_STORE_STR:
                nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
            continue
        entryList = os.listdir(os.path.join(DATA_STR, subDir))
        for entryDir in entryList:
            entry_code = entryDir
            if not is_pdb_code(entry_code):
                if entry_code != DS_STORE_STR:
                    nTerror("String doesn't look like a pdb code: " + entry_code)
                continue
            # nTdebug("Working on: " + entry_code)
            entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
            # Entries no longer in the PDB are obsoleted here, but at most
            # entry_to_delete_count_max per run as a safety net.
            if not entry_code in self.entry_list_pdb:
                nTwarning("Found entry %s in PDB-CING-CING but not in PDB. Will be obsoleted in PDB-CING too" % entry_code)
                if len(self.entry_list_obsolete) < self.entry_to_delete_count_max:
                    rmdir(entrySubDir)
                    self.entry_list_obsolete.append(entry_code)
                else:
                    nTerror("Entry %s in PDB-CING not obsoleted since there were already removed: %s" % (
                        entry_code, self.entry_to_delete_count_max))
                # end if
            cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
            if not os.path.exists(cingDirEntry):
                nTmessage("Failed to find directory: %s" % cingDirEntry)
                continue
            # Look for last log file
            logList = glob(entrySubDir + '/log_validateEntry/*.log')
            if not logList:
                nTmessage("Failed to find any log file in directory: %s" % entrySubDir)
                continue
            # .cing directory and .log file present so it was tried to start but might not have finished
            self.entry_list_tried.append(entry_code)
            logLastFile = logList[-1]
            # nTdebug("Found logLastFile %s" % logLastFile)
            # set timeTaken = (` grep 'CING took :' $logFile | gawk '{print $(NF-1)}' `)
            # text = readTextFromFile(logLastFile)
            # Scan the log line by line for the run time and for a crash
            # (stack trace). Once a crash is seen the remainder of the log
            # is echoed to debug output.
            entryCrashed = False
            for r in AwkLike(logLastFile):
                line = r.dollar[0]
                if entryCrashed:
                    nTdebug(line)
                if line.startswith('CING took :'):
                    # nTdebug("Matched line: %s" % line)
                    timeTakenStr = r.dollar[r.NF - 1]
                    self.timeTakenDict[entry_code] = float(timeTakenStr)
                    # nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                if line.startswith('Traceback (most recent call last)'):
                    nTdebug("%s Matched line: %s" % (entry_code, line))
                    if entry_code in self.entry_list_crashed:
                        nTwarning("%s was already found before; not adding again." % entry_code)
                    else:
                        self.entry_list_crashed.append(entry_code)
                        entryCrashed = True
            if entryCrashed:
                continue # don't mark it as stopped anymore.
            # end for AwkLike
            if not self.timeTakenDict.has_key(entry_code):
                # was stopped by time out or by user or by system (any other type of stop but stack trace)
                nTmessage("%s Since CING end message was not found assumed to have stopped" % entry_code)
                self.entry_list_stopped.append(entry_code)
                continue
            # Look for end statement from CING which shows it wasn't killed before it finished.
            indexFileEntry = os.path.join(cingDirEntry, "index.html")
            if not os.path.exists(indexFileEntry):
                nTmessage("%s Since index file %s was not found assumed to have stopped" % (entry_code, indexFileEntry))
                self.entry_list_stopped.append(entry_code)
                continue
            projectHtmlFile = os.path.join(cingDirEntry, entry_code, "HTML/index.html")
            if not os.path.exists(projectHtmlFile):
                nTmessage("%s Since project html file %s was not found assumed to have stopped" % (entry_code, projectHtmlFile))
                self.entry_list_stopped.append(entry_code)
                continue
            # Disabled check for the molecule image.
            if False: # Default is True
                molGifFile = os.path.join(cingDirEntry, entry_code, "HTML/mol.gif")
                if not os.path.exists(molGifFile):
                    # NOTE(review): message interpolates projectHtmlFile, not
                    # molGifFile -- looks like a copy/paste slip; confirm.
                    nTmessage("%s Since mol.gif file %s was not found assumed to have stopped" % (entry_code, projectHtmlFile))
                    self.entry_list_stopped.append(entry_code)
                    continue
            self.entry_list_done.append(entry_code)
        # end for entryDir
    # end for subDir
    timeTakenList = NTlist() # local variable.
    timeTakenList.addList(self.timeTakenDict.values())
    nTmessage("Time taken by CING by statistics\n%s" % timeTakenList.statsFloat())
    if not self.entry_list_tried:
        nTerror("Failed to find entries that CING tried.")
    # todo = all PDB entries minus those done.
    self.entry_list_todo.addList(self.entry_list_pdb)
    self.entry_list_todo = self.entry_list_todo.difference(self.entry_list_done)
    nTmessage("Found %s entries that CING tried (T)." % len(self.entry_list_tried))
    nTmessage("Found %s entries that CING crashed (C)." % len(self.entry_list_crashed))
    nTmessage("Found %s entries that CING stopped (S)." % len(self.entry_list_stopped))
    if not self.entry_list_done:
        nTerror("Failed to find entries that CING did.")
    nTmessage("Found %s entries that CING did (B=A-C-S)." % len(self.entry_list_done))
    nTmessage("Found %s entries todo (A-B)." % len(self.entry_list_todo))
    nTmessage("Found %s entries in PDB-CING made obsolete." % len(self.entry_list_obsolete))
def prepare(self):
    '''Return True on error.

    Set up the BMRB/PDB matching work directory: download the ADIT
    bmrb_id/pdb_id mapping, write bmrb.csv from the BMRB STAR files on disk
    (cross-checked against the DB) and write pdbNmrTable.csv with all NMR
    PDB entries.
    '''
    # Optionally wipe the work dir, then (re)create it from the SVN data copy.
    if self.restartFromScratch:
        rmdir(matchBmrbPdbDir)
    if not os.path.exists(matchBmrbPdbDir):
        csvFileDir = os.path.join(cingRoot, matchBmrbPdbDataDir)
        nTmessage("Recreating data dir %s from SVN %s" % (matchBmrbPdbDir, csvFileDir))
        # mkdirs( matchBmrbPdbDir )
        copytree(csvFileDir, matchBmrbPdbDir)
    else:
        nTmessage("Reusing existing data dir " + matchBmrbPdbDir)
    os.chdir(matchBmrbPdbDir)
    if 1: # DEFAULT: 1
        # Download the ADIT mapping and prepend a CSV header row.
        nTmessage("Getting ADIT from: %s" % self.adit_url)
        if os.path.exists(self.adit_fn):
            os.unlink(self.adit_fn) # prevent buildup of endless copies.
        wgetProgram = ExecuteProgram('wget --no-verbose %s' % self.adit_url,
                                     redirectOutputToFile ='getAdit.log' )
        exitCode = wgetProgram()
        if exitCode:
            nTerror("Failed to download file %s" % self.adit_url)
            return True
        if not os.path.exists(self.adit_fn):
            nTerror("Failed to find downloaded file %s" % self.adit_url)
            return True
        columnOrder = 'bmrb_id pdb_id'.split()
        if addColumnHeaderRowToCsvFile(self.adit_fn, columnOrder):
            nTerror("Failed to add header row to " + self.adit_fn)
            return True
        nTmessage("Got the ADIT info")
    if 1: # DEFAULT: 1
        # Build bmrb.csv from the bmr*_21.str files found on disk and report
        # any mismatch with the entries known to the database.
        nTmessage("Getting BMRB file list from : %s" % bmrbDir)
        bmrbFileList = findFiles("bmr*_21.str", bmrbDir)
        bmrbIdList = []
        for bmrbFile in bmrbFileList:
            _directory, basename, _extension = nTpath(bmrbFile)
            bmrbId = int(basename[3:-3]) # bmr970_21 -> 970
            bmrbIdList.append(bmrbId)
        bmrbIdList.sort()
        bmrbId2List = getBmrbEntries()
        bmrbIdNTList = NTlist(*bmrbIdList)
        bmrbId2NTList = NTlist(*bmrbId2List)
        bmrbIdNTmissingList = bmrbIdNTList.difference(bmrbId2NTList)
        if bmrbIdNTmissingList:
            nTmessage("Found %d entries on file but not in DB: %s" % (
                len(bmrbIdNTmissingList), str(bmrbIdNTmissingList)))
        bmrbId2NTmissingList = bmrbId2NTList.difference(bmrbIdNTList)
        if bmrbId2NTmissingList:
            nTmessage("Found %d entries in DB but not on file: %s" % (
                len(bmrbId2NTmissingList), str(bmrbId2NTmissingList)))
        # NOTE(review): threshold is 40 but the warning text says "one
        # hundred" -- confirm the intended cutoff.
        if len( bmrbIdNTmissingList + bmrbId2NTmissingList ) > 40: # was 18 + 3=21 on April 11, 2011.
            nTwarning("More than one hundred inconsistencies between BMRB DB and on file.")
        bmrbIdStrList = ['bmrb_id'] + [ str(x) for x in bmrbIdList] # add header for CSV reader.
        fileName = os.path.join( matchBmrbPdbDir, 'bmrb.csv')
        txt = '\n'.join(bmrbIdStrList)
        if writeTextToFile(fileName, txt):
            return True
    if 1: # DEFAULT: 1
        # Write the list of NMR PDB entries to pdbNmrTable.csv.
        dbms2 = DBMS()
        pdbList = getPdbEntries(onlyNmr = True)
        pdbNmrTable = Relation('pdbNmr', dbms2, columnList=['pdb_id'])
        pdbIdColumn = pdbNmrTable.getColumnByIdx(0) # pylint: disable=W0612
        pdbIdColumn += pdbList
        pdbNmrTable.writeCsvFile('pdbNmrTable.csv')
def prepare(self):
    '''Return True on error.

    Set up the BMRB/PDB matching work directory: download the ADIT
    bmrb_id/pdb_id mapping, write bmrb.csv from the BMRB STAR files on disk
    (cross-checked against the DB) and write pdbNmrTable.csv with all NMR
    PDB entries.
    '''
    # Optionally wipe the work dir, then (re)create it from the SVN data copy.
    if self.restartFromScratch:
        rmdir(matchBmrbPdbDir)
    if not os.path.exists(matchBmrbPdbDir):
        csvFileDir = os.path.join(cingRoot, matchBmrbPdbDataDir)
        nTmessage("Recreating data dir %s from SVN %s" % (matchBmrbPdbDir, csvFileDir))
        # mkdirs( matchBmrbPdbDir )
        copytree(csvFileDir, matchBmrbPdbDir)
    else:
        nTmessage("Reusing existing data dir " + matchBmrbPdbDir)
    os.chdir(matchBmrbPdbDir)
    if 1: # DEFAULT: 1
        # Download the ADIT mapping and prepend a CSV header row.
        nTmessage("Getting ADIT from: %s" % self.adit_url)
        if os.path.exists(self.adit_fn):
            os.unlink(self.adit_fn) # prevent buildup of endless copies.
        wgetProgram = ExecuteProgram('wget --no-verbose %s' % self.adit_url,
                                     redirectOutputToFile='getAdit.log')
        exitCode = wgetProgram()
        if exitCode:
            nTerror("Failed to download file %s" % self.adit_url)
            return True
        if not os.path.exists(self.adit_fn):
            nTerror("Failed to find downloaded file %s" % self.adit_url)
            return True
        columnOrder = 'bmrb_id pdb_id'.split()
        if addColumnHeaderRowToCsvFile(self.adit_fn, columnOrder):
            nTerror("Failed to add header row to " + self.adit_fn)
            return True
        nTmessage("Got the ADIT info")
    if 1: # DEFAULT: 1
        # Build bmrb.csv from the bmr*_21.str files found on disk and report
        # any mismatch with the entries known to the database.
        nTmessage("Getting BMRB file list from : %s" % bmrbDir)
        bmrbFileList = findFiles("bmr*_21.str", bmrbDir)
        bmrbIdList = []
        for bmrbFile in bmrbFileList:
            _directory, basename, _extension = nTpath(bmrbFile)
            bmrbId = int(basename[3:-3]) # bmr970_21 -> 970
            bmrbIdList.append(bmrbId)
        bmrbIdList.sort()
        bmrbId2List = getBmrbEntries()
        bmrbIdNTList = NTlist(*bmrbIdList)
        bmrbId2NTList = NTlist(*bmrbId2List)
        bmrbIdNTmissingList = bmrbIdNTList.difference(bmrbId2NTList)
        if bmrbIdNTmissingList:
            nTmessage("Found %d entries on file but not in DB: %s" % (
                len(bmrbIdNTmissingList), str(bmrbIdNTmissingList)))
        bmrbId2NTmissingList = bmrbId2NTList.difference(bmrbIdNTList)
        if bmrbId2NTmissingList:
            nTmessage(
                "Found %d entries in DB but not on file: %s" %
                (len(bmrbId2NTmissingList), str(bmrbId2NTmissingList)))
        # NOTE(review): threshold is 40 but the warning text says "one
        # hundred" -- confirm the intended cutoff.
        if len(bmrbIdNTmissingList +
               bmrbId2NTmissingList
               ) > 40: # was 18 + 3=21 on April 11, 2011.
            nTwarning(
                "More than one hundred inconsistencies between BMRB DB and on file."
            )
        bmrbIdStrList = ['bmrb_id'] + [str(x) for x in bmrbIdList
                                       ] # add header for CSV reader.
        fileName = os.path.join(matchBmrbPdbDir, 'bmrb.csv')
        txt = '\n'.join(bmrbIdStrList)
        if writeTextToFile(fileName, txt):
            return True
    if 1: # DEFAULT: 1
        # Write the list of NMR PDB entries to pdbNmrTable.csv.
        dbms2 = DBMS()
        pdbList = getPdbEntries(onlyNmr=True)
        pdbNmrTable = Relation('pdbNmr', dbms2, columnList=['pdb_id'])
        pdbIdColumn = pdbNmrTable.getColumnByIdx(0) # pylint: disable=W0612
        pdbIdColumn += pdbList
        pdbNmrTable.writeCsvFile('pdbNmrTable.csv')
def main(entryId, *extraArgList):
    """inputDir may be a directory or a url. A url needs to start with http://.

    Validate a single entry (PDB, CCPN or CING project input) with CING for
    the CASD-NMR pipeline, optionally store the results in the database and
    package the resulting project as a tgz.
    Returns True on error; None on success or when the entry was skipped.
    """
    # --- Run-mode switches. ---
    # NOTE(review): fastestTest is left True here against its own
    # "default: False" comment -- looks like a debug setting left on; confirm.
    fastestTest = True # default: False
    # ranges=AUTO_STR # default is None retrieved from DBMS csv files.
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doTalos = True
    tgzCing = True # default: True # Create a tgz for the cing project. In case of a CING project input it will be overwritten.
    modelCount = None # default setting is None
    if fastestTest:
        modelCount = 3
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doTalos = False
    force_redo = True
    force_retrieve_input = True
    nTmessage(header)
    nTmessage(getStartMessage())
    # --- Positional argument parsing (fixed order, exact count required). ---
    expectedArgumentList = [
        'inputDir', 'outputDir', 'pdbConvention', 'restraintsConvention',
        'archiveType', 'projectType', 'storeCING2db'
    ]
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTerror("Got arguments: " + repr(extraArgList))
        nTerror("Failed to get expected number of arguments: %d got %d" %
                (expectedNumberOfArguments, len(extraArgList)))
        nTerror("Expected arguments: %s" % expectedArgumentList)
        return True
    entryCodeChar2and3 = entryId[1:3]
    inputDir = extraArgList[0]
    outputDir = os.path.join(extraArgList[1], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[2] #@UnusedVariable
    restraintsConvention = extraArgList[3]
    archiveType = extraArgList[4]
    projectType = extraArgList[5]
    storeCING2db = False
    # NOTE(review): the guard below is always true after the exact-count check
    # above, so storeCING2db is always taken from extraArgList[6]; confirm.
    if len(extraArgList) >= expectedNumberOfArguments:
        storeCING2db = extraArgList[6]
    # Map archiveType onto the actual layout of the input location.
    if archiveType == ARCHIVE_TYPE_FLAT:
        pass # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    ranges = None
    # targetId = getTargetForFullEntryName(entryId)
    # if not targetId:
    #     nTerror("Failed to getTargetForFullEntryName for entryId: %s" % entryId)
    #     return True
    # ranges = getRangesForTarget(targetId)
    # if ranges == None:
    #     nTerror("Failed to getRangesForTarget for targetId: %s" % targetId)
    #     return True
    nTdebug("Using:")
    nTdebug("inputDir: %s" % inputDir)
    nTdebug("outputDir: %s" % outputDir)
    nTdebug("pdbConvention: %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType: %s" % archiveType)
    nTdebug("projectType: %s" % projectType)
    nTdebug("modelCount: %s" % modelCount)
    nTdebug("storeCING2db: %s" % storeCING2db)
    nTdebug("ranges: %s" % ranges)
    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if force_redo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            # Only skip the entry when a previous run finished completely.
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.
    os.chdir(outputDir)
    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.
    # --- Derive the input file name from the project type. ---
    # extension = '.tgz'
    formatFileName = '%s.tgz'
    # fileNameTgz = entryId + '.tgz'
    if projectType == PROJECT_TYPE_CING:
        # fileNameTgz = entryId + '.cing.tgz'
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId
    # nTdebug("fileNameTgz: %s" % fileNameTgz)
    # if true will do retrieveTgzFromUrl.
    if inputDir.startswith("http") or inputDir.startswith("file"):
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if force_retrieve_input:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType,
                               formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return
        # end if
    # end if.
    # retrieveTgzFromUrl(entryId, inputDir)
    # --- Open/initialize the project from the fetched input. ---
    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory argument..
        fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
        shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        # pdbFileFormats = [ entryId + ".pdb", "pdb" + entryId + ".ent.gz" ]
        # for pdbFileName in pdbFileFormats:
        #     pdbFileName = "pdb" + entryId + ".ent.gz"
        #     # pdbFilePath = os.path.join( inputDir, pdbFileName)
        #     pdbFilePath = os.path.join(inputDir, pdbFileName)
        #     if os.path.exists(pdbFilePath):
        #         break
        # tmpPdbFile = None
        # if pdbFileName.endswith('.gz'):
        pdbFilePath = entryId + ".pdb"
        # tmpPdbFile = pdbFilePath
        # if os.path.exists(pdbFilePath):
        #     os.unlink(pdbFilePath)
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        # if tmpPdbFile:
        if True:
            nTdebug("Removing tmp: %s" % pdbFilePath)
            os.unlink(pdbFilePath)
    # if inputDirOrg == inputDirCASD_NMR:
    # if True: # Default is False for this is specific to CASD-NMR
    #     nTmessage("Renaming molecule name to entry id: %s" % entryId)
    #     project.molecule.name = entryId # insufficient since all data is already initialized to disk.
    #     project.updateProject()
    #     project.molecule.rename( entryId )
    #     project.save()
    # project.molecule.ranges = ranges # JFD: this doesn't seem to be set there exactly.
    project.molecule.superpose(ranges=ranges)
    if True:
        if project.validate(htmlOnly=htmlOnly, ranges=ranges,
                            doProcheck=doProcheck, doWhatif=doWhatif,
                            doWattos=doWattos, doTalos=doTalos):
            nTerror("Failed to validate project read")
            return True
    if storeCING2db:
        # Does require:
        #from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        # NOTE(review): the bare except below is deliberate best-effort --
        # per the comment above, DB storage must never crash the run.
        try:
            if doStoreCING2db(entryId, ARCHIVE_CASP_ID, project=project):
                nTerror(
                    "Failed to store CING project's data to DB but continuing."
                )
        except:
            nTtracebackError()
            nTerror(
                "Failed to store CING project's data due to above traceback error."
            )
    project.save()
    # --- Clean up temporary CCPN input and package the results. ---
    if projectType == PROJECT_TYPE_CCPN:
        # fileNameTgz = entryId + '.tgz'
        # os.unlink(fileNameTgz) # temporary ccpn tgz
        rmdir(entryId) # temporary ccpn dir
    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        do_cmd(cmd)