def main(): """This is a potentially dangerous script. It took JFD an hour one time to realize it was called inadvertently by not having it wrappen in a function. """ # parameters for doScriptOnEntryList startDir = os.path.join(cingDirTmp, subdir) pythonScriptFileName = os.path.join(cingDirScripts, 'getPhiPsi.py') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB.LIS') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB_WI_SELECT_Rfactor0.21_Res2.0_2009-02-28_noObs.LIS') entryListFileName = os.path.join( cingDirScripts, DATA_STR, 'PDB_WI_SELECT_Rfactor0.19_Res1.3_2009-02-28_noObs.LIS') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB_todo.txt') start_entry_id = 0 # default 0 max_entries_todo = 1 # default a ridiculously large number like 999999 doScriptOnEntryList( pythonScriptFileName, entryListFileName, startDir, max_time_to_wait=240, # 1gkp took over 120 processes_max=8, # default 3 start_entry_id=start_entry_id, # default 0 max_entries_todo= max_entries_todo # default a ridiculously large number like 999999 )
def runCing(self): """On self.entry_list_todo. Return True on error. """ entryListFileName = "entry_list_todo.csv" writeTextToFile(entryListFileName, toCsv(self.entry_list_todo)) pythonScriptFileName = os.path.join(cingDirScripts, 'validateEntry.py') # inputDir = 'file://' + self.results_dir + '/recoordSync' inputDir = 'file:///Users/jd/wattosTestingPlatform/pdb/data/structures/divided/pdb' outputDir = self.results_dir extraArgList = (inputDir, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23, PROJECT_TYPE_PDB) if doScriptOnEntryList( pythonScriptFileName, entryListFileName, self.results_dir, processes_max=self.processes_max, delay_between_submitting_jobs= 5, # why is this so long? because of time outs at tang? max_time_to_wait=self.max_time_to_wait, # <Molecule "2p80" (C:20,R:1162,A:24552,M:20)> start_entry_id=0, # default. max_entries_todo=self.max_entries_todo, extraArgList=extraArgList, shuffleBeforeSelecting=True): nTerror("Failed to doScriptOnEntryList") return True
def runCing(self): """On self.entry_list_todo. Return True on error. """ entryListFileName = "entry_list_todo.csv" writeTextToFile(entryListFileName, toCsv(self.entry_list_todo)) pythonScriptFileName = os.path.join(cingDirScripts, 'validateEntry.py') # inputDir = 'file://' + self.results_dir + '/recoordSync' inputDir = 'file:///Users/jd/wattosTestingPlatform/pdb/data/structures/divided/pdb' outputDir = self.results_dir extraArgList = (inputDir, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23, PROJECT_TYPE_PDB) if doScriptOnEntryList(pythonScriptFileName, entryListFileName, self.results_dir, processes_max = self.processes_max, delay_between_submitting_jobs = 5, # why is this so long? because of time outs at tang? max_time_to_wait = self.max_time_to_wait, # <Molecule "2p80" (C:20,R:1162,A:24552,M:20)> start_entry_id = 0, # default. max_entries_todo = self.max_entries_todo, extraArgList = extraArgList, shuffleBeforeSelecting = True ): nTerror("Failed to doScriptOnEntryList") return True
def runCing(self): """On self.entry_list_todo. Return True on error. """ entryListFileName = "entry_list_todo.csv" writeTextToFile(entryListFileName, toCsv(self.entry_list_todo)) pythonScriptFileName = os.path.join(cingDirScripts, 'validateEntry.py') # inputDir = 'file://' + self.results_dir + '/recoordSync' inputDir = 'http://www.bmrb.wisc.edu/ftp/pub/bmrb/nmr_pdb_integrated_data/coordinates_restraints_chemshifts/all/ccpn/' outputDir = self.results_dir extraArgList = (inputDir, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23, PROJECT_TYPE_PDB) if doScriptOnEntryList(pythonScriptFileName, entryListFileName, self.results_dir, processes_max = self.processes_max, delay_between_submitting_jobs = 5, # why is this so long? because of time outs at tang? max_time_to_wait = self.max_time_to_wait, # <Molecule "2p80" (C:20,R:1162,A:24552,M:20)> start_entry_id = 0, # default. max_entries_todo = self.max_entries_todo, extraArgList = extraArgList, shuffleBeforeSelecting = True ): nTerror("Failed to doScriptOnEntryList") return True
def test_DoScriptOnEntryList(self):
    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    self.failIf(os.chdir(cingDirTmpTest),
                msg="Failed to change to test directory for files: " + cingDirTmpTest)
    entryListFileName = "entry_list_todo.csv"
    entry_list_todo = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    writeTextToFile(entryListFileName, toCsv(entry_list_todo))
    pythonScriptFileName = os.path.join(cingDirScripts, 'doNothing.py')
    extraArgList = ('.', '.', '.', '.', ARCHIVE_TYPE_BY_CH23, PROJECT_TYPE_PDB)
    self.assertFalse(doScriptOnEntryList(pythonScriptFileName, entryListFileName, '.',
                                         processes_max=8,
                                         delay_between_submitting_jobs=5,
                                         max_time_to_wait=20,
                                         start_entry_id=0,
                                         max_entries_todo=1,
                                         extraArgList=extraArgList,
                                         shuffleBeforeSelecting=True))
def main(): """This is a potentially dangerous script. It took JFD an hour one time to realize it was called inadvertently by not having it wrappen in a function. """ # parameters for doScriptOnEntryList startDir = os.path.join(cingDirTmp, subdir) pythonScriptFileName = os.path.join(cingDirScripts, 'getPhiPsi.py') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB.LIS') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB_WI_SELECT_Rfactor0.21_Res2.0_2009-02-28_noObs.LIS') entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB_WI_SELECT_Rfactor0.19_Res1.3_2009-02-28_noObs.LIS') # entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'PDB_todo.txt') start_entry_id =0 # default 0 max_entries_todo =1 # default a ridiculously large number like 999999 doScriptOnEntryList(pythonScriptFileName, entryListFileName, startDir, max_time_to_wait = 240, # 1gkp took over 120 processes_max = 8, # default 3 start_entry_id = start_entry_id, # default 0 max_entries_todo = max_entries_todo # default a ridiculously large number like 999999 )
def refine(self): """On self.entry_list_todo. Return True on error. NB. On 2012-08-09 a cloud based method was implemented using nrgCing.py """ entryListFileName = "entry_list_todo.csv" writeTextToFile(entryListFileName, toCsv(self.entry_list_todo)) pythonScriptFileName = os.path.join(cingDirScripts, 'refineEntry.py') # inputDir = 'file://' + self.nrgCing.results_dir + '/' + self.inputDir # NB input is from NrgCing. inputDir = 'file://' + self.nrgCing.results_dir + '/' + DATA_STR outputDir = self.results_dir storeCING2db = "1" # DEFAULT: '1' All arguments need to be strings. filterTopViolations = '1' # DEFAULT: '1' filterVasco = '0' singleCoreOperation = '1' # Tune this to: # verbosity inputDir outputDir # pdbConvention restraintsConvention archiveType projectType # storeCING2db ranges filterTopViolations filterVasco # singleCoreOperation extraArgList = (str(cing.verbosity), inputDir, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23_BY_ENTRY, PROJECT_TYPE_CING, storeCING2db, CV_RANGES_STR, filterTopViolations, filterVasco, singleCoreOperation) if doScriptOnEntryList( pythonScriptFileName, entryListFileName, self.results_dir, processes_max=self.processes_max, delay_between_submitting_jobs= 5, # why is this so long? because of time outs at tang? max_time_to_wait=self.max_time_to_wait, start_entry_id=0, max_entries_todo=self.max_entries_todo, extraArgList=extraArgList): nTerror("Failed to doScriptOnEntryList") return True
def refine(self): """On self.entry_list_todo. Return True on error. NB. On 2012-08-09 a cloud based method was implemented using nrgCing.py """ entryListFileName = "entry_list_todo.csv" writeTextToFile(entryListFileName, toCsv(self.entry_list_todo)) pythonScriptFileName = os.path.join(cingDirScripts, 'refineEntry.py') # inputDir = 'file://' + self.nrgCing.results_dir + '/' + self.inputDir # NB input is from NrgCing. inputDir = 'file://' + self.nrgCing.results_dir + '/' + DATA_STR outputDir = self.results_dir storeCING2db = "1" # DEFAULT: '1' All arguments need to be strings. filterTopViolations = '1' # DEFAULT: '1' filterVasco = '0' singleCoreOperation = '1' # Tune this to: # verbosity inputDir outputDir # pdbConvention restraintsConvention archiveType projectType # storeCING2db ranges filterTopViolations filterVasco # singleCoreOperation extraArgList = ( str(cing.verbosity), inputDir, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23_BY_ENTRY, PROJECT_TYPE_CING, storeCING2db, CV_RANGES_STR, filterTopViolations, filterVasco, singleCoreOperation) if doScriptOnEntryList(pythonScriptFileName, entryListFileName, self.results_dir, processes_max = self.processes_max, delay_between_submitting_jobs = 5, # why is this so long? because of time outs at tang? max_time_to_wait = self.max_time_to_wait, start_entry_id = 0, max_entries_todo = self.max_entries_todo, extraArgList=extraArgList): nTerror("Failed to doScriptOnEntryList") return True
# parameters for validateEntry
# inputDir = '/Volumes/proteins/var/www/html/Education/Validation/HTML/Exercise_1/Data/'
# inputDir = '/Users/jd/Sites/cing/in/Tests/data/cyana'
inputDir = os.path.join(cingDirTestsData, "cing")
outputDir = startDir
pdbConvention = '.'
restraintsConvention = '.'
storeCING2db = "0"
filterTopViolations = '0'
filterVasco = '0'
extraArgList = (str(cing.verbosity), inputDir, outputDir, pdbConvention, restraintsConvention,
                repr(ARCHIVE_TYPE_FLAT), repr(PROJECT_TYPE_CING),
                storeCING2db, CV_RANGES_STR, filterTopViolations, filterVasco)
doScriptOnEntryList(pythonScriptFileName, entryListFileName, startDir,
                    processes_max=8,
                    max_time_to_wait=12000,  # 1y4o took more than 600. This is one of the optional arguments.
                    start_entry_id=0,
                    max_entries_todo=10,
                    extraArgList=extraArgList)
cingDirNRG = os.path.join(cingPythonDir, 'cing', 'NRG')
pythonScriptFileName = os.path.join(cingDirNRG, 'doAnnotateCasdNmr.py')
if False:
    entryListFileName = os.path.join(startDir, 'list', 'entry_list_all.csv')
    # entryListFileName = os.path.join(startDir, 'list', 'entry_list_redo.csv')
else:
    entryListFileName = os.path.join(startDir, 'list', 'entry_list_single.csv')
    entryList = 'VpR247Cheshire'.split()
    writeEntryListToFile(entryListFileName, entryList)
extraArgList = ()
doScriptOnEntryList(pythonScriptFileName, entryListFileName, startDir,
                    processes_max=2,
                    delay_between_submitting_jobs=2,
                    max_time_to_wait=6000,
                    start_entry_id=0,
                    max_entries_todo=100,
                    expectPdbEntryList=False,
                    extraArgList=extraArgList)


def annotateLoop():
    """As an alternative to the strategy above, use a simple loop that does not put the log files
    nicely into separate directories etc.
    NOT USED AFTER MOVED HERE.
    """
    maxCities = 100
    maxEntries = 100
    entryList = ['CGR26A']
    # cityList = ['Cheshire', 'Frankfurt', 'Lyon', 'Paris', 'Piscataway', 'Seattle', 'Utrecht']
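# The docstring of annotateLoop() above describes a simple loop that leaves all log files in one
# directory instead of the per-entry layout produced by doScriptOnEntryList(). The original body is
# truncated here, so the sketch below is an assumption: the helper name annotateLoopSketch() and the
# command-line arguments passed to the script are illustrative only, not the original implementation.
import subprocess

def annotateLoopSketch(entryList, cityList, pythonScriptFileName):
    for entryCode in entryList:
        for city in cityList:
            logFileName = "%s_%s.log" % (entryCode, city)  # all logs end up in the current directory
            with open(logFileName, 'w') as logFile:
                # Hypothetical invocation; adjust the arguments to whatever doAnnotateCasdNmr.py expects.
                subprocess.call(['python', pythonScriptFileName, entryCode, city],
                                stdout=logFile, stderr=subprocess.STDOUT)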
from cing.NRG.CaspNmrMassageCcpnProject import baseDir
from cing.NRG.PDBEntryLists import writeEntryListToFile
from cing.Scripts.doScriptOnEntryList import doScriptOnEntryList

cing.verbosity = cing.verbosityDebug
cingDirNRG = os.path.join(cingPythonDir, 'cing', 'NRG')
pythonScriptFileName = os.path.join(cingDirNRG, 'doAnnotateCaspNmr.py')
if False:
    entryListFileName = os.path.join(baseDir, 'list', 'entry_list_all.csv')
    # entryListFileName = os.path.join(startDir, 'list', 'entry_list_redo.csv')
else:
    entryListFileName = os.path.join(baseDir, 'list', 'entry_list_single.csv')
    # entryList = 'T0538Org T0538TS001 T0538TS039'.split()
    # entryList = 'T0538Org'.split()
    # entryList = 'T0538TS001 T0538TS002 T0538TS257'.split()
    entryList = 'T0538TS328'.split()
    writeEntryListToFile(entryListFileName, entryList)
extraArgList = ()
doScriptOnEntryList(pythonScriptFileName, entryListFileName, baseDir,
                    processes_max=2,
                    delay_between_submitting_jobs=2,
                    max_time_to_wait=6000,
                    start_entry_id=0,
                    max_entries_todo=100,
                    expectPdbEntryList=False,
                    extraArgList=extraArgList)
# get master results file
calcDataFile = os.path.join(inputDirCASD_NMR, 'calcData.json')
calcData = json.load(open(calcDataFile))
# Get all entries for CASD 2013
# entryList = list(calcData.keys())
# entryList = [tt[0] for tt in sorted(calcData.items()) if tt[1].get('PDBcode') == '2M5O']
# entryList.remove('2m2e_Lyon_263')
# entryList = ['2m2e_Lyon_263', ]
outputDir = startDir
extraArgList = (inputDirCASD_NMR, outputDir, '.', '.', ARCHIVE_TYPE_BY_CH23_BY_ENTRY, PROJECT_TYPE_CCPN)
max_time_to_wait = 60 * 60 * 6  # 2p80 took the longest: 5.2 hours.
doScriptOnEntryList(pythonScriptFileName, None, startDir,
                    processes_max=2,
                    delay_between_submitting_jobs=5,  # why is this so long? because of time outs at tang?
                    max_time_to_wait=max_time_to_wait,  # 1y4o took more than 600. This is one of the optional arguments.
                                                        # 1ai0 took over 20 min; let's set this to 1 hour
                    start_entry_id=0,
                    max_entries_todo=200,
                    expectPdbEntryList=False,
                    entryList=entryList,
                    extraArgList=extraArgList)
pythonScriptFileName = os.path.join(cingDirScripts, 'validateEntry.py')
# entryListFileName = os.path.join('/Users/jd', 'entryCodeList.csv')
entryListFileName = os.path.join(cingDirScripts, DATA_STR, 'entryCodeListProteinsSite1')

# parameters for validateEntry
# inputDir = '/Volumes/proteins/var/www/html/Education/Validation/HTML/Exercise_1/Data/'
# inputDir = '/Users/jd/Sites/cing/in/Tests/data/cyana'
inputDir = os.path.join(cingDirTestsData, "cyana")
outputDir = startDir
# pdbConvention = PDB  # before using Yasara to update
pdbConvention = IUPAC
restraintsConvention = CYANA
extraArgList = (inputDir, outputDir, pdbConvention, restraintsConvention,
                repr(ARCHIVE_TYPE_BY_ENTRY), repr(PROJECT_TYPE_CYANA))
doScriptOnEntryList(pythonScriptFileName, entryListFileName, startDir,
                    processes_max=2,
                    max_time_to_wait=12000,  # 1y4o took more than 600. This is one of the optional arguments.
                    start_entry_id=0,
                    max_entries_todo=10,
                    extraArgList=extraArgList)
for entryCode in entryCodeList:
    fileNameZipped = os.path.join(PDBZ2, entryCode[1:3], "pdb" + entryCode + ".ent.gz")
    outputFileName = os.path.join(inputDir, entryCode + ".pdb")
    dstDir = os.path.join(inputDir, entryCode)
    dst = os.path.join(dstDir, entryCode + ".pdb")
    cmd = "/Users/jd/BMRB/PdbArchive/getPdb.csh " + entryCode
    exit_code = os.system(cmd)
    if exit_code:
        nTerror("failed to get pdb file")
        continue
    if gunzip(fileNameZipped, outputFileName=outputFileName):
        nTerror("Failed gunzip for entry: " + entryCode)
    # Unusual path hierarchy by symlink only.
    if not os.path.exists(dstDir):
        os.mkdir(dstDir)
    if os.path.exists(dst):
        os.unlink(dst)
    try:
        # os.symlink() returns None and raises OSError on failure, so its result cannot be tested with 'if'.
        os.symlink(outputFileName, dst)
    except OSError:
        nTerror("failed to symlink pdb file")
        continue

doScriptOnEntryList(pythonScriptFileName, entryListFileName, startDir,
                    processes_max=2,
                    max_time_to_wait=12000,  # 1y4o took more than 600. This is one of the optional arguments.
                    extraArgList=extraArgList)