def testSingleFile(filename):
    """
    Round-trip a single file through the File class.

    The file called filename is read, after which the same File object is
    written out again under that name with "_new.str" appended.
    """
    starFile = File()
    starFile.filename = filename
    nTdebug("reading file ", starFile.filename)
    starFile.read()

    # Redirect the object to the output name before writing.
    rewrittenName = starFile.filename + "_new.str"
    starFile.filename = rewrittenName
    nTdebug("writing file ", starFile.filename)
    starFile.write()
class AllChecks(TestCase):
    """Test case for File: parsing STAR text and reading a downloaded entry."""

    # File instance shared by the tests through this class attribute.
    strf = File()
    # The statements below execute when the class body is evaluated, not per
    # test: they build the path <cingDirTmp>/test_File, hand it to mkdirs
    # (presumably creating it - confirm) and make it the working directory.
    cingDirTmpTest = os.path.join(cingDirTmp, 'test_File')
    mkdirs(cingDirTmpTest)
    os.chdir(cingDirTmpTest)

    def test_Parse(self):
        """
        Parse a small STAR text and compare the regenerated text with the
        expected text, ignoring whitespace.

        The input loop holds a quoted value containing both quote characters,
        a ';'-delimited block made of '#' lines, and a bare word. The expected
        text has the first value written as a ';'-delimited block too.
        """
        # NOTE(review): the line breaks inside the two STAR literals below
        # were restored at positions where the reviewed text only had single
        # spaces (a ';' delimiter has to start a line). Please confirm the
        # exact intra-literal whitespace against the repository copy.
        text = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
'#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm'
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
        # parse() is expected to return a false value here.
        self.assertFalse(self.strf.parse(text=text))
        st = self.strf.star_text()
#        print "unparsed text:[" +st+ "]"
        exp = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
;
#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm
;
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
        # Whitespace differences between expected and regenerated text are ignored.
        self.assertTrue(Utils.equalIgnoringWhiteSpace(exp, st))

    def test_ReadFile(self):
        """Run testEntry on entry '1edp'."""
        testEntry('1edp')
def _processComplCheck(self, fullName):
    """
    Transfer the NOE completeness results from a STAR file into the CING
    data model.

    fullName is the name of the STAR file to read. Values from the first
    tag table of the single "NOE_completeness_statistics" saveframe are
    stored on self.molecule; the rows of the fourth tag table are stored on
    the residues found through self.locateWattosResidue.

    Return None for success or True for error.

    Example of processed data structure attached to say a residue:
        "wattos": {
            "COMPCHK": {
                "valeList": [ 0.009, 0.100 ],
                "qualList": ["POOR", "GOOD" ]},
            "BLABLACHK": {
                "valeList": [ 0.009, 0.100 ],
            }}
    """
    # Give the molecule a reference back to this object.
    self.molecule.wattos = self

    starFile = File()
    starFile.filename = fullName
    readFailed = starFile.read()
    if readFailed:
        nTerror("Failed to read star file: %s" % fullName)
        return True

    sfList = starFile.getSaveFrames(category="NOE_completeness_statistics")
    if not (sfList and len(sfList) == 1):
        nTerror("Failed to get single saveframe but got list of: [%s]" % sfList)
        return True
    complFrame = sfList[0]

    # Totals for the molecule: fetch all four first, store them afterwards.
    statsTable = complFrame.tagtables[0]
    molCompleteness = statsTable.getFloat(
        "_NOE_completeness_stats.Completeness_cumulative_pct", 0)
    molObserved = statsTable.getInt(
        "_NOE_completeness_stats.Constraint_observed_count", 0)
    molExpected = statsTable.getInt(
        "_NOE_completeness_stats.Constraint_expected_count", 0)
    molMatched = statsTable.getInt(
        "_NOE_completeness_stats.Constraint_matched_count", 0)
    molValues = (
        (molCompleteness, COMPLCHK_STR),
        (molObserved, OBS_COUNT_STR),
        (molExpected, EXP_COUNT_STR),
        (molMatched, MAT_COUNT_STR),
    )
    for value, checkKey in molValues:
        self.molecule.setDeepByKeys(value, WATTOS_STR, checkKey, VALUE_LIST_STR)

    # Columns of the table that has one row per residue.
    compTable = complFrame.tagtables[3]
    entityAssemblyIds = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Entity_assembly_ID")
    compIndexIds = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Comp_index_ID")
    compIds = compTable.getStringListByColumnName(
        "_NOE_completeness_comp.Comp_ID")
    obsAtomCounts = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Obs_atom_count")
    obsCounts = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_observed_count")
    expCounts = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_expected_count")
    matCounts = compTable.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_matched_count")
    completenessList = compTable.getFloatListByColumnName(
        "_NOE_completeness_comp.Completeness_cumulative_pct")
    detailsList = compTable.getStringListByColumnName(
        "_NOE_completeness_comp.Details")

    for row, completeness in enumerate(completenessList):
        # Pull every cell of this row before looking up the residue.
        residueKey = (entityAssemblyIds[row], compIndexIds[row], compIds[row])
        counts = (
            (obsAtomCounts[row], OBS_ATOM_COUNT_STR),
            (obsCounts[row], OBS_COUNT_STR),
            (expCounts[row], EXP_COUNT_STR),
            (matCounts[row], MAT_COUNT_STR),
        )
        qualifier = detailsList[row]

        residue = self.locateWattosResidue(*residueKey)
        if not residue:
            nTerror("Failed to find Wattos residue in CING: %s %s %s" % residueKey)
            return True

        wattosDic = residue.setdefault(WATTOS_STR, NTdict())
        complDic = wattosDic.setdefault(COMPLCHK_STR, NTdict())
        complDic[VALUE_LIST_STR] = completeness
        complDic[QUAL_LIST_STR] = qualifier
        for count, countKey in counts:
            wattosDic.setDeepByKeys(count, countKey, VALUE_LIST_STR)
def getMolTypes():
    """
    Print, per entry, a tally of the entity types and their summed lengths.

    Only the first two entries returned by
    PDBEntryLists.getBmrbNmrGridEntries() are processed. For each entry,
    File.getHeader is called with <starDir>/<entry>/<entry>_wattos.str as
    input and <tmpDir>/<entry>_head.str as output; that output is read and
    then removed. Every 'entity' saveframe is counted under the key
    '<_Entity.Type>/<_Entity.Polymer_type>' (polymer type is empty when the
    tag is absent) and its '_Entity.Number_of_monomers' value (0 when
    absent) is added to the length for that key. One line
    'entry,key,count,length' is printed per key.

    An entry that raises an ordinary exception is reported and skipped.

    Return True on error, which here means a KeyboardInterrupt; otherwise
    the function ends without an explicit return value (None).
    """
    # NOTE(review): never read in this function body - confirm whether the
    # tallies were meant to be written to this file.
    outputFile = 'moltypes.csv'

    pdbList = PDBEntryLists.getBmrbNmrGridEntries()[0:2]
    # Single-argument print(...) behaves the same as a statement and as a
    # function, so the output is unchanged on either interpreter line.
    print("Read pdb entries from NMR Restraints Grid: %s" % len(pdbList))
    pdbList.sort()

    molTypes = {}
    seq_length = {}
    for entry in pdbList:
        try:
            inputFN = os.path.join(starDir, entry, entry + '_wattos.str')
            headFN = os.path.join(tmpDir, entry + '_head.str')
            f = File()
            saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"]
            try:
                f.getHeader(saveFrameRegExList, inputFN, headFN)
                f.filename = headFN
                f.read()
            finally:
                # Remove the temp file even when extracting or reading failed.
                if os.path.exists(headFN):
                    os.unlink(headFN)

            molTypesPerEntry = {}
            molTypes[entry] = molTypesPerEntry
            seq_lengthPerEntry = {}
            seq_length[entry] = seq_lengthPerEntry

            for node in f.getSaveFrames(category='entity'):
                tT = node.tagtables[0]
                tagnames = tT.tagnames

                # '_Entity.Type' is required: index() raises when it is
                # missing, which skips the whole entry below.
                entityType = tT.tagvalues[tagnames.index('_Entity.Type')][0]
                polymerType = ''
                if '_Entity.Polymer_type' in tagnames:
                    polymerType = tT.tagvalues[
                        tagnames.index('_Entity.Polymer_type')][0]
                key = entityType + '/' + polymerType
                molTypesPerEntry[key] = molTypesPerEntry.get(key, 0) + 1

                length = 0
                if '_Entity.Number_of_monomers' in tagnames:
                    length = int(tT.tagvalues[
                        tagnames.index('_Entity.Number_of_monomers')][0])
                seq_lengthPerEntry[key] = seq_lengthPerEntry.get(key, 0) + length

            for key in molTypesPerEntry:
                print(",".join([entry, key,
                                repr(molTypesPerEntry[key]),
                                repr(seq_lengthPerEntry[key])]))
        except KeyboardInterrupt:
            print("ERROR: Caught KeyboardInterrupt will exit(1)")
            return True
        except Exception as info:
            # Best effort: report the problem and carry on with the next entry.
            print("Skipping entry:  %s %s" % (entry, info))
def _processComplCheck(self, fullName):
    """
    Read the NOE completeness check from the STAR file fullName and attach
    its values to the molecule and to the individual residues.

    The file must contain exactly one saveframe of category
    "NOE_completeness_statistics". Its first tag table supplies the values
    stored on self.molecule, its fourth tag table the values stored on each
    residue located with self.locateWattosResidue.

    Return None for success or True for error.

    Example of processed data structure attached to say a residue:
        "wattos": {
            "COMPCHK": {
                "valeList": [ 0.009, 0.100 ],
                "qualList": ["POOR", "GOOD" ]},
            "BLABLACHK": {
                "valeList": [ 0.009, 0.100 ],
            }}
    """
    # The molecule points back at this object.
    self.molecule.wattos = self

    starFile = File()
    starFile.filename = fullName
    if starFile.read():
        nTerror("Failed to read star file: %s" % fullName)
        return True

    frames = starFile.getSaveFrames(category="NOE_completeness_statistics")
    gotSingleFrame = bool(frames) and len(frames) == 1
    if not gotSingleFrame:
        nTerror("Failed to get single saveframe but got list of: [%s]" % frames)
        return True
    frame = frames[0]

    # --- values for the molecule as a whole ---
    header = frame.tagtables[0]
    completenessMol = header.getFloat(
        "_NOE_completeness_stats.Completeness_cumulative_pct", 0)
    observedMol = header.getInt(
        "_NOE_completeness_stats.Constraint_observed_count", 0)
    expectedMol = header.getInt(
        "_NOE_completeness_stats.Constraint_expected_count", 0)
    matchedMol = header.getInt(
        "_NOE_completeness_stats.Constraint_matched_count", 0)

    molecule = self.molecule
    molecule.setDeepByKeys(completenessMol, WATTOS_STR, COMPLCHK_STR, VALUE_LIST_STR)
    molecule.setDeepByKeys(observedMol, WATTOS_STR, OBS_COUNT_STR, VALUE_LIST_STR)
    molecule.setDeepByKeys(expectedMol, WATTOS_STR, EXP_COUNT_STR, VALUE_LIST_STR)
    molecule.setDeepByKeys(matchedMol, WATTOS_STR, MAT_COUNT_STR, VALUE_LIST_STR)

    # --- values per residue, one table row each ---
    body = frame.tagtables[3]
    colEntityAssemblyId = body.getIntListByColumnName(
        "_NOE_completeness_comp.Entity_assembly_ID")
    colCompIndexId = body.getIntListByColumnName(
        "_NOE_completeness_comp.Comp_index_ID")
    colCompId = body.getStringListByColumnName(
        "_NOE_completeness_comp.Comp_ID")
    colObsAtomCount = body.getIntListByColumnName(
        "_NOE_completeness_comp.Obs_atom_count")
    colObsCount = body.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_observed_count")
    colExpCount = body.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_expected_count")
    colMatCount = body.getIntListByColumnName(
        "_NOE_completeness_comp.Constraint_matched_count")
    colCompleteness = body.getFloatListByColumnName(
        "_NOE_completeness_comp.Completeness_cumulative_pct")
    colDetails = body.getStringListByColumnName(
        "_NOE_completeness_comp.Details")

    for idx, residueCompleteness in enumerate(colCompleteness):
        # Read the complete row up front, then resolve the residue.
        locator = (colEntityAssemblyId[idx], colCompIndexId[idx], colCompId[idx])
        nObsAtoms = colObsAtomCount[idx]
        nObserved = colObsCount[idx]
        nExpected = colExpCount[idx]
        nMatched = colMatCount[idx]
        rowDetails = colDetails[idx]

        residue = self.locateWattosResidue(*locator)
        if not residue:
            nTerror("Failed to find Wattos residue in CING: %s %s %s" % locator)
            return True

        perResidue = residue.setdefault(WATTOS_STR, NTdict())
        completenessDic = perResidue.setdefault(COMPLCHK_STR, NTdict())
        completenessDic[VALUE_LIST_STR] = residueCompleteness
        completenessDic[QUAL_LIST_STR] = rowDetails

        for stored, storeKey in ((nObsAtoms, OBS_ATOM_COUNT_STR),
                                 (nObserved, OBS_COUNT_STR),
                                 (nExpected, EXP_COUNT_STR),
                                 (nMatched, MAT_COUNT_STR)):
            perResidue.setDeepByKeys(stored, storeKey, VALUE_LIST_STR)
def testEntry(entry):
    """
    Download one entry from the NMR Restraints Grid and round-trip it.

    Steps:
      1. Fetch the "2-parsed" STAR archive for entry into <entry>.zip.
         A failed download is only warned about because there might not
         be a network connection.
      2. Copy the (last) archive member ending in '.str' to <entry>_org.str.
      3. Read that file with File and write it to <entry>_pystar.str.
      4. Diff org against pystar.
      5. Run Wattos.Star.STARFilter on the pystar file to produce
         <entry>_wattos.str, then diff pystar and org against it.
      6. Remove the zip, org and pystar files.

    Nothing is returned. Problems are reported through nTwarning/nTerror
    and end the routine early where the next step cannot proceed.
    """
    nTmessage("Testing Entry")
    strf = File()
    # Freely available on the web so not included in package.
    stage = "2-parsed"
#    stage = "3-converted-DOCR"
    urlLocation = ("http://www.bmrb.wisc.edu/NRG/MRGridServlet?" +
                   "block_text_type=%s&file_detail=%s&pdb_id=%s" +
                   "&program=STAR&request_type=archive&subtype=full&type=entry"
                   ) % (stage, stage, entry)
    fnamezip = entry + ".zip"
    try:
        urllib.urlretrieve(urlLocation, fnamezip)
    except Exception:
        # not a real error since there might not be a network connection.
        nTwarning("Failed to get; " + urlLocation)
        return

    orgWattosWrittenFile = entry + "_org.str"
    pystarlibWrittenFile = entry + "_pystar.str"
    wattosWrittenFile = entry + "_wattos.str"
    diffOrgPystarFile = entry + "_diff_org_pystar.str"
    diffPystarWattosFile = entry + "_diff_pystar_wattos.str"
    diffOrgWattosWattosFile = entry + "_diff_org_wattos_wattos.str"

    zfobj = zipfile.ZipFile(fnamezip)
    try:
        fname = None
        for name in zfobj.namelist():
            if name.endswith('.str'):
                fname = name
        if fname is None:
            # Without this guard None would be handed to zfobj.read().
            nTerror("Failed to find a .str file in: " + fnamezip)
            return
        outfile = open(orgWattosWrittenFile, 'w')
        try:
            outfile.write(zfobj.read(fname))
        finally:
            outfile.close()
    finally:
        # Close the archive on every path, including errors.
        zfobj.close()

    strf.filename = orgWattosWrittenFile
    strf.read()
    strf.filename = pystarlibWrittenFile
    strf.write()

    diffCmdFormat = "diff --ignore-all-space --ignore-blank-lines %s %s > %s"

    nTmessage("Most likely the below diff will fail because it depends on diff being installed")
    cmd = diffCmdFormat % (orgWattosWrittenFile, pystarlibWrittenFile, diffOrgPystarFile)
    os.system(cmd)
    if not os.path.exists(diffOrgPystarFile):
        nTwarning("failed to diff files: " + orgWattosWrittenFile + ", " + pystarlibWrittenFile)

    nTdebug("Most likely the below check will fail because it depends on Wattos being installed")
    nTdebug("rewrite to Java formating for comparison")
    cmd = "%s Wattos.Star.STARFilter %s %s ." % (
        JVM_CMD_STD, pystarlibWrittenFile, wattosWrittenFile)
    logFileName = "wattos_STARFilter.log"
    wattosProgram = ExecuteProgram(cmd, redirectOutputToFile=logFileName)
    wattosExitCode = wattosProgram()
    if wattosExitCode:
        nTwarning("failed to execute Wattos")
        return
    if not os.path.exists(wattosWrittenFile):
        nTerror("failed to rewrite file: " + pystarlibWrittenFile)
        return

    cmd = diffCmdFormat % (pystarlibWrittenFile, wattosWrittenFile, diffPystarWattosFile)
    os.system(cmd)
    if not os.path.exists(diffPystarWattosFile):
        nTwarning("failed to diff file: " + pystarlibWrittenFile + ", " + wattosWrittenFile)

    cmd = diffCmdFormat % (orgWattosWrittenFile, wattosWrittenFile, diffOrgWattosWattosFile)
    os.system(cmd)
    if not os.path.exists(diffOrgWattosWattosFile):
        # Message is concatenated like the two warnings above.
        nTwarning("failed to diff file: " + orgWattosWrittenFile + ", " + wattosWrittenFile)

    # Best-effort cleanup, one file at a time so that a missing file does
    # not prevent the others from being removed.
    for tmpName in (fnamezip, orgWattosWrittenFile, pystarlibWrittenFile):
        try:
            os.unlink(tmpName)
        except OSError:
            pass
outputFile = '/Users/jd/moltypes.csv' pdbList = PDBEntryLists.getBmrbNmrGridEntries()[0:200] #pdbList = PDBEntryLists.getBmrbNmrGridEntries() #pdbList=['1a03'] #pdbList=['1brv'] print "Read pdb entries from NMR Restraints Grid:", len(pdbList) pdbList.sort() molTypes = {} seq_length = {} for entry in pdbList: try: inputFN = os.path.join(starDir, entry, entry + '_wattos.str') headFN = os.path.join(tmpDir, entry + '_head.str') f = File() saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"] f.getHeader(saveFrameRegExList, inputFN, headFN) f.filename = headFN f.read() os.unlink(f.filename) # removing temp file. molTypesPerEntry = {} molTypes[entry] = molTypesPerEntry seq_lengthPerEntry = {} seq_length[entry] = seq_lengthPerEntry sfList = f.getSaveFrames(category='entity') for node in sfList: tT = node.tagtables[0] # print tT typeIdx = tT.tagnames.index('_Entity.Type') # print typeIdx
def testEntry(entry):
    """
    Download one entry from the NMR Restraints Grid and round-trip it.

    Steps:
      1. Fetch the "2-parsed" STAR archive for entry into <entry>.zip.
         A failed download is only warned about because there might not
         be a network connection.
      2. Copy the (last) archive member ending in '.str' to <entry>_org.str.
      3. Read that file with File and write it to <entry>_pystar.str.
      4. Diff org against pystar.
      5. Run Wattos.Star.STARFilter on the pystar file to produce
         <entry>_wattos.str, then diff pystar and org against it.
      6. Remove the zip, org and pystar files.

    Nothing is returned. Problems are reported through nTwarning/nTerror
    and end the routine early where the next step cannot proceed.
    """
    nTmessage("Testing Entry")
    strf = File()
    # Freely available on the web so not included in package.
    stage = "2-parsed"
#    stage = "3-converted-DOCR"
    urlLocation = ("http://www.bmrb.wisc.edu/NRG/MRGridServlet?" +
                   "block_text_type=%s&file_detail=%s&pdb_id=%s" +
                   "&program=STAR&request_type=archive&subtype=full&type=entry"
                   ) % (stage, stage, entry)
    fnamezip = entry + ".zip"
    try:
        urllib.urlretrieve(urlLocation, fnamezip)
    except Exception:
        # not a real error since there might not be a network connection.
        nTwarning("Failed to get; " + urlLocation)
        return

    orgWattosWrittenFile = entry + "_org.str"
    pystarlibWrittenFile = entry + "_pystar.str"
    wattosWrittenFile = entry + "_wattos.str"
    diffOrgPystarFile = entry + "_diff_org_pystar.str"
    diffPystarWattosFile = entry + "_diff_pystar_wattos.str"
    diffOrgWattosWattosFile = entry + "_diff_org_wattos_wattos.str"

    zfobj = zipfile.ZipFile(fnamezip)
    try:
        fname = None
        for name in zfobj.namelist():
            if name.endswith('.str'):
                fname = name
        if fname is None:
            # Without this guard None would be handed to zfobj.read().
            nTerror("Failed to find a .str file in: " + fnamezip)
            return
        outfile = open(orgWattosWrittenFile, 'w')
        try:
            outfile.write(zfobj.read(fname))
        finally:
            outfile.close()
    finally:
        # Close the archive on every path, including errors.
        zfobj.close()

    strf.filename = orgWattosWrittenFile
    strf.read()
    strf.filename = pystarlibWrittenFile
    strf.write()

    diffCmdFormat = "diff --ignore-all-space --ignore-blank-lines %s %s > %s"

    nTmessage("Most likely the below diff will fail because it depends on diff being installed")
    cmd = diffCmdFormat % (orgWattosWrittenFile, pystarlibWrittenFile, diffOrgPystarFile)
    os.system(cmd)
    if not os.path.exists(diffOrgPystarFile):
        nTwarning("failed to diff files: " + orgWattosWrittenFile + ", " + pystarlibWrittenFile)

    nTdebug("Most likely the below check will fail because it depends on Wattos being installed")
    nTdebug("rewrite to Java formating for comparison")
    cmd = "%s Wattos.Star.STARFilter %s %s ." % (
        JVM_CMD_STD, pystarlibWrittenFile, wattosWrittenFile)
    logFileName = "wattos_STARFilter.log"
    wattosProgram = ExecuteProgram(cmd, redirectOutputToFile=logFileName)
    wattosExitCode = wattosProgram()
    if wattosExitCode:
        nTwarning("failed to execute Wattos")
        return
    if not os.path.exists(wattosWrittenFile):
        nTerror("failed to rewrite file: " + pystarlibWrittenFile)
        return

    cmd = diffCmdFormat % (pystarlibWrittenFile, wattosWrittenFile, diffPystarWattosFile)
    os.system(cmd)
    if not os.path.exists(diffPystarWattosFile):
        nTwarning("failed to diff file: " + pystarlibWrittenFile + ", " + wattosWrittenFile)

    cmd = diffCmdFormat % (orgWattosWrittenFile, wattosWrittenFile, diffOrgWattosWattosFile)
    os.system(cmd)
    if not os.path.exists(diffOrgWattosWattosFile):
        # Message is concatenated like the two warnings above.
        nTwarning("failed to diff file: " + orgWattosWrittenFile + ", " + wattosWrittenFile)

    # Best-effort cleanup, one file at a time so that a missing file does
    # not prevent the others from being removed.
    for tmpName in (fnamezip, orgWattosWrittenFile, pystarlibWrittenFile):
        try:
            os.unlink(tmpName)
        except OSError:
            pass