def _processComplCheck(self, fullName):
    """
    Load the Wattos NOE completeness results from a STAR file into CING.

    fullName is the path of the STAR file to read.

    Return None for success or True for error.

    What gets stored (keys are the module constants, not literal text):
      molecule[WATTOS_STR][<check>][VALUE_LIST_STR] = value
          for <check> in COMPLCHK_STR, OBS_COUNT_STR, EXP_COUNT_STR,
          MAT_COUNT_STR, taken from row 0 of the saveframe's first tag table.
      residue[WATTOS_STR][COMPLCHK_STR][VALUE_LIST_STR] = completeness
      residue[WATTOS_STR][COMPLCHK_STR][QUAL_LIST_STR]  = details string
      residue[WATTOS_STR][<count>][VALUE_LIST_STR] = count
          for <count> in OBS_ATOM_COUNT_STR, OBS_COUNT_STR, EXP_COUNT_STR,
          MAT_COUNT_STR.
    Each stored value is a single scalar per molecule/residue.
    """
    # Give the molecule a handle on this Wattos object.
    self.molecule.wattos = self

    star = File()
    star.filename = fullName
    if star.read():
        nTerror("Failed to read star file: %s" % fullName)
        return True

    frames = star.getSaveFrames(category="NOE_completeness_statistics")
    if not (frames and len(frames) == 1):
        nTerror("Failed to get single saveframe but got list of: [%s]" % frames)
        return True
    frame = frames[0]

    # Molecule-wide statistics: read all four values first, then store them.
    header = frame.tagtables[0]
    moleculeStats = (
        (header.getFloat("_NOE_completeness_stats.Completeness_cumulative_pct", 0), COMPLCHK_STR),
        (header.getInt("_NOE_completeness_stats.Constraint_observed_count", 0), OBS_COUNT_STR),
        (header.getInt("_NOE_completeness_stats.Constraint_expected_count", 0), EXP_COUNT_STR),
        (header.getInt("_NOE_completeness_stats.Constraint_matched_count", 0), MAT_COUNT_STR),
    )
    for statValue, statKey in moleculeStats:
        self.molecule.setDeepByKeys(statValue, WATTOS_STR, statKey, VALUE_LIST_STR)

    # Per-residue statistics live in the fourth tag table of the saveframe.
    body = frame.tagtables[3]
    intColumn = body.getIntListByColumnName
    strColumn = body.getStringListByColumnName
    floatColumn = body.getFloatListByColumnName

    entityAssemblyIds = intColumn("_NOE_completeness_comp.Entity_assembly_ID")
    compIndexIds = intColumn("_NOE_completeness_comp.Comp_index_ID")
    compIds = strColumn("_NOE_completeness_comp.Comp_ID")
    obsAtomCounts = intColumn("_NOE_completeness_comp.Obs_atom_count")
    obsCounts = intColumn("_NOE_completeness_comp.Constraint_observed_count")
    expCounts = intColumn("_NOE_completeness_comp.Constraint_expected_count")
    matCounts = intColumn("_NOE_completeness_comp.Constraint_matched_count")
    completenessValues = floatColumn("_NOE_completeness_comp.Completeness_cumulative_pct")
    qualityStrings = strColumn("_NOE_completeness_comp.Details")

    for row, completeness in enumerate(completenessValues):
        # Pull every column value for this row before touching the data model.
        wattosId = (entityAssemblyIds[row], compIndexIds[row], compIds[row])
        residueCounts = (
            (obsAtomCounts[row], OBS_ATOM_COUNT_STR),
            (obsCounts[row], OBS_COUNT_STR),
            (expCounts[row], EXP_COUNT_STR),
            (matCounts[row], MAT_COUNT_STR),
        )
        quality = qualityStrings[row]

        residue = self.locateWattosResidue(*wattosId)
        if not residue:
            nTerror("Failed to find Wattos residue in CING: %s %s %s" % wattosId)
            return True

        wattosDic = residue.setdefault(WATTOS_STR, NTdict())
        completenessDic = wattosDic.setdefault(COMPLCHK_STR, NTdict())
        completenessDic[VALUE_LIST_STR] = completeness
        completenessDic[QUAL_LIST_STR] = quality
        for countValue, countKey in residueCounts:
            wattosDic.setDeepByKeys(countValue, countKey, VALUE_LIST_STR)
def _processComplCheck(self, fullName):
    """
    Transfer the NOE completeness check produced by Wattos into CING.

    fullName: path of the STAR file holding the completeness results.

    Return None for success or True for error.

    Molecule level (row 0 of the saveframe's first tag table):
      molecule[WATTOS_STR][k][VALUE_LIST_STR] for k in COMPLCHK_STR,
      OBS_COUNT_STR, EXP_COUNT_STR and MAT_COUNT_STR.
    Residue level (one row per residue in the saveframe's fourth tag table):
      residue[WATTOS_STR][COMPLCHK_STR] gets VALUE_LIST_STR (completeness)
      and QUAL_LIST_STR (details string);
      residue[WATTOS_STR][k][VALUE_LIST_STR] for k in OBS_ATOM_COUNT_STR,
      OBS_COUNT_STR, EXP_COUNT_STR and MAT_COUNT_STR.
    A single scalar is stored under each key.
    """
    # The molecule keeps a reference to this Wattos object.
    self.molecule.wattos = self

    starFile = File()
    starFile.filename = fullName
    readFailed = starFile.read()
    if readFailed:
        nTerror("Failed to read star file: %s" % fullName)
        return True

    saveFrames = starFile.getSaveFrames(category="NOE_completeness_statistics")
    if not (saveFrames and len(saveFrames) == 1):
        nTerror("Failed to get single saveframe but got list of: [%s]" % saveFrames)
        return True
    complFrame = saveFrames[0]

    # Overall numbers for the molecule; all are read before any is stored.
    headerTable = complFrame.tagtables[0]
    overall = (
        (headerTable.getFloat("_NOE_completeness_stats.Completeness_cumulative_pct", 0), COMPLCHK_STR),
        (headerTable.getInt("_NOE_completeness_stats.Constraint_observed_count", 0), OBS_COUNT_STR),
        (headerTable.getInt("_NOE_completeness_stats.Constraint_expected_count", 0), EXP_COUNT_STR),
        (headerTable.getInt("_NOE_completeness_stats.Constraint_matched_count", 0), MAT_COUNT_STR),
    )
    for value, checkKey in overall:
        self.molecule.setDeepByKeys(value, WATTOS_STR, checkKey, VALUE_LIST_STR)

    # Column-wise view of the per-residue table.
    residueTable = complFrame.tagtables[3]
    getInts = residueTable.getIntListByColumnName
    getStrings = residueTable.getStringListByColumnName
    getFloats = residueTable.getFloatListByColumnName

    assemblyIdColumn = getInts("_NOE_completeness_comp.Entity_assembly_ID")
    indexIdColumn = getInts("_NOE_completeness_comp.Comp_index_ID")
    compIdColumn = getStrings("_NOE_completeness_comp.Comp_ID")
    obsAtomColumn = getInts("_NOE_completeness_comp.Obs_atom_count")
    observedColumn = getInts("_NOE_completeness_comp.Constraint_observed_count")
    expectedColumn = getInts("_NOE_completeness_comp.Constraint_expected_count")
    matchedColumn = getInts("_NOE_completeness_comp.Constraint_matched_count")
    completenessColumn = getFloats("_NOE_completeness_comp.Completeness_cumulative_pct")
    detailsColumn = getStrings("_NOE_completeness_comp.Details")

    for idx, residueCompleteness in enumerate(completenessColumn):
        # Read the whole row up front, in the same column order as above.
        residueKey = (assemblyIdColumn[idx], indexIdColumn[idx], compIdColumn[idx])
        countsForRow = (
            (obsAtomColumn[idx], OBS_ATOM_COUNT_STR),
            (observedColumn[idx], OBS_COUNT_STR),
            (expectedColumn[idx], EXP_COUNT_STR),
            (matchedColumn[idx], MAT_COUNT_STR),
        )
        detailsForRow = detailsColumn[idx]

        cingResidue = self.locateWattosResidue(*residueKey)
        if not cingResidue:
            nTerror("Failed to find Wattos residue in CING: %s %s %s" % residueKey)
            return True

        perResidue = cingResidue.setdefault(WATTOS_STR, NTdict())
        perCheck = perResidue.setdefault(COMPLCHK_STR, NTdict())
        perCheck[VALUE_LIST_STR] = residueCompleteness
        perCheck[QUAL_LIST_STR] = detailsForRow
        for count, countKey in countsForRow:
            perResidue.setDeepByKeys(count, countKey, VALUE_LIST_STR)
def _firstTagValue(tagTable, tagName, default):
    """Return the first value stored under tagName in tagTable, or default if the tag is absent."""
    if tagName not in tagTable.tagnames:
        return default
    return tagTable.tagvalues[tagTable.tagnames.index(tagName)][0]


def getMolTypes(maxEntries=2):
    """Print, per PDB entry, the entity count and summed length for each molecular type.

    For every entry the header of <starDir>/<entry>/<entry>_wattos.str is
    extracted into a temporary file under tmpDir, parsed, and its 'entity'
    saveframes are tallied by the key "<_Entity.Type>/<_Entity.Polymer_type>".
    One CSV-style line "entry,key,count,length" is printed per key.

    maxEntries: number of entries (from the start of the list returned by
        PDBEntryLists.getBmrbNmrGridEntries()) to process. The default of 2
        is the limit the code has always used; pass None for all entries.

    Return True on error (only a KeyboardInterrupt is reported this way);
    otherwise None. Entries that fail for any other reason are skipped with
    a message.
    """
    # NOTE(review): outputFile is not used by the code visible here; kept in
    # case something further down relies on it.
    outputFile = 'moltypes.csv'
    pdbList = PDBEntryLists.getBmrbNmrGridEntries()[:maxEntries]
    print("Read pdb entries from NMR Restraints Grid: %s" % len(pdbList))
    pdbList.sort()

    molTypes = {}
    seq_length = {}
    for entry in pdbList:
        try:
            inputFN = os.path.join(starDir, entry, entry + '_wattos.str')
            headFN = os.path.join(tmpDir, entry + '_head.str')
            f = File()
            saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"]
            f.getHeader(saveFrameRegExList, inputFN, headFN)
            f.filename = headFN
            try:
                readFailed = f.read()
            finally:
                # Remove the temp file even when parsing blows up.
                os.unlink(headFN)
            if readFailed:
                # read() signals failure with a true value; do not tally a
                # file that could not be parsed.
                print("Skipping entry:  %s failed to read %s" % (entry, headFN))
                continue

            molTypesPerEntry = {}
            molTypes[entry] = molTypesPerEntry
            seq_lengthPerEntry = {}
            seq_length[entry] = seq_lengthPerEntry

            for node in f.getSaveFrames(category='entity'):
                tT = node.tagtables[0]
                # _Entity.Type is required: a missing tag raises ValueError
                # and the whole entry is skipped below.
                entityType = tT.tagvalues[tT.tagnames.index('_Entity.Type')][0]
                polymerType = _firstTagValue(tT, '_Entity.Polymer_type', '')
                key = entityType + '/' + polymerType

                molTypesPerEntry[key] = molTypesPerEntry.get(key, 0) + 1

                monomerCount = _firstTagValue(tT, '_Entity.Number_of_monomers', None)
                if monomerCount is None:
                    length = 0
                else:
                    length = int(monomerCount)
                seq_lengthPerEntry[key] = seq_lengthPerEntry.get(key, 0) + length

            for key in molTypesPerEntry:
                line = ",".join([entry, key,
                                 repr(molTypesPerEntry[key]),
                                 repr(seq_lengthPerEntry[key])])
                print(line)
        except KeyboardInterrupt:
            print("ERROR: Caught KeyboardInterrupt will exit(1)")
            return True
        except Exception as info:
            # Deliberate best effort: report and move on to the next entry.
            print("Skipping entry:  %s %s" % (entry, info))
seq_length = {} for entry in pdbList: try: inputFN = os.path.join(starDir, entry, entry + '_wattos.str') headFN = os.path.join(tmpDir, entry + '_head.str') f = File() saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"] f.getHeader(saveFrameRegExList, inputFN, headFN) f.filename = headFN f.read() os.unlink(f.filename) # removing temp file. molTypesPerEntry = {} molTypes[entry] = molTypesPerEntry seq_lengthPerEntry = {} seq_length[entry] = seq_lengthPerEntry sfList = f.getSaveFrames(category='entity') for node in sfList: tT = node.tagtables[0] # print tT typeIdx = tT.tagnames.index('_Entity.Type') # print typeIdx type = tT.tagvalues[typeIdx][0] poltype = '' if '_Entity.Polymer_type' in tT.tagnames: poltypeIdx = tT.tagnames.index('_Entity.Polymer_type') # print poltypeIdx poltype = tT.tagvalues[poltypeIdx][0] # print "type", type, ", and poltype", poltype key = type + '/' + poltype if molTypesPerEntry.has_key(key):