Ejemplo n.º 1
0
    def _processComplCheck(self, fullName):
        """
        Transfer the Wattos NOE completeness results into the CING data model.

        The STAR file fullName is read and its single
        "NOE_completeness_statistics" saveframe is used to store:
          - on self.molecule: the cumulative completeness percentage and the
            observed / expected / matched constraint counts (first tag table);
          - on every residue listed in the fourth tag table: the completeness
            percentage, the details string and the observed-atom / observed /
            expected / matched counts.

        Everything is filed under WATTOS_STR. Each value is stored as read
        (a single value per molecule/residue, not wrapped in a list) under
        the VALUE_LIST_STR key; the details string goes under QUAL_LIST_STR.

        Return None for success or True for error (unreadable file, not
        exactly one saveframe, or a residue that cannot be located).
        """
        # Make this Wattos object reachable from the molecule.
        self.molecule.wattos = self

        star = File()
        star.filename = fullName
        readFailed = star.read()
        if readFailed:
            nTerror("Failed to read star file: %s" % fullName)
            return True

        frames = star.getSaveFrames(category="NOE_completeness_statistics")
        if not (frames and len(frames) == 1):
            nTerror("Failed to get single saveframe but got list of: [%s]" %
                    frames)
            return True
        frame = frames[0]

        # --- Molecule-wide numbers: first tag table of the saveframe. ---
        header = frame.tagtables[0]
        headerSpecs = (
            (header.getFloat,
             "_NOE_completeness_stats.Completeness_cumulative_pct",
             COMPLCHK_STR),
            (header.getInt,
             "_NOE_completeness_stats.Constraint_observed_count",
             OBS_COUNT_STR),
            (header.getInt,
             "_NOE_completeness_stats.Constraint_expected_count",
             EXP_COUNT_STR),
            (header.getInt,
             "_NOE_completeness_stats.Constraint_matched_count",
             MAT_COUNT_STR),
        )
        # Two passes on purpose: read all header values, then store them.
        headerValues = [(read(tag, 0), checkKey)
                        for read, tag, checkKey in headerSpecs]
        for value, checkKey in headerValues:
            self.molecule.setDeepByKeys(value, WATTOS_STR, checkKey,
                                        VALUE_LIST_STR)

        # --- Per-residue numbers: fourth tag table of the saveframe. ---
        body = frame.tagtables[3]

        entityIds = body.getIntListByColumnName(
            "_NOE_completeness_comp.Entity_assembly_ID")
        compIndexIds = body.getIntListByColumnName(
            "_NOE_completeness_comp.Comp_index_ID")
        compNames = body.getStringListByColumnName(
            "_NOE_completeness_comp.Comp_ID")

        obsAtomCounts = body.getIntListByColumnName(
            "_NOE_completeness_comp.Obs_atom_count")
        obsCounts = body.getIntListByColumnName(
            "_NOE_completeness_comp.Constraint_observed_count")
        expCounts = body.getIntListByColumnName(
            "_NOE_completeness_comp.Constraint_expected_count")
        matCounts = body.getIntListByColumnName(
            "_NOE_completeness_comp.Constraint_matched_count")

        completenessColumn = body.getFloatListByColumnName(
            "_NOE_completeness_comp.Completeness_cumulative_pct")
        detailsColumn = body.getStringListByColumnName(
            "_NOE_completeness_comp.Details")

        # Keys paired, in order, with the four per-residue counts below.
        countKeys = (OBS_ATOM_COUNT_STR, OBS_COUNT_STR, EXP_COUNT_STR,
                     MAT_COUNT_STR)

        for row, completeness in enumerate(completenessColumn):
            # Pick up the whole row before touching the data model.
            wattosTuple = (entityIds[row], compIndexIds[row], compNames[row])
            counts = (obsAtomCounts[row], obsCounts[row], expCounts[row],
                      matCounts[row])
            qualifier = detailsColumn[row]

            residue = self.locateWattosResidue(*wattosTuple)
            if not residue:
                nTerror("Failed to find Wattos residue in CING: %s %s %s" %
                        (wattosTuple))
                return True

            wattosDic = residue.setdefault(WATTOS_STR, NTdict())
            complDic = wattosDic.setdefault(COMPLCHK_STR, NTdict())
            complDic[VALUE_LIST_STR] = completeness
            complDic[QUAL_LIST_STR] = qualifier

            for count, countKey in zip(counts, countKeys):
                wattosDic.setDeepByKeys(count, countKey, VALUE_LIST_STR)
Ejemplo n.º 2
0
    def _processComplCheck(self, fullName):
        """
        Store the Wattos NOE completeness check results in the CING data model.

        Input is the STAR file fullName, which must contain exactly one
        saveframe of category "NOE_completeness_statistics". From its first
        tag table the molecule-wide completeness percentage and the
        observed / expected / matched constraint counts are stored on
        self.molecule. From its fourth tag table the per-residue completeness
        percentage, details string and observed-atom / observed / expected /
        matched counts are stored on each residue found through
        self.locateWattosResidue().

        All entries live under WATTOS_STR; values are stored as single items
        under VALUE_LIST_STR and the details string under QUAL_LIST_STR.

        Return None for success or True for error.
        """
        def storeOnMolecule(value, checkKey):
            'Store one molecule-wide value under WATTOS_STR / checkKey.'
            self.molecule.setDeepByKeys(value, WATTOS_STR, checkKey, VALUE_LIST_STR)

        # Back-reference from the molecule to this Wattos object.
        self.molecule.wattos = self

        starDoc = File()
        starDoc.filename = fullName
        if starDoc.read():
            nTerror("Failed to read star file: %s" % fullName)
            return True

        complFrames = starDoc.getSaveFrames(category = "NOE_completeness_statistics")
        if not (complFrames and len(complFrames) == 1):
            nTerror("Failed to get single saveframe but got list of: [%s]" % complFrames)
            return True
        complFrame = complFrames[0]

        # Molecule-wide statistics; all four are read before any is stored.
        statsTable = complFrame.tagtables[0]
        molCompleteness = statsTable.getFloat("_NOE_completeness_stats.Completeness_cumulative_pct", 0)
        molObserved = statsTable.getInt("_NOE_completeness_stats.Constraint_observed_count", 0)
        molExpected = statsTable.getInt("_NOE_completeness_stats.Constraint_expected_count", 0)
        molMatched = statsTable.getInt("_NOE_completeness_stats.Constraint_matched_count", 0)

        storeOnMolecule(molCompleteness, COMPLCHK_STR)
        storeOnMolecule(molObserved, OBS_COUNT_STR)
        storeOnMolecule(molExpected, EXP_COUNT_STR)
        storeOnMolecule(molMatched, MAT_COUNT_STR)

        # Per-residue statistics, one column per quantity.
        resTable = complFrame.tagtables[3]

        entityCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Entity_assembly_ID")
        seqCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Comp_index_ID")
        nameCol = resTable.getStringListByColumnName("_NOE_completeness_comp.Comp_ID")

        obsAtomCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Obs_atom_count")
        observedCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Constraint_observed_count")
        expectedCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Constraint_expected_count")
        matchedCol = resTable.getIntListByColumnName("_NOE_completeness_comp.Constraint_matched_count")

        pctCol = resTable.getFloatListByColumnName("_NOE_completeness_comp.Completeness_cumulative_pct")
        detailsCol = resTable.getStringListByColumnName("_NOE_completeness_comp.Details")

        for idx, pct in enumerate(pctCol):
            # Collect the complete row first, then look the residue up.
            wattosTuple = (entityCol[idx], seqCol[idx], nameCol[idx])
            nObsAtoms = obsAtomCol[idx]
            nObserved = observedCol[idx]
            nExpected = expectedCol[idx]
            nMatched = matchedCol[idx]
            remark = detailsCol[idx]

            residue = self.locateWattosResidue(*wattosTuple)
            if not residue:
                nTerror("Failed to find Wattos residue in CING: %s %s %s" % (wattosTuple))
                return True

            perResidue = residue.setdefault(WATTOS_STR, NTdict())
            complEntry = perResidue.setdefault(COMPLCHK_STR, NTdict())
            complEntry[VALUE_LIST_STR] = pct
            complEntry[QUAL_LIST_STR] = remark

            perResidue.setDeepByKeys(nObsAtoms, OBS_ATOM_COUNT_STR, VALUE_LIST_STR)
            perResidue.setDeepByKeys(nObserved, OBS_COUNT_STR, VALUE_LIST_STR)
            perResidue.setDeepByKeys(nExpected, EXP_COUNT_STR, VALUE_LIST_STR)
            perResidue.setDeepByKeys(nMatched, MAT_COUNT_STR, VALUE_LIST_STR)
Ejemplo n.º 3
0
def getMolTypes():
    """
    Print molecule type statistics for NMR Restraints Grid entries.

    For every entry the header of its '<entry>_wattos.str' STAR file is
    extracted to a temporary file, read, and the 'entity' saveframes are
    tallied per "<_Entity.Type>/<_Entity.Polymer_type>" key. One line is
    printed to stdout per entry and key:

        entry,type/poltype,number of entities,summed number of monomers

    An entry that raises any Exception is reported and skipped.

    Return True when interrupted by KeyboardInterrupt, None otherwise.
    """
    # NOTE(review): only the first two entries are used; this looks like a
    # debugging limit -- confirm before relying on the output.
    pdbList = PDBEntryLists.getBmrbNmrGridEntries()[0:2]
    print("Read pdb entries from NMR Restraints Grid: %s" % len(pdbList))
    pdbList.sort()

    molTypes = {}
    seq_length = {}
    for entry in pdbList:
        try:
            inputFN = os.path.join(starDir, entry, entry + '_wattos.str')
            headFN = os.path.join(tmpDir, entry + '_head.str')
            f = File()
            saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"]
            f.getHeader(saveFrameRegExList, inputFN, headFN)
            f.filename = headFN
            try:
                f.read()
            finally:
                # Remove the temp file even when reading it fails.
                os.unlink(f.filename)

            molTypesPerEntry = {}
            molTypes[entry] = molTypesPerEntry
            seq_lengthPerEntry = {}
            seq_length[entry] = seq_lengthPerEntry

            for node in f.getSaveFrames(category='entity'):
                tT = node.tagtables[0]
                typeIdx = tT.tagnames.index('_Entity.Type')
                entityType = tT.tagvalues[typeIdx][0]

                # The polymer type tag is optional; empty when absent.
                poltype = ''
                if '_Entity.Polymer_type' in tT.tagnames:
                    poltypeIdx = tT.tagnames.index('_Entity.Polymer_type')
                    poltype = tT.tagvalues[poltypeIdx][0]

                key = entityType + '/' + poltype
                molTypesPerEntry[key] = molTypesPerEntry.get(key, 0) + 1

                # The monomer count tag is optional; counts as 0 when absent.
                length = 0
                if '_Entity.Number_of_monomers' in tT.tagnames:
                    lengthIdx = tT.tagnames.index('_Entity.Number_of_monomers')
                    length = int(tT.tagvalues[lengthIdx][0])
                seq_lengthPerEntry[key] = seq_lengthPerEntry.get(key, 0) + length

            for key in molTypesPerEntry:
                line = (entry + "," + key + ',' + repr(molTypesPerEntry[key]) +
                        ',' + repr(seq_lengthPerEntry[key]))
                print(line)
        except KeyboardInterrupt:
            print("ERROR: Caught KeyboardInterrupt will exit(1)")
            return True
        except Exception as info:
            # Best effort: report the problem and carry on with the next entry.
            print("Skipping entry:  %s %s" % (entry, info))
Ejemplo n.º 4
0
seq_length = {}
for entry in pdbList:
    try:
        inputFN = os.path.join(starDir, entry, entry + '_wattos.str')
        headFN = os.path.join(tmpDir, entry + '_head.str')
        f = File()
        saveFrameRegExList = [r"^save_.*constraints", r"^save_conformer"]
        f.getHeader(saveFrameRegExList, inputFN, headFN)
        f.filename = headFN
        f.read()
        os.unlink(f.filename)  # removing temp file.
        molTypesPerEntry = {}
        molTypes[entry] = molTypesPerEntry
        seq_lengthPerEntry = {}
        seq_length[entry] = seq_lengthPerEntry
        sfList = f.getSaveFrames(category='entity')
        for node in sfList:
            tT = node.tagtables[0]
            #        print tT
            typeIdx = tT.tagnames.index('_Entity.Type')
            #        print typeIdx
            type = tT.tagvalues[typeIdx][0]
            poltype = ''
            if '_Entity.Polymer_type' in tT.tagnames:
                poltypeIdx = tT.tagnames.index('_Entity.Polymer_type')
                #        print poltypeIdx
                poltype = tT.tagvalues[poltypeIdx][0]

    #        print "type", type, ", and poltype", poltype
            key = type + '/' + poltype
            if molTypesPerEntry.has_key(key):