Exemple #1
0
    def run(self):
        """Rebuild the 'newMany2OneTable' relation of PDB/BMRB matches and write it as CSV.

        All ``*.csv`` files in the current working directory are read into a
        DBMS instance. Matches are then collected from four of those relations,
        in this order:

        1. ``score_many2one`` (the old list), accepted as is;
        2. ``adit_nmr_matched_pdb_bmrb_entry_ids``;
        3. ``BMRB_PDB_match``; for 2 and 3 a match is skipped when its PDB id
           occurs more than once in that source or is already accepted;
        4. ``manualMatches``, which replaces an already accepted match for the
           same PDB id when the BMRB id differs.

        A match is only accepted when its PDB id is present in ``pdbNmrTable``
        and its BMRB id is present in ``bmrb``.

        Returns True on failure (prepare failed, duplicate PDB ids in the
        result, or the table could not be sorted); otherwise falls off the end
        and returns None.
        """
        if self.prepare():
            nTerrorT("Failed to prepare")
            return True

        # Each CSV file becomes a relation named after the file minus '.csv'.
        relationNames = [fileName[:-4] for fileName in glob("*.csv")]
        dbms = DBMS()
        dbms.readCsvRelationList(relationNames, '.')

        # This will overwrite the just read newMany2OneTable
        newTableName = 'newMany2OneTable'
        newMany2OneTable = Relation(newTableName, dbms, columnList=['pdb_id', 'bmrb_id'])
        # The two columns below are appended to in lockstep; presumably getColumn
        # returns the live column list so the table itself is filled -- TODO confirm.
        bmrbIdNewMany2OneList = newMany2OneTable.getColumn('bmrb_id')
        pdbIdNewMany2OneList = newMany2OneTable.getColumn('pdb_id')

        tableScore_many2one = dbms.tables['score_many2one']
        bmrbIdOldMany2OneList = tableScore_many2one.getColumn('bmrb_id')
        pdbIdOldMany2OneList = tableScore_many2one.getColumn('pdb_id')

        # NB that this table is a one 2 many unlike what I need.
        tableAdit = dbms.tables['adit_nmr_matched_pdb_bmrb_entry_ids']
        bmrbIdAditList = tableAdit.getColumn('bmrb_id')
        # Only this source has its PDB ids lower-cased before use.
        pdbIdAditList = [x.lower() for x in tableAdit.getColumn('pdb_id')]
        pdbIdAditNmrHash = list2dict(pdbIdAditList)

        # New table from Dmitri with 655 matches.
        # With corrections for invalids:
        # 15591,2k0b,1,SOLUTION NMR,        15591 should be matched to 2jy7
        tableAdit2 = dbms.tables['BMRB_PDB_match']
        bmrbIdAdit2List = tableAdit2.getColumn('BMRB_ID')
        pdbIdAdit2List = tableAdit2.getColumn('PDB_ID')
        pdbIdAdit2NmrHash = list2dict(pdbIdAdit2List)

        # Manual corrections to Dmitri's table etc.
        tableManual = dbms.tables['manualMatches']  # Maintain this list in SVN control.
        bmrbIdManualList = tableManual.getColumn('bmrb_id')
        pdbIdManualList = tableManual.getColumn('pdb_id')

        # Lookup structures for the ids that are currently valid.
        tablePdbNmrTable = dbms.tables['pdbNmrTable']
        pdbIdPdbNmrHash = list2dict(tablePdbNmrTable.getColumn('pdb_id'))

        bmrbTable = dbms.tables['bmrb']
        bmrbIdHash = list2dict(bmrbTable.getColumn('bmrb_id'))

        # Step 1: take over the old matches whose ids both still exist.
        pdbIdListAbsent = []
        bmrbIdListAbsent = []
        for pdb_id, bmrb_id in zip(pdbIdOldMany2OneList, bmrbIdOldMany2OneList):
            if pdb_id not in pdbIdPdbNmrHash:
                pdbIdListAbsent.append(pdb_id)
                continue
            if bmrb_id not in bmrbIdHash:
                bmrbIdListAbsent.append(bmrb_id)
                continue
            bmrbIdNewMany2OneList.append(bmrb_id)
            pdbIdNewMany2OneList.append(pdb_id)

        l1 = len(pdbIdNewMany2OneList)
        nTmessage("Skipped: %s obsolete PDB entries from score_many2one %s" % (
            len(pdbIdListAbsent), str(pdbIdListAbsent)))
        nTmessage("Skipped: %s obsolete BMRB entries from score_many2one %s" % (
            len(bmrbIdListAbsent), str(bmrbIdListAbsent)))
        nTmessage("Accepted from old list %s matches" % l1)

        # Steps 2 and 3: do both adit lists, with per-source bookkeeping.
        aditSourceList = (
            (pdbIdAditList, bmrbIdAditList, pdbIdAditNmrHash),
            (pdbIdAdit2List, bmrbIdAdit2List, pdbIdAdit2NmrHash),
        )
        nadit = len(aditSourceList)
        pdbIdLoLDouble = [[] for _ in range(nadit)]
        pdbIdLoLObsolete = [[] for _ in range(nadit)]
        bmrbIdLoLObsolete = [[] for _ in range(nadit)]
        ltotal1 = [-1] * nadit  # running total of matches after each source
        l2 = [-1] * nadit       # number of matches contributed by each source
        previousTotal = l1
        for aditIdx, (pdbIdAditXList, bmrbIdAditXList, pdbIdAditXNmrHash) in enumerate(aditSourceList):
            pdbIdListDouble = pdbIdLoLDouble[aditIdx]
            pdbIdListObsolete = pdbIdLoLObsolete[aditIdx]
            bmrbIdListObsolete = bmrbIdLoLObsolete[aditIdx]
            for pdb_id, bmrb_id in zip(pdbIdAditXList, bmrbIdAditXList):
                # Presumably list2dict maps an id to its number of occurrences, so
                # this skips PDB ids listed more than once in this source -- TODO confirm.
                if pdbIdAditXNmrHash[pdb_id] > 1:
                    if pdb_id not in pdbIdListDouble:
                        pdbIdListDouble.append(pdb_id)
                    continue
                if pdb_id not in pdbIdPdbNmrHash:
                    pdbIdListObsolete.append(pdb_id)
                    continue
                if bmrb_id not in bmrbIdHash:
                    bmrbIdListObsolete.append(bmrb_id)
                    continue
                # Earlier sources take precedence for a given PDB id.
                if pdb_id in pdbIdNewMany2OneList:
                    continue
                # A BMRB id that is already present is allowed (many PDB to one BMRB).
                bmrbIdNewMany2OneList.append(bmrb_id)
                pdbIdNewMany2OneList.append(pdb_id)
            ltotal1[aditIdx] = len(pdbIdNewMany2OneList)
            l2[aditIdx] = ltotal1[aditIdx] - previousTotal
            previousTotal = ltotal1[aditIdx]

        # Step 4: manual matches override whatever was accepted so far.
        for pdb_id, bmrb_id in zip(pdbIdManualList, bmrbIdManualList):
            if pdb_id not in pdbIdPdbNmrHash:
                nTerror("Failed to find %s in PDB; update the manual list." % pdb_id)
                continue
            if bmrb_id not in bmrbIdHash:
                nTerror("Failed to find %s in BMRB; update the manual list." % bmrb_id)
                continue
            if pdb_id in pdbIdNewMany2OneList:
                currentIdx = pdbIdNewMany2OneList.index(pdb_id)
                bmrb_id_current = bmrbIdNewMany2OneList[currentIdx]
                if bmrb_id_current == bmrb_id:
                    nTmessage("Already found %s in PDB with BMRB %s in manual and current list; consider updating the manual list." % (
                        pdb_id, bmrb_id))
                    continue
                nTmessage("Using manual mapping of %s in PDB with BMRB %s in manual list instead of BMRB %s in current list." % (
                    pdb_id, bmrb_id, bmrb_id_current))
                nTmessage("First removing match at idx %s in current list." % currentIdx)
                # Delete from both columns so they stay aligned.
                del bmrbIdNewMany2OneList[currentIdx]
                del pdbIdNewMany2OneList[currentIdx]
            # A BMRB id that is already present is allowed (many PDB to one BMRB).
            bmrbIdNewMany2OneList.append(bmrb_id)
            pdbIdNewMany2OneList.append(pdb_id)

        ltotal2 = len(pdbIdNewMany2OneList)
        l3 = ltotal2 - ltotal1[nadit - 1]

        uniquePdbCount = len(list2dict(pdbIdNewMany2OneList))
        uniqueBmrbCount = len(list2dict(bmrbIdNewMany2OneList))

        for aditIdx in range(nadit):
            pdbIdLoLObsolete[aditIdx].sort()
            bmrbIdLoLObsolete[aditIdx].sort()
            # Report the doubles of every source; the list is looked up per
            # source because the loop variable above only retains the last one.
            nTmessage("Skipped: %s double entries from adit%s %s" % (
                len(pdbIdLoLDouble[aditIdx]), aditIdx, str(pdbIdLoLDouble[aditIdx])))
            nTmessage("Skipped: %s obsolete  PDB entries from adit%s %s" % (
                len(pdbIdLoLObsolete[aditIdx]), aditIdx, str(pdbIdLoLObsolete[aditIdx])))
            nTmessage("Skipped: %s obsolete BMRB entries from adit%s %s" % (
                len(bmrbIdLoLObsolete[aditIdx]), aditIdx, str(bmrbIdLoLObsolete[aditIdx])))
            nTmessage("Accepted from adit%s %s for a total of %s matches" % (
                aditIdx, l2[aditIdx], ltotal1[aditIdx]))
        nTmessage("Accepted from manual list %s for a total of %s matches" % (l3, ltotal2))
        nTmessage("Accepted unique %d PDB and %d BMRB entries" % (uniquePdbCount, uniqueBmrbCount))

        # A PDB id may occur only once in the result; the return value of
        # removeDuplicates() is treated here as the list of duplicates found.
        pdbIdNewMany2OneNTList = NTlist(*pdbIdNewMany2OneList)
        pdbIdDuplicateList = pdbIdNewMany2OneNTList.removeDuplicates()
        if pdbIdDuplicateList:
            nTerror("Got %s duplicate PDB entries in result: %s" % (
                len(pdbIdDuplicateList), str(pdbIdDuplicateList)))
            return True

        # BMRB ids may repeat; this is only reported.
        # NOTE(review): the second call replaces the duplicate list by whatever
        # removeDuplicates() returns for it. If that is the list of removed
        # elements (as the PDB check above treats it), the count reported below
        # only covers BMRB ids occurring three or more times -- confirm against NTlist.
        bmrbIdNewMany2OneNTList = NTlist(*bmrbIdNewMany2OneList)
        bmrbIdDuplicateList = bmrbIdNewMany2OneNTList.removeDuplicates()
        bmrbIdDuplicateList = bmrbIdDuplicateList.removeDuplicates()
        if bmrbIdDuplicateList:
            nTmessage("Using %s BMRB entries that match two or more PDB entries." % len(bmrbIdDuplicateList))

        # Sort on both columns (indices 0 and 1) before writing.
        if newMany2OneTable.sortRelationByColumnIdx([0, 1]):
            nTerror("Failed to sort table: %s" % newTableName)
            return True
        newMany2OneTable.writeCsvFile()
Exemple #2
0
    def run(self):
        """Rebuild the 'newMany2OneTable' relation of PDB/BMRB matches and write it as CSV.

        All ``*.csv`` files in the current working directory are read into a
        DBMS instance. Matches are then collected from four of those relations,
        in this order:

        1. ``score_many2one`` (the old list), accepted as is;
        2. ``adit_nmr_matched_pdb_bmrb_entry_ids``;
        3. ``BMRB_PDB_match``; for 2 and 3 a match is skipped when its PDB id
           occurs more than once in that source or is already accepted;
        4. ``manualMatches``, which replaces an already accepted match for the
           same PDB id when the BMRB id differs.

        A match is only accepted when its PDB id is present in ``pdbNmrTable``
        and its BMRB id is present in ``bmrb``.

        Returns True on failure (prepare failed, duplicate PDB ids in the
        result, or the table could not be sorted); otherwise falls off the end
        and returns None.
        """
        if self.prepare():
            nTerrorT("Failed to prepare")
            return True

        # Each CSV file becomes a relation named after the file minus '.csv'.
        relationNames = [fileName[:-4] for fileName in glob("*.csv")]
        dbms = DBMS()
        dbms.readCsvRelationList(relationNames, '.')

        # This will overwrite the just read newMany2OneTable
        newTableName = 'newMany2OneTable'
        newMany2OneTable = Relation(newTableName,
                                    dbms,
                                    columnList=['pdb_id', 'bmrb_id'])
        # The two columns below are appended to in lockstep; presumably getColumn
        # returns the live column list so the table itself is filled -- TODO confirm.
        bmrbIdNewMany2OneList = newMany2OneTable.getColumn('bmrb_id')
        pdbIdNewMany2OneList = newMany2OneTable.getColumn('pdb_id')

        tableScore_many2one = dbms.tables['score_many2one']
        bmrbIdOldMany2OneList = tableScore_many2one.getColumn('bmrb_id')
        pdbIdOldMany2OneList = tableScore_many2one.getColumn('pdb_id')

        # NB that this table is a one 2 many unlike what I need.
        tableAdit = dbms.tables['adit_nmr_matched_pdb_bmrb_entry_ids']
        bmrbIdAditList = tableAdit.getColumn('bmrb_id')
        # Only this source has its PDB ids lower-cased before use.
        pdbIdAditList = [x.lower() for x in tableAdit.getColumn('pdb_id')]
        pdbIdAditNmrHash = list2dict(pdbIdAditList)

        # New table from Dmitri with 655 matches.
        # With corrections for invalids:
        # 15591,2k0b,1,SOLUTION NMR,        15591 should be matched to 2jy7
        tableAdit2 = dbms.tables['BMRB_PDB_match']
        bmrbIdAdit2List = tableAdit2.getColumn('BMRB_ID')
        pdbIdAdit2List = tableAdit2.getColumn('PDB_ID')
        pdbIdAdit2NmrHash = list2dict(pdbIdAdit2List)

        # Manual corrections to Dmitri's table etc.
        tableManual = dbms.tables[
            'manualMatches']  # Maintain this list in SVN control.
        bmrbIdManualList = tableManual.getColumn('bmrb_id')
        pdbIdManualList = tableManual.getColumn('pdb_id')

        # Lookup structures for the ids that are currently valid.
        tablePdbNmrTable = dbms.tables['pdbNmrTable']
        pdbIdPdbNmrHash = list2dict(tablePdbNmrTable.getColumn('pdb_id'))

        bmrbTable = dbms.tables['bmrb']
        bmrbIdHash = list2dict(bmrbTable.getColumn('bmrb_id'))

        # Step 1: take over the old matches whose ids both still exist.
        pdbIdListAbsent = []
        bmrbIdListAbsent = []
        for pdb_id, bmrb_id in zip(pdbIdOldMany2OneList,
                                   bmrbIdOldMany2OneList):
            if pdb_id not in pdbIdPdbNmrHash:
                pdbIdListAbsent.append(pdb_id)
                continue
            if bmrb_id not in bmrbIdHash:
                bmrbIdListAbsent.append(bmrb_id)
                continue
            bmrbIdNewMany2OneList.append(bmrb_id)
            pdbIdNewMany2OneList.append(pdb_id)

        l1 = len(pdbIdNewMany2OneList)
        nTmessage("Skipped: %s obsolete PDB entries from score_many2one %s" %
                  (len(pdbIdListAbsent), str(pdbIdListAbsent)))
        nTmessage("Skipped: %s obsolete BMRB entries from score_many2one %s" %
                  (len(bmrbIdListAbsent), str(bmrbIdListAbsent)))
        nTmessage("Accepted from old list %s matches" % l1)

        # Steps 2 and 3: do both adit lists, with per-source bookkeeping.
        aditSourceList = (
            (pdbIdAditList, bmrbIdAditList, pdbIdAditNmrHash),
            (pdbIdAdit2List, bmrbIdAdit2List, pdbIdAdit2NmrHash),
        )
        nadit = len(aditSourceList)
        pdbIdLoLDouble = [[] for _ in range(nadit)]
        pdbIdLoLObsolete = [[] for _ in range(nadit)]
        bmrbIdLoLObsolete = [[] for _ in range(nadit)]
        ltotal1 = [-1] * nadit  # running total of matches after each source
        l2 = [-1] * nadit  # number of matches contributed by each source
        previousTotal = l1
        for aditIdx, aditSource in enumerate(aditSourceList):
            pdbIdAditXList, bmrbIdAditXList, pdbIdAditXNmrHash = aditSource
            pdbIdListDouble = pdbIdLoLDouble[aditIdx]
            pdbIdListObsolete = pdbIdLoLObsolete[aditIdx]
            bmrbIdListObsolete = bmrbIdLoLObsolete[aditIdx]
            for pdb_id, bmrb_id in zip(pdbIdAditXList, bmrbIdAditXList):
                # Presumably list2dict maps an id to its number of occurrences, so
                # this skips PDB ids listed more than once in this source -- TODO confirm.
                if pdbIdAditXNmrHash[pdb_id] > 1:
                    if pdb_id not in pdbIdListDouble:
                        pdbIdListDouble.append(pdb_id)
                    continue
                if pdb_id not in pdbIdPdbNmrHash:
                    pdbIdListObsolete.append(pdb_id)
                    continue
                if bmrb_id not in bmrbIdHash:
                    bmrbIdListObsolete.append(bmrb_id)
                    continue
                # Earlier sources take precedence for a given PDB id.
                if pdb_id in pdbIdNewMany2OneList:
                    continue
                # A BMRB id that is already present is allowed (many PDB to one BMRB).
                bmrbIdNewMany2OneList.append(bmrb_id)
                pdbIdNewMany2OneList.append(pdb_id)
            ltotal1[aditIdx] = len(pdbIdNewMany2OneList)
            l2[aditIdx] = ltotal1[aditIdx] - previousTotal
            previousTotal = ltotal1[aditIdx]

        # Step 4: manual matches override whatever was accepted so far.
        for pdb_id, bmrb_id in zip(pdbIdManualList, bmrbIdManualList):
            if pdb_id not in pdbIdPdbNmrHash:
                nTerror("Failed to find %s in PDB; update the manual list." %
                        pdb_id)
                continue
            if bmrb_id not in bmrbIdHash:
                nTerror("Failed to find %s in BMRB; update the manual list." %
                        bmrb_id)
                continue
            if pdb_id in pdbIdNewMany2OneList:
                currentIdx = pdbIdNewMany2OneList.index(pdb_id)
                bmrb_id_current = bmrbIdNewMany2OneList[currentIdx]
                if bmrb_id_current == bmrb_id:
                    nTmessage(
                        "Already found %s in PDB with BMRB %s in manual and current list; consider updating the manual list."
                        % (pdb_id, bmrb_id))
                    continue
                nTmessage(
                    "Using manual mapping of %s in PDB with BMRB %s in manual list instead of BMRB %s in current list."
                    % (pdb_id, bmrb_id, bmrb_id_current))
                nTmessage("First removing match at idx %s in current list." %
                          currentIdx)
                # Delete from both columns so they stay aligned.
                del bmrbIdNewMany2OneList[currentIdx]
                del pdbIdNewMany2OneList[currentIdx]
            # A BMRB id that is already present is allowed (many PDB to one BMRB).
            bmrbIdNewMany2OneList.append(bmrb_id)
            pdbIdNewMany2OneList.append(pdb_id)

        ltotal2 = len(pdbIdNewMany2OneList)
        l3 = ltotal2 - ltotal1[nadit - 1]

        uniquePdbCount = len(list2dict(pdbIdNewMany2OneList))
        uniqueBmrbCount = len(list2dict(bmrbIdNewMany2OneList))

        for aditIdx in range(nadit):
            pdbIdLoLObsolete[aditIdx].sort()
            bmrbIdLoLObsolete[aditIdx].sort()
            # Report the doubles of every source; the list is looked up per
            # source because the loop variable above only retains the last one.
            nTmessage("Skipped: %s double entries from adit%s %s" %
                      (len(pdbIdLoLDouble[aditIdx]), aditIdx,
                       str(pdbIdLoLDouble[aditIdx])))
            nTmessage("Skipped: %s obsolete  PDB entries from adit%s %s" %
                      (len(pdbIdLoLObsolete[aditIdx]), aditIdx,
                       str(pdbIdLoLObsolete[aditIdx])))
            nTmessage("Skipped: %s obsolete BMRB entries from adit%s %s" %
                      (len(bmrbIdLoLObsolete[aditIdx]), aditIdx,
                       str(bmrbIdLoLObsolete[aditIdx])))
            nTmessage("Accepted from adit%s %s for a total of %s matches" %
                      (aditIdx, l2[aditIdx], ltotal1[aditIdx]))
        nTmessage("Accepted from manual list %s for a total of %s matches" %
                  (l3, ltotal2))
        nTmessage("Accepted unique %d PDB and %d BMRB entries" %
                  (uniquePdbCount, uniqueBmrbCount))

        # A PDB id may occur only once in the result; the return value of
        # removeDuplicates() is treated here as the list of duplicates found.
        pdbIdNewMany2OneNTList = NTlist(*pdbIdNewMany2OneList)
        pdbIdDuplicateList = pdbIdNewMany2OneNTList.removeDuplicates()
        if pdbIdDuplicateList:
            nTerror("Got %s duplicate PDB entries in result: %s" %
                    (len(pdbIdDuplicateList), str(pdbIdDuplicateList)))
            return True

        # BMRB ids may repeat; this is only reported.
        # NOTE(review): the second call replaces the duplicate list by whatever
        # removeDuplicates() returns for it. If that is the list of removed
        # elements (as the PDB check above treats it), the count reported below
        # only covers BMRB ids occurring three or more times -- confirm against NTlist.
        bmrbIdNewMany2OneNTList = NTlist(*bmrbIdNewMany2OneList)
        bmrbIdDuplicateList = bmrbIdNewMany2OneNTList.removeDuplicates()
        bmrbIdDuplicateList = bmrbIdDuplicateList.removeDuplicates()
        if bmrbIdDuplicateList:
            nTmessage(
                "Using %s BMRB entries that match two or more PDB entries." %
                len(bmrbIdDuplicateList))

        # Sort on both columns (indices 0 and 1) before writing.
        if newMany2OneTable.sortRelationByColumnIdx([0, 1]):
            nTerror("Failed to sort table: %s" % newTableName)
            return True
        newMany2OneTable.writeCsvFile()