Beispiel #1
0
 def create_methane_test(self, params, name):
     """
     Example generator: build one chunk filled with a small grid of carbons.

     A carbon atom is created at every position (x, y, 0) with x in
     range(max(params[0], 1)) and y in (0, 1), and
     make_bondpoints_when_no_bonds() is called on each one (the original
     author describes the result as methanes).

     @param params: sequence whose first item is the requested number of
                    columns; values below 1 are treated as 1.
     @param name: name given to the chunk once all atoms are created.

     @return: the new chunk. It is not added to the assembly here.
     """
     print("create_methane_test")
     assembly = self.win.assy
     from geometry.VQT import V
     from model.chunk import Chunk
     from model.chem import Atom
     # Placeholder name; replaced by the real one after the atoms are built.
     chunk = Chunk(assembly, 'bug')
     columns = max(params[0], 1)
     for col in range(columns):
         for row in (0, 1):
             # Much like the make_Atom_and_bondpoints method does it.
             carbon = Atom('C', V(col, row, 0), chunk)
             carbon.make_bondpoints_when_no_bonds() # notices atomtype
     chunk.name = name
     return chunk
Beispiel #2
0
def _readpdb_new(assy, 
                 filename, 
                 isInsert = False, 
                 showProgressDialog = False, 
                 chainId = None):
    """
    Read a Protein DataBank-format file and build model objects from it.

    Atoms are collected into one chunk per chain; a TER record ends the
    current chain. When a chain is finished:
      - if it contains at least one C-alpha atom it is treated as a protein:
        its bonds are inferred and the chunk is appended to the result list;
      - otherwise it is split into one chunk per residue, and these chunks
        are collected in a "Heteroatoms" group which is appended instead.
    Atoms for which is_water() is true go to a separate chunk which, if it is
    not empty, is named "Solvent", hidden and appended last.

    Only the first MODEL of a multi-model file is read. HEADER, COMPND and
    REMARK records (and Rosetta scoring lines, if present) are accumulated
    into a comment string.

    Unless isInsert is True, assy.filename is set to match the file we read.
    
    @param assy: The assembly.
    @type  assy: L{assembly}
    
    @param filename: The PDB filename to read.
    @type  filename: string
    
    @param isInsert: If True, the PDB file will be inserted into the current
                     assembly. If False (default), the PDB is opened as the 
                     assembly.
    @type  isInsert: boolean
    
    @param showProgressDialog: if True, display a progress dialog while reading
                               a file.
    @type  showProgressDialog: boolean

    @param chainId: Not consulted on input; it is overwritten by the chain
                    identifier of every ATOM/HETATM record that is read.
    @type  chainId: string or None
    
    @return: A tuple (mollist, comment_text): the list of chunks and groups
             created from the file, and the accumulated comment text.
    @rtype:  tuple
    
    @see: U{B{PDB File Format}<http://www.wwpdb.org/documentation/format23/v2.3.html>}
    """

    from protein.model.Protein import is_water

    def _nodigits(name):
        """
        Return name with all decimal digits removed.
        """
        for bad in "0123456789":
            name = name.replace(bad, "")
        return name

    def _is_known_element(symbol):
        """
        Return True if PeriodicTable recognizes symbol.
        """
        try:
            PeriodicTable.getElement(symbol)
        except Exception:
            # note: this typically fails with AssertionError 
            # (not e.g. KeyError) [bruce 050322]
            return False
        return True
    
    def _finish_molecule():
        """
        Perform some operations after reading an entire PDB chain:
          - rename the molecule to reflect the PDB ID and chain ID
          - if the chain has no C-alpha atoms, split it into per-residue
            chunks collected in a "Heteroatoms" group
          - otherwise infer bonds and record chain ID / PDB ID on the protein
          - append the result to the molecule list
        A warning is written to the history if the chain has no atoms.
        """
        if mol is water:
            # Skip water, to be added explicitly at the end.
            return

        if not mol.atoms:
            env.history.message( redmsg( "Warning: Pdb file contained no atoms"))
            env.history.h_update()
            return

        mol.name = pdbid.lower() + chainId

        if mol.protein.count_c_alpha_atoms() == 0:
            # If there is no C-alpha atoms, consider the chunk 
            # as a non-protein. But! Split it into individual 
            # hetero groups.
            res_list = mol.protein.get_amino_acids()
            assy.part.ensure_toplevel_group()
            hetgroup = Group("Heteroatoms", assy, assy.part.topnode) 
            for res in res_list:
                hetmol = Chunk(assy, 
                               res.get_three_letter_code().replace(" ", "") + \
                               "[" + str(res.get_id()) + "]")
                for atom in res.get_atom_list():
                    Atom(atom.element.symbol, atom.posn(), hetmol)
                # New chunk - infer the bonds anyway (this is not
                # correct, should first check connectivity read from
                # the PDB file CONECT records).
                inferBonds(hetmol)
                hetgroup.addchild(hetmol)
            mollist.append(hetgroup)
        else:
            # For protein - infer the bonds anyway.
            inferBonds(mol)

            mol.protein.set_chain_id(chainId)
            mol.protein.set_pdb_id(pdbid)
            if mol.atoms:
                mollist.append(mol)

    fi = open(filename, "rU")
    try:
        lines = fi.readlines()
    finally:
        # Close the file even if reading fails.
        fi.close()
    
    mollist = []

    # Secondary structure membership, as sets of (res_id, chain_id) tuples.
    # Sets keep the per-atom membership tests below cheap.
    helix = set()
    sheet = set()
    turn = set()
    
    nodename = os.path.split(filename)[1]
    if not isInsert:
        assy.filename = filename
    
    # Maps PDB atom serial number -> Atom, for resolving CONECT records.
    ndix = {}
    mol = Chunk(assy, nodename)
    mol.protein = Protein()
    
    # Create a chunk for water molecules.
    water = Chunk(assy, nodename)
            
    numconects = 0
    
    comment_text = ""
    _read_rosetta_info = False
    
    # Create a temporary PDB ID - it should be later extracted from the
    # file header.
    pdbid = nodename.replace(".pdb","").lower()
    
    atomname_exceptions = {
        "HB":"H", #k these are all guesses -- I can't find this documented 
                  # anywhere [bruce 070410]
        "CA":"C",
        ## "HE":"H", ### REVIEW: I'm not sure about this one -- 
                    ###          leaving it out means it's read as Helium,
        # but including it erroneously might prevent reading an actual Helium 
        # if that was intended.
        # Guess for now: include it for ATOM but not HETATM. (So it's 
        # specialcased below, rather than being included in this table.)
        # (Later: can't we use the case of the 'E' to distinguish it from He?)
        "HN":"H",
     }
    
    # Create and display a Progress dialog while reading the PDB file. 
    # One issue with this implem is that QProgressDialog always displays 
    # a "Cancel" button, which is not hooked up. I think this is OK for now,
    # but later we should either hook it up or create our own progress
    # dialog that doesn't include a "Cancel" button. --mark 2007-12-06
    if showProgressDialog:
        _progressValue = 0
        _progressFinishValue = len(lines)
        win = env.mainwindow()
        win.progressDialog.setLabelText("Reading file...")
        win.progressDialog.setRange(0, _progressFinishValue)
        _progressDialogDisplayed = False
        _timerStart = time.time()

    for card in lines:
        # strip() as well as removing blanks: a short card such as "TER\n"
        # would otherwise keep its newline and never match a record name.
        key = card[:6].lower().replace(" ", "").strip()
        if key in ("atom", "hetatm"):
            # bruce 080508 revision (guess at a bugfix for reading NE1-saved
            # pdb files):
            # get a list of atomnames to try; use the first one we recognize.
            # Note that full atom name is in columns 13-16 i.e. card[12:16];
            # see http://www.wwpdb.org/documentation/format2.3-0108-us.pdf,
            # page 156.
            name4 = card[12:16].replace(" ", "").replace("_", "")
            name3 = card[12:15].replace(" ", "").replace("_", "")
            name2 = card[12:14].replace(" ", "").replace("_", "")
            chainId = card[21]
            resIdStr = card[22:26].replace(" ", "")
            if resIdStr:
                resId = int(resIdStr)
            else:
                resId = 0
            resName = card[17:20]
            alt = card[16] # Alternate location indicator
            
            if alt not in (' ', 'A'):
                # Skip non-standard alternate location
                # This is not very safe test, it should preserve
                # the remaining atoms. piotr 080715 
                continue
            
###ATOM    131  CB  ARG A  18     104.359  32.924  58.573  1.00 36.93           C  

            # First, look at the element symbol field, columns 77-78
            # (right-justified), i.e. card[76:78]. Both columns are needed:
            # reading only column 78 turns e.g. ZN into N.
            sym = capitalize(card[76:78].strip())
            foundit = bool(sym) and _is_known_element(sym)

            if not foundit:
                # Fall back on guessing the element from the atom name.
                atomnames_to_try = [
                    name4, # as seems best according to documentation
                    name3,
                    name2, # like old code
                    _nodigits(name4),
                    _nodigits(name3),
                    _nodigits(name2) # like code as revised on 070410
                ]
                for atomname in atomnames_to_try:
                    atomname = atomname_exceptions.get(atomname, atomname)
                    # startswith (not [0]) so an empty name can't raise.
                    if atomname.startswith('H') and key == "atom":
                        atomname = "H" # see comment in atomname_exceptions
                    sym = capitalize(atomname) # turns either 'he' or 'HE' into 'He'
                    if _is_known_element(sym):
                        foundit = True
                        break

            if not foundit:
                msg = "Warning: Pdb file: will use Carbon in place of unknown element %s in: %s" \
                    % (name4, card)
                print(msg) #bruce 070410 added this print
                env.history.message( redmsg( msg ))

                # Create a stand-in atom so the CONECT records still work.
                # [bruce 080508]
                sym = "C"
                
                # Better still might be to create a fake element, 
                # so we could write out the pdb file again
                # (albeit missing lots of info). [bruce 070410 comment]
                
                # Note: an advisor tells us:
                #   PDB files sometimes encode atomtypes,
                #   using C_R instead of C, for example, to represent sp2 
                #   carbons.
                # It would be better to realize this means sp2 and set the
                # atomtype here (and perhaps then use it when inferring
                # bonds). [bruce 060614/070410 comment]

            _is_water = is_water(resName, name4)
            if _is_water:
                target = water
            else:
                target = mol
                
            # Now the element name is in sym.
            xyz = [float(card[30:38]), float(card[38:46]), float(card[46:54])]
            n = int(card[6:11])
            a = Atom(sym, A(xyz), target)
            ndix[n] = a
            
            if not _is_water:
                # The water chunk has no Protein object, so residue
                # bookkeeping applies to chain atoms only.
                mol.protein.add_pdb_atom(a, 
                                         name4, 
                                         resId, 
                                         resName)
            
                # Assign secondary structure.
                residue = (resId, chainId)
                if residue in helix:
                    mol.protein.assign_helix(resId)
                if residue in sheet:
                    mol.protein.assign_strand(resId)
                if residue in turn:
                    mol.protein.assign_turn(resId)
            
        elif key == "conect":
            try:
                a1 = ndix[int(card[6:11])]
            except (ValueError, KeyError):
                #bruce 050322 added this level of try/except and its message;
                # either the serial number is not an integer or no atom with
                # that serial number was read.
                env.history.message( redmsg( "Warning: Pdb file: can't find first atom in CONECT record: %s" % (card,) ))
            else:
                for i in range(11, 70, 5):
                    try:
                        a2 = ndix[int(card[i:i+5])]
                    except ValueError:
                        # bruce 050323 comment:
                        # we assume this is from int('') or int(' ') etc;
                        # this is the usual way of ending this loop.
                        break
                    except KeyError:
                        #bruce 050322-23 added history warning for this,
                        # assuming it comes from ndix[] lookup.
                        env.history.message( redmsg( "Warning: Pdb file: can't find atom %s in: %s" % (card[i:i+5], card) ))
                        continue
                    bond_atoms(a1, a2)
                    numconects += 1

        elif key == "ter":
            # Finish the current molecule.
            _finish_molecule()
            
            # Discard the original molecule and create a new one. 
            mol = Chunk(assy, nodename)
            mol.protein = Protein()
            numconects = 0
                        
        elif key == "header":
            # Extract PDB ID from the header string.
            pdbid = card[62:66].lower()
            comment_text += card
        
        elif key in ("compnd", "remark"):
            comment_text += card
            
        elif key == "model":
            # Check out the MODEL record, ignore everything other than MODEL 1.
            # This behavior has to be optional and set via User Preference.
            # piotr 080714
            model_id = int(card[6:20])
            if model_id > 1:
                # Skip remaining part of the file.
                break
            
        # Secondary structure records. The chain ID is kept in its own local
        # (ss_chain) so that it cannot clobber chainId, which names the chain
        # currently being read.
        elif key == "helix":
            # initSeqNum is columns 22-25, endSeqNum is columns 34-37.
            ss_chain = card[19]
            begin = int(card[21:25])
            end = int(card[33:37])
            for s in range(begin, end + 1):
                helix.add((s, ss_chain))

        elif key == "sheet":
            # initSeqNum is columns 23-26, endSeqNum is columns 34-37.
            ss_chain = card[21]
            begin = int(card[22:26])
            end = int(card[33:37])
            for s in range(begin, end + 1):
                sheet.add((s, ss_chain))

        elif key == "turn":
            # NOTE(review): these column ranges are unchanged from the
            # original code and look copied from the SHEET case; confirm
            # them against the TURN record layout in the PDB v2.3 format.
            ss_chain = card[19]
            begin = int(card[23:26])
            end = int(card[34:37])
            for s in range(begin, end + 1):
                turn.add((s, ss_chain))

        else:
            if card[7:15] == "ntrials:":
                _read_rosetta_info = True
                comment_text += "Rosetta Scoring Analysis\n"
            if _read_rosetta_info:
                comment_text += card
                
        if showProgressDialog: # Update the progress dialog.
            _progressValue += 1
            if _progressValue >= _progressFinishValue:
                win.progressDialog.setLabelText("Building model...")
            elif _progressDialogDisplayed:
                win.progressDialog.setValue(_progressValue)
            else:
                _timerDuration = time.time() - _timerStart
                if _timerDuration > 0.25: 
                    # Display progress dialog after 0.25 seconds
                    win.progressDialog.setValue(_progressValue)
                    _progressDialogDisplayed = True
    
    if showProgressDialog: # Make the progress dialog go away.
        win.progressDialog.setValue(_progressFinishValue) 
    
    # Finish the last chain (files need not end with a TER record).
    _finish_molecule()
    
    if water.atoms:
        # Check if there are any water molecules
        water.name = "Solvent"
        # The water should be hidden by default.
        water.hide()
        mollist.append(water)
        
    return (mollist, comment_text)