예제 #1
0
def _readpdb_new(assy,
                 filename,
                 isInsert = False,
                 showProgressDialog = False,
                 chainId = None):
    """
    Read a Protein DataBank-format file and build chunks from its records.

    The atoms read between two TER records (or up to the end of the file)
    form one chunk carrying a Protein object. If such a chunk has no C-alpha
    atoms, it is instead split into one chunk per residue and those chunks
    are collected in a Group named "Heteroatoms". Atoms recognized by
    is_water() are gathered in a single hidden chunk named "Solvent", which
    is appended after all other nodes. Only MODEL 1 is read; reading stops at
    the first MODEL record with a higher number.

    Unless isInsert is True, assy.filename is set to match the file we read,
    even if no atoms are found.

    @param assy: The assembly.
    @type  assy: L{assembly}

    @param filename: The PDB filename to read.
    @type  filename: string

    @param isInsert: If True, the PDB file will be inserted into the current
                     assembly. If False (default), the PDB is opened as the
                     assembly.
    @type  isInsert: boolean

    @param showProgressDialog: if True, display a progress dialog while reading
                               a file.
    @type  showProgressDialog: boolean

    @param chainId: Not used as a filter. The value is replaced by the chain
                    ID of every ATOM / HETATM record that is read.
    @type  chainId: string or None

    @return: A tuple (mollist, comment_text). mollist is the list of nodes
             that were built (protein chunks, "Heteroatoms" groups and,
             last, the "Solvent" chunk); it may be empty. comment_text is the
             concatenation of the HEADER, COMPND and REMARK records plus any
             Rosetta scoring lines found in the file.
    @rtype:  tuple (list, string)

    @note: Malformed records are not all handled here: too-short ATOM lines
           raise IndexError and non-numeric coordinate or serial fields raise
           ValueError.

    @see: U{B{PDB File Format}<http://www.wwpdb.org/documentation/format23/v2.3.html>}
    """

    from protein.model.Protein import is_water

    def _finish_molecule():
        """
        Perform some operations after reading entire PDB chain:
          - rebuild (infer) bonds
          - rename molecule to reflect a chain ID
          - split the chunk into per-residue hetero groups if it is not
            a protein
          - append the result to the molecule list
        """
        if mol == water:
            # Skip water, to be added explicitly at the end.
            return

        if not mol.atoms:
            env.history.message( redmsg( "Warning: Pdb file contained no atoms"))
            env.history.h_update()
            return

        mol.name = pdbid.lower() + chainId

        if mol.protein.count_c_alpha_atoms() == 0:
            # If there is no C-alpha atoms, consider the chunk
            # as a non-protein. But! Split it into individual
            # hetero groups.
            res_list = mol.protein.get_amino_acids()
            assy.part.ensure_toplevel_group()
            hetgroup = Group("Heteroatoms", assy, assy.part.topnode)
            for res in res_list:
                hetname = res.get_three_letter_code().replace(" ", "") + \
                          "[" + str(res.get_id()) + "]"
                hetmol = Chunk(assy, hetname)
                for atom in res.get_atom_list():
                    # Constructed for its side effect: the Atom constructor
                    # is given hetmol as the owning chunk.
                    Atom(atom.element.symbol, atom.posn(), hetmol)
                # New chunk - infer the bonds anyway (this is not
                # correct, should first check connectivity read from
                # the PDB file CONECT records).
                inferBonds(hetmol)
                hetgroup.addchild(hetmol)
            mollist.append(hetgroup)
        else:
            # For protein - infer the bonds anyway.
            inferBonds(mol)

            mol.protein.set_chain_id(chainId)
            mol.protein.set_pdb_id(pdbid)
            if mol.atoms:
                mollist.append(mol)

    def _nodigits(name):
        """
        Return name with all decimal digits removed.
        """
        for bad in "0123456789":
            name = name.replace(bad, "")
        return name

    def _is_known_element(symbol):
        """
        Return True if PeriodicTable.getElement accepts symbol.
        """
        try:
            PeriodicTable.getElement(symbol)
        except Exception:
            # note: this typically fails with AssertionError
            # (not e.g. KeyError) [bruce 050322]
            return False
        return True

    # Make sure the file gets closed even if reading it fails.
    fi = open(filename, "rU")
    try:
        lines = fi.readlines()
    finally:
        fi.close()

    mollist = []

    # Sets of secondary structure tuples (res_id, chain_id). Sets are used
    # because membership is tested once per atom.
    helix = set()
    sheet = set()
    turn = set()

    nodename = os.path.basename(filename)
    if not isInsert:
        assy.filename = filename

    # Maps PDB atom serial numbers to the atoms created for them
    # (used to resolve CONECT records).
    ndix = {}
    mol = Chunk(assy, nodename)
    mol.protein = Protein()

    # Create a chunk for water molecules.
    water = Chunk(assy, nodename)

    # Number of bonds made from CONECT records in the current chain.
    # Currently only counted, never read.
    numconects = 0

    comment_text = ""
    _read_rosetta_info = False

    # Create a temporary PDB ID - it should be later extracted from the
    # file header.
    pdbid = nodename.replace(".pdb","").lower()

    # Atom names that should not be interpreted literally as element
    # symbols. These are all guesses -- not found documented anywhere
    # [bruce 070410].
    # "HE" is deliberately not in this table: for ATOM records every name
    # starting with 'H' is read as hydrogen (see the loop below), while for
    # HETATM records "HE" is still read as Helium.
    atomname_exceptions = {
        "HB":"H",
        "CA":"C",
        "HN":"H",
     }

    # Create and display a Progress dialog while reading the PDB file.
    # One issue with this implem is that QProgressDialog always displays
    # a "Cancel" button, which is not hooked up. I think this is OK for now,
    # but later we should either hook it up or create our own progress
    # dialog that doesn't include a "Cancel" button. --mark 2007-12-06
    if showProgressDialog:
        _progressValue = 0
        _progressFinishValue = len(lines)
        win = env.mainwindow()
        win.progressDialog.setLabelText("Reading file...")
        win.progressDialog.setRange(0, _progressFinishValue)
        _progressDialogDisplayed = False
        _timerStart = time.time()

    for card in lines:
        key = card[:6].lower().replace(" ", "")
        if key in ("atom", "hetatm"):
            # Example record:
            # ATOM    131  CB  ARG A  18     104.359  32.924  58.573  1.00 36.93           C
            #
            # Note that full atom name is in columns 13-16 i.e. card[12:16];
            # see http://www.wwpdb.org/documentation/format2.3-0108-us.pdf,
            # page 156. Shorter prefixes are kept as fallbacks for files
            # that do not follow the specification (e.g. NE1-saved files).
            # [bruce 080508]
            name4 = card[12:16].replace(" ", "").replace("_", "")
            name3 = card[12:15].replace(" ", "").replace("_", "")
            name2 = card[12:14].replace(" ", "").replace("_", "")
            chainId = card[21]
            resIdStr = card[22:26].replace(" ", "")
            if resIdStr != "":
                resId = int(resIdStr)
            else:
                resId = 0
            resName = card[17:20]
            alt = card[16] # Alternate location indicator

            if alt != ' ' and \
               alt != 'A':
                # Skip non-standard alternate location
                # This is not very safe test, it should preserve
                # the remaining atoms. piotr 080715
                continue

            # First, look at the element symbol field (columns 77-78, i.e.
            # card[76:78]). It is right-justified and upper case in the
            # file, e.g. " C" or "ZN". Reading both columns matters: the
            # second column alone would turn "ZN" into nitrogen.
            sym = capitalize(card[76:78].replace(" ", ""))
            foundit = _is_known_element(sym)

            if not foundit:
                # Fall back to guessing the element from the atom name;
                # use the first candidate we recognize.
                atomnames_to_try = [
                    name4, # as seems best according to documentation
                    name3,
                    name2, # like old code
                    _nodigits(name4),
                    _nodigits(name3),
                    _nodigits(name2) # like code as revised on 070410
                ]
                for atomname in atomnames_to_try:
                    if not atomname:
                        # Blank (or digits-only) name field: nothing to
                        # interpret, and atomname[0] below would fail.
                        continue
                    atomname = atomname_exceptions.get(atomname, atomname)
                    if atomname[0] == 'H' and key == "atom":
                        atomname = "H" # see comment at atomname_exceptions
                    sym = capitalize(atomname) # turns either 'he' or 'HE' into 'He'
                    if _is_known_element(sym):
                        foundit = True
                        break

            if not foundit:
                msg = "Warning: Pdb file: will use Carbon in place of unknown element %s in: %s" \
                    % (name4, card)
                print(msg) #bruce 070410 added this print
                env.history.message( redmsg( msg ))

                # Create a stand-in carbon atom, so the CONECT records
                # still work. [bruce 080508]
                # Better still might be to create a fake element,
                # so we could write out the pdb file again
                # (albeit missing lots of info). [bruce 070410 comment]
                sym = "C"

            # Water atoms are collected in the separate water chunk;
            # everything else goes into the current chain.
            _is_water = is_water(resName, name4)
            if _is_water:
                target = water
            else:
                target = mol

            # Now the element name is in sym.
            xyz = [float(card[30:38]), float(card[38:46]), float(card[46:54])]
            n = int(card[6:11])
            a = Atom(sym, A(xyz), target)
            ndix[n] = a

            if not _is_water:
                mol.protein.add_pdb_atom(a,
                                         name4,
                                         resId,
                                         resName)

                # Assign secondary structure. This is skipped for water:
                # the water chunk is not given a Protein object here.
                residue_key = (resId, chainId)
                if residue_key in helix:
                    mol.protein.assign_helix(resId)

                if residue_key in sheet:
                    mol.protein.assign_strand(resId)

                if residue_key in turn:
                    mol.protein.assign_turn(resId)

        elif key == "conect":
            try:
                a1 = ndix[int(card[6:11])]
            except (ValueError, KeyError):
                # Either the serial number field is not an integer or no
                # atom with that serial number was read; we don't try to
                # distinguish these here. [bruce 050322]
                env.history.message( redmsg( "Warning: Pdb file: can't find first atom in CONECT record: %s" % (card,) ))
            else:
                for i in range(11, 70, 5):
                    try:
                        a2 = ndix[int(card[i:i+5])]
                    except ValueError:
                        # bruce 050323 comment:
                        # we assume this is from int('') or int(' ') etc;
                        # this is the usual way of ending this loop.
                        break
                    except KeyError:
                        #bruce 050322-23 added history warning for this,
                        # assuming it comes from ndix[] lookup.
                        env.history.message( redmsg( "Warning: Pdb file: can't find atom %s in: %s" % (card[i:i+5], card) ))
                        continue
                    bond_atoms(a1, a2)
                    numconects += 1
        elif key == "ter":
            # Finish the current molecule.
            _finish_molecule()

            # Discard the original molecule and create a new one.
            mol = Chunk(assy, nodename)
            mol.protein = Protein()
            numconects = 0

        elif key == "header":
            # Extract PDB ID from the header string.
            pdbid = card[62:66].lower()
            comment_text += card

        elif key == "compnd":
            comment_text += card

        elif key == "remark":
            comment_text += card

        elif key == "model":
            # Check out the MODEL record, ignore everything other than MODEL 1.
            # This behavior has to be optional and set via User Preference.
            # piotr 080714
            model_id = int(card[6:20])
            if model_id > 1:
                # Skip remaining part of the file.
                break

        elif key in ("helix", "sheet", "turn"):
            # Read secondary structure information. Field positions follow
            # the PDB format description (1-based columns):
            #   HELIX: chain 20, first residue 22-25, last residue 34-37
            #   SHEET: chain 22, first residue 23-26, last residue 34-37
            #   TURN:  chain 20, first residue 21-24, last residue 32-35
            # A separate name is used for the chain ID so that the chain ID
            # of the atoms being read (chainId) is not overwritten.
            if key == "helix":
                begin = int(card[21:25])
                end = int(card[33:37])
                ss_chain_id = card[19]
                ss_residues = helix
            elif key == "sheet":
                begin = int(card[22:26])
                end = int(card[33:37])
                ss_chain_id = card[21]
                ss_residues = sheet
            else:
                begin = int(card[20:24])
                end = int(card[31:35])
                ss_chain_id = card[19]
                ss_residues = turn
            for s in range(begin, end + 1):
                ss_residues.add((s, ss_chain_id))
        else:
            # Everything from an "ntrials:" line onwards is treated as
            # Rosetta scoring output and kept in the comment text.
            if card[7:15] == "ntrials:":
                _read_rosetta_info = True
                comment_text += "Rosetta Scoring Analysis\n"
            if _read_rosetta_info:
                comment_text += card

        if showProgressDialog: # Update the progress dialog.
            _progressValue += 1
            if _progressValue >= _progressFinishValue:
                win.progressDialog.setLabelText("Building model...")
            elif _progressDialogDisplayed:
                win.progressDialog.setValue(_progressValue)
            else:
                _timerDuration = time.time() - _timerStart
                if _timerDuration > 0.25:
                    # Display progress dialog after 0.25 seconds
                    win.progressDialog.setValue(_progressValue)
                    _progressDialogDisplayed = True

    if showProgressDialog: # Make the progress dialog go away.
        win.progressDialog.setValue(_progressFinishValue)

    # Finish the last chain (files do not always end with a TER record).
    _finish_molecule()

    if water.atoms:
        # Check if there are any water molecules
        water.name = "Solvent"
        # The water should be hidden by default.
        water.hide()
        mollist.append(water)

    return (mollist, comment_text)
예제 #2
0
    def make_aligned(self,
                     assy,
                     name,
                     aa_idx,
                     phi, psi,
                     pos1, pos2,
                     secondary = SS_COIL,
                     fake_chain = False,
                     length = None):
        """
        Build and return a chunk that is a homopeptide aligned to
        a pos2-pos1 vector.

        As a side effect this sets self.length, self.prev_coords,
        self.nterm_hydrogen and self.init_ca.

        @param assy: the assembly the new chunk is created in
        @type assy: L{assembly}

        @param aa_idx: amino acid type (index in AMINO_ACIDS list)
        @type aa_idx: int

        @param name: chunk name
        @type name: string

        @param phi, psi: peptide bond angles
        @type phi, psi: float

        @param pos1, pos2: desired peptide positions (beginning and end)
        @type pos1, pos2: V

        @param secondary: secondary structure class, used for visual representation
        The actual peptide chain conformation is based on phi / psi angles.
        @type secondary: int

        @param fake_chain: if True, create only C-alpha atoms. used for drawing
        peptide trace image during interactive peptide placement (used by
        PeptideLine_GraphicsMode.py)
        @type fake_chain: boolean

        @param length: optional peptide length (number of amino acids), if
        not specified, pos1 and pos2 are used to figure out the length
        @type length: int

        @return: A homopolypeptide chain, or None if no length was given and
        the length computed from pos1 and pos2 is zero.
        @rtype:  L{Chunk}
        """

        if not length:
            self.length = self.get_number_of_res(pos1, pos2, phi, psi)
            if self.length == 0:
                return None
        else:
            self.length = length

        # Create a molecule
        mol = Chunk(assy, name)

        if not fake_chain:
            mol.protein = Protein()
            mol.protein.set_chain_id('A')

        # Generate dummy atoms positions
        self.prev_coords[0][0] = pos1[0] - 1.0
        self.prev_coords[0][1] = pos1[1] - 1.0
        self.prev_coords[0][2] = pos1[2]

        self.prev_coords[1][0] = pos1[0] - 1.0
        self.prev_coords[1][1] = pos1[1]
        self.prev_coords[1][2] = pos1[2]

        self.prev_coords[2][0] = pos1[0]
        self.prev_coords[2][1] = pos1[1]
        self.prev_coords[2][2] = pos1[2]

        # Only the short name, the z-matrix and its size are needed here;
        # the other two fields of the table entry are unused.
        _aa_name, short_name, _aa_symbol, zmatrix, size = AMINO_ACIDS[aa_idx]

        # Initially, the Peptide Builder was creating peptide structures
        # saturated at both ends, i.e. with N-terminal hydrogen and C-terminal
        # OH group present. Currently neither terminal group is added, to
        # allow connecting multiple peptide structures by creating bonds
        # between the C- and N- terminal ends of two individual structures.
        self.nterm_hydrogen = None

        self.init_ca = None

        # Generate the peptide chain.
        for idx in range(int(self.length)):
            self._buildResidue(mol, zmatrix, size, idx + 1, phi, psi,
                               secondary, None, short_name,
                               fake_chain=fake_chain)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.
        if not fake_chain:
            inferBonds(mol)

        # Assign proper bond orders. Only bonds between two sp2 atoms are
        # changed; the temporary _is_aromatic / _is_single attributes of
        # the two atoms decide between aromatic and double.
        for atom in mol.atoms.itervalues():
            if not atom.bonds:
                continue
            for bond in atom.bonds:
                atom1 = bond.atom1
                atom2 = bond.atom2
                if atom1.getAtomTypeName() != "sp2" or \
                   atom2.getAtomTypeName() != "sp2":
                    continue
                if atom1._is_aromatic and atom2._is_aromatic:
                    bond.set_v6(V_AROMATIC)
                elif ((atom1._is_aromatic == False and
                       atom2._is_aromatic == False) and
                      not (atom1._is_single and
                           atom2._is_single)):
                    # Both atoms must be non-aromatic (the check used to
                    # test atom1 twice and never looked at atom2).
                    # The explicit comparison with False is kept on purpose
                    # in case the flags are not always plain booleans.
                    bond.set_v6(V_DOUBLE)

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        # Rotate the chunk so that its axis points along +Z
        ax = V(0.,0.,1.)
        mol.rot(Q(mol.getaxis(),ax))

        self._orient(mol, pos2, pos1)

        if self.init_ca:
            # Translate the chunk so that the atom stored in self.init_ca
            # ends up at pos1.
            mol.move(pos1 - self.init_ca.posn())

        return mol
예제 #3
0
    def make_aligned(self,
                     assy,
                     name,
                     aa_idx,
                     phi,
                     psi,
                     pos1,
                     pos2,
                     secondary=SS_COIL,
                     fake_chain=False,
                     length=None):
        """
        Build and return a chunk that is a homopeptide aligned to
        a pos2-pos1 vector.

        As a side effect this sets self.length, self.prev_coords,
        self.nterm_hydrogen and self.init_ca.

        @param assy: the assembly the new chunk is created in
        @type assy: L{assembly}

        @param aa_idx: amino acid type (index in AMINO_ACIDS list)
        @type aa_idx: int

        @param name: chunk name
        @type name: string

        @param phi, psi: peptide bond angles
        @type phi, psi: float

        @param pos1, pos2: desired peptide positions (beginning and end)
        @type pos1, pos2: V

        @param secondary: secondary structure class, used for visual representation
        The actual peptide chain conformation is based on phi / psi angles.
        @type secondary: int

        @param fake_chain: if True, create only C-alpha atoms. used for drawing
        peptide trace image during interactive peptide placement (used by
        PeptideLine_GraphicsMode.py)
        @type fake_chain: boolean

        @param length: optional peptide length (number of amino acids), if
        not specified, pos1 and pos2 are used to figure out the length
        @type length: int

        @return: A homopolypeptide chain, or None if no length was given and
        the length computed from pos1 and pos2 is zero.
        @rtype:  L{Chunk}
        """

        if not length:
            self.length = self.get_number_of_res(pos1, pos2, phi, psi)
            if self.length == 0:
                return None
        else:
            self.length = length

        # Create a molecule
        mol = Chunk(assy, name)

        if not fake_chain:
            mol.protein = Protein()
            mol.protein.set_chain_id('A')

        # Generate dummy atoms positions
        self.prev_coords[0][0] = pos1[0] - 1.0
        self.prev_coords[0][1] = pos1[1] - 1.0
        self.prev_coords[0][2] = pos1[2]

        self.prev_coords[1][0] = pos1[0] - 1.0
        self.prev_coords[1][1] = pos1[1]
        self.prev_coords[1][2] = pos1[2]

        self.prev_coords[2][0] = pos1[0]
        self.prev_coords[2][1] = pos1[1]
        self.prev_coords[2][2] = pos1[2]

        # Only the short name, the z-matrix and its size are needed here;
        # the other two fields of the table entry are unused.
        _aa_name, short_name, _aa_symbol, zmatrix, size = AMINO_ACIDS[aa_idx]

        # Initially, the Peptide Builder was creating peptide structures
        # saturated at both ends, i.e. with N-terminal hydrogen and C-terminal
        # OH group present. Currently neither terminal group is added, to
        # allow connecting multiple peptide structures by creating bonds
        # between the C- and N- terminal ends of two individual structures.
        self.nterm_hydrogen = None

        self.init_ca = None

        # Generate the peptide chain.
        for idx in range(int(self.length)):
            self._buildResidue(mol,
                               zmatrix,
                               size,
                               idx + 1,
                               phi,
                               psi,
                               secondary,
                               None,
                               short_name,
                               fake_chain=fake_chain)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.
        if not fake_chain:
            inferBonds(mol)

        # Assign proper bond orders. Only bonds between two sp2 atoms are
        # changed; the temporary _is_aromatic / _is_single attributes of
        # the two atoms decide between aromatic and double.
        for atom in mol.atoms.itervalues():
            if not atom.bonds:
                continue
            for bond in atom.bonds:
                first = bond.atom1
                second = bond.atom2
                both_sp2 = (first.getAtomTypeName() == "sp2"
                            and second.getAtomTypeName() == "sp2")
                if not both_sp2:
                    continue
                if first._is_aromatic and second._is_aromatic:
                    bond.set_v6(V_AROMATIC)
                elif ((first._is_aromatic == False
                       and second._is_aromatic == False)
                      and not (first._is_single
                               and second._is_single)):
                    # Both atoms must be non-aromatic (the check used to
                    # test atom1 twice and never looked at atom2).
                    # The explicit comparison with False is kept on purpose
                    # in case the flags are not always plain booleans.
                    bond.set_v6(V_DOUBLE)

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        # Rotate the chunk so that its axis points along +Z
        ax = V(0., 0., 1.)
        mol.rot(Q(mol.getaxis(), ax))

        self._orient(mol, pos2, pos1)

        if self.init_ca:
            # Translate the chunk so that the atom stored in self.init_ca
            # ends up at pos1.
            mol.move(pos1 - self.init_ca.posn())

        return mol