Ejemplo n.º 1
0
def retrieve_ca_model(structure):
    """
    Build a reduced copy of the first model of ``structure``.

    Only backbone trace atoms are kept: atoms named ``CA``, ``C4'`` or
    ``C4*``. Each kept atom is stored as an atom named ``CA`` in a new
    residue that reuses the source residue number and residue name, inside
    a chain with the same id as the source chain.

    Returns the new structure (id ``'clustering_model'``, single model 0).
    """
    trace_atom_names = ("CA", "C4'", "C4*")

    reduced_struct = Structure('clustering_model')
    reduced_struct.add(Model(0))
    target_model = reduced_struct[0]

    for source_chain in structure[0]:
        trace_chain = Chain(source_chain.id)
        target_model.add(trace_chain)
        for source_residue in source_chain:
            for source_atom in source_residue:
                if source_atom.get_name() not in trace_atom_names:
                    continue
                trace_residue = Residue((' ', source_residue.id[1], ' '),
                                        source_residue.get_resname(), ' ')
                trace_atom = Atom('CA', source_atom.coord, 0, ' ', ' ', 'CA',
                                  source_atom.get_serial_number())
                trace_chain.add(trace_residue)
                trace_residue.add(trace_atom)

    return reduced_struct
Ejemplo n.º 2
0
    def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
        """Return a copy of ``structure`` with consecutively renumbered residues.

        Residues of every chain are copied and numbered upwards from the
        ``seq_db_align_begin`` value found in ``alphafold_mmCIF_dict``. All
        disorder flags on the copies are cleared. An ``AssertionError`` is
        raised when a chain does not end exactly on ``seq_db_align_end``.
        """
        # The AlphaFold dictionary entries that describe the residue range covered by the structure
        first_residue_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_begin'][0])
        last_residue_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_end'][0])

        renumbered_structure = Structure(structure.id)
        for source_model in structure:
            target_model = Model(source_model.id)
            for source_chain in source_model:
                target_chain = Chain(source_chain.id)
                residue_total = 0
                for offset, source_residue in enumerate(source_chain):
                    clone = source_residue.copy()
                    clone.id = (' ', first_residue_number + offset, ' ')
                    # copy() does not carry disorder over properly, so every
                    # notion of disorder is wiped from the clone
                    for atom in clone:
                        atom.disordered_flag = 0
                    clone.disordered = 0
                    target_chain.add(clone)
                    residue_total = offset + 1

                # The chain must cover the advertised window exactly
                assert first_residue_number + residue_total == last_residue_number + 1
                target_model.add(target_chain)

            renumbered_structure.add(target_model)
        return renumbered_structure
 def create_new_chain(self, old_struct):
     """Return a new structure holding one model (0) with one empty chain.

     Both the structure id and the chain id are taken from ``old_struct.chain``.
     """
     chain_id = old_struct.chain
     empty_model = Model(0)
     empty_model.add(Chain(chain_id))  # what if more chains in one component?
     wrapper = Structure(chain_id)
     wrapper.add(empty_model)
     return wrapper
    def create_sphere_representation(self):
        """
        Build a one-atom stand-in for ``self.fa_struct``.

        The centre returned by ``self.calculate_centre_of_complex`` becomes
        the coordinates of a single ``CA`` atom in an ``ALA`` residue numbered
        1. The same call also supplies ``self.molmass`` and ``self.radius``.
        The new structure is stored in ``self.cg_struct`` and passed to
        ``self.save_pdb``.
        """
        chain_id = self.fa_struct.chain

        sphere_struct = Structure('sphrere')
        sphere_struct.add(Model(0))
        sphere_chain = Chain(chain_id)
        sphere_struct[0].add(sphere_chain)

        centre, self.molmass, self.radius = self.calculate_centre_of_complex(
            self.fa_struct.struct)

        sphere_residue = Residue((' ', 1, ' '), "ALA", ' ')
        sphere_atom = Atom('CA', array(centre, 'f'), 0, 0, ' ', ' CA', 1)
        sphere_chain.add(sphere_residue)
        sphere_residue.add(sphere_atom)

        self.cg_struct = sphere_struct
        self.save_pdb(sphere_struct, "dddd" + chain_id)
Ejemplo n.º 5
0
def splitOnePDB(fname, outPath):
  """
  Split the first model of a PDB file into ligand/receptor file pairs.

  Chains whose effective length (residues with a CA atom minus non-standard
  residues) falls outside [MIN_SEQ_LEN, MAX_SEQ_LEN] are removed first. Each
  remaining chain is then written as a ligand ("<prefix>-<i>_l_u.pdb") with
  all other remaining chains as its receptor ("<prefix>-<i>_r_u.pdb"). For a
  two-chain model only one of the two symmetric pairs is written.

  Returns 0 when the file cannot be parsed, model 0 is missing, or fewer than
  two chains remain; otherwise returns None after writing the files.
  """
  try:
    s = parser.get_structure(fname, fname)
  except Exception:
    print ("Error loading pdb")
    return 0

  rejectedChainIds = []
  try:
    for chain in s[0]:
      nonStandardCount = sum(
        1 for res in chain.get_list() if not is_aa(res, standard=True))
      chainLen = sum(1 for res in chain if "CA" in res) - nonStandardCount
      if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
        print(chainLen)
        rejectedChainIds.append(chain.get_id())
  except KeyError:
    print ("Not good model")
    return 0
  for chainId in rejectedChainIds:
    s[0].detach_child(chainId)

  model = s[0]
  if len(model.get_list()) < 2:
    print(s)
    print(model.get_list())
    print("Not enough good chains")
    return 0

  ligandChainList = []
  receptorChainList = []
  for candidate in model:
    partners = [other for other in model if candidate != other]
    # With exactly two chains the second pairing mirrors the first, so skip it
    if len(partners) > 1 or partners[0] not in ligandChainList:
      ligandChainList.append(candidate)
      receptorChainList.append(partners)

  prefix = os.path.basename(fname).split(".")[0]
  for i, (ligandChain, receptorChains) in enumerate(zip(ligandChainList, receptorChainList)):
    ligandStruct = Structure(prefix + "ligand")
    ligandStruct.add(Model(0))
    ligandModel = ligandStruct[0]
    ligandChain.set_parent(ligandModel)
    ligandModel.add(ligandChain)
    ligandWriter = PDBIO()
    ligandWriter.set_structure(ligandStruct)
    ligandWriter.save(os.path.join(outPath, prefix + "-" + str(i) + "_l_u.pdb"))

    receptorStruct = Structure(prefix + "receptor")
    receptorStruct.add(Model(0))
    receptorModel = receptorStruct[0]
    for receptorChain in receptorChains:
      receptorChain.set_parent(receptorModel)
      receptorModel.add(receptorChain)
    receptorWriter = PDBIO()
    receptorWriter.set_structure(receptorStruct)
    receptorWriter.save(os.path.join(outPath, prefix + "-" + str(i) + "_r_u.pdb"))
    print("ligand:", ligandChain, "receptor:", receptorChains)
Ejemplo n.º 6
0
    def calculate_BSA(self):
        """Calculate the buried surface area (BSA) of the interface with NACCESS.

        NACCESS is run on the whole model and on each of the first two
        interface chains in isolation. The BSA is the sum of the two
        isolated-chain accessible areas minus the accessible area of the
        complex.

        Returns:
            list: ``[bsa, sas_A, sas_B, sas_tot]``.

        Raises:
            IndexError: if ``self.get_chains()`` yields fewer than two chains.
        """
        # Extract list of chains in the interface only
        chains = list(self.get_chains())

        # Create temporary structures to feed NACCESS.
        # NOTE(review): the chain objects are shared with self.model (no copy
        # is made), and adding them to mA/mB presumably re-parents them.
        structure_A = Structure("chainA")
        structure_B = Structure("chainB")
        mA = Model(0)
        mB = Model(0)
        mA.add(self.model[chains[0]])
        mB.add(self.model[chains[1]])
        structure_A.add(mA)
        structure_B.add(mB)

        # Each SASA is read immediately after its own NACCESS run. Because the
        # temporary structures share their atoms with self.model, running the
        # isolated chains first would replace the per-atom results of the
        # complex before they are summed (same ordering as _rsa_calculation).
        NACCESS_atomic(self.model)
        sas_tot = _get_atomic_SASA(self.model)

        NACCESS_atomic(structure_A[0])
        sas_A = _get_atomic_SASA(structure_A)

        NACCESS_atomic(structure_B[0])
        sas_B = _get_atomic_SASA(structure_B)

        # Calculate BSA
        bsa = sas_A + sas_B - sas_tot

        return [bsa, sas_A, sas_B, sas_tot]
Ejemplo n.º 7
0
    def getStructFromFasta(self, fname, chainType):
        '''
        Build a coordinate-free Bio.PDB structure from the sequence in a fasta file.

        Only the Structure -> Model -> Chain -> Residue levels are created; no
        atoms are added. Residues are numbered from 0 in sequence order, and
        letters that ``one_to_three`` rejects with KeyError become "UNK".

        :param fname: str. Path to the fasta file.
        :param chainType: str. "l" or "r"; its upper-cased form is the chain id.
        :return: the new structure, whose id is the prefix derived from fname.
        '''
        # inputNumber is used to report which partner fails if error
        partnerNumber = "1" if chainType == "l" else "2"
        seq = self.parseFasta(fname, inputNumber=partnerNumber)
        prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]

        chain = Chain(chainType.upper())
        for position, letter in enumerate(seq):
            try:
                resname = one_to_three(letter)
            except KeyError:
                resname = "UNK"
            chain.add(Residue((' ', position, ' '), resname, prefix))

        model = Model(0)
        model.add(chain)
        struct = Structure(prefix)
        struct.add(model)
        return struct
Ejemplo n.º 8
0
    def slice(cls, obj, selection, name='slice'):
        """Create a new object wrapping a slice of ``obj``.

        Every item of ``selection`` is indexed as ``item[1]`` (model id),
        ``item[2]`` (chain id) and ``item[3]`` (residue id). The selected
        residues are copied into a fresh Biopython structure called ``name``,
        which is passed to ``cls``. ``selection`` is iterated twice.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain

        # First pass: work out which model/chain containers the slice needs
        chain_ids_by_model = {}
        for entry in selection:
            chain_ids_by_model.setdefault(entry[1], set()).add(entry[2])

        # Create the empty model/chain skeleton
        ent = Structure(name)  # Biopython structure object
        for model_id, chain_ids in chain_ids_by_model.items():
            new_model = Model(model_id)
            ent.add(new_model)
            for chain_id in chain_ids:
                new_model.add(Chain(chain_id))

        # Second pass: copy the selected residues into the skeleton
        for entry in selection:
            model_id, chain_id, residue_id = entry[1], entry[2], entry[3]
            residue_copy = obj[model_id][chain_id][residue_id].copy()
            ent[model_id][chain_id].add(residue_copy)

        return cls(ent, name=name)
Ejemplo n.º 9
0
def retrieve_sphere_model(structure):
    """
    Build a structure in which every chain is reduced to a single atom.

    For each chain of ``structure`` the value returned by
    ``calculate_centre_of_complex`` becomes the coordinates of one ``CA``
    atom. The atom sits in a residue whose name is the chain id, inside a
    chain with the same id as the source chain.

    Returns the new structure (id ``'clustering_model'``, single model 0).
    """
    sphere_struct = Structure('clustering_model')
    sphere_struct.add(Model(0))

    # Residue numbering is not preserved: it restarts at 0 and follows chain order
    for number, source_chain in enumerate(structure.get_chains()):
        bead_chain = Chain(source_chain.id)
        sphere_struct[0].add(bead_chain)

        centre = calculate_centre_of_complex(source_chain)
        bead_residue = Residue((' ', number, ' '), source_chain.id, ' ')
        bead_atom = Atom('CA', array(centre, 'f'), 0, 0, ' ', 'CA', 1)

        bead_chain.add(bead_residue)
        bead_residue.add(bead_atom)

    return sphere_struct
Ejemplo n.º 10
0
    def _rsa_calculation(self, model, chain_list, rsa_threshold):
        """Find residues that lose relative accessibility on complex formation.

        NACCESS is run on the whole ``model`` and then on each of the two
        chains ``chain_list[0]`` and ``chain_list[1]`` in isolation. A residue
        of either chain is reported when its ``all_atoms_rel`` value in the
        isolated chain exceeds its value in the complex by more than
        ``rsa_threshold``. Only residues with a blank hetero flag are used.

        As a side effect, ``[bsa, sas_A, sas_B, sas_tot]`` is stored in
        ``self.interface.accessibility``.

        Returns:
            list: the reported residues, in the order they occur in ``model``.
        """
        def relative_accessibility(residue):
            # Value left on the residue by the most recent NACCESS run
            return float(residue.xtra['EXP_NACCESS']['all_atoms_rel'])

        # Create temporary structures to feed NACCESS.
        # NOTE(review): the chain objects are shared with ``model`` (no copy
        # is made), and adding them to mA/mB presumably re-parents them.
        structure_A = Structure("chainA")
        structure_B = Structure("chainB")
        mA = Model(0)
        mB = Model(0)
        mA.add(model[chain_list[0]])
        mB.add(model[chain_list[1]])
        structure_A.add(mA)
        structure_B.add(mB)

        # Accessibility inside the complex
        NACCESS(model)

        res_list = [r for r in model.get_residues() if r.id[0] == ' ']
        structure_A_reslist = [r for r in structure_A[0].get_residues() if r.id[0] == ' ']
        structure_B_reslist = [r for r in structure_B[0].get_residues() if r.id[0] == ' ']

        # Snapshot the complex values now: the residue objects are shared with
        # structure_A/B, so the isolated runs below replace res.xtra. Values
        # are keyed by object identity so each residue is compared with its
        # own isolated value whatever the chain order or number of chains.
        complex_rsa = {id(res): relative_accessibility(res) for res in res_list}
        sas_tot = self._get_residue_SASA(model)

        # Accessibility of each chain on its own
        NACCESS(structure_A[0])
        NACCESS(structure_B[0])
        isolated_rsa = {
            id(res): relative_accessibility(res)
            for res in structure_A_reslist + structure_B_reslist
        }

        pairs = [
            res for res in res_list
            if id(res) in isolated_rsa
            and (isolated_rsa[id(res)] - complex_rsa[id(res)]) > rsa_threshold
        ]

        sas_A = self._get_residue_SASA(structure_A)
        sas_B = self._get_residue_SASA(structure_B)

        # Calculate BSA
        bsa = sas_A + sas_B - sas_tot

        self.interface.accessibility = [bsa, sas_A, sas_B, sas_tot]

        return pairs
Ejemplo n.º 11
0
def save_chain_to(chain, filename: str):
    """Write a single chain to ``filename`` in PDB format.

    The chain is wrapped in a structure (id ``filename``) with one model
    (id 0) before it is handed to ``PDBIO``.

    NOTE(review): assumes ``chain`` is a Bio.PDB chain object - confirm
    against callers. Adding it to the temporary model re-parents it.
    """
    from Bio.PDB.Model import Model
    from Bio.PDB.PDBIO import PDBIO

    # Bio.PDB expects Structure -> Model -> Chain; a chain added directly to
    # a structure would be walked by PDBIO as if it were a model.
    model = Model(0)
    model.add(chain)
    structure = Structure(filename)
    structure.add(model)

    io = PDBIO()
    io.set_structure(structure)
    io.save(filename)
Ejemplo n.º 12
0
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Create a new structure containing a single residue.

    ``residue`` is either a geometry object, used as-is, or a string that is
    passed to ``geometry`` to obtain one. Any other type raises ValueError.

    CA is placed at the origin, C on the x axis at distance ``CA_C_length``,
    and N in the xy plane at distance ``CA_N_length`` and angle
    ``N_CA_C_angle`` (degrees). For an "ACE" residue the central atom is
    named CH3 instead of CA. The residue goes into chain A of model 0 of a
    structure with id "X".
    """
    if isinstance(residue, Geo):
        geo = residue
    elif isinstance(residue, str):
        geo = geometry(residue)
    else:
        raise ValueError("Invalid residue argument:", residue)

    angle_in_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    n_position = np.array([
        geo.CA_N_length * math.cos(angle_in_radians),
        geo.CA_N_length * math.sin(angle_in_radians),
        0,
    ])
    n_atom = Atom("N", n_position, 0.0, 1.0, " ", " N", 0, "N")

    # Check if the peptide is capped or not
    origin = np.array([0.0, 0.0, 0.0])
    if geo.residue_name == "ACE":
        central_atom = Atom("CH3", origin, 0.0, 1.0, " ", " CH3", 0, "C")
    else:
        central_atom = Atom("CA", origin, 0.0, 1.0, " ", " CA", 0, "C")

    c_position = np.array([geo.CA_C_length, 0, 0])
    c_atom = Atom("C", c_position, 0.0, 1.0, " ", " C", 0, "C")

    # Carbonyl oxygen, positioned from the three backbone atoms
    carbonyl = calculateCoordinates(n_atom, central_atom, c_atom,
                                    geo.C_O_length, geo.CA_C_O_angle,
                                    geo.N_CA_C_O_diangle)
    o_atom = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    first_residue = make_res_of_type(1, n_atom, central_atom, c_atom, o_atom, geo)

    chain = Chain("A")
    chain.add(first_residue)
    model = Model(0)
    model.add(chain)
    struc = Structure("X")
    struc.add(model)
    return struc
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Write the ligand and receptor chains of one PDB file to separate files.

    Chains of the first model whose effective length (residues with a CA atom
    minus non-standard, non-water residues) falls outside
    [MIN_SEQ_LEN, MAX_SEQ_LEN] are counted as rejected, but they are not
    removed from the model. The chains returned by ``findNeigChains`` are
    written to "<prefix>-<chainIdL><chainIdR>_l_u.pdb" (ligand) and
    "..._r_u.pdb" (receptor) inside ``outPath``.

    Returns 0 when the file cannot be parsed, model 0 is missing, or fewer
    than two chains pass the length filter; otherwise returns None.
    """
    baseName = os.path.basename(fname)
    print(baseName)
    try:
        s = parser.get_structure(baseName, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    banLenChains = []
    try:
        for chain in s[0]:
            badResInChain = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            chainLen = sum(1 for res in chain if "CA" in res) - badResInChain
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chain, chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    model = s[0]
    if len(model.get_list()) - len(banLenChains) < 2:
        print(s)
        print(model.get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)

    prefix = baseName.split(".")[0]

    # Ligand first, then receptor; both are written the same way
    outputs = (
        (ligandChains, "ligand", "_l_u.pdb"),
        (receptorChains, "receptor", "_r_u.pdb"),
    )
    for partnerChains, role, suffix in outputs:
        io = PDBIO()
        partnerStruct = Structure(prefix + role)
        partnerStruct.add(Model(0))
        partnerModel = partnerStruct[0]
        for partnerChain in partnerChains:
            partnerChain.set_parent(partnerModel)
            partnerModel.add(partnerChain)
        io.set_structure(partnerStruct)
        io.save(
            os.path.join(outPath, prefix + "-" + chainIdL + chainIdR + suffix))
Ejemplo n.º 14
0
    def get_structure(self, name='RNA chain'):
        """Return the residues of this object wrapped in a new structure.

        The structure is called ``name`` and has one model (0) with one chain
        whose id is ``self.chain_name``. The residue objects themselves are
        added to the chain; no copies are made.
        """
        chain = Chain(self.chain_name)
        for resi in self:
            chain.add(resi)

        model = Model(0)
        model.add(chain)
        struc = Structure(name)
        struc.add(model)
        return struc
def single_chain_structure(chain, name='superposition'):
    """Wrap ``chain`` in a new structure called ``name`` with a single model 0.

    The chain object itself is added to the model; no copy is made.
    """
    from Bio.PDB.Model import Model
    from Bio.PDB.Structure import Structure

    wrapper_model = Model(0)
    wrapper_model.add(chain)

    wrapper = Structure(name)
    wrapper.add(wrapper_model)
    return wrapper
Ejemplo n.º 16
0
def complex_save(given_complex, i, path):
    """Write all component chains of ``given_complex`` into one PDB file.

    For every component, the chain ``pyrystruct.chain`` of the first model of
    ``pyrystruct.struct`` is added to a single model (0) of a structure with
    id ``i``. The structure is saved to ``path``, which is also returned.
    """
    merged_model = Model(0)
    s = Structure(i)
    s.add(merged_model)

    for component in given_complex.components:
        pyry = component.pyrystruct
        merged_model.add(pyry.struct[0][pyry.chain])

    writer = PDBIO()
    writer.set_structure(s)
    writer.save(path)
    return path
Ejemplo n.º 17
0
def extract_model(pdb_struct, k):
    """
    Return a new structure that holds only a copy of model ``k``.

    The copy is given id 0 and serial number 1; the new structure reuses the
    id of ``pdb_struct``. Asserts that ``k`` is below the number of models.
    """
    assert k < len(pdb_struct), 'missing specified model'

    extracted = pdb_struct[k].copy()
    extracted.id = 0
    extracted.serial_num = 1

    wrapper = Structure(pdb_struct.id)
    wrapper.add(extracted)
    return wrapper
Ejemplo n.º 18
0
def multiply_model(pdb_struct, num_models):
    """
    Return a new structure holding ``num_models`` copies of the only model.

    ``pdb_struct`` must contain exactly one model (asserted). The copies get
    ids 0..num_models-1 and serial numbers 1..num_models.
    """
    assert len(pdb_struct) == 1, 'single-model PDB file required'

    multiplied = Structure(pdb_struct.id)

    for serial in range(1, num_models + 1):
        clone = pdb_struct[0].copy()
        clone.detach_parent()
        clone.id = serial - 1
        clone.serial_num = serial
        multiplied.add(clone)
        clone.set_parent(multiplied)

    return multiplied
Ejemplo n.º 19
0
def initialize_res(residue):
    '''Create a new structure containing a single amino acid.

    ``residue`` is either a geometry object, used as-is, or any other value,
    which is passed to ``Geo`` to build one.

    CA is placed at the origin, C on the x axis at distance ``CA_C_length``,
    and N in the xy plane at distance ``CA_N_length`` and angle
    ``N_CA_C_angle`` (degrees). The residue goes into chain A of model 0 of
    a structure with id 'X'.'''
    geo = residue if isinstance(residue, Geo) else Geo(residue)

    angle_in_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    n_position = np.array([
        geo.CA_N_length * math.cos(angle_in_radians),
        geo.CA_N_length * math.sin(angle_in_radians),
        0,
    ])
    ca_position = np.array([0., 0., 0.])
    c_position = np.array([geo.CA_C_length, 0, 0])

    n_atom = Atom("N", n_position, 0.0, 1.0, " ", " N", 0, "N")
    ca_atom = Atom("CA", ca_position, 0.0, 1.0, " ", " CA", 0, "C")
    c_atom = Atom("C", c_position, 0.0, 1.0, " ", " C", 0, "C")

    # Carbonyl oxygen, positioned from the three backbone atoms
    carbonyl = calculateCoordinates(n_atom, ca_atom, c_atom, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    o_atom = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    first_residue = makeRes(1, n_atom, ca_atom, c_atom, o_atom, geo)

    chain = Chain('A')
    chain.add(first_residue)
    model = Model(0)
    model.add(chain)
    struc = Structure('X')
    struc.add(model)
    return struc
Ejemplo n.º 20
0
    def save_pdb(self, complex_id, temp = "", name = ""):
        """
        gets coordinates of all complex components and writes them in one
        file one component = one pdb model
        
        Parameters:
        ------------
            complex_id  : number of complex from simulation
        Returns:
        --------
            pdb files with simulated components in OUTFOLDER
        """
        ##add component chain by chain not residue by residue.
        model_num = 0
        score = round(self.simulation_score, 4)
        s = Structure(complex_id) 
        my_model = Model(0)
        s.add(my_model)
        
        for component in self.components:
#@TODO: #what if more chains in one component?
            my_model.add(component.pyrystruct.struct[0][component.pyrystruct.chain])
        out = PDBIO()
        out.set_structure(s)
        outname = outfolder.outdirname.split("/")[-1]

        temp = str(temp)

        try:
            temp = round(float(temp),1)
        except: pass

        if name:
            fi_name = str(outfolder.outdirname)+'/'+name+'_'+str(score)+'_'+str(complex_id)+"_"+str(temp)+'.pdb'
            out.save(fi_name)
        else:
            fi_name = str(outfolder.outdirname)+'/'+str(outname)+"_"+str(score)+'_'+str(complex_id)+"_"+str(temp)+'.pdb'          
            out.save(fi_name)

        for comp in self.components:
            comp.pyrystruct.struct[0][comp.pyrystruct.chain].detach_parent()

        return fi_name
    def __make_structure_from_residues__(self, residues):
        """
        Makes a Structure object from a list of residues.

        Residues are put into chain 'c1' in the given order. Whenever the
        current chain already holds a residue with the same id, a new chain
        ('c2', 'c3', ...) is started. All chains end up in model 'm' of
        structure 's'.
        """
        # KR: this probably can be outsourced to another module.
        chains = [Chain('c%i' % 1)]
        for residue in residues:
            if chains[-1].has_id(residue.id):
                chains.append(Chain('c%i' % (len(chains) + 1)))
            chains[-1].add(residue)

        model = Model('m')
        for chain in chains:
            model.add(chain)

        struct = Structure('s')
        struct.add(model)
        return struct
Ejemplo n.º 22
0
def select_structure(selector, structure):
    """Return a new structure with only the entities ``selector`` accepts.

    Models, chains and residues are rebuilt as new objects when the matching
    ``accept_model`` / ``accept_chain`` / ``accept_residue`` call is truthy.
    Accepted atoms are not copied: the original atom objects are added to
    the new residues. A container is kept even if nothing inside it is
    accepted.
    """
    filtered = Structure(structure.id)
    for model in structure:
        if selector.accept_model(model):
            kept_model = Model(model.id, model.serial_num)
            filtered.add(kept_model)
            for chain in model:
                if selector.accept_chain(chain):
                    kept_chain = Chain(chain.id)
                    kept_model.add(kept_chain)
                    for residue in chain:
                        if selector.accept_residue(residue):
                            kept_residue = Residue(residue.id,
                                                   residue.resname,
                                                   residue.segid)
                            kept_chain.add(kept_residue)
                            for atom in residue:
                                if selector.accept_atom(atom):
                                    kept_residue.add(atom)
    return filtered
Ejemplo n.º 23
0
    def create_structure(coords, pdb_type, remove_masked):
        """Build a single-chain structure from coordinates and save it as PDB.

        Each residue of ``protein.primary`` takes three consecutive rows of
        ``coords`` (rows 3*i, 3*i+1, 3*i+2), stored as atoms named N, CA and
        CB. Residues are numbered from 1. The file is written to
        ``save_dir + protein.id_ + '_' + pdb_type + '.pdb'``.

        Args:
            coords: 3D coordinates, indexed as ``coords[row, :]``
            pdb_type: label used in the output file name
                      (presumably predicted vs. actual - confirm with caller)
            remove_masked: when equal to False, every residue is written;
                           otherwise only residues whose mask entry is 1

        Returns:
            structure
        """
        atom_names = ['N', 'CA', 'CB']
        structure_id = protein.id_

        chain = Chain('A')
        for index, letter_code in enumerate(protein.primary):
            resname = AA_LETTERS[letter_code]
            if not (int(protein.mask[index]) == 1 or remove_masked == False):
                continue
            new_residue = Residue((' ', index + 1, ' '), resname, '    ')
            first_row = 3 * index
            for offset, atom_name in enumerate(atom_names):
                new_residue.add(
                    Atom(name=atom_name,
                         coord=coords[first_row + offset, :],
                         bfactor=0,
                         occupancy=1,
                         altloc=' ',
                         fullname=" {} ".format(atom_name),
                         serial_number=0))
            chain.add(new_residue)

        model = Model(0)
        model.add(chain)
        structure = Structure(structure_id)
        structure.add(model)

        io = PDBIO()
        io.set_structure(structure)
        io.save(save_dir + structure_id + '_' + pdb_type + '.pdb')
        return structure
    def createPDBFile(self):
        """Write a test mmCIF file with one CA atom per icosahedron vertex.

        The vertices come from an ``Icosahedron`` with circumscribed radius
        100 and orientation '222r'. Each vertex becomes an ALA residue
        (numbered from 1) in chain A of model 1. Any existing file at the
        fixed output path is removed first. Returns the output path.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain
        from Bio.PDB.Residue import Residue
        from Bio.PDB.Atom import Atom
        from Bio.PDB.mmcifio import MMCIFIO
        import os
        CIFFILENAME = "/tmp/out.cif"

        # vertices of an icosahedron with i222r symmetry
        vertices = Icosahedron(circumscribed_radius=100,
                               orientation='222r').getVertices()

        # build the Biopython hierarchy bottom-up
        chain = Chain('A')
        for number, vertex in enumerate(vertices, 1):
            # blank first field of the residue id marks a normal (ATOM) residue
            residue = Residue((' ', number, ' '), "ALA", '    ')
            chain.add(residue)
            # Atom(name, coord, bfactor, occupancy, altloc, fullname,
            #      serial_number, element)
            residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", number, "C"))

        model = Model(1, 1)  # model_id, serial_num
        model.add(chain)
        structure = Structure('result')
        structure.add(model)

        io = MMCIFIO()
        io.set_structure(structure)
        # delete file if exists
        if os.path.exists(CIFFILENAME):
            os.remove(CIFFILENAME)
        io.save(CIFFILENAME)
        return CIFFILENAME
Ejemplo n.º 25
0
class StructWriter():
    """Collect models parsed from PDB-formatted strings and write them to one file."""

    def __init__(self, structId="subset"):
        """Start with an empty structure whose id is ``structId``."""
        self.structId = structId
        self.pdbParser = PDBParser(QUIET=True)
        self.structure = Structure(structId)

    def addModel(self, pdb_as_str, modelId):
        """Parse ``pdb_as_str`` and add its first model under id ``int(modelId)``.

        The model's serial number is set to the same integer.
        """
        # NOTE(review): ``StringIO.StringIO`` is the Python 2 spelling; confirm
        # how the name ``StringIO`` is imported in this file.
        pdbLikeFile = StringIO.StringIO()
        pdbLikeFile.write(pdb_as_str)
        pdbLikeFile.flush()
        pdbLikeFile.seek(0, 0)
        new_struct = self.pdbParser.get_structure(str(modelId), pdbLikeFile)
        print(new_struct.child_list)
        model = new_struct[0]
        model.detach_parent()
        model.id = int(modelId)
        model.serial_num = model.id
        model.get_full_id()
        self.structure.add(model)
        print("Current struct", self.structure.child_list)

    def saveStruct(self, fname, desiredOrder):
        """Write the collected models to ``fname`` with MODEL records.

        When ``desiredOrder`` is a sequence of model ids, the structure is
        rebuilt to contain exactly those models in that order. When it is
        None, the models are written in their current order.

        Raises:
            IndexError: if an id in ``desiredOrder`` matches no stored model.
        """
        io = PDBIO(use_model_flag=True)
        if desiredOrder is not None:
            children = self.structure.child_list

            # Rebuild the structure only when an order was requested, so a
            # None order no longer discards the models and fails on iteration
            self.structure = Structure(self.structId)
            for modelId in desiredOrder:
                child = [model for model in children if model.id == modelId][0]
                child.detach_parent()
                self.structure.add(child)

        io.set_structure(self.structure)
        io.save(fname)  #,  preserve_atom_numbering=True)

    def __len__(self):
        """Return the number of models currently stored."""
        return len(self.structure.child_list)
0
class StructureBuilder:
    """Deals with constructing the Structure object.

    The StructureBuilder class is used by the PDBParser classes to
    translate a file to a Structure object.
    """

    def __init__(self):
        """Initialize the class."""
        self.line_counter = 0
        self.header = {}

    def _is_completely_disordered(self, residue):
        """Return 1 if all atoms in the residue have a non blank altloc (PRIVATE)."""
        for atom in residue.get_unpacked_list():
            if atom.get_altloc() == " ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Set header."""
        self.header = header

    def set_line_counter(self, line_counter):
        """Tracks line in the PDB file that is being parsed.

        Arguments:
         - line_counter - int

        """
        self.line_counter = line_counter

    def init_structure(self, structure_id):
        """Initialize a new Structure object with given id.

        Arguments:
         - id - string

        """
        self.structure = Structure(structure_id)

    def init_model(self, model_id, serial_num=None):
        """Create a new Model object with given id.

        Arguments:
         - id - int
         - serial_num - int

        """
        self.model = Model(model_id, serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Create a new Chain object with given id.

        If the current model already holds a chain with this id, that chain
        is reused and a PDBConstructionWarning is issued.

        Arguments:
         - chain_id - string

        """
        if self.model.has_id(chain_id):
            self.chain = self.model[chain_id]
            warnings.warn(
                "WARNING: Chain %s is discontinuous at line %i."
                % (chain_id, self.line_counter),
                PDBConstructionWarning,
            )
        else:
            self.chain = Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        Arguments:
         - segid - string

        """
        self.segid = segid

    def init_residue(self, resname, field, resseq, icode):
        """Create a new Residue object.

        Sets ``self.residue`` to the residue that subsequent ``init_atom``
        calls should add to. When the id is already present in the chain
        (point mutation), the existing residue or a DisorderedResidue
        wrapping both is used instead of a fresh one.

        Raises PDBConstructionException if a residue id is repeated with a
        different name while the existing residue has atoms with blank
        altlocs; ``self.residue`` is set to None in that case.

        Arguments:
         - resname - string, e.g. "ASN"
         - field - hetero flag, "W" for waters, "H" for
           hetero residues, otherwise blank.
         - resseq - int, sequence identifier
         - icode - string, insertion code

        """
        if field == "H":
            # The hetero field consists of H_ + the residue name (e.g. H_FUC)
            field = "H_" + resname
        res_id = (field, resseq, icode)
        if field == " " and self.chain.has_id(res_id):
            # There already is a residue with the id (field, resseq, icode).
            # This only makes sense in the case of a point mutation.
            warnings.warn(
                "WARNING: Residue ('%s', %i, '%s') redefined at line %i."
                % (field, resseq, icode, self.line_counter),
                PDBConstructionWarning,
            )
            duplicate_residue = self.chain[res_id]
            if duplicate_residue.is_disordered() == 2:
                # The residue in the chain is a DisorderedResidue object.
                # So just add the last Residue object.
                if duplicate_residue.disordered_has_id(resname):
                    # The residue was already made: select it and keep
                    # using it. Returning here is essential; falling
                    # through would replace self.residue with a fresh
                    # Residue and try to add a duplicate id to the chain.
                    self.residue = duplicate_residue
                    duplicate_residue.disordered_select(resname)
                    return
                # Make a new residue and add it to the already
                # present DisorderedResidue
                new_residue = Residue(res_id, resname, self.segid)
                duplicate_residue.disordered_add(new_residue)
                self.residue = duplicate_residue
                return
            if resname == duplicate_residue.resname:
                warnings.warn(
                    "WARNING: Residue ('%s', %i, '%s','%s') already defined "
                    "with the same name at line  %i."
                    % (field, resseq, icode, resname, self.line_counter),
                    PDBConstructionWarning,
                )
                self.residue = duplicate_residue
                return
            # Make a new DisorderedResidue object and put all
            # the Residue objects with the id (field, resseq, icode) in it.
            # These residues each should have non-blank altlocs for all their atoms.
            # If not, the PDB file probably contains an error.
            if not self._is_completely_disordered(duplicate_residue):
                # if this exception is ignored, a residue will be missing
                self.residue = None
                raise PDBConstructionException(
                    "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                    % (resname, field, resseq, icode)
                )
            self.chain.detach_child(res_id)
            new_residue = Residue(res_id, resname, self.segid)
            disordered_residue = DisorderedResidue(res_id)
            self.chain.add(disordered_residue)
            disordered_residue.disordered_add(duplicate_residue)
            disordered_residue.disordered_add(new_residue)
            self.residue = disordered_residue
            return
        self.residue = Residue(res_id, resname, self.segid)
        self.chain.add(self.residue)

    def init_atom(
        self,
        name,
        coord,
        b_factor,
        occupancy,
        altloc,
        fullname,
        serial_number=None,
        element=None,
        pqr_charge=None,
        radius=None,
        is_pqr=False,
    ):
        """Create a new Atom object.

        The atom is stored in ``self.atom`` and added to the current
        residue. Nothing happens if the current residue is None.

        Arguments:
         - name - string, atom name, e.g. CA, spaces should be stripped
         - coord - Numeric array (Float0, size 3), atomic coordinates
         - b_factor - float, B factor
         - occupancy - float
         - altloc - string, alternative location specifier
         - fullname - string, atom name including spaces, e.g. " CA "
         - serial_number - atom serial number, passed through to Atom
         - element - string, upper case, e.g. "HG" for mercury
         - pqr_charge - float, atom charge (PQR format)
         - radius - float, atom radius (PQR format)
         - is_pqr - boolean, flag to specify if a .pqr file is being parsed

        """
        residue = self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom = residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname = duplicate_atom.get_fullname()
            if duplicate_fullname != fullname:
                # name of current atom now includes spaces
                name = fullname
                warnings.warn(
                    "Atom names %r and %r differ only in spaces at line %i."
                    % (duplicate_fullname, fullname, self.line_counter),
                    PDBConstructionWarning,
                )
        if not is_pqr:
            self.atom = Atom(
                name,
                coord,
                b_factor,
                occupancy,
                altloc,
                fullname,
                serial_number,
                element,
            )
        else:
            # PQR records carry charge and radius instead of B factor and
            # occupancy.
            self.atom = Atom(
                name,
                coord,
                None,
                None,
                altloc,
                fullname,
                serial_number,
                element,
                pqr_charge,
                radius,
            )
        if altloc != " ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom = residue[name]
                if duplicate_atom.is_disordered() == 2:
                    duplicate_atom.disordered_add(self.atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom = DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(self.atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn(
                        "WARNING: disordered atom found with blank altloc before "
                        "line %i.\n" % self.line_counter,
                        PDBConstructionWarning,
                    )
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom = DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(self.atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(self.atom)

    def set_anisou(self, anisou_array):
        """Set anisotropic B factor of current Atom."""
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        """Set standard deviation of anisotropic B factor of current Atom."""
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        """Set standard deviation of atom position of current Atom."""
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        """Return the structure."""
        # Add the header dict
        self.structure.header = self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Set symmetry."""
        pass
Ejemplo n.º 27
0
class StructureBuilder(object):
    """
    Deals with constructing the Structure object. The StructureBuilder class is used
    by the PDBParser classes to translate a file to a Structure object.
    """
    def __init__(self):
        """Start with line counter 0 and an empty header."""
        self.line_counter = 0
        self.header = {}

    def _is_completely_disordered(self, residue):
        "Return 1 if all atoms in the residue have a non blank altloc."
        for atom in residue.get_unpacked_list():
            if atom.get_altloc() == " ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Store the header that get_structure attaches to the structure."""
        self.header = header

    def set_line_counter(self, line_counter):
        """
        The line counter keeps track of the line in the PDB file that
        is being parsed.

        Arguments:
        o line_counter - int
        """
        self.line_counter = line_counter

    def init_structure(self, structure_id):
        """Initiate a new Structure object with given id.

        Arguments:
        o id - string
        """
        self.structure = Structure(structure_id)

    def init_model(self, model_id, serial_num=None):
        """Initiate a new Model object with given id.

        Arguments:
        o id - int
        o serial_num - int
        """
        self.model = Model(model_id, serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Initiate a new Chain object with given id.

        If the current model already holds a chain with this id, that chain
        is reused and a PDBConstructionWarning is issued.

        Arguments:
        o chain_id - string
        """
        if self.model.has_id(chain_id):
            self.chain = self.model[chain_id]
            warnings.warn("WARNING: Chain %s is discontinuous at line %i."
                          % (chain_id, self.line_counter),
                          PDBConstructionWarning)
        else:
            self.chain = Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        Arguments:
        o segid - string
        """
        self.segid = segid

    def init_residue(self, resname, field, resseq, icode):
        """
        Initiate a new Residue object.

        Sets self.residue to the residue that subsequent init_atom calls
        should add to. When the id is already present in the chain (point
        mutation), a DisorderedResidue holding the alternatives is used.

        Raises PDBConstructionException if the existing residue with the
        same id has atoms with blank altlocs; self.residue is then None.

        Arguments:
        o resname - string, e.g. "ASN"
        o field - hetero flag, "W" for waters, "H" for
            hetero residues, otherwise blank.
        o resseq - int, sequence identifier
        o icode - string, insertion code
        """
        if field == "H":
            # The hetero field consists of H_ + the residue name (e.g. H_FUC)
            field = "H_" + resname
        res_id = (field, resseq, icode)
        if field == " " and self.chain.has_id(res_id):
            # There already is a residue with the id (field, resseq, icode).
            # This only makes sense in the case of a point mutation.
            warnings.warn("WARNING: Residue ('%s', %i, '%s') "
                          "redefined at line %i."
                          % (field, resseq, icode, self.line_counter),
                          PDBConstructionWarning)
            duplicate_residue = self.chain[res_id]
            if duplicate_residue.is_disordered() == 2:
                # The residue in the chain is a DisorderedResidue object.
                # So just add the last Residue object.
                if duplicate_residue.disordered_has_id(resname):
                    # The residue was already made: select it and keep
                    # using it. Returning here is essential; falling
                    # through would replace self.residue with a fresh
                    # Residue and try to add a duplicate id to the chain.
                    self.residue = duplicate_residue
                    duplicate_residue.disordered_select(resname)
                    return
                # Make a new residue and add it to the already
                # present DisorderedResidue
                new_residue = Residue(res_id, resname, self.segid)
                duplicate_residue.disordered_add(new_residue)
                self.residue = duplicate_residue
                return
            # Make a new DisorderedResidue object and put all
            # the Residue objects with the id (field, resseq, icode) in it.
            # These residues each should have non-blank altlocs for all their atoms.
            # If not, the PDB file probably contains an error.
            if not self._is_completely_disordered(duplicate_residue):
                # if this exception is ignored, a residue will be missing
                self.residue = None
                raise PDBConstructionException(
                    "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                    % (resname, field, resseq, icode))
            self.chain.detach_child(res_id)
            new_residue = Residue(res_id, resname, self.segid)
            disordered_residue = DisorderedResidue(res_id)
            self.chain.add(disordered_residue)
            disordered_residue.disordered_add(duplicate_residue)
            disordered_residue.disordered_add(new_residue)
            self.residue = disordered_residue
            return
        residue = Residue(res_id, resname, self.segid)
        self.chain.add(residue)
        self.residue = residue

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                  serial_number=None, element=None):
        """
        Initiate a new Atom object.

        The atom is stored in self.atom and added to the current residue.
        Nothing happens if the current residue is None.

        Arguments:
        o name - string, atom name, e.g. CA, spaces should be stripped
        o coord - Numeric array (Float0, size 3), atomic coordinates
        o b_factor - float, B factor
        o occupancy - float
        o altloc - string, alternative location specifier
        o fullname - string, atom name including spaces, e.g. " CA "
        o serial_number - atom serial number, passed through to Atom
        o element - string, upper case, e.g. "HG" for mercury
        """
        residue = self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom = residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname = duplicate_atom.get_fullname()
            if duplicate_fullname != fullname:
                # name of current atom now includes spaces
                name = fullname
                warnings.warn("Atom names %r and %r differ "
                              "only in spaces at line %i."
                              % (duplicate_fullname, fullname,
                                 self.line_counter),
                              PDBConstructionWarning)
        atom = self.atom = Atom(name, coord, b_factor, occupancy, altloc,
                                fullname, serial_number, element)
        if altloc != " ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom = residue[name]
                if duplicate_atom.is_disordered() == 2:
                    duplicate_atom.disordered_add(atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom = DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn("WARNING: disordered atom found "
                                  "with blank altloc before line %i.\n"
                                  % self.line_counter,
                                  PDBConstructionWarning)
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom = DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(atom)

    def set_anisou(self, anisou_array):
        "Set anisotropic B factor of current Atom."
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        "Set standard deviation of anisotropic B factor of current Atom."
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        "Set standard deviation of atom position of current Atom."
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        "Return the structure."
        # Add the header dict
        self.structure.header = self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Accept symmetry information; this builder does nothing with it."""
        pass
Ejemplo n.º 28
0
class PdbSite:
    """M-CSA PDB catalytic site. Contains lists of PdbResidues and mapped UniProt
    catalytic sites (UniSite objects), a 3D structure (Biopython Structure) built
    from individual PdbResidue structures (Biopython Residue), a parent structure
    (Biopython Structure), all and close to the site hetero components (possible
    ligands according to their chemical similarity to the cognate ligand and their
    centrality in the active site) as Het objects (containing a Biopython Residue
    structure), as well as a dictionary of annotations extracted from the parent
    mmCIF assembly structure file and SIFTS"""
    def __init__(self):
        self.parent_entry = None
        self.residues = []
        self.residues_dict = {}
        self.mapped_unisites = []
        self.reference_site = None
        self.parent_structure = None
        self.structure = None
        self.ligands = []
        self.mmcif_dict = dict()
        self.is_sane = None

    def __str__(self):
        """Show as pseudo-sequence in one-letter code"""
        return self.sequence

    def __len__(self):
        """Return size of site (residue count)"""
        return self.size

    def __iter__(self):
        """Iterate over residues"""
        yield from self.residues

    def __eq__(self, other):
        """Check if sites contain the same residues (same IDs)"""
        if len(self) == len(other):
            for res in other:
                if res.full_id not in self.residues_dict:
                    return False
            return True
        return False

    def __contains__(self, residue):
        """Check if residue is there"""
        return residue.full_id in self.residues_dict

    def __getitem__(self, full_id):
        """Return the residue with given ID."""
        return self.residues_dict[full_id]

    # Alternative constructors

    @classmethod
    def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
        """Build a PdbSite straight from a list of residues.

        The structure at ``cif_path`` is parsed with MMCIFParser when
        ``annotate`` is true (keeping its mmCIF dictionary) and with
        FastMMCIFParser otherwise. Returns None, after a RuntimeWarning,
        if parsing raises TypeError or PDBConstructionException.
        """
        annotations = dict()
        # First reduce redundant residues with multiple function locations
        reslist = PdbSite._cleanup_list(reslist)
        site = cls()
        site.parent_entry = parent_entry
        parser_class = MMCIFParser if annotate else FastMMCIFParser
        try:
            parser = parser_class(QUIET=True)
            structure = parser.get_structure('', cif_path)
            if annotate:
                annotations = parser._mmcif_dict
        except (TypeError, PDBConstructionException):
            warnings.warn(
                'Could not build site from residue list. Check entry',
                RuntimeWarning)
            return None
        for res in reslist:
            if structure:
                res.add_structure(structure)
            site.add(res)
        if not annotate:
            return site
        site.parent_structure = structure
        site.mmcif_dict = annotations
        site.find_ligands()
        return site

    @classmethod
    def build_reference(cls, reslist, parent_entry, cif_path, annotate=True):
        """Builds reference active site from a list of PDB catalytic residues.
        Assumes that the list only contains one active site, so use it only
        if it is a list of manually annotated catalytic residues.

        Returns the site, flagged as its own reference and as sane, or None
        when ``from_list`` could not build it."""
        ref = PdbSite.from_list(reslist, cif_path, parent_entry, annotate)
        if ref is None:
            # from_list already warned about the failure; propagate it
            # instead of failing on the attribute assignments below.
            return None
        ref.reference_site = ref
        ref.is_sane = True
        return ref

    @classmethod
    def build(cls, seed, reslist, reference_site, parent_entry):
        """Builds active site from a list of catalytic residues that may form
        multiple active sites (e.g. all residues annotated as catalytic in a
        PDB structure). Using a residue as seed, it starts building an active site
        by checking the euclidean distances of all residues that have the same resid
        and name. In the end, it maps the site to the reference defined in the args"""
        site = cls()
        if seed.structure is None:
            return
        for res in reslist:
            candidate = PdbSite._get_nearest_equivalent(
                res, seed, reslist, site)
            if candidate is None:
                continue
            if candidate not in site:
                site.add(candidate)
        site.reference_site = reference_site
        site.parent_entry = parent_entry
        site._map_reference_residues()
        return site

    @classmethod
    def build_all(cls,
                  reslist,
                  reference_site,
                  parent_entry,
                  cif_path,
                  annotate=True,
                  redundancy_cutoff=None):
        """Builds all sites using as input a list of catalytic residues.

        The structure at ``cif_path`` is parsed with MMCIFParser when
        ``annotate`` is true (keeping its mmCIF dictionary) and with
        FastMMCIFParser otherwise. One site is built per seed residue,
        redundant sites are removed using ``redundancy_cutoff``, and, when
        annotating, ligands and annotations are attached.

        Returns a list of PdbSite objects; the list is empty, after a
        RuntimeWarning, if parsing raises TypeError or
        PDBConstructionException."""
        # Map structure objects in every residue
        sites = []
        mmcif_dict = dict()
        try:
            if annotate:
                parser = MMCIFParser(QUIET=True)
                structure = parser.get_structure('', cif_path)
                mmcif_dict = parser._mmcif_dict
            else:
                parser = FastMMCIFParser(QUIET=True)
                structure = parser.get_structure('', cif_path)
        except (TypeError, PDBConstructionException):
            # The category is an argument of warn(), not of format().
            warnings.warn(
                'Could not parse structure {}'.format(cif_path),
                RuntimeWarning)
            return sites
        # First reduce redundant residues with multiple function locations
        reslist = PdbSite._cleanup_list(reslist)
        # We want all equivalent residues from identical assembly chains
        reslist = PdbSite._get_assembly_residues(reslist, structure)
        # Get seeds to build active sites
        seeds = PdbSite._get_seeds(reslist)
        # Build a site from each seed
        for seed in seeds:
            site = cls.build(seed, reslist, reference_site, parent_entry)
            # build() returns None for a seed without structure; such a
            # result is not a site and must not reach the steps below.
            if site is not None:
                sites.append(site)
        # Reduce redundancy
        sites = PdbSite._remove_redundant_sites(sites,
                                                cutoff=redundancy_cutoff)
        # Add ligands and annotations
        if annotate and structure:
            for site in sites:
                site.parent_structure = structure
                site.mmcif_dict = mmcif_dict
                site.find_ligands()
        # Flag unclustered sites
        PdbSite._mark_unclustered(sites)
        return sites

    # Properties

    @property
    def mcsa_id(self):
        """Get M-CSA ID of catalytic residues."""
        for res in self.residues:
            if res.mcsa_id:
                return res.mcsa_id
        return

    @property
    def pdb_id(self):
        """Get PDB ID of catalytic residues. Not a unique site ID"""
        for res in self.residues:
            if res.pdb_id:
                return res.pdb_id
        return

    @property
    def uniprot_id(self):
        """Get UniProt ID of the chain of the first residue"""
        for res in self.residues:
            if res.chain:
                try:
                    return PDB2UNI[(self.pdb_id, res.chain[0])]
                except KeyError:
                    continue
        return

    @property
    def ec(self):
        """Get EC number from SIFTS"""
        for res in self.residues:
            if res.chain:
                try:
                    return PDB2EC[(self.pdb_id, res.chain[0])]
                except KeyError:
                    try:
                        return PDB2EC[(self.pdb_id, res.alt_chain[0])]
                    except KeyError:
                        continue
        return

    @property
    def sequence(self):
        """Show as pseudo-sequence in one-letter code"""
        return ''.join([
            AA_3TO1[res.resname] if (res.is_standard or res.is_gap) else 'X'
            for res in self
        ])

    @property
    def title(self):
        """Return title of PDB entry"""
        try:
            return self.mmcif_dict['_struct.title'][0]
        except KeyError:
            return

    @property
    def enzyme(self):
        """Return enzyme name"""
        try:
            return self.mmcif_dict['_struct.pdbx_descriptor'][0]
        except KeyError:
            return

    @property
    def assembly_id(self):
        """Return PDB assembly ID"""
        try:
            return int(self.mmcif_dict['_entity_poly.assembly_id'][0][-1])
        except (TypeError, KeyError):
            return

    @property
    def experimental_method(self):
        """Return structure determination method"""
        try:
            return self.mmcif_dict['_exptl.method'][0]
        except KeyError:
            return

    @property
    def resolution(self):
        """Return resolution in Angstrom"""
        try:
            if 'nmr' in self.experimental_method.lower():
                return
            elif 'microscopy' in self.experimental_method.lower():
                return float(
                    self.mmcif_dict['_em_3d_reconstruction.resolution'][0])
            else:
                return float(self.mmcif_dict['_refine.ls_d_res_high'][0])
        except (TypeError, KeyError, AttributeError):
            return

    @property
    def organism_name(self):
        """Return name of organism of origin"""
        try:
            return self.mmcif_dict['_entity_src_nat.pdbx_organism_scientific'][
                0]
        except KeyError:
            try:
                return self.mmcif_dict[
                    '_entity_src_gen.pdbx_gene_src_scientific_name'][0]
            except KeyError:
                return

    @property
    def organism_id(self):
        """Return id of organism of origin"""
        try:
            return self.mmcif_dict['_entity_src_nat.pdbx_ncbi_taxonomy_id'][0]
        except KeyError:
            try:
                return self.mmcif_dict[
                    '_entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id'][0]
            except KeyError:
                return

    @property
    def id(self):
        """Unique ID of the active site. Consists of PDB ID and a string
        of chain IDs of all residues"""
        return '{}_{}'.format(self.pdb_id,
                              '-'.join(res.chain for res in self.residues))

    @property
    def size(self):
        """Get site size in residue count"""
        return len(self.residues)

    @property
    def acts_on_polymer(self):
        """Check if it belongs to a family of enzymes whose substrate is a polymer
        (protein or nucleic)"""
        return self.parent_entry.info['reaction']['is_polymeric']

    @property
    def is_reference(self):
        """Check if site is reference site"""
        if self.size > 0:
            return self.residues[0].is_reference
        return False

    @property
    def is_conserved(self):
        """Check if all residues are conserved by comparing to the reference"""
        if self.is_reference:
            return True
        return str(self) == str(self.reference_site)

    @property
    def is_conservative_mutation(self, ignore_funcloc_main=True):
        """Checks if the mutations in the site are conservative.

        Returns False as soon as a residue is neither conserved nor a
        conservative mutation. Otherwise returns True if at least one residue
        is a conservative mutation (or was ignored, see below), else False.

        Args:
            ignore_funcloc_main: Residues that function via main chain or
                                 have a double funcloc are not examined and
                                 count as conservative. When accessed as a
                                 property this always keeps its default.
        """
        any_conservative = False
        for residue in self.residues:
            if ignore_funcloc_main and (residue.has_main_chain_function
                                        or residue.has_double_funcloc):
                any_conservative = True
                continue
            if not (residue.is_conserved or residue.is_conservative_mutation):
                return False
            if residue.is_conservative_mutation:
                any_conservative = True
        return any_conservative

    @property
    def has_missing_functional_atoms(self):
        """Checks if any non-gap residue lacks functional atoms.

        Returns True when a non-gap residue does not have exactly three
        functional atoms, or when retrieving them raises TypeError or
        ValueError. Returns False otherwise."""
        try:
            gap_positions = set(self.get_gaps())
            for position, residue in enumerate(self):
                if position not in gap_positions:
                    functional, _ = residue.get_func_atoms()
                    if len(functional) != 3:
                        return True
        except (TypeError, ValueError):
            return True
        return False

    # Methods

    def copy(self, include_structure=True):
        """Returns a shallow copy of the site. With include_structure set,
        the copy receives its own copy of the structure; otherwise the
        structure attribute is left as produced by the shallow copy"""
        duplicate = copy(self)
        if not include_structure:
            return duplicate
        duplicate.structure = self.structure.copy()
        return duplicate

    def add(self, residue):
        """Add a copy of a PdbResidue or Het object to the site.

        The object is copied (including its structure) before being stored.
        A PdbResidue goes into the residues list and dict; a Het goes into
        the ligands list. If the object carries a structure, that structure
        is also merged into the site structure.

        Args:
            residue: PdbResidue or Het object to add

        Returns:
            Always True
        """
        # Work on a copy so the caller's object is never re-parented
        residue = residue.copy(include_structure=True)
        # Exact type checks: each branch handles exactly one class
        if type(residue) == PdbResidue:
            self.residues.append(residue)
            self.residues_dict[residue.full_id] = residue
            residue.parent_site = self
        if type(residue) == Het:
            self.ligands.append(residue)
            residue.parent_site = self
            if residue.is_polymer:
                # Polymer ligands: merge into an existing chain of the same
                # id, or add the ligand structure to the model directly.
                # NOTE(review): this path indexes self.structure without the
                # None check used further down -- confirm it is always set
                if residue.chain in self.structure[0]:
                    for r in residue.structure:
                        self.structure[0][residue.chain].add(r)
                    return True
                self.structure[0].add(residue.structure)
                return True
        if residue.structure:
            # Initialize structure if empty
            if self.structure is None:
                self.structure = Structure(self.id)
                self.structure.add(Model(0))
            # Chain id is taken from the parent of the residue structure
            chain_id = residue.structure.get_parent().get_id()
            if chain_id not in self.structure[0]:
                self.structure[0].add(Chain(chain_id))
            # Add residue structure to site structure
            if residue.structure.get_id() not in self.structure[0][chain_id]:
                self.structure[0][chain_id].add(residue.structure)
        return True

    def get_distances(self, kind='com'):
        """Calculates the distance of every residue pair in the site.

        Each unordered pair is visited once, in the order the residues are
        stored. Pairs that involve a gap contribute NaN.

        Args:
            kind: passed on to the residue-level get_distance()

        Returns: NumPy array of the collected distances
        """
        values = []
        visited = set()
        for first in self.residues:
            for second in self.residues:
                if first == second:
                    continue
                if (second.full_id, first.full_id) in visited:
                    continue
                if first.is_gap or second.is_gap:
                    distance = np.nan
                else:
                    distance = first.get_distance(second, kind)
                values.append(distance)
                visited.add((first.full_id, second.full_id))
        return np.array(values)

    def get_residues(self):
        """Generator over the catalytic residues of the site"""
        for residue in self.residues:
            yield residue

    def get_gaps(self):
        """Returns the list of positions whose residue is a gap
        (non-aligned residue)"""
        return [
            position for position, residue in enumerate(self.residues)
            if residue.is_gap
        ]

    def contains_equivalent(self, res):
        """Checks if some residue of the site is equivalent to the given one,
        either by chiral_id or, failing that, by chain"""
        return any(
            own.is_equivalent(res, by_chiral_id=True)
            or own.is_equivalent(res, by_chiral_id=False, by_chain=True)
            for own in self)

    def has_identical_residues(self, other):
        """Checks if every residue of the other site has an equivalent in
        this site, regardless of order. Used to cleanup redundant symmetrical
        active sites like HIV-protease"""
        return all(self.contains_equivalent(residue) for residue in other)

    def get_chiral_residues(self):
        """Gets chiral residues from the site if there are any (residues that
        are equivalent to each other when chiral_id and chain are ignored).

        Gap positions are never paired with anything.

        Returns:
            A set of (i, j) tuples of positions in the site. Each unordered
            pair appears once, in the orientation it was first encountered.
        """
        identicals = set()
        for i, p in enumerate(self):
            for j, q in enumerate(self):
                # Skip self-comparison and any pair that involves a gap.
                # Both members are tested: checking only q would let a gap
                # in the outer loop be paired with real residues.
                if p == q or p.is_gap or q.is_gap:
                    continue
                if p.is_equivalent(q, by_chiral_id=False, by_chain=False):
                    # Record each unordered pair only once
                    if (j, i) not in identicals:
                        identicals.add((i, j))
        return identicals

    def find_ligands(self, radius=3):
        """
        Searches the parent structure for hetero components close to the
        catalytic residues, by searching around the atoms of catalytic residues
        and the dummy atoms between distant residues. Populates the ligands list
        with Het objects (via self.add).

        A second pass searches a fixed 30-unit radius around the geometric
        center of the site structure and adds hetero components whose type is
        'Substrate (non-polymer)' or 'Co-factor (non-ion)', flagged as distal.

        Args:
            radius: the search space (in Å) around the atoms of the catalytic residues

        Returns:
            None. Does nothing if parent_structure is not exactly a Structure.
        """
        if type(self.parent_structure) != Structure:
            return
        # Get centers of search
        centers = self._get_ligand_search_centers(radius)
        # Initialize KD tree
        query_atoms = Bio.PDB.Selection.unfold_entities(
            self.parent_structure, 'A')
        ns = NeighborSearch(query_atoms)
        # Search for ligands around each center
        polymers = defaultdict(list)  # chain id -> nearby polymer residues
        site_chains = set([res.chain for res in self])
        seen = set()  # full ids of residues already examined
        added = set()  # full ids of hetero residues already added as ligands
        # Search for components close to catalytic residues
        for center in centers:
            hits = ns.search(center, radius, level='R')
            for res in hits:
                if res.get_full_id() in seen:
                    continue
                seen.add(res.get_full_id())
                # First character of the hetero field of the residue id
                restype = res.get_id()[0][0]
                chain = res.get_parent().get_id()
                # Ignore waters
                if restype == 'W':
                    continue
                # HET components
                if restype == 'H':
                    self.add(
                        Het(self.mcsa_id,
                            self.pdb_id,
                            res.get_resname(),
                            res.get_id()[1],
                            chain,
                            structure=res,
                            parent_site=self))
                    added.add(res.get_full_id())
                # Protein/nucleic polymer components
                # (only from chains that hold no catalytic residue)
                if restype == ' ' and chain not in site_chains:
                    polymers[chain].append(res)
        # Build polymers
        if self.acts_on_polymer:
            for chain, reslist in polymers.items():
                self.add(
                    Het.polymer(reslist, self.mcsa_id, self.pdb_id, chain,
                                self))
        # Find distal co-factor-like or substrate-like molecules
        # NOTE(review): self.structure is used without a None check here --
        # confirm the site always has a structure at this point
        hits = ns.search(self.structure.center_of_mass(geometric=True),
                         30,
                         level='R')
        for res in hits:
            restype = res.get_id()[0][0]
            if restype == 'H' and res.get_full_id() not in added:
                ligand = Het(self.mcsa_id,
                             self.pdb_id,
                             res.get_resname(),
                             res.get_id()[1],
                             res.get_parent().get_id(),
                             structure=res,
                             parent_site=self)
                if ligand.type in ('Substrate (non-polymer)',
                                   'Co-factor (non-ion)'):
                    ligand.is_distal = True
                    self.add(ligand)
        return

    def write_pdb(self,
                  outdir=None,
                  outfile=None,
                  write_hets=False,
                  func_atoms_only=False,
                  include_dummy_atoms=False):
        """
        Writes site coordinates in PDB format
        Args:
            outdir: Directory to save the .pdb file. Defaults to the current
                    directory. Only used when outfile is not given.
            outfile: If unspecified, name is formatted to include info on M-CSA ID,
                     the site id, annotation if the site is a reference site,
                     an annotation about the conservation, relatively
                     to the reference (c: conserved, m: mutated, cm: has only conservative
                     mutation), whether all or only functional atoms are written,
                     and the sanity flag of the site
            write_hets: Include coordinates of ligands.
            func_atoms_only: For PdbResidue entries, write only atoms whose
                             'RESNAME.ATOM' string is in RESIDUE_DEFINITIONS
            include_dummy_atoms: If False, gap residues are skipped
        """
        if not outdir:
            outdir = '.'
        if not outfile:
            # Later checks override earlier ones: conserved wins over
            # conservative mutation, which wins over mutated
            conservation = 'm'
            if self.is_conservative_mutation:
                conservation = 'cm'
            if self.is_conserved:
                conservation = 'c'
            if func_atoms_only:
                atms = 'func'
            else:
                atms = 'all'
            if self.is_sane:
                sanity = 'sane'
            else:
                sanity = 'insane'
            outfile = '{}/mcsa_{}.{}.{}.{}.{}.{}.pdb'.format(
                outdir.rstrip('/'),
                str(self.mcsa_id).zfill(4), self.id,
                'reference' if self.is_reference else 'cat_site', conservation,
                atms, sanity)
        with open(outfile, 'w') as o:
            # REMARK header is written only when mmCIF metadata is available
            if bool(self.mmcif_dict):
                ligands = ','.join(
                    '{0.resname};{0.resid};{0.chain};{0.similarity};{0.centrality};{0.type}'
                    .format(h) for h in self.ligands)
                remarks = (
                    'REMARK CATALYTIC SITE\n'
                    'REMARK ID {0.id}\n'
                    'REMARK PDB_ID {0.pdb_id}\n'
                    'REMARK ASSEMBLY_ID {0.assembly_id}\n'
                    'REMARK UNIPROT_ID {0.uniprot_id}\n'
                    'REMARK EC {0.ec}\n'
                    'REMARK TITLE {0.title}\n'
                    'REMARK ENZYME {0.enzyme}\n'
                    'REMARK EXPERIMENTAL_METHOD {0.experimental_method}\n'
                    'REMARK RESOLUTION {0.resolution}\n'
                    'REMARK ORGANISM_NAME {0.organism_name}\n'
                    'REMARK ORGANISM_ID {0.organism_id}\n'
                    'REMARK NEARBY_LIGANDS {1}'.format(self, ligands))
                print(remarks, file=o)
            # Copy the list so appending ligands does not alter self.residues
            residues = self.residues.copy()
            if write_hets:
                residues += self.ligands
            for res in residues:
                if not include_dummy_atoms and res.is_gap:
                    continue
                # A dummy structure, when present, replaces the real one
                structure = res.structure
                if res.dummy_structure:
                    structure = res.dummy_structure
                if structure is not None:
                    for atom in structure.get_atoms():
                        resname = res.resname.upper()
                        if res.has_main_chain_function or not res.is_standard:
                            resname = 'ANY'
                        funcstring = '{}.{}'.format(resname,
                                                    atom.get_id().upper())
                        if func_atoms_only and type(
                                res
                        ) == PdbResidue and funcstring not in RESIDUE_DEFINITIONS:
                            continue
                        # Fields, in order: record type, serial (0 if falsy),
                        # atom name (padded unless 4 chars), altloc column
                        # ('Z' marks atoms listed in RESIDUE_DEFINITIONS),
                        # residue name, chain id, residue number, x, y, z,
                        # occupancy (empty string if falsy)
                        pdb_line = '{:6}{:5d} {:<4}{}{:>3}{:>2}{:>4}{:>12.3f}' \
                                   '{:>8.3f}{:>8.3f} {:6}'.format(
                            'HETATM' if (atom.get_parent().get_id()[0] != ' ' or type(res) == Het) else 'ATOM',
                            int(atom.get_serial_number()) if atom.get_serial_number() else 0,
                            atom.name if len(atom.name) == 4 else ' {}'.format(atom.name),
                            'Z' if funcstring in RESIDUE_DEFINITIONS else atom.get_altloc(),
                            atom.get_parent().get_resname(),
                            atom.get_parent().get_parent().get_id(),
                            atom.get_parent().get_id()[1],
                            atom.get_coord()[0],
                            atom.get_coord()[1],
                            atom.get_coord()[2],
                            atom.get_occupancy() if atom.get_occupancy() else '')
                        print(pdb_line, file=o)
            print('END', file=o)

    def fit(self,
            other,
            weighted=False,
            cycles=1,
            cutoff=999,
            scaling_factor=None,
            transform=False,
            mutate=True,
            reorder=True,
            allow_symmetrics=True,
            exclude=None,
            get_array=False):
        """Iteratively fits two catalytic sites (self: fixed site, other: mobile site)
        using the Kabsch algorithm from the rmsd module (https://github.com/charnley/rmsd).
        Can also find the optimal atom alignment in each residue, considering
        symmetrical atoms and functionally similar residues, using the
        Hungarian algorithm.

        Args:
            other: mobile active site to fit
            weighted: to perform weighted superposition in the last iteration
            cycles: Number of fitting iterations to exclude outlying atoms
            cutoff: Passed on to the superimposer together with cycles
            scaling_factor: Passed on to the superimposer
            transform: Also transforms the mobile site's coordinates
            mutate: If the two active sites do not have the same residues,
                    make pseudo-mutations to the mobile site to facilitate
                    atom correspondence
            reorder: Find the optimal atom correspondence (within a residue)
                     between the two sites, taking into account conservative
                     mutations and symmetrical atoms (optional). See
                     definitions in residue_definitions.py module.
            allow_symmetrics: Allows flipping of side chains if atoms are
                              equivalent or symmetrical
            exclude: Residue position, or list/tuple/set of positions, to
                     leave out of the fit in addition to gap positions
            get_array: Also return the fixed coordinates and the transformed
                       mobile coordinates

        Returns: rot, tran, rms, rms_all
            rot: Rotation matrix to transform mobile site into the fixed site
            tran: Translation vector to transform mobile site into the fixed site
            rms: RMSD after fitting, excluding outliers
            rms_all: RMSD over all atoms, including outliers
            If get_array is True, p_coords and the transformed q coordinates
            are appended to the tuple. If functional atoms cannot be collected
            for either site, four None values are returned.

        Raises:
            Exception: If number of functional atoms in the two sites is not the same (e.g.
                       if there are missing atoms from the parent structure)
        """
        # In case gaps are present, exclude those positions
        gaps = set(self.get_gaps() + other.get_gaps())
        # If we want to exclude residues from fitting
        if exclude is not None:
            # A single position is wrapped so it can be iterated
            if type(exclude) not in (list, tuple, set):
                exclude = [exclude]
            for i in exclude:
                gaps.add(i)
        # Get atom identifier strings and coords as numpy arrays
        p_atoms, p_coords = self._get_func_atoms(allow_symmetrics, omit=gaps)
        q_atoms, q_coords = other._get_func_atoms(allow_symmetrics, omit=gaps)
        if p_atoms is None or q_atoms is None:
            return None, None, None, None
        if len(p_atoms) != len(q_atoms):
            raise Exception('Atom number mismatch in sites {} and {}'.format(
                self.id, other.id))
        # Initial crude superposition
        rot, tran, rms, _ = PdbSite._super(p_coords, q_coords, cycles=1)
        q_trans = PdbSite._transform(q_coords, rot, tran)
        # In case of non-conservative mutations, make pseudo-mutations to facilitate superposition
        if mutate:
            for i, (p_atom, q_atom) in enumerate(zip(p_atoms, q_atoms)):
                if p_atom != q_atom:
                    # Keep only the leading position field and relabel both
                    # atoms as 'MUT' so they can be matched to each other
                    #q_atoms[i] = p_atom
                    q_atoms[i] = '{}.MUT'.format(q_atoms[i].split('.')[0])
                    p_atoms[i] = '{}.MUT'.format(p_atoms[i].split('.')[0])
        # Reorder atoms using the Hungarian algorithm from rmsd package
        if reorder:
            # The assignment is found on the crudely superposed coordinates
            # but applied to the untransformed ones
            q_review = reorder_hungarian(p_atoms, q_atoms, p_coords, q_trans)
            q_coords = q_coords[q_review]
        # Iterative superposition. Get rotation matrix, translation vector and RMSD
        rot, tran, rms, rms_all = PdbSite._super(p_coords, q_coords, cycles,
                                                 cutoff, weighted,
                                                 scaling_factor)
        if transform:
            other.structure.transform(rot, tran)
        if get_array:
            q_trans = np.dot(q_coords, rot) + tran
            return rot, tran, rms, rms_all, p_coords, q_trans
        return rot, tran, rms, rms_all

    def per_residue_rms(self, other, rot=None, tran=None, transform=False):
        """Calculates the RMSD of each residue pair in two catalytic sites.

        If the rotation matrix or the translation vector is missing, both are
        obtained by fitting `other` onto this site with weighted
        superposition. They are applied to the functional atom coordinates of
        `other` only if `transform` is True; otherwise the RMSD is calculated
        on the coordinates as they are. Positions where either residue is a
        gap yield NaN.

        Returns: NumPy array with one RMSD (rounded to 3 decimals) per position
        """
        if rot is None or tran is None:
            rot, tran, _, _ = self.fit(other, weighted=True, transform=False)
        per_residue = []
        for fixed_res, mobile_res in zip(self, other):
            if fixed_res.is_gap or mobile_res.is_gap:
                per_residue.append(np.nan)
                continue
            # Functional atoms of both residues
            fixed_ids, fixed_xyz = fixed_res.get_func_atoms()
            mobile_ids, mobile_xyz = mobile_res.get_func_atoms()
            # Give mismatching atoms a common label so they can be paired
            for k, (fixed_id, mobile_id) in enumerate(
                    zip(fixed_ids, mobile_ids)):
                if fixed_id != mobile_id:
                    fixed_ids[k] = 'MUT'
                    mobile_ids[k] = 'MUT'
            if transform:
                mobile_xyz = PdbSite._transform(mobile_xyz, rot, tran)
            # Optimal atom correspondence within the residue
            order = reorder_hungarian(fixed_ids, mobile_ids, fixed_xyz,
                                      mobile_xyz)
            mobile_xyz = mobile_xyz[order]
            residue_rms = PdbSite._rmsd(fixed_xyz, mobile_xyz)
            per_residue.append(np.round(residue_rms, 3))
        return np.array(per_residue)

    # Private methods

    def _map_reference_residues(self):
        """Aligns the residues of the site to those of the reference site.

        For every reference residue that no residue of the site maps to, a
        gap residue (dummy structure) mapped to it is added. The residues are
        then reordered with _reorder(). Does nothing without a reference
        site."""
        if self.reference_site is None:
            return
        for ref_res in self.reference_site:
            if any(ref_res == res.reference_residue for res in self):
                continue
            # No counterpart in this site: fill the position with a gap
            placeholder = PdbResidue(mcsa_id=self.mcsa_id,
                                     pdb_id=self.pdb_id,
                                     chiral_id=ref_res.chiral_id,
                                     dummy_structure=True)
            placeholder.reference_residue = ref_res
            self.add(placeholder)
        self._reorder()
        return

    def _get_func_atoms(self, allow_symmetrics=True, omit=None):
        """Collects functional atoms and their coordinates for superposition
        and atom reordering calculations

        Args:
            allow_symmetrics: If True, residues with main chain function are
                              named 'ANY', non-standard residues 'PTM', and
                              atom ids are replaced according to
                              EQUIVALENT_ATOMS
            omit: Positions of residues to exclude
        Returns:
            atoms: A NumPy object array of identifier strings of type
                   'N.RES.AT' where N is the position of the residue in the
                   site, RES is the residue name and AT is the atom name
            coords: A NumPy array of the atomic coordinates
            If a residue without structure is met, whatever was collected up
            to that point is returned as plain arrays. (None, None) is
            returned if the final arrays cannot be built (ValueError).
        """
        labels = []
        positions = []
        for index, residue in enumerate(self):
            if omit and index in omit:
                continue
            if not residue.structure:
                return np.array(labels), np.array(positions)
            for atom in residue.structure:
                name = residue.resname.upper()
                if allow_symmetrics:
                    if residue.has_main_chain_function:
                        name = 'ANY'
                    if not residue.is_standard:
                        name = 'PTM'
                atom_id = '{}.{}'.format(name, atom.name)
                if atom_id not in RESIDUE_DEFINITIONS:
                    continue
                if allow_symmetrics and atom_id in EQUIVALENT_ATOMS:
                    atom_id = EQUIVALENT_ATOMS[atom_id]
                labels.append('{}.{}'.format(index, atom_id))
                positions.append(atom.get_coord())
        try:
            labels = np.array(labels, dtype=object)
            positions = np.stack(positions, axis=0)
        except ValueError:
            return None, None
        return labels, positions

    def _reorder(self):
        """Residue reordering routine for _map_reference_residues.

        Residues are arranged in the order of the reference residues they map
        to. Afterwards, each pair reported by get_chiral_residues() is swapped
        when the second member has the lower chain id."""
        if self.reference_site is None:
            return
        new_order = [
            position
            for ref_res in self.reference_site
            for position, res in enumerate(self)
            if ref_res == res.reference_residue
        ]
        self.residues = [self.residues[position] for position in new_order]

        # If site contains chiral residues, order each pair by chain
        for first_idx, second_idx in (self.get_chiral_residues() or ()):
            first = self.residues[first_idx]
            second = self.residues[second_idx]
            if second.chain < first.chain:
                self.residues[first_idx] = second
                self.residues[second_idx] = first
        return

    def _get_ligand_search_centers(self, radius=4):
        """Builds the list of points around which ligands are searched.

        Every atom coordinate of every non-gap residue that has a structure
        is a center. In addition, for each residue pair whose minimum distance
        lies between 2*radius and 4*radius (inclusive), the midpoint of the
        two residue geometric centers is added, to cover the empty space
        between distant residues."""
        search_points = []
        paired = set()
        for first in self:
            if first.is_gap or first.structure is None:
                continue
            search_points.extend(
                atom.get_coord()
                for atom in first.structure.get_unpacked_list())
            first_center = first.structure.center_of_mass(geometric=True)
            for second in self:
                if first is second or (second.id, first.id) in paired:
                    continue
                if second.is_gap or second.structure is None:
                    continue
                paired.add((first.id, second.id))
                second_center = second.structure.center_of_mass(
                    geometric=True)
                separation = first.get_distance(second, kind='min')
                if 2 * radius <= separation <= 4 * radius:
                    midpoint = np.mean([first_center, second_center], axis=0)
                    search_points.append(midpoint)
        return search_points

    @staticmethod
    def _cleanup_list(reslist):
        """Merges duplicate residues that differ only in funcloc.

        When two residues are equivalent (by chain) but have different
        funclocs, a copy of the first one carrying the first funcloc of each
        replaces both. Returns a new list: the merged residues first, then
        the untouched residues in their original order."""
        merged = []
        visited_pairs = set()
        superseded = set()
        for first in reslist:
            for second in reslist:
                if first == second or \
                   (second.full_id, first.full_id) in visited_pairs:
                    continue
                if first.is_equivalent(second, by_chiral_id=False,
                                       by_chain=True) \
                   and first.funclocs != second.funclocs:
                    combined = first.copy(include_structure=True)
                    combined.funclocs = [first.funclocs[0],
                                         second.funclocs[0]]
                    merged.append(combined)
                    superseded.update((first.full_id, second.full_id))
                visited_pairs.add((first.full_id, second.full_id))
        # Keep every residue that was not folded into a merged one
        for residue in reslist:
            if residue.full_id in superseded or residue in merged:
                continue
            merged.append(residue)
        return merged

    @staticmethod
    def _get_assembly_residues(reslist, parent_structure):
        """
        Makes a new residue list of all equivalent residues found in identical assembly
        chains. Also applies an auth_resid correction where residues in identical chains
        might have a different auth_resid (usually of 1xxx or 2xxx for chains A and B).

        Args:
            reslist: The residue list to be enriched
            parent_structure: BioPython Structure object of the parent structure
        Returns:
            An enriched list of residues with mapped structures. Each entry is
            a copy of an input residue with its chain set to the full id of
            the matching chain and its structure set to the residue found
            there.
        """
        new_reslist = []
        for res in reslist:
            # NOTE(review): reset once per residue, not per chain, so a match
            # from an earlier chain can carry over to a later chain when the
            # modified-residue search below finds nothing -- confirm intent
            res_structure = None
            for chain in parent_structure[0]:
                # Only the first character of the chain id is compared
                if res.chain != chain.get_id()[0]:
                    continue
                # If we have a standard residue
                if res.is_standard:
                    # Lookup order: auth_resid, corrected_auth_resid, resid
                    try:
                        res_structure = chain[res.auth_resid]
                    except KeyError:
                        try:
                            res_structure = chain[res.corrected_auth_resid]
                        except KeyError:
                            try:
                                res_structure = chain[res.resid]
                            except KeyError:
                                continue
                        # NOTE(review): the residue name is verified only when
                        # the auth_resid lookup failed and a fallback was used
                        if res_structure.resname != res.resname.upper():
                            continue
                # If we have a modified residue
                else:
                    # Linear scan by residue number; the last match wins
                    for _res in chain:
                        if _res.get_id()[1] == res.auth_resid:
                            res_structure = _res
                new_res = res.copy(include_structure=False)
                new_res.chain = chain.get_id()
                new_res.structure = res_structure
                new_reslist.append(new_res)
        return new_reslist

    @staticmethod
    def _get_seeds(reslist):
        """Finds residues in a list that can be used as seeds when
        building multiple active sites.

        The reference is the first residue that has an auth_resid, a
        structure and at least one other residue closer than 8 (minimum
        distance). The seeds are all residues with a structure that are
        equivalent to it. Returns an empty list if no reference is found."""

        def has_close_neighbour(candidate):
            # Distances that cannot be computed/compared are ignored
            for neighbour in reslist:
                if candidate == neighbour:
                    continue
                try:
                    if candidate.get_distance(neighbour, kind='min') < 8:
                        return True
                except TypeError:
                    continue
            return False

        reference = None
        for candidate in reslist:
            if candidate.auth_resid is None or candidate.structure is None:
                continue
            if has_close_neighbour(candidate):
                reference = candidate
                break
        found = []
        if reference is None:
            return found
        # Every equivalent of the reference residue becomes a seed
        for candidate in reslist:
            if not candidate.is_equivalent(reference):
                continue
            if candidate.structure is None or candidate in found:
                continue
            found.append(candidate)
        return found

    @staticmethod
    def _get_nearest_equivalent(self, other, reslist, site):
        """Among the residues of the list that have a structure and are
        equivalent to 'self', returns the one closest to 'other' (minimum
        distance below 999) that has no equivalent in the site yet, or None
        if there is no such residue"""
        candidates = [
            res for res in reslist
            if res.structure is not None and res.is_equivalent(self)
        ]
        nearest = None
        shortest = 999
        for candidate in candidates:
            # Skip residues the site already holds
            if site.contains_equivalent(candidate):
                continue
            distance = candidate.get_distance(other, kind='min')
            if distance < shortest:
                nearest, shortest = candidate, distance
        return nearest

    @staticmethod
    def _remove_redundant_sites(sitelist, cutoff=0):
        """Returns the sites of the list that are neither incomplete nor
        redundant.

        A site is rejected if it has missing functional atoms or a different
        length than its reference site. Of two complete sites, the later one
        is rejected if the RMSD over all atoms after fitting is below the
        cutoff, or below 0.01 while both have identical residues."""

        def is_incomplete(site):
            return site.has_missing_functional_atoms or \
                len(site) != len(site.reference_site)

        compared = set()
        rejected = set()
        for fixed in sitelist:
            if is_incomplete(fixed):
                rejected.add(fixed.id)
                continue
            for mobile in sitelist:
                if is_incomplete(mobile):
                    rejected.add(mobile.id)
                    continue
                if fixed.id == mobile.id or \
                   (mobile.id, fixed.id) in compared:
                    continue
                compared.add((fixed.id, mobile.id))
                _, _, _, rms_all = fixed.fit(mobile)
                duplicate = fixed.has_identical_residues(mobile) \
                    and rms_all < 0.01
                if duplicate or rms_all < cutoff:
                    rejected.add(mobile.id)
        kept = []
        for site in sitelist:
            if site.id in rejected or site in kept:
                continue
            kept.append(site)
        return kept

    @staticmethod
    def _mark_unclustered(sitelist):
        """Flags catalytic sites from the same PDB that might have insanely
        outlying residues by setting their is_sane attribute.

        A site is insane if any of its intra-site distances (kind 'ca') is
        not below 3 times the matching reference site distance (kind 'min',
        floored at 8), or not below 1.3 times the matching distance (floored
        at 8) of another site that is not already marked insane.

        Returns: False if the list is empty, True otherwise
        """
        try:
            limits = np.nan_to_num(
                sitelist[0].reference_site.get_distances(kind='min'),
                nan=999)
            limits = np.where(limits < 8, 8, limits)
        except IndexError:
            return False
        for site in sitelist:
            site.is_sane = True
            own = np.nan_to_num(site.get_distances(kind='ca'), nan=0)
            if not np.all(own < 3 * limits):
                site.is_sane = False
                continue
            for peer in sitelist:
                # Only an explicit False excludes a peer from comparison
                if site.id == peer.id or peer.is_sane == False:
                    continue
                theirs = np.nan_to_num(peer.get_distances(kind='ca'),
                                       nan=999)
                theirs = np.where(theirs < 8, 8, theirs)
                if not np.all(own < 1.3 * theirs):
                    site.is_sane = False
        return True

    @staticmethod
    def _super(p_coords,
               q_coords,
               cycles=1,
               cutoff=999,
               weighted=False,
               scaling_factor=None):
        """Superimposes q_coords on p_coords with a Superimposer, weighted or
        unweighted.

        Returns: rotation, translation, RMSD and RMSD over all atoms (both
        RMSDs rounded to 3 decimals), as exposed by the superimposer
        """
        superimposer = Superimposer()
        superimposer.set(p_coords, q_coords, cycles, cutoff, scaling_factor)
        if not weighted:
            superimposer.run_unweighted()
        else:
            superimposer.run_weighted()
        rotation, translation = superimposer.rot, superimposer.tran
        rms = np.round(superimposer.rms, 3)
        rms_all = np.round(superimposer.rms_all, 3)
        return rotation, translation, rms, rms_all

    @staticmethod
    def _rmsd(p_coords, q_coords):
        """Root-mean-square deviation of two coordinate sets (NumPy arrays)
        taken as they are, i.e. WITHOUT transformation and minimization"""
        displacement = p_coords - q_coords
        squared = np.square(np.linalg.norm(displacement, axis=1))
        mean_squared = np.sum(squared) / squared.size
        return np.sqrt(mean_squared)

    @staticmethod
    def _transform(coords, rot, tran):
        """Applies a rotation (right-multiplication by rot) followed by a
        translation to a set of coordinates (NxD NumPy array)"""
        rotated = np.dot(coords, rot)
        return rotated + tran
Ejemplo n.º 29
0
class Disordered_Fragment(object):
    """Region of a component that has no assigned atom coordinates.

    Attributes set in ``__init__``:
        start_pos, stop_pos -- residue numbers of the first and last
            disordered residue (0 when not given)
        sequence -- sequence of the fragment ("" when not given)
        radius -- pseudoatom radius
        max_sphere_radius -- radius of the sphere defining the volume
            simulation area
        pseudoresidues -- residue objects added for this fragment
        fragment_type -- "cterm", "nterm", "internal" or "simulated_volume"
        fragment_lattice -- structure holding the fragment residues
            (only residues, no atoms)

    ``structure`` is created by ``add_component_structure`` and
    ``start_resi`` / ``end_resi`` by ``set_anchor_residues``; these
    attributes do not exist before those calls.
    """

    # Pseudoatom radius per molecule type (keys are lower-cased moltypes).
    _PSEUDOATOM_RADII = {"protein": 1.9, "dna": 3.6, "rna": 3.36}
    # Averaged CA-CA / C4'-C4' distance in an ideal helix per molecule type.
    _RESIDUE_SPACING = {"protein": 3.8, "dna": 7.2, "rna": 6.72}

    def __init__(self, start_pos=None, stop_pos=None, sequence=None):
        # Falsy arguments (None, 0, "") fall back to the defaults.
        self.start_pos = start_pos or 0  # first disordered residue number
        self.stop_pos = stop_pos or 0  # last disordered residue number
        self.sequence = sequence or ""  # sequence of disordered fragment
        self.radius = 1.0  # pseudoatoms radius
        self.max_sphere_radius = 10.0  # sphere defining simulation area
        self.pseudoresidues = []  # list of residues objects
        self.fragment_type = "internal"  # cterm/nterm/internal/simulated_volume
        self.fragment_lattice = None  # residues only, no atoms

    def __str__(self):
        return "%s %s %s %s %s" % (self.start_pos, self.stop_pos,
                                   self.sequence, self.radius,
                                   self.fragment_type)

    def add_component_structure(self, struct):
        """
        adds structure representing all component structure
        """
        self.structure = struct

    def add_fragment_structure(self, fragment):
        """
        adds piece of structure representing disordered region
        """
        self.fragment_lattice = fragment

    def add_pseudoatoms_to_structure(self, pseudoatoms, moltype):
        """
        adds one pseudoatom to each residue of the fragment lattice, pairing
        the i-th pseudoatom with the i-th lattice residue.

        Raises IndexError when there are more pseudoatoms than residues.
        """
        residues = None
        for index, pa in enumerate(pseudoatoms):
            if residues is None:
                # Materialise the residue list once (on first use) instead
                # of rebuilding it for every pseudoatom; adding atoms does
                # not change which residues the lattice holds.
                residues = list(self.fragment_lattice.get_residues())
            self.add_pa_to_structure(pa, residues[index], moltype)

    def add_pa_to_structure(self, pa, resi, moltype):
        """
        creates a pseudoatom at (pa.x, pa.y, pa.z) and adds it to resi.
        The atom is named CA for moltype "protein" and C4' otherwise.
        """
        coord = array([pa.x, pa.y, pa.z])
        if moltype == "protein":
            new_atom = PyryAtom('CA', coord, 0, 1, ' ', ' CA', 1)
        else:
            new_atom = PyryAtom("C4'", coord, 0, 1, ' ', " C4'", 1)

        new_atom.assign_vdw()
        new_atom.assign_molweight()
        resi.add(new_atom)

    #@TODO needs testing!!!!!
    #@TODO need to renumber residues somehow!!

    def add_fragment_to_original_structure(self, component, structure, res_nr,
                                           fr_type):
        """
        moves the lattice residues into the first chain of `structure`
        (or of component.pyrystruct.struct when `structure` is falsy),
        renumbering them consecutively starting at res_nr.

        normal - in direction from n to c term
        reverse - in direction from c to n term (for addition of residues
                  on Nterm); used when fr_type == "nterm"
        """
        #@TODO will have to be changed when multichain components come!!!
        if structure:
            chain = list(structure.get_chains())[0]
        else:
            chain = list(component.pyrystruct.struct.get_chains())[0]

        residues = list(self.fragment_lattice.get_residues())
        if fr_type == "nterm":
            residues.sort(key=lambda resi: resi.id[1], reverse=True)

        for resi in residues:
            resi.id = (" ", res_nr, " ")
            res_nr += 1
            chain.add(resi)
            self.add_pseudoresidue(resi)

    def add_pseudoresidue(self, pr):
        """
        adds pseudoresidue object to list of pseudoresidues
        """
        self.pseudoresidues.append(pr)

    def build_structure(self, sequence, start_index, moltype):
        """
           builds new structure composed of atom_name atoms only
           for nucleic acids these are C4', for proteins CA

           One empty residue per sequence letter is added to the first chain
           of the fragment lattice, numbered from start_index upwards.
        """
        new_chain = list(self.fragment_lattice.get_chains())[0]
        for resi in sequence:
            resi_id = (" ", start_index, " ")
            if moltype == "protein":
                resi_name = AMINOACIDS[resi.upper()]
            else:
                resi_name = NUCLEOTIDES[resi.upper()]
            new_chain.add(Residue(resi_id, resi_name, " "))
            start_index += 1

    def clean_fragment(self):
        """drops the fragment lattice"""
        self.fragment_lattice = None

    def remove_pseudoresidues(self, structure):
        """
        removes pseudoresidues simulated during previous mutation in order
        to prepare conditions for new simulated pseudoresidues to be attached
        and scored

        Residues of the first chain of `structure` whose residue number
        matches a stored pseudoresidue are detached (skipped for
        "simulated_volume" fragments and for a falsy `structure`).  The list
        of pseudoresidues is always emptied.
        """
        #@TODO must be changed when hybrids components will be considered
        if self.fragment_type != "simulated_volume" and structure:
            chain = list(structure.get_chains())[0]
            # Set lookup replaces the nested residue/pseudoresidue scan.
            pseudo_numbers = set(r.id[1] for r in self.pseudoresidues)
            # Collect first, detach afterwards: the chain must not change
            # while it is being iterated.
            doomed = [resi for resi in chain if resi.id[1] in pseudo_numbers]
            for resi in doomed:
                chain.detach_child(resi.id)

        self.pseudoresidues = []

    def create_new_chain(self, id):
        """
        replaces the fragment lattice with a new structure that contains
        model 0 and a single empty chain; `id` names both structure and chain
        """
        self.fragment_lattice = Structure(id)
        my_model = Model(0)
        self.fragment_lattice.add(my_model)
        my_chain = Chain(id)
        my_model.add(my_chain)  # what if more chains in one component?

    def create_simulated_volume(self, start_pos, fasta_seq, struct):
        """
        method to create new instance of Disordered_Fragment class to
        represent regions with not assigned atom coordinates
        """
        self.set_fragment_type("simulated_volume")
        self.create_new_chain(struct.chain)
        self.build_structure(fasta_seq, start_pos, struct.moltype)
        self.set_fragment_sequence(fasta_seq)
        self.get_pseudoatom_radius(struct)
        ##@TODO-CHECK: how to assess radius of simulation sphere??
        self.calculate_max_sphere_radius(struct)

    #@TODO-CHECK: wouldn't one fragment type be enough??

    def create_simulated_fragment(self, struct, fasta_seq):
        """
        method to create set attributes of disordered fragment instance
        (pseudoatom radius, fragment type and simulation sphere radius)
        """
        #@TODO-CHECK: calculate max sphere radius according to moltype and resi number!!
        self.get_pseudoatom_radius(struct)
        self.__check_fragment_type(fasta_seq)
        self.calculate_max_sphere_radius(struct)

    def __check_fragment_type(self, fasta_seq):
        """
        from selection of:
           terminal
           internal

        Internal fragments spanning 30 residues or more are typed "cterm"
        (simulated as "GRAPE").  When none of the conditions matches the
        fragment type is left unchanged.
        """
        fasta_length = len(fasta_seq)
        if self.stop_pos == fasta_length:
            self.fragment_type = "cterm"
        elif self.stop_pos < fasta_length - 1 and self.start_pos > 1:
            self.fragment_type = "internal"
            if self.stop_pos - self.start_pos >= 30:
                self.fragment_type = "cterm"
                # Single pre-formatted argument: prints the same text under
                # both Python 2 and Python 3.
                print("internal fragment simulated as GRAPE %s %s" %
                      (self.start_pos, self.stop_pos))
        elif self.start_pos == 1:
            self.fragment_type = "nterm"

    def get_pseudoatom_radius(self, struct):
        """
        radius is averaged distance between CA or C4' atoms in ideal helix/2
        to represent volume of single CA/C4' atom

        Sets self.radius from struct.moltype (matched case-insensitively);
        an unknown moltype leaves the radius unchanged.
        """
        radius = self._PSEUDOATOM_RADII.get(struct.moltype.lower())
        if radius is not None:
            self.radius = radius  # protein: 1.9 (or 1.72 as C atom)

    def calculate_max_sphere_radius(self, struct):
        """
           sets radius of the simulation sphere for a particular component
           type (in Angstroms)

           radius given (3.8, 7.2, 6.72) is averaged distance between CA or
           C4' atoms in ideal helix; an unknown moltype changes nothing
        """
        spacing = self._RESIDUE_SPACING.get(struct.moltype.lower())
        if spacing is not None:
            self.__set_max_sphere_radius_for_moltype(spacing)

    def __set_max_sphere_radius_for_moltype(self, mol_radius):
        """
        terminal fragments get half of (sequence length * mol_radius),
        simulated volumes the full product; other types are left unchanged
        """
        if self.fragment_type == "cterm" or self.fragment_type == "nterm":
            self.max_sphere_radius = (len(self.sequence) * mol_radius) / 2
        elif self.fragment_type == "simulated_volume":
            self.max_sphere_radius = (len(self.sequence) * mol_radius)

    def set_max_sphere_radius(self, area_radius):
        """
        sets radius of simulation area for Volume Simulator
        """
        self.max_sphere_radius = area_radius

    def set_anchor_residues(self, resi1, resi2=None):
        """
        defines first residue for volume simulator - here simulation should
        start
        if given defines last residue for volume simulator - here simulation
        should finish
        """
        self.start_resi = resi1
        self.end_resi = resi2

    def set_modeling_disordered_fragment(self, component, struct, chain):
        """
        removes previously simulated pseudoresidues from `struct` (or from
        component.pyrystruct.struct when `struct` is falsy) and rebuilds the
        fragment lattice as a fresh chain with one residue per sequence
        letter.  The same steps are performed for every fragment type.

        #@TODO: split into 2 separate functions: modelling of fragments and
        #       modelling of volumes
        #@TODO: tidy up simulate fragments!! add simulation for internal and
        #       N-terminal fragments
        #@TODO: check the numbering of residues added by the simulator
        """
        # remove old pseudoatom positions
        target = struct if struct else component.pyrystruct.struct
        self.remove_pseudoresidues(target)
        self.create_new_chain(chain)
        self.build_structure(self.sequence, self.start_pos,
                             component.pyrystruct.moltype)

    def set_fragment_sequence(self, seq):
        """
        sequence of disordered fragment
        """
        self.sequence = seq

    def set_pseudoatom_radius(self, radius):
        """
        sets pseudoatom radius; different for nucleotide and amino acids;
        in Angstroms
        """
        self.radius = radius

    def set_fragment_type(self, frag_type):
        """
        sets fragment type. Can be internal when disordered region is inside
        the component's structure or terminal when it is located on C or N
        termini
        """
        self.fragment_type = frag_type
Ejemplo n.º 30
0
class PyRyStructure(object):
    """
        class represents structure as entity (very wide definition)
        used for storing information about structures,
        creating BIO.pdb structures,
        saving structure files etc.
    """
    def __init__(self, structure=None):
        # A falsy `structure` is stored as None.
        self.struct = structure if structure else None
        self.sequence = ''  # sequence taken from structure
        #----------------will decide on one of these 3 ----------------------------
        self.center_of_mass = []  # [x,y,z] coords of center of mass
        self.geometric_center = []  # geometric centre
        self.center = None  # actual center of given complex component
        #--------------------------------------------------------------------------
        self.chain = ''  # chain name from structure file
        self.moltype = ''  # protein, DNA, RNA

    def __str__(self):
        return "%s %s %s %s %s" % (self.struct, self.chain,
                                   self.center_of_mass, self.moltype,
                                   self.sequence)

    def add_chain_to_struct(self, chain_id):
        """
            adds another chain to the first model of the BIO.pdb structure
        Parameters:
        -----------
            chain_id    :   chain name
        """
        chain = Chain(chain_id)
        self.struct[0].add(chain)

    def add_residues_to_structure(self, struct, chain_id, chain2_id):
        """
            adds residues from struct to a given structure (self.struct)
        Parameters:
        -----------
            struct      :   template structure object with residues which will
                            be added to self.struct object
            chain_id    :   name of template chain
            chain2_id   :   name of the receiving chain in self.struct
        """
        target_chain = self.struct[0][chain2_id]
        for res in struct[0][chain_id].child_list:
            target_chain.add(res)

    def calculate_atom_atom_distance(self, atom1, atom2):
        """
            calculates distance between two atoms
        Parameters:
        -----------
            atom1, atom2    :   Bio.PDB.Atom entities
        Returns:
        ---------
            atom1 - atom2 (for Bio.PDB atoms: their distance in 3D space)
        Raises:
        -------
            PyRyStructureError if self.struct is not a Bio.PDB entity
        """
        # is_structure() either returns True or raises, so no branch is
        # needed.  It must be called on self: the bare name is undefined.
        self.is_structure()
        return atom1 - atom2

    @staticmethod
    def _weighted_centre(atoms, geometric=False):
        """Return the weighted mean position of `atoms` as a 3-element array.

        Each atom is weighted by its `molweight`, or by 1 when `geometric`
        is true.  Raises ValueError when the total weight is zero (e.g. no
        atoms at all).
        """
        # Must be an array: `list += ndarray` would extend the list instead
        # of adding the coordinates element-wise.
        weighted_sum = array([0., 0., 0.])
        total_weight = 0.
        for atom in atoms:
            weight = 1. if geometric else atom.molweight
            position = array([
                float(atom.coord[0]),
                float(atom.coord[1]),
                float(atom.coord[2])
            ])
            weighted_sum += position * weight
            total_weight += weight
        if total_weight == 0:
            raise ValueError('Center of Mass cannot be calculated: '
                             'total weight of the atoms is zero.')
        return weighted_sum / total_weight

    def calculate_centre_of_mass(self, entity=None, geometric=False):
        """
           calculates centre of mass for given structure
        Returns gravitic or geometric center of mass of an Entity.
        Geometric assumes all masses are equal (geometric=True)
        Defaults to Gravitic.

        The atoms of self.struct are used when it is a Bio.PDB entity;
        otherwise `entity` must be an iterable containing atoms.
        Parameters:
        -----------
            entity      : optional iterable of atoms (fallback input)
            geometric   : optional
        Returns:
        ---------
            centre coordinates as a 3-element array [x,y,z]; the value is
            also stored in self.geometric_center (geometric=True) or
            self.center_of_mass (default)
        Raises:
        -------
            ValueError  :   if wrong object is given as a target, or the
                            total weight of the atoms is zero
        """
        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            atom_list = self.struct.get_atoms()
        else:
            atom_list = None
            if hasattr(entity, '__iter__'):
                candidates = list(entity)
                # any() gives a real truth value on Python 2 and 3 alike
                # (a Python 3 filter object is always truthy).
                if any(x.level == 'A' for x in candidates):  # List of Atoms
                    atom_list = candidates
            if atom_list is None:  # Some other weirdo object
                raise ValueError('Center of Mass can only be calculated from \n\
        the following objects:Structure, Model, Chain, Residue, list of Atoms.'
                                 )

        centre = self._weighted_centre(atom_list, geometric)
        if geometric:
            self.geometric_center = centre
        else:
            self.center_of_mass = centre
        return centre

    def create_PDB_obj(self, id, filename):
        """
            creates Bio.PDB object from pdb file
        Parameters:
        -----------
            id          : name of structure
            filename    : file name
        """
        parser = PDBParser()
        self.struct = parser.get_structure(str(id), filename)

    def create_new_structure(self, name, chain_id):
        """
            creates new Bio.PDB structure object
        Parameters:
        -----------
            name        :   structure name
            chain_id    :   chain name (e.g. A, B, C)
        Result:
        ---------
            self.struct :   Bio.PDB object with model 0 and one chain inside
        """
        self.struct = Structure(name)
        my_model = Model(0)
        my_chain = Chain(chain_id)
        self.struct.add(my_model)
        self.struct[0].add(my_chain)

    def get_chainname(self):
        """
            stores the id of the structure's first chain in self.chain
        """
        self.chain = list(self.struct.get_chains())[0].id

    def get_mol_sequence(self):
        """
            retrieves struct sequence as one letter code
        Parameters:
        -----------
            self.struct : structure object
        Returns:
        ---------
            self.sequence : sequence of given structure in one letter code

        The sequence is rebuilt from scratch on every call, so calling the
        method repeatedly does not duplicate it.
        """
        ##----must be included in tests!!!--------------------
        letters = []
        for resi in self.struct.get_residues():
            resi_name = resi.resname.strip().upper()

            if len(resi_name) == 1 and resi_name in RESNAMES.values():
                # one letter nucleotide names are taken as they are
                letters.append(resi_name)
            elif resi_name in to_one_letter_code:
                # hetatms with modifications
                letters.append(to_one_letter_code[resi_name])
            elif resi_name in LIGANDS:
                # do not add ions and ligands into sequence
                continue
            else:
                # if anything else appeared include as X
                letters.append("X")
        self.sequence = "".join(letters)
        return self.sequence

    def get_moltype(self):
        """
            based on the first residue of the structure determines if a
            certain component is DNA, RNA or protein
        Returns:
        --------
            self.moltype (left unchanged when nothing could be decided)
        Raises:
        -------
            PyRyStructureError if a 3-letter residue name is neither in
            AMINOACIDS values nor in RESNAMES keys
        """
        res = list(self.struct.get_residues())[0]
        res_name = res.resname.strip()
        if len(res_name) == 3:
            if res_name in AMINOACIDS.values():
                self.moltype = 'protein'
            elif res_name not in RESNAMES:
                raise PyRyStructureError("Wrong 3letter name", res_name)
            # names found in RESNAMES leave the moltype untouched
        else:
            # Other name lengths: decide from the atoms of the residue.
            for at in res:
                atom_name = at.fullname.strip()
                if atom_name == "CA":
                    self.moltype = 'protein'
                    break
                elif atom_name == "C4'" or atom_name == "C4*":
                    # A 2'-oxygen marks RNA; DNA is assumed otherwise,
                    # but only while no moltype has been set yet.
                    if any(atom.fullname.strip() == "O2'"
                           for atom in res.child_list):
                        self.moltype = "RNA"
                    if self.moltype == "":
                        self.moltype = "DNA"
        return self.moltype

    def is_structure(self):
        """
            checks if a given structure is Bio.PDB structure object
        Returns:
        --------
            True when self.struct is a Bio.PDB entity
        Raises:
        ------
            PyRyStructureError  : if self.struct is not Bio.PDB object
        """
        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            return True
        else:
            raise PyRyStructureError('%s should be one of\n\
                     the following objects:Structure, Model, Chain, Residue, \n\
                                                  list of Atoms.' %
                                     (self.struct))

    def set_chain_name(self, chain):
        """
        sets chain name
        """
        self.chain = chain

    def set_moltype(self, moltype):
        """
        sets molecule type
        """
        self.moltype = moltype

    def set_structure(self, struct):
        """
        sets structure object
        """
        self.struct = struct

    def set_pyrystructure(self, structure=None):
        """
        sets structure as PyRyStructure attribute and derives sequence
        (only when none is stored yet), chain name and molecule type from it

        Parameters:
        -----------
            structure   :   Bio.PDB structure object; used only when no
                            structure has been stored yet
        """
        # Identity test: entities may define their own equality.
        if self.struct is None:
            self.struct = structure
        if self.sequence == '':
            self.get_mol_sequence()
        self.get_chainname()
        self.get_moltype()

    def set_sequence(self, seq):
        """
        sets sequence
        """
        self.sequence = seq

    def write_structure(self, filename):
        """
            Writing structure to the pdb_file, saving changed coordinates
        Parameters:
        -----------
            filename    :   final name of structure file
        """
        out = PDBIO()
        out.set_structure(self.struct)
        out.save(filename)
Ejemplo n.º 31
0
# Build a single-chain structure with one ALA residue per row of `points`,
# each residue holding one CA atom, then write it out with PDBIO.
# NOTE(review): `model`, `points`, `bfactors` and `args` are not defined in
# the visible part of this script - presumably created earlier; confirm.
chain = Chain("A")
structure = Structure("ref")

num_count = 0
for i in range(0,shape(points)[0]):
    # num_count is the 1-based counter used both as residue number and as
    # atom serial number.
    num_count = num_count +1
    res_id = (' ',num_count,' ')
    residue = Residue(res_id,'ALA',' ')
    cur_coord = tuple(points[i])
    bfactor = bfactors[i]
    atom = Atom('CA',cur_coord,bfactor,0,' ','CA',num_count,'C')
    residue.add(atom)
    chain.add(residue)

model.add(chain)
structure.add(model)
# --------------------------------------------------------------------
io=PDBIO()
io.set_structure(structure)
if ( args['dst'] is None):
    # No destination given: write the structure to standard output.
    fn = sys.stdout
    io.save(fn)
    if ( args['link'] ):
        # Append one CONECT record per consecutive pair of serial numbers.
        for i in range(1,shape(points)[0]):
            fn.write( "CONECT%5d%5d\n" % (i, i+1))
else:
    # Save to the destination path, then reopen it in append mode.
    fn = args['dst']
    io.save(fn)
    fout = open(fn,"a")
    if (args['link'] ):
        # NOTE(review): the body of this loop is cut off in this excerpt.
        for i in range(1,shape(points)[0]):
Ejemplo n.º 32
0
def normalize_structure(input_path: str,
                        pdb_id: str,
                        model_id: int,
                        chain_id: str,
                        primary: str,
                        mask: str,
                        save=True,
                        verbose=True):
    """Normalize one chain of a PDB file and check it against a sequence.

    The requested chain is passed through ``normalize_chain`` and its
    one-letter sequence is compared with the residues of ``primary`` whose
    ``mask`` character is ``'+'`` (``'-'`` positions are skipped).

    Parameters:
        input_path: path of the PDB file to parse.
        pdb_id: identifier given to the parsed structure.
        model_id: id of the model to use.  When it is absent, the nearest
            lower model id that exists is used instead.
        chain_id: id of the chain to normalize.
        primary: expected primary sequence (must be non-empty).
        mask: per-residue mask of '+' / '-' characters (must be non-empty).
        save: when true, write the result to ``input_path + '.norm'``.
        verbose: when true, report a substituted model id on stdout.

    Returns:
        The output path when ``save`` is true, otherwise the new structure
        containing only the normalized chain.

    Raises:
        ValueError: model or chain not found, or a residue mismatch.
        ChainLengthError: normalized and masked sequences differ in length.
        AssertionError: empty ``primary`` / ``mask`` or an unexpected mask
            character.
    """
    assert primary
    assert mask
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("ignore", PDBConstructionWarning)
        parser = PDBParser()
        structure = parser.get_structure(pdb_id, input_path)

        if model_id in structure.child_dict:
            model = structure.child_dict[model_id]
        else:
            # Fall back to the closest lower model id and stop at the first
            # hit, so the nearest model wins rather than the lowest one.
            model = None
            for try_model_id in range(model_id - 1, -1, -1):
                if try_model_id in structure.child_dict:
                    model = structure.child_dict[try_model_id]
                    if verbose:
                        print('Supposing model {} is {}...'.format(
                            try_model_id, model_id))
                    break
            # Compare with None explicitly: a model without chains may
            # evaluate as false even though it was found.
            if model is None:
                raise ValueError(
                    'model "{}" not found in "{}", options are {}'.format(
                        model_id, pdb_id, list(structure.child_dict.keys())))

        if chain_id not in model.child_dict:
            raise ValueError(
                'chain "{}" not found in "{}" model "{}", options are {}'.
                format(chain_id, pdb_id, model_id,
                       list(model.child_dict.keys())))
        chain = model.child_dict[chain_id]

        new_chain = normalize_chain(chain)

        # one-letter sequence of the normalized chain; residues with an
        # unknown name are skipped
        normalized = []
        for residue in new_chain:
            try:
                normalized.append(resname_to_abbrev(residue.resname))
            except UnknownResnameError:
                pass
        normalized = ''.join(normalized)

        # extract the known primary sequence using the mask
        # NOTE(review): zip() stops at the shorter of primary / mask; a
        # length mismatch between the two is not reported here.
        masked_primary = []
        for r, m in zip(primary, mask):
            if m == '-':
                continue
            assert m == '+'
            masked_primary.append(r)
        masked_primary = ''.join(masked_primary)

        # ensure the sequence lengths match
        if len(normalized) != len(masked_primary):
            raise ChainLengthError(len(normalized), len(masked_primary))

        # ensure residue identities match
        for i, (got, expected) in enumerate(zip(normalized, masked_primary)):
            if got != expected:
                raise ValueError(
                    'mismatch residue at position {} (got {}, expected {})'.
                    format(i, got, expected))

        new_model = Model(model.id)
        new_model.add(new_chain)
        new_structure = Structure(structure.id)
        new_structure.add(new_model)

        if not save:
            return new_structure

        out_path = input_path + '.norm'
        io = PDBIO()
        io.set_structure(new_structure)
        io.save(out_path)
        return out_path
Ejemplo n.º 33
0
					if atom.coord[0] < atom2.coord[0]:
						atom3.coord[0] += xDistancePerStep
					elif atom.coord[0] > atom2.coord[0]:
						atom3.coord[0] -= xDistancePerStep

					if atom.coord[1] < atom2.coord[1]:
						atom3.coord[1] += yDistancePerStep
					elif atom.coord[1] > atom2.coord[1]:
						atom3.coord[1] -= yDistancePerStep	

					if atom.coord[2] < atom2.coord[2]:
						atom3.coord[2] += zDistancePerStep
					elif atom.coord[2] > atom2.coord[2]:
						atom3.coord[2] -= zDistancePerStep

		yield newModel

	if startEndInclusive:
		final.id = steps + 1

		yield final

# Save every interpolated model as its own numbered PDB file in frames/.
modelFrame = 0
for model in interpolate(structure[0], structure[1], 10, True):
	# Each model is wrapped in a fresh single-model structure for PDBIO.
	result = Structure('result')
	result.add(model)
	io = PDBIO()
	io.set_structure(result)
	io.save(''.join(('frames/out_', str(modelFrame), '.pdb')))
	modelFrame = modelFrame + 1
Ejemplo n.º 34
0
}, {
    'name': 'C5',
    'coord': PDB.Atom.array([66.402, 44.364, 11.291], 'f'),
    'bfactor': 44.20,
    'occupancy': 1.0,
    'altloc': ' ',
    'fullname': 'C5',
    'serial_number': 7
}, {
    'name': 'C6',
    'coord': PDB.Atom.array([65.095, 44.589, 11.192], 'f'),
    'bfactor': 44.33,
    'occupancy': 1.0,
    'altloc': ' ',
    'fullname': 'C6',
    'serial_number': 8
}]
# Link the hierarchy top-down: structure -> model -> chain -> residue.
my_structure.add(my_model)
my_model.add(my_chain)
my_chain.add(my_residue)

# Create one Atom per dictionary in `atoms` and attach it to the residue.
for atom in atoms:
    my_atom = Atom(atom['name'], atom['coord'], atom['bfactor'],
                   atom['occupancy'], atom['altloc'], atom['fullname'],
                   atom['serial_number'])
    my_residue.add(my_atom)

# Write the assembled structure to a PDB file.
out = PDBIO()
out.set_structure(my_structure)
out.save('my_new_structure.pdb')
Ejemplo n.º 35
0
# Reference structure: one ALA residue with a single CA atom for every
# selected point of the reference point file.
structure = Structure(refid)
model_ref = Model(1)
chain_ref = Chain("A")
points_ref = ReadXYZ(ref_ptsfilename,scale)

num_count = 0
for i in range(0,shape(points_ref[IndexList])[0]):
	# num_count is the 1-based residue number and atom serial number.
	num_count = num_count +1
	res_id = (' ',num_count,' ')
	residue = Residue(res_id,'ALA',' ')
	cur_coord = tuple(points_ref[IndexList[i]])
	# The sixth argument is the atom's full name and must be a string;
	# the numeric counter belongs only in the serial-number slot.
	atom = Atom('CA',cur_coord,0,0,' ','CA',num_count,'C')
	residue.add(atom)
	chain_ref.add(residue)
model_ref.add(chain_ref)
structure.add(model_ref)

#--------------------------------------------------------------------
# Alternative structure, built the same way from the alternative points.
# NOTE(review): `altid` is assigned but the structure below is still named
# with `refid` - presumably `altid` was intended; confirm before changing.
altid = "alt"
structure_alt = Structure(refid)
model_alt = Model(2)
chain_alt = Chain("A")
points_alt = ReadXYZ(alt_ptsfilename,scale)

num_count = 0
for i in range(0,shape(points_alt[IndexList])[0]):
	num_count = num_count +1
	res_id = (' ',num_count,' ')
	residue = Residue(res_id,'ALA',' ')
	cur_coord = tuple(points_alt[IndexList[i]])
	atom = Atom('CA',cur_coord,0,0,' ','CA',num_count,'C')
Ejemplo n.º 36
0
                    if atom.coord[0] < atom2.coord[0]:
                        atom3.coord[0] += xDistancePerStep
                    elif atom.coord[0] > atom2.coord[0]:
                        atom3.coord[0] -= xDistancePerStep

                    if atom.coord[1] < atom2.coord[1]:
                        atom3.coord[1] += yDistancePerStep
                    elif atom.coord[1] > atom2.coord[1]:
                        atom3.coord[1] -= yDistancePerStep

                    if atom.coord[2] < atom2.coord[2]:
                        atom3.coord[2] += zDistancePerStep
                    elif atom.coord[2] > atom2.coord[2]:
                        atom3.coord[2] -= zDistancePerStep

        yield newModel

    if startEndInclusive:
        final.id = steps + 1

        yield final


# Add a deep copy of every interpolated model to `result`.
# NOTE(review): `result`, `initial` and `final` are not defined in the
# visible part of this script - presumably created earlier; confirm.
for model in interpolate(initial[0], final[0], 10, True):
    result.add(deepcopy(model))

# Write all collected models into a single PDB file.
io = PDBIO()
io.set_structure(result)
io.save("out.pdb")
Ejemplo n.º 37
0
def visualize_2DA(apo_2DA, holo_2DA, paper_apo_spans):
    """Save the holo structure superimposed onto apo and print a PyMOL script.

    The holo model is superimposed onto the apo model using the first domain
    of each arrangement and written as an mmCIF file under ``OUTPUT_DIR``.
    A PyMOL script, which can be pasted directly into PyMOL, is then printed.

    The printed script will:
    1) automatically load both structures (superimposed holo from the
       filesystem, apo from the internet)
    2) create objects and selections for the domains and for the two-domain
       arrangements
    3) color the selections by domain, apo/holo and paper/ours
       - ours use more saturated colors, the paper's are faded
       - red, yellow: apo (first and second domain respectively)
       - green, blue: holo
    4) provide example usage in its last paragraph

    Args:
        apo_2DA: two-domain arrangement of the apo structure; its
            ``pdb_code``, ``d1`` and ``d2`` attributes are read.
        holo_2DA: the same for the holo structure.
        paper_apo_spans: ``[d1_spans, d2_spans]``, each a sequence of
            ``(from, to)`` pairs describing the apo domains from the paper.
    """
    # parse both entries
    apo_parsed = parse_mmcif(apo_2DA.pdb_code)
    holo_parsed = parse_mmcif(holo_2DA.pdb_code)
    apo, holo = apo_parsed.structure, holo_parsed.structure

    # ---- begin: taken over from main ----
    apo_mapping = apo_parsed.bio_to_mmcif_mappings[0][apo_2DA.d1.chain_id]
    holo_mapping = holo_parsed.bio_to_mmcif_mappings[0][holo_2DA.d1.chain_id]

    # crop the polypeptides to their longest common substring
    apo_poly_seq = apo_parsed.poly_seqs[apo_mapping.entity_poly_id]
    holo_poly_seq = holo_parsed.poly_seqs[holo_mapping.entity_poly_id]
    c1_common_seq, c2_common_seq = get_longest_common_polypeptide(apo_poly_seq, holo_poly_seq)

    # difference between the first label_seq_ids of the two common sequences
    first_c1_label_seq_id = list(c1_common_seq.keys())[0]
    first_c2_label_seq_id = list(c2_common_seq.keys())[0]
    label_seq_id_offset = first_c2_label_seq_id - first_c1_label_seq_id
    # ---- end: taken over from main ----

    # residues of the first domain in apo and holo; holo is superimposed onto
    # apo using that first domain
    apo_d1 = DomainResidues.from_domain(apo_2DA.d1, apo[0], apo_mapping)
    holo_d1 = DomainResidues.from_domain(holo_2DA.d1, holo[0], holo_mapping)
    superimposed_holo_model = superimpose_structure(holo[0], holo_d1, apo_d1)

    # wrap the superimposed model in its own structure and write it out
    sholo_name = holo.id + f'_{holo_d1.domain_id}onto_{apo_d1.domain_id}'
    sholo = Structure(sholo_name)
    sholo.add(superimposed_holo_model)
    cif_writer = MMCIFIO()
    cif_writer.set_structure(sholo)
    sholo_file_path = Path(OUTPUT_DIR) / (sholo_name + '.cif')
    cif_writer.save(str(sholo_file_path), preserve_atom_numbering=True)

    def resi_clause(spans):
        """Return a parenthesised 'resi a-b or resi c-d ...' PyMOL expression."""
        return '(' + ' or '.join(f'resi {start}-{stop}' for start, stop in spans) + ')'

    def paper_domain(domain: DomainResidueMapping, paper_spans, residue_id_mapping):
        """Translate the paper's spans to label seq ids and wrap them in a domain mapping."""
        span_table = np.array(paper_spans)
        segment_beginnings = [
            residue_id_mapping.find_label_seq(begin) for begin in span_table[:, 0].tolist()
        ]
        segment_ends = [
            residue_id_mapping.find_label_seq(end) for end in span_table[:, 1].tolist()
        ]
        logger.debug(segment_beginnings)
        logger.debug(segment_ends)
        return DomainResidueMapping(domain.domain_id, domain.chain_id, segment_beginnings, segment_ends)

    # convert the paper spans to label seqs, so they can be shown in PyMOL
    logger.debug(paper_apo_spans)  # [d1, d2] where d1 [(), (),...]
    paper_apo_drm1 = paper_domain(apo_2DA.d1, paper_apo_spans[0], apo_mapping)
    paper_apo_drm2 = paper_domain(apo_2DA.d2, paper_apo_spans[1], apo_mapping)
    paper_holo_drm1 = DomainResidueMapping.from_domain_on_another_chain(
        paper_apo_drm1, holo_d1.chain_id, label_seq_id_offset)
    # same chain, for now, as in d1
    paper_holo_drm2 = DomainResidueMapping.from_domain_on_another_chain(
        paper_apo_drm2, holo_d1.chain_id, label_seq_id_offset)

    # Residue selections for our spans and for the paper's spans.  The two
    # loaded objects are copied inside the script (paper_*), so that the paper
    # and our domains can be colored differently.
    apo_d1_resi = resi_clause(apo_2DA.d1.get_spans())
    apo_d2_resi = resi_clause(apo_2DA.d2.get_spans())
    holo_d1_resi = resi_clause(holo_2DA.d1.get_spans())
    holo_d2_resi = resi_clause(holo_2DA.d2.get_spans())
    paper_apo_d1_resi = resi_clause(paper_apo_drm1.get_spans())
    paper_apo_d2_resi = resi_clause(paper_apo_drm2.get_spans())
    paper_holo_d1_resi = resi_clause(paper_holo_drm1.get_spans())
    paper_holo_d2_resi = resi_clause(paper_holo_drm2.get_spans())

    pymol_script = f"""
fetch {apo.id}
load {sholo_file_path.absolute()}

sele apo_d1, {apo.id} and chain {apo_2DA.d1.chain_id} and {apo_d1_resi}
sele apo_d2, {apo.id} and chain {apo_2DA.d2.chain_id} and {apo_d2_resi}
sele apo_2DA, apo_d1 or apo_d2

sele holo_d1, {sholo.id} and chain {holo_2DA.d1.chain_id} and {holo_d1_resi}
sele holo_d2, {sholo.id} and chain {holo_2DA.d2.chain_id} and {holo_d2_resi}
sele holo_2DA, holo_d1 or holo_d2

# copy objects, so we can color them differently
copy paper_{apo.id}, {apo.id}
copy paper_{sholo.id}, {sholo.id}

sele paper_apo_d1, paper_{apo.id} and chain {apo_2DA.d1.chain_id} and {paper_apo_d1_resi}
sele paper_apo_d2, paper_{apo.id} and chain {apo_2DA.d2.chain_id} and {paper_apo_d2_resi}
sele paper_apo_2DA, paper_apo_d1 or paper_apo_d2

sele paper_holo_d1, paper_{sholo.id} and chain {holo_2DA.d1.chain_id} and {paper_holo_d1_resi}
sele paper_holo_d2, paper_{sholo.id} and chain {holo_2DA.d2.chain_id} and {paper_holo_d2_resi}
sele paper_holo_2DA, paper_holo_d1 or paper_holo_d2

color red, apo_d1
color yellow, apo_d2
color green, holo_d1
color blue, holo_d2

color salmon, paper_apo_d1
color paleyellow, paper_apo_d2
color palegreen, paper_holo_d1
color lightblue, paper_holo_d2

# example usage: 
hide; show surface, apo_2DA
hide; show surface, paper_apo_2DA
hide; show surface, holo_2DA
hide; show surface, paper_holo_2DA

hide; show surface, apo_2DA or holo_2DA or paper_apo_2DA or paper_holo_2DA
    """

    print(pymol_script)