def splitOnePDB(fname, outPath):
    """Write one ligand/receptor pair of PDB files per usable chain of fname.

    Chains of model 0 whose length (residues holding a "CA" atom minus
    residues that are not standard amino acids) lies outside
    [MIN_SEQ_LEN, MAX_SEQ_LEN] are detached first.  Each remaining chain is
    then taken in turn as the ligand, with all other chains as the receptor;
    when only two chains remain, the mirrored second pairing is skipped.

    Output files go to outPath as <prefix>-<i>_l_u.pdb (ligand) and
    <prefix>-<i>_r_u.pdb (receptor), where <prefix> is the base name of
    fname up to its first dot and <i> is the pairing index.

    Returns 0 if the file cannot be parsed, if the chain scan raises
    KeyError, or if fewer than two chains survive; otherwise returns None.
    """
    try:
        struct = parser.get_structure(fname, fname)
    except Exception:
        print ("Error loading pdb")
        return 0

    rejectedIds = []
    try:
        for chain in struct[0]:
            nonStandard = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True))
            withCA = sum(1 for res in chain if "CA" in res)
            chainLen = withCA - nonStandard
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chainLen)
                rejectedIds.append(chain.get_id())
    except KeyError:
        print ("Not good model")
        return 0

    model = struct[0]
    for rejectedId in rejectedIds:
        model.detach_child(rejectedId)

    if len(model.get_list()) < 2:
        print(struct)
        print(model.get_list())
        print("Not enough good chains")
        return 0

    ligandChainList = []
    receptorChainList = []
    for candidate in model:
        others = [other for other in model if candidate != other]
        # With exactly two chains, skip the pairing that merely swaps roles.
        if len(others) > 1 or others[0] not in ligandChainList:
            ligandChainList.append(candidate)
            receptorChainList.append(others)

    prefix = os.path.basename(fname).split(".")[0]

    def dumpChains(chains, structTag, fileName):
        # Wrap the chains in a fresh Structure/Model and write them out.
        target = Structure(prefix + structTag)
        target.add(Model(0))
        for oneChain in chains:
            oneChain.set_parent(target[0])
            target[0].add(oneChain)
        writer = PDBIO()
        writer.set_structure(target)
        writer.save(os.path.join(outPath, fileName))

    for i, ligandChain in enumerate(ligandChainList):
        receptorChains = receptorChainList[i]
        dumpChains([ligandChain], "ligand", prefix + "-" + str(i) + "_l_u.pdb")
        dumpChains(receptorChains, "receptor", prefix + "-" + str(i) + "_r_u.pdb")
        print( "ligand:", ligandChain, "receptor:", receptorChains )
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Split a PDB file into a ligand file and a receptor file.

    The chains of model 0 are first screened: a chain counts as bad when the
    number of residues holding a "CA" atom, minus the residues that are
    neither standard amino acids nor "HOH", lies outside
    [MIN_SEQ_LEN, MAX_SEQ_LEN].  Bad chains are only counted, not removed.
    The ligand and receptor chains themselves come from
    findNeigChains(s, chainIdL, chainIdR).

    Output files go to outPath as <prefix>-<chainIdL><chainIdR>_l_u.pdb and
    <prefix>-<chainIdL><chainIdR>_r_u.pdb, where <prefix> is the base name
    of fname up to its first dot.

    Returns 0 if the file cannot be parsed, if the chain scan raises
    KeyError, or if fewer than two chains pass the screen; otherwise
    returns None.
    """
    baseName = os.path.basename(fname)
    print(baseName)
    try:
        s = parser.get_structure(os.path.basename(fname), fname)
    except Exception:
        print("Error loading pdb")
        return 0

    rejectedIds = []
    try:
        for chain in s[0]:
            # Waters are not counted as bad residues.
            nonStandard = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            withCA = sum(1 for res in chain if "CA" in res)
            chainLen = withCA - nonStandard
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chain, chainLen)
                rejectedIds.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    usableCount = len(s[0].get_list()) - len(rejectedIds)
    if usableCount < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)

    prefix = os.path.basename(fname).split(".")[0]
    outputs = (
        (ligandChains, "ligand", "_l_u.pdb"),
        (receptorChains, "receptor", "_r_u.pdb"),
    )
    for chains, structTag, suffix in outputs:
        writer = PDBIO()
        target = Structure(prefix + structTag)
        target.add(Model(0))
        for oneChain in chains:
            oneChain.set_parent(target[0])
            target[0].add(oneChain)
        writer.set_structure(target)
        writer.save(
            os.path.join(outPath, prefix + "-" + chainIdL + chainIdR + suffix))
    def init_structure(self, structure_id):
        """Create an empty Structure and store it as self.structure.

        Arguments:
        o structure_id - identifier passed to the Structure constructor
        """
        fresh_structure = Structure(structure_id)
        self.structure = fresh_structure
Exemple #4
0
    def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
        """Return a copy of structure with residues renumbered consecutively.

        The first residue of every chain gets the number stored in
        '_ma_target_ref_db_details.seq_db_align_begin' of
        alphafold_mmCIF_dict, and each following residue gets the next
        integer.  After each chain, an assertion checks that the last number
        used equals '_ma_target_ref_db_details.seq_db_align_end'.

        Residues are copied, and all disorder flags on the copies (residue
        and atoms) are cleared.
        """
        # Residue range covered by this model, as described in the mmCIF dictionary
        first_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_begin'][0])
        last_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_end'][0])

        def _renumbered_copy(source_residue, new_number):
            # Copying does not carry disorder over properly, so drop it entirely.
            clone = source_residue.copy()
            clone.id = (' ', new_number, ' ')
            for atom in clone:
                atom.disordered_flag = 0
            clone.disordered = 0
            return clone

        result = Structure(structure.id)
        for model in structure:
            new_model = Model(model.id)
            for chain in model:
                next_number = first_number
                new_chain = Chain(chain.id)
                for residue in chain:
                    new_chain.add(_renumbered_copy(residue, next_number))
                    next_number += 1

                assert next_number == last_number + 1
                new_model.add(new_chain)

            result.add(new_model)
        return result
    def getStructFromFasta(self, fname, chainType):
        '''
    Creates a Bio.PDB Structure object from the sequence stored in the fasta file fname.
    No atoms are created, so no coordinates are available; only the
    Structure -> Model -> Chain -> Residue hierarchy is built.
    Residues are numbered from 0; letters that one_to_three() rejects with
    KeyError become "UNK".
    :param fname: str. path to fasta file
    :param chainType: str. "l" or "r"; its upper-cased value is used as chain id
    :return: the new Structure, with a single model 0 and a single chain
    '''

        # inputNumber is used to report which partner fails if error
        seq = self.parseFasta(
            fname, inputNumber="1" if chainType == "l" else "2")
        prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]
        chainId = chainType.upper()
        struct = Structure(prefix)
        model = Model(0)
        struct.add(model)
        chain = Chain(chainId)
        model.add(chain)
        for i, aa in enumerate(seq):
            try:
                resname = one_to_three(aa)
            except KeyError:
                resname = "UNK"
            # NOTE(review): prefix is passed as the third Residue argument
            # (the segid slot in Bio.PDB) - confirm this is intended.
            chain.add(Residue((' ', i, ' '), resname, prefix))
        return struct
Exemple #6
0
def retrieve_sphere_model(structure):  #, score):
    """
    Build a coarse model in which each chain is a single pseudo-atom.

    For every chain of structure, a chain with the same id is added to
    model 0 of a new Structure ('clustering_model').  That chain holds one
    residue (named after the chain id) with one 'CA' atom placed at the
    value returned by calculate_centre_of_complex(chain).

    Returns the new Structure; the input structure is not modified.
    """
    sphere_struct = Structure('clustering_model')
    sphere_struct.add(Model(0))

    # Numbering is changed: the n-th chain gets residue number n, from 0.
    for index, chain in enumerate(structure.get_chains()):
        my_chain = Chain(chain.id)
        sphere_struct[0].add(my_chain)

        coords = array(calculate_centre_of_complex(chain), 'f')
        my_residue = Residue((' ', index, ' '), chain.id, ' ')
        atom = Atom('CA', coords, 0, 0, ' ', 'CA', 1)

        my_chain.add(my_residue)
        my_residue.add(atom)

    return sphere_struct
Exemple #7
0
    def slice(cls, obj, selection, name='slice'):
        """Build a new object wrapping a Structure that holds only the selected residues.

        Every entry of <selection> is read as entry[1] = model id,
        entry[2] = chain id, entry[3] = residue id.  The matching residues
        are looked up as obj[model][chain][residue] and copies of them are
        stored in freshly created models and chains of a new Structure
        called <name>.  Returns cls(new_structure, name=name).
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain

        sliced = Structure(name)  # Biopython structure object

        # First pass: collect which chains are needed in which model
        chains_by_model = {}
        for entry in selection:
            chains_by_model.setdefault(entry[1], set()).add(entry[2])

        # Create the empty model/chain skeleton
        for model_id, chain_ids in chains_by_model.items():
            sliced.add(Model(model_id))
            for chain_id in chain_ids:
                sliced[model_id].add(Chain(chain_id))

        # Second pass: copy the selected residues into the skeleton
        for entry in selection:
            model_id, chain_id, residue_id = entry[1], entry[2], entry[3]
            residue_copy = obj[model_id][chain_id][residue_id].copy()
            sliced[model_id][chain_id].add(residue_copy)

        return cls(sliced, name=name)
    def create_sphere_representation(self):
        """
        Build a one-atom model of the component and save it.

        A Structure with model 0, one chain (id self.fa_struct.chain), one
        "ALA" residue numbered 1 and one 'CA' atom is created.  The atom is
        placed at the coordinate returned by
        self.calculate_centre_of_complex(self.fa_struct.struct).

        Side effects: sets self.molmass and self.radius from that same call,
        stores the new structure in self.cg_struct and passes it to
        self.save_pdb under the name "dddd" + chain id.
        """
        new_struct = Structure('sphrere')
        my_model = Model(0)
        new_struct.add(my_model)

        my_chain = Chain(self.fa_struct.chain)
        new_struct[0].add(my_chain)

        coord, self.molmass, self.radius = self.calculate_centre_of_complex(
            self.fa_struct.struct)
        my_residue = Residue((' ', 1, ' '), "ALA", ' ')

        coords = array(coord, 'f')
        atom = Atom('CA', coords, 0, 0, ' ', ' CA', 1)

        my_chain.add(my_residue)
        my_residue.add(atom)

        self.cg_struct = new_struct
        name = "dddd" + self.fa_struct.chain
        self.save_pdb(new_struct, name)
Exemple #9
0
 def add(self, residue):
     """Add a copy of a PdbResidue or Het object to the site.

     The argument is copied first (copy(include_structure=True)); the copy
     is what gets stored.  A PdbResidue goes into self.residues and
     self.residues_dict (keyed by full_id); a Het goes into self.ligands.
     In both cases the copy's parent_site is set to this site.

     If the copy carries a structure, it is also inserted into
     self.structure (model 0), which is created on demand.  For a polymer
     Het, residue.structure is treated as a chain: its children are merged
     into an existing chain with id residue.chain, or it is added whole.

     Always returns True.
     """
     residue = residue.copy(include_structure=True)
     # Exact type comparison is kept on purpose: switching to isinstance()
     # would change which branch subclasses take.
     if type(residue) == PdbResidue:
         self.residues.append(residue)
         self.residues_dict[residue.full_id] = residue
         residue.parent_site = self
     if type(residue) == Het:
         self.ligands.append(residue)
         residue.parent_site = self
         if residue.is_polymer:
             # The site structure may not exist yet; without this guard the
             # lookups below fail on None.
             if self.structure is None:
                 self.structure = Structure(self.id)
                 self.structure.add(Model(0))
             if residue.chain in self.structure[0]:
                 for r in residue.structure:
                     self.structure[0][residue.chain].add(r)
                 return True
             self.structure[0].add(residue.structure)
             return True
     if residue.structure:
         # Initialize structure if empty
         if self.structure is None:
             self.structure = Structure(self.id)
             self.structure.add(Model(0))
         chain_id = residue.structure.get_parent().get_id()
         if chain_id not in self.structure[0]:
             self.structure[0].add(Chain(chain_id))
         # Add residue structure to site structure
         if residue.structure.get_id() not in self.structure[0][chain_id]:
             self.structure[0][chain_id].add(residue.structure)
     return True
Exemple #10
0
def retrieve_ca_model(structure):
    """
    Build a reduced copy of model 0 that keeps only main chain trace atoms.

    For every atom named "CA", "C4'" or "C4*", a new residue (same sequence
    number and residue name, blank hetero flag and insertion code) holding a
    single atom named 'CA' at the same coordinates is added to a chain
    carrying the original chain id.

    Returns the new Structure ('clustering_model').
    """
    trace_atom_names = ("CA", "C4'", "C4*")

    reduced_struct = Structure('clustering_model')
    reduced_struct.add(Model(0))

    for ch in structure[0]:
        my_chain = Chain(ch.id)
        reduced_struct[0].add(my_chain)
        for resi in ch:
            for atom in resi:
                if atom.get_name() not in trace_atom_names:
                    continue
                my_residue = Residue((' ', resi.id[1], ' '),
                                     resi.get_resname(), ' ')
                # NOTE(review): the fourth positional argument is ' ' here,
                # while retrieve_sphere_model passes 0 in that position.
                # Presumably this is the occupancy slot of Bio.PDB's Atom -
                # confirm, since a string there may break PDB writing.
                trace_atom = Atom('CA', atom.coord, 0, ' ', ' ', 'CA',
                                  atom.get_serial_number())
                my_chain.add(my_residue)
                my_residue.add(trace_atom)

    return reduced_struct
 def create_new_chain(self, old_struct):
     """Return a new Structure holding model 0 with one empty chain.

     Both the structure id and the chain id are taken from old_struct.chain.
     """
     chain_name = old_struct.chain
     skeleton = Structure(chain_name)
     model = Model(0)
     skeleton.add(model)
     # what if more chains in one component?
     model.add(Chain(chain_name))
     return skeleton
Exemple #12
0
def save_chain_to(chain, filename: str):
    """Wrap chain in a Structure named after filename and save it with PDBIO.

    NOTE(review): chain is added directly to the Structure, with no Model in
    between - confirm this is what the PDBIO writer expects for this type.
    """
    from Bio.PDB.PDBIO import PDBIO
    writer = PDBIO()
    # writer.set_structure(chain.get_bio_chain())
    wrapper = Structure(filename)
    wrapper.add(chain)
    writer.set_structure(wrapper)
    writer.save(filename)
 def create_new_chain(self, id):
     """
     Store in self.fragment_lattice a new Structure that holds model 0
     with one empty chain; id is used for both the structure and the chain.
     """
     lattice = Structure(id)
     self.fragment_lattice = lattice
     model = Model(0)
     lattice.add(model)
     # what if more chains in one component?
     model.add(Chain(id))
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    CA sits at the origin, C on the x axis at distance CA_C_length, and N in
    the xy plane at distance CA_N_length and angle N_CA_C_angle (degrees).
    For a geometry whose residue_name is "ACE", the central atom is named
    "CH3" instead of "CA".

    Raises ValueError when residue is neither a Geo nor a str.
    """

    if isinstance(residue, Geo):
        geo = residue
    elif isinstance(residue, str):
        geo = geometry(residue)
    else:
        raise ValueError("Invalid residue argument:", residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length
    # The angle is stored in degrees; convert it once for both components.
    N_CA_C_radians = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0.0, 0.0, 0.0])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([
        CA_N_length * math.cos(N_CA_C_radians),
        CA_N_length * math.sin(N_CA_C_radians),
        0,
    ])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")

    # Check if the peptide is capped or not
    if geo.residue_name == "ACE":
        CA = Atom("CH3", CA_coord, 0.0, 1.0, " ", " CH3", 0, "C")
    else:
        CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")

    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    ##Create Carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = make_res_of_type(segID, N, CA, C, O, geo)

    cha = Chain("A")
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure("X")
    struc.add(mod)
    return struc
def single_chain_structure(chain, name='superposition'):
    """Return a new Structure called name whose model 0 contains chain."""
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model

    wrapper = Structure(name)
    only_model = Model(0)
    wrapper.add(only_model)
    only_model.add(chain)
    return wrapper
Exemple #16
0
    def get_structure(self, name='RNA chain'):
        """Returns the chain wrapped in a PDB.Structure object.

        The structure is called name and holds model 0 with one chain whose
        id is self.chain_name; every item yielded by iterating self is added
        to that chain.
        """
        struc = Structure(name)
        struc.add(Model(0))
        struc[0].add(Chain(self.chain_name))

        for resi in self:
            target_chain = struc[0][self.chain_name]
            target_chain.add(resi)
        return struc
Exemple #17
0
def complex_save(given_complex, i, path):
    """Write all components of given_complex into one PDB file and return path.

    A Structure with id i and a single model 0 is built; for each component,
    the chain component.pyrystruct.struct[0][component.pyrystruct.chain] is
    added to that model.  The structure is then saved to path with PDBIO.
    """
    merged = Structure(i)
    model = Model(0)
    merged.add(model)
    for component in given_complex.components:
        pyry = component.pyrystruct
        model.add(pyry.struct[0][pyry.chain])
    writer = PDBIO()
    writer.set_structure(merged)
    writer.save(path)
    return path
    def saveStruct(self, fname, desiredOrder):
        """Write self.structure to fname, optionally reordering its models.

        fname: destination path handed to PDBIO.save.
        desiredOrder: iterable of model ids giving the order in which models
            are written, or None to write the structure unchanged.  When
            given, self.structure is replaced by a new Structure (id
            self.structId) containing only the listed models, in that order.

        Raises IndexError if an id in desiredOrder matches no model.
        """
        io = PDBIO(use_model_flag=True)

        if desiredOrder is not None:
            children = self.structure.child_list
            # Build the reordered structure on the side, so self.structure is
            # only replaced once every requested model has been found.
            reordered = Structure(self.structId)
            for modelId in desiredOrder:
                child = [model for model in children if model.id == modelId][0]
                child.detach_parent()
                reordered.add(child)
            self.structure = reordered

        io.set_structure(self.structure)
        io.save(fname)  #,  preserve_atom_numbering=True)
Exemple #19
0
def extract_model(pdb_struct, k):
    """
    Return a new single-model structure holding a copy of model k.

    The copy is given id 0 and serial number 1; the new structure reuses
    the id of pdb_struct.  An assertion requires k < len(pdb_struct).
    """
    assert k < len(pdb_struct), 'missing specified model'

    single = Structure(pdb_struct.id)
    picked = pdb_struct[k].copy()
    picked.id, picked.serial_num = 0, 1
    single.add(picked)
    return single
Exemple #20
0
    def get_structure_from_files(verbose=False, test_mode=False):
        '''
        Loads the hard-coded WT-GrBP5 trajectory PDB files into one BioPython
        structure by handing them to `Parser._parse_pdb_files`.

        Parameters
        ----------
        `verbose` - forwarded to the parser (whether to show print statements).
        `test_mode` - when true, only the first 50ps file is parsed.

        Returns
        -------
        `Structure` - the 'WT-GrBP5' structure the parser was asked to fill.

        '''
        # The data is split over 11 files: six sampled every 50ps ...
        fast_trajectories = [
            "PDB/WT-GrBP5/WT_295K_200ns_50ps_0_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_50ps_1_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_50ps_2_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_50ps_3_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_50ps_4_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_50ps_5_run.pdb",
        ]
        # ... and five sampled every 100ps.
        slow_trajectories = [
            "PDB/WT-GrBP5/WT_295K_500ns_100ps_6_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_100ps_7_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_100ps_8_run.pdb",
            "PDB/WT-GrBP5/WT_295K_500ns_100ps_9_run.pdb",
            "PDB/WT-GrBP5/WT_295K_300ns_100ps_10_run.pdb",
        ]

        # Each batch is (files, keep_all_models flag); all go into one structure.
        if test_mode:
            batches = [(fast_trajectories[:1], False)]
        else:
            batches = [(fast_trajectories, False), (slow_trajectories, True)]

        main_structure = Structure('WT-GrBP5')
        for pdb_files, keep_all in batches:
            Parser._parse_pdb_files(pdb_files,
                                    main_structure,
                                    keep_all_models=keep_all,
                                    verbose=verbose)

        return main_structure
Exemple #21
0
 def create_new_structure(self, name, chain_id):
     """
         creates a new Bio.PDB structure object and stores it in self.struct
     Parameters:
     -----------
         name        :   structure name
         chain_id    :   chain name (e.g. A, B, C)
     Side effects:
     -------------
         self.struct :   Bio.PDB object with model 0 and one empty chain
                         inside (nothing is returned)
     """
     skeleton = Structure(name)
     self.struct = skeleton
     model, chain = Model(0), Chain(chain_id)
     skeleton.add(model)
     skeleton[0].add(chain)
Exemple #22
0
def multiply_model(pdb_struct, num_models):
    """
    Return a new structure holding num_models copies of model 0.

    An assertion requires pdb_struct to contain exactly one model.  The
    copies get ids 0..num_models-1 and serial numbers 1..num_models; the new
    structure reuses the id of pdb_struct.
    """
    assert len(pdb_struct) == 1, 'single-model PDB file required'

    multiplied = Structure(pdb_struct.id)

    for serial, model_id in enumerate(range(num_models), start=1):
        clone = pdb_struct[0].copy()
        clone.detach_parent()
        clone.id = model_id
        clone.serial_num = serial
        multiplied.add(clone)
        clone.set_parent(multiplied)

    return multiplied
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    CA sits at the origin, C on the x axis at distance CA_C_length, and N in
    the xy plane at distance CA_N_length and angle N_CA_C_angle (degrees).
    Any argument that is not a Geo instance is passed to Geo(...).'''

    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = Geo(residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length
    # The angle is stored in degrees; convert it once for both components.
    N_CA_C_radians = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0., 0., 0.])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([CA_N_length * math.cos(N_CA_C_radians),
                        CA_N_length * math.sin(N_CA_C_radians),
                        0])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    ##Create Carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = makeRes(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure('X')
    struc.add(mod)
    return struc
Exemple #24
0
    def save_pdb(self, complex_id, temp = "", name = ""):
        """
        Writes the chains of all complex components into one pdb file
        (all chains go into a single model 0).

        Parameters:
        ------------
            complex_id  : number of complex from simulation; used as the
                          structure id and in the file name
            temp        : value appended to the file name; rounded to one
                          decimal when it parses as a number
            name        : file name prefix; when empty, the last "/"-separated
                          part of outfolder.outdirname is used instead
        Returns:
        --------
            path of the written file:
            <outdirname>/<prefix>_<score>_<complex_id>_<temp>.pdb
            where score is self.simulation_score rounded to 4 decimals
        """
        ##add component chain by chain not residue by residue.
        score = round(self.simulation_score, 4)
        s = Structure(complex_id)
        my_model = Model(0)
        s.add(my_model)

        for component in self.components:
            #@TODO: #what if more chains in one component?
            my_model.add(component.pyrystruct.struct[0][component.pyrystruct.chain])
        out = PDBIO()
        out.set_structure(s)

        temp = str(temp)
        try:
            temp = round(float(temp), 1)
        except ValueError:
            # Not numeric (e.g. the default ""): keep the text unchanged.
            pass

        prefix = name if name else outfolder.outdirname.split("/")[-1]
        fi_name = str(outfolder.outdirname)+'/'+str(prefix)+'_'+str(score)+'_'+str(complex_id)+"_"+str(temp)+'.pdb'
        out.save(fi_name)

        # Release the chains from the temporary model used for writing.
        for comp in self.components:
            comp.pyrystruct.struct[0][comp.pyrystruct.chain].detach_parent()

        return fi_name
    def __make_structure_from_residues__(self, residues):
        """
        Makes a Structure object ('s', with one model 'm') from a list of residues.

        Residues are appended to chain 'c1'; whenever the current chain
        already holds a residue with the same id, that chain is closed and
        the residue starts the next chain ('c2', 'c3', ...).
        """
        # KR: this probably can be outsourced to another module.
        model = Model('m')
        chain_count = 1
        current = Chain('c%i' % chain_count)

        for residue in residues:
            if current.has_id(residue.id):
                # Id clash: close the current chain and open a new one.
                model.add(current)
                chain_count += 1
                current = Chain('c%i' % chain_count)
            current.add(residue)
        model.add(current)

        struct = Structure('s')
        struct.add(model)
        return struct
    def create_structure(coords, pdb_type, remove_masked):
        """Build a single-chain structure from coords and write it as PDB.

        Args:
            coords: coordinate array; rows 3*i, 3*i+1 and 3*i+2 are used for
                    the N, CA and CB atoms of residue i
            pdb_type: label put into the output file name
            remove_masked: when False, residues are written regardless of
                           their mask; otherwise only residues whose mask
                           value is 1 are written

        Returns:
            structure (also saved to save_dir + name + '_' + pdb_type + '.pdb',
            where name is protein.id_)
        """

        backbone_atoms = ['N', 'CA', 'CB']
        name = protein.id_
        chain = Chain('A')
        for i, letter in enumerate(protein.primary):
            residue_name = AA_LETTERS[letter]
            if not (int(protein.mask[i]) == 1 or remove_masked == False):
                continue
            new_residue = Residue((' ', i + 1, ' '), residue_name, '    ')
            first_row = 3 * i
            for k, atom_name in enumerate(backbone_atoms):
                new_residue.add(Atom(name=atom_name,
                                     coord=coords[first_row + k, :],
                                     bfactor=0,
                                     occupancy=1,
                                     altloc=' ',
                                     fullname=" {} ".format(atom_name),
                                     serial_number=0))
            chain.add(new_residue)

        model = Model(0)
        model.add(chain)
        structure = Structure(name)
        structure.add(model)

        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(save_dir + name + '_' + pdb_type + '.pdb')
        return structure
    def createPDBFile(self):
        "Write a test CIF file with one CA atom per icosahedron vertex; return its path"
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain
        from Bio.PDB.Residue import Residue
        from Bio.PDB.Atom import Atom
        from Bio.PDB.mmcifio import MMCIFIO
        import os
        CIFFILENAME = "/tmp/out.cif"

        # vertices of an icosahedron with i222r symmetry
        vertices = Icosahedron(circumscribed_radius=100,
                               orientation='222r').getVertices()

        # biopython hierarchy: structure -> model -> chain
        structure = Structure('result')  # structure_id
        model = Model(1, 1)  # model_id, serial_num
        chain = Chain('A')  # chain id
        structure.add(model)
        model.add(chain)

        # one "ALA" residue with a single CA atom per vertex, numbered from 1
        for number, vertex in enumerate(vertices, 1):
            # residue id: leading ' ' means ATOM (not a heteroatom)
            residue = Residue((' ', number, ' '), "ALA", '    ')
            chain.add(residue)
            # Atom args: name, coord, bfactor, occupancy, altloc, fullname,
            #            serial_number, element
            residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", number, "C"))

        writer = MMCIFIO()
        writer.set_structure(structure)
        # delete file if exists
        if os.path.exists(CIFFILENAME):
            os.remove(CIFFILENAME)
        writer.save(CIFFILENAME)
        return CIFFILENAME
Exemple #28
0
def select_structure(selector, structure):
    """Return a new Structure holding only what selector accepts.

    Models, chains and residues are re-created (same ids, and for residues
    the same resname and segid) whenever selector.accept_model /
    accept_chain / accept_residue returns true.  Accepted atoms are added to
    the new residues as the original atom objects, not as copies.
    """
    filtered = Structure(structure.id)
    for model in structure:
        if selector.accept_model(model):
            kept_model = Model(model.id, model.serial_num)
            filtered.add(kept_model)
            for chain in model:
                if selector.accept_chain(chain):
                    kept_chain = Chain(chain.id)
                    kept_model.add(kept_chain)
                    for residue in chain:
                        if selector.accept_residue(residue):
                            kept_residue = Residue(residue.id,
                                                   residue.resname,
                                                   residue.segid)
                            kept_chain.add(kept_residue)
                            for atom in residue:
                                if selector.accept_atom(atom):
                                    kept_residue.add(atom)
    return filtered
Exemple #29
0
def visualize_2DA(apo_2DA, holo_2DA, paper_apo_spans):
    """ Writes superimposed holo structure to a file, prints Pymol script which can be directly pasted in pymol.

     The holo model is superimposed onto the apo model using the first domain (d1) of each
     two-domain arrangement, and saved as an mmCIF file in OUTPUT_DIR.

     paper_apo_spans is indexed as [d1_spans, d2_spans]; each entry is a sequence of
     (begin, end) pairs which are translated with the apo residue-id mapping.

     Printed Pymol script will:
     1) automatically load both structures (superimposed holo from filesystem, apo from the internet)
     2) create objects and selections for domains, and the two-domain arrangements
     3) color the selections by domain, apo/holo and paper/ours
        - colors - ours more saturation, paper faded
            - red, yellow apo (first and second domain respectively)
            - green, blue holo
     4) provide example usage in the last script paragraph
     """

    # load the structure from file
    a = parse_mmcif(apo_2DA.pdb_code)
    h = parse_mmcif(holo_2DA.pdb_code)
    apo = a.structure
    holo = h.structure

    ###### inserted from main
    apo_mapping = a.bio_to_mmcif_mappings[0][apo_2DA.d1.chain_id]
    holo_mapping = h.bio_to_mmcif_mappings[0][holo_2DA.d1.chain_id]

    # crop polypeptides to longest common substring
    c1_common_seq, c2_common_seq = get_longest_common_polypeptide(a.poly_seqs[apo_mapping.entity_poly_id], h.poly_seqs[holo_mapping.entity_poly_id])
    c1_label_seq_ids = list(c1_common_seq.keys())
    c2_label_seq_ids = list(c2_common_seq.keys())

    # difference between the first common label_seq_id of holo and of apo
    label_seq_id_offset = c2_label_seq_ids[0] - c1_label_seq_ids[0]
    ###### end of inserted block

    # get residues of the first domain, in both apo and holo structures
    apo_d1 = DomainResidues.from_domain(apo_2DA.d1, apo[0], apo_mapping)
    holo_d1 = DomainResidues.from_domain(holo_2DA.d1, holo[0], holo_mapping)
    # superimpose holo onto apo, using the first domain
    superimposed_holo_model = superimpose_structure(holo[0], holo_d1, apo_d1)
    # save the structure
    name = holo.id + f'_{holo_d1.domain_id}onto_{apo_d1.domain_id}'
    io = MMCIFIO()
    superimposed_holo = Structure(name)
    superimposed_holo.add(superimposed_holo_model)
    io.set_structure(superimposed_holo)
    sholo_file_path = Path(OUTPUT_DIR, name + '.cif')
    io.save(str(sholo_file_path), preserve_atom_numbering=True)

    def get_resi_selection(spans):
        # Turn (from, to) spans into a Pymol selection: '(resi a-b or resi c-d)'
        selection = []
        for from_, to in spans:
            selection.append(f'resi {from_}-{to}')

        return '(' + ' or '.join(selection) + ')'

    # convert paper spans to label seqs, so we can show them in Pymol
    def get_paper_domain(d: DomainResidueMapping, paper_spans, residue_id_mapping):
        # translate spans to label seq ids and return a domain object
        # column 0 of paper_spans holds the segment beginnings, column 1 the ends
        segment_beginnings = list(map(residue_id_mapping.find_label_seq, np.array(paper_spans)[:, 0].tolist()))
        segment_ends = list(map(residue_id_mapping.find_label_seq, np.array(paper_spans)[:, 1].tolist()))
        logger.debug(segment_beginnings)
        logger.debug(segment_ends)
        return DomainResidueMapping(d.domain_id, d.chain_id, segment_beginnings, segment_ends)

    logger.debug(paper_apo_spans)  # [d1, d2] where d1 [(), (),...]
    paper_apo_drm1 = get_paper_domain(apo_2DA.d1, paper_apo_spans[0], apo_mapping)
    paper_apo_drm2 = get_paper_domain(apo_2DA.d2, paper_apo_spans[1], apo_mapping)
    # NOTE(review): recomputes the same value as the earlier label_seq_id_offset assignment
    label_seq_id_offset = c2_label_seq_ids[0] - c1_label_seq_ids[0]
    paper_holo_drm1 = DomainResidueMapping.from_domain_on_another_chain(paper_apo_drm1, holo_d1.chain_id, label_seq_id_offset)
    paper_holo_drm2 = DomainResidueMapping.from_domain_on_another_chain(paper_apo_drm2, holo_d1.chain_id, label_seq_id_offset)  # same chain, for now, as in d1

    # create highlight script (by the spans, or just create multiple selections)
    # copy the 2 structures to 4 (paper spans vs our spans), so we can color them differently
    # select only the domains (2), and make only them visible

    # short alias used inside the script template below
    sholo = superimposed_holo

    pymol_script = f"""
fetch {apo.id}
load {sholo_file_path.absolute()}

sele apo_d1, {apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(apo_2DA.d1.get_spans())}
sele apo_d2, {apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(apo_2DA.d2.get_spans())}
sele apo_2DA, apo_d1 or apo_d2

sele holo_d1, {sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(holo_2DA.d1.get_spans())}
sele holo_d2, {sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(holo_2DA.d2.get_spans())}
sele holo_2DA, holo_d1 or holo_d2

# copy objects, so we can color them differently
copy paper_{apo.id}, {apo.id}
copy paper_{sholo.id}, {sholo.id}

sele paper_apo_d1, paper_{apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(paper_apo_drm1.get_spans())}
sele paper_apo_d2, paper_{apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(paper_apo_drm2.get_spans())}
sele paper_apo_2DA, paper_apo_d1 or paper_apo_d2

sele paper_holo_d1, paper_{sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(paper_holo_drm1.get_spans())}
sele paper_holo_d2, paper_{sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(paper_holo_drm2.get_spans())}
sele paper_holo_2DA, paper_holo_d1 or paper_holo_d2

color red, apo_d1
color yellow, apo_d2
color green, holo_d1
color blue, holo_d2

color salmon, paper_apo_d1
color paleyellow, paper_apo_d2
color palegreen, paper_holo_d1
color lightblue, paper_holo_d2

# example usage: 
hide; show surface, apo_2DA
hide; show surface, paper_apo_2DA
hide; show surface, holo_2DA
hide; show surface, paper_holo_2DA

hide; show surface, apo_2DA or holo_2DA or paper_apo_2DA or paper_holo_2DA
    """

    print(pymol_script)
 def __init__(self, structId="subset"):
     """Store the structure id, a quiet PDBParser and an empty Structure.

     structId: id given to the new Structure (defaults to "subset").
     """
     self.structId = structId
     quiet_parser = PDBParser(QUIET=True)
     self.pdbParser = quiet_parser
     empty_structure = Structure(structId)
     self.structure = empty_structure