コード例 #1
0
def retrieve_sphere_model(structure):
    """Build a coarse-grained structure holding one pseudo-atom per chain.

    For every chain yielded by ``structure.get_chains()`` a chain with the
    same id is added to model 0 of a new structure named
    ``'clustering_model'``. Each new chain holds a single residue with a
    single atom named ``'CA'`` placed at the value returned by
    ``calculate_centre_of_complex(chain)``.

    Residue numbering is not taken from the input: the n-th chain visited
    gets residue number n (starting at 0), and the residue name is the
    chain id.

    :param structure: object exposing ``get_chains()``.
    :return: the newly built structure.
    """
    sphere_struct = Structure('clustering_model')
    sphere_model = Model(0)
    sphere_struct.add(sphere_model)

    # Numbering is reassigned here: one residue per chain, counted from 0.
    for index, chain in enumerate(structure.get_chains()):
        sphere_chain = Chain(chain.id)
        sphere_model.add(sphere_chain)

        centre = calculate_centre_of_complex(chain)
        sphere_residue = Residue((' ', index, ' '), chain.id, ' ')

        # Single-precision coordinates for the pseudo-atom.
        coords = array(centre, 'f')
        sphere_atom = Atom('CA', coords, 0, 0, ' ', 'CA', 1)

        sphere_chain.add(sphere_residue)
        sphere_residue.add(sphere_atom)

    return sphere_struct
コード例 #2
0
ファイル: pdb.py プロジェクト: PhyreEngine/phyre_engine
def renumber(chain, new_id=" "):
    """
    Renumber a chain from 1, stripping insertion codes.

    Every residue is rebuilt with the ID ``(hetero_flag, n, ' ')`` where
    ``n`` counts from 1 in iteration order; the hetero flag, residue name
    and segid are taken from the original residue. Atoms are copied, so the
    input chain is left untouched.

    :param `Bio.PDB.Chain` chain: structure to sanitise.
    :param str new_id: ID of the new chain.
    :return: A 2-tuple ``(mapping, sanitised_chain)`` containing, in this
        order:

        1. A list of the old residue IDs, as returned by each residue's
           ``get_id()``. Entry ``i`` is the old ID of the residue that is
           numbered ``i + 1`` in the new chain.

        2. The new chain object.

    """
    mapping = []
    sanitised_chain = Chain(new_id)

    for new_number, res in enumerate(chain, start=1):
        sanitised_res = Residue(
            (res.get_id()[0], new_number, ' '),
            res.get_resname(),
            res.get_segid())
        mapping.append(res.get_id())

        for atom in res:
            sanitised_res.add(atom.copy())
        sanitised_chain.add(sanitised_res)
    return mapping, sanitised_chain
コード例 #3
0
    def getStructFromFasta(self, fname, chainType):
        '''
        Create a structure object from the sequence stored in a fasta file.

        Only the structure -> model -> chain -> residue hierarchy is built;
        no atoms are added, so no coordinates are available. Residues are
        numbered from 0 in sequence order. The residue name comes from
        ``one_to_three``; letters it rejects with ``KeyError`` become "UNK".
        The file prefix is used both as the structure id and as the segid of
        every residue, and the chain id is ``chainType`` upper-cased.

        :param fname: str. path to fasta file
        :param chainType: str. "l" or "r"
        :return: the structure, containing model 0 with a single chain
        '''
        # inputNumber is used to report which partner fails if error
        seq = self.parseFasta(
            fname, inputNumber="1" if chainType == "l" else "2")
        prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]
        chainId = chainType.upper()

        struct = Structure(prefix)
        model = Model(0)
        struct.add(model)
        chain = Chain(chainId)
        model.add(chain)

        for i, aa in enumerate(seq):
            try:
                resname = one_to_three(aa)
            except KeyError:
                resname = "UNK"
            chain.add(Residue((' ', i, ' '), resname, prefix))
        return struct
コード例 #4
0
    def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
        """Return a renumbered copy of ``structure``.

        The first element of the
        ``_ma_target_ref_db_details.seq_db_align_begin`` and
        ``..._end`` entries of ``alphafold_mmCIF_dict`` give the residue
        range. In every chain of every model the residues are copied and
        numbered consecutively from the begin value, with ids of the form
        ``(' ', number, ' ')``. All disorder flags on the copies are cleared.

        An ``assert`` checks, per chain, that the last number assigned equals
        the end value.
        """
        first_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_begin'][0])
        last_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_end'][0])

        result = Structure(structure.id)
        for source_model in structure:
            target_model = Model(source_model.id)
            for source_chain in source_model:
                next_number = first_number
                target_chain = Chain(source_chain.id)
                for source_residue in source_chain:
                    clone = source_residue.copy()
                    clone.id = (' ', next_number, ' ')
                    # copy() does not carry disorder over properly, so every
                    # trace of disorder is removed from the clone instead.
                    for clone_atom in clone:
                        clone_atom.disordered_flag = 0
                    clone.disordered = 0
                    target_chain.add(clone)
                    next_number += 1

                # next_number is now one past the last number handed out.
                assert next_number == last_number + 1
                target_model.add(target_chain)

            result.add(target_model)
        return result
コード例 #5
0
def retrieve_ca_model(structure):
    """Build a reduced structure that keeps only main-chain trace atoms.

    Only the first model (``structure[0]``) is read. For every atom named
    "CA", "C4'" or "C4*" a residue with the original residue number and
    residue name (blank hetero flag, insertion code and segid) is added to
    the matching chain of a new structure named ``'clustering_model'``.
    The residue holds one atom named ``'CA'`` with the original coordinates
    and serial number.

    :param structure: indexable structure whose element 0 iterates over
        chains, then residues, then atoms.
    :return: the newly built structure.
    """
    trace_atom_names = ("CA", "C4'", "C4*")

    reduced_struct = Structure('clustering_model')
    reduced_model = Model(0)
    reduced_struct.add(reduced_model)

    for ch in structure[0]:
        reduced_chain = Chain(ch.id)
        reduced_model.add(reduced_chain)
        for resi in ch:
            for atom in resi:
                if atom.get_name() not in trace_atom_names:
                    continue
                reduced_residue = Residue((' ', resi.id[1], ' '),
                                          resi.get_resname(), ' ')
                # The new atom is always named 'CA', whichever of the trace
                # atom names matched.
                trace_atom = Atom('CA', atom.coord, 0, ' ', ' ', 'CA',
                                  atom.get_serial_number())
                reduced_chain.add(reduced_residue)
                reduced_residue.add(trace_atom)

    return reduced_struct
コード例 #6
0
    def create_sphere_representation(self):
        """Represent the component by a single pseudo-atom and save it.

        ``self.calculate_centre_of_complex(self.fa_struct.struct)`` supplies
        three values: the coordinates of the pseudo-atom, ``self.molmass``
        and ``self.radius``. A new structure with model 0, one chain named
        ``self.fa_struct.chain``, one "ALA" residue numbered 1 and one 'CA'
        atom at those coordinates is stored in ``self.cg_struct`` and passed
        to ``self.save_pdb`` under the name ``"dddd" + chain``.
        """
        new_struct = Structure('sphrere')
        my_model = Model(0)
        new_struct.add(my_model)

        my_chain = Chain(self.fa_struct.chain)
        my_model.add(my_chain)

        coord, self.molmass, self.radius = self.calculate_centre_of_complex(
            self.fa_struct.struct)
        my_residue = Residue((' ', 1, ' '), "ALA", ' ')

        # Single-precision coordinates for the pseudo-atom.
        coords = array(coord, 'f')
        atom = Atom('CA', coords, 0, 0, ' ', ' CA', 1)

        my_chain.add(my_residue)
        my_residue.add(atom)

        self.cg_struct = new_struct
        name = "dddd" + self.fa_struct.chain
        self.save_pdb(new_struct, name)
コード例 #7
0
def normalize_chain(chain: Chain) -> Chain:
    """Return a new chain with the same id holding the normalized residues.

    Each residue is passed through ``normalize_residue``; residues for which
    an ``UnknownResidueError`` is raised are left out of the result.
    """
    normalized = Chain(chain.id)
    for member in chain:
        try:
            normalized.add(normalize_residue(member))
        except UnknownResidueError:
            # Deliberately best-effort: skip residues that cannot be handled.
            continue
    return normalized
コード例 #8
0
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    The alpha carbon sits at the origin, the carbonyl carbon on the x axis
    and the nitrogen in the xy plane at the N-CA-C angle. For an "ACE" cap
    the central atom is named CH3 instead of CA.

    Raises ValueError when the argument is neither a Geo nor a str.
    """

    if isinstance(residue, Geo):
        geo = residue
    elif isinstance(residue, str):
        geo = geometry(residue)
    else:
        raise ValueError("Invalid residue argument:", residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length

    # The geometry stores the angle in degrees; convert once for both terms.
    N_CA_C_radians = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0.0, 0.0, 0.0])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([
        CA_N_length * math.cos(N_CA_C_radians),
        CA_N_length * math.sin(N_CA_C_radians),
        0,
    ])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")

    # Check if the peptide is capped or not
    if geo.residue_name == "ACE":
        CA = Atom("CH3", CA_coord, 0.0, 1.0, " ", " CH3", 0, "C")
    else:
        CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")

    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Create carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = make_res_of_type(segID, N, CA, C, O, geo)

    cha = Chain("A")
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure("X")
    struc.add(mod)
    return struc
コード例 #9
0
 def add_dummy_structure(self):
     """Create a placeholder residue that marks a gap in visualisation
     software.

     The residue is named 'DUM', numbered ``-1 * self.chiral_id``, and holds
     one 'DUM' atom at the origin with serial number -999. It is attached to
     a chain with id '?' and stored in ``self.dummy_structure``.
     Always returns True.
     """
     placeholder_atom = Atom('DUM', np.zeros(3), 0, 1, ' ', 'DUM', -999)
     placeholder_id = (' ', -1 * self.chiral_id, ' ')
     placeholder = Residue(placeholder_id, 'DUM', '?')
     placeholder.add(placeholder_atom)

     # The chain is only kept alive through the residue added to it.
     holder = Chain('?')
     holder.add(placeholder)

     self.dummy_structure = placeholder
     return True
コード例 #10
0
    def init_chain(self, chain_id):
        """Make the chain with the given id the current chain.

        A chain id that is not yet in the current model gets a fresh Chain,
        which is added to the model. An id that is already present means the
        chain is being continued: the existing chain is reused and a
        PDBConstructionWarning is issued.

        Arguments:
        o chain_id - string
        """
        if not self.model.has_id(chain_id):
            self.chain = Chain(chain_id)
            self.model.add(self.chain)
            return

        self.chain = self.model[chain_id]
        message = "WARNING: Chain %s is discontinuous at line %i." % (
            chain_id, self.line_counter)
        warnings.warn(message, PDBConstructionWarning)
コード例 #11
0
ファイル: TMtoolsCommandLine.py プロジェクト: wxyz/exmachina
    def _align(self):
        """Renumber the residues of both proteins and write each to a PDB file.

        The residue sequences come from ``self._pp`` for ``self.protein_A``
        and ``self.protein_B``. Residues are renumbered consecutively from
        1000 (hetero flag and insertion code are kept), moved into two new
        chains with a blank id, and saved as ``.tmp.protein_a.pdb`` and
        ``.tmp.protein_b.pdb`` relative to the current working directory.
        """
        pp_a = self._pp(self.protein_A, 'A')
        pp_b = self._pp(self.protein_B, ' ')

        # NOTE(review): the value is not used below. The attribute read is
        # kept in case ``alignment`` is a property whose evaluation matters;
        # drop this line once that is confirmed not to be the case.
        msa = self.alignment  # noqa: F841

        # Presumably the high starting number avoids clashes with ids that
        # still exist in the residues' current parent chain - TODO confirm.
        plus = 1000
        for polypeptide in (pp_a, pp_b):
            for position, residue in enumerate(polypeptide):
                residue.id = (residue.id[0], plus + position, residue.id[2])

        new_chain_a = Chain(' ')
        for residue in pp_a:
            new_chain_a.add(residue)
        new_chain_b = Chain(' ')
        for residue in pp_b:
            new_chain_b.add(residue)

        io = PDBIO()
        io.set_structure(new_chain_a)
        io.save('.tmp.protein_a.pdb')
        io = PDBIO()
        io.set_structure(new_chain_b)
        io.save('.tmp.protein_b.pdb')
コード例 #12
0
 def create_new_chain(self, old_struct):
     """Return a new structure containing model 0 with one empty chain.

     Both the structure id and the chain id are ``old_struct.chain``.
     """
     chain_name = old_struct.chain
     new_structure = Structure(chain_name)
     new_model = Model(0)
     new_structure.add(new_model)
     # what if more chains in one component?
     new_model.add(Chain(chain_name))
     return new_structure
コード例 #13
0
 def add(self, residue):
     """Add a copy of a residue or ligand to this site.

     The argument is copied first (``copy(include_structure=True)``); the
     copy is what gets stored. Objects whose exact type is PdbResidue go to
     ``self.residues`` and ``self.residues_dict`` (keyed by ``full_id``);
     objects whose exact type is Het go to ``self.ligands``. In both cases
     the copy's ``parent_site`` is set to this site. If the copy carries a
     structure, that structure is merged into model 0 of ``self.structure``.

     Always returns True.
     """
     residue = residue.copy(include_structure=True)
     # Exact type comparison: subclasses of PdbResidue are not matched here.
     if type(residue) == PdbResidue:
         self.residues.append(residue)
         self.residues_dict[residue.full_id] = residue
         residue.parent_site = self
     if type(residue) == Het:
         self.ligands.append(residue)
         residue.parent_site = self
         if residue.is_polymer:
             # NOTE(review): this branch indexes self.structure[0] without the
             # "is None" initialisation performed further down - confirm that
             # the site structure always exists before a polymer is added.
             if residue.chain in self.structure[0]:
                 # The chain already exists: add the polymer's children to it.
                 for r in residue.structure:
                     self.structure[0][residue.chain].add(r)
                 return True
             # Otherwise the polymer's structure is added to model 0 as is.
             self.structure[0].add(residue.structure)
             return True
     if residue.structure:
         # Initialize structure if empty
         if self.structure is None:
             self.structure = Structure(self.id)
             self.structure.add(Model(0))
         # The chain id is taken from the parent of the residue's structure.
         chain_id = residue.structure.get_parent().get_id()
         if chain_id not in self.structure[0]:
             self.structure[0].add(Chain(chain_id))
         # Add residue structure to site structure
         if residue.structure.get_id() not in self.structure[0][chain_id]:
             self.structure[0][chain_id].add(residue.structure)
     return True
コード例 #14
0
ファイル: structure.py プロジェクト: jaredsagendorf/geobind
    def slice(cls, obj, selection, name='slice'):
        """Build a new object of this class from a subset of ``obj``.

        Every item of <selection> is indexed as ``item[1]`` (model id),
        ``item[2]`` (chain id) and ``item[3]`` (residue id). The models and
        chains named by the selection are created in a fresh Biopython
        Structure called <name>, then a copy of each selected residue
        ``obj[model][chain][residue]`` is added to it. The result is
        ``cls(structure, name=name)``.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain

        ent = Structure(name)  # Biopython structure object

        # First pass: collect, per model id, the chain ids that are needed.
        chains_by_model = {}
        for item in selection:
            chains_by_model.setdefault(item[1], set()).add(item[2])

        # Create the empty model/chain containers.
        for mid, chain_ids in chains_by_model.items():
            ent.add(Model(mid))
            for cid in chain_ids:
                ent[mid].add(Chain(cid))

        # Second pass: copy the selected residues into their containers.
        for item in selection:
            mid, cid, rid = item[1], item[2], item[3]
            ent[mid][cid].add(obj[mid][cid][rid].copy())

        return cls(ent, name=name)
コード例 #15
0
    def __create_superimposed_pdb(self):
        """Return a model with the residues of both proteins as chains A and B.

        The residues of ``self.proteins[0]`` go to chain 'A' and those of
        ``self.proteins[1]`` to chain 'B'. Each residue is renumbered by its
        position (from 0) while keeping its hetero flag and insertion code.
        The residue objects themselves are renumbered and added, not copies.
        """
        merged_model = Model(0)
        chains = (Chain('A'), Chain('B'))

        for protein_id, chain in enumerate(chains):
            residues = self.proteins[protein_id].get_residues()
            for position, residue in enumerate(residues):
                residue.id = (residue.id[0], position, residue.id[2])
                chain.add(residue)

        for chain in chains:
            merged_model.add(chain)

        return merged_model
コード例 #16
0
 def from_label_seq_ids(cls, label_seq_ids: Iterable[int],
                        mapping: BiopythonToMmcifResidueIds.Mapping,
                        bio_chain: Chain):
     """Build an instance from label_seq_ids within one chain.

     The residues come from ``cls.residues_from_label_seq_ids``. The second
     constructor argument is the id of the chain's grandparent (presumably
     the structure - confirm against the hierarchy in use) and the third is
     the chain's own id.
     """
     residues = cls.residues_from_label_seq_ids(label_seq_ids, mapping,
                                                bio_chain)
     grandparent_id = bio_chain.get_parent().get_parent().id
     return cls(residues, grandparent_id, bio_chain.id)
コード例 #17
0
 def create_new_chain(self, id):
     """Reset ``self.fragment_lattice`` to a new, empty structure.

     The structure is named ``id`` and holds model 0 with one empty chain
     that is also named ``id``. Nothing is returned.
     """
     self.fragment_lattice = Structure(id)
     lattice_model = Model(0)
     self.fragment_lattice.add(lattice_model)
     # what if more chains in one component?
     lattice_model.add(Chain(id))
コード例 #18
0
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    The alpha carbon sits at the origin, the carbonyl carbon on the x axis
    and the nitrogen in the xy plane at the N-CA-C angle.'''

    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = Geo(residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length

    # The geometry stores the angle in degrees; convert once for both terms.
    N_CA_C_radians = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0., 0., 0.])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([
        CA_N_length * math.cos(N_CA_C_radians),
        CA_N_length * math.sin(N_CA_C_radians),
        0,
    ])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Create carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = makeRes(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure('X')
    struc.add(mod)
    return struc
コード例 #19
0
    def get_structure(self, name='RNA chain'):
        """Wrap this chain in a new structure and return it.

        The structure is named ``name`` and contains model 0 with one chain
        called ``self.chain_name``. Every item yielded by iterating over
        ``self`` is added to that chain (the items themselves, not copies).
        """
        struc = Structure(name)
        model = Model(0)
        chain = Chain(self.chain_name)
        struc.add(model)
        model.add(chain)

        for resi in self:
            chain.add(resi)
        return struc
コード例 #20
0
ファイル: test_pdbalign.py プロジェクト: kemaleren/pdbalign
    def setUp(self):
        """Create chain "A" populated with five test residues.

        The residues are Trp, His, Ser, Val, His with segids 0 to 4.
        """
        self.chain = Chain("A")

        # NOTE(review): every residue is created with the same id (0);
        # confirm the chain implementation in use accepts repeated ids.
        names = ("Trp", "His", "Ser", "Val", "His")
        residues = [
            Residue(0, resname=name, segid=position)
            for position, name in enumerate(names)
        ]

        for residue in residues:
            self.chain.add(residue)
コード例 #21
0
 def add_chain_to_struct(self, chain_id):
     """
         adds a new, empty chain to the first model of self.struct
     Parameters:
     -----------
         chain_id    :   chain name
     Returns:
     ---------
         None; self.struct[0] is modified in place
     """
     new_chain = Chain(chain_id)
     first_model = self.struct[0]
     first_model.add(new_chain)
コード例 #22
0
ファイル: pdb.py プロジェクト: yotamfr/prot2vec
def select_structure(selector, structure):
    """Return a filtered copy of the hierarchy of ``structure``.

    ``selector`` decides what is kept through ``accept_model``,
    ``accept_chain``, ``accept_residue`` and ``accept_atom``. New structure,
    model, chain and residue containers are created for everything that is
    accepted. Accepted atoms are added as the same objects, not as copies.
    """
    filtered = Structure(structure.id)
    for model in filter(selector.accept_model, structure):
        model_shell = Model(model.id, model.serial_num)
        filtered.add(model_shell)
        for chain in filter(selector.accept_chain, model):
            chain_shell = Chain(chain.id)
            model_shell.add(chain_shell)
            for residue in filter(selector.accept_residue, chain):
                residue_shell = Residue(residue.id, residue.resname,
                                        residue.segid)
                chain_shell.add(residue_shell)
                for atom in filter(selector.accept_atom, residue):
                    residue_shell.add(atom)
    return filtered
コード例 #23
0
    def create_structure(coords, pdb_type, remove_masked):
        """Build a structure from coordinates, save it as PDB and return it.

        The sequence, mask and id are read from the enclosing ``protein``
        object. Each residue uses three consecutive rows of ``coords`` for
        its N, CA and CB atoms, in that order.

        Args:
            coords: 3D coordinates, indexed as ``coords[row, :]`` with three
                rows per residue
            pdb_type: label placed in the output file name
            remove_masked: a residue whose mask value is not 1 is written
                only when this compares equal to False

        Returns:
            structure
        """
        backbone_names = ['N', 'CA', 'CB']

        name = protein.id_
        structure = Structure(name)
        model = Model(0)
        chain = Chain('A')
        for i, letter in enumerate(protein.primary):
            resname = AA_LETTERS[letter]
            # Equality test (not truthiness) is kept on purpose so that the
            # same argument values are treated as "keep masked residues".
            keep = int(protein.mask[i]) == 1 or remove_masked == False
            if not keep:
                continue
            new_residue = Residue((' ', i + 1, ' '), resname, '    ')
            first_row = 3 * i
            for k, atom_name in enumerate(backbone_names):
                new_residue.add(
                    Atom(name=atom_name,
                         coord=coords[first_row + k, :],
                         bfactor=0,
                         occupancy=1,
                         altloc=' ',
                         fullname=" {} ".format(atom_name),
                         serial_number=0))
            chain.add(new_residue)
        model.add(chain)
        structure.add(model)

        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(save_dir + name + '_' + pdb_type + '.pdb')
        return structure
    def createPDBFile(self):
        """Create a test CIF file with one atom per icosahedron vertex.

        The vertices come from an Icosahedron with circumscribed radius 100
        and orientation '222r'. Each vertex becomes an "ALA" residue (numbered
        from 1) holding a single CA atom. The file is written to
        /tmp/out.cif, replacing any existing file, and its path is returned.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain
        from Bio.PDB.Residue import Residue
        from Bio.PDB.Atom import Atom
        from Bio.PDB.mmcifio import MMCIFIO
        import os
        CIFFILENAME = "/tmp/out.cif"

        # Vertex coordinates for icosahedral symmetry (i222r).
        icosahedron = Icosahedron(circumscribed_radius=100, orientation='222r')
        vertices = icosahedron.getVertices()

        # Biopython hierarchy: structure -> model -> chain.
        structure = Structure('result')
        model = Model(1, 1)  # model id, serial number
        structure.add(model)
        chain = Chain('A')
        model.add(chain)

        for number, vertex in enumerate(vertices, start=1):
            # A blank first id field marks an ATOM record rather than a
            # heteroatom.
            residue = Residue((' ', number, ' '), "ALA", '    ')
            chain.add(residue)
            # Arguments: name, coord, bfactor, occupancy, altloc, fullname,
            # serial_number, element.
            residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", number, "C"))

        writer = MMCIFIO()
        writer.set_structure(structure)
        # Remove a previous output file before saving.
        if os.path.exists(CIFFILENAME):
            os.remove(CIFFILENAME)
        writer.save(CIFFILENAME)
        return CIFFILENAME
コード例 #25
0
    def init_chain(self, chain_id):
        """Select the chain with the given id as the current chain.

        If the current model already contains the id, that chain is reused
        and a PDBConstructionWarning about a discontinuous chain is issued;
        otherwise a new Chain is created and added to the model.

        Arguments:
        o chain_id - string
        """
        already_present = self.model.has_id(chain_id)
        if already_present:
            self.chain = self.model[chain_id]
            details = (chain_id, self.line_counter)
            warnings.warn(
                "WARNING: Chain %s is discontinuous at line %i." % details,
                PDBConstructionWarning)
        else:
            fresh_chain = Chain(chain_id)
            self.chain = fresh_chain
            self.model.add(self.chain)
コード例 #26
0
 def renumberChain(self,
                   chainID,
                   offset=0,
                   modelID='0',
                   filename="output.mmcif"):
     """Shift the residue numbers of one chain and write the structure.

     ``offset`` is added to the sequence number of every residue of
     ``self.structure[modelID][chainID]``. The chain is replaced in its
     model by a new chain with the same id holding the renumbered residues,
     and ``self.write(filename)`` is called.

     Raises ValueError if any resulting residue number would be negative.
     All numbers are checked before anything is modified, so in that case
     the structure is left exactly as it was.
     """
     # get chain object
     chain = self.structure[modelID][chainID]
     residues = list(chain)

     # Work out and validate every new id up front; failing half-way through
     # would leave the chain detached and only partly renumbered.
     new_ids = []
     for residue in residues:
         res_id = list(residue.id)
         res_id[1] = res_id[1] + offset
         if res_id[1] < 0:
             raise ValueError('Residue number cant be < 0')
         new_ids.append(tuple(res_id))

     from Bio.PDB.Chain import Chain

     # remove chain from model
     self.structure[modelID].detach_child(chainID)
     # create new chain
     newChain = Chain(chainID)
     for residue, new_id in zip(residues, new_ids):
         # remove residue, otherwise we cannot renumber it
         residue.detach_parent()
         residue.id = new_id
         newChain.add(residue)
     self.structure[modelID].add(newChain)
     self.write(filename)
コード例 #27
0
 def create_new_structure(self, name, chain_id):
     """
         replaces self.struct with a new structure object that holds
         model 0 with one empty chain
     Parameters:
     -----------
         name        :   structure name
         chain_id    :   chain name (e.g. A, B, C)
     Returns:
     ---------
         None; the result is stored in self.struct
     """
     self.struct = Structure(name)
     first_model, first_chain = Model(0), Chain(chain_id)
     self.struct.add(first_model)
     self.struct[0].add(first_chain)
コード例 #28
0
    def __make_structure_from_residues__(self, residues):
        """
        Makes a Structure object from a list of residues.

        Residues are collected into chains named 'c1', 'c2', ... A new chain
        is started whenever the current chain already contains the id of the
        next residue. All chains end up in model 'm' of structure 's'.
        """
        # KR: this probably can be outsourced to another module.
        model = Model('m')
        chain_count = 1
        current_chain = Chain('c%i' % chain_count)

        for residue in residues:
            if current_chain.has_id(residue.id):
                # Duplicate id: close the current chain and open the next one.
                model.add(current_chain)
                chain_count += 1
                current_chain = Chain('c%i' % chain_count)
            current_chain.add(residue)

        model.add(current_chain)

        struct = Structure('s')
        struct.add(model)
        return struct
コード例 #29
0
 def polymer(cls,
             reslist,
             mcsa_id=None,
             pdb_id=None,
             chain='',
             parent_site=None):
     """Alternative constructor for polymers. Takes a residue list and returns
     a polymer ligand.

     The ligand is created with resname '*P*' and no structure; its structure
     is then set to a new chain filled with copies of the residues, skipping
     any residue whose id is already in the chain. Similarity, best match and
     centrality are computed last.
     """
     poly = cls(mcsa_id, pdb_id,
                resname='*P*',
                resid=None,
                chain=chain,
                structure=None,
                parent_site=parent_site,
                calculate_scores=False)
     poly.structure = Chain(chain)
     for res in reslist:
         if res.get_id() in poly.structure:
             # Keep only the first residue seen for each id.
             continue
         poly.structure.add(res.copy())

     similarity, best_match = poly.get_similarity()
     poly.similarity = similarity
     poly.best_match = best_match
     poly.centrality = poly.get_centrality()
     return poly
コード例 #30
0
ファイル: StructureBuilder.py プロジェクト: ruda830/biovoice
class StructureBuilder:
    """Deals with constructing the Structure object.

    The StructureBuilder class is used by the PDBParser classes to
    translate a file to a Structure object.

    The builder keeps a "current" structure, model, chain, residue and atom;
    each init_* call creates (or re-selects) the object at its level and
    attaches it to the current object one level up.
    """

    def __init__(self):
        """Initialize the class."""
        self.line_counter = 0
        self.header = {}

    def _is_completely_disordered(self, residue):
        """Return 1 if all atoms in the residue have a non blank altloc (PRIVATE)."""
        atom_list = residue.get_unpacked_list()
        for atom in atom_list:
            altloc = atom.get_altloc()
            if altloc == " ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Set the header that get_structure() attaches to the structure."""
        self.header = header

    def set_line_counter(self, line_counter):
        """Tracks line in the PDB file that is being parsed.

        The value is only used in warning messages.

        Arguments:
         - line_counter - int

        """
        self.line_counter = line_counter

    def init_structure(self, structure_id):
        """Initialize a new Structure object with given id.

        Arguments:
         - structure_id - string

        """
        self.structure = Structure(structure_id)

    def init_model(self, model_id, serial_num=None):
        """Create a new Model object with given id and add it to the structure.

        Arguments:
         - model_id - int
         - serial_num - int

        """
        self.model = Model(model_id, serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Create a new Chain object with given id.

        If the current model already has a chain with this id, that chain is
        reused and a PDBConstructionWarning is issued instead.

        Arguments:
         - chain_id - string

        """
        if self.model.has_id(chain_id):
            self.chain = self.model[chain_id]
            warnings.warn(
                "WARNING: Chain %s is discontinuous at line %i."
                % (chain_id, self.line_counter),
                PDBConstructionWarning,
            )
        else:
            self.chain = Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        The stored value is passed to every Residue created afterwards.

        Arguments:
         - segid - string

        """
        self.segid = segid

    def init_residue(self, resname, field, resseq, icode):
        """Create a new Residue object.

        For non-hetero residues whose id is already in the current chain the
        existing residue is reused or combined with the new one in a
        DisorderedResidue (see the inline comments).

        Arguments:
         - resname - string, e.g. "ASN"
         - field - hetero flag, "W" for waters, "H" for
           hetero residues, otherwise blank.
         - resseq - int, sequence identifier
         - icode - string, insertion code

        Raises PDBConstructionException when a duplicate residue with a
        different name has atoms with blank altlocs.

        """
        if field != " ":
            if field == "H":
                # The hetero field consists of H_ + the residue name (e.g. H_FUC)
                field = "H_" + resname
        res_id = (field, resseq, icode)
        # Duplicate handling applies only to residues with a blank hetero flag.
        if field == " ":
            if self.chain.has_id(res_id):
                # There already is a residue with the id (field, resseq, icode).
                # This only makes sense in the case of a point mutation.
                warnings.warn(
                    "WARNING: Residue ('%s', %i, '%s') redefined at line %i."
                    % (field, resseq, icode, self.line_counter),
                    PDBConstructionWarning,
                )
                duplicate_residue = self.chain[res_id]
                if duplicate_residue.is_disordered() == 2:
                    # The residue in the chain is a DisorderedResidue object.
                    # So just add the last Residue object.
                    if duplicate_residue.disordered_has_id(resname):
                        # The residue was already made
                        self.residue = duplicate_residue
                        duplicate_residue.disordered_select(resname)
                        # NOTE(review): unlike the sibling branches there is no
                        # return here, so control falls through to the Residue
                        # creation at the end of this method - confirm that
                        # this is intended.
                    else:
                        # Make a new residue and add it to the already
                        # present DisorderedResidue
                        new_residue = Residue(res_id, resname, self.segid)
                        duplicate_residue.disordered_add(new_residue)
                        self.residue = duplicate_residue
                        return
                else:
                    if resname == duplicate_residue.resname:
                        warnings.warn(
                            "WARNING: Residue ('%s', %i, '%s','%s') already defined "
                            "with the same name at line  %i."
                            % (field, resseq, icode, resname, self.line_counter),
                            PDBConstructionWarning,
                        )
                        self.residue = duplicate_residue
                        return
                    # Make a new DisorderedResidue object and put all
                    # the Residue objects with the id (field, resseq, icode) in it.
                    # These residues each should have non-blank altlocs for all their atoms.
                    # If not, the PDB file probably contains an error.
                    if not self._is_completely_disordered(duplicate_residue):
                        # if this exception is ignored, a residue will be missing
                        self.residue = None
                        raise PDBConstructionException(
                            "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                            % (resname, field, resseq, icode)
                        )
                    # Swap the plain residue in the chain for a disordered
                    # container that holds both the old and the new residue.
                    self.chain.detach_child(res_id)
                    new_residue = Residue(res_id, resname, self.segid)
                    disordered_residue = DisorderedResidue(res_id)
                    self.chain.add(disordered_residue)
                    disordered_residue.disordered_add(duplicate_residue)
                    disordered_residue.disordered_add(new_residue)
                    self.residue = disordered_residue
                    return
        self.residue = Residue(res_id, resname, self.segid)
        self.chain.add(self.residue)

    def init_atom(
        self,
        name,
        coord,
        b_factor,
        occupancy,
        altloc,
        fullname,
        serial_number=None,
        element=None,
        pqr_charge=None,
        radius=None,
        is_pqr=False,
    ):
        """Create a new Atom object.

        Nothing is done when the current residue is None.

        Arguments:
         - name - string, atom name, e.g. CA, spaces should be stripped
         - coord - Numeric array (Float0, size 3), atomic coordinates
         - b_factor - float, B factor
         - occupancy - float
         - altloc - string, alternative location specifier
         - fullname - string, atom name including spaces, e.g. " CA "
         - serial_number - passed through to the Atom constructor
         - element - string, upper case, e.g. "HG" for mercury
         - pqr_charge - float, atom charge (PQR format)
         - radius - float, atom radius (PQR format)
         - is_pqr - boolean, flag to specify if a .pqr file is being parsed

        """
        residue = self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom = residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname = duplicate_atom.get_fullname()
            if duplicate_fullname != fullname:
                # name of current atom now includes spaces
                name = fullname
                warnings.warn(
                    "Atom names %r and %r differ only in spaces at line %i."
                    % (duplicate_fullname, fullname, self.line_counter),
                    PDBConstructionWarning,
                )
        if not is_pqr:
            self.atom = Atom(
                name,
                coord,
                b_factor,
                occupancy,
                altloc,
                fullname,
                serial_number,
                element,
            )
        elif is_pqr:
            # For PQR input the B factor and occupancy slots are passed as
            # None and the charge and radius are supplied instead.
            self.atom = Atom(
                name,
                coord,
                None,
                None,
                altloc,
                fullname,
                serial_number,
                element,
                pqr_charge,
                radius,
            )
        if altloc != " ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom = residue[name]
                if duplicate_atom.is_disordered() == 2:
                    duplicate_atom.disordered_add(self.atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom = DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(self.atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn(
                        "WARNING: disordered atom found with blank altloc before "
                        "line %i.\n" % self.line_counter,
                        PDBConstructionWarning,
                    )
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom = DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(self.atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(self.atom)

    def set_anisou(self, anisou_array):
        """Set anisotropic B factor of current Atom."""
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        """Set standard deviation of anisotropic B factor of current Atom."""
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        """Set standard deviation of atom position of current Atom."""
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        """Return the structure, with the stored header attached to it."""
        # first sort everything
        # self.structure.sort()
        # Add the header dict
        self.structure.header = self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Set symmetry (placeholder: both arguments are currently ignored)."""
        pass
コード例 #31
0
class StructureBuilder(object):
    """
    Deals with constructing the Structure object. The StructureBuilder class is used
    by the PDBParser classes to translate a file to a Structure object.

    The init_* methods store the object currently being built on the
    instance (self.structure, self.model, self.chain, self.segid,
    self.residue, self.atom); later calls attach their children to it.
    """
    def __init__(self):
        """Start with line counter 0 and an empty header dict."""
        self.line_counter=0
        self.header={}

    def _is_completely_disordered(self, residue):
        """Return 1 if all atoms in the residue have a non blank altloc.

        Returns 0 as soon as an atom with a blank (" ") altloc is found
        in residue.get_unpacked_list().
        """
        atom_list=residue.get_unpacked_list()
        for atom in atom_list:
            altloc=atom.get_altloc()
            if altloc==" ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Store the header; get_structure() later attaches it to the structure."""
        self.header=header

    def set_line_counter(self, line_counter):
        """
        The line counter keeps track of the line in the PDB file that
        is being parsed.

        It is only used here to report line numbers in warning messages.

        Arguments:
        o line_counter - int
        """
        self.line_counter=line_counter

    def init_structure(self, structure_id):
        """Initiate a new Structure object with given id.

        Arguments:
        o id - string
        """
        self.structure=Structure(structure_id)

    def init_model(self, model_id, serial_num = None):
        """Initiate a new Model object with given id.

        The new Model becomes the current model and is added to the
        current structure.

        Arguments:
        o id - int
        o serial_num - int
        """
        self.model=Model(model_id,serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Initiate a new Chain object with given id.

        If the current model already has a chain with this id, that chain
        becomes the current chain again and a PDBConstructionWarning is
        issued; otherwise a new Chain is created and added to the model.

        Arguments:
        o chain_id - string
        """
        if self.model.has_id(chain_id):
            # Same chain id seen again: keep filling the existing chain
            self.chain=self.model[chain_id]
            warnings.warn("WARNING: Chain %s is discontinuous at line %i."
                          % (chain_id, self.line_counter),
                          PDBConstructionWarning)
        else:
            self.chain=Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        The stored segid is passed to every Residue created afterwards.

        Arguments:
        o segid - string
        """
        self.segid=segid

    def init_residue(self, resname, field, resseq, icode):
        """
        Initiate a new Residue object.

        For a non-hetero residue whose id is already present in the
        current chain, the residues sharing that id are collected in a
        DisorderedResidue (point mutation) and a PDBConstructionWarning
        is issued. Raises PDBConstructionException if the residue already
        in the chain has atoms with blank altlocs.

        Arguments:
        o resname - string, e.g. "ASN"
        o field - hetero flag, "W" for waters, "H" for
            hetero residues, otherwise blank.
        o resseq - int, sequence identifier
        o icode - string, insertion code
        """
        if field!=" ":
            if field=="H":
                # The hetero field consists of H_ + the residue name (e.g. H_FUC)
                field="H_"+resname
        res_id=(field, resseq, icode)
        if field==" ":
            if self.chain.has_id(res_id):
                # There already is a residue with the id (field, resseq, icode).
                # This only makes sense in the case of a point mutation.
                warnings.warn("WARNING: Residue ('%s', %i, '%s') "
                              "redefined at line %i."
                              % (field, resseq, icode, self.line_counter),
                              PDBConstructionWarning)
                duplicate_residue=self.chain[res_id]
                if duplicate_residue.is_disordered()==2:
                    # The residue in the chain is a DisorderedResidue object.
                    # So just add the last Residue object.
                    if duplicate_residue.disordered_has_id(resname):
                        # The residue was already made
                        self.residue=duplicate_residue
                        duplicate_residue.disordered_select(resname)
                        # NOTE(review): unlike the sibling branches there is
                        # no return here, so control falls through to the
                        # Residue creation and self.chain.add() at the end of
                        # this method, for an id the chain already holds --
                        # confirm this is intended.
                    else:
                        # Make a new residue and add it to the already
                        # present DisorderedResidue
                        new_residue=Residue(res_id, resname, self.segid)
                        duplicate_residue.disordered_add(new_residue)
                        self.residue=duplicate_residue
                        return
                else:
                    # Make a new DisorderedResidue object and put all
                    # the Residue objects with the id (field, resseq, icode) in it.
                    # These residues each should have non-blank altlocs for all their atoms.
                    # If not, the PDB file probably contains an error.
                    if not self._is_completely_disordered(duplicate_residue):
                        # if this exception is ignored, a residue will be missing
                        # (init_atom returns early while self.residue is None)
                        self.residue=None
                        raise PDBConstructionException(
                            "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                            % (resname, field, resseq, icode))
                    # Swap the plain Residue in the chain for a
                    # DisorderedResidue holding both the old and the new one
                    self.chain.detach_child(res_id)
                    new_residue=Residue(res_id, resname, self.segid)
                    disordered_residue=DisorderedResidue(res_id)
                    self.chain.add(disordered_residue)
                    disordered_residue.disordered_add(duplicate_residue)
                    disordered_residue.disordered_add(new_residue)
                    self.residue=disordered_residue
                    return
        # Default case: a plain new Residue added to the current chain
        residue=Residue(res_id, resname, self.segid)
        self.chain.add(residue)
        self.residue=residue

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                  serial_number=None, element=None):
        """
        Initiate a new Atom object.

        The Atom becomes the current atom (self.atom) and is added to the
        current residue, wrapped in a DisorderedAtom when its altloc is
        not blank. Does nothing if the current residue is None.

        Arguments:
        o name - string, atom name, e.g. CA, spaces should be stripped
        o coord - Numeric array (Float0, size 3), atomic coordinates
        o b_factor - float, B factor
        o occupancy - float
        o altloc - string, alternative location specifier
        o fullname - string, atom name including spaces, e.g. " CA "
        o serial_number - passed through to the Atom constructor
        o element - string, upper case, e.g. "HG" for mercury
        """
        residue=self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
                duplicate_atom=residue[name]
                # atom name with spaces of duplicate atom
                duplicate_fullname=duplicate_atom.get_fullname()
                if duplicate_fullname!=fullname:
                    # name of current atom now includes spaces
                    name=fullname
                    warnings.warn("Atom names %r and %r differ "
                                  "only in spaces at line %i."
                                  % (duplicate_fullname, fullname,
                                     self.line_counter),
                                  PDBConstructionWarning)
        atom=self.atom=Atom(name, coord, b_factor, occupancy, altloc,
                            fullname, serial_number, element)
        if altloc!=" ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom=residue[name]
                if duplicate_atom.is_disordered()==2:
                    # Existing entry is already a DisorderedAtom: just add to it
                    duplicate_atom.disordered_add(atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom=DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn("WARNING: disordered atom found "
                                  "with blank altloc before line %i.\n"
                                  % self.line_counter,
                                  PDBConstructionWarning)
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom=DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(atom)

    def set_anisou(self, anisou_array):
        """Set anisotropic B factor of current Atom."""
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        """Set standard deviation of anisotropic B factor of current Atom."""
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        """Set standard deviation of atom position of current Atom."""
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        """Return the structure, after attaching the header dict to it."""
        # first sort everything
        # self.structure.sort()
        # Add the header dict
        self.structure.header=self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Accept symmetry information; this builder ignores it (no-op)."""
        pass
コード例 #32
0
ファイル: test_pdbalign.py プロジェクト: kemaleren/pdbalign
class TestPdbalign(unittest.TestCase):
    """Tests for the pdbalign alignment, distance-matrix and consensus helpers."""

    # Gap penalties are lowered so that the short test alignments work.
    aligner = Aligner(BLOSUM62.load(), do_codon=False,
                      open_insertion=-1, open_deletion=-1)

    def setUp(self):
        """Create chain "A" holding Trp, His, Ser, Val, His residues."""
        self.chain = Chain("A")
        residue_names = ("Trp", "His", "Ser", "Val", "His")
        members = [Residue(0, resname=name, segid=position)
                   for position, name in enumerate(residue_names)]

        for member in members:
            self.chain.add(member)

    def _check_msa_indices(self, sequences, expected_rows):
        """Align `sequences` against the test chain and compare the index rows."""
        indices = align_chains_msa(sequences, [self.chain], aligner=self.aligner)
        expected = np.array(expected_rows)
        self.assertTrue(np.all(indices == expected))

    def test_align_and_index(self):
        """Each case is (first sequence, second sequence, expected index list)."""
        cases = (
            (Seq("AHSVH"), Seq("AHVH"), [0, 1, -1, 2, 3]),
            (Seq("AHVH"), Seq("AHSVH"), [0, 1, 3, 4]),
            (Seq("AHSVH"), Seq("AHSVH"), [0, 1, 2, 3, 4]),
            (Seq("-HSVH"), Seq("AHSVH"), [-1, 1, 2, 3, 4]),
            (Seq("A-SVH"), Seq("AHSVH"), [0, -1, 2, 3, 4]),
            (Seq("AH-VH"), Seq("AHSVH"), [0, 1, -1, 3, 4]),
            (Seq("AHS-H"), Seq("AHSVH"), [0, 1, 2, -1, 4]),
            (Seq("AHSV-"), Seq("AHSVH"), [0, 1, 2, 3, -1]),
            (Seq("AHSVHCCCCCCFPVW"), Seq("AHSVHFPVW"),
             [0, 1, 2, 3, 4, -1, -1, -1, -1, -1, -1, 5, 6, 7, 8]),
        )

        for first, second, wanted in cases:
            got = align_and_index(first, second, missing=-1, aligner=self.aligner)
            self.assertEqual(wanted, got)

    def test_align_chains_msa(self):
        """Gaps in individual MSA rows still yield a full index row."""
        self._check_msa_indices(
            [Seq("AHSVH"),
             Seq("AH-VH"),
             Seq("A-SVH")],
            [[0, 1, 2, 3, 4]])

    def test_align_chains_msa_no_consensus(self):
        """A column without consensus is reported as -1."""
        self._check_msa_indices(
            [Seq("AHSV"),
             Seq("AHSH")],
            [[0, 1, 2, -1]])

    def test_align_chains_msa_leading_gaps(self):
        """Leading columns that do not map onto the chain are reported as -1."""
        self._check_msa_indices(
            [Seq("FFWHSVH"),
             Seq("IIWH-VH"),
             Seq("WWW-SVH")],
            [[-1, -1, 0, 1, 2, 3, 4]])

    def test_compute_distance_matrix(self):
        """Check the distance matrix for two coordinate sets, one row being NaN."""
        base = np.array([[0, 0],
                         [np.nan, np.nan],
                         [1, 1],
                         [1, 0]])
        shifted = base.copy()
        shifted[:, 0] += 1.5
        shifted[:, 1] += 1
        coords = np.hstack([shifted, base]).reshape((4, 2, 2))
        expected = np.array([[0, 5, 0.5, 1],
                             [5, 0, 5, np.inf],
                             [0.5, 5, 0, 1],
                             [1, np.inf, 1, 0]])
        result = compute_distance_matrix(coords, default_dist=5)
        self.assertTrue(np.all(expected == result))

    def test_consensus(self):
        """Ties and empty iterables give the flag value; a majority gives its value."""
        flag = -1
        cases = (((0, 0, 1, 1), flag),
                 ((0, 0, 0, 1), 0),
                 ((0, 0, 0, 0), 0),
                 (iter([]), flag),
                 ((), flag))
        for items, wanted in cases:
            got = consensus(items, flag=-1)
            self.assertEqual(wanted, got)
コード例 #33
0
ファイル: GenPDBFromPts.py プロジェクト: yunxu/chromatin
    ind = 0
    for line in open(filename).readlines():
        if not line.startswith('#'):
            bfactors[ind] = array((line.split())[column]) 
            ind = ind+1
    return bfactors	
#--------------------------------------------------------------------
# Read the point coordinates and, optionally, one B factor per point.
points = ReadXYZ(args['src'], args['scale'])
if args['bfactor'] is not None:
    # Parenthesised single-argument form: prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("read bfactor file column %d" % args['column'])
    bfactors = ReadBfactor(args['bfactor'], args['column'])
else:
    # No B factor file given: every atom gets a B factor of 0.
    bfactors = zeros(len(points))

model = Model(1)
chain = Chain("A")
structure = Structure("ref")

# Build one ALA residue holding a single CA atom per input point.
# Residue numbers and atom serial numbers both start at 1.
num_count = 0
for i in range(0, shape(points)[0]):
    num_count = num_count + 1
    res_id = (' ', num_count, ' ')
    residue = Residue(res_id, 'ALA', ' ')
    cur_coord = tuple(points[i])
    bfactor = bfactors[i]
    atom = Atom('CA', cur_coord, bfactor, 0, ' ', 'CA', num_count, 'C')
    residue.add(atom)
    chain.add(residue)

# Assemble the hierarchy: structure -> model -> chain.
model.add(chain)
structure.add(model)
コード例 #34
0
def compare_chains(chain1: Chain, chain2: Chain,
                   c1_residue_mapping: BiopythonToMmcifResidueIds.Mapping,
                   c2_residue_mapping: BiopythonToMmcifResidueIds.Mapping,
                   c1_seq: Dict[int, str], c2_seq: Dict[int, str],  # in 3-letter codes
                   comparators__residues_param: List[Analyzer],
                   comparators__residue_ids_param: List[Analyzer],
                   comparators__domains__residues_param: List[Analyzer],
                   comparators__domains__residue_ids_param: List[Analyzer],
                   comparators__2domains__residues_param: List[Analyzer],
                   serializer_or_analysis_handler: AnalysisHandler,
                   domains_info: list,
                   ) -> None:
    """ Runs comparisons between two chains. E.g. one ligand-free (apo) and another ligand-bound (holo).

    Both chains are cropped to their longest common polypeptide, unobserved residues are dropped, and
    the given analyzers are run on chain level, domain level and two-domain-arrangement level. Every
    result is passed to `serializer_or_analysis_handler.handle`.

    :param chain1: A Bio.PDB Chain, obtained as a part of BioPython Structure object as usual
    :param chain2: A corresponding chain (same sequence), typically from a different PDB structure. See chain1.
    :param c1_residue_mapping: residue id mapping for chain1 (BioPython <-> mmCIF label_seq_id)
    :param c2_residue_mapping: residue id mapping for chain2
    :param c1_seq: chain1 sequence, label_seq_id -> 3-letter residue code
    :param c2_seq: chain2 sequence, label_seq_id -> 3-letter residue code
    :param comparators__residues_param: analyzers called with both chains' residues
    :param comparators__residue_ids_param: analyzers called with both chains' residue ids
    :param comparators__domains__residues_param: analyzers called with residues of paired domains
    :param comparators__domains__residue_ids_param: analyzers called with residue ids of paired domains
    :param comparators__2domains__residues_param: analyzers called with two domain pairs (four domains)
    :param serializer_or_analysis_handler: receives every analysis result through `handle`
    :param domains_info: list that gets a dict appended for every full and every analyzed domain
    :raises APIException: re-raised from the domain lookup unless its cause mentions '404'
    """
    s1_pdb_code = chain1.get_parent().get_parent().id
    s2_pdb_code = chain2.get_parent().get_parent().id

    logging.info(f'running analyses for ({s1_pdb_code}, {s2_pdb_code}) pair...')

    # c1_seq, c2_seq todo, is the order in atom_site loop guaranteed? If not, I should sort the dict by label_seq_id
    # also todo, is label_seq_id sequential, that is one-by-one always +1?
    # todo assert entity_poly_seq have no gaps (always +1), they say they're sequential, I think they mean exactly this

    # crop polypeptides to longest common substring
    c1_common_seq, c2_common_seq = get_longest_common_polypeptide(c1_seq, c2_seq)
    c1_label_seq_ids = list(c1_common_seq.keys())
    c2_label_seq_ids = list(c2_common_seq.keys())

    # Without a common stretch there is no first residue to derive the offset from and nothing to
    # compare. (This guard replaces an unconditional `return` that made everything below unreachable.)
    if not c1_label_seq_ids or not c2_label_seq_ids:
        logging.warning(f'{s1_pdb_code} {s2_pdb_code} no common sequence found, skip the analyses')
        return

    label_seq_id_offset = c2_label_seq_ids[0] - c1_label_seq_ids[0]

    # up to this point, we have residue ids of the protein sequence in the experiment. This also includes unobserved
    # residues, but those we will exclude from our analysis as their positions weren't determined
    c1_residues, c1_label_seq_ids, c2_residues, c2_label_seq_ids = get_observed_residues(
        chain1,
        c1_label_seq_ids,
        c1_residue_mapping,
        chain2,
        c2_label_seq_ids,
        c2_residue_mapping,
    )

    c1_residues = ChainResidues(c1_residues, s1_pdb_code, chain1.id)
    c2_residues = ChainResidues(c2_residues, s2_pdb_code, chain2.id)

    # todo a bit clumsy
    c1_residue_ids = ChainResidueData[ResidueId]([ResidueId(label_seq_id, chain1.id) for label_seq_id in
                                                  c1_label_seq_ids], s1_pdb_code, chain1.id)
    c2_residue_ids = ChainResidueData[ResidueId]([ResidueId(label_seq_id, chain2.id) for label_seq_id in
                                                  c2_label_seq_ids], s2_pdb_code, chain2.id)

    # todo match domains using the sequences here as well - SequenceMatcher could be used again,
    #   but with spans it is more complicated -> for now one chain's domains are reindexed onto the other

    # chain-level analyses
    for a in comparators__residues_param:
        # this fn (run_analyses_for_isoform_group) does not know anything about serialization?
        # But it will know how nested it is (domain->structure) and can pass full identifiers of structures/domains

        serializer_or_analysis_handler.handle('chain2chain', a, a(c1_residues, c2_residues), c1_residues,
                                              c2_residues)  # in future maybe pass apo and holo. Will serialize itself. And output the object in rdf for example?
        # because what I would like is to output the analysis with objects identifiers, and then output the objects, what they contain (e.g. domain size?)

    for c in comparators__residue_ids_param:
        serializer_or_analysis_handler.handle('chain2chain', c, c(c1_residue_ids, c2_residue_ids), c1_residue_ids,
                                              c2_residue_ids)

    # domain-level analyses

    # get domains (set of auth_seq_id), sort them by domain id and hope they will correspond to each other
    # or could map corresponding domains by choosing the ones that have the most overlap?
    try:
        c1_domains = sorted(filter(lambda d: d.chain_id == chain1.id, get_domains(s1_pdb_code)), key=lambda d: d.domain_id)
        c2_domains = sorted(filter(lambda d: d.chain_id == chain2.id, get_domains(s2_pdb_code)), key=lambda d: d.domain_id)
        # todo record the total number of domains (for both structures), maybe write it to a separate json

        for pdb_code, domains in ((s1_pdb_code, c1_domains), (s2_pdb_code, c2_domains)):
            for d in domains:
                domains_info.append(
                    {'type': 'full_domain',
                     'full_id': (pdb_code, d.chain_id, d.domain_id),
                     'pdb_code': pdb_code,
                     'chain_id': d.chain_id,
                     'domain_id': d.domain_id,
                     'spans': d.get_spans(),})

    except APIException as e:
        if e.__cause__ and '404' in str(e.__cause__):
            logging.warning(f'{s1_pdb_code} {s2_pdb_code} no domains found, skip the domain-level analysis')
            return  # no domains found, skip the domain-level analysis
        raise

    # len(c1_domains) == len(c2_domains) is not always true, as expected, and that is OK here:
    # only chain1's domains are iterated and remapped onto chain2

    c1_domains__residues = []
    c2_domains__residues = []

    for c1_d in c1_domains:  # or c2_domains:
        # first remap first domain to second (or in future use longest common substrings, but not trivial since domains can be composed of multiple segments)
        # the offset need not be the same everywhere
        c1_domain_mapped_to_c2 = DomainResidueMapping.from_domain_on_another_chain(c1_d, chain2.id, label_seq_id_offset)

        # todo why chain.get_parent()? Probably so that the chain need not be specified (but only chains are used now)
        c1_d_residues = DomainResidues.from_domain(c1_d, chain1.get_parent(), c1_residue_mapping,
                                                   lambda id: id not in c1_label_seq_ids)
        c2_d_residues = DomainResidues.from_domain(c1_domain_mapped_to_c2, chain2.get_parent(), c2_residue_mapping,
                                                   lambda id: id not in c2_label_seq_ids)

        if not c1_d_residues or not c2_d_residues:
            # the domain is not within the processed LCS of both chains (empty intersection with chain residues)
            logging.warning(f'domain {c1_d.domain_id} is not within the processed LCS of both chains (empty '
                            f'intersection with '
                            f'chain residues)')
            continue

        c1_domains__residues.append(DomainResidues(c1_d_residues.data, c1_d_residues.structure_id, c1_d_residues.chain_id, c1_d_residues.domain_id))
        c2_domains__residues.append(DomainResidues(c2_d_residues.data, c2_d_residues.structure_id, c2_d_residues.chain_id, c2_d_residues.domain_id))

    for residue_mapping, domains in ((c1_residue_mapping, c1_domains__residues),
                                     (c2_residue_mapping, c2_domains__residues)):
        for d in domains:
            domains_info.append(
                {'type': 'analyzed_domain',
                 'full_id': d.get_full_id(),
                 'pdb_code': d.structure_id,
                 'chain_id': d.chain_id,
                 'domain_id': d.domain_id,
                 'spans': d.get_spans(residue_mapping),
                 'spans_auth_seq_id': d.get_spans(residue_mapping, auth_seq_id=True),
                 })

    # todo record the number of domains going into the analyses

    # todo get_interdomain_surface is hardcoded here instead of being passed in the arguments.
    #  It is not a pairwise (chain1 vs chain2) comparison but a per-structure analysis, like get_domains
    #  or get_ss (as opposed to compare ss). Such structure-level results would better be computed once,
    #  serialized in a readable form and loaded here, rather than recomputed for every pair.
    for chain_domains in (c1_domains__residues, c2_domains__residues):
        for d1, d2 in itertools.combinations(chain_domains, 2):
            serializer_or_analysis_handler.handle('2DA', get_interdomain_surface, get_interdomain_surface(d1, d2),
                                                  d1, d2)

    for d_chain1, d_chain2 in zip(c1_domains__residues, c2_domains__residues):
        for a in comparators__domains__residues_param:
            serializer_or_analysis_handler.handle('domain2domain', a, a(d_chain1, d_chain2), d_chain1, d_chain2)

    # todo sort out those odd ids
    for d_chain1, d_chain2 in zip(c1_domains__residues, c2_domains__residues):
        # Convert DomainResidues to DomainResidueData[ResidueId]
        # Done through the mapping again. It would be nicer if the mapping were attached implicitly to the
        # biopython residue; the alternative, a CustomResidue with bioresidue and label_seq_id fields, is
        # rather ugly and would not work with a chain (which is not used anywhere anyway).
        d_chain1 = DomainResidueData[ResidueId]([ResidueId.from_bio_residue(r, c1_residue_mapping) for r in d_chain1],
                                                d_chain1.structure_id, d_chain1.chain_id, d_chain1.domain_id)
        d_chain2 = DomainResidueData[ResidueId]([ResidueId.from_bio_residue(r, c2_residue_mapping) for r in d_chain2],
                                                d_chain2.structure_id, d_chain2.chain_id, d_chain2.domain_id)

        for a in comparators__domains__residue_ids_param:
            serializer_or_analysis_handler.handle('domain2domain', a, a(d_chain1, d_chain2), d_chain1, d_chain2)

    # two-domain arrangements to two-domain arrangements
    for (d1_chain1, d1_chain2), (d2_chain1, d2_chain2) in itertools.combinations(zip(c1_domains__residues, c2_domains__residues), 2):
        # (in the paper an arrangement was considered only if both the apo and the holo interdomain
        # interface were >= 200 A^2; that filter is not applied here)

        for a in comparators__2domains__residues_param:
            serializer_or_analysis_handler.handle('chain2DA2chain2DA', a, a(d1_chain1, d2_chain1, d1_chain2,
                                                                            d2_chain2),
                                                  d1_chain1,
                                                  d2_chain1, d1_chain2, d2_chain2)

        d1d2_chain1 = d1_chain1 + d2_chain1
        d1d2_chain2 = d1_chain2 + d2_chain2
        serializer_or_analysis_handler.handle('chain2DA2chain2DA', get_rmsd, get_rmsd(d1d2_chain1, d1d2_chain2),
                                              d1d2_chain1,
                                              d1d2_chain2)  # todo hardcoded analysis
コード例 #35
0
    def run(self, struct: Model, chain: Chain):
        """Tell whether `struct` holds at least one acceptable ligand for `chain`.

        A ligand is acceptable when it has at least 6 non-hydrogen atoms and
        at least MIN_RESIDUES_WITHIN_LIGAND residues (other than the ligand's
        own) have a non-hydrogen atom of `chain` within RADIUS of any ligand
        atom. Returns a bool.
        """
        RADIUS = 4.5
        MIN_RESIDUES_WITHIN_LIGAND = 6
        # todo calculate average number of protein heavy atoms in 4.5 Å within ligand atom (paper says 6)

        def has_at_least_n_non_hydrogen_atoms(ligand, n):
            """Return True as soon as `n` non-hydrogen atoms were counted."""
            heavy_count = 0

            for ligand_atom in ligand.get_atoms():
                assert ligand_atom.element is not None
                if ligand_atom.element != 'H':
                    heavy_count += 1

                # todo could eventually just count all atoms; a configurable output giving the ligand
                # atom count (or statistics on the average ligand size) will be wanted anyway
                if heavy_count >= n:
                    return True

            return False

        # ligand has >= 6 non-hydrogen atoms
        candidates = [lig for lig in get_all_ligands(struct)
                      if has_at_least_n_non_hydrogen_atoms(lig, 6)]

        # ligand is within RADIUS in contact with MIN_RESIDUES_WITHIN_LIGAND residues
        # (in original paper they used a program LPC, for ensuring specific interaction of ligand
        #  with at least 6 residue, this is a "shortcut", a temporary condition (simple))
        ns = NeighborSearch(list(chain.get_atoms()))

        qualifying = []

        for candidate in candidates:
            # residues that compose the ligand (ligand can be a chain or a residue)
            own_residues = set()
            # residues touching the ligand; may include the ligand itself (in biopython, non-peptide
            # ligand is in the same chain usually, but in a different residue)
            touching_residues = set()

            for ligand_atom in candidate.get_atoms():
                own_residues.add(ligand_atom.get_parent())
                nearby_atoms = ns.search(ligand_atom.get_coord(), RADIUS)
                # hydrogen atoms are excluded (as in the paper)
                touching_residues.update(
                    near.get_parent() for near in nearby_atoms if near.element != 'H')

            # exclude the ligand itself from the set of contact residues
            contact_count = len(touching_residues - own_residues)
            if contact_count >= MIN_RESIDUES_WITHIN_LIGAND:
                qualifying.append(candidate)

        return bool(qualifying)
コード例 #36
0
ファイル: CalReduceRMS.py プロジェクト: yunxu/chromatin
	points = zeros(shape=(num_lines,3))
	ind = 0
	for line in open(filename).readlines():
		points[ind] = array((line.split()[0:3])) 
		points[ind] = points[ind] * scale
		ind = ind+1
	return points	



#--------------------------------------------------------------------
ref_ptsfilename = "K562.pts"
refid = "ref"
structure = Structure(refid)
model_ref = Model(1)
chain_ref = Chain("A")
points_ref = ReadXYZ(ref_ptsfilename,scale)

# Build one ALA residue holding a single CA atom for every reference point
# selected by IndexList; residue numbers and atom serials start at 1.
num_count = 0
for i in range(0,shape(points_ref[IndexList])[0]):
	num_count = num_count +1
	res_id = (' ',num_count,' ')
	residue = Residue(res_id,'ALA',' ')
	cur_coord = tuple(points_ref[IndexList[i]])
	# The 6th Atom argument is the atom's full name (a string); the
	# running counter belongs only in the serial-number slot after it.
	atom = Atom('CA',cur_coord,0,0,' ','CA',num_count,'C')
	residue.add(atom)
	chain_ref.add(residue)
# Assemble the hierarchy: structure -> model -> chain.
model_ref.add(chain_ref)
structure.add(model_ref)

#--------------------------------------------------------------------