Ejemplo n.º 1
0
    def search(self, center, radius, level="A"):
        """Find every entity that has an atom close to a point.

        An atom/residue/chain/model/structure is reported when at least
        one of its atoms lies within radius of center.  The level
        argument selects which kind of entity is handed back
        (A=atoms, R=residues, C=chains, M=models, S=structures).

        Arguments:
         - center - coordinates of the query point; converted to a
           C-contiguous array of doubles and required to have shape (3,)
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException when level is not a known entity level and
        Exception when center does not have shape (3,).

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        query = numpy.require(center, dtype='d', requirements='C')
        if query.shape != (3, ):
            raise Exception("Expected a 3-dimensional NumPy array")
        # Each point returned by the tree carries the index of its atom.
        neighbors = [self.atom_list[hit.index]
                     for hit in self.kdt.search(query, radius)]
        if level != "A":
            return unfold_entities(neighbors, level)
        return neighbors
Ejemplo n.º 2
0
    def search(self, center, radius, level="A"):
        """Find every entity that has an atom close to a point.

        An atom/residue/chain/model/structure is reported when at least
        one of its atoms lies within radius of center.  The level
        argument selects which kind of entity is handed back
        (A=atoms, R=residues, C=chains, M=models, S=structures).

        Arguments:

         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException when level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        # The return value of the query is not used here; the hits are
        # read back from the tree as indices into self.atom_list.
        self.kdt.search(center, radius)
        hit_indices = self.kdt.get_indices()
        source_atoms = self.atom_list
        neighbors = [source_atoms[idx] for idx in hit_indices]
        if level != "A":
            return unfold_entities(neighbors, level)
        return neighbors
Ejemplo n.º 3
0
    def search(self, center, radius, level="A"):
        """Look up the entities surrounding a point.

        Every atom/residue/chain/model/structure owning at least one
        atom within radius of center is returned.  Which entity type
        comes back is chosen through level (A=atoms, R=residues,
        C=chains, M=models, S=structures).

        Arguments:
         - center - coordinates of the query point; must end up as an
           array of shape (3,) after conversion to C-contiguous doubles
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException for an unknown level, Exception for a
        center whose shape is not (3,).

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        coords = numpy.require(center, dtype='d', requirements='C')
        if coords.shape != (3,):
            raise Exception("Expected a 3-dimensional NumPy array")
        found_points = self.kdt.search(coords, radius)
        # Translate the tree's points back into the stored atom objects.
        found_atoms = [self.atom_list[p.index] for p in found_points]
        return found_atoms if level == "A" else unfold_entities(found_atoms, level)
Ejemplo n.º 4
0
    def search(self, center, radius, level="A"):
        """Look up the entities surrounding a point.

        Every atom/residue/chain/model/structure owning at least one
        atom within radius of center is returned.  Which entity type
        comes back is chosen through level (A=atoms, R=residues,
        C=chains, M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException for an unknown level.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        # Run the query, then fetch the matching atom indices from the tree.
        self.kdt.search(center, radius)
        matched = self.kdt.get_indices()
        pool = self.atom_list
        found_atoms = [pool[position] for position in matched]
        return found_atoms if level == "A" else unfold_entities(found_atoms, level)
Ejemplo n.º 5
0
def CheckClashes(structure, chain, radius=2, max_clashes=25):
    """
    Checks for clashes between a PDB structure and all the atoms on the provided chain.

    For every atom of ``chain`` the residues of ``structure[0]`` having an atom
    within ``radius`` are looked up. Each distinct pair
    (residue of the chain atom, neighbouring residue) counts as one clash; the
    atom's own residue is not counted against itself.

    Returns True as soon as the number of distinct clashing residue pairs
    exceeds ``max_clashes``, False otherwise.

    Arguments:
        -structure: PDB structure; only its model 0 (all chains) is searched.
        -chain: PDB chain structure to check if it has clashes with the main structure.
        -radius: search distance around each atom of the chain (default 2).
        -max_clashes: number of clashing residue pairs tolerated (default 25).
    """
    # Index all atoms of model 0 once; every query below reuses this search object.
    ns = NeighborSearch(unfold_entities(structure[0], 'A'))

    clashing_residues = set()
    for atom in chain.get_atoms():
        own_res = atom.get_parent()
        close_res = ns.search(atom.get_coord(), radius=radius, level="R")

        # An atom is trivially close to its own residue when that residue is
        # part of the searched structure; drop it if present.
        try:
            close_res.remove(own_res)
        except ValueError:
            pass

        for res in close_res:
            clashing_residues.add((own_res, res))
            # Stop early: the answer cannot change once the limit is passed.
            if len(clashing_residues) > max_clashes:
                return True

    return False
Ejemplo n.º 6
0
    def test_from_structure_level(self):
        """Unfold the whole structure to each level and compare with the source."""
        # For the structure and model levels, the first unfolded entity must
        # expose the same residues, in the same order, as the source structure.
        for level in ("S", "M"):
            top_entity = unfold_entities(self.structure, level)[0]
            residue_pairs = zip(self.structure.get_residues(),
                                top_entity.get_residues())
            for expected, actual in residue_pairs:
                assert res_full_id(expected) == res_full_id(actual)

        unfolded_residues = unfold_entities(self.structure, "R")
        for expected, actual in zip(self.structure.get_residues(),
                                    unfolded_residues):
            assert res_full_id(expected) == res_full_id(actual)

        # Atoms must be the very same objects, not just equal ones.
        unfolded_atoms = unfold_entities(self.structure, "A")
        for expected, actual in zip(self.structure.get_atoms(),
                                    unfolded_atoms):
            assert expected is actual
Ejemplo n.º 7
0
def get_residues(file, mod, ch, first_to_remove) -> list:
    '''Collect the proteinogenic residues of one chain of a pdb file.

    Residues whose name is not one of the 22 proteinogenic three-letter
    codes (for instance water molecules and ions) are left out.

    Args:
        file (str): absolute/relative path for pdb file; the characters
        ``file[-8:-4]`` are used as the structure ID, which assumes the
        file is named after the 4-character protein ID
        mod (int): selects the wanted model (must be >= 0)
        ch (int): selects the wanted chain (must be >= 0)
        first_to_remove (int): number of residues to remove from the beginning
        of the chain (e.g. because they are added artificially to make the
        protein crystallize)

    Returns:
        list: proteinogenic residues of the selected chain, without the
        first ``first_to_remove`` of them
    '''

    parser = pdb.PDBParser()
    structure = parser.get_structure(file[-8:-4], file)

    # Walk down structure -> model -> chain -> residues
    selected_model = unfold_entities(structure, 'M')[mod]
    selected_chain = unfold_entities(selected_model, 'C')[ch]
    all_residues = unfold_entities(selected_chain, 'R')

    allowed_names = (
        'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS', 'ILE', 'LYS', 'LEU',
        'MET', 'ASN', 'PYL', 'PRO', 'GLN', 'ARG', 'SER', 'THR', 'SEC', 'VAL',
        'TRP', 'TYR',
    )
    kept = [
        residue for residue in all_residues
        if residue.get_resname() in allowed_names
    ]

    return kept[first_to_remove:]
Ejemplo n.º 8
0
def get_atoms_of_res_sidechain(residue):
    """
    finds all atoms of a given sidechain for a residue
    removes the atoms named C, O and N; every other atom is kept

    :param residue: entity whose atoms are obtained with
        ``unfold_entities(residue, 'A')``
    :return: new list with the atoms whose name is not 'C', 'O' or 'N',
        in the order ``unfold_entities`` returned them
    """
    excluded_names = ('C', 'O', 'N')
    # Filter into a new list. Calling remove() on the list being iterated
    # shifts the remaining items, so the loop skips the atom that follows
    # each removed one and can leave an excluded atom in the result.
    return [
        atom for atom in unfold_entities(residue, 'A')
        if atom.get_name() not in excluded_names
    ]
Ejemplo n.º 9
0
    def test_entities_not_homogenous(self):
        """Unfolding a list that mixes an atom and a chain raises PDBException."""
        first_atom = next(self.structure.get_atoms())
        first_chain = next(self.structure.get_chains())
        mixed_entities = [first_atom, first_chain]

        self.assertRaises(PDBException, unfold_entities, mixed_entities, "A")
Ejemplo n.º 10
0
 def test_invalid_level(self):
     """Unfolding to the unknown level "Z" raises PDBException."""
     self.assertRaises(PDBException, unfold_entities, self.structure, "Z")
Ejemplo n.º 11
0
def build_align_file(input_pdb, pdbcode, output_align_file="protein.ali"):
    """
    Function that takes a PDB filepath, detects missing residues and builds a MODELLER align file with this information,
    to be later used for completing residues.

    Two entries are written: a gapped sequence, where every missing residue
    is a gap, and a "_fill" sequence where those gaps are replaced by the
    missing residues. Chains are separated by "/" and each sequence ends
    with "*".

    :param input_pdb: PDB filepath
    :param pdbcode: Code identifier for the PDB structure. Ex: 2y8d
    :param output_align_file: Filepath for output align file. An existing
        file at this path is removed first.
    :return: None
    """

    # Read structure and extract present and missing residues
    pdbparser = PDBParser()
    structure = pdbparser.get_structure(pdbcode, input_pdb)
    chains = unfold_entities(structure, "C")  # Get chains

    missing_residues = structure.header[
        "missing_residues"]  # Get missing residues from whole structure

    # Remove alignment file if exists
    try:
        os.remove(output_align_file)
    except FileNotFoundError:
        pass

    # Where to store the sequences from structure separated by chains/index
    whole_gapped = []
    whole_full = []

    for chain in chains:
        chain_id = chain.get_id()
        residues = unfold_entities(chain, "R")  # Get residues of chain
        # Materialized as a list because it is iterated twice below.
        missing_res_chain = list(
            get_chain_missing_res(missing_residues, chain_id))

        # Residues with empty id[0] are the 'real' residues, others are solvent or different.
        residues_list = [(residue.id[1], seq1(residue.resname))
                         for residue in residues if residue.id[0] == " "]
        for mis_res in missing_res_chain:
            insert_gap(mis_res["ssseq"], residues_list)

        # Sequence with gaps
        try:
            gapped_seq = "".join(np.array(residues_list)[:, 1])
        except IndexError:
            # Warn the user if the residues list is empty (probably HETATOMS)
            msg = "Residues list for chain {} is empty. Check PDB, probably chain is " \
                  "full of HETATOM type atoms. Leaving chain empty in align " \
                  "file.".format(chain)
            warnings.warn(msg)
            gapped_seq = ""  # Empty seq for chain full of HETATOM or non-standard

        # Full sequence without gaps by replacing gaps with the missing res.
        # Only this chain's missing residues are used: filling from the
        # whole-structure list would put another chain's residues into the
        # gaps of every chain after the first.
        # NOTE(review): assumes the missing residues are listed in the same
        # order as their gaps appear in the sequence - confirm against
        # insert_gap / get_chain_missing_res.
        full_seq = gapped_seq
        for mis_res in missing_res_chain:
            full_seq = full_seq.replace("-", seq1(mis_res["res_name"]), 1)

        whole_gapped.append(gapped_seq)
        whole_full.append(full_seq)

    # Building whole strings to write to file. "/" char separates chains.
    whole_gapped_str = "/".join(whole_gapped)
    whole_full_str = "/".join(whole_full)

    # Writing to file
    # Remember sequences have to end with the * character
    with open(output_align_file, "a+") as file:
        # Writing structure/gapped section
        file.write(">P1;" + structure.id + "\n")
        file.write("structureX:" + structure.id + ":FIRST:@ END:@" + 5 * ":." +
                   "\n")
        # Lines are wrapped at 75 characters for the align file
        for line in textwrap.wrap(whole_gapped_str + "*",
                                  width=75,
                                  break_on_hyphens=False):
            file.write("%s\n" % line)
        # Writing full sequence section
        file.write(">P1;" + structure.id + "_fill\n")
        file.write("sequence:" + structure.id + ":FIRST:@ END:@" + 5 * ":." +
                   "\n")
        for line in textwrap.wrap(whole_full_str + "*",
                                  width=75,
                                  break_on_hyphens=False):
            file.write("%s\n" % line)
Ejemplo n.º 12
0
def _voxels_to_residue_numbers(structure, voxels, voxel_atom_ratio):
    """Return the residue numbers (as strings) of the atoms selected by voxels.

    Each voxel is credited to the first atom ``structure.convert_voxels``
    returns for it. An atom is selected when its voxel count divided by
    ``atom_volume(structure, atom)`` is at least ``voxel_atom_ratio``.
    """
    voxel_counts = Counter()
    for v in voxels:
        atoms = structure.convert_voxels(v, level="A")
        if len(atoms) > 0:
            voxel_counts[atoms[0]] += 1

    selected_atoms = [
        atom for atom, count in voxel_counts.items()
        if float(count) / atom_volume(structure, atom) >= voxel_atom_ratio
    ]

    return [
        str(r.get_id()[1]) for r in unfold_entities(selected_atoms, "R")
    ]


def view_in_pymol(id,
                  predicted_voxels=None,
                  truth_voxels=None,
                  voxel_atom_ratio=.2):
    """Write a PyMOL command script highlighting binding-site residues.

    The script is written to ``"<id>_pymol.cmd"``. It fetches ``id``, shows
    it as a gray surface, and adds one selection per voxel set given:
    true residues in orange, predicted residues in magenta and, when both
    sets are given, predicted-but-not-true residues in blue.

    :param id: string of the form ``"<pdb>.<chain>"``
    :param predicted_voxels: iterable of predicted voxels, or None to skip
    :param truth_voxels: iterable of true voxels, or None to skip
    :param voxel_atom_ratio: minimum voxel-count / atom-volume ratio for an
        atom to count as part of a site
    :return: None
    """
    pdb, chain = id.split(".")
    structure = Structure.from_pdb(pdb, chain, rotate=False)

    cmd = """fetch {id}
remove hetatm
hide everything, {id}
show surface, {id}
color gray90, {id}
""".format(id=id)

    if truth_voxels is not None:
        truth_residues = _voxels_to_residue_numbers(
            structure, truth_voxels, voxel_atom_ratio)
        truth_resi = "+".join(truth_residues)

        cmd += """select true_binding_site, resi {true_resi}
color orange, true_binding_site
""".format(true_resi=truth_resi)

    if predicted_voxels is not None:
        predicted_residues = _voxels_to_residue_numbers(
            structure, predicted_voxels, voxel_atom_ratio)
        # Built from the predicted residues; joining the truth residues here
        # selected the wrong site and failed when no truth voxels were given.
        predicted_resi = "+".join(predicted_residues)

        cmd += """select predicted_binding_site, resi {predicted_resi}
color magenta, predicted_binding_site
""".format(predicted_resi=predicted_resi)

    if truth_voxels is not None and predicted_voxels is not None:
        false_positive_residues = set(predicted_residues) - set(truth_residues)
        fp_resi = "+".join(false_positive_residues)
        cmd += """select false_positive_binding_site, resi {fp_resi}
color blue, false_positive_binding_site
""".format(fp_resi=fp_resi)

    with open("{}_pymol.cmd".format(id), "w") as f:
        # Same output as the Python 2 "print >> f, cmd": the text plus a newline.
        f.write(cmd + "\n")
Ejemplo n.º 13
0
def SuperimposeStructures(object_list, complex, RMSD_threshold):
    """
    Superimposes chains from objects in object_list to chains in complex. Adds the non-clashing chains to the complex and removes the structure from the object_list.
    Returns the complex with the new added chains, and the updated object_list with
    the structures that were superimposed below the RMSD threshold removed.

    Arguments:
        -object_list : list of PDB objects that have to be superimposed and added to the complex. Modified in place.
        -complex: main structure to which individual chains from the object_list have to be added after superimposition.
        -RMSD_threshold: threshold for the RMSD value of the superposition between a chain of an object an the same chain on the complex.
        Default value for the program is 0.5.

    Returns:
        Tuple (complex, object_list).
    """

    # Get core chain to start reconstruction
    core = FindCoreChain(object_list)
    if options.verbose:
        sys.stderr.write("Chain defined as core to superimpose: %s\n" % (core))
        sys.stderr.write("Added to the final complex:\n")

    # Declare Superimpose object
    sup = Superimposer()
    ref_struct = None

    # Iterate over a copy because structures are removed from object_list below
    for structure in list(object_list):

        # select the first structure with the core chain to be the reference
        try:
            if core in structure[0] and not ref_struct:
                ref_struct = copy.deepcopy(structure)
                complex.add(ref_struct[0])

        except Exception:
            # Best effort, as before, but no longer swallowing
            # KeyboardInterrupt / SystemExit.
            # NOTE(review): presumably meant for complex.add() failing when
            # the complex already holds a model 0 - confirm.
            pass

        # if the structure contains the core chain, superimpose that to the chain with same name in ref structure set before
        # NOTE(review): ref_struct is only ever a deep copy, so the identity
        # test looks always true and the reference structure itself goes
        # through this branch as well - confirm this is intended.
        if core in structure[0] and (structure is not ref_struct):

            sup.set_atoms(unfold_entities(ref_struct[0][core], 'A'),
                          unfold_entities(structure[0][core], 'A'))
            sup.apply(structure[0])

            RMSD = float(sup.rms)
            print(RMSD)
            # check for clashes before adding new chain to complex
            if RMSD < RMSD_threshold:
                for chain in structure[0]:

                    if chain.get_id() != core:
                        if not CheckClashes(complex, chain):
                            chain_copy = copy.deepcopy(chain)

                            # The complex does not change while a free id is
                            # searched, so its chain ids are collected once.
                            used_ids = {
                                a.get_id() for a in complex.get_chains()
                            }
                            # Try ids starting from chr(65) ("A") until one is
                            # not yet used by a chain of the complex.
                            N = 65
                            while chain_copy.get_id() in used_ids:
                                try:
                                    chain_copy.id = chr(N)
                                except ValueError:
                                    pass
                                N += 1

                            complex[0].add(chain_copy)

                            if options.verbose:
                                sys.stderr.write("\tChain %s\n" % (chain.id))

                object_list.remove(structure)

    return (complex, object_list)