def SplitChain(PDB_objects):
    """
    Split PDB objects by chain, writing one PDB and one FASTA file per chain.

    For every distinct chain id of every object the files
    "<pdb id>_<chain id>.pdb" and "<pdb id>_<chain id>.fa" are written to the
    current working directory. The FASTA file holds a single record whose
    sequence is the concatenation of all polypeptides built from that chain
    (empty when the chain yields no polypeptide).

    Arguments:

    PDB_objects: list of PDB objects (with many chains) generated by the PDB parser.

    Returns the list of "<pdb id>_<chain id>" prefixes, in processing order.
    """

    file_prefixes = []

    for pdb in PDB_objects:
        seen_chain_ids = set()
        io = PDBIO()
        peptide_builder = PPBuilder()

        for chain in pdb.get_chains():
            chain_id = chain.get_id()
            # A chain id can be yielded more than once; only the first
            # occurrence is exported.
            if chain_id in seen_chain_ids:
                continue
            seen_chain_ids.add(chain_id)

            prefix = pdb.get_id() + "_" + chain_id

            # PDB file holding only this chain.
            io.set_structure(chain)
            io.save(prefix + ".pdb")
            file_prefixes.append(prefix)

            # FASTA file: peptides are built from this chain only and the
            # file is opened exactly once, so no fragment is overwritten.
            sequence = "".join(
                str(pp.get_sequence())
                for pp in peptide_builder.build_peptides(chain))
            with open(prefix + ".fa", "w") as fasta:
                fasta.write(">" + prefix + "\n")
                fasta.write(sequence)

    return file_prefixes
Example #2
0
def write_backbone_angles(chain,
                          region=None,
                          offset=0,
                          outfile=sys.stdout,
                          header=False):
    """
    Print one tab-separated row of phi/psi backbone angles per residue.

    Each row holds: chain id, residue position, one-letter residue code
    (through ``seq1``), position plus ``offset``, phi and psi. An angle that
    the polypeptide reports as None is printed as 'NA'; any other angle is
    multiplied by the externally defined ``RAD_FACTOR``.

    chain: chain whose polypeptides are built with PPBuilder
    region: optional (first, last) pair of residue positions to keep, both
        inclusive; when omitted, every position from 0 upwards is kept
    offset: value added to the position in the fourth column
    outfile: file-like object receiving the rows
    header: when true, the externally defined ``HEADER`` is printed first
    """
    if region is None:
        region = (0, float('inf'))

    peptides = PPBuilder().build_peptides(chain)

    if header:
        print(HEADER, file=outfile)

    for peptide in peptides:
        for residue, (phi, psi) in zip(peptide, peptide.get_phi_psi_list()):
            position = residue.get_id()[1]
            if not region[0] <= position <= region[1]:
                continue
            row = (
                chain.id,
                position,
                seq1(residue.get_resname()),
                position + offset,
                'NA' if phi is None else phi * RAD_FACTOR,
                'NA' if psi is None else psi * RAD_FACTOR,
            )
            print(*row, sep='\t', file=outfile)
def get_secondary_structure(structure):
    """
    Label residues with a Ramachandran region derived from their phi/psi angles.

    The phi/psi angles of every polypeptide are converted from radians to
    degrees and matched against rectangular regions given as
    (phi start, psi start, width, height, label, colour) in degrees; the first
    matching region wins. Residues without both angles keep the "" label.

    structure: iterable whose items are passed to PPBuilder.build_peptides
        (named ``chain`` in the loop below)

    Returns a list of ``N`` labels, where ``N`` is a name defined outside this
    function.
    """
    import math  # local import so the block does not rely on file-level imports

    rama_ss_ranges = [(-180, -180, 80, 60, 'E', 'blue'),
                      (-180, 50, 80, 130, 'E', 'blue'),
                      (-100, -180, 100, 60, 'P', 'green'),
                      (-100, 50, 100, 130, 'P', 'green'),
                      (-180, -120, 180, 170, 'H', 'red'),
                      (0, -180, 180, 360, 'L', 'yellow')]

    ppb = PPBuilder()  # PolyPeptideBuilder
    ss = ["" for _ in range(N)]
    for chain in structure:
        for pp in ppb.build_peptides(chain):
            # [(phi_residue_1, psi_residue_1), ...] in radians
            for i, (phi, psi) in enumerate(pp.get_phi_psi_list()):
                # The terminal residues of a fragment lack one of the angles.
                if phi is None or psi is None:
                    continue
                # The region table is expressed in degrees, so the angles
                # must be converted before they are compared.
                phi_deg = math.degrees(phi)
                psi_deg = math.degrees(psi)
                for x, y, w, h, ss_c, _color in rama_ss_ranges:
                    if x <= phi_deg < x + w and y <= psi_deg < y + h:
                        # NOTE(review): i is the position inside the current
                        # polypeptide, so later fragments/chains overwrite
                        # earlier entries - confirm whether a running residue
                        # index was intended.
                        ss[i] = ss_c
                        break
    return ss
def CreateJoinedFastas(input_PDB_objects):
    """
    Write the sequences of several PDB objects into one joined FASTA file.

    The file name is every object id followed by "_", concatenated, plus
    ".fa" (for ids "a" and "b": "a_b_.fa"). Each object contributes one
    record: a ">id" header followed by the concatenated sequences of all its
    polypeptides. Records are separated by a newline; there is no trailing
    newline.

    Arguments:

    input_PDB_objects: list of PDB objects whose sequence will be added to the FASTA file.

    Returns the name of the file that was written.
    """

    polipeptide = PPBuilder()

    filename = "".join(obj.get_id() + "_" for obj in input_PDB_objects) + ".fa"

    records = []
    for obj in input_PDB_objects:
        sequence = "".join(
            str(polipep.get_sequence())
            for polipep in polipeptide.build_peptides(obj))
        records.append(">" + obj.get_id() + "\n" + sequence)

    # The context manager guarantees the data is flushed and the handle is
    # closed before the file name is handed back to the caller.
    with open(filename, 'w') as joined_fasta:
        joined_fasta.write("\n".join(records))

    return filename
Example #5
0
    def compute_secondary_structure(self, model):
        """
        Label every residue of a model with its Ramachandran region.

        Phi/psi angles come from the polypeptides built for each chain and are
        converted to degrees. A residue lacking either angle is labelled '-';
        a residue whose angles fall in none of ``self._ranges`` is labelled
        None. When fewer residues than ``self._residues`` were found, '-'
        entries are added under the chain id 'Z' to make up the difference.

        :param model: one model (iterable of chains)
        :return: flat list of labels, grouped by chain id in first-seen order
        """
        builder = PPBuilder()
        # chain id -> [residues, phi in degrees, psi in degrees]
        angles_by_chain = {}
        seen = 0

        for chain in model:
            for peptide in builder.build_peptides(chain):
                torsions = peptide.get_phi_psi_list()
                for residue, (phi, psi) in zip(peptide, torsions):
                    if phi is None or psi is None:
                        # First/last residue of a fragment: no full angle pair.
                        phi_deg = psi_deg = math.nan
                    else:
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                    columns = angles_by_chain.setdefault(
                        chain.id, [[], [], []])
                    columns[0].append(residue)
                    columns[1].append(phi_deg)
                    columns[2].append(psi_deg)
                    seen += 1

        # NaN padding for residues that produced no angles at all.
        missing = self._residues - seen
        if missing > 0:
            padding = angles_by_chain.setdefault('Z', [[], [], []])
            for _ in range(missing):
                padding[0].append(None)
                padding[1].append(math.nan)
                padding[2].append(math.nan)

        # Match each angle pair against the Ramachandran regions.
        labels = []
        for _residues, phis, psis in angles_by_chain.values():
            for phi, psi in zip(phis, psis):
                if math.isnan(phi) and math.isnan(psi):
                    labels.append('-')
                    continue
                matches = (
                    region
                    for x, y, width, height, region, _color in self._ranges
                    if x <= phi < x + width and y <= psi < y + height)
                labels.append(next(matches, None))

        return labels
Example #6
0
 def test_insertions(self):
     """Build polypeptides from 4ZHL, a file with residue insertion codes."""
     expected_sequence = (
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
     )
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = MMCIFParser(QUIET=1).get_structure(
             "example", "PDB/4ZHL.cif")

     for builder in (PPBuilder(), CaPPBuilder()):
         # Non-standard amino acids are allowed here.
         peptides = builder.build_peptides(structure[0], False)
         self.assertEqual(len(peptides), 2)

         # Only the first segment is inspected: its end points ...
         first_segment = peptides[0]
         self.assertEqual(first_segment[0].get_id()[1], 16)
         self.assertEqual(first_segment[-1].get_id()[1], 244)

         # ... and its sequence.
         sequence = first_segment.get_sequence()
         self.assertIsInstance(sequence, Seq)
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(expected_sequence, str(sequence))
Example #7
0
def read_pdb_file(file_name, name=None):
    """
    Extract structure, residues, sequence and atoms from a PDB file.

        file_name: path of pdb file
        name: name of the structure (default: file_name without its extension)
        return:: (protein_name, structure, residues, sequence, atoms)

            protein_name: base name of file_name with ".pdb" removed
            structure: structure object returned by PDBParser
            residues: residues of all polypeptides, in polypeptide order
            sequence: combined sequence (for all polypeptides)
            atoms: all atoms of the structure

    Polypeptides are built with PPBuilder; CaPPBuilder is used instead when
    PPBuilder finds none.

    Raises ValueError when the parsed structure does not have exactly one
    child (``len(structure) != 1``).
    """
    structure_name = splitext(file_name)[0] if name is None else name

    structure = PDBParser().get_structure(structure_name, file_name)
    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " +
                         structure_name)

    atoms = Selection.unfold_entities(structure, 'A')

    polypeptides = PPBuilder().build_peptides(structure)
    if not polypeptides:
        polypeptides = CaPPBuilder().build_peptides(structure)

    sequence_parts = []
    residues = []
    for polypeptide in polypeptides:
        sequence_parts.append(str(polypeptide.get_sequence()))
        residues.extend(polypeptide)
    sequence = ''.join(sequence_parts)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residues, sequence, atoms
Example #8
0
    def test_ppbuilder_torsion(self):
        """Check the first phi/psi angles of each PPBuilder polypeptide."""
        # One row per polypeptide: psi of the first residue, then (phi, psi)
        # of the second and third residues. The first phi must be None.
        expected = (
            (-0.46297171497725553,
             (-1.0873937604007962, 2.1337707832637109),
             (-2.4052232743651878, 2.3807316946081554)),
            (-0.6810077089092923,
             (-1.2654003477656888, -0.58689987042756309),
             (-1.7467679151684763, -1.5655066256698336)),
            (-0.73222884210889716,
             (-1.1044740234566259, -0.69681334592782884),
             (-1.8497413300164958, 0.34762889834809058)),
        )
        peptides = PPBuilder().build_peptides(self.structure)

        for index, (first_psi, second, third) in enumerate(expected):
            angles = peptides[index].get_phi_psi_list()
            self.assertIsNone(angles[0][0])
            self.assertAlmostEqual(angles[0][1], first_psi, places=3)
            self.assertAlmostEqual(angles[1][0], second[0], places=3)
            self.assertAlmostEqual(angles[1][1], second[1], places=3)
            self.assertAlmostEqual(angles[2][0], third[0], places=3)
            self.assertAlmostEqual(angles[2][1], third[1], places=3)
Example #9
0
 def _pp(self, pdb_path, chain_id):
     """Return one polypeptide for a chain by merging all its fragments.

     The structure is parsed from ``pdb_path`` (named after the file stem) and
     the chain ``chain_id`` of its first model is used. The first fragment is
     extended in place with the remaining ones and returned; an IndexError is
     raised when the chain yields no fragment.
     """
     structure = PDBParser().get_structure(Path(pdb_path).stem, pdb_path)
     chain = structure[0][chain_id]
     fragments = PPBuilder().build_peptides(chain)
     merged = fragments[0]
     for fragment in fragments[1:]:
         merged += fragment
     return merged
Example #10
0
 def get_structure_sequence(struct):
     # type: (Structure) -> str
     """
     Concatenate the sequences of all polypeptides PPBuilder finds.
     :param struct: Structure object
     :return: struct sequence as a single string ('' if no polypeptide)
     """
     pieces = []
     for peptide in PPBuilder().build_peptides(struct):
         pieces.append(str(peptide.get_sequence()))
     return ''.join(pieces)
Example #11
0
 def test_parser(self):
     """Extract polypeptides from 1A8O with both builders."""
     # Non-standard MSE residues show up as M in the permissive sequence.
     permissive_sequence = ("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                            "NANPDCKTILKALGPGATLEEMMTACQG")
     # Strict mode ignores MSE 151 at the start and breaks the chain at
     # MSE 185 and MSE 214,215: (first residue, last residue, sequence).
     strict_fragments = (
         (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
         (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
         (216, 220, "TACQG"),
     )

     structure = MMCIFParser().get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)

     for builder in (PPBuilder(), CaPPBuilder()):
         # serial_num (model column) must be stored properly.
         self.assertEqual(structure[0].serial_num, 1)

         # Permissive pass: non-standard amino acids allowed.
         peptides = builder.build_peptides(structure[0], False)
         self.assertEqual(len(peptides), 1)
         whole = peptides[0]
         self.assertEqual(whole[0].get_id()[1], 151)
         self.assertEqual(whole[-1].get_id()[1], 220)
         sequence = whole.get_sequence()
         self.assertTrue(isinstance(sequence, Seq))
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(permissive_sequence, str(sequence))

         # Strict pass: standard amino acids only.
         peptides = builder.build_peptides(structure[0], True)
         self.assertEqual(len(peptides), 3)
         for fragment, expected in zip(peptides, strict_fragments):
             first, last, expected_sequence = expected
             self.assertEqual(fragment[0].get_id()[1], first)
             self.assertEqual(fragment[-1].get_id()[1], last)
             sequence = fragment.get_sequence()
             self.assertTrue(isinstance(sequence, Seq))
             self.assertEqual(sequence.alphabet, generic_protein)
             self.assertEqual(expected_sequence, str(sequence))
Example #12
0
 def test_polypeptide(self):
     """Exercise both polypeptide builders and Polypeptide methods on 1A8O."""
     expected_sequences = (
         "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
         "TETLLVQNANPDCKTILKALGPGATLEE",
         "TACQG",
     )
     # One row per polypeptide: psi of the first residue, then (phi, psi)
     # of the second and third residues. The first phi must be None.
     expected_torsions = (
         (-0.46297171497725553,
          (-1.0873937604007962, 2.1337707832637109),
          (-2.4052232743651878, 2.3807316946081554)),
         (-0.6810077089092923,
          (-1.2654003477656888, -0.58689987042756309),
          (-1.7467679151684763, -1.5655066256698336)),
         (-0.73222884210889716,
          (-1.1044740234566259, -0.69681334592782884),
          (-1.8497413300164958, 0.34762889834809058)),
     )
     expected_ca_serials = [
         10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
         139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
         251, 260, 267, 276, 284
     ]
     expected_taus = (0.3597907225123525, 0.43239284636769254,
                      0.99820157492712114)
     expected_thetas = (1.6610069445335354, 1.7491703334817772,
                        2.0702447422720143)

     structure = PDBParser(PERMISSIVE=True).get_structure(
         "scr", "PDB/1A8O.pdb")

     # C-N based builder: sequences, then torsion angles.
     peptides = PPBuilder().build_peptides(structure)
     for index, expected in enumerate(expected_sequences):
         self.assertEqual(str(peptides[index].get_sequence()), expected)
     for index, (first_psi, second, third) in enumerate(expected_torsions):
         angles = peptides[index].get_phi_psi_list()
         self.assertEqual(angles[0][0], None)
         self.assertAlmostEqual(angles[0][1], first_psi, places=3)
         self.assertAlmostEqual(angles[1][0], second[0], places=3)
         self.assertAlmostEqual(angles[1][1], second[1], places=3)
         self.assertAlmostEqual(angles[2][0], third[0], places=3)
         self.assertAlmostEqual(angles[2][1], third[1], places=3)

     # CA-CA based builder: sequences, CA atoms, tau and theta angles.
     peptides = CaPPBuilder().build_peptides(structure)
     for index, expected in enumerate(expected_sequences):
         self.assertEqual(str(peptides[index].get_sequence()), expected)
     self.assertEqual(
         [ca.serial_number for ca in peptides[0].get_ca_list()],
         expected_ca_serials)
     taus = peptides[1].get_tau_list()
     for index, expected in enumerate(expected_taus):
         self.assertAlmostEqual(taus[index], expected, places=3)
     thetas = peptides[2].get_theta_list()
     for index, expected in enumerate(expected_thetas):
         self.assertAlmostEqual(thetas[index], expected, places=3)
def is_protein(chain):
    """
    Tell whether PPBuilder finds a non-empty polypeptide in the chain.

    :param chain: entity handed to PPBuilder.build_peptides
    :return: True if any built polypeptide has a non-empty sequence,
        False otherwise
    """
    peptides = PPBuilder().build_peptides(chain)
    return any(len(peptide.get_sequence()) > 0 for peptide in peptides)
Example #14
0
    def testModels(self):
        """Parse a multi-model mmCIF file with both parsers and build peptides."""
        expected_sequence = (
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR")

        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        for builder in (PPBuilder(), CaPPBuilder()):
            # serial_num (model column) must be stored properly.
            for model_index in range(3):
                self.assertEqual(structure[model_index].serial_num,
                                 model_index + 1)

            # First allow non-standard amino acids (non-standard MSE would
            # be shown as M), then the strict version with only standard
            # amino acids; both give the same single polypeptide.
            for aa_only in (False, True):
                peptides = builder.build_peptides(structure[0], aa_only)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Start and end positions
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Sequence
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(sequence))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
def chain_to_one_pp(chain):
    """
    Return a single polypeptide for the chain.

    When PPBuilder yields exactly one polypeptide it is returned as is.
    Otherwise a warning is printed and the first polypeptide is extended in
    place with all the following ones before being returned (an IndexError is
    raised when there is no polypeptide at all).
    """
    fragments = PPBuilder().build_peptides(chain)

    if len(fragments) == 1:
        return fragments[0]

    print('warning ', len(fragments),
          ' polypeptides from one chain, extending first pp')
    for extra in fragments[1:]:
        fragments[0].extend(extra)

    return fragments[0]
Example #16
0
def structure_filtered_dca_get_sequence_from_structure(structure):
    """
    Return the sequence of a structure as one string without newlines.

    Polypeptides are built with ``PPBuilder(radius=10.0)`` and
    ``aa_only=False``; their sequences are concatenated in order.
    """
    from Bio.PDB import PPBuilder

    builder = PPBuilder(radius=10.0)
    lines = [
        '%s\n' % peptide.get_sequence()
        for peptide in builder.build_peptides(structure, aa_only=False)
    ]
    return ''.join(lines).replace('\n', '')
Example #17
0
def run_test():
    """
    Print a summary of "PDB/a_structure.pdb" and of its polypeptides.

    For every model, chain and residue a line with its size is printed,
    together with notes about point mutations and disordered atoms. Then the
    polypeptides of ``structure[1]`` are printed, built first with PPBuilder
    (C-N) and then with CaPPBuilder (CA-CA), and finally
    ``quick_neighbor_search_test`` is run.

    Every ``print`` call takes a single argument, so the output is the same
    whether ``print`` is the Python 2 statement or the Python 3 function.
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." %
                  (chain_id, len(chain)))
            for residue in chain.get_list():
                hetfield, resseq, icode = residue.get_id()
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms." %
                      (hetfield, resseq, icode, len(residue)))
                # check if there is disorder due to a point mutation --- this is rare
                if residue.is_disordered() == 2:
                    print("\t\t\tThere is a point mutation present in the "
                          "crystal at this position.")
                    s = "\t\t\tResidues at this position are " + "".join(
                        resname + " "
                        for resname in residue.disordered_get_id_list())
                    # drop the last character (the trailing space) before
                    # closing the sentence
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if residue.is_disordered() == 1:
                    disordered_count = sum(
                        1 for atom in residue.get_list()
                        if atom.is_disordered())
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered "
                              "atoms." % disordered_count)

    print("Polypeptides using C-N")
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
Example #18
0
def run(infile, splitpdb):
    """
    Write FASTA and PDB files for the polypeptides of a PDB file.

    The structure is parsed from ``infile``. Output names start with
    ``prefix``, the base name of ``infile`` without its extension, and are
    written to the current working directory.

    splitpdb == 0: every polypeptide sequence goes to "<prefix>.fasta" and
        the whole structure is saved again as "<prefix>_clean.pdb".
    splitpdb == 1: every polypeptide gets its own
        "<prefix>_<chain>.<index>.fasta" plus a matching ".pdb" file cut out
        with ``Dice_RPL.extract``.
    Other values are not supported: writing the first sequence fails.

    A chain without any polypeptide is extracted as a whole (first to last
    residue) into "<prefix>_<chain>.0.pdb", whatever ``splitpdb`` is; an
    empty chain is skipped.

    Returns the list of chain ids, one entry per chain of every model.
    """
    struct = PDBParser().get_structure('mystruct', infile)
    ppb = PPBuilder()
    prefix = os.path.splitext(os.path.basename(infile))[0]

    # Combined FASTA file, only used when the output is NOT split.
    seqfile = open(prefix + '.fasta', 'w') if splitpdb == 0 else None
    try:
        if splitpdb == 0:
            pdbio = PDBIO_RPL.PDBIO()
            pdbio.set_structure(struct)
            pdbio.save(prefix + '_clean.pdb')

        chain_ids = []
        for model in struct:
            for chain in model:
                chain_ids.append(chain.id)
                peptides = ppb.build_peptides(chain)

                if not peptides:
                    # Also split chains that do not consist of amino acids.
                    residues = chain.get_list()
                    if residues:
                        # The range runs from the first to the last residue
                        # number; the single pseudo-fragment gets index 0.
                        Dice_RPL.extract(
                            struct, chain.id, residues[0].id[1],
                            residues[-1].id[1],
                            prefix + '_' + chain.id + '.0.pdb')
                    continue

                for index, pp in enumerate(peptides):
                    seq = pp.get_sequence()
                    record = '>%s %s\n%s\n' % (
                        prefix + '_chain_' + chain.id + '_' + str(index),
                        len(seq), seq)
                    if splitpdb == 1:
                        stem = prefix + '_' + chain.id + '.' + str(index)
                        with open(stem + '.fasta', 'w') as fragment_fasta:
                            fragment_fasta.write(record)
                        Dice_RPL.extract(struct, chain.id, pp[0].id[1],
                                         pp[-1].id[1], stem + '.pdb')
                    else:
                        seqfile.write(record)
    finally:
        # Close the combined file even when parsing or writing fails.
        if seqfile is not None:
            seqfile.close()

    return chain_ids
Example #19
0
    def get_sequence(self, chain_id):
        """
        Return the amino acid sequence of one chain as a string.

        Input:
            self: uses the Biopython.PDB structure stored in ``self.structure``
            chain_id: String (usually in ['A','B', 'C' ...]. The number of
                chains depends on the specific protein and the resulting
                structure)
        Return:
            The amino acid sequence (single-letter alphabet) of the chain
            ``chain_id`` in the first model, as a string. The sequences of
            all polypeptide fragments PPBuilder finds in the chain are
            concatenated in order; the result is '' when there is none.
        """
        chain = self.structure[0][chain_id]
        # A chain with gaps is split into several fragments by PPBuilder, so
        # all of them are joined rather than only the first one.
        return ''.join(
            str(pp.get_sequence()) for pp in PPBuilder().build_peptides(chain))
Example #20
0
 def test_c_n(self):
     """Build polypeptides of model 1 with the C-N based PPBuilder."""
     expected_sequence = ("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                          "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC")
     peptides = PPBuilder().build_peptides(self.structure[1])
     self.assertEqual(len(peptides), 1)
     peptide = peptides[0]
     # Start and end positions
     self.assertEqual(peptide[0].get_id()[1], 2)
     self.assertEqual(peptide[-1].get_id()[1], 86)
     # Sequence
     sequence = peptide.get_sequence()
     self.assertTrue(isinstance(sequence, Seq))
     self.assertEqual(sequence.alphabet, generic_protein)
     self.assertEqual(expected_sequence, str(sequence))
Example #21
0
    def test_ppbuilder_real_nonstd(self):
        """Run PPBuilder on a real PDB file, allowing non-standard amino acids."""
        # Non-standard MSE residues are shown as M.
        expected_sequence = (
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG"
        )
        peptides = PPBuilder().build_peptides(self.structure, False)
        self.assertEqual(len(peptides), 1)

        # Start and end positions
        peptide = peptides[0]
        self.assertEqual(peptide[0].get_id()[1], 151)
        self.assertEqual(peptide[-1].get_id()[1], 220)

        # Sequence
        sequence = peptide.get_sequence()
        self.assertIsInstance(sequence, Seq)
        self.assertEqual(expected_sequence, sequence)
Example #22
0
    def get_ignored_res(file: str):
        """
        Collect the phi/psi angles of every residue found in a PDB file.

        :param file: path of the PDB file to parse
        :return: tuple (output, ignored, x, y) where
            output maps "<chain id>:<resname><residue number>" to the
                (phi, psi) tuple returned by ``get_phi_psi_list``,
            ignored lists the (key, angles) pairs lacking phi or psi,
            x and y list phi and psi (multiplied by 180 / pi) of the
                residues that have both angles.

        The same key is produced for the same chain/residue in every model,
        so a later model overwrites the angles of an earlier one.
        """
        x, y, ignored, output = [], [], [], {}
        for model in PDBParser().get_structure(id=None, file=file):
            for chain in model:
                for peptide in PPBuilder().build_peptides(chain):
                    for aa, angles in zip(peptide,
                                          peptide.get_phi_psi_list()):
                        residue = chain.id + ":" + aa.resname + str(aa.id[1])
                        output[residue] = angles

        for key, (phi, psi) in output.items():
            # Only keep residues with both phi and psi angles. The test is
            # against None so that an angle of exactly 0.0 is not discarded.
            if phi is not None and psi is not None:
                x.append(phi * 180 / pi)
                y.append(psi * 180 / pi)
            else:
                ignored.append((key, (phi, psi)))

        return output, ignored, x, y
Example #23
0
def get_sequence(pdb, chain):
    """
    Write the sequence and the coordinates of one chain of a PDB file.

    pdb: path of the PDB file; it is also used as the structure id
    chain: id of the chain, looked up in the first model

    Two files are written, named from ``pdb[-8:-4]`` (the four characters
    before the extension):
      * "<pdb[-8:-4]>_<chain><chain>.fasta.txt" with the header
        ">" + pdb[:-4] + chain and the concatenated polypeptide sequences
        (NOTE(review): the chain id ends up twice in this file name -
        confirm this is intended);
      * "<pdb[-8:-4]>_<chain>.pdb" saved through ``SelectChains(chain)``.
    """
    # PERMISSIVE=0 selects strict parsing: construction errors are raised
    # instead of being tolerated.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    sequence = "".join(
        str(pp.get_sequence()) for pp in PPBuilder().build_peptides(pdb_chain))

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = pdb[-8:-4] + "_" + chain + ".pdb"
    with open(output[:-4] + chain + ".fasta.txt", "w") as out:
        out.write(">" + pdb[:-4] + chain + "\n")
        out.write(sequence + "\n")
    io.save(output, SelectChains(chain))
Example #24
0
def get_pp(pdb, chain, start, length, seq):
    """
    Return a window of residues around a sequence motif of a PDB chain.

    The chain ``chain`` of the first model of the structure ``pdb`` (file
    name given by ``make_filename``) is split into polypeptides. The first
    polypeptide whose sequence contains ``seq`` is selected and ``start``
    is replaced by the position of the match; when no polypeptide contains
    ``seq``, the last polypeptide and the caller's ``start`` are used.

    Returns ``pep[start - 1:start + length + 2]``, i.e. the ``length``
    residues from ``start`` plus one residue before and two after.

    Raises RuntimeError when the chain has no polypeptide, or when the
    window does not fit (``start`` is 0 or less, or the window runs past the
    end of the polypeptide).
    """
    f = make_filename(pdb)
    pdb_struct = PDBParser(PERMISSIVE=1).get_structure(pdb, f)
    peptides = PPBuilder().build_peptides(pdb_struct[0][chain])
    if not peptides:
        raise RuntimeError(
            "no polypeptide found in chain %s of %s" % (chain, pdb))

    pep = peptides[-1]  # fallback when no polypeptide contains seq
    for candidate in peptides:
        found = str(candidate.get_sequence()).find(seq)
        if found != -1:
            pep = candidate
            start = found
            break

    if start > 0 and (start + length + 2) <= len(pep):
        return pep[(start - 1):(start + length + 2)]
    raise RuntimeError(
        "window of length %s at position %s does not fit in the polypeptide"
        % (length, start))
def split_pdb_by_chain(pdb_id):
    """
    Save every chain of a PDB entry to its own file and return the sequences.

    The entry is read from "ent_files/pdb<pdb_id lower-cased>.ent". Each
    chain of each model is saved as
    "pdb_chains/<PDB_ID>/<PDB_ID>-<model id + 1>_<chain id>.pdb" unless that
    file already exists; the directory is created when missing, including
    the parent "pdb_chains" directory.

    Returns a dict mapping each chain file name (without directory) to the
    Seq made of the concatenated polypeptide sequences of that chain.
    """
    chain_dir = "pdb_chains/" + pdb_id.upper()
    if not os.path.isdir(chain_dir):
        # makedirs also creates "pdb_chains" itself on a first run.
        os.makedirs(chain_dir)

    actual_pdbfile = PDBParser().get_structure(
        pdb_id, "ent_files/pdb" + pdb_id.lower() + ".ent")

    return_dict = dict()
    for model in actual_pdbfile:
        for chain in model:
            outfilename = pdb_id.upper() + "-" + str(
                model.get_id() + 1) + "_" + str(chain.get_id()) + ".pdb"
            chain_path = chain_dir + "/" + outfilename
            if not os.path.isfile(chain_path):
                io = PDBIO()
                io.set_structure(chain)
                io.save(chain_path)

            this_seq = Seq("", generic_protein)
            for pp in PPBuilder().build_peptides(chain):
                this_seq += pp.get_sequence()
            return_dict[outfilename] = this_seq
    return return_dict
Example #26
0
 def testModels(self):
     """Parse a multi-model mmCIF file and build its polypeptides."""
     expected_sequence = (
         "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR")

     structure = MMCIFParser().get_structure("example", "PDB/1LCD.cif")
     self.assertEqual(len(structure), 3)

     for builder in (PPBuilder(), CaPPBuilder()):
         # serial_num (model column) must be stored properly.
         for model_index in range(3):
             self.assertEqual(structure[model_index].serial_num,
                              model_index + 1)

         # First allow non-standard amino acids (non-standard MSE would be
         # shown as M), then the strict version with only standard amino
         # acids; both give the same single polypeptide.
         for aa_only in (False, True):
             peptides = builder.build_peptides(structure[0], aa_only)
             self.assertEqual(len(peptides), 1)
             peptide = peptides[0]
             # Start and end positions
             self.assertEqual(peptide[0].get_id()[1], 1)
             self.assertEqual(peptide[-1].get_id()[1], 51)
             # Sequence
             sequence = peptide.get_sequence()
             self.assertTrue(isinstance(sequence, Seq))
             self.assertEqual(sequence.alphabet, generic_protein)
             self.assertEqual(expected_sequence, str(sequence))
Example #27
0
def get_sequence(pdb, chain):
    """
    Write the sequence of one chain of a PDB file to a FASTA file.

    pdb: path of the PDB file; it is also used as the structure id
    chain: id of the chain, looked up in the first model; "%" stands for
        the blank chain id " "

    The polypeptides are built with ``aa_only=False`` and their sequences
    are concatenated. The record is written to pdb[0:-4] + ".fasta.atom"
    with the header ">" + pdb[0:-4].
    """
    # Compare by value: identity of string literals is an implementation
    # detail.
    if chain == "%":
        chain = " "
    warnings.filterwarnings('always', message='.*discontinuous at.*')
    # PERMISSIVE=0 selects strict parsing: construction errors are raised
    # instead of being tolerated.
    pdb_parser = PDBParser(PERMISSIVE=0, QUIET=True)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    sequence = "".join(
        str(pp.get_sequence())
        for pp in PPBuilder().build_peptides(pdb_chain, aa_only=False))

    stem = pdb[0:-4]
    with open(stem + ".fasta.atom", "w") as out:
        out.write(">" + stem + "\n")
        out.write(sequence + "\n")
Example #28
0
def get_sequence(pdb, chain, first, last, output):
    """
    Save the part of a PDB file selected by ``SelectDomain`` to ``output``.

    pdb: path of the PDB file; it is also used as the structure id
    chain: id of a chain that must exist in the first model (KeyError
        otherwise)
    first, last: passed on to ``SelectDomain`` together with ``chain``
        (presumably the residue range to keep - verify against SelectDomain)
    output: path of the PDB file to write
    """
    # PERMISSIVE=0 selects strict parsing: construction errors are raised
    # instead of being tolerated.
    pdb_structure = PDBParser(PERMISSIVE=0).get_structure(pdb, pdb)

    # Fail early with a KeyError when the chain is missing from the first
    # model, before any output is written.
    pdb_structure[0][chain]

    io = PDBIO()
    io.set_structure(pdb_structure)
    io.save(output, SelectDomain(chain, first, last))
Example #29
0
    def test_ppbuilder_real(self):
        """Run PPBuilder on a real PDB file and check the three fragments."""
        # (first residue number, last residue number, sequence)
        expected = (
            (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
            (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
            (216, 220, "TACQG"),
        )
        peptides = PPBuilder().build_peptides(self.structure)

        self.assertEqual(len(peptides), 3)

        # Termini first ...
        for peptide, (first, last, _sequence) in zip(peptides, expected):
            self.assertEqual(peptide[0].get_id()[1], first)
            self.assertEqual(peptide[-1].get_id()[1], last)

        # ... then the sequences.
        sequences = [peptide.get_sequence() for peptide in peptides]
        self.assertIsInstance(sequences[0], Seq)
        for sequence, (_first, _last, wanted) in zip(sequences, expected):
            self.assertEqual(sequence, wanted)
Example #30
0
def get_sequence(pdb, chain):
    """
    Renumber one chain of a PDB file so that it starts at residue 1.

    pdb: path of the PDB file; it is also used as the structure id
    chain: id of the chain, looked up in the first model

    When the first residue of the chain is not numbered 1, every residue id
    of the chain is replaced by (' ', number - first number + 1, ' '), which
    also blanks the hetero flag and the insertion code. The chain is then
    saved through ``SelectChains(chain)`` to "renumbered_" + pdb
    (NOTE(review): this only gives a usable path when ``pdb`` is a bare file
    name - confirm against callers).
    """
    # PERMISSIVE=0 selects strict parsing: construction errors are raised
    # instead of being tolerated.
    pdb_structure = PDBParser(PERMISSIVE=0).get_structure(pdb, pdb)
    pdb_chain = pdb_structure[0][chain]

    start = [residue.id[1] for residue in pdb_chain][0]
    # Compare by value: identity of integers is an implementation detail.
    if start != 1:
        shift = start - 1
        for residue in pdb_chain:
            residue.id = (' ', residue.id[1] - shift, ' ')

    io = PDBIO()
    io.set_structure(pdb_structure)
    io.save("renumbered_" + pdb, SelectChains(chain))