예제 #1
0
def pdb_contacts(pdb, chain, dist):
    """Write the sequence and the residue contact list of one PDB chain.

    The chain obtained from ``pdb_getchain(pdb, chain)`` is split into
    peptide fragments with ``CaPPBuilder`` (``aa_only=False``) and the
    fragments are merged into a single residue list and a single sequence.
    When the last residue has neither a CA nor a CB atom, it is dropped from
    both the residue list and the sequence.

    Output goes to the handles ``file_seq`` and ``file_map``, which are not
    defined in this function (presumably module-level open files):

    * ``file_seq`` receives a ``>sequence`` header line, the sequence on its
      own line, and then the sequence once more without a trailing newline.
    * ``file_map`` receives the number of residues on the first line, then
      one ``"i j"`` line (zero-based indices) for every ordered residue pair
      whose representative atoms are closer than ``dist`` as measured by
      ``norm``.  The representative atom is CB when present, otherwise CA.

    Args:
        pdb: Forwarded unchanged to ``pdb_getchain``.
        chain: Forwarded unchanged to ``pdb_getchain``.
        dist: Threshold; a pair is written when ``norm(...) < dist``.

    Raises:
        Any exception raised while looking up a representative atom is
        re-raised after the marker ``ERROR`` has been written (to stdout for
        the outer residue, to ``file_map`` for the inner residue).
    """
    pdb_chain = pdb_getchain(pdb, chain)
    ppb = CaPPBuilder()
    # If a mutated residue is present in a chain it is classed as a hetatm.
    # However, not all hetatms in a chain are part of the sequence. The
    # CaPPBuilder makes sequences by requiring CA-CA distances to be <4.3A.
    # Common hetatms are identified such that an MSE hetatm will be replaced
    # by an M in the sequence.
    # Build the fragment list once and reuse it for the head and the tail.
    peptides = ppb.build_peptides(pdb_chain, aa_only=False)
    polypepTot = peptides[0]
    sequen = polypepTot.get_sequence()

    # Append the remaining fragments to the first one
    for polypep_raw in peptides[1:]:
        sequen += polypep_raw.get_sequence()
        polypepTot += polypep_raw

    # Sometimes the terminal residue in a protein isn't fully resolved
    last_res = polypepTot[-1]
    if last_res.has_id("CA") or last_res.has_id("CB"):
        polypep = polypepTot  # If resolved take whole AA
        out_seq = sequen
    else:
        polypep = polypepTot[:-1]  # Otherwise take all but the last AA
        out_seq = sequen[:-1]
    # NOTE(review): the sequence is written twice (once after the header,
    # once more without newline), exactly as before - confirm this is wanted.
    file_seq.write(">sequence\n%s\n" % out_seq)
    file_seq.write("%s" % out_seq)

    file_map.write(str(len(polypep)) + "\n")

    for i, residue1 in enumerate(polypep):
        # If no usable atom exists, print ERROR. Grep the output if running
        # unsupervised.
        try:
            atom1 = _contact_atom(residue1)
        except Exception:
            sys.stdout.write("ERROR")
            raise
        # The outer coordinates do not change while scanning the inner loop
        coord1 = atom1.get_coord()
        for j, residue2 in enumerate(polypep):
            try:
                atom2 = _contact_atom(residue2)
            except Exception:
                file_map.write("ERROR")
                raise
            if norm(coord1, atom2.get_coord()) < dist:
                file_map.write("%d %d\n" % (i, j))


def _contact_atom(residue):
    """Return the CB atom of ``residue``, or its CA atom when CB is absent."""
    # Quite frequently residues do not have a resolved CB, in which case use CA
    if residue.has_id("CB"):
        return residue["CB"]
    return residue["CA"]
예제 #2
0
def pdb_polypep(pdb, chain, trim):
    """Return the residues of one PDB chain merged into a single polypeptide.

    The chain obtained from ``pdb_getchain(pdb, chain)`` is split into
    peptide fragments with ``CaPPBuilder`` (``aa_only=False``); every later
    fragment is appended to the first one.

    Args:
        pdb: Forwarded unchanged to ``pdb_getchain``.
        chain: Forwarded unchanged to ``pdb_getchain``.
        trim: When truthy, the merged polypeptide is passed through
            ``pp_trim`` (defined elsewhere) before the terminal check.

    Returns:
        The merged (and optionally trimmed) polypeptide.  When its last
        residue has neither a CA nor a CB atom, a slice without that last
        residue is returned instead.
    """
    pdb_chain = pdb_getchain(pdb, chain)
    ppb = CaPPBuilder()
    # If a mutated residue is present in a chain it is classed as a hetatm.
    # However, not all hetatms in a chain are part of the sequence. The
    # CaPPBuilder makes sequences by requiring CA-CA distances to be <4.3A.
    # Build the fragment list once and reuse it for the head and the tail.
    peptides = ppb.build_peptides(pdb_chain, aa_only=False)
    polypepTot = peptides[0]
    for polypep_raw in peptides[1:]:
        polypepTot += polypep_raw

    # Remove unstructured terminal ends
    if trim:
        polypepTot = pp_trim(polypepTot)

    # Sometimes the terminal residue in a protein isn't fully resolved
    last_res = polypepTot[-1]
    if last_res.has_id("CA") or last_res.has_id("CB"):
        return polypepTot  # If resolved take whole AA
    return polypepTot[:-1]  # Otherwise take all but the last AA
예제 #3
0
 def test_ca_ca(self):
     """CaPPBuilder must find one peptide whose ends are numbered 2 and 86."""
     found = CaPPBuilder().build_peptides(self.structure[1])
     self.assertEqual(len(found), 1)
     peptide = found[0]
     # Compare index 1 of the residue id at the first and the last position
     for position, expected_number in ((0, 2), (-1, 86)):
         self.assertEqual(peptide[position].get_id()[1], expected_number)
예제 #4
0
    def test_cappbuilder_tau(self):
        """Check the leading tau and theta angles of CaPPBuilder peptides."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        # First three tau values of the peptide at index 1
        tau_values = peptides[1].get_tau_list()
        expected_taus = (
            0.3597907225123525,
            0.43239284636769254,
            0.99820157492712114,
        )
        for index, expected in enumerate(expected_taus):
            self.assertAlmostEqual(tau_values[index], expected, places=3)

        # First three theta values of the peptide at index 2
        theta_values = peptides[2].get_theta_list()
        expected_thetas = (
            1.6610069445335354,
            1.7491703334817772,
            2.0702447422720143,
        )
        for index, expected in enumerate(expected_thetas):
            self.assertAlmostEqual(theta_values[index], expected, places=3)
예제 #5
0
    def test_cappbuilder_real(self):
        """Check the sequences and CA serial numbers CaPPBuilder reports."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        expected_sequences = (
            "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
            "TETLLVQNANPDCKTILKALGPGATLEE",
            "TACQG",
        )
        # Fetch the first three sequences up front, then compare them in order
        observed_sequences = [peptides[n].get_sequence() for n in range(3)]
        for observed, expected in zip(observed_sequences, expected_sequences):
            self.assertEqual(observed, expected)

        # Serial numbers of the CA atoms of the first peptide
        expected_serials = [
            10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93,
            104, 112, 124, 131, 139, 150, 161, 173, 182, 189, 197,
            208, 213, 222, 231, 236, 242, 251, 260, 267, 276, 284,
        ]
        observed_serials = [
            atom.serial_number for atom in peptides[0].get_ca_list()
        ]
        self.assertEqual(observed_serials, expected_serials)
예제 #6
0
 def test_ca_ca(self):
     """Extract polypeptides using CA-CA and check ends, type and sequence."""
     ppbuild = CaPPBuilder()
     polypeptides = ppbuild.build_peptides(self.structure[1])
     self.assertEqual(len(polypeptides), 1)
     pp = polypeptides[0]
     # Check the start and end positions
     self.assertEqual(pp[0].get_id()[1], 2)
     self.assertEqual(pp[-1].get_id()[1], 86)
     # Check the sequence
     s = pp.get_sequence()
     # assertIsInstance names the offending type when the check fails
     self.assertIsInstance(s, Seq)
     # NOTE(review): relies on the sequence object exposing ``alphabet``;
     # confirm the targeted Biopython release still provides it.
     self.assertEqual(s.alphabet, generic_protein)
     self.assertEqual("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                      "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC",
                      str(s))
예제 #7
0
 def test_ca_ca(self):
     """Extract polypeptides using CA-CA and check ends, type and sequence."""
     ppbuild = CaPPBuilder()
     polypeptides = ppbuild.build_peptides(self.structure[1])
     self.assertEqual(len(polypeptides), 1)
     pp = polypeptides[0]
     # Check the start and end positions
     self.assertEqual(pp[0].get_id()[1], 2)
     self.assertEqual(pp[-1].get_id()[1], 86)
     # Check the sequence
     s = pp.get_sequence()
     # assertIsInstance names the offending type when the check fails
     self.assertIsInstance(s, Seq)
     # NOTE(review): relies on the sequence object exposing ``alphabet``;
     # confirm the targeted Biopython release still provides it.
     self.assertEqual(s.alphabet, generic_protein)
     self.assertEqual("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                      "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC",
                      str(s))
예제 #8
0
    def test_cappbuilder_real_nonstd(self):
        """CaPPBuilder with non-standard amino acids allowed gives one peptide."""
        builder = CaPPBuilder()
        # The second positional argument is False, as the docstring's
        # "allowing non-standard amino acids" describes
        peptides = builder.build_peptides(self.structure, False)

        self.assertEqual(len(peptides), 1)
        peptide = peptides[0]

        # Residue numbers at both ends of the single peptide
        self.assertEqual(peptide[0].get_id()[1], 151)
        self.assertEqual(peptide[-1].get_id()[1], 220)

        # Sequence type and content; non-standard MSE shows up as M
        sequence = peptide.get_sequence()
        self.assertIsInstance(sequence, Seq)
        expected_sequence = (
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG"
        )
        self.assertEqual(expected_sequence, sequence)
예제 #9
0
def pdb_sequence(pdb_file, id=None, method="order"):
    """Extract the amino-acid sequence of every chain in a PDB file.

    Args:
        pdb_file: Passed to ``PDBParser.get_structure`` as the file to parse.
        id: Structure identifier.  When ``None`` it is derived with
            ``util.make_id_from_file_name(pdb_file)``.
        method: ``"distance"`` concatenates the peptide sequences found by
            ``CaPPBuilder``; ``"order"`` converts every residue of the chain,
            in iteration order, with ``three_to_one``.

    Returns:
        ``pdb_seqs(id=..., chains=..., chains_map=...)`` where ``chains`` is
        a list with one dict per chain (keys ``id_chain``, ``seq_rec`` and
        ``seq_spec``) and ``chains_map`` maps each chain id to its dict.
        ``seq_spec`` is a list of ``pdb_seq_spec`` records for ``"order"``
        and ``None`` for ``"distance"``.

    Raises:
        ValueError: If ``method`` is neither ``"distance"`` nor ``"order"``.
    """
    from Bio.PDB import PDBParser, CaPPBuilder
    from Bio.PDB.Polypeptide import three_to_one

    # Reject a bad method up front so the error does not depend on the
    # structure containing at least one chain.
    if method not in ("distance", "order"):
        raise ValueError("Unknown method: {}".format(method))

    if id is None:
        id = util.make_id_from_file_name(pdb_file)
    parser = PDBParser()
    structure = parser.get_structure(id, pdb_file)

    seq_chains = []
    for chain in structure.get_chains():
        id_chain = chain.get_id()
        if method == "distance":
            ppb = CaPPBuilder()
            seq = sum((pp.get_sequence() for pp in ppb.build_peptides(chain)),
                      Seq("", IUPAC.protein))
            seq_spec = None  # TODO: implement
        else:  # method == "order"
            letters = []
            seq_spec = []
            for res in chain.get_residues():
                letters.append(three_to_one(res.get_resname()))
                # From Bio docs, res.get_full_id() returns:
                # ("1abc", 0, "A", (" ", 10, "A"))
                fid = res.get_full_id()
                seq_spec.append(
                    pdb_seq_spec(chain=fid[-2].strip(),
                                 resn=res.get_resname(),
                                 resi=fid[-1][-2],
                                 ins=fid[-1][-1].strip()))
            seq = Seq("".join(letters), IUPAC.protein)

        seq_chains.append(
            dict(id_chain=id_chain,
                 seq_rec=SeqRecord(seq,
                                   id="{}_{}".format(id, id_chain),
                                   description=""),
                 seq_spec=seq_spec))

    # Built once after the loop so it also exists for a chain-less structure
    chains_map = dict((entry["id_chain"], entry) for entry in seq_chains)
    return pdb_seqs(id=id, chains=seq_chains, chains_map=chains_map)
예제 #10
0
#from TCRmodeller_functions import *
from subprocess import Popen, PIPE


# Exactly four command-line values are expected; ``argv`` itself is brought
# into scope outside this excerpt.
script, filename, tag, chainid = argv
tmpdir = os.getcwd()
hmmscan_program = '/TCRmodeller/programs/hmmer/hmmscan'
profit_program = '/Users/ragul/profit/ProFitV3.1/src/profit'

# Dump the peptide sequences of the requested chain to temp.fa, one peptide
# per line below a ">filename" header.  The ``with`` block closes the file
# even if parsing or writing fails.  ``parser`` and ``ppb`` are created
# outside this excerpt.
with open('temp.fa', 'w+') as f2:
    pdbfile = parser.get_structure("PDB", filename)
    mychain = pdbfile[0][chainid]

    f2.write(">" + filename + "\n")
    for ppe in ppb.build_peptides(mychain):
        f2.write(str(ppe.get_sequence()) + "\n")
    



def find_CDRs(tcr_seq, hmmscan_program, tmpdir, tag):
    """Reset the CDR1-3 and HV4 position markers to zero.

    NOTE(review): only this zero-initialisation is visible here; none of the
    arguments is used and nothing is returned.  The rest of the function
    appears to be missing from this excerpt - confirm against the full file.
    """
    # Start/end markers for each CDR loop, all starting at 0
    CDR1_start_pos = CDR1_end_pos = 0
    CDR2_start_pos = CDR2_end_pos = 0
    CDR3_start_pos = CDR3_end_pos = 0
    HV4_start_pos = 0
class nonHetSelect(Select):
    """Selector that accepts only residues whose id begins with a blank.

    NOTE(review): the first id field is presumably the hetero flag, so this
    keeps non-HETATM residues - confirm against the Bio.PDB residue id layout.
    """

    def accept_residue(self, residue):
        """Return 1 when ``residue.id[0]`` is a single space, otherwise 0."""
        return 1 if residue.id[0] == ' ' else 0

# Path of the gzipped PDB entry: <root>/<chars 1-2 of pdbcode>/pdb<pdbcode>.ent.gz
gzpdbfile_path = "/TCRmodeller/PDB_RELEASE/pdb_structures" + '/%s/pdb%s.ent.gz' % (pdbcode[1:3], pdbcode)
gzpdbfile = gzip.open(gzpdbfile_path, 'rb')
try:
    # NOTE(review): assumes get_structure reads the handle completely before
    # returning, so it can be closed right after - confirm for this parser.
    pdbfile = parser.get_structure("PDB", gzpdbfile)
finally:
    gzpdbfile.close()

# First chain: save it without hetero residues, then collect its sequence
mychaina = pdbfile[0][chainida]
io.set_structure(mychaina)
io.save('tmpa.pdb', nonHetSelect())
faseqa = "".join(str(ppe.get_sequence()) for ppe in ppb.build_peptides(mychaina))

# Single-argument form prints the same text under Python 2 and Python 3
print("faseqa :  %s" % faseqa)

# Second chain: same treatment
mychainb = pdbfile[0][chainidb]
io.set_structure(mychainb)
io.save('tmpb.pdb', nonHetSelect())
faseqb = "".join(str(ppe.get_sequence()) for ppe in ppb.build_peptides(mychainb))

print("faseqb : " + " %s" % faseqb)

# Sequence patterns with three capture groups each.
# NOTE(review): presumably one pattern per chain with the groups marking the
# CDR1/CDR2/CDR3 loops - confirm against the code that applies them.
regexa = "[A-Z]{0,23}C[A-Z]([A-Z]{8,12}W)[YF][A-Z]{13}([A-Z]{6,11})[A-Z]{15,30}[DL][A-Z]{2,3}Y[A-Z][CW][A-Z]([A-Z]{7,16}[FW])G[A-Z]G[A-Z]{0,7}[PA]*"
regexb = "[A-Z]{0,23}C[A-Z]([A-Z]{8,12}W)[Y][A-Z]{13}([A-Z]{6,11})[A-Z]{15,40}[YLF][A-Z][CW][A-Z]([A-Z]{7,17}[F])G[A-Z]G[A-Z]{0,7}[E]*"