Example #1
0
 def water_in_chain(self, pdb_file, chain_id):
     """Return the residues of one chain whose hetero flag is ``'W'``.

     ``pdb_file`` is parsed quietly, ``chain_id`` is looked up in the first
     model, and every residue whose ``get_id()[0]`` equals ``'W'`` is kept
     in chain order.
     """
     structure = PDBParser(QUIET=True).get_structure('model', pdb_file)
     return [
         residue
         for residue in structure[0][chain_id].get_residues()
         if residue.get_id()[0] == 'W'
     ]
 def __read_pdb_structure__(self, pdb_filename):
     """
     Parse ``pdb_filename`` with a default PDBParser and return the
     resulting Structure object (created with an empty string as its id).
     """
     # KR: this probably can be outsourced to another module.
     return PDBParser().get_structure('', pdb_filename)
Example #3
0
 def real_seq():
     """Concatenate the sequences of all peptides built from the protein's PDB file.

     ``protein`` is taken from the enclosing scope; the file read is
     ``<protein.protein_id>.pdb``. An empty string is returned when no
     peptide is built.
     """
     pdb_id = protein.protein_id
     parsed = PDBParser().get_structure(pdb_id, pdb_id + '.pdb')
     sequence = ''
     for peptide in PPBuilder().build_peptides(parsed):
         sequence += peptide.get_sequence()
     return sequence
Example #4
0
def get_info(filename):
    '''
    Return the header of a PDB file. Function adapted from Biopython Package.

    get_info(filename)

    Filename needs to be a PDB file format (*.ent or *.pdb).
    The returned value is whatever ``PDBParser.get_header()`` exposes after
    the file has been parsed.
    '''
    p = PDBParser(QUIET=True)
    # The parser only has a header once a file has been parsed; the original
    # code never used ``filename`` and never returned anything.
    p.get_structure("X", filename)
    return p.get_header()
Example #5
0
 def setUp(self):
     """Parse every entry of STRUCTURES into ``self.structures``.

     Each value of STRUCTURES is wrapped in a StringIO handle and parsed;
     the result is stored under the same key, with ``repr(key)`` used as
     the structure id.
     """
     pdb_parser = PDBParser()
     self.structures = {}
     for angles, pdb_text in STRUCTURES.items():
         with StringIO(pdb_text) as handle:
             parsed = pdb_parser.get_structure(repr(angles), handle)
             self.structures[angles] = parsed
Example #6
0
def test_superimposer_atoms():
    """Superimposing the atoms of 1ptq onto a second parse of the same file gives ~0 RMS."""
    from Bio.PDB.PDBParser import PDBParser

    code = '1ptq'
    pdb_path = testFilePath + code + '.pdb'

    # Two independent parses of the same file: one fixed, one moving.
    fixed_structure = PDBParser().get_structure(code, pdb_path)
    moving_structure = PDBParser().get_structure(code, pdb_path)

    # TODO transform moving

    fixed_atoms = list(fixed_structure.get_atoms())
    moving_atoms = list(moving_structure.get_atoms())

    superimposer = Superimposer()
    superimposer.set_atoms(fixed_atoms, moving_atoms)

    assert superimposer.rms == approx(0.)
Example #7
0
def readPDBfiles(pdbfiles, ncpus=cpu_count()):
    """Load every distinct line of a list file in parallel with ``loadTarget``.

    :param pdbfiles: path to a text file; each distinct line (newline
        included) is handed to ``loadTarget`` together with a shared parser.
    :param ncpus: number of worker processes (defaults to the CPU count
        evaluated when this function was defined).
    :return: the list produced by ``Pool.map``. Lines are de-duplicated
        through a set, so the order of results is not the file order.
    """
    pdb_parser = PDBParser(PERMISSIVE=1, QUIET=1)
    with open(pdbfiles, 'r') as f:
        pdblines = set(f.readlines())
    function = partial(loadTarget, pdb_parser)
    pool = Pool(ncpus)
    try:
        target_tups = pool.map(function, pdblines)
    finally:
        # Always release the worker processes, even when a task raises;
        # the original left the pool open.
        pool.terminate()
        pool.join()
    return target_tups
Example #8
0
 def __init__(s, filename, strandsfile):
     """Load the first model of ``filename`` and derive the strand attributes.

     ``s`` plays the role of ``self``. The attributes are filled in this
     fixed order because the later helpers receive or read the instance:
     ``stru``, ``sti``, ``sresis``, ``c``, ``sig``, ``orients``.
     """
     structure = PDBParser().get_structure('struct', filename)
     s.stru = structure[0]
     s.sti = s.readstrands(strandsfile)
     s.sresis = s.get_strands_resis()
     s.c = s.get_center()
     s.sig = IOSignature(s)
     s.orients = s.getstrandsorient()
Example #9
0
    def setUp(self):
        """Create a permissive PQR parser and parse the example PQR file.

        PDBConstructionWarning is ignored while the parser is built and the
        file ``PQR/1A80.pqr`` is read.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)

            # Permissive-mode parser configured for PQR input
            pqr_parser = PDBParser(PERMISSIVE=1, is_pqr=True)
            self.pqr_parser = pqr_parser
            self.example_structure = pqr_parser.get_structure(
                "example", "PQR/1A80.pqr")
Example #10
0
def readFileBioPDB(struct, fn):
    """Parse the PDB file ``fn`` into a structure whose id is ``struct``.

    :param struct: id given to the parsed structure.
    :param fn: path of the PDB file to open and parse.
    :return: the parsed structure (the original parsed it and dropped it).

    Any exception raised while opening or parsing is printed (prefixed with
    a space) and the process exits with status 1.
    """
    p = PDBParser()
    try:
        with open(fn, 'r') as infile:
            struct_in = p.get_structure(struct, infile)
    except Exception as e:
        # Single-argument print() behaves the same on Python 2 and 3.
        print(" " + str(e))
        sys.exit(1)
    return struct_in
def get_structure(protein_name):
    """
    Parse ``<protein_name>.pdb`` and return the structure, which uses the
    protein name as its id.
    """
    pdb_path = protein_name + '.pdb'
    return PDBParser().get_structure(protein_name, pdb_path)
Example #12
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    aas.scr, pro.scr, gly.scr - the scoring tables
    need to be present in working directory

    returns:

    (sum of the PRO, GLY and remaining-residue scores / length(chain)) * 1000,
    or 0 when the residue lists are inconsistent or the length is 0.
    Only the first chain of the first model is scored.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  ##define global tables
    pars = PDBParser(PERMISSIVE=1)
    # str.rstrip('.pdb') strips any trailing '.', 'p', 'd' or 'b' characters
    # (e.g. '1bdp.pdb' -> '1'), so remove the suffix explicitly instead.
    struct_id = PDBfile[:-4] if PDBfile.endswith('.pdb') else PDBfile
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]
    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # list() so that .remove() also works on Python 3, where range is lazy
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[len(chain) - 1].id[1]))
    #need to remove pro/gly indices in first/last position
    last = len(chain) - 1
    for residue_list in (pro_list, gly_list):
        for boundary in (1, last):
            if boundary in residue_list:
                residue_list.remove(boundary)
    try:
        for index in pro_list:
            aas_list.remove(index)  #remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index)  #remove glys from aas_list
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    total = (score_help(chain, pro_list, pro)
             + score_help(chain, gly_list, gly)
             + score_help(chain, aas_list, aas))
    size = length(chain)
    try:
        return (total / size) * 1000  #normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
Example #13
0
 def has_correct_dum_in_pdb(self, file_path):
     """Print a message when the first model has no chain with id ``' '``.

     The structure id is ``file_path[-8:-4]``. Nothing is returned; the
     only visible effect is the message printed when the lookup raises
     KeyError.
     """
     io = PDBIO()  # instantiated but not used further in this method
     pdb_code = file_path[-8:-4]
     structure = PDBParser(QUIET=True).get_structure(pdb_code, file_path)
     try:
         blank_chain = structure[0][' ']
         list(blank_chain.get_residues())
     except KeyError:
         print('Error in file:', file_path)
Example #14
0
def find_pdb_limits(pdb_path):
    """Return ``(start, end, seq)`` for the first polypeptide of a PDB file.

    ``start`` and ``end`` are ``get_id()[1]`` of the polypeptide's first and
    last residue; ``seq`` is its ``get_sequence()``.
    """
    structure = PDBParser().get_structure('', pdb_path)
    # only the first polypeptide is used (the file is expected to hold one)
    peptide = PPBuilder().build_peptides(structure)[0]
    first_residue, last_residue = peptide[0], peptide[-1]
    return (first_residue.get_id()[1],
            last_residue.get_id()[1],
            peptide.get_sequence())
Example #15
0
def read_pdb(pdb_id):
    """Parse ``pdb_id`` and return the checked amino-acid list of its first model.

    The residues of model 0 are unfolded with
    ``PDB.Selection.unfold_entities(..., 'R')``, passed through
    ``get_aa_list`` and then through ``check_aa_id``, whose result is
    returned.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure("1", pdb_id)
    first_model = structure[0]
    residues = PDB.Selection.unfold_entities(first_model, 'R')  #read aminoacid
    return check_aa_id(get_aa_list(residues))
Example #16
0
 def setUp(self):
     """Load the A and C residues, the mini-template chain and the UNK structure."""
     a_struc = ModernaStructure('file', A_RESIDUE)
     self.struc = a_struc
     self.adenosine = a_struc['1']
     c_struc = ModernaStructure('file', C_RESIDUE, '0')
     self.cytosine = c_struc['494']
     # first chain of the first model of the mini template
     template = PDBParser().get_structure('test_struc', MINI_TEMPLATE)
     self.chain = template[0].child_list[0]
     self.unk = ModernaStructure('file', PDB_UNK)
 def test_is_backbone_intact(self):
     """Check all kinds of backbone discontinuities in one residue."""
     structure = PDBParser().get_structure('test_struc', BROKEN_BACKBONE)
     chain = structure[0].child_list[0]
     # the first five residues are each expected to be reported as broken
     for resi in list(chain)[:5]:
         self.assertFalse(is_backbone_intact(RNAResidue(resi)))
     # residue 6 is expected to have an intact backbone
     self.assertTrue(is_backbone_intact(RNAResidue(chain[6])))
Example #18
0
    def test_bad_xyz(self):
        """Test if bad coordinates exception is raised."""
        # Atom Entry
        data = "ATOM      1  N   PRO     1      00abc1  02.000 3.0000 -0.1000  1.0000       N\n"

        # Parsing the sole atom of this structure must fail
        pqr_parser = PDBParser(is_pqr=True)  # default initialization
        with self.assertRaises(PDBConstructionException):
            pqr_parser.get_structure("example", StringIO(data))
Example #19
0
def get_bio_model(pdbfile):
    """Parse ``pdbfile`` with a default PDBParser and return its first model.

    The structure is created with the id ``'_tmp'``.
    """
    parsed = PDBParser().get_structure('_tmp', pdbfile)
    return parsed[0]
Example #20
0
def get_PDB(pdb_ids, valid_chains=None, chain_len=True, pdb_dir='.'):
    """Build a per-residue DataFrame from the first model of each PDB entry.

    :param pdb_ids: iterable of PDB ids; each is read from
        ``<pdb_dir>/pdb<id>.ent``.
    :param valid_chains: optional container of ``(pdb_id, chain_id)`` pairs;
        when given, chains not listed are skipped.
    :param chain_len: when true, a ``CHAIN_LEN`` column (residue rows per
        PDB/model/chain group) is inserted after ``CHAIN_ID``.
    :param pdb_dir: directory holding the ``.ent`` files.
    :return: DataFrame with columns PDB_ID, MODEL_ID, CHAIN_ID, [CHAIN_LEN,]
        RES_ID, RES_NAME, LIP_SCORE, LIP (the last two initialised to 0).

    Logs an error and calls ``exit()`` when no residue passes ``is_aa``.
    """
    # Debug
    for label, value in (('Directory for PDB files', pdb_dir),
                         ('Chain length', chain_len),
                         ('Valid chains', valid_chains)):
        logging.debug(label)
        logging.debug(value)

    key_columns = ['PDB_ID', 'MODEL_ID', 'CHAIN_ID']
    value_columns = ['RES_ID', 'RES_NAME', 'LIP_SCORE', 'LIP']

    # Rows for every protein; turned into a DataFrame later
    ds_residues = []
    for pdb_id in pdb_ids:
        pdb_path = pdb_dir + '/pdb{}.ent'.format(pdb_id)
        # Only the 0-th model is used
        model = PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)[0]
        residues = []
        for chain in model:
            chain_allowed = (valid_chains is None
                             or (pdb_id, chain.id) in valid_chains)
            if chain_allowed:
                # Residues rejected by is_aa (e.g. water molecules) are skipped
                residues.extend(
                    (pdb_id, model.id, chain.id, residue.id[1],
                     residue.get_resname(), 0, 0)
                    for residue in chain
                    if is_aa(residue)
                )
        if not residues:
            logging.warning('A protein {} has no valid residues'.format(pdb_id))
        ds_residues.extend(residues)

    if not ds_residues:
        logging.error('No valid aminoacidics found\nAborting...')
        exit()

    ds_residues = pd.DataFrame(ds_residues)
    # Debug (logged before the column names are assigned)
    logging.debug('PDB dataset')
    logging.debug(ds_residues)
    ds_residues.columns = key_columns + value_columns

    if chain_len:
        # Rows per (PDB, model, chain) group give the chain length
        chain_sizes = (ds_residues.groupby(key_columns)
                       .size()
                       .reset_index(name='CHAIN_LEN'))
        ds_residues = ds_residues.merge(chain_sizes, how='left', on=key_columns)
        # Place the chain length right after the chain id
        ds_residues = ds_residues.reindex(
            key_columns + ['CHAIN_LEN'] + value_columns, axis=1)

    # Show some info about the dataset
    logging.debug("Numbers of proteins: {}".format(len(pdb_ids)))
    logging.debug("Numbers of residues: {}".format(len(ds_residues.PDB_ID)))
    return ds_residues
Example #21
0
def parsePdb(pdbFile, pdbCode, seqName):
    """Pass chains A-E of the first model to ``getSeq`` with an open FASTA file.

    :param pdbFile: PDB file to parse.
    :param pdbCode: id for the parsed structure, also forwarded to ``getSeq``.
    :param seqName: output base name; ``<seqName>.fasta`` is opened for
        writing (truncating any existing file).

    The chain ids are fixed to 'A'..'E'; every one of them is looked up in
    model 0. ``getSeq`` is presumably what writes to the file (it is defined
    elsewhere); verify against its definition.
    """
    structure = PDBParser().get_structure(pdbCode, pdbFile)
    chains = ['A', 'B', 'C', 'D', 'E']
    # Context manager so the file is closed even if a chain lookup or
    # getSeq raises part-way through.
    with open(seqName + '.fasta', 'w') as seqFile:
        for ch in chains:
            chain = structure[0][ch]
            getSeq(pdbCode, chain, ch, seqFile)
Example #22
0
    def sodium_in_pdb(self, pdb_file):
        """Return the residues of the first model flagged as sodium.

        A residue is kept when the first element of its ``get_id()`` is
        either ``'SOD'`` or ``'H_ NA'``.
        """
        sodium_flags = ('SOD', 'H_ NA')
        structure = PDBParser(QUIET=True).get_structure('model', pdb_file)
        return [
            residue
            for residue in structure[0].get_residues()
            if residue.get_id()[0] in sodium_flags
        ]
Example #23
0
def identify_terminal_residues(pdb_filename):
    """Map each chain id of the first model to its terminal residue numbers.

    :param pdb_filename: PDB file to parse.
    :return: dict ``{chain.id: (first, last)}`` where ``first`` and ``last``
        are ``id[1]`` of the chain's first and last residue. An empty dict
        is returned when the structure holds no model.
    """
    parser = PDBParser()
    structure = parser.get_structure('X', pdb_filename)
    # The original returned from inside the model loop, so only the first
    # model was ever inspected and an empty structure fell through to None.
    # Make both explicit.
    first_model = next(iter(structure), None)
    if first_model is None:
        return {}
    terminal_residues = {}
    for chain in first_model:
        residues = list(chain.get_residues())
        terminal_residues[chain.id] = (residues[0].id[1], residues[-1].id[1])
    return terminal_residues
 def test_is_backbone_congested(self):
     """Check whether backbone atoms clash into rest of the structure."""
     template_residue = RNAResidue(self.chain.child_list[2])
     self.assertFalse(is_backbone_congested(template_residue))
     # now check a structure where the backbone clashes into O2'
     messed_up = PDBParser().get_structure('test_struc', BB_MESSED_UP)
     chain = messed_up[0].child_list[0]
     for residue_id in (('H_c  ', 32, ' '), (' ', 33, ' ')):
         self.assertTrue(is_backbone_congested(RNAResidue(chain[residue_id])))
Example #25
0
 def split_chain(self, pdbfile):
     """Save ``pdbfile`` once per chain id 'A' and 'B', filtered by ChainSelect.

     The outputs are ``chain_A.pdb`` and ``chain_B.pdb`` in the current
     directory. What each file contains is decided by ``ChainSelect``
     (defined elsewhere), presumably only the named chain.
     """
     #split domain-swapped dimer into individual chains.
     structure = PDBParser(PERMISSIVE=1).get_structure('test', pdbfile)
     for chain_id in ('A', 'B'):
         writer = PDBIO()
         writer.set_structure(structure)
         writer.save('chain_{}.pdb'.format(chain_id), ChainSelect(chain_id))
Example #26
0
def parse_atoms_infile(filename):
    '''
    Parse a PDB file and return the list of its atoms named 'CB'.

    parse_atoms_infile(filename):

    File needs to be a PDB file format (*.ent or *.pdb)
    '''
    structure = PDBParser(QUIET=True).get_structure("X", filename)
    return [candidate for candidate in structure.get_atoms()
            if candidate.name == 'CB']
Example #27
0
def f_get_chain_dssp(pdb_name, path, chain_name):
    """Run DSSP on the per-chain PDB file of a structure and return the result.

    :param pdb_name: name whose first four characters identify the entry.
    :param path: prefix joined directly (no separator added) to the file name.
    :param chain_name: single character appended as ``<name>_<chain>.pdb``.
    :return: the ``DSSP`` object built from the first model of that file.

    A confirmation message is printed and also written to the module-level
    ``handlelog``.
    """
    name = pdb_name[0:4]
    pdb_chain_name = "%s_%c.pdb" % (name, chain_name)
    pdb_chain_path = path + pdb_chain_name
    p = PDBParser()
    structure = p.get_structure('X', pdb_chain_path)
    model = structure[0]
    dssp = DSSP(model, pdb_chain_path)
    message = "DSSP created for " + pdb_chain_name + "\n"
    # Single-argument print() behaves the same on Python 2 and 3.
    print(message)
    handlelog.write(message)
    return dssp
Example #28
0
    def test_bad_radius(self):
        """Test if missing, malformed or negative radius case is handled correctly."""
        # Test Entries
        malformed = "ATOM      1  N   PRO     1      000001  02.000 3.0000 -0.1000  1.a00f       N\n"
        missing = "ATOM      1  N   PRO     1      000001  02.000 3.0000 -0.1000               N\n"
        negative = "ATOM      1  N   PRO     1      000001  02.000 3.0000 -0.1000 -1.0000       N\n"

        # Permissive parser: each entry (malformed, missing, negative) must
        # parse, with warnings recorded and discarded, and yield radius None
        permissive_parser = PDBParser(PERMISSIVE=True,
                                      is_pqr=True)  # default initialization
        for entry in (malformed, missing, negative):
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always", PDBConstructionWarning)
                structure = permissive_parser.get_structure(
                    "test", StringIO(entry))

            first_atom = next(structure.get_atoms())
            self.assertEqual(first_atom.get_radius(), None)

        # Test PERMISSIVE mode behaviour
        strict_parser = PDBParser(PERMISSIVE=False,
                                  is_pqr=True)  # default initialization
        for entry in (malformed, negative, missing):
            self.assertRaises(PDBConstructionException,
                              strict_parser.get_structure,
                              "example", StringIO(entry))
Example #29
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    One score table per residue triplet, loaded from
    /home/marciovm/proteins/bdtrimers/<cur lowercased>/<pre>_<cur>_<pos>.scr

    returns:

    (sum of the per-residue scores / length(chain)) * 1000, or 0 when the
    length is 0. Only the first chain of the first model is scored.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    pars = PDBParser(PERMISSIVE=1)
    # str.rstrip('.pdb') strips any trailing '.', 'p', 'd' or 'b' characters
    # (e.g. '1bdp.pdb' -> '1'), so remove the suffix explicitly instead.
    struct_id = PDBfile[:-4] if PDBfile.endswith('.pdb') else PDBfile
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    total = float(0)
    size = length(chain)
    residues = chain.child_list

    # NOTE(review): the original comment says "not first or last res", but
    # range(1, size-2) stops at index size-3; bounds kept as they were.
    for res_index in range(1, size - 2):
        res = residues[res_index]
        cur = res.resname
        pre = residues[res_index - 1].resname
        pos = residues[res_index + 1].resname

        filename = pre + '_' + cur + '_' + pos + '.scr'

        # str.lower() replaces string.lower(), which no longer exists on
        # Python 3
        table_file = ('/home/marciovm/proteins/bdtrimers/'
                      + cur.lower() + '/' + filename)

        chain_index = res.id[1]

        table = load_scores(table_file)
        # load_scores returning 0 means this triplet contributes nothing
        if table != 0:
            total = total + score_help(chain, chain_index, table)
    try:
        return (total / size) * 1000  #normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
Example #30
0
 def parse(self, *pdb_filenames):
     """
     REQUIRED. Adds the protein PDB files. You can specify as many as you want, but only two will be used for the superimposition.

     ``self.proteins`` is reset on every call and then filled, in argument
     order, with one parsed structure per file name.
     """
     pdb_parser = PDBParser(QUIET=True)
     self.proteins = []  # reset proteins to an empty array
     for path in pdb_filenames:
         # the structure id is derived from the file name
         structure_id = self.__get_pdb_id_from_filename(path)
         structure = pdb_parser.get_structure(structure_id, path)
         self.proteins.append(structure)