Ejemplo n.º 1
0
    def test_1_warnings(self):
        """Check warnings: Parse a flawed PDB file in permissive mode.

        Warnings are recorded with ``warnings.catch_warnings(record=True)``,
        which restores both the filter list and ``warnings.showwarning`` when
        the block exits, so this test no longer leaks an 'always' filter into
        the rest of the test run.
        """
        # Expected warning messages, in the order the parser emits them:
        expected = [
            'Atom N defined twice in residue <Residue ARG het=  resseq=2 icode= > at line 19.',
            'disordered atom found with blank altloc before line 31.',
            "Residue (' ', 4, ' ') redefined at line 41.",
            "Blank altlocs in duplicate residue SER (' ', 4, ' ') at line 41.",
            "Residue (' ', 10, ' ') redefined at line 73.",
            "Residue (' ', 14, ' ') redefined at line 104.",
            "Residue (' ', 16, ' ') redefined at line 133.",
            "Residue (' ', 80, ' ') redefined at line 631.",
            "Residue (' ', 81, ' ') redefined at line 644.",
            'Atom O defined twice in residue <Residue HOH het=W resseq=67 icode= > at line 820.',
        ]
        with warnings.catch_warnings(record=True) as all_warns:
            warnings.simplefilter('always', PDBConstructionWarning)
            # Trigger warnings
            p = PDBParser(PERMISSIVE=True)
            p.get_structure("example", "PDB/a_structure.pdb")

        # zip() stops at the shorter sequence, so without this check a parser
        # that emitted too few warnings would pass the loop below vacuously.
        self.assertTrue(len(all_warns) >= len(expected),
                        "expected at least %i warnings, got %i"
                        % (len(expected), len(all_warns)))
        for wrn, msg in zip(all_warns, expected):
            self.assertTrue(msg in str(wrn.message))
Ejemplo n.º 2
0
def experimental_method(pdb_path):
    """
    Return the experimental method recorded in the header of a PDB file.

    The file is parsed with header extraction enabled and the
    ``'structure_method'`` entry of the resulting header dict is returned.

    :param pdb_path: Path to PDB file
    :return: value stored under ``'structure_method'`` in the parsed header
    """
    header_parser = PDBParser(get_header=True)
    # The structure itself is not needed; parsing populates the header.
    header_parser.get_structure('', pdb_path)
    header = header_parser.get_header()
    return header['structure_method']
Ejemplo n.º 3
0
def compare_structure(reference, alternate):
    """Superimpose chain A of *alternate* onto chain A of *reference*.

    Both file names are resolved relative to ``PDBdir``.  CA atoms are
    collected from every residue whose name is a key of ``resdict``.  Three
    superpositions are run in sequence (all CA atoms, the first 50, the
    first 150); after each one the transformed alternate structure is
    written with ``make_pdb_file`` under the prefixes ``Aligned_``,
    ``Aligned_50_`` and ``Aligned_150_``.

    Returns a tuple ``(rms_first_50, rms_first_150, rms_all, n_ref_atoms)``.
    """
    parser = PDBParser()
    ref_struct = parser.get_structure('Reference', path.join(PDBdir, reference))
    alt_struct = parser.get_structure("Alternate", path.join(PDBdir, alternate))

    ref_chain = ref_struct[0]['A']
    alt_model = alt_struct[0]
    alt_chain = alt_model['A']

    ref_atoms = [res['CA'] for res in ref_chain
                 if res.get_resname() in resdict.keys()]
    alt_atoms = [res['CA'] for res in alt_chain
                 if res.get_resname() in resdict.keys()]

    # (slice limit, output prefix); None means "use every atom".
    passes = ((None, "Aligned_"), (50, "Aligned_50_"), (150, "Aligned_150_"))
    rms_values = []
    for limit, prefix in passes:
        imposer = Superimposer()
        imposer.set_atoms(ref_atoms[:limit], alt_atoms[:limit])
        # Each pass moves the whole alternate model, on top of earlier passes.
        imposer.apply(alt_model.get_atoms())
        make_pdb_file(alt_struct, prefix + alternate)
        rms_values.append(imposer.rms)

    full, f_50, f_150 = rms_values
    return f_50, f_150, full, len(ref_atoms)
Ejemplo n.º 4
0
def read_structure(pdb_path, structure_id, quiet=True):
    """Reads in a PDB structure.

    Will read gzip compressed PDB structures.

    Parameters
    ----------
    pdb_path : str
        path to pdb file to read; a falsy value means "no file" and is skipped
    structure_id : str
        structure id of pdb file
    quiet : bool
        passed to ``PDBParser`` as ``QUIET``

    Returns
    -------
    structure : Bio.PDB structure object | None
       returns PDB structure if possible else none
    """
    pdb_parser = PDBParser(QUIET=quiet)  # parser for pdb files

    # skip if there is no pdb for it
    if not pdb_path:
        logger.debug('Skipping pdb {0}'.format(structure_id))
        return None

    # read in pdb file
    try:
        # handle gziped or uncompressed reading
        if pdb_path.endswith('.gz'):
            # Text mode: the parser works on str lines, and a binary handle
            # yields bytes on Python 3.
            with gzip.open(pdb_path, 'rt') as handle:
                structure = pdb_parser.get_structure(structure_id, handle)
        else:
            structure = pdb_parser.get_structure(structure_id, pdb_path)

        # fix homology model chain letters to be "A" instead of " "
        for model in structure:
            for chain in model:
                if chain.id == " ":
                    chain.id = "A"
                    # NOTE(review): some Biopython releases appear to re-key
                    # the parent's child_dict inside the id setter; pop()
                    # tolerates the old key already being gone, where `del`
                    # would raise KeyError and discard a good structure.
                    model.child_dict.pop(' ', None)
                    model.child_dict['A'] = chain

        return structure
    except Exception:
        # KeyboardInterrupt / SystemExit are not Exception subclasses, so
        # they still propagate and the user can stop the program.
        logger.info('Failed reading in structure {0}'.format(structure_id))
        return None
Ejemplo n.º 5
0
 def test_get_sequence_from_pdb_structure(self):
     """Sequence extracted from chain A of ./test.pdb matches the expected string."""
     parsed = PDBParser().get_structure('test', "./test.pdb")
     chain_a = parsed[0]['A']
     observed = construct_protein_graph.get_sequence_from_pdb_structure(chain_a)
     self.assertEqual("VNIKTNPFK", observed)
Ejemplo n.º 6
0
def selectChain(ifn, ofn, chainID='A'):
    """Write only the chain with id *chainID* from PDB file *ifn* to *ofn*.

    A small selector object is handed to ``PDBIO.save``; it accepts every
    model, residue and atom, and accepts a chain only when its id equals
    *chainID*.
    """
    structure = PDBParser().get_structure('x', ifn)

    class ChainSelector():
        def __init__(self, chainID=chainID):
            self.chainID = chainID

        def accept_model(self, model):
            return 1

        def accept_chain(self, chain):
            # 1 keeps the chain, 0 drops it.
            return 1 if chain.get_id() == self.chainID else 0

        def accept_residue(self, residue):
            return 1

        def accept_atom(self, atom):
            return 1

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(ofn, ChainSelector(chainID))
Ejemplo n.º 7
0
    def load_PDB_to_system(self, filename = None):
        """Populate ``self.residues`` from the PDB file *filename*.

        Every residue of every chain of every model becomes a ``Residue``
        whose ``atoms`` list holds one ``Atom`` per parsed atom.  Residue and
        atom ids both restart at 1 for each chain.  ``self.id`` is set to 1
        whenever a chain is visited.
        """
        structure = PDBParser(QUIET=True).get_structure('X', filename)
        self.residues = []

        for model in structure:
            for chain in model:
                self.id = 1
                atom_serial = 1  # runs across all residues of this chain

                for res_serial, pdb_residue in enumerate(chain, 1):
                    residue = Residue(id=res_serial, name=pdb_residue.resname)
                    for pdb_atom in pdb_residue:
                        residue.atoms.append(Atom(id=atom_serial,
                                                  name=pdb_atom.name,
                                                  pos=pdb_atom.coord))
                        atom_serial += 1
                    self.residues.append(residue)
Ejemplo n.º 8
0
    def test_conversion(self):
        """Parse 1LCD.cif, write it out as PDB, parse again and compare."""
        import os  # local import: needed only for temp-file cleanup here

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it is not leaked.
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)
        finally:
            # Remove the temp file whether or not the round trip succeeded.
            os.remove(filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
Ejemplo n.º 9
0
			def test_to_string(self):
				"""Round-trip a structure through its string form and compare counts."""

				def count(items):
					return sum(1 for _ in items)

				source = StringIO()
				source.write(dummy_1)
				source.seek(0)

				mol = MolProcesser(source)
				original = mol.structure
				n_models = count(original.get_models())
				n_chains = count(original.get_chains())  # counted but not compared
				n_resids = count(original.get_residues())
				n_atoms = count(original.get_atoms())
				has_docc = count(a for a in original.get_atoms() if a.is_disordered())
				has_hatm = count(r for r in original.get_residues() if r.id[0] != ' ')

				# Serialise, then parse the text again with a plain PDBParser.
				round_trip = StringIO()
				round_trip.write(mol.tostring)
				round_trip.seek(0)
				reparsed = PDBParser(QUIET=1).get_structure('xyz', round_trip)

				n_models_2 = count(reparsed.get_models())
				n_resids_2 = count(reparsed.get_residues())
				n_atoms_2 = count(reparsed.get_atoms())
				has_docc_2 = count(a for a in reparsed.get_atoms() if a.is_disordered())
				has_hatm_2 = count(r for r in reparsed.get_residues() if r.id[0] != ' ')

				self.assertEqual(n_models, n_models_2)
				self.assertEqual(n_resids, n_resids_2)
				self.assertEqual(n_atoms, n_atoms_2)
				self.assertEqual(has_docc, has_docc_2)
				self.assertEqual(has_hatm, has_hatm_2)
Ejemplo n.º 10
0
def chain2pos_scan_str(chain, pdb, mutation_set='a'):
  """
  Takes a chain ID and a model.PDBFile object, returns a string
  suitable as the PositionScan line for FoldX.

  Each residue of the requested chain (first model only) whose name
  three_to_one() can translate contributes one entry of the form
  <one-letter code><chain><residue number><mutation_set>; entries are
  joined with commas.  Returns '' when nothing matches.
  """
  parser = PDBParser(PERMISSIVE=1)
  pdbfn = pdb.fullpath()
  struct = parser.get_structure(pdb.uuid, pdbfn)[0]

  entries = []
  for c in Selection.unfold_entities(struct, 'C'):
    if c.id != chain:
      continue
    for r in c:
      try:
        aa = three_to_one(r.get_resname())
      except KeyError:
        # non-native amino acid or water: no one-letter code, skip it.
        # NOTE(review): assumes three_to_one signals an unknown residue
        # name with KeyError (as Bio.PDB.Polypeptide does) -- confirm.
        continue
      entries.append('%s%s%i%s' % (aa, chain, r.id[1], mutation_set))

  return ','.join(entries)
Ejemplo n.º 11
0
 def test_NACCESS(self):
     """Test NACCESS generation from PDB"""
     pdbfile = "PDB/1A8O.pdb"
     structure = PDBParser().get_structure("1A8O", pdbfile)
     first_model = structure[0]
     result = NACCESS(first_model, pdbfile)
     self.assertEqual(len(result), 66)
Ejemplo n.º 12
0
    def run(self):
        """Dump per-residue distances to the ligand atoms as JSON.

        Reads the ligand from ``self.getPath().sdf`` (format taken from the
        file extension, hydrogens removed) and the protein from
        ``self.getPath().pdb``.  For every protein residue one record with
        keys ``dists``, ``atom_types`` and ``residue`` is written to
        ``self.output()``.
        """
        mypath = self.getPath()
        lig_ifn = mypath.sdf
        prt_ifn = mypath.pdb

        lig_ext = os.path.basename(lig_ifn).split('.')[-1]
        # next() builtin instead of the Python-2-only .next() method.
        lig = next(pybel.readfile(lig_ext, lig_ifn))
        lig.removeh()
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure('prt', prt_ifn)

        # Translate internal atom types to SYB types.
        typetable = OBTypeTable()
        typetable.SetFromType('INT')
        typetable.SetToType('SYB')

        dat = []
        atom_types = [typetable.Translate(atom.type) for atom in lig.atoms]
        # NOTE(review): if `shuffle` is random.shuffle it works in place and
        # returns None, which would make every "atom_types" entry null in the
        # output -- confirm which shuffle is imported.
        atom_types = shuffle(atom_types)
        for residue in structure.get_residues():
            dists = residueDistances2LigandAtoms(residue, lig)
            dat.append({"dists": dists,
                        "atom_types": atom_types,
                        "residue": residue.get_resname()})

        to_write = json.dumps(dat, indent=4, separators=(',', ':'))
        with self.output().open('w') as ofs:
            ofs.write(to_write)
Ejemplo n.º 13
0
def RemoveLigandsOneBioUnit(biounit, ligandlist):
    """Strip listed ligands (and waters) from a biounit file and save it.

    *biounit* is the path of the file to parse.  *ligandlist* is a list of
    dicts; the keys read here are ``"ChainID"``, ``"ResNum"`` and
    ``"LigName"``.  For each ligand entry, every residue of every chain is
    examined: a residue on the ligand's chain with the ligand's residue
    number is detached; otherwise a residue whose hetero flag is ``"W"`` is
    detached; otherwise, for ligand names made of several
    whitespace-separated parts, residues numbered at or after the ligand are
    handed to ``LongLigand``.

    The result is written to ``BIOSTRDIR`` under the structure id (the last
    ``/``-separated component of *biounit*).  Returns None, both on success
    and when the file cannot be parsed (in which case nothing is written).
    """
    p = PDBParser(PERMISSIVE = 1)
    pdbname= biounit.split("/")[-1]
    try:
        models = p.get_structure(pdbname, biounit)
    except Exception:
        # Unparseable input is skipped on purpose; Exception (not a bare
        # except) lets KeyboardInterrupt / SystemExit propagate.
        return None
    for rligand in ligandlist:
        for model in models:
            for chain in model:
                # Iterate over a snapshot: residues are detached in the loop.
                for residue in list(chain):
                    if chain.id == rligand["ChainID"] and int(rligand["ResNum"]) == residue.id[1]:
                        chain.detach_child(residue.id)
                    elif residue.id[0] == "W":
                        chain.detach_child(residue.id)
                    elif len(rligand["LigName"].split()) > 1 and int(rligand["ResNum"]) <= residue.id[1]:
                        LongLigand(chain, residue, rligand)
    io = PDBIO()
    io.set_structure(models)
    filepath = os.path.join(BIOSTRDIR, models.id)
    io.save(filepath)
Ejemplo n.º 14
0
def parse_freesasa_output(fpath):
    """
    Read per-atom and per-residue accessibility from a freesasa PDB output.

    *fpath* is an object with a ``name`` attribute holding the file path.
    The per-atom value is taken from each atom's B-factor field.

    Returns ``(asa_data, rsa_data)``:
    ``asa_data`` maps ``(chain id, residue name, residue number, atom name)``
    to the atom's value; ``rsa_data`` maps
    ``(chain id, residue name, residue number)`` to the residue's summed
    value divided by ``rel_asa['total'][residue name]``.
    """
    asa_data = {}
    rsa_data = {}

    structure = PDBParser(QUIET=1).get_structure('bogus', fpath.name)
    for residue in structure.get_residues():
        chain_id = residue.parent.id
        resname = residue.resname
        resnum = residue.id[1]

        residue_total = 0
        for atom in residue:
            atom_asa = atom.bfactor
            asa_data[(chain_id, resname, resnum, atom.name)] = atom_asa
            residue_total += atom_asa

        reference = rel_asa['total'][resname]
        rsa_data[(chain_id, resname, resnum)] = residue_total / reference

    return asa_data, rsa_data
Ejemplo n.º 15
0
def parse_structure(path):
    """
    Parses a structure using Biopython's PDB/mmCIF Parser
    Verifies the integrity of the structure (gaps) and its
    suitability for the calculation (is it a complex?).

    The parser is chosen from the file extension: ``pdb`` / ``ent`` use
    PDBParser, ``cif`` uses MMCIFParser.  The structure id is the file name
    without its last extension.

    Returns a tuple ``(validate_structure(s), number of distinct chain ids,
    number of residues)``.

    Raises IOError for an unsupported extension, and Exception (wrapping the
    parser's error) when the file cannot be parsed.
    """
    # setup logging
    logger = logging.getLogger('Prodigy')
    logger.info('[+] Reading structure file: {0}'.format(path))
    fname = os.path.basename(path)
    sname = '.'.join(fname.split('.')[:-1])
    s_ext = fname.split('.')[-1]

    _ext = {'pdb', 'ent', 'cif'}
    if s_ext not in _ext:
        raise IOError('[!] Structure format \'{0}\' is not supported. Use \'.pdb\' or \'.cif\'.'.format(s_ext))

    sparser = PDBParser(QUIET=1) if s_ext in {'pdb', 'ent'} else MMCIFParser()

    try:
        s = sparser.get_structure(sname, path)
    except Exception as exeption:
        # Logger.error() takes no `file` keyword (that belongs to print());
        # passing it raised TypeError here and masked the real parse error.
        logger.error('[!] Structure \'{0}\' could not be parsed'.format(sname))
        raise Exception(exeption)

    return (validate_structure(s),
            len(set([c.id for c in s.get_chains()])),
            len(list(s.get_residues())))
Ejemplo n.º 16
0
 def test_dssp(self):
     """Test DSSP generation from PDB."""
     pdbfile = "PDB/2BEG.pdb"
     structure = PDBParser().get_structure("2BEG", pdbfile)
     first_model = structure[0]
     result = DSSP(first_model, pdbfile)
     self.assertEqual(len(result), 130)
Ejemplo n.º 17
0
def main():
    """Parse pdb10gs.ent and run TestDSSP once per model in the file."""
    p = PDBParser()
    filename = "pdb10gs.ent"
    models = p.get_structure("10GS", filename)
    for model in models:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        # NOTE(review): models[0] (not `model`) is printed and tested on
        # every iteration -- confirm this is intended.
        print(models[0])
        print(model.get_full_id())
        TestDSSP(models[0], filename)
Ejemplo n.º 18
0
 def test_3_bad_xyz(self):
     """Check error: Parse an entry with bad x,y,z value."""
     strict_parser = PDBParser(PERMISSIVE=False)

     # A well-formed ATOM record must parse without raising.
     good_record = "ATOM      9  N   ASP A 152      21.554  34.953  27.691  1.00 19.26           N\n"
     strict_parser.get_structure("example", StringIO(good_record))

     # Same record with a non-numeric x coordinate must be rejected.
     bad_record = "ATOM      9  N   ASP A 152      21.ish  34.953  27.691  1.00 19.26           N\n"
     with self.assertRaises(PDBConstructionException):
         strict_parser.get_structure("example", StringIO(bad_record))
Ejemplo n.º 19
0
def main():
    """Parse test/10gs.bio1 and run TestNACCESS once per model in the file."""
    p = PDBParser()
    filename = "test/10gs.bio1"
    models = p.get_structure("10gs", filename)
    for model in models:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        # NOTE(review): models[0] (not `model`) is printed and tested on
        # every iteration -- confirm this is intended.
        print(models[0])
        print(model.get_full_id())
        TestNACCESS(models[0], filename)
Ejemplo n.º 20
0
def build_all_angles_model(pdb_filename):
    """Build one geometry object per recognised residue of chain A.

    The file is looked up under ``PDBdir``.  Residues whose name is not a key
    of ``resdict`` are ignored.  The first recognised residue gets an
    unmodified geometry; every later one has its backbone angles and
    dihedrals (in degrees) filled in from its own N/CA/C atoms and those of
    the previous recognised residue.

    Returns the list of geometry objects in chain order.
    """
    parser = PDBParser()
    structure = parser.get_structure('sample',
                                     path.join(PDBdir, pdb_filename))
    chain = structure[0]['A']
    to_degrees = 180.0 / math.pi

    geometries = []
    previous = None  # (N, CA, C) atoms of the preceding recognised residue
    for res in chain:
        resname = res.get_resname()
        if resname not in resdict.keys():
            continue

        geo = Geometry.geometry(resdict[resname])
        if previous is None:
            previous = (res['N'], res['CA'], res['C'])
        else:
            n1, ca1, c1 = (atom.get_vector() for atom in previous)

            c_atom = res['C']
            n_atom = res['N']
            ca_atom = res['CA']

            c = c_atom.get_vector()
            n = n_atom.get_vector()
            ca = ca_atom.get_vector()

            # Bond angles across the peptide bond.
            geo.CA_C_N_angle = calc_angle(ca1, c1, n) * to_degrees
            geo.C_N_CA_angle = calc_angle(c1, n, ca) * to_degrees

            # Dihedrals, all stored on the current residue's geometry.
            geo.psi_im1 = calc_dihedral(n1, ca1, c1, n) * to_degrees
            geo.omega = calc_dihedral(ca1, c1, n, ca) * to_degrees
            geo.phi = calc_dihedral(c1, n, ca, c) * to_degrees

            geo.N_CA_C_angle = calc_angle(n, ca, c) * to_degrees

            previous = (n_atom, ca_atom, c_atom)

        geometries.append(geo)
    return geometries
def getPdbAtomsBySerialNum(pdb_fn, serial_nums):
    """Return the atoms of *pdb_fn* in the order given by *serial_nums*.

    Atoms are indexed by their ``serial_number``; if a serial number occurs
    more than once in the file, the last atom carrying it wins.  A serial
    number absent from the file raises KeyError.
    """
    structure = PDBParser(QUIET=True).get_structure('x', pdb_fn)
    by_serial = {}
    for atom in structure.get_atoms():
        by_serial[atom.serial_number] = atom
    return [by_serial[num] for num in serial_nums]
Ejemplo n.º 22
0
def pdb2dfromactivesite(pdb_fh,active_sites=[]):
    """
    Tabulate distances between source atoms and the CA atom of each
    amino-acid residue in chain A of the first model.

    Sources are the atoms of (a) every residue of chain A whose name is
    neither in the local junk list nor in ``aas_21_3letter`` (treated as
    ligands), and (b) every amino-acid residue whose residue number is in
    ``active_sites``.

    :param pdb_fh: path to .pdb file.
    :param active_sites: optional list of residue numbers as sources.
        The default list is only read here, never modified.
    :returns dfromligands: pandas table indexed by residue number ("aasi"),
        one column per source atom plus average/minimum summary columns.
    """
    # Residue names that are never treated as ligands (water, ions, buffer).
    junk_residues = ["HOH"," MG","CA"," NA","SO4","IOD","NA","CL","GOL","PO4"]
    pdb_parser=PDBParser()
    pdb_data=pdb_parser.get_structure("pdb_name",pdb_fh)
    model = pdb_data[0]
    chainA = model["A"] # only chain A is considered
    # NOTE(review): `residues` is not used again in this function.
    residues   = list(chainA.get_residues())
    ligands_residue_objs=[]
    for residue in chainA:
        if not residue.get_resname() in junk_residues:
            if not residue.get_resname() in aas_21_3letter: # not an amino acid: ligand
                ligands_residue_objs.append(residue)
            elif residue.id[1] in active_sites:
                # amino acid explicitly requested as a source
                ligands_residue_objs.append(residue)
            
    dfromligands=pd.DataFrame()
    for ligandi in range(len(ligands_residue_objs)):
        ligand_residue_obj=ligands_residue_objs[ligandi]
        for ligand_atom_obj in ligand_residue_obj:
            for residue in chainA:
                if residue.get_resname() in aas_21_3letter: # rows are amino acids only
                    # Assigning through .loc creates the row on first use.
                    dfromligands.loc[residue.id[1],"ref_pdb"]=residue.get_resname()
                    # The value stored is (source atom) - (residue CA atom);
                    # presumably Bio.PDB's atom subtraction, i.e. a distance.
                    if not ligand_residue_obj.get_resname() in aas_21_3letter:
                        dfromligands.loc[residue.id[1],"Distance from Ligand: %s (ATOM: %s)" % \
                                         (ligand_residue_obj.get_resname(),ligand_atom_obj.get_name())]\
                        =ligand_residue_obj[ligand_atom_obj.get_name()]-residue["CA"]
                    else:
                        dfromligands.loc[residue.id[1],"Distance from active site residue: %s %d (ATOM: %s)" % \
                                         (ligand_residue_obj.get_resname(),ligand_residue_obj.get_id()[1],\
                                          ligand_atom_obj.get_name())]\
                        =ligand_residue_obj[ligand_atom_obj.get_name()]-residue["CA"]

    dfromligands.index.name="aasi"
    # The helper column with residue names is dropped from the result.
    if "ref_pdb" in dfromligands:
        del dfromligands["ref_pdb"]
    #average and minimum distances
    cols_all=dfromligands.columns.tolist()
    for moltype in ['Distance from Ligand:','Distance from active site residue:']:
        cols_moltype=[c for c in cols_all if moltype in c]
        # NOTE(review): this tests cols_all, not cols_moltype, so summary
        # columns are also created for a source type that has no columns
        # of its own -- confirm whether that is intended.
        if len(cols_all)>0:
            dfromligands.loc[:,'%s average' % moltype]=dfromligands.loc[:,cols_moltype].T.mean()
            dfromligands.loc[:,'%s minimum' % moltype]=dfromligands.loc[:,cols_moltype].T.min()
            # Column name up to " (ATOM" identifies one source molecule.
            mols=np.unique([c[c.find(moltype):c.find(' (ATOM')] for c in cols_moltype])
            if len(mols)>1:
                # Per-molecule summaries only when there are several sources.
                for mol in mols:
                    cols_mol=[c for c in cols_moltype if mol in c]
                    dfromligands.loc[:,'%s: average' % mol]=dfromligands.loc[:,cols_mol].T.mean()
                    dfromligands.loc[:,'%s: minimum' % mol]=dfromligands.loc[:,cols_mol].T.min()    

    return dfromligands
Ejemplo n.º 23
0
class PDBAtomAtomDistanceReader(object):
    """Distance queries over every model of one PDB file.

    The file *pdbname* is parsed once at construction; each model is wrapped
    in a ``PDBModel``.  ``region`` is *label* with all decimal digits
    removed.
    """

    def __init__(self, pdbname, label):
        self.pdbname = pdbname
        self.label = label
        # str.translate(None, ...) exists only on Python 2 byte strings;
        # filtering characters gives the same result on Python 2 and 3.
        self.region = ''.join(ch for ch in label if ch not in '0123456789')

        self.parser = PDBParser(QUIET=True)
        self.structure = self._load_structure()
        self.models = [PDBModel(model) for model in self.structure.get_list()]

    def _load_structure(self):
        """Parse ``self.pdbname`` using ``self.label`` as the structure id."""
        return self.parser.get_structure(self.label, self.pdbname)

    def get_single_distance(self, p1, p2, exclude_backbone=False,
            CA_only=False):
        """Summarise the p1-p2 distance over all models.

        Each model's ``get_single_distance`` must return a dict with at
        least the keys ``'dist'``, ``'r1'`` and ``'r2'``.  The returned dict
        carries the full per-model records with the smallest and largest
        ``'dist'`` under ``'min_dist'`` / ``'max_dist'``, the mean distance
        under ``'avg_dist'``, and ``'r1'`` / ``'r2'`` taken from the
        minimum-distance record.

        Raises IndexError when there are no models.
        """
        distances = []

        for model in self.models:
            dist_info = model.get_single_distance(p1, p2, exclude_backbone,
                                                  CA_only)
            distances.append(dist_info)

        distances.sort(key=lambda x: x['dist'])
        min_dist = distances[0]
        max_dist = distances[-1]
        avg_dist = np.array([d['dist'] for d in distances]).mean()

        return {'protein': self.region, 'p1': p1, 'p2': p2, 'label': self.label,
                'r1': min_dist['r1'], 'r2': min_dist['r2'],
                'min_dist': min_dist,
                'avg_dist': avg_dist,
                'max_dist': max_dist}

    def get_pair_distances(self):
        """Return pairwise distances as a list of ``key + value`` tuples.

        With a single model its mapping is flattened directly.  With several
        models, for every pair of residues of the first model the minimum
        value across models is kept.  Keys are ``(p1, p2)`` tuples, so the
        values must be tuples as well for the concatenation to work.
        """
        distances = []

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Calculating distances...")
        for model in self.models:
            dist_info = model.get_pair_distances()
            distances.append(dist_info)

        if len(self.models) == 1:
            # .items() instead of the Python-2-only .iteritems()
            return [k + v for k, v in distances[0].items()]

        final_distances = {}

        residues = self.models[0].residues
        for r1, r2 in combinations(residues, 2):
            p1 = r1.get_id()[1]
            p2 = r2.get_id()[1]
            pair = (p1, p2)
            final_distances[pair] = min([d[pair] for d in distances])

        final_distances = [k + v for k, v in final_distances.items()]

        return final_distances
Ejemplo n.º 24
0
	def _get_resmapping(self):
		"""Return ``(code, residue number)`` pairs for the selected residues.

		Chain A of the first model of the protein PDB file is scanned; a
		residue is kept when its number, as a string, is in ``self.resnums``.
		The code is looked up in ``self.codes`` by residue name.
		"""
		pdb_path = self._get_filepath('', pdb_file=True)
		parsed = PDBParser(QUIET=True).get_structure('protein', pdb_path)
		chain_a = parsed[0]['A']
		return [
			(self.codes[res.resname], res.id[1])
			for res in chain_a.get_residues()
			if str(res.id[1]) in self.resnums
		]
Ejemplo n.º 25
0
	def _get_ligand_name(self):
		"""Append ligand residue names from ``self.out_filename`` to ``self.ligands``.

		Every residue of chain A (first model) whose name is not in
		``self.ignore`` is recorded, then the collected list is printed.
		"""
		p = PDBParser(QUIET=True)
		ligand = p.get_structure('ligand', self.out_filename)
		chain = ligand[0]['A']
		for residue in chain.get_residues():
			if residue.resname not in self.ignore:
				self.ligands.append(residue.resname)
		# One pre-formatted argument keeps the output identical to the old
		# Python 2 statement `print "Ligands found: ", x` (label, extra
		# space, list) while also being valid on Python 3.
		print("Ligands found:  %s" % (self.ligands,))
Ejemplo n.º 26
0
 def test_fragment_mapper(self):
     """Self test for FragmentMapper module."""
     structure = PDBParser().get_structure("X", "PDB/1A8O.pdb")
     model = structure[0]
     mapper = FragmentMapper(model, 10, 5, "PDB")
     # Only residues the mapper knows about are checked.
     for residue in Selection.unfold_entities(model, "R"):
         if residue not in mapper:
             continue
         description = str(mapper[residue])
         self.assertTrue(description.startswith("<Fragment length=5 id="))
Ejemplo n.º 27
0
 def test_empty(self):
     """Parse an empty file."""
     parser = PDBParser()
     descriptor, empty_path = tempfile.mkstemp()
     os.close(descriptor)
     try:
         parsed = parser.get_structure('MT', empty_path)
         # An empty file yields a structure without any models.
         n_models = len(parsed)
         self.assertFalse(n_models)
     finally:
         os.remove(empty_path)
Ejemplo n.º 28
0
 def check_msms(self, prot_file, first_100_residues):
     """Compare the first 100 ResidueDepth residue names with the expected string.

     *prot_file* is parsed with PDB construction warnings silenced;
     ResidueDepth is run on the first model and the residue names of the
     first 100 entries of its property list are concatenated.
     """
     parser = PDBParser()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = parser.get_structure("X", prot_file)
     depth = ResidueDepth(structure[0])
     names = [entry[0].get_resname() for entry in depth.property_list[:100]]
     self.assertEqual(''.join(names), first_100_residues)
Ejemplo n.º 29
0
 def test_model_numbering(self):
     """Preserve model serial numbers during I/O."""
     def confirm_numbering(struct):
         self.assertEqual(len(struct), 20)
         for idx, model in enumerate(struct):
             # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
             # failure message, so it never compared the two values.
             self.assertEqual(model.serial_num, idx + 1)
             self.assertEqual(model.serial_num, model.id + 1)
     parser = PDBParser()
     struct1 = parser.get_structure("1mot", "PDB/1MOT.pdb")
     confirm_numbering(struct1)
     # Round trip: serialize and parse again
     io = PDBIO()
     io.set_structure(struct1)
     filenumber, filename = tempfile.mkstemp()
     os.close(filenumber)
     try:
         io.save(filename)
         struct2 = parser.get_structure("1mot", filename)
         confirm_numbering(struct2)
     finally:
         os.remove(filename)
def open_pdb(pdbfn):
    """Parse a PDB file with Biopython.

    Args:
       pdbfn (str): a path to a pdb structure

    Returns:
       PDB Biopython object: the parsed structure, created with an empty
       structure id

    """
    return PDBParser().get_structure('', pdbfn)
Ejemplo n.º 31
0
def ligand_com(refinement_input, ligand_chain):
    """
    Calculate ligand's center of mass.

    Parameters
    ------------
    refinement_input : str
        Path (or glob pattern) to PDB file(s).

    ligand_chain : str
        Value compared against each residue's ``resname`` to pick the
        ligand residues (despite the parameter name, it is not compared
        with the chain id).


    Returns
    --------
    output : list[list[float]]
        One center-of-mass vector per matched file, in glob order.  A file
        without matching atoms contributes ``[0.0, 0.0, 0.0]``.
    """
    parser = PDBParser()
    output = []
    refinement_input = glob.glob(refinement_input)

    for inp in refinement_input:
        structure = parser.get_structure("inp", inp)
        mass = 0.0
        weighted_sum = np.zeros(3)
        for res in structure.get_residues():
            if res.resname != ligand_chain:
                continue
            for atom in res.get_atoms():
                weighted_sum = weighted_sum + np.array(list(atom.get_vector())) * atom.mass
                mass += atom.mass

        # Divide once, after all matching residues were accumulated.
        # Dividing inside the residue loop rescaled the running sum again
        # for every further matching residue and gave a wrong result.
        com = weighted_sum / mass if mass else weighted_sum
        output.append(com.tolist())

    return output
Ejemplo n.º 32
0
# Layout of csvlist (one row per input field):
##0: PDB ID
##1: Chain (default A)
##2: Residue(s)
##3: Distance
select = csvlist[0][0]
achn = csvlist[1][0]
mk = csvlist[3][0]
# Copy every entry of the residue row into ares.
for i in range(0, len(csvlist[2])):
    ares.append(csvlist[2][i])
# Download the entry into ./pdb and build the path of the retrieved file
pdbl = PDBList()
pdbl.retrieve_pdb_file(select, pdir='pdb')
file_path = filebase + '/pdb/pdb' + select + '.ent'
# Read the file
parser = PDBParser(QUIET=1)
structure = parser.get_structure('test', file_path)
# Residue info: open the CSV report and write two separator rows
# NOTE(review): rf is not closed within the visible lines -- confirm it is
# closed further down.
rf = open(os.path.join(outfilebase, "residue_list" + '.csv'), 'wt')
reswriter = csv.writer(rf, lineterminator='\n')
reswriter.writerow(
    ["-------------------------------------------------------------------"])
reswriter.writerow(
    ["-------------------------------------------------------------------"])
model = structure[0]
chain = model[achn]
# Record name and number of every residue of the chosen chain that is not
# named HOH and whose hetero flag (first field of the residue id) is blank.
for res in chain.get_residues():
    tags = res.get_full_id()
    if res.get_resname() != 'HOH' and tags[3][0] == " ":
        resname.append(res.get_resname())
        resid = res.get_id()
        rescode.append(resid[1])
Ejemplo n.º 33
0
    ftp.cwd("/pub/pdb/data/structures/all/pdb")

    filenames = []
    ftp.retrlines('NLST', callback=lambda line: filenames.append(line))
    print("files: %s" % len(filenames))

    for filename in filenames:
        print("downloading %s ..." % filename)
        with open(filename, 'wb') as fp:
            ftp.retrbinary("RETR %s" % filename, callback=fp.write)

        print("processing: %s" % filename)

        p = PDBParser()
        with gzip.open(filename, 'rt') as f:
            structure = p.get_structure("", f)

        pdb_id = structure.header["idcode"]

        assert pdb_id, "no PDB ID for %s" % filename

        model = structure[0]

        try:
            dssp = DSSP(model, filename, dssp="/Users/luis/dssp-2.3.0/mkdssp")
        except Exception as e:
            print(e)
            print()
            os.remove(filename)
            continue
Ejemplo n.º 34
0
def extract_beads(pdb_path,
                  amino_acids_csv='/home/hyang/bio/erf/data/amino_acids.csv'):
    """Write a per-residue CA/CB bead table for a PDB file.

    Every residue whose name appears in the amino-acid vocabulary and that has
    a CA atom (and a CB atom, unless it is GLY) contributes one row. For GLY
    the CA coordinate is used as the CB coordinate. The table is written to
    ``f'{pdb_path}_bead.csv'``; nothing is returned.

    Args:
        pdb_path: Path of the PDB file to parse.
        amino_acids_csv: CSV file with ``AA3C`` (three-letter code) and ``AA``
            columns defining the accepted residue names and the value stored
            in the ``group_name`` column.

    Raises:
        ValueError: If no residue of the structure yields a bead.
    """
    amino_acids = pd.read_csv(amino_acids_csv)
    # Upper-cased three-letter code -> value of the AA column. The dict also
    # serves as the membership test for accepted residue names.
    vocab_dict = {
        x.upper(): y
        for x, y in zip(amino_acids.AA3C, amino_acids.AA)
    }

    p = PDBParser()
    structure = p.get_structure('X', pdb_path)
    residue_list = Selection.unfold_entities(structure, 'R')

    ca_center_list = []
    cb_center_list = []
    res_name_list = []
    res_num_list = []
    chain_list = []

    for res in residue_list:
        res_name = res.get_resname()
        if res_name not in vocab_dict:
            continue

        # Fetch each atom once; a missing CA or CB skips the whole residue.
        try:
            ca_coord = res['CA'].get_coord()
            if res_name == 'GLY':
                cb_coord = ca_coord
            else:
                cb_coord = res['CB'].get_coord()
        except KeyError:
            print(f'{pdb_path}, {res} missing CA / CB atoms')
            continue

        chain_list.append(res.parent.id)
        res_name_list.append(vocab_dict[res_name])
        res_num_list.append(res.id[1])
        ca_center_list.append(ca_coord)
        cb_center_list.append(cb_coord)

    if not ca_center_list:
        # np.vstack([]) would raise a ValueError with an unhelpful message.
        raise ValueError(
            f'{pdb_path}: no residues with complete CA / CB atoms')

    ca_center = np.vstack(ca_center_list)
    cb_center = np.vstack(cb_center_list)

    df = pd.DataFrame({
        'chain_id': chain_list,
        'group_num': res_num_list,
        'group_name': res_name_list,
        'x': ca_center[:, 0],
        'y': ca_center[:, 1],
        'z': ca_center[:, 2],
        'xcb': cb_center[:, 0],
        'ycb': cb_center[:, 1],
        'zcb': cb_center[:, 2]
    })

    # Assign a "chain" number for the energy calculation: the counter goes up
    # whenever a row does not directly continue the previous one (different
    # chain id, or residue number not exactly previous + 1).
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    chain = np.zeros(df.shape[0], dtype=int)
    chain_id = df['chain_id'].values
    group_num = df['group_num'].values
    count = 0
    for i in range(1, df.shape[0]):
        continues_previous = (chain_id[i] == chain_id[i - 1]
                              and group_num[i] == group_num[i - 1] + 1)
        if not continues_previous:
            count += 1
        chain[i] = count
    df['chain'] = chain

    df.to_csv(f'{pdb_path}_bead.csv', index=False)
    def handle(self, *args, **options):
        """Build Protein, ProteinConformation and Residue rows for the alpha subunit of each complex.

        With ``options['purge']`` set, the rows whose protein entry name ends
        in ``'_a'`` and whose family grandparent is named ``'Alpha'`` are
        deleted first.

        For every ``SignprotComplex`` the method then:

        1. fetches or creates the ``<pdb code>_a`` Protein and its
           ProteinConformation;
        2. parses the stored PDB text and collects the residue numbers of
           chain ``sc.alpha`` that carry a CA atom;
        3. pairs each structure residue with the reference residue of
           ``sc.protein`` that has the same sequence number;
        4. records pairs whose amino acids differ and discards those covered
           by SEQADV records of the PDB text;
        5. for the mismatches that remain, re-maps residues using a local
           pairwise alignment (skipped for 6OIJ, 6OY9 and 6OYA);
        6. bulk-creates one Residue per structure residue.

        Any exception raised while processing a complex is printed and logged
        at info level, and the loop continues with the next complex.
        """
        self.options = options
        if self.options['purge']:
            Residue.objects.filter(
                protein_conformation__protein__entry_name__endswith='_a',
                protein_conformation__protein__family__parent__parent__name=
                'Alpha').delete()
            ProteinConformation.objects.filter(
                protein__entry_name__endswith='_a',
                protein__family__parent__parent__name='Alpha').delete()
            Protein.objects.filter(
                entry_name__endswith='_a',
                family__parent__parent__name='Alpha').delete()

        # Building protein and protconf objects for g protein structure in complex
        scs = SignprotComplex.objects.all()
        for sc in scs:
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
                .format(sc))
            try:
                # Alpha subunit
                # NOTE(review): the bare except below treats every failure of
                # the lookup (not only "does not exist") as "create a new row".
                try:
                    alpha_protein = Protein.objects.get(
                        entry_name=sc.structure.pdb_code.index.lower() + '_a')
                except:
                    alpha_protein = Protein()
                    alpha_protein.entry_name = sc.structure.pdb_code.index.lower(
                    ) + '_a'
                    alpha_protein.accession = None
                    alpha_protein.name = sc.structure.pdb_code.index.lower(
                    ) + '_a'
                    alpha_protein.sequence = sc.protein.sequence
                    alpha_protein.family = sc.protein.family
                    alpha_protein.parent = sc.protein
                    alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                    alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                        slug='mod')
                    alpha_protein.source = ProteinSource.objects.get(
                        name='OTHER')
                    alpha_protein.species = sc.protein.species
                    alpha_protein.save()
                # Same get-or-create pattern (and same bare except) for the
                # conformation.
                try:
                    alpha_protconf = ProteinConformation.objects.get(
                        protein__entry_name=sc.structure.pdb_code.index.lower(
                        ) + '_a')
                except:
                    alpha_protconf = ProteinConformation()
                    alpha_protconf.protein = alpha_protein
                    alpha_protconf.state = ProteinState.objects.get(
                        slug='active')
                    alpha_protconf.save()
                # Parse the PDB text stored on the structure and take the
                # alpha chain of the first model.
                pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                s = pdbp.get_structure('struct',
                                       StringIO(sc.structure.pdb_data.pdb))
                chain = s[0][sc.alpha]
                # Residue numbers of the chain residues that have a CA atom;
                # residues without one are skipped silently.
                nums = []
                for res in chain:
                    try:
                        res['CA']
                        nums.append(res.get_id()[1])
                    except:
                        pass

                resis = Residue.objects.filter(
                    protein_conformation__protein=sc.protein)
                # NOTE(review): num_i and temp_seq2 are not read again in
                # this method.
                num_i = 0
                temp_seq2 = ''
                # residue number -> [structure residue, reference Residue]
                pdb_num_dict = OrderedDict()
                # Create first alignment based on sequence numbers
                for n in nums:
                    # Special case: for 6OIJ, numbers below 30 are looked up
                    # six positions further along the reference.
                    if sc.structure.pdb_code.index == '6OIJ' and n < 30:
                        nr = n + 6
                    else:
                        nr = n
                    pdb_num_dict[n] = [chain[n], resis.get(sequence_number=nr)]
                # Find mismatches
                mismatches = []
                for n, res in pdb_num_dict.items():
                    if AA[res[0].get_resname()] != res[1].amino_acid:
                        mismatches.append(res)

                pdb_lines = sc.structure.pdb_data.pdb.split('\n')
                seqadv = []
                for l in pdb_lines:
                    if l.startswith('SEQADV'):
                        seqadv.append(l)
                # residue number -> [group 1, group 5]; shifted_mutations
                # additionally stores int(group 6) as a third element.
                mutations, shifted_mutations = OrderedDict(), OrderedDict()
                # Search for annotated engineered mutations in pdb SEQADV
                # Groups as used below: 1 = three-letter name, 2 = chain,
                # 3 = number, 4 = 12-character field compared (stripped) with
                # the accession, 5 = three-letter name, 6 = number,
                # 7 = trailing text checked for 'CONFLICT'.
                # NOTE(review): this loop rebinds `s`, which held the parsed
                # structure above; the structure is not used again afterwards.
                # NOTE(review): the pattern is not a raw string, so it relies
                # on Python passing the unknown escapes (\s, \d, \S) through.
                for s in seqadv:
                    line_search = re.search(
                        'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)',
                        s)
                    if line_search != None:
                        if line_search.group(2) == sc.alpha:
                            if line_search.group(
                                    4).strip() == sc.protein.accession:
                                if line_search.group(3) == line_search.group(
                                        6):
                                    mutations[int(line_search.group(3))] = [
                                        line_search.group(1),
                                        line_search.group(5)
                                    ]
                                else:
                                    shifted_mutations[int(
                                        line_search.group(3))] = [
                                            line_search.group(1),
                                            line_search.group(5),
                                            int(line_search.group(6))
                                        ]
                            else:
                                # Exception for 6G79
                                if line_search.group(3) != line_search.group(
                                        6) and 'CONFLICT' in line_search.group(
                                            7):
                                    mutations[int(line_search.group(3))] = [
                                        line_search.group(1),
                                        line_search.group(5)
                                    ]
                                # Exception for 5G53
                                # NOTE(review): inside this else branch the
                                # accession is already known to differ, so
                                # this condition is always true here.
                                if line_search.group(
                                        4).strip() != sc.protein.accession:
                                    mutations[int(line_search.group(3))] = [
                                        line_search.group(1),
                                        line_search.group(5)
                                    ]
                remaining_mismatches = []

                # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                for m in mismatches:
                    num = m[0].get_id()[1]
                    if num in mutations:
                        # Kept only when BOTH sides disagree with the SEQADV
                        # record for this number.
                        if m[0].get_resname() != mutations[num][0] and m[
                                1].amino_acid != AA[mutations[num][1]]:
                            remaining_mismatches.append(m)
                    # NOTE(review): the next two branches are identical.
                    elif num in shifted_mutations:
                        remaining_mismatches.append(m)
                    else:
                        remaining_mismatches.append(m)

                ### sanity check
                # print(mutations)
                # print(shifted_mutations)
                # print(mismatches)
                # print(remaining_mismatches)
                # pprint.pprint(pdb_num_dict)

                # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                if len(remaining_mismatches
                       ) > 0 and sc.structure.pdb_code.index not in [
                           '6OIJ', '6OY9', '6OYA'
                       ]:
                    # Sequence of the chain as built by PPBuilder, all
                    # peptides concatenated.
                    ppb = PPBuilder()
                    seq = ''
                    for pp in ppb.build_peptides(chain, aa_only=False):
                        seq += str(pp.get_sequence())
                    # Local alignment of the reference sequence against the
                    # structure sequence; only the first alignment is used.
                    pw2 = pairwise2.align.localms(sc.protein.sequence, seq, 2,
                                                  -1, -.5, -.1)
                    ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                    # NOTE(review): wt_pdb_dict is filled but not read again
                    # in this method.
                    wt_pdb_dict = OrderedDict()
                    pdb_wt_dict = OrderedDict()
                    # j walks the reference residues, k the structure
                    # residue numbers.
                    j, k = 0, 0
                    for i, ref, temp in zip(range(0, len(ref_seq)), ref_seq,
                                            temp_seq):
                        if ref != '-' and temp != '-':
                            wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = resis[j]
                            j += 1
                            k += 1
                        elif ref == '-':
                            # Gap in the reference: the alignment column index
                            # stands in for the missing partner.
                            wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                            k += 1
                        elif temp == '-':
                            wt_pdb_dict[resis[j]] = i
                            pdb_wt_dict[i] = resis[j]
                            j += 1
                    for i, r in enumerate(remaining_mismatches):
                        # Adjust for shifted residue when residue is a match
                        # NOTE(review): for i == 0, index i - 1 is -1, i.e.
                        # the LAST remaining mismatch is used for comparison.
                        if r[0].get_id()[1] - remaining_mismatches[
                                i - 1][0].get_id()[1] > 1:
                            pdb_num_dict[r[0].get_id()[1] -
                                         1][1] = pdb_wt_dict[chain[
                                             r[0].get_id()[1] - 1]]
                        # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                        # NOTE(review): the else below pairs with this if,
                        # not with the one above.
                        if r[0].get_id()[1] in shifted_mutations:
                            pdb_num_dict[r[0].get_id()[1]][1] = resis.get(
                                sequence_number=shifted_mutations[
                                    r[0].get_id()[1]][2])
                        # Adjust for shift
                        else:
                            pdb_num_dict[r[0].get_id()[1]][1] = pdb_wt_dict[
                                r[0]]

                # One new Residue per structure residue: number and amino
                # acid come from the structure, generic numbers and segment
                # from the paired reference residue.
                bulked_residues = []
                for key, val in pdb_num_dict.items():
                    # print(key, val) # sanity check
                    res_obj = Residue()
                    res_obj.sequence_number = val[0].get_id()[1]
                    res_obj.amino_acid = AA[val[0].get_resname()]
                    res_obj.display_generic_number = val[
                        1].display_generic_number
                    res_obj.generic_number = val[1].generic_number
                    res_obj.protein_conformation = alpha_protconf
                    res_obj.protein_segment = val[1].protein_segment
                    bulked_residues.append(res_obj)
                Residue.objects.bulk_create(bulked_residues)
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                    .format(sc))
            except Exception as msg:
                # Best effort: report the failure and move on to the next
                # complex.
                print(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                    .format(sc))
                print(msg)
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                    .format(sc))
  offset=atom_nr-1
  print('Offset=', offset,'\nResidue Offset=',residue_offset)
  for line in open(ligand_name):
    pdblist = line.split()
    pdblist[1]=atom_nr
    pdblist[5]=str(int(residue_offset)+int(pdblist[5]))
    print('\t'.join(map(str, pdblist)), file=output)
    atom_nr+=1
    
# Create the parser used to find distances between the C-alpha atoms of the
# ligand and all C-alpha atoms of the receptor.
parser = PDBParser()
alphabet_str= string.ascii_uppercase
# NOTE(review): alphabet_list is not used by the loops below, which rebuild
# the same list themselves.
alphabet_list=list(alphabet_str)

# Read the ligand structure from file and look its chains up by the IDs A-Z
# in the first model.
structure = parser.get_structure('LIGAND',ligand_name)
model_ligand = structure[0]
# Every hit overwrites chain_ligand, so after the loop it holds the LAST
# chain found; if no chain is named A-Z it is never assigned.
for i in list(string.ascii_uppercase):
    try:
        chain_ligand = model_ligand[i]
    except KeyError:
        continue

# Read the receptor structure from file; `structure` is rebound, and the same
# "last chain found" lookup is applied.
structure = parser.get_structure('RECEPTOR', receptor_name)
model_receptor = structure[0]
for i in list(string.ascii_uppercase):
    try:
        chain_receptor = model_receptor[i]
    except KeyError:
        continue
Ejemplo n.º 37
0
from Bio.PDB import PDBParser
from numpy import std, average

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Pairs of integers; presumably the residue numbers of partners on
# neighbouring beta strands of OmpA -- TODO confirm against the code that
# consumes this list.
OmpA_beta_sheet_pairs = [(8, 42), (10, 40), (12, 38), (14, 36), (16, 34),
                         (52, 80), (54, 78), (75, 103), (77, 101), (79, 99),
                         (81, 97), (83, 95), (85, 93)]

parser = PDBParser()

# The structure is read from 'pdb2ge4.ent' in the working directory. An
# earlier revision read this file instead:
#structure = parser.get_structure('OmpA', '/homes/retel/qj/pdb1qjp.ent')
structure = parser.get_structure('OmpA', 'pdb2ge4.ent')



# Atom names to consider; only 'H' is active. Earlier selection:
#nuclei = ['CA', 'CB', 'C', 'HA', 'H']
nuclei = ['H']

def calculate_distances():
    #nuclei = ['CA', 'CB', 'C']
    #nuclei = ['CA', 'CB', 'C', 'N', 'H', 'HA', 'HB']
    #nuclei = ['H', 'HA', 'HB']
    intra = {}
    sequential = {}
    longrange1 = {}
    longrange2 = {}
    for chain in structure.get_chains():
Ejemplo n.º 38
0
    if not (L == len(ss_seq)):
        raise ValueError("Length mismatch %i %i" % (L, len(ss_seq)))
    for i in range(0, L):
        residues[i].xtra["SS_PSEA"] = ss_seq[i]
    # os.system("rm "+fname)


class PSEA(object):
    """Secondary structure of a model, computed from a structure file.

    The string produced by ``psea(filename)`` is converted with ``psea2HEC``,
    handed to ``annotate`` together with the model, and kept on the instance
    as ``ss_seq``.
    """

    def __init__(self, model, filename):
        """Compute the secondary structure string and annotate *model*.

        Args:
            model: Model passed on to ``annotate``.
            filename: Path passed on to ``psea``.
        """
        converted = psea2HEC(psea(filename))
        annotate(model, converted)
        # Stored only after the annotation went through.
        self.ss_seq = converted

    def get_seq(self):
        """Return the secondary structure string stored at construction."""
        return self.ss_seq


# Command-line entry point: expects the path of a PDB file as first argument.
if __name__ == "__main__":

    import sys
    from Bio.PDB import PDBParser

    # Parse PDB file
    p = PDBParser()
    s = p.get_structure('X', sys.argv[1])

    # Annotate the first model of the structure with PSEA secondary
    # structure info
    PSEA(s[0], sys.argv[1])
Ejemplo n.º 39
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import sys

# Structure identifier handed to the parser; the file that is actually read
# comes from the first command-line argument.
prot_id = "5AGY.pdb"
prot_file = sys.argv[1]

# Use the Biopython parser, which gives access to the elements of a PDB file.
from Bio.PDB import PDBParser

# NOTE(review): the file is parsed before the help flags are checked below,
# so running the script with only "-h" makes the parser try to open "-h".
parser = PDBParser(PERMISSIVE=1)
structure = parser.get_structure(prot_id, prot_file)
model = structure[0]

# Print the (French) help text when -h/--help appears anywhere in argv.
if "-h" in sys.argv or "--help" in sys.argv:
    print(
        "Ce programme identifie les liaisons cations-pi à partir d'un fichier Protein Data Bank (PDB). Les critères pris en compte proviennent du Protein Interaction Calculator que l'on peut retrouver en suivant le lien : http://pic.mbu.iisc.ernet.in/PIC_Criteria.pdf. Le parser de Biopython est strucutré de la manière suivante : Structure/model/chain/residu/atome."
    )
    print("Fonctions utilisées :")
    print('parser.get_structure -->  ',
          'Creation of a structure object from a PDB file')
    print(
        'objet.get_name -->  ',
        'Renvoie le nom correspondant à l objet : Structure/model/chain/residu/atome'
    )
    # NOTE(review): the label below looks garbled ("get_structur -->e") and
    # its description talks about a residue number -- confirm which function
    # this help line is meant to describe.
    print('parser.get_structur -->e  ',
          'Renvoie le numéro rattaché au résidue dans le fichier PDB')
    print('')
Ejemplo n.º 40
0
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

#Read in and Parse PDB file to obtain DSSP --> secondary structure determination
#follows the basic outline on biopython.org -- tutorial
parse = PDBParser()
struc = parse.get_structure('6hrc', "6hrc.pdb")
model = struc[0]
dssp = DSSP(model, '6hrc.pdb')

# Tally the DSSP secondary-structure code of every residue. Index 2 of a DSSP
# record is the secondary-structure letter.
sec_struc = ''
a_helix = 0
b_sheet = 0
other = 0
none = 0

# Iterate the keys once instead of rebuilding list(dssp.keys()) per residue.
ss_codes = []
for key in dssp.keys():
    ss = dssp[key][2]
    ss_codes.append(ss)
    # The four categories are mutually exclusive. The earlier independent
    # `if` statements let the final `else` fire for helix and sheet residues
    # too, so those were counted twice.
    if ss in ("H", "G", "I"):
        a_helix += 1
    elif ss in ("E", "B"):
        b_sheet += 1
    elif ss == "-":
        none += 1
    else:
        other += 1
sec_struc = ''.join(ss_codes)
Ejemplo n.º 41
0
    def __init__(self,
                 xyz=None,
                 r=None,
                 xyzr=None,
                 xyzrg=None,
                 g=None,
                 pdb=None,
                 bv=None,
                 mesh=None,
                 name=None,
                 spheres_file=None):
        """
        Build a Spheres object (centers xyz, radii r, groups g) from the first usable input.

        Inputs are tried in this order and only the first one present is used:
        xyzrg; xyzr (optionally g); xyz (optionally r, g); pdb (optionally r,
        g); bv together with r (optionally g); spheres_file. Afterwards mesh
        and name are stored and duplicate rows of xyzrg are dropped, keeping
        the first occurrence of each row in its original position.

        Args:
          xyz (float nx3): Array containing centers (Default value = None)
          r (float nx1): Array containing radii (Default value = None)
          xyzr (float nx4): Array containing centers and radii (Default value = None)
          xyzrg (float nx5): Array containing centers, radii, and groups (Default value = None)
          g (float nx1): Array containing groups (Default value = None)
          pdb (str): filename of a pdb to be processed into spheres (Default value = None)
          bv (float nx6): Array containing vertices and normals (Default value = None)
          mesh (Trimesh): mesh object describing the surface (Default value = None)
          name (str): descriptive identifier (Default value = None)
          spheres_file (str): filename of a Spheres file (.xyzrg or .obj) to be read from disk (Default value = None)

        Raises:
          ValueError: if spheres_file has an extension other than .xyzrg/.obj,
            or the loaded table does not have 4 or 5 columns.
        """

        if xyzrg is not None:
            self.xyzrg = xyzrg
        elif xyzr is not None:
            self.xyzr = xyzr
            if g is not None:
                self.g = g
        elif xyz is not None:
            self.xyz = xyz
            if r is not None:
                self.r = r
            if g is not None:
                self.g = g
        elif pdb is not None:
            # Unless the interpreter was started with explicit -W options,
            # silence warnings (this changes the process-wide filter).
            if not sys.warnoptions:
                import warnings
                warnings.simplefilter("ignore")

            pdb_parser = PDBParser(PERMISSIVE=1, QUIET=True)
            structure = pdb_parser.get_structure("prot", pdb)
            # Only the first model contributes atoms.
            model_atoms = list(structure[0].get_atoms())

            self.xyz = np.array([atom.get_coord() for atom in model_atoms])

            if r is None:
                self.r = [
                    _get_atom_radius(atom, rtype='united')
                    for atom in model_atoms
                ]
            else:
                self.r = r

            if g is not None:
                self.g = g
        elif bv is not None and r is not None:
            # Columns 0-2 of bv are offset by r times columns 3-5.
            self.xyz = bv[:, 0:3] + r * bv[:, 3:6]
            self.r = r
            self.remove_duplicates()
            if g is not None:
                self.g = g
        elif spheres_file is not None:
            stem, suffix = os.path.splitext(spheres_file)
            if suffix == ".xyzrg":
                xyzrg_path = spheres_file
                obj_path = "{0}.obj".format(stem)
            elif suffix == ".obj":
                xyzrg_path = "{0}.xyzrg".format(stem)
                # A missing companion file is only logged here; loading it
                # below is what actually fails.
                if not os.path.isfile(xyzrg_path):
                    logger.error(
                        "No spheres file found with the name: {0}.xyzr or {0}.xyzrg"
                        .format(stem))
                obj_path = spheres_file
            else:
                logger.error(
                    "Invalid filename given to read in spheres object: {0}".
                    format(spheres_file))
                raise ValueError(
                    "Spheres objects must be .xyzrg or .obj ({0} provided)".
                    format(spheres_file))

            table = np.loadtxt(xyzrg_path, delimiter=' ')
            column_count = table.shape[1]
            if column_count == 5:
                self.xyzrg = table
            elif column_count == 4:
                self.xyzr = table
            else:
                logger.error(
                    "Spheres csv file contains the wrong number of columns")
                raise ValueError(
                    "{0} columns found in file {1}; must contain 4 or 5".
                    format(table.shape[1], spheres_file))

            # The mesh from disk replaces any mesh argument; the name falls
            # back to the file's base name.
            mesh = trimesh.load_mesh(obj_path)
            if name is None:
                name = os.path.basename(stem)

        # None is stored as-is when no mesh / name is available.
        self.mesh = mesh
        self.name = name

        # Drop duplicate rows while preserving first-seen order.
        _, first_seen = np.unique(self.xyzrg, axis=0, return_index=True)
        self.xyzrg = self.xyzrg[sorted(first_seen), :]
Ejemplo n.º 42
0
import heapq
import numpy as np
from collections import deque

from Bio.PDB import PDBParser, DSSP

HELIX_CODES = ['H', 'G', 'I']
BETA_CODES = ['B', 'E']
SHORT_CODES = ['T', 'S', 'C', '-', '', ' ']
# Script entry point: run DSSP on one hard-coded structure and write its
# secondary structure, reduced to H/E/C, to '<protname>.ss_sa' next to it.
if __name__ == '__main__':
    res = []
    path = 'D:/work/bioproteins_dl/deep/alpha-fold/train_dataset/'
    protname = '6ryj'
    pdb_filename = path + '' + protname + '.pdb'
    p = PDBParser()
    structure = p.get_structure(protname, pdb_filename)
    # DSSP is run on the first model only.
    model = structure[0]
    dssp = DSSP(model, pdb_filename)
    # NOTE(review): f is flushed but not closed in this excerpt -- confirm it
    # is closed further down.
    f = open(path + protname + '.ss_sa', 'w')
    # FASTA-style header line.
    f.write('>' + protname + '\n')
    for residue in dssp:
        # Index 2 of each record is compared against the code lists above;
        # it is also echoed to stdout.
        print(residue[2])
        if residue[2] in HELIX_CODES:
            res.append('H')
        elif residue[2] in BETA_CODES:
            res.append('E')
        else:
            # Everything that is neither helix nor beta becomes 'C'.
            res.append('C')
    r = ''.join(res)
    f.write(r)
    f.flush()
Ejemplo n.º 43
0
def _is_backbone_atom(atom):
    """Return True for the CA and N atoms used by the backbone/dssp modes."""
    return atom.get_name() in ('CA', 'N')


def _centered_coordinates(atoms):
    """Return (coordinates of *atoms* with their centroid subtracted, centroid)."""
    coords = np.array([atom.coord for atom in atoms])
    center = centroid(coords)
    coords -= center
    return coords, center


def _backbone_chain_data(structure, center):
    """Return (chains, per-chain centred CA/N coordinates, per-chain random RGBA)."""
    found_chains = []
    coords_per_chain = []
    colors_per_chain = []
    for chain in structure.get_chains():
        found_chains.append(chain)
        # reshape keeps an (n, 3) layout even for a chain without CA/N atoms,
        # so the in-place shift below cannot fail on an empty selection.
        can_coord = np.array([
            atom.coord for atom in chain.get_atoms()
            if _is_backbone_atom(atom)
        ]).reshape(-1, 3)
        can_coord -= center
        coords_per_chain.append(can_coord)
        # Random RGB with the alpha channel fixed at 1.0.
        colors_per_chain.append(np.append(np.random.rand(1, 3), [1.0]))
    return found_chains, coords_per_chain, colors_per_chain


def atom_information(pdbdata, mode):
    """Parse a PDB file and publish drawing data through module-level globals.

    Depending on *mode* the following globals are assigned:

    * ``'cpk'``: ``coordinates``, ``color``, ``radius`` for every atom;
      colour and radius are looked up by atom id.
    * ``'aminoacid'``: the same for all atoms outside ``HOH`` residues, with
      the colour derived from the residue name.
    * ``'backbone'``: CA and N atoms only, plus ``chains``, ``chain_coords``
      and ``chain_colors``; every chain gets one random colour.
    * ``'dssp'``: CA and N atoms only, coloured by the DSSP code of their
      residue, plus the per-chain globals.

    Coordinates are shifted so that the centroid of the selected atoms is the
    origin. Any other *mode* assigns nothing. The function returns None.

    Args:
        pdbdata: PDB file handed to the parser (and to DSSP in 'dssp' mode).
        mode: One of 'cpk', 'aminoacid', 'backbone', 'dssp'.
    """
    global coordinates
    global color
    global radius
    global chains
    global chain_coords
    global chain_colors

    # Analyze the pdb file.
    parser = PDBParser(QUIET=True, PERMISSIVE=True)
    structure = parser.get_structure('model', pdbdata)

    if mode == 'cpk':
        atoms = list(structure.get_atoms())
        coordinates, _ = _centered_coordinates(atoms)
        color = [colorrgba(atom.get_id()) for atom in atoms]
        radius = np.array([vrad(atom.get_id()) for atom in atoms])

    elif mode == 'aminoacid':
        atoms = [
            atom for atom in structure.get_atoms()
            if atom.get_parent().resname != 'HOH'
        ]
        coordinates, _ = _centered_coordinates(atoms)
        color = [
            colorrgba(restype(atom.get_parent().resname)) for atom in atoms
        ]
        radius = np.array([vrad(atom.get_id()) for atom in atoms])

    elif mode == 'backbone':
        atoms = [
            atom for atom in structure.get_atoms() if _is_backbone_atom(atom)
        ]
        coordinates, center = _centered_coordinates(atoms)
        chains, chain_coords, chain_colors = _backbone_chain_data(
            structure, center)
        # One colour row per CA/N atom of each chain.
        color = [
            np.tile(chain_color, (len(can_coord), 1))
            for can_coord, chain_color in zip(chain_coords, chain_colors)
        ]
        # NOTE(review): with a single chain `color` stays a one-element list
        # instead of an (n, 4) array; kept as-is because the consumers of the
        # global are not visible here.
        if len(chains) > 1:
            color = np.concatenate(color)
        radius = np.array([vrad(atom.get_id()) for atom in atoms])

    elif mode == 'dssp':
        atoms = [
            atom for atom in structure.get_atoms() if _is_backbone_atom(atom)
        ]
        coordinates, center = _centered_coordinates(atoms)
        # DSSP is only needed for this mode, so it is run here rather than
        # for every call.
        dssp = DSSP(structure[0], pdbdata)
        struct3 = [dssp[key][2] for key in dssp.keys()]
        residues = [
            residue for residue in structure.get_residues()
            if residue.get_resname() in resdict
        ]
        # The i-th DSSP record is paired with the i-th known residue.
        color = []
        for i, ss_code in enumerate(struct3):
            dsspcolor = crgbaDSSP(ss_code)
            n_atoms = sum(1 for atom in residues[i]
                          if _is_backbone_atom(atom))
            color.append(np.tile(dsspcolor, (n_atoms, 1)))
        if len(struct3) > 1:
            color = np.concatenate(color)
        chains, chain_coords, chain_colors = _backbone_chain_data(
            structure, center)
        radius = np.array([vrad(atom.get_id()) for atom in atoms])
Ejemplo n.º 44
0

#@TODO: helper function, for testing purposes only; to be removed!!
def save_pdb(struct, name):
    """Debug helper: list the residues of *struct* and save it as a PDB file.

    Prints the number of residues, then one line per residue with its id and
    residue name, and writes the structure to
    ``str(name) + 'volume_simulator.pdb'``.

    Args:
        struct: Structure to dump; must provide ``get_residues()``.
        name: Prefix of the output file name.
    """
    residues = list(struct.get_residues())
    # %-formatting gives the same text on Python 2 and Python 3; the former
    # print statements were a SyntaxError on Python 3.
    print("test %s" % len(residues))
    for resi in residues:
        print("%s %s" % (resi.id, resi.resname))
    out = PDBIO()
    out.set_structure(struct)
    out.save(str(name) + 'volume_simulator.pdb')


# Manual smoke run: load Zfull.pdb from the working directory and push it
# through the disordered-fragment / volume-simulation objects.
if __name__ == '__main__':

    # PERMISSIVE=False: strict parsing; QUIET=True: no parser warnings.
    p = PDBParser(PERMISSIVE=False, QUIET=True)
    st = p.get_structure("Zfull.pdb", "Zfull.pdb")
    component = Component()
    component.pyrystruct = st

    dd_frag = DisorderedFragment()
    dd_frag.set_modeling_disordered_fragment(component.pyrystruct)
    # Centre handed to the simulation; fixed at the origin here.
    mass_centre = [0, 0, 0]

    g = Grapes()
    g.set_volume_simulation_parameters(dd_frag, component.pyrystruct,
                                       mass_centre)

    res = g.generate()

    # The generated result is added back to the fragment.
    dd_frag.add_pseudoatoms_to_structure(res, component.pyrystruct.moltype)
Ejemplo n.º 45
0
        for residue in residue_list:
            if not is_aa(residue):
                continue
            rd = residue_depth(residue, surface)
            ca_rd = ca_depth(residue, surface)
            # Get the key
            res_id = residue.get_id()
            chain_id = residue.get_parent().get_id()
            depth_dict[(chain_id, res_id)] = (rd, ca_rd)
            depth_list.append((residue, (rd, ca_rd)))
            depth_keys.append((chain_id, res_id))
            # Update xtra information
            residue.xtra['EXP_RD'] = rd
            residue.xtra['EXP_RD_CA'] = ca_rd
        AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)


# Command-line entry point: expects the path of a PDB file as first argument.
if __name__ == "__main__":

    import sys
    from Bio.PDB import PDBParser

    p = PDBParser()
    s = p.get_structure("X", sys.argv[1])
    # Only the first model is analysed.
    model = s[0]

    rd = ResidueDepth(model, sys.argv[1])

    # Print every entry of the residue-depth map.
    for item in rd:
        print(item)
Ejemplo n.º 46
0
def randomize_starting_position(ligand_file, complex_file, outputfolder=".", nposes=200, test=False, user_center=None,
                                logger=None):
    """
    Randomize the initial ligand position around the receptor.

    The ligand is first translated onto the sampling centre (the protein
    centre of mass, or the atom selected by ``user_center``). Each iteration
    then moves it to a random point inside a sphere around that centre and
    rotates it. A pose is saved as ``ligand<j>.pdb`` in ``outputfolder`` when
    the protein is found within the outer contact radius of the ligand centre
    of mass but not within the inner one. If no pose is accepted for more than
    60 seconds the sphere radius grows by 1 A; sampling aborts once it has
    grown by 20 A.

    :param ligand_file: path of the ligand PDB file
    :param complex_file: path of the receptor/complex PDB file
    :param outputfolder: directory in which the generated poses are saved
    :param nposes: number of poses to generate (default 200)
    :param test: if true, seed NumPy's global RNG with 42
    :param user_center: optional 'chain:resnumber:atomname' string naming the
        atom used as the centre of the sampling sphere
    :param logger: object with an ``info`` method used for progress messages;
        when None, ``logging.getLogger(__name__)`` is used
    :return: tuple of (list of written pose paths, final sphere radius,
        sphere centre as a list)
    :raises cs.WrongAtomStringFormat: if ``user_center`` does not split into
        exactly three ':'-separated fields
    """
    import logging

    # logger defaults to None; without this fallback the first logger.info
    # call below raised AttributeError for callers relying on the default.
    if logger is None:
        logger = logging.getLogger(__name__)

    if test:
        np.random.seed(42)

    # read in files
    parser = PDBParser()
    output = []
    structure = parser.get_structure('protein', complex_file)
    ligand = parser.get_structure('ligand', ligand_file)
    COI = np.zeros(3)

    # get center of interface (if PPI)
    if user_center:
        try:
            chain_id, res_number, atom_name = user_center.split(":")
        except ValueError:
            raise cs.WrongAtomStringFormat(
                f"The specified atom is wrong '{user_center}'. "
                "Should be 'chain:resnumber:atomname'")
        center_found = False
        for chain in structure.get_chains():
            if chain.id != chain_id:
                continue
            for residue in chain.get_residues():
                if residue.id[1] != int(res_number):
                    continue
                for atom in residue.get_atoms():
                    if atom.name == atom_name:
                        COI = np.array(list(atom.get_vector()))
                        center_found = True
        if not center_found:
            # COI is still the zero vector here; make the fallback visible
            # instead of silently sampling around the origin.
            logger.info("WARNING: atom '{}' was not found in the structure; "
                        "the sampling centre stays at the origin.".format(user_center))

    # calculate protein and ligand COM
    com_protein = calculate_com(structure)
    com_ligand = calculate_com(ligand)

    # calculating the maximum d of the ligand
    coor_ligand = np.array([list(atom.get_vector() - com_ligand)
                            for atom in ligand.get_atoms()])
    coor_ligand_max = np.amax(coor_ligand, axis=0)
    d_ligand = np.sqrt(np.sum(coor_ligand_max ** 2))

    # set threshold for near and far contacts based on ligand d
    if d_ligand / 2 < 5.0:
        d5_ligand = 5.0
    else:
        d5_ligand = d_ligand / 2 + 1

    if d_ligand > 8.0:
        d8_ligand = d_ligand / 2 + 4
    else:
        d8_ligand = 8.0

    # calculate vector to move the ligand
    if user_center:
        move_vector = com_ligand - COI
    else:
        move_vector = com_ligand - com_protein

    # translate the ligand to the protein COM (COI for PPI)
    original_coords = []
    for atom in ligand.get_atoms():
        ligand_origin = np.array(list(atom.get_vector())) - move_vector
        original_coords.append(ligand_origin)
        atom.set_coord(ligand_origin)

    # calculating the maximum radius of the protein from the origin
    coor = np.array([list(atom.get_vector() - com_protein)
                     for atom in structure.get_atoms()])
    coor_max = np.amax(coor, axis=0)
    d = np.sqrt(np.sum(coor_max ** 2))

    # radius of the sphere from the origin
    D = 10.0 if user_center else np.ceil(6.0 + d)
    D_initial = D
    logger.info("Sampling {}A spherical box around the centre of the receptor/interface.".format(D))

    if user_center:
        sphere_cent = COI
    else:
        sphere_cent = com_protein

    # The protein atoms never move inside the loop, so the neighbour search
    # structure is built once instead of twice per iteration.
    protein_search = NeighborSearch(Selection.unfold_entities(structure, "A"))

    j = 0
    logger.info("Generating {} poses...".format(nposes))
    start_time = time.time()
    while j < nposes:
        # generate random coordinates
        phi = np.random.uniform(0, 2 * np.pi)
        costheta = np.random.uniform(-1, 1)
        u = np.random.uniform(0, 1)
        theta = np.arccos(costheta)

        r = D * np.cbrt(u)
        x = r * np.sin(theta) * np.cos(phi)
        y = r * np.sin(theta) * np.sin(phi)
        z = r * np.cos(theta)

        # move ligand to the starting point (protein COM)
        for atom, coord in zip(ligand.get_atoms(), original_coords):
            atom.set_coord(coord)

        # translate ligand to a random position
        translation = (x, y, z)
        for atom in ligand.get_atoms():
            new_pos_lig_trans = np.array(list(atom.get_vector())) - translation
            atom.set_coord(new_pos_lig_trans)

        # calculate ligand COM in the new position
        new_ligand_COM = calculate_com(ligand)

        # rotate ligand
        vector = Vector(new_ligand_COM)
        # NOTE(review): randint only yields whole-radian angles; a uniform
        # draw over [0, 2*pi) looks intended. Left unchanged because it would
        # alter the seeded (test=True) random sequence -- confirm before fixing.
        rotation_matrix = rotaxis(np.random.randint(0, 2 * np.pi), vector)

        for atom in ligand.get_atoms():
            coords_after = atom.get_vector().left_multiply(rotation_matrix)
            atom.set_coord(coords_after)

        # check if it's inside the sampling sphere
        dist = np.sqrt((new_ligand_COM[0] - sphere_cent[0]) ** 2 + (new_ligand_COM[1] - sphere_cent[1]) ** 2 + (
                new_ligand_COM[2] - sphere_cent[2]) ** 2)

        if dist < D:
            # check contacts at: 5A (no contacts) and 8A (needs contacts)
            contacts5 = protein_search.search(new_ligand_COM, d5_ligand, "S")
            contacts8 = protein_search.search(new_ligand_COM, d8_ligand, "S")
            if contacts8 and not contacts5:
                j += 1
                io = PDBIO()
                io.set_structure(ligand)
                output_name = os.path.join(outputfolder, 'ligand{}.pdb'.format(j))
                io.save(output_name)
                output.append(output_name)
                # restart the "no pose accepted" timer
                start_time = time.time()

            end_time = time.time()
            total_time = end_time - start_time
            if total_time > 60:
                # no accepted pose for a minute: enlarge the sampling sphere
                D += 1
                if D - D_initial >= 20:
                    logger.info("Original box increased by 20A. Aborting...")
                    break
                start_time = end_time
                logger.info("Increasing sampling box by 1A.")
    logger.info("{} poses created successfully.".format(j))
    return output, D, list(sphere_cent)
Ejemplo n.º 47
0
                f.write("\nNumero: %s\n" % ref.number)
                f.write("\nPosicao: %s\n" % ref.positions)
                f.write("\nComentarios: %s\n" % ref.comments)
                f.write("\nReferencias: %s\n" % ref.references)
                f.write("\nAutores: %s\n" % ref.authors)
                f.write("\nTitulo: %s\n" % ref.title)
                f.write("\nLocalizacao: %s\n\n" % ref.location)
            break
        except Exception:
            break
f.close()

# Structural analysis of the relevant proteins, based on the PDB files found
# (code based on the one developed by group 10).
parser = PDBParser()
# Report file that receives the header fields written below.
ficheiro = open("analise_pdb.txt", "w")
# NOTE(review): the structure is parsed from the local file '4F67.pdb' before
# retrieve_pdb_file('4F67') is called below -- confirm that the local file is
# expected to exist already.
structure = parser.get_structure('4F67', '4F67.pdb')
pdbl = PDBList()
pdbl.retrieve_pdb_file('4F67')
# Write selected entries of the parsed header dictionary to the report.
ficheiro.write("****Analise do ficheiro 4F67.pdb****\n")
ficheiro.write("\nPalavras Chave: %s\n" % structure.header['keywords'])
ficheiro.write("\nNome do Organismo: %s\n" % structure.header['name'])
ficheiro.write("\nCabecalho: %s" % structure.header['head'])
ficheiro.write("\nData da deposicao: %s\n" %
               structure.header['deposition_date'])
ficheiro.write("\nData da publicacaos: %s\n" %
               structure.header['release_date'])
ficheiro.write("\nMetodo usado: %s\n" % structure.header['structure_method'])
ficheiro.write("\nResolucao: %s\n" % structure.header['resolution'])
ficheiro.write("\nReferencia da estrutura: %s\n" %
               structure.header['structure_reference'])
ficheiro.write("\nReferencia de artigo: %s\n" %
Ejemplo n.º 48
0
def pdb_buildstructure(pdbfile):
    """Parse ``pdbfile`` with a permissive PDBParser and return the structure.

    PERMISSIVE=1 lets the parser accept PDB files that contain errors. The
    returned structure always gets the id "name".
    """
    permissive_parser = PDBParser(PERMISSIVE=1)
    structure = permissive_parser.get_structure("name", pdbfile)
    return structure
# Collect the names of all ".pdb" entries found in `directory`; `idx` counts
# how many were kept.
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith(".pdb"):
        # dataset_dict[filename] = idx
        dataset_filenames.append(filename)
        idx += 1

# Maps a PDB file name to the sequence of its first polypeptide.
pdb_to_seq = {}

parser = PDBParser()
ppb = PPBuilder()
i = 0
for filename in dataset_filenames:
    # record=True captures warnings raised while parsing instead of showing
    # them; the captured list is discarded.
    with warnings.catch_warnings(record=True):
        with open(os.path.join(Constants.PDB_PATH, filename)) as f:
            structure = parser.get_structure(os.path.splitext(filename)[0], f)
    model = structure[0]
    # Only the first polypeptide built from the first model is stored (note
    # the break); files that yield no polypeptide get no entry.
    for pp in ppb.build_peptides(model):
        #print(pp.get_sequence())
        pdb_to_seq[filename] = str(pp.get_sequence())
        break

# Maps a file name to the key of the split list it appears in.
file_to_ds = {}

# The split file is a JSON object whose values are lists of file names.
with open(Constants.TRAIN_VAL_TEST_SPLIT_FILE_PATH) as file:
    split_d = json.load(file)
    for tr_val_or_test, filenames in split_d.items():
        for fn in filenames:
            file_to_ds[fn] = tr_val_or_test

seq_to_pdbs = {}
Ejemplo n.º 50
0
def generate_seq_file(score_file, save_file):
    """Write the mutated chain sequence for every mutation in a score file.

    ``score_file`` is read from ``./dataFile/`` as a tab-separated table whose
    first column holds mutation names made of '_'-separated fields: PDB id,
    chain id and a mutation field. The mutation field starts with the
    three-letter wild-type residue, ends with the three-letter mutant residue
    and its digits give the (1-based) position. PDB files missing from
    ``./dataFile/PDB_dl`` are downloaded. For each unique mutation the chain
    sequence is built, the wild-type residue at the position is checked and
    replaced, and ``name<TAB>sequence`` is written to ``save_file``.
    """
    score_table = pd.read_csv('./dataFile/' + score_file, sep='\t')

    # Group the unique mutation records by PDB id, keeping first-seen order.
    mutations_by_pdb = dict()
    seen_names = set()
    for name in score_table.iloc[:, 0]:
        fields = name.split('_')
        pdb_id = fields[0]
        record = {
            'chain_id': fields[1],
            'wt_aa': fields[2][0:3],
            'mu_aa': fields[2][-3:],
            'mu_pos': int(''.join(ch for ch in fields[2] if ch.isdigit())),
            'name': name
        }
        if name in seen_names:
            continue
        seen_names.add(name)
        mutations_by_pdb.setdefault(pdb_id, []).append(record)

    parser = PDBParser()
    seq_builder = PPBuilder()
    downloader = PDBList()
    pdb_dir = './dataFile/PDB_dl'
    mutant_sequences = dict()
    for pdb_id, mutations in mutations_by_pdb.items():
        pdb_file = pdb_dir + '/pdb' + pdb_id.lower() + '.ent'
        # download the PDB entry only when it is not available locally
        if not os.path.exists(pdb_file):
            downloader.retrieve_pdb_file(pdb_code=pdb_id,
                                         file_format='pdb',
                                         overwrite=False,
                                         pdir=pdb_dir)
        model = parser.get_structure(pdb_id, pdb_file)[0]

        for mutation in mutations:
            index = mutation['mu_pos'] - 1
            peptides = seq_builder.build_peptides(model[mutation['chain_id']])
            sequence = "".join(str(pp.get_sequence()) for pp in peptides)
            sequence = sequence.replace('\n', '').replace(' ', '')
            assert sequence[index] == three_to_one(
                mutation['wt_aa']), 'Wt amino acid failed to match'
            residues = list(sequence)
            residues[index] = three_to_one(mutation['mu_aa'])
            mutant_sequences[mutation['name']] = ''.join(residues)

    with open(save_file, 'w') as output_hl:
        output_hl.writelines(name + '\t' + seq + '\n'
                             for name, seq in mutant_sequences.items())
Ejemplo n.º 51
0
def test_show_biopython():
    """Smoke test: a parsed Biopython structure can be passed to nv.show_biopython."""
    from Bio.PDB import PDBParser
    biopython_structure = PDBParser().get_structure('protein', nv.datafiles.PDB)
    nv.show_biopython(biopython_structure)
Ejemplo n.º 52
0
 def getStructure(self):
     """Parse the result of ``self.retrievePDB()`` permissively; the structure id is ``self.ident``."""
     return PDBParser(PERMISSIVE=1).get_structure(self.ident, self.retrievePDB())
Ejemplo n.º 53
0
    from Bio.PDB import PDBParser

    if len(sys.argv) != 4:
        print "Expects three arguments,"
        print " - FASTA alignment filename (expect two sequences)"
        print " - PDB file one"
        print " - PDB file two"
        sys.exit()

    # The alignment
    fa = AlignIO.read(open(sys.argv[1]), "fasta", generic_protein)

    pdb_file1 = sys.argv[2]
    pdb_file2 = sys.argv[3]

    # The structures
    p = PDBParser()
    s1 = p.get_structure('1', pdb_file1)
    p = PDBParser()
    s2 = p.get_structure('2', pdb_file2)

    # Get the models
    m1 = s1[0]
    m2 = s2[0]

    al = StructureAlignment(fa, m1, m2)

    # Print aligned pairs (r is None if gap)
    for (r1, r2) in al.get_iterator():
        print r1, r2
Ejemplo n.º 54
0
def parse_structure(path):
    """
    Parse a structure with Biopython's PDB/mmCIF parser and clean it up.

    The parser is chosen from the file extension ('pdb'/'ent' or 'cif').
    After parsing, the structure is modified in place:

    * only the first model is kept;
    * every disordered atom is replaced by its selected alternate location;
    * residues whose hetero flag starts with 'W' or 'H' are removed;
    * atoms whose element is 'H' are removed.

    Gaps are then reported (as log warnings) when the number of polypeptides
    built from the structure differs from the number of distinct chain ids.

    :param path: path of the structure file
    :return: tuple (structure, number of distinct chain ids, number of
        residues left after removing the 'W'/'H' flagged residues)
    :raises IOError: if the extension is not 'pdb', 'ent' or 'cif'
    :raises ValueError: if a remaining residue is not a standard amino acid
    :raises Exception: whatever the parser raises is logged and re-raised
    """
    log = logging.getLogger("Prodigy")
    log.info("[+] Reading structure file: {0}".format(path))
    fname = os.path.basename(path)
    sname = ".".join(fname.split(".")[:-1])
    s_ext = fname.split(".")[-1]

    if s_ext not in ("pdb", "ent", "cif"):
        raise IOError(
            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'."
            .format(s_ext))

    if s_ext == "cif":
        sparser = MMCIFParser()
    else:
        sparser = PDBParser(QUIET=1)

    try:
        s = sparser.get_structure(sname, path)
    except Exception:
        log.error("[!] Structure '{0}' could not be parsed".format(sname))
        # Bare raise keeps the original exception type and traceback;
        # wrapping it in a new Exception discarded both.
        raise

    # Keep first model only
    if len(s) > 1:
        log.warning(
            "[!] Structure contains more than one model. Only the first one will be kept"
        )
        model_one = s[0].id
        for m in s.child_list[:]:
            if m.id != model_one:
                s.detach_child(m.id)

    # Double occupancy check: swap each disordered atom for its selected child
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = " "
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    # Remove HETATMs and solvent (hetero flag starting with 'H' or 'W');
    # iterate over a snapshot because residues are detached while looping.
    for res in list(s.get_residues()):
        if res.id[0][0] in ("W", "H"):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                "Unsupported non-standard amino acid found: {0}".format(
                    res.resname))
    n_res = len(list(s.get_residues()))

    # Remove Hydrogens (snapshot for the same reason as above)
    for atom in list(s.get_atoms()):
        if atom.element == "H":
            atom.parent.detach_child(atom.name)

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_chains = len({c.id for c in s.get_chains()})

    if len(peptides) != n_chains:
        log.warning("[!] Structure contains gaps:")
        for i_pp, pp in enumerate(peptides):
            log.warning(
                "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}"
                .format(i_pp, pp[0], pp[-1]))
        # raise Exception('Calculation cannot proceed')

    return (s, n_chains, n_res)
Ejemplo n.º 55
0
class PolypeptideTests(unittest.TestCase):
    """Exercise PPBuilder and CaPPBuilder on the 1A8O structure."""

    # Sequences of the three fragments expected with the builders' defaults.
    _FRAGMENT_SEQUENCES = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    # Sequence expected when False is passed as second build_peptides
    # argument (non-standard MSE residues are shown as M).
    _FULL_SEQUENCE = (
        "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG"
    )

    @classmethod
    def setUpClass(cls):
        """Parse PDB/1A8O.pdb once for all tests."""
        cls.parser = PDBParser(PERMISSIVE=True)
        cls.structure = cls.parser.get_structure("scr", "PDB/1A8O.pdb")

    def _check_single_nonstd_peptide(self, builder):
        """Check the single peptide built when False is passed to build_peptides."""
        peptides = builder.build_peptides(self.structure, False)

        self.assertEqual(len(peptides), 1)

        # Start and end residue numbers
        only = peptides[0]
        self.assertEqual(only[0].get_id()[1], 151)
        self.assertEqual(only[-1].get_id()[1], 220)

        # Sequence
        sequence = only.get_sequence()
        self.assertIsInstance(sequence, Seq)
        self.assertEqual(self._FULL_SEQUENCE, sequence)

    def test_ppbuilder_real(self):
        """Test PPBuilder on real PDB file."""
        peptides = PPBuilder().build_peptides(self.structure)

        self.assertEqual(len(peptides), 3)

        # (first, last) residue number of each fragment
        termini = [(152, 184), (186, 213), (216, 220)]
        for index, (first, last) in enumerate(termini):
            self.assertEqual(peptides[index][0].get_id()[1], first)
            self.assertEqual(peptides[index][-1].get_id()[1], last)

        # Now check sequences
        sequences = [peptides[index].get_sequence() for index in range(3)]
        self.assertIsInstance(sequences[0], Seq)
        for found, expected in zip(sequences, self._FRAGMENT_SEQUENCES):
            self.assertEqual(found, expected)

    def test_ppbuilder_real_nonstd(self):
        """Test PPBuilder on real PDB file allowing non-standard amino acids."""
        self._check_single_nonstd_peptide(PPBuilder())

    def test_ppbuilder_torsion(self):
        """Test phi/psi angles calculated with PPBuilder."""
        peptides = PPBuilder().build_peptides(self.structure)

        # Per fragment: psi of the first residue, then (phi, psi) of the
        # second and third residues. The first phi is always None.
        expected = [
            (-0.46297171497725553,
             (-1.0873937604007962, 2.1337707832637109),
             (-2.4052232743651878, 2.3807316946081554)),
            (-0.6810077089092923,
             (-1.2654003477656888, -0.58689987042756309),
             (-1.7467679151684763, -1.5655066256698336)),
            (-0.73222884210889716,
             (-1.1044740234566259, -0.69681334592782884),
             (-1.8497413300164958, 0.34762889834809058)),
        ]
        for index, (first_psi, second, third) in enumerate(expected):
            phi_psi = peptides[index].get_phi_psi_list()
            self.assertIsNone(phi_psi[0][0])
            self.assertAlmostEqual(phi_psi[0][1], first_psi, places=3)
            self.assertAlmostEqual(phi_psi[1][0], second[0], places=3)
            self.assertAlmostEqual(phi_psi[1][1], second[1], places=3)
            self.assertAlmostEqual(phi_psi[2][0], third[0], places=3)
            self.assertAlmostEqual(phi_psi[2][1], third[1], places=3)

    def test_cappbuilder_real(self):
        """Test CaPPBuilder on real PDB file."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        sequences = [peptides[index].get_sequence() for index in range(3)]
        for found, expected in zip(sequences, self._FRAGMENT_SEQUENCES):
            self.assertEqual(found, expected)

        # Serial numbers of the CA atoms of the first fragment
        expected_serials = [
            10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93,
            104, 112, 124, 131, 139, 150, 161, 173, 182, 189, 197,
            208, 213, 222, 231, 236, 242, 251, 260, 267, 276, 284,
        ]
        self.assertEqual(
            [ca.serial_number for ca in peptides[0].get_ca_list()],
            expected_serials,
        )

    def test_cappbuilder_real_nonstd(self):
        """Test CaPPBuilder on real PDB file allowing non-standard amino acids."""
        self._check_single_nonstd_peptide(CaPPBuilder())

    def test_cappbuilder_tau(self):
        """Test tau angles calculated with CaPPBuilder."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        taus = peptides[1].get_tau_list()
        expected_taus = (0.3597907225123525, 0.43239284636769254,
                         0.99820157492712114)
        for index, value in enumerate(expected_taus):
            self.assertAlmostEqual(taus[index], value, places=3)

        thetas = peptides[2].get_theta_list()
        expected_thetas = (1.6610069445335354, 1.7491703334817772,
                           2.0702447422720143)
        for index, value in enumerate(expected_thetas):
            self.assertAlmostEqual(thetas[index], value, places=3)
Ejemplo n.º 56
0
def create_structure(filename, quiet=True):
    """Parse ``filename`` as PDB and return the Biopython structure.

    The structure id is ``filename`` without its last '.'-separated suffix;
    ``quiet`` is forwarded as the parser's QUIET flag.
    """
    print("creating biopython molecule structure...")
    structure_id = filename.rsplit(".", 1)[0]
    return PDBParser(QUIET=quiet).get_structure(structure_id, filename)
Ejemplo n.º 57
0
def main():
    """Run the conkit-validate command line tool.

    Reads the sequence, the distance prediction and the PDB model named on
    the command line, runs DSSP on the first model of the PDB file, saves the
    model validation figure to ``args.output`` and logs a per-residue table
    with the score and, for misaligned residues, the suggested register.

    Raises FileExistsError if the output exists and overwriting was not
    requested, and ValueError if the sequence has fewer than 5 residues.
    """
    args = create_argument_parser().parse_args()

    global logger
    logger = conkit.command_line.setup_logging(level="info")

    if not args.overwrite and os.path.isfile(args.output):
        raise FileExistsError('The output file {} already exists!'.format(
            args.output))

    logger.info(os.linesep + "Working directory:                           %s",
                os.getcwd())
    logger.info("Reading input sequence:                      %s",
                args.seqfile)
    sequence = conkit.io.read(args.seqfile, args.seqformat).top

    sequence_length = len(sequence)
    if sequence_length < 5:
        raise ValueError('Cannot validate model with less than 5 residues')

    logger.info("Length of the sequence:                      %d",
                sequence_length)
    logger.info("Reading input distance prediction:           %s",
                args.distfile)
    prediction = conkit.io.read(args.distfile, args.distformat).top
    logger.info("Reading input PDB model:                     %s",
                args.pdbfile)
    model = conkit.io.read(args.pdbfile, args.pdbformat).top
    # DSSP works on the first model of the Biopython structure
    first_model = PDBParser().get_structure('structure', args.pdbfile)[0]
    dssp = DSSP(first_model, args.pdbfile, dssp=args.dssp, acc_array='Wilke')

    logger.info(os.linesep + "Validating model.")

    if len(sequence) > 500:
        logger.info(
            "Input model has more than 500 residues, this might take a while..."
        )

    figure = conkit.plot.ModelValidationFigure(
        model, prediction, sequence, dssp, map_align_exe=args.map_align_exe)
    figure.savefig(args.output, overwrite=args.overwrite)
    logger.info(os.linesep + "Validation plot written to %s", args.output)

    table = PrettyTable()
    table.field_names = ["Residue", "Predicted score", "Suggested register"]

    rows = figure.data.loc[:, ['RESNUM', 'SCORE', 'MISALIGNED']].values
    for resnum, score, misalignment in rows:
        residue_cell = '{} ({})'.format(sequence.seq[resnum - 1], resnum)

        # scores above 0.5 are highlighted with asterisks
        if score > 0.5:
            score_cell = '*** {0:.2f} ***'.format(score)
        else:
            score_cell = '    {0:.2f}    '.format(score)

        if misalignment and resnum in figure.alignment.keys():
            register_cell = '*** {} ({}) ***'.format(
                sequence.seq[figure.alignment[resnum] - 1],
                figure.alignment[resnum])
        else:
            register_cell = '               '

        table.add_row([residue_cell, score_cell, register_cell])

    logger.info(os.linesep)
    logger.info(table)
Ejemplo n.º 58
0
class WriteTest(unittest.TestCase):
    """Round-trip tests for PDBIO: write with PDBIO, read back with PDBParser.

    The parsed 1A8O structure is shared by all tests, so tests that need to
    modify it work on a copy; otherwise results would depend on test order.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared PDBIO/PDBParser and parse PDB/1A8O.pdb once."""
        cls.io = PDBIO()
        cls.parser = PDBParser(PERMISSIVE=1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.parser.get_structure("example", "PDB/1A8O.pdb")

    @staticmethod
    def _new_temp_path():
        """Create an empty temporary file and return its path (caller removes it)."""
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        return filename

    def test_pdbio_write_structure(self):
        """Write a full structure using PDBIO."""
        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent

        # Write full model to temp file
        self.io.set_structure(struct1)
        self.assertIs(parent, struct1.parent)

        filename = self._new_temp_path()
        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(len(struct2), 1)
            self.assertEqual(nresidues, 158)
        finally:
            os.remove(filename)

    def test_pdbio_write_preserve_numbering(self):
        """Test writing PDB and preserve atom numbering."""
        self.io.set_structure(self.structure)

        filename = self._new_temp_path()
        try:
            self.io.save(filename, preserve_atom_numbering=True)

            struct = self.parser.get_structure("1a8o", filename)
            serials = [a.serial_number for a in struct.get_atoms()]
            og_serials = [a.serial_number for a in self.structure.get_atoms()]

            self.assertEqual(og_serials, serials)
        finally:
            os.remove(filename)

    def test_pdbio_write_auto_numbering(self):
        """Test writing PDB and do not preserve atom numbering."""
        self.io.set_structure(self.structure)

        filename = self._new_temp_path()
        try:
            self.io.save(filename)  # default preserve_atom_numbering=False

            struct = self.parser.get_structure("1a8o", filename)
            serials = [a.serial_number for a in struct.get_atoms()]
            # Without preservation the atoms are numbered 1..N
            og_serials = list(range(1, len(serials) + 1))
            self.assertEqual(og_serials, serials)
        finally:
            os.remove(filename)

    def test_pdbio_write_residue(self):
        """Write a single residue using PDBIO."""
        struct1 = self.structure
        residue1 = list(struct1.get_residues())[0]

        # Ensure that set_structure doesn't alter parent
        parent = residue1.parent

        # Write the residue to temp file
        self.io.set_structure(residue1)
        self.assertIs(parent, residue1.parent)
        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)
        finally:
            os.remove(filename)

    def test_pdbio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using PDBIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        # Write the residue to temp file
        self.io.set_structure(residue1)
        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain remained the same
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "X")
        finally:
            os.remove(filename)

    def test_pdbio_write_residue_wout_chain(self):
        """Write a single orphan residue using PDBIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        # Write the residue to temp file
        self.io.set_structure(residue1)

        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain is default: "A"
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "A")
        finally:
            os.remove(filename)

    def test_pdbio_write_custom_residue(self):
        """Write a chainless residue using PDBIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent

        # Write the residue to temp file
        self.io.set_structure(res)

        self.assertIs(parent, res.parent)
        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            struct2 = self.parser.get_structure("res", filename)
            latoms = list(struct2.get_atoms())
            self.assertEqual(len(latoms), 1)
            self.assertEqual(latoms[0].name, "CA")
            self.assertEqual(latoms[0].parent.resname, "DUM")
            self.assertEqual(latoms[0].parent.parent.id, "A")
        finally:
            os.remove(filename)

    def test_pdbio_select(self):
        """Write a selection of the structure using a Select subclass."""
        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only CA atoms whose element is carbon."""

            def accept_atom(self, atom):
                if atom.name == "CA" and atom.element == "C":
                    return 1

        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent
        # Write to temp file
        self.io.set_structure(struct1)

        self.assertIs(parent, struct1.parent)
        filename = self._new_temp_path()
        try:
            self.io.save(filename, CAonly())
            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 70)
        finally:
            os.remove(filename)

    def test_pdbio_missing_occupancy(self):
        """Write PDB file with missing occupancy."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = self.parser.get_structure("test", "PDB/occupancy.pdb")

        self.io.set_structure(structure)
        filename = self._new_temp_path()
        try:
            # Saving is expected to emit exactly one warning
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always", BiopythonWarning)
                self.io.save(filename)
                self.assertEqual(len(w), 1, w)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PDBConstructionWarning)
                struct2 = self.parser.get_structure("test", filename)
            atoms = struct2[0]["A"][(" ", 152, " ")]
            self.assertIsNone(atoms["N"].get_occupancy())
        finally:
            os.remove(filename)

    def test_pdbio_write_truncated(self):
        """Test parsing of truncated lines."""
        struct = self.structure

        # Write to temp file
        self.io.set_structure(struct)
        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            # Check if there are lines besides 'ATOM', 'TER' and 'END'
            with open(filename) as handle:
                record_set = {l[0:6] for l in handle}
            record_set -= {
                "ATOM  ",
                "HETATM",
                "MODEL ",
                "ENDMDL",
                "TER\n",
                "TER   ",
                "END\n",
                "END   ",
            }
            self.assertEqual(len(record_set), 0)
        finally:
            os.remove(filename)

    def test_model_numbering(self):
        """Preserve model serial numbers during I/O."""

        def confirm_numbering(struct):
            self.assertEqual(len(struct), 3)
            for idx, model in enumerate(struct):
                self.assertEqual(model.serial_num, idx + 1)
                self.assertEqual(model.serial_num, model.id + 1)

        def confirm_single_end(fname):
            """Ensure there is only one END statement in multi-model files."""
            with open(fname) as handle:
                end_stment = []
                for iline, line in enumerate(handle):
                    if line.strip() == "END":
                        end_stment.append((line, iline))
            self.assertEqual(len(end_stment), 1)  # Only one?
            self.assertEqual(end_stment[0][1], iline)  # Last line of the file?

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            struct1 = self.parser.get_structure("1lcd", "PDB/1LCD.pdb")

        confirm_numbering(struct1)

        # Round trip: serialize and parse again
        self.io.set_structure(struct1)
        filename = self._new_temp_path()
        try:
            self.io.save(filename)
            struct2 = self.parser.get_structure("1lcd", filename)
            confirm_numbering(struct2)
            confirm_single_end(filename)
        finally:
            os.remove(filename)

    def test_pdbio_write_x_element(self):
        """Write a structure with atomic element X with PDBIO."""
        struct1 = self.structure.copy()  # make copy so we can change it

        # Change element of one atom
        atom = next(struct1.get_atoms())
        atom.element = "X"  # X is assigned in Atom.py as last resort

        self.io.set_structure(struct1)

        filename = self._new_temp_path()
        try:
            self.io.save(filename)
        finally:
            os.remove(filename)

    def test_pdbio_write_unk_element(self):
        """PDBIO raises ValueError when writing unrecognised atomic elements."""
        struct1 = self.structure.copy()  # make copy so we can change it

        atom = next(struct1.get_atoms())
        atom.element = "1"

        self.io.set_structure(struct1)

        filename = self._new_temp_path()
        try:
            with self.assertRaises(ValueError):
                self.io.save(filename)
        finally:
            # remove the temp file even when the assertion fails
            os.remove(filename)
Ejemplo n.º 59
0
from Bio.PDB import PDBParser

# Parse 3mxw.pdb quietly, then report the number of chains and the number of
# non-"HOH" residues in chain H of the first model.
parser = PDBParser(QUIET=True)
structure = parser.get_structure('3mxw.pdb', '3mxw.pdb')

chain_count = sum(1 for _ in structure.get_chains())
chain_h_residues = sum(1 for res in structure[0]['H'] if res.resname != "HOH")

print(f"This protein has {chain_count} chains and {chain_h_residues} residues in chain H")
Ejemplo n.º 60
0
def LoadPDB(PDBID, FOLDER):
    """Parse ``FOLDER + PDBID + '.pdb'`` and return the Biopython structure.

    ``FOLDER`` is concatenated as-is, so it must already end with a path
    separator. The structure id is ``PDBID``.
    """
    from Bio.PDB import PDBParser
    pdb_path = ''.join((FOLDER, PDBID, '.pdb'))
    return PDBParser().get_structure(PDBID, pdb_path)