Beispiel #1
0
 def run_dssp(self):
     """Download the PDB entry, run DSSP on it and tally the results per chain.

     The entry ``self.pdb_code`` is fetched into the current directory in
     PDB format. Every chain of the first model gets an empty entry in
     ``self.dssp_dict`` and a zeroed counter table in ``self.dssp_info``
     (keys 'H', 'B', 'E', 'G', 'I', 'T', 'S' and '-'); the chain id is also
     appended to ``self.chains``.

     DSSP is only executed when ``self.dssp_dict`` holds more than one
     entry. Only residues whose sequence number belongs to a wild-type
     residue that has a generic number and lies outside the ECL1, ECL2,
     ICL3 and ECL3 segments are recorded. The downloaded file is removed
     at the end.
     """
     PDB.PDBList().retrieve_pdb_file(self.pdb_code,
                                     pdir='./',
                                     file_format="pdb")
     parser = PDB.PDBParser()
     ent_file = 'pdb{}.ent'.format(self.pdb_code.lower())

     # Wild-type residues of the protein, termini excluded.
     residue_query = Residue.objects.filter(
         protein_conformation__protein=self.protein).exclude(
             protein_segment__slug__in=['N-term', 'C-term'])
     wt_residues = list(residue_query)

     # Sequence numbers that carry a generic number and are not in a
     # skipped loop segment.
     skipped_segments = ['ECL1', 'ECL2', 'ICL3', 'ECL3']
     gn_residues = []
     for residue in wt_residues:
         if (residue.generic_number
                 and residue.protein_segment.slug not in skipped_segments):
             gn_residues.append(residue.sequence_number)

     model = parser.get_structure(self.pdb_code, ent_file)[0]
     for chain in model:
         chain_id = chain.get_id()
         self.chains.append(chain_id)
         self.dssp_dict[chain_id] = OrderedDict()
         self.dssp_info[chain_id] = OrderedDict(
             (code, 0) for code in ('H', 'B', 'E', 'G', 'I', 'T', 'S', '-'))

     if len(self.dssp_dict) > 1:
         dssp = PDB.DSSP(model, ent_file, dssp='/env/bin/dssp')
         for key in dssp.keys():
             chain_id = key[0]
             seq_num = key[1][1]
             if int(seq_num) not in gn_residues:
                 continue
             record = dssp[key]
             self.dssp_dict[chain_id][seq_num] = record
             # record[2] is the code used as the counter key.
             self.dssp_info[chain_id][record[2]] += 1
     os.remove(ent_file)
Beispiel #2
0
def align_structs(id1, chain1, id2, chain2):
    """
    Superimpose one chain of the second structure onto a chain of the first.

    Both entries are downloaded and parsed as mmCIF, the atom lists to align
    are built, the superposition is applied to every atom of the second
    structure's first model, the resulting RMSD is printed and both
    structures are written out through ``saving_file``.

    :param id1: the first file id
    :param chain1: the first protein's chain
    :param id2: the second file id
    :param chain2: the second protein's chain
    """
    # Fetch both entries first, then parse them.
    fetcher = pdb.PDBList()
    path1 = fetcher.retrieve_pdb_file(id1)
    path2 = fetcher.retrieve_pdb_file(id2)
    cif_parser = pdb.MMCIFParser()
    struct1 = cif_parser.get_structure("p1", path1)
    struct2 = cif_parser.get_structure("p2", path2)

    # Atom lists to superimpose; fall back to bonus_9_2 when the two
    # lists differ in length.
    atoms1 = create_atoms_list(struct1, chain1)
    atoms2 = create_atoms_list(struct2, chain2)
    if len(atoms1) != len(atoms2):
        atoms1, atoms2 = bonus_9_2(chain1, chain2, struct1, struct2)

    # Fit on the selected atoms, then move the whole first model of struct2.
    aligner = pdb.Superimposer()
    aligner.set_atoms(atoms1, atoms2)
    aligner.apply(struct2[0].get_atoms())
    print(aligner.rms)

    # Persist both structures.
    for file_id, struct in ((id1, struct1), (id2, struct2)):
        saving_file(file_id, struct)
def download_pdb(pdb):
    """
    Download a PDB entry from the internet into ``archive_pdb/``.

    Nothing is done when ``archive_pdb/<pdb>.pdb`` already exists. Otherwise
    the entry is fetched with ``PDB.PDBList`` and the downloaded
    ``pdb<code>.ent`` file is renamed to ``<pdb>.pdb``.

    :param pdb: PDB code of the entry to download
    :return: None
    """
    archive_dir = "archive_pdb"
    target = os.path.join(archive_dir, pdb + ".pdb")
    if os.path.isfile(target):
        return

    PDB.PDBList().retrieve_pdb_file(pdb, pdir=archive_dir)

    # Rename with os.rename instead of a shell "cd ...; mv ..." string: no
    # shell parsing of the PDB code and no dependency on a POSIX `mv`.
    downloaded = os.path.join(archive_dir, "pdb" + pdb.lower() + ".ent")
    # The shell version carried on when the file was missing (failed
    # download); keep that best-effort behaviour instead of raising.
    if os.path.isfile(downloaded):
        os.rename(downloaded, target)
Beispiel #4
0
def download_pdblist(pdb_dir, list_file):
    '''
    Download every PDB code read from list_file into pdb_dir (Option 3).

    The codes come from read_list_file(list_file) and are fetched in PDB
    format with Biopython's PDBList.
    Called by: AccessPDB.py:main()
    '''
    codes = read_list_file(list_file)
    bpp.PDBList().download_pdb_files(codes, pdir=pdb_dir, file_format='pdb')
Beispiel #5
0
def download_nr(pdb_dir, cutoff):
    '''
    Download the representative PDB files into pdb_dir (Option 1).

    The codes come from get_nr_list(cutoff) and are fetched in PDB format
    with Biopython's PDBList.
    Called by: AccessPDB.py:main()
    '''
    codes = get_nr_list(cutoff)
    bpp.PDBList().download_pdb_files(codes, pdir=pdb_dir, file_format='pdb')
def download_cifs(downld_dir, pdbid):
    """Download the mmCIF file for ``pdbid`` into ``downld_dir`` and gzip it.

    The download is skipped when ``<downld_dir>/<pdbid lowercased>.cif.gz``
    already exists. Any uncompressed ``.cif`` file found afterwards is
    compressed in place with ``gzip -f``.

    :param downld_dir: directory that holds the downloaded files
    :param pdbid: PDB code of the entry
    :return: None
    """
    print('Downloading MMCIFs...')

    cif_path = downld_dir + "/" + str(pdbid).lower() + ".cif"

    if not os.path.isfile(cif_path + ".gz"):  # Download if file does not exist
        PDB.PDBList().retrieve_pdb_file(pdbid, pdir=downld_dir)

    if os.path.isfile(cif_path):  # gzip files
        # Pass an argument list instead of a shell string so that spaces or
        # shell metacharacters in the path cannot alter the command.
        subprocess.call(["gzip", "-f", cif_path])
    def download(self, pdbIDList, overrideAll=False):
        """
        Download PDB files from ftp://ftp.wwpdb.org

        IDs listed in the ``failedLog`` file of the download directory are
        merged into ``self.downloadFailed`` (unless ``overrideAll`` is set)
        and skipped. Each remaining ID is downloaded and renamed to
        ``<downloadDirectory>/<pdbID>.pdb``. Successful IDs are appended to
        ``self.downloadSuccessful``, already present ones to
        ``self.downloadSkipped``. Finally ``self.downloadFailed`` is written
        back to the ``failedLog`` file as JSON.

        :param pdbIDList: list of structures to download
        :param overrideAll: Overrides all already downloaded files if set to True
        :return: None
        """
        pdbList = PDB.PDBList()
        failedDownloadsLogFile = self.downloadDirectory + "/failedLog"

        if not os.path.exists(self.downloadDirectory):
            os.makedirs(self.downloadDirectory)
        if not overrideAll and os.path.exists(failedDownloadsLogFile):
            with open(failedDownloadsLogFile, 'r') as failedLog:
                try:
                    self.downloadFailed = self.downloadFailed.union(
                        json.load(failedLog))
                except (ValueError, TypeError):
                    # Unparsable or non-iterable log content: ignore it.
                    print("no FailLog detected")

        for pdbID in set(pdbIDList) - self.downloadFailed:
            if not overrideAll and os.path.exists(
                    self._getNameFromPdbID(pdbID)):
                self.downloadSkipped.append(pdbID)
                continue

            try:
                dl_name = pdbList.retrieve_pdb_file(
                    pdbID, pdir=self.downloadDirectory)
                os.rename(dl_name,
                          self.downloadDirectory + "/" + pdbID + ".pdb")
                self.downloadSuccessful.append(pdbID)
                print("Downloaded %s" % (pdbID,))
            except (IOError, OSError):
                # OSError is listed explicitly because on Python 2 os.rename
                # raises OSError, which is not an IOError there.
                # NOTE(review): failed IDs are not added to
                # self.downloadFailed (that line was disabled in the
                # original), so the failed log only ever holds what it was
                # loaded with.
                print("Failed to download %s" % (pdbID,))
        # TODO: write a proper manager
        try:
            # `with` guarantees the handle is closed even if the write fails.
            with open(failedDownloadsLogFile, 'w') as failedLog:
                failedLog.write(json.dumps(list(self.downloadFailed)))
        except (IOError, OSError, TypeError) as err:
            # Writing the log stays best-effort, but is no longer silent.
            print("Could not write the failed-download log: %s" % (err,))
Beispiel #8
0
def checkAndGetPDB(path):
    """ Query a given pdb code from the PDB.

    If ``path`` is an existing file it is returned unchanged. Otherwise the
    file name (up to the first ``.pdb``) is used as the PDB code, the entry
    is downloaded in PDB format into the directory part of ``path`` (the
    current directory when there is none) and moved to ``path``.

    :param path: The PDB code of the molecule to query.
    :type path: str

    :return: Path to the checked pdb file.

    :raises IOError: if ``path`` is not a str (this includes None).

    """

    # None is not a str either, so a single check covers both cases.
    if not isinstance(path, str):
        raise IOError("The parameter 'path' must be a str.")

    if os.path.isfile(path):
        return path

    # Setup paths and filenames.
    pdb_target_name = os.path.basename(path).split('.pdb')[0]
    pdb_target_dir = os.path.dirname(path) or '.'

    # Query from pdb.org
    pdb_list = PDB.PDBList()
    print(
        "PDB file %s could not be found. Attempting to query from protein database server."
        % (path))
    download_target = pdb_list.retrieve_pdb_file(pdb_target_name,
                                                 pdir=pdb_target_dir,
                                                 file_format='pdb')
    # download_target is a path, so test the path itself; comparing it with
    # the bare names returned by os.listdir() would not match.
    if os.path.isfile(download_target):
        print("Successfully downloaded structure from PDB.")

    # Move and rename the downloaded file.
    shutil.move(download_target, path)

    # Cleanup. The 'obsolete' directory is not always present, so only
    # remove it when it exists.
    if os.path.isdir('obsolete'):
        shutil.rmtree('obsolete')

    return path
Beispiel #9
0
def get(PDB_id, directory=None, format='pdb'):
    """
    Download a structure file from the PDB using its PDB id.

    An existing local copy is always overwritten.

    :param PDB_id: PDB id code
    :type PDB_id: str
    :param format:  File format
        * "mmCif" (PDBx/mmCif file),
        * "pdb" (default, format PDB),
        * "xml" (PDBML/XML format),
        * "mmtf" (highly compressed),
        * "bundle" (PDB formatted archive for large structure)
    :param directory: directory in which to save pdb file; when None the
        choice is left to ``PDBList.retrieve_pdb_file``
    :return: whatever ``PDBList.retrieve_pdb_file`` returns (the path of the
        downloaded file); previously nothing was returned
    """
    # pdir=None is retrieve_pdb_file's own default, so a single call covers
    # both the "directory given" and the "no directory" case.
    return PDB.PDBList().retrieve_pdb_file(PDB_id,
                                           pdir=directory,
                                           file_format=format,
                                           overwrite=True)
Beispiel #10
0
def get_pdb_files2(id_list, job="J0"):
    """ Download each PDB id into its own numbered folder.

    The i-th id (counting from 1) is downloaded in PDB format into the
    folder ``P%05d % i``. When no file was obtained a warning is logged and
    the folder is removed again.

    :param id_list: iterable of PDB ids
    :param job: unused; kept for backward compatibility
    :return: list of ``{"name": <id>, "folder": <folder>}`` dicts, one per
        successfully downloaded id
    """
    result = []
    pdbl = bpdb.PDBList()
    for i, pid in enumerate(id_list, 1):
        folder = "P%05d" % i
        filename = pdbl.retrieve_pdb_file(pid,
                                          pdir=folder,
                                          file_format="pdb")
        if isfile(filename):
            result.append({"name": pid, "folder": folder})
        else:
            logging.warning("No structure found in PDB for the ID '" + pid +
                            "'")
            try:
                # Remove the folder that was created for this download (the
                # original removed a directory named after the id, which is
                # not the one used above).
                rmdir(folder)
            except OSError:
                logging.error("Couldn't delete directory for " + str(pid))
    # The original returned the undefined name `file_list`.
    return result
def from_pdb_code_to_structure(code):
    """
    From a specific pdb code this function retrieves the structure file
    from the web database and generates a structure instance.

    :param code: PDB code of the entry
    :return: the parsed structure when ``nmr_check`` accepts it; otherwise
        the ``ValueError`` class itself (returned, not raised) - this
        happens when the code fails ``pdb_code_check``, the download raises
        ``URLError``, or ``nmr_check`` rejects the structure
    """
    if not pdb_code_check(code):
        return ValueError

    pdbl = pdb.PDBList()
    parser = pdb.PDBParser(QUIET=True)
    try:
        structure = parser.get_structure(
            code, pdbl.retrieve_pdb_file(code, pdir="pdbfiles/"))
    except urllib.error.URLError:
        sys.stderr.write("There is no a structure with the pdb code {} "
                         "in the database\n".format(code))
        # Without this return the code below would hit an unbound
        # `structure` and fail with UnboundLocalError.
        return ValueError

    if nmr_check(structure):
        return structure
    return ValueError
def PointCloudData(pdbid, chainid):
    """ Get C-alpha coordinates for the given pdbid and chainid
    along with the temperature factors and residue names.

    The file ``pdb<pdbid>.ent`` is looked up in the current working
    directory and downloaded there (PDB format, overwriting) when missing.
    Only the first model is read.

    :param pdbid: PDB code of the entry
    :param chainid: id of the chain to read
    :return: tuple ``(pointcloud, bf, resnames)`` - the coordinates as a
        numpy array, plus the list of B-factors and the list of residue
        names of the "CA" atoms, in chain order
    """
    pc = []
    bf = []
    resnames = []

    # The original tested an undefined name `filename`; build the expected
    # local file name explicitly.
    filename = os.path.join(os.getcwd(), 'pdb' + pdbid + '.ent')
    if not os.path.exists(filename):
        pdbl = PDB.PDBList()
        # Parse the path the downloader reports rather than guessing it.
        filename = pdbl.retrieve_pdb_file(pdbid,
                                          obsolete=False,
                                          pdir=os.getcwd(),
                                          file_format='pdb',
                                          overwrite=True)
    parser = PDB.PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdbid, filename)
    chain = structure[0][chainid]
    for residue in chain:
        for atom in residue:
            if atom.get_id() == "CA":
                resnames.append(residue.get_resname())
                bf.append(atom.get_bfactor())
                pc.append(atom.get_coord())
    pointcloud = np.asarray(pc)
    return pointcloud, bf, resnames
Beispiel #13
0
def _retrieve_chain(pdb_code_input, model_id=0):
    """Download a PDB entry and return one chain as an in-memory PDB file.

    :param pdb_code_input: string of the form ``"<pdb code>_<chain id>"``
    :param model_id: index of the model to take the chain from
    :return: a StringIO object positioned at the start, holding the chain
        written by ``Bio.PDB.PDBIO``
    :raises ValueError: if ``pdb_code_input`` does not split into exactly
        two parts at ``"_"``
    """
    import Bio.PDB as pdb
    import shutil
    import tempfile
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    pdb_code, chain_code = pdb_code_input.split("_")
    temp_dir = tempfile.mkdtemp()
    try:
        # Point the downloader at the temp dir instead of os.chdir()-ing
        # into it: the process working directory is left untouched and no
        # longer ends up inside a directory that gets deleted.
        pdb_fetcher = pdb.PDBList(pdb=temp_dir)
        pdb_filepath = pdb_fetcher.retrieve_pdb_file(pdb_code)
        structure = pdb.PDBParser().get_structure(pdb_code, pdb_filepath)
        chain_result = structure[model_id][chain_code]
        outval = StringIO()
        writer = pdb.PDBIO()
        writer.set_structure(chain_result)
        writer.save(outval)
    finally:
        # Remove the temp dir even when download or parsing fails.
        shutil.rmtree(temp_dir)
    outval.seek(0)
    return outval
Beispiel #14
0
def test_chemical_composition(protein_id):
    """Check that crystals.PDBParser and Bio.PDB.PDBParser agree on the
    chemical composition of a protein, i.e. the same elements in the same
    proportions."""
    fetcher = biopdb.PDBList(verbose=False, obsolete_pdb=gettempdir())
    reference_parser = biopdb.PDBParser()

    with catch_warnings():
        filterwarnings("ignore",
                       category=biopdb.PDBExceptions.PDBConstructionWarning)
        with tempfile.TemporaryDirectory() as workdir:
            with PDBParser(protein_id, download_dir=workdir) as parser:
                pdb_path = fetcher.retrieve_pdb_file(protein_id,
                                                     pdir=workdir,
                                                     file_format="pdb")

                crystals_chemical_composition = Counter(
                    atm.element for atm in parser.atoms())

                # Note: Bio.PDB atoms store element as uppercase strings.
                # Thus, they must be changed to titlecase
                reference_structure = reference_parser.get_structure(
                    protein_id, pdb_path)
                biopdb_chemical_composition = Counter(
                    atm.element.title()
                    for atm in reference_structure.get_atoms())

                assert biopdb_chemical_composition == crystals_chemical_composition
Beispiel #15
0
 def setUp(self):
     """Create the PDB downloader and parser shared by the test methods.

     The downloader is silent (``verbose=False``) and uses the directory
     returned by ``gettempdir()`` as its ``obsolete_pdb`` location.
     """
     obsolete_dir = gettempdir()
     self.pdb_list = biopdb.PDBList(verbose=False, obsolete_pdb=obsolete_dir)
     self.biopdb_parser = biopdb.PDBParser()
                      'a PDB file'))
# Reject anything that is not a valid PDB code before doing any work.
if not mdl.pdb_code_check(options.code):
    raise ValueError('Input code is not a PDB code')
pdb_id = options.code

# Use the user-supplied input file when given, otherwise the default name
# of a downloaded entry.
pdbfile = options.infile or 'pdb' + pdb_id + '.ent'

# Names of the output files derived from the PDB code.
pdbalignedfile = pdb_id + 'align.pdb'
pdb_superimp = pathname + pdb_id + 'superimp.pdb'

# Download the entry when the input file is not in the working directory.
if not os.path.exists(pathname + pdbfile):
    pdbobj = pdb.PDBList()
    pdbobj.retrieve_pdb_file(pdb_id, pdir=pathname)

# Only files ending in 'pdb' or 'ent' are accepted.
if not pdbfile.endswith(('pdb', 'ent')):
    raise ValueError(('Your input file is not a valid PDB file, please use a '
                      'pdb or ent file'))

# Atom names used in the analysis: C-alpha only, the backbone, or none.
if options.atom == 'CA':
    atom_list = ['CA']
elif options.atom == 'Back':
    atom_list = ['N', 'CA', 'C', 'O']
else:
    atom_list = []

if options.verb:
    print("Initializing analysis information")
Beispiel #17
0
    def new_xtals(self, uniprot):
        ''' List GPCR crystal structures missing from GPCRdb and the yaml files. Adds missing structures to DB.

        For every structure returned by ``self.pdb_request_by_uniprot(uniprot)``:

        * it is appended to ``self.db_list`` when no ``Structure`` with that
          PDB code can be fetched, it is not in ``self.exceptions`` and
          ``self.pdb_request_by_pdb`` returns 1 for it;
        * it is appended to ``self.yaml_list`` when it is in neither
          ``self.yamls`` nor ``self.exceptions`` and the same check passes;
        * structures missing from the database are then processed: PDB info
          is fetched, the PDB file is downloaded, protein / conformation /
          PDB data objects are created, DSSP-based chain selection is run,
          construct and structure yaml files are written, the
          ``build_structures`` management command is invoked, the state is
          identified and the sodium pocket is checked.

        Any exception raised while processing one structure is caught and
        printed together with the PDB code; the loop then moves on.

        :param uniprot: accession used for the PDB request and the Protein lookup
        :return: None
        '''
        structs = self.pdb_request_by_uniprot(uniprot)
        # Look up the wild-type protein; any failure leaves protein as None.
        try:
            protein = Protein.objects.get(accession=uniprot)
        except:
            protein = None
        # Residues of the protein carrying the 1x50 ... 7x50 generic number labels.
        try:
            x50s = Residue.objects.filter(protein_conformation__protein=protein,generic_number__label__in=['1x50','2x50','3x50','4x50','5x50','6x50','7x50'])
        except:
            x50s = None
        # ['null'] is treated as "no structures returned".
        if structs!=['null']:
            for s in structs:
                missing_from_db, missing_yaml = False, False
                # Is the structure already in the database?
                try:
                    st_obj = Structure.objects.get(pdb_code__index=s)
                except:
                    if s not in self.exceptions:
                        check = self.pdb_request_by_pdb(s)
                        if check==1:
                            self.db_list.append(s)
                            missing_from_db = True
                # Is there a yaml file for the structure?
                if s not in self.yamls and s not in self.exceptions:
                    # Reuse the positive check result when s was just added to db_list.
                    if s not in self.db_list:
                        check = self.pdb_request_by_pdb(s)
                    else:
                        check = 1
                    if check==1:
                        self.yaml_list.append(s)
                        missing_yaml = True
                # Only structures missing from the database are processed further.
                if not missing_from_db:
                    continue
                try:
                    pdb_data_dict = fetch_pdb_info(s, protein, new_xtal=True)
                    exp_method = pdb_data_dict['experimental_method']
                    # NOTE(review): st_type is assigned here but not read again within this method.
                    if exp_method=='Electron Microscopy':
                        st_type = StructureType.objects.get(slug='electron-microscopy')
                    elif exp_method=='X-ray diffraction':
                        st_type = StructureType.objects.get(slug='x-ray-diffraction')
                    if 'deletions' in pdb_data_dict:
                        for d in pdb_data_dict['deletions']:
                            # x50 residues that do not fall strictly inside this deletion.
                            presentx50s = []
                            for x in x50s:
                                if not d['start']<x.sequence_number<d['end']:
                                    presentx50s.append(x)                                    
                            # Filter out ones without all 7 x50 positions present in the xtal
                            if len(presentx50s)!=7:
                                # Errors (e.g. s no longer in a list) are ignored here.
                                try:
                                    del self.db_list[self.db_list.index(s)]
                                    missing_from_db = False
                                    del self.yaml_list[self.yaml_list.index(s)]
                                except:
                                    pass
                    else:
                        print('Warning: no deletions in pdb info, check {}'.format(s))
                        continue

                    if missing_from_db:
                        pref_chain = ''
                        resolution = pdb_data_dict['resolution']
                        pdb_code, created = WebLink.objects.get_or_create(index=s, web_resource=WebResource.objects.get(slug='pdb'))
                        # Download the PDB file into the current directory and read it back.
                        pdbl = PDB.PDBList()
                        pdbl.retrieve_pdb_file(s, pdir='./', file_format="pdb")
                        # .lower() is applied to the whole formatted path.
                        with open('./pdb{}.ent'.format(s).lower(),'r') as f:
                            lines = f.readlines()
                        pdb_file = ''
                        publication_date, pubmed, doi = '','',''
                        state = ProteinState.objects.get(slug='inactive')
                        # Protein entry for the structure itself, with the wild-type protein as parent.
                        new_prot, created = Protein.objects.get_or_create(entry_name=s.lower(), accession=None, name=s.lower(), sequence=pdb_data_dict['wt_seq'], family=protein.family,
                                                                          parent=protein, residue_numbering_scheme=protein.residue_numbering_scheme, 
                                                                          sequence_type=ProteinSequenceType.objects.get(slug='mod'), source=ProteinSource.objects.get(name='OTHER'), 
                                                                          species=protein.species)
                        new_prot_conf, created = ProteinConformation.objects.get_or_create(protein=new_prot, state=state, template_structure=None)
                        # Pull date, PubMed id and DOI out of the header records while
                        # rebuilding the file content as one string.
                        for line in lines:
                            if line.startswith('REVDAT   1'):
                                publication_date = line[13:22]
                            if line.startswith('JRNL        PMID'):
                                pubmed = line[19:].strip()
                            if line.startswith('JRNL        DOI'):
                                doi = line[19:].strip()
                            pdb_file+=line
                        pdb_data, created = PdbData.objects.get_or_create(pdb=pdb_file)
                        # Convert e.g. '01-JAN-20' to '2020-01-01'.
                        d = datetime.strptime(publication_date,'%d-%b-%y')
                        publication_date = d.strftime('%Y-%m-%d')
                        # Find or create the publication, preferring the DOI over the PubMed id.
                        # Any failure in this section is silently ignored.
                        try:
                            if doi!='':
                                try:
                                    publication = Publication.objects.get(web_link__index=doi)
                                except Publication.DoesNotExist as e:
                                    p = Publication()
                                    try:
                                        p.web_link = WebLink.objects.get(index=doi, web_resource__slug='doi')
                                    except WebLink.DoesNotExist:
                                        wl = WebLink.objects.create(index=doi,
                                            web_resource = WebResource.objects.get(slug='doi'))
                                        p.web_link = wl
                                    p.update_from_doi(doi=doi)
                                    p.save()
                                    publication = p
                            elif pubmed!='':
                                try:
                                    publication = Publication.objects.get(web_link__index=pubmed)
                                except Publication.DoesNotExist as e:
                                    p = Publication()
                                    try:
                                        p.web_link = WebLink.objects.get(index=pubmed,
                                            web_resource__slug='pubmed')
                                    except WebLink.DoesNotExist:
                                        wl = WebLink.objects.create(index=pubmed,
                                            web_resource = WebResource.objects.get(slug='pubmed'))
                                        p.web_link = wl
                                    p.update_from_pubmed_data(index=pubmed)
                                    p.save()
                                    publication = p
                        except:
                            pass
                        # Pick the preferred chain from the DSSP results.
                        pcs = PdbChainSelector(s, protein)
                        pcs.run_dssp()
                        preferred_chain = pcs.select_chain()

                        # Run state identification

                        # Create yaml files
                        with open('../../data/protwis/gpcr/structure_data/constructs/{}.yaml'.format(pdb_code.index), 'w') as construct_file:
                            yaml.dump({'name': pdb_code.index.lower(), 'protein': protein.entry_name}, construct_file, indent=4)
                        with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as structure_file:
                            struct_yaml_dict = {'construct': pdb_code.index.lower(), 'pdb': pdb_code.index, 'preferred_chain': preferred_chain, 'auxiliary_protein': '', 
                                                'ligand': {'name': 'None', 'pubchemId': 'None', 'title': 'None', 'role': '.nan', 'type': 'None'}, 'signaling_protein': 'None', 'state': 'Inactive'}
                            auxiliary_proteins, ligands = [], []
                            # Auxiliary proteins and construct sequences are only examined
                            # when the entry has ligands (both loops sit inside this branch).
                            if pdb_data_dict['ligands']!='None':
                                for key, values in pdb_data_dict['ligands'].items():
                                    # Component ids in this list are skipped and not recorded as ligands.
                                    if key in ['SO4','NA','CLR','OLA','OLB','OLC','TAR','NAG','EPE','BU1','ACM','GOL','PEG','PO4','TLA','BOG','CIT','PLM','BMA','MAN','MLI','PGE']:
                                        continue
                                    else:
                                        ligands.append({'name': key, 'pubchemId': 'None', 'title': pdb_data_dict['ligands'][key]['comp_name'], 'role': '.nan', 'type': 'None'})
                                for key, values in pdb_data_dict['auxiliary'].items():
                                    if pdb_data_dict['auxiliary'][key]['subtype'] in ['Expression tag', 'Linker']:
                                        continue
                                    else:
                                        auxiliary_proteins.append(pdb_data_dict['auxiliary'][key]['subtype'])
                                for key, values in pdb_data_dict['construct_sequences'].items():
                                    if key!=protein.entry_name and key not in struct_yaml_dict['auxiliary_protein']:
                                        if 'arrestin' in key:
                                            struct_yaml_dict['signaling_protein'] = key
                                # NOTE(review): both thresholds are ">1", so a single auxiliary
                                # protein or a single ligand is not written - confirm this is intended.
                                if len(auxiliary_proteins)>1:
                                    struct_yaml_dict['auxiliary_protein'] = ', '.join(auxiliary_proteins)
                                if len(ligands)>1:
                                    struct_yaml_dict['ligand'] = ligands
                            yaml.dump(struct_yaml_dict, structure_file, indent=4, default_flow_style=False)

                        # Build residue table for structure
                        build_structure_command = shlex.split('/env/bin/python3 manage.py build_structures -f {}.yaml'.format(pdb_code.index))
                        subprocess.call(build_structure_command)

                        # Check state
                        struct = Structure.objects.get(pdb_code__index=pdb_code.index)
                        pi = PdbStateIdentifier(struct)
                        pi.run()
                        if pi.state!=None:
                            Structure.objects.filter(pdb_code__index=pdb_code.index).update(state=pi.state)
                            print(pi.state, pi.activation_value)
                            # Re-read the structure yaml written above and update state and distance.
                            # NOTE(review): yaml.load is called without an explicit Loader.
                            with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'r') as yf:
                                struct_yaml = yaml.load(yf)
                            struct_yaml['state'] = pi.state.name
                            try:
                                struct_yaml['distance'] = round(float(pi.activation_value), 2)
                            except:
                                struct_yaml['distance'] = None
                            with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as struct_yaml_file:
                                yaml.dump(struct_yaml, struct_yaml_file, indent=4, default_flow_style=False)
                
                        # Check sodium pocket
                        new_prot_conf.sodium_pocket()

                        print('{} added to db (preferred_chain chain: {})'.format(s, preferred_chain))
                # Report the failure for this structure and continue with the next one.
                except Exception as msg:
                    print(s, msg)
Beispiel #18
0
    def analysis(self):
        """
        Run all the computations needed to perform the EDA.

        It contains the same steps used in the main CLI: the structure is
        downloaded when the input file is missing, the ``EDAnalysis`` object
        is built (with a reference structure in 'MD' mode), the models are
        superimposed, the covariance matrix and its eigen-decomposition are
        computed and the plots are generated. The results are stored in
        ``self.controller.app_data`` under "ED", "plot", "RMSD_plot" (and
        "head" in 'NMR' mode) and the application then switches to the
        plot_window.

        If the ``EDAnalysis`` object cannot be created in 'MD' mode, an
        error is written to stderr and the method returns without showing
        the plot window.
        """
        pdb_id = self.controller.app_data["pdbid"]
        pathname = self.controller.app_data["pathname"]
        pdbfile = self.controller.app_data["pdbfilename"]
        atom = self.controller.app_data["atom"]
        mode = self.controller.app_data["mode"]

        sys.stderr.write("the selcted mode is: {} ".format(mode))
        pdbalignedfile = str(pdb_id) + 'align.pdb'
        pdb_superimp = str(pathname) + str(pdb_id) + 'superimp.pdb'

        # Download the entry when the input file is not available locally.
        if not os.path.exists(str(pathname) + str(pdbfile)):
            pdbobj = pdb.PDBList()
            pdbobj.retrieve_pdb_file(pdb_id, pdir=str(pathname))
            sys.stderr.write("The structure {} have been "
                             "retrieved.\n".format(pdb_id))

        # Atom names used in the analysis: C-alpha only, the backbone, or none.
        if atom == 'CA':
            atom_list = ['CA']
        elif atom == 'Back':
            atom_list = ['N', 'CA', 'C', 'O']
        else:
            atom_list = []

        if mode == 'MD':
            # 'MD' mode needs the PDB entry as a reference structure.
            pdbref = pdb.PDBList()
            ref_file = pdbref.retrieve_pdb_file(pdb_id, pdir=pathname)
            parser = pdb.PDBParser(QUIET=True)
            reference = parser.get_structure(pdb_id + 'ref', ref_file)
            try:
                ED = eda.EDAnalysis(pdb_id,
                                    mode,
                                    atom_list,
                                    pathname + pdbfile,
                                    reference=reference)
            except (eda.WrongModeException, KeyError, ValueError) as err:
                # Swallowing the error left ED unbound, so the next statement
                # crashed with UnboundLocalError. Report and stop instead.
                sys.stderr.write(
                    "Could not set up the analysis: {}\n".format(err))
                return
        else:
            ED = eda.EDAnalysis(pdb_id, mode, atom_list, pathname + pdbfile)

        ED.superimpose_models()
        if mode == 'NMR':
            # Save the superimposed models, put the original header back on
            # top and drop the intermediate file.
            sys.stderr.write("Writting the superimposed file.\n")
            head = mdl.store_header_text(pathname + pdbfile)
            self.controller.app_data["head"] = head
            io = pdb.PDBIO()
            io.set_structure(ED.structure)
            io.save(pdb_superimp)
            mdl.merge_the_header(pdb_superimp, head, pathname + pdbalignedfile)
            os.remove(pdb_superimp)

        sys.stderr.write("Calculating means and coordinates\n")
        ED.createcordsarray()
        sys.stderr.write("Calculating covariance matrix\n")
        sys.stderr.write("Calculating eigenvalues and eigenvectors\n")
        ED.cal_cov()
        sys.stderr.write("Plotting eigenvalues\n")
        self.controller.app_data["ED"] = ED
        # Plot at most 30 eigenvalues.
        n_plot = 30
        if ED.n < n_plot:
            n_plot = ED.n
        pathplots = pathname + 'plots/'
        plot = ED.plot_eig_wosv(n_plot)
        self.controller.app_data["plot"] = plot

        RMSD_plot = ED.RMSD_res_plot(4, pathplots, origin='interface')
        self.controller.app_data["RMSD_plot"] = RMSD_plot
        self.controller.show_frame("plot_window")
def consumer(code, result_queue):
    """Download one PDB entry and report the outcome on ``result_queue``.

    The first four characters of ``code`` are used as the PDB code.

    :param code: string starting with a four-character PDB code
    :param result_queue: queue-like object with a ``put`` method; receives
        ``(path, "Pull Successful")`` on success and
        ``("", "Pull Failed at Download")`` when the download raises
    """
    try:
        path = BPDB.PDBList().retrieve_pdb_file(pdb_code=code[0:4])
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt / SystemExit still propagate.
        result_queue.put(("", "Pull Failed at Download"))
    else:
        result_queue.put((path, "Pull Successful"))
Beispiel #20
0
import numpy as np
import pandas as pd
from Bio import PDB

# Download entry 1TUP in PDB format into the current directory and parse it.
repository = PDB.PDBList()
parser = PDB.PDBParser()
repository.retrieve_pdb_file('1TUP', pdir='.', file_format='pdb')
p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')

# Distinct values found in the first field of the residue ids.
my_residues = {residue.id[0] for residue in p53_1tup.get_residues()}
print(my_residues)


def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):
    """Return the summed ``mass`` of the atoms accepted by ``accept_fun``.

    :param atoms: iterable of objects with a ``mass`` attribute
    :param accept_fun: predicate called with each atom; by default every
        atom is accepted unless the first field of its parent's id is 'W'
    :return: sum of the accepted masses (0 when nothing is accepted)
    """
    accepted_masses = (atom.mass for atom in atoms if accept_fun(atom))
    return sum(accepted_masses)


# One row per chain, three mass columns per row.
chain_names = [chain.id for chain in p53_1tup.get_chains()]
my_mass = np.empty((len(chain_names), 3))
for i, chain in enumerate(p53_1tup.get_chains()):
    # Column 0: everything except residues whose id starts with 'W'.
    my_mass[i, 0] = get_mass(chain.get_atoms())
    # Column 1: residues whose id starts with neither ' ' nor 'W'.
    my_mass[i, 1] = get_mass(
        chain.get_atoms(),
        accept_fun=lambda atom: atom.parent.id[0] not in [' ', 'W'])
    # Column 2: only residues whose id starts with 'W'.
    my_mass[i, 2] = get_mass(
        chain.get_atoms(),
        accept_fun=lambda atom: atom.parent.id[0] == 'W')
masses = pd.DataFrame(
    my_mass, index=chain_names, columns=['No Water', 'Zincs', 'Water'])
Beispiel #21
0
# Biopython provides a function that downloads every file in a list, but it
# inserts no delay between downloads, and you can be banned for fetching
# numerous files without pauses.
#
# This script downloads entries.idx,
# creates pdb_index - a cleaned copy of the index that can be read into a dataframe -
# and creates a directory holding the downloaded PDB files.
#
# Note: in this script I downloaded the files whose title contains 'RNA'.
# To select a different subset of files, change the line that assigns the 'mask' variable.

# Directory (absolute or relative path) in which the PDB files are stored;
# it is created when it does not exist yet.
path = input("Enter path to directory to store files: ")
os.makedirs(path, exist_ok=True)

# Downloader object
load_struct = PDB.PDBList()

# Fetch the PDB index file with wget
command = shlex.split(
    'wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx')
sp.check_output(command, universal_newlines=True)

# Copy the index to pdb_index: the header line becomes tab-separated like
# the rest of the file, and lines starting with '-' are dropped.
with open('entries.idx', 'r') as source, open('pdb_index', 'w') as target:
    for ind, line in enumerate(source):
        line = line.replace(', ', '\t') if ind == 0 else line
        if line.startswith('-'):
            continue
        target.write(line)
Beispiel #22
0
from .IonComplex import IonComplex
from ..PolyIon import Peptide
from ..Ion import fixed_state

import tempfile
from string import ascii_uppercase
from Bio import PDB

# Module-level Bio.PDB helper objects, created once at import time.
# PDB downloader; obsolete_pdb is set to the literal string 'override'
# (presumably the directory used for obsolete entries - verify against Bio.PDB).
lister = PDB.PDBList(obsolete_pdb='override')
# Parser for PDB-format files.
parser = PDB.PDBParser()
# Polypeptide builder (presumably used to extract peptide sequences from
# parsed structures - confirm at the call sites).
builder = PDB.PPBuilder()


@fixed_state
class Protein(IonComplex):
    """Protein represents an ion composed of a complex of peptides.

    :param name: Name of the protein.
    :param ids: Names of the peptide members.
    :param sequences: Sequences of the peptide members.
    :param members: An iterable of the peptide members.

    If members and sequences are not provided, the name will be searched in the
    Protein DataBase (PDB). If a protein of the same name is available, the
    sequences of the peptides will be gathered from the PDB.
    """

    # Maps each state attribute name to a short description
    # (presumably consumed by the fixed_state decorator - verify).
    _state = {
        'name': 'Protein name.',
        'members': 'Name of the peptide members.'
    }
Beispiel #23
0
def main():
    """
    Download the PDB index, fetch every RNA entry and annotate its base pairs.

    Steps (index and id files are written to the current working directory):
      1. download entries.idx with wget and write a cleaned copy to pdb_index;
      2. load pdb_index into a dataframe, drop rows without a HEADER and keep
         the rows whose HEADER contains 'RNA';
      3. write the unique IDCODE values to rna_ids and download each entry in
         pdb format, sleeping 30 seconds after every download;
      4. run rnaview on every id listed in rna_ids.

    :return: None
    :raises subprocess.CalledProcessError: if wget or rnaview exits non-zero
    """
    # Loading
    # Initialize loading class
    load_struct = PDB.PDBList()

    # Parse command to load pdb index file
    command = shlex.split(
        'wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx')
    # Run command
    sp.check_output(command, universal_newlines=True)

    # Delete junk --lines in file, make header tab-separated as whole other file
    with open('entries.idx', 'r') as source, open('pdb_index', 'w') as target:
        for ind, line in enumerate(source):
            if ind == 0:
                line = line.replace(', ', '\t')
            if not line.startswith('-'):
                target.write(line)

    # Create dataframe from full file with pdb index
    with open('pdb_index', 'r') as source:
        data = pd.read_csv(source, sep='\t')

    print(data.shape, data.columns)

    # Checking data
    print(data.isnull().any(), data.shape)

    # Drop files with empty header
    data.dropna(subset=['HEADER'], inplace=True)
    print(data.shape)

    # Filter subset of data with RNA in header
    mask = data['HEADER'].str.contains('RNA')
    rna = data[mask]

    # Create list with PDB ids of files with RNA
    rna_ids = rna['IDCODE'].unique().tolist()
    rna_length = len(rna_ids)

    with open('rna_ids', 'w') as ids_file:
        for entry in rna_ids:
            ids_file.write('{}\n'.format(entry))

    # Load 1 pdb file from RNA list, wait 30 seconds
    # NOTE(review): files are stored under this absolute directory while
    # rnaview below reads the relative path pdb/...; this presumably only
    # works when the script is started from the parent of that directory.
    for ind, pdb_id in enumerate(rna_ids, 1):
        load_struct.retrieve_pdb_file(
            pdb_id,
            file_format='pdb',
            pdir='/home/arleg/PycharmProjects/Bioinformatics/RNA/pdb')
        print("{} is loaded, {} from {}".format(pdb_id, ind, rna_length))
        time.sleep(30)

    # Base pair annotation of all rna pdb files in rna_ids file
    # rnaview needs the RNAVIEW environment variable. It is set on a copy of
    # the environment that is handed to the child process only, so the
    # environment of this Python process is not modified.
    envir = os.environ.copy()
    envir['RNAVIEW'] = '/home/arleg/RNATools/RNAVIEW'

    # Call rnaview on all files listed in rna_ids
    # There is an option in rnaview to read all pdbs in one call to tool, yet it didn`t work in my script
    with open('rna_ids', 'r') as ids_file:
        for line in ids_file:
            line = line.strip().lower()
            # Parse command to terminal
            command = shlex.split(
                '/home/arleg/RNATools/RNAVIEW/bin/rnaview pdb/pdb{}.ent'.
                format(line))
            # Run it
            sp.check_output(command, env=envir, universal_newlines=True)
Beispiel #24
0
    def __gatherAtoms(self):
        """Load the structure for self.pdbCode with BioPython and collect its atoms.

        The file <pdbDataPath>pdb<code>.ent is parsed. If that fails and
        pdbDataPath does not contain '_ADJ', the entry is downloaded into
        pdbDataPath and parsing is retried after 1 second and, if needed,
        once more after a further 10 seconds.

        Every accepted atom becomes a GeoAtom that is appended to self.atoms
        (residue name in getAAList()), self.water ('HOH') or self.hetatms
        (anything else). self.averageBfactor is set to the mean B-factor of
        the accepted CA atoms (0 if there are none) and each atom in
        self.atoms gets a 'bfactorRatio' value relative to that mean.

        Returns:
            self.hasPDB: True when a structure was parsed, False otherwise.

        Raises:
            Whatever the parser raises if the last retry after a download
            fails as well; that error is deliberately not caught.
        """
        import time
        import Bio.PDB as bio

        bfactorCount = 0
        bfactorTotal = 0
        self.hasPDB = True
        pdbCode = self.pdbCode.lower()
        print('PSU: load from BioPython', self.pdbCode)
        parser = bio.PDBParser()
        biodl = bio.PDBList()
        pdbFile = self.pdbDataPath + 'pdb' + pdbCode + '.ent'
        structure = None
        gotPdb = False
        try:
            structure = parser.get_structure(pdbCode, pdbFile)
            gotPdb = True
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still interrupts
            if '_ADJ' not in self.pdbDataPath:  #never download the pdb to an adjusted directory
                print('!!! Downloading from pdb: ', self.pdbDataPath,
                      pdbCode)
                biodl.download_pdb_files([pdbCode],
                                         pdir=self.pdbDataPath,
                                         file_format='pdb')
                time.sleep(1)
                try:
                    structure = parser.get_structure(pdbCode, pdbFile)
                    gotPdb = True
                except Exception:
                    # give the download more time, then let a failure propagate
                    time.sleep(10)
                    structure = parser.get_structure(pdbCode, pdbFile)
                    gotPdb = True

        if gotPdb:
            resolution = structure.header['resolution']
            atomNo = 0
            resnum = 1  # running residue index over the whole structure
            for model in structure:
                for chain in model:
                    for residue in chain:
                        r = residue.get_resname()
                        fullId = residue.get_full_id()
                        rid = fullId[3][1]
                        chainId = fullId[2]
                        hetatm = fullId[3][0]
                        ridx = resnum
                        resnum = resnum + 1
                        #decision as to whether r is to be used. for density maps yes, for geoemtry no
                        wanted = (r in self.getAAList()
                                  and 'H' not in hetatm) or self.useAll
                        if not wanted:
                            continue
                        for atom in residue:
                            disordered = 'N'
                            useAtom = True
                            if atom.is_disordered():
                                disordered = 'Y'
                                if self.keepDisordered:
                                    # prefer the "A" alternative when present
                                    if atom.disordered_has_id("A"):
                                        atom.disordered_select("A")
                                else:
                                    useAtom = False

                            if not self.keepDisordered and useAtom:
                                # partially occupied atoms are rejected too
                                if atom.get_occupancy() < 1:
                                    useAtom = False
                                    print('debug not passed disordered',
                                          atom, atom.get_occupancy())

                            if useAtom:
                                atomID = atom.get_full_id()[0] + chainId + str(
                                    rid) + atom.get_name()
                                if atomID in self.badAtoms:
                                    useAtom = False

                            if not useAtom:
                                continue

                            oneAtom = atm.GeoAtom()
                            oneAtom.setStructureInfo(pdbCode, resolution)
                            oneAtom.setResidueInfo(chainId, rid, ridx, r)
                            atomNo += 1
                            name = atom.get_name()
                            occupant = atom.get_full_id()[4][1]
                            if occupant == ' ':
                                occupant = 'A'
                            position = atom.get_vector()
                            x = position[0]
                            y = position[1]
                            z = position[2]
                            bfactor = atom.get_bfactor()
                            if name == 'CA':
                                bfactorCount += 1
                                bfactorTotal += bfactor

                            occupancy = atom.get_occupancy()
                            oneAtom.setAtomInfo(r, name, atomNo, x, y, z,
                                                bfactor, occupant, occupancy,
                                                disordered)
                            # add density if we can
                            if self.hasDensity:
                                tFoFc, FoFc, Fo, Fc = self.geoDen.getDensityXYZ(
                                    x, y, z)
                                oneAtom.setDensityInfo(tFoFc, FoFc, Fo, Fc)

                            if r in self.getAAList():
                                self.atoms.append(oneAtom)
                            elif r == 'HOH':
                                self.water.append(oneAtom)
                            else:
                                self.hetatms.append(oneAtom)

            if bfactorCount > 0:
                self.averageBfactor = bfactorTotal / bfactorCount
                # Now set the bFactorRatio for all atoms
                for atom in self.atoms:
                    atom.values['bfactorRatio'] = atom.values[
                        'bfactor'] / self.averageBfactor
            else:
                self.averageBfactor = 0

            print('PSU: loaded successfully from BioPython', self.pdbCode)
            self.hasPDB = True
        else:
            print('!!! PSU: failed to load', self.pdbCode, 'from',
                  self.pdbDataPath)
            self.hasPDB = False

        return self.hasPDB
Beispiel #25
0
def analyse_pucker_from_pdbs(pdbinputfilename, ligandinputfilename=None, outputfile = "tessellate_report",output_format="json", output_dir=""):
    """
    Download the listed PDB entries, find rings in their residues and classify each ring's pucker.

    For every residue of every listed structure:
      * if the residue name is in the ligand dictionary, each predefined ring is classified;
      * if the residue is not an amino acid or water and has more than 5 atoms, rings are
        detected with getRing.create_graph_and_find_rings_suite and classified;
      * the centroids of the detected rings with 5 to 8 atoms are treated as "macro atoms";
        when more than four exist, macro rings are searched and classified as well.
    Each result is turned into a node by createnodejson and collected once (duplicates are skipped).

    :param pdbinputfilename: path of a text file with one PDB id per line (blank lines are ignored)
    :param ligandinputfilename: optional path of a ligand file; each line is
        "name ringsize numrings" followed by numrings * ringsize atom names.
        When None, a built-in ligand dictionary is used.
    :param outputfile: base name of the report; "." + output_format is appended
    :param output_format: "json", "bson", "pandas" or "txt"; any other value writes no report
    :param output_dir: directory for the report; PDB files are stored in its "pdb" sub-directory
    :return: None
    :raises Exception: errors in the known-ligand and detected-ring pucker calculations are
        logged and re-raised; errors in the macro-ring calculation are only logged.
    """
    import sys
    try:
        import os
        import numpy as np
        import Bio.PDB as bp
        import tessellate.utils.pucker as puc
        import tessellate.utils.helperfunctions as helperfunctions
        import tessellate.utils.getRing as getRing
    except Exception as e:
        # format the exception into the message instead of printing a literal "%s"
        print("Error - Cannot import module %s" % e)
        sys.exit(1)

    # NOTE(review): neither dictionary is used below; the calls are kept in case the
    # initialiser has side effects - TODO confirm and remove
    all_pucker_json = helperfunctions.init_all_pucker_dictionary()
    all_macro_pucker_json = helperfunctions.init_all_pucker_dictionary()

    outputfile = ".".join([outputfile, output_format])
    txt = False
    if output_format == "txt":
        txt = True
        # from here on outputfile is the open report file, not its name
        outputfile = open(os.path.join(output_dir, outputfile), 'w')
        outputfile.write("tessellate "+__version__+" txt\n")
        outputfile.write(
        "PDBID RESNAME CHAIN RESID RINGATOMSORDER CONFORMER CONTEXTUAL_CONFORMER ANGULAR_PUCKER_COORDS ORIG_CONFORMER RING_SIZE\n")
        logger.critical("to be improved, does not match json output at present")
    nodejson = []

    # . read in list of pdbs to read (format is one column of pdb ids
    pdblist = []
    with open(pdbinputfilename, 'r') as inputfile:
        for line in inputfile:
            logger.debug('Read from pdbnames %s', line)
            entry = line.strip()
            if entry:  # a blank line is not a pdb id
                pdblist.append(entry)

    # . read in ligands with preferred ring ordering (format is name, ringsize, numrings , numrings sets of N atom names
    if ligandinputfilename is None:
        ligand_dict = {
            '3DR': {'num': 1, 'ringids': [("C2'", "C3'", "C4'", "O4'", "C1'")], 'ringsize': 5},
            'AVU': {'num': 2, 'ringids': [("C2'", "C3'", "C4'", "O4'", "C1'"), ('C2R', 'C3R', 'C4R', 'O4R', 'C1R')], 'ringsize': 5},
            'NAG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'CTR': {'num': 3, 'ringids': [('C3A', 'C4A', 'C5A', 'O5A', 'C1A', 'C2A'), ('C3B', 'C4B', 'C5B', 'O5B', 'C1B', 'C2B'), ('C3C', 'C4C', 'C5C', 'O5C', 'C1C', 'C2C')], 'ringsize': 6},
            'PSG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'MAN': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'BMA': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'NDG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O', 'C1', 'C2')], 'ringsize': 6},
            'BGP': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'G6P': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'ACR': {'num': 4, 'ringids': [('C3A', 'C4A', 'C5A', 'C7A', 'C1A', 'C2A'), ('C3B', 'C4B', 'C5B', 'O5B', 'C1B', 'C2B'), ('C3C', 'C4C', 'C5C', 'O5C', 'C1C', 'C2C'), ('C3D', 'C4D', 'C5D', 'O5D', 'C1D', 'C2D')], 'ringsize': 6},
            'LAK': {'num': 2, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2'), ("C3'", "C4'", "C5'", "O5'", "C1'", "C2'")], 'ringsize': 6},
            'GAL': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'BGC': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'NGA': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'UPG': {'num': 1, 'ringids': [("C3'", "C4'", "C5'", "O5'", "C1'", "C2'")], 'ringsize': 6},
            'BBA': {'num': 1, 'ringids': [('C1', 'C2', 'C3', 'C', 'C4', 'C5', 'C6')], 'ringsize': 7},
            'H52': {'num': 1, 'ringids': [('N21', 'C22', 'C23', 'N24', 'C25', 'C26', 'C27')], 'ringsize': 7},
            '0J0': {'num': 1, 'ringids': [('C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21')], 'ringsize': 7},
            '13U': {'num': 1, 'ringids': [('C42', 'C43', 'C44', 'C45', 'C46', 'C47', 'C48', 'C49')], 'ringsize': 8},
            'PS9': {'num': 1, 'ringids': [('S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9')], 'ringsize': 8},
        }
    else:
        ligand_dict = {}
        with open(ligandinputfilename, 'r') as ligfile:
            for line in ligfile:
                chunked = line.split()
                if not chunked:  # skip blank lines instead of failing on chunked[0]
                    continue
                lig_id = chunked[0]
                ringsize = int(chunked[1])
                numrings = int(chunked[2])
                columns = 3  # columns in addition to atoms
                if len(chunked) == (columns + numrings * ringsize):
                    ligand_dict[lig_id] = {}
                    ligand_dict[lig_id]["num"] = numrings
                    ligand_dict[lig_id]["ringids"] = []
                    ligand_dict[lig_id]["ringsize"] = ringsize
                    for i in range(0, numrings):
                        templist = []
                        for j in range(0, ringsize):
                            templist.append(chunked[columns + i * ringsize + j])
                        ligand_dict[lig_id]["ringids"].append(tuple(templist))
                    logger.debug('Creation of liganddict %s', ligand_dict[lig_id])
                else:
                    logger.error('atoms ids relative to number rings does not match in ligands file %s %s %s %s', chunked,
                              len(chunked), numrings, ringsize)
    logger.debug("Ligand Dict used: %s", ligand_dict)

    #. ignore these residues
    aminoacids = ["GLY", "ALA", "SER", "MET", "LYS", "GLU", "PRO", "ASP", "VAL", "PHE", "ASN", "ILE", "TRP",
                          "CYS", "HIS", "LEU", "GLN", "ARG", "TYR", "THR"]
    water = ["HOH"]
    other = []
    soup = aminoacids + water + other

    pdb_dir = os.path.join(output_dir, "pdb")

    # . download pdbs  and if already there will not download
    pdbl = bp.PDBList()  # one downloader is enough for all entries
    for pdbid in pdblist:
        logger.debug('Ids in downloads list %s', pdbid)
        pdbl.retrieve_pdb_file(pdbid, file_format="pdb" , pdir=pdb_dir)

    # . parse pdbs for ligands, when found, calc pucker
    p = bp.PDBParser()

    # .. loop over all pdbs
    for pdbid in pdblist:
        pdbpath = os.path.join(pdb_dir, "pdb" + pdbid.lower() + ".ent")
        logger.debug('path to pdbids downloaded %s', pdbid)
        structure = p.get_structure(pdbid, pdbpath)
        # .. get all the residues
        res_list = bp.Selection.unfold_entities(structure, 'R')
        # .. loop over the residues
        for resi in res_list:
            SSSR = []
            rname = resi.get_resname()
            # .. is this residue in the ligands list, calc pucker
            if rname in ligand_dict:
                logger.debug('Ligand %s appears in the ligand dict ', rname)
                # .. get atoms coords
                for nrings in range(0, ligand_dict[rname]["num"]):
                    try:
                        listallcoors = []
                        # .. some pdbs have missing atoms, for this case , check if atoms exist in PDB
                        check_atoms = []
                        for i in resi.get_list():
                            check_atoms.append(i.get_name())
                        missingatomtest = False
                        for atomindex in range(0, ligand_dict[rname]["ringsize"]):
                            if ligand_dict[rname]["ringids"][nrings][atomindex] not in check_atoms:
                                missingatomtest = True
                        if missingatomtest:
                            # the ring is skipped; only a debug message records it
                            logger.debug('Missing atoms for %s', rname)
                        else:
                            for atomindex in range(0, ligand_dict[rname]["ringsize"]):
                                listallcoors = listallcoors + list(
                                    resi[ligand_dict[rname]["ringids"][nrings][atomindex]].get_coord())
                            pobj = puc.Pucker(tuple(listallcoors))
                            if pobj and pobj.isvalid:
                                try:
                                    thisframeTD = pobj.calculate_triangular_tessellation()
                                    logger.debug(thisframeTD)
                                    conformer = pobj.deduce_canonical_conformation()
                                    nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                    pconf = pobj.contextualise_conformer(conformer[0],ligand_dict[rname]["ringids"][nrings])
                                    node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1], ligand_dict[rname]["ringids"][nrings],conformer[0], pconf, thisframeTD, nextconformer[0], pobj.ringsize, pobj.ttnum[conformer[0]])
                                    logger.info(log)
                                    if txt:
                                        outputfile.write(log)
                                    if not nodejson == [] and node in nodejson:
                                        logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                    else:
                                        nodejson.append(node)

                                except Exception as e:
                                    logger.error('In known ligs, Pucker object valid, but calc, classify etc. failed  %s %s', str(pobj._coords), e)
                                    raise
                            else:
                                logger.info("pobj is None or not valid in ligand ring find %s ", listallcoors)
                    except Exception:
                        logger.error("pdb file may be missing coordinates..")
                        raise  # not happy about how I raise this error but OK for now

            # .. Calculate SSSR regardless of whether I have this ligand in the dict or not and then calc pucker
            # .. this may make sense but will cause duplicates ......
            if rname not in soup and len(resi.get_list()) > 5:
                atomlist = []
                if resi.is_disordered():
                    for atom in resi.get_list():  # use get_list instead of unpacked list
                        if atom.is_disordered():
                            list_of_disorder = atom.disordered_get_id_list()
                            # just using the last one for now (whatever that is)
                            selected = list_of_disorder[-1]
                            if atom.disordered_has_id(selected):
                                atom.disordered_select(selected)

                for i in resi.get_list():
                    atomlist.append([i.get_name(), i.get_coord()])
                logger.debug("resi %s atomlist %s length %s", resi, atomlist, len(atomlist))
                SSSR = getRing.create_graph_and_find_rings_suite(atomlist,mineuclid=1.0,maxeuclid=2.2)
                try: # hack to fix this in python3 - working on it
                    # rings are searched again in consecutive windows of 12 atoms
                    for iidx in range(0,len(atomlist),12):
                        logger.debug("individual items atomlist %s", atomlist[iidx:iidx+12])
                        SSSR_item = getRing.create_graph_and_find_rings_suite(atomlist[iidx:iidx+12],mineuclid=1.0,maxeuclid=2.2)
                        logger.debug("individual items %s", SSSR_item)
                        if SSSR_item in SSSR:
                            pass
                        else:
                            SSSR.extend(SSSR_item)
                            logger.debug("Extending SSSR %s", SSSR_item)
                except Exception as e:
                    logger.error("Error in SSSR extend %s",e)

                logger.debug("SSSR rings %s", SSSR)

                if SSSR:
                    for ring in SSSR:
                        nring = SSSR.index(ring)
                        logger.debug('FOUND %i rings in resi %s ', nring+1, rname)
                        alpharing = getRing.getcommonring(ring)
                        if alpharing is not None:
                            ring = alpharing
                        else:  # try see if its common through the common dict
                            for common in getRing.commonrings:
                                if sorted(common) == sorted(ring):
                                    ring = common
                                    break
                        listallcoors = []
                        for atomindex in ring:
                            listallcoors = listallcoors + list(resi[atomindex].get_coord())
                        logger.debug("listallcoors %s", listallcoors)
                        pobj = puc.Pucker(tuple(listallcoors))
                        if pobj and pobj.isvalid:
                            try:
                                thisframeTD = pobj.calculate_triangular_tessellation()
                                conformer = pobj.deduce_canonical_conformation()
                                pconf = pobj.contextualise_conformer(conformer[0], ring)
                                nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1], ring, conformer[0], pconf, thisframeTD, nextconformer[0], pobj.ringsize, pobj.ttnum[conformer[0]])
                                logger.info(log)
                                if txt:
                                    outputfile.write(log)
                                if not nodejson == [] and node in nodejson:
                                    logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                else:
                                    nodejson.append(node)
                            except Exception as e:
                                logger.error('In SSSR Pucker object valid, but calc, classify etc. failed  %s %s', str(pobj._coords), e)
                                raise
                        else:
                            logger.info("pobj is None or not valid in SSSR ring find %s %s %s %s", listallcoors,rname,resi.get_id()[1], ring)
                else:
                    logger.debug('%s has no rings', rname)

            #. get all rings in this resi, get com
            macroatomlist = []
            if SSSR:
                logger.debug("SSSR possible macro list %s %s",pdbid,SSSR)
                for aring in SSSR:
                    ringcoords=[]
                    for ringatom in aring:
                        for itm in resi.get_list():
                            if itm.get_name() == ringatom:
                                ringcoords.append(itm.get_coord())
                    if len(ringcoords)<9 and len(ringcoords)>4: # ignore too large or too small cycles
                        # the ring is represented by the mean of its atom coordinates
                        macroatomlist.append(["".join(aring), np.array(np.add.reduce(ringcoords)/len(ringcoords))])
                        logger.debug("resi %s rings %s macroatomlist %s", resi, aring, macroatomlist)
                if len(macroatomlist) > 4:  # need at least five cycles to calculate macropucker
                    logger.debug("Macroatoms list %s %s",pdbid, macroatomlist)
                    macroSSSR = getRing.create_graph_and_find_rings_suite(macroatomlist,maxeuclid=8.0)
                    logger.debug(macroSSSR)
                    if macroSSSR:
                        for ring in macroSSSR:
                            nring = macroSSSR.index(ring)
                            logger.debug('FOUND %i macro rings in resi %s ', nring+1, rname)
                            alpharing = getRing.getcommonring(ring)
                            if alpharing is not None:
                                ring = alpharing
                            else:  # try see if its common through the common dict
                                for common in getRing.commonrings:
                                    if sorted(common) == sorted(ring):
                                        ring = common
                                        break
                            listallcoors = []
                            for atoms in ring:
                                listallcoors.extend(list((list(x[1]) for x in macroatomlist if x[0] in atoms)))
                            # now flatten the list
                            listallcoors = [y for x in listallcoors for y in x]

                            logger.debug("macro listallcoors %s", listallcoors)
                            pobj = puc.Pucker(tuple(listallcoors))
                            if pobj and pobj.isvalid:
                                try:
                                    thisframeTD = pobj.calculate_triangular_tessellation()
                                    conformer = pobj.deduce_canonical_conformation()
                                    pconf = pobj.contextualise_conformer(conformer[0], ring)
                                    nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                    logger.debug("Macrocycles %s %s %s %s", thisframeTD, conformer, pconf, nextconformer)
                                    node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1], ring, conformer[0], pconf, thisframeTD, nextconformer[0], pobj.ringsize, pobj.ttnum[conformer[0]], True)
                                    logger.info(log)
                                    if txt:
                                        outputfile.write(log)
                                    if not nodejson == [] and node in nodejson:
                                        logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                    else:
                                        nodejson.append(node)
                                except Exception as e:
                                    # macro rings are best effort: log and carry on
                                    logger.error('In macrocyc, Pucker object valid, but calc, classify etc. failed  %s %s', str(pobj._coords), e)
                            else:
                                logger.error("pobj is None or not valid in macrocyc %s",listallcoors)
                    else:
                        logger.debug('%s has no macro rings', rname)

    # for these formats outputfile is still the report file name
    if output_format == "json":
        helperfunctions.write_to_json(list(nodejson), os.path.join(output_dir, outputfile),input_format="pdblist")
    elif output_format == "bson":
        helperfunctions.write_to_bson(list(nodejson), os.path.join(output_dir, outputfile))
    elif output_format == "pandas":
        helperfunctions.write_to_pandas_dataframe(list(nodejson), os.path.join(output_dir, outputfile))

    if txt:
        outputfile.close()
    return
Beispiel #26
0

class SelectChains(PDB.Select):
    """Selector that keeps only chains whose id is in the given collection."""

    def __init__(self, chain_letters):
        # Chain ids to accept; any container supporting `in` works.
        self.chain_letters = chain_letters

    def accept_chain(self, chain):
        """Return True when the chain's id is one of the requested letters."""
        chain_id = chain.get_id()
        return chain_id in self.chain_letters


if __name__ == "__main__":
    # Parses PDB id's desired chains, and creates new PDB structures.
    # The input file has one entry per line: a 4-character PDB id immediately
    # followed by the chain letter (e.g. "1abcA").
    import sys
    if not len(sys.argv) == 2:
        print ("Usage: $ python %s 'pdb.txt'" % __file__)
        # wrong invocation is an error, so report it through the exit status
        sys.exit(1)

    pdb_textfn = sys.argv[1]

    pdbList = PDB.PDBList()
    splitter = ChainSplitter("/home/steve/chain_pdbs")  # Change me.

    with open(pdb_textfn) as pdb_textfile:
        for line in pdb_textfile:
            entry = line.rstrip("\r\n")
            if len(entry) < 5:
                # Blank or truncated line: there is no chain letter at index 4,
                # which used to raise IndexError (e.g. on a trailing newline).
                if entry.strip():
                    print ("Skipping malformed line: %r" % entry)
                continue
            pdb_id = entry[:4].lower()
            chain = entry[4]
            pdb_fn = pdbList.retrieve_pdb_file(pdb_id)
            splitter.make_pdb(pdb_fn, chain)

Beispiel #27
0
    def download_pdb(self, info):
        """Download a PDB entry and write one of its chains to its own file.

        The entry is fetched into self.work_dir unless the modified copy
        pdb<id>.mod.ent is already there. ReplacePDBModifiedAA then produces
        the modified copy, its "REMARK  " lines are saved to
        pdb<id>.mod.ent.remark, and the requested chain of the first model is
        written to <work_dir>/<pdb_id>_<chain_id>.pdb with those remark lines
        appended.

        :param info: pair (pdb_id, chain_id)
        :return: path of the chain file, or None when the download fails or
                 the file cannot be parsed
        :raises KeyError: if the first model has no chain chain_id (this
                 lookup is outside the guarded parsing step)
        """
        pdb_id, chain_id = info

        ## Check if atom has alternative position, if so, keep 'A' position and remove the flag
        ## but somehow this class doesn't seem to function well
        class NotDisordered(Select):
            def accept_atom(self, atom):
                if not atom.is_disordered() or atom.get_altloc() == 'A':
                    atom.set_altloc(' ')
                    return True
                else:
                    return False

        ## BioPython downloads PDB but it gives a lowercase name in pdb{}.ent format
        biopdb_name = '{0}/pdb{1}.ent'.format(self.work_dir, pdb_id.lower())
        biopdb_modf = '{0}/pdb{1}.mod.ent'.format(self.work_dir,
                                                  pdb_id.lower())
        if not os.path.isfile(biopdb_modf):
            try:
                PDB.PDBList(verbose=False).retrieve_pdb_file(
                    pdb_id,
                    pdir=self.work_dir,
                    obsolete=False,
                    file_format='pdb')
            except FileNotFoundError:
                print(
                    '  \033[31m> ERROR: BioPython cannot download PDB: \033[0m'
                    + pdb_id)
                return None

        ## Replace modified AA to avoid mis-recognition in biopython readin
        ## Replace disordered atoms and keep only the "A" variant
        ReplacePDBModifiedAA(biopdb_name, biopdb_modf)

        # Collect the lines containing "REMARK  " and keep a copy next to the
        # modified file. Done with plain file I/O instead of a shell `grep`,
        # so paths with spaces or shell metacharacters cannot break (or
        # inject into) a command line. Bytes are used to copy lines verbatim.
        with open(biopdb_modf, 'rb') as fi:
            remark_lines = [
                l if l.endswith(b'\n') else l + b'\n' for l in fi
                if b'REMARK  ' in l
            ]
        with open('{0}.remark'.format(biopdb_modf), 'wb') as fo:
            fo.writelines(remark_lines)

        ## Read the PDB file and extract the chain from structure[0]
        try:
            model = PDB.PDBParser(PERMISSIVE=1,
                                  QUIET=1).get_structure(pdb_id,
                                                         biopdb_modf)[0]
        except KeyError:
            print('  \033[31m> ERROR: BioPython cannot read in PDB: \033[0m' +
                  biopdb_modf)
            return None
        except ValueError:
            print('  \033[31m> ERROR: PDB file is empty: \033[0m' +
                  biopdb_modf)
            return None

        ### Bug alert: as of 20.02.18, Biopython dev hasn't come up with good
        ### strategy to fix the 'atom.disordered_get_list()' issue with alternative
        ### position of residue side chains. To go around this, will physically
        ### remove "B" variant and keep only "A" variant in
        chain_fn = '{0}/{1}_{2}.pdb'.format(self.work_dir, pdb_id, chain_id)
        io = PDB.PDBIO()
        io.set_structure(model[chain_id])
        io.save(chain_fn, select=NotDisordered())

        # Attach REMARK to end of PDB as safekeeping (appended in place; no
        # temporary file in the current directory is needed)
        with open(chain_fn, 'ab') as fo:
            fo.writelines(remark_lines)

        return chain_fn
'''
==========================
calculate the S-S
==========================
'''
from Bio import PDB
from Bio.PDB import Vector
# Download 1C9X in legacy PDB format and parse the file that was actually
# written. retrieve_pdb_file returns the saved path, so it is used directly
# instead of the hard-coded 'c9/pdb1C9X.ent' (the downloader stores the file
# under a lower-case name, which that path does not match on a
# case-sensitive file system).
pdb1 = PDB.PDBList()
pdb_path = pdb1.retrieve_pdb_file('1C9X', file_format='pdb')
parser = PDB.PDBParser()
structure = parser.get_structure('1C9X', pdb_path)
Atom1 = []  # CB atoms of the cysteines
Atom2 = []  # SG atoms of the cysteines
resid = []
# NOTE(review): `list` and `dict` shadow the builtins; the names are kept
# because code after this fragment may refer to them - rename when possible.
list = []  # residue numbers of the cysteines
dict = {}
for model in structure:
    for chain in model:
        for residue in chain:
            if residue.resname != 'CYS':
                continue
            list.append(residue.id[1])
            for atom in residue:
                if atom.name == 'CB':
                    Atom1.append(atom)
                elif atom.name == 'SG':
                    Atom2.append(atom)

i = 0						
for i in range(len(Atom1)):
'''

Retrieve a PDB structure file from the web and parse it with Biopython.

-----------------------------------------------------------
(c) 2013 Allegra Via and Kristian Rother
    Licensed under the conditions of the Python License

    This code appears in section 21.2.2 of the book
    "Managing Biological Data with Python".
-----------------------------------------------------------
'''

from Bio import PDB

# Download 2DN1 in legacy PDB format; retrieve_pdb_file returns the path of
# the saved file, which is parsed directly instead of a hard-coded location.
pdbl = PDB.PDBList()
pdb_path = pdbl.retrieve_pdb_file("2DN1", file_format="pdb")

parser = PDB.PDBParser()
structure = parser.get_structure("2DN1", pdb_path)

# Walk the model/chain/residue/atom hierarchy and print every level.
# print() calls replace the Python 2 print statements, which are a
# SyntaxError under Python 3; the printed text is the same.
for model in structure:
    for chain in model:
        print(chain)
        for residue in chain:
            print(residue.resname, residue.id[1])
            for atom in residue:
                print(atom.name, atom.coord)
def download_PDB_struct(query: str = '1AXC'):
    """Obtain PDB structural models"""
    pdbl = PDB.PDBList()
    pdbl.retrieve_pdb_file(query, pdir='.')