Beispiel #1
0
def download_pdb(config, pdb_code: str) -> Path:
    """
    Download a structure from the PDB and store it as ``<pdb_code>.pdb``.

    :param config: configuration object; its ``pdb_dir`` attribute is used as
        the download directory and is set to ``/tmp/`` when it is empty.
    :param pdb_code: 4 character PDB accession code.
    :type pdb_code: str
    :return: path of the downloaded structure.
    :rtype: Path
    :raises FileNotFoundError: if no file is present after the download.
    """
    if not config.pdb_dir:
        config.pdb_dir = Path("/tmp/")
    pdb_dir = Path(config.pdb_dir)

    # Use the path reported by retrieve_pdb_file instead of rebuilding the
    # "pdb<code>.ent" name by hand, so the rename cannot miss the file when
    # the library spells the code differently on disk (e.g. lower case).
    pdbl = PDBList()
    downloaded = Path(
        pdbl.retrieve_pdb_file(pdb_code,
                               pdir=pdb_dir,
                               overwrite=True,
                               file_format="pdb"))

    # Explicit check instead of an assert, which is stripped under -O.
    if not downloaded.exists():
        raise FileNotFoundError(
            f"PDB file for {pdb_code} was not downloaded")

    # Rename file to .pdb from .ent, replacing a copy left by an earlier run.
    target = pdb_dir / f"{pdb_code}.pdb"
    os.replace(downloaded, target)

    log.info(f"Downloaded PDB file for: {pdb_code}")
    return target
Beispiel #2
0
def get_pdb(pdb_list):
    """
    Download PDB-format files for the given identifiers.

    Only the first four characters of each identifier are used as the PDB
    code. An identifier is skipped when ``<out_dir><identifier>.pdb`` already
    exists.

    :param pdb_list: iterable of identifier strings.
    :return: list with the value returned by ``retrieve_pdb_file`` for every
        identifier that was requested.
    """
    import os
    from Bio.PDB import PDBList

    # NOTE(review): the trailing backslash is a Windows separator; on other
    # platforms it becomes part of the directory name - confirm the target OS.
    out_dir = "PDB_benchmark_structures\\"

    filename = []
    pdbl = PDBList()  # one downloader serves the whole batch
    print("Downloading in %s:\n" % out_dir)
    for pdbid in pdb_list:
        print('%s' % pdbid[:4])
        try:
            if not os.path.exists("{}{}.pdb".format(out_dir, pdbid)):
                filename.append(
                    pdbl.retrieve_pdb_file(pdbid[:4],
                                           file_format='pdb',
                                           pdir=out_dir))
        except FileNotFoundError:
            print("(NOTE) {} not found.".format(pdbid))
    return filename
Beispiel #3
0
def retrieve_cif_list():
    """
    Download mmCIF files for every code listed in ``input_files/protlist.txt``.

    The list file is split on whitespace. The files are requested with
    ``pdir="input_files/cif"`` and ``overwrite=True``.
    """
    server = PDBList(server='ftp://ftp.wwpdb.org', pdb='input_files', obsolete_pdb=None, verbose=True)
    # The context manager closes the list file even if reading fails.
    with open('input_files/protlist.txt', 'r') as pdb_list:
        content = pdb_list.read().split()

    server.download_pdb_files(content, pdir="input_files/cif", file_format='mmCif', overwrite=True, obsolete=False)
def get_unique(input_df):
    """
    Download a PDB-format file for every distinct value of ``input_df.CPX``.

    The files are requested with ``pdir='PDB'``.

    :param input_df: object whose ``CPX`` attribute offers ``unique()``.
    """
    from Bio.PDB import PDBList

    codes = input_df.CPX.unique()
    downloader = PDBList()
    for code in codes:
        downloader.retrieve_pdb_file(code, pdir='PDB', file_format="pdb")
def download_pdbs(base_dir, protein_codes):
    """
    Fetch a set of proteins from the PDB, one directory per protein.

    Every protein is downloaded with a ``PDBList`` whose ``pdb`` directory is
    ``base_dir`` joined with the upper-cased PDB code. Duplicate codes are
    requested only once.

    :param base_dir: where to download all the proteins.
    :param protein_codes: the PDB codes to download, either as a collection of
        codes or as a dict whose values are collections of codes.
    """
    from Bio.PDB import PDBList

    # Collapse the input into a list of distinct codes.
    if isinstance(protein_codes, dict):
        distinct = set()
        for codes in protein_codes.values():
            distinct.update(codes)
    else:
        distinct = set(protein_codes)
    targets = list(distinct)

    failures = 0
    for code in targets:
        try:
            fetcher = PDBList(pdb=os.path.join(base_dir, code.upper()))
            fetcher.flat_tree = 1
            fetcher.retrieve_pdb_file(pdb_code=code)
        except IOError:
            log.warning("Failed to download protein {}".format(code))
            failures += 1

    total = len(targets)
    log.info("Downloaded {0}/{1} molecules".format(total - failures, total))
Beispiel #6
0
def get_structure(pdb_data_folder, structure_PDB_ID):
    """
    Load the structure of a molecule, downloading it when no local copy exists.

    A non-empty ``<ID>.cif`` in the folder takes precedence over ``<ID>.pdb``.
    If the mmCIF file cannot be parsed, a PDB-format file is downloaded and a
    PDB-format file is parsed instead.

    :param pdb_data_folder: path object of the folder containing all 3D structures
    :param structure_PDB_ID: PDB ID of the structure
    :return: the structure object
    :raises FileNotFoundError: if neither ``<ID>.cif`` nor ``<ID>.pdb`` is
        available after the download attempt
    """
    pdb_path = pdb_data_folder / (structure_PDB_ID + ".pdb")
    cif_path = pdb_data_folder / (structure_PDB_ID + ".cif")

    if not is_non_zero_file(pdb_path) and not is_non_zero_file(cif_path):
        PDBList().retrieve_pdb_file(structure_PDB_ID, pdir=pdb_data_folder)

    if is_non_zero_file(cif_path):
        try:
            return MMCIFParser(QUIET=True).get_structure(
                structure_PDB_ID, cif_path)
        except Exception:
            # mmCIF parsing failed: fall back to the PDB format.
            downloaded = PDBList().retrieve_pdb_file(structure_PDB_ID,
                                                     pdir=pdb_data_folder,
                                                     file_format='pdb')
            # Prefer an existing "<ID>.pdb"; otherwise parse the file the
            # download call reports, whose name need not be "<ID>.pdb".
            fallback = pdb_path if is_non_zero_file(pdb_path) else downloaded
            return PDBParser().get_structure(structure_PDB_ID, fallback)

    if is_non_zero_file(pdb_path):
        return PDBParser().get_structure(structure_PDB_ID, pdb_path)

    # Fail with a clear error instead of returning an unbound variable.
    raise FileNotFoundError(
        "No structure file found for {}".format(structure_PDB_ID))
Beispiel #7
0
    def from_id(cls, pdb_id):
        """
        Create a structure from a PDB identifier by downloading
        the entry and loading the downloaded file

        Parameters
        ----------
        pdb_id : str
            PDB identifier (e.g. 1hzx)

        Returns
        -------
        PDB
            initialized PDB structure

        Raises
        ------
        ResourceError
            If a URLError occurs while fetching or loading the entry
        """
        from urllib.error import URLError
        from Bio.PDB import PDBList

        fetcher = PDBList()
        try:
            # the file is stored in the directory returned by tempdir()
            local_path = fetcher.retrieve_pdb_file(pdb_id, pdir=tempdir())
            loaded = cls.from_file(local_path, file_format="pdb")
        except URLError as e:
            raise ResourceError(
                "Could not fetch PDB data for {}".format(pdb_id)
            ) from e
        return loaded
Beispiel #8
0
def build_xtalstable(dbpathstr, sourcedbpathstrs, pdbformat='cif'):
    """
    Download the structures referenced by source databases and log them in XTALS.

    For every source database the ``acc`` and ``pdbids`` columns of
    CAZYSEQDATA are read, the PDB IDs (four alphanumeric characters followed
    by ``[``) are extracted and downloaded into an ``Xtals`` folder next to
    the target database. A row is inserted into XTALS for each PDB ID that is
    not recorded yet; IDs recorded as failed are updated when their file is
    now present.

    :param dbpathstr: path of the database holding the XTALS table
    :param sourcedbpathstrs: iterable of paths of the source databases
    :param pdbformat: value stored in the ``pdbformat`` column
    """
    dbpath = Path(dbpathstr)
    todaystr = datetime.date.today().isoformat()  # YYYY-MM-DD
    conn = seqdbutils.gracefuldbopen(dbpath)
    # by default the folder containing xtals is named 'Xtals' and sits in the
    # same directory as the xtals database
    xtaldirpath = dbpath.parent / 'Xtals'
    if not xtaldirpath.exists():
        os.mkdir(xtaldirpath)
    c = conn.cursor()
    c.execute(
        '''CREATE TABLE IF NOT EXISTS XTALS (pdbid text, acc text, srcdb text,dldate text,
              relpath text, pdbformat text, dlsuccess int, obsolete int)''')
    pdbl = PDBList()
    # Raw string: "\[" is not a valid escape in a normal string literal.
    cxRE = re.compile(r'([A-Za-z0-9]{4})\[')
    for srcdbpathstr in sourcedbpathstrs:
        srcdbpath = Path(srcdbpathstr)
        srcdbstr = srcdbpath.name
        src_conn = seqdbutils.gracefuldbopen(srcdbpath)
        seqdbutils.check_tables_exist(src_conn, ['CAZYSEQDATA'])
        src_c = src_conn.cursor()
        src_c.execute(
            'SELECT acc,pdbids FROM CAZYSEQDATA WHERE pdbids NOT NULL')
        pdbrows = src_c.fetchall()

        # Parallel lists: dbaccs[i] is the accession that references dbpdbs[i].
        dbpdbs = []
        dbaccs = []
        for pdbrow in pdbrows:
            pdbs = cxRE.findall(pdbrow['pdbids'])
            dbaccs.extend([pdbrow['acc']] * len(pdbs))
            dbpdbs.extend(pdbs)

        pdbl.download_pdb_files(dbpdbs, pdir=xtaldirpath)
        for acc, pdb in zip(dbaccs, dbpdbs):
            # NOTE(review): the check always looks for "<pdb>.cif" with the ID
            # spelled as in the source database, whatever pdbformat is -
            # confirm this matches the file names the download produces.
            rel_pdbpath = xtaldirpath / f'{pdb}.cif'
            download_success = os.path.exists(rel_pdbpath)
            str_relpath = str(rel_pdbpath)

            c.execute(
                '''SELECT COUNT(*) FROM XTALS WHERE pdbid=(?) AND dlsuccess=(?)''',
                (pdb, 1))
            already_downloaded = c.fetchone()[0]
            if already_downloaded:
                continue

            c.execute(
                '''SELECT COUNT(*) FROM XTALS WHERE pdbid=(?) AND dlsuccess=(?)''',
                (pdb, 0))
            previously_failed = c.fetchone()[0]
            if previously_failed:
                if download_success:
                    print(f'new download of previously failed {pdb}')
                    c.execute(
                        '''UPDATE XTALS SET dldate = (?), dlsuccess = (?) WHERE pdbid=(?)''',
                        (todaystr, 1, pdb))
                continue

            c.execute(
                '''INSERT INTO XTALS VALUES (?,?,?,?,?,?,?,?)''',
                (pdb, acc, srcdbstr, todaystr, str_relpath, pdbformat,
                 download_success, None))
        conn.commit()
        src_conn.close()
    conn.close()
Beispiel #9
0
def generate_seq_file(score_file, save_file):
    """
    Write the mutated sequence of every mutation named in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) holds
    entries of the form ``<pdb>_<chain>_<mutation>``. The first three
    characters of the mutation are the wild-type residue, the last three the
    mutant residue and the digits form the 1-based position. For every
    distinct entry the chain sequence is read from the PDB file (downloaded
    into ``./dataFile/PDB_dl`` when missing), the residue is replaced, and
    ``<entry>\\t<sequence>`` is written to ``save_file``.

    :param score_file: file name below ``./dataFile/``
    :param save_file: path of the output file
    """
    table = pd.read_csv('./dataFile/' + score_file, sep='\t')

    # Group the distinct entries by PDB id, keeping first-seen order.
    mutations_by_pdb = {}
    seen = set()
    for entry in table.iloc[:, 0]:
        if entry in seen:
            continue
        seen.add(entry)
        parts = entry.split('_')
        pdb_id, chain_id, change = parts[0], parts[1], parts[2]
        mutations_by_pdb.setdefault(pdb_id, []).append({
            'chain_id': chain_id,
            'wt_aa': change[0:3],
            'mu_aa': change[-3:],
            'mu_pos': int(''.join(ch for ch in change if ch.isdigit())),
            'name': entry,
        })

    structure_parser = PDBParser()
    peptide_builder = PPBuilder()
    downloader = PDBList()
    PDB_DIR = './dataFile/PDB_dl'

    mutated = {}
    for pdb_id, mutations in mutations_by_pdb.items():
        pdb_file = PDB_DIR + '/pdb' + pdb_id.lower() + '.ent'
        # download only when the file is not available locally
        if not os.path.exists(pdb_file):
            downloader.retrieve_pdb_file(pdb_code=pdb_id, file_format='pdb', overwrite=False, pdir=PDB_DIR)
        model = structure_parser.get_structure(pdb_id, pdb_file)[0]

        for mutation in mutations:
            index = mutation['mu_pos'] - 1
            peptides = peptide_builder.build_peptides(model[mutation['chain_id']])
            sequence = "".join(str(pp.get_sequence()) for pp in peptides)
            sequence = sequence.replace('\n', '').replace(' ', '')
            assert sequence[index] == three_to_one(mutation['wt_aa']), 'Wt amino acid failed to match'
            residues = list(sequence)
            residues[index] = three_to_one(mutation['mu_aa'])
            mutated[mutation['name']] = ''.join(residues)

    with open(save_file, 'w') as out:
        for name, sequence in mutated.items():
            out.write(name + '\t' + sequence + '\n')
Beispiel #10
0
def download(filelist: list, q: Queue, lock: Lock, cursor: sqlite3.Cursor, conn: sqlite3.Connection, dir_name: str):
    """
    Download PDB entries, extract summary data and store it in ``Structures``.

    :param filelist: PDB IDs to process.
    :param q: queue that receives every stored ID and finally ``None``.
    :param lock: lock held while the database is written.
    :param cursor: cursor used for the INSERT statements.
    :param conn: connection committed after each successful INSERT.
    :param dir_name: base directory; each entry is downloaded to
        ``<dir_name>/<ID>``.
    """
    status_path = 'status_tmp.txt'
    with open(status_path, 'w') as f:
        f.write('')
    for file in filelist:
        # Status lines end with a newline, so compare the stripped entries;
        # the context manager also closes the handle after each check.
        with open(status_path) as f:
            processed = {line.strip() for line in f}
        if file in processed:
            continue

        entry_dir = os.path.join(dir_name, file)
        ent_path = os.path.join(entry_dir, 'pdb{:s}.ent'.format(file))
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(file, pdir=entry_dir, file_format='pdb')
        if not os.path.exists(ent_path):
            print("File with ID PDB: {:s} not found!".format(file))
            continue

        parser = PDBParser()
        structure = parser.get_structure(file, ent_path)
        name = parser.header.get('name', '')
        head = parser.header.get('head', '')
        method = parser.header.get('structure_method', '')
        res = parser.header.get('resolution', '')
        # Keep two decimals as before; a missing resolution is stored as NULL
        # instead of breaking the formatting.
        resolution = round(res, 2) if isinstance(res, (int, float)) else None

        ncomp = 0
        nchain = 0
        eclist = []
        for values in parser.header['compound'].values():
            ncomp += 1
            nchain += len(values['chain'].split(','))
            eclist.append(values.get('ec', '') or values.get('ec_number', ''))
        ec = ", ".join(eclist)

        nres = 0
        mmass = 0
        ppb = PPBuilder()
        for pp in ppb.build_peptides(structure):
            seq = pp.get_sequence()
            nres += len(seq)
            seqan = ProteinAnalysis(str(seq))
            mmass += int(seqan.molecular_weight())

        lock.acquire()
        try:
            # Placeholders keep quotes inside header text from breaking
            # (or altering) the statement.
            cursor.execute(
                "INSERT INTO Structures (IDPDB, NAME, HEAD, METHOD, RESOLUTION, "
                "NCOMP, NCHAIN, NRES, MMASS, EC) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (file, name, head, method, resolution, ncomp, nchain, nres,
                 mmass, ec))
        except sqlite3.DatabaseError as err:
            print("Error: ", err)
        else:
            print("Download Done for ID PDB: {:s}".format(file))
            conn.commit()
            q.put(file)
        finally:
            lock.release()
            with open(status_path, 'at') as f:
                f.write((file + '\n'))
    os.remove(status_path)
    q.put(None)
Beispiel #11
0
def fetchPDB(name, path):
    """Fetch a PDB-format structure and save it as ``<path>/<name>.pdb``.

    :param name: PDB code of the structure.
    :param path: directory the file is written to.
    :return: path of the renamed file.
    """
    from Bio.PDB import PDBList
    pdbname = os.path.join(path, name + '.pdb')
    pdbl = PDBList()
    # Request the PDB format explicitly: the library documents mmCif as its
    # default, which would otherwise end up under a ".pdb" name.
    filename = pdbl.retrieve_pdb_file(name, pdir=path, file_format='pdb')
    os.rename(filename, pdbname)
    return pdbname
Beispiel #12
0
    def struct_retrieve(self):
        """
            Store ``self.args.id_input`` as ``self.pdb_id`` (converted to str)
            and download its PDB-format file with ``pdir="."``
        """
        self.pdb_id = str(self.args.id_input)
        fetcher = PDBList()
        fetcher.retrieve_pdb_file(self.pdb_id, pdir=".", file_format='pdb')
Beispiel #13
0
def downloadPdb(pdb_list):
    """
    Download PDB-format files into ``original_pdbs/<code>.pdb``.

    The code is the lower-cased first four characters of each identifier.
    Identifiers whose target file already exists are skipped.

    :param pdb_list: iterable of identifier strings.
    """
    import shutil

    target_dir = "original_pdbs"
    # Standard-library calls replace the "mkdir -p" / "mv" shell commands, so
    # no value is interpolated into a shell line.
    os.makedirs(target_dir, exist_ok=True)
    pdbl = PDBList()
    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        target = os.path.join(target_dir, pdb + ".pdb")
        if os.path.isfile(target):
            continue
        name = pdbl.retrieve_pdb_file(pdb, pdir='.', file_format='pdb')
        # A failed download leaves nothing to move; keep going with the rest.
        if os.path.isfile(name):
            shutil.move(name, target)
Beispiel #14
0
    def struct_retrieve(self):
        """
            Download the PDB-format file for ``self.pdb_id``, passing
            ``self.out_dir`` followed by "/" as the target directory
        """
        fetcher = PDBList()
        fetcher.retrieve_pdb_file(self.pdb_id, pdir=f"{self.out_dir}/", file_format='pdb')
Beispiel #15
0
def download_structure_file(pdb_id: str) -> None:
    """Fetch the structure file of one PDB entry through BioPython's ``PDBList``.

    No directory or file format is passed, so the library defaults apply.

    :param pdb_id: the protein id in protein data bank
    :type pdb_id: str
    """
    PDBList().retrieve_pdb_file(pdb_id)
Beispiel #16
0
def generate_random_PDB(a, b, c):
    """
    Append ``b`` randomly chosen entries of ``a`` to ``c`` and download all of ``c``.

    :param a: sequence of PDB codes to draw from (with replacement).
    :param b: number of codes to draw.
    :param c: list that receives the drawn codes; it is modified in place and
        every entry it holds is downloaded with ``pdir='PDB'``.
    :return: ``c``
    """
    import random

    # random.choice never yields an out-of-range index, whereas
    # randint(0, len(a)) can when randint has an inclusive upper bound.
    for _ in range(b):
        c.append(random.choice(a))

    print(c)
    pdb1 = PDBList()
    for code in c:
        pdb1.retrieve_pdb_file(code, pdir='PDB')
    return c
Beispiel #17
0
def load_data(experiment: str, in_file: str, out_dir: str) -> None:
    """
    Download a PDB-format file for every line of ``in_file``.

    The PDB code is the part of the line before the first underscore.

    :param experiment: not used by this function.
    :param in_file: text file with one identifier per line; its path is printed.
    :param out_dir: directory passed to the download call.
    """
    print(in_file)
    downloader = PDBList(server='http://ftp.wwpdb.org', verbose=False)
    with open(in_file, 'r') as handle:
        lines = handle.readlines()
        for line in tqdm(lines):
            code = line.strip('\n').split('_')[0]
            downloader.retrieve_pdb_file(code, pdir=out_dir, file_format='pdb')
Beispiel #18
0
 def getPDB(self, ID=None):
     '''
     Download a PDB-format file with pdir '.' for ID, or for
     self.id when no ID is given. Returns the value of the download
     call, or None when neither identifier is set.
     '''
     from Bio.PDB import PDBList
     code = ID if ID is not None else self.id
     if code is None:
         return None
     return PDBList().retrieve_pdb_file(code, pdir = '.', file_format = 'pdb')
Beispiel #19
0
def retrieve_cif(prot_id):
    """Download the mmCIF file of ``prot_id`` with ``pdir="input_files/cif"`` and ``overwrite=True``."""
    server_options = dict(server='ftp://ftp.wwpdb.org',
                          pdb='input_files',
                          obsolete_pdb=None,
                          verbose=True)
    download_options = dict(pdir="input_files/cif",
                            file_format='mmCif',
                            overwrite=True,
                            obsolete=False)
    PDBList(**server_options).retrieve_pdb_file(prot_id, **download_options)
Beispiel #20
0
def descargarPDB(pdb):
    """
    Download a PDB entry and save the parsed structure as a ``.pdb`` file.

    The entry is downloaded with ``pdir='./Script/PDB'``, parsed from
    ``./Script/PDB/pdb<lower-cased code>.ent`` and written to
    ``./Script/PDBStructure/<code>.pdb``.

    :param pdb: PDB code of the entry.
    """
    PDBList().retrieve_pdb_file(pdb, pdir='./Script/PDB', file_format='pdb')

    downloaded = './Script/PDB/pdb' + pdb.lower() + '.ent'
    parsed = PDBParser().get_structure(pdb, downloaded)

    writer = PDBIO()
    writer.set_structure(parsed)
    writer.save('./Script/PDBStructure/' + pdb + '.pdb')
Beispiel #21
0
def download_PDB(pdb_ids, pdb_dir='.'):
    """
    Download a PDB-format file for every id, logging each id at debug level.

    :param pdb_ids: iterable of PDB ids.
    :param pdb_dir: directory passed to the download call.
    """
    fetcher = PDBList()
    for code in pdb_ids:
        logging.debug('PDB file which will be downloaded')
        logging.debug(code)
        fetcher.retrieve_pdb_file(code, pdir=pdb_dir, file_format='pdb')
Beispiel #22
0
def obtian_seq_wo_seq_file(score_file):
    """
    Build the sequence of every chain named in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) is read.
    The first four characters of an entry are the PDB name, the first six the
    chain name, and the last character of the chain name selects the chain.
    Missing PDB files are downloaded into ``./dataFile/PDB_dl``.

    :param score_file: file name below ``./dataFile/``
    :return: dict mapping each chain name to its sequence
    """
    table = pd.read_csv('./dataFile/' + score_file, sep='\t')

    # PDB name -> set of chain names that belong to it
    chains_by_pdb = {}
    for entry in table.iloc[:, 0]:
        chains_by_pdb.setdefault(entry[0:4], set()).add(entry[0:6])

    PDB_DIR = './dataFile/PDB_dl'
    if not os.path.exists(PDB_DIR):
        os.mkdir(PDB_DIR)

    def local_file(pdb_name):
        # path at which the entry is expected after the download
        return PDB_DIR + '/pdb' + pdb_name.lower() + '.ent'

    # fetch whatever is not available locally yet
    downloader = PDBList()
    for pdb_name in chains_by_pdb:
        if not os.path.exists(local_file(pdb_name)):
            downloader.retrieve_pdb_file(pdb_code=pdb_name,
                                         file_format='pdb',
                                         overwrite=False,
                                         pdir=PDB_DIR)

    structure_parser = PDBParser()
    peptide_builder = PPBuilder()
    sequences = {}
    for pdb_id, chain_names in chains_by_pdb.items():
        model = structure_parser.get_structure(pdb_id, local_file(pdb_id))[0]
        for chain_name in chain_names:
            peptides = peptide_builder.build_peptides(model[chain_name[-1]])
            joined = "".join(str(pp.get_sequence()) for pp in peptides)
            # drop newlines and blanks from the joined sequence
            sequences[chain_name] = joined.replace('\n', '').replace(' ', '')

    return sequences
Beispiel #23
0
    def download_structure_file(self,
                                outdir,
                                file_type=None,
                                load_header_metadata=True,
                                force_rerun=False):
        """Download a structure file from the PDB, specifying an output directory and a file type. Optionally download
        the mmCIF header file and parse data from it to store within this object.

        Args:
            outdir (str): Path to output directory
            file_type (str): ``pdb``, ``mmCif``, ``xml``, ``mmtf`` - file type for files downloaded from the PDB
            load_header_metadata (bool): If header metadata should be loaded into this object, fastest with mmtf files
            force_rerun (bool): If structure file should be downloaded even if it already exists

        Raises:
            URLError: If the structure file does not exist after the download call

        """
        ssbio.utils.double_check_attribute(
            object=self,
            setter=file_type,
            backup_attribute='file_type',
            custom_error_text=
            'Please set file type to be downloaded from the PDB: '
            'pdb, mmCif, xml, or mmtf')

        # XTODO: check if outfile exists using ssbio.utils.force_rerun, pdblist seems to take long if it exists
        # I know why - it's because we're renaming the ent to pdb. need to have mapping from file type to final extension
        # Then check if file exists, if not then download again
        p = PDBList()
        with ssbio.utils.suppress_stdout():
            structure_file = p.retrieve_pdb_file(pdb_code=self.id,
                                                 pdir=outdir,
                                                 file_format=file_type,
                                                 overwrite=force_rerun)
        if not op.exists(structure_file):
            log.debug('{}: {} file not available'.format(self.id, file_type))
            raise URLError('{}.{}: file not available to download'.format(
                self.id, file_type))

        log.debug('{}: {} file saved'.format(self.id, file_type))

        # Rename pdbXXXX.ent to XXXX.pdb. Only the file name is rewritten, and
        # only its "pdb" prefix and ".ent" extension, so a directory or an ID
        # that contains "pdb" or "ent" is left intact.
        if file_type == 'pdb':
            folder, base_name = op.split(structure_file)
            stem, extension = op.splitext(base_name)
            if stem.startswith('pdb'):
                stem = stem[len('pdb'):]
            if extension == '.ent':
                extension = '.pdb'
            new_name = op.join(folder, stem + extension)
            os.rename(structure_file, new_name)
            structure_file = new_name

        self.load_structure_path(structure_file, file_type)
        if load_header_metadata:
            if file_type == 'mmtf':
                self.update(parse_mmtf_header(structure_file))
            else:
                self.update(
                    parse_mmcif_header(
                        download_mmcif_header(pdb_id=self.id,
                                              outdir=outdir,
                                              force_rerun=force_rerun)))
Beispiel #24
0
def download_pdb_structure(pdb_code, pdb_file_name, file_path='.'):
    """
    Fetch a PDB-format structure and rename it to ``pdb_file_name``.

    The download uses ``pdir=file_path`` and ``overwrite=True``. An
    ``Exception`` is raised when the reported file does not exist afterwards.
    """
    downloaded = PDBList().retrieve_pdb_file(pdb_code,
                                             pdir=file_path,
                                             overwrite=True,
                                             file_format='pdb')
    if not os.path.exists(downloaded):
        raise Exception("Can not download structure: {0}".format(pdb_code))
    os.rename(downloaded, pdb_file_name)
def download_pdb(pdb_id, pdbs_path):
    """Downloads one or more pdb files

    Parameters
    ----------
    pdb_id : str or iterable of str
        a single pdb id, or several pdb ids
    pdbs_path: str
        path of folder in which the pdbs are stored
    """
    # download_pdb_files works on a collection of codes; a bare string would
    # be walked character by character, so wrap a single id in a list.
    pdb_ids = [pdb_id] if isinstance(pdb_id, str) else pdb_id
    pdbl = PDBList(obsolete_pdb=True)
    pdbl.download_pdb_files(pdb_ids, file_format="pdb", pdir=pdbs_path)
Beispiel #26
0
def get_pdb(pdb_list):
    """
    Download a PDB-format file for every identifier in ``pdb_list``.

    Only the first four characters of each identifier are used as the PDB
    code; they are printed before the download.

    :param pdb_list: iterable of identifier strings.
    """
    from Bio.PDB import PDBList

    # NOTE(review): the trailing backslash is a Windows separator; on other
    # platforms it becomes part of the directory name - confirm the target OS.
    out_dir = "PDB_benchmark_structures\\"
    pdbl = PDBList()  # one downloader serves the whole batch

    print("Downloading in %s:\n" % out_dir)
    for ids in pdb_list:
        print('%s' % ids[:4])
        pdbl.retrieve_pdb_file(ids[:4], file_format='pdb', pdir=out_dir)
Beispiel #27
0
 def readFromPDBDatabase(self, pdbID, dir=None, type='mmCif'):
     """
     Download a structure and report where it was stored
     :param pdbID: identifier passed to the download call
     :param dir: target directory; the current working directory when None
     :param type: file format passed to the download call (mmCif or pdb)
     :return: absolute path of the file reported by the download call
     """
     target = os.getcwd() if dir is None else dir
     downloaded = PDBList(pdb=target).retrieve_pdb_file(pdbID, pdir=target, file_format=type)
     return os.path.abspath(downloaded)
Beispiel #28
0
 def struct_retrieve(self):
     """Download the PDB-format file for ``self.pdb_id`` with ``pdir=self.out_dir``.

     The value returned by the download call is discarded; nothing is returned.
     """
     fetcher = PDBList()
     fetcher.retrieve_pdb_file(self.pdb_id, pdir=self.out_dir, file_format='pdb')
Beispiel #29
0
def get_pdb(ids, dir):
    """
    Download a PDB-format file for every line of the file ``ids``.

    :param ids: path of a text file; each line, up to its newline, is one id.
    :param dir: directory passed to the download call.
    """
    # one id per line, newline removed
    with open(ids, 'r') as handle:
        codes = [line.split('\n')[0] for line in handle]

    downloader = PDBList()
    for code in codes:
        print(code)
        downloader.retrieve_pdb_file(code, pdir=dir, file_format='pdb')
Beispiel #30
0
def download_pdb():
    """
    Offer the first five BLAST hits for selection and download the chosen entry.

    Reads ``my_blast.xml`` from the project directory, appends a description
    of the first five HSPs to ``choose_pdbChoices``, runs a wx application
    with ``MyFrame2``, then uses ``row`` to pick the hit. The chosen title is
    appended to ``info.txt``; characters 17-20 of the title are used as the
    PDB code, and the downloaded ``.ent`` file is copied to ``tmppdb.pdb`` in
    the project directory (the path is stored in the global ``pdbfile``).

    Relies on names defined elsewhere: ``project_dir``, ``spacer``,
    ``project_name``, ``row``, ``choose_pdbChoices`` and ``MyFrame2``.
    """
    global choose_pdbChoices
    global pdbfile

    # The context manager closes the BLAST result once it has been parsed.
    with open(project_dir + spacer + "my_blast.xml") as result_handle:
        blast_record = NCBIXML.read(result_handle)

    i = 0
    list_blast_results = []
    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            title = alignment.title
            e_value = hsp.expect
            # single-argument print() behaves the same on Python 2 and 3
            print(e_value)
            length = alignment.length
            identities = hsp.identities
            positives = hsp.positives
            gaps = hsp.gaps
            choice = ("title: " + str(title[0:100]) + " ..." +
                      "\n" + "score: " + str(e_value) + "  " +
                      "length: " + str(length) + "\n" +
                      "identities: " + str(identities) + "  " +
                      "positives: " + str(positives) + "  " +
                      "gaps: " + str(gaps) + "\n")
            # only the first five HSPs are offered
            if i < 5:
                choose_pdbChoices.append(choice)
                list_blast_results.append(title)
            i += 1
    print(choose_pdbChoices)

    app = wx.App(0)
    MainApp = MyFrame2(None)
    MainApp.Show()
    MainApp.Maximize()
    app.MainLoop()

    print("bla")
    # NOTE(review): row is presumably set by the frame - confirm.
    print(row)
    title = list_blast_results[int(row)]
    with open(project_dir + spacer + "info.txt", "a") as info_file:
        info_file.write("\n you chose this blastresult: \n" + str(title))

    pdb_code = title[17:21]
    print(pdb_code)
    pdb_dir = project_dir + spacer + project_name + "_blast_pdb"
    pdbfile = project_dir + spacer + "tmppdb.pdb"
    pdbl = PDBList()
    # NOTE(review): no file_format is passed although a ".ent" file is copied
    # below - confirm the installed library still defaults to PDB format.
    pdbl.retrieve_pdb_file(pdb_code, obsolete = False, pdir = pdb_dir)
    shutil.copyfile(pdb_dir + spacer + "pdb" + pdb_code.lower() + ".ent", pdbfile)
Beispiel #31
0
def online_input(structure_name, file_format=None):
    """
    Download a tertiary structure from PDB through BioPython's PDBList

    :param structure_name: PDB name of the structure
    :param file_format: file format to request; 'pdb' is used when empty
    :return: the value returned by the download call
    """
    chosen_format = file_format or 'pdb'
    target_dir = ROOT_DIR + '/downloadedStructures/'
    fetcher = PDBList()
    return fetcher.retrieve_pdb_file(pdb_code=structure_name,
                                     file_format=chosen_format,
                                     pdir=target_dir)
Beispiel #32
0
def DownloadTemplate(template):
    """
    Fetch a template from the pdb database in PDB format with ``pdir="./"``.

    Arguments:

    template: pdb code of the template to download.
    """
    options = dict(obsolete=False, pdir="./", file_format="pdb")
    PDBList().retrieve_pdb_file(template, **options)
Beispiel #33
0
def pdb_download(code, path=None):
    """Downloads the structure of the pdb on a file.

    code is the pdb code of the structure
    path is the location where it will be downloaded; when it is None no
    directory is chosen here and the library decides

    Warnings issued during the download are routed to logging.

    Returns the file name where it is stored"""

    logging.info("Downloading pdb %s.", code)

    logging.captureWarnings(True)
    try:
        pdbl = PDBList(obsolete_pdb=os.getcwd())
        # pdir=None is the documented default, so one call covers both cases
        return pdbl.retrieve_pdb_file(code, pdir=path)
    finally:
        # stop capturing warnings even when the download raises
        logging.captureWarnings(False)
Beispiel #34
0
def download_and_get_chains():
    """
    Download every entry from ``read_rostdb_entries()`` and save the listed chains.

    Each requested chain found in the structure is written to
    ``<structure id>_<chain id>.pdb`` in the current directory. Every
    (entry, chain) pair whose processing raised is collected and printed at
    the end.
    """
    from Bio.PDB import PDBParser, PDBIO
    failed = []
    pdbs_dict = read_rostdb_entries()
    io = PDBIO()
    pdbl = PDBList()
    for pdb_e, chains in pdbs_dict.items():
        # Download and parse once per entry instead of once per chain.
        try:
            # The ".ent" file read below is the PDB-format download, so the
            # format is requested explicitly.
            pdbl.retrieve_pdb_file(pdb_e, pdir='./', file_format='pdb')
            pdb = PDBParser().get_structure(pdb_e, 'pdb' + pdb_e.lower() + '.ent')
        except Exception:
            failed.extend((pdb_e, chain_e) for chain_e in chains)
            continue

        for chain_e in chains:
            try:
                for chain in pdb.get_chains():
                    if chain.get_id() == chain_e:
                        io.set_structure(chain)
                        io.save(pdb.get_id() + '_' + chain.get_id() + '.pdb')
            except Exception:
                # best effort: remember the failure and go on with the rest
                failed.append((pdb_e, chain_e))
    print("failures:", failed)
def uniblast(sequence, db, evalue, tgts):
    """
    Run psiblast for one sequence, annotate the hits with taxonkit and filter them.

    The psiblast output is piped through ``taxonkit lineage`` and
    ``taxonkit reformat`` and read into a DataFrame. Hits are sorted, limited
    to ``pident > 30`` and ``qcovs > 70``, and only rows that are the first
    occurrence of their ``saccver`` are kept. For every remaining hit the
    PDB-format file of its first four characters is downloaded into ``PDBs``,
    parsed, and removed again.

    :param sequence: two-item tuple used to build the FASTA record
        (``'>%s\\n%s'``).
    :param db: value for the ``BLASTDB`` environment variable and ``-db``.
    :param evalue: value for ``-evalue``.
    :param tgts: value for ``-max_target_seqs``.
    :return: the filtered DataFrame.
    """
    os.environ['BLASTDB'] = db
    seq = '>%s\n%s' % sequence
    blast_line = ['psiblast', '-db', db, '-evalue', str(evalue),
                  '-num_iterations', '0', '-max_target_seqs', str(tgts),
                  '-outfmt', '6 qaccver saccver pident evalue qcovs length '
                             'staxid']
    taxon_line = ['taxonkit', 'lineage', '-i', '7']
    taxon_line2 = ['taxonkit', 'reformat', '-i', '8', '-f', '{s}']

    # os.getcwd() exists on Python 3, which the encoding= argument requires.
    cwd = os.getcwd()
    blast = Popen(blast_line, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                  env=os.environ.copy(), encoding='utf8', cwd=cwd)
    o, e = blast.communicate(seq)
    txnkit1 = Popen(taxon_line, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    env=os.environ.copy(), encoding='utf8', cwd=cwd)
    # only the text before the first blank line is passed on
    o1, e1 = txnkit1.communicate(o[:o.find('\n\n')])
    txnkit2 = Popen(taxon_line2, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    env=os.environ.copy(), encoding='utf8', cwd=cwd)
    o2, e2 = txnkit2.communicate(o1)

    df = pd.read_table(StringIO(o2), sep='\t', header=None,
                       names='qaccver saccver pident evalue qcovs length '
                             'staxid lineage species'.split(),
                       quoting=csv.QUOTE_NONE, encoding='utf-8')
    df = df.sort_values(by=['pident', 'evalue', 'qcovs', 'length'],
                        ascending=[False, True, False, False])
    # One combined mask selects the same rows as the chained indexing, without
    # applying a mask built on the unfiltered frame to the filtered one.
    df = df[(df.pident > 30) & (df.qcovs > 70) & ~df.saccver.duplicated()]

    pdbl = PDBList()
    parser = PDBParser()
    for hit in df.saccver.unique():
        pdb = hit[:4]
        # Download pdb
        file_path = pdbl.retrieve_pdb_file(pdb, pdir='PDBs', file_format='pdb')
        st = parser.get_structure(pdb, file_path)
        ou = PDBIO()
        ou.set_structure(st)
        # Remove the file that was actually downloaded.
        # NOTE(review): nothing is written before the removal - confirm the
        # structure is not meant to be saved here.
        os.remove(file_path)
    return df
Beispiel #36
0
	def fetch_pdb(self, pdb_code, pdb_dir):
		"""Download the structure file of ``pdb_code`` with ``pdir=pdb_dir``."""
		PDBList().retrieve_pdb_file(pdb_code, pdir=pdb_dir)
Beispiel #37
0
                 f.write("\nPosicao: %s\n" %ref.positions)
                 f.write("\nComentarios: %s\n" %ref.comments)
                 f.write("\nReferencias: %s\n" %ref.references)
                 f.write("\nAutores: %s\n" %ref.authors)
                 f.write("\nTitulo: %s\n" %ref.title)
                 f.write("\nLocalizacao: %s\n\n" %ref.location)
             break
         except Exception:
             break
f.close()

# Structural analysis of the relevant proteins, based on the PDB files found
# (code based on the one developed by group 10).
parser = PDBParser()
# NOTE(review): the structure is read from a local '4F67.pdb' *before* the
# download call below, and the download's result is not used by the parser --
# confirm that '4F67.pdb' is expected to already exist in the working directory.
structure = parser.get_structure('4F67', '4F67.pdb')
pdbl = PDBList()
pdbl.retrieve_pdb_file('4F67')
# The context manager guarantees the report file is closed even when one of
# the header keys is missing and the write raises KeyError.
with open("analise_pdb.txt", "w") as ficheiro:
    ficheiro.write("****Analise do ficheiro 4F67.pdb****\n")
    ficheiro.write("\nPalavras Chave: %s\n" %structure.header['keywords'])
    ficheiro.write("\nNome do Organismo: %s\n" %structure.header['name'])
    ficheiro.write("\nCabecalho: %s" %structure.header['head'])
    ficheiro.write("\nData da deposicao: %s\n" %structure.header['deposition_date'])
    ficheiro.write("\nData da publicacaos: %s\n" %structure.header['release_date'])
    ficheiro.write("\nMetodo usado: %s\n" %structure.header['structure_method'])
    ficheiro.write("\nResolucao: %s\n" %structure.header['resolution'])
    ficheiro.write("\nReferencia da estrutura: %s\n" %structure.header['structure_reference'])
    ficheiro.write("\nReferencia de artigo: %s\n" %structure.header['journal_reference'])
    ficheiro.write("\nAutor: %s\n" %structure.header['author'])
    ficheiro.write("\nComposto: %s" %structure.header['compound'])
Beispiel #38
0
 def get_pdb_files(self):
     """Download every PDB entry listed in ``self.pdb_ids``.

     Each file is stored in the directory given by ``self.directory``.
     """
     downloader = PDBList()
     target_dir_owner = self
     for pdb_id in target_dir_owner.pdb_ids:
         # The directory is looked up per entry, exactly as each download needs it.
         downloader.retrieve_pdb_file(pdb_id, pdir=target_dir_owner.directory)
Beispiel #39
0
#!/usr/bin/env python

from Bio.PDB import PDBList, PDBParser, Selection

import networkx as nx
import matplotlib.pyplot as plt

from pprint import pprint as pp

import numpy as np

# Distance cut-off; hard-coded here (presumably meant to be read from
# sys.argv[2] -- TODO confirm).
distanceThreshold = 1

pdbParser = PDBParser()
pdbList = PDBList()

# Download the entry, then parse the file at the path the download reports.
proteinName = "1MBN"
structurePath = pdbList.retrieve_pdb_file(proteinName)
structure = pdbParser.get_structure(proteinName, structurePath)

# Flatten the structure to residue level ("R") and allocate a square
# zero matrix with one row/column per residue.
resList = Selection.unfold_entities(structure, "R")
residueCount = len(resList)
distanceMatrix = np.zeros((residueCount, residueCount))


def genDistanceMatrix(dMatrix, rList):
    """Print a map of the "CA" entries found in ``rList`` and its size.

    For every item of ``rList`` that contains a "CA" key, the entry is stored
    under the item's ``id[1]``; later items with the same ``id[1]`` replace
    earlier ones. The map and then its length are pretty-printed.

    ``dMatrix`` is accepted but not used, and nothing is returned.
    """
    alphaCarbons = {}
    for residue in rList:
        if "CA" not in residue:
            continue
        alphaCarbons[residue.id[1]] = residue["CA"]

    pp(alphaCarbons)
    pp(len(alphaCarbons))


# Run the helper on the zero-initialised matrix and the residue list built above.
genDistanceMatrix(distanceMatrix, resList)
Beispiel #40
0
#!/usr/bin/env python

from Bio.PDB import PDBList

p = PDBList()
atp227 = [
            '1A0I',
            '1A49',
            '1A82',
            '1ATP',
            '1AYL',
            '1B0U',
            '1B76',
            '1B8A',
            '1BCP',
            '1BCP',
            '1CSN',
            '1D9Z',
            '1DY3',
            '1E2Q',
            '1E4G',
            '1E8X',
            '1EE1',
            '1ESQ',
            '1F2U',
            '1F2U',
            '1F9A',
            '1FMW',
            '1G21',
            '1G3I',
            '1G5T',
Beispiel #41
0
	def fetch_pdb(self, pdb_code):
		"""Download the PDB entry ``pdb_code`` into ``self.pdb_dir`` and rename it.

		The download is done by ``PDBList.retrieve_pdb_file``; afterwards
		``self._rename_pdb_file`` is called with the same code.
		"""
		PDBList().retrieve_pdb_file(pdb_code, pdir=self.pdb_dir)
		self._rename_pdb_file(pdb_code)
Beispiel #42
0
        serialize(entries, pdbredo_seq_folder, entries_json_file)
    except IOError:
        raise('Missing some file')
else:
    entries = retrieve(pdbredo_seq_folder, entries_json_file)
    
if new_subset:
    # Build a fresh subset of the PDB: create the output folders, download
    # the files and parse them for UniProt cross-references.
    os.mkdir(pdb_folder)
    os.mkdir(uniprot_folder)

    # The entries dict is in pseudo-random order, so its first n_entries keys
    # serve as a pseudo-random sample.
    all_entry_ids = list(entries.keys())
    PDB_subset = all_entry_ids[:n_entries]
    # Keep only the part of each identifier before the first underscore.
    PDB_subset_nochain = [
        entry_id.split('_', 1)[0] for entry_id in PDB_subset
    ]

    # Fetch and save every PDB file of the subset.
    pdbl = PDBList()
    for entry in PDB_subset_nochain:
        pdbl.retrieve_pdb_file(entry, pdir=pdb_folder)

    # Serialize the PDB list for future use.
    serialize(PDB_subset, pdb_folder, pdb_list)

    # Parse the PDB headers for DBREF records pointing to UniProt.
    pdb_to_uniprot = find_uniprot_in_pdb(PDB_subset_nochain, pdb_folder)

if recompute_clean:
    PDB_subset = retrieve(pdb_folder, pdb_list)
    PDB_subset_nochain = [x.split('_')[0] for x in PDB_subset]
    # parse the pdb headers for DBREF to uniprot
    pdb_to_uniprot = find_uniprot_in_pdb(PDB_subset_nochain, pdb_folder)
    #determine the uniprot references to fetch
    to_fetch = []
    for entry in pdb_to_uniprot.keys():
Beispiel #43
0
"""
Module Description:
This file downloads PDB files from the network and stores them in the appropriate place.
"""

from Bio.PDB import PDBList

PDBListTemp = PDBList()
# Read the list one line at a time. The previous `while IN:` loop never read
# a new line, so it repeated the first entry forever. The context manager
# also closes the file once the list is exhausted.
with open("pfam_list.txt", "r") as FILE:
	for IN in FILE:
		# Drop the trailing newline so a clean identifier is passed on.
		IN = IN.strip()
		if not IN:
			# Skip blank lines instead of requesting an empty identifier.
			continue
		# Parenthesised form is valid under both Python 2 and Python 3.
		print(IN)
		PDBListTemp.retrieve_pdb_file(IN)
	
	



Beispiel #44
0
def download_pdb(args):
    """Download one PDB entry into a target directory.

    :param args: mapping with the keys ``'name'`` (the entry identifier
        handed to ``PDBList.retrieve_pdb_file``) and ``'path'`` (handed to it
        as ``pdir``).
    :raises KeyError: if either key is missing from ``args``.
    """
    # Only PDBList is needed; the previously created PDBIO instance (bound to
    # a local named ``io``) and the PDBParser import were never used.
    from Bio.PDB import PDBList
    pdbl = PDBList()
    pdbl.retrieve_pdb_file(args['name'], pdir=args['path'])
Beispiel #45
0
 def get_pdb_file(self, directory):
     """Download the PDB entry ``self.pdb_id`` into ``directory``.

     The download is delegated to ``PDBList.retrieve_pdb_file``; nothing is
     returned.
     """
     PDBList().retrieve_pdb_file(self.pdb_id, pdir=directory)