Esempio n. 1
0
def retrieve_cif_list():
    """Download the mmCIF file of every PDB ID listed in ``input_files/protlist.txt``.

    The list file is read in full and split on whitespace, so IDs may be
    separated by spaces or newlines. Files are requested with
    ``overwrite=True`` into ``input_files/cif``.
    """
    server = PDBList(server='ftp://ftp.wwpdb.org', pdb='input_files',
                     obsolete_pdb=None, verbose=True)

    # The context manager closes the handle even if reading raises.
    with open('input_files/protlist.txt', 'r') as pdb_list:
        content = pdb_list.read().split()

    server.download_pdb_files(content, pdir="input_files/cif",
                              file_format='mmCif', overwrite=True,
                              obsolete=False)
Esempio n. 2
0
def _log_xtal_download(c, pdb, acc, srcdbstr, todaystr, rel_pdbpath, pdbformat):
    """Insert or refresh the XTALS row for one (accession, PDB ID) pair.

    Parameters
    ----------
    c : cursor on the database holding the XTALS table
    pdb : PDB ID as extracted from the source database
    acc : accession the PDB ID was listed under
    srcdbstr : file name of the source database
    todaystr : download date written to ``dldate``
    rel_pdbpath : path where the downloaded structure is expected
    pdbformat : value stored in the ``pdbformat`` column
    """
    download_success = os.path.exists(rel_pdbpath)

    c.execute(
        '''SELECT COUNT(*) FROM XTALS WHERE pdbid=(?) AND dlsuccess=(?)''',
        (pdb, 1))
    if c.fetchone()[0]:
        # Already recorded as a successful download: nothing to do.
        return

    c.execute(
        '''SELECT COUNT(*) FROM XTALS WHERE pdbid=(?) AND dlsuccess=(?)''',
        (pdb, 0))
    if c.fetchone()[0]:
        # A failed attempt is on record; flip it only if the file is there now.
        if download_success:
            print(f'new download of previously failed {pdb}')
            c.execute(
                '''UPDATE XTALS SET dldate = (?), dlsuccess = (?) WHERE pdbid=(?)''',
                (todaystr, 1, pdb))
        return

    # First time this PDB ID is seen; the 'obsolete' column is left NULL.
    c.execute(
        '''INSERT INTO XTALS VALUES (?,?,?,?,?,?,?,?)''',
        (pdb, acc, srcdbstr, todaystr, str(rel_pdbpath), pdbformat,
         download_success, None))


def build_xtalstable(dbpathstr, sourcedbpathstrs, pdbformat='cif'):
    """Download structures referenced by source databases and log them in XTALS.

    For every source database, the ``acc`` and ``pdbids`` columns of its
    CAZYSEQDATA table are read, four-character IDs followed by ``[`` are
    extracted from ``pdbids``, the structures are downloaded into an
    ``Xtals`` folder next to the target database, and one XTALS row per
    ID is inserted or updated. Changes are committed once per source
    database.

    Parameters
    ----------
    dbpathstr : str
        Path of the database that holds (or will hold) the XTALS table.
    sourcedbpathstrs : iterable of str
        Paths of the source databases containing a CAZYSEQDATA table.
    pdbformat : str, optional
        Value stored in the ``pdbformat`` column (default ``'cif'``).
        NOTE(review): it is only recorded; it is not passed to the
        downloader, and the success check always looks for ``<id>.cif``.
    """
    dbpath = Path(dbpathstr)
    todaystr = datetime.date.today().isoformat()
    conn = seqdbutils.gracefuldbopen(dbpath)
    try:
        # By default the structures folder is named 'Xtals' and sits in the
        # same directory as the xtals database.
        xtaldirpath = dbpath.parent / 'Xtals'
        xtaldirpath.mkdir(exist_ok=True)

        c = conn.cursor()
        c.execute(
            '''CREATE TABLE IF NOT EXISTS XTALS (pdbid text, acc text, srcdb text,dldate text,
                  relpath text, pdbformat text, dlsuccess int, obsolete int)''')
        pdbl = PDBList()
        # Raw string: '\[' is an invalid escape sequence in a normal literal.
        cxRE = re.compile(r'([A-Za-z0-9]{4})\[')

        for srcdbpathstr in sourcedbpathstrs:
            srcdbpath = Path(srcdbpathstr)
            srcdbstr = srcdbpath.name
            src_conn = seqdbutils.gracefuldbopen(srcdbpath)
            try:
                seqdbutils.check_tables_exist(src_conn, ['CAZYSEQDATA'])
                src_c = src_conn.cursor()
                src_c.execute(
                    'SELECT acc,pdbids FROM CAZYSEQDATA WHERE pdbids NOT NULL')

                # Parallel lists: dbaccs[i] is the accession that listed dbpdbs[i].
                # Rows are indexed by column name, so this presumably relies on
                # gracefuldbopen installing a name-aware row factory -- confirm.
                dbpdbs = []
                dbaccs = []
                for pdbrow in src_c.fetchall():
                    pdbs = cxRE.findall(pdbrow['pdbids'])
                    dbaccs.extend([pdbrow['acc']] * len(pdbs))
                    dbpdbs.extend(pdbs)

                pdbl.download_pdb_files(dbpdbs, pdir=xtaldirpath)

                # NOTE(review): the expected file name uses the ID exactly as
                # written in the source database; verify it matches the case
                # and extension PDBList uses when saving.
                for acc, pdb in zip(dbaccs, dbpdbs):
                    _log_xtal_download(c, pdb, acc, srcdbstr, todaystr,
                                       xtaldirpath / f'{pdb}.cif', pdbformat)
                conn.commit()
            finally:
                # Close the source database even if download/logging fails.
                src_conn.close()
    finally:
        conn.close()
def download_pdb(pdb_id, pdbs_path="pdbs"):
    """Download one or more structures in PDB format.

    Parameters
    ----------
    pdb_id : str or iterable of str
        A single pdb id, or several ids.
    pdbs_path : str, optional
        path of folder containing the pdbs (default is "pdbs")
    """
    # download_pdb_files iterates over its first argument, so a bare string
    # would be requested one character at a time; wrap it in a list.
    pdb_codes = [pdb_id] if isinstance(pdb_id, str) else list(pdb_id)

    # NOTE(review): obsolete_pdb is given True rather than a directory path;
    # kept as in the original -- confirm this is intended.
    pdbl = PDBList(obsolete_pdb=True)
    pdbl.download_pdb_files(pdb_codes, file_format="pdb", pdir=pdbs_path)
Esempio n. 4
0
def download_pdb_files(FASTA_FILE, PDB_OUTPUT):
    """Fetch the structure of every protein named in a FASTA file.

    Parameters
    ----------
    FASTA_FILE : str
        Location of the .fasta file whose record ids carry the pdb ids
    PDB_OUTPUT : str
        Directory the downloaded structures are written to

    """
    # Each record id minus its final character is used as the id to
    # download; the set drops duplicates.
    protein_ids = {
        record.id[:-1] for record in SeqIO.parse(FASTA_FILE, "fasta")
    }

    downloader = PDBList()
    downloader.download_pdb_files(protein_ids, pdir=PDB_OUTPUT)
Esempio n. 5
0
def obtain_pdb(dataset):
    """Download the PDB file and the FASTA file of every id in ``dataset['pdb']``.

    Structures go to ``./PDB`` and sequences to ``./FASTA`` (both created
    when missing). A FASTA file that already exists is not fetched again,
    but every FASTA file is checked to be non-empty.
    """
    pdb_set = list(set(dataset['pdb']))

    # Structures: one batch request through PDBList.
    pdb_dir = './PDB'
    if not os.path.exists(pdb_dir):
        os.mkdir(pdb_dir)
    downloader = PDBList(verbose=False)
    downloader.download_pdb_files(pdb_codes=pdb_set,
                                  file_format='pdb',
                                  pdir=pdb_dir)

    # Sequences: one wget call per id, run through the shell.
    if not os.path.exists('./FASTA'):
        os.mkdir('./FASTA')
    fetch_cmd = """wget -q -O - "https://www.rcsb.org/pdb/download/viewFastaFiles.do?structureIdList={}&compressionType=uncompressed" > ./FASTA/{}.fasta"""
    for item in pdb_set:
        fasta_path = "./FASTA/{}.fasta".format(item)
        if not os.path.exists(fasta_path):
            os.system(fetch_cmd.format(item, item))
            # Short pause between consecutive requests.
            time.sleep(0.1)
        fasta_size = os.stat(fasta_path).st_size
        assert fasta_size != 0, 'Download Failed, Empty File generated.'
        if i != ',':
            outputlist.append(pdbID + str(i))


#string functions together and generate output in folders organized by taxID
# Driver loop (Python 2 syntax): one working directory per taxonomy ID.
# NOTE(review): this excerpt is cut off inside the final try block.
for taxID in taxidlist:
    # Create a directory named after the taxID and make it the working dir.
    # NOTE(review): no chdir back to the parent is visible in this excerpt.
    os.mkdir(str(taxID))
    os.chdir(str(taxID))
    outfilename = str(taxID) + '.txt'
    # pdbids is reset and then measured right after query(); presumably
    # query() fills this module-level list -- confirm against its definition.
    pdbids = []
    query(taxID)
    initial_num_entries = len(pdbids)
    # Download every collected ID as mmCif into the current directory.
    pdbl = PDBList()
    cwd = os.getcwd()
    pdbl.download_pdb_files(pdbids,
                            obsolete=False,
                            pdir=cwd,
                            file_format='mmCif')
    for pdbID in pdbids:
        try:
            filter_bad_strc(pdbID)
        except:
            # Bare except: any failure in filter_bad_strc is reported with
            # this message and the loop continues.
            print "No pdb entries for protein: " + str(pdbID)
    # mainlist / filteredpdbIDs / outputlist are reset before the helpers
    # run; presumably fetch(), filter_redundant_strc() and get_chains()
    # append to them as globals -- TODO confirm.
    mainlist = []
    for pdbID in pdbids:
        fetch(pdbID)
    filteredpdbIDs = []
    filter_redundant_strc(mainlist)
    outputlist = []
    for pdbID in filteredpdbIDs:
        try:
            get_chains(pdbID)
# Folder (inside current_path) that holds the downloaded PDB files.
pdbs_path = os.path.join(current_path, 'pdb')

# Ask the filesystem directly instead of searching a glob listing.
if not os.path.exists(pdbs_path):
    os.mkdir(pdbs_path)

# pdbs.txt lists one PDB id per line. strip() removes the line ending
# without eating the last character of an id when the final line has no
# trailing newline; blank lines are ignored.
with open(os.path.join(current_path, 'pdbs.txt'), 'r') as f:
    pdbs = [line.strip() for line in f if line.strip()]

# With exactly one id it names the run; otherwise the name of the
# current_path directory is used.
if len(pdbs) == 1:
    pdb_id = pdbs[0]
else:
    pdb_id = os.path.basename(current_path)

pdbl = PDBList(obsolete_pdb=True)

# Download only when the folder is still empty.
if not glob(os.path.join(pdbs_path, '*')):
    pdbl.download_pdb_files(pdbs, file_format='pdb', pdir=pdbs_path)

#initialize databases to report
database_1 = []
database_2 = []

# From here on, pdbs holds the paths of the files in pdbs_path, not the ids.
pdbs = glob(os.path.join(pdbs_path, '*'))

for pdb in pdbs:
    # Build the molecule object and its network for this file.
    mol = bg.Pmolecule(pdb)
    net = mol.network()

    # take only selected positions:
    if selected_positions:
        # Iterate over a copy of the node collection.
        for node in list(net.nodes):
            # Everything after the node label's first character is parsed
            # as the integer position.
            pos = int(node[1::])
Esempio n. 8
0
def retrieve_pdb_files(pdb_codes: List[str], pdb_data_dir: Union[Path, str], file_format: str = "mmCif"):
    """Download the given PDB codes through the wwPDB FTP server.

    ``pdb_data_dir`` is handed to ``PDBList`` as its local ``pdb``
    directory and ``file_format`` selects the requested file type.
    Returns whatever ``PDBList.download_pdb_files`` returns.
    NOTE(review): confirm that call actually yields file names.
    """
    downloader = PDBList(server="ftp://ftp.wwpdb.org", pdb=pdb_data_dir)
    return downloader.download_pdb_files(pdb_codes, file_format=file_format)
Esempio n. 9
0
from Bio.PDB import PDBList

##this script may download pdb files for a list of pdb codes.

# Usage: python DownloadPDBs.py pdbcodeListFile format [ResDir]
# print(...) with a single argument works on both Python 2 and Python 3.
if len(sys.argv) < 3:
    print('python DownloadPDBs.py pdbcodeListFile format [ResDir]')
    print('\tpdbcodeListFile: a file of PDB IDs, in upper or lower case')
    print('\tformat: pdb or mmCif. This option specifies which PDB file format needed')
    sys.exit(1)

# The list file may hold several whitespace-separated IDs per line.
listFile = sys.argv[1]
with open(listFile, 'r') as fh:
    pdbcodes = fh.read().split()

# Requested file format (named file_format to avoid shadowing the builtin).
file_format = sys.argv[2]

# Result directory: optional third argument, current directory otherwise.
ResDir = os.getcwd()
if len(sys.argv) > 3:
    ResDir = sys.argv[3]

# makedirs also creates missing parent directories of ResDir.
if not os.path.isdir(ResDir):
    os.makedirs(ResDir)

pdblist = PDBList()
pdblist.download_pdb_files(pdbcodes, file_format=file_format, pdir=ResDir)
Esempio n. 10
0
#!/Applications/Anaconda/anaconda/bin/python3

from Bio.PDB import PDBList

# Downloader used for the batch request below.
pdbl = PDBList()

# Signature reminder:
#   download_pdb_files(self, pdb_codes, obsolete=False, pdir=None,
#                      file_format='pdb', overwrite=False)
# A single entry could be fetched with retrieve_pdb_file('1FAT', file_format='pdb').

# PDB IDs to fetch.
mylist = [
    '6eqd',
    '6ane',
    '5yfe',
    '5xjh',
    '5xg0',
    '5xfy',
    '5xfz',
    '5xh3',
    '5yns',
]

# Fetch every ID in PDB format into the fixed structures folder.
pdbl.download_pdb_files(
    mylist,
    pdir="/Users/astuart/Ramapo/Research/PlasticDegradingEnzymes/Structures/",
    file_format='pdb',
    obsolete=False)
Esempio n. 11
0
        'CudaPrecision': 'mixed', 
        'CudaDeviceIndex': (CUDA_DEV_IDX if CUDA_DEV_IDX else 0)} 

# Helper function to choose mutant residues
# RESIDUES: every residue name randres may return by default.
RESIDUES = ['ACE', 'ALA', 'ALAD', 'ARG', 'ARGN', 'ASF', 'ASH', 'ASN', 'ASN1', 'ASP', 'ASPH', 'CALA', 'CARG', 'CASF', 'CASN', 'CASP', 'CCYS', 'CCYX', 'CGLN', 'CGLU', 'CGLY', 'CHID', 'CHIE', 'CHIP', 'CILE', 'CLEU', 'CLYS', 'CME', 'CMET', 'CPHE', 'CPRO', 'CSER', 'CTHR', 'CTRP', 'CTYR', 'CVAL', 'CYM', 'CYS', 'CYS1', 'CYS2', 'CYSH', 'CYX', 'DAB', 'GLH', 'GLN', 'GLU', 'GLUH', 'GLY', 'HID', 'HIE', 'HIP', 'HIS', 'HIS1', 'HIS2', 'HISA', 'HISB', 'HISD', 'HISE', 'HISH', 'HSD', 'HSE', 'HSP', 'HYP', 'ILE', 'LEU', 'LYN', 'LYS', 'LYSH', 'MET', 'MSE', 'NALA', 'NARG', 'NASN', 'NASP', 'NCYS', 'NCYX', 'NGLN', 'NGLU', 'NGLY', 'NHID', 'NHIE', 'NHIP', 'NILE', 'NLEU', 'NLYS', 'NME', 'NMET', 'NPHE', 'NPRO', 'NSER', 'NTHR', 'NTRP', 'NTYR', 'NVAL', 'ORN', 'PGLU', 'PHE', 'PRO', 'QLN', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
# STANDARDS: the 20 standard amino acids. The list previously held the
# non-standard name 'LYN' in place of 'MET'.
STANDARDS = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']


def randres(standard=False):
    """Return a randomly chosen residue name.

    Parameters
    ----------
    standard : bool, optional
        When true, draw from STANDARDS; otherwise draw from RESIDUES.
    """
    pool = STANDARDS if standard else RESIDUES
    # NOTE(review): assumes rd.randint includes its upper bound (as the
    # stdlib random module does) -- confirm what `rd` is bound to.
    return pool[rd.randint(0, len(pool) - 1)]

###
# Seed
###
# Download seed scFv (RCSB PDB ID: 1MFA)
pdbl = PDBList(obsolete_pdb='/dev/null')
pdbl.download_pdb_files([SEED_PDB], pdir=PDB_DIR, file_format='pdb')

# Output files derived from the seed structure.
_lig_pdb_path = f'{PDB_DIR}/{SEED_PDB}.lig.pdb'
_fab_pdb_path = f'{PDB_DIR}/{SEED_PDB}.fab.pdb'

# Extract 1MFA components
# NOTE(review): the file name is built from SEED_PDB as given; confirm it
# matches the case PDBList uses when saving the download.
uni_1MFA = mda.Universe(f'{PDB_DIR}/pdb{SEED_PDB}.ent')
assert hasattr(uni_1MFA, 'trajectory')

# Ligand: everything that is neither protein nor a HOH residue.
lig_1MFA = uni_1MFA.select_atoms('not protein and not resname HOH')
lig_1MFA.write(_lig_pdb_path)

# FAb: all protein atoms. Per-chain selections ('segid L' / 'segid H')
# are not extracted here.
fab_1MFA = uni_1MFA.select_atoms('protein')
fab_1MFA.write(_fab_pdb_path)

# Fix/clean the FAb apo protein and save it
fixer = PDBFixer(_fab_pdb_path)