import os
import random
import sys

from Bio.PDB import PDBList

# Parse the single required command-line argument: the sample size N.
try:
  DATASIZE = int(sys.argv[1])
  if DATASIZE < 0:
    # A negative sample size is as unusable as a non-numeric one; reject it
    # here, before the index download.
    raise ValueError("N must not be negative")
except (IndexError, ValueError):
  # IndexError: N was not given; ValueError: N is not a non-negative integer.
  print('Usage:', os.path.basename(sys.argv[0]), '''N
  
  N - number of entries to be randomly selected from all PDB entries.
  Output written to stdout.''', file=sys.stderr)
  sys.exit(1)

# Get index file
pdbl = PDBList()
#print(f"Downloading PDB index file, this takes a while.", file=sys.stderr)
all_entries = pdbl.get_all_entries()

# random.sample() raises ValueError when asked for more items than exist;
# report that as a readable error instead of a traceback.
if DATASIZE > len(all_entries):
  print(f"Cannot select {DATASIZE} entries from only {len(all_entries)} entries.", file=sys.stderr)
  sys.exit(1)

# Select random subset
selected = random.sample(all_entries, DATASIZE)
print(f"Randomly selected {DATASIZE} entries from {len(all_entries)} entries.", file=sys.stderr)

# Write results to stdout (status messages above go to stderr, so stdout
# carries only the selected entry IDs, one per line)
for entry in selected:
  print(entry, file=sys.stdout)

# Get data files
#for entry in selected:
#  pdbl.retrieve_pdb_file(entry, file_format="pdb", pdir="pdb")


コード例 #2
0
# Test which PDB entries error on PDB/mmCIF parsers
# Writes output to a file labelled with the week

import os
from datetime import datetime
from math import ceil
from Bio.PDB import PDBList
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.MMCIFParser import MMCIFParser

# Timestamp taken before any work is done; it appears in the first header line.
start = datetime.now()
# Directory handed to retrieve_pdb_file() as the download location.
basedir = "."

# Fetch the list of all PDB entries.
pdbl = PDBList()
pdblist = pdbl.get_all_entries()

# Header lines: when the check started and how many entries are covered.
outstrs = []
outstrs.append("Checking all PDB entries at {}".format(start.isoformat()))
outstrs.append("Checking {} entries".format(len(pdblist)))

# One parser instance per file format.
pdb_parser, mmcif_parser = PDBParser(), MMCIFParser()

for pu in sorted(pdblist):
    p = pu.lower()
    try:
        pdbl.retrieve_pdb_file(p, pdir=basedir, file_format="pdb")
    except:
        # Not having a PDB file is acceptable, though a failure to download an
        #   available file may hide an error in parsing
        try:
コード例 #3
0
from configure import configure
config = configure()

from Bio.PDB import PDBList
from os.path import dirname, join, basename
from glob import glob
from os import remove

# Synchronise the local mmCIF directory with the current list of PDB entries:
# download entries that are missing locally and delete local files whose
# entry is no longer listed.

# The mirror lives in the directory that contains the configured "cif" path.
pdb_dir = dirname(config["cif"])
pl = PDBList(pdb=pdb_dir)
# Files are expected directly inside pdb_dir; the deletion loop below builds
# its paths on that assumption.
pl.flat_tree = True

# Entry codes already on disk, derived from the "<code>.cif" file names.
# Only the trailing suffix is cut off, so a name that contains ".cif"
# elsewhere is not mangled.
cif_suffix = ".cif"
existingCifs = glob(join(pdb_dir, "*" + cif_suffix))
existingPdbs = {basename(cif)[:-len(cif_suffix)] for cif in existingCifs}

# Entry codes currently listed by the PDB, lower-cased for the comparison
# with the local file names.
allPdbs = set(pdb_code.lower() for pdb_code in pl.get_all_entries())

# An empty listing would make every local file look obsolete and wipe the
# whole mirror, so stop before touching anything.
if not allPdbs:
    raise SystemExit("PDB entry list is empty; refusing to modify " + pdb_dir)

pdb2delete = existingPdbs - allPdbs
pdb2download = allPdbs - existingPdbs

print("Found ", len(pdb2delete), " files to delete")
print("and ", len(pdb2download), " file to download")

for pdb_code in pdb2download:
    pl.retrieve_pdb_file(pdb_code, file_format="mmCif")

for pdb_code in pdb2delete:
    cifPath = join(pdb_dir, pdb_code + cif_suffix)
    remove(cifPath)