def get_seqres_file(self, savefile='pdb_seqres.txt'):
    """Download the sequence listing of all PDB entries to a local file.

    The (big) file ``pdb_seqres.txt`` is fetched from the
    ``/pub/pdb/derived_data/`` directory of ``self.pdb_server``.

    @param savefile: path of the local file the listing is written to
    @type savefile: string
    """
    print("Retrieving sequence file (takes about 15 MB).")
    # Location of the listing relative to the server root.
    remote_path = '/pub/pdb/derived_data/pdb_seqres.txt'
    _urlretrieve(self.pdb_server + remote_path, savefile)
def retrieve_pdb_file(self, pdb_code, obsolete=False, pdir=None):
    """Retrieve a PDB structure file from the PDB server and store it locally.

    The gzipped entry is downloaded, decompressed next to the archive and
    the archive is deleted afterwards. The name of the decompressed file
    is returned as a single string. If C{self.overwrite} is false and the
    decompressed file already exists, nothing is downloaded.

    If decompression fails, the partially written output file is removed
    so that a later call does not mistake it for a complete structure.

    @param pdb_code: PDB identifier of the entry; it is lower-cased before
        being used to build the URL and the local file names
    @type pdb_code: string

    @param obsolete: if True, fetch from the server's "obsolete" tree and
        (when pdir is None) save below C{self.obsolete_pdb} instead of
        C{self.local_pdb}
    @type obsolete: bool

    @param pdir: put the file in this directory (default: use
        C{self.local_pdb} / C{self.obsolete_pdb}, with a PDB-style
        sub-directory unless C{self.flat_tree} is set)
    @type pdir: string

    @return: filename
    @rtype: string
    """
    # Get the compressed PDB structure
    code = pdb_code.lower()
    archive_fn = "pdb%s.ent.gz" % code
    pdb_dir = "divided" if not obsolete else "obsolete"
    url = (self.pdb_server +
           '/pub/pdb/data/structures/%s/pdb/%s/%s' %
           (pdb_dir, code[1:3], archive_fn))

    # Where does the final PDB file get saved?
    if pdir is None:
        path = self.local_pdb if not obsolete else self.obsolete_pdb
        if not self.flat_tree:
            # Put in PDB-style directory tree (keyed on the two middle
            # characters of the code)
            path = os.path.join(path, code[1:3])
    else:
        # Put in specified directory
        path = pdir

    if not os.access(path, os.F_OK):
        os.makedirs(path)

    filename = os.path.join(path, archive_fn)
    final_file = os.path.join(path, "pdb%s.ent" % code)  # (decompressed)

    # Skip download if the file already exists
    if not self.overwrite and os.path.exists(final_file):
        print("Structure exists: '%s' " % final_file)
        return final_file

    # Retrieve the file
    print("Downloading PDB structure '%s'..." % pdb_code)
    _urlretrieve(url, filename)

    # Uncompress the archive, delete when done.
    # Can't use context manager with gzip.open until Python 2.7, so the
    # handle is closed explicitly in a finally clause.
    output_opened = False
    complete = False
    gz = gzip.open(filename, 'rb')
    try:
        with open(final_file, 'wb') as out:
            output_opened = True
            out.writelines(gz)
        complete = True
    finally:
        gz.close()
        # A truncated output would be reported as "Structure exists" on
        # the next call, so drop it. Only do so when this call actually
        # opened (and therefore truncated) the file.
        if output_opened and not complete and os.path.exists(final_file):
            os.remove(final_file)

    os.remove(filename)
    return final_file