def read_pdb_file_content(input_type, input_value):
    """
    Return the text of a PDB file, loaded from disk or fetched from the PDB webserver.

    Parameters
    ----------
    input_type : str
        Selects the source: 'pdb_code' or 'pdb_filepath'.
    input_value : str
        The PDB code to fetch, or the local path to read, depending on
        `input_type`.

    Returns
    -------
    str
        Content of the PDB file as a single string.

    Raises
    ------
    ValueError
        If `input_type` is neither 'pdb_code' nor 'pdb_filepath'.
    """
    if input_type == "pdb_filepath":
        with open(input_value) as handle:
            return handle.read()

    if input_type == "pdb_code":
        return pypdb.get_pdb_file(input_value)

    raise ValueError(
        f"Provided input type {input_type} not valid. Accepted values are 'pdb_code' and 'pdb_filepath'."
    )
def readPDB(id, property_map={}):
    """Read a molecular system from a Protein Data Bank (PDB) ID in the RCSB
    PDB website.

    Parameters
    ----------

    id : str
        The PDB ID string.

    property_map : dict
        A dictionary that maps system "properties" to their user defined
        values. This allows the user to refer to properties with their
        own naming scheme, e.g. { "charge" : "my-charge" }

    Returns
    -------

    system : :class:`System <BioSimSpace._SireWrappers.System>`
        A molecular system, or None (after a warning) when PyPDB is not
        available.

    Raises
    ------

    TypeError
        If 'id' is not a string.

    IOError
        If the PDB file could not be downloaded for the given ID.

    Examples
    --------

    Create a molecular system from the deoxy human haemoglobin Protein
    Data Bank (PDB) record.

    >>> import BioSimSpace as BSS
    >>> system = BSS.readPDB("1a3n")
    """

    if not _has_pypdb:
        _warn("BioSimSpace.IO: PyPDB could not be imported on this system.")
        return None

    if not isinstance(id, str):
        raise TypeError("'id' must be of type 'str'")

    # Strip any whitespace from the PDB ID and convert to upper case.
    id = id.replace(" ", "").upper()

    # Create a temporary directory to write the PDB file. The object is kept
    # referenced in this frame until readMolecules has returned.
    tmp_dir = _tempfile.TemporaryDirectory()

    # Attempt to download the PDB file. (Compression is currently broken!)
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit are not reported as an invalid ID.
    try:
        pdb_string = _pypdb.get_pdb_file(id, filetype="pdb", compression=False)
    except Exception as e:
        raise IOError("Invalid PDB ID: '%s'" % id) from e

    # Create the name of the PDB file.
    pdb_file = "%s/%s.pdb" % (tmp_dir.name, id)

    # Now write the PDB string to file.
    with open(pdb_file, "w") as file:
        file.write(pdb_string)

    # Read the file and return a molecular system. The property map is
    # forwarded unchanged; it is never mutated in this function.
    return readMolecules(pdb_file, property_map)
def get_pdb(id, cache):
    """Return the path of the locally cached PDB file for ``id``, fetching it on a miss.

    IDs longer than four characters are split into their first four
    characters and the remainder, and delegated to ``get_chain_pdb``.

    Args:
        id: PDB identifier string.
        cache: When truthy, a cache miss is reported on stdout; the file is
            still downloaded.

    Returns:
        The cache path ``./cache/data/<id>.pdb`` (or whatever
        ``get_chain_pdb`` returns for the long-ID case).
    """
    if len(id) > 4:
        code, remainder = id[:4], id[4:]
        return get_chain_pdb(code, remainder, cache)

    cache_path = './cache/data/' + id + '.pdb'
    if not os.path.isfile(cache_path):
        # Only IDs shorter than four characters can reach this warning.
        if len(id) != 4:
            print("BAD ID:", id)
        if cache:
            print("CACHE MISS WHEN REQUIRED: ", id)

        archive_path = cache_path + ".gz"
        with open(archive_path, 'wb') as file:
            file.write(pb.get_pdb_file(id, compression=True))
        # presumably `console` runs the command in a shell; unpigz then
        # replaces the .gz archive with the decompressed file -- TODO confirm
        console("unpigz -f -q " + archive_path)

    return cache_path
def fill_proteins_sequences(proteins_list, folder_path, special_folder):
    """Download each protein's PDB file, store it on disk and set its sequence.

    For every item the PDB text is fetched by ``item.id``, written under
    ``folder_path/special_folder`` (created if absent), and the result of
    ``extract_sequence`` on the written file is stored, as a string, in
    ``item.sequence``.

    Args:
        proteins_list: Iterable of objects exposing ``id`` and accepting a
            ``sequence`` attribute.
        folder_path: Base output directory.
        special_folder: Sub-directory name inside ``folder_path``.

    Returns:
        The same ``proteins_list`` object, with sequences filled in.
    """
    for entry in proteins_list:
        structure_text = get_pdb_file(entry.id, filetype='pdb', compression=False)

        target_dir = folder_path + '/' + special_folder
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        written = write_to_file(str(entry.id), "pdb", structure_text, target_dir)
        entry.sequence = str(extract_sequence(written.name))

    return proteins_list
def get_pdb():
    """Fetch the PDB files for the PDB IDs stored in ``test_proteins2.db``.

    Reads the ``PrimeStructureDB_DataTable`` table, keeps the rows whose
    antigen type (second selected column) is not NULL, and downloads the PDB
    file for each remaining ``s_m_title`` value, which is used as the PDB ID.

    Returns:
        A list with one entry per downloaded ID; each entry is the PDB text
        split into a list of lines.
    """
    conn = sqlite3.connect('test_proteins2.db')
    try:
        rows = conn.execute(
            "SELECT s_m_title,s_bioluminate_Antigen_Type, s_bioluminate_Antigen_Chain, s_bioluminate_Antigen_Seq FROM 'PrimeStructureDB_DataTable'"
        ).fetchall()
    finally:
        # Always release the database handle, even if the query fails.
        conn.close()

    # PDB IDs from test_proteins2.db, restricted to rows with an antigen type.
    pdb_ids = [row[0] for row in rows if row[1] is not None]

    # Only the first 4 entries are fetched (the original author had checked
    # the function on the first 4 only).
    return [
        pypdb.get_pdb_file(pdb_id, filetype='pdb', compression=False).split('\n')
        for pdb_id in pdb_ids[:4]
    ]
def fetch_and_save_pdb_file(pdb_code, output_filepath):
    """
    Download a PDB file from the PDB webserver and write it to disk.

    Parameters
    ----------
    pdb_code : str
        PDB code of the protein structure.
    output_filepath : str or pathlib.Path
        Destination path including the filename; its suffix is replaced by
        ".pdb".

    Returns
    -------
    pathlib.Path
        The full path of the saved PDB file.
    """
    content = pypdb.get_pdb_file(pdb_code)

    # Force the ".pdb" extension regardless of what the caller passed.
    destination = Path(output_filepath).with_suffix(".pdb")
    with destination.open("w") as out:
        out.write(content)

    return destination
def download_pdb(pdbid, file_pathway):
    """
    Download a PDB entry and write it to ``<file_pathway>/<pdbid>.pdb``.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want
            to download
        file_pathway: a string containing the directory in which the PDB
            file is written; it is created when it does not exist

    Returns: nothing, but it does write the PDB file

    ***Note: this function does NOT fix any mistakes with the PDB file
    """
    if not os.path.exists(file_pathway):
        os.makedirs(file_pathway)

    # `bunit` is not defined in this function; it is read from an enclosing
    # scope. When it equals True the biological unit is fetched instead of
    # the plain PDB file.
    structure = (
        get_pdb_biological_unit(pdbid)
        if bunit == True
        else pypdb.get_pdb_file(pdbid)
    )

    target = os.path.join(file_pathway, '%s.pdb' % pdbid)
    write_file(target, structure)
def getPDB(self):
    """
    Downloads the PDB file from the Protein Data Bank.

    The download happens at most once: if a path is already stored in
    ``self._pdb`` it is returned directly.

    Returns
    -------

    pdb : str
        Returns the absolute path to the PDB file downloaded from the
        Protein Data Bank.

    Raises
    ------

    ValueError
        If the PDB file could not be obtained for ``self._code``.
    """

    if self._pdb:
        return self._pdb

    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit are not reported as an invalid PDB code.
    try:
        pdb_string = _pypdb.get_pdb_file(self._code, filetype="pdb", compression=False)
    except Exception as e:
        raise ValueError("Invalid PDB code: '%s'" % self._code) from e

    # The file is written to the current working directory.
    filename = _os.path.abspath(self._code + ".pdb")

    with open(filename, "w") as file:
        file.write(pdb_string)

    self._pdb = filename

    return self._pdb
def process_input_pdb(request):
    """Return the chains found in the PDB supplied with the request, as JSON.

    The PDB text comes from the uploaded file ``pdb_file`` and/or from the
    ``pdb_search`` POST field, which is passed to ``pypdb.get_pdb_file``.
    When both are present the search result overrides the upload.

    Args:
        request: The incoming request; ``request.FILES`` and ``request.POST``
            are read.

    Returns:
        A ``JsonResponse`` of the form ``{'chains': [...]}``. The list is
        empty when no PDB text was obtained.
    """
    import pypdb

    # .get() yields None when nothing was uploaded, instead of hiding every
    # possible error behind a bare except.
    pdb_file = request.FILES.get('pdb_file')
    pdb_search = request.POST.get('pdb_search')

    pdb = None
    chains = []

    if pdb_file:
        pdb = pdb_file.read().decode('utf-8')

    # Deliberately checked second: a search term wins over an upload.
    if pdb_search:
        pdb = pypdb.get_pdb_file(pdb_search)

    if pdb:
        chains = get_chains(pdb)

    return JsonResponse({'chains': chains})
def _down(pdb, outdir):
    """Download one PDB file into ``outdir``, retrying on failure.

    Up to 10 attempts are made, sleeping 5 seconds after each failed one.

    Args:
        pdb: PDB identifier; also used as the output file's base name.
        outdir: Directory in which ``<pdb>.pdb`` is written.

    Returns:
        True when the file was downloaded and written, False when every
        attempt failed.
    """
    for _ in range(10):
        try:
            data = pypdb.get_pdb_file(pdb, filetype='pdb', compression=False)
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still abort the
            # retry loop.
            print('PDBDown -> Issue with :', pdb)
            print('Trying again in 5 sec')
            time.sleep(5)
        else:
            break
    else:
        # Loop ended without a successful download.
        print('PDBdown -> Could not download ', pdb)
        return False

    fname = os.path.join(outdir, pdb) + '.pdb'
    # Context manager guarantees the handle is closed even if write() fails.
    with open(fname, 'w') as f:
        f.write(data)
    return True
# NOTE(review): the `return` below is the tail of a function whose header lies
# outside this chunk; it is reproduced unchanged.
    return structureList


# Names of structure-list files that the loop below skips.
ignoreFileList = ["cryoem_n_glycosylated.csv", "xray_n_glycosylated.csv"]
# NOTE(review): this reassignment discards the list above, so no .csv file is
# actually skipped -- confirm whether this is a leftover debugging toggle.
ignoreFileList = [""]

# Structure lists are looked up in "<current working directory>/structures".
rootDir = os.getcwd()
structureListDir = os.path.join(rootDir, "structures")

# Process every .csv file in the structures directory that is not ignored.
for structureListFile in os.listdir(structureListDir):
    if structureListFile.endswith(".csv") and structureListFile not in ignoreFileList:
        print(structureListFile)
        # Output goes to a folder named after the .csv file (extension removed),
        # next to the .csv file itself.
        structureListFileName = os.path.splitext(os.path.basename(structureListFile))[0]
        outputDir = os.path.join(structureListDir, structureListFileName)
        CreateFolder(outputDir)
        currentStructureList = GenerateListOfStructures(structureListDir, structureListFile)
        # Download each listed structure and store it as "<pdbID>.pdb".
        for count, pdbID in enumerate(currentStructureList):
            # `count` is zero-based, so the reported progress is the number of
            # structures already finished before this one.
            print(f'Currently downloading: {pdbID}\nProgress: {count} out of {len(currentStructureList)}\t Progress - {int((count/len(currentStructureList)*100))}%')
            newFileName = pdbID + '.pdb'
            currentPDB = pdb.get_pdb_file(pdbID, filetype='pdb', compression=False)
            WriteFile(outputDir, newFileName, currentPDB)
def get_pdb(ID):
    """Return the uncompressed 'cif' file that pypdb provides for ``ID``.

    Args:
        ID: Identifier passed straight to ``pypdb.get_pdb_file``.

    Returns:
        Whatever ``pypdb.get_pdb_file`` returns for
        ``filetype='cif', compression=False``.
    """
    return pypdb.get_pdb_file(ID, filetype='cif', compression=False)
def _fetch_from_rcsb(pdb_id):
    """Fetch the PDB file for ``pdb_id`` via pypdb and return it encoded as bytes.

    Args:
        pdb_id: Identifier passed to ``pypdb.get_pdb_file``.

    Returns:
        The result of calling ``.encode()`` on the fetched data.

    Raises:
        PDBAddError: If ``pypdb.get_pdb_file`` returned None.
    """
    fetched = pypdb.get_pdb_file(pdb_id, filetype="pdb")
    if fetched is not None:
        return fetched.encode()
    raise PDBAddError(f"no PDB with ID '{pdb_id}' found")
import pypdb
import os

# PDB IDs to download. Each one ends up at "<id>/input/<id>.pdb", relative to
# the current working directory.
pdbs = ['1ubq']

for p in pdbs:
    pdb_file = pypdb.get_pdb_file(p)

    # Create "<id>/input" directly instead of shelling out to mkdir/mv with
    # string-built commands; exist_ok makes re-runs harmless.
    input_dir = os.path.join(p, 'input')
    os.makedirs(input_dir, exist_ok=True)

    # Write the file straight to its final location.
    with open(os.path.join(input_dir, '{}.pdb'.format(p)), 'w') as f:
        f.write(pdb_file)

    print("{} done".format(p))
def submit(request):
    """Handle a job submission: obtain a PDB, validate the form and start the runs.

    The PDB text is taken from the ``pdb_search`` POST field (fetched through
    ``pypdb.get_pdb_file``) or, when that is empty, from the uploaded file
    ``pdb_file``. On a valid form a ``Job`` is saved, a job directory is
    created under ``../media/jobs/<job.id>``, the ATOM records of the selected
    chains are written there and pre-processed with ``sed`` and
    ``gmx editconf``, and the run scripts are started in the background.

    Args:
        request: The incoming request; ``request.POST`` and ``request.FILES``
            are read.

    Returns:
        A redirect to the ``check_job`` page of the new job, or the rendered
        ``main/index.html`` with the bound form when the form is invalid or no
        PDB structure was supplied.
    """
    import subprocess
    import os
    import re
    from . import helpers
    from .forms import JobForm
    import pypdb
    import shutil

    # .get() so that a missing field falls through to the upload path.
    pdb_search = request.POST.get('pdb_search')
    pdb = None
    pdb_filename = None
    available_chains = []

    def run_script(processed_pdb_filename, job_dir, orig_bin_dir, archive_name):
        # Symlink every file of the tool's bin directory into the job directory.
        bin_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', orig_bin_dir)
        for f in os.listdir(bin_dir):
            os.symlink(os.path.join(bin_dir, f), os.path.join(job_dir, f))

        with helpers.change_workingdir(job_dir):
            # The processed PDB lives one level above the tool's job directory.
            shutil.copy(os.path.join('..', processed_pdb_filename), job_dir)
            # Popen without wait(): the scripts are started and left running.
            # `job`, `temperature` and `ph` come from the enclosing scope.
            if job.ph_range:
                subprocess.Popen(['./run_pdg-ph.sh', processed_pdb_filename.split('.')[0], 'MC', archive_name], shell=False)
            else:
                subprocess.Popen(['./run_pdg.sh', str(temperature), str(ph), processed_pdb_filename, archive_name], shell=False)

    if pdb_search:
        pdb = pypdb.get_pdb_file(pdb_search)
        # The search term is user input that becomes a filename: sanitise it
        # the same way as uploaded file names so it cannot contain path
        # separators.
        pdb_filename = re.sub('[^0-9a-zA-Z.]+', '_', pdb_search) + '.pdb'
    else:
        # .get() yields None when nothing was uploaded.
        pdb_file = request.FILES.get('pdb_file')
        if pdb_file:
            pdb_filename = re.sub('[^0-9a-zA-Z.]+', '_', pdb_file.name)
            pdb = pdb_file.read().decode('utf-8')

    if pdb is not None:
        available_chains = [[x, x] for x in get_chains(pdb)]

    form = JobForm(available_chains, request.POST, request.FILES)

    # Without PDB text there is nothing to process: show the form again
    # instead of failing further down.
    if not form.is_valid() or pdb is None:
        return render(request, 'main/index.html', {'form': form, 'nav': 'home'})

    name = form.cleaned_data['name']
    temperature = form.cleaned_data['temperature']
    ph = form.cleaned_data['ph']
    ph_range = form.cleaned_data['ph_range']
    email = form.cleaned_data['email']
    chain = form.cleaned_data['chain']
    tksamc_version = int(form.cleaned_data['tksamc_version'])

    job = Job(name=name, ph=ph, ph_range=ph_range, temperature=temperature, email=email, chain=chain, tksamc_version=tksamc_version)
    job.save()

    job_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../media/jobs/', str(job.id))
    os.makedirs(job_dir)

    # Version 1 selects tksamc, 2 selects gtksamc, 0 selects both.
    tksamc_job_dir = None
    gtksamc_job_dir = None
    if tksamc_version in (0, 1):
        tksamc_job_dir = os.path.join(job_dir, 'tksamc')
        os.makedirs(tksamc_job_dir)
    if tksamc_version in (0, 2):
        gtksamc_job_dir = os.path.join(job_dir, 'gtksamc')
        os.makedirs(gtksamc_job_dir)

    # No chain selected: the character class becomes [^\s], i.e. any
    # non-whitespace chain identifier.
    if not chain:
        chain = [r'^\s']

    # Keep only the ATOM records whose chain column matches the selection.
    with open(os.path.join(job_dir, pdb_filename), 'w') as destination:
        new_pdb = '\n'.join(re.findall(r'^ATOM\s+(?:[^\s]+\s+){3}[%s]\s+.*' % ('|'.join(chain)), pdb, re.MULTILINE))
        destination.write(new_pdb)

    with helpers.change_workingdir(job_dir):
        # Rewrite 'A'-prefixed residue names (e.g. AALA) to the plain name with
        # a leading space, then delete lines containing 'B'-prefixed residue
        # names -- presumably alternate-location records; TODO confirm.
        subprocess.check_output(['/bin/sed', '-i', 's/AALA/ ALA/g;s/ACYS/ CYS/g;s/AASP/ ASP/g;s/AGLU/ GLU/g;s/APHE/ PHE/g;s/AGLY/ GLY/g;s/AHIS/ HIS/g;s/AILE/ ILE/g;s/ALYS/ LYS/g;s/ALEU/ LEU/g;s/AMET/ MET/g;s/AASN/ ASN/g;s/APRO/ PRO/g;s/AGLN/ GLN/g;s/AARG/ ARG/g;s/ASER/ SER/g;s/ATHR/ THR/g;s/AVAL/ VAL/g;s/ATRP/ TRP/g;s/ATYR/ TYR/g', pdb_filename], shell=False)
        subprocess.check_output(['/bin/sed', '-i', '/BALA/d;/BCYS/d;/BASP/d;/BGLU/d;/BPHE/d;/BGLY/d;/BHIS/d;/BILE/d;/BLYS/d;/BLEU/d;/BMET/d;/BASN/d;/BPRO/d;/BGLN/d;/BARG/d;/BSER/d;/BTHR/d;/BVAL/d;/BTRP/d;/BTYR/d', pdb_filename], shell=False)
        # gmx editconf writes the result to processed_<pdb_filename>.
        subprocess.check_output(['/usr/bin/gmx', 'editconf', '-f', pdb_filename, '-c', '-resnr', '1', '-label', 'A', '-o', 'processed_{0}'.format(pdb_filename)], shell=False)

    # Archives are named after the job, falling back to its id.
    if job.name != '':
        base_archive_name = job.name
    else:
        base_archive_name = str(job.id)

    if tksamc_job_dir:
        run_script('processed_{0}'.format(pdb_filename), tksamc_job_dir, 'tksamc_bin', 'tksamc_' + base_archive_name)
    if gtksamc_job_dir:
        run_script('processed_{0}'.format(pdb_filename), gtksamc_job_dir, 'gtksamc_bin', 'gtksamc_' + base_archive_name)

    if email != '':
        job_url = request.build_absolute_uri(reverse('check_job', args=[job.id]))
        helpers.send_email(email, job.name, job_url)

    return HttpResponseRedirect(reverse('check_job', args=[job.id]))