def _indiana_fetch(session, smiles, web_smiles):
    """Fetch a structure for a SMILES string from cheminfov.informatics.indiana.edu.

    The request URL is built from ``smiles``.  ``web_smiles`` is passed to
    ``fetch_file`` as its fourth positional argument (the save name) and
    ``None`` as its fifth.  Returns whatever ``fetch_file`` returns.
    """
    # NOTE(review): the URL embeds the raw ``smiles`` while ``web_smiles`` is
    # only used as the save name -- confirm that this is intended.
    from chimerax.core.fetch import fetch_file
    request_url = ("http://cheminfov.informatics.indiana.edu/rest/thread/d3.py/"
                   "SMILES/%s" % smiles)
    return fetch_file(session, request_url, 'SMILES %s' % smiles, web_smiles, None)
def fetch_eds_map(session, id, type = '2fofc', ignore_cache=False, **kw):
    '''
    Fetch crystallographic density maps from PDBe (formerly the Uppsala Electron Density Server).

    2fofc:    http://www.ebi.ac.uk/pdbe/coordinates/files/1cbs.ccp4
    fofc:     http://www.ebi.ac.uk/pdbe/coordinates/files/1cbs_diff.ccp4

    Parameters:
      session       ChimeraX session; its logger and open_command are used.
      id            entry identifier; it is lower-cased to build the file name.
      type          '2fofc' or 'fofc'.
      ignore_cache  passed through to fetch_file.
      kw            extra keyword arguments passed through to open_data.

    Returns the (models, status) pair produced by open_data, after switching
    every opened model to mesh display.

    Raises UserError if type is not one of the supported map types.
    '''
    # Map type -> suffix appended to the lower-cased id to form the file name.
    map_suffixes = {'2fofc': '.ccp4', 'fofc': '_diff.ccp4'}
    if type not in map_suffixes:
        # Without this check an unknown type left map_name unbound and the
        # function died later with an UnboundLocalError.
        from chimerax.core.errors import UserError
        raise UserError('Unknown EDS map type "%s", must be one of: %s'
                        % (type, ', '.join(sorted(map_suffixes))))

    url_pattern = 'http://www.ebi.ac.uk/pdbe/coordinates/files/%s'

    # Fetch map.
    log = session.logger
    log.status('Fetching %s from PDBe...' % (id,))

    map_name = id.lower() + map_suffixes[type]
    map_url = url_pattern % map_name

    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session, map_url, 'map %s' % id, map_name, 'EDS',
                          ignore_cache=ignore_cache)

    model_name = 'eds %s' % id
    # Difference (fofc) maps are opened with polar_values enabled.
    models, status = session.open_command.open_data(filename, format = 'ccp4',
                                                    name = model_name,
                                                    polar_values = (type == 'fofc'),
                                                    **kw)
    for v in models:
        v.set_display_style('mesh')
    return models, status
def fetch_alphafold_pae(session, uniprot_id, ignore_matrix_cache=False,
                        ignore_download_cache=False):
    '''
    Return the predicted aligned error data for an AlphaFold prediction,
    downloading the JSON file from alphafold.ebi.ac.uk when needed.

    Results are memoized per UniProt id in the module-level _pae_cache;
    ignore_matrix_cache skips the lookup in that in-memory cache (the fresh
    result is still stored), while ignore_download_cache is passed to
    fetch_file as ignore_cache.
    '''
    if not ignore_matrix_cache:
        cached = _pae_cache.get(uniprot_id, None)
        if cached is not None:
            return cached

    from chimerax.core.fetch import fetch_file
    json_name = f'AF-{uniprot_id}-F1-predicted_aligned_error_v1.json'
    json_url = f'https://alphafold.ebi.ac.uk/files/{json_name}'
    json_path = fetch_file(session, json_url, f'Alphafold {uniprot_id} PAE',
                           json_name, 'AlphaFold-PAE',
                           ignore_cache=ignore_download_cache)

    # Parse, remember for later calls, and hand back the parsed result.
    matrix = parse_pae_file(json_path)
    _pae_cache[uniprot_id] = matrix
    return matrix
def _fetch_assemblies(session, pdb_id, url_template, file_template, *,
                      save_template=None, max_assemblies=None,
                      ignore_cache=False, transmit_compressed=True, **kw):
    """Fetch and open numbered bioassembly files for a PDB entry.

    Assemblies are requested starting at number 1 and counting upward.  The
    loop ends when fetch_file raises UserError or when max_assemblies (if
    given) has been reached.

    file_template is formatted with (lower-cased id, assembly number) and the
    result is substituted into url_template.  save_template, when given, is
    formatted the same way to produce the cache file name; otherwise the
    remote file name is reused.  Extra keyword arguments go to open_data.

    Returns a list of models: one entry per assembly when an assembly opens
    as several models (they are combined by _group_subunit_models), otherwise
    the opened models themselves.
    """
    from chimerax.core.fetch import fetch_file
    from chimerax.core.errors import UserError

    lower_id = pdb_id.lower()
    assemblies = []
    number = 1
    while True:
        if max_assemblies is not None and number > max_assemblies:
            break

        remote_name = file_template % (lower_id, number)
        remote_url = url_template % remote_name
        label = '%s bioassembly %d' % (pdb_id, number)
        if save_template is None:
            local_name = remote_name
        else:
            local_name = save_template % (lower_id, number)
        is_gzipped = remote_name.endswith('.gz')

        try:
            path = fetch_file(session, remote_url, label, local_name, 'PDB',
                              uncompress=is_gzipped,
                              transmit_compressed=transmit_compressed,
                              ignore_cache=ignore_cache)
        except UserError:
            # A failed fetch is treated as "no more assemblies".
            break

        opened, status = session.open_command.open_data(path, name=label, **kw)
        if len(opened) > 1:
            assemblies.append(_group_subunit_models(session, opened, label))
        else:
            assemblies.extend(opened)
        number += 1

    return assemblies
def fetch_ihm(session, id, ignore_cache=False, **kw):
    '''
    Fetch IHM models from PDB-Dev.

    https://pdb-dev.wwpdb.org/cif/PDBDEV_00000012.cif

    Identifiers shorter than 8 characters are left-padded with zeros before
    the file name is built.  Returns the (models, status) pair from open_data;
    extra keyword arguments are forwarded to it.
    '''
    # Left-pad to 8 characters; longer identifiers are used unchanged.
    full_id = id.rjust(8, '0')

    logger = session.logger
    logger.status('Fetching %s from PDB-Dev...' % (full_id,))

    cif_name = 'PDBDEV_%s.cif' % full_id
    cif_url = 'https://pdb-dev.wwpdb.org/cif/%s' % cif_name

    from chimerax.core.fetch import fetch_file
    cif_path = fetch_file(session, cif_url, 'IHM %s' % full_id, cif_name, 'PDBDev',
                          ignore_cache=ignore_cache)

    logger.status('Opening %s' % cif_name)
    return session.open_command.open_data(cif_path, format = 'ihm',
                                          name = cif_name, **kw)
def fetch_uniprot_accession_info(session, accession, ignore_cache=False):
    """Download and parse the UniProt XML record for an accession code.

    Returns a 3-tuple: the sequence text with all whitespace removed, the
    full name taken from the entry's recommendedName/submittedName element,
    and the list of the entry's <feature> DOM nodes.

    Raises InvalidAccessionError when the document has no <uniprot> root
    child, and AssertionError when the entry has no <sequence> element.
    """
    logger = session.logger
    logger.status("Fetch UniProt accession code %s..." % accession)

    from chimerax.core.fetch import fetch_file
    xml_name = "%s.xml" % accession
    xml_path = fetch_file(session, "https://www.uniprot.org/uniprot/%s.xml" % accession,
                          "%s UniProt info" % accession, xml_name, "UniProt",
                          ignore_cache=ignore_cache)

    logger.status("Parsing %s" % xml_name)
    from xml.dom.minidom import parse
    document = parse(xml_path)

    def children_tagged(parent, tag_names):
        # Direct children whose tagName is one of tag_names; nodes without a
        # tagName attribute (e.g. text nodes) never match.
        return [node for node in parent.childNodes
                if getattr(node, "tagName", None) in tag_names]

    def first_child(parent, tag_name):
        # Raises IndexError when there is no such child.
        return children_tagged(parent, (tag_name,))[0]

    try:
        root = first_child(document, "uniprot")
    except IndexError:
        raise InvalidAccessionError("Invalid UniProt accession number: %s" % accession)

    entry = first_child(root, "entry")
    try:
        seq_node = first_child(entry, "sequence")
    except (KeyError, IndexError):
        raise AssertionError("No sequence for accession %s in UniProt info" % accession)

    protein = first_child(entry, "protein")
    name_node = children_tagged(protein, ("recommendedName", "submittedName"))[0]
    full_name = first_child(name_node, "fullName").firstChild.nodeValue
    features = children_tagged(entry, ("feature",))

    sequence = "".join(ch for ch in seq_node.firstChild.nodeValue if not ch.isspace())
    return sequence, full_name, features
def _get_template(session, name):
    """Get Chemical Component Dictionary (CCD) entry

    Fetches <name>.cif from ligand-expo.rcsb.org into the 'CCD' cache and
    returns what fetch_file returns, or None if the fetch raises UserError
    or OSError.
    """
    from chimerax.core.fetch import fetch_file
    cif_name = '%s.cif' % name
    # The server groups reports under the first character of the name.
    cif_url = "http://ligand-expo.rcsb.org/reports/%s/%s/%s.cif" % (name[0], name, name)
    try:
        template_path = fetch_file(session, cif_url, 'CCD %s' % name, cif_name, 'CCD')
    except (UserError, OSError):
        template_path = None
    return template_path
def _get_template(session, name):
    """Get Chemical Component Dictionary (CCD) entry

    Fetches <name>.cif from ligand-expo.rcsb.org into the 'CCD' cache and
    returns what fetch_file returns.  If the fetch raises UserError, a
    warning is logged and None is returned.
    """
    from chimerax.core.fetch import fetch_file
    cif_name = '%s.cif' % name
    # The server groups reports under the first character of the name.
    cif_url = "http://ligand-expo.rcsb.org/reports/%s/%s/%s.cif" % (name[0], name, name)
    try:
        template_path = fetch_file(session, cif_url, 'CCD %s' % name, cif_name, 'CCD')
    except UserError:
        session.logger.warning(
            "Unable to fetch template for '%s': might be missing bonds" % name)
        return None
    else:
        return template_path
def fetch_mmtf(session, pdb_id, ignore_cache=False, **kw):
    """Fetch an MMTF file for a PDB entry from mmtf.rcsb.org and open it.

    The identifier must be exactly 4 characters.  The upper-cased id is used
    in the request URL, while the lower-cased id names the cached file and
    the opened model.  The download is gzip-compressed and is uncompressed by
    fetch_file.  Extra keyword arguments are forwarded to open_data.

    Returns whatever open_data returns.
    Raises UserError when the identifier is not 4 characters long.
    """
    if len(pdb_id) != 4:
        # Spelling of "identifiers" corrected in this user-facing message.
        raise UserError("PDB identifiers are 4 characters long, got %r" % pdb_id)

    pdb_id = pdb_id.lower()
    mmtf_name = '%s.mmtf' % pdb_id
    url = 'http://mmtf.rcsb.org/v1.0/full/%s.mmtf.gz' % pdb_id.upper()

    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session, url, 'MMTF %s' % pdb_id, mmtf_name, 'PDB',
                          ignore_cache=ignore_cache, uncompress=True)

    session.logger.status("Opening MMTF %s" % (pdb_id,))
    return session.open_command.open_data(filename, format='mmtf', name=pdb_id, **kw)
def find_doi_zip_archive_url(session, doi):
    """Return the URL of the single zip archive linked from a DOI landing page.

    The page at http://doi.org/<doi> is always re-downloaded (the cache is
    ignored) and scanned by find_link_in_html for links containing '.zip'.

    Raises UserError when the page links to no zip archive or to more than one.
    """
    landing_url = 'http://doi.org/%s' % doi
    page_path = fetch_file(session, landing_url, 'doi %s' % doi, save_name = 'temp.html',
                           save_dir = None, uncompress = True, ignore_cache=True)

    # Ick.  Scrape this web page looking for a zip file url.
    urls = find_link_in_html(page_path, '.zip')
    count = len(urls)
    if count == 1:
        return urls.pop()

    from chimerax.core.errors import UserError
    if count == 0:
        raise UserError('Found no zip archives at DOI "%s"' % doi)
    raise UserError('Found multiple zip archives at DOI "%s": %s'
                    % (doi, ', '.join(urls)))
def fetch_pdb(session, pdb_id, *, fetch_source="rcsb", ignore_cache=False,
        structure_factors=False, over_sampling=1.5, # for ChimeraX-Clipper plugin
        **kw):
    """Open a PDB-format entry by its 4-character identifier.

    A copy under /databases/mol/pdb is used when it exists; otherwise the
    file is downloaded from the URL pattern registered for fetch_source in
    _pdb_sources.  Extra keyword arguments are forwarded to open_data.

    With structure_factors set, the structure factors are also fetched via
    the ChimeraX-Clipper plugin and added to the map manager of the first
    opened model; over_sampling values below 1 are reset to 1 with a warning.

    Returns (models, status), or ([crystal manager], status) when structure
    factors were requested.

    Raises UserError for a wrong-length identifier, an unrecognized
    fetch_source, or when structure factors are requested but the Clipper
    plugin cannot be imported.
    """
    from chimerax.core.errors import UserError
    if len(pdb_id) != 4:
        raise UserError('PDB identifiers are 4 characters long, got "%s"' % pdb_id)

    if structure_factors:
        # Fail early if the plugin needed later is not installed.
        try:
            from chimerax.clipper.io import fetch_cif
        except ImportError:
            raise UserError('Working with structure factors requires the '
                'ChimeraX_Clipper plugin, available from the Tool Shed')

    import os
    pdb_id = pdb_id.lower()

    # check on local system -- TODO: configure location
    local_copy = "/databases/mol/pdb/%s/pdb%s.ent" % (pdb_id[1:3], pdb_id)
    if os.path.exists(local_copy):
        session.logger.info("Fetching PDB %s from system cache: %s" % (pdb_id, local_copy))
        filename = local_copy
    else:
        url_pattern = _pdb_sources.get(fetch_source, None)
        if url_pattern is None:
            raise UserError('unrecognized PDB source "%s"' % fetch_source)
        from chimerax.core.fetch import fetch_file
        filename = fetch_file(session, url_pattern % pdb_id, 'PDB %s' % pdb_id,
                              "%s.pdb" % pdb_id, 'PDB', ignore_cache=ignore_cache)

    session.logger.status("Opening PDB %s" % (pdb_id,))
    models, status = session.open_command.open_data(filename, format='pdb',
                                                    name=pdb_id, **kw)
    if not structure_factors:
        return models, status

    sf_file = fetch_cif.fetch_structure_factors(session, pdb_id,
                                                fetch_source=fetch_source,
                                                ignore_cache=ignore_cache)
    from chimerax.clipper import get_map_mgr
    map_mgr = get_map_mgr(models[0], create=True)
    if over_sampling < 1:
        session.logger.warning(
            'Map over-sampling rate cannot be less than 1. Resetting to 1.0')
        over_sampling = 1
    map_mgr.add_xmapset_from_file(sf_file, oversampling_rate = over_sampling)
    return [map_mgr.crystal_mgr], status
def fetch_autopack_results(session, results_name, database=default_autopack_database,
                           ignore_cache=False):
    """Download <results_name>.apr.json from the autoPACK database.

    The file is requested from <database>/results/ and stored in the
    'cellPACK' cache.  Returns what fetch_file returns.
    """
    # Fetch results file.
    apr_name = results_name + '.apr.json'
    apr_url = database + ('/results/%s.apr.json' % results_name)
    session.logger.status('Fetching %s from web %s...' % (results_name, apr_url))

    from chimerax.core.fetch import fetch_file
    return fetch_file(session, apr_url, 'results ' + results_name, apr_name,
                      'cellPACK', ignore_cache=ignore_cache)
def fetch_structure_factors(session, pdb_id, fetch_source='rcsb', ignore_cache=False, **kw):
    '''Get a structure factor file in CIF format by PDB identifier via the Internet.

    fetch_source is used directly when it is a key of _cif_sources; otherwise
    it is translated through _valid_db_commands.  The file is stored in the
    'PDB-SF' cache under the name given by _cif_filenames, and is
    uncompressed by fetch_file when _compressed says so for that source.

    Returns the path of the fetched file.

    Raises UserError when the identifier is not 4 characters long, when
    fetch_source cannot be resolved, or when the fetched file does not look
    like a CIF file (in which case the file is deleted first).

    The extra keyword arguments (kw) are accepted but not used.
    '''
    if len(pdb_id) != 4:
        raise UserError(
            'PDB identifiers are 4 characters long, got "{}"'.format(pdb_id))

    if fetch_source not in _cif_sources:
        # Keep the name the caller asked for: the original code overwrote
        # fetch_source with None before formatting the message, so the error
        # always reported "fromDatabase None".
        requested_source = fetch_source
        fetch_source = _valid_db_commands.get(requested_source, None)
        if fetch_source is None:
            raise UserError(
                'Fetching structure factors is not implemented for "fromDatabase {}"! Must be one of the following: {}'
                .format(requested_source, ', '.join(_valid_db_commands.keys())))

    import os
    pdb_id = pdb_id.lower()
    save_name = _cif_filenames[fetch_source].format(pdb_id)
    url = _cif_sources[fetch_source].format(pdb_id)

    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session, url, '{} structure factors'.format(pdb_id),
                          save_name, 'PDB-SF',
                          uncompress=_compressed[fetch_source],
                          ignore_cache=ignore_cache)

    # Double check that a cif file was downloaded instead of an HTML error
    # message saying the ID does not exist
    with open(filename, 'r') as f:
        first_line = f.readline()
    # The file is closed by the with-block before it is removed.
    if not first_line.startswith(('data_', '#')):
        os.remove(filename)
        raise UserError(
            'Structure factors could not be retrieved! Are you '
            'sure this is an x-ray structure?')
    return filename
def fetch_doi(session, doi, url, ignore_cache = False):
    """Return the local path of the zip archive associated with a DOI.

    Parameters:
      session       ChimeraX session, passed on to the fetch helpers.
      doi           DOI string; it must contain a "/".
      url           URL of the zip archive, or None/empty to discover it by
                    scanning the DOI landing page (find_doi_zip_archive_url).
      ignore_cache  when true, skip the lookup in the cache directories.

    Unless ignore_cache is set, each cache directory is searched under
    DOI/<doi> for exactly one matching zip file: the base name of url when a
    url is given, otherwise any file ending in '.zip'.  On a miss the archive
    is downloaded into DOI/<doi> under the first cache directory (or with
    save_dir=None when there are no cache directories).

    Raises UserError when doi contains no "/".
    """
    if '/' not in doi:
        from chimerax.core.errors import UserError
        raise UserError('DOI does not contain required "/", got "%s"' % doi)

    from chimerax.core.fetch import cache_directories, fetch_file
    from os.path import join, isdir, basename
    dirs = cache_directories()
    if not ignore_cache:
        from os import listdir
        for d in dirs:
            path = join(d, 'DOI', doi)
            if not isdir(path):
                continue
            if url:
                zip_name = basename(url)
                zf = [f for f in listdir(path) if f == zip_name]
            else:
                zf = [f for f in listdir(path) if f.endswith('.zip')]
            # Only an unambiguous single match counts as a cache hit.
            if len(zf) == 1:
                return join(path, zf[0])

    # Use the same truthiness test as the cache lookup above, so an empty url
    # triggers the DOI page lookup instead of an attempt to fetch ''.
    if url:
        zip_file_url = url
    else:
        zip_file_url = find_doi_zip_archive_url(session, doi)

    zip_filename = basename(zip_file_url)
    if dirs:
        from os import makedirs
        save_dir = join(dirs[0], 'DOI', doi)
        makedirs(save_dir, exist_ok = True)
    else:
        save_dir = None

    # The cache was already consulted above, so always download here.
    filename = fetch_file(session, zip_file_url, 'zip %s %s' % (doi, zip_filename),
                          zip_filename, save_dir = save_dir, uncompress = False,
                          ignore_cache=True)
    return filename
def fetch_pubchem(session, pubchem_id, *, ignore_cache=False, **kw):
    """Fetch the 3D SDF record for a PubChem compound id and open it.

    The identifier must consist of digits only.  The file is cached as
    <id>.sdf in the 'PubChem' cache and opened under the name
    "pubchem:<id>"; extra keyword arguments are forwarded to open_data.

    Returns whatever open_data returns.
    Raises UserError when the identifier is not numeric.
    """
    from chimerax.core.errors import UserError
    if not pubchem_id.isdigit():
        raise UserError('PubChem identifiers are numeric, got "%s"' % pubchem_id)

    sdf_url = ("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/%s/SDF?record_type=3d"
               % pubchem_id)
    sdf_name = "%s.sdf" % pubchem_id

    from chimerax.core.fetch import fetch_file
    sdf_path = fetch_file(session, sdf_url, 'PubChem %s' % pubchem_id, sdf_name,
                          'PubChem', ignore_cache=ignore_cache)

    session.logger.status("Opening PubChem %s" % (pubchem_id, ))
    model_name = "pubchem:" + pubchem_id
    return session.open_command.open_data(sdf_path, format='sdf', name=model_name, **kw)
def fetch_emdb(session, emdb_id, ignore_cache=False, **kw):
    """Fetch an EMDB density map by identifier and open it as a CCP4 map.

    The FTP mirror is chosen from the suffix of the local host name:
    ftp.wwpdb.org for .edu/.gov hosts, ftp.emdb-china.org for .cn hosts,
    and ftp.ebi.ac.uk otherwise.  The gzipped map is uncompressed by
    fetch_file and cached as emd_<id>.map in the 'EMDB' cache.  Extra keyword
    arguments are forwarded to open_data.

    Returns the (models, status) pair from open_data.
    Raises UserError when the identifier has fewer than 4 characters.
    """
    from chimerax.core.errors import UserError
    if len(emdb_id) < 4:
        raise UserError("EMDB identifiers are at least 4 characters long")

    import socket
    host = socket.gethostname()
    if host.endswith(('.edu', '.gov')):
        # TODO: RCSB https is 20x slower than ftp.  Cole Christie looking into it.
        # The RCSB ftp does not report file size so progress messages don't
        # indicate how long it will take.
        mirror_pattern = 'ftp://ftp.wwpdb.org/pub/emdb/structures/EMD-%s/map/%s.gz'
    elif host.endswith('.cn'):
        mirror_pattern = 'ftp://ftp.emdb-china.org/structures/EMD-%s/map/%s.gz'
    else:
        mirror_pattern = 'ftp://ftp.ebi.ac.uk/pub/databases/emdb/structures/EMD-%s/map/%s.gz'

    map_name = 'emd_%s.map' % emdb_id
    map_url = mirror_pattern % (emdb_id, map_name)

    from chimerax.core.fetch import fetch_file
    map_path = fetch_file(session, map_url, 'map %s' % emdb_id, map_name, 'EMDB',
                          uncompress=True, ignore_cache=ignore_cache)

    return session.open_command.open_data(map_path, format='ccp4',
                                          name='emdb %s' % emdb_id, **kw)
def fetch_homologene(session, ident, ignore_cache=True, **kw):
    """Fetch the HomoloGene FASTA file for 'ident' and open it.

    The URL is built from the module-level _URL pattern; the file is cached
    as <ident>.fa in the "HomoloGene" cache (the cache is ignored by
    default) and then opened through open_data with alignment=False.

    Returns the (models, status) pair from open_data.
    """
    # NOTE(review): **kw is accepted but not forwarded to open_data -- confirm
    # that this is intentional.
    logger = session.logger
    fasta_url = _URL % ident
    logger.status("Fetching HomoloGene %s" % ident)
    fasta_name = "%s.fa" % ident

    from chimerax.core.fetch import fetch_file
    fasta_path = fetch_file(session, fasta_url, "HomoloGene %s" % ident, fasta_name,
                            "HomoloGene", ignore_cache=ignore_cache, uncompress=True)

    logger.status("Opening HomoloGene %s" % ident)
    return session.open_command.open_data(fasta_path, alignment=False, name=ident)
def fetch_mmcif(
        session, pdb_id, fetch_source="rcsb", ignore_cache=False,
        structure_factors=False, over_sampling=1.5,  # for ChimeraX-Clipper plugin
        **kw):
    """Get mmCIF file by PDB identifier via the Internet

    Unless fetch_source ends with 'updated', a copy under
    /databases/mol/mmCIF is used when it exists.  Otherwise the file is
    downloaded from the URL pattern registered for fetch_source in
    _mmcif_sources, and the download is deleted and rejected if its first
    line does not start with 'data_' or '#'.  Downloads are cached under
    'PDB', or under fetch_source itself for the 'updated' sources.

    With structure_factors set, the structure factors are also fetched via
    the ChimeraX-Clipper plugin and added to the map manager of the first
    opened model; over_sampling values below 1 are reset to 1 with a warning.

    Returns (models, status), or ([crystal manager], status) when structure
    factors were requested.  Extra keyword arguments go to open_data.

    Raises UserError for a wrong-length identifier, an unrecognized source,
    an invalid download, or a missing Clipper plugin.
    """
    if not _initialized:
        _initialize(session)

    if len(pdb_id) != 4:
        raise UserError('PDB identifiers are 4 characters long, got "%s"' % pdb_id)

    if structure_factors:
        # Fail early if the plugin needed later is not installed.
        try:
            from chimerax.clipper.io import fetch_cif
        except ImportError:
            raise UserError(
                'Working with structure factors requires the '
                'ChimeraX_Clipper plugin, available from the Tool Shed')

    import os
    pdb_id = pdb_id.lower()

    wants_updated = fetch_source.endswith('updated')
    cache = fetch_source if wants_updated else 'PDB'
    filename = None
    if not wants_updated:
        # check on local system -- TODO: configure location
        local_copy = "/databases/mol/mmCIF/%s/%s.cif" % (pdb_id[1:3], pdb_id)
        if os.path.exists(local_copy):
            session.logger.info("Fetching mmCIF %s from system cache: %s" % (pdb_id, local_copy))
            filename = local_copy

    if filename is None:
        url_pattern = _mmcif_sources.get(fetch_source, None)
        if url_pattern is None:
            raise UserError('unrecognized mmCIF/PDB source "%s"' % fetch_source)
        from chimerax.core.fetch import fetch_file
        filename = fetch_file(session, url_pattern % pdb_id, 'mmCIF %s' % pdb_id,
                              "%s.cif" % pdb_id, cache, ignore_cache=ignore_cache)
        # double check that a mmCIF file was downloaded instead of an
        # HTML error message saying the ID does not exist
        with open(filename, 'r') as f:
            first_line = f.readline()
        if not first_line.startswith(('data_', '#')):
            # The file is already closed here, so it can be removed.
            os.remove(filename)
            raise UserError("Invalid mmCIF identifier")

    session.logger.status("Opening mmCIF %s" % (pdb_id, ))
    models, status = session.open_command.open_data(filename, format='mmcif',
                                                    name=pdb_id, **kw)
    if not structure_factors:
        return models, status

    sf_file = fetch_cif.fetch_structure_factors(session, pdb_id,
                                                fetch_source=fetch_source,
                                                ignore_cache=ignore_cache)
    from chimerax.clipper import get_map_mgr
    map_mgr = get_map_mgr(models[0], create=True)
    if over_sampling < 1:
        session.logger.warning(
            'Map over-sampling rate cannot be less than 1. Resetting to 1.0')
        over_sampling = 1
    map_mgr.add_xmapset_from_file(sf_file, oversampling_rate=over_sampling)
    return [map_mgr.crystal_mgr], status
def fetch_autopack(session, path, results_name, database=default_autopack_database,
                   ignore_cache=False):
    """Build a model hierarchy from an autoPACK results file.

    path is the local results file read by read_apr.read_autopack_results.
    The recipe it points to, the compartment surface/geometry files listed in
    the recipe, and the ingredient and mesh files are each fetched into the
    'cellPACK' cache, with 'autoPACKserver' in their locations replaced by
    database.

    Returns a Model named results_name containing one child Model per
    compartment; ingredient surfaces are added beneath per-compartment
    sub-models named by the second element of each ingredient id.
    """
    from . import read_apr
    from os.path import basename
    from urllib.parse import urljoin
    from chimerax.core.fetch import fetch_file
    from chimerax.core.models import Model
    from chimerax.surface.collada import read_collada_surfaces

    def on_server(location):
        # Substitute the real database address for the placeholder host.
        return location.replace('autoPACKserver', database)

    def cached(url, label, save_name):
        return fetch_file(session, url, label, save_name, 'cellPACK',
                          ignore_cache=ignore_cache)

    recipe_loc, pieces = read_apr.read_autopack_results(path)
    recipe_url = on_server(recipe_loc)
    recipe_path = cached(recipe_url, 'recipe for ' + results_name, basename(recipe_loc))
    ingr_filenames, comp_surfaces = read_apr.read_autopack_recipe(recipe_path)

    top_model = Model(results_name, session)

    # Fetch compartment surface files.
    compartments = []
    for comp_name, comp_loc, geom_loc in comp_surfaces:
        compartment = Model(comp_name, session)
        if comp_loc is not None:
            comp_file = basename(comp_loc)
            comp_path = cached(on_server(comp_loc),
                               'compartment surface ' + comp_file, comp_file)
            surfaces, _msg = read_collada_surfaces(session, comp_path, 'representation')
            compartment.add(surfaces)
        if geom_loc is not None:
            geom_file = basename(geom_loc)
            geom_path = cached(on_server(geom_loc),
                               'compartment bounds ' + geom_file, geom_file)
            surfaces, _msg = read_collada_surfaces(session, geom_path, 'geometry')
            # Geometry surfaces are added hidden.
            for surf in surfaces:
                surf.display = False
            compartment.add(surfaces)
        compartments.append(compartment)
    top_model.add(compartments)

    # Added ingredient surfaces to compartments
    mesh_path_by_ingredient = {}
    # Keyed by compartment name, and later also by (name, interior_or_surf).
    groups = {c.name: c for c in compartments}
    # Get reproducible ordering of ingredients
    for ingr_id in sorted(pieces.keys()):
        ingr_filename = ingr_filenames[ingr_id]
        mesh_path = mesh_path_by_ingredient.get(ingr_filename, None)
        if mesh_path is None:
            # First use of this ingredient file: fetch it, then its mesh.
            ingr_path = cached(urljoin(recipe_url, ingr_filename),
                               'ingredient ' + ingr_filename, ingr_filename)
            mesh_loc = read_apr.read_ingredient(ingr_path)
            mesh_file = basename(mesh_loc)
            mesh_path = cached(on_server(mesh_loc), 'mesh ' + mesh_file, mesh_file)
            mesh_path_by_ingredient[ingr_filename] = mesh_path

        comp_name, interior_or_surf, ingr_name = ingr_id
        group_key = (comp_name, interior_or_surf)
        group = groups.get(group_key, None)
        if group is None:
            group = Model(interior_or_surf, session)
            groups[comp_name].add([group])
            groups[group_key] = group

        surface = read_apr.create_surface(session, mesh_path, ingr_name, pieces[ingr_id])
        group.add([surface])

    return top_model