Beispiel #1
0
def _indiana_fetch(session, smiles, web_smiles):
    """Fetch data for a SMILES string from the Indiana University web service.

    session: passed through to ``fetch_file``.
    smiles: SMILES string; interpolated into the request URL and the
        status name shown while fetching.
    web_smiles: passed to ``fetch_file`` as the save name.

    Returns whatever ``fetch_file`` returns (presumably the local file path).
    """
    from chimerax.core.fetch import fetch_file
    # NOTE(review): the URL is built from ``smiles`` while the save name uses
    # ``web_smiles`` -- confirm against the caller that the raw string is
    # intended in the URL.
    url = ("http://cheminfov.informatics.indiana.edu/rest/thread/d3.py/"
           "SMILES/%s" % smiles)
    # The final positional argument is the save directory; None is passed.
    return fetch_file(session, url, 'SMILES %s' % smiles, web_smiles, None)
Beispiel #2
0
def fetch_eds_map(session, id, type = '2fofc', ignore_cache=False, **kw):
  '''
  Fetch a crystallographic density map from PDBe (formerly the Uppsala
  Electron Density Server), open it and show it as a mesh.

  2fofc: http://www.ebi.ac.uk/pdbe/coordinates/files/1cbs.ccp4
   fofc: http://www.ebi.ac.uk/pdbe/coordinates/files/1cbs_diff.ccp4

  id is the entry identifier (lower-cased to build the file name), type is
  '2fofc' or 'fofc', ignore_cache is forwarded to fetch_file and any extra
  keywords are forwarded to open_data.

  Returns the (models, status) pair produced by open_data.
  Raises UserError if type is neither '2fofc' nor 'fofc'.
  '''

  # Validate the map type before any logging or network activity so that an
  # unknown type produces a clear error instead of an unbound variable.
  if type == 'fofc':
    map_name = id.lower() + '_diff.ccp4'
  elif type == '2fofc':
    map_name = id.lower() + '.ccp4'
  else:
    from chimerax.core.errors import UserError
    raise UserError('Unknown EDS map type "%s", must be "2fofc" or "fofc"'
                    % (type,))

  url_pattern = 'http://www.ebi.ac.uk/pdbe/coordinates/files/%s'
  map_url = url_pattern % map_name

  # Fetch map.
  log = session.logger
  log.status('Fetching %s from PDBe...' % (id,))

  from chimerax.core.fetch import fetch_file
  filename = fetch_file(session, map_url, 'map %s' % id, map_name, 'EDS',
                        ignore_cache=ignore_cache)

  # Difference (fofc) maps are opened with polar_values enabled.
  model_name = 'eds %s' % id
  models, status = session.open_command.open_data(
      filename, format = 'ccp4', name = model_name,
      polar_values = (type == 'fofc'), **kw)
  for v in models:
    v.set_display_style('mesh')

  return models, status
Beispiel #3
0
def fetch_alphafold_pae(session,
                        uniprot_id,
                        ignore_matrix_cache=False,
                        ignore_download_cache=False):
    '''
    Return the predicted aligned error (PAE) data for an AlphaFold prediction,
    downloading it from the EMBL AlphaFold server when needed.

    Parsed results are remembered per UniProt id in the module-level
    _pae_cache; ignore_matrix_cache skips the lookup in that cache (the fresh
    result is still stored), and ignore_download_cache is forwarded to
    fetch_file as ignore_cache.
    '''
    if not ignore_matrix_cache:
        cached = _pae_cache.get(uniprot_id)
        if cached is not None:
            return cached

    from chimerax.core.fetch import fetch_file
    json_name = f'AF-{uniprot_id}-F1-predicted_aligned_error_v1.json'
    json_path = fetch_file(session,
                           f'https://alphafold.ebi.ac.uk/files/{json_name}',
                           f'Alphafold {uniprot_id} PAE',
                           json_name,
                           'AlphaFold-PAE',
                           ignore_cache=ignore_download_cache)

    # Parse once and remember the result for later calls.
    matrix = parse_pae_file(json_path)
    _pae_cache[uniprot_id] = matrix
    return matrix
Beispiel #4
0
def _fetch_assemblies(session, pdb_id, url_template, file_template, *,
                      save_template=None, max_assemblies=None, ignore_cache=False,
                      transmit_compressed=True, **kw):
    """Fetch and open consecutively numbered bioassemblies for a PDB entry.

    Assembly numbers start at 1 and increase until fetch_file raises
    UserError or max_assemblies (when given) has been reached.  file_template
    is formatted with (lower-case id, number) and the result is inserted into
    url_template; save_template, when given, names the saved file instead.
    Extra keywords are forwarded to open_data.

    Returns a flat list of models; when one assembly opens as several models
    they are replaced by the single result of _group_subunit_models.
    """
    from chimerax.core.fetch import fetch_file
    from chimerax.core.errors import UserError

    lower_id = pdb_id.lower()
    collected = []
    index = 1
    while max_assemblies is None or index <= max_assemblies:
        remote_name = file_template % (lower_id, index)
        url = url_template % remote_name
        label = '%s bioassembly %d' % (pdb_id, index)
        if save_template is None:
            local_name = remote_name
        else:
            local_name = save_template % (lower_id, index)
        try:
            path = fetch_file(session, url, label, local_name, 'PDB',
                              uncompress=remote_name.endswith('.gz'),
                              transmit_compressed=transmit_compressed,
                              ignore_cache=ignore_cache)
        except UserError:
            # A failed fetch ends the enumeration of assemblies.
            break
        opened, status = session.open_command.open_data(path, name=label, **kw)
        if len(opened) > 1:
            collected.append(_group_subunit_models(session, opened, label))
        else:
            collected.extend(opened)
        index += 1

    return collected
Beispiel #5
0
def fetch_ihm(session, id, ignore_cache=False, **kw):
  '''
  Fetch an IHM model from PDB-Dev and open it.

  Identifiers shorter than 8 characters are left-padded with zeros, e.g.
  "12" requests https://pdb-dev.wwpdb.org/cif/PDBDEV_00000012.cif

  ignore_cache is forwarded to fetch_file; extra keywords go to open_data.
  Returns the (models, status) pair from open_data.
  '''

  # Left-pad with zeros to 8 characters; longer ids are used unchanged.
  full_id = id.rjust(8, '0')

  logger = session.logger
  logger.status('Fetching %s from PDB-Dev...' % (full_id,))

  cif_name = 'PDBDEV_%s.cif' % full_id
  cif_url = 'https://pdb-dev.wwpdb.org/cif/%s' % cif_name

  from chimerax.core.fetch import fetch_file
  cif_path = fetch_file(session, cif_url, 'IHM %s' % full_id, cif_name,
                        'PDBDev', ignore_cache=ignore_cache)

  logger.status('Opening %s' % cif_name)
  return session.open_command.open_data(cif_path, format = 'ihm',
                                        name = cif_name, **kw)
Beispiel #6
0
def fetch_uniprot_accession_info(session, accession, ignore_cache=False):
    """Fetch the UniProt XML record for an accession and extract key fields.

    Returns a 3-tuple (sequence, full_name, features): the sequence text with
    all whitespace removed, the text of the fullName element under the first
    recommendedName/submittedName element, and the list of "feature" DOM
    elements that are direct children of the entry.

    Raises InvalidAccessionError when the document has no "uniprot" element
    and AssertionError when the entry has no "sequence" element.
    """
    session.logger.status("Fetch UniProt accession code %s..." % accession)
    from chimerax.core.fetch import fetch_file
    xml_name = "%s.xml" % accession
    xml_path = fetch_file(session,
                          "https://www.uniprot.org/uniprot/%s.xml" % accession,
                          "%s UniProt info" % accession, xml_name, "UniProt",
                          ignore_cache=ignore_cache)

    session.logger.status("Parsing %s" % xml_name)
    import xml.dom.minidom
    document = xml.dom.minidom.parse(xml_path)

    def children_tagged(parent, tag_names):
        # Direct children whose tagName is one of tag_names; nodes without a
        # tagName attribute (such as text nodes) are skipped.
        return [node for node in parent.childNodes
                if getattr(node, "tagName", None) in tag_names]

    def first_child(parent, tag_name):
        # Raises IndexError when no direct child carries tag_name.
        return children_tagged(parent, (tag_name,))[0]

    try:
        uniprot = first_child(document, "uniprot")
    except IndexError:
        raise InvalidAccessionError("Invalid UniProt accession number: %s" % accession)

    entry = first_child(uniprot, "entry")
    try:
        seq_node = first_child(entry, "sequence")
    except (KeyError, IndexError):
        raise AssertionError("No sequence for accession %s in UniProt info" % accession)

    protein = first_child(entry, "protein")
    name_node = children_tagged(protein, ("recommendedName", "submittedName"))[0]
    full_name = first_child(name_node, "fullName").firstChild.nodeValue
    features = children_tagged(entry, ("feature",))
    sequence = "".join(ch for ch in seq_node.firstChild.nodeValue
                       if not ch.isspace())
    return sequence, full_name, features
Beispiel #7
0
def _get_template(session, name):
    """Fetch the Chemical Component Dictionary (CCD) entry for ``name``.

    Returns the result of ``fetch_file``, or None when the fetch raises
    UserError or OSError.
    """
    from chimerax.core.fetch import fetch_file
    cif_name = '%s.cif' % name
    # The report URL is nested under the first character of the name.
    ccd_url = "http://ligand-expo.rcsb.org/reports/%s/%s/%s.cif" % (
        name[0], name, name)
    try:
        cif_path = fetch_file(session, ccd_url, 'CCD %s' % name, cif_name, 'CCD')
    except (UserError, OSError):
        cif_path = None
    return cif_path
Beispiel #8
0
def _get_template(session, name):
    """Fetch the Chemical Component Dictionary (CCD) entry for ``name``.

    Returns the result of ``fetch_file``.  When the fetch raises UserError a
    warning is logged and None is returned instead.
    """
    from chimerax.core.fetch import fetch_file
    cif_name = '%s.cif' % name
    # The report URL is nested under the first character of the name.
    ccd_url = "http://ligand-expo.rcsb.org/reports/%s/%s/%s.cif" % (
        name[0], name, name)
    try:
        cif_path = fetch_file(session, ccd_url, 'CCD %s' % name, cif_name, 'CCD')
    except UserError:
        message = ("Unable to fetch template for '%s': might be missing bonds"
                   % name)
        session.logger.warning(message)
        cif_path = None
    return cif_path
Beispiel #9
0
def fetch_mmtf(session, pdb_id, ignore_cache=False, **kw):
    """Fetch the MMTF file for a PDB entry and open it.

    pdb_id must be exactly 4 characters long; it is lower-cased for the saved
    file name and model name, and upper-cased in the download URL.
    ignore_cache is forwarded to fetch_file and extra keywords to open_data.

    Returns the result of open_data.
    Raises UserError when pdb_id is not 4 characters long.
    """
    if len(pdb_id) != 4:
        raise UserError("PDB identifiers are 4 characters long, got %r" % pdb_id)

    pdb_id = pdb_id.lower()
    mmtf_name = '%s.mmtf' % pdb_id

    # The download is gzip-compressed; fetch_file is asked to uncompress it.
    url = 'http://mmtf.rcsb.org/v1.0/full/%s.mmtf.gz' % pdb_id.upper()
    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session, url, 'MMTF %s' % pdb_id, mmtf_name, 'PDB',
        ignore_cache=ignore_cache, uncompress=True)

    session.logger.status("Opening MMTF %s" % (pdb_id,))
    return session.open_command.open_data(filename, format='mmtf', name=pdb_id, **kw)
Beispiel #10
0
def find_doi_zip_archive_url(session, doi):
    """Return the URL of the single zip archive linked from a DOI's web page.

    The page at http://doi.org/<doi> is fetched (never from cache) and scanned
    with find_link_in_html for '.zip' links.

    Raises UserError when no zip link, or more than one, is found.
    """
    page_path = fetch_file(session, 'http://doi.org/%s' % doi, 'doi %s' % doi,
                           save_name = 'temp.html', save_dir = None,
                           uncompress = True, ignore_cache=True)
    # Ick. Scrape this web page looking for a zip file url.
    urls = find_link_in_html(page_path, '.zip')
    count = len(urls)
    if count > 1:
        from chimerax.core.errors import UserError
        raise UserError('Found multiple zip archives at DOI "%s": %s'
                        % (doi, ', '.join(urls)))
    if count == 0:
        from chimerax.core.errors import UserError
        raise UserError('Found no zip archives at DOI "%s"' % doi)

    # Exactly one candidate remains.
    return urls.pop()
Beispiel #11
0
def fetch_pdb(session, pdb_id, *, fetch_source="rcsb", ignore_cache=False,
        structure_factors=False, over_sampling=1.5, # for ChimeraX-Clipper plugin
        **kw):
    """Open a PDB-format entry, from a local mirror if present, else the web.

    pdb_id must be 4 characters long.  fetch_source selects the download URL
    pattern from _pdb_sources.  When structure_factors is true the Clipper
    plugin is used to fetch the structure factors and attach them to the first
    opened model; over_sampling below 1 is reset to 1 with a warning.  Extra
    keywords are forwarded to open_data.

    Returns (models, status), or ([crystal manager], status) when structure
    factors were requested.
    Raises UserError for a bad identifier, an unknown fetch_source, or a
    missing Clipper plugin.
    """
    from chimerax.core.errors import UserError
    if len(pdb_id) != 4:
        raise UserError('PDB identifiers are 4 characters long, got "%s"' % pdb_id)
    if structure_factors:
        try:
            from chimerax.clipper.io import fetch_cif
        except ImportError:
            raise UserError('Working with structure factors requires the '
                'ChimeraX_Clipper plugin, available from the Tool Shed')
    import os
    pdb_id = pdb_id.lower()
    # check on local system -- TODO: configure location
    filename = "/databases/mol/pdb/%s/pdb%s.ent" % (pdb_id[1:3], pdb_id)
    if not os.path.exists(filename):
        url_pattern = _pdb_sources.get(fetch_source, None)
        if url_pattern is None:
            raise UserError('unrecognized PDB source "%s"' % fetch_source)
        from chimerax.core.fetch import fetch_file
        filename = fetch_file(session, url_pattern % pdb_id, 'PDB %s' % pdb_id,
                              "%s.pdb" % pdb_id, 'PDB',
                              ignore_cache=ignore_cache)
    else:
        session.logger.info("Fetching PDB %s from system cache: %s" % (pdb_id, filename))

    session.logger.status("Opening PDB %s" % (pdb_id,))
    models, status = session.open_command.open_data(filename, format='pdb',
        name=pdb_id, **kw)
    if not structure_factors:
        return models, status

    sf_file = fetch_cif.fetch_structure_factors(session, pdb_id,
        fetch_source=fetch_source, ignore_cache=ignore_cache)
    from chimerax.clipper import get_map_mgr
    map_mgr = get_map_mgr(models[0], create=True)
    if over_sampling < 1:
        session.logger.warning(
            'Map over-sampling rate cannot be less than 1. Resetting to 1.0')
        over_sampling = 1
    map_mgr.add_xmapset_from_file(sf_file, oversampling_rate = over_sampling)
    return [map_mgr.crystal_mgr], status
Beispiel #12
0
def fetch_autopack_results(session,
                           results_name,
                           database=default_autopack_database,
                           ignore_cache=False):
    """Download an autoPACK results file (<results_name>.apr.json).

    The file is requested from "<database>/results/" and stored under the
    'cellPACK' cache name; ignore_cache is forwarded to fetch_file.
    Returns the value returned by fetch_file.
    """
    # Fetch results file.
    json_name = results_name + '.apr.json'
    results_url = database + '/results/%s.apr.json' % results_name
    status_message = 'Fetching %s from web %s...' % (results_name, results_url)
    session.logger.status(status_message)
    from chimerax.core.fetch import fetch_file
    return fetch_file(session,
                      results_url,
                      'results ' + results_name,
                      json_name,
                      'cellPACK',
                      ignore_cache=ignore_cache)
Beispiel #13
0
def fetch_structure_factors(session,
                            pdb_id,
                            fetch_source='rcsb',
                            ignore_cache=False,
                            **kw):
    '''Get a structure factor file in CIF format by PDB identifier via the Internet.

    pdb_id must be 4 characters long.  fetch_source is used directly when it
    is a key of _cif_sources; otherwise it is translated through
    _valid_db_commands.  ignore_cache is forwarded to fetch_file.  Extra
    keywords are accepted but not used here.

    Returns the path of the downloaded file.
    Raises UserError for a bad identifier, an unsupported fetch_source, or
    when the downloaded file does not start like a CIF file (in which case
    the file is deleted).
    '''
    if len(pdb_id) != 4:
        raise UserError(
            'PDB identifiers are 4 characters long, got "{}"'.format(pdb_id))
    # Keep the caller's value so the error message can report it even after
    # the lookup below has replaced fetch_source with None.
    requested_source = fetch_source
    if fetch_source not in _cif_sources.keys():
        fetch_source = _valid_db_commands.get(fetch_source, None)
    if fetch_source is None:
        raise UserError(
            'Fetching structure factors is not implemented for "fromDatabase {}"! Must be one of the following: {}'
            .format(requested_source, ', '.join(_valid_db_commands.keys())))
    import os
    pdb_id = pdb_id.lower()
    save_name = _cif_filenames[fetch_source].format(pdb_id)
    url = _cif_sources[fetch_source].format(pdb_id)
    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session,
                          url,
                          '{} structure factors'.format(pdb_id),
                          save_name,
                          'PDB-SF',
                          uncompress=_compressed[fetch_source],
                          ignore_cache=ignore_cache)

    # Double check that a cif file was downloaded instead of an HTML error
    # message saying the ID does not exist
    with open(filename, 'r') as f:
        first_line = f.readline()
    # The file is closed at this point, so it can be removed safely.
    if not first_line.startswith(('data_', '#')):
        os.remove(filename)
        raise UserError(
            'Structure factors could not be retrieved! Are you '
            'sure this is an x-ray structure?')

    return filename
Beispiel #14
0
def fetch_doi(session, doi, url, ignore_cache = False):
    """Return the path of the zip archive associated with a DOI.

    doi must contain a "/".  url, when given (non-empty), is the archive's
    URL; otherwise the URL is discovered with find_doi_zip_archive_url.

    Unless ignore_cache is true, each directory from cache_directories() is
    searched under DOI/<doi> for the archive: the file named like the URL's
    basename when url is given, else any '.zip' file.  A directory is used
    only if exactly one file matches.  If nothing is found the archive is
    downloaded into the first cache directory (or with save_dir None when
    there are no cache directories).

    Raises UserError when doi contains no "/".
    """
    if '/' not in doi:
        from chimerax.core.errors import UserError
        raise UserError('DOI does not contain required "/", got "%s"' % doi)

    from chimerax.core.fetch import cache_directories, fetch_file
    from os import listdir, makedirs
    from os.path import join, isdir, basename
    dirs = cache_directories()
    if not ignore_cache:
        for d in dirs:
            path = join(d, 'DOI', doi)
            if not isdir(path):
                continue
            if url:
                zip_name = basename(url)
                zf = [f for f in listdir(path) if f == zip_name]
            else:
                zf = [f for f in listdir(path) if f.endswith('.zip')]
            if len(zf) == 1:
                return join(path, zf[0])

    # Treat None and an empty string alike, matching the cache lookup above,
    # so an empty url triggers discovery rather than a fetch of "".
    if url:
        zip_file_url = url
    else:
        zip_file_url = find_doi_zip_archive_url(session, doi)
    zip_filename = basename(zip_file_url)
    if dirs:
        save_dir = join(dirs[0], 'DOI', doi)
        makedirs(save_dir, exist_ok = True)
    else:
        save_dir = None

    # The cache was already consulted above, so always download here.
    filename = fetch_file(session, zip_file_url, 'zip %s %s' % (doi, zip_filename), zip_filename,
                          save_dir = save_dir, uncompress = False, ignore_cache=True)

    return filename
Beispiel #15
0
def fetch_pubchem(session, pubchem_id, *, ignore_cache=False, **kw):
    """Fetch the 3D SDF record for a PubChem compound id and open it.

    pubchem_id must be a string of digits.  ignore_cache is forwarded to
    fetch_file and extra keywords to open_data.  The opened model is named
    "pubchem:<id>".

    Returns the result of open_data.
    Raises UserError when pubchem_id is not numeric.
    """
    from chimerax.core.errors import UserError
    if not pubchem_id.isdigit():
        raise UserError('PubChem identifiers are numeric, got "%s"' %
                        pubchem_id)

    url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/%s/SDF?record_type=3d" % pubchem_id
    pubchem_name = "%s.sdf" % pubchem_id
    from chimerax.core.fetch import fetch_file
    filename = fetch_file(session,
                          url,
                          'PubChem %s' % pubchem_id,
                          pubchem_name,
                          'PubChem',
                          ignore_cache=ignore_cache)

    session.logger.status("Opening PubChem %s" % (pubchem_id, ))
    return session.open_command.open_data(filename,
                                          format='sdf',
                                          name="pubchem:" + pubchem_id,
                                          **kw)
Beispiel #16
0
def fetch_emdb(session, emdb_id, ignore_cache=False, **kw):
    """Fetch an EMDB density map and open it as a 'ccp4' format file.

    The download mirror is chosen from the suffix of the local host name.
    emdb_id must be at least 4 characters long.  ignore_cache is forwarded to
    fetch_file and extra keywords to open_data.

    Returns the (models, status) pair from open_data.
    Raises UserError when emdb_id is shorter than 4 characters.
    """
    from chimerax.core.errors import UserError
    if len(emdb_id) < 4:
        raise UserError("EMDB identifiers are at least 4 characters long")

    import socket
    host = socket.gethostname()
    if host.endswith(('.edu', '.gov')):
        # TODO: RCSB https (https://files.rcsb.org/pub/emdb/...) is 20x slower
        # than ftp. Cole Christie looking into it.
        # The RCSB ftp does not report file size so progress messages don't
        # indicate how long it will take.
        url_pattern = 'ftp://ftp.wwpdb.org/pub/emdb/structures/EMD-%s/map/%s.gz'
    elif host.endswith('.cn'):
        url_pattern = 'ftp://ftp.emdb-china.org/structures/EMD-%s/map/%s.gz'
    else:
        url_pattern = 'ftp://ftp.ebi.ac.uk/pub/databases/emdb/structures/EMD-%s/map/%s.gz'

    map_name = 'emd_%s.map' % emdb_id

    from chimerax.core.fetch import fetch_file
    map_path = fetch_file(session,
                          url_pattern % (emdb_id, map_name),
                          'map %s' % emdb_id,
                          map_name,
                          'EMDB',
                          uncompress=True,
                          ignore_cache=ignore_cache)

    return session.open_command.open_data(map_path,
                                          format='ccp4',
                                          name='emdb %s' % emdb_id,
                                          **kw)
Beispiel #17
0
def fetch_homologene(session, ident, ignore_cache=True, **kw):
    """Fetch the HomoloGene FASTA file for 'ident' and open it.

    The file is downloaded with the ChimeraX core fetch function from the
    module-level _URL pattern, saved as "<ident>.fa", and then opened with
    alignment=False.  Note that ignore_cache defaults to True and that extra
    keywords are accepted but not forwarded.

    Returns the (models, status) pair from open_data.
    """
    # First fetch the file using ChimeraX core function
    fasta_url = _URL % ident
    logger = session.logger
    logger.status("Fetching HomoloGene %s" % ident)
    fasta_name = "%s.fa" % ident
    from chimerax.core.fetch import fetch_file
    fasta_path = fetch_file(session,
                            fasta_url,
                            "HomoloGene %s" % ident,
                            fasta_name,
                            "HomoloGene",
                            ignore_cache=ignore_cache,
                            uncompress=True)

    logger.status("Opening HomoloGene %s" % ident)
    return session.open_command.open_data(fasta_path,
                                          alignment=False,
                                          name=ident)
Beispiel #18
0
def fetch_mmcif(
        session,
        pdb_id,
        fetch_source="rcsb",
        ignore_cache=False,
        structure_factors=False,
        over_sampling=1.5,  # for ChimeraX-Clipper plugin
        **kw):
    """Get mmCIF file by PDB identifier via the Internet and open it.

    pdb_id must be 4 characters long.  Unless fetch_source ends with
    'updated', a local mirror under /databases/mol/mmCIF is tried first and
    downloads are cached under 'PDB'; otherwise the cache name is
    fetch_source itself.  A downloaded file that does not start like a CIF
    file is deleted and reported as an invalid identifier.  When
    structure_factors is true the Clipper plugin fetches the structure
    factors and attaches them to the first opened model; over_sampling below
    1 is reset to 1 with a warning.  Extra keywords go to open_data.

    Returns (models, status), or ([crystal manager], status) when structure
    factors were requested.
    Raises UserError for a bad identifier, an unknown fetch_source, a missing
    Clipper plugin, or a download that is not a CIF file.
    """
    if not _initialized:
        _initialize(session)

    if len(pdb_id) != 4:
        raise UserError('PDB identifiers are 4 characters long, got "%s"' %
                        pdb_id)
    if structure_factors:
        try:
            from chimerax.clipper.io import fetch_cif
        except ImportError:
            raise UserError(
                'Working with structure factors requires the '
                'ChimeraX_Clipper plugin, available from the Tool Shed')

    import os
    pdb_id = pdb_id.lower()
    filename = None
    if fetch_source.endswith('updated'):
        cache = fetch_source
    else:
        cache = 'PDB'
        # check on local system -- TODO: configure location
        system_path = "/databases/mol/mmCIF/%s/%s.cif" % (pdb_id[1:3], pdb_id)
        if os.path.exists(system_path):
            session.logger.info("Fetching mmCIF %s from system cache: %s" %
                                (pdb_id, system_path))
            filename = system_path

    if filename is None:
        url_pattern = _mmcif_sources.get(fetch_source, None)
        if url_pattern is None:
            raise UserError('unrecognized mmCIF/PDB source "%s"' %
                            fetch_source)
        from chimerax.core.fetch import fetch_file
        filename = fetch_file(session,
                              url_pattern % pdb_id,
                              'mmCIF %s' % pdb_id,
                              "%s.cif" % pdb_id,
                              cache,
                              ignore_cache=ignore_cache)
        # double check that a mmCIF file was downloaded instead of an
        # HTML error message saying the ID does not exist
        with open(filename, 'r') as f:
            first_line = f.readline()
        # The file is closed here, before it is removed.
        if not first_line.startswith(('data_', '#')):
            os.remove(filename)
            raise UserError("Invalid mmCIF identifier")

    session.logger.status("Opening mmCIF %s" % (pdb_id, ))
    models, status = session.open_command.open_data(filename,
                                                    format='mmcif',
                                                    name=pdb_id,
                                                    **kw)
    if not structure_factors:
        return models, status

    sf_file = fetch_cif.fetch_structure_factors(session,
                                                pdb_id,
                                                fetch_source=fetch_source,
                                                ignore_cache=ignore_cache)
    from chimerax.clipper import get_map_mgr
    map_mgr = get_map_mgr(models[0], create=True)
    if over_sampling < 1:
        session.logger.warning(
            'Map over-sampling rate cannot be less than 1. Resetting to 1.0')
        over_sampling = 1
    map_mgr.add_xmapset_from_file(sf_file, oversampling_rate=over_sampling)
    return [map_mgr.crystal_mgr], status
Beispiel #19
0
def fetch_autopack(session,
                   path,
                   results_name,
                   database=default_autopack_database,
                   ignore_cache=False):
    """Build a model hierarchy from an autoPACK results file.

    path is an already-downloaded results file.  It is read to find the
    recipe location and the placed pieces; the recipe, compartment surfaces,
    ingredient descriptions and ingredient meshes are then fetched (cache
    name 'cellPACK'), with every 'autoPACKserver' placeholder in a location
    replaced by database.  ignore_cache is forwarded to each fetch_file call.

    Returns a Model named results_name that contains one child Model per
    compartment; each compartment holds its surfaces plus per-region child
    Models with the ingredient surfaces.
    """

    from . import read_apr
    # pieces maps an ingredient id to its placements (used as a dict below).
    recipe_loc, pieces = read_apr.read_autopack_results(path)
    recipe_url = recipe_loc.replace('autoPACKserver', database)
    from os.path import basename
    recipe_filename = basename(recipe_loc)
    from chimerax.core.fetch import fetch_file
    recipe_path = fetch_file(session,
                             recipe_url,
                             'recipe for ' + results_name,
                             recipe_filename,
                             'cellPACK',
                             ignore_cache=ignore_cache)

    # ingr_filenames is indexed by ingredient id; comp_surfaces yields
    # (name, surface location, geometry location) triples.
    ingr_filenames, comp_surfaces = read_apr.read_autopack_recipe(recipe_path)

    from chimerax.core.models import Model
    # Top-level container model that is returned to the caller.
    cpm = Model(results_name, session)

    # Fetch compartment surface files.
    csurfs = []
    from chimerax.surface.collada import read_collada_surfaces
    for comp_name, comp_loc, geom_loc in comp_surfaces:
        # One model per compartment; either location may be None.
        csurf = Model(comp_name, session)
        if comp_loc is not None:
            comp_url = comp_loc.replace('autoPACKserver', database)
            comp_filename = basename(comp_loc)
            comp_path = fetch_file(session,
                                   comp_url,
                                   'compartment surface ' + comp_filename,
                                   comp_filename,
                                   'cellPACK',
                                   ignore_cache=ignore_cache)
            slist, msg = read_collada_surfaces(session, comp_path,
                                               'representation')
            csurf.add(slist)
        if geom_loc is not None:
            geom_url = geom_loc.replace('autoPACKserver', database)
            geom_filename = basename(geom_loc)
            geom_path = fetch_file(session,
                                   geom_url,
                                   'compartment bounds ' + geom_filename,
                                   geom_filename,
                                   'cellPACK',
                                   ignore_cache=ignore_cache)
            slist, msg = read_collada_surfaces(session, geom_path, 'geometry')
            # Bounds surfaces are added but start out hidden.
            for s in slist:
                s.display = False
            csurf.add(slist)
        csurfs.append(csurf)
    cpm.add(csurfs)

    # Add ingredient surfaces to compartments.
    # ingr_mesh_path remembers the mesh path per ingredient file name so each
    # ingredient file and mesh is fetched only once.
    ingr_mesh_path = {}
    # comp is keyed by compartment name here and, further down, also by
    # (compartment name, region) tuples for the per-region child models.
    comp = {csurf.name: csurf for csurf in csurfs}
    ingr_ids = list(pieces.keys())
    ingr_ids.sort()  # Get reproducible ordering of ingredients
    for ingr_id in ingr_ids:
        ingr_filename = ingr_filenames[ingr_id]
        mesh_path = ingr_mesh_path.get(ingr_filename, None)
        if mesh_path is None:
            # Ingredient file names are resolved relative to the recipe URL.
            from urllib.parse import urljoin
            ingr_url = urljoin(recipe_url, ingr_filename)
            ingr_path = fetch_file(session,
                                   ingr_url,
                                   'ingredient ' + ingr_filename,
                                   ingr_filename,
                                   'cellPACK',
                                   ignore_cache=ignore_cache)
            # The ingredient file names the location of its mesh.
            mesh_loc = read_apr.read_ingredient(ingr_path)
            mesh_url = mesh_loc.replace('autoPACKserver', database)
            mesh_filename = basename(mesh_loc)
            mesh_path = fetch_file(session,
                                   mesh_url,
                                   'mesh ' + mesh_filename,
                                   mesh_filename,
                                   'cellPACK',
                                   ignore_cache=ignore_cache)
            ingr_mesh_path[ingr_filename] = mesh_path

        # Each ingredient id is a 3-tuple.
        comp_name, interior_or_surf, ingr_name = ingr_id
        cs = comp.get((comp_name, interior_or_surf), None)
        if cs is None:
            # First ingredient for this region: create its child model under
            # the compartment.
            # NOTE(review): comp[comp_name] raises KeyError if the results
            # name a compartment that the recipe did not list -- confirm that
            # cannot happen.
            cs = Model(interior_or_surf, session)
            comp[comp_name].add([cs])
            comp[(comp_name, interior_or_surf)] = cs
        placements = pieces[ingr_id]
        isurf = read_apr.create_surface(session, mesh_path, ingr_name,
                                        placements)
        cs.add([isurf])

    return cpm