Ejemplo n.º 1
0
def _get_metals():
    '''
    Return every metallic element as a list of ``Element`` objects,
    ordered from lightest to heaviest (ascending ``Element.mass``).
    '''
    from chimerax.atomic import Element
    lookup = Element.get_element
    metal_names = []
    for symbol in Element.names:
        if lookup(symbol).is_metal:
            metal_names.append(symbol)
    # list.sort is stable, exactly like sorted(), so ties keep their order.
    metal_names.sort(key=lambda symbol: lookup(symbol).mass)
    return list(map(lookup, metal_names))
Ejemplo n.º 2
0
def add_oxt(session, residue):
    '''
    Add a C-terminal OXT oxygen to the backbone C atom of ``residue``.

    Args:
        session: the ChimeraX session; only ``session.logger`` is used.
        residue: the residue that should receive the OXT atom.

    Returns:
        None. Problems that make the addition impossible (no C atom, OXT
        already present, no free position on C) are reported as warnings
        on the session log and leave the residue without a new OXT.

    Raises:
        UserError: if the C atom is bonded to an atom of another residue,
            i.e. ``residue`` is not C-terminal.
    '''
    catom = residue.find_atom('C')
    if catom is None:
        session.logger.warning('Residue {} {}{} has no C atom!'.format(
            residue.name, residue.chain_id, residue.number))
        return
    # modify_atom() may recolour the atom, so remember the colour to restore.
    color = catom.color
    for n in catom.neighbors:
        if n.name == 'OXT':
            # The chain ID was previously never substituted into the message.
            session.logger.warning(
                'Chain {} already has a C-terminal OXT. Skipping.'.format(
                    residue.chain_id))
            return
        if n.residue != residue:
            raise UserError(
                'Residue {} {}{} is not a C-terminal residue!'.format(
                    residue.name, residue.chain_id, residue.number))
    from chimerax.build_structure import modify_atom
    from chimerax.atomic import Element
    # Ask for three substituents on C; the newly created one comes back as a
    # hydrogen placeholder, which is then converted into the OXT oxygen.
    atoms = modify_atom(catom, catom.element, 3, res_name=residue.name)
    hydrogen = next((a for a in atoms if a.element.name == 'H'), None)
    if hydrogen is None:
        # Without this guard the last returned atom (possibly C itself)
        # would be silently converted into an oxygen.
        catom.color = color
        session.logger.warning(
            'Could not add OXT to residue {} {}{}: no free position on its '
            'C atom.'.format(residue.name, residue.chain_id, residue.number))
        return
    modify_atom(hydrogen,
                Element.get_element('O'),
                1,
                name='OXT',
                res_name=residue.name)
    catom.color = color
    session.logger.info('Added a C-terminal OXT to chain {}'.format(
        residue.chain_id))
Ejemplo n.º 3
0
def place_helium(structure, res_name, position=None):
    '''
    Add a one-atom helium residue named ``res_name`` to chain "het" of
    ``structure`` and return the new atom.

    The residue number is one more than the highest positive number already
    used in chain "het". When ``position`` is None the atom goes to the
    origin if the session has no models, otherwise to the main view's
    center of rotation.
    '''
    het_numbers = [r.number for r in structure.residues
                   if r.chain_id == "het"]
    next_number = max([0] + het_numbers) + 1
    res = structure.new_residue(res_name, "het", next_number)

    if position is None:
        session = structure.session
        if len(session.models) == 0:
            position = (0.0, 0.0, 0.0)
        else:
            # Reportedly equivalent to the midpoint between the near and far
            # clip distances along the camera's view direction.
            position = session.main_view.center_of_rotation

    from numpy import array
    from chimerax.atomic.struct_edit import add_atom
    from chimerax.atomic.colors import element_color
    helium = Element.get_element("He")
    new_atom = add_atom("He", helium, res, array(position))
    new_atom.color = element_color(helium.number)
    new_atom.draw_mode = new_atom.BALL_STYLE
    return new_atom
Ejemplo n.º 4
0
def add_hydrogen_to_atom(atom, coord, name=None):
    '''
    Add a single hydrogen atom to the given heavy atom, at the given coordinate.
    Simple-minded tool, taking no notice of chemistry or geometry.

    Args:
        atom: the heavy atom the new hydrogen is bonded to.
        coord: coordinate at which the hydrogen is placed.
        name: optional atom name. When omitted, the name is 'H' + the heavy
            atom's name minus its first character + a number one higher
            than the largest trailing digit among the hydrogens already
            bonded to ``atom`` (1 if there are none).

    Returns:
        The newly created hydrogen atom.

    Raises:
        TypeError: if ``name`` is given and already used in the residue.
    '''
    r = atom.residue
    if name is not None:
        if name in r.atoms.names:
            raise TypeError('This atom name is already taken!')
    else:
        taken_digits = [
            int(h.name[-1]) for h in atom.neighbors
            if h.element.name == 'H' and h.name[-1].isdigit()
        ]
        name = 'H' + atom.name[1:] + str(max(taken_digits, default=0) + 1)

    # Imported only once the name has been validated, right where needed.
    from chimerax.atomic import Element
    from chimerax.atomic.struct_edit import add_atom
    return add_atom(name, Element.get_element('H'), r, coord, bonded_to=atom)
Ejemplo n.º 5
0
def make_gaussian_cube_atoms(session):
    """
    Build one atomic model per open Gaussian Cube volume that lists atoms.

    Every atom record of a cube file becomes an atom in a single 'UNK'
    residue (chain 'A', number 1); coordinates are scaled by the Bohr
    radius constant below. The new structures are added to the session and
    a summary line is written to the log.
    """
    from chimerax.map import Volume
    from chimerax.map_data.gaussian.gaussian_grid import GaussianGrid

    grids = []
    for model in session.models:
        if isinstance(model, Volume) and isinstance(model.data, GaussianGrid):
            grids.append(model.data)

    bohr = 0.5291772108      # Angstroms
    structures = []
    for grid in grids:
        cube_atoms = grid.gc.atoms
        if not cube_atoms:
            continue
        from chimerax.atomic import AtomicStructure, Element
        structure = AtomicStructure(session)
        residue = structure.new_residue('UNK', 'A', 1)
        for serial, (n, q, x, y, z) in enumerate(cube_atoms):
            element = Element.get_element(n)
            new_atom = structure.new_atom(element.name, element)
            new_atom.coord = (bohr*x, bohr*y, bohr*z)
            new_atom.serial_number = serial
            residue.add_atom(new_atom)
        structure.connect_structure()
        structures.append(structure)

    session.models.add(structures)
    summary = ('Created %d atomic models for %d Gaussian Cube files'
               % (len(structures), len(grids)))
    session.logger.info(summary)
Ejemplo n.º 6
0
def bond_with_H_length(heavy, geom):
    """
    Return the bond length to use between ``heavy`` and a hydrogen.

    Carbon depends on ``geom`` (4, 3 or 2 substituent positions), nitrogen
    uses the module constant ``N_H``, oxygen distinguishes water-like
    oxygens from all others, and sulfur has a fixed value. Anything else,
    including carbon with another ``geom``, falls back to
    ``Element.bond_length``.
    """
    symbol = heavy.element.name
    if symbol == "C":
        for positions, length in ((4, 1.09), (3, 1.08), (2, 1.056)):
            if geom == positions:
                return length
    elif symbol == "N":
        return N_H
    elif symbol == "O":
        # can't rely on water being in chain "water" anymore, so treat an
        # oxygen with no bonds, or with two bonds and no heavy neighbor,
        # as water
        if heavy.num_bonds == 0:
            return 0.9572
        if heavy.num_bonds == 2 and not any(
                nb.element.number > 1 for nb in heavy.neighbors):
            return 0.9572
        return 0.96
    elif symbol == "S":
        return 1.336
    from chimerax.atomic import Element
    hydrogen = Element.get_element(1)
    return Element.bond_length(heavy.element, hydrogen)
Ejemplo n.º 7
0
    def create_marker(self, atom, rgba, scale):
        """
        Return the marker that follows ``atom``, creating it if necessary.

        An existing marker (one whose ``_follow`` is ``atom``) gets the new
        colour and a radius of ``scale * atom.radius``. Otherwise a marker is
        created through the parent class at the atom's coordinate and copies
        the atom's element, hide, display and draw-mode settings. In both
        cases the scale is stored on the marker as ``_scale``.
        """
        marker = next((m for m in self.atoms if m._follow is atom), None)
        if marker is not None:
            marker.color = rgba
            marker.radius = scale * atom.radius
        else:
            marker = super().create_marker(atom.coord, rgba,
                                           scale * atom.radius)
            marker.element = Element.get_element(atom.element.name)
            marker._follow = atom
            marker.hide = atom.hide
            marker.display = atom.display
            marker.draw_mode = atom.draw_mode

        marker._scale = scale
        in_stick_style = atom.draw_mode == atom.STICK_STYLE
        if in_stick_style and atom.neighbors:
            # NOTE(review): this writes the radius of the followed atom, not
            # of the marker -- confirm that is intended.
            atom.radius = scale * atom.bonds[-1].radius
        return marker
Ejemplo n.º 8
0
def set_hide_atoms(AtomsRE, residues):
    """
    Set the HIDE_NUCLEOTIDE hide bit on atoms selected by ``AtomsRE``.

    An atom is selected when its name matches ``AtomsRE``, or when it is a
    hydrogen whose first neighbor has a matching name.
    """
    from chimerax.atomic import Element
    hydrogen = Element.get_element(1)
    to_hide = []
    for res in residues:
        for atom in res.atoms:
            if AtomsRE.match(atom.name):
                to_hide.append(atom)
                continue
            if atom.element != hydrogen:
                continue
            # Hydrogens follow the atom they are attached to.
            partners = atom.neighbors
            if partners and AtomsRE.match(partners[0].name):
                to_hide.append(atom)
    Atoms(to_hide).set_hide_bits(HIDE_NUCLEOTIDE)
Ejemplo n.º 9
0
def place_metal_at_coord(model,
                         chain_id,
                         residue_number,
                         residue_name,
                         atom_name,
                         coord,
                         element_name=None,
                         bfactor=20):
    '''
    Add a new single-atom residue holding one metal ion to ``model`` at
    ``coord`` and return that residue.

    When ``element_name`` is not supplied, the element symbol is derived
    from ``atom_name`` by title-casing it (e.g. "ZN" -> "Zn").
    '''
    symbol = atom_name.title() if element_name is None else element_name
    from chimerax.atomic import Element
    from chimerax.atomic.struct_edit import add_atom
    metal = Element.get_element(symbol)
    new_residue = model.new_residue(residue_name, chain_id, residue_number)
    add_atom(atom_name, metal, new_residue, coord, bfactor=bfactor)
    return new_residue
Ejemplo n.º 10
0
def complete_terminal_carboxylate(session, cter):
    """
    Add the missing OXT oxygen to the C-terminal residue ``cter``.

    Nothing happens when the residue already has an OXT atom, has no C
    atom, or its C atom does not have exactly two bonds. Otherwise OXT is
    placed at the first open planar position around C (bond length 1.229)
    and the addition is logged on ``session.logger``.

    The new atom takes the element colour of oxygen when C still carries
    its own element colour, and C's colour otherwise.
    """
    if cter.find_atom("OXT"):
        return
    c = cter.find_atom("C")
    if not c or c.num_bonds != 2:
        return

    from numpy import array_equal
    from chimerax.atomic import Element
    from chimerax.atomic.bond_geom import bond_positions
    from chimerax.atomic.colors import element_color
    from chimerax.atomic.struct_edit import add_atom

    loc = bond_positions(c.coord, 3, 1.229,
                         [n.coord for n in c.neighbors])[0]
    oxt = add_atom("OXT", Element.get_element("O"), cter, loc, bonded_to=c)
    # Colours are RGBA sequences; a plain `==` on numpy arrays yields an
    # element-wise result whose truth value is ambiguous, so compare the
    # whole sequence instead.
    if array_equal(c.color, element_color(c.element.number)):
        oxt.color = element_color(oxt.element.number)
    else:
        oxt.color = c.color
    session.logger.info("Missing OXT added to C-terminal residue %s" %
                        str(cter))
Ejemplo n.º 11
0
# http://www.rbvi.ucsf.edu/chimerax/docs/licensing.html
# This notice must be embedded in or attached to all copies,
# including partial copies, of the software or any revisions
# or derivations thereof.
# === UCSF ChimeraX Copyright ===

# $Id: __init__.py 41155 2016-06-30 23:18:29Z pett $
"""Find chemical groups in a structure"""

from chimerax.atomic.idatm import type_info, tetrahedral, planar, linear, single

# Shorthands used when writing group patterns:
#   R    -- an alkyl substituent (carbon or hydrogen)
#   X    -- any halide
#   None -- matches anything (and nothing)
from chimerax.atomic import Element
# Atomic numbers of the elements the patterns refer to most often.
N, C, O, H = (Element.get_element(symbol).number
              for symbol in ('N', 'C', 'O', 'H'))
R = (C, H)
X = ('F', 'Cl', 'Br', 'I')
# Alternatives accepted at a singly-bonded position.
single_bond = (H, {'geometry': tetrahedral}, {'geometry': single})
# Any atom whose type is not one of the hydrogen/deuterium types.
heavy = {'not type': ['H', 'HC', 'D', 'DC']}
# As single_bond, but the tetrahedral alternative excludes oxygen types.
non_oxygen_single_bond = (
    H,
    {'geometry': tetrahedral,
     'not type': ['O', 'O3', 'O2', 'O3-', 'O2-', 'Oar', 'Oar+']},
    {'geometry': single},
)
Ejemplo n.º 12
0
def hyd_positions(heavy, include_lone_pairs=False):
    """Return the positions of hydrogens attached to ``heavy``.

       Explicit hydrogens are reported as they are. Missing ones are only
       computed when their location is unambiguous; if a hydrogen could be
       in one of several positions, none of those are returned. With
       ``include_lone_pairs`` the open positions are counted against the
       full geometry rather than the expected substituent count.
    """

    # sort the neighbors into heavy atoms and hydrogens
    neighbors = heavy.neighbors
    heavy_nbrs = [nb for nb in neighbors if nb.element.number > 1]
    hyd_locs = [nb._hb_coord for nb in neighbors
                if not nb.element.number > 1]

    # explicit hydrogens "win" over atom types
    if hyd_locs and not include_lone_pairs:
        return hyd_locs

    if heavy.idatm_type not in type_info:
        return hyd_locs
    info = type_info[heavy.idatm_type]
    geom = info.geometry
    subs = geom if include_lone_pairs else info.substituents
    bonded_locs = hyd_locs + [nb._hb_coord for nb in heavy_nbrs]

    known_subs = len(bonded_locs)
    # nothing left to add, or nothing to orient against
    # (this also eliminates 'single' geometry)
    if known_subs == 0 or known_subs >= subs:
        return hyd_locs

    # a lone substituent on a tetrahedral center leaves a free rotamer
    if geom == tetrahedral and known_subs == 1:
        return hyd_locs

    # with an "empty" position available, the bond could be anywhere
    if geom - subs > 0:
        return hyd_locs

    heavy_loc = heavy._hb_coord
    bond_len = Element.bond_length(heavy.element, "H")

    coplanar = None
    if geom == planar:
        # atoms hanging off planar neighbors fix the plane to build in
        coplanar = []
        for nb in heavy_nbrs:
            try:
                nb_geom = type_info[nb.idatm_type].geometry
            except KeyError:
                nb_geom = None
            if nb_geom == planar:
                coplanar.extend(
                    far._hb_coord for far in nb.neighbors if far != heavy)

    computed = bond_positions(heavy_loc, geom, bond_len, bonded_locs,
                              coplanar=coplanar)
    return hyd_locs + list(computed)
Ejemplo n.º 13
0
                  hydrogen_totals, idatm_type, his_Ns, coordinations,
                  in_isolation)
    post_add(session, fake_N, fake_C)
    _delete_shared_data()


class IdatmTypeInfo:
    """Geometry and expected substituent count recorded for an atom type."""

    def __init__(self, geometry, substituents):
        self.geometry, self.substituents = geometry, substituents


from chimerax.atomic import idatm
# Lookup table from type name to IdatmTypeInfo.  It is seeded with one entry
# per metal or halogen, keyed by element name and given 'single' geometry
# with zero substituents.
type_info = {}
for element_num in range(1, Element.NUM_SUPPORTED_ELEMENTS):
    e = Element.get_element(element_num)
    if e.is_metal or e.is_halogen:
        type_info[e.name] = IdatmTypeInfo(idatm.single, 0)
# The real IDATM table is merged in last, so its entries replace any seeded
# element entry that uses the same key.
type_info.update(idatm.type_info)


def post_add(session, fake_n, fake_c):
    # fix up non-"true" terminal residues (terminal simply because
    # next residue is missing)
    for fn in fake_n:
        n = fn.find_atom("N")
        ca = fn.find_atom("CA")
        c = fn.find_atom("C")
        if not n or not ca or not c:
            continue
        dihed = None
Ejemplo n.º 14
0
#    index of donor in group,
#    type of donor geometry
#    degree of tau symmetry
#    argument tuple used when geometry-check function is called
#
#    in argument tuple, conversions will occur before function gets called.
#    namely:
#        positive floats are assumed to be distances, and will be squared
#        integers are assumed to be angles in degrees, and will be
#            converted to radians

import sys
# Interned strings: equal interned strings share a single object.
# theta_tau / upsilon_tau appear as geometry-type tags in donor_params below.
water = sys.intern("water")
theta_tau = sys.intern('theta_tau')
upsilon_tau = sys.intern('upsilon_tau')
# Bond length between oxygen and hydrogen as reported by Element.bond_length.
OH_bond_dist = Element.bond_length("O", "H")

donor_params = [
    # neutral carboxylic acid
    [[['O3', ['Cac', H]], [1, 1, 0]], 0, upsilon_tau, 2,
     (2.87, 103, -128, 140, -30, 2.87, 103, -128, 155, -30, 150, 2.87, 103,
      -128, 140, -30, 150)],
    # protonated nitrogen double-bonded to carbon
    [
        [['Npl', [['C2', [explicit_single_bond, explicit_single_bond]], H, H]],
         [1, 1, 0, 0, 0, 0]],
        0,
        upsilon_tau,
        4,
        (
            3.17,
Ejemplo n.º 15
0
# license agreement containing restrictions on its disclosure,
# duplication and use.  For details see:
# http://www.rbvi.ucsf.edu/chimerax/docs/licensing.html
# This notice must be embedded in or attached to all copies,
# including partial copies, of the software or any revisions
# or derivations thereof.
# === UCSF ChimeraX Copyright ===

# Maps each selector name to the description shown for it.
sel_info = {}

from chimerax.atomic import Element, Atom
# Some IDATM type names are also element symbols (e.g. 'H').  The IDATM
# entries are stored first so that the element entries written afterwards
# replace them and the elements 'win'.
for idatm_type, type_details in Atom.idatm_info_map.items():
    sel_info[idatm_type] = type_details.description
for atomic_number in range(1, Element.NUM_SUPPORTED_ELEMENTS):
    symbol = Element.get_element(atomic_number).name
    sel_info[symbol] = "%s (element)" % symbol

# One classifier line per selector.
classifier_template = ("    <ChimeraXClassifier>"
                       "ChimeraX :: Selector :: %s :: %s"
                       "</ChimeraXClassifier>\n")
selectors = [classifier_template % entry for entry in sel_info.items()]

# Expand the placeholder in the template and write the final bundle info.
with open("bundle_info.xml.in") as template_file:
    template_text = template_file.read()
expanded_text = template_text.replace(
    "ELEMENT_AND_IDATM_SELECTOR_CLASSIFIERS", "".join(selectors))
with open("bundle_info.xml", "w") as output_file:
    output_file.write(expanded_text)
raise SystemExit(0)
Ejemplo n.º 16
0
def add_missing_md_template_atoms(session, residue, md_template,
                                  residue_indices, template_indices):
    '''
    Build the atoms that are present in ``md_template`` but not yet matched
    to any atom of ``residue``.

    Only template atoms bonded directly to exactly one existing residue atom
    can be built, because the MD template carries no geometry.

    Args:
        session: ChimeraX session; only ``session.logger`` is used.
        residue: the residue to extend.
        md_template: template exposing ``atoms``, ``bonds`` (pairs of atom
            indices) and ``name``.
        residue_indices, template_indices: paired index sequences;
            ``template_indices[k]`` is the template atom matched to
            ``residue.atoms[residue_indices[k]]``. ``template_indices`` must
            support element-wise ``==`` (it is passed to ``numpy.where``).

    Returns:
        None. A warning is logged for template atoms that are not connected
        to any existing atom; those atoms are not built.

    Raises:
        UserError: if a missing atom bonds to more than one existing atom,
            or if adding atoms would exceed what an existing atom's
            geometry supports.
    '''
    import numpy
    template_extra_indices = [
        i for i in range(len(md_template.atoms)) if i not in template_indices
    ]
    if not template_extra_indices:
        return
    # Set membership keeps the bond filter linear in the number of bonds.
    extra_index_set = set(template_extra_indices)
    template_extra_bonds = {
        b for b in md_template.bonds
        if any(i in extra_index_set for i in b)
    }
    from collections import defaultdict
    stub_map = defaultdict(list)
    # stub_map maps an existing atom in the residue to any atoms in the MD
    # template that should be connected to it, but aren't yet modelled.
    found_bonds = set()
    for b in template_extra_bonds:
        i1, i2 = b
        i1_index = numpy.where(template_indices == i1)[0]
        i2_index = numpy.where(template_indices == i2)[0]
        if not len(i1_index) and not len(i2_index):
            # Neither end exists in the residue yet.
            continue
        if len(i2_index):
            # Normalise so that i1 is the existing atom and i2 the new one.
            i1, i2 = i2, i1
            i1_index = i2_index
        i1_index = i1_index[0]
        res_atom = residue.atoms[residue_indices[i1_index]]
        stub_map[res_atom].append(i2)
        found_bonds.add(b)
    template_extra_bonds = template_extra_bonds.difference(found_bonds)
    if template_extra_bonds:
        err_str = (
            'MD template {} for residue {} {}{}{} contains extra atoms that are not in '
            'a coordinate template, and are not directly connected to existing '
            'atoms. Since MD templates do not explicitly provide geometry, '
            'these atoms will not be built.').format(md_template.name,
                                                     residue.name,
                                                     residue.chain_id,
                                                     residue.number,
                                                     residue.insertion_code)
        session.logger.warning(err_str)
    seen = set()
    for new_atom_list in stub_map.values():
        for i in new_atom_list:
            if i in seen:
                err_str = (
                    'The atom {} in MD template {} bonds to more than '
                    'one existing atom in residue {}. Since MD templates do '
                    'not explicitly specify geometry, this type of atom addition '
                    'is not currently supported. The resulting residue will '
                    'contain only those atoms which the MD and coordinate templates '
                    'have in common').format(md_template.atoms[i].name,
                                             md_template.name, residue.name)
                raise UserError(err_str)
            seen.add(i)
    from chimerax.atomic import Element
    from chimerax.build_structure import modify_atom
    from chimerax.build_structure.mod import ParamError
    for existing_atom, new_indices in stub_map.items():
        new_tatoms = [md_template.atoms[i] for i in new_indices]
        num_bonds = len(existing_atom.neighbors) + len(new_tatoms)
        try:
            # Re-typing the atom with the larger bond count makes
            # modify_atom create placeholder atoms for the new positions.
            modified_atoms = modify_atom(existing_atom,
                                         existing_atom.element,
                                         num_bonds,
                                         res_name=residue.name)
        except ParamError:
            err_str = (
                'Failed to add atoms {} to atom {} because this will '
                'lead to having {} atoms attached, which is more than its '
                'assigned geometry can support. This is probably due to an '
                'error in the MD template ({}). If this template is built '
                'into ISOLDE, please report this using Help/Report a bug'
            ).format([a.name for a in new_tatoms], existing_atom.name,
                     num_bonds, md_template.name)
            raise UserError(err_str)
        # The first returned atom is skipped; the remaining ones are turned
        # into the template atoms one by one.
        new_atoms = modified_atoms[1:]
        for na, ta in zip(new_atoms, new_tatoms):
            modify_atom(na,
                        Element.get_element(ta.element.atomic_number),
                        1,
                        name=ta.name,
                        res_name=residue.name)
Ejemplo n.º 17
0
def _find_donors(structure, d_params, limited_donors, generic_don_info):
    """Collect the hydrogen-bond donor atoms of ``structure``.

    Each entry of ``d_params`` is unpacked as
    ``(group_key, donorIndex, geom_type, tau_sym, arg_list, test_dist)``.
    An entry with a truthy ``group_key`` is matched with ``find_group``;
    an entry with a falsy ``group_key`` selects "generic" donors instead.

    ``limited_donors``, when truthy, restricts the result to its members.
    ``generic_don_info`` is accepted but not used in this function.

    Returns a pair of parallel lists ``(don_atoms, don_data)``, where each
    ``don_data`` item is ``(geom_type, tau_sym, arg_list, test_dist)``.

    Side effects: writes the module globals ``_problem`` (a conflicting
    group assignment) and ``_truncated`` (donors skipped because template
    heavy neighbors are missing).
    """
    don_atoms = []
    don_data = []
    # donor atom -> group_key of the parameter entry that claimed it
    std_donors = {}
    for dp in d_params:
        group_key, donorIndex, geom_type, tau_sym, arg_list, test_dist = dp

        if group_key:
            groups = find_group(group_key, [structure])
        else:
            # generic donors
            groups = []
            for atom in structure.atoms:
                # atoms already claimed by an earlier entry are not generic
                if atom in std_donors:
                    continue
                if atom.element.number not in [7, 8, 16]:
                    continue
                if limited_donors and atom not in limited_donors:
                    continue
                # oxygen, nitrogen, or sulfur
                try:
                    expect_bonds = type_info[atom.idatm_type].substituents
                except KeyError:
                    # unknown atom type: expect no bonds at all
                    expect_bonds = 0
                num_bonds = atom.num_bonds
                # screen out the partial terminal N that AddH can leave, since the geometry is
                # problematic and the H direction isn't really determined
                if atom.idatm_type == "Npl" and num_bonds == 2 \
                and 1 in [n.element.number for n in atom.neighbors]:
                    continue
                # fewer bonds than the type expects: treated as a donor
                if num_bonds < expect_bonds:
                    groups.append([atom])
                    continue
                # otherwise it is a donor only if a hydrogen is attached
                for bonded in atom.neighbors:
                    if bonded.element.number == 1:
                        groups.append([atom])
                        break
            if verbose:
                for g in groups:
                    print("generic donor:", g[0])

        if groups and geom_type == theta_tau:
            # extend probe distance by H-bond length so that all relevant acceptors will be found
            # (the length is taken from the donor of the first group only)
            test_dist = test_dist + Element.bond_length(
                groups[0][donorIndex].element, 'H')
        for group in groups:
            donor_atom = group[donorIndex]
            if limited_donors and donor_atom not in limited_donors:
                continue
            if donor_atom in std_donors:
                # already claimed; record a problem unless the clash is an
                # accepted ring / non-ring one
                if group_key != std_donors[donor_atom] and not (
                        # conflicts of non-ring groups with ring
                        # groups not considered a problem (non-ring
                        # groups "win")
                        group_key[0] in _ring_funcs
                        and std_donors[donor_atom][0] not in _ring_funcs):
                    global _problem
                    _problem = ("donor", donor_atom, std_donors[donor_atom],
                                group_key)
                continue
            if donor_atom.is_missing_heavy_template_neighbors(
                    no_template_okay=True):
                # remembered in _truncated and left out of the result
                global _truncated
                _truncated.add(donor_atom)
                continue
            std_donors[donor_atom] = group_key
            don_atoms.append(donor_atom)
            don_data.append((geom_type, tau_sym, arg_list, test_dist))
    return don_atoms, don_data
Ejemplo n.º 18
0
def find_hbonds(session,
                structures,
                *,
                inter_model=True,
                intra_model=True,
                donors=None,
                acceptors=None,
                dist_slop=0.0,
                angle_slop=0.0,
                inter_submodel=False,
                cache_da=False,
                status=True):
    """Hydrogen bond detection based on criteria in "Three-dimensional
        hydrogen-bond geometry and probability information from a
        crystal survey", J. Computer-Aided Molecular Design, 10 (1996),
        607-622

        If donors and/or acceptors are specified (as :py:class:`~chimerax.atomic.Atoms` collections
        or anything an Atoms collection can be constructued from), then H-bond donors/acceptors
        are restricted to being from those atoms.

        Dist/angle slop are the amount that distances/angles are allowed to exceed
        the values given in the above reference and still be considered hydrogen bonds.

        'cache_da' allows donors/acceptors in molecules to be cached if it is anticipated that
        the same structures will be examined for H-bonds repeatedly (e.g. a dynamics trajectory).

        If 'per_coordset' is True and 'structures' contains a single structure with multiple coordinate
        sets, then hydrogen bonds will be computed for each coordset.

        If 'status' is True, progress will be logged to the status line.

        Returns a list of donor/acceptor pairs, unless the conditions for 'per_coordset' are
        satisfied, in which case a list of such lists will be returned, one per coordset.
    """

    # hack to speed up coordinate lookup...
    from chimerax.atomic import Atoms, Atom
    if len(structures) == 1 or not inter_model or (len(
            set([
                m if m.id is None else
                (m.id[0] if len(m.id) == 1 else m.id[:-1]) for m in structures
            ])) == 1 and not inter_submodel):
        Atom._hb_coord = Atom.coord
    else:
        Atom._hb_coord = Atom.scene_coord
    try:
        if donors and not isinstance(donors, Atoms):
            limited_donors = Atoms(donors)
        else:
            limited_donors = donors
        if acceptors and not isinstance(acceptors, Atoms):
            limited_acceptors = Atoms(acceptors)
        else:
            limited_acceptors = acceptors
        global _d_cache, _a_cache, _prev_limited
        if cache_da:
            if limited_donors:
                dIDs = [id(d) for d in limited_donors]
                dIDs.sort()
            else:
                dIDs = None
            if limited_acceptors:
                aIDs = [id(a) for a in limited_acceptors]
                aIDs.sort()
            else:
                aIDs = None
            key = (dIDs, aIDs)
            if _prev_limited and _prev_limited != key:
                flush_cache()
            _prev_limited = key
            from weakref import WeakKeyDictionary
            if _d_cache is None:
                _d_cache = WeakKeyDictionary()
                _a_cache = WeakKeyDictionary()
        else:
            flush_cache()
        global donor_params, acceptor_params
        global processed_donor_params, processed_acceptor_params
        global _compute_cache
        global verbose
        global _problem
        _problem = None
        global _truncated
        _truncated = set()

        bad_connectivities = 0

        # Used (as necessary) to cache expensive calculations (by other functions also)
        _compute_cache = {}

        process_key = (dist_slop, angle_slop)
        if process_key not in processed_acceptor_params:
            # copy.deepcopy() refuses to copy functions (even as
            # references), so do this instead...
            a_params = []
            for p in acceptor_params:
                a_params.append(copy.copy(p))

            for i in range(len(a_params)):
                a_params[i][3] = _process_arg_tuple(a_params[i][3], dist_slop,
                                                    angle_slop)
            processed_acceptor_params[process_key] = a_params
        else:
            a_params = processed_acceptor_params[process_key]

        # compute some info for generic acceptors/donors
        generic_acc_info = {}
        # oxygens...
        generic_O_acc_args = _process_arg_tuple([3.53, 90], dist_slop,
                                                angle_slop)
        generic_acc_info['misc_O'] = (acc_generic, generic_O_acc_args)
        # dictionary based on bonded atom's geometry...
        generic_acc_info['O2-'] = {
            single: (acc_generic, generic_O_acc_args),
            linear: (acc_generic, generic_O_acc_args),
            planar: (acc_phi_psi,
                     _process_arg_tuple([3.53, 90, 130], dist_slop,
                                        angle_slop)),
            tetrahedral: (acc_generic, generic_O_acc_args)
        }
        generic_acc_info['O3-'] = generic_acc_info['O2-']
        generic_acc_info['O2'] = {
            single: (acc_generic, generic_O_acc_args),
            linear: (acc_generic, generic_O_acc_args),
            planar: (acc_phi_psi,
                     _process_arg_tuple([3.30, 110, 130], dist_slop,
                                        angle_slop)),
            tetrahedral: (acc_theta_tau,
                          _process_arg_tuple([3.03, 100, -180, 145], dist_slop,
                                             angle_slop))
        }
        # list based on number of known bonded atoms...
        generic_acc_info['O3'] = [(acc_generic, generic_O_acc_args),
                                  (acc_theta_tau,
                                   _process_arg_tuple([3.17, 100, -161, 145],
                                                      dist_slop, angle_slop)),
                                  (acc_phi_psi,
                                   _process_arg_tuple([3.42, 120, 135],
                                                      dist_slop, angle_slop))]
        # nitrogens...
        generic_N_acc_args = _process_arg_tuple([3.42, 90], dist_slop,
                                                angle_slop)
        generic_acc_info['misc_N'] = (acc_generic, generic_N_acc_args)
        generic_acc_info['N2'] = (acc_phi_psi,
                                  _process_arg_tuple([3.42, 140, 135],
                                                     dist_slop, angle_slop))
        # tuple based on number of bonded heavy atoms...
        generic_N3_mult_heavy_acc_args = _process_arg_tuple(
            [3.30, 153, -180, 145], dist_slop, angle_slop)
        generic_acc_info['N3'] = (
            (acc_generic, generic_N_acc_args),
            # only one example to draw from; weaken by .1A, 5 degrees
            (acc_theta_tau,
             _process_arg_tuple([3.13, 98, -180, 150], dist_slop, angle_slop)),
            (acc_theta_tau, generic_N3_mult_heavy_acc_args),
            (acc_theta_tau, generic_N3_mult_heavy_acc_args))
        # one example only; weaken by .1A, 5 degrees
        generic_acc_info['N1'] = (acc_theta_tau,
                                  _process_arg_tuple([3.40, 136, -180, 145],
                                                     dist_slop, angle_slop))
        # sulfurs...
        # one example only; weaken by .1A, 5 degrees
        generic_acc_info['S2'] = (acc_phi_psi,
                                  _process_arg_tuple([3.83, 85, 140],
                                                     dist_slop, angle_slop))
        generic_acc_info['Sar'] = generic_acc_info['S3-'] = (
            acc_generic, _process_arg_tuple([3.83, 85], dist_slop, angle_slop))
        # now the donors...

        # planar nitrogens
        gen_don_Npl_1h_params = (don_theta_tau,
                                 _process_arg_tuple([
                                     2.23, 136, 2.23, 141, 140, 2.46, 136, 140
                                 ], dist_slop, angle_slop))
        gen_don_Npl_2h_params = (don_upsilon_tau,
                                 _process_arg_tuple([
                                     3.30, 90, -153, 135, -45, 3.30, 90, -146,
                                     140, -37.5, 130, 3.40, 108, -166, 125,
                                     -35, 140
                                 ], dist_slop, angle_slop))
        gen_don_O_dists = [2.41, 2.28, 2.28, 3.27, 3.14, 3.14]
        gen_don_O_params = (don_generic,
                            _process_arg_tuple(gen_don_O_dists, dist_slop,
                                               angle_slop))
        gen_don_N_dists = [2.36, 2.48, 2.48, 3.30, 3.42, 3.42]
        gen_don_N_params = (don_generic,
                            _process_arg_tuple(gen_don_N_dists, dist_slop,
                                               angle_slop))
        gen_don_S_dists = [2.42, 2.42, 2.42, 3.65, 3.65, 3.65]
        gen_don_S_params = (don_generic,
                            _process_arg_tuple(gen_don_S_dists, dist_slop,
                                               angle_slop))
        generic_don_info = {
            'O': gen_don_O_params,
            'N': gen_don_N_params,
            'S': gen_don_S_params
        }

        from chimerax.atom_search import AtomSearchTree
        metal_coord = {}
        acc_trees = {}
        hbonds = []
        has_sulfur = {}
        for structure in structures:
            if status:
                session.logger.status("Finding acceptors in model '%s'" %
                                      structure.name,
                                      blank_after=0)
            if structure.PBG_METAL_COORDINATION in structure.pbg_map:
                for pb in structure.pbg_map[
                        structure.PBG_METAL_COORDINATION].pseudobonds:
                    a1, a2 = pb.atoms
                    if a1.element.is_metal:
                        metal_coord.setdefault(a2, []).append(a1)
                    if a2.element.is_metal:
                        metal_coord.setdefault(a1, []).append(a2)
            if cache_da and structure in _a_cache and (
                    dist_slop, angle_slop) in _a_cache[structure]:
                acc_atoms = []
                acc_data = []
                for acc_atom, data in _a_cache[structure][(
                        dist_slop, angle_slop)].items():
                    if not acc_atom.deleted:
                        acc_atoms.append(acc_atom)
                        acc_data.append(data)
            else:
                acc_atoms, acc_data = _find_acceptors(structure, a_params,
                                                      limited_acceptors,
                                                      generic_acc_info)
                if cache_da:
                    cache = WeakKeyDictionary()
                    for i in range(len(acc_atoms)):
                        cache[acc_atoms[i]] = acc_data[i]
                    if structure not in _a_cache:
                        _a_cache[structure] = {}
                    _a_cache[structure][(dist_slop, angle_slop)] = cache
            #xyz = []
            has_sulfur[structure] = False
            for acc_atom in acc_atoms:
                #c = acc_atom._hb_coord
                #xyz.append([c[0], c[1], c[2]])
                if acc_atom.element == Element.get_element('S'):
                    has_sulfur[structure] = True
            if status:
                session.logger.status("Building search tree of acceptor atoms",
                                      blank_after=0)
            acc_trees[structure] = AtomSearchTree(
                acc_atoms,
                data=acc_data,
                sep_val=3.0,
                scene_coords=(Atom._hb_coord == Atom.scene_coord))

        if process_key not in processed_donor_params:
            # find max donor distances before they get squared..

            # copy.deepcopy() refuses to copy functions (even as
            # references), so do this instead...
            d_params = []
            for p in donor_params:
                d_params.append(copy.copy(p))

            for di in range(len(d_params)):
                geom_type = d_params[di][2]
                arg_list = d_params[di][4]
                don_rad = Element.bond_radius('N')
                if geom_type == theta_tau:
                    max_dist = max((arg_list[0], arg_list[2], arg_list[5]))
                elif geom_type == upsilon_tau:
                    max_dist = max((arg_list[0], arg_list[5], arg_list[11]))
                elif geom_type == water:
                    max_dist = max((arg_list[1], arg_list[4], arg_list[8]))
                else:
                    max_dist = max(gen_don_O_dists + gen_don_N_dists +
                                   gen_don_S_dists)
                    don_rad = Element.bond_radius('S')
                d_params[di].append(max_dist + dist_slop + don_rad +
                                    Element.bond_radius('H'))

            for i in range(len(d_params)):
                d_params[i][4] = _process_arg_tuple(d_params[i][4], dist_slop,
                                                    angle_slop)
            processed_donor_params[process_key] = d_params
        else:
            d_params = processed_donor_params[process_key]

        generic_water_params = _process_arg_tuple(
            [2.36, 2.36 + OH_bond_dist, 146], dist_slop, angle_slop)
        generic_theta_tau_params = _process_arg_tuple([2.48, 132], dist_slop,
                                                      angle_slop)
        generic_upsilon_tau_params = _process_arg_tuple([3.42, 90, -161, 125],
                                                        dist_slop, angle_slop)
        generic_generic_params = _process_arg_tuple([2.48, 3.42, 130, 90],
                                                    dist_slop, angle_slop)
        for dmi in range(len(structures)):
            structure = structures[dmi]
            if status:
                session.logger.status("Finding donors in model '%s'" %
                                      structure.name,
                                      blank_after=0)
            if cache_da and structure in _d_cache and (
                    dist_slop, angle_slop) in _d_cache[structure]:
                don_atoms = []
                don_data = []
                for don_atom, data in _d_cache[structure][(
                        dist_slop, angle_slop)].items():
                    if not don_atom.deleted:
                        don_atoms.append(don_atom)
                        don_data.append(data)
            else:
                don_atoms, don_data = _find_donors(structure, d_params,
                                                   limited_donors,
                                                   generic_don_info)
                if cache_da:
                    cache = WeakKeyDictionary()
                    for i in range(len(don_atoms)):
                        cache[don_atoms[i]] = don_data[i]
                    if structure not in _d_cache:
                        _d_cache[structure] = {}
                    _d_cache[structure][(dist_slop, angle_slop)] = cache

            if status:
                session.logger.status(
                    "Matching donors in model '%s' to acceptors" %
                    structure.name,
                    blank_after=0)
            for i in range(len(don_atoms)):
                donor_atom = don_atoms[i]
                geom_type, tau_sym, arg_list, test_dist = don_data[i]
                donor_hyds = hyd_positions(donor_atom)
                coord = donor_atom._hb_coord
                for acc_structure in structures:
                    if acc_structure == structure and not intra_model or acc_structure != structure and not inter_model:
                        continue
                    if not inter_submodel \
                    and acc_structure.id and structure.id \
                    and acc_structure.id[0] == structure.id[0] \
                    and acc_structure.id[:-1] == structure.id[:-1] \
                    and acc_structure.id[1:] != structure.id[1:]:
                        continue
                    if has_sulfur[acc_structure]:
                        from .common_geom import SULFUR_COMP
                        td = test_dist + SULFUR_COMP
                    else:
                        td = test_dist
                    accs = acc_trees[acc_structure].search(coord, td)
                    if verbose:
                        session.logger.info(
                            "Found %d possible acceptors for donor %s:" %
                            (len(accs), donor_atom))
                        for acc_data in accs:
                            session.logger.info("\t%s\n" % acc_data[0])
                    for acc_atom, geom_func, args in accs:
                        if acc_atom == donor_atom:
                            # e.g. hydroxyl
                            if verbose:
                                print("skipping: donor == acceptor")
                            continue
                        try:
                            if not geom_func(donor_atom, donor_hyds, *args):
                                continue
                        except ConnectivityError as e:
                            session.logger.info(
                                "Skipping possible acceptor with bad geometry: %s\n%s\n"
                                % (acc_atom, e))
                            bad_connectivities += 1
                            continue
                        except Exception:
                            print("donor:", donor_atom, " acceptor:", acc_atom)
                            raise
                        if verbose:
                            session.logger.info(
                                "\t%s satisfies acceptor criteria" % acc_atom)
                        if geom_type == upsilon_tau:
                            donor_func = don_upsilon_tau
                            add_args = generic_upsilon_tau_params + [tau_sym]
                        elif geom_type == theta_tau:
                            donor_func = don_theta_tau
                            add_args = generic_theta_tau_params
                        elif geom_type == water:
                            donor_func = don_water
                            add_args = generic_water_params
                        else:
                            if donor_atom.idatm_type in ["Npl", "N2+"]:
                                heavys = 0
                                for bonded in donor_atom.neighbors:
                                    if bonded.element.number > 1:
                                        heavys += 1
                                if heavys > 1:
                                    info = gen_don_Npl_1h_params
                                else:
                                    info = gen_don_Npl_2h_params
                            else:
                                info = generic_don_info[
                                    donor_atom.element.name]
                            donor_func, arg_list = info
                            add_args = generic_generic_params
                            if donor_func == don_upsilon_tau:
                                # tack on generic
                                # tau symmetry
                                add_args = generic_upsilon_tau_params + [4]
                            elif donor_func == don_theta_tau:
                                add_args = generic_theta_tau_params
                        try:
                            if not donor_func(donor_atom, donor_hyds, acc_atom,
                                              *tuple(arg_list + add_args)):
                                continue
                        except ConnectivityError as e:
                            session.logger.info(
                                "Skipping possible donor with bad geometry: %s\n%s\n"
                                % (donor_atom, e))
                            bad_connectivities += 1
                            continue
                        except AtomTypeError as e:
                            session.logger.warning(str(e))
                            #_problem = ("atom type", donor_atom, str(e), None)
                            continue
                        if verbose:
                            session.logger.info(
                                "\t%s satisfies donor criteria" % donor_atom)
                        # ensure hbond isn't precluded by metal-coordination...
                        if acc_atom in metal_coord:
                            from chimerax.geometry import angle
                            conflict = False
                            for metal in metal_coord[acc_atom]:
                                if angle(donor_atom._hb_coord,
                                         acc_atom._hb_coord,
                                         metal._hb_coord) < 90.0:
                                    if verbose:
                                        session.logger.info(
                                            "\tH-bond conflicts with"
                                            " metal coordination to %s" %
                                            metal)
                                    conflict = True
                                    break
                            if conflict:
                                continue
                        hbonds.append((donor_atom, acc_atom))
            if status:
                session.logger.status("")
        if bad_connectivities:
            session.logger.warning(
                "Skipped %d atom(s) with bad connectivities; see log for details"
                % bad_connectivities)
        if _problem:
            if session.ui.is_gui:
                # report a bug when atom matches multiple donor/acceptor descriptions
                da, atom, grp1, grp2 = _problem
                res_atoms = atom.residue.atoms

                def res_atom_rep(a):
                    try:
                        i = res_atoms.index(a)
                    except ValueError:
                        return "other %s" % a.element.name
                    return "%2d" % (i + 1)

                descript = "geometry class 1: %s\n\ngeometry class 2: %s" % (
                    repr(grp1), repr(grp2))
                from chimerax.core.logger import report_exception
                report_exception(
                    error_description=
                    """At least one atom was classified into more than one acceptor or donor
    geometry class.  This indicates a problem in the
    donr/acceptor classification mechanism and we would appreciate it if you
    would use the bug-report button below to send us the information that
    will allow us to improve the classification code.

    residue name: %s

    problem %s atom: %d

    residue atoms:
        %s

    residue bonds:
        %s

    %s
    """ % (atom.residue.name, da, res_atoms.index(atom) + 1,
                "\n\t".join([
                "%2d %-4s %-s (%s)" %
                (en[0] + 1, en[1].name, en[1].idatm_type, str(en[1].coord))
                for en in enumerate(res_atoms)
                ]), "\n\t".join([
                "%s <-> %-s" %
                (res_atom_rep(b.atoms[0]), res_atom_rep(b.atoms[1]))
                for b in atom.residue.atoms.bonds
                ]), descript))
            _problem = None
        if _truncated:
            if len(_truncated) > 20:
                session.logger.warning(
                    "%d atoms were skipped as donors/acceptors due to missing"
                    " heavy-atom bond partners" % len(_truncated))
            else:
                session.logger.warning(
                    "The following atoms were skipped as donors/acceptors due to missing"
                    " heavy-atom bond partners: %s" %
                    "; ".join([str(a) for a in _truncated]))
            _truncated = None
    finally:
        delattr(Atom, "_hb_coord")
    return hbonds
Ejemplo n.º 19
0
    def update_chix(self, chix_residue, refresh_connected=True):
        """Change a ChimeraX residue in place so that it matches self.

        The residue is renamed to ``self.name``.  Every atom in
        ``self.atoms`` is then paired with a ChimeraX atom: an existing
        ``chix_atom`` is reused when it is still present in
        ``chix_residue``, otherwise a new atom is created and stored on
        ``atom.chix_atom``.  ChimeraX atoms of the residue that were not
        paired with anything are deleted, and atoms that are new (or whose
        element changed) are renamed to ``<element symbol><number>`` unless
        their current name is already acceptable.

        :param chix_residue: ChimeraX residue to update
        :param refresh_connected: if True, call
            ``self.refresh_chix_connected(chix_residue)`` once the atoms
            have been synchronised
        """
        # ChimeraX atoms that correspond to an atom of self (kept)
        known_atoms = []
        # atoms that were created or re-typed here; these get renamed and
        # are handed to the preset at the end
        new_atoms = []

        chix_residue.name = self.name

        for atom in self.atoms:
            if not hasattr(atom, "chix_atom") or \
               atom.chix_atom is None or \
               atom.chix_atom.deleted or atom.chix_atom not in chix_residue.atoms:
                # no usable ChimeraX counterpart: create one
                if hasattr(atom, "chix_name"):
                    atom_name = atom.chix_name
                else:
                    atom_name = atom.name
                # names containing "." or longer than 4 characters are
                # rejected here; fall back to the element symbol
                if "." in atom_name or len(atom_name) > 4:
                    atom_name = atom.element

                new_atom = chix_residue.structure.new_atom(
                    atom_name, atom.element)
                new_atom.coord = atom.coords

                chix_residue.add_atom(new_atom)
                atom.chix_atom = new_atom
                known_atoms.append(new_atom)
                new_atoms.append(new_atom)

            else:
                # counterpart exists: sync element (if changed) and position
                if atom.chix_atom.element.name != atom.element:
                    atom.chix_atom.element = Element.get_element(atom.element)
                    new_atoms.append(atom.chix_atom)

                atom.chix_atom.coord = atom.coords
                known_atoms.append(atom.chix_atom)

        # drop ChimeraX atoms that no longer correspond to anything in self
        for atom in chix_residue.atoms:
            if atom not in known_atoms:
                atom.delete()

        for atom in new_atoms:
            # keep the current name when it is unique among the kept atoms,
            # is the element symbol followed by something, and has no "."
            if ([a.name for a in known_atoms].count(atom.name) == 1
                    and atom.name.startswith(atom.element.name)
                    and atom.name != atom.element.name
                    and "." not in atom.name):
                continue
            if not atom.name.startswith(atom.element.name):
                atom.name = atom.element.name

            # try <name>1, <name>2, ... until no kept atom has that name.
            # The `k == 1` test forces at least one pass through the body,
            # including after the base name is reset below.
            atom_name = "%s1" % atom.name
            k = 1
            while k == 1 or any(
                    chix_atom.name == atom_name for chix_atom in known_atoms):
                atom_name = "%s%i" % (atom.name, k)
                k += 1
                if len(atom_name) > 4:
                    if atom.name == atom.element.name:
                        # already using the shortest base name; give up
                        break
                    # candidate too long: restart numbering from the bare
                    # element symbol
                    atom.name = atom.element.name
                    k = 1

            if len(atom_name) <= 4:
                atom.name = atom_name
            else:
                atom.name = atom.element.name

        if refresh_connected:
            self.refresh_chix_connected(chix_residue)

        # -1 is treated as "serial number not assigned yet"; use the atom's
        # 1-based position in its structure instead
        for atom in chix_residue.atoms:
            if atom.serial_number == -1:
                atom.serial_number = atom.structure.atoms.index(atom) + 1

        apply_seqcrow_preset(chix_residue.structure, atoms=new_atoms)
Ejemplo n.º 20
0
    def _make_structure(self, block):
        """Build a ChimeraX structure from a single Maestro block.

        Atoms come from the block's "m_atom" sub-block and are grouped into
        residues keyed by (chain name, residue number, insertion code).
        Bonds come from the "m_bond" sub-block; when that sub-block is
        missing or empty, ``connect_structure()`` is called instead.

        Returns the new structure, or None (after printing a message) when
        the block has no "m_atom" sub-block.
        """
        from numpy import array
        from .maestro import IndexAttribute
        from chimerax.atomic import Element
        if self.atomic:
            from chimerax.atomic import AtomicStructure as SC
        else:
            from chimerax.atomic import Structure as SC

        atom_block = block.get_sub_block("m_atom")
        if atom_block is None:
            print("No m_atom block found")
            return None
        bond_block = block.get_sub_block("m_bond")
        structure = SC(self.session, auto_style=self.auto_style)
        SC.register_attr(self.session, "viewdockx_data", "ViewDockX")

        coord_keys = ("r_m_x_coord", "r_m_y_coord", "r_m_z_coord")
        # (atom attribute, Maestro key, value used when the key is missing
        # or the stored value is rejected with a TypeError)
        numeric_fields = (
            ("bfactor", "r_m_pdb_tfactor", 0.0),
            ("occupancy", "r_m_pdb_occupancy", 1.0),
        )

        residues = {}
        atoms_by_index = {}
        for row in range(atom_block.size):
            attrs = atom_block.get_attribute_map(row)
            index = attrs[IndexAttribute]

            # Look up the residue this atom belongs to, creating it on
            # first sight
            res_seq = attrs["i_m_residue_number"]
            insert_code = attrs.get("s_m_insertion_code", None) or ' '
            chain_id = attrs.get("s_m_chain_name", ' ')
            res_key = (chain_id, res_seq, insert_code)
            if res_key not in residues:
                residues[res_key] = structure.new_residue(
                    attrs.get("s_m_pdb_residue_name", "UNK"),
                    chain_id, res_seq, insert_code)
            residue = residues[res_key]
            ribbon_rgb = attrs.get("s_m_ribbon_color_rgb", None)
            if ribbon_rgb:
                residue.ribbon_color = self._get_color(ribbon_rgb)

            # Create the atom; the PDB atom name is preferred, then the
            # plain atom name, then the element name
            if "s_m_pdb_atom_name" in attrs:
                name = attrs["s_m_pdb_atom_name"]
            else:
                name = attrs.get("s_m_atom_name", "")
            name = name.strip()
            element = Element.get_element(attrs.get("i_m_atomic_number", 6))
            atom = structure.new_atom(name or element.name, element)
            atom.coord = array(
                [atom_block.get_attribute(key, row) for key in coord_keys])
            for attr_name, key, fallback in numeric_fields:
                try:
                    setattr(atom, attr_name, attrs[key])
                except (KeyError, TypeError):
                    setattr(atom, attr_name, fallback)
            atom_rgb = attrs.get("s_m_color_rgb", None)
            if atom_rgb:
                atom.color = self._get_color(atom_rgb)

            # Attach to the residue and remember the atom for bonding
            residue.add_atom(atom)
            atoms_by_index[index] = atom

        if bond_block is None or bond_block.size == 0:
            structure.connect_structure()
            return structure

        for row in range(bond_block.size):
            attrs = bond_block.get_attribute_map(row)
            from_index = attrs["i_m_from"]
            to_index = attrs["i_m_to"]
            if to_index < from_index:
                # Bonds are reported in both directions; one is enough
                continue
            bond = structure.new_bond(atoms_by_index[from_index],
                                      atoms_by_index[to_index])
            bond.order = attrs["i_m_order"]
        return structure
Ejemplo n.º 21
0
                if (_donor(a) and _acceptor(nb)) or (_donor(nb)
                                                     and _acceptor(a)):
                    clash -= hbond_allowance
            if distance_only:
                if clash < 0.0:
                    continue
            elif clash < clash_threshold:
                continue
            clashes.setdefault(a, {})[nb] = clash
            clashes.setdefault(nb, {})[a] = clash
    return clashes


from chimerax.atomic import Element

# Element objects used by _donor: hydrogen (atomic number 1) and the set of
# N, O and S.
hyd = Element.get_element(1)
negative = {Element.get_element(symbol) for symbol in ("N", "O", "S")}
from chimerax.atomic.idatm import type_info


def _donor(a):
    """Return True when atom `a` is recognised as a hydrogen-bond donor.

    Two cases yield True:
      * `a` is a hydrogen whose first neighbor is in `negative`;
      * `a` is itself in `negative` and has fewer bonds than the
        substituent count that `type_info` lists for its IDATM type.

    In every other case (including an IDATM type missing from `type_info`)
    the function falls off the end and returns None.
    """
    element = a.element
    if element == hyd:
        bonded_to_negative = (a.num_bonds > 0
                              and a.neighbors[0].element in negative)
        if bonded_to_negative:
            return True
    elif element in negative:
        try:
            # fewer bonds than expected substituents => implicit hydrogen
            has_implicit_hydrogen = (
                a.num_bonds < type_info[a.idatm_type].substituents)
        except KeyError:
            has_implicit_hydrogen = False
        if has_implicit_hydrogen:
            return True
Ejemplo n.º 22
0
def read_sdf(session, stream, file_name):
    """Read MOL/SDF text from `stream` into ChimeraX atomic structures.

    The input is processed line by line with a small state machine:
    "init" (molecule name line), two further header lines, "counts",
    "atoms", "bonds", "properties" (until "M  END") and "data" (until
    "$$$$", which starts the next record).  Each record becomes one
    AtomicStructure holding a single "UNL" residue.  Two data items are
    interpreted: partial charges (a data name ending in "charges" and
    containing "partial") and "pubchem_compound_cid".

    :param session: ChimeraX session; used for attribute registration,
        structure creation and logging
    :param stream: iterable of text lines; it is always closed before this
        function returns or raises
    :param file_name: name used in messages and as the structure name when
        the molecule name line is not informative
    :returns: tuple ``(structures, "")``
    :raises UserError: on malformed counts/atom/bond lines, bad charge or
        CID data, a record without atoms, or an unexpected end of file
        before any structure was produced
    """
    structures = []
    nonblank = False
    state = "init"
    from chimerax.core.errors import UserError
    from chimerax.atomic.struct_edit import add_atom
    from chimerax.atomic import AtomicStructure, Element, Bond, Atom
    from chimerax.atomic.structure import is_informative_name
    from numpy import array
    Bond.register_attr(session, "order", "SDF format", attr_type=float)
    Atom.register_attr(session, "charge", "SDF format", attr_type=float)
    AtomicStructure.register_attr(session, "charge_model", "SDF format", attr_type=str)
    try:
        for raw_line in stream:
            # fixed-column fields are sliced from raw_line; `line` is only
            # used for blank/keyword tests
            line = raw_line.strip()
            nonblank = nonblank or line
            if state == "init":
                state = "post header 1"
                mol_name = line
            elif state == "post header 1":
                state = "post header 2"
            elif state == "post header 2":
                state = "counts"
            elif state == "counts":
                if not line:
                    break
                serial = 1
                anums = {}
                atoms = []
                try:
                    num_atoms = int(raw_line[:3].strip())
                    num_bonds = int(raw_line[3:6].strip())
                except ValueError:
                    raise UserError("Atom/bond counts line of MOL/SDF file '%s' is botched" % file_name)
                name = mol_name if is_informative_name(mol_name) else file_name
                s = AtomicStructure(session, name=name)
                structures.append(s)
                r = s.new_residue("UNL", " ", 1)
                # With no atoms declared, skip straight to the properties
                # state so the "No atoms found" error is reported instead of
                # trying to parse the next line as an atom line.
                state = "atoms" if num_atoms > 0 else "properties"
            elif state == "atoms":
                num_atoms -= 1
                if num_atoms == 0:
                    state = "bonds" if num_bonds else "properties"
                try:
                    x = float(raw_line[:10].strip())
                    y = float(raw_line[10:20].strip())
                    z = float(raw_line[20:30].strip())
                    elem = raw_line[31:34].strip()
                except ValueError:
                    # `s` is in `structures`, so the handler at the bottom
                    # deletes it; deleting it here as well would delete twice
                    raise UserError("Atom line of MOL/SDF file '%s' is not x y z element...: '%s'"
                        % (file_name, raw_line))
                element = Element.get_element(elem)
                if element.number == 0:
                    # lone pair or somesuch; keep a placeholder so that
                    # atom indices used by bonds/charges stay aligned
                    atoms.append(None)
                    continue
                anum = anums.get(element.name, 0) + 1
                anums[element.name] = anum
                a = add_atom("%s%d" % (element.name, anum), element, r, array([x,y,z]), serial_number=serial)
                serial += 1
                atoms.append(a)
            elif state == "bonds":
                num_bonds -= 1
                if num_bonds == 0:
                    state = "properties"
                try:
                    a1_index = int(raw_line[:3].strip())
                    a2_index = int(raw_line[3:6].strip())
                    order = float(raw_line[6:9].strip())
                except ValueError:
                    raise UserError("Bond line of MOL/SDF file '%s' is not a1 a2 order...: '%s'"
                        % (file_name, raw_line))
                a1 = atoms[a1_index-1]
                a2 = atoms[a2_index-1]
                if not a1 or not a2:
                    continue
                s.new_bond(a1, a2).order = order
            elif state == "properties":
                if not s.atoms:
                    raise UserError("No atoms found for compound '%s' in MOL/SDF file '%s'" % (name, file_name))
                if line.split() == ["M", "END"]:
                    state = "data"
                    reading_data = None
            elif state == "data":
                if line == "$$$$":
                    nonblank = False
                    state = "init"
                elif reading_data == "charges":
                    data_item = line.strip()
                    if data_item:
                        try:
                            data.append(float(data_item))
                        except ValueError:
                            try:
                                index, charge = data_item.split()
                                index = int(index) - 1
                                charge = float(charge)
                            except ValueError:
                                raise UserError("Charge data (%s) in %s data is not either a floating-point"
                                    " number or an atom index and a floating-point number" % (data_item,
                                    orig_data_name))
                            else:
                                if not indexed_charges:
                                    # for indexed charges, the first thing is a count
                                    if data:
                                        data.pop()
                                    indexed_charges = True
                                data.append((index, charge))
                    else:
                        # a blank line ends the charge data item
                        if indexed_charges:
                            # charge defaults to 0.0, so atoms without an
                            # entry need no assignment
                            for index, charge in data:
                                if not 0 <= index < len(atoms):
                                    raise UserError("Atom index (%d) in %s data is out of range"
                                        " (number of atoms: %d)" % (index + 1, orig_data_name, len(atoms)))
                                if atoms[index] is not None:
                                    atoms[index].charge = charge
                        else:
                            if len(atoms) != len(data):
                                raise UserError("Number of charges (%d) in %s data not equal to number of atoms"
                                    " (%d)" % (len(data), orig_data_name, len(atoms)))
                            for a, charge in zip(atoms, data):
                                # None marks a skipped lone-pair entry
                                if a is not None:
                                    a.charge = charge
                        if "mmff94" in data_name:
                            s.charge_model = "MMFF94"
                        reading_data = None
                elif reading_data == "cid":
                    data_item = line.strip()
                    if data_item:
                        try:
                            cid = int(data_item)
                        except ValueError:
                            raise UserError("PubChem CID (%s) in %s data is not an integer" % (data_item,
                                orig_data_name))
                        s.name = "pubchem:%d" % cid
                        s.prefix_html_title = False
                        s.get_html_title = lambda *args, cid=cid: 'PubChem entry <a href="https://pubchem.ncbi.nlm.nih.gov/compound/%d">%d</a>' % (cid, cid)
                        s.has_formatted_metadata = lambda *args: False
                        reading_data = None
                elif line.startswith('>'):
                    # data header of the form ">  <NAME>"
                    try:
                        lp = line.index('<')
                        rp = line[lp+1:].index('>') + lp + 1
                    except (IndexError, ValueError):
                        continue
                    orig_data_name = line[lp+1:rp]
                    data_name = orig_data_name.lower()
                    if data_name.endswith("charges") and "partial" in data_name:
                        reading_data = "charges"
                        indexed_charges = False
                        data = []
                    elif data_name == "pubchem_compound_cid":
                        reading_data = "cid"
    except BaseException:
        # don't leave partially built structures behind on any failure
        for s in structures:
            s.delete()
        raise
    finally:
        stream.close()

    if nonblank and state not in ["data", "init"]:
        if structures:
            session.logger.warning("Extraneous text after final $$$$ in MOL/SDF file '%s'" % file_name)
        else:
            raise UserError("Unexpected end of file (parser state: %s) in MOL/SDF file '%s'"
                % (state, file_name))

    return structures, ""