Beispiel #1
0
def start_at(gra, key):
    """ start a v-matrix at a specific atom

    Returns the started vmatrix, along with keys to atoms whose neighbors are
    missing from it
    """
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(gra,
                                                   symbs_first=(
                                                       'X',
                                                       'C',
                                                   ),
                                                   symbs_last=('H', ),
                                                   ords_last=(0.1, ))

    ngb_keys = ngb_keys_dct[key]
    if not ngb_keys:
        zma_keys = []
    elif len(ngb_keys) == 1:
        # Need special handling for atoms with only one neighbor
        if symb_dct[key] in ('H', 'X'):
            key2 = ngb_keys[0]
            zma_keys = (key2, ) + ngb_keys_dct[key2]
        else:
            key2 = ngb_keys[0]
            ngb_keys = tuple(k for k in ngb_keys_dct[key2] if k != key)
            zma_keys = (key, key2) + ngb_keys
    else:
        zma_keys = (key, ) + ngb_keys_dct[key]

    vma = ()
    for row, key_ in enumerate(zma_keys):
        idx1 = idx2 = idx3 = None
        if row > 0:
            key1 = next(k for k in ngb_keys_dct[key_] if k in zma_keys[:row])
            idx1 = zma_keys.index(key1)
        if row > 1:
            key2 = next(k for k in ngb_keys_dct[key1]
                        if k in zma_keys[:row] and k != key_)
            idx2 = zma_keys.index(key2)
        if row > 2:
            key3 = next(k for k in zma_keys[:row]
                        if k not in (key_, key1, key2))
            idx3 = zma_keys.index(key3)

        sym = symb_dct[key_]
        key_row = [idx1, idx2, idx3]
        vma = automol.vmat.add_atom(vma, sym, key_row)

    return vma, zma_keys
Beispiel #2
0
def geometry(gra):
    """ Convert a molecular graph to a molecular geometry.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :rtype: automol molecular geometry data structure
    """

    symbs = atom_symbols(gra)
    if len(symbs) != 1:
        gra = explicit(gra)
        geo = automol.graph.embed.geometry(gra)
    else:
        symb = list(symbs.values())[0]
        # symb = list(symbs.keys())[0]
        geo = ((symb, (0.00, 0.00, 0.00)),)

    return geo
Beispiel #3
0
def heuristic_bond_distance(gra, key1, key2, angstrom=True, check=False):
    """ heuristic bond distance (in angstroms)
    """
    if check:
        assert key1 in atoms_neighbor_atom_keys(gra)[key2]

    symb_dct = atom_symbols(gra)
    symb1 = symb_dct[key1]
    symb2 = symb_dct[key2]

    if ptab.to_number(symb1) == 1 or ptab.to_number(symb2) == 1:
        dist = XH_DIST
    else:
        dist = XY_DIST

    dist *= 1 if angstrom else phycon.ANG2BOHR

    return dist
Beispiel #4
0
def molfile_with_atom_mapping(gra, geo=None, geo_idx_dct=None):
    """ Generate an MOLFile from a molecular graph.
        If coordinates are passed in, they are used to determine stereo.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param geo: molecular geometry
        :type geo: automol geometry data structure
        :param geo_idx_dct:
        :type geo_idx_dct: dict[:]
        :returns: the MOLFile string, followed by a mapping from MOLFile atoms
            to atoms in the graph
        :rtype: (str, dict)
    """
    gra = without_dummy_atoms(gra)
    gra = dominant_resonance(gra)
    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))
    atm_syms = dict_.values_by_key(atom_symbols(gra), atm_keys)
    atm_bnd_vlcs = dict_.values_by_key(atom_bond_valences(gra), atm_keys)
    atm_rad_vlcs = dict_.values_by_key(atom_unsaturated_valences(gra),
                                       atm_keys)
    bnd_ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    if geo is not None:
        assert geo_idx_dct is not None
        atm_xyzs = automol.geom.base.coordinates(geo)
        atm_xyzs = [
            atm_xyzs[geo_idx_dct[atm_key]] if atm_key in geo_idx_dct else
            (0., 0., 0.) for atm_key in atm_keys
        ]
    else:
        atm_xyzs = None

    mlf, key_map_inv = molfile.from_data(atm_keys,
                                         bnd_keys,
                                         atm_syms,
                                         atm_bnd_vlcs,
                                         atm_rad_vlcs,
                                         bnd_ords,
                                         atm_xyzs=atm_xyzs)
    return mlf, key_map_inv
Beispiel #5
0
def inchi_with_sort_from_geometry(gra, geo=None, geo_idx_dct=None):
    """ Generate an InChI string from a molecular graph.
        If coordinates are passed in, they are used to determine stereo.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param geo: molecular geometry
        :type geo: automol geometry data structure
        :param geo_idx_dct:
        :type geo_idx_dct: dict[:]
        :rtype: (str, tuple(int))
    """
    gra = without_dummy_atoms(gra)
    gra = dominant_resonance(gra)
    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))
    atm_syms = dict_.values_by_key(atom_symbols(gra), atm_keys)
    atm_bnd_vlcs = dict_.values_by_key(
        atom_bond_valences(gra), atm_keys)
    atm_rad_vlcs = dict_.values_by_key(
        atom_unsaturated_valences(gra), atm_keys)
    bnd_ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    if geo is not None:
        assert geo_idx_dct is not None
        atm_xyzs = automol.geom.base.coordinates(geo)
        atm_xyzs = [atm_xyzs[geo_idx_dct[atm_key]] if atm_key in geo_idx_dct
                    else (0., 0., 0.) for atm_key in atm_keys]
    else:
        atm_xyzs = None

    mlf, key_map_inv = molfile.from_data(
        atm_keys, bnd_keys, atm_syms, atm_bnd_vlcs, atm_rad_vlcs, bnd_ords,
        atm_xyzs=atm_xyzs)
    rdm = rdkit_.from_molfile(mlf)
    ich, aux_info = rdkit_.to_inchi(rdm, with_aux_info=True)
    nums = _parse_sort_order_from_aux_info(aux_info)
    nums = tuple(map(key_map_inv.__getitem__, nums))

    return ich, nums
Beispiel #6
0
def _extend_chain_to_include_terminal_hydrogens(gra, keys,
                                                start=True, end=True):
    """ extend each end of a chain to include terminal hydrogens, if any
    """
    symb_dct = atom_symbols(gra)
    atm_ngb_dct = atoms_neighbor_atom_keys(gra)

    sta_ngbs = atm_ngb_dct[keys[0]] - {keys[1]}
    end_ngbs = atm_ngb_dct[keys[-1]] - {keys[-2]}

    sta_ngb = min((k for k in sta_ngbs if symb_dct[k] == 'H'), default=None)
    end_ngb = min((k for k in end_ngbs if symb_dct[k] == 'H'), default=None)

    keys = tuple(keys)

    if start and sta_ngb is not None:
        keys = (sta_ngb,) + keys

    if end and end_ngb is not None:
        keys = keys + (end_ngb,)

    return keys
Beispiel #7
0
def qualitative_convergence_checker_(gra,
                                     keys,
                                     rqq_bond_max=1.8,
                                     rqh_bond_max=1.3,
                                     rhh_bond_max=1.1,
                                     bond_nobond_diff=0.3):
    """ a convergence checker for error minimization, checking that the
    geometry is qualitatively correct (correct connectivity and stereo)
    """
    symb_dct = atom_symbols(gra)
    pairs = set(map(frozenset, itertools.combinations(keys, 2)))

    bnd_keys = pairs & bond_keys(gra)
    nob_keys = pairs - bond_keys(gra)

    nob_symbs = tuple(
        tuple(map(symb_dct.__getitem__, nob_key)) for nob_key in nob_keys)
    bnd_symbs = tuple(
        tuple(map(symb_dct.__getitem__, bnd_key)) for bnd_key in bnd_keys)
    nob_idxs = tuple(tuple(map(keys.index, nob_key)) for nob_key in nob_keys)
    bnd_idxs = tuple(tuple(map(keys.index, bnd_key)) for bnd_key in bnd_keys)

    bnd_udists = tuple(
        (rqq_bond_max if 'H' not in symb else rhh_bond_max if set(symb) ==
         {'H'} else rqh_bond_max) for symb in bnd_symbs)

    diff = bond_nobond_diff
    nob_ldists = tuple(
        (rqq_bond_max + diff if 'H' not in symb else rhh_bond_max +
         diff if set(symb) == {'H'} else rqh_bond_max + diff)
        for symb in nob_symbs)

    bnd_idxs += tuple(map(tuple, map(reversed, bnd_idxs)))
    bnd_idx_vecs = tuple(map(list, zip(*bnd_idxs)))
    bnd_udists *= 2

    nob_idxs += tuple(map(tuple, map(reversed, nob_idxs)))
    nob_idx_vecs = tuple(map(list, zip(*nob_idxs)))
    nob_ldists *= 2

    symbs = tuple(map(symb_dct.__getitem__, keys))
    geo_idx_dct = dict(map(reversed, enumerate(keys)))
    atm_ste_keys = atom_stereo_keys(gra) & set(keys)
    bnd_ste_keys = bond_stereo_keys(gra) & bnd_keys
    atm_ste_par_dct = atom_stereo_parities(gra)
    bnd_ste_par_dct = bond_stereo_parities(gra)

    def _is_converged(xmat, err, grad):
        assert err and numpy.any(grad)
        xyzs = xmat[:, :3]
        dmat = embed.distance_matrix_from_coordinates(xyzs)

        # check for correct connectivity
        connectivity_check = ((numpy.all(
            dmat[bnd_idx_vecs] < bnd_udists) if bnd_udists else True)
                              and (numpy.all(dmat[nob_idx_vecs] > nob_ldists)
                                   if nob_ldists else True))

        # check for correct stereo parities
        geo = automol.geom.base.from_data(symbs, xyzs, angstrom=True)
        atom_stereo_check = all((atom_stereo_parity_from_geometry(
            gra, atm_key, geo, geo_idx_dct) == atm_ste_par_dct[atm_key])
                                for atm_key in atm_ste_keys)

        bond_stereo_check = all((bond_stereo_parity_from_geometry(
            gra, bnd_key, geo, geo_idx_dct) == bnd_ste_par_dct[bnd_key])
                                for bnd_key in bnd_ste_keys)

        return connectivity_check and atom_stereo_check and bond_stereo_check

    return _is_converged
Beispiel #8
0
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ continue constructing a v-matrix along a chain

    All neighboring atoms along the chain will be included

    Exactly one atom in the chain must already be in the v-matrix

    :param gra: the graph for which the v-matrix will be constructed
    :param keys: the keys for atoms along the chain, which must be contiguous;
        the first atom must already appear in the v-matrix
    :param vma: a partial v-matrix from which to continue
    :param zma_keys: row keys for the partial v-matrix, identifying the atom
        specified by each row of `vma` in order
    :param branch_keys: optionally, restrict the v-matrix to these keys and
        their neighbors; if `None`, the entire branch will be included
    """
    branch_keys = atom_keys(gra) if branch_keys is None else branch_keys
    keys = _extend_chain_to_include_anchoring_atoms(gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C',), symbs_last=('H',), ords_last=(0.1,),
        prioritize_keys=branch_keys)

    # If this z-matrix is being continued from a partial z-matrix, the leading
    # atom for a torsion may have already be defined. To handle this case, I
    # make a dictionary of these leading atoms and use them below where needed.
    lead_key_dct = {}
    for idx, key_row in enumerate(automol.vmat.key_matrix(vma)):
        axis = key_row[:2]
        if None not in axis:
            axis = tuple(map(zma_keys.__getitem__, axis))
            if axis not in lead_key_dct:
                lead_key_dct[axis] = zma_keys[idx]

    def _continue(key1, key2, key3, vma, zma_keys):
        k3ns = list(ngb_keys_dct[key3])
        for k3n in set(k3ns) & set(zma_keys):
            k3ns.remove(k3n)

        if k3ns:
            key4 = k3ns.pop(0)

            lead_key = None
            if (key3, key2) in lead_key_dct:
                lead_key = lead_key_dct[(key3, key2)]

            # Add the leading atom to the v-matrix
            symb = symb_dct[key4]
            dkey = key1 if lead_key is None else lead_key
            key_row = list(map(zma_keys.index, (key3, key2, dkey)))
            vma = automol.vmat.add_atom(vma, symb, key_row)
            assert key4 not in zma_keys, ("Atom {:d} already in v-matrix."
                                          .format(key4))
            zma_keys.append(key4)

            dkey = key4 if lead_key is None else lead_key
            # Add the neighbors of atom 3 (if any) to the v-matrix, decoupled
            # from atom 1 for properly decopuled torsions
            for k3n in k3ns:
                sym = symb_dct[k3n]

                if symb_dct[dkey] == 'X':
                    key_row = list(map(zma_keys.index, (key3, dkey, key2)))
                else:
                    key_row = list(map(zma_keys.index, (key3, key2, dkey)))

                vma = automol.vmat.add_atom(vma, sym, key_row)
                assert k3n not in zma_keys, ("Atom {:d} already in v-matrix."
                                             .format(k3n))
                zma_keys.append(k3n)

            # Recursion
            if key4 in branch_keys:
                vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

            if symb_dct[key4] == 'X':
                key2 = key4

            for k3n in k3ns:
                if k3n in branch_keys:
                    vma, zma_keys = _continue(key2, key3, k3n, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = keys[:3]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys
Beispiel #9
0
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ continue constructing a v-matrix along a chain

    All neighboring atoms along the chain will be included

    Exactly one atom in the chain must already be in the v-matrix

    :param gra: the graph for which the v-matrix will be constructed
    :param vma: a partial v-matrix from which to continue
    :param zma_keys: row keys for the partial v-matrix, identifying the atom
        specified by each row of `vma` in order
    :param branch_keys: optionally, restrict the v-matrix to these keys and
        their neighbors; if `None`, the entire branch will be included
    """
    branch_keys = atom_keys(gra) if branch_keys is None else branch_keys
    keys = _extend_chain_to_include_anchoring_atoms(gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(gra,
                                                   symbs_first=(
                                                       'X',
                                                       'C',
                                                   ),
                                                   symbs_last=('H', ),
                                                   ords_last=(0.1, ))

    def _continue(key1, key2, key3, vma, zma_keys):
        k3ns = list(ngb_keys_dct[key3])
        for k3n in set(k3ns) & set(zma_keys):
            k3ns.remove(k3n)

        if k3ns:
            key4 = k3ns.pop(0)

            # Add the leading atom to the v-matrix
            symb = symb_dct[key4]
            key_row = list(map(zma_keys.index, (key3, key2, key1)))
            vma = automol.vmat.add_atom(vma, symb, key_row)
            assert key4 not in zma_keys, f"Atom {key4:d} already in v-matrix"
            zma_keys.append(key4)

            # Add the neighbors of atom 3 (if any) to the v-matrix, decoupled
            # from atom 1 for properly decopuled torsions
            for k3n in k3ns:
                sym = symb_dct[k3n]

                if symb_dct[key4] == 'X':
                    key_row = list(map(zma_keys.index, (key3, key4, key2)))
                else:
                    key_row = list(map(zma_keys.index, (key3, key2, key4)))

                vma = automol.vmat.add_atom(vma, sym, key_row)
                assert k3n not in zma_keys, f"Atom {k3n:d} already in v-matrix"
                zma_keys.append(k3n)

            # Recursion
            if key4 in branch_keys:
                vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

            if symb_dct[key4] == 'X':
                key2 = key4

            for k3n in k3ns:
                if k3n in branch_keys:
                    vma, zma_keys = _continue(key2, key3, k3n, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = keys[0], keys[1], keys[2]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys