def start_at(gra, key):
    """ start a v-matrix at a specific atom

    The starting rows are chosen from the atom's sorted neighbors:

    - no neighbors: nothing is added (empty v-matrix, empty key list)
    - one neighbor, and the atom is 'H' or 'X': start from that neighbor,
      followed by the neighbor's own neighbors
    - one neighbor otherwise: start from the atom, then its neighbor, then the
      neighbor's other neighbors
    - several neighbors: start from the atom, followed by its neighbors

    :param gra: the graph for which the v-matrix will be constructed
    :param key: the key of the atom to start from
    :returns: the started v-matrix, along with the atom keys for its rows, in
        row order (an empty list if nothing was added, otherwise a tuple)
    """
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C'), symbs_last=('H',), ords_last=(0.1,))

    start_ngbs = ngb_keys_dct[key]
    if not start_ngbs:
        # NOTE(review): an atom without neighbors yields an empty v-matrix
        # that does not contain the atom itself -- confirm this is intended.
        zma_keys = []
    elif len(start_ngbs) > 1:
        zma_keys = (key,) + ngb_keys_dct[key]
    elif symb_dct[key] in ('H', 'X'):
        # Single-neighbor hydrogens/dummies: start from the neighbor instead
        anchor = start_ngbs[0]
        zma_keys = (anchor,) + ngb_keys_dct[anchor]
    else:
        # Single-neighbor heavy atom: place it first, then its neighbor, then
        # whatever else is attached to that neighbor
        anchor = start_ngbs[0]
        others = tuple(k for k in ngb_keys_dct[anchor] if k != key)
        zma_keys = (key, anchor) + others

    vma = ()
    for row, row_key in enumerate(zma_keys):
        placed = zma_keys[:row]
        key_row = [None, None, None]

        if row > 0:
            # Distance reference: first sorted neighbor already in the matrix
            ref1 = next(k for k in ngb_keys_dct[row_key] if k in placed)
            key_row[0] = zma_keys.index(ref1)

            if row > 1:
                # Angle reference: a placed neighbor of the distance reference
                ref2 = next(k for k in ngb_keys_dct[ref1]
                            if k in placed and k != row_key)
                key_row[1] = zma_keys.index(ref2)

                if row > 2:
                    # Dihedral reference: any other atom already placed
                    ref3 = next(k for k in placed
                                if k not in (row_key, ref1, ref2))
                    key_row[2] = zma_keys.index(ref3)

        vma = automol.vmat.add_atom(vma, symb_dct[row_key], key_row)

    return vma, zma_keys
def geometry(gra):
    """ Convert a molecular graph to a molecular geometry.

    A graph with exactly one atom entry is returned directly as that atom
    at the origin; any other graph is made explicit and handed to the
    embedding routine.

    :param gra: molecular graph
    :type gra: automol graph data structure
    :rtype: automol molecular geometry data structure
    """
    symb_dct = atom_symbols(gra)

    if len(symb_dct) == 1:
        # NOTE(review): `explicit` is not applied on this path, so the result
        # always contains exactly one atom -- confirm that is intended for
        # graphs carrying implicit hydrogens.
        symb = next(iter(symb_dct.values()))
        return ((symb, (0.00, 0.00, 0.00)),)

    return automol.graph.embed.geometry(explicit(gra))
def heuristic_bond_distance(gra, key1, key2, angstrom=True, check=False):
    """ heuristic bond distance between two atoms

    Uses `XH_DIST` if either atom has atomic number 1, and `XY_DIST`
    otherwise.

    :param gra: molecular graph
    :param key1: key of the first atom
    :param key2: key of the second atom
    :param angstrom: if True, return the distance as stored; otherwise
        multiply it by `phycon.ANG2BOHR`
    :param check: if True, assert that the two atoms are neighbors
    :returns: the heuristic distance
    """
    if check:
        assert key1 in atoms_neighbor_atom_keys(gra)[key2]

    symb_dct = atom_symbols(gra)
    symb_pair = (symb_dct[key1], symb_dct[key2])

    involves_hydrogen = any(ptab.to_number(symb) == 1 for symb in symb_pair)
    dist = XH_DIST if involves_hydrogen else XY_DIST

    scale = 1 if angstrom else phycon.ANG2BOHR
    return dist * scale
def molfile_with_atom_mapping(gra, geo=None, geo_idx_dct=None):
    """ Generate an MOLFile from a molecular graph.

    Dummy atoms are removed and the dominant resonance structure is used.
    If a geometry is passed in, its coordinates are forwarded to the MOLFile
    writer; atoms missing from `geo_idx_dct` are given (0., 0., 0.).

    :param gra: molecular graph
    :type gra: automol graph data structure
    :param geo: molecular geometry
    :type geo: automol geometry data structure
    :param geo_idx_dct: mapping from graph atom keys to indices into the
        geometry's coordinates; required when `geo` is given
    :type geo_idx_dct: dict[:]
    :returns: the MOLFile string, followed by a mapping from MOLFile atoms to
        atoms in the graph
    :rtype: (str, dict)
    """
    gra = dominant_resonance(without_dummy_atoms(gra))

    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))

    # Per-atom and per-bond data, ordered like the key lists above
    symbs = dict_.values_by_key(atom_symbols(gra), atm_keys)
    bnd_vlcs = dict_.values_by_key(atom_bond_valences(gra), atm_keys)
    rad_vlcs = dict_.values_by_key(atom_unsaturated_valences(gra), atm_keys)
    ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    xyzs = None
    if geo is not None:
        assert geo_idx_dct is not None
        geo_xyzs = automol.geom.base.coordinates(geo)
        xyzs = []
        for atm_key in atm_keys:
            if atm_key in geo_idx_dct:
                xyzs.append(geo_xyzs[geo_idx_dct[atm_key]])
            else:
                # No coordinates known for this atom
                xyzs.append((0., 0., 0.))

    mlf, key_map_inv = molfile.from_data(
        atm_keys, bnd_keys, symbs, bnd_vlcs, rad_vlcs, ords, atm_xyzs=xyzs)
    return mlf, key_map_inv
def inchi_with_sort_from_geometry(gra, geo=None, geo_idx_dct=None):
    """ Generate an InChI string from a molecular graph.

    The graph (without dummy atoms, in its dominant resonance structure) is
    written to a MOLFile, which is converted to an InChI through RDKit. If a
    geometry is passed in, its coordinates are forwarded to the MOLFile
    writer; atoms missing from `geo_idx_dct` are given (0., 0., 0.).

    :param gra: molecular graph
    :type gra: automol graph data structure
    :param geo: molecular geometry
    :type geo: automol geometry data structure
    :param geo_idx_dct: mapping from graph atom keys to indices into the
        geometry's coordinates; required when `geo` is given
    :type geo_idx_dct: dict[:]
    :returns: the InChI string, followed by the sort order parsed from the
        InChI auxiliary information, mapped through the MOLFile key map
    :rtype: (str, tuple(int))
    """
    gra = dominant_resonance(without_dummy_atoms(gra))

    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))

    # Per-atom and per-bond data, ordered like the key lists above
    symbs = dict_.values_by_key(atom_symbols(gra), atm_keys)
    bnd_vlcs = dict_.values_by_key(atom_bond_valences(gra), atm_keys)
    rad_vlcs = dict_.values_by_key(atom_unsaturated_valences(gra), atm_keys)
    ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    xyzs = None
    if geo is not None:
        assert geo_idx_dct is not None
        geo_xyzs = automol.geom.base.coordinates(geo)
        xyzs = []
        for atm_key in atm_keys:
            if atm_key in geo_idx_dct:
                xyzs.append(geo_xyzs[geo_idx_dct[atm_key]])
            else:
                # No coordinates known for this atom
                xyzs.append((0., 0., 0.))

    mlf, key_map_inv = molfile.from_data(
        atm_keys, bnd_keys, symbs, bnd_vlcs, rad_vlcs, ords, atm_xyzs=xyzs)

    rdm = rdkit_.from_molfile(mlf)
    ich, aux_info = rdkit_.to_inchi(rdm, with_aux_info=True)

    # Translate the parsed sort order through the MOLFile key map
    sort_nums = _parse_sort_order_from_aux_info(aux_info)
    nums = tuple(key_map_inv[num] for num in sort_nums)
    return ich, nums
def _extend_chain_to_include_terminal_hydrogens(gra, keys,
                                                start=True, end=True):
    """ extend each end of a chain to include terminal hydrogens, if any

    For each end of the chain, the off-chain neighbors of the end atom are
    searched for atoms with symbol 'H'; the one with the smallest key (if any)
    is attached to that end.

    :param gra: molecular graph
    :param keys: keys of the atoms along the chain; the first two and last
        two entries are indexed, so at least two keys are needed
    :param start: whether to extend the start of the chain
    :param end: whether to extend the end of the chain
    :returns: the (possibly extended) chain keys, as a tuple
    """
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    def _lowest_hydrogen_key(end_key, chain_ngb_key):
        """ smallest-keyed 'H' neighbor of a chain end, excluding the
        adjacent chain atom; `None` if there is none
        """
        off_chain_keys = ngb_keys_dct[end_key] - {chain_ngb_key}
        hyd_keys = [k for k in off_chain_keys if symb_dct[k] == 'H']
        return min(hyd_keys) if hyd_keys else None

    sta_hyd_key = _lowest_hydrogen_key(keys[0], keys[1])
    end_hyd_key = _lowest_hydrogen_key(keys[-1], keys[-2])

    chain = tuple(keys)
    if start and sta_hyd_key is not None:
        chain = (sta_hyd_key,) + chain
    if end and end_hyd_key is not None:
        chain = chain + (end_hyd_key,)
    return chain
def qualitative_convergence_checker_(gra, keys, rqq_bond_max=1.8,
                                     rqh_bond_max=1.3, rhh_bond_max=1.1,
                                     bond_nobond_diff=0.3):
    """ a convergence checker for error minimization, checking that the
    geometry is qualitatively correct (correct connectivity and stereo)

    Connectivity is judged from the distance matrix: every bonded pair must
    be closer than its maximum bond distance, and every non-bonded pair must
    be farther apart than that maximum plus `bond_nobond_diff`. Stereo is
    judged by comparing the parities computed from the geometry against the
    parities stored in the graph.

    :param gra: molecular graph
    :param keys: sequence of atom keys, in the row order of the coordinate
        matrix passed to the checker
    :param rqq_bond_max: maximum bond distance for pairs without 'H'
    :param rqh_bond_max: maximum bond distance for pairs with exactly one 'H'
    :param rhh_bond_max: maximum bond distance for 'H'-'H' pairs
    :param bond_nobond_diff: margin added to the maximum bond distance to get
        the minimum distance for non-bonded pairs
    :returns: a function `_is_converged(xmat, err, grad)` returning a truth
        value
    """
    symb_dct = atom_symbols(gra)
    symbs = tuple(map(symb_dct.__getitem__, keys))

    # Position of each atom key in `keys`; built once so that pair indices
    # don't need a linear `keys.index` search for every pair
    geo_idx_dct = {atm_key: idx for idx, atm_key in enumerate(keys)}

    all_bnd_keys = bond_keys(gra)
    pairs = set(map(frozenset, itertools.combinations(keys, 2)))
    bnd_keys = pairs & all_bnd_keys
    nob_keys = pairs - all_bnd_keys

    def _max_bond_distance(symb_pair):
        """ maximum bond distance for a pair of atomic symbols """
        if 'H' not in symb_pair:
            return rqq_bond_max
        if set(symb_pair) == {'H'}:
            return rhh_bond_max
        return rqh_bond_max

    def _index_vectors_and_limits(pair_keys, margin):
        """ distance-matrix index vectors and distance limits for a set of
        atom pairs, with each pair listed in both orders
        """
        idxs = ()
        dists = ()
        for pair_key in pair_keys:
            pair = tuple(pair_key)
            idxs += (tuple(geo_idx_dct[k] for k in pair),)
            symb_pair = tuple(symb_dct[k] for k in pair)
            dists += (_max_bond_distance(symb_pair) + margin,)

        # Cover both (i, j) and (j, i) entries of the distance matrix
        idxs += tuple(tuple(reversed(idx)) for idx in idxs)
        idx_vecs = tuple(map(list, zip(*idxs)))
        return idx_vecs, dists * 2

    bnd_idx_vecs, bnd_udists = _index_vectors_and_limits(bnd_keys, 0)
    nob_idx_vecs, nob_ldists = _index_vectors_and_limits(
        nob_keys, bond_nobond_diff)

    atm_ste_keys = atom_stereo_keys(gra) & set(keys)
    bnd_ste_keys = bond_stereo_keys(gra) & bnd_keys
    atm_ste_par_dct = atom_stereo_parities(gra)
    bnd_ste_par_dct = bond_stereo_parities(gra)

    def _is_converged(xmat, err, grad):
        # `err` and `grad` belong to the checker signature but play no role
        # in the qualitative check. They used to be "touched" by an assert,
        # which raised whenever the error was exactly zero or the gradient
        # was all zeros.
        del err, grad

        xyzs = xmat[:, :3]
        dmat = embed.distance_matrix_from_coordinates(xyzs)

        # check for correct connectivity (an empty pair list passes trivially)
        connectivity_check = (
            (numpy.all(dmat[bnd_idx_vecs] < bnd_udists)
             if bnd_udists else True) and
            (numpy.all(dmat[nob_idx_vecs] > nob_ldists)
             if nob_ldists else True))

        # check for correct stereo parities
        geo = automol.geom.base.from_data(symbs, xyzs, angstrom=True)
        atom_stereo_check = all(
            (atom_stereo_parity_from_geometry(gra, atm_key, geo, geo_idx_dct)
             == atm_ste_par_dct[atm_key])
            for atm_key in atm_ste_keys)

        bond_stereo_check = all(
            (bond_stereo_parity_from_geometry(gra, bnd_key, geo, geo_idx_dct)
             == bnd_ste_par_dct[bnd_key])
            for bnd_key in bnd_ste_keys)

        return connectivity_check and atom_stereo_check and bond_stereo_check

    return _is_converged
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ continue constructing a v-matrix along a chain

    All neighboring atoms along the chain will be included

    Exactly one atom in the chain must already be in the v-matrix

    :param gra: the graph for which the v-matrix will be constructed
    :param key: the atom key from which the chain is built; it is passed, as
        a one-atom chain, to `_extend_chain_to_include_anchoring_atoms`, and
        the first three keys of the result seed the recursion
    :param vma: a partial v-matrix from which to continue
    :param zma_keys: row keys for the partial v-matrix, identifying the atom
        specified by each row of `vma` in order
    :param branch_keys: optionally, restrict the v-matrix to these keys and
        their neighbors; if `None`, the entire branch will be included
    :returns: the extended v-matrix and its row keys (as a list)
    """
    # NOTE(review): a second `complete_branch` is defined later in this
    # source; if both live in the same module, the later one replaces this
    # one at import time -- confirm which is intended.
    if branch_keys is None:
        branch_keys = atom_keys(gra)
    chain_keys = _extend_chain_to_include_anchoring_atoms(
        gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C'), symbs_last=('H',), ords_last=(0.1,),
        prioritize_keys=branch_keys)

    # When continuing from a partial v-matrix, the leading atom for a torsion
    # about a given axis may already be defined. Record, for each axis (the
    # first two reference atoms of a row), the first atom defined against it,
    # so that new atoms on the same axis can reuse it below.
    lead_key_dct = {}
    for row_idx, key_row in enumerate(automol.vmat.key_matrix(vma)):
        axis_idxs = key_row[:2]
        if None in axis_idxs:
            continue
        axis = tuple(zma_keys[idx] for idx in axis_idxs)
        if axis not in lead_key_dct:
            lead_key_dct[axis] = zma_keys[row_idx]

    def _continue(key1, key2, key3, vma, zma_keys):
        # Neighbors of atom 3 which are not yet in the v-matrix
        placed = set(zma_keys)
        new_keys = [k for k in ngb_keys_dct[key3] if k not in placed]
        if not new_keys:
            return vma, zma_keys

        key4, sib_keys = new_keys[0], new_keys[1:]
        lead_key = lead_key_dct.get((key3, key2))

        # Add the leading atom to the v-matrix
        symb = symb_dct[key4]
        dkey = lead_key if lead_key is not None else key1
        key_row = [zma_keys.index(k) for k in (key3, key2, dkey)]
        vma = automol.vmat.add_atom(vma, symb, key_row)
        assert key4 not in zma_keys, ("Atom {:d} already in v-matrix."
                                      .format(key4))
        zma_keys.append(key4)

        # Add the remaining neighbors of atom 3 (if any) to the v-matrix,
        # defined against the leading atom rather than atom 1, so that the
        # torsions are properly decoupled
        dkey = lead_key if lead_key is not None else key4
        for sib_key in sib_keys:
            sym = symb_dct[sib_key]

            if symb_dct[dkey] == 'X':
                ref_keys = (key3, dkey, key2)
            else:
                ref_keys = (key3, key2, dkey)
            key_row = [zma_keys.index(k) for k in ref_keys]

            vma = automol.vmat.add_atom(vma, sym, key_row)
            assert sib_key not in zma_keys, (
                "Atom {:d} already in v-matrix.".format(sib_key))
            zma_keys.append(sib_key)

        # Recursion: first along the leading atom, then along its siblings
        if key4 in branch_keys:
            vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

        # A dummy leading atom takes over as atom 1 for the sibling branches
        if symb_dct[key4] == 'X':
            key2 = key4

        for sib_key in sib_keys:
            if sib_key in branch_keys:
                vma, zma_keys = _continue(key2, key3, sib_key, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = chain_keys[:3]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ continue constructing a v-matrix along a chain

    All neighboring atoms along the chain will be included

    Exactly one atom in the chain must already be in the v-matrix

    :param gra: the graph for which the v-matrix will be constructed
    :param key: the atom key from which the chain is built; it is passed, as
        a one-atom chain, to `_extend_chain_to_include_anchoring_atoms`, and
        the first three keys of the result seed the recursion
    :param vma: a partial v-matrix from which to continue
    :param zma_keys: row keys for the partial v-matrix, identifying the atom
        specified by each row of `vma` in order
    :param branch_keys: optionally, restrict the v-matrix to these keys and
        their neighbors; if `None`, the entire branch will be included
    :returns: the extended v-matrix and its row keys (as a list)
    """
    if branch_keys is None:
        branch_keys = atom_keys(gra)
    chain_keys = _extend_chain_to_include_anchoring_atoms(
        gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C'), symbs_last=('H',), ords_last=(0.1,))

    def _continue(key1, key2, key3, vma, zma_keys):
        # Neighbors of atom 3 which are not yet in the v-matrix
        placed = set(zma_keys)
        new_keys = [k for k in ngb_keys_dct[key3] if k not in placed]
        if not new_keys:
            return vma, zma_keys

        key4, sib_keys = new_keys[0], new_keys[1:]

        # Add the leading atom to the v-matrix
        symb = symb_dct[key4]
        key_row = [zma_keys.index(k) for k in (key3, key2, key1)]
        vma = automol.vmat.add_atom(vma, symb, key_row)
        assert key4 not in zma_keys, f"Atom {key4:d} already in v-matrix"
        zma_keys.append(key4)

        # Add the remaining neighbors of atom 3 (if any) to the v-matrix,
        # defined against the leading atom rather than atom 1, so that the
        # torsions are properly decoupled
        for sib_key in sib_keys:
            sym = symb_dct[sib_key]

            if symb_dct[key4] == 'X':
                ref_keys = (key3, key4, key2)
            else:
                ref_keys = (key3, key2, key4)
            key_row = [zma_keys.index(k) for k in ref_keys]

            vma = automol.vmat.add_atom(vma, sym, key_row)
            assert sib_key not in zma_keys, (
                f"Atom {sib_key:d} already in v-matrix")
            zma_keys.append(sib_key)

        # Recursion: first along the leading atom, then along its siblings
        if key4 in branch_keys:
            vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

        # A dummy leading atom takes over as atom 1 for the sibling branches
        if symb_dct[key4] == 'X':
            key2 = key4

        for sib_key in sib_keys:
            if sib_key in branch_keys:
                vma, zma_keys = _continue(key2, key3, sib_key, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = chain_keys[0], chain_keys[1], chain_keys[2]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys