def vmatrix(gra, keys=None, rng_keys=None):
    """ Build a v-matrix for a connected graph

        When the graph has ring systems, construction is seeded from them
        (and the connections between them). Otherwise it is seeded from the
        lowest-keyed terminal heavy atom or, if there is none, from the
        lowest-keyed atom. The atoms not yet placed are then handed to
        `continue_vmatrix`.

        :param gra: the graph
        :param keys: restrict the v-matrix to a subset of keys, which must
            span a connected graph
        :param rng_keys: keys for a ring to start from
        :returns: the v-matrix, and the graph keys for its rows
    """
    work_gra = gra if keys is None else subgraph(gra, keys)

    assert is_connected(work_gra), "Graph must be connected!"

    if not ring_systems(work_gra):
        # No rings: pick a deterministic starting atom
        heavy_ends = sorted(terminal_heavy_atom_keys(work_gra))
        seed_key = (heavy_ends[0] if heavy_ends
                    else sorted(atom_keys(work_gra))[0])
        vma, zma_keys = start_at(work_gra, seed_key)
    else:
        vma, zma_keys = connected_ring_systems(work_gra, rng_keys=rng_keys)

    # Whatever the seed did not cover still needs to be added
    unplaced_keys = atom_keys(work_gra) - set(zma_keys)
    vma, zma_keys = continue_vmatrix(work_gra, unplaced_keys, vma, zma_keys)
    return vma, zma_keys
def connected_ring_systems(gra, rng_keys=None, check=True):
    """ Build a v-matrix covering a graph's ring systems and the connections
        between them

        :param gra: the graph
        :param rng_keys: keys of the ring to start from; if `None`, the first
            ring (sorted by atom count) of the first ring system (sorted by
            atom count) is used
        :param check: assert that the graph is connected?
        :returns: the v-matrix, and the graph keys for its rows
    """
    if check:
        assert is_connected(gra), "Graph must be connected!"

    pending_rsys = sorted(ring_systems(gra), key=atom_count)

    if rng_keys is not None:
        # Find the first ring system containing every requested ring atom
        idx = None
        for pos, rsy_keys in enumerate(map(atom_keys, pending_rsys)):
            if set(rng_keys) <= rsy_keys:
                idx = pos
                break
        assert idx is not None, (
            "The ring {} is not in this graph:\n{}"
            .format(str(rng_keys), string(gra, one_indexed=False)))
        rsy = pending_rsys.pop(idx)
    else:
        # Default start: smallest ring of the smallest ring system
        rsy = pending_rsys.pop(0)
        ordered_rngs = sorted(rings(rsy), key=atom_count)
        rng_keys = sorted_ring_atom_keys(ordered_rngs[0])

    # V-matrix for the starting ring system
    keys_lst = list(ring_system_decomposed_atom_keys(rsy, rng_keys=rng_keys))
    vma, zma_keys = ring_system(gra, keys_lst)

    # Continue through the ring systems that are still pending
    unplaced_keys = atom_keys(gra) - set(zma_keys)
    vma, zma_keys = continue_connected_ring_systems(
        gra, unplaced_keys, vma, zma_keys, rsys=pending_rsys, check=False)
    return vma, zma_keys
def geometry(gra, keys=None, ntries=5, max_dist_err=0.2):
    """ sample a qualitatively-correct stereo geometry

        :param gra: the graph, which may or may not have stereo
        :param keys: graph keys, in the order in which they should appear in
            the geometry; defaults to the sorted atom keys of the graph
        :param ntries: number of tries for finding a valid geometry
        :param max_dist_err: maximum distance error convergence threshold
        :returns: a geometry built from the first converged sample
        :raises error.FailedGeometryGenerationError: if none of the `ntries`
            attempts converges (including when `ntries` is less than 1)

        Qualitatively-correct means it has the right connectivity and the
        right stereo parities, but its bond lengths and bond angles may not be
        quantitatively realistic
    """
    assert gra == explicit(gra), (
        "Graph => geometry conversion requires explicit hydrogens!\n"
        "Use automol.graph.explicit() to convert to an explicit graph.")

    # 0. Get keys and symbols
    symb_dct = atom_symbols(gra)
    keys = sorted(atom_keys(gra)) if keys is None else keys
    symbs = tuple(map(symb_dct.__getitem__, keys))

    # 1. Generate bounds matrices
    lmat, umat = distance_bounds_matrices(gra, keys)
    chi_dct = chirality_constraint_bounds(gra, keys)
    pla_dct = planarity_constraint_bounds(gra, keys)

    conv1_ = qualitative_convergence_checker_(gra, keys)
    conv2_ = embed.distance_convergence_checker_(lmat, umat, max_dist_err)

    def conv_(xmat, err, grad):
        # Both checkers are always evaluated (`&` does not short-circuit)
        return conv1_(xmat, err, grad) & conv2_(xmat, err, grad)

    # 2. Generate coordinates with correct stereo, trying a few times
    for _ in range(ntries):
        xmat = embed.sample_raw_distance_coordinates(lmat, umat, dim4=True)
        xmat, conv = embed.cleaned_up_coordinates(
            xmat, lmat, umat, pla_dct=pla_dct, chi_dct=chi_dct, conv_=conv_)
        if conv:
            break
    else:
        # Reached when no attempt converged, and also when the loop body never
        # ran (ntries < 1), in which case `xmat`/`conv` would be undefined
        raise error.FailedGeometryGenerationError(f'Bad gra {string(gra)}')

    # 3. Generate a geometry data structure from the coordinates
    # (coordinates were sampled with dim4=True; keep the first three columns)
    xyzs = xmat[:, :3]
    geo = automol.geom.base.from_data(symbs, xyzs, angstrom=True)

    return geo
def molfile_with_atom_mapping(gra, geo=None, geo_idx_dct=None):
    """ Generate an MOLFile from a molecular graph.
        If coordinates are passed in, they are used to determine stereo.

        Dummy atoms are removed and the dominant resonance structure is used
        before the MOLFile data are collected.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param geo: molecular geometry
        :type geo: automol geometry data structure
        :param geo_idx_dct: for each graph atom key, the index of its
            coordinates in `geo`; required when `geo` is given. Atoms with no
            entry are placed at the origin
        :type geo_idx_dct: dict[:]
        :returns: the MOLFile string, followed by a mapping from MOLFile atoms
            to atoms in the graph
        :rtype: (str, dict)
    """
    gra = dominant_resonance(without_dummy_atoms(gra))

    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))

    # Per-atom and per-bond data, ordered like the key lists above
    atm_syms = dict_.values_by_key(atom_symbols(gra), atm_keys)
    atm_bnd_vlcs = dict_.values_by_key(atom_bond_valences(gra), atm_keys)
    atm_rad_vlcs = dict_.values_by_key(
        atom_unsaturated_valences(gra), atm_keys)
    bnd_ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    if geo is None:
        atm_xyzs = None
    else:
        assert geo_idx_dct is not None
        geo_xyzs = automol.geom.base.coordinates(geo)
        atm_xyzs = []
        for atm_key in atm_keys:
            if atm_key in geo_idx_dct:
                atm_xyzs.append(geo_xyzs[geo_idx_dct[atm_key]])
            else:
                atm_xyzs.append((0., 0., 0.))

    mlf, key_map_inv = molfile.from_data(
        atm_keys, bnd_keys, atm_syms, atm_bnd_vlcs, atm_rad_vlcs, bnd_ords,
        atm_xyzs=atm_xyzs)
    return mlf, key_map_inv
def inchi_with_sort_from_geometry(gra, geo=None, geo_idx_dct=None):
    """ Generate an InChI string from a molecular graph.
        If coordinates are passed in, they are used to determine stereo.

        The graph is written to a MOLFile (after removing dummy atoms and
        taking the dominant resonance structure), which is converted to an
        InChI with auxiliary information through RDKit.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param geo: molecular geometry
        :type geo: automol geometry data structure
        :param geo_idx_dct: for each graph atom key, the index of its
            coordinates in `geo`; required when `geo` is given. Atoms with no
            entry are placed at the origin
        :type geo_idx_dct: dict[:]
        :returns: the InChI string, followed by the sort order parsed from
            the auxiliary information, mapped back through the MOLFile key
            map
        :rtype: (str, tuple(int))
    """
    gra = without_dummy_atoms(gra)
    gra = dominant_resonance(gra)

    atm_keys = sorted(atom_keys(gra))
    bnd_keys = list(bond_keys(gra))

    symbs = dict_.values_by_key(atom_symbols(gra), atm_keys)
    bnd_vlcs = dict_.values_by_key(atom_bond_valences(gra), atm_keys)
    rad_vlcs = dict_.values_by_key(atom_unsaturated_valences(gra), atm_keys)
    ords = dict_.values_by_key(bond_orders(gra), bnd_keys)

    xyzs = None
    if geo is not None:
        assert geo_idx_dct is not None
        geo_xyzs = automol.geom.base.coordinates(geo)

        def _xyz_for(atm_key):
            # Atoms without a geometry index fall back to the origin
            if atm_key in geo_idx_dct:
                return geo_xyzs[geo_idx_dct[atm_key]]
            return (0., 0., 0.)

        xyzs = [_xyz_for(atm_key) for atm_key in atm_keys]

    mlf, key_map_inv = molfile.from_data(
        atm_keys, bnd_keys, symbs, bnd_vlcs, rad_vlcs, ords, atm_xyzs=xyzs)

    rdm = rdkit_.from_molfile(mlf)
    ich, aux_info = rdkit_.to_inchi(rdm, with_aux_info=True)

    # Translate the parsed sort order through the MOLFile key map
    sort_nums = _parse_sort_order_from_aux_info(aux_info)
    nums = tuple(key_map_inv[num] for num in sort_nums)
    return ich, nums
def planarity_constraint_bounds(gra, keys):
    """ bounds for enforcing planarity restrictions

        For each sp2 bond whose neighborhood lies entirely within `keys`,
        groups of atoms around the bond are collected, and each group is
        assigned the bounds (-0.5, +0.5).

        :param gra: molecular graph
        :param keys: atom keys specifying the order of indices; constraints
            are expressed as positions in this sequence
        :returns: a dictionary keyed by tuples of indices into `keys`, with
            (lower, upper) bounds as values
        :rtype: dict[tuple[int]: (float, float)]
    """
    ngb_key_dct = atoms_neighbor_atom_keys(gra)
    ngb_dct = bond_neighborhoods(gra)

    # Computed once rather than per bond / per constraint atom
    key_set = set(keys)
    idx_dct = {}
    for idx, key in enumerate(keys):
        # setdefault keeps the first position, matching `keys.index`
        idx_dct.setdefault(key, idx)

    bnd_keys = [bnd_key for bnd_key in sp2_bond_keys(gra)
                if atom_keys(ngb_dct[bnd_key]) <= key_set]

    def _idxs(atm_keys):
        # NOTE(review): relies on the neighborhood filter above to guarantee
        # that every atom used here is in `keys` -- confirm against
        # `bond_neighborhoods`
        return tuple(idx_dct[atm_key] for atm_key in atm_keys)

    def _planarity_constraints(bnd_key):
        key1, key2 = sorted(bnd_key)
        key1ab = sorted(ngb_key_dct[key1] - {key2})
        key2ab = sorted(ngb_key_dct[key2] - {key1})
        nab1, nab2 = len(key1ab), len(key2ab)

        lst = []

        # I don't think the order of the keys matters, but I tried to be
        # roughly consistent with Figure 8 in the Blaney Dixon paper
        if nab1 == 2 and nab2 == 2:
            lst.append(_idxs(key1ab + key2ab))
        if nab1 == 2:
            lst.append(_idxs([key1, key2] + key1ab))
        if nab2 == 2:
            lst.append(_idxs([key1, key2] + key2ab))
        if (nab1 == 2 and nab2 == 1) or (nab1 == 1 and nab2 == 2):
            lst.append(_idxs([key1] + key1ab + key2ab))
            lst.append(_idxs([key2] + key1ab + key2ab))
        if nab1 == 1 and nab2 == 1:
            lst.append(_idxs([key1, key2] + key1ab + key2ab))

        return tuple(lst)

    const_dct = {
        idxs: (-0.5, +0.5) for idxs in
        itertools.chain(*map(_planarity_constraints, bnd_keys))}

    return const_dct
def angle_key_filler_(gra, keys=None, check=True):
    """ Create a function that completes an angle key whose first and/or last
        element is `None` by substituting a neighboring atom (works for
        central or dihedral angles)

        :param gra: molecular graph
        :param keys: keys that may be used to fill in the angle key; if
            `None`, the atom keys of the graph are used
        :param check: raise a ValueError when a key cannot be filled in? If
            `False`, the returned function yields `None` for such keys
        :returns: a function mapping an angle key to a completed tuple of
            keys (or `None`, see `check`)
    """
    avail_keys = atom_keys(gra) if keys is None else keys

    def _fill_in_angle_key(ang_key):
        head_key, tail_key = ang_key[0], ang_key[-1]
        inner_keys = list(ang_key[1:-1])

        # Only the two ends may be left open
        assert all(k is not None for k in inner_keys)

        if head_key is None:
            head_key = atom_neighbor_atom_key(
                gra, inner_keys[0], excl_atm_keys=[tail_key] + inner_keys,
                incl_atm_keys=avail_keys)

        if tail_key is None:
            tail_key = atom_neighbor_atom_key(
                gra, inner_keys[-1], excl_atm_keys=[head_key] + inner_keys,
                incl_atm_keys=avail_keys)

        ang_key = [head_key] + inner_keys + [tail_key]

        if all(k is not None for k in ang_key):
            return tuple(ang_key)

        if check:
            raise ValueError(
                f"Angle key {str(ang_key)} couldn't be filled in")
        return None

    return _fill_in_angle_key
def distance_bounds_matrices(gra, keys, sp_dct=None):
    """ initial distance bounds matrices

        Pairs of atoms joined by a shortest path get the bounds returned by
        the path-distance bounds function for that path. Pairs with no path
        get their closest approach as lower bound and 999 as upper bound.

        :param gra: molecular graph
        :param keys: atom keys specifying the order of indices in the matrix
        :param sp_dct: a 2d dictionary giving the shortest path between any
            pair of atoms in the graph; if `None`, it is computed for the
            subgraph spanned by `keys`
        :returns: the lower and upper bounds matrices, both symmetric with
            shape (len(keys), len(keys))
    """
    assert set(keys) <= set(atom_keys(gra))

    # The subgraph is only needed to generate the shortest paths
    if sp_dct is None:
        sp_dct = atom_shortest_paths(subgraph(gra, keys, stereo=True))

    bounds_ = path_distance_bounds_(gra)

    natms = len(keys)
    umat = numpy.zeros((natms, natms))
    lmat = numpy.zeros((natms, natms))
    for (idx1, key1), (idx2, key2) in itertools.combinations(
            enumerate(keys), 2):
        if key2 in sp_dct[key1]:
            path = sp_dct[key1][key2]
            ldist, udist = bounds_(path)
            lmat[idx1, idx2] = lmat[idx2, idx1] = ldist
            umat[idx1, idx2] = umat[idx2, idx1] = udist
        else:
            # they are disconnected
            # Reset `path` so the assertion message below never reports an
            # undefined name or a path left over from an earlier pair
            path = None
            lmat[idx1, idx2] = lmat[idx2, idx1] = closest_approach(
                gra, key1, key2)
            umat[idx1, idx2] = umat[idx2, idx1] = 999

        assert lmat[idx1, idx2] <= umat[idx1, idx2], (
            "Lower bound exceeds upper bound. This is a bug!\n"
            f"{string(gra, one_indexed=False)}\npath: {str(path)}\n")

    return lmat, umat
def distance_ranges_from_coordinates(gra, dist_dct, ang_dct=None,
                                     dih_dct=None, angstrom=True,
                                     degree=True, rings_keys=(), keys=None,
                                     check=False):
    """ generate a set of distance ranges from coordinate values

        :param gra: molecular graph
        :param dist_dct: a dictionary of desired distances for certain atoms;
            the keys are pairs of atoms, the values are distances in angstroms
        :type dist_dct: dict[(int, int): float]
        :param ang_dct: a dictionary of desired angles for certain atoms; the
            keys are triples of atoms; if the first or last element in a
            triple is None, an appropriate neighboring atom will be found
        :type ang_dct: dict[(int, int, int): float]
        :param dih_dct: a dictionary of desired dihedral angles for certain
            atoms; the keys are quadruples of atoms; if the first or last
            element in a quadruple is None, an appropriate neighboring atom
            will be found. A value may be a number (fixed dihedral) or a pair
            in which one entry is None, leaving the resulting 1-4 distance
            range open on that side
        :param angstrom: passed on to the heuristic bond distance function
        :param degree: are the angle and dihedral values in degrees?
        :param rings_keys: keys for rings in the graph; angle ranges will
            automatically be set to allow ring formation
        :param keys: set of keys that can be used to fill in the angle keys;
            if None, all graph keys will be considered available for use
        :param check: check the angle keys to make sure they can all be filled
            in?
        :returns: distance ranges, keyed by pairs of atom keys
        :rtype: dict[frozenset({int, int}): (float, float)]
    """
    keys = atom_keys(gra) if keys is None else keys
    ang_dct = {} if ang_dct is None else ang_dct
    dih_dct = {} if dih_dct is None else dih_dct

    # Fill in angle keys
    ang_key_filler_ = angle_key_filler_(gra, keys, check=check)
    ang_dct = dict_.transform_keys(ang_dct, ang_key_filler_)
    if None in ang_dct:
        ang_dct.pop(None)

    # Fill in dihedral keys
    dih_dct = dict_.transform_keys(dih_dct, ang_key_filler_)
    if None in dih_dct:
        dih_dct.pop(None)

    dist_dct = dict_.transform_keys(dist_dct, frozenset)
    ang_conv = phycon.DEG2RAD if degree else 1.

    def _distance(key_a, key_b):
        """ requested distance for a pair if there is one, otherwise the
            heuristic bond distance
        """
        pair = frozenset({key_a, key_b})
        if pair in dist_dct:
            return dist_dct[pair]
        return heuristic_bond_distance(gra, key_a, key_b, angstrom=angstrom)

    # Convert angles into distances (law of cosines for the 1-3 distance)
    for (key1, key2, key3), a123 in ang_dct.items():
        a123 = a123 * ang_conv

        d12 = _distance(key1, key2)
        d23 = _distance(key2, key3)
        d13 = numpy.sqrt(d12**2 + d23**2 - 2 * d12 * d23 * numpy.cos(a123))

        dist_dct[frozenset({key1, key3})] = d13

    # Convert fixed distances into ranges
    dist_range_dct = {k: (d, d) for k, d in dist_dct.items()}

    # Convert dihedrals into distances
    for (key1, key2, key3, key4), val in dih_dct.items():
        # Allow user to leave dihedrals open-ended, as a lower or upper bound
        if isinstance(val, numbers.Number):
            d1234 = val
        else:
            assert hasattr(val, '__len__') and len(val) == 2
            d1234 = next(v for v in val if v is not None)

        d1234 = d1234 * ang_conv

        k14 = frozenset({key1, key4})

        d12 = _distance(key1, key2)
        d23 = _distance(key2, key3)
        d34 = _distance(key3, key4)
        d13 = _distance(key1, key3)
        d24 = _distance(key2, key4)

        term1 = (d12**2 + d23**2 - d13**2) * (d23**2 + d34**2 - d24**2)
        term2 = 2 * d23**2 * (d13**2 + d24**2 - d23**2)
        denom = numpy.sqrt(
            (4 * d12**2 * d23**2 - (d12**2 + d23**2 - d13**2)**2) *
            (4 * d23**2 * d34**2 - (d23**2 + d34**2 - d24**2)**2))

        d14 = numpy.sqrt(
            (term1 + term2 - numpy.cos(d1234) * denom) / (2 * d23**2))

        if isinstance(val, numbers.Number) or val[0] == val[1]:
            dist_range_dct[k14] = (d14, d14)
        elif val[0] is None:
            ld14 = closest_approach(gra, key1, key4)
            dist_range_dct[k14] = (ld14, d14)
        elif val[1] is None:
            ud14 = 999.
            dist_range_dct[k14] = (d14, ud14)
        else:
            raise ValueError(f"Invalid dih_dict: {str(dih_dct)}")

    for rng_keys in rings_keys:
        # Check each entry of `rings_keys`, so that a flat list of keys is
        # caught here with a helpful message
        assert hasattr(
            rng_keys,
            '__iter__'), ("Please pass in rings keys as a list of lists")

        # Interior angle of a regular polygon of this size, +/- 10 degrees
        rsz = len(rng_keys)
        a123 = (rsz - 2.) * 180. / rsz
        la123 = (a123 - 10.) * phycon.DEG2RAD
        ua123 = (a123 + 10.) * phycon.DEG2RAD

        # Wrap around so that every consecutive triple in the ring is visited
        for key1, key2, key3 in mit.windowed(rng_keys + rng_keys[:2], 3):
            d12 = _distance(key1, key2)
            d23 = _distance(key2, key3)
            ld13 = numpy.sqrt(d12**2 + d23**2 -
                              2 * d12 * d23 * numpy.cos(la123))
            ud13 = numpy.sqrt(d12**2 + d23**2 -
                              2 * d12 * d23 * numpy.cos(ua123))
            dist_range_dct[frozenset({key1, key3})] = (ld13, ud13)

    return dist_range_dct
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ Continue constructing a v-matrix along a branch of the graph

        Starting from `key`, the chain is first extended to include anchoring
        atoms; atoms are then added recursively, each together with the other
        not-yet-placed neighbors of the atom it is attached to.

        NOTE(review): another definition of `complete_branch` appears in this
        source; if both live in the same module, the later one takes effect.

        :param gra: the graph for which the v-matrix will be constructed
        :param key: the key of the atom from which the branch is continued
        :param vma: a partial v-matrix from which to continue
        :param zma_keys: row keys for the partial v-matrix, identifying the
            atom specified by each row of `vma` in order
        :param branch_keys: optionally, restrict the v-matrix to these keys
            and their neighbors; if `None`, the entire branch will be included
        :returns: the extended v-matrix, and the list of its row keys
    """
    branch_keys = atom_keys(gra) if branch_keys is None else branch_keys
    keys = _extend_chain_to_include_anchoring_atoms(gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C',), symbs_last=('H',), ords_last=(0.1,),
        prioritize_keys=branch_keys)

    # If this z-matrix is being continued from a partial z-matrix, the
    # leading atom for a torsion may already have been defined. Record the
    # first atom defined along each (atom, neighbor) axis so that it can be
    # reused as the dihedral reference below.
    lead_key_dct = {}
    for row_idx, key_row in enumerate(automol.vmat.key_matrix(vma)):
        axis_idxs = key_row[:2]
        if None in axis_idxs:
            continue
        axis = tuple(zma_keys[idx] for idx in axis_idxs)
        if axis not in lead_key_dct:
            lead_key_dct[axis] = zma_keys[row_idx]

    def _continue(key1, key2, key3, vma, zma_keys):
        # Neighbors of atom 3 that are not in the v-matrix yet
        k3ns = list(ngb_keys_dct[key3])
        for placed_key in set(k3ns) & set(zma_keys):
            k3ns.remove(placed_key)

        if not k3ns:
            return vma, zma_keys

        key4 = k3ns.pop(0)
        lead_key = lead_key_dct.get((key3, key2))

        # Add the leading atom to the v-matrix
        symb = symb_dct[key4]
        dkey = lead_key if lead_key is not None else key1
        key_row = [zma_keys.index(k) for k in (key3, key2, dkey)]
        vma = automol.vmat.add_atom(vma, symb, key_row)
        assert key4 not in zma_keys, ("Atom {:d} already in v-matrix."
                                      .format(key4))
        zma_keys.append(key4)

        # Add the remaining neighbors of atom 3 (if any) to the v-matrix,
        # decoupled from atom 1 for properly decoupled torsions
        dkey = lead_key if lead_key is not None else key4
        for k3n in k3ns:
            sym = symb_dct[k3n]

            if symb_dct[dkey] == 'X':
                ref_keys = (key3, dkey, key2)
            else:
                ref_keys = (key3, key2, dkey)
            key_row = [zma_keys.index(k) for k in ref_keys]

            vma = automol.vmat.add_atom(vma, sym, key_row)
            assert k3n not in zma_keys, ("Atom {:d} already in v-matrix."
                                         .format(k3n))
            zma_keys.append(k3n)

        # Recursion: first along the leading atom, then along the others
        if key4 in branch_keys:
            vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

        if symb_dct[key4] == 'X':
            key2 = key4

        for k3n in k3ns:
            if k3n in branch_keys:
                vma, zma_keys = _continue(key2, key3, k3n, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = keys[:3]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys
def complete_branch(gra, key, vma, zma_keys, branch_keys=None):
    """ Continue constructing a v-matrix along a branch of the graph

        Starting from `key`, the chain is first extended to include anchoring
        atoms; atoms are then added recursively, each together with the other
        not-yet-placed neighbors of the atom it is attached to.

        :param gra: the graph for which the v-matrix will be constructed
        :param key: the key of the atom from which the branch is continued
        :param vma: a partial v-matrix from which to continue
        :param zma_keys: row keys for the partial v-matrix, identifying the
            atom specified by each row of `vma` in order
        :param branch_keys: optionally, restrict the v-matrix to these keys
            and their neighbors; if `None`, the entire branch will be included
        :returns: the extended v-matrix, and the list of its row keys
    """
    branch_keys = atom_keys(gra) if branch_keys is None else branch_keys
    keys = _extend_chain_to_include_anchoring_atoms(gra, [key], zma_keys)

    zma_keys = list(zma_keys)
    symb_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_sorted_neighbor_atom_keys(
        gra, symbs_first=('X', 'C', ), symbs_last=('H', ), ords_last=(0.1, ))

    def _continue(key1, key2, key3, vma, zma_keys):
        # Neighbors of atom 3 that are not in the v-matrix yet
        k3ns = list(ngb_keys_dct[key3])
        for placed_key in set(k3ns) & set(zma_keys):
            k3ns.remove(placed_key)

        if not k3ns:
            return vma, zma_keys

        key4 = k3ns.pop(0)

        # Add the leading atom to the v-matrix
        symb = symb_dct[key4]
        key_row = [zma_keys.index(k) for k in (key3, key2, key1)]
        vma = automol.vmat.add_atom(vma, symb, key_row)
        assert key4 not in zma_keys, f"Atom {key4:d} already in v-matrix"
        zma_keys.append(key4)

        # Add the remaining neighbors of atom 3 (if any) to the v-matrix,
        # decoupled from atom 1 for properly decoupled torsions
        for k3n in k3ns:
            sym = symb_dct[k3n]

            if symb_dct[key4] == 'X':
                ref_keys = (key3, key4, key2)
            else:
                ref_keys = (key3, key2, key4)
            key_row = [zma_keys.index(k) for k in ref_keys]

            vma = automol.vmat.add_atom(vma, sym, key_row)
            assert k3n not in zma_keys, f"Atom {k3n:d} already in v-matrix"
            zma_keys.append(k3n)

        # Recursion: first along the leading atom, then along the others
        if key4 in branch_keys:
            vma, zma_keys = _continue(key2, key3, key4, vma, zma_keys)

        if symb_dct[key4] == 'X':
            key2 = key4

        for k3n in k3ns:
            if k3n in branch_keys:
                vma, zma_keys = _continue(key2, key3, k3n, vma, zma_keys)

        return vma, zma_keys

    key1, key2, key3 = keys[0], keys[1], keys[2]
    vma, zma_keys = _continue(key1, key2, key3, vma, zma_keys)

    return vma, zma_keys