def ring_arc_complement_atom_keys(gra, rng):
    """ Non-intersecting arcs from a ring that shares segments with a graph

    An arc is a run of consecutive ring atoms that begins at an atom where the
    ring follows a bond that the graph does not have (a "divergence") and
    ends at the next atom where such a bond rejoins the graph.

    :param gra: the graph
    :param rng: the ring, sharing some of its atoms with `gra`
    :returns: the atom keys of each arc, in the order the ring is walked
    :rtype: tuple[tuple]
    """
    gra_bnds_by_atm = atoms_bond_keys(gra)
    rng_bnds_by_atm = atoms_bond_keys(rng)

    # 1. Record the divergences: for every shared atom, the ring bond that is
    # missing from the graph. The one-element unpacking raises if more than
    # one ring bond diverges at the same atom.
    pending = {}
    for shared_key in atom_keys(gra) & atom_keys(rng):
        extra_bnds = rng_bnds_by_atm[shared_key] - gra_bnds_by_atm[shared_key]
        if extra_bnds:
            div_bnd, = extra_bnds
            pending[shared_key] = div_bnd

    # 2. Walk around the ring (endlessly, until every divergence has been
    # consumed), opening an arc at a starting divergence and closing it at the
    # next ending divergence.
    ring_keys = sorted_ring_atom_keys_from_bond_keys(bond_keys(rng))

    finished = []
    current = []
    for key, nxt_key in mit.windowed(itertools.cycle(ring_keys), 2):
        step = frozenset({key, nxt_key})

        if current:
            # An arc is open: extend it, and close it if the bond we are
            # about to cross is the divergence recorded for the next atom.
            current.append(key)
            if pending.get(nxt_key) == step:
                del pending[nxt_key]
                current.append(nxt_key)
                finished.append(current)
                current = []
        elif pending.get(key) == step:
            # No arc is open and we are leaving the graph here: open one.
            del pending[key]
            current.append(key)

        # Stop as soon as every divergence has been accounted for.
        if not pending:
            break

    return tuple(map(tuple, finished))
def set_stereo_from_geometry(gra, geo, geo_idx_dct=None):
    """ Set graph stereo from a geometry

    Coordinate distances need not match connectivity -- what matters is the
    relative positions at stereo sites.

    :param gra: the graph; any existing stereo parities are discarded first
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys to geometry indices; if None, atom keys
        are mapped to their positions in sorted order
    :returns: the graph with stereo parities assigned from the geometry
    """
    gra = without_stereo_parities(gra)

    default_idx_dct = {
        key: idx for idx, key in enumerate(sorted(atom_keys(gra)))}
    if geo_idx_dct is None:
        geo_idx_dct = default_idx_dct

    # Assign atom and bond stereo repeatedly until the graph stops changing.
    # The sets of stereogenic keys only ever grow from one pass to the next.
    ste_atm_keys = set()
    ste_bnd_keys = set()
    prev_gra = None
    while prev_gra != gra:
        prev_gra = gra
        ste_atm_keys.update(stereogenic_atom_keys(gra))
        ste_bnd_keys.update(stereogenic_bond_keys(gra))
        gra = _set_atom_stereo_from_geometry(
            gra, ste_atm_keys, geo, geo_idx_dct)
        gra = _set_bond_stereo_from_geometry(
            gra, ste_bnd_keys, geo, geo_idx_dct)

    return gra
def _hydrogen_layer(gra):
    """ AMChI hydrogen (h) layer from graph

    :param gra: implicit molecular graph
    :type gra: automol graph data structure
    :returns: the hydrogen layer, without prefix
    :rtype: str
    """
    nhyd_dct = atom_implicit_hydrogen_valences(gra)
    nhyd_of = nhyd_dct.__getitem__

    # groupby only merges neighbouring items, so sort by hydrogen count first
    keys_by_nhyd = sorted(atom_keys(gra), key=nhyd_of)

    def _differ_by_one(idx1, idx2):
        return idx2 in (idx1 - 1, idx1 + 1)

    segments = []
    for nhyd, grp in itertools.groupby(keys_by_nhyd, key=nhyd_of):
        # Atoms without implicit hydrogens do not appear in the layer
        if not nhyd > 0:
            continue

        # The layer uses one-based atom indices
        idxs = sorted(key + 1 for key in grp)

        # Partition the indices under the "differ by one" relation and write
        # each multi-index part as "lowest-highest"
        runs = sorted(
            map(sorted, util.equivalence_partition(idxs, _differ_by_one)))
        labels = [
            f'{run[0]:d}-{run[-1]:d}' if len(run) > 1 else f'{run[0]:d}'
            for run in runs
        ]

        suffix = 'H' if nhyd == 1 else f'H{nhyd}'
        segments.append(','.join(labels) + suffix)

    return ','.join(segments)
def ring_atom_chirality(gra, atm, ring_atms, stereo=False):
    """ Fragments around a ring atom, for judging whether it is chiral

    Despite the name, this does not return a boolean: it returns the
    connected fragments left after cutting the bonds around `atm`.

    :param gra: the graph
    :param atm: the key of the ring atom in question
    :param ring_atms: the atom keys of the ring
    :param stereo: keep stereo parities? If False, they are removed first
    :type stereo: bool
    :returns: the connected components, after bond removal, that contain
        `atm` or one of its neighbors
    :rtype: list
    """
    if not stereo:
        gra = without_stereo_parities(gra)

    nkeys_dct = atoms_neighbor_atom_keys(gra)
    atm_nkeys = nkeys_dct[atm]

    # Collect the bonds to cut: every bond from `atm` to a neighbor and, for
    # each neighbor that is in the ring, its bonds to other ring atoms.
    cut_bnd_keys = []
    for nkey in atm_nkeys:
        cut_bnd_keys.append(frozenset(sorted([atm, nkey])))
        if nkey in ring_atms:
            cut_bnd_keys.extend(
                frozenset(sorted([nnkey, nkey]))
                for nnkey in nkeys_dct[nkey] if nnkey in ring_atms)

    fragments = connected_components(remove_bonds(gra, cut_bnd_keys))

    # Keep only the fragments that touch `atm` or its original neighbors
    return [
        frag for frag in fragments
        if any(key in atm_nkeys or key == atm for key in atom_keys(frag))
    ]
def atom_equivalence_class_reps(gra, atm_keys=None, stereo=True, dummy=True):
    """ Identify isomorphically unique atoms

    Unique atoms are those which do not transform into each other by an
    automorphism. One representative is returned per equivalence class.

    :param gra: A graph
    :param atm_keys: An optional list of atom keys from which to determine
        equivalence class representatives. If None, the full set of atom keys
        will be used.
    :param stereo: Consider stereo?
    :type stereo: bool
    :param dummy: Consider dummy atoms?
    :type dummy: bool
    :returns: The equivalence class representatives/unique atoms.
    :rtype: frozenset[int]
    """
    if atm_keys is None:
        atm_keys = atom_keys(gra)

    def _same_class(key1, key2):
        return are_equivalent_atoms(
            gra, key1, key2, stereo=stereo, dummy=dummy)

    classes = util.equivalence_partition(atm_keys, _same_class)

    # Whichever member iteration yields first stands in for its class
    return frozenset(next(iter(members)) for members in classes)
def _bond_stereo_corrected_geometry(gra, bnd_ste_par_dct, geo, geo_idx_dct):
    """ Correct the bond stereo parities of a geometry, for a subset of bonds

    Each listed bond whose parity in the geometry differs from the requested
    one is fixed by rotating one side of the bond by pi about the bond axis.
    The requested parity is then also written into the graph.

    :param gra: the graph
    :param bnd_ste_par_dct: requested stereo parities, by bond key
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys to geometry indices
    :returns: the corrected geometry and the graph with the parities set
    :rtype: tuple
    :raises AssertionError: if a bond still has the wrong parity after the
        correction step
    """
    bnd_keys = list(bnd_ste_par_dct.keys())
    for bnd_key in bnd_keys:
        par = bnd_ste_par_dct[bnd_key]
        curr_par = bond_stereo_parity_from_geometry(
            gra, bnd_key, geo, geo_idx_dct)

        if curr_par != par:
            xyzs = automol.geom.base.coordinates(geo)

            atm1_key, atm2_key = bnd_key
            atm1_xyz = xyzs[geo_idx_dct[atm1_key]]
            atm2_xyz = xyzs[geo_idx_dct[atm2_key]]

            # The rotation axis points along the bond, from atom 1 to atom 2
            rot_axis = numpy.subtract(atm2_xyz, atm1_xyz)

            # Atoms to rotate: those of the branch returned for atom 1 and
            # this bond -- presumably the atom-2 side; confirm against
            # `branch`
            rot_atm_keys = atom_keys(
                branch(gra, atm1_key, {atm1_key, atm2_key}))

            rot_idxs = list(map(geo_idx_dct.__getitem__, rot_atm_keys))

            # A half turn about the bond axis, anchored at atom 1
            geo = automol.geom.rotate(
                geo, rot_axis, numpy.pi, orig_xyz=atm1_xyz, idxs=rot_idxs)

        # Verify the geometry now has the requested parity, then record it
        assert bond_stereo_parity_from_geometry(
            gra, bnd_key, geo, geo_idx_dct) == par
        gra = set_bond_stereo_parities(gra, {bnd_key: par})

    return geo, gra
def linear_segments_atom_keys(gra, lin_keys=None):
    """ Atom keys for linear segments in the graph

    :param gra: the graph
    :param lin_keys: keys of linear atoms; if None, they are determined with
        `linear_atom_keys(gra, dummy=True)`
    :returns: for each connected linear segment, its atom keys ordered from
        one end to the other
    :rtype: tuple[tuple]
    """
    nkeys_dct = atoms_neighbor_atom_keys(without_dummy_atoms(gra))

    if lin_keys is None:
        lin_keys = linear_atom_keys(gra, dummy=True)

    # Drop atoms with more than two (non-dummy) neighbors
    lin_keys = [key for key in lin_keys if len(nkeys_dct[key]) <= 2]

    seg_keys_lst = []
    for seg in connected_components(subgraph(gra, lin_keys)):
        seg_atm_keys = atom_keys(seg)

        # A lone linear atom is a segment on its own
        if len(seg_atm_keys) == 1:
            seg_keys_lst.append(tuple(seg_atm_keys))
            continue

        # The two ends are the atoms with a single neighbor inside the
        # segment; start from the smaller key. The unpacking raises unless
        # there are exactly two such atoms.
        seg_nkeys_dct = atoms_neighbor_atom_keys(seg)
        start_key, stop_key = sorted(
            key for key, nkeys in seg_nkeys_dct.items() if len(nkeys) == 1)

        # Walk along the segment, always stepping to the one unvisited
        # neighbor
        chain = [start_key]
        while chain[-1] != stop_key:
            nxt_key, = seg_nkeys_dct[chain[-1]] - set(chain)
            chain.append(nxt_key)

        seg_keys_lst.append(tuple(chain))

    return tuple(seg_keys_lst)
def _atom_stereo_corrected_geometry(gra, atm_ste_par_dct, geo, geo_idx_dct):
    """ Correct the atom stereo parities of a geometry, for a subset of atoms

    Each listed atom whose parity in the geometry differs from the requested
    one is fixed by rotating two of its non-ring substituent branches by pi
    about an axis through the atom. The requested parity is then also written
    into the graph.

    :param gra: the graph
    :param atm_ste_par_dct: requested stereo parities, by atom key
    :param geo: the geometry
    :param geo_idx_dct: maps atom keys to geometry indices
    :returns: the corrected geometry and the graph with the parities set
    :rtype: tuple
    :raises AssertionError: if an atom needing correction has fewer than two
        non-ring neighbors, or still has the wrong parity after the
        correction step
    """
    ring_atm_keys = set(itertools.chain(*rings_atom_keys(gra)))
    atm_ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    atm_keys = list(atm_ste_par_dct.keys())
    for atm_key in atm_keys:
        par = atm_ste_par_dct[atm_key]
        curr_par = atom_stereo_parity_from_geometry(
            gra, atm_key, geo, geo_idx_dct)

        if curr_par != par:
            atm_ngb_keys = atm_ngb_keys_dct[atm_key]
            # for now, we simply exclude rings from the pivot keys
            # (will not work for stereo atom at the intersection of two rings)
            # NOTE: the two pivots are whichever non-ring neighbors come
            # first when the set difference is listed
            atm_piv_keys = list(atm_ngb_keys - ring_atm_keys)[:2]
            assert len(atm_piv_keys) == 2
            atm3_key, atm4_key = atm_piv_keys

            # get coordinates
            xyzs = automol.geom.base.coordinates(geo)
            atm_xyz = xyzs[geo_idx_dct[atm_key]]
            atm3_xyz = xyzs[geo_idx_dct[atm3_key]]
            atm4_xyz = xyzs[geo_idx_dct[atm4_key]]

            # do the rotation
            # the axis comes from `unit_bisector` of the two pivot positions
            # with the stereo atom as origin
            rot_axis = util.vec.unit_bisector(
                atm3_xyz, atm4_xyz, orig_xyz=atm_xyz)

            # rotate every atom in the branches returned for the two pivot
            # bonds
            rot_atm_keys = (
                atom_keys(branch(gra, atm_key, {atm_key, atm3_key})) |
                atom_keys(branch(gra, atm_key, {atm_key, atm4_key})))
            rot_idxs = list(map(geo_idx_dct.__getitem__, rot_atm_keys))

            geo = automol.geom.rotate(
                geo, rot_axis, numpy.pi, orig_xyz=atm_xyz, idxs=rot_idxs)

        # Verify the geometry now has the requested parity, then record it
        assert atom_stereo_parity_from_geometry(
            gra, atm_key, geo, geo_idx_dct) == par
        gra = set_atom_stereo_parities(gra, {atm_key: par})

    return geo, gra
def atom_longest_chains(gra):
    """ Longest chains, by atom

    :param gra: the graph
    :returns: the result of `atom_longest_chain` for each atom, by atom key
    :rtype: dict
    """
    chain_dct = {}
    for key in atom_keys(gra):
        chain_dct[key] = atom_longest_chain(gra, key)
    return chain_dct
def longest_chain(gra):
    """ Longest chain in the graph

    :param gra: the graph
    :returns: the longest of the per-atom longest chains; ties go to the
        first one encountered
    :raises ValueError: if the graph has no atoms
    """
    chains = [atom_longest_chain(gra, key) for key in atom_keys(gra)]
    return max(chains, key=len)
def ring_system_decomposed_atom_keys(rsy, rng_keys=None, check=True):
    """ Decomposed atom keys for a polycyclic ring system in a graph

    The ring system is decomposed into a ring and a series of arcs that can
    be used to successively construct the system.

    :param rsy: the ring system
    :param rng_keys: keys for the first ring in the decomposition, in ring
        order (list or tuple); if None, the smallest ring in the system will
        be chosen
    :param check: validate the arguments with assertions?
    :type check: bool
    :returns: the keys of the first ring, followed by the keys of each arc
    :rtype: tuple[tuple]
    :raises AssertionError: if `check` is set and the system is disconnected,
        is not a ring system, or does not contain the ring atoms
    """
    if rng_keys is None:
        rng = sorted(rings(rsy), key=atom_count)[0]
        rng_keys = sorted_ring_atom_keys(rng)

    # check the arguments, if requested
    if check:
        # check that the graph is connected
        assert is_connected(rsy), "Ring system can't be disconnected."

        # check that the graph is actually a ring system
        assert is_ring_system(rsy), (
            f"This is not a ring system graph:\n{string(rsy):s}")

        # check that rng is a subgraph of rsy
        assert set(rng_keys) <= atom_keys(rsy), (
            f"{string(rsy, one_indexed=False)}\n^ "
            "Rings system doesn't contain ring as subgraph:\n"
            f"{str(rng_keys)}")

    # Bond keys of the starting ring: consecutive pairs, with the first key
    # repeated at the end to close the ring. They are converted to frozensets
    # so that they have the same form as the arc bond keys removed below.
    bnd_keys = list(
        map(frozenset, mit.windowed(rng_keys + rng_keys[:1], 2)))

    # Remove bonds for the ring
    rsy = remove_bonds(rsy, bnd_keys)
    keys_lst = [rng_keys]
    done_keys = set(rng_keys)

    while bond_keys(rsy):
        # Determine shortest paths for the graph with one more ring/arc deleted
        sp_dct = atom_shortest_paths(rsy)

        # The shortest path between two already-placed atoms will be the next
        # shortest arc in the system
        arc_keys = min(
            (sp_dct[i][j] for i, j in itertools.combinations(done_keys, 2)
             if j in sp_dct[i]), key=len)

        # Add this arc to the list
        keys_lst.append(arc_keys)

        # Add these keys to the set of done keys
        done_keys |= set(arc_keys)

        # Delete the bond keys for the new arc and continue to the next
        # iteration
        bnd_keys = list(map(frozenset, mit.windowed(arc_keys, 2)))
        rsy = remove_bonds(rsy, bnd_keys)

    keys_lst = tuple(map(tuple, keys_lst))
    return keys_lst
def rotational_bond_keys(gra, lin_keys=None, with_h_rotors=True):
    """ Get all rotational bonds for a graph

    A bond counts as rotational if its largest resonance-dominant order is at
    most one, both of its atoms have further neighbors, and it is not part of
    a ring. Bonds touching a linear segment are then pruned so that at most
    one of them remains per segment.

    :param gra: the graph
    :param lin_keys: keys to linear atoms in the graph
    :param with_h_rotors: include bonds for which all further neighbors on
        one end are hydrogens?
    :type with_h_rotors: bool
    :returns: the rotational bond keys
    :rtype: frozenset
    """
    gra = explicit(gra)
    sym_dct = atom_symbols(gra)
    ngb_keys_dct = atoms_neighbor_atom_keys(gra)
    bnd_ord_dct = resonance_dominant_bond_orders(gra)
    # A set, so the per-bond ring membership test below is a hash lookup
    # instead of a linear scan
    rng_bnd_keys = set(itertools.chain(*rings_bond_keys(gra)))

    def _is_rotational_bond(bnd_key):
        # The further neighbors of each end, excluding the bond partner
        ngb_keys_lst = [ngb_keys_dct[k] - bnd_key for k in bnd_key]

        is_single = max(bnd_ord_dct[bnd_key]) <= 1
        has_neighbors = all(ngb_keys_lst)
        not_in_ring = bnd_key not in rng_bnd_keys

        # An H rotor has one end whose further neighbors are all hydrogens
        is_h_rotor = any(
            set(map(sym_dct.__getitem__, ks)) == {'H'}
            for ks in ngb_keys_lst)

        return is_single and has_neighbors and not_in_ring and (
            not is_h_rotor or with_h_rotors)

    rot_bnd_keys = frozenset(filter(_is_rotational_bond, bond_keys(gra)))

    lin_keys_lst = linear_segments_atom_keys(gra, lin_keys=lin_keys)
    dum_keys = tuple(atom_keys(gra, sym='X'))
    for keys in lin_keys_lst:
        seg_keys = set(keys)

        # The rotational bonds touching this segment, in a reproducible order
        bnd_keys = sorted(
            (k for k in rot_bnd_keys if k & seg_keys), key=sorted)

        # Check whether there are neighboring atoms on either side of the
        # linear segment
        excl_keys = seg_keys | set(dum_keys)

        end_key1 = atom_neighbor_atom_key(
            gra, keys[0], excl_atm_keys=excl_keys)

        # Exclude the first end so a one-atom segment gets two distinct ends
        excl_keys |= {end_key1}

        end_key2 = atom_neighbor_atom_key(
            gra, keys[-1], excl_atm_keys=excl_keys)

        end_keys = {end_key1, end_key2}
        ngb_keys_lst = [ngb_keys_dct[k] - excl_keys for k in end_keys]
        has_neighbors = all(ngb_keys_lst)

        if not has_neighbors:
            # Nothing to rotate on at least one side: drop all of them
            rot_bnd_keys -= set(bnd_keys)
        else:
            # Keep only the last of the bonds touching this segment
            rot_bnd_keys -= set(bnd_keys[:-1])

    return rot_bnd_keys
def equivalent_atoms(gra, atm_key, stereo=True, dummy=True):
    """ Identify the atoms that are isomorphically equivalent to a given atom

    Two atoms are equivalent if they transform into each other under an
    automorphism.

    :param gra: A graph
    :param atm_key: An atom key for the graph
    :param stereo: Consider stereo?
    :type stereo: bool
    :param dummy: Consider dummy atoms?
    :type dummy: bool
    :returns: Keys to equivalent atoms
    :rtype: frozenset
    :raises AssertionError: if `atm_key` is not an atom of the graph
    """
    assert atm_key in atom_keys(gra), (
        f"{atm_key} not in {atom_keys(gra)}")

    symb_dct = atom_symbols(gra)
    nkeys_dct = atoms_neighbor_atom_keys(gra)

    def _environment(key):
        """ sorted symbols of the atom's neighbors """
        return sorted(symb_dct[nkey] for nkey in nkeys_dct[key])

    # Cheap pre-filters: same symbol, then same neighboring atom types
    ref_env = _environment(atm_key)
    candidates = [
        key for key in atom_keys(gra, sym=symb_dct[atm_key])
        if _environment(key) == ref_env
    ]

    # The actual equivalence test is delegated to `are_equivalent_atoms`.
    # (The note originally kept here describes its strategy as relabeling an
    # atom as 'Ts' and checking for isomorphism, which assumes no compound
    # contains element 117.)
    return frozenset(
        key for key in candidates
        if are_equivalent_atoms(gra, atm_key, key, stereo=stereo,
                                dummy=dummy))
def resonance_dominant_atom_hybridizations(rgr):
    """ Resonance-dominant atom hybridizations, by atom

    For each atom, this is the smallest hybridization value it takes in any
    of the dominant resonance structures.

    :param rgr: the graph; fractional bonds are removed first
    :returns: the hybridization for each atom, by atom key
    :rtype: dict
    """
    rgr = without_fractional_bonds(rgr)
    keys = list(atom_keys(rgr))

    # One row of hybridizations per dominant resonance, ordered like `keys`
    hybs_by_res = []
    for res_rgr in dominant_resonances(rgr):
        hybs_by_res.append(
            dict_.values_by_key(atom_hybridizations(res_rgr), keys))

    # Transpose to one column per atom and take the minimum of each
    return {key: min(hybs) for key, hybs in zip(keys, zip(*hybs_by_res))}
def nonresonant_radical_atom_keys(rgr):
    """ Keys for radical atoms that are not in resonance

    These are the atoms whose unsaturated valence is non-zero in every
    dominant resonance structure.

    :param rgr: the graph; fractional bonds are removed first
    :returns: the atom keys
    :rtype: frozenset
    """
    rgr = without_fractional_bonds(rgr)
    keys = list(atom_keys(rgr))

    # One row of unsaturated valences per dominant resonance
    vlcs_by_res = [
        dict_.values_by_key(atom_unsaturated_valences(res_rgr), keys)
        for res_rgr in dominant_resonances(rgr)
    ]

    # A non-zero minimum means the atom is a radical in all resonances
    return frozenset(
        key for key, vlcs in zip(keys, zip(*vlcs_by_res)) if min(vlcs))
def radical_dissociation_products(gra, pgra1):
    """ Determine the products of a dissociation around a radical site

    One of the dissociation products is assumed to be known, and we attempt
    to find the other one. Currently, the input `pgra1` is assumed to be
    appropriately stereolabeled.

    :param gra: species undergoing dissociation
    :type gra: automol.graph object
    :param pgra1: one of the known products of dissociation
    :type pgra1: automol.graph object
    :returns: `pgra1` and the other product, which is None if no matching
        group was found
    :rtype: tuple(automol.graph.object)
    """
    # Remove fractional bonds for the functions below to work
    gra = without_fractional_bonds(gra)
    nkeys_dct = atoms_neighbor_atom_keys(gra)

    # Look at every group attached next to a radical site; whenever one
    # matches pgra1, the remainder of the graph is the other product. The
    # search does not stop at the first match, so the last match wins.
    pgra2 = None
    for rad_key in sing_res_dom_radical_atom_keys(gra):
        for nkey in nkeys_dct[rad_key]:
            for grp in atom_groups(gra, nkey, stereo=False):
                if not isomorphism(grp, pgra1, backbone_only=True):
                    continue

                pgra2 = remove_atoms(gra, atom_keys(grp))

                # NOTE(review): this tests whether the group's set of bond
                # keys is an element of the graph object itself, which looks
                # unlikely to ever be true -- confirm the intent.
                grp_bnd_keys = bond_keys(grp)
                if grp_bnd_keys in pgra2:
                    pgra2 = remove_bonds(pgra2, grp_bnd_keys)

    if pgra2 is None:
        return pgra1, pgra2

    # The second product was identified: rebuild it from the index-based
    # stereo form of the full graph so that it carries stereo labels
    keys2 = atom_keys(pgra2)
    idx_pgra2 = subgraph(to_index_based_stereo(gra), keys2, stereo=True)
    pgra2 = from_index_based_stereo(idx_pgra2)

    return pgra1, pgra2
def from_graph(gra):
    """ Networkx graph object from a molecular graph

    Atoms become nodes and bonds become edges. Node attributes are 'symbol',
    'implicit_hydrogen_valence' and 'stereo_parity'; edge attributes are
    'order' and 'stereo_parity'.

    :param gra: the molecular graph
    :returns: the networkx graph
    :rtype: networkx.Graph
    """
    nxg = networkx.Graph()
    nxg.add_nodes_from(atom_keys(gra))
    nxg.add_edges_from(bond_keys(gra))

    node_attributes = (
        ('symbol', atom_symbols),
        ('implicit_hydrogen_valence', atom_implicit_hydrogen_valences),
        ('stereo_parity', atom_stereo_parities),
    )
    for name, getter in node_attributes:
        networkx.set_node_attributes(nxg, getter(gra), name)

    edge_attributes = (
        ('order', bond_orders),
        ('stereo_parity', bond_stereo_parities),
    )
    for name, getter in edge_attributes:
        networkx.set_edge_attributes(nxg, getter(gra), name)

    return nxg
def sing_res_dom_radical_atom_keys(rgr):
    """ Resonance-dominant radical atom keys, for one resonance

    Only the first of the dominant resonance structures is considered.

    TODO: DEPRECATE

    :param rgr: the graph; fractional bonds are removed first
    :returns: keys of the atoms with non-zero unsaturated valence in the
        first dominant resonance
    :rtype: frozenset
    :raises IndexError: if there are no dominant resonances
    """
    rgr = without_fractional_bonds(rgr)
    keys = list(atom_keys(rgr))

    # Valences are gathered for every dominant resonance, although only the
    # first row is used
    vlcs_by_res = [
        dict_.values_by_key(atom_unsaturated_valences(res_rgr), keys)
        for res_rgr in dominant_resonances(rgr)
    ]
    first_res_vlcs = vlcs_by_res[0]

    return frozenset(key for key, vlc in zip(keys, first_res_vlcs) if vlc)
def radical_atom_keys(gra, single_res=False, min_valence=1.):
    """ Radical atom keys for this molecular graph

    Radical atoms are based on the lowest-spin resonance structures for this
    graph. If the `single_res` flag is set, a single low-spin resonance
    structure will be chosen when there are multiple such structures.

    This function should eventually replace both
    `resonance_dominant_radical_atom_keys` and
    `sing_res_dom_radical_atom_keys` for a more user-friendly interface.

    Note that this function ignores the bond orders in `gra`. If you wish to
    identify radical atom keys based on the bond orders in `gra`, this can be
    done by using the `atom_unsaturated_valences` function.

    :param gra: the molecular graph
    :param single_res: only include radical keys for a single (arbitrary)
        resonance structure, or include all atoms that are radicals in any of
        the low-spin resonance structures?
    :type single_res: bool
    :param min_valence: optionally, specify that only sites with at least a
        certain number of radical electrons be included
    :type min_valence: int
    :returns: the radical atom keys
    :rtype: frozenset[int]
    """
    gra = without_fractional_bonds(gra)
    keys = list(atom_keys(gra))

    # Either one resonance structure, or all of the dominant ones
    if single_res:
        resonances = [dominant_resonance(gra)]
    else:
        resonances = dominant_resonances(gra)

    vlcs_by_res = [
        dict_.values_by_key(atom_unsaturated_valences(res_gra), keys)
        for res_gra in resonances
    ]

    # An atom qualifies if its largest unsaturated valence over the
    # resonances considered reaches the threshold
    return frozenset(
        key for key, vlcs in zip(keys, zip(*vlcs_by_res))
        if max(vlcs) >= min_valence)
def radical_dissociation_prods(gra, pgra1):
    """ Given a dissociation product, determine the other product

    :param gra: species undergoing dissociation
    :param pgra1: the known dissociation product
    :returns: `pgra1` and the other product, which is None if no group next
        to a radical site matches `pgra1`
    :rtype: tuple
    """
    gra = without_fractional_bonds(gra)

    other_prod = None
    rad_keys = sing_res_dom_radical_atom_keys(gra)
    nkeys_dct = atoms_neighbor_atom_keys(gra)

    # Every group attached next to a radical site is a candidate; the last
    # one that matches pgra1 determines the result
    for rad_key in rad_keys:
        for nkey in nkeys_dct[rad_key]:
            for grp in atom_groups(gra, nkey, stereo=False):
                if not isomorphism(grp, pgra1, backbone_only=True):
                    continue

                other_prod = remove_atoms(gra, atom_keys(grp))

                # NOTE(review): membership of a set of bond keys in the graph
                # object itself looks unlikely to ever hold -- confirm.
                grp_bnd_keys = bond_keys(grp)
                if grp_bnd_keys in other_prod:
                    other_prod = remove_bonds(other_prod, grp_bnd_keys)

    return (pgra1, other_prod)
def resonance_dominant_radical_atom_keys(rgr):
    """ Resonance-dominant radical atom keys

    TODO: DEPRECATE

    (keys of resonance-dominant radical sites)

    :param rgr: the graph; fractional bonds are removed first
    :returns: keys of the atoms with non-zero unsaturated valence in at least
        one dominant resonance structure
    :rtype: frozenset
    """
    rgr = without_fractional_bonds(rgr)
    keys = list(atom_keys(rgr))

    # One row of unsaturated valences per dominant resonance
    vlcs_by_res = [
        dict_.values_by_key(atom_unsaturated_valences(res_rgr), keys)
        for res_rgr in dominant_resonances(rgr)
    ]

    # A non-zero maximum means the atom is a radical in some resonance
    return frozenset(
        key for key, vlcs in zip(keys, zip(*vlcs_by_res)) if max(vlcs))
def _insert_stereo_hydrogens(gra):
    """ Insert hydrogens necessary for bond stereo into an implicit graph

    Wherever an atom of a stereo bond has no neighbor other than its bond
    partner, its single implicit hydrogen is replaced by an explicit one.
    Hydrogens are given negative keys for proper stereo sorting.

    :param gra: implicit molecular graph
    :returns: the graph with the explicit hydrogens added
    :raises AssertionError: if such an atom does not have exactly one
        implicit hydrogen
    """
    nkeys_dct = atoms_neighbor_atom_keys(gra)
    nhyd_dct = atom_implicit_hydrogen_valences(gra)

    # New keys count downwards, starting below the negated largest atom key
    hyd_key = -max(atom_keys(gra)) - 1

    for bnd_key in bond_stereo_keys(gra):
        key1, key2 = bnd_key
        for key, partner in ((key1, key2), (key2, key1)):
            # Skip atoms that already have another neighbor
            if nkeys_dct[key] - {partner}:
                continue

            assert nhyd_dct[key] == 1
            gra = add_bonded_atom(gra, 'H', key, hyd_key)
            gra = set_atom_implicit_hydrogen_valences(gra, {key: 0})
            hyd_key -= 1

    return gra
def from_graph(gra):
    """ Igraph object from a molecular graph

    Vertices follow the sorted atom keys, which are stored in the 'keys'
    vertex attribute. The encoded atom and bond attributes are stored in the
    'color' vertex and edge attributes.

    :param gra: the molecular graph
    :returns: the igraph graph
    :rtype: igraph.Graph
    """
    atm_keys = sorted(atom_keys(gra))
    bnd_keys = sorted(bond_keys(gra), key=sorted)

    # Encode each atom's and each bond's attribute tuple
    vertex_colors = [
        _encode_vertex_attributes(*vals)
        for vals in dict_.values_by_key(atoms(gra), atm_keys)
    ]
    edge_colors = [
        _encode_edge_attributes(*vals)
        for vals in dict_.values_by_key(bonds(gra), bnd_keys)
    ]

    # Edges are written as pairs of vertex positions rather than atom keys
    idx_dct = {key: idx for idx, key in enumerate(atm_keys)}
    edges = [sorted(idx_dct[key] for key in bnd_key) for bnd_key in bnd_keys]

    igr = igraph.Graph(edges)
    igr.vs['keys'] = atm_keys
    igr.vs['color'] = vertex_colors
    igr.es['color'] = edge_colors
    return igr
def radical_atom_keys_from_resonance(rgr, min_valence=1.):
    """ Radical atom keys for a resonance molecular graph

    Assumes the graph has already been assigned to a resonance structure.

    :param rgr: a resonance-structure molecular graph
    :param min_valence: optionally, specify that only sites with at least a
        certain number of radical electrons be included
    :type min_valence: int
    :returns: the radical atom keys
    :rtype: frozenset[int]
    """
    rgr = without_fractional_bonds(rgr)
    keys = list(atom_keys(rgr))
    vlcs = dict_.values_by_key(atom_unsaturated_valences(rgr), keys)

    # Keep the atoms whose unsaturated valence reaches the threshold
    return frozenset(
        key for key, vlc in zip(keys, vlcs) if vlc >= min_valence)
def branch_atom_keys(gra, atm_key, bnd_key):
    """ Atom keys for branch extending along `bnd_key` away from `atm_key`

    :param gra: the graph
    :param atm_key: the key of the atom the branch starts from
    :param bnd_key: the key of the bond the branch extends along
    :returns: the atom keys of the branch, with `atm_key` itself left out
    """
    return atom_keys(branch(gra, atm_key, bnd_key)) - {atm_key}
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

    The string is built by a depth-first walk over a dominant resonance
    structure of the implicit graph, starting from the smallest terminal atom
    key (or the smallest atom key, if there are no terminal atoms).

    :param gra: molecular graph
    :type gra: automol graph data structure
    :param stereo: Include stereo?
    :type stereo: bool
    :param local_stereo: Is the graph using local stereo assignments? That is,
        are they based on atom keys rather than canonical keys?
    :type local_stereo: bool
    :param res_stereo: allow resonant double-bond stereo?
    :type res_stereo: bool
    :returns: the SMILES string
    :rtype: str
    :raises AssertionError: if the graph is disconnected, or if a ring tag
        of 10 or more would be needed
    :raises NotImplementedError: if `res_stereo` is set
    :raises ValueError: if a bond order other than 1, 2 or 3 is encountered
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(
        atom_stereo_parities(rgr), lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(
        bond_stereo_parities(rgr), lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, keep bond parities only
    # for bonds that have order 2 in this resonance structure.
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(
            bnd_par_dct, lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        """ atom string, bracketed when it is a radical, is outside the
        organic subset, or carries a stereo parity
        """
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # The neighbors in the order they appear in the SMILES string:
            # preceding atom, implicit hydrogen, ring closures, branches
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                # The implicit hydrogen sorts below every atom key
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # Flip the stored parity if the SMILES order is an odd
            # permutation of the sorted key order
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        """ bond string (order and direction symbols) for the bond leading
        from `just_seen` to `key`
        """
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # Second, handle directionality (bond stereo). A direction may
        # already have been stored for this bond by an earlier stereo bond.
        drep = drep_dct[(key0, key1)] if (key0, key1) in drep_dct else ''

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                drep = drep1
                flip = not smi_par
            else:
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        """ ring tags for this atom, the neighbors left to visit, and the
        ring-closure neighbors
        """
        nkeys = nkeys.copy()

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is modified while it is being iterated, so
        # the ring following a removed one is skipped during this call. Left
        # as is, because the effect of changing it on fused-ring output could
        # not be verified here -- confirm whether this is intended.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            # This atom closes the ring: its closure partner is the first key
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            # This atom opens the ring: its closure partner is the last key
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys
    nkeys_dct_pool = dict_.transform_values(
        atoms_neighbor_atom_keys(rgr), sorted)

    def _recurse_smiles(smi, lst, key, just_seen=None):
        """ SMILES string and nested key list for the part of the graph
        reached from `key`; the incoming `smi` and `lst` are overwritten
        """
        # An atom that has already been visited has no neighbors left
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles('', [], nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1])) +
                        sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles('', [], start_key)

    return smi