def isomorphism(gra1, gra2, backbone_only=False, stereo=True, dummy=True):
    """ Find an isomorphism mapping one graph onto another

        Intended to eventually supersede the other isomorphism functions.

        Both graphs are pre-processed identically, according to the flags,
        before being handed to `_isomorphism`.

        :param gra1: The first graph
        :param gra2: The second graph
        :param backbone_only: Compare backbone atoms only?
        :type backbone_only: bool
        :param stereo: Consider stereo?
        :type stereo: bool
        :param dummy: Consider dummy atoms?
        :type dummy: bool
        :returns: The isomorphism mapping `gra1` onto `gra2`
        :rtype: dict
    """
    # Collect the pre-processing steps selected by the flags, in the order in
    # which they are applied.
    steps = []
    if backbone_only:
        steps.append(implicit)
    if not stereo:
        steps.append(without_stereo_parities)
    if not dummy:
        steps.append(without_dummy_atoms)

    # Apply every step to both graphs so that they stay comparable.
    for step in steps:
        gra1 = step(gra1)
        gra2 = step(gra2)

    return _isomorphism(gra1, gra2)
def ring_atom_chirality(gra, atm, ring_atms, stereo=False):
    """ Split the graph around a ring atom and return the fragments that touch
        it

        The bonds between `atm` and each of its neighbors are cut. For every
        neighbor that is itself in `ring_atms`, the bonds from that neighbor
        to its own neighbors in `ring_atms` are cut as well. Of the resulting
        connected components, only those containing `atm` or one of its
        neighbors are returned.

        NOTE(review): despite the name, this returns fragments rather than a
        yes/no answer; presumably the caller compares them to decide
        chirality -- confirm against the call sites.

        :param gra: molecular graph
        :param atm: key of the ring atom of interest
        :param ring_atms: keys of the atoms in the ring(s); only membership
            tests are performed on it
        :param stereo: Keep stereo parities? If False they are stripped first.
        :type stereo: bool
        :returns: the connected components containing `atm` or a neighbor of
            `atm`
        :rtype: list
    """
    if not stereo:
        gra = without_stereo_parities(gra)

    ngb_keys_dct = atoms_neighbor_atom_keys(gra)
    atm_ngb_keys = ngb_keys_dct[atm]

    # Bond keys are frozensets, so the order of the two atoms is irrelevant
    # and no sorting is needed before building them.
    cut_bnd_keys = []
    for ngb_key in atm_ngb_keys:
        cut_bnd_keys.append(frozenset({atm, ngb_key}))
        if ngb_key in ring_atms:
            # Also cut the in-ring bonds of this ring neighbor. This may
            # repeat a key that is already in the list.
            cut_bnd_keys.extend(
                frozenset({ring_ngb_key, ngb_key})
                for ring_ngb_key in ngb_keys_dct[ngb_key]
                if ring_ngb_key in ring_atms)

    cut_gra = remove_bonds(gra, cut_bnd_keys)

    # Keep only the fragments that contain the atom or one of its neighbors.
    return [
        cmp_gra for cmp_gra in connected_components(cut_gra)
        if any(key == atm or key in atm_ngb_keys
               for key in atom_keys(cmp_gra))
    ]
def set_stereo_from_geometry(gra, geo, geo_idx_dct=None):
    """ Assign stereo parities to a graph based on a geometry

        (coordinate distances need not match connectivity -- what matters is
        the relative positions at stereo sites)

        Any parities already present on `gra` are discarded first.

        :param gra: molecular graph
        :param geo: the geometry to read stereo from
        :param geo_idx_dct: mapping from atom keys to geometry indices; if
            None, the sorted atom keys are numbered consecutively from zero
        :type geo_idx_dct: dict or NoneType
        :returns: the graph with stereo parities set
    """
    gra = without_stereo_parities(gra)

    ordered_keys = sorted(atom_keys(gra))
    if geo_idx_dct is None:
        geo_idx_dct = {key: pos for pos, key in enumerate(ordered_keys)}

    # Set atom and bond stereo repeatedly until the graph stops changing.
    # The sets of stereogenic keys only ever grow from pass to pass.
    ste_atm_keys = set()
    ste_bnd_keys = set()
    prev_gra = None
    while prev_gra != gra:
        prev_gra = gra

        ste_atm_keys.update(stereogenic_atom_keys(gra))
        ste_bnd_keys.update(stereogenic_bond_keys(gra))

        gra = _set_atom_stereo_from_geometry(
            gra, ste_atm_keys, geo, geo_idx_dct)
        gra = _set_bond_stereo_from_geometry(
            gra, ste_bnd_keys, geo, geo_idx_dct)

    return gra
def atom_groups(gra, atm, stereo=False):
    """ Return a list of groups off of one atom

        The bonds between `atm` and each of its neighbors are removed, and the
        connected components of the remaining graph are returned.

        TODO: MERGE WITH BRANCH FUNCTIONS OR MAKE NAMING CONSISTENT SOMEHOW

        :param gra: molecular graph
        :param atm: key of the atom whose bonds are cut
        :param stereo: Keep stereo parities? If False they are stripped first.
        :type stereo: bool
        :returns: the result of `connected_components` on the cut graph
    """
    if not stereo:
        gra = without_stereo_parities(gra)

    # Bond keys are frozensets, so the order of the two atoms is irrelevant
    # and no sorting is needed before building them.
    cut_bnd_keys = [
        frozenset({atm, ngb_key})
        for ngb_key in atoms_neighbor_atom_keys(gra)[atm]
    ]

    return connected_components(remove_bonds(gra, cut_bnd_keys))
def stereomers(tsg):
    """ Enumerate every stereo assignment of the reactants in a TS graph

        (Stereo assignments already present, if any, are ignored.)

        :param tsg: The TS graph, without stereo assignments.
        :returns: All possible TS graphs with stereo assignments for the
            reactants.
        :rtype: tuple
    """
    rcts_gra = reactants_graph(tsg)
    forming_keys = forming_bond_keys(tsg)
    breaking_keys = breaking_bond_keys(tsg)

    # Start from stereo-free reactants so that existing parities play no role
    bare_rcts_gra = without_stereo_parities(rcts_gra)

    # Re-attach the forming/breaking bonds to each reactant stereomer
    ste_tsgs = []
    for rcts_sgr in _stereomers(bare_rcts_gra):
        ste_tsgs.append(graph(rcts_sgr, forming_keys, breaking_keys))

    return tuple(ste_tsgs)
def compatible_reverse_stereomers(ste_tsg):
    """ Given a TS graph with stereo assignments, expand all possible reverse
        graphs compatible with the forward graph.

        A reverse graph is compatible when its index-based parities at the
        conserved stereo atoms and bonds equal those of the forward graph.

        :param ste_tsg: The TS graph, with stereo assignments.
        :returns: All possible reverse TS graphs.
        :rtype: list
    """
    frm_keys = forming_bond_keys(ste_tsg)
    brk_keys = breaking_bond_keys(ste_tsg)

    # Only the second element of each returned pair is needed here
    _unused, lost_atm_keys = nonconserved_atom_stereo_keys(ste_tsg)
    _unused, lost_bnd_keys = nonconserved_bond_stereo_keys(ste_tsg)

    cons_atm_keys = sorted(atom_stereo_keys(ste_tsg) - lost_atm_keys)
    cons_bnd_keys = sorted(bond_stereo_keys(ste_tsg) - lost_bnd_keys)

    def _conserved_parities(idx_tsg_):
        """ index-based parities at the conserved atoms and bonds
        """
        atm_pars = dict_.values_by_key(
            atom_stereo_parities(idx_tsg_), cons_atm_keys)
        bnd_pars = dict_.values_by_key(
            bond_stereo_parities(idx_tsg_), cons_bnd_keys)
        return atm_pars, bnd_pars

    # 1. Determine index-based stereo assignments for conserved stereo centers
    fwd_cons_pars = _conserved_parities(to_index_based_stereo(ste_tsg))

    # 2. Determine all possible index-based stereo assignments for the reverse
    # reaction. Note that forming and breaking bonds swap roles.
    prds_gra = without_stereo_parities(products_graph(ste_tsg))
    prds_idx_sgrs = [_to_index_based_stereo(s) for s in _stereomers(prds_gra)]
    candidates = [graph(sgr, brk_keys, frm_keys) for sgr in prds_idx_sgrs]

    # 3. Find possibilities which match the assignments for the conserved
    # stereo centers.
    matches = [
        cand for cand in candidates
        if _conserved_parities(cand) == fwd_cons_pars
    ]

    # 4. Convert the matching reverse graphs back from index-based stereo
    # assignments to absolute stereo assignments.
    return [from_index_based_stereo(match) for match in matches]
def amchi(gra, stereo=True, can=True, is_reflected=None):
    """ Build the AMChI string for a graph

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo in the AMChI string, if present?
        :type stereo: bool
        :param can: Canonicalize the graph? Set to True by default, causing the
            graph to be canonicalized. If setting to False to avoid
            re-canonicalization, the `is_reflected` flag must be set for a
            canonical result.
        :type can: bool
        :param is_reflected: If using pre-canonicalized graph, is it a
            reflected enantiomer? If True, yes; if False, it's an enantiomer
            that isn't reflected; if None, it's not an enantiomer. Overwritten
            when `can` is True.
        :type is_reflected: bool or NoneType
        :returns: the AMChI string
        :rtype: str
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    # Drop stereo if it is not wanted, then convert to an implicit graph
    if not stereo:
        gra = without_stereo_parities(gra)
    gra = implicit(gra)

    # Canonicalize and determine canonical enantiomer; this replaces whatever
    # `is_reflected` value was passed in
    if can:
        gra, is_reflected = canonical_enantiomer(gra)

    # Assemble the string from the formula, main layers and stereo layers
    return automol.amchi.base.from_data(
        fml_str=_formula_string(gra),
        main_lyr_dct=_main_layers(gra),
        ste_lyr_dct=_stereo_layers(gra, is_reflected=is_reflected),
    )
def _stereo_corrected_geometry(sgr, geo, geo_idx_dct):
    """ Correct a geometry so that it matches the stereo parities of a graph

        (works iteratively to handle cases of higher-order stereo)

        :param sgr: explicit graph carrying the target stereo parities
        :param geo: the geometry to correct
        :param geo_idx_dct: passed through to the atom/bond correction helpers
            (presumably maps atom keys to geometry indices -- confirm)
        :returns: the corrected geometry; `geo` unchanged if `sgr` has no
            stereo
    """
    assert sgr == explicit(sgr)

    gra = without_stereo_parities(sgr)

    # Nothing to correct without stereo
    if not has_stereo(sgr):
        return geo

    target_atm_pars = atom_stereo_parities(sgr)
    target_bnd_pars = bond_stereo_parities(sgr)

    seen_atm_keys = set()
    seen_bnd_keys = set()

    # Repeat until the working graph stops changing; each pass may expose
    # further stereogenic sites, whose keys accumulate across passes.
    prev_gra = None
    while prev_gra != gra:
        prev_gra = gra

        seen_atm_keys.update(stereogenic_atom_keys(gra))
        seen_bnd_keys.update(stereogenic_bond_keys(gra))

        # Target parities restricted to the sites found so far
        atm_pars = dict((key, target_atm_pars[key]) for key in seen_atm_keys)
        bnd_pars = dict((key, target_bnd_pars[key]) for key in seen_bnd_keys)

        geo, gra = _atom_stereo_corrected_geometry(
            gra, atm_pars, geo, geo_idx_dct)
        geo, gra = _bond_stereo_corrected_geometry(
            gra, bnd_pars, geo, geo_idx_dct)

    return geo
def stereomers(gra):
    """ Enumerate all stereomers of a graph, ignoring its own assignments

        :param gra: molecular graph
        :returns: the stereomers, sorted using `frozen` as the sort key
        :rtype: tuple
    """
    parity_choices = (False, True)

    def _all_assignments(sgr, ste_keys, assign_):
        """ one graph per combination of parities over `ste_keys`
        """
        combos = itertools.product(parity_choices, repeat=len(ste_keys))
        return [assign_(sgr, dict(zip(ste_keys, combo))) for combo in combos]

    def _expand_atom_stereo(sgr):
        return _all_assignments(
            sgr, stereogenic_atom_keys(sgr), set_atom_stereo_parities)

    def _expand_bond_stereo(sgr):
        return _all_assignments(
            sgr, stereogenic_bond_keys(sgr), set_bond_stereo_parities)

    # Alternate atom and bond expansion until a pass produces nothing new
    previous = []
    current = [without_stereo_parities(gra)]
    while current != previous:
        previous = current
        current = list(itertools.chain.from_iterable(
            map(_expand_atom_stereo, current)))
        current = list(itertools.chain.from_iterable(
            map(_expand_bond_stereo, current)))

    return tuple(sorted(current, key=frozen))
def is_ring_system(gra):
    """ Is this graph a ring system?

        True when the union of the graph's rings equals the graph itself
        (with stereo parities stripped).

        :param gra: molecular graph
        :rtype: bool
    """
    bare_gra = without_stereo_parities(gra)
    rings_union = union_from_sequence(rings(bare_gra), check=False)
    return rings_union == bare_gra
def from_index_based_stereo(sgr):
    """ Convert a graph from index-based stereo assignments back to absolute
        stereo assignments, where parities are independent of atom ordering.

        :param sgr: a graph with index-based stereo assignments; must be
            explicit
        :returns: a graph with absolute stereo assignments
    """
    assert sgr == explicit(sgr), (
        f"Not an explicit graph:\n{string(sgr, one_indexed=False)}")

    gra = without_stereo_parities(sgr)

    # Without stereo there is nothing to convert
    if not has_stereo(sgr):
        return gra

    atm_keys_pool = atom_stereo_keys(sgr)
    bnd_keys_pool = bond_stereo_keys(sgr)

    idx_atm_pars = atom_stereo_parities(sgr)
    idx_bnd_pars = bond_stereo_parities(sgr)

    ngb_keys_dct = atoms_neighbor_atom_keys(sgr)

    found_atm_keys = set()
    found_bnd_keys = set()

    # Do the assignments iteratively to handle higher-order stereo: repeat
    # until the working graph stops changing
    prev_gra = None
    while prev_gra != gra:
        prev_gra = gra

        abs_atm_pars = {}
        abs_bnd_pars = {}

        # Accumulate the stereogenic sites that carry a parity in `sgr`
        found_atm_keys.update(stereogenic_atom_keys(gra) & atm_keys_pool)
        found_bnd_keys.update(stereogenic_bond_keys(gra) & bnd_keys_pool)

        # Atoms: keep the parity when the stereo-sorted neighbors are an even
        # permutation of the index-sorted ones; otherwise invert it
        for atm_key in found_atm_keys:
            abs_order = atom_stereo_sorted_neighbor_atom_keys(
                gra, atm_key, ngb_keys_dct[atm_key])
            idx_order = sorted(abs_order)

            is_even = util.is_even_permutation(idx_order, abs_order)
            idx_par = idx_atm_pars[atm_key]
            abs_atm_pars[atm_key] = idx_par if is_even else not idx_par

        # Bonds: compare the leading neighbor at each end under both
        # orderings; the parity is kept when both ends agree (both changed or
        # both unchanged) and inverted otherwise
        for bnd_key in found_bnd_keys:
            end1_key, end2_key = sorted(bnd_key)

            end1_abs_order = atom_stereo_sorted_neighbor_atom_keys(
                gra, end1_key, ngb_keys_dct[end1_key] - bnd_key)
            end2_abs_order = atom_stereo_sorted_neighbor_atom_keys(
                gra, end2_key, ngb_keys_dct[end2_key] - bnd_key)
            end1_idx_order = sorted(end1_abs_order)
            end2_idx_order = sorted(end2_abs_order)

            end1_changed = end1_idx_order[0] != end1_abs_order[0]
            end2_changed = end2_idx_order[0] != end2_abs_order[0]

            idx_par = idx_bnd_pars[bnd_key]
            abs_bnd_pars[bnd_key] = (
                idx_par if end1_changed == end2_changed else not idx_par)

        gra = set_atom_stereo_parities(gra, abs_atm_pars)
        gra = set_bond_stereo_parities(gra, abs_bnd_pars)

    # Every stereo site of the input must have received a parity
    final_atm_keys = atom_stereo_keys(gra)
    final_bnd_keys = bond_stereo_keys(gra)
    assert final_atm_keys == atm_keys_pool, (
        "Index-based to absolute stereo conversion failed:\n"
        f"{str(final_atm_keys)} != {str(atm_keys_pool)}")
    assert final_bnd_keys == bnd_keys_pool, (
        "Index-based to absolute stereo conversion failed:\n"
        f"{str(final_bnd_keys)} != {str(bnd_keys_pool)}")

    return gra
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

        The string is built by a depth-first walk over a dominant resonance
        structure of the implicit graph, starting from the lowest-keyed
        terminal atom (or the lowest-keyed atom if there is none).

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo?
        :type stereo: bool
        :param local_stereo: Is the graph using local stereo assignments? That
            is, are they based on atom keys rather than canonical keys?
        :type local_stereo: bool
        :param res_stereo: allow resonant double-bond stereo? Only False is
            currently supported.
        :type res_stereo: bool
        :returns: the SMILES string
        :rtype: str
        :raises AssertionError: if the graph is disconnected, or if more than
            nine ring tags would be needed
        :raises NotImplementedError: if `res_stereo` is True
        :raises ValueError: if a bond order other than 1, 2 or 3 is met
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(atom_stereo_parities(rgr),
                                        lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(bond_stereo_parities(rgr),
                                        lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, keep bond parities only
    # for bonds that are double bonds in this resonance structure.
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(bnd_par_dct,
                                          lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        """ the SMILES token for one atom, bracketed when required
        """
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # Neighbors in the order they appear in the SMILES string:
            # previous atom, implicit hydrogen, ring closures, then branches.
            # The implicit hydrogen is given the key -inf so that it sorts
            # before every real atom key.
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # The stored parity refers to key-sorted neighbors; flip it if the
            # SMILES ordering is an odd permutation of that.
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        """ the SMILES token(s) for the bond from `just_seen` to `key`
        """
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # Second, handle directionality (bond stereo). Start from any
        # direction already recorded for this bond by an earlier stereo bond.
        drep = drep_dct.get((key0, key1), '')

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                # The directional bond on this side is the one being written
                drep = drep1
                flip = not smi_par
            else:
                # The directional bond on this side will be written later
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        """ ring tags for this atom, plus its remaining neighbors and the
            neighbors consumed as ring closures
        """
        # Work on a private list so that the caller's sequence is untouched
        nkeys = list(nkeys)

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is modified while it is being iterated, so
        # the ring directly after a removed one is skipped on this pass. This
        # is left as is because the rest of the algorithm may rely on it --
        # confirm before changing.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            # This atom closes the ring
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            # This atom opens the ring
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys. Entries are popped as atoms are visited, so
    # that no atom is expanded twice.
    nkeys_dct_pool = dict_.transform_values(
        atoms_neighbor_atom_keys(rgr), sorted)

    def _recurse_smiles(key, just_seen=None):
        """ the SMILES string and connection list for the branch rooted at
            `key`, arrived at from `just_seen`
        """
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles(nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1])) +
                        sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles(start_key)

    return smi