def isomorphism(gra1, gra2, backbone_only=False, stereo=True, dummy=True):
    """ Find an isomorphism mapping one graph onto another

        Both graphs are put through the same optional reductions before being
        compared. This is meant to eventually supersede the other isomorphism
        functions.

        :param gra1: The first graph
        :param gra2: The second graph
        :param backbone_only: Compare backbone atoms only?
        :type backbone_only: bool
        :param stereo: Consider stereo?
        :type stereo: bool
        :param dummy: Consider dummy atoms?
        :type dummy: bool
        :returns: The isomorphism mapping `gra1` onto `gra2`
        :rtype: dict
    """
    # Collect the reductions that were requested, in the order they apply
    transforms = []
    if backbone_only:
        transforms.append(implicit)
    if not stereo:
        transforms.append(without_stereo_parities)
    if not dummy:
        transforms.append(without_dummy_atoms)

    # Apply each reduction to both graphs before comparing them
    for transform in transforms:
        gra1 = transform(gra1)
        gra2 = transform(gra2)

    return _isomorphism(gra1, gra2)
def backbone_isomorphism(gra1, gra2):
    """ Backbone isomorphism between two graphs

        TODO: DEPRECATE

        Both graphs are converted to implicit graphs and compared through
        networkx. For implicit graphs, the result is the relabeling of `gra1`
        that produces `gra2`; for other graphs, it gives the correspondences
        between backbone atoms.

        :param gra1: The first graph
        :param gra2: The second graph
        :returns: The isomorphism, as returned by `_networkx.isomorphism`
    """
    return _networkx.isomorphism(
        _networkx.from_graph(implicit(gra1)),
        _networkx.from_graph(implicit(gra2)),
    )
def rotational_symmetry_number(gra, key1, key2, lin_keys=None):
    """ Rotational symmetry number about a given rotational axis

        The result is 3 when one end of the axis has exactly three off-axis
        neighbors and an implicit hydrogen valence of three; otherwise 1.

        :param gra: the graph
        :param key1: the first atom key
        :param key2: the second atom key
        :param lin_keys: passed through to `linear_segments_atom_keys`
        :returns: the symmetry number (1 or 3)
        :rtype: int
    """
    nbrs_by_key = atoms_neighbor_atom_keys(without_dummy_atoms(gra))
    nhyd_by_key = atom_implicit_hydrogen_valences(implicit(gra))

    axis_keys = {key1, key2}

    # When the axis runs through a linear segment, the atoms attached to the
    # two ends of that segment take over as the axis end points
    for seg_keys in linear_segments_atom_keys(gra, lin_keys=lin_keys):
        if key1 not in seg_keys and key2 not in seg_keys:
            continue

        if len(seg_keys) == 1:
            key1, key2 = sorted(nbrs_by_key[seg_keys[0]])
        else:
            key1, = nbrs_by_key[seg_keys[0]] - {seg_keys[1]}
            key2, = nbrs_by_key[seg_keys[-1]] - {seg_keys[-2]}

        axis_keys |= set(seg_keys)
        break

    def _is_threefold_end(key):
        """ Does this end of the axis carry three off-axis hydrogens? """
        if key not in nhyd_by_key:
            return False
        off_axis_keys = nbrs_by_key[key] - axis_keys
        return len(off_axis_keys) == nhyd_by_key[key] == 3

    return 3 if any(_is_threefold_end(key) for key in (key1, key2)) else 1
def is_branched(gra):
    """ Determine whether the molecule has a branched chain

        The graph is made implicit, and the length of its longest chain is
        compared against its atom count (implicit hydrogens excluded).

        :param gra: the graph
        :returns: `True` if the two numbers differ, `False` otherwise
        :rtype: bool
    """
    imp_gra = implicit(gra)
    longest_len = len(longest_chain(imp_gra))
    heavy_count = atom_count(imp_gra, with_implicit=False)
    return heavy_count != longest_len
def backbone_isomorphism(gra1, gra2, igraph=False):
    """ Backbone isomorphism between two graphs

        TODO: DEPRECATE

        Both graphs are converted to implicit graphs before comparison. For
        implicit graphs, the result is the relabeling of `gra1` that produces
        `gra2`; for other graphs, it gives the correspondences between
        backbone atoms.

        :param gra1: The first graph
        :param gra2: The second graph
        :param igraph: Use the igraph backend instead of networkx?
        :type igraph: bool
        :returns: With `igraph=True`, the first isomorphism found, or `None`
            if there are none; otherwise, whatever `_networkx.isomorphism`
            returns
    """
    gra1 = implicit(gra1)
    gra2 = implicit(gra2)

    if not igraph:
        return _networkx.isomorphism(
            _networkx.from_graph(gra1), _networkx.from_graph(gra2))

    # igraph hands back every isomorphism; only the first one is used
    candidates = _igraph.isomorphisms(
        _igraph.from_graph(gra1), _igraph.from_graph(gra2))
    return candidates[0] if candidates else None
def stereo_priority_vector(gra, atm_key, atm_ngb_key):
    """ Build a sortable representation of the branch extending from
        `atm_key` through its bonded neighbor `atm_ngb_key`

        :param gra: the graph
        :param atm_key: the atom the branch starts from
        :param atm_ngb_key: the bonded neighbor the branch extends through
        :returns: a nested tuple of bond and atom values; empty if the
            neighbor is not a backbone atom
        :rtype: tuple
    """
    bbn_keys = backbone_keys(gra)
    exp_hyd_keys = explicit_hydrogen_keys(gra)

    # A non-backbone neighbor must be an explicit hydrogen; its branch is
    # represented by the empty vector
    if atm_ngb_key not in bbn_keys:
        assert atm_ngb_key in exp_hyd_keys
        assert frozenset({atm_key, atm_ngb_key}) in bond_keys(gra)
        return ()

    # From here on, work with the implicit graph
    gra = implicit(gra)
    atm_dct = atoms(gra)
    bnd_dct = bonds(gra)
    assert atm_key in bbn_keys
    assert frozenset({atm_key, atm_ngb_key}) in bnd_dct

    ngb_keys_dct = atoms_neighbor_atom_keys(gra)

    # Atoms already walked through, shared by every level of the recursion so
    # that cycles are cut off instead of looping forever
    visited_keys = set()

    def _branch_vector(src_key, dst_key):
        """ Vector for the bond src -> dst and everything beyond dst """
        raw_bnd_val = bnd_dct[frozenset({src_key, dst_key})]
        raw_atm_val = atm_dct[dst_key]
        bnd_val = _replace_nones_with_negative_infinity(raw_bnd_val)
        atm_val = _replace_nones_with_negative_infinity(raw_atm_val)

        # Already visited: record only the bond and stop
        if dst_key in visited_keys:
            return (bnd_val,)

        visited_keys.update({src_key, dst_key})
        onward_keys = ngb_keys_dct[dst_key] - {src_key}
        return (bnd_val, atm_val) + tuple(
            _branch_vector(dst_key, nxt_key) for nxt_key in onward_keys)

    return _branch_vector(atm_key, atm_ngb_key)
def linear_atom_keys(rgr, dummy=True):
    """ Keys of atoms forming linear bonds, based on their hybridization

        :param rgr: the graph
        :param dummy: whether or not to consider atoms connected to dummy
            atoms as linear, if different from what would be predicted based
            on their hybridization
        :returns: the linear atom keys, sorted
        :rtype: tuple[int]
    """
    rgr = without_fractional_bonds(rgr)
    hyb_dct = resonance_dominant_atom_hybridizations(implicit(rgr))

    # Atoms whose dominant hybridization value is 1
    keys = set(dict_.keys_by_value(hyb_dct, lambda x: x == 1))

    # Optionally add the atoms that dummy atoms are attached to
    if dummy:
        keys.update(dummy_atoms_neighbor_atom_key(rgr).values())

    return tuple(sorted(keys))
def amchi(gra, stereo=True, can=True, is_reflected=None):
    """ AMChI string from graph

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo in the AMChI string, if present?
        :type stereo: bool
        :param can: Canonicalize the graph? Defaults to True. If set to False
            to avoid re-canonicalization, the `is_reflected` flag must be set
            for a canonical result.
        :type can: bool
        :param is_reflected: For a pre-canonicalized graph, is it a reflected
            enantiomer? True: yes; False: it is an enantiomer that isn't
            reflected; None: it is not an enantiomer.
        :type is_reflected: bool or NoneType
        :returns: the AMChI string
        :rtype: str
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    # Drop stereo if requested, then switch to the implicit graph
    gra = implicit(gra if stereo else without_stereo_parities(gra))

    # Canonicalize and determine the canonical enantiomer; this overrides any
    # `is_reflected` value that was passed in
    if can:
        gra, is_reflected = canonical_enantiomer(gra)

    return automol.amchi.base.from_data(
        fml_str=_formula_string(gra),
        main_lyr_dct=_main_layers(gra),
        ste_lyr_dct=_stereo_layers(gra, is_reflected=is_reflected),
    )
def bond_symmetry_numbers(gra, frm_bnd_key=None, brk_bnd_key=None):
    """ symmetry numbers, by bond

        TODO: DEPRECATE -- I think this function can be replaced with
        rotational_symmetry_number(). Passing in formed and broken keys is
        unnecessary if one passes in a TS graph, which is stored in the
        reaction object.

        The (approximate) symmetry number of the torsional potential for each
        bond, based on the implicit hydrogen counts of its atoms: 3 if either
        atom has three implicit hydrogens, otherwise 1.

        When both `frm_bnd_key` and `brk_bnd_key` are given and share an atom
        (the atom being transferred), the value is reduced back to 1 if any
        neighbor of that atom has exactly three hydrogen neighbors.

        :param gra: the graph
        :param frm_bnd_key: the forming bond key (optional)
        :param brk_bnd_key: the breaking bond key (optional)
        :returns: symmetry numbers keyed by bond key, for every bond of `gra`;
            bonds absent from the implicit graph are filled in with 1
        :rtype: dict
    """
    imp_gra = implicit(gra)
    atm_imp_hyd_vlc_dct = atom_implicit_hydrogen_valences(imp_gra)
    bnd_keys = bond_keys(imp_gra)

    # The transferring atom is the one common to the forming and breaking
    # bonds (if several match, the last one found wins, as before)
    tfr_atm = None
    if frm_bnd_key and brk_bnd_key:
        for atm in frm_bnd_key:
            if atm in brk_bnd_key:
                tfr_atm = atm

    # Work out once whether the threefold symmetry has to be reduced.
    # Compare against None explicitly: 0 is a valid atom key but is falsy, so
    # a plain truth test would silently ignore a transferring atom keyed 0.
    # NOTE(review): the reduction is applied to every bond with three implicit
    # hydrogens, not only to bonds involving the neighbor in question --
    # confirm this matches the intent described in the docstring.
    reduce_threefold = False
    if tfr_atm is not None:
        neighbor_dct = atoms_neighbor_atom_keys(gra)
        atms = gra[0]
        hyd_keys = {atm for atm in atms if atms[atm][0] == 'H'}
        reduce_threefold = any(
            sum(nei in hyd_keys for nei in neighbor_dct[atm]) == 3
            for atm in neighbor_dct[tfr_atm])

    bnd_symb_num_dct = {}
    for bnd_key in bnd_keys:
        vlc = max(map(atm_imp_hyd_vlc_dct.__getitem__, bnd_key))
        is_threefold = vlc == 3 and not reduce_threefold
        bnd_symb_num_dct[bnd_key] = 3 if is_threefold else 1

    # fill in the rest of the bonds for completeness
    bnd_symb_num_dct = dict_.by_key(
        bnd_symb_num_dct, bond_keys(gra), fill_val=1)

    return bnd_symb_num_dct
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

        The string is built by a depth-first walk over a dominant resonance
        structure of the implicit graph, starting from the lowest-keyed
        terminal atom (or the lowest-keyed atom if there are none).

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo?
        :type stereo: bool
        :param local_stereo: Is the graph using local stereo assignments? That
            is, are they based on atom keys rather than canonical keys?
        :type local_stereo: bool
        :param res_stereo: allow resonant double-bond stereo?
        :type res_stereo: bool
        :returns: the SMILES string
        :rtype: str
        :raises NotImplementedError: if `res_stereo` is True
        :raises ValueError: if a bond order greater than 3 is encountered
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(
        atom_stereo_parities(rgr), lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(
        bond_stereo_parities(rgr), lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, keep only the parities of
    # bonds that are double bonds in this resonance structure
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(
            bnd_par_dct, lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        """ SMILES text for one atom, bracketed when it is a radical site,
            outside the organic subset, or a stereo center
        """
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # Neighbors in the order they appear in the SMILES string: the
            # atom just seen, the implicit hydrogen (if any), ring closures,
            # then the remaining neighbors
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # Flip the stored parity if the SMILES ordering is an odd
            # permutation of the sorted ordering
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        """ SMILES text for the bond from `just_seen` to `key`: the bond
            order symbol followed by any direction mark
        """
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # Second, handle directionality (bond stereo): pick up a direction
        # stored for this bond by an earlier stereo bond, if there is one
        drep = drep_dct[(key0, key1)] if (key0, key1) in drep_dct else ''

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                drep = drep1
                flip = not smi_par
            else:
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        """ Ring-closure tags for `key`, along with its neighbor list with
            the closure partners removed and the list of those partners
        """
        nkeys = nkeys.copy()

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is modified while it is being iterated, so
        # the entry following a removed ring is skipped on this pass. Confirm
        # whether that is relied upon before changing the iteration.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys. Entries are popped from this pool as atoms
    # are visited.
    nkeys_dct_pool = dict_.transform_values(
        atoms_neighbor_atom_keys(rgr), sorted)

    def _recurse_smiles(smi, lst, key, just_seen=None):
        """ SMILES string and nested key list for the part of the graph
            reached from `key` without going back through `just_seen`
        """
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles('', [], nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1]))
                        + sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles('', [], start_key)

    return smi