def _set_bond_stereo_from_geometry(gra, bnd_keys, geo, geo_idx_dct):
    """ set the parities of the given bonds to the values found in a geometry

    :param gra: the graph; must equal its own explicit form (asserted)
    :param bnd_keys: keys of the bonds whose parities are to be set
    :param geo: the geometry the parities are evaluated from
    :param geo_idx_dct: handed to the parity evaluation along with `geo`
        (presumably maps atom keys to geometry indices -- TODO confirm)
    :returns: the graph with these bond stereo parities set
    """
    assert gra == explicit(gra)

    def _parity(key):
        return _bond_stereo_parity_from_geometry(gra, key, geo, geo_idx_dct)

    # evaluate every parity first, then pair them up with their keys
    parities = tuple(map(_parity, bnd_keys))
    par_dct = dict(zip(bnd_keys, parities))
    return set_bond_stereo_parities(gra, par_dct)
def _set_atom_stereo_from_geometry(gra, atm_keys, geo, geo_idx_dct):
    """ set the parities of the given atoms to the values found in a geometry

    :param gra: the graph; must equal its own explicit form (asserted)
    :param atm_keys: keys of the atoms whose parities are to be set
    :param geo: the geometry the parities are evaluated from
    :param geo_idx_dct: handed to the parity evaluation along with `geo`
        (presumably maps atom keys to geometry indices -- TODO confirm)
    :returns: the graph with these atom stereo parities set
    """
    assert gra == explicit(gra)

    def _parity(key):
        return _atom_stereo_parity_from_geometry(gra, key, geo, geo_idx_dct)

    # evaluate every parity first, then pair them up with their keys
    parities = tuple(map(_parity, atm_keys))
    par_dct = dict(zip(atm_keys, parities))
    return set_atom_stereo_parities(gra, par_dct)
def _connected_heuristic_geometry(gra):
    """ stereo-specific coordinates for a connected molecular geometry

    :param gra: a connected graph; must equal its own explicit form (asserted)
    :returns: the geometry, restricted to and ordered by the sorted atom keys
        of the graph, and a dictionary giving the position of each atom key
        in that sorted order
    """
    assert gra == explicit(gra)

    sorted_keys = sorted(atom_keys(gra))

    # build the heuristic z-matrix and convert it to a geometry
    zma, zma_key_dct = connected_heuristic_zmatrix(gra)
    full_geo = automol.zmatrix.geometry(zma)

    # pick out the entries belonging to the graph's atoms, in sorted-key order
    subset_idxs = dict_.values_by_key(zma_key_dct, sorted_keys)
    geo = automol.geom.from_subset(full_geo, subset_idxs)

    # after subsetting, each atom sits at its position in the sorted key list
    geo_idx_dct = dict(zip(sorted_keys, range(len(sorted_keys))))
    return geo, geo_idx_dct
def stereogenic_bond_keys(gra):
    """ (unassigned) stereogenic bonds in this graph

    :param gra: the molecular graph
    :returns: a frozenset of the keys of those bonds which pass all of the
        filters below
    """
    # drop the bond orders and work with explicit hydrogens, for simplicity
    gra = explicit(without_bond_orders(gra))

    # candidates: bonds with 2 among their resonance-dominant bond orders
    dbl_bnd_keys = dict_.keys_by_value(
        resonance_dominant_bond_orders(gra), lambda x: 2 in x)

    # both ends must have hybridization 2, i.e. sp^2 (excludes cumulenes)
    hyb_dct = resonance_dominant_atom_hybridizations(gra)
    sp2_keys = dict_.keys_by_value(hyb_dct, lambda x: x == 2)
    cand_keys = frozenset(key for key in dbl_bnd_keys if key <= sp2_keys)

    # discard the bonds reported by bond_stereo_keys
    cand_keys -= bond_stereo_keys(gra)

    # discard double bonds in small rings (rings of fewer than 8 bonds)
    small_rng_bnd_keys = functools.reduce(
        frozenset.union,
        (rng for rng in rings_bond_keys(gra) if len(rng) < 8),
        frozenset())
    cand_keys -= small_rng_bnd_keys

    ngb_keys_dct = atom_neighbor_keys(gra)

    def _end_is_symmetric(atm_key, other_key):
        """ are the substituents on this end of the bond indistinguishable?
        """
        subst_keys = list(ngb_keys_dct[atm_key] - {other_key})

        if not subst_keys:                  # C=:O:
            return True
        if len(subst_keys) == 1:            # C=N:-X
            return False

        assert len(subst_keys) == 2         # C=C(-X)-Y
        vec1 = stereo_priority_vector(gra, atm_key, subst_keys[0])
        vec2 = stereo_priority_vector(gra, atm_key, subst_keys[1])
        return vec1 == vec2

    def _is_stereogenic(bnd_key):
        """ a bond is stereogenic only if neither of its ends is symmetric
        """
        key1, key2 = bnd_key
        return (not _end_is_symmetric(key1, key2)
                and not _end_is_symmetric(key2, key1))

    return frozenset(key for key in cand_keys if _is_stereogenic(key))
def stereogenic_atom_keys(gra):
    """ (unassigned) stereogenic atoms in this graph

    :param gra: the molecular graph
    :returns: a frozenset of the keys of those atoms with a bond valence of
        4, not reported by atom_stereo_keys, whose neighbors all have
        pairwise distinct stereo priority vectors
    """
    # drop the bond orders and work with explicit hydrogens, for simplicity
    gra = explicit(without_bond_orders(gra))

    cand_keys = dict_.keys_by_value(atom_bond_valences(gra), lambda x: x == 4)
    cand_keys -= atom_stereo_keys(gra)

    ngb_keys_dct = atom_neighbor_keys(gra)

    def _is_stereogenic(atm_key):
        """ no two neighbors of this atom may share a priority vector
        """
        vecs = [stereo_priority_vector(gra, atm_key, ngb_key)
                for ngb_key in ngb_keys_dct[atm_key]]
        for vec1, vec2 in itertools.combinations(vecs, r=2):
            if vec1 == vec2:
                return False
        return True

    return frozenset(key for key in cand_keys if _is_stereogenic(key))
def heuristic_geometry(gra):
    """ stereo-specific coordinates for a molecular geometry

    (need not be connected)

    :param gra: the graph; must equal its own explicit form (asserted)
    :returns: the joined geometry of all connected components, and a
        dictionary of geometry indices by atom key, in which the indices of
        each later component are shifted by the number of atoms joined
        before it
    """
    assert gra == explicit(gra)

    components = iter(connected_components(gra))

    # the first component seeds the geometry and the index dictionary
    geo, geo_idx_dct = _connected_heuristic_geometry(next(components))

    # every further component is appended, with its indices shifted
    for component in components:
        comp_geo, comp_idx_dct = _connected_heuristic_geometry(component)
        offset = automol.geom.count(geo)
        shifted_idx_dct = dict_.transform_values(comp_idx_dct, offset.__add__)
        geo_idx_dct.update(shifted_idx_dct)
        geo = automol.geom.join(geo, comp_geo)

    return geo, geo_idx_dct
def _stereo_corrected_geometry(sgr, geo, geo_idx_dct):
    """ correct the stereo parities of a geometry

    (works iteratively to handle cases of higher-order stereo)

    :param sgr: the graph carrying the target parities; must equal its own
        explicit form (asserted)
    :param geo: the geometry to be corrected
    :param geo_idx_dct: handed to the correction routines along with `geo`
        (presumably maps atom keys to geometry indices -- TODO confirm)
    :returns: the corrected geometry; `geo` itself if `sgr` has no stereo
    """
    assert sgr == explicit(sgr)
    gra = without_stereo_parities(sgr)

    if not has_stereo(sgr):
        return geo

    target_atm_pars = atom_stereo_parities(sgr)
    target_bnd_pars = bond_stereo_parities(sgr)

    # stereogenic keys found so far, accumulated over all passes
    found_atm_keys = set()
    found_bnd_keys = set()

    # repeat until a pass leaves the working graph unchanged
    prev_gra = None
    while prev_gra != gra:
        prev_gra = gra

        found_atm_keys.update(stereogenic_atom_keys(gra))
        found_bnd_keys.update(stereogenic_bond_keys(gra))

        atm_pars = {key: target_atm_pars[key] for key in found_atm_keys}
        bnd_pars = {key: target_bnd_pars[key] for key in found_bnd_keys}

        geo, gra = _atom_stereo_corrected_geometry(
            gra, atm_pars, geo, geo_idx_dct)
        geo, gra = _bond_stereo_corrected_geometry(
            gra, bnd_pars, geo, geo_idx_dct)

    return geo
def connected_heuristic_zmatrix(gra):
    """ stereo-specific coordinates for a connected molecular graph

    Graphs without rings and graphs with exactly one ring are handled; a
    graph with more than one ring raises NotImplementedError.

    :param gra: the molecular graph; must equal its own explicit form
        (asserted)
    :returns: the z-matrix, rebuilt from the stereo-corrected geometry, and
        the key dictionary produced while building it (presumably mapping
        atom keys to z-matrix rows -- TODO confirm)
    :raises NotImplementedError: if the graph has more than one ring
    """
    assert gra == explicit(gra)

    atm_sym_dct = atom_symbols(gra)

    # this will contain triplets of adjacent atoms from which to continue
    # filling out the z-matrix, after it has been started
    triplets = []

    # 1. start the z-matrix and set the lists of triplets
    rng_atm_keys_lst = rings_atom_keys(gra)
    if not rng_atm_keys_lst:
        # find the first heavy atom in the longest chain (if there isn't one,
        # we are dealing with atomic or molecular hydrogen, which will be
        # captured by the last two cases)
        chain = longest_chain(gra)

        # if the chain does not start on a hydrogen, traverse it from the
        # other end instead
        if atm_sym_dct[chain[0]] != 'H':
            chain = list(reversed(chain))

        # start from the second atom of the chain, or from the only atom if
        # the chain has length one
        if len(chain) > 1:
            atm_key = chain[1]
        else:
            atm_key = chain[0]

        # determine the z-matrix of the starting atom and its neighbors
        # (note that this call also returns an updated graph)
        zma, zma_key_dct, dummy_atm_key, gra = _start_zmatrix_from_atom(
            gra, atm_key)

        # since this is the first heavy atom in the longest chain, we only need
        # to follow one branch from this atom to complete the z-matrix; this
        # will be the branch extending toward the next heavy atom in the chain
        if len(chain) > 3:
            atm1_key, atm2_key, atm3_key = chain[:3]

            # if we inserted a dummy atom on the starting geometry, we should
            # use that as atom 1 in the triplet, rather than the first atom
            # of the chain
            if dummy_atm_key is not None:
                atm1_key = dummy_atm_key

            triplets = [(atm1_key, atm2_key, atm3_key)]
    elif len(rng_atm_keys_lst) == 1:
        rng_atm_keys, = rng_atm_keys_lst

        zma, zma_key_dct = _start_zmatrix_from_ring(gra, rng_atm_keys)

        # add every run of three consecutive ring atoms as a triplet; the
        # last two ring atoms are prepended so that the runs wrap around the
        # ring (assumes `mit` is more_itertools -- TODO confirm)
        triplets += list(mit.windowed(rng_atm_keys[-2:] + rng_atm_keys, 3))
    else:
        # currently, multiple rings are not implemented
        raise NotImplementedError

    # 2. complete the z-matrix by looping over triplets
    # (each call hands back an updated z-matrix, key dictionary and graph)
    for atm1_key, atm2_key, atm3_key in triplets:
        zma, zma_key_dct, gra = _complete_zmatrix_for_branch(
            gra, atm1_key, atm2_key, atm3_key, zma, zma_key_dct)

    # 3. convert to Cartesian geometry for stereo correction
    geo = automol.zmatrix.geometry(zma)
    geo_idx_dct = zma_key_dct
    geo = _stereo_corrected_geometry(gra, geo, geo_idx_dct)

    # 4. convert back to z-matrix, keeping the original z-matrix structure
    vma = automol.zmatrix.var_(zma)
    zma = automol.zmatrix.from_geometry(vma, geo)

    return zma, zma_key_dct
4: ('C', 1, True), 5: ('O', 1, None), 6: ('O', 0, None), 7: ('O', 0, None) }, { frozenset({3, 4}): (1, None), frozenset({2, 6}): (1, None), frozenset({0, 2}): (1, None), frozenset({3, 6}): (1, None), frozenset({2, 4}): (1, None), frozenset({1, 3}): (1, None), frozenset({5, 7}): (1, None), frozenset({4, 7}): (1, None) }) SGR = explicit(GRA) GEO, GEO_IDX_DCT = heuristic_geometry(SGR) # GEO = ( # ('C', (-4.3870588134, -1.233231672517, 0.143749726309016)), # ('C', (3.430304171771, -2.162836645393, 0.06129774977456508)), # ('C', (-2.228277354885, 0.1940343942502, -1.0788747507898575)), # ('C', (1.642516616594, -0.2666792157215, -1.1217150146524835)), # ('C', (-0.128929182575, 1.167922277159, 0.6891116967734786)), # ('O', (2.44416862025, 4.3088944278, 2.2811617948010614)), # ('O', (-0.4839163664245, -1.530973627393, -2.314392676447687)), # ('O', (0.1546996690877, 3.88389710099, 0.8186943317590577)), # ('H', (-3.69835752847, -2.73865502820, 1.385388988006255)), # ('H', (-5.55611791656, 0.04579783881378, 1.2732325623382277)), # ('H', (-5.59710374244, -2.09637223693, -1.2950602194746856)), # ('H', (2.43356758828, -3.46358524781, 1.3249230383597128)),
def _are_all_explicit(gras):
    """ does every one of these graphs equal its own explicit form?

    :param gras: an iterable of graphs
    :returns: True if all of them do (or there are none), otherwise False
    """
    for gra in gras:
        if not gra == explicit(gra):
            return False
    return True