def vmatrix(gra, keys=None, rng_keys=None):
    """ Build a v-matrix for a connected graph.

    :param gra: the graph
    :param keys: optionally, restrict the v-matrix to this subset of keys;
        the subset must span a connected graph
    :param rng_keys: keys for a ring to start from; only passed along when
        the graph has ring systems
    :returns: the v-matrix, together with the keys identifying its rows
    """
    graph = gra if keys is None else subgraph(gra, keys)

    assert is_connected(graph), "Graph must be connected!"

    if not ring_systems(graph):
        # No rings: seed the v-matrix at the first terminal heavy atom, or,
        # if there is none, at the first atom in sorted key order.
        seed_keys = sorted(terminal_heavy_atom_keys(graph))
        if not seed_keys:
            seed_keys = sorted(atom_keys(graph))
        vma, zma_keys = start_at(graph, seed_keys[0])
    else:
        # Rings present: start with the ring systems and their connections.
        vma, zma_keys = connected_ring_systems(graph, rng_keys=rng_keys)

    # Whatever has not been placed yet gets added by the continuation step.
    remaining = atom_keys(graph) - set(zma_keys)
    vma, zma_keys = continue_vmatrix(graph, remaining, vma, zma_keys)
    return vma, zma_keys
def inchi_with_sort_from_geometry(gra, geo=None, geo_idx_dct=None):
    """ Generate an InChI string from a molecular graph, along with the InChI
    sort order of its atoms.

    If coordinates are passed in, they are used to determine stereo.

    :param gra: molecular graph
    :type gra: automol graph data structure
    :param geo: molecular geometry
    :type geo: automol geometry data structure
    :param geo_idx_dct: mapping forwarded to the molfile and stereo helpers;
        when a geometry is given and this is None, an identity mapping over
        the geometry's atom count is used
    :type geo_idx_dct: dict[:]
    :returns: the inchi string, along with the InChI sort order of the atoms
    :rtype: (str, tuple(int))
    """
    have_geo = geo is not None

    if have_geo and geo_idx_dct is None:
        natms = automol.geom.base.count(geo)
        geo_idx_dct = {idx: idx for idx in range(natms)}

    mlf, key_map_inv = molfile_with_atom_mapping(
        gra, geo=geo, geo_idx_dct=geo_idx_dct)
    rdm = rdkit_.from_molfile(mlf)
    ich, aux_info = rdkit_.to_inchi(rdm, with_aux_info=True)

    # Translate the sort order parsed from the aux info through the inverse
    # key map returned with the molfile.
    nums_lst = tuple(
        tuple(key_map_inv[num] for num in nums)
        for nums in _parse_sort_order_from_aux_info(aux_info))

    # Assuming the MolFile InChI works, the above code is all we need. What
    # follows is to correct cases where it fails.
    # This only appears to work sometimes, so when it doesn't, we fall back on
    # the original inchi output.
    if have_geo:
        gra = set_stereo_from_geometry(gra, geo, geo_idx_dct=geo_idx_dct)

    gra = implicit(gra)

    forced_ichs = []
    for sub_ich, nums in zip(automol.inchi.split(ich), nums_lst):
        sub_gra = subgraph(gra, nums, stereo=True)
        forced_ich = _connected_inchi_with_graph_stereo(
            sub_ich, sub_gra, nums)
        if forced_ich is None:
            # One component failed: keep the original InChI untouched.
            break
        forced_ichs.append(forced_ich)
    else:
        # If it worked, replace the InChI with our forced-stereo InChI.
        ich = automol.inchi.join(forced_ichs)
        ich = automol.inchi.standard_form(ich)

    return ich, nums_lst
def continue_vmatrix(gra, keys, vma, zma_keys):
    """ Extend a partial v-matrix with a subset of keys.

    :param gra: the graph
    :param keys: the keys to be added to the v-matrix
    :param vma: the partial v-matrix to continue from
    :param zma_keys: row keys of the partial v-matrix
    :returns: the extended v-matrix and its row keys
    """
    # Only the atoms already placed plus the ones to be added are relevant.
    relevant_keys = set(keys) | set(zma_keys)
    sub_gra = subgraph(gra, relevant_keys)

    # Ring systems go in first ...
    vma, zma_keys = continue_connected_ring_systems(
        sub_gra, keys, vma, zma_keys)

    # ... then any incomplete branches are completed. The list of atoms with
    # missing neighbors is determined once, before any branch is added.
    for branch_key in _atoms_missing_neighbors(sub_gra, zma_keys):
        vma, zma_keys = complete_branch(sub_gra, branch_key, vma, zma_keys)

    return vma, zma_keys
def distance_bounds_matrices(gra, keys, sp_dct=None):
    """ initial distance bounds matrices

    :param gra: molecular graph
    :param keys: atom keys specifying the order of indices in the matrix
    :param sp_dct: a 2d dictionary giving the shortest path between any pair
        of atoms in the graph; if None, it is computed for the subgraph
        spanned by `keys`
    :returns: the lower-bound matrix and the upper-bound matrix, both
        symmetric with zeros on the diagonal
    :raises AssertionError: if `keys` contains atoms that are not in the
        graph, or if a lower bound exceeds the matching upper bound
    """
    assert set(keys) <= set(atom_keys(gra))

    sub_gra = subgraph(gra, keys, stereo=True)
    sp_dct = atom_shortest_paths(sub_gra) if sp_dct is None else sp_dct

    # Note: the bounds function is built from the full graph, not the
    # subgraph.
    bounds_ = path_distance_bounds_(gra)

    natms = len(keys)
    umat = numpy.zeros((natms, natms))
    lmat = numpy.zeros((natms, natms))
    for (idx1, key1), (idx2, key2) in itertools.combinations(
            enumerate(keys), 2):
        if key2 in sp_dct[key1]:
            path = sp_dct[key1][key2]
            ldist, udist = bounds_(path)
            lmat[idx1, idx2] = lmat[idx2, idx1] = ldist
            umat[idx1, idx2] = umat[idx2, idx1] = udist
        else:
            # they are disconnected
            # There is no path for this pair. Reset it so the assertion
            # message below never reports a stale path from an earlier pair
            # and never hits an unbound name.
            path = None
            lmat[idx1, idx2] = lmat[idx2, idx1] = closest_approach(
                gra, key1, key2)
            umat[idx1, idx2] = umat[idx2, idx1] = 999

        assert lmat[idx1, idx2] <= umat[idx1, idx2], (
            "Lower bound exceeds upper bound. This is a bug!\n"
            f"{string(gra, one_indexed=False)}\npath: {str(path)}\n")

    return lmat, umat
def continue_connected_ring_systems(gra, keys, vma, zma_keys, rsys=None,
                                    check=True):
    """ generate the connected ring systems for a subset of keys, continuing on
    from a partial v-matrix

    The subset must have at least one neighbor that already exists in the
    v-matrix

    :param gra: the graph for which the v-matrix will be constructed
    :param keys: the subset of keys to be added to the v-matrix
    :param vma: a partial v-matrix from which to continue
    :param zma_keys: row keys for the partial v-matrix, identifying the atom
        specified by each row of `vma` in order
    :param rsys: optionally, pass the ring systems in to avoid recalculating
    :param check: if True, assert that the graph restricted to `keys` and
        `zma_keys` is connected before starting
    :returns: the extended v-matrix and its row keys
    :raises AssertionError: if the connectivity check fails, if a ring system
        shares more than one atom with the current v-matrix, or if none of
        the remaining ring systems can be connected to the v-matrix
    """
    gra = subgraph(gra, set(keys) | set(zma_keys))
    sub = subgraph(gra, keys)
    if check:
        assert is_connected(gra), "Graph must be connected!"

    if rsys is None:
        rsys = sorted(ring_systems(sub), key=atom_count)

    # Work on a private list, since ring systems are popped as they are added.
    rsys = list(rsys)

    while rsys:
        # 1. Find the next ring system with a connection to the current
        # v-matrix and connect them
        conn = False
        for idx, rsy_keys in enumerate(map(atom_keys, rsys)):
            shared_keys = set(zma_keys) & rsy_keys
            if shared_keys:
                # ring systems are connected by one bond -- no chain needed
                assert len(shared_keys) == 1, (
                    "Attempting to add redundant keys to v-matrix: {}"
                    .format(str(shared_keys)))
                key, = shared_keys
                conn = True
            else:
                # see if the ring systems are connected by a chain
                path = shortest_path_between_groups(
                    gra, zma_keys, rsy_keys)

                # if so, build a bridge from the current v-matrix to this next
                # ring system; the None check must come before the path is
                # sliced, so an unreachable ring system is skipped instead of
                # raising a TypeError
                if path is not None:
                    vma, zma_keys = continue_chain(
                        gra, path[:-1], vma, zma_keys, term_hydrogens=False)
                    key = path[-1]
                    conn = True

            if conn:
                rsy = rsys.pop(idx)
                break

        # Raised explicitly rather than via `assert`, so the loop cannot carry
        # on with a stale ring system when assertions are disabled.
        if not conn:
            raise AssertionError("This is a disconnected graph!")

        # 2. Decompose the ring system with the connecting ring first
        rng_keys = next(rks for rks in rings_atom_keys(rsy) if key in rks)
        keys_lst = ring_system_decomposed_atom_keys(rsy, rng_keys=rng_keys)

        # 3. Build the next ring system
        vma, zma_keys = continue_ring_system(gra, keys_lst, vma, zma_keys)

    return vma, zma_keys