Ejemplo n.º 1
0
def isomorphism(gra1, gra2, backbone_only=False, stereo=True, dummy=True):
    """ Find an isomorphism between two graphs

    Intended to eventually replace the other isomorphism functions.

    Both graphs are pre-processed identically, according to the flags below,
    before being handed to `_isomorphism`.

    :param gra1: The first graph
    :param gra2: The second graph
    :param backbone_only: Compare backbone atoms only? If True, both graphs
        are passed through `implicit` first.
    :type backbone_only: bool
    :param stereo: Consider stereo? If False, stereo parities are stripped
        from both graphs.
    :type stereo: bool
    :param dummy: Consider dummy atoms? If False, dummy atoms are removed from
        both graphs.
    :type dummy: bool
    :returns: The isomorphism mapping `gra1` onto `gra2`
    :rtype: dict
    """
    # Gather the requested pre-processing steps, in the order they are applied
    transforms = []
    if backbone_only:
        transforms.append(implicit)
    if not stereo:
        transforms.append(without_stereo_parities)
    if not dummy:
        transforms.append(without_dummy_atoms)

    # Apply each step to the first graph, then to the second
    for transform in transforms:
        gra1 = transform(gra1)
        gra2 = transform(gra2)

    return _isomorphism(gra1, gra2)
Ejemplo n.º 2
0
def backbone_isomorphism(gra1, gra2):
    """ Backbone isomorphism between two graphs

    TODO: DEPRECATE

    For implicit graphs, this is the relabeling of `gra1` that produces
    `gra2`; for other graphs, it gives the correspondences between backbone
    atoms.

    :param gra1: the first graph
    :param gra2: the second graph
    :returns: the result of `_networkx.isomorphism` on the implicit graphs
    """
    # Convert both graphs to implicit form, then to networkx objects
    imp_gras = [implicit(gra) for gra in (gra1, gra2)]
    nxgs = [_networkx.from_graph(imp_gra) for imp_gra in imp_gras]
    return _networkx.isomorphism(*nxgs)
Ejemplo n.º 3
0
def rotational_symmetry_number(gra, key1, key2, lin_keys=None):
    """ Determine the rotational symmetry number about a rotational axis

    :param gra: the graph
    :param key1: the first atom key
    :param key2: the second atom key
    :param lin_keys: forwarded to `linear_segments_atom_keys` as its
        `lin_keys` argument
    :returns: 3 if one of the axis-end atoms has exactly three off-axis
        neighbors and an implicit hydrogen valence of three; otherwise 1
    :rtype: int
    """
    nkeys_dct = atoms_neighbor_atom_keys(without_dummy_atoms(gra))
    nhyd_dct = atom_implicit_hydrogen_valences(implicit(gra))

    # Atoms lying on the axis; these are excluded when counting neighbors
    on_axis_keys = {key1, key2}

    # If either key belongs to a linear segment, use the atoms attached to the
    # ends of that segment for the symmetry number determination instead
    for seg_keys in linear_segments_atom_keys(gra, lin_keys=lin_keys):
        if key1 not in seg_keys and key2 not in seg_keys:
            continue

        if len(seg_keys) == 1:
            # NOTE(review): unlike the multi-atom case below, this branch
            # neither extends the axis keys nor stops the search -- confirm
            # that this is intended
            key1, key2 = sorted(nkeys_dct[seg_keys[0]])
            continue

        # Single-element unpacking requires exactly one outward neighbor
        key1, = nkeys_dct[seg_keys[0]] - {seg_keys[1]}
        key2, = nkeys_dct[seg_keys[-1]] - {seg_keys[-2]}
        on_axis_keys |= set(seg_keys)
        break

    for end_key in (key1, key2):
        if end_key not in nhyd_dct:
            continue

        off_axis_keys = nkeys_dct[end_key] - on_axis_keys
        if len(off_axis_keys) == nhyd_dct[end_key] == 3:
            return 3

    return 1
Ejemplo n.º 4
0
def is_branched(gra):
    """ Determine whether the molecule has a branched chain

    The graph is made implicit, and it is reported as branched when its atom
    count (not counting implicit hydrogens) differs from the length of its
    longest chain.

    :param gra: the graph
    :returns: True if branched, False otherwise
    :rtype: bool
    """
    imp_gra = implicit(gra)
    nchain = len(longest_chain(imp_gra))
    nbackbone = atom_count(imp_gra, with_implicit=False)
    return bool(nbackbone != nchain)
Ejemplo n.º 5
0
def backbone_isomorphism(gra1, gra2, igraph=False):
    """ Backbone isomorphism between two graphs

    TODO: DEPRECATE

    For implicit graphs, this is the relabeling of `gra1` that produces
    `gra2`; for other graphs, it gives the correspondences between backbone
    atoms.

    :param gra1: the first graph
    :param gra2: the second graph
    :param igraph: Use the `_igraph` backend instead of `_networkx`?
    :type igraph: bool
    :returns: with `igraph=True`, the first isomorphism found by
        `_igraph.isomorphisms`, or None if it returns nothing; otherwise, the
        result of `_networkx.isomorphism`
    """
    imp_gra1 = implicit(gra1)
    imp_gra2 = implicit(gra2)

    if not igraph:
        nxg_a = _networkx.from_graph(imp_gra1)
        nxg_b = _networkx.from_graph(imp_gra2)
        return _networkx.isomorphism(nxg_a, nxg_b)

    igr_a = _igraph.from_graph(imp_gra1)
    igr_b = _igraph.from_graph(imp_gra2)
    all_isos = _igraph.isomorphisms(igr_a, igr_b)
    if all_isos:
        return all_isos[0]
    return None
Ejemplo n.º 6
0
def stereo_priority_vector(gra, atm_key, atm_ngb_key):
    """ Generate a sortable representation of the branch extending from
    `atm_key` through its bonded neighbor `atm_ngb_key`

    :param gra: the graph
    :param atm_key: the key of the atom the branch starts from
    :param atm_ngb_key: the key of the bonded neighbor the branch runs through
    :returns: an empty tuple if the neighbor is not a backbone atom (it must
        then be an explicit hydrogen); otherwise a nested tuple of bond and
        atom values along the branch
    :rtype: tuple
    """
    bbn_keys = backbone_keys(gra)
    exp_hyd_keys = explicit_hydrogen_keys(gra)

    # A non-backbone neighbor must be an explicit hydrogen bonded to the atom;
    # it gets the empty vector
    if atm_ngb_key not in bbn_keys:
        assert atm_ngb_key in exp_hyd_keys
        assert frozenset({atm_key, atm_ngb_key}) in bond_keys(gra)
        return ()

    # From here on, work with the implicit graph
    imp_gra = implicit(gra)
    atm_dct = atoms(imp_gra)
    bnd_dct = bonds(imp_gra)
    assert atm_key in bbn_keys
    assert frozenset({atm_key, atm_ngb_key}) in bnd_dct

    nkeys_dct = atoms_neighbor_atom_keys(imp_gra)

    # Shared record of atoms already visited, used to cut off cycles and so
    # avoid infinite recursion
    visited_keys = set()

    def _branch_vector(src_key, dst_key):
        raw_bnd_val = bnd_dct[frozenset({src_key, dst_key})]
        raw_atm_val = atm_dct[dst_key]

        bnd_val = _replace_nones_with_negative_infinity(raw_bnd_val)
        atm_val = _replace_nones_with_negative_infinity(raw_atm_val)

        # Already-visited atom: record only the bond and stop
        if dst_key in visited_keys:
            return (bnd_val, )

        visited_keys.update({src_key, dst_key})
        sub_vecs = tuple(
            _branch_vector(dst_key, nxt_key)
            for nxt_key in nkeys_dct[dst_key] - {src_key})
        return (bnd_val, atm_val) + sub_vecs

    return _branch_vector(atm_key, atm_ngb_key)
Ejemplo n.º 7
0
def linear_atom_keys(rgr, dummy=True):
    """ Atoms forming linear bonds, based on their hybridization

    :param rgr: the graph
    :param dummy: whether or not to consider atoms connected to dummy atoms as
        linear, if different from what would be predicted based on their
        hybridization
    :returns: the linear atom keys, sorted
    :rtype: tuple[int]
    """
    rgr = without_fractional_bonds(rgr)

    # Atoms whose resonance-dominant hybridization value equals 1
    hyb_dct = resonance_dominant_atom_hybridizations(implicit(rgr))
    keys = set(dict_.keys_by_value(hyb_dct, lambda hyb: hyb == 1))

    # Optionally include the atoms that dummy atoms are attached to
    if dummy:
        keys.update(dummy_atoms_neighbor_atom_key(rgr).values())

    return tuple(sorted(keys))
Ejemplo n.º 8
0
def amchi(gra, stereo=True, can=True, is_reflected=None):
    """ Build an AMChI string from a graph

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo in the AMChI string, if present?
        :type stereo: bool
        :param can: Canonicalize the graph? True by default. If set to False
            to avoid re-canonicalization, the `is_reflected` flag must be set
            for a canonical result.
        :type can: bool
        :param is_reflected: For a pre-canonicalized graph, is it a reflected
            enantiomer? True: yes; False: it is an enantiomer that isn't
            reflected; None: it is not an enantiomer. Overwritten when `can`
            is True.
        :type is_reflected: bool or NoneType
        :returns: the AMChI string
        :rtype: str
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    # Drop stereo if requested, then switch to the implicit graph
    imp_gra = implicit(gra if stereo else without_stereo_parities(gra))

    # Canonicalize and determine the canonical enantiomer
    if can:
        imp_gra, is_reflected = canonical_enantiomer(imp_gra)

    layers = {
        'fml_str': _formula_string(imp_gra),
        'main_lyr_dct': _main_layers(imp_gra),
        'ste_lyr_dct': _stereo_layers(imp_gra, is_reflected=is_reflected),
    }
    return automol.amchi.base.from_data(**layers)
Ejemplo n.º 9
0
def bond_symmetry_numbers(gra, frm_bnd_key=None, brk_bnd_key=None):
    """ symmetry numbers, by bond

    TODO: DEPRECATE -- I think this function can be replaced with
    rotational_symmetry_number(). Passing in formed and broken keys is
    unnecessary if one passes in a TS graph, which is stored in the reaction
    object.

    the (approximate) symmetry number of the torsional potential for each
    bond, based on the hydrogen counts for each atom: 3 if either atom of the
    bond has an implicit hydrogen valence of 3, otherwise 1.
    All values are reduced to 1 if a neighbor of the special bonding atom (the
    atom that is being transferred, i.e. the one shared by the forming and
    breaking bonds) has three hydrogen neighbors.

    :param gra: the graph
    :param frm_bnd_key: the key of the forming bond, if any
    :param brk_bnd_key: the key of the breaking bond, if any
    :returns: a dictionary of symmetry numbers, by bond key, covering every
        bond of `gra`
    :rtype: dict
    """
    imp_gra = implicit(gra)
    atm_imp_hyd_vlc_dct = atom_implicit_hydrogen_valences(imp_gra)
    bnd_keys = bond_keys(imp_gra)

    # The transferring atom is the one shared by the forming and breaking
    # bonds
    tfr_atm = None
    if frm_bnd_key and brk_bnd_key:
        for atm_f in frm_bnd_key:
            for atm_b in brk_bnd_key:
                if atm_f == atm_b:
                    tfr_atm = atm_f

    # Does any neighbor of the transferring atom have three hydrogen
    # neighbors? This doesn't depend on the bond under consideration, so it is
    # determined once, up front.
    # Compare against None explicitly: atom key 0 is a valid key but is falsy.
    tfr_ngb_has_three_hyds = False
    if tfr_atm is not None:
        neighbor_dct = atoms_neighbor_atom_keys(gra)

        # assumes gra[0] is the atom dictionary, with the atomic symbol as the
        # first entry of each value -- TODO confirm
        atms = gra[0]
        all_hyds = {atm for atm in atms if atms[atm][0] == 'H'}

        tfr_ngb_has_three_hyds = any(
            sum(nei in all_hyds for nei in neighbor_dct[atm]) == 3
            for atm in neighbor_dct[tfr_atm])

    bnd_symb_num_dct = {}
    for bnd_key in bnd_keys:
        vlc = max(map(atm_imp_hyd_vlc_dct.__getitem__, bnd_key))
        if vlc == 3 and not tfr_ngb_has_three_hyds:
            bnd_symb_num_dct[bnd_key] = 3
        else:
            bnd_symb_num_dct[bnd_key] = 1

    # fill in the rest of the bonds for completeness
    bnd_symb_num_dct = dict_.by_key(bnd_symb_num_dct,
                                    bond_keys(gra),
                                    fill_val=1)

    return bnd_symb_num_dct
Ejemplo n.º 10
0
def smiles(gra, stereo=True, local_stereo=False, res_stereo=False):
    """ SMILES string from graph

        The graph is (optionally) canonicalized, made implicit, and given the
        hydrogens needed for bond stereo. A dominant resonance structure is
        then walked recursively, starting from the lowest-keyed terminal atom
        (or the lowest-keyed atom, if there are no terminal atoms), to build
        the string.

        :param gra: molecular graph
        :type gra: automol graph data structure
        :param stereo: Include stereo?
        :type stereo: bool
        :param local_stereo: Is the graph using local stereo assignments? That
            is, are they based on atom keys rather than canonical keys?
        :type local_stereo: bool
        :param res_stereo: allow resonant double-bond stereo? Only False is
            currently supported.
        :type res_stereo: bool
        :returns: the SMILES string
        :rtype: str
        :raises NotImplementedError: if `res_stereo` is True
        :raises AssertionError: if the graph is disconnected, or if more than
            nine ring tags are needed
    """
    assert is_connected(gra), (
        "Cannot form connection layer for disconnected graph.")

    if not stereo:
        gra = without_stereo_parities(gra)

    # If not using local stereo assignments, canonicalize the graph first.
    # From this point on, the stereo parities can be assumed to correspond to
    # the neighboring atom keys.
    if not local_stereo:
        gra = canonical(gra)

    # Convert to implicit graph
    gra = implicit(gra)

    # Insert hydrogens necessary for bond stereo
    gra = _insert_stereo_hydrogens(gra)

    # Find a dominant resonance
    rgr = dominant_resonance(gra)

    # Determine atom symbols
    symb_dct = atom_symbols(rgr)

    # Determine atom implicit hydrogens
    nhyd_dct = atom_implicit_hydrogen_valences(rgr)

    # Determine bond orders for this resonance
    bnd_ord_dct = bond_orders(rgr)

    # Find radical sites for this resonance
    rad_atm_keys = radical_atom_keys_from_resonance(rgr)

    # Determine neighbors
    nkeys_dct = atoms_neighbor_atom_keys(rgr)

    # Find stereo parities
    atm_par_dct = dict_.filter_by_value(atom_stereo_parities(rgr),
                                        lambda x: x is not None)
    bnd_par_dct = dict_.filter_by_value(bond_stereo_parities(rgr),
                                        lambda x: x is not None)

    # Unless resonant double-bond stereo is allowed, keep only the stereo
    # parities of bonds that are double bonds in this resonance
    if not res_stereo:
        bnd_par_dct = dict_.filter_by_key(bnd_par_dct,
                                          lambda x: bnd_ord_dct[x] == 2)
    else:
        raise NotImplementedError("Not yet implemented!")

    def _atom_representation(key, just_seen=None, nkeys=(), closures=()):
        # Build the SMILES token for one atom: the bare symbol, or a bracketed
        # form with parity and hydrogen count where brackets are required
        symb = ptab.to_symbol(symb_dct[key])
        nhyd = nhyd_dct[key]

        needs_brackets = key in rad_atm_keys or symb not in ORGANIC_SUBSET

        hyd_rep = f'H{nhyd}' if nhyd > 1 else ('H' if nhyd == 1 else '')
        par_rep = ''

        if key in atm_par_dct:
            needs_brackets = True

            # Neighbors in the order they appear in the SMILES string: the
            # preceding atom, the implicit hydrogen (if any), the ring
            # closures, and then the remaining neighbors
            skeys = [just_seen]
            if nhyd:
                assert nhyd == 1
                skeys.append(-numpy.inf)
            if closures:
                skeys.extend(closures)
            skeys.extend(nkeys)

            # Flip the parity if the SMILES ordering is an odd permutation of
            # the sorted ordering
            can_par = atm_par_dct[key]
            smi_par = can_par ^ util.is_odd_permutation(skeys, sorted(skeys))
            par_rep = '@@' if smi_par else '@'

        if needs_brackets:
            rep = f'[{symb}{par_rep}{hyd_rep}]'
        else:
            rep = f'{symb}'

        return rep

    # Get the pool of stereo bonds for the graph and set up a dictionary for
    # storing the ending representation.
    ste_bnd_key_pool = list(bnd_par_dct.keys())
    drep_dct = {}

    def _bond_representation(key, just_seen=None):
        # Build the SMILES token for the bond from `just_seen` to `key`: the
        # bond order symbol followed by a direction symbol, if any
        key0 = just_seen
        key1 = key

        # First, handle the bond order
        if key0 is None or key1 is None:
            rep = ''
        else:
            bnd_ord = bnd_ord_dct[frozenset({key0, key1})]
            if bnd_ord == 1:
                rep = ''
            elif bnd_ord == 2:
                rep = '='
            elif bnd_ord == 3:
                rep = '#'
            else:
                raise ValueError("Bond orders greater than 3 not permitted.")

        # Second, handle directionality (bond stereo)
        drep = drep_dct[(key0, key1)] if (key0, key1) in drep_dct else ''

        bnd_key = next((b for b in ste_bnd_key_pool if key1 in b), None)
        if bnd_key is not None:
            # We've encountered a new stereo bond, so remove it from the pool
            ste_bnd_key_pool.remove(bnd_key)

            # Determine the atoms involved
            key2, = bnd_key - {key1}
            nkey1s = set(nkeys_dct[key1]) - {key2}
            nkey2s = set(nkeys_dct[key2]) - {key1}

            nmax1 = max(nkey1s)
            nmax2 = max(nkey2s)

            nkey1 = just_seen if just_seen in nkey1s else nmax1
            nkey2 = nmax2

            # Determine parity
            can_par = bnd_par_dct[bnd_key]
            smi_par = can_par if nkey1 == nmax1 else not can_par

            # Determine bond directions
            drep1 = drep if drep else '/'
            if just_seen in nkey1s:
                drep = drep1
                flip = not smi_par
            else:
                drep_dct[(key1, nkey1)] = drep1
                flip = smi_par

            drep2 = _flip_direction(drep1, flip=flip)

            drep_dct[(key2, nkey2)] = drep2

        rep += drep

        return rep

    # Get the pool of rings for the graph and set up a dictionary for storing
    # their tags. As the SMILES is built, each next ring that is encountered
    # will be given a tag, removed from the pool, and transferred to the tag
    # dictionary.
    rng_pool = list(rings_atom_keys(rgr))
    rng_tag_dct = {}

    def _ring_representation_with_nkeys_and_closures(key, nkeys=()):
        nkeys = nkeys.copy()

        # Check for new rings in the ring pool. If a new ring is found, create
        # a tag, add it to the tags dictionary, and drop it from the rings
        # pool.
        # NOTE(review): `rng_pool` is mutated while it is being iterated, so
        # the ring directly following a removed one is skipped on this pass --
        # confirm that this is intended.
        for new_rng in rng_pool:
            if key in new_rng:
                # Choose a neighbor key for SMILES ring closure
                clos_nkey = sorted(set(new_rng) & set(nkeys))[0]

                # Add it to the ring tag dictionary with the current key first
                # and the closure key last
                tag = max(rng_tag_dct.values(), default=0) + 1
                assert tag < 10, (
                    f"Ring tag exceeds 10 for this graph:\n{string(gra)}")
                rng = cycle_ring_atom_key_to_front(new_rng, key, clos_nkey)
                rng_tag_dct[rng] = tag

                # Remove it from the pool of unseen rings
                rng_pool.remove(new_rng)

        tags = []
        closures = []
        for rng, tag in rng_tag_dct.items():
            # This atom closes the ring
            if key == rng[-1]:
                nkeys.remove(rng[0])
                closures.append(rng[0])
                # Handle the special case where the last ring bond has stereo
                if (rng[-1], rng[0]) in drep_dct:
                    drep = drep_dct[(rng[-1], rng[0])]
                    tags.append(f'{drep}{tag}')
                else:
                    tags.append(f'{tag}')
            # This atom opens the ring
            if key == rng[0]:
                nkeys.remove(rng[-1])
                closures.append(rng[-1])
                tags.append(f'{tag}')

        rrep = ''.join(map(str, tags))
        return rrep, nkeys, closures

    # Determine neighboring keys
    nkeys_dct_pool = dict_.transform_values(atoms_neighbor_atom_keys(rgr),
                                            sorted)

    def _recurse_smiles(smi, lst, key, just_seen=None):
        # Note: the `smi` and `lst` arguments are overwritten below, before
        # they are read.
        nkeys = nkeys_dct_pool.pop(key) if key in nkeys_dct_pool else []

        # Remove keys just seen from the list of neighbors, to avoid doubling
        # back.
        if just_seen in nkeys:
            nkeys.remove(just_seen)

        # Start the SMILES string and connection list. The connection list is
        # used for sorting.
        rrep, nkeys, closures = _ring_representation_with_nkeys_and_closures(
            key, nkeys)
        arep = _atom_representation(key, just_seen, nkeys, closures=closures)
        brep = _bond_representation(key, just_seen)
        smi = f'{brep}{arep}{rrep}'
        lst = [key]

        # Now, extend the layer/list along the neighboring atoms.
        if nkeys:
            # Build sub-strings/lists by recursively calling this function.
            sub_smis = []
            sub_lsts = []
            while nkeys:
                nkey = nkeys.pop(0)
                sub_smi, sub_lst = _recurse_smiles('', [], nkey, just_seen=key)

                sub_smis.append(sub_smi)
                sub_lsts.append(sub_lst)

                # If this is a ring, remove the neighbor on the other side of
                # `key` to prevent repetition as we go around the ring.
                if sub_lst[-1] == key:
                    nkeys.remove(sub_lst[-2])

            # Now, join the sub-layers and lists together.
            # If there is only one neighbor, we join it as
            #   {arep1}{brep2}{arep2}...
            if len(sub_lsts) == 1:
                sub_smi = sub_smis[0]
                sub_lst = sub_lsts[0]

                # Extend the SMILES string
                smi += f'{sub_smi}'

                # Extend the list
                lst.extend(sub_lst)
            # If there are multiple neighbors, we join them as
            #   {arep1}({brep2}{arep2}...)({brep3}{arep3}...){brep4}{arep4}...
            else:
                assert len(sub_lsts) > 1

                # Extend the SMILES string
                smi += (''.join(map("({:s})".format, sub_smis[:-1])) +
                        sub_smis[-1])

                # Append the lists of neighboring branches.
                lst.append(sub_lsts)

        return smi, lst

    # If there are terminal atoms, start from the first one
    atm_keys = atom_keys(rgr)
    term_keys = terminal_atom_keys(gra, heavy=False)
    start_key = min(term_keys) if term_keys else min(atm_keys)

    smi, _ = _recurse_smiles('', [], start_key)

    return smi