예제 #1
0
def mol_to_atom_feats_and_adjacency_list(mol: AllChem.Mol,
                                         atom_map_to_index_map=None,
                                         params: AtomFeatParams = None):
    """
    Convert an atom-mapped molecule into a ``GraphAsAdjList``.

    The ``molAtomMapNumber`` property is read from every atom (and from both
    end atoms of every bond), so each atom of ``mol`` must carry it.

    :param mol: the molecule to featurize.
    :param atom_map_to_index_map: if you pass this in it will use the defined
    indices for each atom (keys are atom map numbers, values are node
    indices). Otherwise will use rdkit default indexing. A supplied map is
    only read, never modified.
    :param params: featurization settings; a default ``AtomFeatParams()`` is
    created when this is None.
    :return: a ``GraphAsAdjList`` built from (1) a float32 node feature matrix
    with one row per atom, (2) a dict mapping each name in
    ``params.bond_names`` to the transposed array of (begin, end) node index
    pairs of the bonds with that name, and (3) an all-zero vector with one
    entry per atom.
    :raises KeyError: if an atom has no ``molAtomMapNumber`` property, or a
    map number is missing from a supplied ``atom_map_to_index_map``.
    :raises AssertionError: if the values of a supplied
    ``atom_map_to_index_map`` are not exactly ``0 .. num_atoms - 1``.
    """
    params = AtomFeatParams() if params is None else params
    atoms = mol.GetAtoms()
    num_atoms = len(atoms)

    node_feats = np.zeros((num_atoms, params.atom_feature_length),
                          dtype=np.float32)

    use_supplied_idx_flg = atom_map_to_index_map is not None
    if use_supplied_idx_flg:
        # The supplied ordering has to address every row of node_feats.
        assert set(atom_map_to_index_map.values()) == set(range(num_atoms)), \
            "if give pre supplied ordering it must be the same size as the molecules trying to order"
    else:
        # No ordering given: build the map ourselves from rdkit's indices.
        atom_map_to_index_map = {}

    # First we will create the atom features and the mappings
    for atom in atoms:
        am = atom.GetPropsAsDict()['molAtomMapNumber']  # the atom mapping in the file
        if use_supplied_idx_flg:
            idx = atom_map_to_index_map[am]
        else:
            idx = atom.GetIdx()  # goes from 0 to A-1
            atom_map_to_index_map[am] = idx
        node_feats[idx, :] = get_atom_features(atom, params)

    # Now we will go through and create the adjacency lists. Each bond is
    # recorded once, in its begin -> end direction, under its bond name.
    adjacency_lists = {k: [] for k in params.bond_names}
    for bond in mol.GetBonds():
        am_b = bond.GetBeginAtom().GetPropsAsDict()['molAtomMapNumber']
        am_e = bond.GetEndAtom().GetPropsAsDict()['molAtomMapNumber']
        ix_b = atom_map_to_index_map[am_b]
        ix_e = atom_map_to_index_map[am_e]
        adjacency_lists[params.get_bond_name(bond)].append((ix_b, ix_e))

    # Finally we pack all the results together.
    # NOTE(review): for a bond name with no bonds, np.array([]).T is an empty
    # 1-D array rather than a (2, 0) array -- confirm GraphAsAdjList copes.
    # NOTE(review): the all-zero vector presumably assigns every node to
    # graph 0 -- confirm against GraphAsAdjList.
    res = graph_as_adj_list.GraphAsAdjList(
        node_feats,
        {k: np.array(v).T for k, v in adjacency_lists.items()},
        np.zeros(node_feats.shape[0], dtype=data_types.INT))

    return res
예제 #2
0
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: typing.Iterable[typing.Union[
            FunctionalGroup, FunctionalGroupFactory]],
        placer_ids: typing.Optional[tuple[int, ...]],
    ) -> None:
        """
        Set up the building block from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        and handed to the parent initializer; functional groups,
        *placer* ids and core ids are then set on the instance.

        Parameters:

            molecule:
                The molecule to copy atoms, bonds and positions from.
                Positions are taken from the conformer returned by
                ``molecule.GetConformer()``.

            functional_groups:
                An :class:`iterable` holding :class:`.FunctionalGroup`
                instances, :class:`.FunctionalGroupFactory` instances,
                or a mix of the two. :class:`.FunctionalGroup`
                instances are added to the building block directly,
                while each :class:`.FunctionalGroupFactory` is used to
                create the :class:`.FunctionalGroup` instances the
                building block should hold. Functional groups identify
                which atoms are modified during
                :class:`.ConstructedMolecule` construction.

            placer_ids:
                The ids of *placer* atoms, which are the atoms used
                for calculating the position of the building block.
                Which ids end up being used depends on this argument
                and on the functional groups of the building block:

                #. `placer_ids` is given: the given ids are used.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: the *placer* ids of the
                   functional groups are used.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty: all atoms of the molecule are used.

        """

        atoms = tuple(
            Atom(
                rdkit_atom.GetIdx(),
                rdkit_atom.GetAtomicNum(),
                rdkit_atom.GetFormalCharge(),
            )
            for rdkit_atom in molecule.GetAtoms()
        )

        collected_bonds = []
        for rdkit_bond in molecule.GetBonds():
            first_atom = atoms[rdkit_bond.GetBeginAtomIdx()]
            second_atom = atoms[rdkit_bond.GetEndAtomIdx()]
            # Dative bonds are stored with the order 9 instead of the
            # value rdkit reports for them.
            if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
                bond_order = 9
            else:
                bond_order = rdkit_bond.GetBondTypeAsDouble()
            collected_bonds.append(
                Bond(atom1=first_atom, atom2=second_atom, order=bond_order)
            )
        bonds = tuple(collected_bonds)

        conformer = molecule.GetConformer()
        position_matrix = conformer.GetPositions()

        super().__init__(atoms, bonds, position_matrix)

        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )

        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)
예제 #3
0
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: _FunctionalGroups,
        placer_ids: typing.Optional[abc.Iterable[int]],
    ) -> None:
        """
        Set up the building block from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        and passed to ``Molecule.__init__``; functional groups,
        *placer* ids and core ids are then set on the instance.

        Parameters:

            molecule:
                The molecule to copy atoms, bonds and positions from.
                Positions are taken from the conformer returned by
                ``molecule.GetConformer()``.

            functional_groups:
                The :class:`.FunctionalGroup` instances the building
                block should have, and / or the
                :class:`.FunctionalGroupFactory` instances used to
                create them.

            placer_ids:
                The ids of *placer* atoms, which are the atoms used
                for calculating the position of the building block.
                Which ids end up being used depends on this argument
                and on the functional groups of the building block:

                #. `placer_ids` is given: the given ids are used.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: the *placer* ids of the
                   functional groups are used.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty: all atoms of the molecule are used.

        """

        atom_list = []
        for rdkit_atom in molecule.GetAtoms():
            atom_list.append(
                Atom(
                    id=rdkit_atom.GetIdx(),
                    atomic_number=rdkit_atom.GetAtomicNum(),
                    charge=rdkit_atom.GetFormalCharge(),
                )
            )
        atoms = tuple(atom_list)

        bond_list = []
        for rdkit_bond in molecule.GetBonds():
            begin_atom = atoms[rdkit_bond.GetBeginAtomIdx()]
            end_atom = atoms[rdkit_bond.GetEndAtomIdx()]
            # Dative bonds are stored with the order 9 instead of the
            # value rdkit reports for them.
            is_dative = rdkit_bond.GetBondType() == rdkit.BondType.DATIVE
            bond_order = 9 if is_dative else rdkit_bond.GetBondTypeAsDouble()
            bond_list.append(
                Bond(atom1=begin_atom, atom2=end_atom, order=bond_order)
            )
        bonds = tuple(bond_list)

        conformer = molecule.GetConformer()
        position_matrix = conformer.GetPositions()

        Molecule.__init__(
            self=self,
            atoms=atoms,
            bonds=bonds,
            position_matrix=position_matrix,
        )

        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )

        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)