Python one_hot_encodeの例

プログラミング言語: Python

名前空間/パッケージ名: deepchem.utils.molecule_feature_utils

メソッド/関数: one_hot_encode

hotexamples.comのコード掲載数: 6

Python one_hot_encode - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdeepchem.utils.molecule_feature_utils.one_hot_encodeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

  def atom_features(self, atom: RDKitAtom) -> np.ndarray:
    """
    Deepchem already contains an atom_features function, however we are defining a new one here due to the need to handle features specific to MAT.
    Since we need new features like Atom GetNeighbors and IsInRing, and the number of features required for MAT is a fraction of what the Deepchem atom_features function computes, we can speed up computation by defining a custom function.

    Parameters
    ----------
    atom: RDKitAtom
      RDKit Atom object.

    Returns
    ----------
    ndarray
      Numpy array containing atom features.
    """
    attrib = []
    attrib += one_hot_encode(atom.GetAtomicNum(),
                             [5, 6, 7, 8, 9, 15, 16, 17, 35, 53, 999])
    attrib += one_hot_encode(len(atom.GetNeighbors()), [0, 1, 2, 3, 4, 5])
    attrib += one_hot_encode(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

    attrib += one_hot_encode(atom.GetFormalCharge(),
                             [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])

    attrib.append(atom.IsInRing())
    attrib.append(atom.GetIsAromatic())

    return np.array(attrib, dtype=np.float32)

コード例 #2

ファイルを表示

 def test_one_hot_encode(self):
     # string set
     assert one_hot_encode("a", ["a", "b", "c"]) == [1.0, 0.0, 0.0]
     # integer set
     assert one_hot_encode(2, [0.0, 1, 2]) == [0.0, 0.0, 1.0]
     # include_unknown_set is False
     assert one_hot_encode(3, [0.0, 1, 2]) == [0.0, 0.0, 0.0]
     # include_unknown_set is True
     assert one_hot_encode(3, [0.0, 1, 2], True) == [0.0, 0.0, 0.0, 1.0]

コード例 #3

ファイルを表示

ファイル: mol_graph_conv_featurizer.py プロジェクト: walid0925/deepchem

    def _edge_features(self, mol: RDKitMol, path_atoms: Tuple[int, ...],
                       ring_info) -> np.ndarray:
        """Computes the edge features for a given pair of nodes.

    Parameters
    ----------
    mol : : RDKitMol
        RDKit molecule instance.
    path_atoms: tuple
        Shortest path between the given pair of nodes.
    ring_info: list
        Different rings that contain the pair of atoms
    """
        features = []
        path_bonds = []
        path_length = len(path_atoms)
        for path_idx in range(path_length - 1):
            bond = mol.GetBondBetweenAtoms(path_atoms[path_idx],
                                           path_atoms[path_idx + 1])
            if bond is None:
                import warnings
                warnings.warn('Valid idx of bonds must be passed')
            path_bonds.append(bond)

        for path_idx in range(self.max_length):
            if path_idx < len(path_bonds):
                bond_type = get_bond_type_one_hot(path_bonds[path_idx])
                conjugacy = get_bond_is_conjugated_one_hot(
                    path_bonds[path_idx])
                ring_attach = get_bond_is_in_same_ring_one_hot(
                    path_bonds[path_idx])
                features.append(
                    np.concatenate([bond_type, conjugacy, ring_attach]))
            else:
                features.append(np.zeros(6))

        if path_length + 1 > self.max_length:
            path_length = self.max_length + 1
        position_feature = np.zeros(self.max_length + 2)
        position_feature[path_length] = 1
        features.append(position_feature)
        if ring_info:
            rfeat = [
                one_hot_encode(r, allowable_set=self.RING_TYPES)
                for r in ring_info
            ]
            # The 1.0 float value represents True Boolean
            rfeat = [1.0] + np.any(rfeat, axis=0).tolist()
            features.append(rfeat)
        else:
            # This will return a boolean vector with all entries False
            features.append(
                [0.0] +
                one_hot_encode(ring_info, allowable_set=self.RING_TYPES))
        return np.concatenate(features, axis=0)

コード例 #4

ファイルを表示

    def _featurize_string(self, string: str) -> np.ndarray:
        """Compute one-hot featurization of string.

    Parameters
    ----------
    string: str
      An arbitrary string to be featurized.

    Returns
    -------
    np.ndarray
      An one hot vector encoded from arbitrary input string.
      The shape is `(max_length, len(charset) + 1)`.
      The index of unknown character is `len(charset)`.
    """
        if isinstance(self.max_length, int):
            if (len(string) > self.max_length):  # Validation
                raise ValueError(
                    "The length of {} is longer than `max_length`.")
            string = self.pad_string(string)  # Padding

        return np.array([
            one_hot_encode(val, self.charset, include_unknown_set=True)
            for val in string
        ])

コード例 #5

ファイルを表示

ファイル: one_hot_featurizer.py プロジェクト: wisecashew/deepchem

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """Compute one-hot featurization of this molecule.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      An one hot vector encoded from SMILES.
      The shape is `(max_length, len(charset) + 1)`.
      The index of unknown character is `len(charset)`.
    """
    try:
      from rdkit import Chem
    except ModuleNotFoundError:
      raise ImportError("This class requires RDKit to be installed.")

    smiles = Chem.MolToSmiles(mol)
    # validation
    if len(smiles) > self.max_length:
      logger.info(
          "The length of {} is longer than `max_length`. So we return an empty array."
      )
      return np.array([])

    smiles = self.pad_smile(smiles)
    return np.array([
        one_hot_encode(val, self.charset, include_unknown_set=True)
        for val in smiles
    ])

コード例 #6

ファイルを表示

def get_atom_explicit_valence_one_hot(
        atom: RDKitAtom,
        allowable_set: List[int] = DEFAULT_ATOM_EXPLICIT_VALENCE_SET,
        include_unknown_set: bool = True) -> List[float]:
    """Get an one-hot feature of explicit valence of an atom.
  Parameters
  ---------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object
  allowable_set: List[int]
    Atom explicit valence to consider. The default set is `[1, ..., 6]`
  include_unknown_set: bool, default True
    If true, the index of all types not in `allowable_set` is `len(allowable_set)`.
  Returns
  -------
  List[float]
    A one-hot vector of explicit valence an atom has.
    If `include_unknown_set` is False, the length is `len(allowable_set)`.
    If `include_unknown_set` is True, the length is `len(allowable_set) + 1`.
  """
    return one_hot_encode(atom.GetExplicitValence(), allowable_set,
                          include_unknown_set)