def test_onehot_featurizer_arbitrary_with_max_length(self):
   """
   Test one hot encoding with max_length.
   """
   string = "abcdefghijklmnopqrstuvwxyzvewqmc"
   charset = "abcdefghijklmnopqrstuvwxyz"
   length = len(charset) + 1
   featurizer = OneHotFeaturizer(charset, max_length=120)
   feature = featurizer([string])
   assert feature.shape == (1, 120, length)
   # untranform
   undo_string = featurizer.untransform(feature[0])
   assert string == undo_string
 def test_onehot_featurizer_SMILES_with_max_length(self):
   """
   Test one hot encoding with max_length.
   """
   from rdkit import Chem
   length = len(ZINC_CHARSET) + 1
   smiles = 'CC(=O)Oc1ccccc1C(=O)O'
   mol = Chem.MolFromSmiles(smiles)
   featurizer = OneHotFeaturizer(max_length=120)
   feature = featurizer([mol])
   assert feature.shape == (1, 120, length)
   # untranform
   undo_smiles = featurizer.untransform(feature[0])
   assert smiles == undo_smiles
 def test_onehot_featurizer_arbitrary(self):
   """
   Test simple one hot encoding for arbitrary string.
   """
   string = "abcdefghijklmnopqrstuvwxyzwebhasw"
   charset = "abcdefghijklmnopqrstuvwxyz"
   length = len(charset) + 1
   defaultMaxLength = 100
   featurizer = OneHotFeaturizer(charset)
   feature = featurizer([string])  # Implicit call to featurize()
   assert feature.shape == (1, defaultMaxLength, length)
   # untransform
   undo_string = featurizer.untransform(feature[0])
   assert string == undo_string
 def test_onehot_featurizer_SMILES(self):
   """
   Test simple one hot encoding for SMILES strings.
   """
   from rdkit import Chem
   length = len(ZINC_CHARSET) + 1
   smiles = 'CC(=O)Oc1ccccc1C(=O)O'
   mol = Chem.MolFromSmiles(smiles)
   featurizer = OneHotFeaturizer()
   feature = featurizer([mol])
   defaultMaxLength = 100
   assert feature.shape == (1, defaultMaxLength, length)
   # untranform
   undo_smiles = featurizer.untransform(feature[0])
   assert smiles == undo_smiles
 def test_correct_transformation_SMILES(self):
   """
   Test correct one hot encoding.
   """
   from rdkit import Chem
   charset = ['C', 'N', '=', ')', '(', 'O']
   smiles = 'CN=C=O'
   mol = Chem.MolFromSmiles(smiles)
   featurizer = OneHotFeaturizer(charset=charset, max_length=100)
   feature = featurizer([mol])
   assert np.allclose(feature[0][0], np.array([1, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][1], np.array([0, 1, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][2], np.array([0, 0, 1, 0, 0, 0, 0]))
   assert np.allclose(feature[0][3], np.array([1, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][4], np.array([0, 0, 1, 0, 0, 0, 0]))
   assert np.allclose(feature[0][5], np.array([0, 0, 0, 0, 0, 1, 0]))
   # untranform
   undo_smiles = featurizer.untransform(feature[0])
   assert smiles == undo_smiles
 def test_correct_transformation_arbitrary(self):
   """
   Test correct one hot encoding.
   """
   charset = "1234567890"
   string = "12345"
   featurizer = OneHotFeaturizer(charset=charset, max_length=100)
   feature = featurizer([string])
   assert np.allclose(feature[0][0],
                      np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][1],
                      np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][2],
                      np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][3],
                      np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]))
   assert np.allclose(feature[0][4],
                      np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]))
   assert "This test case has not yet been written."