Code example #1
File: __init__.py  Project: Justin318/deepchem
    def predict_on_batch(self, X, pad_batch=True):
        """Makes predictions on a batch of data."""
        if pad_batch:
            # Pad the batch up to the model's batch size, then trim the padded
            # predictions back to the original number of samples.
            len_unpadded = len(X)
            Xpad = pad_features(self.model_instance.batch_size, X)
            return self.model_instance.predict_on_batch(Xpad)[:len_unpadded]
        else:
            return self.model_instance.predict_on_batch(X)
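pad_features is deepchem's helper for growing a feature batch to the model's fixed batch size; the wrapper above then slices the padded predictions back to the original length with [:len_unpadded]. As a rough sketch of that contract (not the actual deepchem implementation), a stand-in could cycle through the available samples until the batch is full:

import numpy as np

def pad_features_sketch(batch_size, X_b):
    """Hypothetical stand-in for pad_features: repeat the available samples
    cyclically until the batch holds exactly batch_size entries."""
    n_samples = len(X_b)
    if n_samples == batch_size:
        return X_b
    indices = np.arange(batch_size) % n_samples  # cycle through the samples
    return X_b[indices]

# Pad a 3-sample batch up to a batch size of 8, run a dummy "model",
# then trim the predictions back to the original three samples.
X = np.random.rand(3, 10)
Xpad = pad_features_sketch(8, X)
preds = Xpad.sum(axis=1)   # stand-in for model_instance.predict_on_batch
preds = preds[:len(X)]     # discard predictions for the padded rows
assert Xpad.shape == (8, 10) and preds.shape == (3,)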
Code example #2
File: __init__.py  Project: Justin318/deepchem
    def predict_on_batch(self, X, pad_batch=False):
        """Return model output for the provided input.

        Restore(checkpoint) must have previously been called on this object.

        Args:
          X: numpy array of features for the batch.
          pad_batch: If True, pad X up to the model's batch size before
            running inference.

        Returns:
          A numpy array of model outputs with shape n_examples x n_tasks (x ...).
          The output array may be more than 2D, e.g. for classifier models that
          return class probabilities.

        Raises:
          ValueError: If the model output has an unrecognized rank.
        """
        if pad_batch:
            X = pad_features(self.batch_size, X)

        if not self._restored_model:
            self.restore()
        with self.eval_graph.graph.as_default():

            # run eval data through the model
            n_tasks = self.n_tasks
            output = []
            start = time.time()
            with self._get_shared_session(train=False).as_default():
                feed_dict = self.construct_feed_dict(X)
                data = self._get_shared_session(train=False).run(
                    self.eval_graph.output, feed_dict=feed_dict)
                batch_output = np.asarray(data[:n_tasks], dtype=float)
                # reshape to batch_size x n_tasks x ...
                if batch_output.ndim == 3:
                    batch_output = batch_output.transpose((1, 0, 2))
                elif batch_output.ndim == 2:
                    batch_output = batch_output.transpose((1, 0))
                else:
                    raise ValueError(
                        'Unrecognized rank combination for output: %s' %
                        (batch_output.shape, ))
                output.append(batch_output)

                outputs = np.array(
                    from_one_hot(np.squeeze(np.concatenate(output)), axis=-1))

        return np.copy(outputs)
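The final from_one_hot call collapses per-class probabilities into predicted class labels. Assuming it behaves like an argmax over the last axis (an assumption about the deepchem utility, based on how it is called above), the conversion amounts to:

import numpy as np

# Class probabilities for a batch of 4 examples and 2 classes.
probs = np.array([[0.9, 0.1],
                  [0.2, 0.8],
                  [0.4, 0.6],
                  [0.7, 0.3]])
labels = np.argmax(probs, axis=-1)  # assumed equivalent of from_one_hot(..., axis=-1)
assert labels.tolist() == [0, 1, 1, 0]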
Code example #3
File: test_datasets.py  Project: Justin318/deepchem
    def test_pad_features(self):
        """Test that pad_features pads features correctly."""
        batch_size = 100
        num_features = 10
        num_tasks = 5

        # Test case where 2*n_samples < batch_size
        n_samples = 29
        X_b = np.zeros((n_samples, num_features))

        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

        # Test case where batch_size/2 < n_samples < batch_size
        n_samples = 79
        X_b = np.zeros((n_samples, num_features))
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

        # Test case where n_samples == batch_size
        n_samples = 100
        X_b = np.zeros((n_samples, num_features))
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

        # Test case for object featurization.
        n_samples = 2
        X_b = np.array([{"a": 1}, {"b": 2}])
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

        # Test case for more complicated object featurization
        n_samples = 2
        X_b = np.array([(1, {"a": 1}), (2, {"b": 2})])
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

        # Test case with multidimensional data
        n_samples = 50
        num_atoms = 15
        d = 3
        X_b = np.zeros((n_samples, num_atoms, d))
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size
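The object-featurization cases show why plain zero-padding is not enough: an array of per-sample dicts has dtype=object, so the padding has to produce valid sample objects rather than zero rows. One way to meet the test's contract (again a sketch, not the real pad_features) is to repeat the existing samples:

import numpy as np

batch_size = 100
X_b = np.array([{"a": 1}, {"b": 2}])      # dtype=object, as in the test above
idx = np.arange(batch_size) % len(X_b)    # cycle through the two samples
X_out = X_b[idx]
assert len(X_out) == batch_size and X_out[0] == {"a": 1}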
Code example #4
File: test_datasets.py  Project: rbharath/deepchem
  def test_pad_features(self):
    """Test that pad_features pads features correctly."""
    batch_size = 100
    num_features = 10
    num_tasks = 5
  
    # Test case where 2*n_samples < batch_size
    n_samples = 29
    X_b = np.zeros((n_samples, num_features))
  
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size

    # Test case where batch_size/2 < n_samples < batch_size
    n_samples = 79
    X_b = np.zeros((n_samples, num_features))
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size

    # Test case where n_samples == batch_size
    n_samples = 100 
    X_b = np.zeros((n_samples, num_features))
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size

    # Test case for object featurization.
    n_samples = 2
    X_b = np.array([{"a": 1}, {"b": 2}])
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size

    # Test case for more complicated object featurization
    n_samples = 2
    X_b = np.array([(1, {"a": 1}), (2, {"b": 2})])
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size

    # Test case with multidimensional data
    n_samples = 50
    num_atoms = 15
    d = 3
    X_b = np.zeros((n_samples, num_atoms, d))
    X_out = pad_features(batch_size, X_b)
    assert len(X_out) == batch_size
Code example #5
File: fcnet.py  Project: rbharath/deepchem
  def predict_on_batch(self, X):
    """Return model output for the provided input.

    Restore(checkpoint) must have previously been called on this object.

    Args:
      dataset: deepchem.datasets.dataset object.

    Returns:
      Tuple of three numpy arrays with shape num_examples x num_tasks (x ...):
        output: Model outputs.
        labels: True labels.
        weights: Example weights.
      Note that the output and labels arrays may be more than 2D, e.g. for
      classifier models that return class probabilities.

    Raises:
      AssertionError: If model is not in evaluation mode.
      ValueError: If output and labels are not both 3D or both 2D.
    """
    if not self._restored_model:
      self.restore()
    with self.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output'])

      # run eval data through the model
      num_tasks = self.num_tasks
      outputs = []
      with self._get_shared_session().as_default():
        n_samples = len(X)
        # Some tensorflow models can't handle variadic batches,
        # especially models using tf.pack, tf.split. Pad batch-size
        # to handle these cases.
        X = pad_features(self.model_params["batch_size"], X)
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session().run(
            self.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:num_tasks], dtype=float)
        # reshape to batch_size x num_tasks x ...
        if batch_outputs.ndim == 3:
          batch_outputs = batch_outputs.transpose((1, 0, 2))
        elif batch_outputs.ndim == 2:
          batch_outputs = batch_outputs.transpose((1, 0))
        # Handle edge case when batch-size is 1.
        elif batch_outputs.ndim == 1:
          n_samples = len(X)
          batch_outputs = batch_outputs.reshape((n_samples, num_tasks))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_outputs.shape,))
        # Prune away any padding that was added
        batch_outputs = batch_outputs[:n_samples]
        outputs.append(batch_outputs)

        outputs = np.squeeze(np.concatenate(outputs)) 

    return np.copy(outputs)
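The transpose handling in code examples #2 and #5 exists because the session returns one array per task, stacked as (num_tasks, batch_size, ...), while callers expect (batch_size, num_tasks, ...). A small numpy illustration of that reshuffle:

import numpy as np

num_tasks, batch_size, n_classes = 3, 4, 2
# One (batch_size, n_classes) array per task, as returned by the session run.
data = [np.random.rand(batch_size, n_classes) for _ in range(num_tasks)]
batch_outputs = np.asarray(data, dtype=float)       # shape (3, 4, 2)
batch_outputs = batch_outputs.transpose((1, 0, 2))  # shape (4, 3, 2)
assert batch_outputs.shape == (batch_size, num_tasks, n_classes)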