def predict_on_batch(self, X, pad_batch=True):
    """Run the wrapped model on a single batch of features.

    Args:
        X: Batch of features; must support ``len()`` when ``pad_batch``
            is true.
        pad_batch: When true, ``X`` is passed through ``pad_features``
            using the wrapped model's ``batch_size`` before prediction, and
            the prediction is sliced back to the original ``len(X)``.

    Returns:
        The result of ``self.model_instance.predict_on_batch``, truncated
        to the first ``len(X)`` entries when padding was applied.
    """
    model = self.model_instance

    # No padding requested: forward the batch untouched.
    if not pad_batch:
        return model.predict_on_batch(X)

    # Remember the real sample count so the padded rows can be dropped.
    n_original = len(X)
    padded = pad_features(model.batch_size, X)
    return model.predict_on_batch(padded)[:n_original]
def predict_on_batch(self, X, pad_batch=False):
    """Return decoded model predictions for one batch of input features.

    If ``self._restored_model`` is falsy, ``self.restore()`` is called
    before the evaluation graph is run.

    Args:
        X: Batch of input features; forwarded to
            ``self.construct_feed_dict``.
        pad_batch: If true, ``X`` is first replaced by
            ``pad_features(self.batch_size, X)``. The rows produced for the
            padding are NOT removed from the result; callers that only want
            predictions for the original rows must slice the output
            themselves.

    Returns:
        A fresh numpy array containing ``from_one_hot(..., axis=-1)``
        applied to the squeezed, batch-major model outputs. Because
        ``np.squeeze`` drops every length-1 axis, the result may have fewer
        dimensions than ``batch x n_tasks``.

    Raises:
        ValueError: If the stacked per-task outputs are neither 2D nor 3D.
    """
    if pad_batch:
        X = pad_features(self.batch_size, X)

    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        with self._get_shared_session(train=False).as_default():
            # Run the eval data through the model.
            feed_dict = self.construct_feed_dict(X)
            data = self._get_shared_session(train=False).run(
                self.eval_graph.output, feed_dict=feed_dict)

            # Stacking the first n_tasks outputs puts the task axis first.
            batch_output = np.asarray(data[:n_tasks], dtype=float)

            # Reshape to batch_size x n_tasks x ...
            if batch_output.ndim == 3:
                batch_output = batch_output.transpose((1, 0, 2))
            elif batch_output.ndim == 2:
                batch_output = batch_output.transpose((1, 0))
            else:
                raise ValueError(
                    'Unrecognized rank combination for output: %s' %
                    (batch_output.shape,))

            # Only one batch is ever processed, so squeeze it directly
            # instead of collecting it into a list and concatenating.
            outputs = np.array(
                from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
def test_pad_features(self):
    """Check that pad_features output always has length batch_size."""
    batch_size = 100
    num_features = 10

    def check_padded_length(X_b):
        # Every input, whatever its shape, must come back batch_size long.
        X_out = pad_features(batch_size, X_b)
        assert len(X_out) == batch_size

    # n_samples < 2*n_samples < batch_size
    check_padded_length(np.zeros((29, num_features)))

    # n_samples < batch_size
    check_padded_length(np.zeros((79, num_features)))

    # n_samples == batch_size
    check_padded_length(np.zeros((100, num_features)))

    # Object featurization (two samples).
    check_padded_length(np.array([{"a": 1}, {"b": 2}]))

    # More complicated object featurization (two samples).
    check_padded_length(np.array([(1, {"a": 1}), (2, {"b": 2})]))

    # Multidimensional data: 50 samples x 15 atoms x 3 coordinates.
    num_atoms = 15
    d = 3
    check_padded_length(np.zeros((50, num_atoms, d)))
def predict_on_batch(self, X):
    """Return model output for one batch of input features.

    If ``self._restored_model`` is falsy, ``self.restore()`` is called
    first. The batch is always padded with
    ``pad_features(self.model_params["batch_size"], X)`` before being fed
    to the graph, and the rows added by padding are removed again before
    returning.

    Args:
        X: Batch of input features; must support ``len()`` and is
            forwarded (after padding) to ``self.construct_feed_dict``.

    Returns:
        A fresh float numpy array with the batch-major model outputs for
        the original ``len(X)`` rows, passed through ``np.squeeze``.
        Because ``np.squeeze`` drops every length-1 axis, the result may
        have fewer dimensions than ``num_examples x num_tasks (x ...)``.

    Raises:
        AssertionError: If ``model_ops.is_training()`` is truthy.
        ValueError: If the stacked per-task outputs are not 1D, 2D or 3D.
    """
    if not self._restored_model:
        self.restore()

    with self.graph.as_default():
        assert not model_ops.is_training()
        self.require_attributes(['output'])

        num_tasks = self.num_tasks
        with self._get_shared_session().as_default():
            # Remember the true sample count so padding can be stripped.
            n_samples = len(X)
            # Some tensorflow models can't handle variadic batches,
            # especially models using tf.pack, tf.split. Pad batch-size
            # to handle these cases.
            X = pad_features(self.model_params["batch_size"], X)
            feed_dict = self.construct_feed_dict(X)
            data = self._get_shared_session().run(
                self.output, feed_dict=feed_dict)

            # Stacking the first num_tasks outputs puts the task axis first.
            batch_outputs = np.asarray(data[:num_tasks], dtype=float)

            # Reshape to batch_size x num_tasks x ...
            if batch_outputs.ndim == 3:
                batch_outputs = batch_outputs.transpose((1, 0, 2))
            elif batch_outputs.ndim == 2:
                batch_outputs = batch_outputs.transpose((1, 0))
            elif batch_outputs.ndim == 1:
                # Edge case when batch-size is 1: rebuild the batch axis
                # from the padded length, without overwriting n_samples,
                # so the prune below still uses the unpadded count.
                batch_outputs = batch_outputs.reshape((len(X), num_tasks))
            else:
                # The shape must be wrapped in a one-element tuple;
                # passing it bare makes ``%`` treat each dimension as a
                # separate argument and raise TypeError instead.
                raise ValueError(
                    'Unrecognized rank combination for output: %s' %
                    (batch_outputs.shape,))

            # Prune away any padding that was added.
            batch_outputs = batch_outputs[:n_samples]

            # Only one batch is ever processed, so squeeze it directly
            # instead of collecting it into a list and concatenating.
            outputs = np.squeeze(batch_outputs)

    return np.copy(outputs)