def evaluate_error_class2(self, dataset, transformers=[], batch_size=50):
  """
  Evaluate the error in energy and gradient components, forcebalance-style.

  TODO(rbharath): Should be a subclass PhysicalModel method. Also, need to
  find a better name for this method (class2 doesn't tell us anything about
  the semantics of this method).
  """
  y_preds = []
  y_train = []
  grads = []
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(batch_size):
    # untransformed E is needed for undo_grad_transform
    energy_batch = self.predict_on_batch(X_batch)
    grad_batch = self.predict_grad_on_batch(X_batch)
    grad_batch = undo_grad_transforms(grad_batch, energy_batch, transformers)
    grads.append(grad_batch)
    y_pred_batch = np.reshape(energy_batch, y_batch.shape)
    # y_pred_batch gives us the pred E and pred multitask trained gradE
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
    # undo transforms on y_batch should know how to handle E and gradE separately
    y_batch = undo_transforms(y_batch, transformers)
    y_train.append(y_batch)

  y_pred = np.vstack(y_preds)
  y = np.vstack(y_train)
  grad = np.vstack(grads)

  n_samples, n_tasks = len(dataset), self.get_num_tasks()
  n_atoms = int((n_tasks - 1) / 3)
  y_pred = np.reshape(y_pred, (n_samples, n_tasks))
  y = np.reshape(y, (n_samples, n_tasks))
  grad_train = y[:, 1:]
  energy_error = y[:, 0] - y_pred[:, 0]
  # convert Hartree to kJ/mol
  energy_error = np.sqrt(np.mean(energy_error * energy_error)) * 2625.5002
  grad = np.reshape(grad, (n_samples, n_atoms, 3))
  grad_train = np.reshape(grad_train, (n_samples, n_atoms, 3))
  grad_error = grad - grad_train
  # convert Hartree/bohr to kJ/mol/Angstrom
  grad_error = np.sqrt(np.mean(grad_error * grad_error)) * 4961.47596096
  print("Energy error (RMSD): %f kJ/mol" % energy_error)
  print("Grad error (RMSD): %f kJ/mol/A" % grad_error)
  return energy_error, grad_error
def compute_model_performance(self, metrics, per_task_metrics=False):
  """
  Computes statistics of model on test data.

  Parameters
  ----------
  metrics: list
    List of dc.metrics.Metric objects
  per_task_metrics: bool, optional
    If true, return computed metric for each task on multitask dataset.
  """
  self.model.build()
  y = []
  w = []

  def generator_closure():
    for feed_dict in self.generator:
      y.append(feed_dict[self.label_keys[0]])
      if len(self.weights) > 0:
        w.append(feed_dict[self.weights[0]])
      yield feed_dict

  if not len(metrics):
    return {}
  else:
    mode = metrics[0].mode
  y_pred = self.model.predict_on_generator(generator_closure())
  y = np.concatenate(y, axis=0)
  multitask_scores = {}
  all_task_scores = {}
  y = undo_transforms(y, self.output_transformers)
  y_pred = undo_transforms(y_pred, self.output_transformers)
  if len(w) != 0:
    w = np.array(w)
    w = np.reshape(w, newshape=y.shape)

  # Compute multitask metrics
  for metric in metrics:
    if per_task_metrics:
      multitask_scores[metric.name], computed_metrics = metric.compute_metric(
          y, y_pred, w, per_task_metrics=True, n_classes=self.n_classes)
      all_task_scores[metric.name] = computed_metrics
    else:
      multitask_scores[metric.name] = metric.compute_metric(
          y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes)

  if not per_task_metrics:
    return multitask_scores
  else:
    return multitask_scores, all_task_scores
def evaluate_error(self, dataset, transformers=[], batch_size=50):
  """
  Evaluate the error in energy and gradient components, forcebalance-style.

  TODO(rbharath): This looks like it should be a subclass method for a
  PhysicalModel class. forcebalance-style errors aren't meaningful for most
  cheminformatic datasets.
  """
  y_preds = []
  y_train = []
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(batch_size):
    y_pred_batch = self.predict_on_batch(X_batch)
    y_pred_batch = np.reshape(y_pred_batch, y_batch.shape)
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
    y_batch = undo_transforms(y_batch, transformers)
    y_train.append(y_batch)

  y_pred = np.vstack(y_preds)
  y = np.vstack(y_train)

  n_samples, n_tasks = len(dataset), self.get_num_tasks()
  n_atoms = int((n_tasks - 1) / 3)
  y_pred = np.reshape(y_pred, (n_samples, n_tasks))
  y = np.reshape(y, (n_samples, n_tasks))
  grad = y_pred[:, 1:]
  grad_train = y[:, 1:]
  energy_error = y[:, 0] - y_pred[:, 0]
  # convert Hartree to kJ/mol
  energy_error = np.sqrt(np.mean(energy_error * energy_error)) * 2625.5002
  grad = np.reshape(grad, (n_samples, n_atoms, 3))
  grad_train = np.reshape(grad_train, (n_samples, n_atoms, 3))
  grad_error = grad - grad_train
  # convert Hartree/bohr to kJ/mol/Angstrom
  grad_error = np.sqrt(np.mean(grad_error * grad_error)) * 4961.47596096
  print("Energy error (RMSD): %f kJ/mol" % energy_error)
  print("Grad error (RMSD): %f kJ/mol/A" % grad_error)
  return energy_error, grad_error
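# The two magic constants above are unit conversions (Hartree -> kJ/mol and
# Hartree/bohr -> kJ/mol/Angstrom). A minimal, self-contained sketch of the
# same RMSD-plus-conversion arithmetic; the function name and layout are
# illustrative only, not part of the class above.
import numpy as np

def rmsd_errors_sketch(y_true, y_pred):
  """Return (energy RMSD in kJ/mol, gradient RMSD in kJ/mol/Angstrom).

  Assumes column 0 holds energies in Hartree and the remaining columns hold
  gradient components in Hartree/bohr, as in evaluate_error above.
  """
  HARTREE_TO_KJ_PER_MOL = 2625.5002
  HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_ANGSTROM = 4961.47596096
  energy_error = y_true[:, 0] - y_pred[:, 0]
  energy_rmsd = np.sqrt(np.mean(energy_error ** 2)) * HARTREE_TO_KJ_PER_MOL
  grad_error = y_true[:, 1:] - y_pred[:, 1:]
  grad_rmsd = (np.sqrt(np.mean(grad_error ** 2)) *
               HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_ANGSTROM)
  return energy_rmsd, grad_rmsd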
def predict(self, dataset: Dataset,
            transformers: List[Transformer] = []) -> OneOrMany[np.ndarray]:
  """
  Uses self to make predictions on provided Dataset object.

  Parameters
  ----------
  dataset: dc.data.Dataset
    Dataset to make prediction on
  transformers: list of dc.trans.Transformers
    Transformers that the input data has been transformed by. The output
    is passed through these transformers to undo the transformations.

  Returns
  -------
  a NumPy array if the model produces a single output, or a list of arrays
  if it produces multiple outputs
  """
  y_preds = []
  for (X_batch, _, _, ids_batch) in dataset.iterbatches(deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_on_batch(X_batch)
    # Discard any padded predictions
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.concatenate(y_preds)
  return y_pred
def predict_proba_on_generator(self, generator, transformers=[]):
  """
  Returns:
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
  """
  if not self.built:
    self.build()
  with self._get_tf("Graph").as_default():
    with tf.Session() as sess:
      saver = tf.train.Saver()
      self._initialize_weights(sess, saver)
      out_tensors = [x.out_tensor for x in self.outputs]
      results = []
      for feed_dict in generator:
        # Count the valid (nonzero-weight) samples in the batch from the weights
        n_valid_samples = len(np.nonzero(feed_dict[self.weights][:, 0])[0])
        feed_dict = {
            self.layers[k.name].out_tensor: v
            for k, v in six.iteritems(feed_dict)
        }
        feed_dict[self._training_placeholder] = 0.0
        result = np.array(sess.run(out_tensors, feed_dict=feed_dict))
        if len(result.shape) == 3:
          result = np.transpose(result, axes=[1, 0, 2])
        result = undo_transforms(result, transformers)
        # Only keep the valid (non-padded) samples
        results.append(result[:n_valid_samples])
      return np.concatenate(results, axis=0)
def predict_proba(self, dataset, transformers=[], batch_size=None):
  """
  TODO: Do transformers even make sense here?

  Returns:
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
  """
  if not self.built:
    self.build()
  if batch_size is None:
    batch_size = self.batch_size
  with self._get_tf("Graph").as_default():
    saver = tf.train.Saver()
    with tf.Session() as sess:
      saver.restore(sess, self.last_checkpoint)
      y_preds = []
      n_tasks = self.get_num_tasks()
      for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(
          batch_size, deterministic=True):
        n_samples = len(X_batch)
        y_pred_batch = self.predict_proba_on_batch(X_batch, sess=sess)
        y_pred_batch = y_pred_batch[:n_samples]
        y_pred_batch = undo_transforms(y_pred_batch, transformers)
        y_preds.append(y_pred_batch)
      y_pred = np.vstack(y_preds)
      # iterbatches pads the last batch with zero-weight examples;
      # remove the padded examples.
      n_samples = len(dataset)
      y_pred = y_pred[:n_samples]
      return y_pred
def predict(self, dataset, transformers=[]):
  """
  Uses self to make predictions on provided Dataset object.

  Returns:
    y_pred: numpy ndarray of shape (n_samples,)
  """
  y_preds = []
  n_tasks = self.get_num_tasks()
  for (X_batch, _, _, ids_batch) in dataset.iterbatches(
      self.batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_on_batch(X_batch)
    # Discard any padded predictions
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = np.reshape(y_pred_batch, (n_samples, n_tasks))
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  # iterbatches pads the last batch with zero-weight examples;
  # remove the padded examples.
  n_samples = len(dataset)
  y_pred = np.reshape(y_pred, (n_samples, n_tasks))
  # Special case to handle singletasks.
  if n_tasks == 1:
    y_pred = np.reshape(y_pred, (n_samples,))
  return y_pred
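# Why the trim to len(dataset) above is needed: iterbatches pads the final
# batch up to a full batch. A toy sketch (plain numpy, not the DeepChem API)
# of what that padding does to the stacked predictions:
import numpy as np

n_samples, batch_size = 10, 4                       # last batch padded 2 -> 4
padded = [np.zeros((batch_size, 1)) for _ in range(3)]  # 3 batches of 4 rows
stacked = np.vstack(padded)        # shape (12, 1): two rows are padding
trimmed = stacked[:n_samples]      # shape (10, 1): padding removed
assert trimmed.shape == (n_samples, 1)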
def compute_model_performance(self, metrics, csv_out=None, stats_out=None, threshold=None): """ Computes statistics of model on test data and saves results to csv. """ y = self.dataset.y y = undo_transforms(y, self.output_transformers) w = self.dataset.w if not len(metrics): return {} else: mode = metrics[0].mode if mode == "classification": y_pred = self.model.predict_proba(self.dataset, self.output_transformers) y_pred_print = self.model.predict( self.dataset, self.output_transformers).astype(int) else: y_pred = self.model.predict(self.dataset, self.output_transformers) y_pred_print = y_pred multitask_scores = {} if csv_out is not None: log("Saving predictions to %s" % csv_out, self.verbosity) self.output_predictions(y_pred_print, csv_out) # Compute multitask metrics for metric in metrics: multitask_scores[metric.name] = metric.compute_metric(y, y_pred, w) if stats_out is not None: log("Saving stats to %s" % stats_out, self.verbosity) self.output_statistics(multitask_scores, stats_out) return multitask_scores
def bayesian_predict(self,
                     dataset,
                     transformers=[],
                     n_passes=4,
                     untransform=False):
  """Generates predictions and confidences on a dataset object
  https://arxiv.org/pdf/1506.02142.pdf

  # Returns:
    mu: numpy ndarray of shape (n_samples, n_tasks)
    sigma: numpy ndarray of shape (n_samples, n_tasks)
  """
  X = dataset.X
  max_index = X.shape[0] - 1
  num_batches = (max_index // self.batch_size) + 1
  mus = []
  sigmas = []
  for i in range(num_batches):
    start = i * self.batch_size
    end = min((i + 1) * self.batch_size, max_index + 1)
    batch = X[start:end]
    # Untransforming (when requested) is done once below on the full arrays,
    # so no transformers are passed to the per-batch call.
    mu, sigma = self.bayesian_predict_on_batch(
        batch, transformers=[], n_passes=n_passes)
    mus.append(mu)
    sigmas.append(sigma)
  mu = np.concatenate(mus, axis=0)
  # Constant offset added to the predicted standard deviation.
  sigma = np.concatenate(sigmas, axis=0) + 0.55
  if untransform:
    mu = undo_transforms(mu, transformers)
    for i in range(sigma.shape[1]):
      sigma[:, i] = sigma[:, i] * transformers[0].y_stds[i]
  return mu[:max_index + 1], sigma[:max_index + 1]
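# Sketch of the idea behind bayesian_predict (Gal & Ghahramani,
# arXiv:1506.02142): run several stochastic forward passes with dropout left
# on, then report the per-sample mean as the prediction and the standard
# deviation as the confidence. The stochastic model here is simulated with
# noise; in the real method each pass comes from the network with dropout
# active. Names are illustrative only.
import numpy as np

def mc_dropout_sketch(stochastic_predict, X, n_passes=4):
  passes = np.stack([stochastic_predict(X) for _ in range(n_passes)], axis=0)
  mu = passes.mean(axis=0)     # shape (n_samples, n_tasks)
  sigma = passes.std(axis=0)   # spread across passes as the confidence
  return mu, sigma

rng = np.random.default_rng(0)
fake_predict = lambda X: X.sum(axis=1, keepdims=True) + rng.normal(
    size=(len(X), 1))
mu, sigma = mc_dropout_sketch(fake_predict, np.ones((5, 3)), n_passes=24)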
def predict_proba(self, dataset, transformers=[], n_classes=2):
  """
  TODO: Do transformers even make sense here?

  Returns:
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
  """
  y_preds = []
  n_tasks = self.get_num_tasks()
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(
      self.batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_proba_on_batch(X_batch)
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = np.reshape(y_pred_batch, (n_samples, n_tasks, n_classes))
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  # iterbatches pads the last batch with zero-weight examples;
  # remove the padded examples.
  n_samples = len(dataset)
  y_pred = y_pred[:n_samples]
  y_pred = np.reshape(y_pred, (n_samples, n_tasks, n_classes))
  return y_pred
def predict_on_smiles(self, smiles, transformers=[], untransform=False):
  """Generates predictions on a numpy array of SMILES strings

  # Returns:
    y_: numpy ndarray of shape (n_samples, n_tasks)
  """
  max_index = len(smiles) - 1
  n_tasks = len(self.outputs)
  num_batches = (max_index // self.batch_size) + 1
  featurizer = ConvMolFeaturizer()

  y_ = []
  for i in range(num_batches):
    start = i * self.batch_size
    end = min((i + 1) * self.batch_size, max_index + 1)
    smiles_batch = smiles[start:end]
    y_.append(
        self.predict_on_smiles_batch(smiles_batch, featurizer, transformers))
  y_ = np.concatenate(y_, axis=0)[:max_index + 1]
  y_ = y_.reshape(-1, n_tasks)

  if untransform:
    y_ = undo_transforms(y_, transformers)

  return y_
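# The start/end arithmetic above covers every sample, including a final
# partial batch. A quick check of that invariant with toy numbers:
n, batch_size = 103, 50
max_index = n - 1
num_batches = (max_index // batch_size) + 1          # 3 batches: 50, 50, 3
spans = [(i * batch_size, min((i + 1) * batch_size, max_index + 1))
         for i in range(num_batches)]
assert spans == [(0, 50), (50, 100), (100, 103)]
assert sum(end - start for start, end in spans) == n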
def compute_model_performance(self, metrics, csv_out=None, stats_out=None, threshold=None): """ Computes statistics of model on test data and saves results to csv. """ y = self.dataset.y y = undo_transforms(y, self.output_transformers) w = self.dataset.w if not len(metrics): return {} else: mode = metrics[0].mode if mode == "classification": y_pred = self.model.predict_proba(self.dataset, self.output_transformers) y_pred_print = self.model.predict( self.dataset, self.output_transformers).astype(int) else: y_pred = self.model.predict(self.dataset, self.output_transformers) y_pred_print = y_pred multitask_scores = {} if csv_out is not None: log("Saving predictions to %s" % csv_out, self.verbose) self.output_predictions(y_pred_print, csv_out) # Compute multitask metrics for metric in metrics: multitask_scores[metric.name] = metric.compute_metric(y, y_pred, w) if stats_out is not None: log("Saving stats to %s" % stats_out, self.verbose) self.output_statistics(multitask_scores, stats_out) return multitask_scores
def predict(self, dataset, transformers=[], batch_size=None):
  """
  Uses self to make predictions on provided Dataset object.

  Returns:
    y_pred: numpy ndarray of shape (n_samples,)
  """
  y_preds = []
  n_tasks = self.get_num_tasks()
  for (X_batch, _, _, ids_batch) in dataset.iterbatches(
      batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_on_batch(X_batch)
    # Discard any padded predictions
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = np.reshape(y_pred_batch, (n_samples, n_tasks))
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  # iterbatches pads the last batch with zero-weight examples;
  # remove the padded examples.
  n_samples = len(dataset)
  y_pred = np.reshape(y_pred, (n_samples, n_tasks))
  # Special case to handle singletasks.
  if n_tasks == 1:
    y_pred = np.reshape(y_pred, (n_samples,))
  return y_pred
def predict(self, dataset, transformers=[], batch_size=None):
  """
  Uses self to make predictions on provided Dataset object.

  Returns:
    y_pred: numpy ndarray of shape (n_samples,)
  """
  if not self.built:
    self.build()
  with self._get_tf("Graph").as_default():
    saver = tf.train.Saver()
    with tf.Session() as sess:
      saver.restore(sess, self.last_checkpoint)
      y_preds = []
      n_tasks = self.get_num_tasks()
      for (X_batch, y_b, w_b, ids_batch) in dataset.iterbatches(
          batch_size, deterministic=True):
        y_pred_batch = self.predict_on_batch(X_batch, sess=sess)
        y_pred_batch = undo_transforms(y_pred_batch, transformers)
        y_preds.append(y_pred_batch)
      y_pred = np.vstack(y_preds)
      # iterbatches pads the last batch with zero-weight examples;
      # remove the padded examples.
      n_samples = len(dataset)
      y_pred = y_pred[:n_samples]
      y_pred = np.reshape(y_pred, (n_samples, n_tasks))
      return y_pred
def predict(self, dataset: Dataset, transformers: List[Transformer] = []) -> np.ndarray: """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: Dataset Dataset to make prediction on transformers: List[Transformer] Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. Returns ------- np.ndarray A numpy array of predictions the model produces. """ y_preds = [] for (X_batch, _, _, ids_batch) in dataset.iterbatches(deterministic=True): n_samples = len(X_batch) y_pred_batch = self.predict_on_batch(X_batch) # Discard any padded predictions y_pred_batch = y_pred_batch[:n_samples] y_pred_batch = undo_transforms(y_pred_batch, transformers) y_preds.append(y_pred_batch) y_pred = np.concatenate(y_preds) return y_pred
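# A round-trip sketch of what undo_transforms does for the predict methods
# above. In DeepChem, transformers expose an untransform method and
# undo_transforms applies them in reverse order of application. The toy
# transformer below is hypothetical, standing in for e.g. a
# NormalizationTransformer.
import numpy as np

class ToyScaleTransformer:
  def __init__(self, scale):
    self.scale = scale
  def transform_array(self, y):
    return y / self.scale
  def untransform(self, y):
    return y * self.scale

t = ToyScaleTransformer(scale=10.0)
y = np.array([1.0, 2.0, 3.0])
y_scaled = t.transform_array(y)       # what the model was trained on
y_restored = t.untransform(y_scaled)  # what predict() hands back to the user
assert np.allclose(y, y_restored)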
def predict_proba(self, dataset, transformers=[], batch_size=None, n_classes=2):
  """
  TODO: Do transformers even make sense here?

  Returns:
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
  """
  y_preds = []
  n_tasks = self.get_num_tasks()
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(
      batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_proba_on_batch(X_batch)
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = np.reshape(y_pred_batch, (n_samples, n_tasks, n_classes))
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  # iterbatches pads the last batch with zero-weight examples;
  # remove the padded examples.
  n_samples = len(dataset)
  y_pred = y_pred[:n_samples]
  y_pred = np.reshape(y_pred, (n_samples, n_tasks, n_classes))
  return y_pred
def test_fd_grad(self, dataset, transformers=[], batch_size=50):
  """
  Uses self to calculate finite difference gradient on provided Dataset object.
  Currently only useful if your task is energy and self contains
  predict_grad_on_batch.

  TODO(rbharath): This shouldn't be a method of the Model class. Perhaps a
  method of PhysicalModel subclass. Leaving it in for time-being while
  refactoring continues.

  Returns:
    y_pred: numpy ndarray of the differences between the model's analytic
      gradients and the finite-difference gradients.
  """
  y_preds = []
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(batch_size):
    for xb in X_batch:
      num_atoms = xb.shape[0]
      coords = 3
      h = 0.001
      fd_batch = []
      # Filling a new batch with displaced geometries
      for i in range(num_atoms):
        for j in range(coords):
          displace = np.zeros((num_atoms, coords))
          displace[i][j] += h / 2
          fd_batch.append(xb + displace)
          fd_batch.append(xb - displace)
      fd_batch = np.asarray(fd_batch)
      # Predict energy on displaced geometry batch
      y_pred_batch = self.predict_on_batch(fd_batch)
      energy = y_pred_batch[:, 0]
      y_pred_batch = undo_transforms(y_pred_batch, transformers)
      y_pred_batch = y_pred_batch[:, 0]
      y_pred_batch = np.reshape(y_pred_batch, (3 * num_atoms, 2))
      fd_grads = []
      # Calculate numerical gradient by centered finite difference
      for x in y_pred_batch:
        fd_grads.append((x[0] - x[1]) / h)
      fd_grads = np.asarray(fd_grads)
      fd_grads = np.reshape(fd_grads, (num_atoms, coords))
      xb = np.asarray([xb])
      y_pred_batch = self.predict_grad_on_batch(xb)
      y_pred_batch = undo_grad_transforms(energy, y_pred_batch, transformers)
      # Calculate error between symbolic gradient and numerical gradient
      y_pred_batch = y_pred_batch - fd_grads
      y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  return y_pred
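# The displacement trick above is plain centered finite differences:
# dE/dx_ij ~= (E(x + h/2 e_ij) - E(x - h/2 e_ij)) / h. A self-contained check
# against a toy quadratic energy; the names here are illustrative, not part
# of the Model class.
import numpy as np

def fd_gradient(energy_fn, x, h=0.001):
  grad = np.zeros_like(x)
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      displace = np.zeros_like(x)
      displace[i, j] = h / 2
      grad[i, j] = (energy_fn(x + displace) - energy_fn(x - displace)) / h
  return grad

energy = lambda geom: np.sum(geom ** 2)          # toy energy surface
geom = np.arange(6, dtype=float).reshape(2, 3)   # 2 "atoms", 3 coords each
# Centered differences are exact for quadratics (up to floating point):
assert np.allclose(fd_gradient(energy, geom), 2 * geom, atol=1e-6)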
def compute_model_performance(self, metrics, csv_out=None, stats_out=None, per_task_metrics=False): """ Computes statistics of model on test data and saves results to csv. Parameters ---------- metrics: list List of dc.metrics.Metric objects csv_out: str, optional Filename to write CSV of model predictions. stats_out: str, optional Filename to write computed statistics. per_task_metrics: bool, optional If true, return computed metric for each task on multitask dataset. """ y = self.dataset.y y = undo_transforms(y, self.output_transformers) w = self.dataset.w if not len(metrics): return {} else: mode = metrics[0].mode if mode == "classification": y_pred = self.model.predict_proba(self.dataset, self.output_transformers) y_pred_print = self.model.predict(self.dataset, self.output_transformers).astype(int) else: y_pred = self.model.predict(self.dataset, self.output_transformers) y_pred_print = y_pred multitask_scores = {} all_task_scores = {} if csv_out is not None: log("Saving predictions to %s" % csv_out, self.verbose) self.output_predictions(y_pred_print, csv_out) # Compute multitask metrics for metric in metrics: if per_task_metrics: multitask_scores[metric.name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True) all_task_scores[metric.name] = computed_metrics else: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False) if stats_out is not None: log("Saving stats to %s" % stats_out, self.verbose) self.output_statistics(multitask_scores, stats_out) if not per_task_metrics: return multitask_scores else: return multitask_scores, all_task_scores
def predict_on_generator(self, generator, transformers=[], outputs=None): out = super(TextCNNModel, self).predict_on_generator( generator, transformers=[], outputs=outputs) if outputs is None: outputs = self.outputs if len(outputs) > 1: out = np.stack(out, axis=1) out = undo_transforms(out, transformers) return out
def predict_on_generator(self, generator, transformers=[], outputs=None): """ Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() if outputs is None: outputs = self.outputs elif not isinstance(outputs, collections.Sequence): outputs = [outputs] with self._get_tf("Graph").as_default(): with tf.Session() as sess: saver = tf.train.Saver() self._initialize_weights(sess, saver) out_tensors = [x.out_tensor for x in self.outputs] # Gather results for each output results = [[] for out in out_tensors] for feed_dict in generator: feed_dict = { self.layers[k.name].out_tensor: v for k, v in six.iteritems(feed_dict) } feed_dict[self._training_placeholder] = 0.0 feed_results = sess.run(out_tensors, feed_dict=feed_dict) if len(feed_results) > 1: if len(transformers): raise ValueError( "Does not support transformations " "for multiple outputs.") elif len(feed_results) == 1: result = undo_transforms(feed_results[0], transformers) feed_results = [result] for ind, result in enumerate(feed_results): results[ind].append(result) final_results = [] for result_list in results: final_results.append(np.concatenate(result_list, axis=0)) # If only one output, just return array if len(final_results) == 1: return final_results[0] else: return final_results
def predict(self, dataset, transformers=[], outputs=None): if outputs is None: outputs = self.outputs if transformers != [] and not isinstance(outputs, collections.Sequence): raise ValueError( "DTNN does not support single tensor output with transformers") retval = super(DTNNTensorGraph, self).predict(dataset, outputs=outputs) if not isinstance(outputs, collections.Sequence): return retval retval = np.concatenate(retval, axis=-1) return undo_transforms(retval, transformers)
def predict(self, dataset, transformers=[]): """ Prediction for multitask models. """ n_tasks = len(self.tasks) n_samples = len(dataset) y_pred = np.zeros((n_samples, n_tasks)) for ind, task in enumerate(self.tasks): task_model = self.model_builder(self.task_model_dirs[task]) task_model.reload() y_pred[:, ind] = task_model.predict(dataset, []) y_pred = undo_transforms(y_pred, transformers) return y_pred
def predict(self, dataset, transformers=[]): """ Prediction for multitask models. """ n_tasks = len(self.tasks) n_samples = len(dataset) y_preds = [] for ind, task in enumerate(self.tasks): task_model = self.model_builder(self.task_model_dirs[task]) task_model.reload() y_preds.append(task_model.predict(dataset, [])) y_pred = np.stack(y_preds, axis=1) y_pred = undo_transforms(y_pred, transformers) return y_pred
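# The two multitask predict variants above differ only in how per-task
# columns are assembled: filling a preallocated (n_samples, n_tasks) array
# column by column, or stacking 1-D task predictions along axis=1. The two
# give the same result:
import numpy as np

task_preds = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]  # two tasks
filled = np.zeros((2, 2))
for ind, p in enumerate(task_preds):
  filled[:, ind] = p
stacked = np.stack(task_preds, axis=1)
assert np.array_equal(filled, stacked)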
def predict_proba(self, dataset, transformers=[], n_classes=2):
  y_preds = []
  n_tasks = self.n_tasks
  for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(
      self.batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_proba_on_batch(X_batch)
    assert y_pred_batch.shape == (n_samples, n_tasks, n_classes)
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.vstack(y_preds)
  # iterbatches pads the last batch with zero-weight examples;
  # remove the padded examples.
  n_samples = len(dataset)
  y_pred = y_pred[:n_samples]
  y_pred = np.reshape(y_pred, (n_samples, n_tasks, n_classes))
  return y_pred
def predict_proba_on_generator(self, generator, transformers=[]):
  if not self.built:
    self.build()
  with self._get_tf("Graph").as_default():
    out_tensors = [x.out_tensor for x in self.outputs]
    results = []
    for feed_dict in generator:
      feed_dict = {
          self.layers[k.name].out_tensor: v
          for k, v in six.iteritems(feed_dict)
      }
      # Note: the training placeholder is set to 1.0 here, so stochastic
      # layers such as dropout remain active during prediction.
      feed_dict[self._training_placeholder] = 1.0
      result = np.array(self.session.run(out_tensors, feed_dict=feed_dict))
      if len(result.shape) == 3:
        result = np.transpose(result, axes=[1, 0, 2])
      if len(transformers) > 0:
        result = undo_transforms(result, transformers)
      results.append(result)
    return np.concatenate(results, axis=0)
def predict(self, dataset, transformers=[], batch_size=None):
  """
  Uses self to make predictions on provided Dataset object.

  Returns:
    y_pred: numpy ndarray of shape (n_samples,)
  """
  y_preds = []
  for (X_batch, _, _, ids_batch) in dataset.iterbatches(
      batch_size, deterministic=True):
    n_samples = len(X_batch)
    y_pred_batch = self.predict_on_batch(X_batch)
    # Discard any padded predictions
    y_pred_batch = y_pred_batch[:n_samples]
    y_pred_batch = undo_transforms(y_pred_batch, transformers)
    y_preds.append(y_pred_batch)
  y_pred = np.concatenate(y_preds)
  return y_pred
def predict_proba_on_generator(self, generator, transformers=[]): if not self.built: self.build() with self._get_tf("Graph").as_default(): with tf.Session() as sess: saver = tf.train.Saver() saver.restore(sess, self.last_checkpoint) out_tensors = [x.out_tensor for x in self.outputs] results = [] for feed_dict in generator: feed_dict = { self.layers[k.name].out_tensor: v for k, v in six.iteritems(feed_dict) } result = np.array(sess.run(out_tensors, feed_dict=feed_dict)) if len(result.shape) == 3: result = np.transpose(result, axes=[1, 0, 2]) if len(transformers) > 0: result = undo_transforms(result, transformers) results.append(result) return np.concatenate(results, axis=0)
def predict_on_generator(self, generator, transformers=[], outputs=None):
  if not self.built:
    self.build()
  if outputs is None:
    outputs = self.outputs
  elif not isinstance(outputs, collections.Sequence):
    outputs = [outputs]
  with self._get_tf("Graph").as_default():
    # Gather results for each output
    results = [[] for out in outputs]
    for feed_dict in generator:
      feed_dict = {
          self.layers[k.name].out_tensor: v
          for k, v in six.iteritems(feed_dict)
      }
      # Recording the number of samples in the input batch
      n_samples = max(feed_dict[self.membership.out_tensor]) + 1
      feed_dict[self._training_placeholder] = 0.0
      feed_results = self.session.run(outputs, feed_dict=feed_dict)
      if len(feed_results) > 1:
        if len(transformers):
          raise ValueError("Does not support transformations "
                           "for multiple outputs.")
      elif len(feed_results) == 1:
        result = undo_transforms(feed_results[0], transformers)
        feed_results = [result]
      for ind, result in enumerate(feed_results):
        # GraphConvTensorGraph always outputs batch_size results; only the
        # valid samples should be appended to the final results.
        results[ind].append(result[:n_samples])
    final_results = []
    for result_list in results:
      final_results.append(np.concatenate(result_list, axis=0))
    # If only one output, just return array
    if len(final_results) == 1:
      return final_results[0]
    else:
      return final_results
def _predict(
        self, generator: Iterable[Tuple[Any, Any, Any]],
        transformers: List[Transformer], uncertainty: bool,
        other_output_types: Optional[OneOrMany[str]]) -> OneOrMany[np.ndarray]:
    """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction. Do not call it directly.
    Instead call one of the public prediction methods.

    Parameters
    ----------
    generator: generator
        this should generate batches, each represented as a tuple of the form
        (inputs, labels, weights).
    transformers: list of dc.trans.Transformers
        Transformers that the input data has been transformed by. The output
        is passed through these transformers to undo the transformations.
    uncertainty: bool
        specifies whether this is being called as part of estimating
        uncertainty. If True, it sets the training flag so that dropout will
        be enabled, and returns the values of the uncertainty outputs.
    other_output_types: list, optional
        Provides a list of other output_types (strings) to predict from model.

    Returns:
        a NumPy array if the model produces a single output, or a list of
        arrays if it produces multiple outputs
    """
    results: Optional[List[np.ndarray]] = None
    variances: Optional[List[np.ndarray]] = None
    if uncertainty and (other_output_types is not None):
        raise ValueError(
            'This model cannot compute uncertainties and other output types simultaneously. Please invoke one at a time.'
        )
    if uncertainty:
        if self._variance_outputs is None or len(
                self._variance_outputs) == 0:
            raise ValueError('This model cannot compute uncertainties')
        if len(self._variance_outputs) != len(self._prediction_outputs):
            raise ValueError(
                'The number of variances must exactly match the number of outputs'
            )
    if other_output_types:
        if self._other_outputs is None or len(self._other_outputs) == 0:
            raise ValueError(
                'This model cannot compute other outputs since no other output_types were specified.'
            )
    self._ensure_built()
    self.model.eval()
    for batch in generator:
        inputs, labels, weights = batch
        inputs, _, _ = self._prepare_batch((inputs, None, None))

        # Invoke the model.
        if len(inputs) == 1:
            inputs = inputs[0]
        output_values = self.model(inputs)
        if isinstance(output_values, torch.Tensor):
            output_values = [output_values]
        output_values = [t.detach().cpu().numpy() for t in output_values]

        # Apply transformers and record results.
        if uncertainty:
            var = [output_values[i] for i in self._variance_outputs]
            if variances is None:
                variances = [var]
            else:
                for i, t in enumerate(var):
                    variances[i].append(t)
        access_values = []
        if other_output_types:
            access_values += self._other_outputs
        elif self._prediction_outputs is not None:
            access_values += self._prediction_outputs

        if len(access_values) > 0:
            output_values = [output_values[i] for i in access_values]

        if len(transformers) > 0:
            if len(output_values) > 1:
                raise ValueError(
                    "predict() does not support Transformers for models with multiple outputs."
                )
            elif len(output_values) == 1:
                output_values = [
                    undo_transforms(output_values[0], transformers)
                ]
        if results is None:
            results = [[] for i in range(len(output_values))]
        for i, t in enumerate(output_values):
            results[i].append(t)

    # Concatenate arrays to create the final results.
    final_results = []
    final_variances = []
    if results is not None:
        for r in results:
            final_results.append(np.concatenate(r, axis=0))
    if uncertainty and variances is not None:
        for v in variances:
            final_variances.append(np.concatenate(v, axis=0))
        return zip(final_results, final_variances)
    if len(final_results) == 1:
        return final_results[0]
    else:
        return final_results
mode='regression', model_dir=MODEL_DIR, error_bars=ERROR_BARS) model.fit(train_dataset, nb_epoch=8) valid_scores = model.evaluate(valid_dataset, [metric], transformers) model.save() model.load_from_dir('model_saves') mu, sigma = model.bayesian_predict( valid_dataset, transformers, untransform=True, n_passes=24) print(mu[:4]) print(sigma[:4]) target = undo_transforms(valid_dataset.y, transformers) print(r2_score(target, mu)) mu = mu[:, 0].tolist() sigma = sigma[:, 0].tolist() target = target[:, 0].tolist() print(mu[:4]) print(sigma[:4]) print(target[:4]) in_one_sigma = 0 in_two_sigma = 0 in_four_sigma = 0
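# Sketch of how the in_*_sigma counters above are presumably meant to be
# filled: count how often the target falls within k predicted standard
# deviations of the predicted mean (mu/sigma/target as in the script above;
# the helper name is illustrative).
def sigma_coverage(mu, sigma, target, k):
  hits = sum(1 for m, s, t in zip(mu, sigma, target) if abs(t - m) <= k * s)
  return hits, hits / float(len(target))

# e.g. in_one_sigma, frac_one = sigma_coverage(mu, sigma, target, k=1)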
def compute_model_performance(self,
                              metrics,
                              csv_out=None,
                              stats_out=None,
                              per_task_metrics=False,
                              no_r2=False,
                              no_concordance_index=False,
                              plot=False):
  """
  Computes statistics of model on test data and saves results to csv.

  Parameters
  ----------
  metrics: list
    List of dc.metrics.Metric objects
  per_task_metrics: bool, optional
    If true, return computed metric for each task on multitask dataset.
  """
  self.model.build()
  y = []
  w = []

  def generator_closure():
    for feed_dict in self.generator:
      y.append(feed_dict[self.label_keys[0]])
      if len(self.weights) > 0:
        w.append(feed_dict[self.weights[0]])
      yield feed_dict

  if not len(metrics):
    return {}
  else:
    mode = metrics[0].mode
  # Each branch consumes the generator exactly once; the closure fills y
  # (and w) as prediction runs.
  if mode == "classification":
    y_pred = self.model.predict_proba_on_generator(generator_closure())
    y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
    y = np.reshape(y, newshape=(-1, self.n_tasks, self.n_classes))
    y = from_one_hot(y, axis=-1)
  else:
    y_pred = self.model.predict_proba_on_generator(generator_closure())
    y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
    y = np.reshape(y, newshape=(-1, self.n_tasks))
    y_pred = np.reshape(y_pred, newshape=(-1, self.n_tasks))
  multitask_scores = {}
  all_task_scores = {}
  y = undo_transforms(y, self.output_transformers)
  y_pred = undo_transforms(y_pred, self.output_transformers)
  if len(w) != 0:
    w = np.array(w)
    w = np.reshape(w, newshape=y.shape)
  if csv_out is not None:
    log("Saving predictions to %s" % csv_out, self.verbose)
    self.output_predictions(y_pred, csv_out)
  plot_finished = False
  # Compute multitask metrics
  for i, metric in enumerate(metrics):
    mtc_name = metric.metric.__name__
    if no_r2 and (mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score'):
      continue
    if per_task_metrics:
      if self.is_training_set:
        if no_concordance_index and metric.metric.__name__ == "concordance_index":
          multitask_scores[metric.name] = None
          all_task_scores[metric.name] = None
          continue
        if plot and not plot_finished:
          multitask_scores[metric.name], computed_metrics = metric.compute_metric(
              y,
              y_pred,
              w,
              per_task_metrics=True,
              n_classes=self.n_classes,
              plot=True,
              all_metrics=metrics,
              is_training_set=self.is_training_set,
              no_concordance_index=no_concordance_index,
              tasks=self.tasks,
              model_name=self.model_name)
          all_task_scores[metric.name] = computed_metrics
          plot_finished = True
        else:
          multitask_scores[metric.name], computed_metrics = metric.compute_metric(
              y,
              y_pred,
              w,
              per_task_metrics=True,
              n_classes=self.n_classes,
              plot=False,
              is_training_set=self.is_training_set,
              tasks=self.tasks,
              model_name=self.model_name)
          all_task_scores[metric.name] = computed_metrics
      elif plot and (i == len(metrics) - 1 or
                     metric.metric.__name__ == "concordance_index") and (
                         not plot_finished):
        multitask_scores[metric.name], computed_metrics = metric.compute_metric(
            y,
            y_pred,
            w,
            per_task_metrics=True,
            n_classes=self.n_classes,
            plot=True,
            all_metrics=metrics,
            is_training_set=self.is_training_set,
            tasks=self.tasks,
            model_name=self.model_name)
        all_task_scores[metric.name] = computed_metrics
        plot_finished = True
      else:  # Otherwise don't need to plot.
multitask_scores[ metric.name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True, n_classes=self.n_classes, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) all_task_scores[metric.name] = computed_metrics else: if self.is_training_set: if no_concordance_index and metric.metric.__name__ == "concordance_index": multitask_scores[metric.name] = None continue if plot and not plot_finished: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, no_concordance_index=no_concordance_index, tasks=self.tasks, model_name=self.model_name) plot_finished = True else: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) elif plot and (i == len(metrics) - 1 or metric.metric.__name__ == "concordance_index") and (not plot_finished): multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) plot_finished = True else: #Otherwise don't need to plot. multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) if not per_task_metrics: return multitask_scores else: return multitask_scores, all_task_scores
def compute_model_performance(self, metrics, csv_out=None, stats_out=None, per_task_metrics=False, no_concordance_index=False, plot=False, no_r2=False): """ Computes statistics of model on test data and saves results to csv. Parameters ---------- metrics: list List of dc.metrics.Metric objects csv_out: str, optional Filename to write CSV of model predictions. stats_out: str, optional Filename to write computed statistics. per_task_metrics: bool, optional If true, return computed metric for each task on multitask dataset. """ y = self.dataset.y y = undo_transforms(y, self.output_transformers) w = self.dataset.w if not len(metrics): return {} else: mode = metrics[0].mode y_pred = self.model.predict(self.dataset, self.output_transformers) if mode == "classification": y_pred_print = np.argmax(y_pred, -1) else: y_pred_print = y_pred multitask_scores = {} all_task_scores = {} if csv_out is not None: log("Saving predictions to %s" % csv_out, self.verbose) self.output_predictions(y_pred_print, csv_out) plot_finished = False # Compute multitask metrics for i, metric in enumerate(metrics): mtc_name = metric.metric.__name__ if no_r2 and (mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score'): continue if per_task_metrics: if self.is_training_set: if no_concordance_index and metric.metric.__name__ == "concordance_index": multitask_scores[metric.name] = None all_task_scores[metric.name] = None continue if plot and not plot_finished: # If this dataset is the training data set, don't calculate CI if no_concordance_index. multitask_scores[ metric. name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, no_concordance_index=no_concordance_index, tasks=self.tasks, model_name=self.model_name) all_task_scores[metric.name] = computed_metrics plot_finished = True else: # No longer need to plot. Could be wasting time calculating metrics again, but they # are super fast so it is no big deal. multitask_scores[ metric. name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) all_task_scores[metric.name] = computed_metrics # Now deal with validation or test sets. elif plot and (i == len(metrics) - 1 or metric.metric.__name__ == "concordance_index") and (not plot_finished): multitask_scores[ metric.name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) all_task_scores[metric.name] = computed_metrics plot_finished = True else: # Otherwise don't need to plot. 
multitask_scores[ metric.name], computed_metrics = metric.compute_metric( y, y_pred, w, per_task_metrics=True, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) all_task_scores[metric.name] = computed_metrics else: if self.is_training_set: if no_concordance_index and metric.metric.__name__ == "concordance_index": multitask_scores[metric.name] = None continue if plot and not plot_finished: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, no_concordance_index=no_concordance_index, tasks=self.tasks, model_name=self.model_name) plot_finished = True else: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) elif plot and (i == len(metrics) - 1 or metric.metric.__name__ == "concordance_index") and (not plot_finished): multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, plot=True, all_metrics=metrics, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) plot_finished = True else: multitask_scores[metric.name] = metric.compute_metric( y, y_pred, w, per_task_metrics=False, plot=False, is_training_set=self.is_training_set, tasks=self.tasks, model_name=self.model_name) if stats_out is not None: log("Saving stats to %s" % stats_out, self.verbose) self.output_statistics(multitask_scores, stats_out) if not per_task_metrics: return multitask_scores else: return multitask_scores, all_task_scores
def compute_model_performance(metrics,
                              y_pred,
                              y,
                              w,
                              transformers,
                              tasks,
                              n_classes=2,
                              per_task_metrics=False):
  """
  Computes statistics of a model based on the given predictions and ground
  truths.

  :param metrics: list
    List of Metric objects.
  :param y_pred: ndarray
    The predicted values.
  :param y: ndarray
    The ground truths.
  :param w: ndarray
    Label weights.
  :param transformers: list
    DeepChem/PADME data transformers used in the loading pipeline.
  :param tasks: list
    Names of the tasks being scored.
  :param n_classes: int, optional
    Number of classes in the data (for classification tasks only).
  :param per_task_metrics: bool, optional
    If true, return computed metric for each task on multitask dataset.
  :return: multitask_scores dict, plus a per-task scores dict when
    per_task_metrics is true.
  """
  if not len(metrics):
    return {}
  multitask_scores = {}
  all_task_scores = {}
  y = undo_transforms(y, transformers)
  y_pred = undo_transforms(y_pred, transformers)
  if len(w) != 0:
    w = np.array(w)
    w = np.reshape(w, newshape=y.shape)

  # Compute multitask metrics
  for metric in metrics:
    if per_task_metrics:
      multitask_scores[metric.name], computed_metrics = metric.compute_metric(
          y,
          y_pred,
          w,
          per_task_metrics=True,
          n_classes=n_classes,
          tasks=tasks)
      all_task_scores[metric.name] = computed_metrics
    else:
      multitask_scores[metric.name] = metric.compute_metric(
          y,
          y_pred,
          w,
          per_task_metrics=False,
          n_classes=n_classes,
          tasks=tasks)

  if not per_task_metrics:
    return multitask_scores
  else:
    return multitask_scores, all_task_scores
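# A minimal usage sketch for the standalone function above, assuming
# undo_transforms is in scope as throughout this file. The metric object is a
# hypothetical stand-in: anything exposing .name and a compute_metric with
# this keyword signature works; real code would pass dc.metrics.Metric
# instances.
import numpy as np

class ToyMetric:
  name = "mean_abs_error"
  def compute_metric(self, y, y_pred, w, per_task_metrics=False,
                     n_classes=2, tasks=None):
    return float(np.mean(np.abs(y - y_pred)))

y_true = np.array([[0.0], [1.0]])
y_hat = np.array([[0.1], [0.9]])
w = np.ones_like(y_true)
scores = compute_model_performance([ToyMetric()], y_hat, y_true, w,
                                   transformers=[], tasks=["task0"])
# scores == {"mean_abs_error": 0.1}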
def _predict(self, generator, transformers, outputs, uncertainty): """ Predict outputs for data provided by a generator. This is the private implementation of prediction. Do not call it directly. Instead call one of the public prediction methods. Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. uncertainty: bool specifies whether this is being called as part of estimating uncertainty. If True, it sets the training flag so that dropout will be enabled, and returns the values of the uncertainty outputs. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() if outputs is None: outputs = self.outputs elif not isinstance(outputs, collections.Sequence): outputs = [outputs] if uncertainty: if len(self.variances) == 0: raise ValueError('This model cannot compute uncertainties') if len(self.variances) != len(outputs): raise ValueError( 'The number of variances must exactly match the number of outputs') tensors = outputs + self.variances else: tensors = outputs with self._get_tf("Graph").as_default(): # Gather results for each output results = [[] for out in tensors] n_samples = 0 n_enqueued = [0] final_sample = [None] if self.queue_installed: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in self._create_feed_dicts(generator, uncertainty): if self.queue_installed: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 feed_results = self._run_graph(tensors, feed_dict, uncertainty) if tfe.in_eager_mode(): feed_results = [f.numpy() for f in feed_results] if len(feed_results) > 1: if len(transformers): raise ValueError("Does not support transformations " "for multiple outputs.") elif len(feed_results) == 1: result = undo_transforms(feed_results[0], transformers) feed_results = [result] for ind, result in enumerate(feed_results): results[ind].append(result) final_results = [] for result_list in results: final_results.append(np.concatenate(result_list, axis=0)) # If only one output, just return array if len(final_results) == 1: return final_results[0] elif uncertainty: return zip(final_results[:len(outputs)], final_results[len(outputs):]) else: return final_results
def _predict(self, generator: Iterable[Tuple[Any, Any, Any]],
             transformers: List[Transformer],
             outputs: Optional[OneOrMany[tf.Tensor]], uncertainty: bool,
             other_output_types: Optional[OneOrMany[str]]
            ) -> OneOrMany[np.ndarray]:
  """
  Predict outputs for data provided by a generator.

  This is the private implementation of prediction. Do not call it directly.
  Instead call one of the public prediction methods.

  Parameters
  ----------
  generator: generator
    this should generate batches, each represented as a tuple of the form
    (inputs, labels, weights).
  transformers: list of dc.trans.Transformers
    Transformers that the input data has been transformed by. The output
    is passed through these transformers to undo the transformations.
  outputs: Tensor or list of Tensors
    The outputs to return. If this is None, the model's standard prediction
    outputs will be returned. Alternatively one or more Tensors within the
    model may be specified, in which case the output of those Tensors will
    be returned.
  uncertainty: bool
    specifies whether this is being called as part of estimating uncertainty.
    If True, it sets the training flag so that dropout will be enabled, and
    returns the values of the uncertainty outputs.
  other_output_types: list, optional
    Provides a list of other output_types (strings) to predict from model.

  Returns
  -------
  a NumPy array if the model produces a single output, or a list of arrays
  if it produces multiple outputs
  """
  results: Optional[List[List[np.ndarray]]] = None
  variances: Optional[List[List[np.ndarray]]] = None
  if (outputs is not None) and (other_output_types is not None):
    raise ValueError(
        'This model cannot compute outputs and other output_types '
        'simultaneously. Please invoke one at a time.')
  if uncertainty and (other_output_types is not None):
    raise ValueError(
        'This model cannot compute uncertainties and other output types '
        'simultaneously. Please invoke one at a time.')
  if uncertainty:
    assert outputs is None
    if self._variance_outputs is None or len(self._variance_outputs) == 0:
      raise ValueError('This model cannot compute uncertainties')
    if len(self._variance_outputs) != len(self._prediction_outputs):
      raise ValueError(
          'The number of variances must exactly match the number of outputs')
  if other_output_types:
    assert outputs is None
    if self._other_outputs is None or len(self._other_outputs) == 0:
      raise ValueError(
          'This model cannot compute other outputs since no other '
          'output_types were specified.')
  if (outputs is not None and self.model.inputs is not None and
      len(self.model.inputs) == 0):
    raise ValueError(
        "Cannot use 'outputs' argument with a model that does not specify "
        "its inputs. Note models defined in imperative subclassing style "
        "cannot specify outputs")
  if tf.is_tensor(outputs):
    outputs = [outputs]
  for batch in generator:
    inputs, labels, weights = batch
    self._create_inputs(inputs)
    inputs, _, _ = self._prepare_batch((inputs, None, None))

    # Invoke the model.
    if len(inputs) == 1:
      inputs = inputs[0]
    if outputs is not None:
      outputs = tuple(outputs)
      key = tuple(t.ref() for t in outputs)
      if key not in self._output_functions:
        self._output_functions[key] = tf.keras.backend.function(
            self.model.inputs, outputs)
      output_values = self._output_functions[key](inputs)
    else:
      output_values = self._compute_model(inputs)
    if tf.is_tensor(output_values):
      output_values = [output_values]
    output_values = [t.numpy() for t in output_values]

    # Apply transformers and record results.
if uncertainty: var = [output_values[i] for i in self._variance_outputs] if variances is None: variances = [var] else: for i, t in enumerate(var): variances[i].append(t) access_values = [] if other_output_types: access_values += self._other_outputs elif self._prediction_outputs is not None: access_values += self._prediction_outputs if len(access_values) > 0: output_values = [output_values[i] for i in access_values] if len(transformers) > 0: if len(output_values) > 1: raise ValueError( "predict() does not support Transformers for models with multiple outputs." ) elif len(output_values) == 1: output_values = [undo_transforms(output_values[0], transformers)] if results is None: results = [[] for i in range(len(output_values))] for i, t in enumerate(output_values): results[i].append(t) # Concatenate arrays to create the final results. final_results = [] final_variances = [] if results is not None: for r in results: final_results.append(np.concatenate(r, axis=0)) if uncertainty and variances is not None: for v in variances: final_variances.append(np.concatenate(v, axis=0)) return zip(final_results, final_variances) if len(final_results) == 1: return final_results[0] else: return final_results
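# When uncertainty=True, the keras and torch _predict implementations above
# return zip(final_results, final_variances). A sketch of consuming that
# pairing with toy arrays, turning each variance into a standard deviation
# and a rough interval:
import numpy as np

final_results = [np.array([0.5, 1.5])]
final_variances = [np.array([0.04, 0.09])]
for pred, var in zip(final_results, final_variances):
  std = np.sqrt(var)
  lower, upper = pred - 2 * std, pred + 2 * std  # ~95% interval under Gaussian
  assert np.allclose(std, [0.2, 0.3])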