Example #1
  def test_one_hot(self):
    y = np.array([0, 0, 1, 0, 1, 1, 0])
    y_hot = metrics.to_one_hot(y)
    expected = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0]])
    yp = metrics.from_one_hot(y_hot)
    assert np.array_equal(expected, y_hot)
    assert np.array_equal(y, yp)
Example #2
 def test_one_hot(self):
     y = np.array([0, 0, 1, 0, 1, 1, 0])
     y_hot = metrics.to_one_hot(y)
     expected = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1],
                          [1, 0]])
     yp = metrics.from_one_hot(y_hot)
     assert np.array_equal(expected, y_hot)
     assert np.array_equal(y, yp)
Example #3
def test_one_hot():
  """Test the one hot encoding."""
  y = np.array([0, 0, 1, 0, 1, 1, 0])
  y_hot = to_one_hot(y)
  expected = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0]])
  yp = from_one_hot(y_hot)
  assert np.array_equal(expected, y_hot)
  assert np.array_equal(y, yp)
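
All three tests exercise the same round trip: to_one_hot turns integer class labels into rows of a one-hot matrix, and from_one_hot inverts the encoding with an argmax. The following is a minimal NumPy sketch of that round trip; the helper definitions are illustrative stand-ins, not DeepChem's actual implementation.

import numpy as np

def to_one_hot(y, n_classes=2):
  # Row i of the identity matrix encodes label y[i]: 0 -> [1, 0], 1 -> [0, 1].
  return np.eye(n_classes)[np.asarray(y, dtype=int)]

def from_one_hot(y_hot, axis=1):
  # Inverse operation: the index of the largest entry along `axis` is the label.
  return np.argmax(y_hot, axis=axis)

y = np.array([0, 0, 1, 0, 1, 1, 0])
y_hot = to_one_hot(y)
assert np.array_equal(from_one_hot(y_hot), y)
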
Example #4
  def predict_on_batch(self, X, pad_batch=False):
    """Return model output for the provided input.

    Restore(checkpoint) must have previously been called on this object.

    Args:
      X: Numpy array of featurized inputs for a single batch.
      pad_batch: If True, pad X up to self.batch_size before running the graph.

    Returns:
      Numpy array of model outputs with shape n_examples x n_tasks.

    Raises:
      ValueError: If the model output has an unrecognized rank.
    """
    len_unpadded = len(X)
    if pad_batch:
      X = pad_features(self.batch_size, X)
    
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      output = []
      start = time.time()
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))


    outputs = np.copy(outputs)
    outputs = np.reshape(outputs, (len(X), n_tasks))
    outputs = outputs[:len_unpadded]
    return outputs
Example #5
  def predict_on_batch(self, X, pad_batch=False):
    """Return model output for the provided input.

    Restore(checkpoint) must have previously been called on this object.

    Args:
      X: Numpy array of featurized inputs for a single batch.
      pad_batch: If True, pad X up to self.batch_size before running the graph.

    Returns:
      Numpy array of model outputs with shape n_examples x n_tasks.

    Raises:
      ValueError: If the model output has an unrecognized rank.
    """
    len_unpadded = len(X)
    if pad_batch:
      X = pad_features(self.batch_size, X)
    
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      output = []
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))


    outputs = np.copy(outputs)
    outputs = np.reshape(outputs, (len(X), n_tasks))
    outputs = outputs[:len_unpadded]
    return outputs
Example #6
  def predict_on_batch(self, X):
    """Return model output for the provided input.

    Restore(checkpoint) must have previously been called on this object.

    Args:
      X: Numpy array of featurized inputs for a single batch.

    Returns:
      Numpy array of model outputs with shape num_examples x num_tasks.

    Raises:
      AssertionError: If model is not in evaluation mode.
      ValueError: If the model output has an unrecognized rank.
    """
    
    if not self._restored_model:
      self.restore()
    with self.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output'])

      # run eval data through the model
      num_tasks = self.num_tasks
      output = []
      start = time.time()
      with self._get_shared_session().as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session().run(
            self.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:num_tasks], dtype=float)
        # reshape to batch_size x num_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))

    return np.copy(outputs)
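
In Examples #4 through #6 the graph emits one probability array per task; after transposing to batch_size x n_tasks (x n_classes), from_one_hot(..., axis=-1) collapses the class dimension back to an integer prediction per task. Since that helper is essentially an argmax, the decoding step can be sketched with plain NumPy (the shapes below are invented for illustration):

import numpy as np

batch_size, n_tasks, n_classes = 4, 3, 2
# Per-task class probabilities, shaped batch_size x n_tasks x n_classes.
probs = np.random.rand(batch_size, n_tasks, n_classes)
probs /= probs.sum(axis=-1, keepdims=True)

# from_one_hot(probs, axis=-1) amounts to an argmax over the class axis,
# yielding one integer label per example and task.
labels = np.argmax(probs, axis=-1)
print(labels.shape)  # (4, 3)
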
Example #7
  def predict_on_generator(self, generator, transformers=[]):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

    # Arguments
        x: the input data, as a Numpy array.
        batch_size: integer.
        verbose: verbosity mode, 0 or 1.

    # Returns
        A Numpy array of predictions.
    """
    retval = self.predict_proba_on_generator(generator, transformers)
    if self.mode == 'classification':
      retval = np.expand_dims(from_one_hot(retval, axis=2), axis=1)
    return retval
Example #8
    def predict_on_batch(self, X, sess=None):
        """Generates output predictions for the input samples,
      processing the samples in a batched way.

    # Arguments
        x: the input data, as a Numpy array.
        batch_size: integer.
        verbose: verbosity mode, 0 or 1.

    # Returns
        A Numpy array of predictions.
    """
        retval = self.predict_proba_on_batch(X, sess)
        if self.mode == 'classification':
            return from_one_hot(retval, axis=2)
        return retval
Example #9
  def predict_on_batch(self, X, sess=None):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

    # Arguments
        x: the input data, as a Numpy array.
        batch_size: integer.
        verbose: verbosity mode, 0 or 1.

    # Returns
        A Numpy array of predictions.
    """
    retval = self.predict_proba_on_batch(X, sess)
    if self.mode == 'classification':
      return from_one_hot(retval, axis=2)
    return retval
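
Examples #7 through #9 are thin Keras-style wrappers: predict_proba_on_* returns class probabilities, and in classification mode from_one_hot(..., axis=2) picks the most likely class for each task; Example #7 additionally re-inserts a singleton axis with expand_dims. A shape-only sketch under assumed dimensions:

import numpy as np

# Assumed probability output: 5 samples, 1 task, 2 classes.
proba = np.random.rand(5, 1, 2)
proba /= proba.sum(axis=2, keepdims=True)

labels = np.argmax(proba, axis=2)         # from_one_hot(proba, axis=2) -> shape (5, 1)
wrapped = np.expand_dims(labels, axis=1)  # as in Example #7 -> shape (5, 1, 1)
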
Example #10
  def predict_on_batch(self, X, pad_batch=False):
    
    if pad_batch:
      X = pad_features(self.batch_size, X)
    
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      output = []
      start = time.time()
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # Convert the 2D prediction tensor to 2D x n_classes (=2) by stacking
        # the complementary probability for class 0 next to the predicted
        # probability for class 1.
        complementary = np.ones(np.shape(batch_output)) - batch_output
        batch_output = np.squeeze(
            np.stack(arrays=[complementary, batch_output], axis=2))
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))

    return np.copy(outputs)
Example #11
  def predict_on_batch(self, X):
    
    if self.pad_batches:
      X = pad_features(self.batch_size, X)
    
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      output = []
      start = time.time()
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # Convert the 2D prediction tensor to 2D x n_classes (=2) by stacking
        # the complementary probability for class 0 next to the predicted
        # probability for class 1.
        complementary = np.ones(np.shape(batch_output)) - batch_output
        batch_output = np.squeeze(
            np.stack(arrays=[complementary, batch_output], axis=2))
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))

    return np.copy(outputs)
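
Examples #10 and #11 receive only the positive-class probability for each task, so they synthesize a two-class distribution by stacking 1 - p next to p before decoding. A sketch of that step under the assumed n_tasks x batch_size layout:

import numpy as np

n_tasks, batch_size = 3, 4
# Positive-class probability per task and example (values invented for illustration).
p = np.random.rand(n_tasks, batch_size)

# Class 0 gets the complementary probability, class 1 the predicted probability.
two_class = np.stack([1.0 - p, p], axis=2)   # n_tasks x batch_size x 2
two_class = two_class.transpose((1, 0, 2))   # batch_size x n_tasks x 2, as in the examples
preds = np.argmax(two_class, axis=-1)        # from_one_hot(..., axis=-1)
print(preds.shape)  # (4, 3)
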
Example #12
 def predict_on_generator(self, generator, transformers=[]):
   retval = self.predict_proba_on_generator(generator, transformers)
   if self.mode == 'classification':
     retval = np.expand_dims(from_one_hot(retval, axis=2), axis=1)
   return retval
Example #13
 def predict_on_batch(self, X_batch):
     X_batch = torch.autograd.Variable(torch.cuda.FloatTensor(X_batch))
     outputs = self.forward(X_batch, training=False)
     y_pred_batch = torch.stack(outputs, 1).data.cpu().numpy()[:]
     y_pred_batch = from_one_hot(y_pred_batch, 2)
     return y_pred_batch
Example #14
    def compute_model_performance(self,
                                  metrics,
                                  csv_out=None,
                                  stats_out=None,
                                  per_task_metrics=False,
                                  no_r2=False,
                                  no_concordance_index=False,
                                  plot=False):
        """
    Computes statistics of model on test data and saves results to csv.

    Parameters
    ----------
    metrics: list
      List of dc.metrics.Metric objects
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.
    """
        self.model.build()
        y = []
        w = []

        def generator_closure():
            for feed_dict in self.generator:
                y.append(feed_dict[self.label_keys[0]])
                if len(self.weights) > 0:
                    w.append(feed_dict[self.weights[0]])
                yield feed_dict

        if not len(metrics):
            return {}
        else:
            mode = metrics[0].mode
        if mode == "classification":
            y_pred = self.model.predict_proba_on_generator(generator_closure())
            y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
            y = np.reshape(y, newshape=(-1, self.n_tasks, self.n_classes))
            y = from_one_hot(y, axis=-1)
        else:
            y_pred = self.model.predict_proba_on_generator(generator_closure())
            y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
            y = np.reshape(y, newshape=(-1, self.n_tasks))
            y_pred = np.reshape(y_pred, newshape=(-1, self.n_tasks))
        y_pred = self.model.predict_on_generator(generator_closure())
        y = np.concatenate(y, axis=0)
        multitask_scores = {}
        all_task_scores = {}

        y = undo_transforms(y, self.output_transformers)
        y_pred = undo_transforms(y_pred, self.output_transformers)
        if len(w) != 0:
            w = np.array(w)
            w = np.reshape(w, newshape=y.shape)

        if csv_out is not None:
            log("Saving predictions to %s" % csv_out, self.verbose)
            self.output_predictions(y_pred, csv_out)

        plot_finished = False
        # Compute multitask metrics
        for i, metric in enumerate(metrics):
            mtc_name = metric.metric.__name__
            if no_r2 and (mtc_name == 'r2_score'
                          or mtc_name == 'pearson_r2_score'):
                continue
            if per_task_metrics:
                if self.is_training_set:
                    if no_concordance_index and metric.metric.__name__ == "concordance_index":
                        multitask_scores[metric.name] = None
                        all_task_scores[metric.name] = None
                        continue
                    if plot and not plot_finished:
                        multitask_scores[
                            metric.
                            name], computed_metrics = metric.compute_metric(
                                y,
                                y_pred,
                                w,
                                per_task_metrics=True,
                                n_classes=self.n_classes,
                                plot=True,
                                all_metrics=metrics,
                                is_training_set=self.is_training_set,
                                no_concordance_index=no_concordance_index,
                                tasks=self.tasks,
                                model_name=self.model_name)
                        all_task_scores[metric.name] = computed_metrics
                        plot_finished = True
                    else:
                        multitask_scores[
                            metric.
                            name], computed_metrics = metric.compute_metric(
                                y,
                                y_pred,
                                w,
                                per_task_metrics=True,
                                n_classes=self.n_classes,
                                plot=False,
                                is_training_set=self.is_training_set,
                                tasks=self.tasks,
                                model_name=self.model_name)
                        all_task_scores[metric.name] = computed_metrics

                elif plot and (i == len(metrics) - 1 or metric.metric.__name__
                               == "concordance_index") and (not plot_finished):
                    multitask_scores[
                        metric.name], computed_metrics = metric.compute_metric(
                            y,
                            y_pred,
                            w,
                            per_task_metrics=True,
                            n_classes=self.n_classes,
                            plot=True,
                            all_metrics=metrics,
                            is_training_set=self.is_training_set,
                            tasks=self.tasks,
                            model_name=self.model_name)
                    all_task_scores[metric.name] = computed_metrics
                    plot_finished = True

                else:  # Otherwise, no plotting is needed.
                    multitask_scores[
                        metric.name], computed_metrics = metric.compute_metric(
                            y,
                            y_pred,
                            w,
                            per_task_metrics=True,
                            n_classes=self.n_classes,
                            plot=False,
                            is_training_set=self.is_training_set,
                            tasks=self.tasks,
                            model_name=self.model_name)
                    all_task_scores[metric.name] = computed_metrics

            else:
                if self.is_training_set:
                    if no_concordance_index and metric.metric.__name__ == "concordance_index":
                        multitask_scores[metric.name] = None
                        continue
                    if plot and not plot_finished:
                        multitask_scores[metric.name] = metric.compute_metric(
                            y,
                            y_pred,
                            w,
                            per_task_metrics=False,
                            n_classes=self.n_classes,
                            plot=True,
                            all_metrics=metrics,
                            is_training_set=self.is_training_set,
                            no_concordance_index=no_concordance_index,
                            tasks=self.tasks,
                            model_name=self.model_name)
                        plot_finished = True
                    else:
                        multitask_scores[metric.name] = metric.compute_metric(
                            y,
                            y_pred,
                            w,
                            per_task_metrics=False,
                            n_classes=self.n_classes,
                            plot=False,
                            is_training_set=self.is_training_set,
                            tasks=self.tasks,
                            model_name=self.model_name)

                elif plot and (i == len(metrics) - 1 or metric.metric.__name__
                               == "concordance_index") and (not plot_finished):
                    multitask_scores[metric.name] = metric.compute_metric(
                        y,
                        y_pred,
                        w,
                        per_task_metrics=False,
                        n_classes=self.n_classes,
                        plot=True,
                        all_metrics=metrics,
                        is_training_set=self.is_training_set,
                        tasks=self.tasks,
                        model_name=self.model_name)
                    plot_finished = True

                else:  # Otherwise, no plotting is needed.
                    multitask_scores[metric.name] = metric.compute_metric(
                        y,
                        y_pred,
                        w,
                        per_task_metrics=False,
                        n_classes=self.n_classes,
                        plot=False,
                        is_training_set=self.is_training_set,
                        tasks=self.tasks,
                        model_name=self.model_name)

        if not per_task_metrics:
            return multitask_scores
        else:
            return multitask_scores, all_task_scores
Example #15
  def compute_model_performance(self, metrics, per_task_metrics=False):
    """
    Computes statistics of model on test data and saves results to csv.

    Parameters
    ----------
    metrics: list
      List of dc.metrics.Metric objects
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.
    """
    self.model.build()
    y = []
    w = []

    def generator_closure():
      for feed_dict in self.generator:
        labels = []
        for layer in self.label_keys:
          labels.append(feed_dict[layer])
          del feed_dict[layer]
        for weight in self.weights:
          w.append(feed_dict[weight])
          del feed_dict[weight]
        y.append(np.array(labels))
        yield feed_dict

    if not len(metrics):
      return {}
    else:
      mode = metrics[0].mode
    if mode == "classification":
      y_pred = self.model.predict_proba_on_generator(generator_closure())
      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
      y = np.reshape(y, newshape=(-1, self.n_tasks, self.n_classes))
      y = from_one_hot(y, axis=-1)
    else:
      y_pred = self.model.predict_proba_on_generator(generator_closure())
      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
      y = np.reshape(y, newshape=(-1, self.n_tasks))
      y_pred = np.reshape(y_pred, newshape=(-1, self.n_tasks))
    multitask_scores = {}
    all_task_scores = {}

    y = undo_transforms(y, self.output_transformers)
    y_pred = undo_transforms(y_pred, self.output_transformers)
    if len(w) != 0:
      w = np.array(w)
      w = np.reshape(w, newshape=y.shape)

    # Compute multitask metrics
    for metric in metrics:
      if per_task_metrics:
        multitask_scores[metric.name], computed_metrics = metric.compute_metric(
            y, y_pred, w, per_task_metrics=True, n_classes=self.n_classes)
        all_task_scores[metric.name] = computed_metrics
      else:
        multitask_scores[metric.name] = metric.compute_metric(
            y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes)

    if not per_task_metrics:
      return multitask_scores
    else:
      return multitask_scores, all_task_scores
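
In both compute_model_performance variants, classification labels are collected one batch at a time in one-hot form, reshaped to (-1, n_tasks, n_classes), and decoded with from_one_hot(..., axis=-1) so they can be compared against predictions. A sketch of that reshape-and-decode step, with the batch layout assumed to be n_batches x n_tasks x batch_size x n_classes before the transpose:

import numpy as np

n_batches, n_tasks, batch_size, n_classes = 2, 3, 4, 2
# One-hot label batches in the assumed layout.
y = np.eye(n_classes)[np.random.randint(n_classes, size=(n_batches, n_tasks, batch_size))]

y = np.transpose(y, axes=[0, 2, 1, 3])       # -> n_batches x batch_size x n_tasks x n_classes
y = np.reshape(y, (-1, n_tasks, n_classes))  # -> (n_batches * batch_size) x n_tasks x n_classes
y = np.argmax(y, axis=-1)                    # from_one_hot(y, axis=-1)
print(y.shape)  # (8, 3)
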