예제 #1
0
    def test(self,
             sess,
             batcher,
             rc=False,
             shifts=[0],
             mc_n=0,
             test_batches=None):
        """ Compute model accuracy on a test set.

        Args:
          sess:         TensorFlow session
          batcher:      Batcher object to provide data. Its next() is
            unpacked as (Xb, Yb, NAb, Nb) and iteration stops once Xb
            is None.
          rc:           Average predictions from the forward and reverse
            complement sequences.
          shifts:       Average predictions from sequence shifts left/right.
            The sequence is only iterated, never modified.
          mc_n:         Monte Carlo iterations per rc/shift. When > 0 the
            model is set to 'test_mc' mode; otherwise 'test' mode is used
            with a single iteration per rc/shift.
          test_batches: Number of test batches (None to use every batch).

        Returns:
          acc:          Accuracy object

        Raises:
          ValueError: if no batch was evaluated (empty batcher or
            test_batches of 0).
        """

        # determine ensemble iteration parameters: each shift is run on the
        # forward strand and, if rc is set, once more as reverse complement
        ensemble_fwdrc = []
        ensemble_shifts = []
        for shift in shifts:
            ensemble_fwdrc.append(True)
            ensemble_shifts.append(shift)
            if rc:
                ensemble_fwdrc.append(False)
                ensemble_shifts.append(shift)

        if mc_n > 0:
            # setup feed dict
            fd = self.set_mode('test_mc')

        else:
            # setup feed dict
            fd = self.set_mode('test')

            # co-opt the variable to represent
            # iterations per fwdrc/shift.
            mc_n = 1

        # initialize prediction and target arrays
        preds = []
        targets = []
        targets_na = []

        batch_losses = []
        batch_target_losses = []

        # get first batch
        Xb, Yb, NAb, Nb = batcher.next()

        batch_num = 0
        while Xb is not None and (test_batches is None
                                  or batch_num < test_batches):
            # make ensemble predictions; only the ensembled mean is used here
            preds_batch, _, _ = self._predict_ensemble(
                sess, fd, Xb, ensemble_fwdrc, ensemble_shifts, mc_n)

            # add target info
            fd[self.targets] = Yb
            fd[self.targets_na] = NAb

            # one all-False mask row per kept sequence
            targets_na.append(np.zeros([Nb, self.preds_length], dtype='bool'))

            # recompute loss w/ ensembled prediction
            fd[self.preds_adhoc] = preds_batch
            targets_batch, loss_batch, target_losses_batch = sess.run(
                [self.targets_op, self.loss_adhoc, self.target_losses_adhoc],
                feed_dict=fd)

            # accumulate predictions and targets, keeping the first Nb rows
            if preds_batch.ndim == 3:
                preds.append(preds_batch[:Nb, :, :].astype('float16'))
                targets.append(targets_batch[:Nb, :, :].astype('float16'))

            else:
                # The computation below does not depend on a quantile index,
                # so it is done exactly once per batch. Appending it once per
                # quantile would duplicate rows and leave preds/targets out
                # of step with targets_na.

                # TEMP, ideally this will be in the HDF5 and set previously
                self.quantile_means = np.geomspace(0.1, 256, 16)

                # softmax over the last (quantile) axis
                quantile_exp = np.exp(preds_batch[:Nb, :, :, :])
                pred_probs_batch = quantile_exp / np.sum(
                    quantile_exp, axis=3, keepdims=True)

                # expectation over quantile medians
                preds.append(np.dot(pred_probs_batch, self.quantile_means))

                # compare to quantile median
                targets.append(
                    self.quantile_means[targets_batch[:Nb, :, :] - 1])

            # accumulate loss
            batch_losses.append(loss_batch)
            batch_target_losses.append(target_losses_batch)

            # next batch
            Xb, Yb, NAb, Nb = batcher.next()
            batch_num += 1

        # reset batcher (done before any error so it is left reusable)
        batcher.reset()

        if not preds:
            raise ValueError('No test batches were evaluated.')

        targets = np.concatenate(targets, axis=0)
        preds = np.concatenate(preds, axis=0)
        targets_na = np.concatenate(targets_na, axis=0)

        # mean across batches
        batch_losses = np.mean(batch_losses)
        batch_target_losses = np.array(batch_target_losses).mean(axis=0)

        # instantiate accuracy object
        acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                                batch_target_losses)

        return acc
예제 #2
0
    def test_from_data_ops(self, sess, test_batches=None):
        """ Compute model accuracy on a test set, where data is loaded from a queue.

        Batches are evaluated until test_batches have been processed or the
        session raises tf.errors.OutOfRangeError, whichever comes first.

        Args:
          sess:         TensorFlow session
          test_batches: Number of test batches to use (None for no limit).

        Returns:
          acc:          Accuracy object

        Raises:
          ValueError: if no batch could be evaluated.
        """

        # TODO(dbelanger) this ignores rc and shift ensembling for now.
        # Accuracy will be slightly lower than if we had used this.
        # The rc and shift data augmentation need to be pulled into the graph.

        fd = self.set_mode('test')

        # initialize prediction and target arrays
        preds = []
        targets = []
        targets_na = []

        batch_losses = []
        batch_target_losses = []

        data_available = True
        batch_num = 0
        while data_available and (test_batches is None
                                  or batch_num < test_batches):
            # make non-ensembled predictions; self.targets and
            # self.targets_na stay in the fetch list so the set of executed
            # ops is unchanged, but their values are not needed here
            run_ops = [
                self.targets_op, self.preds_op, self.loss_op,
                self.target_losses, self.targets, self.targets_na
            ]
            try:
                run_returns = sess.run(run_ops, feed_dict=fd)
            except tf.errors.OutOfRangeError:
                data_available = False
            else:
                (targets_batch, preds_batch, loss_batch, target_losses_batch,
                 _, _) = run_returns

                # accumulate predictions and targets
                preds.append(preds_batch.astype('float16'))
                targets.append(targets_batch.astype('float16'))

                # size the mask from the batch actually returned so it always
                # lines up with preds/targets, even for a short final batch
                targets_na.append(
                    np.zeros([preds_batch.shape[0], self.preds_length],
                             dtype='bool'))

                # accumulate loss
                batch_losses.append(loss_batch)
                batch_target_losses.append(target_losses_batch)

                batch_num += 1

        if not preds:
            raise ValueError('No test batches were evaluated.')

        # construct arrays
        targets = np.concatenate(targets, axis=0)
        preds = np.concatenate(preds, axis=0)
        targets_na = np.concatenate(targets_na, axis=0)

        # mean across batches
        batch_losses = np.mean(batch_losses)
        batch_target_losses = np.array(batch_target_losses).mean(axis=0)

        # instantiate accuracy object
        acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                                batch_target_losses)

        return acc
예제 #3
0
    def test_h5(self, sess, batcher, test_batches=None):
        """ Evaluate the model on batches drawn from a batcher.

        Args:
          sess:         TensorFlow session
          batcher:      Batcher object to provide data; its next() result is
            unpacked into four values and iteration ends when the first
            one is None.
          test_batches: Maximum number of batches to evaluate (None for all).

        Returns:
          acc:          Accuracy object built from the stacked targets,
            predictions, an all-False mask, and size-weighted mean losses.
        """
        # switch to test mode; the returned dict doubles as the feed dict
        feed = self.set_mode("test")

        # per-batch accumulators
        pred_chunks = []
        target_chunks = []
        na_chunks = []
        losses = []
        per_target_losses = []
        seq_counts = []

        n_done = 0
        x_batch, y_batch, _, n_keep = batcher.next()

        while x_batch is not None:
            # stop once the requested number of batches has been processed
            if test_batches is not None and n_done >= test_batches:
                break

            # feed this batch
            feed[self.inputs_ph] = x_batch
            feed[self.targets_ph] = y_batch

            # evaluate targets, predictions and both losses in one run
            fetched = sess.run(
                [
                    self.targets_eval,
                    self.preds_eval_loss,
                    self.loss_eval,
                    self.loss_eval_targets,
                ],
                feed_dict=feed,
            )
            batch_targets, batch_preds, batch_loss, batch_target_loss = fetched

            # keep only the first n_keep rows of each output
            pred_chunks.append(batch_preds[:n_keep, :, :].astype("float16"))
            target_chunks.append(
                batch_targets[:n_keep, :, :].astype("float16"))
            na_chunks.append(
                np.zeros([n_keep, self.preds_length], dtype="bool"))

            # record losses together with the weight of this batch
            losses.append(batch_loss)
            per_target_losses.append(batch_target_loss)
            seq_counts.append(n_keep)

            # advance
            n_done += 1
            x_batch, y_batch, _, n_keep = batcher.next()

        # leave the batcher ready for reuse
        batcher.reset()

        # stack the per-batch pieces
        targets = np.concatenate(target_chunks, axis=0)
        preds = np.concatenate(pred_chunks, axis=0)
        targets_na = np.concatenate(na_chunks, axis=0)

        # batch-size-weighted means, computed in float64
        mean_loss = np.average(np.array(losses, dtype="float64"),
                               weights=seq_counts)
        mean_target_losses = np.average(
            np.array(per_target_losses, dtype="float64"),
            axis=0,
            weights=seq_counts)

        return accuracy.Accuracy(targets, preds, targets_na, mean_loss,
                                 mean_target_losses)
예제 #4
0
    def test_tfr(self,
                 sess,
                 dataset,
                 handle_ph=None,
                 test_batches=None,
                 sample=1.0):
        """ Evaluate the model on a test set streamed from a dataset.

        Args:
          sess:           TensorFlow session
          dataset:        Dataset; its num_seqs attribute (and handle, when
            handle_ph is given) is read.
          handle_ph:      Dataset handle placeholder; when not None it is
            fed with dataset.handle.
          test_batches:   Maximum number of batches to evaluate (None for
            no limit).
          sample:         Fraction of sequence positions whose
            predictions/targets are kept.

        Returns:
          acc:          Accuracy object
        """
        feed = self.set_mode("test")
        if handle_ph is not None:
            feed[handle_ph] = dataset.handle

        # number of sequences to reserve room for
        total_seqs = dataset.num_seqs
        if test_batches is not None:
            total_seqs = min(total_seqs, test_batches * self.hp.batch_size)

        # number of positions kept per sequence
        kept_positions = int(np.round(sample * self.preds_length))

        # prediction/target storage is allocated on the first batch, once
        # the number of targets is known; the mask can be built right away
        pred_store = None
        target_store = None
        na_mask = np.zeros((total_seqs, kept_positions), dtype="bool")

        losses = []
        per_target_losses = []
        seq_counts = []

        n_batches = 0
        row = 0
        while test_batches is None or n_batches < test_batches:
            try:
                # evaluate targets, predictions and both losses
                fetched = sess.run(
                    [
                        self.targets_eval,
                        self.preds_eval_loss,
                        self.loss_eval,
                        self.loss_eval_targets,
                    ],
                    feed_dict=feed,
                )
                batch_targets, batch_preds, batch_loss, batch_target_loss = (
                    fetched)
                n_rows, _, n_targets = batch_preds.shape

                # first batch: allocate the output arrays
                if pred_store is None:
                    store_shape = (total_seqs, kept_positions, n_targets)
                    pred_store = np.zeros(store_shape, dtype="float16")
                    target_store = np.zeros(store_shape, dtype="float16")

                stop = row + n_rows
                if kept_positions < self.preds_length:
                    # draw a fresh sorted subset of positions for this batch
                    keep = np.sort(
                        np.random.choice(np.arange(self.preds_length),
                                         size=kept_positions,
                                         replace=False))
                    pred_store[row:stop] = batch_preds[:, keep, :]
                    target_store[row:stop] = batch_targets[:, keep, :]
                else:
                    # keep every position; the mask is already all False
                    pred_store[row:stop] = batch_preds
                    target_store[row:stop] = batch_targets

                # record losses together with the weight of this batch
                losses.append(batch_loss)
                per_target_losses.append(batch_target_loss)
                seq_counts.append(n_rows)

                n_batches += 1
                row = stop

            except tf.errors.OutOfRangeError:
                # input exhausted
                break

        # batch-size-weighted means, computed in float64
        mean_loss = np.average(np.array(losses, dtype="float64"),
                               weights=seq_counts)
        mean_target_losses = np.average(
            np.array(per_target_losses, dtype="float64"),
            axis=0,
            weights=seq_counts)

        return accuracy.Accuracy(target_store, pred_store, na_mask, mean_loss,
                                 mean_target_losses)
예제 #5
0
    def test_from_data_ops(self,
                           sess,
                           rc=False,
                           shifts=[0],
                           mc_n=0,
                           num_test_batches=0):
        """ Compute model accuracy on a test set, where data is loaded from a queue.

        Args:
          sess:         TensorFlow session
          rc:           Accepted for signature compatibility; currently
            ignored (see TODO below).
          shifts:       Accepted for signature compatibility; currently
            ignored and never modified.
          mc_n:         Accepted for signature compatibility; currently
            ignored.
          num_test_batches: if > 0, only use this many test batches;
            otherwise (0, negative or None) keep evaluating until the
            session raises tf.errors.OutOfRangeError.

        Returns:
          acc:          Accuracy object

        Raises:
          ValueError: if no batch could be evaluated.
        """

        # TODO(dbelanger) this ignores rc and shift ensembling for now.
        # Accuracy will be slightly lower than if we had used this.
        # The rc and shift data augmentation need to be pulled into the graph.

        fd = self.set_mode('test')

        # a non-positive (or missing) limit means "no limit", as documented
        if num_test_batches is not None and num_test_batches > 0:
            batch_limit = num_test_batches
        else:
            batch_limit = None

        # initialize prediction and target arrays
        preds = []
        targets = []
        targets_na = []

        batch_losses = []
        batch_target_losses = []

        Nb = self.batch_size
        batch_count = 0
        data_available = True
        while data_available and (batch_limit is None
                                  or batch_count < batch_limit):
            try:
                # make non-ensembled predictions; self.targets and
                # self.targets_na stay in the fetch list so the set of
                # executed ops is unchanged, but their values are unused
                targets_batch, preds_batch, loss_batch, _, _ = sess.run(
                    [
                        self.targets_op, self.preds_op, self.loss_op,
                        self.targets, self.targets_na
                    ],
                    feed_dict=fd)
            except tf.errors.OutOfRangeError:
                # input exhausted
                data_available = False
            else:
                batch_count += 1

                # keep at most Nb rows
                preds_batch = preds_batch[:Nb, :, :]
                targets_batch = targets_batch[:Nb, :, :]

                preds.append(preds_batch.astype('float16'))
                targets.append(targets_batch.astype('float16'))

                # size the mask from the rows actually kept so it always
                # lines up with preds/targets
                targets_na.append(
                    np.zeros([preds_batch.shape[0], self.preds_length],
                             dtype='bool'))

                # accumulate loss; per-target losses are not fetched here,
                # so the overall batch loss stands in for them
                batch_losses.append(loss_batch)
                batch_target_losses.append(loss_batch)

        if not preds:
            raise ValueError('No test batches were evaluated.')

        targets = np.concatenate(targets, axis=0)
        preds = np.concatenate(preds, axis=0)
        targets_na = np.concatenate(targets_na, axis=0)

        # mean across batches
        batch_losses = np.mean(batch_losses)
        batch_target_losses = np.array(batch_target_losses).mean(axis=0)

        # instantiate accuracy object
        acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                                batch_target_losses)

        return acc
예제 #6
0
  def test_tfr(self, sess, test_batches=None):
    """ Evaluate the model on a queue-fed test set, showing a progress bar.

        Args:
          sess:         TensorFlow session
          test_batches: Maximum number of batches to evaluate (None for no
            limit); also used as the progress bar total.

        Returns:
          acc:          Accuracy object
    """
    feed = self.set_mode('test')

    # per-batch accumulators
    pred_chunks = []
    target_chunks = []
    na_chunks = []
    losses = []
    per_target_losses = []
    seq_counts = []

    n_batches = 0
    loss_avg = RunningAverage()

    with tqdm(total=test_batches) as progress:
      while test_batches is None or n_batches < test_batches:
        try:
          # evaluate targets, predictions and both losses
          fetched = sess.run(
              [self.targets_eval, self.preds_eval,
               self.loss_eval, self.loss_eval_targets],
              feed_dict=feed)
          batch_targets, batch_preds, batch_loss, batch_target_loss = fetched
          n_rows = batch_preds.shape[0]

          # store outputs plus an all-False mask with one row per sequence
          pred_chunks.append(batch_preds.astype('float16'))
          target_chunks.append(batch_targets.astype('float16'))
          na_chunks.append(np.zeros([n_rows, self.preds_length], dtype='bool'))

          # record losses together with the weight of this batch
          losses.append(batch_loss)
          per_target_losses.append(batch_target_loss)
          seq_counts.append(n_rows)

          # advance the counter and refresh the progress display
          n_batches += 1
          loss_avg.update(batch_loss)
          progress.set_postfix(loss='{:05.3f}'.format(loss_avg()))
          progress.update()

        except tf.errors.OutOfRangeError:
          # input exhausted
          break

    # stack the per-batch pieces
    targets = np.concatenate(target_chunks, axis=0)
    preds = np.concatenate(pred_chunks, axis=0)
    targets_na = np.concatenate(na_chunks, axis=0)

    # batch-size-weighted means, computed in float64
    mean_loss = np.average(np.array(losses, dtype='float64'),
                           weights=seq_counts)
    mean_target_losses = np.average(
        np.array(per_target_losses, dtype='float64'),
        axis=0, weights=seq_counts)

    return accuracy.Accuracy(targets, preds, targets_na,
                             mean_loss, mean_target_losses)