def test(self, sess, batcher, rc=False, shifts=None, mc_n=0, test_batches=None):
    """Compute model accuracy on a test set.

    Each batch is predicted through ``self._predict_ensemble`` with one
    ensemble entry per shift (two per shift when ``rc`` is set). The loss
    is then recomputed by feeding the ensembled prediction back in through
    ``self.preds_adhoc``.

    Args:
      sess: TensorFlow session.
      batcher: Batcher object to provide data. ``next()`` must return
        ``(Xb, Yb, NAb, Nb)`` with ``Xb`` set to None once the data is
        exhausted. The batcher is reset when the batch loop ends.
      rc: Average predictions from the forward and reverse complement
        sequences.
      shifts: Average predictions from sequence shifts left/right.
        Defaults to ``[0]``.
      mc_n: Monte Carlo iterations per rc/shift. Values <= 0 select the
        plain 'test' mode with a single iteration per rc/shift.
      test_batches: Number of test batches; None evaluates every batch.

    Returns:
      acc: Accuracy object.

    Raises:
      ValueError: If no batch was evaluated.
    """
    # avoid a mutable default argument; [0] is the historical default
    if shifts is None:
        shifts = [0]

    # determine ensemble iteration parameters: one (True, shift) entry per
    # shift, plus a (False, shift) entry when rc is requested
    ensemble_fwdrc = []
    ensemble_shifts = []
    for shift in shifts:
        ensemble_fwdrc.append(True)
        ensemble_shifts.append(shift)
        if rc:
            ensemble_fwdrc.append(False)
            ensemble_shifts.append(shift)

    # setup feed dict
    if mc_n > 0:
        fd = self.set_mode('test_mc')
    else:
        fd = self.set_mode('test')

        # co-opt the variable to represent
        # iterations per fwdrc/shift.
        mc_n = 1

    # initialize prediction and target arrays
    preds = []
    targets = []
    targets_na = []

    batch_losses = []
    batch_target_losses = []

    # get first batch
    Xb, Yb, NAb, Nb = batcher.next()

    batch_num = 0
    while Xb is not None and (test_batches is None or batch_num < test_batches):
        # make ensemble predictions (variance and per-member outputs unused)
        preds_batch, _, _ = self._predict_ensemble(
            sess, fd, Xb, ensemble_fwdrc, ensemble_shifts, mc_n)

        # add target info
        fd[self.targets] = Yb
        fd[self.targets_na] = NAb

        targets_na.append(np.zeros([Nb, self.preds_length], dtype='bool'))

        # recompute loss w/ ensembled prediction
        fd[self.preds_adhoc] = preds_batch
        targets_batch, loss_batch, target_losses_batch = sess.run(
            [self.targets_op, self.loss_adhoc, self.target_losses_adhoc],
            feed_dict=fd)

        # accumulate predictions and targets; only the first Nb rows are kept
        # (presumably the batch is padded beyond Nb - TODO confirm)
        if preds_batch.ndim == 3:
            preds.append(preds_batch[:Nb, :, :].astype('float16'))
            targets.append(targets_batch[:Nb, :, :].astype('float16'))

        else:
            # NOTE: this used to sit inside a `for qi in range(shape[3])` loop
            # that never used `qi`, so identical arrays were appended once per
            # quantile while targets_na grew once per batch. Append once.

            # TEMP, ideally this will be in the HDF5 and set previously
            self.quantile_means = np.geomspace(0.1, 256, 16)

            # softmax over the last axis; subtracting the max leaves the
            # result unchanged mathematically but prevents exp overflow
            logits = preds_batch[:Nb, :, :, :]
            exp_logits = np.exp(logits - np.max(logits, axis=3, keepdims=True))
            pred_probs_batch = exp_logits / np.sum(
                exp_logits, axis=3, keepdims=True)

            # expectation over quantile medians
            preds.append(np.dot(pred_probs_batch, self.quantile_means))

            # compare to quantile median (targets index quantile_means with
            # an offset of one)
            targets.append(self.quantile_means[targets_batch[:Nb, :, :] - 1])

        # accumulate loss
        batch_losses.append(loss_batch)
        batch_target_losses.append(target_losses_batch)

        # next batch
        Xb, Yb, NAb, Nb = batcher.next()
        batch_num += 1

    # reset batcher first so it is rewound even if array construction fails
    batcher.reset()

    if not preds:
        raise ValueError(
            'No test batches were evaluated; cannot compute accuracy.')

    # construct arrays
    targets = np.concatenate(targets, axis=0)
    preds = np.concatenate(preds, axis=0)
    targets_na = np.concatenate(targets_na, axis=0)

    # mean across batches
    batch_losses = np.mean(batch_losses)
    batch_target_losses = np.array(batch_target_losses).mean(axis=0)

    # instantiate accuracy object
    acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                            batch_target_losses)

    return acc
def test_from_data_ops(self, sess, test_batches=None):
    """Compute model accuracy on a test set, where data is loaded from a queue.

    Batches are pulled by running the model ops until either
    ``test_batches`` batches have been evaluated or the session raises
    ``tf.errors.OutOfRangeError``.

    Args:
      sess: TensorFlow session.
      test_batches: Number of test batches to use; None runs until the
        data is exhausted.

    Returns:
      acc: Accuracy object.

    Raises:
      ValueError: If no batch was evaluated.
    """
    # TODO(dbelanger) this ignores rc and shift ensembling for now.
    # Accuracy will be slightly lower than if we had used this.
    # The rc and shift data augmentation need to be pulled into the graph.

    fd = self.set_mode('test')

    # initialize prediction and target arrays
    preds = []
    targets = []
    targets_na = []

    batch_losses = []
    batch_target_losses = []

    # self.targets / self.targets_na are still fetched, as before, but their
    # values are not needed here
    run_ops = [
        self.targets_op, self.preds_op, self.loss_op, self.target_losses,
        self.targets, self.targets_na
    ]

    batch_num = 0
    while test_batches is None or batch_num < test_batches:
        # make non-ensembled predictions; only the session call can signal
        # that the input pipeline is exhausted
        try:
            run_returns = sess.run(run_ops, feed_dict=fd)
        except tf.errors.OutOfRangeError:
            break

        targets_batch, preds_batch, loss_batch, target_losses_batch = (
            run_returns[:4])

        # accumulate predictions and targets
        preds.append(preds_batch.astype('float16'))
        targets.append(targets_batch.astype('float16'))

        # size the mask from the rows actually returned, not the configured
        # batch size, so a short batch cannot desynchronise the arrays
        targets_na.append(
            np.zeros([preds_batch.shape[0], self.preds_length], dtype='bool'))

        # accumulate loss
        batch_losses.append(loss_batch)
        batch_target_losses.append(target_losses_batch)

        batch_num += 1

    if not preds:
        raise ValueError(
            'No test batches were evaluated; cannot compute accuracy.')

    # construct arrays
    targets = np.concatenate(targets, axis=0)
    preds = np.concatenate(preds, axis=0)
    targets_na = np.concatenate(targets_na, axis=0)

    # mean across batches
    batch_losses = np.mean(batch_losses)
    batch_target_losses = np.array(batch_target_losses).mean(axis=0)

    # instantiate accuracy object
    acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                            batch_target_losses)

    return acc
def test_h5(self, sess, batcher, test_batches=None):
    """Evaluate the model on batches drawn from a batcher.

    Args:
      sess: TensorFlow session
      batcher: Batcher object to provide data; ``next()`` returns a
        4-tuple whose first item is None once the data is exhausted.
      test_batches: Number of test batches; None uses every batch.

    Returns:
      acc: Accuracy object built from the collected targets, predictions
        and batch-size-weighted losses.
    """
    feed = self.set_mode("test")

    # per-batch accumulators
    pred_chunks = []
    target_chunks = []
    na_chunks = []
    loss_per_batch = []
    target_loss_per_batch = []
    rows_per_batch = []

    batches_done = 0
    batch = batcher.next()
    while True:
        seqs, labels, _na, n_rows = batch

        # stop when the batcher is exhausted ...
        if seqs is None:
            break
        # ... or when the requested number of batches has been evaluated
        if not (test_batches is None or batches_done < test_batches):
            break

        # point the feed dict at the current batch
        feed[self.inputs_ph] = seqs
        feed[self.targets_ph] = labels

        # evaluate targets, predictions and both loss summaries in one run
        batch_targets, batch_preds, batch_loss, batch_target_losses = sess.run(
            [
                self.targets_eval,
                self.preds_eval_loss,
                self.loss_eval,
                self.loss_eval_targets,
            ],
            feed_dict=feed,
        )

        # keep only the first n_rows rows of the batch
        pred_chunks.append(batch_preds[:n_rows, :, :].astype("float16"))
        target_chunks.append(batch_targets[:n_rows, :, :].astype("float16"))
        na_chunks.append(np.zeros([n_rows, self.preds_length], dtype="bool"))

        loss_per_batch.append(batch_loss)
        target_loss_per_batch.append(batch_target_losses)
        rows_per_batch.append(n_rows)

        batches_done += 1
        batch = batcher.next()

    # rewind the batcher for the next user
    batcher.reset()

    # stitch the per-batch pieces together
    all_targets = np.concatenate(target_chunks, axis=0)
    all_preds = np.concatenate(pred_chunks, axis=0)
    all_na = np.concatenate(na_chunks, axis=0)

    # average the losses, weighting each batch by its row count
    mean_loss = np.average(
        np.array(loss_per_batch, dtype="float64"), weights=rows_per_batch)
    mean_target_losses = np.average(
        np.array(target_loss_per_batch, dtype="float64"),
        axis=0,
        weights=rows_per_batch,
    )

    return accuracy.Accuracy(
        all_targets, all_preds, all_na, mean_loss, mean_target_losses)
def test_tfr(self, sess, dataset, handle_ph=None, test_batches=None, sample=1.0):
    """Compute model accuracy on a test set, where data is loaded from a queue.

    Output arrays are pre-allocated from ``dataset.num_seqs`` (capped by
    ``test_batches * self.hp.batch_size`` when a limit is given) and filled
    batch by batch until the limit is reached or the session raises
    ``tf.errors.OutOfRangeError``.

    Args:
      sess: TensorFlow session.
      dataset: Dataset; must expose ``num_seqs`` and, when ``handle_ph`` is
        given, ``handle``.
      handle_ph: Dataset handle placeholder; fed with ``dataset.handle``
        when not None.
      test_batches: Number of test batches to use; None uses all.
      sample: Fraction of the ``self.preds_length`` sequence positions to
        save predictions/targets for. Positions are drawn at random,
        independently for every batch.

    Returns:
      acc: Accuracy object.

    Raises:
      ValueError: If no batch was evaluated.
    """
    fd = self.set_mode("test")

    if handle_ph is not None:
        fd[handle_ph] = dataset.handle

    # upper bound on the number of sequences that will be stored
    if test_batches is None:
        num_seqs = dataset.num_seqs
    else:
        num_seqs = min(dataset.num_seqs, test_batches * self.hp.batch_size)

    # need to wait for variable num_targets
    sample_length = int(np.round(sample * self.preds_length))
    preds = None
    targets = None
    targets_na = np.zeros((num_seqs, sample_length), dtype="bool")

    batch_losses = []
    batch_target_losses = []
    batch_sizes = []

    run_ops = [
        self.targets_eval,
        self.preds_eval_loss,
        self.loss_eval,
        self.loss_eval_targets,
    ]

    batch_num = 0
    si = 0  # index of the next free row in the output arrays
    while test_batches is None or batch_num < test_batches:
        # make predictions; only the session call can signal exhaustion
        try:
            run_returns = sess.run(run_ops, feed_dict=fd)
        except tf.errors.OutOfRangeError:
            break

        targets_batch, preds_batch, loss_batch, target_losses_batch = (
            run_returns)
        batch_size, _, num_targets = preds_batch.shape

        # w/ target knowledge, create arrays
        if preds is None:
            preds = np.zeros((num_seqs, sample_length, num_targets),
                             dtype="float16")
            targets = np.zeros((num_seqs, sample_length, num_targets),
                               dtype="float16")

        # accumulate predictions and targets
        if sample_length < self.preds_length:
            sampled_indexes = np.random.choice(
                np.arange(self.preds_length), size=sample_length,
                replace=False)
            sampled_indexes.sort()
            preds[si:si + batch_size] = preds_batch[:, sampled_indexes, :]
            targets[si:si + batch_size] = targets_batch[:, sampled_indexes, :]
        else:
            preds[si:si + batch_size] = preds_batch
            targets[si:si + batch_size] = targets_batch

        # targets_na is already zero

        # accumulate loss
        batch_losses.append(loss_batch)
        batch_target_losses.append(target_losses_batch)
        batch_sizes.append(batch_size)

        batch_num += 1
        si += batch_size

    if preds is None:
        raise ValueError(
            "No test batches were evaluated; cannot compute accuracy.")

    # the data may run out before num_seqs rows were written; drop the
    # unfilled, all-zero tail so it cannot be scored as real data
    if si < num_seqs:
        preds = preds[:si]
        targets = targets[:si]
        targets_na = targets_na[:si]

    # mean across batches, weighted by batch size
    batch_losses = np.array(batch_losses, dtype="float64")
    batch_losses = np.average(batch_losses, weights=batch_sizes)
    batch_target_losses = np.array(batch_target_losses, dtype="float64")
    batch_target_losses = np.average(batch_target_losses, axis=0,
                                     weights=batch_sizes)

    # instantiate accuracy object
    acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                            batch_target_losses)

    return acc
def test_from_data_ops(self, sess, rc=False, shifts=None, mc_n=0, num_test_batches=0):
    """Compute model accuracy on a test set, where data is loaded from a queue.

    Args:
      sess: TensorFlow session.
      rc: Average predictions from the forward and reverse complement
        sequences. Currently ignored (see TODO below).
      shifts: Average predictions from sequence shifts left/right.
        Currently ignored (see TODO below).
      mc_n: Monte Carlo iterations per rc/shift. Currently ignored.
      num_test_batches: if > 0, only use this many test batches; otherwise
        run until the session raises ``tf.errors.OutOfRangeError``. A data
        pipeline that never terminates therefore needs a positive value.

    Returns:
      acc: Accuracy object.

    Raises:
      ValueError: If no batch was evaluated.
    """
    # TODO(dbelanger) this ignores rc and shift ensembling for now.
    # Accuracy will be slightly lower than if we had used this.
    # The rc and shift data augmentation need to be pulled into the graph.

    fd = self.set_mode('test')

    # initialize prediction and target arrays
    preds = []
    targets = []
    targets_na = []

    batch_losses = []
    batch_target_losses = []

    # self.targets / self.targets_na are still fetched, as before, but their
    # values are not needed here
    run_ops = [
        self.targets_op, self.preds_op, self.loss_op, self.targets,
        self.targets_na
    ]

    Nb = self.batch_size

    # a non-positive limit means "no limit"; previously the default of 0 ran
    # zero batches and then failed while concatenating empty lists
    batch_limit = num_test_batches if num_test_batches > 0 else None

    batch_count = 0
    while batch_limit is None or batch_count < batch_limit:
        # make non-ensembled predictions; stop cleanly once the input
        # pipeline is exhausted
        try:
            targets_batch, preds_batch, loss_batch, _, _ = sess.run(
                run_ops, feed_dict=fd)
        except tf.errors.OutOfRangeError:
            break
        batch_count += 1

        preds_rows = preds_batch[:Nb, :, :]
        targets_rows = targets_batch[:Nb, :, :]

        # size the mask from the rows actually kept so a short final batch
        # stays aligned with preds/targets
        targets_na.append(
            np.zeros([preds_rows.shape[0], self.preds_length], dtype='bool'))

        preds.append(preds_rows.astype('float16'))
        targets.append(targets_rows.astype('float16'))

        # accumulate loss; no per-target loss is fetched here, so the batch
        # loss stands in for the target losses as well
        batch_losses.append(loss_batch)
        batch_target_losses.append(loss_batch)

    if not preds:
        raise ValueError(
            'No test batches were evaluated; cannot compute accuracy.')

    # construct arrays
    targets = np.concatenate(targets, axis=0)
    preds = np.concatenate(preds, axis=0)
    targets_na = np.concatenate(targets_na, axis=0)

    # mean across batches
    batch_losses = np.mean(batch_losses)
    batch_target_losses = np.array(batch_target_losses).mean(axis=0)

    # instantiate accuracy object
    acc = accuracy.Accuracy(targets, preds, targets_na, batch_losses,
                            batch_target_losses)

    return acc
def test_tfr(self, sess, test_batches=None):
    """Evaluate the model on queue-fed test data, showing a progress bar.

    Args:
      sess: TensorFlow session
      test_batches: Number of test batches to use; None runs until the
        session raises ``tf.errors.OutOfRangeError``.

    Returns:
      acc: Accuracy object built from the collected targets, predictions
        and batch-size-weighted losses.
    """
    feed = self.set_mode('test')

    # per-batch accumulators
    pred_chunks = []
    target_chunks = []
    na_chunks = []
    loss_per_batch = []
    target_loss_per_batch = []
    rows_per_batch = []

    batches_done = 0
    running_loss = RunningAverage()

    with tqdm(total=test_batches) as progress:
        while True:
            # honour the optional cap on the number of batches
            if not (test_batches is None or batches_done < test_batches):
                break

            try:
                # evaluate targets, predictions and both loss summaries
                batch_targets, batch_preds, batch_loss, batch_target_losses = (
                    sess.run(
                        [self.targets_eval, self.preds_eval, self.loss_eval,
                         self.loss_eval_targets],
                        feed_dict=feed))

                # store this batch's outputs
                pred_chunks.append(batch_preds.astype('float16'))
                target_chunks.append(batch_targets.astype('float16'))
                n_rows = batch_preds.shape[0]
                na_chunks.append(
                    np.zeros([n_rows, self.preds_length], dtype='bool'))

                loss_per_batch.append(batch_loss)
                target_loss_per_batch.append(batch_target_losses)
                rows_per_batch.append(batch_preds.shape[0])

                batches_done += 1

                # refresh the progress bar with the running mean loss
                running_loss.update(batch_loss)
                progress.set_postfix(loss='{:05.3f}'.format(running_loss()))
                progress.update()

            except tf.errors.OutOfRangeError:
                # input pipeline exhausted
                break

    # stitch the per-batch pieces together
    all_targets = np.concatenate(target_chunks, axis=0)
    all_preds = np.concatenate(pred_chunks, axis=0)
    all_na = np.concatenate(na_chunks, axis=0)

    # average the losses, weighting each batch by its row count
    mean_loss = np.average(
        np.array(loss_per_batch, dtype='float64'), weights=rows_per_batch)
    mean_target_losses = np.average(
        np.array(target_loss_per_batch, dtype='float64'),
        axis=0,
        weights=rows_per_batch)

    return accuracy.Accuracy(
        all_targets, all_preds, all_na, mean_loss, mean_target_losses)