Example #1
def write_fastq(forward_path, reverse_path, fastq_reads):
  """Write forward and reverse reads into a pair of FASTQ files.

  Args:
    forward_path: path to which to save forward reads.
    reverse_path: path to which to save reverse reads.
    fastq_reads: iterable of Read objects.
  """
  with gfile.Open(forward_path, "w") as forward:
    with gfile.Open(reverse_path, "w") as reverse:
      for read in fastq_reads:
        comp_sequence = dna.reverse_complement(read.sequence)
        forward.write("@%s\n%s\n+\n%s\n" %
                      (read.title, read.sequence, read.quality))
        reverse.write("@%s\n%s\n+\n%s\n" %
                      (read.title, comp_sequence, read.quality[::-1]))
Example #2
def history_of_iter_dir(iter_dir, can_write_cache=False):
    execution_data_iter_dir = os.path.join(
        iter_dir.replace('results', 'execution_data'), 'eval')
    if not gfile.IsDirectory(execution_data_iter_dir):
        return None

    test_acc = None
    test_iter = None
    for events_file in gfile.ListDirectory(execution_data_iter_dir):
        if not events_file.startswith('events.out'):
            continue
        for e in tf.train.summary_iterator(
                os.path.join(execution_data_iter_dir, events_file)):
            for v in e.summary.value:
                if v.tag == 'accuracy' or v.tag == 'top_1_accuracy':
                    if test_iter is None or e.step > test_iter:
                        test_iter = e.step
                        test_acc = v.simple_value

    try:
        with gfile.Open(os.path.join(iter_dir, 'density_ratio')) as f:
            density_ratio = float(f.read())
    except Exception:
        # The density_ratio file is missing, unreadable, or malformed; fall back to 1.0.
        density_ratio = 1.0

    res = IterDatum(
        iter=os.path.basename(iter_dir),
        density_ratio=density_ratio,
        test_acc=test_acc,
    )

    if can_write_cache and test_acc is not None:
        write_iter(iter_dir, density_ratio, test_acc)
        # Cache the computed datum next to the iteration directory for later runs.
        plot_cache = os.path.join(iter_dir, 'plot_cache.pkl')
        with gfile.Open(plot_cache, 'wb') as f:
            pickle.dump(res, f)
    return res
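The heart of the function above is scanning TensorFlow event files for the accuracy summary at the highest recorded step. A self-contained sketch of just that step, assuming local paths (os.listdir stands in for gfile.ListDirectory) and the TF 1.x tf.train.summary_iterator API:

import os
import tensorflow as tf

def latest_accuracy(events_dir, tags=('accuracy', 'top_1_accuracy')):
    """Return the value of a matching summary tag at the highest step seen."""
    best_step, best_value = None, None
    for name in os.listdir(events_dir):
        if not name.startswith('events.out'):
            continue
        for event in tf.train.summary_iterator(os.path.join(events_dir, name)):
            for value in event.summary.value:
                if value.tag in tags and (best_step is None or event.step > best_step):
                    best_step, best_value = event.step, value.simple_value
    return best_value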
Example #3
      def update_best_model(path, cur_epoch):
        """Update the records of the model with the lowest validation error.

        Args:
          path: the path to the checkpoint of the current model.
          cur_epoch: an integer giving the current epoch.
        """

        cur_checkpoint = path
        cur_checkpoint_meta = six.ensure_str(cur_checkpoint) + '.meta'

        gfile.Copy(cur_train_report, best_train_report, overwrite=True)
        gfile.Copy(cur_valid_report, best_valid_report, overwrite=True)
        gfile.Copy(cur_checkpoint, best_checkpoint, overwrite=True)
        gfile.Copy(cur_checkpoint_meta, best_checkpoint_meta, overwrite=True)
        with gfile.Open(best_epoch_file, 'w') as f:
          f.write(str(cur_epoch)+'\n')
Example #4
def trial_datum_of_trial(experiment_dir, trial):
    plot_cache = os.path.join(experiment_dir, trial, 'plot_cache.pkl')
    if gfile.Exists(plot_cache):
        with gfile.Open(plot_cache, 'rb') as f:
            return pickle.loads(f.read())

    iter_dirs = sorted(
        iter_dirs_of_trial_dir(os.path.join(experiment_dir, trial)),
        key=lambda x: int(iter_re.match(os.path.basename(x)).group('iter')))

    pool = mp.Pool(5)

    res = TrialDatum(
        trial=trial,
        iter_data=list(
            filter(
                lambda x: x is not None,
                pool.map(
                    iter_datum_of_iter_dir,
                    map(lambda x: (x[1], x[0] < len(iter_dirs) - 1),
                        enumerate(iter_dirs))))),
    )
    pool.close()
    return res
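Because multiprocessing.Pool.map passes a single argument per call, the code above bundles each iteration directory with a can_write_cache flag into a tuple, marking every directory except the last as eligible for cache writing. A small sketch of that pairing, using placeholder directory names:

iter_dirs = ['iter_0000', 'iter_0001', 'iter_0002']  # placeholder names
args = [(d, i < len(iter_dirs) - 1) for i, d in enumerate(iter_dirs)]
# args == [('iter_0000', True), ('iter_0001', True), ('iter_0002', False)]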
Example #5
def iter_datum_of_iter_dir(iter_dir_and_can_write_cache,
                           verbose=True,
                           ignore_cache=False):
    iter_dir, can_write_cache = iter_dir_and_can_write_cache

    if not ignore_cache:
        res = read_iter(iter_dir)
        if res:
            return IterDatum(
                iter=os.path.basename(res[0]),
                density_ratio=res[1],
                test_acc=res[2],
            )
        plot_cache = os.path.join(iter_dir, 'plot_cache.pkl')
        if gfile.Exists(plot_cache):
            if verbose:
                print('PLOT CACHE EXISTS: {}'.format(plot_cache))
            with gfile.Open(plot_cache, 'rb') as f:
                try:
                    it = pickle.loads(f.read())
                    write_iter(iter_dir, it.density_ratio, it.test_acc)
                    return it
                except Exception:
                    # Corrupt or unreadable cache; remove it and recompute below.
                    gfile.Remove(plot_cache)

    execution_data_iter_dir = os.path.join(
        iter_dir.replace('results', 'execution_data'), 'eval')
    if not gfile.IsDirectory(execution_data_iter_dir):
        return None

    test_acc = None
    test_iter = None
    for events_file in gfile.ListDirectory(execution_data_iter_dir):
        if not events_file.startswith('events.out'):
            continue
        for e in tf.train.summary_iterator(
                os.path.join(execution_data_iter_dir, events_file)):
            for v in e.summary.value:
                if v.tag == 'accuracy' or v.tag == 'top_1_accuracy':
                    if test_iter is None or e.step > test_iter:
                        test_iter = e.step
                        test_acc = v.simple_value

    if verbose:
        print(test_acc)

    try:
        with gfile.Open(os.path.join(iter_dir, 'density_ratio')) as f:
            density_ratio = float(f.read())
    except Exception:
        # The density_ratio file is missing, unreadable, or malformed; fall back to 1.0.
        density_ratio = 1.0

    res = IterDatum(
        iter=os.path.basename(iter_dir),
        density_ratio=density_ratio,
        test_acc=test_acc,
    )

    if can_write_cache and test_acc is not None:
        write_iter(iter_dir, density_ratio, test_acc)
        # with gfile.Open(plot_cache, 'w') as f:
        #     f.write('')
        #     f.flush()
        # with gfile.Open(plot_cache, 'wb') as f:
        #     pickle.dump(res, f)
    return res
Example #6
def run_training(hps,
                 experiment_proto,
                 train_dir,
                 train_input_paths,
                 val_input_paths,
                 tuner=None,
                 master='',
                 metrics_targets=None,
                 metrics_measures=None):
  """Main training function.

  Trains the model, writing checkpoints, reports, and logs under train_dir.

  Args:
    hps: tf.HParams with training parameters.
    experiment_proto: selection_pb2.Experiment proto for training.
    train_dir: str path to train directory.
    train_input_paths: List[str] giving paths to input sstables for training.
    val_input_paths: List[str] giving paths to input sstable(s) for validation.
    tuner: optional hp_tuner.HPTuner.
    master: optional string to pass to a tf.Supervisor.
    metrics_targets: String list of network targets to report metrics for.
    metrics_measures: Measurements about the performance of the network to
        report, e.g. 'auc/top_1p'.

  Returns:
    None.

  Raises:
    Error: if the hyperparameter combination in hps is infeasible and there is
    no tuner. (If the hyperparameter combination is infeasible and there is
    a tuner then the params are reported back to the tuner as infeasible.)
  """
  hps_infeasible, infeasible_reason = hps_is_infeasible(
      hps, experiment_proto.sequence_length)
  if hps_infeasible:
    if tuner:
      tuner.report_done(True, infeasible_reason)
      logger.info('report_done(infeasible=%r)', hps_infeasible)
      return
    else:
      raise Error('Hyperparams are infeasible: %s' % infeasible_reason)

  logger.info('Starting training.')
  if tuner:
    logger.info('Using tuner: loaded HParams from Vizier')
  else:
    logger.info('No tuner: using default HParams')
  logger.info('experiment_proto: %s', experiment_proto)
  logger.info('train_dir: %s', train_dir)
  logger.info('train_input_paths[0]: %s', train_input_paths[0])
  logger.info('val_input_paths[0]: %s', val_input_paths[0])
  logger.info('%r', list(hps.values()))
  generationinfo.to_file(os.path.join(train_dir, 'geninfo.pbtxt'))
  with gfile.Open(os.path.join(train_dir, config.hparams_name), 'w') as f:
    f.write(str(hps.to_proto()))

  eval_size = hps.eval_size or None

  def make_subdir(subdirectory_name):
    path = os.path.join(train_dir, subdirectory_name)
    gfile.MakeDirs(path)
    return path

  logger.info('Computing preprocessing statistics')
  # TODO(shoyer): move this over into preprocessing instead?
  experiment_proto = dataset_stats.compute_experiment_statistics(
      experiment_proto,
      train_input_paths,
      os.path.join(
          hps.input_dir,
          six.ensure_str(
              config.wetlab_experiment_train_pbtxt_path[hps.val_fold]) +
          '.wstats'),
      preprocess_mode=hps.preprocess_mode,
      max_size=eval_size,
      logdir=make_subdir('compute-statistics'),
      save_stats=hps.save_stats)

  logger.info('Saving experiment proto with statistics')
  with gfile.Open(
      os.path.join(train_dir, config.wetlab_experiment_train_name), 'w') as f:
    f.write(str(experiment_proto))

  logger.debug(str(hps.to_proto()))
  logger.debug(hps.run_name)

  tr_entries = len(sstable.MergedSSTable(train_input_paths))
  logger.info('Training sstable size: %d', tr_entries)
  val_entries = len(sstable.MergedSSTable(val_input_paths))
  logger.info('Validation sstable size: %d', val_entries)

  epoch_size = hps.epoch_size or int(tr_entries * (1 + hps.ratio_random_dna))
  num_batches_per_epoch = int(float(epoch_size) / hps.mbsz)

  eval_ff.config_pandas_display(FLAGS.interactive_display)
  tr_evaluator = eval_ff.Evaluator(
      hps,
      experiment_proto,
      train_input_paths,
      make_subdir(config.experiment_training_dir),
      verbose=FLAGS.verbose_eval)
  val_evaluator = eval_ff.Evaluator(
      hps,
      experiment_proto,
      val_input_paths,
      make_subdir(config.experiment_validation_dir),
      verbose=FLAGS.verbose_eval)

  with tf.Graph().as_default():
    # we need to use the registered key 'hparams'
    tf.add_to_collection('hparams', hps)

    # TODO(shoyer): collect these into a Model class:
    dummy_inputs = data.dummy_inputs(
        experiment_proto,
        input_features=hps.input_features,
        kmer_k_max=hps.kmer_k_max,
        additional_output=six.ensure_str(hps.additional_output).split(','))
    output_layer = output_layers.create_output_layer(experiment_proto, hps)
    net = ff.FeedForward(dummy_inputs, output_layer.logit_axis, hps)

    trainer = FeedForwardTrainer(hps, net, output_layer, experiment_proto,
                                 train_input_paths)

    summary_writer = tf.summary.FileWriter(make_subdir('training'), flush_secs=30)

    # TODO(shoyer): file a bug to figure out why write_version=2 (now the
    # default) doesn't work.
    saver = tf.train.Saver(write_version=1)

    # We are always the chief since we do not do distributed training.
    # Every replica with a different task id is completely independent and all
    # must be their own chief.
    sv = tf.train.Supervisor(
        logdir=train_dir,
        is_chief=True,
        summary_writer=summary_writer,
        save_summaries_secs=10,
        save_model_secs=180,
        saver=saver)

    logger.info('Preparing session')

    train_report_dir = os.path.join(train_dir, config.experiment_training_dir)
    cur_train_report = os.path.join(train_report_dir,
                                    config.experiment_report_name)
    best_train_report = os.path.join(train_report_dir,
                                     config.experiment_best_report_name)

    valid_report_dir = os.path.join(train_dir, config.experiment_validation_dir)
    cur_valid_report = os.path.join(valid_report_dir,
                                    config.experiment_report_name)
    best_valid_report = os.path.join(valid_report_dir,
                                     config.experiment_best_report_name)

    best_checkpoint = os.path.join(train_dir, 'model.ckpt-lowest_val_loss')
    best_checkpoint_meta = best_checkpoint + '.meta'
    best_epoch_file = os.path.join(train_dir, 'best_epoch.txt')

    with sv.managed_session(master) as sess:

      logger.info('Starting queue runners')
      sv.start_queue_runners(sess)

      def save_and_evaluate():
        """Save and evaluate the current model.

        Returns:
          path: the path string to the checkpoint.
          summary_df: pandas.DataFrame storing the evaluation result on the
            validation dataset with rows for each output name and columns for
            each metric value
        """
        logger.info('Saving model checkpoint')
        path = sv.saver.save(
            sess,
            sv.save_path,
            global_step=sv.global_step,
            write_meta_graph=True)
        tr_evaluator.run(path, eval_size)
        summary_df, _ = val_evaluator.run_and_report(
            tuner,
            path,
            eval_size,
            metrics_targets=metrics_targets,
            metrics_measures=metrics_measures)
        return path, summary_df

      def update_best_model(path, cur_epoch):
        """Update the records of the model with the lowest validation error.

        Args:
          path: the path to the checkpoint of the current model.
          cur_epoch: an integer giving the current epoch.
        """

        cur_checkpoint = path
        cur_checkpoint_meta = six.ensure_str(cur_checkpoint) + '.meta'

        gfile.Copy(cur_train_report, best_train_report, overwrite=True)
        gfile.Copy(cur_valid_report, best_valid_report, overwrite=True)
        gfile.Copy(cur_checkpoint, best_checkpoint, overwrite=True)
        gfile.Copy(cur_checkpoint_meta, best_checkpoint_meta, overwrite=True)
        with gfile.Open(best_epoch_file, 'w') as f:
          f.write(str(cur_epoch)+'\n')

      def compare_with_best_model(checkpoint_path, summary_df, cur_epoch):
        logger.info('Comparing current val loss with the best model')

        if not gfile.Exists(best_train_report):
          logger.info('No best model saved. Adding current model...')
          update_best_model(checkpoint_path, cur_epoch)
        else:
          with gfile.GFile(best_valid_report) as f:
            with xarray.open_dataset(f) as best_ds:
              best_ds.load()
          cur_loss = summary_df['loss'].loc['mean']
          best_loss = best_ds['loss'].mean('output')
          logger.info('Current val loss:%f', cur_loss)
          logger.info('The best val loss:%f', best_loss)
          if cur_loss < best_loss:
            logger.info(
                'Current model has lower loss. Updating the best model.')
            update_best_model(checkpoint_path, cur_epoch)
          else:
            logger.info('The best model has lower loss.')

      logger.info('Running eval before starting training')
      save_and_evaluate()

      try:
        for cur_epoch in trainer.train(sess, hps.epochs, num_batches_per_epoch):
          checkpoint_path, val_summary_df = save_and_evaluate()
          if (cur_epoch+1) % hps.epoch_interval_to_save_best == 0:
            compare_with_best_model(checkpoint_path, val_summary_df, cur_epoch)
          if tuner and tuner.should_trial_stop():
            break
      except eval_ff.TrainingDivergedException as error:
        logger.error('Training diverged: %s', str(error))
        infeasible = True
      else:
        infeasible = False

      logger.info('Saving final checkpoint')
      sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

  if tuner:
    # should be at the very end of execution, to avoid possible race conditions
    tuner.report_done(infeasible=infeasible)
    logger.info('report_done(infeasible=%r)', infeasible)

  logger.info('Done.')