Example 1
def test_TrainingLogger_create_file():
    """Test that EpochEndCallback() produces a file."""
    with tempfile.TemporaryDirectory() as d:
        logger = telemetry.TrainingLogger(pathlib.Path(d))
        logger.KerasEpochBeginCallback(0, {})
        logger.KerasEpochEndCallback(0, {"loss": 1})
        # Note that one is added to the epoch number.
        assert (pathlib.Path(d) / "epoch_001_telemetry.pbtxt").is_file()
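For reference, the assertion above implies a 1-based, zero-padded epoch number in the telemetry filename. A minimal sketch of that naming convention, inferred from the test; the exact formatting code inside TrainingLogger is an assumption:

# Hypothetical reconstruction of the filename convention checked above: the
# Keras epoch index is 0-based, while the file name uses a 1-based, 3-digit
# epoch number.
epoch = 0
filename = f"epoch_{epoch + 1:03d}_telemetry.pbtxt"
assert filename == "epoch_001_telemetry.pbtxt"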
Example 2
def test_TrainingLogger_EpochTelemetry():
    """Test that EpochTelemetry() returns protos."""
    with tempfile.TemporaryDirectory() as d:
        logger = telemetry.TrainingLogger(pathlib.Path(d))
        assert not logger.EpochTelemetry()
        logger.KerasEpochBeginCallback(0, {})
        logger.KerasEpochEndCallback(0, {"loss": 1})
        assert len(logger.EpochTelemetry()) == 1
        logger.KerasEpochBeginCallback(1, {})
        logger.KerasEpochEndCallback(1, {"loss": 1})
        assert len(logger.EpochTelemetry()) == 2
Example 3
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    if not FLAGS.dataset_root:
        raise app.UsageError("--dataset_root must be a directory")
    dataset_root = pathlib.Path(FLAGS.dataset_root)
    assert dataset_root.is_dir()
    assert (dataset_root / "training").is_dir()
    assert (dataset_root / "validation").is_dir()
    assert (dataset_root / "testing").is_dir()

    if not FLAGS.model_path:
        raise app.UsageError("--model_path must be set")
    model_path = pathlib.Path(FLAGS.model_path)
    model_path.mkdir(parents=True, exist_ok=True)

    training_protos = LoadPositiveNegativeProtos(dataset_root / "training")
    validation_protos = LoadPositiveNegativeProtos(dataset_root / "validation")
    testing_protos = LoadPositiveNegativeProtos(dataset_root / "testing")
    app.Log(
        1,
        "Number of training examples: %s.",
        humanize.Commas(
            len(training_protos.positive) + len(training_protos.negative)),
    )
    app.Log(
        1,
        "Number of validation examples: %s.",
        humanize.Commas(
            len(validation_protos.positive) + len(validation_protos.negative)),
    )
    app.Log(
        1,
        "Number of testing examples: %s.",
        humanize.Commas(
            len(testing_protos.positive) + len(testing_protos.negative)),
    )

    sequence_length = FLAGS.sequence_length
    text = "\n\n".join([
        p.src for p in training_protos.positive + training_protos.negative +
        validation_protos.positive + validation_protos.negative +
        testing_protos.positive + testing_protos.negative
    ])
    app.Log(1, "Deriving atomizer from %s chars.", humanize.Commas(len(text)))
    atomizer = atomizers.AsciiCharacterAtomizer.FromText(text)
    app.Log(1, "Vocabulary size: %s.", humanize.Commas(len(atomizer.vocab)))
    app.Log(1, "Pickled atomizer to %s.", model_path / "atomizer.pkl")
    with open(model_path / "atomizer.pkl", "wb") as f:
        pickle.dump(atomizer, f)

    app.Log(1, "Encoding training corpus")
    x, y = ProtosToModelData(training_protos, sequence_length, atomizer)

    validation_data = None
    if validation_protos.positive:
        app.Log(1, "Encoding validation corpus")
        validation_data = ProtosToModelData(validation_protos, sequence_length,
                                            atomizer)

    app.Log(1, "Encoding test corpus")
    test_x, test_y = ProtosToModelData(testing_protos, sequence_length,
                                       atomizer)

    np.random.seed(FLAGS.seed)
    app.Log(1, "Building Keras model")
    model = BuildKerasModel(
        sequence_length=sequence_length,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        dnn_size=FLAGS.dnn_size,
        atomizer=atomizer,
    )
    app.Log(1, "Training model")

    def OnEpochEnd(epoch, logs):
        """End-of-epoch model evaluate."""
        del logs
        app.Log(1, "Evaluating model at epoch %d", epoch)
        score, accuracy = model.evaluate(test_x,
                                         Encode1HotLabels(test_y),
                                         batch_size=FLAGS.batch_size,
                                         verbose=0)
        app.Log(1, "Score: %.2f, Accuracy: %.2f", score * 100, accuracy * 100)

    logger = telemetry.TrainingLogger(pathlib.Path(FLAGS.model_path))
    model.fit(
        x,
        Encode1HotLabels(y),
        epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        verbose=True,
        shuffle=True,
        validation_data=validation_data,
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                FLAGS.model_path + "/weights_{epoch:03d}.hdf5",
                verbose=1,
                mode="min",
                save_best_only=False,
            ),
            keras.callbacks.LambdaCallback(on_epoch_end=OnEpochEnd),
            logger.KerasCallback(keras),
        ],
    )
Example 4
  def TrainingTelemetry(
      self) -> typing.List[telemetry_pb2.ModelEpochTelemetry]:
    """Get the training telemetry data."""
    return telemetry.TrainingLogger(self.cache.path /
                                    'logs').EpochTelemetry()
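The ModelEpochTelemetry protos returned here can be aggregated the same way Example 7 treats EpochTelemetry(). A short sketch under that assumption, reading only the epoch_wall_time_ms and loss fields that Example 7 uses; SummarizeTelemetry is a hypothetical helper, not part of the original API:

# Sketch: summarising a list of ModelEpochTelemetry protos, e.g. the value
# returned by TrainingTelemetry() above.
def SummarizeTelemetry(protos) -> str:
  num_epochs = len(protos)
  total_time_ms = sum(t.epoch_wall_time_ms for t in protos)
  losses = [round(t.loss, 2) for t in protos]
  return (f"Trained for {num_epochs} epochs "
          f"({total_time_ms / 1000:.1f} s); losses: {losses}.")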
Example 5
    def Train(self, corpus) -> None:
        """Locked training.

    If there are cached epoch checkpoints, the one closest to the target number
    of epochs will be loaded, and the model will be trained for only the
    remaining number of epochs, if any. This means that calling this function
    twice will only actually train the model the first time, and all subsequent
    calls will be no-ops.

    This method must only be called when the model is locked.
    """
        if self.is_trained:
            return

        data_generator = data_generators.TensorflowBatchGenerator(
            corpus, self.config.training)
        tf = self.InitTfGraph(inference=False)

        logger = telemetry.TrainingLogger(self.cache.path / 'logs')

        # training options
        # TODO(cec): Enable support for multiple optimizers:
        initial_learning_rate = (
            self.config.training.adam_optimizer.initial_learning_rate_micros /
            1e6)
        decay_rate = (self.config.training.adam_optimizer.
                      learning_rate_decay_per_epoch_micros / 1e6)

        # resume from prior checkpoint
        ckpt_path, ckpt_paths = None, None
        if (self.cache.path / 'checkpoints' / 'checkpoint').exists():
            checkpoint_state = tf.train.get_checkpoint_state(self.cache.path /
                                                             'checkpoints')
            assert checkpoint_state
            assert checkpoint_state.model_checkpoint_path
            ckpt_path, ckpt_paths = self.GetParamsPath(checkpoint_state)

        with tf.Session() as sess:
            tf.global_variables_initializer().run()

            # Keep all checkpoints.
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=100,
                                   save_relative_paths=True)

            # restore model from closest checkpoint.
            if ckpt_path:
                logging.info("Restoring checkpoint {}".format(ckpt_path))
                saver.restore(sess, ckpt_path)

            # make sure we don't lose track of other checkpoints
            if ckpt_paths:
                saver.recover_last_checkpoints(ckpt_paths)

            # Per-epoch training loop.
            for epoch_num in range(
                    sess.run(self.epoch) + 1,
                    self.config.training.num_epochs + 1):
                logger.EpochBeginCallback()

                # decay and set learning rate
                new_learning_rate = initial_learning_rate * (
                    (float(100 - decay_rate) / 100.0)**(epoch_num - 1))
                sess.run(tf.assign(self.learning_rate, new_learning_rate))
                sess.run(tf.assign(self.epoch, epoch_num))

                # TODO(cec): refactor data generator to a Python generator.
                data_generator.CreateBatches()

                logging.info('Epoch %d/%d:', epoch_num,
                             self.config.training.num_epochs)
                state = sess.run(self.initial_state)
                # Per-batch inner loop.
                bar = progressbar.ProgressBar(
                    max_value=data_generator.num_batches)
                for _ in bar(range(data_generator.num_batches)):
                    x, y = data_generator.NextBatch()
                    feed = {self.input_data: x, self.targets: y}
                    for i, (c, h) in enumerate(self.initial_state):
                        feed[c] = state[i].c
                        feed[h] = state[i].h
                    loss, state, _ = sess.run(
                        [self.loss, self.final_state, self.train_op], feed)

                # Log the loss and delta.
                logging.info('Loss: %.6f.', loss)

                # Save after every epoch.
                start_time = time.time()
                global_step = epoch_num
                checkpoint_prefix = (self.cache.path / 'checkpoints' /
                                     'checkpoint')
                saver.save(sess, checkpoint_prefix, global_step=global_step)
                checkpoint_path = f'{checkpoint_prefix}-{global_step}'
                logging.info(
                    'Saved checkpoint %s in %s ms.', checkpoint_path,
                    humanize.intcomma(int((time.time() - start_time) * 1000)))
                assert pathlib.Path(
                    f'{checkpoint_prefix}-{global_step}.index').is_file()
                assert pathlib.Path(
                    f'{checkpoint_prefix}-{global_step}.meta').is_file()

                logger.EpochEndCallback(epoch_num, loss)
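The per-epoch learning-rate update in the loop above is a plain exponential decay applied to the Adam learning rate. A worked example with hypothetical values for the two *_micros config fields, using the same arithmetic as the code:

# Worked example of the exponential learning-rate decay computed above.
initial_learning_rate = 2000 / 1e6  # hypothetical initial_learning_rate_micros
decay_rate = 5000000 / 1e6  # hypothetical learning_rate_decay_per_epoch_micros
for epoch_num in (1, 2, 3):
    new_learning_rate = initial_learning_rate * (
        (float(100 - decay_rate) / 100.0)**(epoch_num - 1))
    print(epoch_num, new_learning_rate)  # 0.002, then 0.0019, then ~0.001805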
Example 6
  def Train(self, corpus, **unused_kwargs) -> 'keras.models.Sequential':
    """Locked training.

    If there are cached epoch checkpoints, the one closest to the target number
    of epochs will be loaded, and the model will be trained for only the
    remaining number of epochs, if any. This means that calling this function
    twice will only actually train the model the first time, and all subsequent
    calls will be no-ops.

    This method must only be called when the model is locked.

    Returns:
      The trained Keras model.
    """
    del unused_kwargs

    model = builders.BuildKerasModel(self.config, self.atomizer.vocab_size)
    with open(self.cache.keypath('model.yaml'), 'w') as f:
      f.write(model.to_yaml())
    model.compile(
        loss='categorical_crossentropy',
        optimizer=builders.BuildOptimizer(self.config))

    # Print a model summary.
    buf = io.StringIO()
    model.summary(print_fn=lambda x: buf.write(x + '\n'))
    app.Log(1, 'Model summary:\n%s', buf.getvalue())

    # TODO(cec): Add an atomizer.CreateVocabularyFile() method, with frequency
    # counts for a given corpus.
    def Escape(token: str) -> str:
      """Make a token visible and printable."""
      if token == '\t':
        return '\\t'
      elif token == '\n':
        return '\\n'
      elif not token.strip():
        return f"'{token}'"
      else:
        return token

    if not (self.cache.path / 'embeddings' / 'metadata.tsv').is_file():
      with open(self.cache.path / 'embeddings' / 'metadata.tsv', 'w') as f:
        for _, token in sorted(
            self.atomizer.decoder.items(), key=lambda x: x[0]):
          f.write(Escape(token) + '\n')

    target_num_epochs = self.config.training.num_epochs
    starting_epoch = 0

    epoch_checkpoints = self.epoch_checkpoints
    if len(epoch_checkpoints) >= target_num_epochs:
      # We have already trained a model to at least this number of epochs, so
      # simply load the weights from that epoch and call it a day.
      app.Log(1, 'Loading weights from %s',
              epoch_checkpoints[target_num_epochs - 1])
      model.load_weights(epoch_checkpoints[target_num_epochs - 1])
      return model

    # Now entering the point at which training is inevitable.
    with logutil.TeeLogsToFile('train', self.cache.path / 'logs'):
      # Deferred importing of Keras so that we don't have to activate the
      # TensorFlow backend every time we import this module.
      import keras

      if epoch_checkpoints:
        # We have already trained a model at least part of the way to our target
        # number of epochs, so load the most recent one.
        starting_epoch = len(epoch_checkpoints)
        app.Log(1, 'Resuming training from epoch %d.', starting_epoch)
        model.load_weights(epoch_checkpoints[-1])

      callbacks = [
          keras.callbacks.ModelCheckpoint(
              str(self.cache.path / 'checkpoints' / '{epoch:03d}.hdf5'),
              verbose=1,
              mode="min",
              save_best_only=False),
          keras.callbacks.TensorBoard(
              str(self.cache.path / 'embeddings'),
              write_graph=True,
              embeddings_freq=1,
              embeddings_metadata={
                  'embedding_1':
                  str(self.cache.path / 'embeddings' / 'metadata.tsv'),
              }),
          telemetry.TrainingLogger(
              self.cache.path / 'logs').KerasCallback(keras),
      ]

      generator = data_generators.AutoGenerator(corpus, self.config.training)
      steps_per_epoch = (corpus.encoded.token_count - 1) // (
          self.config.training.batch_size *
          self.config.training.sequence_length)
      app.Log(
          1, 'Step counts: %s per epoch, %s left to do, %s total',
          humanize.Commas(steps_per_epoch),
          humanize.Commas(
              (target_num_epochs - starting_epoch) * steps_per_epoch),
          humanize.Commas(target_num_epochs * steps_per_epoch))
      model.fit_generator(
          generator,
          steps_per_epoch=steps_per_epoch,
          callbacks=callbacks,
          initial_epoch=starting_epoch,
          epochs=target_num_epochs)
    return model
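The steps_per_epoch value computed above is simply the number of (batch_size * sequence_length)-token windows that fit in the encoded corpus. A worked example with made-up numbers:

# Worked example of the steps_per_epoch arithmetic above, using hypothetical
# values for the corpus size and training config.
token_count = 1000001  # hypothetical corpus.encoded.token_count
batch_size = 64  # hypothetical config.training.batch_size
sequence_length = 50  # hypothetical config.training.sequence_length
steps_per_epoch = (token_count - 1) // (batch_size * sequence_length)
assert steps_per_epoch == 312  # 1,000,000 // 3,200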
Example 7
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    model_dir = pathlib.Path(FLAGS.reachability_model_dir)
    if not model_dir.is_dir():
        raise app.UsageError('--reachability_model_dir is not a directory.')

    logger = telemetry.TrainingLogger(logdir=model_dir / 'logs')
    telemetry_ = logger.EpochTelemetry()
    num_epochs = len(telemetry_)
    training_time_ms = sum(t.epoch_wall_time_ms for t in telemetry_)
    training_time_natural = humanize.naturaldelta(training_time_ms / 1000)
    time_per_epoch_natural = humanize.naturaldelta(training_time_ms /
                                                   num_epochs / 1000)
    losses = [round(t.loss, 2) for t in telemetry_]

    with open(model_dir / 'model.json') as f:
        model: keras.models.Model = keras.models.model_from_json(f.read())

    model.compile(loss='binary_crossentropy',
                  metrics=['accuracy'],
                  optimizer='adam')
    model.summary()
    print(f'Total training time: {training_time_natural} '
          f'({time_per_epoch_natural} per epoch).')
    print(f'Number of epochs: {num_epochs}.')
    print(f'Training losses: {losses}.')

    training_data = pbutil.FromFile(model_dir / 'training_data.pbtxt',
                                    reachability_pb2.ReachabilityDataset())
    testing_data = pbutil.FromFile(model_dir / 'testing_data.pbtxt',
                                   reachability_pb2.ReachabilityDataset())
    data = reachability_pb2.ReachabilityDataset()
    data.entry.extend(training_data.entry)
    data.entry.extend(testing_data.entry)

    num_nodes = len(training_data.entry[0].graph.node)
    num_nodes_natural = humanize.intcomma(num_nodes)
    num_training_graphs_natural = humanize.intcomma(len(training_data.entry))
    num_testing_graphs_natural = humanize.intcomma(len(testing_data.entry))
    print(f'Training data: {num_training_graphs_natural} graphs of '
          f'{num_nodes_natural} nodes each.')
    print(f'Testing data: {num_testing_graphs_natural} graphs of '
          f'{num_nodes_natural} nodes each.')

    num_connections_training = sum(
        sum(len(n.child) for n in entry.graph.node)
        for entry in training_data.entry)
    num_connections_testing = sum(
        sum(len(n.child) for n in entry.graph.node)
        for entry in testing_data.entry)

    print(
        'Average graph connections: {:.1f} training ({:.1f} per node), '
        '{:.1f} testing ({:.1f} per node).'.format(
            num_connections_training / len(training_data.entry),
            num_connections_training / (len(training_data.entry) * num_nodes),
            num_connections_testing / len(testing_data.entry),
            num_connections_testing / (len(testing_data.entry) * num_nodes)))

    sequence_length = train_model.GetSequenceLength(
        len(training_data.entry[0].graph.node))
    print('Sequence length:', sequence_length)

    with open(model_dir / 'atomizer.pkl', 'rb') as f:
        atomizer: atomizers.AtomizerBase = pickle.load(f)

    print('Vocabulary size:', atomizer.vocab_size)

    seqs = [
        train_model.ControlFlowGraphToSequence(entry.graph)
        for entry in data.entry
    ]
    num_uniq_seqs = len(set(seqs))
    print('Unique sequences: {} of {} ({:.2f}%)'.format(
        humanize.intcomma(num_uniq_seqs), humanize.intcomma(len(seqs)),
        (num_uniq_seqs / len(seqs)) * 100))
    num_uniq_labels = len(
        set([''.join(str(x) for x in e.reachable) for e in data.entry]))
    print('Unique labels: {} of {} ({:.2f}%)'.format(
        humanize.intcomma(num_uniq_labels), humanize.intcomma(len(seqs)),
        (num_uniq_labels / len(seqs)) * 100))

    test_x, test_y = train_model.ProtosToModelData(testing_data,
                                                   sequence_length, atomizer)

    zero_r_acc = sum(sum(x)
                     for x in test_y) / len(testing_data.entry) / num_nodes
    zero_r_acc = max(zero_r_acc[0], 1 - zero_r_acc[0])
    print('Zero-R accuracy: {:.2%}'.format(zero_r_acc))

    row = model.evaluate(test_x,
                         test_y,
                         batch_size=FLAGS.batch_size,
                         verbose=0)
    overall_loss, losses, accuracies = (row[0], row[1:1 + num_nodes],
                                        row[num_nodes + 1:])
    print('Accuracy: {:.2%}'.format(sum(accuracies) / len(accuracies)))
    print('Accuracy (excluding first class): {:.2%}'.format(
        sum(accuracies[1:]) / len(accuracies[1:])))
    print('done.')
Example 8
  def Train(
    self,
    corpus,
    test_sampler: typing.Optional[samplers.Sampler] = None,
    **unused_kwargs,
  ) -> None:
    """Locked training.

    If there are cached epoch checkpoints, the one closest to the target number
    of epochs will be loaded, and the model will be trained for only the
    remaining number of epochs, if any. This means that calling this function
    twice will only actually train the model the first time, and all subsequent
    calls will be no-ops.

    This method must only be called when the model is locked.
    """
    del unused_kwargs

    if self.is_trained:
      return

    data_generator = data_generators.TensorflowBatchGenerator(
      corpus, self.config.training
    )
    tf = self.InitTfGraph()

    logger = telemetry.TrainingLogger(self.cache.path / "logs")

    # Create and merge the tensorboard summary ops.
    merged = tf.compat.v1.summary.merge_all()

    # training options
    # TODO(cec): Enable support for multiple optimizers:
    initial_learning_rate = (
      self.config.training.adam_optimizer.initial_learning_rate_micros / 1e6
    )
    decay_rate = (
      self.config.training.adam_optimizer.learning_rate_decay_per_epoch_micros
      / 1e6
    )

    # resume from prior checkpoint
    ckpt_path, ckpt_paths = None, None
    if (self.cache.path / "checkpoints" / "checkpoint").exists():
      checkpoint_state = tf.train.get_checkpoint_state(
        self.cache.path / "checkpoints"
      )
      assert checkpoint_state
      assert checkpoint_state.model_checkpoint_path
      ckpt_path, ckpt_paths = self.GetParamsPath(checkpoint_state)

    with tf.compat.v1.Session() as sess, self.dashboard_db.Session() as dbs:
      dbs.query(dashboard_db.TrainingTelemetry).filter(
        dashboard_db.TrainingTelemetry.model_id == self.dashboard_model_id
      ).filter(dashboard_db.TrainingTelemetry.pending == True).delete()

      tf.compat.v1.global_variables_initializer().run()

      # Keep all checkpoints.
      saver = tf.compat.v1.train.Saver(
        tf.compat.v1.global_variables(), max_to_keep=100, save_relative_paths=True
      )

      # restore model from closest checkpoint.
      if ckpt_path:
        app.Log(1, "Restoring checkpoint {}".format(ckpt_path))
        saver.restore(sess, ckpt_path)

      # make sure we don't lose track of other checkpoints
      if ckpt_paths:
        saver.recover_last_checkpoints(ckpt_paths)

      # Offset epoch counts by 1 so that they are in the range [1..n]
      current_epoch = sess.run(self.epoch) + 1
      max_epoch = self.config.training.num_epochs + 1

      # Per-epoch training loop.
      for epoch_num in range(current_epoch, max_epoch):
        logger.EpochBeginCallback()

        # decay and set learning rate
        new_learning_rate = initial_learning_rate * (
          (float(100 - decay_rate) / 100.0) ** (epoch_num - 1)
        )
        sess.run(tf.compat.v1.assign(self.learning_rate, new_learning_rate))
        sess.run(tf.compat.v1.assign(self.epoch, epoch_num))

        # TODO(cec): refactor data generator to a Python generator.
        data_generator.CreateBatches()

        app.Log(1, "Epoch %d/%d:", epoch_num, self.config.training.num_epochs)
        state = sess.run(self.initial_state)
        # Per-batch inner loop.
        bar = progressbar.ProgressBar(max_value=data_generator.num_batches)
        last_log_time = time.time()
        for i in bar(range(data_generator.num_batches)):
          x, y = data_generator.NextBatch()
          feed = {self.input_data: x, self.targets: y}
          for j, (c, h) in enumerate(self.initial_state):
            feed[c], feed[h] = state[j].c, state[j].h
          summary, loss, state, _ = sess.run(
            [merged, self.loss, self.final_state, self.train_op], feed
          )

          # Periodically write progress to tensorboard.
          if i % FLAGS.clgen_tf_backend_tensorboard_summary_step_count == 0:
            step = (epoch_num - 1) * data_generator.num_batches + i
            self.summary_writer.add_summary(summary, step)
            # Add telemetry database entry. This isn't committed until the end
            # of the epoch, when the checkpoint is created.
            now = time.time()
            duration_ns = int((now - last_log_time) * 1e9)  # Seconds to nanoseconds.
            dbs.add(
              dashboard_db.TrainingTelemetry(
                model_id=self.dashboard_model_id,
                epoch=epoch_num,
                step=step,
                training_loss=loss,
                learning_rate=new_learning_rate,
                ns_per_batch=int(duration_ns)
                / FLAGS.clgen_tf_backend_tensorboard_summary_step_count,
              )
            )
            last_log_time = now
            dbs.commit()

        # Log the loss and delta.
        app.Log(1, "Loss: %.6f.", loss)

        # Save after every epoch.
        start_time = time.time()
        global_step = epoch_num
        checkpoint_prefix = self.cache.path / "checkpoints" / "checkpoint"
        checkpoint_path = saver.save(
          sess, checkpoint_prefix, global_step=global_step
        )
        app.Log(
          1,
          "Saved checkpoint %s in %s ms.",
          checkpoint_path,
          humanize.Commas(int((time.time() - start_time) * 1000)),
        )
        dbs.query(dashboard_db.TrainingTelemetry).filter(
          dashboard_db.TrainingTelemetry.pending == True
        ).update({"pending": False})
        dbs.commit()
        assert pathlib.Path(
          f"{checkpoint_prefix}-{global_step}.index"
        ).is_file()
        assert pathlib.Path(f"{checkpoint_prefix}-{global_step}.meta").is_file()

        logger.EpochEndCallback(epoch_num, loss)
        # If we have a sampler that we can use at the end of epochs, then break
        # out of the training session so that a test sample can be drawn, after
        # which Train() is called again to resume training.
        # This is confusing logic! Consider a refactor to simplify things.
        if test_sampler:
          break
      else:
        return

    if test_sampler and FLAGS.clgen_per_epoch_test_samples > 0:
      self._EndOfEpochTestSample(corpus, test_sampler, step, epoch_num)
      self.Train(corpus, test_sampler)
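The break / for-else / recursive self.Train() control flow above relies on Python's for/else semantics: the else branch runs only when the loop finishes without hitting break. A minimal, standalone illustration of that pattern:

# Minimal illustration of for/else: `else` runs only when the loop was not
# exited via `break`. In Train() above, `break` leaves the session so an
# end-of-epoch test sample can be drawn, after which Train() is called again;
# `else: return` ends training once every epoch has completed.
def run_epochs(num_epochs: int, stop_early_at: int = 0) -> str:
  for epoch in range(1, num_epochs + 1):
    if stop_early_at and epoch == stop_early_at:
      break  # Skips the else clause.
  else:
    return "completed"  # Reached only when no break occurred.
  return f"stopped at epoch {epoch}"

assert run_epochs(3) == "completed"
assert run_epochs(3, stop_early_at=2) == "stopped at epoch 2"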
Example 9
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    model_dir = pathlib.Path(FLAGS.reachability_model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    (model_dir / 'logs').mkdir(exist_ok=True)
    (model_dir / 'checkpoints').mkdir(exist_ok=True)

    logging.info('Generating graphs dataset ...')
    data = MakeReachabilityDataset(FLAGS.reachability_num_training_graphs +
                                   FLAGS.reachability_num_testing_graphs)
    training_data = reachability_pb2.ReachabilityDataset()
    training_data.entry.extend(
        data.entry[:FLAGS.reachability_num_training_graphs])
    pbutil.ToFile(training_data, model_dir / 'training_data.pbtxt')
    testing_data = reachability_pb2.ReachabilityDataset()
    testing_data.entry.extend(
        data.entry[FLAGS.reachability_num_training_graphs:])
    pbutil.ToFile(testing_data, model_dir / 'testing_data.pbtxt')

    logging.info('Number of training examples: %s.',
                 humanize.intcomma(len(training_data.entry)))
    logging.info('Number of testing examples: %s.',
                 humanize.intcomma(len(testing_data.entry)))

    n = FLAGS.reachability_num_nodes
    sequence_length = GetSequenceLength(FLAGS.reachability_num_nodes)
    logging.info('Using sequence length %s.',
                 humanize.intcomma(sequence_length))
    seqs = [ControlFlowGraphToSequence(entry.graph) for entry in data.entry]
    text = '\n'.join(seqs)
    logging.info('Deriving atomizer from %s chars.',
                 humanize.intcomma(len(text)))
    atomizer = atomizers.AsciiCharacterAtomizer.FromText(text)
    logging.info('Vocabulary size: %s.',
                 humanize.intcomma(len(atomizer.vocab)))
    with open(model_dir / 'atomizer.pkl', 'wb') as f:
        pickle.dump(atomizer, f)
    logging.info('Pickled atomizer to %s.', model_dir / 'atomizer.pkl')

    x, y = ProtosToModelData(training_data, sequence_length, atomizer)
    logging.info('Training data: x %s, y[%s] %s', x.shape, len(y), y[0].shape)

    test_x, test_y = ProtosToModelData(testing_data, sequence_length, atomizer)
    logging.info('Testing data: x %s, y[%s] %s', test_x.shape, len(test_y),
                 test_y[0].shape)

    num_uniq_seqs = len(set(seqs))
    logging.info('Unique sequences: %s of %s (%.2f %%)',
                 humanize.intcomma(num_uniq_seqs),
                 humanize.intcomma(len(seqs)),
                 (num_uniq_seqs / len(seqs)) * 100)
    num_uniq_labels = len(
        set([''.join(str(x) for x in e.reachable) for e in data.entry]))
    logging.info('Unique labels: %s of %s (%.2f %%)',
                 humanize.intcomma(num_uniq_labels),
                 humanize.intcomma(len(seqs)),
                 (num_uniq_labels / len(seqs)) * 100)

    np.random.seed(FLAGS.reachability_model_seed)
    random.seed(FLAGS.reachability_model_seed)
    logging.info('Building Keras model ...')
    model = BuildKerasModel(sequence_length=sequence_length,
                            num_classes=n,
                            lstm_size=FLAGS.lstm_size,
                            num_layers=FLAGS.num_layers,
                            dnn_size=FLAGS.dnn_size,
                            atomizer=atomizer)

    model_json = model.to_json()
    with open(model_dir / 'model.json', 'w') as f:
        f.write(model_json)
    logging.info('Wrote model to %s', model_dir / 'model.json')

    logging.info('Training model ...')

    def OnEpochEnd(epoch, logs):
        """End-of-epoch model evaluate."""
        del logs
        logging.info('Evaluating model at epoch %d', epoch)
        # row is [overall_loss, loss_1..loss_n, acc_1..acc_n].
        row = model.evaluate(test_x,
                             test_y,
                             batch_size=FLAGS.batch_size,
                             verbose=0)
        overall_loss, losses, accuracies = row[0], row[1:1 + n], row[n + 1:]
        logging.info('Accuracy (excluding first class): %.2f %%',
                     (sum(accuracies[1:]) / len(accuracies[1:])) * 100)

    logger = telemetry.TrainingLogger(logdir=model_dir / 'logs')
    model.fit(
        x,
        y,
        epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        verbose=True,
        shuffle=True,
        callbacks=[
            keras.callbacks.ModelCheckpoint(str(model_dir / 'checkpoints') +
                                            '/weights_{epoch:03d}.hdf5',
                                            verbose=1,
                                            mode="min",
                                            save_best_only=False),
            keras.callbacks.LambdaCallback(on_epoch_end=OnEpochEnd),
            logger.KerasCallback(keras),
        ])

    for i in range(5):
        outs = FlattenModelOutputs(model.predict(np.array([x[i]])))
        logging.info('outs:    %s', outs)
        logging.info('clamped: %s', np.rint(outs).astype(np.int32))
        logging.info('true:    %s', FlattenModelData(y, i))
        logging.info('')
    logging.info('done')