Ejemplo n.º 1
0
  def _EndOfEpochTestSample(
    self, corpus, sampler: samplers.Sampler, step: int, epoch_num: int
  ):
    """Generate end-of-epoch test samples and record them.

    The sampler is specialized to the corpus atomizer and forced to a batch
    size of one. `FLAGS.clgen_per_epoch_test_samples` samples are then
    generated with a fixed seed of 0. Each completed sample is logged, stored
    as a `dashboard_db.TrainingSample` row, and emitted as a "samples" text
    summary through `self.summary_writer`.

    Args:
      corpus: Object exposing the `atomizer` used to decode token indices.
      sampler: The sampler to run. It is mutated: `Specialize()` is called on
        it and its `batch_size` is set to 1.
      step: Step value stored with each sample row and passed to
        `summary_writer.add_summary()`.
      epoch_num: Epoch number stored with each sample row.
    """
    import tensorflow as tf

    atomizer = corpus.atomizer
    sampler.Specialize(atomizer)
    sampler.batch_size = 1
    seed = 0

    self.InitSampling(sampler, seed)
    self.InitSampleBatch(sampler)

    # stats[k] is a (token_count, sample_time_ms) tuple for samples[k].
    samples, stats = [], []
    for i in range(FLAGS.clgen_per_epoch_test_samples):
      # Use the builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
      done = np.zeros(1, dtype=bool)
      while not done[0]:
        # Every pass of this loop starts from a fresh copy of the start text
        # and a fresh timer, so tokens and time from an earlier, incomplete
        # pass are not carried over into the recorded sample.
        start_time = time.time()
        sample_in_progress = sampler.tokenized_start_text.copy()
        indices = self.SampleNextIndices(sampler, done)

        # The batch size is 1, so only the first entry of the batch is read.
        # Append decoded tokens one by one until the sampler reports the
        # sample as complete.
        for index in indices[0]:
          sample_in_progress.append(atomizer.decoder[index])
          if not sampler.SampleIsComplete(sample_in_progress):
            continue

          elapsed_ms = int((time.time() - start_time) * 1000)
          stats.append((len(sample_in_progress), elapsed_ms))
          sample = "".join(sample_in_progress)
          samples.append(sample)
          app.Log(1, "End-of-epoch sample %d:\n%s", i + 1, sample)
          done[0] = True
          break

    # Record the samples in the dashboard database.
    with self.dashboard_db.Session(commit=True) as dbs:
      dbs.add_all(
        [
          dashboard_db.TrainingSample(
            model_id=self.dashboard_model_id,
            epoch=epoch_num,
            step=step,
            sample=sample,
            token_count=token_count,
            sample_time=sample_time,
          )
          for sample, (token_count, sample_time) in zip(samples, stats)
        ]
      )

    # Emit the samples as a text summary.
    samples_as_markdown = [
      self.FormatCodeAsMarkdown(sample) for sample in samples
    ]
    samples_tensor = tf.convert_to_tensor(samples_as_markdown, dtype=tf.string)
    summary_op = tf.summary.text("samples", samples_tensor)
    summary = self.inference_sess.run(summary_op)
    self.summary_writer.add_summary(summary, step)
Ejemplo n.º 2
0
    def _EndOfEpochTestSample(self, corpus, sampler: samplers.Sampler,
                              step: int, num_samples: int = 12):
        """Generate end-of-epoch test samples and emit them as a text summary.

        The sampler is specialized to the corpus atomizer and forced to a
        batch size of one. `num_samples` samples are then generated with a
        fixed seed of 0. Each completed sample is logged, wrapped in a
        Markdown code fence, and written as a "samples" text summary through
        `self.summary_writer`.

        Args:
            corpus: Object exposing the `atomizer` used to decode token
                indices.
            sampler: The sampler to run. It is mutated: `Specialize()` is
                called on it and its `batch_size` is set to 1.
            step: Step value passed to `summary_writer.add_summary()`.
            num_samples: Number of samples to generate. Defaults to 12, the
                count that was previously hard-coded.
        """
        import tensorflow as tf
        atomizer = corpus.atomizer
        sampler.Specialize(atomizer)
        sampler.batch_size = 1
        seed = 0

        self.InitSampling(sampler, seed)
        self.InitSampleBatch(sampler)

        samples = []
        for i in range(num_samples):
            # Use the builtin `bool`: the `np.bool` alias was removed in
            # NumPy 1.24.
            done = np.zeros(1, dtype=bool)
            while not done[0]:
                # Every pass of this loop starts from a fresh copy of the
                # start text, so tokens from an earlier, incomplete pass are
                # not carried over into the recorded sample.
                sample_in_progress = sampler.tokenized_start_text.copy()
                indices = self.SampleNextIndices(sampler, done)

                # The batch size is 1, so only the first entry of the batch is
                # read. Append decoded tokens one by one until the sampler
                # reports the sample as complete.
                for index in indices[0]:
                    sample_in_progress.append(atomizer.decoder[index])
                    if not sampler.SampleIsComplete(sample_in_progress):
                        continue

                    sample = ''.join(sample_in_progress)
                    samples.append(sample)
                    app.Log(1, 'End-of-epoch sample %d:\n%s', i + 1, sample)
                    done[0] = True
                    break

        # Emit the samples as a text summary, each wrapped in a code fence.
        samples_as_markdown = [
            f'```\n{sample.strip()}\n```' for sample in samples
        ]
        samples_tensor = tf.convert_to_tensor(samples_as_markdown,
                                              dtype=tf.string)
        summary_op = tf.summary.text('samples', samples_tensor)
        summary = self.inference_sess.run(summary_op)
        self.summary_writer.add_summary(summary, step)