def _EndOfEpochTestSample(
    self, corpus, sampler: samplers.Sampler, step: int, epoch_num: int
):
  """Sample the model at the end of an epoch and record the results.

  Draws FLAGS.clgen_per_epoch_test_samples samples from the model, logs
  each one, writes rows to the dashboard database, and emits the batch as
  a TensorBoard text summary.

  Args:
    corpus: The corpus providing the atomizer used to decode sample tokens.
    sampler: The sampler driving generation. Its batch size is forced to 1
      for end-of-epoch sampling.
    step: The global training step, recorded with each DB row and the
      summary.
    epoch_num: The epoch number recorded with each database row.
  """
  import tensorflow as tf

  atomizer = corpus.atomizer
  sampler.Specialize(atomizer)
  sampler.batch_size = 1
  seed = 0
  self.InitSampling(sampler, seed)
  self.InitSampleBatch(sampler)

  samples = []
  stats = []  # One (token_count, sample_time_ms) tuple per sample.
  for i in range(FLAGS.clgen_per_epoch_test_samples):
    # np.bool was removed in NumPy 1.24; the builtin bool is the
    # documented replacement and produces the same dtype.
    done = np.zeros(1, dtype=bool)
    while not done[0]:
      start_time = time.time()
      sample_in_progress = sampler.tokenized_start_text.copy()
      indices = self.SampleNextIndices(sampler, done)
      # Iterate over all samples in batch to determine whether they're
      # done.
      for index in indices[0]:
        sample_in_progress.append(atomizer.decoder[index])
        if not sampler.SampleIsComplete(sample_in_progress):
          continue
        stats.append(
            (len(sample_in_progress), int((time.time() - start_time) * 1000))
        )
        sample = "".join(sample_in_progress)
        samples.append(sample)
        app.Log(1, "End-of-epoch sample %d:\n%s", i + 1, sample)
        done[0] = True
        break

  # Write samples to the dashboard database. Unpack the per-sample stats
  # tuple explicitly rather than shadowing the `stats` list with a loop
  # variable of the same name and indexing into it.
  with self.dashboard_db.Session(commit=True) as dbs:
    dbs.add_all(
        [
            dashboard_db.TrainingSample(
                model_id=self.dashboard_model_id,
                epoch=epoch_num,
                step=step,
                sample=sample,
                token_count=token_count,
                sample_time=sample_time,
            )
            for sample, (token_count, sample_time) in zip(samples, stats)
        ]
    )

  # Emit the samples as a TensorBoard text summary (TF 1.x session API).
  samples_as_markdown = [
      self.FormatCodeAsMarkdown(sample) for sample in samples
  ]
  samples_tensor = tf.convert_to_tensor(samples_as_markdown, dtype=tf.string)
  summary_op = tf.summary.text("samples", samples_tensor)
  summary = self.inference_sess.run(summary_op)
  self.summary_writer.add_summary(summary, step)
def _EndOfEpochTestSample(
    self, corpus, sampler: samplers.Sampler, step: int, num_samples: int = 12
):
  """Sample the model at the end of an epoch and log the results.

  Draws `num_samples` samples from the model, logs each one, and emits
  the batch as a TensorBoard text summary.

  Args:
    corpus: The corpus providing the atomizer used to decode sample tokens.
    sampler: The sampler driving generation. Its batch size is forced to 1
      for end-of-epoch sampling.
    step: The global training step, recorded with the summary.
    num_samples: Number of end-of-epoch samples to draw. Defaults to 12,
      the previously hard-coded value.
  """
  import tensorflow as tf

  atomizer = corpus.atomizer
  sampler.Specialize(atomizer)
  sampler.batch_size = 1
  seed = 0
  self.InitSampling(sampler, seed)
  self.InitSampleBatch(sampler)

  samples = []
  for i in range(num_samples):
    # np.bool was removed in NumPy 1.24; the builtin bool is the
    # documented replacement and produces the same dtype.
    done = np.zeros(1, dtype=bool)
    while not done[0]:
      sample_in_progress = sampler.tokenized_start_text.copy()
      indices = self.SampleNextIndices(sampler, done)
      # Iterate over all samples in batch to determine whether they're
      # done.
      for index in indices[0]:
        sample_in_progress.append(atomizer.decoder[index])
        if not sampler.SampleIsComplete(sample_in_progress):
          continue
        sample = ''.join(sample_in_progress)
        samples.append(sample)
        app.Log(1, 'End-of-epoch sample %d:\n%s', i + 1, sample)
        done[0] = True
        break

  # Render each sample as a fenced markdown code block for the
  # TensorBoard text summary (TF 1.x session API).
  samples_as_markdown = [
      f'```\n{sample.strip()}\n```' for sample in samples
  ]
  samples_tensor = tf.convert_to_tensor(samples_as_markdown, dtype=tf.string)
  summary_op = tf.summary.text('samples', samples_tensor)
  summary = self.inference_sess.run(summary_op)
  self.summary_writer.add_summary(summary, step)