def EndEpisodeStep(self) -> random_opt_pb2.DelayedRewardStep:
    """Compile and run the working bytecode, then record the final step.

    The working bytecode is compiled with -O0. On success the binary's
    runtimes are appended to the current episode and, if the binary
    validates, the mean runtime is converted into a reward. Validation,
    execution (ValueError) and compilation (clang.ClangException) failures
    are recorded on the current episode and mapped to their failure rewards.

    NOTE(review): the return annotation names DelayedRewardStep, but a
    4-tuple is what is actually returned.
    NOTE(review): the execution-failure outcome is read from
    random_opt_pb2.Step while the other outcomes come from
    DelayedRewardEpisode - confirm this is intended.

    Returns:
      A tuple (observation, reward, True, {}).
    """
    began_ms = labdate.MillisecondsTimestamp()
    final_step = random_opt_pb2.DelayedRewardStep(
        start_time_epoch_ms=began_ms)

    try:
        clang.Compile([self.working_bytecode_path],
                      self.binary_path,
                      copts=['-O0'])
        try:
            measured = self.GetRuntimes()
            self.episodes[-1].binary_runtime_ms.extend(measured)
            if not self.BinaryIsValid():
                self.episodes[-1].outcome = (
                    random_opt_pb2.DelayedRewardEpisode.EVAL_FAILED)
                final_step.reward = self.eval_failed_reward
            else:
                mean_runtime = sum(measured) / len(measured)
                final_step.reward = self.runtime_reward(mean_runtime)
        except ValueError as exec_error:
            self.episodes[-1].outcome = random_opt_pb2.Step.EXEC_FAILED
            self.episodes[-1].outcome_error_msg = text.truncate(
                str(exec_error), 255)
            final_step.reward = self.exec_failed_reward
    except clang.ClangException as compile_error:
        self.episodes[-1].outcome = (
            random_opt_pb2.DelayedRewardEpisode.COMPILE_FAILED)
        self.episodes[-1].outcome_error_msg = text.truncate(
            str(compile_error), 255)
        final_step.reward = self.compile_failed_reward

    observation = self.observation_space.sample()
    final_step.total_step_runtime_ms = (
        labdate.MillisecondsTimestamp() - began_ms)
    self.episodes[-1].step.extend([final_step])
    return observation, final_step.reward, True, {}
def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                      context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases whose 'src' input is a random lowercase string.

    Args:
      request: The request; request.num_testcases testcases are produced.
      context: Unused (deleted immediately).

    Returns:
      A response with one testcase per requested testcase. Each testcase has
      a single 'generation' profiling event covering the string creation.
    """
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    for _ in range(request.num_testcases):
        # Pick a length for the random string. random.randint() is inclusive
        # at BOTH ends, so the configured maximum is passed as-is; adding one
        # would allow strings longer than string_max_len.
        n = random.randint(self.config.string_min_len,
                           self.config.string_max_len)

        # Instantiate a testcase.
        testcase = response.testcases.add()
        testcase.toolchain = self.config.toolchain
        testcase.generator.CopyFrom(self.generator)

        start_time = labdate.MillisecondsTimestamp()
        testcase.inputs['src'] = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(n))
        end_time = labdate.MillisecondsTimestamp()

        p = testcase.profiling_events.add()
        p.type = 'generation'
        p.event_start_epoch_ms = start_time
        p.duration_ms = end_time - start_time

    return response
def ActionStep(self, action: int) -> random_opt_pb2.DelayedRewardStep:
    """Apply the optimization pass selected by `action` and record a step.

    NOTE(review): the return annotation names DelayedRewardStep, but a
    4-tuple is what is actually returned.

    Args:
      action: Index into self.config.candidate_pass.

    Returns:
      A tuple (observation, reward, False, {}).

    Raises:
      ValueError: If the action is not contained in the action space.
    """
    if not self.action_space.contains(action):
        raise ValueError(f"Unknown action: '{action}'")

    start_ms = labdate.MillisecondsTimestamp()
    obs = self.observation_space.sample()
    opt_pass = self.config.candidate_pass[action]

    step = random_opt_pb2.DelayedRewardStep(
        start_time_epoch_ms=start_ms,
        opt_pass=opt_pass,
    )

    temp_path = self.working_dir / 'temp.ll'
    # Run the list of passes and update the working bytecode file.
    try:
        # NOTE(review): the new step is only appended at the end of this
        # method, so the pass chosen by this action is not part of the list
        # run here - confirm that this is intended.
        all_passes = [
            previous.opt_pass for previous in self.episodes[-1].step[1:]
        ]
        opt.RunOptPassOnBytecode(self.bytecode_path, temp_path, all_passes)
        # The bytecode changed exactly when the new output differs from the
        # current working bytecode.
        step.bytecode_changed = not BytecodesAreEqual(
            temp_path, self.working_bytecode_path)
        shutil.copyfile(temp_path, self.working_bytecode_path)
        step.reward = (self.bytecode_changed_reward
                       if step.bytecode_changed else
                       self.bytecode_unchanged_reward)
    except llvm.LlvmError as e:
        # Opt failed, set the error message.
        step.reward = self.opt_failed_reward
        step.opt_error_msg = text.truncate(str(e), 255)

    step.total_step_runtime_ms = labdate.MillisecondsTimestamp() - start_ms
    self.episodes[-1].step.extend([step])
    return obs, step.reward, False, {}
def reset(self):
    """Restore the working bytecode and start a new episode.

    The original bytecode is copied over the working copy and compiled with
    -O0. The setup command and the binary are then run, and a new episode
    holding a single baseline step is appended to self.episodes.

    Raises:
      ValueError: If the freshly built binary fails validation.
    """
    source, working = self.bytecode_path, self.working_bytecode_path
    logging.debug('$ cp %s %s', source, working)
    shutil.copyfile(source, working)
    clang.Compile([working], self.binary_path, copts=['-O0'])

    # The timed region starts after compilation.
    began_ms = labdate.MillisecondsTimestamp()
    self.RunSetupCommand()
    self.RunBinary()
    if not self.BinaryIsValid():
        raise ValueError("Failed to validate base binary.")

    baseline_step = random_opt_pb2.Step(
        start_time_epoch_ms=began_ms,
        status=random_opt_pb2.Step.PASS,
        binary_runtime_ms=self.GetRuntimes(),
        reward=0,
        total_reward=0,
        speedup=1.0,
        total_speedup=1.0,
    )
    self.episodes.append(random_opt_pb2.Episode(step=[baseline_step]))

    # Set the duration on the episode's own step, as the last action.
    elapsed_ms = labdate.MillisecondsTimestamp() - began_ms
    self.episodes[-1].step[0].total_step_runtime_ms = elapsed_ms
def Sample(self,
           sampler: samplers.Sampler,
           sample_observers: typing.List[
               sample_observers_lib.SampleObserver],
           seed: int = None) -> None:
    """Sample a model, reporting samples to observers.

    Nothing is returned; to access the samples, implement a SampleObserver
    and pass it in. Train() is always called first, then batches are
    produced for as long as _SampleBatch() returns a truthy value.

    Args:
      sampler: The sampler to sample using.
      sample_observers: A list of SampleObserver objects that are notified of
        new generated samples.
      seed: Forwarded to the backend's InitSampling().

    Raises:
      UserError: If called with no sample observers.

    Errors raised by Train(), the sampler or the backend propagate unchanged.
    """
    if not sample_observers:
        raise errors.UserError("Cannot sample without any observers")

    sample_start_time = labdate.MillisecondsTimestamp()
    self.Train()

    log_dir = self.cache.path / 'logs'
    with logutil.TeeLogsToFile(f'sampler_{sampler.hash}', log_dir):
        app.Log(1, "Sampling: '%s'", sampler.start_text)

        atomizer = self.corpus.atomizer
        sampler.Specialize(atomizer)
        self.backend.InitSampling(sampler, seed)
        for observer in sample_observers:
            observer.Specialize(self, sampler)

        # The first batch always runs, hence the count starts at one.
        batch_count = 1
        while self._SampleBatch(sampler, atomizer, sample_observers):
            batch_count += 1

        elapsed_ms = labdate.MillisecondsTimestamp() - sample_start_time
        ms_per_batch = int(elapsed_ms / max(batch_count, 1))
        app.Log(1, 'Produced %s sample batches at a rate of %s ms / batch.',
                humanize.Commas(batch_count), humanize.Commas(ms_per_batch))
def Profile(self, event_name: str = ''):
    """Yield a profiling event and log the elapsed time when resumed.

    This is a generator intended for use as a context manager.
    NOTE(review): the contextmanager decorator is not visible in this block;
    confirm it is applied where the method is defined.

    A row of (start time, elapsed time, event name) is written through
    self._writer() once the managed block finishes, including when it exits
    by raising, so failed events are profiled too.

    Args:
      event_name: The name of the event being profiled.

    Yields:
      The ProfilingEvent created for this event.
    """
    event = ProfilingEvent(labdate.MillisecondsTimestamp(), event_name)
    try:
        yield event
    finally:
        # Runs on normal exit and when the body raises; the exception (if
        # any) continues to propagate after the row has been written.
        elapsed = labdate.MillisecondsTimestamp() - event.start_time
        with self._writer() as writer:
            writer.writerow((event.start_time, elapsed, event.name))
def GenerateOneSource(self) -> typing.Tuple[str, int, int]:
    """Run CLSmith once and time the invocation.

    Returns:
      A tuple of (source returned by clsmith.Exec, elapsed wall time in
      milliseconds, start timestamp in milliseconds).
    """
    began_ms = labdate.MillisecondsTimestamp()
    source = clsmith.Exec(*self.config.opt)
    elapsed_ms = labdate.MillisecondsTimestamp() - began_ms
    return source, elapsed_ms, began_ms
def _SampleBatch(
        self, sampler: samplers.Sampler, atomizer: atomizers.AtomizerBase,
        sample_observers: typing.List[sample_observers_lib.SampleObserver]
) -> bool:
    """Run a single iteration of the batched sample inner-loop.

    Args:
      sampler: The sampler; provides batch size, start text and the
        completion test.
      atomizer: The atomizer whose decoder maps indices to tokens.
      sample_observers: Observers notified through OnSample() of every
        completed sample.

    Returns:
      True only if every OnSample() call made during the batch returned a
      falsy value, else False.
      NOTE(review): a truthy OnSample() result is what stops sampling here;
      confirm that this matches the SampleObserver contract.
    """
    samples_in_progress = [
        sampler.tokenized_start_text.copy()
        for _ in range(sampler.batch_size)
    ]
    # The builtin bool is used as dtype: the np.bool alias was removed from
    # NumPy.
    done = np.zeros(sampler.batch_size, dtype=bool)
    start_time = labdate.MillisecondsTimestamp()
    wall_time_start = start_time

    self.backend.InitSampleBatch(sampler)

    # The return value of this method.
    continue_sampling = True

    # Sampling loop. Continues until all samples in the batch are done.
    while not done.all():
        indices = self.backend.SampleNextIndices(sampler, done)

        # Iterate over all samples in batch to determine whether they're
        # done.
        for i in range(sampler.batch_size):
            if done[i]:
                continue

            for index in indices[i]:
                samples_in_progress[i].append(atomizer.decoder[index])
                if not sampler.SampleIsComplete(samples_in_progress[i]):
                    continue

                end_time = labdate.MillisecondsTimestamp()
                done[i] = True
                sample = model_pb2.Sample(
                    text=''.join(samples_in_progress[i]),
                    sample_start_epoch_ms_utc=start_time,
                    sample_time_ms=end_time - start_time,
                    wall_time_ms=end_time - wall_time_start,
                    num_tokens=len(samples_in_progress[i]))

                # Notify sample observers. A list (not a generator) is
                # built so that every observer is called; all() would
                # otherwise stop at the first falsy element.
                continue_sampling &= all(
                    [not obs.OnSample(sample) for obs in sample_observers])

                # Wall sample time is the difference between the end of the
                # previous sample and the end of the current sample.
                wall_time_start = labdate.MillisecondsTimestamp()
                # Remaining indices for this sample are discarded.
                break

    return continue_sampling
def _create_lock():
    """Write a LockFile message describing the owner to self.path.

    Uses `pid` and `self` from the enclosing scope; when `pid` is None the
    current process ID is recorded instead.
    """
    if pid is None:
        owner_pid = os.getpid()
    else:
        owner_pid = pid
    acquired_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())

    lock_message = lockfile_pb2.LockFile(
        owner_process_id=owner_pid,
        owner_process_argv=' '.join(sys.argv),
        date_acquired_utc_epoch_ms=acquired_ms,
        owner_hostname=system.HOSTNAME,
        owner_user=system.USERNAME)
    pbutil.ToFile(lock_message, self.path, assume_filename='LOCK.pbtxt')
def EpochEndCallback(self, epoch: int, loss: float):
    """Write a telemetry file for a finished epoch.

    The epoch wall time is the difference between now and
    self.last_epoch_begin_timestamp.

    Args:
      epoch: The epoch number; also used (zero-padded to three digits) in the
        output file name.
      loss: The loss value to record.
    """
    finished_ms = labdate.MillisecondsTimestamp()
    telemetry = telemetry_pb2.ModelEpochTelemetry(
        timestamp_utc_epoch_ms=finished_ms,
        epoch_num=epoch,
        epoch_wall_time_ms=finished_ms - self.last_epoch_begin_timestamp,
        loss=loss,
    )
    destination = self.logdir / f'epoch_{epoch:03d}_telemetry.pbtxt'
    pbutil.ToFile(telemetry, destination)
def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                      context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases by copying the configured testcase.

    Args:
      request: The request; request.num_testcases testcases are produced.
      context: Unused (deleted immediately).

    Returns:
      A response with one copy of self.config.testcase_to_generate per
      requested testcase, each with a 'generation' profiling event appended.
    """
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    for _ in range(request.num_testcases):
        # Start the clock before the testcase is built so that the profiling
        # event covers the work; taking both timestamps afterwards would
        # always record an empty interval.
        start_time = labdate.MillisecondsTimestamp()

        # Instantiate a testcase.
        testcase = response.testcases.add()
        testcase.CopyFrom(self.config.testcase_to_generate)
        testcase.generator.CopyFrom(self.generator)

        end_time = labdate.MillisecondsTimestamp()

        p = testcase.profiling_events.add()
        p.type = 'generation'
        p.event_start_epoch_ms = start_time
        p.duration_ms = end_time - start_time

    return response
def reset(self):
    """Reset the environment state.

    Copies the original bytecode over the working copy, compiles the
    original bytecode with -O0, runs the setup command, and appends a new
    episode containing one step stamped with the current time.
    """
    original, working = self.bytecode_path, self.working_bytecode_path
    logging.debug('$ cp %s %s', original, working)
    shutil.copyfile(original, working)
    clang.Compile([original], self.binary_path, copts=['-O0'])
    self.RunSetupCommand()

    first_step = random_opt_pb2.DelayedRewardStep(
        start_time_epoch_ms=labdate.MillisecondsTimestamp())
    new_episode = random_opt_pb2.DelayedRewardEpisode(step=[first_step])
    self.episodes.append(new_episode)
def ConvertOne() -> logging_pb2.LogRecord:
    """Append a LogRecord built from starting_match and lines_buffer.

    Uses starting_match, lines_buffer, records and year from the enclosing
    scope. Does nothing when starting_match is falsy.

    NOTE(review): despite the annotation, nothing is returned; the record is
    appended to `records`.
    """
    if not starting_match:
        return

    timestamp = DatetimeFromAbslTimestamp(starting_match.group('timestamp'),
                                          year=year)
    message_lines = [starting_match.group('contents')] + lines_buffer
    record = logging_pb2.LogRecord(
        level=ABSL_LEVEL_TO_LOG_RECORD_LEVEL[starting_match.group('lvl')],
        date_utc_epoch_ms=labdate.MillisecondsTimestamp(timestamp),
        thread_id=int(starting_match.group('thread_id')),
        file_name=starting_match.group('filename'),
        line_number=int(starting_match.group('lineno')),
        message='\n'.join(message_lines).rstrip())
    records.append(record)
def TestingLoop(min_interesting_results: int,
                max_testing_time_seconds: int,
                batch_size: int,
                generator: base_generator.GeneratorServiceBase,
                dut_harness: base_harness.HarnessBase,
                gs_harness: base_harness.HarnessBase,
                filters: difftests.FiltersBase,
                interesting_results_dir: pathlib.Path,
                start_time: float = None) -> None:
    """The main fuzzing loop.

    Batches are run until enough interesting results were found or the time
    budget is used up. Every interesting result is written to its own
    '<millisecond timestamp>.pbtxt' file.

    Args:
      min_interesting_results: The minimum number of interesting results to
        find.
      max_testing_time_seconds: The maximum time allowed to find interesting
        results.
      batch_size: The number of testcases to generate and execute in each
        batch.
      generator: A testcase generator.
      dut_harness: The device under test.
      gs_harness: The device to compare outputs against.
      filters: A filters instance for testcases.
      interesting_results_dir: The directory to write interesting results to.
        It is created if it does not exist.
      start_time: The starting time, as returned by time.time(). If not
        provided, the starting time will be the moment that this function is
        called. Set this value if you would like to include initialization
        overhead in the calculated testing time.
    """
    start_time = start_time or time.time()
    interesting_results_dir.mkdir(parents=True, exist_ok=True)
    num_interesting_results = 0
    batch_num = 0
    while (num_interesting_results < min_interesting_results and
           time.time() < start_time + max_testing_time_seconds):
        batch_num += 1
        logging.info('Starting generate / test / eval batch %d ...',
                     batch_num)
        interesting_results = RunBatch(generator, dut_harness, gs_harness,
                                       filters, batch_size)
        num_interesting_results += len(interesting_results)
        for result in interesting_results:
            # Results written within the same millisecond would otherwise
            # share a file name and overwrite each other, so advance the
            # stamp until the name is free.
            stamp = labdate.MillisecondsTimestamp()
            result_path = interesting_results_dir / (str(stamp) + '.pbtxt')
            while result_path.exists():
                stamp += 1
                result_path = (interesting_results_dir /
                               (str(stamp) + '.pbtxt'))
            pbutil.ToFile(result, result_path)

    elapsed_seconds = time.time() - start_time
    num_testcases = batch_num * batch_size
    # No batch runs when the targets are already met or the time budget is
    # spent on entry; avoid dividing by zero in that case.
    ms_per_testcase = ((elapsed_seconds / num_testcases) * 1000
                       if num_testcases else 0.0)
    logging.info(
        'Stopping after %.2f seconds and %s batches (%.0fms / testcase).\n'
        'Found %s interesting results.', elapsed_seconds,
        humanize.intcomma(batch_num), ms_per_testcase,
        num_interesting_results)
    logging.flush()
def SetProto(self, proto: deepsmith_pb2.ProfilingEvent) -> \
        deepsmith_pb2.ProfilingEvent:
    """Copy this event's fields into a protocol buffer message.

    Args:
      proto: The message to populate; it is modified in place.

    Returns:
      The same message that was passed in.
    """
    start_ms = labdate.MillisecondsTimestamp(self.event_start)
    proto.client = self.client.string
    proto.type = self.type.string
    proto.duration_ms = self.duration_ms
    proto.event_start_epoch_ms = start_ms
    return proto
def RunTestcase(opencl_environment: env.OpenCLEnvironment,
                testbed: deepsmith_pb2.Testbed,
                testcase: deepsmith_pb2.Testcase,
                cflags: typing.List[str]) -> deepsmith_pb2.Result:
    """Compile and execute the driver for a testcase on a testbed.

    Args:
      opencl_environment: Provides the platform/device ids and executes the
        driver binary.
      testbed: The testbed; copied into the result. Optimizations are enabled
        when its 'opencl_opt' option equals 'enabled'.
      testcase: The testcase to run. MakeDriver() annotates it.
      cflags: Compiler flags forwarded to CompileDriver().

    Returns:
      A Result message. If the driver fails to compile, the outcome is
      UNKNOWN and no outputs are set.

    Raises:
      ValueError: If the toolchain is not 'opencl' or the harness is not
        'cldrive'.
    """
    if testcase.toolchain != 'opencl':
        raise ValueError(
            f"Unsupported testcase toolchain: '{testcase.toolchain}'")
    if testcase.harness.name != 'cldrive':
        raise ValueError(
            f"Unsupported testcase harness: '{testcase.harness.name}'")

    result = deepsmith_pb2.Result()
    result.testbed.CopyFrom(testbed)
    platform_id, device_id = opencl_environment.ids()
    optimizations_on = testbed.opts['opencl_opt'] == 'enabled'
    driver = MakeDriver(testcase, optimizations_on)
    # MakeDriver() annotates the testcase, so we must only set the testcase
    # field of the output result after we have called it.
    result.testcase.CopyFrom(testcase)

    # Get a temporary file to write and run the driver from.
    with tempfile.NamedTemporaryFile(prefix='deepsmith_',
                                     delete=False) as f:
        path = pathlib.Path(f.name)

    try:
        CompileDriver(driver, path, platform_id, device_id, cflags=cflags)
        timeout = testcase.harness.opts.get('timeout_seconds', '60')
        command = ['timeout', '-s9', timeout, f.name]

        started = labdate.GetUtcMillisecondsNow()
        proc = opencl_environment.Exec(command)
        finished = labdate.GetUtcMillisecondsNow()

        # Build result message.
        result.returncode = proc.returncode
        result.outputs['stdout'] = proc.stdout
        result.outputs['stderr'] = proc.stderr

        elapsed_seconds = (finished - started).total_seconds()
        runtime = result.profiling_events.add()
        runtime.client = system.HOSTNAME
        runtime.type = 'runtime'
        runtime.duration_ms = int(round(elapsed_seconds * 1000))
        runtime.event_start_epoch_ms = labdate.MillisecondsTimestamp(
            started)

        result.outcome = GetResultOutcome(result)
    except DriverCompilationError as e:
        logging.warning('%s', e)
        result.outcome = deepsmith_pb2.Result.UNKNOWN
    finally:
        # The temporary file was created with delete=False, so remove it.
        fs.rm(path)

    return result
def __init__(self, directory: pathlib.Path, name: str = 'profile'):
    """Create the profiling log file and write its CSV header.

    Args:
      directory: An existing directory in which the log file is created.
      name: The first component of the log file name.

    Raises:
      ValueError: If `directory` is not an existing directory.
    """
    self._directory = pathlib.Path(directory)
    if not self._directory.is_dir():
        raise ValueError(f"Directory not found: {directory}")
    self._name = name

    # The file name is fixed now so that it carries the time at which
    # execution started: <name>.<hostname>.<timestamp>.csv
    created_ms = labdate.MillisecondsTimestamp()
    name_parts = (self._name, system.HOSTNAME, str(created_ms), 'csv')
    self._path = self._directory / '.'.join(name_parts)

    header = ('Start Time (ms since UNIX epoch)', 'Elapsed Time (ms)',
              'Event')
    with self._writer() as writer:
        writer.writerow(header)
def GetRepositoryMetadata(
        repo: Repository.Repository) -> scrape_repos_pb2.GitHubRepoMetadata:
    """Get metadata about a GitHub repository.

    Args:
      repo: A Repository instance.

    Returns:
      A GitHubRepoMetadata instance holding the scrape time plus the
      repository's owner login, name, watcher / fork / stargazer counts and
      clone URL.
    """
    # The return annotation names the message class itself; it must not be a
    # call, which would build a throwaway message when the function is
    # defined.
    meta = scrape_repos_pb2.GitHubRepoMetadata()
    meta.scraped_utc_epoch_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())
    meta.owner = repo.owner.login
    meta.name = repo.name
    meta.num_watchers = repo.watchers_count
    meta.num_forks = repo.forks_count
    meta.num_stars = repo.stargazers_count
    meta.clone_from_url = repo.clone_url
    return meta
def CreatePackageManifest(
        package_root: pathlib.Path,
        contents: typing.List[pathlib.Path]) -> dpack_pb2.DataPackage:
    """Build a DataPackage manifest for the given package files.

    Args:
      package_root: The root of the package.
      contents: A list of relative paths to files to include.

    Returns:
      A DataPackage with an empty comment, the packaging time, and one file
      entry per path in `contents`.
    """
    packaged_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())

    manifest = dpack_pb2.DataPackage()
    manifest.comment = ''
    manifest.utc_epoch_ms_packaged = packaged_ms

    for relative_path in contents:
        entry = manifest.file.add()
        SetDataPackageFileAttributes(package_root, relative_path, entry)
        # The comment is always assigned, falling back to the empty string.
        entry.comment = entry.comment if entry.comment else ''
    return manifest
def FromText(cls, text: str, atoms: typing.Set[str]) -> 'GreedyAtomizer':
    """Instantiate an atomizer from a corpus text.

    Args:
      text: Text corpus.
      atoms: A set of multi-character tokens.

    Returns:
      An atomizer instance whose vocabulary contains only the tokens needed
      to encode `text`, indexed in sorted order.

    Raises:
      UserError: If `atoms` is empty.
    """
    if not atoms:
        raise errors.UserError('No atoms specified')

    # Instantiate a greedy atomizer using the full vocabulary.
    full_vocab = {atom: index for index, atom in enumerate(atoms)}
    c = GreedyAtomizer(full_vocab, determine_chars=True)

    # Derive the subset of the vocabulary required to encode the given text.
    tokens = sorted(set(c.TokenizeString(text)))
    vocab_subset = {token: index for index, token in enumerate(tokens)}

    # Return a new atomizer using the subset vocabulary.
    return GreedyAtomizer(vocab_subset)
def SampleFast(self,
               sampler: samplers.Sampler,
               min_num_samples: int,
               seed: int = None) -> typing.List[model_pb2.Sample]:
    """Sample a model, without printing or caching samples.

    Train() is always called first. Because samples are not cached, infinite
    sampling loops are not supported, since the sample protos must be
    returned at some point.

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to return. Sampling
        occurs in whole batches, so more samples than this may be returned.
        E.g. if min_num_samples is 7 and the batch size is 10, 10 samples
        will be returned.
      seed: Forwarded to the backend's InitSampling().

    Returns:
      A list of Sample protos.

    Errors raised by Train(), the sampler or the backend propagate unchanged.
    """
    self.Train()

    sample_count = 1
    with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                               self.cache.path / 'logs'):
        logging.info("Sampling: '%s'", sampler.start_text)
        sample_start_time = labdate.MillisecondsTimestamp()
        atomizer = self.corpus.atomizer
        sampler.Specialize(atomizer)
        batch_size = self.backend.InitSampling(sampler, seed)
        samples = []

        # Per-sample batch outer loop. Continues until we have as many
        # samples as we want.
        while True:
            samples_in_progress = [
                sampler.tokenized_start_text.copy()
                for _ in range(batch_size)
            ]
            # The builtin bool is used as dtype: the np.bool alias was
            # removed from NumPy.
            done = np.zeros(batch_size, dtype=bool)
            start_time = labdate.MillisecondsTimestamp()
            wall_time_start = start_time

            self.backend.InitSampleBatch(sampler, batch_size)

            # Sampling loop. Continues until all samples in the batch are
            # done.
            while True:
                indices = self.backend.SampleNextIndices(sampler, batch_size)

                # Iterate over all samples in batch to determine whether
                # they're done.
                for i in range(batch_size):
                    if done[i]:
                        continue

                    token = atomizer.decoder[indices[i]]
                    samples_in_progress[i].append(token)
                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = True
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        sample_count += 1
                        samples.append(sample)
                        wall_time_start = labdate.MillisecondsTimestamp()

                # Complete the batch.
                if done.all():
                    break

            # Complete sampling. Note that sample_count starts at 1.
            if sample_count > min_num_samples:
                now = labdate.MillisecondsTimestamp()
                # max() guards the rate against an empty sample list.
                logging.info(
                    'Produced %s samples at a rate of %s ms / sample.',
                    humanize.intcomma(len(samples)),
                    humanize.intcomma(
                        int((now - sample_start_time) /
                            max(len(samples), 1))))
                break

    return samples
def Sample(self,
           sampler: samplers.Sampler,
           min_num_samples: int,
           seed: int = None) -> typing.List[model_pb2.Sample]:
    """Sample a model, printing and caching every sample.

    Train() is always called first. Each completed sample is printed to
    stdout and written to '<sha256 of the sample text>.pbtxt' in the sampler
    cache directory.

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to return. Sampling
        occurs in whole batches, so more samples than this may be produced.
        E.g. if min_num_samples is 7 and the batch size is 10, 10 samples
        will be returned. Samples are only collected for return when this
        value is positive.
      seed: Forwarded to the backend's InitSampling().

    Returns:
      A list of Sample protos (empty if min_num_samples is not positive).

    Errors raised by Train(), the sampler or the backend propagate unchanged.
    """
    self.Train()

    sample_count = 1
    self.SamplerCache(sampler).mkdir(exist_ok=True)
    with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                               self.cache.path / 'logs'):
        logging.info("Sampling: '%s'", sampler.start_text)
        if min_num_samples < 0:
            # NOTE(review): the termination test below is already satisfied
            # after the first batch for a negative min_num_samples, so the
            # loop does not actually run forever - confirm which behavior is
            # intended.
            logging.warning(
                'Entering an infinite sample loop, this process will never end!'
            )
        sample_start_time = labdate.MillisecondsTimestamp()

        atomizer = self.corpus.atomizer
        sampler.Specialize(atomizer)
        batch_size = self.backend.InitSampling(sampler, seed)
        samples = []
        sample_dir = self.SamplerCache(sampler)

        # Per-sample batch outer loop. Continues until we have as many
        # samples as we want.
        while True:
            samples_in_progress = [
                sampler.tokenized_start_text.copy()
                for _ in range(batch_size)
            ]
            # The builtin bool is used as dtype: the np.bool alias was
            # removed from NumPy.
            done = np.zeros(batch_size, dtype=bool)
            start_time = labdate.MillisecondsTimestamp()
            wall_time_start = start_time

            self.backend.InitSampleBatch(sampler, batch_size)

            # Sampling loop. Continues until all samples in the batch are
            # done.
            while True:
                indices = self.backend.SampleNextIndices(sampler, batch_size)

                # Iterate over all samples in batch to determine whether
                # they're done.
                for i in range(batch_size):
                    if done[i]:
                        continue

                    token = atomizer.decoder[indices[i]]
                    samples_in_progress[i].append(token)
                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = True
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        print(f'=== BEGIN CLGEN SAMPLE {sample_count} '
                              f'===\n\n{sample.text}\n')
                        sample_count += 1
                        sample_id = crypto.sha256_str(sample.text)
                        sample_path = sample_dir / f'{sample_id}.pbtxt'
                        pbutil.ToFile(sample, sample_path)
                        if min_num_samples > 0:
                            samples.append(sample)
                        wall_time_start = labdate.MillisecondsTimestamp()

                # Complete the batch.
                if done.all():
                    break

            # Complete sampling. Note that sample_count starts at 1.
            if sample_count > min_num_samples:
                now = labdate.MillisecondsTimestamp()
                logging.info(
                    'Produced %s samples at a rate of %s ms / sample.',
                    humanize.intcomma(len(samples)),
                    humanize.intcomma(
                        int((now - sample_start_time) /
                            max(len(samples), 1))))
                break

    return samples
def test_GetRepositoryMetadata_timestamp():
    """Test that the timestamp in metadata is set to (approx) now."""
    now_ms = labdate.MillisecondsTimestamp(labdate.GetUtcMillisecondsNow())
    meta = scraper.GetRepositoryMetadata(MockRepository())
    # now_ms is taken first, so `now_ms - scraped` is never positive and a
    # one-sided bound on it could not fail. Compare the absolute difference
    # so that a timestamp far from now in either direction is caught.
    assert abs(meta.scraped_utc_epoch_ms - now_ms) <= 1000
def Sample(self,
           sampler: samplers.Sampler,
           min_num_samples: int,
           seed: int = None) -> typing.Iterable[model_pb2.Sample]:
    """Sample a model, writing every sample to the sampler cache.

    Unlike the training-backed variants, no Train() call is made here. Each
    completed sample is announced on stdout and written to
    'Sample<N>.mdl' in the sampler cache directory.

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to produce. Sampling
        occurs in whole batches, so more samples than this may be produced.
        E.g. if min_num_samples is 7 and the batch size is 10, 10 samples
        will be produced. Samples are only collected for return when this
        value is positive.
      seed: Forwarded to the backend's InitSampling().

    Returns:
      A list of Sample protos (empty if min_num_samples is not positive).

    Errors raised by the sampler or the backend propagate unchanged.
    """
    sample_count = 1
    self.SamplerCache(sampler).mkdir(exist_ok=True)
    atomizer = self.atomizer
    sampler.Specialize(atomizer)
    batch_size = self.backend.InitSampling(sampler, seed)
    sample_start_time = labdate.MillisecondsTimestamp()
    samples = []
    sample_dir = self.SamplerCache(sampler)
    print("batch Size : " + str(batch_size))

    # The flag has the form "<technique> <value>". The value part may be
    # missing (e.g. plain "default"), so do not index it unconditionally.
    technique_parts = FLAGS.sampling_technique.split(" ")
    technique = technique_parts[0]
    technique_value = technique_parts[1] if len(technique_parts) > 1 else ''
    if technique in ["topK", "nucleus", "beam", "default"]:
        logging.info('Sampling technique set to : %s with value : %s ',
                     technique, technique_value)
    else:
        logging.info('Sampling technique set to : default')

    # Per-sample batch outer loop. Continues until we have as many samples
    # as we want.
    while True:
        samples_in_progress = [
            sampler.tokenized_start_text.copy() for _ in range(batch_size)
        ]
        # The builtin bool is used as dtype: the np.bool alias was removed
        # from NumPy.
        done = np.zeros(batch_size, dtype=bool)
        start_time = labdate.MillisecondsTimestamp()
        wall_time_start = start_time

        self.backend.InitSampleBatch(sampler, batch_size)

        # Sampling loop. Continues until all samples in the batch are done.
        while True:
            indices = self.backend.SampleNextIndices(sampler, batch_size)

            # Iterate over all samples in batch to determine whether they're
            # done.
            for i in range(batch_size):
                if done[i]:
                    continue

                token = atomizer.decoder[indices[i]]
                samples_in_progress[i].append(token)
                if sampler.SampleIsComplete(samples_in_progress[i]):
                    end_time = labdate.MillisecondsTimestamp()
                    done[i] = True
                    sample = model_pb2.Sample(
                        text=''.join(samples_in_progress[i]),
                        sample_start_epoch_ms_utc=start_time,
                        sample_time_ms=end_time - start_time,
                        wall_time_ms=end_time - wall_time_start,
                        num_tokens=len(samples_in_progress[i]))
                    print(
                        f'=== BEGIN SIMULINK MDL SAMPLE (Pretrained Model){sample_count} '
                    )
                    sample_count += 1
                    # NOTE(review): the counter is incremented before the
                    # file name is built, so the first file is Sample2.mdl -
                    # confirm whether consumers rely on this numbering.
                    sample_path = sample_dir / f'Sample{sample_count}.mdl'
                    pbutil.ToFile(sample, sample_path)
                    if min_num_samples > 0:
                        samples.append(sample)
                    wall_time_start = labdate.MillisecondsTimestamp()

            # Complete the batch.
            if done.all():
                break

        # Complete sampling. Note that sample_count starts at 1.
        if sample_count > min_num_samples:
            now = labdate.MillisecondsTimestamp()
            logging.info(
                'Produced %s samples at a rate of %s ms / sample.',
                humanize.intcomma(sample_count - 1),
                humanize.intcomma(
                    int((now - sample_start_time) /
                        max(sample_count - 1, 1))))
            break

    return samples
def EpochBeginCallback(self) -> None:
    """Store the current millisecond timestamp as the epoch's begin time."""
    began_ms = labdate.MillisecondsTimestamp()
    self.last_epoch_begin_timestamp = began_ms
def test_MillisecondsTimestamp_invalid_argument():
    """Test that a string argument is rejected with a TypeError."""
    not_a_date = 'not a date'
    with pytest.raises(TypeError):
        labdate.MillisecondsTimestamp(not_a_date)
def test_Result_ToProto():
    """Test that Result.ToProto() carries every field over to the proto."""
    now = datetime.datetime.now()
    # Short aliases for the modules under test, to keep the fixture readable.
    ds = deeplearning.deepsmith
    events = ds.profiling_event

    result = ds.result.Result(
        testcase=ds.testcase.Testcase(
            toolchain=ds.toolchain.Toolchain(string='cpp'),
            generator=ds.generator.Generator(name='generator'),
            harness=ds.harness.Harness(name='harness'),
            inputset=[
                ds.testcase.TestcaseInput(
                    name=ds.testcase.TestcaseInputName(string='src'),
                    value=ds.testcase.TestcaseInputValue(
                        string='void main() {}'),
                ),
                ds.testcase.TestcaseInput(
                    name=ds.testcase.TestcaseInputName(string='data'),
                    value=ds.testcase.TestcaseInputValue(string='[1,2]'),
                ),
            ],
            invariant_optset=[
                ds.testcase.TestcaseInvariantOpt(
                    name=ds.testcase.TestcaseInvariantOptName(
                        string='config'),
                    value=ds.testcase.TestcaseInvariantOptValue(
                        string='opt'),
                ),
            ],
            profiling_events=[
                events.TestcaseProfilingEvent(
                    client=ds.client.Client(string='localhost'),
                    type=events.ProfilingEventType(string='generate',),
                    duration_ms=100,
                    event_start=now,
                ),
                events.TestcaseProfilingEvent(
                    client=ds.client.Client(string='localhost'),
                    type=events.ProfilingEventType(string='foo',),
                    duration_ms=100,
                    event_start=now,
                ),
            ]),
        testbed=ds.testbed.Testbed(
            toolchain=ds.toolchain.Toolchain(string='cpp'),
            name='clang',
            optset=[
                ds.testbed.TestbedOpt(
                    name=ds.testbed.TestbedOptName(string='arch'),
                    value=ds.testbed.TestbedOptValue(string='x86_64'),
                ),
                ds.testbed.TestbedOpt(
                    name=ds.testbed.TestbedOptName(string='build'),
                    value=ds.testbed.TestbedOptValue(string='debug+assert'),
                ),
            ],
        ),
        returncode=0,
        outputset=[
            ds.result.ResultOutput(
                name=ds.result.ResultOutputName(string='stdout'),
                value=ds.result.ResultOutputValue(
                    truncated_value='Hello, world!'),
            ),
            ds.result.ResultOutput(
                name=ds.result.ResultOutputName(string='stderr'),
                value=ds.result.ResultOutputValue(truncated_value=''),
            ),
        ],
        profiling_events=[
            events.ResultProfilingEvent(
                client=ds.client.Client(string='localhost'),
                type=events.ProfilingEventType(string='exec',),
                duration_ms=500,
                event_start=now,
            ),
            events.ResultProfilingEvent(
                client=ds.client.Client(string='localhost'),
                type=events.ProfilingEventType(string='overhead',),
                duration_ms=100,
                event_start=now,
            ),
        ],
        outcome_num=6,
    )
    proto = result.ToProto()

    # Testcase.
    assert proto.testcase.toolchain == 'cpp'
    assert proto.testcase.generator.name == 'generator'
    assert proto.testcase.harness.name == 'harness'
    assert len(proto.testcase.inputs) == 2
    assert proto.testcase.inputs['src'] == 'void main() {}'
    assert proto.testcase.inputs['data'] == '[1,2]'
    assert len(proto.testcase.invariant_opts) == 1
    assert proto.testcase.invariant_opts['config'] == 'opt'
    assert len(proto.testcase.profiling_events) == 2
    assert proto.testcase.profiling_events[0].client == 'localhost'
    assert proto.testcase.profiling_events[0].type == 'generate'
    # Previously the client assertion was repeated here; check the duration
    # instead, mirroring the result-level checks below.
    assert proto.testcase.profiling_events[0].duration_ms == 100

    # Testbed.
    assert proto.testbed.toolchain == 'cpp'
    assert proto.testbed.name == 'clang'
    assert len(proto.testbed.opts) == 2
    assert proto.testbed.opts['arch'] == 'x86_64'
    assert proto.testbed.opts['build'] == 'debug+assert'

    # Outputs.
    assert len(proto.outputs) == 2
    assert proto.outputs['stdout'] == 'Hello, world!'
    assert proto.outputs['stderr'] == ''

    # Result-level profiling events (the length check used to re-test the
    # testcase-level list).
    assert len(proto.profiling_events) == 2
    assert proto.profiling_events[0].client == 'localhost'
    assert proto.profiling_events[0].type == 'exec'
    assert proto.profiling_events[0].duration_ms == 500
    assert (proto.profiling_events[0].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))
    assert proto.profiling_events[1].client == 'localhost'
    assert proto.profiling_events[1].type == 'overhead'
    assert proto.profiling_events[1].duration_ms == 100
    assert (proto.profiling_events[1].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))

    assert proto.outcome == deepsmith_pb2.Result.PASS
def test_timestamp_datetime_equivalence():
    """Test that a datetime survives a round trip through a timestamp."""
    original = labdate.GetUtcMillisecondsNow()
    round_tripped = labdate.DatetimeFromMillisecondsTimestamp(
        labdate.MillisecondsTimestamp(original))
    assert original == round_tripped
def SetProto(self, proto: model_pb2.Sample) -> None:
    """Copy this sample's fields into a Sample message, in place.

    Args:
      proto: The message to populate.
    """
    start_ms = labdate.MillisecondsTimestamp(self.sample_date)
    proto.text = self.text
    proto.num_tokens = self.num_tokens
    proto.wall_time_ms = self.wall_time_ms
    proto.sample_start_epoch_ms_utc = start_ms
def test_default_timestamp_datetime_equivalence():
    """Test that the argument-less timestamp falls on the current date."""
    reference = labdate.GetUtcMillisecondsNow()
    default_timestamp = labdate.MillisecondsTimestamp()
    decoded = labdate.DatetimeFromMillisecondsTimestamp(default_timestamp)
    assert reference.date() == decoded.date()