def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                      context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases whose 'src' input is a random lowercase string.

    Args:
        request: The request; request.num_testcases testcases are generated.
        context: Unused.

    Returns:
        A GenerateTestcasesResponse holding one testcase per requested
        testcase, each with a single 'generation' profiling event.
    """
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    # Generate random strings.
    for _ in range(request.num_testcases):
        # Pick a length for the random string. random.randint() includes both
        # endpoints, so the configured maximum is passed as-is; adding one to
        # it would allow strings longer than string_max_len.
        n = random.randint(self.config.string_min_len,
                           self.config.string_max_len)

        # Instantiate a testcase.
        testcase = response.testcases.add()
        testcase.toolchain = self.config.toolchain
        testcase.generator.CopyFrom(self.generator)

        # Only the string construction is timed.
        start_time = labdate.MillisecondsTimestamp()
        testcase.inputs['src'] = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(n))
        end_time = labdate.MillisecondsTimestamp()

        p = testcase.profiling_events.add()
        p.type = 'generation'
        p.event_start_epoch_ms = start_time
        p.duration_ms = end_time - start_time
    return response
def GenerateOneSource(self) -> typing.Tuple[str, int, int]:
    """Run CLSmith once and time the invocation.

    Returns:
        A (source, wall_time_ms, start_epoch_ms_utc) tuple: the value returned
        by clsmith.Exec(), the elapsed milliseconds around that call, and the
        millisecond timestamp taken just before it.
    """
    began_ms = labdate.MillisecondsTimestamp()
    clsmith_args = list(self.config.opt)
    program_src = clsmith.Exec(*clsmith_args)
    elapsed_ms = labdate.MillisecondsTimestamp() - began_ms
    return program_src, elapsed_ms, began_ms
def _create_lock():
    """Write a LockFile proto describing the owning process to self.path.

    `pid` and `self` are taken from the enclosing scope. When `pid` is None
    the current process ID is recorded instead.
    """
    owner_pid = pid if pid is not None else os.getpid()
    argv_string = ' '.join(sys.argv)
    acquired_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())
    lock_proto = lockfile_pb2.LockFile(
        owner_process_id=owner_pid,
        owner_process_argv=argv_string,
        date_acquired_utc_epoch_ms=acquired_ms,
        owner_hostname=system.HOSTNAME,
        owner_user=system.USERNAME)
    pbutil.ToFile(lock_proto, self.path, assume_filename='LOCK.pbtxt')
def RunTestcase(opencl_environment: env.OpenCLEnvironment,
                testbed: deepsmith_pb2.Testbed,
                testcase: deepsmith_pb2.Testcase,
                opts: typing.List[str]) -> deepsmith_pb2.Result:
    """Run a cl_launcher testcase and package the outcome as a Result.

    Args:
        opencl_environment: The OpenCL environment to execute on.
        testbed: The testbed; testbed.opts['opencl_opt'] selects whether the
            '---disable_opts' flag is passed to cl_launcher.
        testcase: The testcase to run. Must have toolchain 'opencl' and harness
            name 'cl_launcher', and provide 'src', 'gsize' and 'lsize' inputs.
        opts: Extra command line flags for cl_launcher. The list is not
            modified.

    Returns:
        A Result with the return code, stdout, stderr, a 'runtime' profiling
        event and the outcome set.

    Raises:
        ValueError: If the testcase toolchain or harness is not supported.
    """
    if testcase.toolchain != 'opencl':
        raise ValueError(
            f"Unsupported testcase toolchain: '{testcase.toolchain}'")
    if testcase.harness.name != 'cl_launcher':
        raise ValueError(
            f"Unsupported testcase harness: '{testcase.harness.name}'")

    # Set up additional command line flags for cl_launcher. We always run with
    # debugging output enabled. Work on a copy so that the caller's list does
    # not accumulate flags across repeated calls.
    launcher_opts = list(opts)
    launcher_opts.append('---debug')
    if testbed.opts['opencl_opt'] == 'disabled':
        launcher_opts.append('---disable_opts')

    start_time_epoch_ms = labdate.MillisecondsTimestamp()
    process = cl_launcher.ExecClsmithSource(
        opencl_environment, testcase.inputs['src'],
        driver.NDRange.FromString(testcase.inputs['gsize']),
        driver.NDRange.FromString(testcase.inputs['lsize']),
        *launcher_opts,
        timeout_seconds=testcase.harness.opts.get('timeout_seconds', '60'))
    wall_time = labdate.MillisecondsTimestamp() - start_time_epoch_ms

    result = deepsmith_pb2.Result()
    result.testcase.CopyFrom(testcase)
    result.testbed.CopyFrom(testbed)
    result.returncode = process.returncode
    result.outputs['stdout'] = process.stdout
    result.outputs['stderr'] = process.stderr

    prof = result.profiling_events.add()
    prof.client = socket.gethostname()
    prof.type = 'runtime'
    prof.duration_ms = wall_time
    prof.event_start_epoch_ms = start_time_epoch_ms

    result.outcome = GetResultOutcome(result)
    return result
def EpochEndCallback(self, epoch: int, loss: float):
    """Write a telemetry proto for the epoch that just finished.

    The epoch wall time is the difference between the current millisecond
    timestamp and self.last_epoch_begin_timestamp.

    Args:
        epoch: The epoch number; also used in the output file name.
        loss: The loss value to record.
    """
    finished_ms = labdate.MillisecondsTimestamp()
    record = telemetry_pb2.ModelEpochTelemetry(
        timestamp_utc_epoch_ms=finished_ms,
        epoch_num=epoch,
        epoch_wall_time_ms=finished_ms - self.last_epoch_begin_timestamp,
        loss=loss,
    )
    telemetry_path = self.logdir / f'epoch_{epoch:03d}_telemetry.pbtxt'
    pbutil.ToFile(record, telemetry_path)
def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                      context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases by copying the configured testcase.

    Args:
        request: The request; request.num_testcases testcases are generated.
        context: Unused.

    Returns:
        A GenerateTestcasesResponse in which every testcase is a copy of
        self.config.testcase_to_generate with the generator field set and a
        'generation' profiling event appended.
    """
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    remaining = request.num_testcases
    while remaining > 0:
        remaining -= 1

        generated = response.testcases.add()
        generated.CopyFrom(self.config.testcase_to_generate)
        generated.generator.CopyFrom(self.generator)

        # The two timestamps are taken back to back; no work is timed.
        began_ms = labdate.MillisecondsTimestamp()
        ended_ms = labdate.MillisecondsTimestamp()

        event = generated.profiling_events.add()
        event.type = 'generation'
        event.event_start_epoch_ms = began_ms
        event.duration_ms = ended_ms - began_ms
    return response
def TestingLoop(min_interesting_results: int,
                max_testing_time_seconds: int,
                batch_size: int,
                generator: base_generator.GeneratorServiceBase,
                dut_harness: base_harness.HarnessBase,
                gs_harness: base_harness.HarnessBase,
                filters: difftests.FiltersBase,
                interesting_results_dir: pathlib.Path,
                start_time: float = None) -> None:
    """The main fuzzing loop.

    Runs batches until either enough interesting results have been found or
    the time budget is spent, writing every interesting result to a file.

    Args:
        min_interesting_results: The minimum number of interesting results to
            find.
        max_testing_time_seconds: The maximum time allowed to find interesting
            results.
        batch_size: The number of testcases to generate and execute in each
            batch.
        generator: A testcase generator.
        dut_harness: The device under test.
        gs_harness: The device to compare outputs against.
        filters: A filters instance for testcases.
        interesting_results_dir: The directory to write interesting results to.
        start_time: The starting time, as returned by time.time(). If not
            provided, the starting time will be the moment that this function
            is called. Set this value if you would like to include
            initialization overhead in the calculated testing time.
    """
    start_time = start_time or time.time()
    interesting_results_dir.mkdir(parents=True, exist_ok=True)
    num_interesting_results = 0
    batch_num = 0
    while (num_interesting_results < min_interesting_results and
           time.time() < start_time + max_testing_time_seconds):
        batch_num += 1
        logging.info('Starting generate / test / eval batch %d ...', batch_num)
        interesting_results = RunBatch(generator, dut_harness, gs_harness,
                                       filters, batch_size)
        num_interesting_results += len(interesting_results)
        for result in interesting_results:
            # NOTE(review): the file name is only the current millisecond
            # timestamp, so two results written within the same millisecond
            # would share a path. Confirm whether that can happen in practice.
            pbutil.ToFile(
                result, interesting_results_dir /
                (str(labdate.MillisecondsTimestamp()) + '.pbtxt'))

    elapsed_seconds = time.time() - start_time
    # The loop body may never have run (e.g. the time budget was already
    # spent), in which case there is no per-testcase time to report.
    num_testcases = batch_num * batch_size
    if num_testcases:
        ms_per_testcase = (elapsed_seconds / num_testcases) * 1000
    else:
        ms_per_testcase = 0.0
    logging.info(
        'Stopping after %.2f seconds and %s batches (%.0fms / testcase).\n'
        'Found %s interesting results.', elapsed_seconds,
        humanize.intcomma(batch_num), ms_per_testcase,
        num_interesting_results)
    logging.flush()
def ConvertOne() -> logging_pb2.LogRecord:
    """Append a LogRecord built from starting_match and lines_buffer.

    The record is appended to `records`; `starting_match`, `lines_buffer` and
    `records` all come from the enclosing scope. Nothing happens when there is
    no starting_match.
    """
    if not starting_match:
        return

    level = ABSL_LEVEL_TO_LOG_RECORD_LEVEL[starting_match.group('lvl')]
    date_ms = labdate.MillisecondsTimestamp(
        DatetimeFromAbslTimestamp(starting_match.group('timestamp')))
    thread_id = int(starting_match.group('thread_id'))
    file_name = starting_match.group('filename')
    line_number = int(starting_match.group('lineno'))
    # The first line of the message comes from the match, continuation lines
    # from the buffer.
    message_lines = [starting_match.group('contents')] + lines_buffer
    records.append(
        logging_pb2.LogRecord(
            level=level,
            date_utc_epoch_ms=date_ms,
            thread_id=thread_id,
            file_name=file_name,
            line_number=line_number,
            message='\n'.join(message_lines).rstrip()))
def RunTestcase(opencl_environment: env.OpenCLEnvironment,
                testbed: deepsmith_pb2.Testbed,
                testcase: deepsmith_pb2.Testcase,
                cflags: typing.List[str]) -> deepsmith_pb2.Result:
    """Compile and run a cldrive testcase, returning a Result.

    Args:
        opencl_environment: The OpenCL environment to execute on.
        testbed: The testbed; testbed.opts['opencl_opt'] selects whether the
            driver is made with optimizations enabled.
        testcase: The testcase to run. Must have toolchain 'opencl' and harness
            name 'cldrive'.
        cflags: Flags forwarded to CompileDriver().

    Returns:
        A Result. If the driver fails to compile, only the testbed, testcase
        and an UNKNOWN outcome are set.

    Raises:
        ValueError: If the testcase toolchain or harness is not supported.
    """
    if testcase.toolchain != 'opencl':
        raise ValueError(
            f"Unsupported testcase toolchain: '{testcase.toolchain}'")
    if testcase.harness.name != 'cldrive':
        raise ValueError(
            f"Unsupported testcase harness: '{testcase.harness.name}'")

    result = deepsmith_pb2.Result()
    result.testbed.CopyFrom(testbed)
    platform_id, device_id = opencl_environment.ids()
    optimizations_enabled = testbed.opts['opencl_opt'] == 'enabled'
    driver = MakeDriver(testcase, optimizations_enabled)
    # MakeDriver() annotates the testcase, so we must only set the testcase
    # field of the output result after we have called it.
    result.testcase.CopyFrom(testcase)

    # Get a temporary file to write and run the driver from. delete=False
    # keeps the file on disk once the handle is closed; it is removed in the
    # finally clause below.
    with tempfile.NamedTemporaryFile(prefix='deepsmith_', delete=False) as f:
        path = pathlib.Path(f.name)

    try:
        CompileDriver(driver, path, platform_id, device_id, cflags=cflags)
        timeout = testcase.harness.opts.get('timeout_seconds', '60')
        cmd = ['timeout', '-s9', timeout, f.name]
        began = labdate.GetUtcMillisecondsNow()
        proc = opencl_environment.Exec(cmd)
        finished = labdate.GetUtcMillisecondsNow()

        # Build result message.
        result.returncode = proc.returncode
        result.outputs['stdout'] = proc.stdout
        result.outputs['stderr'] = proc.stderr

        elapsed_ms = (finished - began).total_seconds() * 1000
        runtime = result.profiling_events.add()
        runtime.client = system.HOSTNAME
        runtime.type = 'runtime'
        runtime.duration_ms = int(round(elapsed_ms))
        runtime.event_start_epoch_ms = labdate.MillisecondsTimestamp(began)

        result.outcome = GetResultOutcome(result)
    except DriverCompilationError as e:
        logging.warning('%s', e)
        result.outcome = deepsmith_pb2.Result.UNKNOWN
    finally:
        fs.rm(path)
    return result
def SetProto(
        self, proto: deepsmith_pb2.ProfilingEvent
) -> deepsmith_pb2.ProfilingEvent:
    """Copy this profiling event's fields onto a protocol buffer.

    Args:
        proto: The ProfilingEvent message to populate.

    Returns:
        The same message that was passed in, with client, type, duration_ms
        and event_start_epoch_ms set.
    """
    client_name = self.client.string
    type_name = self.type.string
    proto.client = client_name
    proto.type = type_name
    proto.duration_ms = self.duration_ms
    start_ms = labdate.MillisecondsTimestamp(self.event_start)
    proto.event_start_epoch_ms = start_ms
    return proto
def GetRepositoryMetadata(
        repo: Repository.Repository) -> scrape_repos_pb2.GitHubRepoMetadata:
    """Get metadata about a GitHub repository.

    The return annotation names the message class; the original annotation
    called it, which built and discarded an instance at definition time.

    Args:
        repo: A Repository instance.

    Returns:
        A GitHubRepoMetadata instance whose scraped_utc_epoch_ms is the current
        UTC time and whose remaining fields are copied from the repository.
    """
    meta = scrape_repos_pb2.GitHubRepoMetadata()
    meta.scraped_utc_epoch_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())
    meta.owner = repo.owner.login
    meta.name = repo.name
    meta.num_watchers = repo.watchers_count
    meta.num_forks = repo.forks_count
    meta.num_stars = repo.stargazers_count
    meta.clone_from_url = repo.clone_url
    return meta
def CreatePackageManifest(
        package_root: pathlib.Path,
        contents: typing.List[pathlib.Path]) -> dpack_pb2.DataPackage:
    """Build the DataPackage manifest for a set of package files.

    Args:
        package_root: The root of the package.
        contents: A list of relative paths to files to include.

    Returns:
        A DataPackage with an empty comment, the current UTC time as packaging
        timestamp, and one file entry per path in contents.
    """
    packaged_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())

    package = dpack_pb2.DataPackage()
    package.comment = ''
    package.utc_epoch_ms_packaged = packaged_ms
    for relpath in contents:
        entry = manifest_file = package.file.add()
        SetDataPackageFileAttributes(package_root, relpath, manifest_file)
        # Assign the comment even when it is empty.
        if not entry.comment:
            entry.comment = ''
        else:
            entry.comment = entry.comment
    return package
def FromText(cls, text: str, atoms: typing.Set[str]) -> 'GreedyAtomizer':
    """Instantiate an atomizer from a corpus text.

    A first atomizer is built over the full set of atoms and used to tokenize
    the text; the returned atomizer's vocabulary contains only the tokens that
    actually occurred, indexed in sorted order.

    Args:
        text: Text corpus
        atoms: A list of multi-character tokens.

    Returns:
        An atomizer instance.

    Raises:
        errors.UserError: If atoms is empty.
    """
    if not atoms:
        raise errors.UserError('No atoms specified')

    # Instantiate a greedy atomizer using the full vocabulary.
    full_vocab = dict(zip(atoms, range(len(atoms))))
    c = GreedyAtomizer(full_vocab, determine_chars=True)
    # Derive the subset of the vocabulary required to encode the given text.
    tokens = sorted(set(c.TokenizeString(text)))
    vocab_subset = dict(zip(tokens, range(len(tokens))))
    # Return a new atomizer using the subset vocabulary.
    return GreedyAtomizer(vocab_subset)
def test_GetRepositoryMetadata_timestamp():
    """Test that the timestamp in metadata is set to (approximately) now."""
    now_ms = labdate.MillisecondsTimestamp(labdate.GetUtcMillisecondsNow())
    meta = scraper.GetRepositoryMetadata(MockRepository())
    # now_ms is taken before the call, so the metadata timestamp is expected
    # to be the later of the two. Compare the absolute difference so that a
    # timestamp far in the future fails as well.
    assert abs(now_ms - meta.scraped_utc_epoch_ms) <= 1000
def test_Result_ToProto():
    """Test that Result.ToProto() carries every field over to the proto."""
    ds = deeplearning.deepsmith
    now = datetime.datetime.now()

    def _Event(event_class, type_name, duration_ms):
        """Build a profiling event of the given class on client 'localhost'."""
        return event_class(
            client=ds.client.Client(string='localhost'),
            type=ds.profiling_event.ProfilingEventType(string=type_name),
            duration_ms=duration_ms,
            event_start=now,
        )

    testcase_event = ds.profiling_event.TestcaseProfilingEvent
    result_event = ds.profiling_event.ResultProfilingEvent

    result = ds.result.Result(
        testcase=ds.testcase.Testcase(
            toolchain=ds.toolchain.Toolchain(string='cpp'),
            generator=ds.generator.Generator(name='generator'),
            harness=ds.harness.Harness(name='harness'),
            inputset=[
                ds.testcase.TestcaseInput(
                    name=ds.testcase.TestcaseInputName(string='src'),
                    value=ds.testcase.TestcaseInputValue(
                        string='void main() {}'),
                ),
                ds.testcase.TestcaseInput(
                    name=ds.testcase.TestcaseInputName(string='data'),
                    value=ds.testcase.TestcaseInputValue(string='[1,2]'),
                ),
            ],
            invariant_optset=[
                ds.testcase.TestcaseInvariantOpt(
                    name=ds.testcase.TestcaseInvariantOptName(
                        string='config'),
                    value=ds.testcase.TestcaseInvariantOptValue(string='opt'),
                ),
            ],
            profiling_events=[
                _Event(testcase_event, 'generate', 100),
                _Event(testcase_event, 'foo', 100),
            ]),
        testbed=ds.testbed.Testbed(
            toolchain=ds.toolchain.Toolchain(string='cpp'),
            name='clang',
            optset=[
                ds.testbed.TestbedOpt(
                    name=ds.testbed.TestbedOptName(string='arch'),
                    value=ds.testbed.TestbedOptValue(string='x86_64'),
                ),
                ds.testbed.TestbedOpt(
                    name=ds.testbed.TestbedOptName(string='build'),
                    value=ds.testbed.TestbedOptValue(string='debug+assert'),
                ),
            ],
        ),
        returncode=0,
        outputset=[
            ds.result.ResultOutput(
                name=ds.result.ResultOutputName(string='stdout'),
                value=ds.result.ResultOutputValue(
                    truncated_value='Hello, world!'),
            ),
            ds.result.ResultOutput(
                name=ds.result.ResultOutputName(string='stderr'),
                value=ds.result.ResultOutputValue(truncated_value=''),
            ),
        ],
        profiling_events=[
            _Event(result_event, 'exec', 500),
            _Event(result_event, 'overhead', 100),
        ],
        outcome_num=6,
    )
    proto = result.ToProto()

    # Testcase fields.
    assert proto.testcase.toolchain == 'cpp'
    assert proto.testcase.generator.name == 'generator'
    assert proto.testcase.harness.name == 'harness'
    assert len(proto.testcase.inputs) == 2
    assert proto.testcase.inputs['src'] == 'void main() {}'
    assert proto.testcase.inputs['data'] == '[1,2]'
    assert len(proto.testcase.invariant_opts) == 1
    assert proto.testcase.invariant_opts['config'] == 'opt'
    assert len(proto.testcase.profiling_events) == 2
    assert proto.testcase.profiling_events[0].client == 'localhost'
    assert proto.testcase.profiling_events[0].type == 'generate'

    # Testbed fields.
    assert proto.testbed.toolchain == 'cpp'
    assert proto.testbed.name == 'clang'
    assert len(proto.testbed.opts) == 2
    assert proto.testbed.opts['arch'] == 'x86_64'
    assert proto.testbed.opts['build'] == 'debug+assert'

    # Result outputs.
    assert len(proto.outputs) == 2
    assert proto.outputs['stdout'] == 'Hello, world!'
    assert proto.outputs['stderr'] == ''

    # Result profiling events. The length check targets the result's own
    # events; the testcase's events are already checked above.
    assert len(proto.profiling_events) == 2
    assert proto.profiling_events[0].client == 'localhost'
    assert proto.profiling_events[0].type == 'exec'
    assert proto.profiling_events[0].duration_ms == 500
    assert (proto.profiling_events[0].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))
    assert proto.profiling_events[1].client == 'localhost'
    assert proto.profiling_events[1].type == 'overhead'
    assert proto.profiling_events[1].duration_ms == 100
    assert (proto.profiling_events[1].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))

    assert proto.outcome == deepsmith_pb2.Result.PASS
<<<<<<< HEAD <<<<<<< HEAD ======= DatetimeFromAbslTimestamp( starting_match.group('timestamp'), year=year)), thread_id=int(starting_match.group('thread_id')), file_name=starting_match.group('filename'), line_number=int(starting_match.group('lineno')), message='\n'.join( [starting_match.group('contents')] + lines_buffer).rstrip())) ======= logging_pb2.LogRecord( level=ABSL_LEVEL_TO_LOG_RECORD_LEVEL[starting_match.group('lvl')], date_utc_epoch_ms=labdate.MillisecondsTimestamp( DatetimeFromAbslTimestamp( starting_match.group('timestamp'), year=year, ),), thread_id=int(starting_match.group('thread_id')), file_name=starting_match.group('filename'), line_number=int(starting_match.group('lineno')), message='\n'.join( [starting_match.group('contents')] + lines_buffer,).rstrip(), ),) >>>>>>> 49340dc00... Auto-format labm8 python files.:labm8/logutil.py for line in logs.split('\n'): >>>>>>> 620197b9a... Fix year on logutil.:labm8/logutil.py ======= >>>>>>> 4242aed2a... Automated code format. =======
def test_Testcase_ToProto():
    """Test that Testcase.ToProto() carries every field over to the proto."""
    ds = deeplearning.deepsmith
    event_start = labdate.GetUtcMillisecondsNow()

    src_input = ds.testcase.TestcaseInput(
        name=ds.testcase.TestcaseInputName(string='src'),
        value=ds.testcase.TestcaseInputValue(string='void main() {}'),
    )
    data_input = ds.testcase.TestcaseInput(
        name=ds.testcase.TestcaseInputName(string='data'),
        value=ds.testcase.TestcaseInputValue(string='[1,2]'),
    )
    config_opt = ds.testcase.TestcaseInvariantOpt(
        name=ds.testcase.TestcaseInvariantOptName(string='config'),
        value=ds.testcase.TestcaseInvariantOptValue(string='opt'),
    )
    generate_event = ds.profiling_event.TestcaseProfilingEvent(
        client=ds.client.Client(string='localhost'),
        type=ds.profiling_event.ProfilingEventType(string='generate'),
        duration_ms=100,
        event_start=event_start,
    )
    foo_event = ds.profiling_event.TestcaseProfilingEvent(
        client=ds.client.Client(string='localhost'),
        type=ds.profiling_event.ProfilingEventType(string='foo'),
        duration_ms=100,
        event_start=event_start,
    )

    testcase = ds.testcase.Testcase(
        toolchain=ds.toolchain.Toolchain(string='cpp'),
        generator=ds.generator.Generator(name='generator'),
        harness=ds.harness.Harness(name='harness'),
        inputset=[src_input, data_input],
        invariant_optset=[config_opt],
        profiling_events=[generate_event, foo_event])
    proto = testcase.ToProto()

    assert proto.toolchain == 'cpp'
    assert proto.generator.name == 'generator'
    assert proto.harness.name == 'harness'
    assert len(proto.inputs) == 2
    assert proto.inputs['src'] == 'void main() {}'
    assert proto.inputs['data'] == '[1,2]'
    assert len(proto.invariant_opts) == 1
    assert proto.invariant_opts['config'] == 'opt'
    assert len(proto.profiling_events) == 2

    first_event = proto.profiling_events[0]
    expected_start_ms = labdate.MillisecondsTimestamp(event_start)
    assert first_event.event_start_epoch_ms == expected_start_ms
    assert first_event.client == 'localhost'
    assert first_event.type == 'generate'
    assert first_event.client == 'localhost'
def test_MillisecondsTimestamp_invalid_argument():
    """Test that a string argument is rejected with a TypeError."""
    bad_argument = 'not a date'
    with pytest.raises(TypeError):
        labdate.MillisecondsTimestamp(bad_argument)
def Sample(
        self, sampler: samplers.Sampler, min_num_samples: int,
        seed: int = None) -> typing.Iterable[model_pb2.Sample]:
    """Sample a model, yielding each sample as soon as it is complete.

    Args:
        sampler: The sampler to sample using.
        min_num_samples: The minimum number of samples to produce. Note that
            the true number of samples produced may be higher than this value,
            as sampling occurs in batches and the stop condition is only
            checked once a whole batch is done. E.g. if min_num_samples is 7
            and the batch size is 10, 10 samples will be produced.
        seed: A numeric value passed to the backend's InitSampling().

    Returns:
        An iterator over samples.

    Raises:
        NOTE(review): the original docstring listed UnableToAcquireLockError,
        InvalidStartText and InvalidSymtokTokens. None of them is raised
        directly in this block; presumably they propagate from the sampler or
        the backend. Confirm against those implementations.
    """
    sample_count = 1
    atomizer = self.atomizer
    sampler.Specialize(atomizer)
    batch_size = self.backend.InitSampling(sampler, seed)
    sample_start_time = labdate.MillisecondsTimestamp()

    # Per-sample batch outer loop. Continues until we have as many samples
    # as we want.
    while True:
        samples_in_progress = [
            sampler.tokenized_start_text.copy() for _ in range(batch_size)]
        # The builtin bool is used as dtype: the np.bool alias was removed in
        # NumPy 1.24.
        done = np.zeros(batch_size, dtype=bool)
        start_time = labdate.MillisecondsTimestamp()
        wall_time_start = start_time

        self.backend.InitSampleBatch(sampler, batch_size)

        # Sampling loop. Continues until all samples in the batch are done.
        while True:
            indices = self.backend.SampleNextIndices(sampler, batch_size)

            # Iterate over all samples in batch to determine whether they're
            # done.
            for i in range(batch_size):
                if done[i]:
                    continue

                token = atomizer.decoder[indices[i]]
                samples_in_progress[i].append(token)
                if sampler.SampleIsComplete(samples_in_progress[i]):
                    end_time = labdate.MillisecondsTimestamp()
                    done[i] = 1
                    sample = model_pb2.Sample(
                        text=''.join(samples_in_progress[i]),
                        sample_start_epoch_ms_utc=start_time,
                        sample_time_ms=end_time - start_time,
                        wall_time_ms=end_time - wall_time_start,
                        num_tokens=len(samples_in_progress[i]))
                    sample_count += 1
                    yield sample
                    # Restart the wall clock once the consumer hands control
                    # back.
                    wall_time_start = labdate.MillisecondsTimestamp()

            # Complete the batch.
            if done.all():
                break

        # Complete sampling. Note that sample_count starts at 1.
        if sample_count > min_num_samples:
            now = labdate.MillisecondsTimestamp()
            logging.info(
                'Produced %s samples at a rate of %s ms / sample.',
                humanize.intcomma(sample_count - 1),
                humanize.intcomma(
                    int((now - sample_start_time) /
                        max(sample_count - 1, 1))))
            break
def test_timestamp_datetime_equivalence():
    """Test that a datetime survives a round trip through a ms timestamp."""
    original = labdate.GetUtcMillisecondsNow()
    round_tripped = labdate.DatetimeFromMillisecondsTimestamp(
        labdate.MillisecondsTimestamp(original))
    assert original == round_tripped
def SampleFast(self,
               sampler: samplers.Sampler,
               min_num_samples: int,
               seed: int = None) -> typing.List[model_pb2.Sample]:
    """Sample a model.

    Same as Sample(), but without printing or caching samples. Because samples
    are not cached, infinite sampling loops are not supported, since we must
    return the sample protos at some point. self.Train() is called first.

    Args:
        sampler: The sampler to sample using.
        min_num_samples: The minimum number of samples to return. Note that
            the true number of samples returned may be higher than this value,
            as sampling occurs in batches and the stop condition is only
            checked once a whole batch is done. E.g. if min_num_samples is 7
            and the batch size is 10, 10 samples will be returned.
        seed: A numeric value passed to the backend's InitSampling().

    Returns:
        A list of Sample protos.

    Raises:
        NOTE(review): the original docstring listed UnableToAcquireLockError,
        InvalidStartText and InvalidSymtokTokens. None of them is raised
        directly in this block; presumably they propagate from Train(), the
        sampler or the backend. Confirm against those implementations.
    """
    self.Train()

    sample_count = 1
    with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                               self.cache.path / 'logs'):
        logging.info("Sampling: '%s'", sampler.start_text)
        sample_start_time = labdate.MillisecondsTimestamp()
        atomizer = self.corpus.atomizer
        sampler.Specialize(atomizer)
        batch_size = self.backend.InitSampling(sampler, seed)
        samples = []

        # Per-sample batch outer loop. Continues until we have as many
        # samples as we want.
        while True:
            samples_in_progress = [
                sampler.tokenized_start_text.copy()
                for _ in range(batch_size)
            ]
            # The builtin bool is used as dtype: the np.bool alias was removed
            # in NumPy 1.24.
            done = np.zeros(batch_size, dtype=bool)
            start_time = labdate.MillisecondsTimestamp()
            wall_time_start = start_time

            self.backend.InitSampleBatch(sampler, batch_size)

            # Sampling loop. Continues until all samples in the batch are
            # done. (A leftover debug print of `done` on every step was
            # removed; this method is documented as not printing.)
            while True:
                indices = self.backend.SampleNextIndices(sampler, batch_size)

                # Iterate over all samples in batch to determine whether
                # they're done.
                for i in range(batch_size):
                    if done[i]:
                        continue

                    token = atomizer.decoder[indices[i]]
                    samples_in_progress[i].append(token)
                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = 1
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        sample_count += 1
                        samples.append(sample)
                        wall_time_start = labdate.MillisecondsTimestamp()

                # Complete the batch.
                if done.all():
                    break

            # Complete sampling. Note that sample_count starts at 1.
            if sample_count > min_num_samples:
                now = labdate.MillisecondsTimestamp()
                # max() guards the rate against an empty sample list.
                logging.info(
                    'Produced %s samples at a rate of %s ms / sample.',
                    humanize.intcomma(len(samples)),
                    humanize.intcomma(
                        int((now - sample_start_time) /
                            max(len(samples), 1))))
                break

    return samples
def test_default_timestamp_datetime_equivalence():
    """Test that MillisecondsTimestamp() with no argument falls on today."""
    reference = labdate.GetUtcMillisecondsNow()
    default_timestamp = labdate.MillisecondsTimestamp()
    converted = labdate.DatetimeFromMillisecondsTimestamp(default_timestamp)
    # Only the calendar date is compared, not the time of day.
    assert reference.date() == converted.date()
def _ExportOpenCLResults(cursor, program_id, proto_dir):
    """Export every non-clang result of one program as Testcase/Result protos.

    For each row returned by the query, a Testcase is written to
    <proto_dir>/testcases/<testcase id>.pbtxt and a Result to
    <proto_dir>/results/<result id>.pbtxt.

    Args:
        cursor: A database cursor; the query is executed on it and its rows
            are iterated.
        program_id: The ID of the program whose results are exported.
        proto_dir: The directory under which the proto files are written.
    """

    def _Flag(value, if_zero, if_one):
        """Map 0 / 1 to the given strings and anything else to None."""
        if value == 0:
            return if_zero
        if value == 1:
            return if_one
        return None

    query = (
        " SELECT results.id, programs.id, testcases.id, "
        "platforms.platform, platforms.device, platforms.driver, "
        "platforms.opencl, platforms.devtype, platforms.host, "
        "testbeds.optimizations, "
        "programs.generator, programs.date, programs.generation_time, "
        "programs.src, "
        "testcases.harness, testcases.timeout, "
        "results.date, results.returncode, results.runtime, "
        "stdouts.stdout, stderrs.stderr, stderrs.truncated, "
        "threads.gsize_x, threads.gsize_y, threads.gsize_z, "
        "threads.lsize_x, threads.lsize_y, threads.lsize_z, "
        "clsmith_testcase_metas.oclverified, "
        "dsmith_testcase_metas.gpuverified, "
        "dsmith_testcase_metas.oclverified, "
        "dsmith_program_metas.contains_floats, "
        "dsmith_program_metas.vector_inputs, "
        "dsmith_program_metas.compiler_warnings "
        "FROM results "
        "LEFT JOIN testbeds ON results.testbed_id = testbeds.id "
        "LEFT JOIN platforms ON testbeds.platform_id = platforms.id "
        "LEFT JOIN testcases ON results.testcase_id = testcases.id "
        "LEFT JOIN programs ON testcases.program_id = programs.id "
        "LEFT JOIN threads ON testcases.threads_id = threads.id "
        "LEFT JOIN stdouts ON results.stdout_id = stdouts.id "
        "LEFT JOIN stderrs ON results.stderr_id = stderrs.id "
        "LEFT JOIN clsmith_testcase_metas "
        "ON testcases.id=clsmith_testcase_metas.id "
        "LEFT JOIN dsmith_testcase_metas "
        "ON testcases.id=dsmith_testcase_metas.id "
        "LEFT JOIN dsmith_program_metas "
        "ON programs.id=dsmith_program_metas.id "
        "WHERE programs.id = %s AND platforms.platform <> 'clang' "
    )
    cursor.execute(query, (program_id, ))

    for row in cursor:
        (result_id, programs_id, testcase_id, platform_name, device_name,
         driver_version, opencl_version, devtype, host_os, cl_opt,
         generator_id, program_date, program_generation_time, program_src,
         harness_id, harness_timeout, result_date, returncode, runtime,
         stdout, stderr, truncated_stderr, gsize_x, gsize_y, gsize_z,
         lsize_x, lsize_y, lsize_z, clsmith_oclverified, dsmith_gpuverified,
         dsmith_oclverified, dsmith_program_contains_floats,
         dsmith_program_vector_inputs,
         dsmith_program_compiler_warnings) = row

        # Testcase inputs. The NDRange sizes are skipped for harness ID -1.
        inputs = {'src': program_src}
        if harness_id != -1:
            inputs['gsize'] = f'{gsize_x},{gsize_y},{gsize_z}'
            inputs['lsize'] = f'{lsize_x},{lsize_y},{lsize_z}'

        # Testbed options.
        testbed_name = OPENCL_DEVICE_MAP[device_name]
        testbed_opts = {}
        _SetIf(testbed_opts, 'opencl_device', device_name.strip())
        _SetIf(testbed_opts, 'opencl_version', opencl_version.strip())
        _SetIf(testbed_opts, 'host', HOSTS_MAP.get(host_os, host_os))
        version_key = ('llvm_version'
                       if testbed_name == "clang" else 'driver_version')
        _SetIf(testbed_opts, version_key, driver_version.strip())
        _SetIf(testbed_opts, 'opencl_devtype',
               OPENCL_DEVTYPE_MAP.get(devtype, devtype))
        _SetIf(testbed_opts, 'opencl_platform', platform_name.strip())
        _SetIf(testbed_opts, 'opencl_opt',
               'enabled' if cl_opt else 'disabled')

        # Invariant options. A key is only set when its source column is 0 or
        # 1. For 'oclverify' the clsmith column takes precedence over the
        # dsmith column.
        candidate_opts = (
            ('oclverify', (_Flag(clsmith_oclverified, 'fail', 'pass') or
                           _Flag(dsmith_oclverified, 'fail', 'pass'))),
            ('gpuverify', _Flag(dsmith_gpuverified, 'fail', 'pass')),
            ('kernel_uses_floats',
             _Flag(dsmith_program_contains_floats, 'false', 'true')),
            ('kernel_has_vector_inputs',
             _Flag(dsmith_program_vector_inputs, 'false', 'true')),
            ('kernel_throws_compiler_warning',
             _Flag(dsmith_program_compiler_warnings, 'false', 'true')),
        )
        invariant_opts = {
            key: value for key, value in candidate_opts if value is not None
        }

        testbed = deepsmith_pb2.Testbed(
            toolchain='opencl',
            name=testbed_name,
            opts=testbed_opts,
        )

        generation_event = deepsmith_pb2.ProfilingEvent(
            client="cc1",
            type="generation",
            duration_ms=int(program_generation_time * 1000),
            event_start_epoch_ms=labdate.MillisecondsTimestamp(program_date),
        )
        testcase = deepsmith_pb2.Testcase(
            toolchain="opencl",
            generator=_GetOpenCLGenerator(generator_id),
            harness=_GetOpenCLHarness(harness_id, harness_timeout),
            inputs=inputs,
            invariant_opts=invariant_opts,
            profiling_events=[generation_event])

        # The client name is looked up from the host OS string; a host that
        # is not listed raises KeyError.
        client_by_host = {
            'Ubuntu 16.04 64bit': 'cc1',
            'CentOS Linux 7.1.1503 64bit': 'fuji',
            'openSUSE 13.1 64bit': 'kobol',
        }
        runtime_event = deepsmith_pb2.ProfilingEvent(
            client=client_by_host[host_os],
            type="runtime",
            duration_ms=int(runtime * 1000),
            event_start_epoch_ms=labdate.MillisecondsTimestamp(result_date),
        )
        result = deepsmith_pb2.Result(
            testcase=testcase,
            testbed=testbed,
            returncode=returncode,
            outputs={
                "stdout": stdout,
                "stderr": stderr,
            },
            profiling_events=[runtime_event],
        )

        # Write the testcase to file.
        testcase_path = proto_dir / 'testcases' / (str(testcase_id) + '.pbtxt')
        pbutil.ToFile(testcase, testcase_path)

        # Write the results to file.
        result_path = proto_dir / 'results' / (str(result_id) + '.pbtxt')
        pbutil.ToFile(result, result_path)
def Sample(self,
           sampler: samplers.Sampler,
           min_num_samples: int,
           seed: int = None) -> typing.List[model_pb2.Sample]:
    """Sample a model, printing each sample and writing it to a file.

    self.Train() is called first, so a call to Sample() is equivalent to
    calling Train() then Sample(). Every completed sample is printed to stdout
    and its text is written to a 'Sample<N>.mdl' file in the sampler cache
    directory.

    Args:
        sampler: The sampler to sample using.
        min_num_samples: The minimum number of samples to return. Note that
            the true number of samples returned may be higher than this value,
            as sampling occurs in batches and the stop condition is only
            checked once a whole batch is done. E.g. if min_num_samples is 7
            and the batch size is 10, 10 samples will be returned. Samples are
            only collected into the returned list when this value is
            positive.
        seed: A numeric value passed to the backend's InitSampling().

    Returns:
        A list of Sample protos.

    Raises:
        NOTE(review): the original docstring listed UnableToAcquireLockError,
        InvalidStartText and InvalidSymtokTokens. None of them is raised
        directly in this block; presumably they propagate from Train(), the
        sampler or the backend. Confirm against those implementations.
    """
    self.Train()

    # The caller's min_num_samples is honoured; a leftover hard-coded
    # override to 500 used to replace it here.
    sample_count = 1  # For logging purposes only.
    sample_dir = self.SamplerCache(sampler)
    sample_dir.mkdir(exist_ok=True)
    with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                               self.cache.path / 'logs'):
        logging.info("Sampling: '%s'", sampler.start_text)
        if min_num_samples < 0:
            # NOTE(review): with the stop condition below, a negative value
            # ends sampling after the first batch, so this warning looks
            # inaccurate. Confirm the intended behavior.
            logging.warning(
                'Entering an infinite sample loop, this process will never end!'
            )
        sample_start_time = labdate.MillisecondsTimestamp()

        atomizer = self.corpus.atomizer
        sampler.Specialize(atomizer)
        batch_size = self.backend.InitSampling(sampler, seed)
        print("Sampling Batch Size :" + str(batch_size))
        samples = []

        # Per-sample batch outer loop. Continues until we have as many
        # samples as we want.
        while True:
            samples_in_progress = [
                sampler.tokenized_start_text.copy()
                for _ in range(batch_size)
            ]
            # The builtin bool is used as dtype: the np.bool alias was removed
            # in NumPy 1.24.
            done = np.zeros(batch_size, dtype=bool)
            start_time = labdate.MillisecondsTimestamp()
            wall_time_start = start_time

            self.backend.InitSampleBatch(sampler, batch_size)

            # Sampling loop. Continues until all samples in the batch are
            # done.
            while True:
                indices = self.backend.SampleNextIndices(sampler, batch_size)

                # Iterate over all samples in batch to determine whether
                # they're done.
                for i in range(batch_size):
                    if done[i]:
                        continue

                    token = atomizer.decoder[indices[i]]
                    samples_in_progress[i].append(token)
                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = 1
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        print(f'=== BEGIN CLGEN SAMPLE {sample_count} '
                              f'===\n\n{sample.text}\n')
                        sample_count += 1
                        # NOTE(review): the counter is incremented before the
                        # file name is built, so the sample printed as number
                        # N is written to Sample<N+1>.mdl. Left unchanged in
                        # case consumers rely on the existing names.
                        sample_path = sample_dir / f'Sample{sample_count}.mdl'
                        with open(sample_path, 'w') as samplefile:
                            samplefile.write(''.join(samples_in_progress[i]))
                        if min_num_samples > 0:
                            samples.append(sample)
                        wall_time_start = labdate.MillisecondsTimestamp()

                # Complete the batch.
                if done.all():
                    break

            # Complete sampling. Note that sample_count starts at 1.
            if sample_count > min_num_samples:
                now = labdate.MillisecondsTimestamp()
                logging.info(
                    'Produced %s samples at a rate of %s ms / sample.',
                    humanize.intcomma(len(samples)),
                    humanize.intcomma(
                        int((now - sample_start_time) /
                            max(len(samples), 1))))
                break

    return samples
def EpochBeginCallback(self) -> None:
    """Record the current millisecond timestamp as the epoch start time."""
    epoch_began_ms = labdate.MillisecondsTimestamp()
    self.last_epoch_begin_timestamp = epoch_began_ms