Example #1
    def EndEpisodeStep(self) -> random_opt_pb2.DelayedRewardStep:
        start_ms = labdate.MillisecondsTimestamp()
        step = random_opt_pb2.DelayedRewardStep(start_time_epoch_ms=start_ms, )
        try:
            clang.Compile([self.working_bytecode_path],
                          self.binary_path,
                          copts=['-O0'])
            try:
                runtimes = self.GetRuntimes()
                self.episodes[-1].binary_runtime_ms.extend(runtimes)
                if self.BinaryIsValid():
                    step.reward = self.runtime_reward(
                        sum(runtimes) / len(runtimes))
                else:
                    self.episodes[-1].outcome = (
                        random_opt_pb2.DelayedRewardEpisode.EVAL_FAILED)
                    step.reward = self.eval_failed_reward
            except ValueError as e:
                self.episodes[-1].outcome = random_opt_pb2.Step.EXEC_FAILED
                self.episodes[-1].outcome_error_msg = text.truncate(
                    str(e), 255)
                step.reward = self.exec_failed_reward
        except clang.ClangException as e:
            self.episodes[-1].outcome = (
                random_opt_pb2.DelayedRewardEpisode.COMPILE_FAILED)
            self.episodes[-1].outcome_error_msg = text.truncate(str(e), 255)
            step.reward = self.compile_failed_reward

        obs = self.observation_space.sample()
        step.total_step_runtime_ms = labdate.MillisecondsTimestamp() - start_ms
        self.episodes[-1].step.extend([step])
        return obs, step.reward, True, {}
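Nearly every example on this page follows the same timing idiom: take labdate.MillisecondsTimestamp() before a unit of work, take it again afterwards, and store the difference as a duration in milliseconds alongside the start timestamp. A minimal sketch of that idiom, assuming only what the examples themselves show, namely that labdate.MillisecondsTimestamp() with no argument returns the current time as an integer count of milliseconds since the UNIX epoch (the import path and DoWork() are placeholders):

from labm8 import labdate  # assumed import path; the examples only reference the module name


def TimedWork():
    """Sketch of the start/elapsed pattern used in Example #1 and elsewhere."""
    start_ms = labdate.MillisecondsTimestamp()
    DoWork()  # hypothetical placeholder for the work being measured
    elapsed_ms = labdate.MillisecondsTimestamp() - start_ms
    return start_ms, elapsed_ms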
Example #2
  def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                        context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases."""
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    # Generate random strings.
    for _ in range(request.num_testcases):
      # Pick a length for the random string.
      n = random.randint(self.config.string_min_len,
                         self.config.string_max_len + 1)
      # Instantiate a testcase.
      testcase = response.testcases.add()
      testcase.toolchain = self.config.toolchain
      testcase.generator.CopyFrom(self.generator)
      start_time = labdate.MillisecondsTimestamp()
      testcase.inputs['src'] = ''.join(
          random.choice(string.ascii_lowercase) for _ in range(n))
      end_time = labdate.MillisecondsTimestamp()
      p = testcase.profiling_events.add()
      p.type = 'generation'
      p.event_start_epoch_ms = start_time
      p.duration_ms = end_time - start_time

    return response
Example #3
    def ActionStep(self, action: int) -> random_opt_pb2.DelayedRewardStep:
        if not self.action_space.contains(action):
            raise ValueError(f"Unknown action: '{action}'")
        start_ms = labdate.MillisecondsTimestamp()
        obs = self.observation_space.sample()
        opt_pass = self.config.candidate_pass[action]

        step = random_opt_pb2.DelayedRewardStep(
            start_time_epoch_ms=start_ms,
            opt_pass=opt_pass,
        )

        # Run the full list of passes and update working_bytecode file.
        try:
            all_passes = [step.opt_pass for step in self.episodes[-1].step[1:]]
            opt.RunOptPassOnBytecode(self.bytecode_path,
                                     self.working_dir / 'temp.ll', all_passes)
            step.bytecode_changed = BytecodesAreEqual(
                self.working_dir / 'temp.ll', self.working_bytecode_path)
            shutil.copyfile(self.working_dir / 'temp.ll',
                            self.working_bytecode_path)
            step.reward = (self.bytecode_changed_reward
                           if step.bytecode_changed else
                           self.bytecode_unchanged_reward)
        except llvm.LlvmError as e:
            # Opt failed, set the error message.
            step.reward = self.opt_failed_reward
            step.opt_error_msg = text.truncate(str(e), 255)

        step.total_step_runtime_ms = labdate.MillisecondsTimestamp() - start_ms
        self.episodes[-1].step.extend([step])
        return obs, step.reward, False, {}
Example #4
 def reset(self):
     logging.debug('$ cp %s %s', self.bytecode_path,
                   self.working_bytecode_path)
     shutil.copyfile(self.bytecode_path, self.working_bytecode_path)
     clang.Compile([self.working_bytecode_path],
                   self.binary_path,
                   copts=['-O0'])
     start_time = labdate.MillisecondsTimestamp()
     self.RunSetupCommand()
     self.RunBinary()
     if not self.BinaryIsValid():
         raise ValueError(f"Failed to validate base binary.")
     self.episodes.append(
         random_opt_pb2.Episode(step=[
             random_opt_pb2.Step(
                 start_time_epoch_ms=start_time,
                 status=random_opt_pb2.Step.PASS,
                 binary_runtime_ms=self.GetRuntimes(),
                 reward=0,
                 total_reward=0,
                 speedup=1.0,
                 total_speedup=1.0,
             )
         ]))
     self.episodes[-1].step[0].total_step_runtime_ms = (
         labdate.MillisecondsTimestamp() - start_time)
Example #5
    def Sample(self,
               sampler: samplers.Sampler,
               sample_observers: typing.List[
                   sample_observers_lib.SampleObserver],
               seed: int = None) -> None:
        """Sample a model.

    This method uses the observer pattern, returning nothing. To access the
    samples produced, implement a SampleObserver and pass it in as an argument.
    Sampling continues indefinitely until one of the sample observers returns
    False when notified of a new sample.

    If the model is not already trained, calling Sample() first trains the
    model. Thus a call to Sample() is equivalent to calling Train() then
    Sample().

    Args:
      sampler: The sampler to sample using.
      sample_observers: A list of SampleObserver objects that are notified of
        new generated samples.
      seed: A numeric value to seed the RNG with. If not present, the RNG is
        seeded randomly.

    Raises:
      UserError: If called with no sample observers.
      UnableToAcquireLockError: If the model is locked (i.e. there is another
        process currently modifying the model).
      InvalidStartText: If the sampler start text cannot be encoded.
      InvalidSymtokTokens: If the sampler symmetrical depth tokens cannot be
        encoded.
    """
        if not sample_observers:
            raise errors.UserError("Cannot sample without any observers")

        sample_start_time = labdate.MillisecondsTimestamp()

        self.Train()

        with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                                   self.cache.path / 'logs'):
            app.Log(1, "Sampling: '%s'", sampler.start_text)

            atomizer = self.corpus.atomizer
            sampler.Specialize(atomizer)
            self.backend.InitSampling(sampler, seed)
            [obs.Specialize(self, sampler) for obs in sample_observers]

            batch_count = 1
            while self._SampleBatch(sampler, atomizer, sample_observers):
                batch_count += 1

            time_now = labdate.MillisecondsTimestamp()
            app.Log(
                1, 'Produced %s sample batches at a rate of %s ms / batch.',
                humanize.Commas(batch_count),
                humanize.Commas(
                    int((time_now - sample_start_time) / max(batch_count, 1))))
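Examples #5 and #8 together show the observer-based sampling path: Sample() specializes each observer for the model and sampler, then repeatedly calls _SampleBatch(), which notifies every observer of each completed sample and stops once the observers signal that no more samples are wanted. Below is a hypothetical observer sketch under the interface these two excerpts imply; the SampleObserver base class itself is not shown here, and note that the docstring in Example #5 ("continues until one of the sample observers returns False") and the `not obs.OnSample(sample)` in Example #8 disagree about the polarity of the return value, so check the base class before relying on either:

# Hypothetical observer; interface inferred from Examples #5 and #8.
class CollectNSamples(sample_observers_lib.SampleObserver):
    """Collect a fixed number of samples, then ask sampling to stop."""

    def __init__(self, n: int):
        self.n = n
        self.samples = []

    def Specialize(self, model, sampler) -> None:
        # Called once before sampling begins; nothing to do in this sketch.
        pass

    def OnSample(self, sample: model_pb2.Sample) -> bool:
        self.samples.append(sample)
        # Per the Example #5 docstring, returning False stops sampling.
        return len(self.samples) < self.n

# model.Sample(sampler, [CollectNSamples(16)])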
Example #6
    def Profile(self, event_name: str = ''):
        """A context manager which prints the elapsed time upon exit.

    Args:
      event_name: The name of the event being profiled.
    """
        event = ProfilingEvent(labdate.MillisecondsTimestamp(), event_name)
        yield event
        elapsed = labdate.MillisecondsTimestamp() - event.start_time
        with self._writer() as writer:
            writer.writerow((event.start_time, elapsed, event.name))
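Example #6 yields from inside the function body, so in the full source Profile() is presumably wrapped with @contextlib.contextmanager (the decorator is not part of this excerpt). A hypothetical usage sketch under that assumption, where profiler stands in for an instance of the class that defines Profile() and _writer():

# Hypothetical usage, assuming Profile() is decorated with @contextlib.contextmanager.
with profiler.Profile('compile kernel') as event:
    DoExpensiveWork()  # placeholder for the code being timed
# On exit, a (start time, elapsed ms, event name) row is appended to the CSV file
# whose header is written in Example #17.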
Example #7
    def GenerateOneSource(self) -> typing.Tuple[str, int, int]:
        """Generate and return a single CLSmith program.

    Returns:
      A tuple of the source code as a string, the generation time, and the start
      time.
    """
        start_epoch_ms_utc = labdate.MillisecondsTimestamp()
        src = clsmith.Exec(*list(self.config.opt))
        wall_time_ms = labdate.MillisecondsTimestamp() - start_epoch_ms_utc
        return src, wall_time_ms, start_epoch_ms_utc
Example #8
    def _SampleBatch(
        self, sampler: samplers.Sampler, atomizer: atomizers.AtomizerBase,
        sample_observers: typing.List[sample_observers_lib.SampleObserver]
    ) -> typing.List[model_pb2.Sample]:
        """Run a single iteration of the batched sample inner-loop."""
        samples_in_progress = [
            sampler.tokenized_start_text.copy()
            for _ in range(sampler.batch_size)
        ]
        done = np.zeros(sampler.batch_size, dtype=np.bool)
        start_time = labdate.MillisecondsTimestamp()
        wall_time_start = start_time

        self.backend.InitSampleBatch(sampler)

        # The return value of this method. If any of the sample_observers return
        # False, this value is set to False.
        continue_sampling = True

        # Sampling loop. Continues until all samples in the batch are done.
        while not done.all():
            indices = self.backend.SampleNextIndices(sampler, done)

            # Iterate over all samples in batch to determine whether they're
            # done.
            for i in range(sampler.batch_size):
                if done[i]:
                    continue

                for index in indices[i]:
                    samples_in_progress[i].append(atomizer.decoder[index])
                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = 1
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        # Notify sample observers.
                        continue_sampling &= all([
                            not obs.OnSample(sample)
                            for obs in sample_observers
                        ])

                        # Wall sample time is the difference between the end of the previous
                        # sample and the end of the current sample.
                        wall_time_start = labdate.MillisecondsTimestamp()
                        break

        return continue_sampling
Example #9
 def _create_lock():
     lockfile = lockfile_pb2.LockFile(
         owner_process_id=os.getpid() if pid is None else pid,
         owner_process_argv=' '.join(sys.argv),
         date_acquired_utc_epoch_ms=labdate.MillisecondsTimestamp(
             labdate.GetUtcMillisecondsNow()),
         owner_hostname=system.HOSTNAME,
         owner_user=system.USERNAME)
     pbutil.ToFile(lockfile, self.path, assume_filename='LOCK.pbtxt')
Example #10
 def EpochEndCallback(self, epoch: int, loss: float):
     now = labdate.MillisecondsTimestamp()
     epoch_time_ms = now - self.last_epoch_begin_timestamp
     telemetry = telemetry_pb2.ModelEpochTelemetry(
         timestamp_utc_epoch_ms=now,
         epoch_num=epoch,
         epoch_wall_time_ms=epoch_time_ms,
         loss=loss,
     )
     pbutil.ToFile(telemetry,
                   self.logdir / f'epoch_{epoch:03d}_telemetry.pbtxt')
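Example #10 pairs with the one-line EpochBeginCallback() in Example #25: the begin callback records last_epoch_begin_timestamp, and the end callback subtracts it from the current timestamp to obtain the epoch wall time before writing the telemetry proto. A sketch of the expected call order; telemetry_logger and TrainOneEpoch() are hypothetical names standing in for the surrounding training loop:

# Hypothetical call order around one training epoch.
telemetry_logger.EpochBeginCallback()          # stores last_epoch_begin_timestamp
loss = TrainOneEpoch()                         # placeholder for the actual training step
telemetry_logger.EpochEndCallback(epoch=0, loss=loss)
# EpochEndCallback() writes logdir/epoch_000_telemetry.pbtxt with
# epoch_wall_time_ms = now - last_epoch_begin_timestamp.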
Example #11
  def GenerateTestcases(self, request: generator_pb2.GenerateTestcasesRequest,
                        context) -> generator_pb2.GenerateTestcasesResponse:
    """Generate testcases."""
    del context
    response = services.BuildDefaultResponse(
        generator_pb2.GenerateTestcasesResponse)

    # Generate testcases by copying the configured testcase.
    for _ in range(request.num_testcases):
      # Instantiate a testcase.
      testcase = response.testcases.add()
      testcase.CopyFrom(self.config.testcase_to_generate)
      testcase.generator.CopyFrom(self.generator)
      start_time = labdate.MillisecondsTimestamp()
      end_time = labdate.MillisecondsTimestamp()
      p = testcase.profiling_events.add()
      p.type = 'generation'
      p.event_start_epoch_ms = start_time
      p.duration_ms = end_time - start_time

    return response
Example #12
 def reset(self):
     """Reset the environment state."""
     logging.debug('$ cp %s %s', self.bytecode_path,
                   self.working_bytecode_path)
     shutil.copyfile(self.bytecode_path, self.working_bytecode_path)
     clang.Compile([self.bytecode_path], self.binary_path, copts=['-O0'])
     self.RunSetupCommand()
     self.episodes.append(
         random_opt_pb2.DelayedRewardEpisode(step=[
             random_opt_pb2.DelayedRewardStep(
                 start_time_epoch_ms=labdate.MillisecondsTimestamp(), )
         ]))
Example #13
 def ConvertOne() -> logging_pb2.LogRecord:
   """Convert the current starting_match and lines_buffer into a LogRecord."""
   if starting_match:
     records.append(
         logging_pb2.LogRecord(
             level=ABSL_LEVEL_TO_LOG_RECORD_LEVEL[starting_match.group('lvl')],
             date_utc_epoch_ms=labdate.MillisecondsTimestamp(
                 DatetimeFromAbslTimestamp(starting_match.group('timestamp'),
                                           year=year)),
             thread_id=int(starting_match.group('thread_id')),
             file_name=starting_match.group('filename'),
             line_number=int(starting_match.group('lineno')),
             message='\n'.join([starting_match.group('contents')] +
                               lines_buffer).rstrip()))
Example #14
def TestingLoop(min_interesting_results: int,
                max_testing_time_seconds: int,
                batch_size: int,
                generator: base_generator.GeneratorServiceBase,
                dut_harness: base_harness.HarnessBase,
                gs_harness: base_harness.HarnessBase,
                filters: difftests.FiltersBase,
                interesting_results_dir: pathlib.Path,
                start_time: float = None) -> None:
    """The main fuzzing loop.

  Args:
    min_interesting_results: The minimum number of interesting results to find.
    max_testing_time_seconds: The maximum time allowed to find interesting
      results.
    batch_size: The number of testcases to generate and execute in each batch.
    generator: A testcase generator.
    dut_harness: The device under test.
    gs_harness: The device to compare outputs against.
    filters: A filters instance for testcases.
    interesting_results_dir: The directory to write interesting results to.
    start_time: The starting time, as returned by time.time(). If not provided,
      the starting time will be the moment that this function is called. Set
      this value if you would like to include initialization overhead in the
      calculated testing time.
  """
    start_time = start_time or time.time()
    interesting_results_dir.mkdir(parents=True, exist_ok=True)
    num_interesting_results = 0
    batch_num = 0
    while (num_interesting_results < min_interesting_results
           and time.time() < start_time + max_testing_time_seconds):
        batch_num += 1
        logging.info('Starting generate / test / eval batch %d ...', batch_num)
        interesting_results = RunBatch(generator, dut_harness, gs_harness,
                                       filters, batch_size)
        num_interesting_results += len(interesting_results)
        for result in interesting_results:
            pbutil.ToFile(
                result, interesting_results_dir /
                (str(labdate.MillisecondsTimestamp()) + '.pbtxt'))

    logging.info(
        'Stopping after %.2f seconds and %s batches (%.0fms / testcase).\n'
        'Found %s interesting results.',
        time.time() - start_time, humanize.intcomma(batch_num),
        (((time.time() - start_time) / (batch_num * batch_size)) * 1000),
        num_interesting_results)
    logging.flush()
Example #15
  def SetProto(self,
               proto: deepsmith_pb2.ProfilingEvent) -> \
      deepsmith_pb2.ProfilingEvent:
    """Set a protocol buffer representation.

    Args:
      proto: A protocol buffer message.

    Returns:
      A ProfilingEvent message.
    """
    proto.client = self.client.string
    proto.type = self.type.string
    proto.duration_ms = self.duration_ms
    proto.event_start_epoch_ms = labdate.MillisecondsTimestamp(self.event_start)
    return proto
Example #16
def RunTestcase(opencl_environment: env.OpenCLEnvironment,
                testbed: deepsmith_pb2.Testbed,
                testcase: deepsmith_pb2.Testcase,
                cflags: typing.List[str]) -> deepsmith_pb2.Result:
    """Run a testcase."""
    if testcase.toolchain != 'opencl':
        raise ValueError(
            f"Unsupported testcase toolchain: '{testcase.toolchain}'")
    if testcase.harness.name != 'cldrive':
        raise ValueError(
            f"Unsupported testcase harness: '{testcase.harness.name}'")
    result = deepsmith_pb2.Result()
    result.testbed.CopyFrom(testbed)
    platform_id, device_id = opencl_environment.ids()
    driver = MakeDriver(
        testcase, True if testbed.opts['opencl_opt'] == 'enabled' else False)
    # MakeDriver() annotates the testcase, so we must only set the testcase field
    # of the output result after we have called it.
    result.testcase.CopyFrom(testcase)
    # Get a temporary file to write and run the driver from.
    with tempfile.NamedTemporaryFile(prefix='deepsmith_', delete=False) as f:
        path = pathlib.Path(f.name)
    try:
        CompileDriver(driver, path, platform_id, device_id, cflags=cflags)
        timeout = testcase.harness.opts.get('timeout_seconds', '60')
        cmd = ['timeout', '-s9', timeout, f.name]
        start_time = labdate.GetUtcMillisecondsNow()
        proc = opencl_environment.Exec(cmd)
        end_time = labdate.GetUtcMillisecondsNow()
        # Build result message.
        result.returncode = proc.returncode
        result.outputs['stdout'] = proc.stdout
        result.outputs['stderr'] = proc.stderr
        runtime = result.profiling_events.add()
        runtime.client = system.HOSTNAME
        runtime.type = 'runtime'
        runtime.duration_ms = int(
            round((end_time - start_time).total_seconds() * 1000))
        runtime.event_start_epoch_ms = labdate.MillisecondsTimestamp(
            start_time)
        result.outcome = GetResultOutcome(result)
    except DriverCompilationError as e:
        logging.warning('%s', e)
        result.outcome = deepsmith_pb2.Result.UNKNOWN
    finally:
        fs.rm(path)
    return result
Example #17
    def __init__(self, directory: pathlib.Path, name: str = 'profile'):
        self._directory = pathlib.Path(directory)
        if not self._directory.is_dir():
            raise ValueError(f"Directory not found: {directory}")
        self._name = name

        # Create the name of the logfile now, so that it is timestamped to the start of
        # execution.
        timestamp = labdate.MillisecondsTimestamp()
        log_name = '.'.join(
            [self._name, system.HOSTNAME,
             str(timestamp), 'csv'])
        self._path = self._directory / log_name

        with self._writer() as writer:
            writer.writerow(('Start Time (ms since UNIX epoch)',
                             'Elapsed Time (ms)', 'Event'))
Example #18
def GetRepositoryMetadata(
        repo: Repository.Repository) -> scrape_repos_pb2.GitHubRepoMetadata:
    """Get metadata about a GitHub repository.

  Args:
    repo: A Repository instance.

  Returns:
    A GitHubRepoMetadata instance.
  """
    meta = scrape_repos_pb2.GitHubRepoMetadata()
    meta.scraped_utc_epoch_ms = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())
    meta.owner = repo.owner.login
    meta.name = repo.name
    meta.num_watchers = repo.watchers_count
    meta.num_forks = repo.forks_count
    meta.num_stars = repo.stargazers_count
    meta.clone_from_url = repo.clone_url
    return meta
Example #19
def CreatePackageManifest(
        package_root: pathlib.Path,
        contents: typing.List[pathlib.Path]) -> dpack_pb2.DataPackage:
    """Create a DataPackage message for the contents of a package.

  Args:
    package_root: The root of the package.
    contents: A list of relative paths to files to include.

  Returns:
    A DataPackage instance with attributes set.
  """
    manifest = dpack_pb2.DataPackage()
    manifest.comment = ''
    manifest.utc_epoch_ms_packaged = labdate.MillisecondsTimestamp(
        labdate.GetUtcMillisecondsNow())
    for path in contents:
        f = manifest.file.add()
        SetDataPackageFileAttributes(package_root, path, f)
        f.comment = f.comment or ''
    return manifest
Example #20
    def FromText(cls, text: str, atoms: typing.Set[str]) -> 'GreedyAtomizer':
        """Instantiate and an atomizer from a corpus text.

    Args:
      text: Text corpus
      atoms: A set of multi-character tokens.

    Returns:
      An atomizer instance.
    """
        if not atoms:
            raise errors.UserError('No atoms specified')

        # Instantiate a greedy atomizer using the full vocabulary.
        full_vocab = dict(zip(atoms, range(len(atoms))))
        c = GreedyAtomizer(full_vocab, determine_chars=True)
        # Derive the subset of the vocabulary required to encode the given text.
        tokens = sorted(list(set(c.TokenizeString(text))))
        vocab_subset = dict(zip(tokens, range(len(tokens))))
        end_time = labdate.MillisecondsTimestamp()
        # Return a new atomizer using the subset vocabulary.
        return GreedyAtomizer(vocab_subset)
Example #21
    def SampleFast(self,
                   sampler: samplers.Sampler,
                   min_num_samples: int,
                   seed: int = None) -> typing.List[model_pb2.Sample]:
        """Sample a model.

    Same as Sample(), but without printing or caching samples. Because samples
    are not cached, infinite sampling loops are not supported, since we must
    return the sample protos at some point.

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to return. Note that the
        true number of samples returned may be higher than this value, as
        sampling occurs in batches. The model will continue producing samples
        until the lowest multiple of the sampler batch size property that is
        larger than this value. E.g. if min_num_samples is 7 and the Sampler
        batch size is 10, 10 samples will be returned.
      seed: A numeric value to seed the RNG with. If not present, the RNG is
        seeded randomly.

    Returns:
      A list of Sample protos.

    Raises:
      UnableToAcquireLockError: If the model is locked (i.e. there is another
        process currently modifying the model).
      InvalidStartText: If the sampler start text cannot be encoded.
      InvalidSymtokTokens: If the sampler symmetrical depth tokens cannot be
        encoded.
    """
        self.Train()

        sample_count = 1
        with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                                   self.cache.path / 'logs'):
            logging.info("Sampling: '%s'", sampler.start_text)
            sample_start_time = labdate.MillisecondsTimestamp()
            atomizer = self.corpus.atomizer
            sampler.Specialize(atomizer)
            batch_size = self.backend.InitSampling(sampler, seed)
            samples = []

            # Per-sample batch outer loop. Continues until we have as many samples
            # as we want.
            while True:
                samples_in_progress = [
                    sampler.tokenized_start_text.copy()
                    for _ in range(batch_size)
                ]
                done = np.zeros(batch_size, dtype=np.bool)
                start_time = labdate.MillisecondsTimestamp()
                wall_time_start = start_time

                self.backend.InitSampleBatch(sampler, batch_size)

                # Sampling loop. Continues until all samples in the batch are done.
                while True:
                    indices = self.backend.SampleNextIndices(
                        sampler, batch_size)

                    # Iterate over all samples in batch to determine whether they're
                    # done.
                    for i in range(batch_size):
                        if done[i]:
                            continue

                        token = atomizer.decoder[indices[i]]
                        samples_in_progress[i].append(token)
                        if sampler.SampleIsComplete(samples_in_progress[i]):
                            end_time = labdate.MillisecondsTimestamp()
                            done[i] = 1
                            sample = model_pb2.Sample(
                                text=''.join(samples_in_progress[i]),
                                sample_start_epoch_ms_utc=start_time,
                                sample_time_ms=end_time - start_time,
                                wall_time_ms=end_time - wall_time_start,
                                num_tokens=len(samples_in_progress[i]))
                            sample_count += 1
                            samples.append(sample)
                            wall_time_start = labdate.MillisecondsTimestamp()

                    # Complete the batch.
                    if done.all():
                        break

                # Complete sampling. Note that sample_count starts at 1.
                if sample_count > min_num_samples:
                    now = labdate.MillisecondsTimestamp()
                    logging.info(
                        'Produced %s samples at a rate of %s ms / sample.',
                        humanize.intcomma(len(samples)),
                        humanize.intcomma(
                            int((now - sample_start_time) / len(samples))))
                    break

        return samples
Example #22
    def Sample(self,
               sampler: samplers.Sampler,
               min_num_samples: int,
               seed: int = None) -> typing.List[model_pb2.Sample]:
        """Sample a model.

    If the model is not already trained, calling Sample() first trains the
    model. Thus a call to Sample() is equivalent to calling Train() then
    Sample().

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to return. Note that the
        true number of samples returned may be higher than this value, as
        sampling occurs in batches. The model will continue producing samples
        until the lowest multiple of the sampler batch size property that is
        larger than this value. E.g. if min_num_samples is 7 and the Sampler
        batch size is 10, 10 samples will be returned.
      seed: A numeric value to seed the RNG with. If not present, the RNG is
        seeded randomly.

    Returns:
      A list of Sample protos.

    Raises:
      UnableToAcquireLockError: If the model is locked (i.e. there is another
        process currently modifying the model).
      InvalidStartText: If the sampler start text cannot be encoded.
      InvalidSymtokTokens: If the sampler symmetrical depth tokens cannot be
        encoded.
    """
        self.Train()

        sample_count = 1
        self.SamplerCache(sampler).mkdir(exist_ok=True)
        with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                                   self.cache.path / 'logs'):
            logging.info("Sampling: '%s'", sampler.start_text)
            if min_num_samples < 0:
                logging.warning(
                    'Entering an infinite sample loop, this process will never end!'
                )
            sample_start_time = labdate.MillisecondsTimestamp()

            atomizer = self.corpus.atomizer
            sampler.Specialize(atomizer)
            batch_size = self.backend.InitSampling(sampler, seed)

            samples = []
            sample_dir = self.SamplerCache(sampler)

            # Per-sample batch outer loop. Continues until we have as many samples
            # as we want.
            while True:
                samples_in_progress = [
                    sampler.tokenized_start_text.copy()
                    for _ in range(batch_size)
                ]
                done = np.zeros(batch_size, dtype=np.bool)
                start_time = labdate.MillisecondsTimestamp()
                wall_time_start = start_time

                self.backend.InitSampleBatch(sampler, batch_size)

                # Sampling loop. Continues until all samples in the batch are done.
                while True:
                    indices = self.backend.SampleNextIndices(
                        sampler, batch_size)

                    # Iterate over all samples in batch to determine whether they're
                    # done.
                    for i in range(batch_size):
                        if done[i]:
                            continue

                        token = atomizer.decoder[indices[i]]
                        samples_in_progress[i].append(token)
                        if sampler.SampleIsComplete(samples_in_progress[i]):
                            end_time = labdate.MillisecondsTimestamp()
                            done[i] = 1
                            sample = model_pb2.Sample(
                                text=''.join(samples_in_progress[i]),
                                sample_start_epoch_ms_utc=start_time,
                                sample_time_ms=end_time - start_time,
                                wall_time_ms=end_time - wall_time_start,
                                num_tokens=len(samples_in_progress[i]))
                            print(f'=== BEGIN CLGEN SAMPLE {sample_count} '
                                  f'===\n\n{sample.text}\n')
                            sample_count += 1
                            sample_id = crypto.sha256_str(sample.text)
                            sample_path = sample_dir / f'{sample_id}.pbtxt'
                            pbutil.ToFile(sample, sample_path)
                            if min_num_samples > 0:
                                samples.append(sample)
                            wall_time_start = labdate.MillisecondsTimestamp()

                    # Complete the batch.
                    if done.all():
                        break

                # Complete sampling. Note that sample_count starts at 1.
                if sample_count > min_num_samples:
                    now = labdate.MillisecondsTimestamp()
                    logging.info(
                        'Produced %s samples at a rate of %s ms / sample.',
                        humanize.intcomma(len(samples)),
                        humanize.intcomma(
                            int((now - sample_start_time) /
                                max(len(samples), 1))))
                    break

        return samples
Example #23
def test_GetRepositoryMetadata_timestamp():
    """Test that the timestamp in metadata is set to (aprox) now."""
    now_ms = labdate.MillisecondsTimestamp(labdate.GetUtcMillisecondsNow())
    meta = scraper.GetRepositoryMetadata(MockRepository())
    assert now_ms - meta.scraped_utc_epoch_ms <= 1000
Example #24
    def Sample(self,
               sampler: samplers.Sampler,
               min_num_samples: int,
               seed: int = None) -> typing.Iterable[model_pb2.Sample]:
        """Sample a model.

		If the model is not already trained, calling Sample() first trains the
		model. Thus a call to Sample() is equivalent to calling Train() then
		Sample().

		Args:
			sampler: The sampler to sample using.
			min_num_samples: The minimum number of samples to return. Note that the
				true number of samples returned may be higher than this value, as
				sampling occurs in batches. The model will continue producing samples
				until the lowest multiple of the sampler batch size property that is
				larger than this value. E.g. if min_num_samples is 7 and the Sampler
				batch size is 10, 10 samples will be returned.
			seed: A numeric value to seed the RNG with. If not present, the RNG is
				seeded randomly.

		Returns:
			An iterator over samples.

		Raises:
			UnableToAcquireLockError: If the model is locked (i.e. there is another
				process currently modifying the model).
			InvalidStartText: If the sampler start text cannot be encoded.
			InvalidSymtokTokens: If the sampler symmetrical depth tokens cannot be
				encoded.
		"""
        sample_count = 1
        self.SamplerCache(sampler).mkdir(exist_ok=True)
        atomizer = self.atomizer
        sampler.Specialize(atomizer)
        batch_size = self.backend.InitSampling(sampler, seed)
        sample_start_time = labdate.MillisecondsTimestamp()
        samples = []
        sample_dir = self.SamplerCache(sampler)
        print("batch Size : " + str(batch_size))
        if (FLAGS.sampling_technique.split(" ")[0]
                in ["topK", "nucleus", "beam", "default"]):
            logging.info('Sampling technique set to :  %s with value : %s ',
                         FLAGS.sampling_technique.split(" ")[0],
                         FLAGS.sampling_technique.split(" ")[1])
        else:
            logging.info('Sampling technique set to :  default')

        # Per-sample batch outer loop. Continues until we have as many samples
        # as we want.
        while True:
            samples_in_progress = [
                sampler.tokenized_start_text.copy() for _ in range(batch_size)
            ]
            done = np.zeros(batch_size, dtype=np.bool)
            start_time = labdate.MillisecondsTimestamp()
            wall_time_start = start_time

            self.backend.InitSampleBatch(sampler, batch_size)

            # Sampling loop. Continues until all samples in the batch are done.
            while True:
                indices = self.backend.SampleNextIndices(sampler, batch_size)

                # Iterate over all samples in batch to determine whether they're
                # done.
                print
                for i in range(batch_size):
                    #print("batch_size : "+ str(i)+" :"+ str(batch_size)+" num_tokens : "+str(len(samples_in_progress[i])))
                    if done[i]:
                        continue

                    token = atomizer.decoder[indices[i]]
                    samples_in_progress[i].append(token)

                    if sampler.SampleIsComplete(samples_in_progress[i]):
                        end_time = labdate.MillisecondsTimestamp()
                        done[i] = 1
                        sample = model_pb2.Sample(
                            text=''.join(samples_in_progress[i]),
                            sample_start_epoch_ms_utc=start_time,
                            sample_time_ms=end_time - start_time,
                            wall_time_ms=end_time - wall_time_start,
                            num_tokens=len(samples_in_progress[i]))
                        print(
                            f'=== BEGIN SIMULINK MDL SAMPLE (Pretrained Model){sample_count} '
                        )
                        #f'===\n\n{sample.text}\n')
                        sample_count += 1
                        sample_path = sample_dir / f'Sample{sample_count}.mdl'
                        pbutil.ToFile(sample, sample_path)
                        if min_num_samples > 0:
                            samples.append(sample)
                        wall_time_start = labdate.MillisecondsTimestamp()

                # Complete the batch.
                if done.all():
                    break

            # Complete sampling. Note that sample_count starts at 1.
            if sample_count > min_num_samples:
                now = labdate.MillisecondsTimestamp()
                logging.info(
                    'Produced %s samples at a rate of %s ms / sample.',
                    humanize.intcomma(sample_count - 1),
                    humanize.intcomma(
                        int((now - sample_start_time) /
                            max(sample_count - 1, 1))))
                break
        return samples
Example #25
 def EpochBeginCallback(self) -> None:
     self.last_epoch_begin_timestamp = labdate.MillisecondsTimestamp()
Example #26
def test_MillisecondsTimestamp_invalid_argument():
    with pytest.raises(TypeError):
        labdate.MillisecondsTimestamp('not a date')
Example #27
def test_Result_ToProto():
    now = datetime.datetime.now()

    result = deeplearning.deepsmith.result.Result(
        testcase=deeplearning.deepsmith.testcase.Testcase(
            toolchain=deeplearning.deepsmith.toolchain.Toolchain(string='cpp'),
            generator=deeplearning.deepsmith.generator.Generator(
                name='generator'),
            harness=deeplearning.deepsmith.harness.Harness(name='harness'),
            inputset=[
                deeplearning.deepsmith.testcase.TestcaseInput(
                    name=deeplearning.deepsmith.testcase.TestcaseInputName(
                        string='src'),
                    value=deeplearning.deepsmith.testcase.TestcaseInputValue(
                        string='void main() {}'),
                ),
                deeplearning.deepsmith.testcase.TestcaseInput(
                    name=deeplearning.deepsmith.testcase.TestcaseInputName(
                        string='data'),
                    value=deeplearning.deepsmith.testcase.TestcaseInputValue(
                        string='[1,2]'),
                ),
            ],
            invariant_optset=[
                deeplearning.deepsmith.testcase.TestcaseInvariantOpt(
                    name=deeplearning.deepsmith.testcase.
                    TestcaseInvariantOptName(string='config'),
                    value=deeplearning.deepsmith.testcase.
                    TestcaseInvariantOptValue(string='opt'),
                ),
            ],
            profiling_events=[
                deeplearning.deepsmith.profiling_event.TestcaseProfilingEvent(
                    client=deeplearning.deepsmith.client.Client(
                        string='localhost'),
                    type=deeplearning.deepsmith.profiling_event.
                    ProfilingEventType(string='generate', ),
                    duration_ms=100,
                    event_start=now,
                ),
                deeplearning.deepsmith.profiling_event.TestcaseProfilingEvent(
                    client=deeplearning.deepsmith.client.Client(
                        string='localhost'),
                    type=deeplearning.deepsmith.profiling_event.
                    ProfilingEventType(string='foo', ),
                    duration_ms=100,
                    event_start=now,
                ),
            ]),
        testbed=deeplearning.deepsmith.testbed.Testbed(
            toolchain=deeplearning.deepsmith.toolchain.Toolchain(string='cpp'),
            name='clang',
            optset=[
                deeplearning.deepsmith.testbed.TestbedOpt(
                    name=deeplearning.deepsmith.testbed.TestbedOptName(
                        string='arch'),
                    value=deeplearning.deepsmith.testbed.TestbedOptValue(
                        string='x86_64'),
                ),
                deeplearning.deepsmith.testbed.TestbedOpt(
                    name=deeplearning.deepsmith.testbed.TestbedOptName(
                        string='build'),
                    value=deeplearning.deepsmith.testbed.TestbedOptValue(
                        string='debug+assert'),
                ),
            ],
        ),
        returncode=0,
        outputset=[
            deeplearning.deepsmith.result.ResultOutput(
                name=deeplearning.deepsmith.result.ResultOutputName(
                    string='stdout'),
                value=deeplearning.deepsmith.result.ResultOutputValue(
                    truncated_value='Hello, world!'),
            ),
            deeplearning.deepsmith.result.ResultOutput(
                name=deeplearning.deepsmith.result.ResultOutputName(
                    string='stderr'),
                value=deeplearning.deepsmith.result.ResultOutputValue(
                    truncated_value=''),
            ),
        ],
        profiling_events=[
            deeplearning.deepsmith.profiling_event.ResultProfilingEvent(
                client=deeplearning.deepsmith.client.Client(
                    string='localhost'),
                type=deeplearning.deepsmith.profiling_event.ProfilingEventType(
                    string='exec', ),
                duration_ms=500,
                event_start=now,
            ),
            deeplearning.deepsmith.profiling_event.ResultProfilingEvent(
                client=deeplearning.deepsmith.client.Client(
                    string='localhost'),
                type=deeplearning.deepsmith.profiling_event.ProfilingEventType(
                    string='overhead', ),
                duration_ms=100,
                event_start=now,
            ),
        ],
        outcome_num=6,
    )
    proto = result.ToProto()
    assert proto.testcase.toolchain == 'cpp'
    assert proto.testcase.generator.name == 'generator'
    assert proto.testcase.harness.name == 'harness'
    assert len(proto.testcase.inputs) == 2
    assert proto.testcase.inputs['src'] == 'void main() {}'
    assert proto.testcase.inputs['data'] == '[1,2]'
    assert len(proto.testcase.invariant_opts) == 1
    assert proto.testcase.invariant_opts['config'] == 'opt'
    assert len(proto.testcase.profiling_events) == 2
    assert proto.testcase.profiling_events[0].client == 'localhost'
    assert proto.testcase.profiling_events[0].type == 'generate'
    assert proto.testcase.profiling_events[0].client == 'localhost'
    assert proto.testbed.toolchain == 'cpp'
    assert proto.testbed.name == 'clang'
    assert len(proto.testbed.opts) == 2
    assert proto.testbed.opts['arch'] == 'x86_64'
    assert proto.testbed.opts['build'] == 'debug+assert'
    assert len(proto.outputs) == 2
    assert proto.outputs['stdout'] == 'Hello, world!'
    assert proto.outputs['stderr'] == ''
    assert len(proto.testcase.profiling_events) == 2
    assert proto.profiling_events[0].client == 'localhost'
    assert proto.profiling_events[0].type == 'exec'
    assert proto.profiling_events[0].duration_ms == 500
    assert (proto.profiling_events[0].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))
    assert proto.profiling_events[1].client == 'localhost'
    assert proto.profiling_events[1].type == 'overhead'
    assert proto.profiling_events[1].duration_ms == 100
    assert (proto.profiling_events[1].event_start_epoch_ms ==
            labdate.MillisecondsTimestamp(now))
    assert proto.outcome == deepsmith_pb2.Result.PASS
Example #28
def test_timestamp_datetime_equivalence():
    date_in = labdate.GetUtcMillisecondsNow()
    timestamp = labdate.MillisecondsTimestamp(date_in)
    date_out = labdate.DatetimeFromMillisecondsTimestamp(timestamp)
    assert date_in == date_out
Example #29
 def SetProto(self, proto: model_pb2.Sample) -> None:
     proto.text = self.text
     proto.num_tokens = self.num_tokens
     proto.wall_time_ms = self.wall_time_ms
     proto.sample_start_epoch_ms_utc = labdate.MillisecondsTimestamp(
         self.sample_date)
Example #30
def test_default_timestamp_datetime_equivalence():
    now = labdate.GetUtcMillisecondsNow()
    timestamp = labdate.MillisecondsTimestamp()
    date_out = labdate.DatetimeFromMillisecondsTimestamp(timestamp)
    assert now.date() == date_out.date()
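Taken together, the tests in Examples #23, #26, #28 and #30 pin down the small surface these snippets rely on: labdate.MillisecondsTimestamp() with no argument returns the current epoch time in integer milliseconds, labdate.MillisecondsTimestamp(dt) converts a datetime and raises TypeError for anything that is not a date, labdate.GetUtcMillisecondsNow() returns the current UTC time as a datetime, and labdate.DatetimeFromMillisecondsTimestamp() converts a timestamp back. A round-trip sketch under those behaviours (inferred from the tests above, not from the library's documentation):

# Round-trip sketch; behaviour inferred from Examples #26, #28 and #30.
now_dt = labdate.GetUtcMillisecondsNow()         # current UTC time as a datetime
now_ms = labdate.MillisecondsTimestamp(now_dt)   # datetime -> int milliseconds since the epoch
assert labdate.DatetimeFromMillisecondsTimestamp(now_ms) == now_dt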