Esempio n. 1
0
    def Specialize(self, atomizer: atomizers.AtomizerBase) -> None:
        """Specialize this sampler to an encoding vocabulary.

        Encodes and tokenizes ``self.start_text`` with the given atomizer,
        storing the results in ``self.encoded_start_text`` and
        ``self.tokenized_start_text``, then specializes every terminator in
        ``self.terminators`` to the same atomizer.

        Per the original contract, this is called before SampleIsComplete(),
        and the samples passed to SampleIsComplete() use this vocabulary.

        Args:
          atomizer: An atomizer to specialize to.

        Raises:
          InvalidStartText: If the start_text cannot be encoded using the
            vocabulary, or if its encoded length is not strictly less than
            ``self.sequence_length``.
          UserError: In case the sampler cannot be specialized to this
            vocabulary (not raised directly here; presumably propagated from
            a terminator's Specialize() -- confirm against terminators).
        """
        try:
            self.encoded_start_text = atomizer.AtomizeString(self.start_text)
            self.tokenized_start_text = atomizer.TokenizeString(
                self.start_text)
        except errors.VocabError as err:
            # Chain explicitly so the underlying vocabulary failure is
            # reported as the direct cause of the InvalidStartText.
            raise errors.InvalidStartText(
                "Sampler start text cannot be encoded using the corpus vocabulary: "
                f"'{self.start_text}'") from err

        # The start text must leave room for at least one sampled token.
        if len(self.encoded_start_text) >= self.sequence_length:
            raise errors.InvalidStartText(
                "Encoded sampler start text must be less than sampler sequence "
                f"length. Sampler sequence length={self.sequence_length}, encoded "
                f"start text length={len(self.encoded_start_text)}")

        # Plain loop: the calls are made for their side effects only, so no
        # throwaway list of return values is built.
        for terminator in self.terminators:
            terminator.Specialize(atomizer)
Esempio n. 2
0
 def MakeProgram(
     self,
     sampled_tokens: typing.List[str],
     backtracker: OpenClBacktrackingHelper,
     atomizer: atomizers.AtomizerBase,
 ) -> typing.List[str]:
     """Produce a kernel from a sample.

     Asks the backtracker to close the program formed by the sampled tokens
     and tokenizes the resulting source with the atomizer. If the backtracker
     returns a falsy result, the empty string is tokenized instead.

     Args:
       sampled_tokens: The tokens sampled so far.
       backtracker: Helper whose TryToCloseProgram() is called on the tokens.
       atomizer: Atomizer used to tokenize the closed program source.

     Returns:
       The result of atomizer.TokenizeString() on the closed program source.
     """
     closed_source = backtracker.TryToCloseProgram(sampled_tokens)
     if not closed_source:
         # No usable program could be produced; tokenize empty source instead.
         closed_source = ""
     return atomizer.TokenizeString(closed_source)