Example 1
    def __init__(self, *args, **kwargs):
        super(OpenClEncoder, self).__init__(*args, **kwargs)

        # We start with an empty vocabulary and build it from inputs.
        self.lexer = lexers.Lexer(type=lexers.LexerType.OPENCL,
                                  vocabulary={},
                                  max_encoded_length=100000)

        # Map relpath -> src.
        df = make_devmap_dataset.MakeGpuDataFrame(
            opencl_device_mapping_dataset.OpenClDeviceMappingsDataset().df,
            "amd_tahiti_7970",
        )
        relpath_to_src = {
            row["relpath"]: row["program:opencl_src"]
            for _, row in df.iterrows()
        }

        # Map relpath -> bytecode ID.
        with self.ir_db.Session() as session:
            relpath_to_id = {
                row.relpath: row.id
                for row in session.query(
                    ir_database.IntermediateRepresentation.id,
                    ir_database.IntermediateRepresentation.relpath,
                ).filter(
                    ir_database.IntermediateRepresentation.source_language ==
                    ir_database.SourceLanguage.OPENCL,
                    ir_database.IntermediateRepresentation.
                    compilation_succeeded == True,
                    ir_database.IntermediateRepresentation.source ==
                    "pact17_opencl_devmap",
                    ir_database.IntermediateRepresentation.relpath.in_(
                        relpath_to_src.keys()),
                )
            }

        not_found = set(relpath_to_src.keys()) - set(relpath_to_id.keys())
        if not_found:
            raise OSError(
                f"{humanize.Plural(len(not_found), 'OpenCL relpath')} not"
                " found in IR database")

        # Encode the OpenCL sources in a deterministic (sorted) order.
        sorted_id_src_pairs: List[Tuple[int, str]] = [
            (relpath_to_id[relpath], relpath_to_src[relpath])
            for relpath in sorted(relpath_to_src.keys())
        ]
        sorted_encodeds: List[np.ndarray] = self.lexer.Lex(
            [src for _, src in sorted_id_src_pairs])

        self._max_encoded_length = max(
            len(encoded) for encoded in sorted_encodeds)

        # Map id -> encoded.
        self.id_to_encoded = {
            id: encoded
            for (id, _), encoded in zip(sorted_id_src_pairs, sorted_encodeds)
        }
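
The ordering trick in Example 1 is worth spelling out: Lex() is assumed to return one encoding per input, in input order, so sorting the (id, src) pairs once and zipping the results back yields a stable id -> encoded map. A minimal self-contained sketch of that pattern, with a stand-in lex() in place of the project-internal lexer:

from typing import Dict, List, Tuple


def build_id_to_encoded(
    relpath_to_id: Dict[str, int],
    relpath_to_src: Dict[str, str],
) -> Dict[int, List[int]]:
    """Pair encoded sources with their bytecode IDs, as in Example 1."""

    def lex(sources: List[str]) -> List[List[int]]:
        # Stand-in for lexers.Lexer.Lex(): one encoding per input, in order.
        return [[ord(c) for c in src] for src in sources]

    # Sort by relpath so the pairing is deterministic across runs.
    pairs: List[Tuple[int, str]] = [
        (relpath_to_id[relpath], relpath_to_src[relpath])
        for relpath in sorted(relpath_to_src)
    ]
    encodeds = lex([src for _, src in pairs])
    # zip() re-associates each encoding with its bytecode ID.
    return {id_: encoded for (id_, _), encoded in zip(pairs, encodeds)}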
Example 2
def lexer(
  lexer_type: lexers.LexerType,
  initial_vocab: Dict[str, int],
  max_chunk_size: int,
) -> lexers.Lexer:
  """A test fixture which returns a lexer."""
  return lexers.Lexer(
    type=lexer_type, initial_vocab=initial_vocab, max_chunk_size=max_chunk_size
  )
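
The listing drops the fixture's decorator. Assuming plain pytest (the original project may use a different test harness), Example 2's fixture and its parameters would be wired up roughly as below; the parameter fixtures and their values are hypothetical stand-ins, and `lexers` is the same project-internal module used above:

import pytest

@pytest.fixture
def lexer_type():
    return lexers.LexerType.OPENCL  # hypothetical choice for this test

@pytest.fixture
def initial_vocab():
    return {}  # hypothetical: start from an empty vocabulary

@pytest.fixture
def max_chunk_size():
    return 1024  # hypothetical value

def test_lexer_construction(lexer):
    # pytest resolves `lexer` via the fixture in Example 2 (itself decorated
    # with @pytest.fixture under this assumption).
    assert lexer is not None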
Example 3
  def __init__(self, *args, **kwargs):
    super(LlvmEncoder, self).__init__(*args, **kwargs)

    # Load the vocabulary used for encoding LLVM bytecode.
    with open(LLVM_VOCAB) as f:
      data_to_load = json.load(f)
    vocab = data_to_load["vocab"]
    self._max_encoded_length = data_to_load["max_encoded_length"]

    self.lexer = lexers.Lexer(type=lexers.LexerType.LLVM, initial_vocab=vocab)
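
Example 3 implies that the file at LLVM_VOCAB is a JSON object with at least two keys: a "vocab" mapping from token to integer ID, and a "max_encoded_length" integer. A sketch of writing a file in that shape (the tokens and length are illustrative, not the real LLVM vocabulary):

import json

illustrative = {
    "vocab": {"define": 0, "i32": 1, "ret": 2},  # token -> integer ID
    "max_encoded_length": 4096,
}
with open("llvm_vocab.json", "w") as f:
    json.dump(illustrative, f)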
Example 4
def lexer(
    lexer_type: lexers.LexerType,
    vocabulary: Dict[str, int],
    max_encoded_length: int,
) -> lexers.Lexer:
    """A test fixture which returns a lexer."""
    return lexers.Lexer(
        type=lexer_type,
        vocabulary=vocabulary,
        max_encoded_length=max_encoded_length,
    )
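
Note that Examples 2 and 4 construct lexers.Lexer with different keyword arguments (initial_vocab/max_chunk_size versus vocabulary/max_encoded_length, the latter matching Example 1). They presumably come from different revisions of the same API, so match the keywords to the version of the lexers module you are importing.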