Example 1
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  protos_dir = pathlib.Path(FLAGS.protos_dir)
  if not protos_dir.is_dir():
    raise app.UsageError("Proto dir not found")

  if not FLAGS.origin:
    raise app.UsageError("Must set an origin")

  paths_to_import = list(protos_dir.iterdir())
  if not all(x.is_file() for x in paths_to_import):
    raise app.UsageError("Non-file input found")

  # Import in batches of FLAGS.batch_size, staging each batch in a
  # throwaway temporary directory before handing it to the database.
  for stride in range(0, len(paths_to_import), FLAGS.batch_size):
    with tempfile.TemporaryDirectory(prefix="phd_fish_") as d:
      d = pathlib.Path(d)
      srcs = [
        CreateTempFileFromProto(d, p)
        for p in paths_to_import[stride : stride + FLAGS.batch_size]
      ]
      db.ImportStaticFeaturesFromPaths(srcs, FLAGS.origin)
  app.Log(1, "done")
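The CreateTempFileFromProto helper this example calls is not defined in the excerpt. A minimal sketch of the shape it could take, assuming each proto sits in its own file and the importer only needs it materialized under the batch's temporary directory (both assumptions; the real helper may decode the proto and write out just the kernel source):

import pathlib


def CreateTempFileFromProto(d: pathlib.Path, proto_path: pathlib.Path) -> pathlib.Path:
  """Hypothetical sketch: mirror one proto file into the temp directory d."""
  out = d / proto_path.name
  out.write_bytes(proto_path.read_bytes())
  return out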
Example 2
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    db = grewe_features_db.Database(FLAGS.db)
    df = opencl_device_mapping_dataset.OpenClDeviceMappingsDataset().df

    new_count = 0
    with ncc.DEEPTUNE_INST2VEC_DATA_ARCHIVE as datafolder:
        for _, row in progressbar.progressbar(list(df.iterrows())):
            with db.Session(commit=True) as session:
                obj = RowToStaticFeatures(row, datafolder)
                # Check if it already exists in the database.
                exists = (session.query(
                    grewe_features_db.StaticFeatures).filter_by(
                        src_sha256=obj.src_sha256).filter(
                            grewe_features_db.StaticFeatures.origin.like(
                                "benchmarks_%")).first())
                if not exists:
                    new_count += 1
                    session.add(obj)

    app.Log(1, "Added %d new database entries", new_count)
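RowToStaticFeatures is not shown. A sketch of what it plausibly constructs, based only on the fields the dedup query above touches (src_sha256, and an origin matching "benchmarks_%"); the src column, the dataframe column name, and the concrete origin string are assumptions:

import hashlib


def RowToStaticFeatures(row, datafolder) -> grewe_features_db.StaticFeatures:
    """Hypothetical sketch: build a StaticFeatures record from a dataset row."""
    # datafolder is unused in this sketch; the real helper presumably
    # resolves benchmark sources against the inst2vec data archive.
    src = row["program:opencl_src"]  # Assumed column name.
    return grewe_features_db.StaticFeatures(
        origin="benchmarks_deeptune",  # Satisfies the LIKE "benchmarks_%" filter.
        src_sha256=hashlib.sha256(src.encode("utf-8")).hexdigest(),
        src=src,
    )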
Example 3
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    if not FLAGS.datastore:
        raise app.UsageError("--datastore flag is required")

    db = grewe_features_db.Database(FLAGS.db)
    ds = datastore.DataStore.FromFile(pathlib.Path(FLAGS.datastore))

    with ds.Session(commit=False) as session:
        generator_id = (session.query(generator.Generator.id).filter(
            generator.Generator.name == "clgen").first())
        if not generator_id:
            raise app.UsageError("Datastore contains no CLgen generator")

        toolchain_id = (session.query(toolchain.Toolchain.id).filter(
            toolchain.Toolchain.string == "opencl").first())
        if not toolchain_id:
            raise app.UsageError("Datastore contains no opencl toolchain")

        q = (session.query(testcase.Testcase).filter(
            testcase.Testcase.generator_id == generator_id[0]).filter(
                testcase.Testcase.toolchain_id == toolchain_id[0]).order_by(
                    testcase.Testcase.id))

        batches = sqlutil.OffsetLimitBatchedQuery(
            q,
            batch_size=FLAGS.batch_size,
            start_at=FLAGS.start_at,
            compute_max_rows=True,
        )

        for batch in batches:
            app.Log(
                1,
                "Batch %03d containing testcases %s..%s of %s",
                batch.batch_num,
                humanize.Commas(batch.offset),
                humanize.Commas(batch.limit),
                humanize.Commas(batch.max_rows),
            )
            prefix = "phd_experimental_deeplearning_clgen_"
            with tempfile.TemporaryDirectory(prefix=prefix) as d:
                d = pathlib.Path(d)
                paths_to_import = [
                    CreateTempFileFromTestcase(d, r) for r in batch.rows
                ]
                db.ImportStaticFeaturesFromPaths(paths_to_import,
                                                 "clgen_dsmith")
    app.Log(1, "done")
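Examples 3 and 7 both lean on sqlutil.OffsetLimitBatchedQuery. The call sites reveal its parameters (batch_size, start_at, compute_max_rows) and the fields of each yielded batch (batch_num, offset, limit, max_rows, rows). A re-implementation sketch consistent with that usage; the real labm8 version may differ in its details:

import typing


class Batch(typing.NamedTuple):
    batch_num: int
    offset: int
    limit: int
    max_rows: typing.Optional[int]
    rows: typing.List[typing.Any]


def OffsetLimitBatchedQuery(q, batch_size: int, start_at: int = 0,
                            compute_max_rows: bool = False):
    """Sketch: yield a SQLAlchemy query's results in OFFSET/LIMIT batches."""
    max_rows = q.count() if compute_max_rows else None
    offset, batch_num = start_at, 0
    while True:
        rows = q.offset(offset).limit(batch_size).all()
        if not rows:
            return
        batch_num += 1
        yield Batch(batch_num, offset, offset + len(rows), max_rows, rows)
        offset += len(rows)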
Example 4
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    db = grewe_features_db.Database(FLAGS.db)

    with _CLGEN_1000_TAR as kernels_dir:
        paths_to_import = list((kernels_dir / "clgen-1000/kernels").iterdir())
        assert len(paths_to_import) == 1000
        db.ImportStaticFeaturesFromPaths(paths_to_import, "clgen_1000")

    app.Log(1, "done")
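_CLGEN_1000_TAR is not defined in this excerpt, but the "with ... as kernels_dir" usage implies a context manager that unpacks an archive and yields its extraction root. A hypothetical stand-in (the wrapper name and the tarball path are assumptions):

import contextlib
import pathlib
import tarfile
import tempfile


@contextlib.contextmanager
def _ExtractedArchive(tar_path: str):
    """Hypothetical sketch: extract a tarball to a temp dir and yield its root."""
    with tempfile.TemporaryDirectory(prefix="phd_clgen_1000_") as d:
        with tarfile.open(tar_path) as tar:
            tar.extractall(d)
        yield pathlib.Path(d)


# Single-use, as in the example above:
# _CLGEN_1000_TAR = _ExtractedArchive("clgen-1000.tar.bz2")  # Hypothetical path.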
Example 5
def main():
    """Main entry point."""
    database = db.Database(FLAGS.db)
    env = cldrive_env.OpenCLEnvironment.FromName(FLAGS.env)

    while True:
        with database.Session() as session:
            benchmark_suite = GetBenchmarkSuiteToRun(session, env)
        if not benchmark_suite:
            app.Log(1, "Done. Nothing more to run!")
            return
        try:
            DriveBenchmarkSuiteAndRecordResults(database, benchmark_suite, env,
                                                FLAGS.num_runs)
        except gpgpu.BenchmarkInterrupt:
            sys.exit(1)
Example 6
def db() -> grewe_features_db.Database:
    """A test fixture that yields a database with three static features in it."""
    db_ = grewe_features_db.Database("sqlite://")
    with db_.Session(commit=True) as s:
        s.add_all([
            _StaticFeatures(
                "origin",
                "kernel void A(global int* a) { a[get_global_id(0)] *= 2; }"),
            _StaticFeatures(
                "github",
                "kernel void B(global int* a) { a[get_global_id(0)] *= 3; }"),
            _StaticFeatures(
                "clgen",
                "kernel void C(global int* a) { a[get_global_id(0)] *= 4; }"),
        ])
    yield db_
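As excerpted, the fixture is missing both its registration decorator and the _StaticFeatures helper it calls. A sketch of how the pieces could fit together under plain pytest (the registration, the helper body, and the test are all assumptions):

import hashlib

import pytest


def _StaticFeatures(origin: str, src: str) -> grewe_features_db.StaticFeatures:
    """Hypothetical sketch: wrap a source string in a StaticFeatures record."""
    return grewe_features_db.StaticFeatures(
        origin=origin,
        src_sha256=hashlib.sha256(src.encode("utf-8")).hexdigest(),
        src=src,
    )


db = pytest.fixture(db)  # Register the generator above as a pytest fixture.


def test_db_fixture_row_count(db: grewe_features_db.Database):
    # The fixture seeds exactly one row per origin: three in total.
    with db.Session() as s:
        assert s.query(grewe_features_db.StaticFeatures).count() == 3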
Example 7
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    db = grewe_features_db.Database(FLAGS.db)
    bdb = backtracking_db.Database(FLAGS.backtracking_db)

    with bdb.Session() as s, multiprocessing.Pool() as pool:
        batches = sqlutil.OffsetLimitBatchedQuery(
            s.query(backtracking_db.BacktrackingStep.src),
            batch_size=FLAGS.batch_size,
            compute_max_rows=True,
        )
        for batch in batches:
            app.Log(1, "Batch %d of %d rows", batch.batch_num, batch.max_rows)
            ProcessBatch(batch.rows, db, pool)
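ProcessBatch is not shown. A sketch consistent with how the other examples stage sources for db.ImportStaticFeaturesFromPaths (Example 11 forwards a worker pool the same way); the "backtracking" origin string is an assumption:

import pathlib
import tempfile


def ProcessBatch(rows, db, pool) -> None:
    """Hypothetical sketch: stage one batch of kernel sources and import them."""
    with tempfile.TemporaryDirectory(prefix="phd_backtracking_") as d:
        d = pathlib.Path(d)
        paths_to_import = []
        for i, (src,) in enumerate(rows):
            path = d / f"{i}.cl"
            path.write_text(src)
            paths_to_import.append(path)
        db.ImportStaticFeaturesFromPaths(paths_to_import, "backtracking", pool)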
Example 8
def main():
    """Main entry point."""
    min_run_count = FLAGS.min_run_count
    cpu = FLAGS.cpu
    gpu = FLAGS.gpu
    dataset_name = FLAGS.name
    db = grewe_features_db.Database(FLAGS.db)

    with prof.Profile("query database"), db.Session(commit=True) as session:
        # The query that constructs the labelled dataset.
        query = db.CreateCpuGpuDataset(session, dataset_name, cpu, gpu,
                                       min_run_count)

        # Insert the results of the query into a table.
        insert = sql.insert(grewe_features_db.CpuGpuMappingSet).from_select(
            [column["name"] for column in query.column_descriptions], query)

        # Run the query.
        session.execute(insert)
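from_select composes an INSERT whose rows come straight from a SELECT, so the labelled dataset never round-trips through Python. A self-contained illustration of the same pattern with throwaway tables (all names here are hypothetical; written against SQLAlchemy 1.4-style select()):

import sqlalchemy as sql
from sqlalchemy import orm
from sqlalchemy.ext import declarative

Base = declarative.declarative_base()


class Run(Base):
    __tablename__ = "runs"
    id = sql.Column(sql.Integer, primary_key=True)
    runtime_ms = sql.Column(sql.Integer)


class FastRun(Base):
    __tablename__ = "fast_runs"
    id = sql.Column(sql.Integer, primary_key=True)
    runtime_ms = sql.Column(sql.Integer)


engine = sql.create_engine("sqlite://")
Base.metadata.create_all(engine)
session = orm.sessionmaker(bind=engine)()
session.add_all([Run(runtime_ms=5), Run(runtime_ms=500)])
session.commit()

# Insert the sub-100ms runs into fast_runs in a single statement, without
# pulling the rows into Python first.
select = sql.select(Run.id, Run.runtime_ms).where(Run.runtime_ms < 100)
session.execute(
    sql.insert(FastRun).from_select(["id", "runtime_ms"], select))
session.commit()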
Example 9
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    db = grewe_features_db.Database(FLAGS.db)
    kernel_dir = pathlib.Path(FLAGS.kernels_dir)
    if not kernel_dir.is_dir():
        raise app.UsageError("Kernel dir not found")

    if not FLAGS.origin:
        raise app.UsageError("Must set an origin")

    paths_to_import = list(kernel_dir.iterdir())
    if not all(x.is_file() for x in paths_to_import):
        raise app.UsageError("Non-file input found")

    db.ImportStaticFeaturesFromPaths(paths_to_import, FLAGS.origin)
    app.Log(1, "done")
Example 10
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    database = db.Database(FLAGS.db)
    env = cldrive_env.OpenCLEnvironment.FromName(FLAGS.env)

    batch_num = 0
    while True:
        batch_num += 1
        with database.Session() as session, prof.Profile(f"Batch {batch_num}"):
            with prof.Profile(f"Get batch of {FLAGS.batch_size} kernels"):
                batch = GetBatchOfKernelsToDrive(session, env,
                                                 FLAGS.batch_size)
        if not batch:
            app.Log(1, "Done. Nothing more to run!")
            return

        DriveBatchAndRecordResults(database, batch, env)
Example 11
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    conn = sqlite3.connect(FLAGS.legacy_clgen_db)
    c = conn.cursor()

    (num_kernels,) = c.execute(
        "SELECT COUNT(*) FROM PreprocessedKernels").fetchone()
    app.Log(1, "Database contains %d kernels", num_kernels)
    num_batches = math.ceil(num_kernels / FLAGS.batch_size)

    batches = BatchQueryResults(
        c.execute("SELECT kernel FROM PreprocessedKernels"))

    prefix = "phd_experimental_deeplearning_clgen_"
    with multiprocessing.Pool() as pool:
        for i, batch in enumerate(batches):
            with tempfile.TemporaryDirectory(prefix=prefix) as d:
                app.Log(1, "Batch %d of %d", i + 1, num_batches)
                d = pathlib.Path(d)
                paths_to_import = [
                    CreateTempFileFromTestcase(d, src, j)
                    for j, (src,) in enumerate(batch)
                ]
                db = grewe_features_db.Database(FLAGS.db)
                success_count, new_row_count = db.ImportStaticFeaturesFromPaths(
                    paths_to_import, FLAGS.origin, pool)
                app.Log(
                    1,
                    "Extracted features from %d of %d kernels, %d new rows",
                    success_count,
                    FLAGS.batch_size,
                    new_row_count,
                )
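BatchQueryResults is not shown. Since num_batches above is derived from FLAGS.batch_size, the generator presumably yields the cursor's rows in batches of that size; a sketch (the function body is an assumption, the fetchmany() call is standard sqlite3):

def BatchQueryResults(cursor):
    """Hypothetical sketch: yield a sqlite3 cursor's rows in fixed batches."""
    while True:
        batch = cursor.fetchmany(FLAGS.batch_size)
        if not batch:
            return
        yield batch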
Example 12
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    instance = clgen.Instance(
        clgen_pb2.Instance(
            working_dir=FLAGS.clgen_dir,
            model=model_pb2.Model(
                corpus=corpus_pb2.Corpus(
                    local_directory=FLAGS.clgen_corpus_dir,
                    ascii_character_atomizer=True,
                    preprocessor=[
                        "deeplearning.clgen.preprocessors.opencl:ClangPreprocessWithShim",
                        "deeplearning.clgen.preprocessors.opencl:Compile",
                        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
                        "deeplearning.clgen.preprocessors.opencl:StripDoubleUnderscorePrefixes",
                        "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
                        "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
                        "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
                        "deeplearning.clgen.preprocessors.opencl:ClangFormat",
                        "deeplearning.clgen.preprocessors.common:MinimumLineCount3",
                        "deeplearning.clgen.preprocessors.opencl:Compile",
                    ],
                    contentfile_separator="\n\n",
                ),
                architecture=model_pb2.NetworkArchitecture(
                    backend=model_pb2.NetworkArchitecture.TENSORFLOW,
                    neuron_type=model_pb2.NetworkArchitecture.LSTM,
                    neurons_per_layer=512,
                    num_layers=2,
                    post_layer_dropout_micros=0,
                ),
                training=model_pb2.TrainingOptions(
                    num_epochs=50,
                    sequence_length=64,
                    batch_size=64,
                    shuffle_corpus_contentfiles_between_epochs=True,
                    adam_optimizer=model_pb2.AdamOptimizer(
                        # The *_micros fields carry real values scaled by 1e6,
                        # e.g. 2000 micros is a learning rate of 0.002.
                        initial_learning_rate_micros=2000,
                        learning_rate_decay_per_epoch_micros=50000,
                        beta_1_micros=900000,
                        beta_2_micros=999000,
                        normalized_gradient_clip_micros=5000000,
                    ),
                ),
            ),
            sampler=sampler_pb2.Sampler(
                start_text="kernel void ",
                batch_size=64,
                sequence_length=1024,
                temperature_micros=1000000,  # = 1.0 real value
                termination_criteria=[
                    sampler_pb2.SampleTerminationCriterion(
                        symtok=sampler_pb2.SymmetricalTokenDepth(
                            depth_increase_token="{",
                            depth_decrease_token="}",
                        )),
                    sampler_pb2.SampleTerminationCriterion(
                        maxlen=sampler_pb2.MaxTokenLength(
                            maximum_tokens_in_sample=20000)),
                ],
            ),
        ))
    db = grewe_features_db.Database(FLAGS.db)
    profile_dir = pathlib.Path(FLAGS.profile_dir)
    profile_dir.mkdir(parents=True, exist_ok=True)
    profiler = prof.AutoCsvProfiler(profile_dir)

    with instance.Session(), multiprocessing.Pool() as pool:
        while True:
            Sample(instance, db, profiler, pool)
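Sample is not shown, and the CLgen sampling API is not visible from this excerpt, so DrawSampleTexts below is an explicit stub; the staging-and-import half mirrors the other examples (every name introduced here is hypothetical):

import pathlib
import tempfile
import typing


def DrawSampleTexts(instance) -> typing.List[str]:
    """Stub standing in for the real CLgen sampling call, omitted here."""
    raise NotImplementedError("depends on the CLgen API in use")


def Sample(instance, db, profiler, pool) -> None:
    """Hypothetical sketch: draw one batch of kernels and import them."""
    sample_texts = DrawSampleTexts(instance)
    with tempfile.TemporaryDirectory(prefix="phd_clgen_sample_") as d:
        d = pathlib.Path(d)
        paths = []
        for i, text in enumerate(sample_texts):
            path = d / f"{i}.cl"
            path.write_text(text)
            paths.append(path)
        db.ImportStaticFeaturesFromPaths(paths, "clgen", pool)
    # The real helper presumably also records timings through profiler
    # (an AutoCsvProfiler); that bookkeeping is omitted from this sketch.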