def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  protos_dir = pathlib.Path(FLAGS.protos_dir)
  if not protos_dir.is_dir():
    raise app.UsageError("Proto dir not found")
  if not FLAGS.origin:
    raise app.UsageError("Must set an origin")

  paths_to_import = list(protos_dir.iterdir())
  if not all(x.is_file() for x in paths_to_import):
    raise app.UsageError("Non-file input found")

  # Convert and import the protos in batches of batch_size, so that the
  # temporary files from one batch are cleaned up before the next begins.
  for stride in range(0, len(paths_to_import), FLAGS.batch_size):
    with tempfile.TemporaryDirectory(prefix="phd_fish_") as d:
      d = pathlib.Path(d)
      srcs = [
          CreateTempFileFromProto(d, p)
          for p in paths_to_import[stride:stride + FLAGS.batch_size]
      ]
      db.ImportStaticFeaturesFromPaths(srcs, FLAGS.origin)
  app.Log(1, "done")
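# A minimal sketch of the CreateTempFileFromProto() helper called above, whose
# definition is not part of this excerpt. It assumes each input file is a
# protocol buffer with a `src` field holding the kernel text; the
# static_features_pb2.StaticFeatures message name is an assumption.
def CreateTempFileFromProto(d: pathlib.Path,
                            path: pathlib.Path) -> pathlib.Path:
  """Write the kernel source from a proto file to a .cl file in d."""
  features = pbutil.FromFile(path, static_features_pb2.StaticFeatures())
  out = d / f"{path.stem}.cl"
  out.write_text(features.src)
  return out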
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  df = opencl_device_mapping_dataset.OpenClDeviceMappingsDataset().df

  new_count = 0
  with ncc.DEEPTUNE_INST2VEC_DATA_ARCHIVE as datafolder:
    for _, row in progressbar.progressbar(list(df.iterrows())):
      with db.Session(commit=True) as session:
        obj = RowToStaticFeatures(row, datafolder)
        # Check if it already exists in the database.
        exists = (session.query(grewe_features_db.StaticFeatures)
                  .filter_by(src_sha256=obj.src_sha256)
                  .filter(grewe_features_db.StaticFeatures.origin.like(
                      "benchmarks_%"))
                  .first())
        if not exists:
          new_count += 1
          session.add(obj)
  app.Log(1, "Added %d new database entries", new_count)
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  if not FLAGS.datastore:
    raise app.UsageError("--datastore flag is required")

  db = grewe_features_db.Database(FLAGS.db)
  ds = datastore.DataStore.FromFile(pathlib.Path(FLAGS.datastore))

  with ds.Session(commit=False) as session:
    # Resolve the IDs of the CLgen generator and the OpenCL toolchain, which
    # together select the testcases to import.
    generator_id = (session.query(generator.Generator.id)
                    .filter(generator.Generator.name == "clgen")
                    .first())
    if not generator_id:
      raise app.UsageError("Datastore contains no CLgen generator")

    toolchain_id = (session.query(toolchain.Toolchain.id)
                    .filter(toolchain.Toolchain.string == "opencl")
                    .first())
    if not toolchain_id:
      raise app.UsageError("Datastore contains no opencl toolchain")

    q = (session.query(testcase.Testcase)
         .filter(testcase.Testcase.generator_id == generator_id[0])
         .filter(testcase.Testcase.toolchain_id == toolchain_id[0])
         .order_by(testcase.Testcase.id))

    batches = sqlutil.OffsetLimitBatchedQuery(
        q,
        batch_size=FLAGS.batch_size,
        start_at=FLAGS.start_at,
        compute_max_rows=True,
    )

    for batch in batches:
      app.Log(
          1,
          "Batch %03d containing testcases %s..%s of %s",
          batch.batch_num,
          humanize.Commas(batch.offset),
          humanize.Commas(batch.limit),
          humanize.Commas(batch.max_rows),
      )
      prefix = "phd_experimental_deeplearning_clgen_"
      with tempfile.TemporaryDirectory(prefix=prefix) as d:
        d = pathlib.Path(d)
        paths_to_import = [
            CreateTempFileFromTestcase(d, r) for r in batch.rows
        ]
        db.ImportStaticFeaturesFromPaths(paths_to_import, "clgen_dsmith")
  app.Log(1, "done")
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  with _CLGEN_1000_TAR as kernels_dir:
    paths_to_import = list((kernels_dir / "clgen-1000/kernels").iterdir())
    assert len(paths_to_import) == 1000
    db.ImportStaticFeaturesFromPaths(paths_to_import, "clgen_1000")
  app.Log(1, "done")
def main(): """Main entry point.""" database = db.Database(FLAGS.db) env = cldrive_env.OpenCLEnvironment.FromName(FLAGS.env) while True: with database.Session() as session: benchmark_suite = GetBenchmarkSuiteToRun(session, env) if not benchmark_suite: app.Log(1, "Done. Nothing more to run!") return try: DriveBenchmarkSuiteAndRecordResults(database, benchmark_suite, env, FLAGS.num_runs) except gpgpu.BenchmarkInterrupt: sys.exit(1)
def db() -> grewe_features_db.Database:
  """A test fixture that yields a database with three static features in it."""
  db_ = grewe_features_db.Database("sqlite://")
  with db_.Session(commit=True) as s:
    s.add_all([
        _StaticFeatures(
            "origin",
            "kernel void A(global int* a) { a[get_global_id(0)] *= 2; }"),
        _StaticFeatures(
            "github",
            "kernel void B(global int* a) { a[get_global_id(0)] *= 3; }"),
        _StaticFeatures(
            "clgen",
            "kernel void C(global int* a) { a[get_global_id(0)] *= 4; }"),
    ])
  yield db_
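# An example of how the fixture above can be used, as a sketch: a test that
# queries the seeded rows back through a session. It assumes the fixture adds
# one StaticFeatures row per object.
def test_db_contains_three_features(db: grewe_features_db.Database):
  """The fixture database should contain exactly the three seeded kernels."""
  with db.Session() as s:
    assert s.query(grewe_features_db.StaticFeatures).count() == 3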
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  bdb = backtracking_db.Database(FLAGS.backtracking_db)

  with bdb.Session() as s, multiprocessing.Pool() as pool:
    batches = sqlutil.OffsetLimitBatchedQuery(
        s.query(backtracking_db.BacktrackingStep.src),
        batch_size=FLAGS.batch_size,
        compute_max_rows=True,
    )
    for batch in batches:
      app.Log(1, "Batch %d of %d rows", batch.batch_num, batch.max_rows)
      ProcessBatch(batch.rows, db, pool)
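# A sketch of the ProcessBatch() helper called above (not defined in this
# excerpt). It assumes each row is a single-column (src,) tuple and reuses the
# ImportStaticFeaturesFromPaths() entry point seen in the other scripts; the
# "backtracking" origin string and temp-file layout are assumptions.
def ProcessBatch(rows, db, pool) -> None:
  """Extract and import static features from a batch of kernel sources."""
  with tempfile.TemporaryDirectory(prefix="phd_backtracking_") as d:
    d = pathlib.Path(d)
    paths = []
    for i, (src,) in enumerate(rows):
      path = d / f"kernel_{i:06d}.cl"
      path.write_text(src)
      paths.append(path)
    db.ImportStaticFeaturesFromPaths(paths, "backtracking", pool)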
def main(): """Main entry point.""" min_run_count = FLAGS.min_run_count cpu = FLAGS.cpu gpu = FLAGS.gpu dataset_name = FLAGS.name db = grewe_features_db.Database(FLAGS.db) with prof.Profile("query database"), db.Session(commit=True) as session: # The query that constructs the labelled dataset. query = db.CreateCpuGpuDataset(session, dataset_name, cpu, gpu, min_run_count) # Insert the results of the query into a table. insert = sql.insert(grewe_features_db.CpuGpuMappingSet).from_select( [column["name"] for column in query.column_descriptions], query) # Run the query. session.execute(insert)
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  db = grewe_features_db.Database(FLAGS.db)
  kernel_dir = pathlib.Path(FLAGS.kernels_dir)
  if not kernel_dir.is_dir():
    raise app.UsageError("Kernel dir not found")
  if not FLAGS.origin:
    raise app.UsageError("Must set an origin")

  paths_to_import = list(kernel_dir.iterdir())
  if not all(x.is_file() for x in paths_to_import):
    raise app.UsageError("Non-file input found")
  db.ImportStaticFeaturesFromPaths(paths_to_import, FLAGS.origin)
  app.Log(1, "done")
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  database = db.Database(FLAGS.db)
  env = cldrive_env.OpenCLEnvironment.FromName(FLAGS.env)

  batch_num = 0
  while True:
    batch_num += 1
    with database.Session() as session, prof.Profile(f"Batch {batch_num}"):
      with prof.Profile(f"Get batch of {FLAGS.batch_size} kernels"):
        batch = GetBatchOfKernelsToDrive(session, env, FLAGS.batch_size)
      if not batch:
        app.Log(1, "Done. Nothing more to run!")
        return
      DriveBatchAndRecordResults(database, batch, env)
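# A sketch of the GetBatchOfKernelsToDrive() helper used above, assuming that
# a kernel still needs driving when its StaticFeatures row has no
# DynamicFeatures row for this OpenCL environment. The DynamicFeatures table
# and its opencl_env/static_features_id columns are assumptions about the
# schema.
def GetBatchOfKernelsToDrive(session, env, batch_size: int):
  """Select up to batch_size kernels not yet driven on env."""
  already_driven = session.query(
      grewe_features_db.DynamicFeatures.static_features_id).filter(
          grewe_features_db.DynamicFeatures.opencl_env == env.name)
  return (session.query(grewe_features_db.StaticFeatures)
          .filter(~grewe_features_db.StaticFeatures.id.in_(already_driven))
          .limit(batch_size)
          .all())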
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  conn = sqlite3.connect(FLAGS.legacy_clgen_db)
  c = conn.cursor()
  (num_kernels,) = c.execute(
      "SELECT COUNT(*) FROM PreprocessedKernels").fetchone()
  app.Log(1, "Database contains %d kernels", num_kernels)
  num_batches = math.ceil(num_kernels / FLAGS.batch_size)
  batches = BatchQueryResults(
      c.execute("SELECT kernel FROM PreprocessedKernels"))

  prefix = "phd_experimental_deeplearning_clgen_"
  with multiprocessing.Pool() as pool:
    for i, batch in enumerate(batches):
      with tempfile.TemporaryDirectory(prefix=prefix) as d:
        app.Log(1, "Batch %d of %d", i + 1, num_batches)
        d = pathlib.Path(d)
        # Use a separate index variable for files within a batch so that it
        # does not shadow the batch counter above.
        paths_to_import = [
            CreateTempFileFromTestcase(d, src, j)
            for j, (src,) in enumerate(batch)
        ]
        db = grewe_features_db.Database(FLAGS.db)
        success_count, new_row_count = db.ImportStaticFeaturesFromPaths(
            paths_to_import, FLAGS.origin, pool)
        app.Log(
            1,
            "Extracted features from %d of %d kernels, %d new rows",
            success_count,
            FLAGS.batch_size,
            new_row_count,
        )
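# Sketches of the two helpers called above, assuming the simplest behaviour
# consistent with their call sites: BatchQueryResults() lazily groups a
# sqlite3 cursor into fetchmany()-sized lists, and CreateTempFileFromTestcase()
# writes one kernel string per numbered .cl file. Both bodies are assumptions.
def BatchQueryResults(cursor, batch_size: int = None):
  """Yield lists of up to batch_size rows from a sqlite3 cursor."""
  batch_size = batch_size or FLAGS.batch_size
  while True:
    batch = cursor.fetchmany(batch_size)
    if not batch:
      break
    yield batch


def CreateTempFileFromTestcase(d: pathlib.Path, src: str,
                               j: int) -> pathlib.Path:
  """Write a kernel source string to a numbered .cl file in d."""
  path = d / f"kernel_{j:06d}.cl"
  path.write_text(src)
  return path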
def main(argv: typing.List[str]):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(
        " ".join(argv[1:])))

  instance = clgen.Instance(
      clgen_pb2.Instance(
          working_dir=FLAGS.clgen_dir,
          model=model_pb2.Model(
              corpus=corpus_pb2.Corpus(
                  local_directory=FLAGS.clgen_corpus_dir,
                  ascii_character_atomizer=True,
                  preprocessor=[
                      "deeplearning.clgen.preprocessors.opencl:ClangPreprocessWithShim",
                      "deeplearning.clgen.preprocessors.opencl:Compile",
                      "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
                      "deeplearning.clgen.preprocessors.opencl:StripDoubleUnderscorePrefixes",
                      "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
                      "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
                      "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
                      "deeplearning.clgen.preprocessors.opencl:ClangFormat",
                      "deeplearning.clgen.preprocessors.common:MinimumLineCount3",
                      "deeplearning.clgen.preprocessors.opencl:Compile",
                  ],
                  contentfile_separator="\n\n",
              ),
              architecture=model_pb2.NetworkArchitecture(
                  backend=model_pb2.NetworkArchitecture.TENSORFLOW,
                  neuron_type=model_pb2.NetworkArchitecture.LSTM,
                  neurons_per_layer=512,
                  num_layers=2,
                  post_layer_dropout_micros=0,
              ),
              training=model_pb2.TrainingOptions(
                  num_epochs=50,
                  sequence_length=64,
                  batch_size=64,
                  shuffle_corpus_contentfiles_between_epochs=True,
                  adam_optimizer=model_pb2.AdamOptimizer(
                      initial_learning_rate_micros=2000,
                      learning_rate_decay_per_epoch_micros=50000,
                      beta_1_micros=900000,
                      beta_2_micros=999000,
                      normalized_gradient_clip_micros=5000000,
                  ),
              ),
          ),
          sampler=sampler_pb2.Sampler(
              start_text="kernel void ",
              batch_size=64,
              sequence_length=1024,
              temperature_micros=1000000,  # = 1.0 real value
              termination_criteria=[
                  sampler_pb2.SampleTerminationCriterion(
                      symtok=sampler_pb2.SymmetricalTokenDepth(
                          depth_increase_token="{",
                          depth_decrease_token="}",
                      )),
                  sampler_pb2.SampleTerminationCriterion(
                      maxlen=sampler_pb2.MaxTokenLength(
                          maximum_tokens_in_sample=20000,
                      )),
              ],
          ),
      ),
  )

  db = grewe_features_db.Database(FLAGS.db)
  profile_dir = pathlib.Path(FLAGS.profile_dir)
  profile_dir.mkdir(parents=True, exist_ok=True)
  profiler = prof.AutoCsvProfiler(profile_dir)

  with instance.Session(), multiprocessing.Pool() as pool:
    while True:
      Sample(instance, db, profiler, pool)
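# A sketch of the Sample() function driven by the loop above. The exact clgen
# sampling API varies between versions, so the instance.model.Sample() call
# and its min_num_samples argument are assumptions, as is importing features
# via the same temp-file path used by the other scripts (profiler wiring is
# omitted because AutoCsvProfiler's API is not shown in this excerpt).
def Sample(instance, db, profiler, pool) -> None:
  """Draw one batch of samples and import their static features."""
  samples = instance.model.Sample(instance.sampler, min_num_samples=64)
  with tempfile.TemporaryDirectory(prefix="phd_clgen_sample_") as d:
    d = pathlib.Path(d)
    paths = []
    for i, sample in enumerate(samples):
      path = d / f"sample_{i:06d}.cl"
      path.write_text(sample.text)
      paths.append(path)
    db.ImportStaticFeaturesFromPaths(paths, "clgen", pool)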