def main(argv):
    """Sample from the eligible CLgen instances listed in FLAGS.instances.

    Instance configs are read from the file named by FLAGS.instances, wrapped
    in clgen.Instance objects, shuffled, and then processed as a queue until
    the queue is empty.

    Args:
      argv: Command line arguments. Anything beyond argv[0] is rejected.

    Raises:
      app.UsageError: If extra positional arguments are given.
    """
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    start_time = time.time()
    instances = [
        clgen.Instance(p) for p in pbutil.FromFile(
            pathlib.Path(FLAGS.instances), clgen_pb2.Instances()).instance
    ]
    # Visit the instances in a random order.
    random.shuffle(instances)
    candidate_instances = collections.deque(instances)
    # Report how long loading took, in whole milliseconds.
    logging.info('Loaded %d instances in %s ms', len(candidate_instances),
                 humanize.intcomma(int((time.time() - start_time) * 1000)))

    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost. It assumes that only ineligible candidates are put
    # back on the queue (followed by a one second pause) - confirm against the
    # original file.
    while candidate_instances:
        instance = candidate_instances.popleft()
        with instance.Session():
            if IsEligible(instance):
                logging.info('Found an eligible candidate to work on')
                SampleModel(instance)
                PostprocessSampleCorpus(instance)
            else:
                logging.info('Candidate is ineligible')
                candidate_instances.append(instance)
                time.sleep(1)
    logging.info('Done.')
def test_Instance_working_dir_shell_variable_expansion(abc_instance_config):
    """Test that shell variables are expanded in working_dir.

    $FOO is pointed at the fixture's working directory and the config is
    rewritten to reference "$FOO/"; the resulting instance must report the
    original directory. The previous value of $FOO is restored afterwards so
    the process environment is not left modified for other tests.
    """
    working_dir = abc_instance_config.working_dir
    # Remember any pre-existing value so the environment can be restored.
    previous_foo = os.environ.get("FOO")
    os.environ["FOO"] = working_dir
    try:
        abc_instance_config.working_dir = "$FOO/"
        instance = clgen.Instance(abc_instance_config)
        assert str(instance.working_dir) == working_dir
    finally:
        if previous_foo is None:
            os.environ.pop("FOO", None)
        else:
            os.environ["FOO"] = previous_foo
def test_Instance_Session_no_working_dir(abc_instance_config):
    """Test that $CLGEN_CACHE is not changed when there's no working_dir.

    The previous value of $CLGEN_CACHE is restored when the test finishes so
    that the placeholder value does not leak into other tests.
    """
    abc_instance_config.ClearField('working_dir')
    # Remember any pre-existing value so the environment can be restored.
    previous_cache = os.environ.get('CLGEN_CACHE')
    os.environ['CLGEN_CACHE'] = 'foo'
    try:
        instance = clgen.Instance(abc_instance_config)
        with instance.Session():
            assert os.environ['CLGEN_CACHE'] == 'foo'
    finally:
        if previous_cache is None:
            os.environ.pop('CLGEN_CACHE', None)
        else:
            os.environ['CLGEN_CACHE'] = previous_cache
def test_Instance_Session_no_working_dir(abc_instance_config,
                                         tempdir2: pathlib.Path):
    """Test that $CLGEN_CACHE is not modified when config has no working_dir.

    The previous value of $CLGEN_CACHE is restored when the test finishes so
    that later tests do not inherit a path into this test's temporary
    directory.
    """
    abc_instance_config.ClearField('working_dir')
    # Remember any pre-existing value so the environment can be restored.
    previous_cache = os.environ.get('CLGEN_CACHE')
    os.environ['CLGEN_CACHE'] = str(tempdir2)
    try:
        instance = clgen.Instance(abc_instance_config)
        with instance.Session():
            assert os.environ['CLGEN_CACHE'] == str(tempdir2)
    finally:
        if previous_cache is None:
            os.environ.pop('CLGEN_CACHE', None)
        else:
            os.environ['CLGEN_CACHE'] = previous_cache
def test_main_stop_after_train(abc_instance_file):
    """Check that running main() with --stop_after=train leaves a trained model."""
    # Reset flag state, then parse a fresh command line for this test.
    app.FLAGS.unparse_flags()
    flag_argv = [
        'argv[0]',
        '--config',
        abc_instance_file,
        '--stop_after',
        'train',
    ]
    app.FLAGS(flag_argv)

    clgen.main([])

    # Re-load the same config to inspect the state main() left behind.
    config_path = pathlib.Path(abc_instance_file)
    config = pbutil.FromFile(config_path, clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert reloaded.model.is_trained
def test_main_stop_after_corpus(abc_instance_file):
    """Check that running main() with --stop_after=corpus skips model training."""
    # Reset flag state, then parse a fresh command line for this test.
    app.FLAGS.unparse_flags()
    flag_argv = [
        'argv[0]',
        '--config',
        abc_instance_file,
        '--stop_after',
        'corpus',
    ]
    app.FLAGS(flag_argv)

    clgen.main([])

    # Re-load the same config to inspect the state main() left behind.
    config_path = pathlib.Path(abc_instance_file)
    config = pbutil.FromFile(config_path, clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert not reloaded.model.is_trained
def __init__(self, config: generator_pb2.ClgenGenerator):
    """Build a CLgen-backed generator from its config proto.

    Creates the CLgen instance described by the config, derives the generator
    description from it, and copies that description into every testcase
    skeleton of the config.

    Args:
      config: The generator config proto.

    Raises:
      ValueError: If the config contains no testcase skeletons.
    """
    # The base initializer is invoked with no_init=True.
    # NOTE(review): self.config is presumably set by the base class - confirm.
    super(ClgenGenerator, self).__init__(config, no_init=True)
    self.instance = clgen.Instance(self.config.instance)
    self.toolchain = 'opencl'
    self.generator = ClgenInstanceToGenerator(self.instance)

    skeletons = self.config.testcase_skeleton
    if not skeletons:
        raise ValueError('No testcase skeletons provided')
    for testcase_skeleton in skeletons:
        testcase_skeleton.generator.CopyFrom(self.generator)
def test_main_stop_after_train(abc_instance_file):
    """Check that main() with stop_after="train" leaves a trained model."""
    # Reset flag state and parse an empty command line before assigning flags.
    FLAGS.unparse_flags()
    FLAGS(["argv0"])
    FLAGS.config = abc_instance_file
    FLAGS.stop_after = "train"

    clgen.main()

    # Re-load the same config to inspect the state main() left behind.
    config_path = pathlib.Path(abc_instance_file)
    config = pbutil.FromFile(config_path, clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert reloaded.model.is_trained
def test_main_stop_after_corpus(abc_instance_file):
    """Check that main() with stop_after="corpus" skips model training."""
    # Reset flag state and parse an empty command line before assigning flags.
    FLAGS.unparse_flags()
    FLAGS(["argv0"])
    FLAGS.config = abc_instance_file
    FLAGS.stop_after = "corpus"

    clgen.main()

    # Re-load the same config to inspect the state main() left behind.
    config_path = pathlib.Path(abc_instance_file)
    config = pbutil.FromFile(config_path, clgen_pb2.Instance())
    reloaded = clgen.Instance(config)
    assert not reloaded.model.is_trained
def main():
    """Build a CLgen instance config from flags, then train and sample it."""
    # Gather the flag values in the order the config builder expects them.
    working_dir = FLAGS.java_clgen_working_dir
    encoded_contentfiles = FLAGS.java_encoded_contentfiles()
    training_epochs = FLAGS.java_training_epochs
    seed_text = FLAGS.java_seed_text
    neurons_per_layer = FLAGS.neurons_per_layer
    num_layers = FLAGS.num_layers

    instance_config = MakeClgenInstanceConfig(
        working_dir,
        encoded_contentfiles,
        training_epochs,
        seed_text,
        neurons_per_layer,
        num_layers,
    )
    output_db = FLAGS.samples_db()
    instance = clgen.Instance(instance_config)
    TrainAndSampleInstance(instance, output_db)
def test_config_is_valid():
    """Check that the c99 test config proto can be used to build an Instance."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        config_path = bazelutil.DataPath(
            'phd/deeplearning/clgen/tests/data/c99/config.pbtxt')
        config = pbutil.FromFile(config_path, clgen_pb2.Instance())
        # Point the working directory and the corpus at paths that exist in
        # the bazel run dir.
        config.working_dir = scratch_dir
        corpus_dir = bazelutil.DataPath(
            'phd/deeplearning/clgen/tests/data/c99/src/')
        config.model.corpus.local_directory = str(corpus_dir)
        # Constructing the instance is the check; it must not raise.
        clgen.Instance(config)
def main():
    """Train and sample a CLgen instance built from the Java config helper.

    The seed text is fixed to an OpenCL kernel prefix. Unless
    FLAGS.use_encoded_contentfiles_db is set, the corpus of the generated
    config is replaced with one created from the OpenCL flags.
    """
    working_dir = FLAGS.java_clgen_working_dir
    encoded_contentfiles = FLAGS.java_encoded_contentfiles()
    training_epochs = FLAGS.java_training_epochs
    opencl_seed_text = "kernel void A("  # OpenCL-specific seed text.
    neurons_per_layer = FLAGS.neurons_per_layer
    num_layers = FLAGS.num_layers

    instance_config = java.MakeClgenInstanceConfig(
        working_dir,
        encoded_contentfiles,
        training_epochs,
        opencl_seed_text,
        neurons_per_layer,
        num_layers,
    )

    if not FLAGS.use_encoded_contentfiles_db:
        # Replace the Java corpus with an OpenCL one.
        opencl_corpus = opencl.CreateCorpusProtoFromFlags()
        instance_config.model.corpus.CopyFrom(opencl_corpus)

    output_db = FLAGS.samples_db()
    instance = clgen.Instance(instance_config)
    java.TrainAndSampleInstance(instance, output_db)
def main(argv: typing.List[str]):
    """Build a fixed OpenCL CLgen instance and sample from it indefinitely.

    The instance configuration is hard-coded apart from the working directory
    and corpus directory, which come from flags. Sampling runs inside the
    instance session with a multiprocessing pool and only stops when an
    exception propagates (the loop has no exit condition).

    Args:
      argv: Command line arguments. Anything beyond argv[0] is rejected.

    Raises:
      app.UsageError: If extra positional arguments are given.
    """
    extra_args = argv[1:]
    if extra_args:
        raise app.UsageError("Unknown arguments: '{}'.".format(
            " ".join(extra_args)))

    working_dir = FLAGS.clgen_dir

    # Preprocessing pipeline applied to corpus content files, in order.
    preprocessors = [
        "deeplearning.clgen.preprocessors.opencl:ClangPreprocessWithShim",
        "deeplearning.clgen.preprocessors.opencl:Compile",
        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
        "deeplearning.clgen.preprocessors.opencl:StripDoubleUnderscorePrefixes",
        "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
        "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
        "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
        "deeplearning.clgen.preprocessors.opencl:ClangFormat",
        "deeplearning.clgen.preprocessors.common:MinimumLineCount3",
        "deeplearning.clgen.preprocessors.opencl:Compile",
    ]
    corpus = corpus_pb2.Corpus(
        local_directory=FLAGS.clgen_corpus_dir,
        ascii_character_atomizer=True,
        preprocessor=preprocessors,
        contentfile_separator="\n\n",
    )

    architecture = model_pb2.NetworkArchitecture(
        backend=model_pb2.NetworkArchitecture.TENSORFLOW,
        neuron_type=model_pb2.NetworkArchitecture.LSTM,
        neurons_per_layer=512,
        num_layers=2,
        post_layer_dropout_micros=0,
    )

    optimizer = model_pb2.AdamOptimizer(
        initial_learning_rate_micros=2000,
        learning_rate_decay_per_epoch_micros=50000,
        beta_1_micros=900000,
        beta_2_micros=999000,
        normalized_gradient_clip_micros=5000000,
    )
    training = model_pb2.TrainingOptions(
        num_epochs=50,
        sequence_length=64,
        batch_size=64,
        shuffle_corpus_contentfiles_between_epochs=True,
        adam_optimizer=optimizer,
    )

    model = model_pb2.Model(
        corpus=corpus,
        architecture=architecture,
        training=training,
    )

    # Two termination criteria are configured: brace depth and a token limit.
    brace_depth_criterion = sampler_pb2.SampleTerminationCriterion(
        symtok=sampler_pb2.SymmetricalTokenDepth(
            depth_increase_token="{",
            depth_decrease_token="}",
        ))
    max_length_criterion = sampler_pb2.SampleTerminationCriterion(
        maxlen=sampler_pb2.MaxTokenLength(maximum_tokens_in_sample=20000,))
    sampler = sampler_pb2.Sampler(
        start_text="kernel void ",
        batch_size=64,
        sequence_length=1024,
        temperature_micros=1000000,  # = 1.0 real value
        termination_criteria=[brace_depth_criterion, max_length_criterion],
    )

    instance = clgen.Instance(
        clgen_pb2.Instance(
            working_dir=working_dir,
            model=model,
            sampler=sampler,
        ))

    features_db = grewe_features_db.Database(FLAGS.db)
    profile_path = pathlib.Path(FLAGS.profile_dir)
    profile_path.mkdir(parents=True, exist_ok=True)
    csv_profiler = prof.AutoCsvProfiler(profile_path)

    with instance.Session(), multiprocessing.Pool() as worker_pool:
        while True:
            Sample(instance, features_db, csv_profiler, worker_pool)
def GetInstances() -> typing.List[clgen.Instance]:
    """Return one CLgen instance per config in GetInstanceConfigs().instance."""
    instances = []
    for instance_config in GetInstanceConfigs().instance:
        instances.append(clgen.Instance(instance_config))
    return instances
def test_Instance_Session_yield_value(abc_instance_config):
    """Check that the value bound by `with Session()` equals the instance."""
    instance = clgen.Instance(abc_instance_config)
    with instance.Session() as session_value:
        assert instance == session_value
def test_Instance_ToProto_equality(abc_instance_config):
    """Check that ToProto() round-trips the config the instance was built from."""
    instance = clgen.Instance(abc_instance_config)
    round_tripped = instance.ToProto()
    assert abc_instance_config == round_tripped
def test_Instance_Session_clgen_dir(abc_instance_config):
    """Check that $CLGEN_CACHE equals working_dir while a session is active."""
    instance = clgen.Instance(abc_instance_config)
    with instance.Session():
        cache_dir = os.environ["CLGEN_CACHE"]
        assert cache_dir == abc_instance_config.working_dir
def test_Instance_no_sampler_field(abc_instance_config):
    """Check that UserError is raised when model_specification is unset.

    NOTE(review): the test name mentions the sampler field, but the field
    cleared and checked here is 'model_specification' - confirm which is
    intended.
    """
    abc_instance_config.ClearField("model_specification")
    with test.Raises(errors.UserError) as e_info:
        clgen.Instance(abc_instance_config)
    # The error message must name the missing field.
    message = str(e_info.value)
    assert "Field not set: 'Instance.model_specification'" == message
def CreateInstanceFromFlags() -> clgen.Instance:
    """Return a CLgen instance built from CreateInstanceProtoFromFlags()."""
    instance_proto = CreateInstanceProtoFromFlags()
    return clgen.Instance(instance_proto)
def test_Instance_no_working_dir_field(abc_instance_config):
    """Check that working_dir is None when the config leaves it unset."""
    abc_instance_config.ClearField("working_dir")
    working_dir = clgen.Instance(abc_instance_config).working_dir
    assert working_dir is None
def abc_instance(abc_instance_config: clgen_pb2.Instance):
    """Test fixture that provides an instance built from abc_instance_config."""
    instance = clgen.Instance(abc_instance_config)
    return instance