Example #1
def abc_model_config(abc_corpus_config):
    """The proto config for a simple Model."""
    architecture = model_pb2.NetworkArchitecture(
        backend=model_pb2.NetworkArchitecture.TENSORFLOW,
        embedding_size=2,
        neuron_type=model_pb2.NetworkArchitecture.LSTM,
        neurons_per_layer=4,
        num_layers=1,
        post_layer_dropout_micros=2000,
    )
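    # All *_micros fields encode real values as integers scaled by 1e6, e.g.
    # initial_learning_rate_micros=2000 is a learning rate of 0.002.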
    optimizer = model_pb2.AdamOptimizer(
        initial_learning_rate_micros=2000,
        learning_rate_decay_per_epoch_micros=5000,
        beta_1_micros=900000,
        beta_2_micros=999000,
        normalized_gradient_clip_micros=5000000,
    )
    training = model_pb2.TrainingOptions(
        num_epochs=1,
        sequence_length=10,
        batch_size=5,
        shuffle_corpus_contentfiles_between_epochs=False,
        adam_optimizer=optimizer,
    )
    return model_pb2.Model(corpus=abc_corpus_config,
                           architecture=architecture,
                           training=training)
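These examples appear to come from the CLgen project, so `model_pb2`, `corpus_pb2`, `sampler_pb2`, and `clgen_pb2` are its generated protobuf bindings. Every real-valued hyperparameter is stored as an integer `*_micros` field, i.e. the real value scaled by one million; the `temperature_micros=1000000  # = 1.0 real value` comments in Examples #2 and #4 make the convention explicit. A minimal conversion sketch; the helper names are mine, not part of the proto API:

MICROS_PER_UNIT = 1000000

def ToMicros(real_value: float) -> int:
    """Encode a real-valued hyperparameter as an integer micros field."""
    return int(round(real_value * MICROS_PER_UNIT))

def FromMicros(micros: int) -> float:
    """Decode an integer micros field back to its real value."""
    return micros / MICROS_PER_UNIT

assert ToMicros(0.002) == 2000     # initial_learning_rate_micros above
assert ToMicros(0.9) == 900000     # beta_1_micros
assert ToMicros(0.999) == 999000   # beta_2_micros
assert FromMicros(5000000) == 5.0  # normalized_gradient_clip_micros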
Example #2
def MakeClgenInstanceConfig(
    working_dir: pathlib.Path,
    encoded_db: encoded.EncodedContentFiles,
    num_training_epochs: int,
    seed_text: str,
    neurons_per_layer: int,
    num_layers: int,
) -> clgen_pb2.Instance:
    """Construct a CLgen instance.

  Args:
    working_dir: The directory to cache CLgen working files in.
    encoded_db: The directory of encoded content files.
    num_training_epochs: The number of epochs to train for.
    seed_text: The text to initiate sampling with.
    neurons_per_layer: Number of neurons in a layer.
  """
    return clgen_pb2.Instance(
        working_dir=str(working_dir),
        model=model_pb2.Model(
            corpus=corpus_pb2.Corpus(pre_encoded_corpus_url=encoded_db.url),
            architecture=model_pb2.NetworkArchitecture(
                backend=model_pb2.NetworkArchitecture.TENSORFLOW,
                neuron_type=model_pb2.NetworkArchitecture.LSTM,
                neurons_per_layer=neurons_per_layer,
                num_layers=num_layers,
                post_layer_dropout_micros=0,
            ),
            training=model_pb2.TrainingOptions(
                num_epochs=num_training_epochs,
                sequence_length=64,
                batch_size=64,
                shuffle_corpus_contentfiles_between_epochs=True,
                adam_optimizer=model_pb2.AdamOptimizer(
                    initial_learning_rate_micros=2000,
                    learning_rate_decay_per_epoch_micros=50000,
                    beta_1_micros=900000,
                    beta_2_micros=999000,
                    normalized_gradient_clip_micros=5000000,
                ),
            ),
        ),
        sampler=sampler_pb2.Sampler(
            start_text=seed_text,
            batch_size=64,
            sequence_length=1024,
            temperature_micros=1000000,  # = 1.0 real value
            termination_criteria=[
                sampler_pb2.SampleTerminationCriterion(
                    symtok=sampler_pb2.SymmetricalTokenDepth(
                        depth_increase_token="{",
                        depth_decrease_token="}",
                    )),
                sampler_pb2.SampleTerminationCriterion(
                    maxlen=sampler_pb2.MaxTokenLength(
                        maximum_tokens_in_sample=20000)),
            ],
        ),
    )
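A hedged usage sketch for `MakeClgenInstanceConfig`. The paths and the `EncodedContentFiles` constructor argument are illustrative assumptions; the hyperparameter values mirror Example #4 below:

import pathlib

from deeplearning.clgen.corpuses import encoded
from deeplearning.clgen.proto import clgen_pb2

# Hypothetical locations, for illustration only.
working_dir = pathlib.Path("/tmp/clgen_working_dir")
encoded_db = encoded.EncodedContentFiles("sqlite:////tmp/encoded.db")

config: clgen_pb2.Instance = MakeClgenInstanceConfig(
    working_dir=working_dir,
    encoded_db=encoded_db,
    num_training_epochs=50,
    seed_text="kernel void ",
    neurons_per_layer=512,
    num_layers=2,
)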
Example #3
def CreateModelProtoFromFlags() -> model_pb2.Model:
  """Construct a Model proto from the clgen_* command-line flags."""
  return model_pb2.Model(
      corpus=CreateCorpusProtoFromFlags(),
      architecture=model_pb2.NetworkArchitecture(
          backend=model_pb2.NetworkArchitecture.TENSORFLOW,
          neuron_type=model_pb2.NetworkArchitecture.LSTM,
          neurons_per_layer=FLAGS.clgen_layer_size,
          num_layers=FLAGS.clgen_num_layers,
          post_layer_dropout_micros=0,
      ),
      training=model_pb2.TrainingOptions(
          num_epochs=FLAGS.clgen_num_epochs,
          sequence_length=FLAGS.clgen_training_sequence_length,
          batch_size=FLAGS.clgen_training_batch_size,
          shuffle_corpus_contentfiles_between_epochs=True,
          adam_optimizer=model_pb2.AdamOptimizer(
              initial_learning_rate_micros=2000,
              learning_rate_decay_per_epoch_micros=50000,
              beta_1_micros=900000,
              beta_2_micros=999000,
              normalized_gradient_clip_micros=5000000,
          ),
      ))
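`CreateModelProtoFromFlags` reads its architecture and training options from absl flags whose definitions are not shown on this page. A sketch of what they might look like; the flag names follow the `FLAGS.*` reads above, but the defaults and help strings are assumptions borrowed from Example #4 below:

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_integer("clgen_layer_size", 512, "Neurons per layer.")
flags.DEFINE_integer("clgen_num_layers", 2, "Number of layers.")
flags.DEFINE_integer("clgen_num_epochs", 50, "Number of training epochs.")
flags.DEFINE_integer("clgen_training_sequence_length", 64,
                     "Sequence length used during training.")
flags.DEFINE_integer("clgen_training_batch_size", 64,
                     "Batch size used during training.")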
Example #4
def main(argv: typing.List[str]):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(" ".join(
            argv[1:])))

    instance = clgen.Instance(
        clgen_pb2.Instance(
            working_dir=FLAGS.clgen_dir,
            model=model_pb2.Model(
                corpus=corpus_pb2.Corpus(
                    local_directory=FLAGS.clgen_corpus_dir,
                    ascii_character_atomizer=True,
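                    # Preprocessors are applied to each content file in order.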
                    preprocessor=[
                        "deeplearning.clgen.preprocessors.opencl:ClangPreprocessWithShim",
                        "deeplearning.clgen.preprocessors.opencl:Compile",
                        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
                        "deeplearning.clgen.preprocessors.opencl:StripDoubleUnderscorePrefixes",
                        "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
                        "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
                        "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
                        "deeplearning.clgen.preprocessors.opencl:ClangFormat",
                        "deeplearning.clgen.preprocessors.common:MinimumLineCount3",
                        "deeplearning.clgen.preprocessors.opencl:Compile",
                    ],
                    contentfile_separator="\n\n",
                ),
                architecture=model_pb2.NetworkArchitecture(
                    backend=model_pb2.NetworkArchitecture.TENSORFLOW,
                    neuron_type=model_pb2.NetworkArchitecture.LSTM,
                    neurons_per_layer=512,
                    num_layers=2,
                    post_layer_dropout_micros=0,
                ),
                training=model_pb2.TrainingOptions(
                    num_epochs=50,
                    sequence_length=64,
                    batch_size=64,
                    shuffle_corpus_contentfiles_between_epochs=True,
                    adam_optimizer=model_pb2.AdamOptimizer(
                        initial_learning_rate_micros=2000,
                        learning_rate_decay_per_epoch_micros=50000,
                        beta_1_micros=900000,
                        beta_2_micros=999000,
                        normalized_gradient_clip_micros=5000000,
                    ),
                ),
            ),
            sampler=sampler_pb2.Sampler(
                start_text="kernel void ",
                batch_size=64,
                sequence_length=1024,
                temperature_micros=1000000,  # = 1.0 real value
                termination_criteria=[
                    sampler_pb2.SampleTerminationCriterion(
                        symtok=sampler_pb2.SymmetricalTokenDepth(
                            depth_increase_token="{",
                            depth_decrease_token="}",
                        )),
                    sampler_pb2.SampleTerminationCriterion(
                        maxlen=sampler_pb2.MaxTokenLength(
                            maximum_tokens_in_sample=20000)),
                ],
            ),
        ))
    db = grewe_features_db.Database(FLAGS.db)
    profile_dir = pathlib.Path(FLAGS.profile_dir)
    profile_dir.mkdir(parents=True, exist_ok=True)
    profiler = prof.AutoCsvProfiler(profile_dir)

    with instance.Session(), multiprocessing.Pool() as pool:
        while True:
            Sample(instance, db, profiler, pool)
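The sampler in this example terminates on whichever of its two criteria fires first: a symmetrical-token-depth check on "{" / "}" or a hard cap of 20000 tokens. A rough sketch of that logic as I read the config; this illustrates the semantics and is not CLgen's actual implementation:

from typing import List

def SymtokDone(tokens: List[str],
               depth_increase_token: str = "{",
               depth_decrease_token: str = "}") -> bool:
    """Sketch: stop once a block has opened and depth returns to zero."""
    depth = 0
    opened = False
    for token in tokens:
        if token == depth_increase_token:
            depth += 1
            opened = True
        elif token == depth_decrease_token:
            depth -= 1
        if opened and depth <= 0:
            return True
    return False

def MaxlenDone(tokens: List[str],
               maximum_tokens_in_sample: int = 20000) -> bool:
    """Sketch: stop once the sample reaches the configured token cap."""
    return len(tokens) >= maximum_tokens_in_sample

Pairing the depth check with a token cap guards against samples that open a brace but never close it.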