Example #1
def LoadPositiveNegativeProtos(path: pathlib.Path) -> PositiveNegativeDataset:
  """Load positive and negative training protos from a directory."""
  positive_protos = [
    pbutil.FromFile(p, fish_pb2.CompilerCrashDiscriminatorTrainingExample())
    for p in path.iterdir() if p.name.startswith('positive-')
  ]
  logging.info(
      'Loaded %s positive protos', humanize.intcomma(len(positive_protos)))
  negative_protos = [
    pbutil.FromFile(p, fish_pb2.CompilerCrashDiscriminatorTrainingExample())
    for p in path.iterdir() if p.name.startswith('negative-')
  ]
  logging.info(
      'Loaded %s negative protos', humanize.intcomma(len(negative_protos)))
  return PositiveNegativeDataset(positive_protos, negative_protos)
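Every example on this page revolves around the same call shape: pbutil.FromFile(path, message) parses a proto file into the freshly constructed message it is given and returns it. A minimal sketch of that pattern, assuming labm8's pbutil is importable and using a hypothetical generated module my_pb2 with a MyConfig message:

import pathlib

from labm8 import pbutil  # import path may vary between labm8 versions

import my_pb2  # hypothetical generated protobuf module

# Parse the file into a fresh message and use the returned instance.
config = pbutil.FromFile(pathlib.Path('config.pbtxt'), my_pb2.MyConfig())
print(config)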
Example #2
def PostprocessSampleCorpus(instance: clgen.Instance):
    """Create a corpus from the model samples and pre-process."""
    sample_dir = instance.model.SamplerCache(instance.sampler)

    # Read the sample protos and write them to a directory of content files.
    contentfiles_dir = pathlib.Path(str(sample_dir) + '.contentfiles')
    contentfiles_dir.mkdir(exist_ok=True)
    logging.info('Writing output contentfiles to %s', contentfiles_dir)
    if len(list(contentfiles_dir.iterdir())) != len(list(
            sample_dir.iterdir())):
        for proto_path in sample_dir.iterdir():
            sample = pbutil.FromFile(proto_path, model_pb2.Sample())
            with open(contentfiles_dir / proto_path.name, 'w') as f:
                f.write(sample.text)

    logging.info('Creating output corpus')
    output_corpus_config = corpus_pb2.Corpus()
    output_corpus_config.CopyFrom(instance.model.corpus.config)
    output_corpus_config.local_directory = str(contentfiles_dir)
    # We derive the programming language name from the input corpus directory.
    # This depends on corpuses being in directories named after their language,
    # e.g. ~/corpuses/opencl, or ~/corpuses/java.
    preprocessed_dir = instance.model.corpus.preprocessed.database_path.parent
    language = (preprocessed_dir / 'contentfiles').resolve().name
    output_corpus_config.preprocessor[:] = POSTPROCESSORS[language]
    output_corpus = corpuses.Corpus(output_corpus_config)
    try:
        output_corpus.Create()
    except errors.EmptyCorpusException:
        pass
    return output_corpus
Example #3
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    start_time = time.time()
    instances = [
        clgen.Instance(p) for p in pbutil.FromFile(
            pathlib.Path(FLAGS.instances), clgen_pb2.Instances()).instance
    ]
    random.shuffle(instances)
    candidate_instances = collections.deque(instances)
    logging.info('Loaded %d instances in %s ms', len(candidate_instances),
                 humanize.intcomma(int((time.time() - start_time) * 1000)))

    while candidate_instances:
        instance = candidate_instances.popleft()
        with instance.Session():
            if IsEligible(instance):
                logging.info('Found an eligible candidate to work on')
                SampleModel(instance)
                PostprocessSampleCorpus(instance)
            else:
                logging.info('Candidate is ineligible')
                candidate_instances.append(instance)
                time.sleep(1)

    logging.info('Done.')
Example #4
def _ReadTestDataStoreFiles() -> datastore_pb2.DataStoreTestSet:
    """Read the config protos for testing.

  The datastore names are derived from the file names.

  Returns:
    A DataStoreTestSet instance.

  Raises:
    AssertionError: In case of error reading datastore configs.
  """
    paths = list(
        pathlib.Path('deeplearning/deepsmith/tests/data/datastores').iterdir())
    assert paths
    names = [p.stem for p in paths]
    protos = [
        pbutil.FromFile(path, datastore_pb2.DataStore()) for path in paths
    ]
    datastore_set = datastore_pb2.DataStoreTestSet()
    for name, proto in zip(names, protos):
        # There's no graceful error handling here, but it's important that we don't
        # run tests on a datastore unless it's specifically marked as testonly.
        assert proto.testonly
        dst_proto = datastore_set.values[name]
        dst_proto.MergeFrom(proto)
    assert len(datastore_set.values) == len(protos) == len(names) == len(paths)
    return datastore_set
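The dst_proto lines above rely on a protobuf map-field idiom worth spelling out: message-valued map entries cannot be assigned directly (direct assignment of a submessage raises a ValueError), but accessing a missing key creates an empty entry, which MergeFrom then fills in. A hedged sketch, using a hypothetical my_pb2.DataSet message with a map<string, Item> values field in place of datastore_pb2.DataStoreTestSet:

import my_pb2  # hypothetical generated protobuf module

dataset = my_pb2.DataSet()
item = my_pb2.Item(name='example')
# dataset.values['example'] = item would raise ValueError; instead,
# accessing the key creates an empty entry that MergeFrom populates.
dataset.values['example'].MergeFrom(item)
assert 'example' in dataset.values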
Example #5
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError('Unrecognized arguments')

    # Parse flags and instantiate testing objects.
    if not FLAGS.interesting_results_dir:
        raise app.UsageError('--interesting_results_dir must be set')
    interesting_results_dir = pathlib.Path(FLAGS.interesting_results_dir)
    if (interesting_results_dir.exists() and
            not interesting_results_dir.is_dir()):
        raise app.UsageError('--interesting_results_dir must be a directory')
    logging.info('Recording interesting results in %s.',
                 interesting_results_dir)

    for path in interesting_results_dir.iterdir():
        result = pbutil.FromFile(path, deepsmith_pb2.Result())
        print(f'=== BEGIN INTERESTING RESULT {path.stem} ===')
        print('Outcome:', deepsmith_pb2.Result.Outcome.Name(result.outcome))
        print()
        print('OpenCL kernel')
        print('-------------')
        print(fmt.Indent(2, result.testcase.inputs['src']))
        print()
        print('Stderr')
        print('------')
        print(fmt.Indent(2, result.outputs['stderr']))
        print()
Example #6
def ContentFiles(self) -> typing.Iterable[scrape_repos_pb2.ContentFile]:
  """Return an iterator over all contentfiles in the repo."""
  if self.IsIndexed():
    return (pbutil.FromFile(f, scrape_repos_pb2.ContentFile())
            for f in self.index_dir.iterdir() if f.name != 'DONE.txt')
  else:
    return []
Example #7
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments '{}'".format(', '.join(
            argv[1:])))

    clone_list_path = pathlib.Path(FLAGS.clone_list or '')
    if not clone_list_path.is_file():
        raise app.UsageError('--clone_list is not a file.')
    clone_list = pbutil.FromFile(clone_list_path,
                                 scrape_repos_pb2.LanguageCloneList())

    if not FLAGS.export_path:
        raise app.UsageError('--export_path not set.')
    export_path = pathlib.Path(FLAGS.export_path)
    export_path.mkdir(parents=True, exist_ok=True)

    # To export from contentfiles database.
    # for language in clone_list.language:
    #   d = pathlib.Path(language.destination_directory)
    #   d = d.parent / (str(d.name) + '.db')
    #   db = contentfiles.ContentFiles(d)
    #   with db.Session() as session:
    #     (export_path / language.language).mkdir(exist_ok=True)
    #     ExportDatabase(session, export_path / language.language)

    # To export from index directory.
    for language in clone_list.language:
        index_path = pathlib.Path(language.destination_directory + '.index')
        if index_path.is_dir():
            (export_path / language.language).mkdir(exist_ok=True)
            ExportIndex(index_path, export_path / language.language)
Example #8
def main(argv) -> None:
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    tiers = pbutil.FromFile(pathlib.Path(FLAGS.data_tiers),
                            data_tiers_pb2.DataTiers())
    for tier in tiers.directory:
        logging.info('Processing %s', tier.path)
        _SetDirectorySize(tier)

    if FLAGS.summary:
        # Print the size per directory.
        df = pd.DataFrame([{
            'Path': d.path,
            'Tier': d.tier,
            'Size': humanize.naturalsize(d.size_bytes),
            'Size (bytes)': d.size_bytes
        } for d in tiers.directory if d.size_bytes])
        df = df.sort_values(['Tier', 'Size (bytes)'], ascending=[True, False])
        print(df[['Path', 'Tier', 'Size']].to_string(index=False))

        # Print the total size per tier.
        df2 = df.groupby('Tier').sum()
        df2['Size'] = [
            humanize.naturalsize(d['Size (bytes)']) for _, d in df2.iterrows()
        ]
        df2 = df2.reset_index()
        df2 = df2.sort_values('Tier')
        print()
        print("Totals:")
        print(df2[['Tier', 'Size']].to_string(index=False))
    else:
        print(tiers)
Example #9
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments '{}'".format(', '.join(
            argv[1:])))

    clone_list_path = pathlib.Path(FLAGS.clone_list or "")
    if not clone_list_path.is_file():
        raise app.UsageError('--clone_list is not a file.')
    clone_list = pbutil.FromFile(clone_list_path,
                                 scrape_repos_pb2.LanguageCloneList())

    # Error early if the config contains invalid preprocessors.
    for language in clone_list.language:
        for importer in language.importer:
            [
                preprocessors.GetPreprocessorFunction(p)
                for p in importer.preprocessor
            ]

    pool = multiprocessing.Pool(FLAGS.processes)
    for language in clone_list.language:
        d = pathlib.Path(language.destination_directory)
        d = d.parent / (str(d.name) + '.db')
        db = contentfiles.ContentFiles(d)
        if pathlib.Path(language.destination_directory).is_dir():
            ImportFromLanguage(db, language, pool)
Example #10
def test_FromFile_FileNotFoundError(suffix):
    """Test that FileNotFoundError raised if file doesn't exist."""
    with tempfile.TemporaryDirectory(prefix='labm8_proto_') as d:
        with pytest.raises(FileNotFoundError):
            pbutil.FromFile(
                pathlib.Path(d) / f'proto{suffix}',
                test_protos_pb2.TestMessage())
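Catching FileNotFoundError is one option; Examples 13, 28, and 29 instead use pbutil.ProtoIsReadable to test a path without raising. A sketch of that alternative, again assuming the hypothetical my_pb2.MyConfig message:

import pathlib

from labm8 import pbutil

import my_pb2  # hypothetical generated protobuf module

path = pathlib.Path('config.pbtxt')
# ProtoIsReadable returns False rather than raising on missing or
# unparseable files, so it suits validate-before-load flows.
if pbutil.ProtoIsReadable(path, my_pb2.MyConfig()):
  config = pbutil.FromFile(path, my_pb2.MyConfig())
else:
  print(f'cannot read {path}')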
Example #11
def main(argv) -> None:
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    clone_list_path = pathlib.Path(FLAGS.clone_list or "")
    if not clone_list_path.is_file():
        raise app.UsageError('--clone_list is not a file.')
    clone_list = pbutil.FromFile(clone_list_path,
                                 scrape_repos_pb2.LanguageCloneList())

    meta_files = []
    for language in clone_list.language:
        directory = pathlib.Path(language.destination_directory)
        if directory.is_dir():
            meta_files += [
                pathlib.Path(directory / f) for f in directory.iterdir()
                if IsRepoMetaFile(f)
            ]
    random.shuffle(meta_files)
    worker = AsyncWorker(meta_files)
    logging.info('Cloning %s repos from GitHub ...',
                 humanize.intcomma(worker.max))
    bar = progressbar.ProgressBar(max_value=worker.max, redirect_stderr=True)
    worker.start()
    while worker.is_alive():
        bar.update(worker.i)
        worker.join(.5)
    bar.update(worker.i)
Example #12
def EpochTelemetry(self) -> typing.List[telemetry_pb2.ModelEpochTelemetry]:
    """Return the epoch telemetry files."""
    return [
        pbutil.FromFile(self.logdir / p,
                        telemetry_pb2.ModelEpochTelemetry())
        for p in sorted(self.logdir.iterdir())
        if re.match(r'epoch_\d\d+_telemetry\.pbtxt', str(p.name))
    ]
Example #13
def GeneratorFromFlag(config_class,
                      generator_class) -> base_generator.GeneratorServiceBase:
    """Instantiate a generator from the --generator_config flag."""
    if not pbutil.ProtoIsReadable(FLAGS.generator_config, config_class()):
        raise app.UsageError(
            f'--generator_config is not a {config_class.__name__} proto')
    config = pbutil.FromFile(pathlib.Path(FLAGS.generator_config),
                             config_class())
    return generator_class(config)
Example #14
def test_FromFile_required_fields_not_set(suffix):
    """Test that DecodeError raised if required fields not set."""
    with tempfile.NamedTemporaryFile(prefix='labm8_proto_',
                                     suffix=suffix) as f:
        pbutil.ToFile(test_protos_pb2.AnotherTestMessage(number=1),
                      pathlib.Path(f.name))
        with pytest.raises(pbutil.DecodeError) as e_info:
            pbutil.FromFile(pathlib.Path(f.name),
                            test_protos_pb2.TestMessage())
        assert f"Required fields not set: '{f.name}'" == str(e_info.value)
Example #15
def ProtoFromFile(cls, path: pathlib.Path) -> deepsmith_pb2.Testcase:
    """Instantiate a protocol buffer testcase from file.

    Args:
      path: Path to the testcase proto file.

    Returns:
      Testcase message instance.
    """
    return pbutil.FromFile(path, deepsmith_pb2.Testcase())
Example #16
def test_FromFile_required_fields_not_set_uninitialized_okay(suffix):
    """Test that DecodeError not raised if required fields not set."""
    with tempfile.NamedTemporaryFile(prefix='labm8_proto_',
                                     suffix=suffix) as f:
        proto_in = test_protos_pb2.AnotherTestMessage(number=1)
        pbutil.ToFile(proto_in, pathlib.Path(f.name))
        pbutil.FromFile(pathlib.Path(f.name),
                        test_protos_pb2.TestMessage(),
                        uninitialized_okay=True)
Example #17
def ProtoFromFile(cls, path: pathlib.Path) -> deepsmith_pb2.Result:
    """Instantiate a protocol buffer result from file.

    Args:
      path: Path to the result proto file.

    Returns:
      Result message instance.
    """
    return pbutil.FromFile(path, deepsmith_pb2.Result())
Example #18
def __init__(self, path: pathlib.Path):
  self.path = path.absolute()
  self.cache = cache.FSCache(self.path)
  self.corpus = NullCorpus()
  self.config = pbutil.FromFile(
      self.path / 'META.pbtxt', internal_pb2.ModelMeta()).config
  self.atomizer = atomizers.AtomizerBase.FromFile(self.path / 'atomizer')
  self.backend = {
      model_pb2.NetworkArchitecture.TENSORFLOW: tensorflow_backend.TensorFlowBackend,
      model_pb2.NetworkArchitecture.KERAS: keras_backend.KerasBackend,
  }[self.config.architecture.backend](self.config, self.cache, self.atomizer)
Example #19
def PackDataPackage(package_dir: pathlib.Path) -> None:
    """Create an archive and sidecar of a package."""
    manifest = pbutil.FromFile(package_dir / 'MANIFEST.pbtxt',
                               dpack_pb2.DataPackage())
    PackageManifestIsValid(package_dir, manifest)
    archive_path = (package_dir /
                    f'../{package_dir.name}.dpack.tar.bz2').resolve()
    sidecar_path = (package_dir /
                    f'../{package_dir.name}.dpack.pbtxt').resolve()
    CreatePackageArchive(package_dir, manifest, archive_path)
    CreatePackageArchiveSidecar(archive_path, manifest, sidecar_path)
Example #20
def DoFlagsAction():
    """Do the action requested by the command line flags."""
    if not FLAGS.config:
        raise app.UsageError("Missing required argument: '--config'")
    config_path = pathlib.Path(FLAGS.config)
    if not config_path.is_file():
        raise app.UsageError(f"File not found: '{config_path}'")
    config = pbutil.FromFile(config_path, clgen_pb2.Instance())
    os.environ['PWD'] = str(config_path.parent)

    if FLAGS.clgen_profiling:
        prof.enable()

    instance = Instance(config)
    with instance.Session():
        if FLAGS.print_cache_path == 'corpus':
            print(instance.model.corpus.cache.path)
            return
        elif FLAGS.print_cache_path == 'model':
            print(instance.model.cache.path)
            return
        elif FLAGS.print_cache_path == 'sampler':
            print(instance.model.SamplerCache(instance.sampler))
            return
        elif FLAGS.print_cache_path:
            raise app.UsageError(
                f"Invalid --print_cache_path argument: '{FLAGS.print_cache_path}'"
            )

        if FLAGS.print_preprocessed:
            print(instance.model.corpus.GetTextCorpus(shuffle=False))
            return

        # The default action is to sample the model.
        if FLAGS.stop_after == 'corpus':
            instance.model.corpus.Create()
        elif FLAGS.stop_after == 'train':
            instance.model.Train()
            logging.info('Model: %s', instance.model.cache.path)
        elif FLAGS.stop_after:
            raise app.UsageError(
                f"Invalid --stop_after argument: '{FLAGS.stop_after}'")
        elif FLAGS.export_model:
            instance.model.Train()
            export_dir = pathlib.Path(FLAGS.export_model)
            for path in instance.model.InferenceManifest():
                relpath = pathlib.Path(
                    os.path.relpath(path, instance.model.cache.path))
                (export_dir / relpath.parent).mkdir(parents=True,
                                                    exist_ok=True)
                shutil.copyfile(path, export_dir / relpath)
                print(export_dir / relpath)
        else:
            instance.model.Sample(instance.sampler, FLAGS.min_samples)
Example #21
def test_ToFile_FromFile_equivalence(suffix):
    """Test that ToFile() and FromFile() are symmetrical."""
    with tempfile.TemporaryDirectory(prefix='labm8_proto_') as d:
        path = pathlib.Path(d) / f'proto{suffix}'
        proto_in = test_protos_pb2.TestMessage(string='abc', number=1)
        pbutil.ToFile(proto_in, path)
        assert path.is_file()
        proto_out = test_protos_pb2.TestMessage()
        pbutil.FromFile(path, proto_out)
        assert proto_out.string == 'abc'
        assert proto_out.number == 1
        assert proto_in == proto_out
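The suffix these tests parametrize over suggests that ToFile() and FromFile() choose their serialization format from the file extension (text format for .pbtxt, with binary and other encodings behind other suffixes); treat the exact mapping as version-dependent. A round-trip sketch under that assumption, reusing the hypothetical my_pb2.MyConfig:

import pathlib
import tempfile

from labm8 import pbutil

import my_pb2  # hypothetical generated protobuf module

with tempfile.TemporaryDirectory(prefix='labm8_proto_') as d:
  for suffix in ['.pbtxt', '.pb']:  # assumed to select text vs. binary
    path = pathlib.Path(d) / f'proto{suffix}'
    pbutil.ToFile(my_pb2.MyConfig(string='abc'), path)
    proto_out = pbutil.FromFile(path, my_pb2.MyConfig())
    assert proto_out.string == 'abc'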
Example #22
def ServiceConfigFromFlag(
    flag_name: str,
    service_config: pbutil.ProtocolBuffer) -> pbutil.ProtocolBuffer:
  """Load a service config proto from the path given by a --<flag_name> flag."""
  if not getattr(FLAGS, flag_name):
    raise app.UsageError(f'--{flag_name} not set.')
  config_path = pathlib.Path(getattr(FLAGS, flag_name))
  if not config_path.is_file():
    cls_name = type(service_config).__name__
    raise app.UsageError(f"{cls_name} file not found: '{config_path}'.")

  return pbutil.FromFile(config_path, service_config)
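A hypothetical call site for the helper above; both the flag name --harness_config and the harness_pb2.HarnessConfig message are illustrative assumptions, not part of the original:

# Load the config for a service from its --harness_config flag.
config = ServiceConfigFromFlag('harness_config', harness_pb2.HarnessConfig())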
Example #23
def GetProtos(
    export_path: pathlib.Path, outcomes: typing.List[str],
    max_src_len: int) -> typing.List[TrainingProto]:
  paths = sorted(labtypes.flatten(
      [list((export_path / outcome).iterdir()) for outcome in outcomes]))
  protos = []
  for path in paths:
    proto = pbutil.FromFile(path, TrainingProto())
    if len(proto.src) > max_src_len:
      continue
    protos.append(proto)
  return protos
Example #24
def test_config_is_valid():
  """Test that config proto is valid."""
  with tempfile.TemporaryDirectory() as d:
    config = pbutil.FromFile(
        bazelutil.DataPath(
            'phd/deeplearning/clgen/tests/data/tiny/config.pbtxt'),
        clgen_pb2.Instance())
    # Change the working directory and corpus path to our bazel run dir.
    config.working_dir = d
    config.model.corpus.local_directory = str(bazelutil.DataPath(
        'phd/deeplearning/clgen/tests/data/tiny/corpus.tar.bz2'))
    clgen.Instance(config)
Example #25
def ImportRepo(session: orm.session.Session,
               language: scrape_repos_pb2.LanguageToClone,
               metafile: pathlib.Path, pool: multiprocessing.Pool) -> None:
    """Import contentfiles from repository.

  Args:
    session: A database session to import to.
    language: The language specification for the repo.
    metafile: The repo metafile.
    pool: A multiprocessing pool.
  """
    meta = pbutil.FromFile(metafile, scrape_repos_pb2.GitHubRepoMetadata())
    clone_dir = metafile.parent / f'{meta.owner}_{meta.name}'
    repo = contentfiles.GitHubRepository.GetOrAdd(session, meta)
    repo.language = language.language

    for importer in language.importer:
        if not importer.source_code_pattern:
            logging.error('No source_code_pattern specified! Stopping now.')
            return

        pat = importer.source_code_pattern
        pat = (f'{clone_dir}/{pat[1:]}' if pat[0] == '^'
               else f'{clone_dir}/{pat}')
        cmd = [
            'find',
            str(clone_dir), '-type', 'f', '-regex', pat, '-not', '-path',
            '*/.git/*'
        ]
        logging.debug('$ %s', ' '.join(cmd))
        paths = subprocess.check_output(
            cmd, universal_newlines=True).rstrip().split('\n')
        if len(paths) == 1 and not paths[0]:
            logging.debug('No files to import from %s', clone_dir)
            return
        logging.info("Importing %s '%s' files from %s ...",
                     humanize.intcomma(len(paths)),
                     importer.source_code_pattern, clone_dir)
        all_files_relpaths = public.GetAllFilesRelativePaths(clone_dir)
        jobs = [
            scrape_repos_pb2.ImportWorker(
                clone_from_url=meta.clone_from_url,
                clone_dir=str(clone_dir),
                abspath=p,
                all_files_relpaths=all_files_relpaths,
                preprocessors=importer.preprocessor,
            ) for p in paths
        ]
        bar = progressbar.ProgressBar(max_value=len(jobs))
        for outputs in bar(pool.imap_unordered(ImportWorker, jobs)):
            for output in outputs:
                session.add(output)
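The closing loop of Example 25 is a reusable shape: fan work out with multiprocessing.Pool.imap_unordered and drive a progress bar from results as they stream back. A stripped-down sketch of just that idiom, where Work and jobs are placeholders:

import multiprocessing

import progressbar


def Work(job: int) -> int:
  """Placeholder for per-job work such as ImportWorker."""
  return job * 2


if __name__ == '__main__':
  jobs = list(range(100))
  pool = multiprocessing.Pool()
  bar = progressbar.ProgressBar(max_value=len(jobs))
  # imap_unordered yields results as workers finish; wrapping the
  # iterator in the bar updates progress once per completed job.
  results = [r for r in bar(pool.imap_unordered(Work, jobs))]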
Example #26
def main(argv):
  """Main entry point."""
  if len(argv) > 1:
    unknown_args = ', '.join(argv[1:])
    raise app.UsageError(f"Unknown arguments {unknown_args}")

  logging.info('Preparing OpenCL testbed.')
  config = harness_pb2.CldriveHarness()
  config.opencl_env.extend([env.OclgrindOpenCLEnvironment().name])
  config.opencl_opt.extend([FLAGS.opencl_opt])
  harness = cldrive.CldriveHarness(config)
  assert len(harness.testbeds) >= 1

  input_directories = FLAGS.input_directories
  logging.info('Reading testcases from: %s', ' '.join(input_directories))

  output_directory = pathlib.Path(FLAGS.output_directory)
  logging.info('Writing results to %s', output_directory)
  output_directory.mkdir(parents=True, exist_ok=True)

  # Load testcases.
  testcase_dirs = [
      pathlib.Path(x) for x in input_directories
      if pathlib.Path(x).is_dir()
  ]
  if not testcase_dirs:
    raise app.UsageError('No --input_directories found.')
  testcase_paths = labtypes.flatten(
      [[pathlib.Path(y) for y in fs.ls(x, abspaths=True)]
       for x in testcase_dirs])
  testcases = [
    pbutil.FromFile(path, deepsmith_pb2.Testcase()) for path in testcase_paths]
  logging.info('Read %d testcases.', len(testcases))
  if not testcases:
    raise app.UsageError("No testcases found: '{}'".format(
        ' '.join(input_directories)))

  # Execute testcases.
  req = harness_pb2.RunTestcasesRequest()
  req.testbed.CopyFrom(harness.testbeds[0])
  req.testcases.extend(testcases)
  res = harness.RunTestcases(req, None)

  # Write results to file.
  for testcase, result in zip(testcases, res.results):
    result_id = crypto.md5_str(str(testcase))
    pbutil.ToFile(result, output_directory / f'{result_id}.pbtxt')

  logging.info('Executed %d testcases and wrote results to %s',
               len(res.results), output_directory)
  execution_times = [
    result.profiling_events[0].duration_ms for result in res.results]
  logging.info('Average time to evaluate testcase: %.2f ms',
               sum(execution_times) / len(execution_times))
Example #27
def VerifyManifest(package_dir: pathlib.Path) -> bool:
    """Verify that the MANIFEST.pbtext file matches the contents."""
    if not (package_dir / 'MANIFEST.pbtxt').is_file():
        logging.info('%s/MANIFEST.pbtxt missing, nothing to do.', package_dir)
        return False
    manifest = pbutil.FromFile(package_dir / 'MANIFEST.pbtxt',
                               dpack_pb2.DataPackage())
    if not PackageManifestIsValid(package_dir, manifest):
        logging.error('Package %s contains errors.', package_dir)
        return False
    logging.info('%s verified. No changes to files in the manifest.',
                 package_dir)
    return True
Example #28
def InitManifest(package_dir: pathlib.Path,
                 contents: typing.List[pathlib.Path], update: bool) -> None:
    """Write the MANIFEST.pbtxt file for a package."""
    manifest = CreatePackageManifest(package_dir, contents)
    manifest_path = package_dir / 'MANIFEST.pbtxt'
    if update and pbutil.ProtoIsReadable(manifest_path,
                                         dpack_pb2.DataPackage()):
        old = pbutil.FromFile(manifest_path, dpack_pb2.DataPackage())
        MergeManifests(manifest, old)
    elif manifest_path.is_file():
        raise OSError('Refusing to overwrite MANIFEST.pbtxt file.')
    pbutil.ToFile(manifest, manifest_path)
    logging.info('Wrote %s', manifest_path.absolute())
Example #29
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    config = pathlib.Path(FLAGS.generator)
    if not pbutil.ProtoIsReadable(config, generator_pb2.ClgenGenerator()):
        raise app.UsageError(
            '--generator is not a deepsmith.ClgenGenerator proto')
    generator_config = pbutil.FromFile(config, generator_pb2.ClgenGenerator())
    output_directory = pathlib.Path(FLAGS.output_directory)
    GenerateTestcases(generator_config, output_directory, FLAGS.num_testcases)
Example #30
def EnumerateLanguageInstanceConfigs(
    language: typing.Dict[str, typing.List[str]]
) -> typing.List[clgen_pb2.Instance]:
    """Enumerate the options for a language."""
    configs = []
    for corpus, model, sampler in itertools.product(language['corpuses'],
                                                    EnumerateModels(),
                                                    language['samplers']):
        instance_config = clgen_pb2.Instance()
        instance_config.working_dir = FLAGS.working_dir
        instance_config.model.CopyFrom(model)
        instance_config.model.corpus.CopyFrom(
            pbutil.FromFile(
                bazelutil.DataPath(
                    f'phd/experimental/deeplearning/polyglot/corpuses/{corpus}.pbtxt'
                ), corpus_pb2.Corpus()))
        instance_config.sampler.CopyFrom(
            pbutil.FromFile(
                bazelutil.DataPath(
                    f'phd/experimental/deeplearning/polyglot/samplers/{sampler}.pbtxt'
                ), sampler_pb2.Sampler()))
        configs.append(instance_config)
    return configs