Example #1
app.DEFINE_string(
    'src_dir', None,
    'The directory to create chunks from. All files in this directory are '
    'packed into chunks.')
app.DEFINE_string(
    'chunks_dir', None,
    'The root directory of the chunks. Each chunk is a directory containing '
    'files and a manifest.')
app.DEFINE_integer(
    'size_mb', 4695,
    'The maximum size of each chunk in megabytes. This excludes the MANIFEST '
    'and README files which are generated.')
app.DEFINE_string('chunk_prefix', 'chunk_',
                  'The name to prepend to generated chunks.')
app.DEFINE_boolean(
    'random_ordering', True,
    'Whether to randomize the ordering of files across and within chunks. If '
    '--norandom_ordering is used, the files are arranged in chunks in the order '
    'in which they are found in --src_dir. This is not recommended, as it means '
    'the loss of a chunk causes a loss in a contiguous block of files.')
app.DEFINE_integer(
    'random_ordering_seed', 0,
    'The number used to seed the random number generator. Not used if '
    '--norandom_ordering is set. Using the same seed produces the same ordering '
    'of files.')
app.DEFINE_boolean(
    'gzip_files', False,
    'Whether to gzip individual files in chunks. Files are only stored in gzip '
    'form if it is smaller than the original file. For compressed image formats '
    'like JPGs, gzip rarely offers a reduction in file size.')


def main(argv):
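As a hedged aside, not part of the excerpt above: flags declared with `app.DEFINE_*` are read back at run time through the module-level `FLAGS` object. The helper below is hypothetical (the name `_ResolveChunkingFlags` and its return values are invented for illustration); it only demonstrates that access pattern, not the script's chunk-packing logic.

# Hypothetical helper, not from the source. `FLAGS` is the labm8 flags object
# declared in the snippet above; flag attribute names match the DEFINE_* calls.
import pathlib


def _ResolveChunkingFlags():
  """Return (src_dir, chunks_dir, max_chunk_bytes) read from the flags above."""
  src_dir = pathlib.Path(FLAGS.src_dir)
  chunks_dir = pathlib.Path(FLAGS.chunks_dir)
  max_chunk_bytes = FLAGS.size_mb * 1000 * 1000  # --size_mb is in megabytes.
  return src_dir, chunks_dir, max_chunk_bytes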
Example #2
from deeplearning.clgen.models import pretrained
from deeplearning.clgen.proto import clgen_pb2
from deeplearning.clgen.proto import model_pb2
from labm8 import app
from labm8 import pbutil
from labm8 import prof

FLAGS = app.FLAGS

app.DEFINE_string('config', '/clgen/config.pbtxt',
                  'Path to a clgen.Instance proto file.')
app.DEFINE_integer(
    'min_samples', 0,
    'The minimum number of samples to make. If <= 0, sampling continues '
    'indefinitely and never terminates.')
app.DEFINE_boolean('print_samples', True,
                   'If set, print the generated samples.')
app.DEFINE_boolean('cache_samples', False,
                   'If set, cache the generated sample protobufs.')
app.DEFINE_string('sample_text_dir', None,
                  'A directory to write plain text samples to.')
app.DEFINE_string('stop_after', None,
                  'Stop CLgen early. Valid options are: "corpus", or "train".')
app.DEFINE_string(
    'print_cache_path', None,
    'Print the directory of a cache and exit. Valid options are: "corpus", '
    '"model", or "sampler".')
app.DEFINE_string(
    'export_model', None,
    'Path to export a trained TensorFlow model to. This exports all of the '
    'files required for sampling to the specified directory. The directory can '
    'then be used as the pretrained_model field of an Instance proto config.')
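As a hedged illustration of how these flags are typically consumed (the helper below is hypothetical, not this script's actual code): the `--config` path is parsed into the imported `clgen_pb2.Instance` message, assuming labm8's `pbutil.FromFile` text-proto reader behaves as described in the comment.

# Hypothetical helper, based only on the imports and flags shown above.
# `FLAGS`, `app`, `pbutil` and `clgen_pb2` come from the snippet above.
import pathlib


def _LoadInstanceConfig() -> clgen_pb2.Instance:
  """Parse the --config flag into a clgen.Instance proto."""
  config_path = pathlib.Path(FLAGS.config)
  if not config_path.is_file():
    raise app.UsageError(f"File not found: '{config_path}'")
  # Assumes pbutil.FromFile(path, message) reads a text-format proto from disk.
  return pbutil.FromFile(config_path, clgen_pb2.Instance())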
Example #3
"""A linter for ensuring that a Photo Library is organized correctly."""
import sys

import pathlib

from labm8 import app
from labm8 import humanize
from util.photolib import linters
from util.photolib import workspace

FLAGS = app.FLAGS
app.DEFINE_boolean("profile", True, "Print profiling timers on completion.")
app.DEFINE_boolean("rm_errors_cache", False,
                   "If true, empty the errors cache prior to running.")


def main(argv):  # pylint: disable=missing-docstring
    paths_to_lint = [pathlib.Path(arg) for arg in argv[1:]]
    if not paths_to_lint:
        raise app.UsageError("Usage: photolint <directory...>")

    for path in paths_to_lint:
        if not path.exists():
            app.FatalWithoutStackTrace(
                f"File or directory not found: '{path}'")

    # Linting is on a per-directory level, not per-file.
    directories_to_lint = {
        path if path.is_dir() else path.parent
        for path in paths_to_lint
    }
Example #4
FLAGS = app.FLAGS

app.DEFINE_list('targets', [], 'The bazel target(s) to export.')
app.DEFINE_list('excluded_targets', [],
                'A list of bazel targets to exclude from export.')
app.DEFINE_list(
    'extra_files', [], 'A list of additional files to export. Each element in '
    'the list is a relative path to export. E.g. `bar/baz.txt`.')
app.DEFINE_list(
    'move_file_mapping', [],
    'Each element in the list is a mapping of relative paths in the form '
    '<src>:<dst>. E.g. `foo.py:bar/baz.txt` will move file `foo.py` to '
    'destination `bar/baz.txt`.')
app.DEFINE_string('github_repo', None, 'Name of a GitHub repo to export to.')
app.DEFINE_boolean('github_create_repo', False,
                   'Whether to create the repo if it does not exist.')
app.DEFINE_boolean('github_repo_create_private', True,
                   'Whether to create new GitHub repos as private.')
app.DEFINE_boolean('export_source_tree_print_files', False,
                   'Print the files that will be exported and terminate.')
app.DEFINE_boolean(
    'ignore_last_export', False,
    'If true, run through the entire git history. Otherwise, '
    'continue from the last commit exported. Use this flag if '
    'the set of exported files changes.')


def GetOrCreateRepoOrDie(github: github_lib.Github,
                         repo_name: str) -> github_lib.Repository:
    """Get the github repository to export to. Create it if it doesn't exist."""
    try:
Example #5
import numpy as np
import progressbar

from deeplearning.clgen import samplers
from deeplearning.clgen import telemetry
from deeplearning.clgen.models import backends
from deeplearning.clgen.models import data_generators
from deeplearning.clgen.proto import model_pb2
from labm8 import app
from labm8 import humanize

FLAGS = app.FLAGS

app.DEFINE_boolean(
    'clgen_tf_backend_reset_inference_state_between_batches', False,
    'If set, reset the network state between sample batches. Else, the model '
    'state is unaffected.')
app.DEFINE_integer(
    'clgen_tf_backend_tensorboard_summary_step_count', 10,
    'The number of steps between writing tensorboard summaries.')


class TensorFlowBackend(backends.BackendBase):
    """A model with an embedding layer, using a keras backend."""
    def __init__(self, *args, **kwargs):
        """Instantiate a model.

    Args:
      args: Arguments to be passed to BackendBase.__init__().
      kwargs: Arguments to be passed to BackendBase.__init__().
    """
Example #6
FLAGS = app.FLAGS

app.DEFINE_string(
    'clgen_instance', None,
    'Path to a clgen.Instance proto file containing a full '
    'CLgen configuration.')

app.DEFINE_string('clgen_working_dir',
                  str(pathlib.Path('~/.cache/clgen').expanduser()),
                  'The directory for CLgen working files.')

# Corpus options.
app.DEFINE_string('clgen_corpus_dir',
                  "/mnt/cc/data/datasets/github/corpuses/opencl",
                  "Directory where the corpus is stored.")
app.DEFINE_boolean('clgen_multichar_tokenizer', False,
                   'If true, use multichar OpenCL token.')

# Model options.
app.DEFINE_integer('clgen_layer_size', 512, 'Size of LSTM model layers.')
app.DEFINE_integer('clgen_num_layers', 2, 'Number of layers in LSTM model.')
app.DEFINE_integer('clgen_max_sample_length', 20000,
                   'The maximum length of CLgen samples. If 0, no limit.')

# Training options.
app.DEFINE_integer("clgen_num_epochs", 50, "The number of training epochs.")
app.DEFINE_integer("clgen_training_sequence_length", 64,
                   "CLgen training sequence length.")
app.DEFINE_integer("clgen_training_batch_size", 64,
                   "CLgen training batch size.")

# Sampling options.
Example #7
import inspect
import pathlib
import typing

from labm8 import app
from labm8 import shell
from util.photolib import common
from util.photolib import contentfiles
from util.photolib import lintercache
from util.photolib import workspace
from util.photolib import xmp_cache
from util.photolib.proto import photolint_pb2

FLAGS = app.FLAGS
app.DEFINE_boolean("counts", False, "Show only the counts of errors.")
app.DEFINE_boolean("fix_it", False, "Show how to fix it.")

# A global list of all error categories. Every time you add a new linter rule, add it
# here!
ERROR_CATEGORIES = set([
    "dir/empty",
    "dir/not_empty",
    "dir/hierarchy",
    "file/name",
    "file/missing",
    "extension/lowercase",
    "extension/bad",
    "extension/unknown",
    "keywords/third_party",
    "keywords/film_format",
FLAGS = app.FLAGS

app.DEFINE_list('targets', [], 'The bazel target(s) to export.')
app.DEFINE_list('excluded_targets', [],
                'A list of bazel targets to exclude from export.')
app.DEFINE_list(
    'extra_files', [], 'A list of additional files to export. Each element in '
    'the list is a relative path to export. E.g. `bar/baz.txt`.')
app.DEFINE_list(
    'mv_files', [],
    'Each element in the list is a mapping of relative paths in the form '
    '<src>:<dst>. E.g. `foo.py:bar/baz.txt` will move file `foo.py` to '
    'destination `bar/baz.txt`.')
app.DEFINE_string('github_repo', None, 'Name of a GitHub repo to export to.')
app.DEFINE_boolean('github_create_repo', False,
                   'Whether to create the repo if it does not exist.')
app.DEFINE_boolean('github_repo_create_private', True,
                   'Whether to create new GitHub repos as private.')
app.DEFINE_boolean('export_source_tree_print_files', False,
                   'Print the files that will be exported and terminate.')


def GetOrCreateRepoOrDie(github: github_lib.Github,
                         repo_name: str) -> github_lib.Repository:
  """Get the github repository to export to. Create it if it doesn't exist."""
  try:
    if FLAGS.github_create_repo:
      return api.GetOrCreateUserRepo(
          github,
          repo_name,
          description='PhD repo subtree export',
Example #9
import sys

import contextlib
import inspect
import pathlib
import pytest
import re
import tempfile
import typing
from importlib import util as importutil

from labm8 import app

FLAGS = app.FLAGS

app.DEFINE_boolean('test_color', False, 'Colorize pytest output.')
app.DEFINE_boolean('test_skip_slow', True,
                   'Skip tests that have been marked slow.')
app.DEFINE_integer(
    'test_maxfail', 1,
    'The maximum number of tests that can fail before execution terminates. '
    'If --test_maxfail=0, all tests will execute.')
app.DEFINE_boolean('test_capture_output', True,
                   'Capture stdout and stderr during test execution.')
app.DEFINE_boolean(
    'test_print_durations', True,
    'Print the duration of the slowest tests at the end of execution. Use '
    '--test_durations to set the number of tests to print the durations of.')
app.DEFINE_integer(
    'test_durations', 3,
    'The number of slowest tests to print the durations of after execution. '