Example #1
    def output_console(self):
        """Output walltime only, without MPI-rank averaging."""
        from datetime import timedelta

        from rich import console, table, box

        csl = console.Console()
        tbl = table.Table(show_header=True,
                          header_style="bold blue",
                          box=box.SIMPLE_HEAVY)
        tbl.add_column("Extra")
        tbl.add_column("Data")
        for key, value in self.extra_data.items():
            tbl.add_row(key, str(value))
        if len(self.extra_data):
            csl.print(tbl)

        tbl = table.Table(show_header=True,
                          header_style="bold magenta",
                          box=box.SIMPLE_HEAVY)
        tbl.add_column("Section")
        tbl.add_column(
            "Walltime (HH:MM:SS)",
            justify="right",
        )
        for section, delta in self._commited_deltas.items():
            tbl.add_row(section, str(timedelta(seconds=delta[0])))
        if len(self._commited_deltas):
            csl.print(tbl)
        else:
            csl.print("No timings were recorded")
Example #2
def simple_progress(
    title: t.Optional[str] = None,
    refresh_per_second: int = 10,
    # note: this default Console is created once at import time and is
    # shared by every call that does not pass its own console
    console: r_console.Console = r_console.Console(record=True),
    tc_log: t.Optional[logger.CustomLogger] = None,
) -> "Progress":
    return Progress(
        title=title,  # setting this to a str will add a panel
        columns={
            "text": r_progress.TextColumn(
                "[progress.description]{task.description}"),
            "progress": r_progress.BarColumn(),
            "percentage": r_progress.TextColumn(
                "[progress.percentage]{task.percentage:>3.0f}%"),
            "time_elapsed": r_progress.TimeElapsedColumn(),
            "time_remaining": r_progress.TimeRemainingColumn(),
            "status": SpinnerColumn(
                start_state_key="start",
                finished_state_key="finished",
                states={
                    "start": SpinnerType.dots,
                    "finished": EMOJI["white_heavy_check_mark"],
                }
            ),
        },
        console=console,
        refresh_per_second=refresh_per_second,
        tc_log=tc_log,
    )
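
Progress, SpinnerColumn, SpinnerType, and EMOJI here are project-specific wrappers, not rich's own API. A rough standalone sketch of the same column stack using only rich's built-in Progress (an approximation, not the wrapper's actual behavior):

import rich.console as r_console
import rich.progress as r_progress

progress = r_progress.Progress(
    r_progress.TextColumn("[progress.description]{task.description}"),
    r_progress.BarColumn(),
    r_progress.TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
    r_progress.TimeElapsedColumn(),
    r_progress.TimeRemainingColumn(),
    # rich's built-in spinner column: spins while running, then shows a check
    r_progress.SpinnerColumn(spinner_name="dots", finished_text="✅"),
    console=r_console.Console(record=True),
    refresh_per_second=10,
)

with progress:
    task = progress.add_task("working", total=100)
    for _ in range(100):
        progress.advance(task)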
Example #3
    def __init__(self):
        self.db_obj = DbHandler()
        self.ext_obj = ExtFilesHandler()
        self.pass_obj = PasswordGenerator()
        console_obj = console.Console()
        self.print = console_obj.print
        self.db_name = ''
Example #4
def print_info(info, verbosity=0):
    cons = console.Console()
    for section in info:
        cons.print()
        cons.print(text.Text(section["section"]), style="bold")
        if verbosity:
            cons.print(
                padding.Padding(
                    text.Text(section["help_text"] or "", style="italic"),
                    (0, 0, 0, 4)))
        # tab = table.Table(show_header=False, box=box.SIMPLE_HEAD)
        tab = table.Table.grid(padding=(0, 1))
        for field in section["config_items"]:
            errors = table.Table.grid()
            for e in field["errors"]:
                errors.add_row(text.Text(e))
            if verbosity:
                tab.add_row(
                    field["field"], text.Text(field["reportable"]),
                    field["status"], errors,
                    text.Text(field["help_text"] or "", style="italic"))
            else:
                tab.add_row(field["field"], text.Text(field["reportable"]),
                            field["status"], errors)
        cons.print(tab)
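
print_info relies on rich's console, padding, table, and text modules being imported at module top. A usage sketch with hypothetical data, showing the structure the function expects:

from rich import console, padding, table, text  # imports print_info relies on

info = [{
    "section": "Database",          # hypothetical section name
    "help_text": "Connection settings",
    "config_items": [{
        "field": "host",
        "reportable": "localhost",
        "status": "ok",
        "errors": [],
        "help_text": "Database host name",
    }],
}]
print_info(info, verbosity=1)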
Example #5
def print_traceback():
    """
    Print traceback to the console.
    """
    _new_line()
    _console = console.Console()
    _console.print_exception(show_locals=True)
    _new_line()
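
_new_line is a helper from the surrounding module; the essential call is Console.print_exception, which renders the exception currently being handled. A minimal sketch:

from rich import console

try:
    1 / 0
except ZeroDivisionError:
    # print_exception renders the exception currently being handled,
    # so it must be called inside an except block.
    console.Console().print_exception(show_locals=True)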
Example #6
    def rich_console(self):
        """rich_console is only set to stdout for now."""
        from rich import console

        # FIXME: Getting IO Operation on closed file error
        #  when testing with capsys, therefore we are creating
        #  one instance each time as a temporary workaround.
        return console.Console(file=self.output)
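
A sketch of the file= redirection with an in-memory buffer, io.StringIO standing in for self.output:

import io

from rich import console

buffer = io.StringIO()
csl = console.Console(file=buffer)
csl.print("captured, not written to stdout")
print(buffer.getvalue())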
Example #7
def spinner(func):
    '''
    >>> import time
    >>> def func(): time.sleep(10)
    >>> spinner(func)
    '''

    con = console.Console()
    with con.status(status='',
                    spinner='bouncingBall',
                    spinner_style='royal_blue1',
                    speed=0.4):
        func()
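
As written, spinner(func) runs func immediately rather than wrapping it. A decorator variant would look like this sketch (the status/spinner arguments are unchanged):

import functools
import time

from rich import console


def spinner(func):
    """Decorator variant: show a spinner while func runs."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        con = console.Console()
        with con.status(status='',
                        spinner='bouncingBall',
                        spinner_style='royal_blue1',
                        speed=0.4):
            return func(*args, **kwargs)
    return wrapper


@spinner
def slow():
    time.sleep(2)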
Example #8
def python_exec(string: str, globals_: dict[str, Any] | None,
                locals_: dict[str, Any] | None) -> str:
    try:
        out = io.StringIO()
        with contextlib.redirect_stdout(out):
            exec(string, globals_, locals_)
        return out.getvalue()
    except Exception:
        c = console.Console()
        t = traceback.Traceback()
        # I don't know what I did, but it is (probably) what I want
        t.trace.stacks[0].frames = []
        # Ok, that probably stripped the functionality, but at least there
        # are colors now peepoHappy
        c.print(t)
    return ""
Example #9
    def start_loop(self):
        if self.IS_DUMMY_SHELL:
            raise DummyShellError
        self._sort_commands()
        self._running = True
        try:
            # with patch_stdout():
            while True:
                if self._should_exit:
                    return
                inpt = self._prompt()
                if self._should_exit:
                    # If something requested an exit while we were already
                    # waiting for user input, exit here to avoid executing
                    # the next command.
                    return
                if inpt != "":
                    try:
                        self.proc_string(inpt)
                    except Exception as e:
                        if self.COMMAND_EXCEPTION_TRACEBACK:
                            c = console.Console()
                            t = traceback.Traceback(
                                show_locals=self.COMMAND_EXCEPTION_TRACEBACK_LOCALS)
                            # Drop the first two frames so the traceback
                            # starts at the failing command itself.
                            t.trace.stacks[0].frames = t.trace.stacks[0].frames[2:]
                            c.print(t)

                        if self.COMMAND_EXCEPTION_RERAISE:
                            raise e
        except KeyboardInterrupt as keyboard_interrupt:
            if self.END_ON_CTRL_C:
                self._should_exit = True
            if self.RAISE_ON_CTRL_C:
                raise keyboard_interrupt
        finally:
            self._running = False
            self._should_exit = False
Example #10
    def error_console(self):
        from rich import console

        return console.Console(stderr=True)
Example #11
from reddash.app.utils import (
    register_blueprints,
    apply_themes,
    add_constants,
    initialize_babel,
    startup_message,
)


# Logging and terminal setup

log = logging.getLogger("werkzeug")
dashlog = logging.getLogger("reddash")
queuelog = logging.getLogger("waitress.queue")

console = console.Console()
oldexcepthook = rtb.install()
progress_bar = progress.Progress(
    "{task.description}", progress.TextColumn("{task.fields[status]}\n")
)

logging.basicConfig(
    format="%(message)s",
    # RichHandler expects a Console; use the progress bar's console so log
    # rendering cooperates with the live progress display.
    handlers=[richlogging.RichHandler(console=progress_bar.console)],
)
queuelog.setLevel(logging.ERROR)

# Base variable setup
app = Flask("reddash", static_folder="app/base/static")
lock = Lock()
babel = Babel()


def create_app(host, port, rpcport, interval, debug, dev):
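
The excerpt cuts off at create_app. For reference, a minimal standalone sketch of routing stdlib logging through rich's RichHandler (generic names, not reddash's):

import logging

from rich import console as rich_console
from rich import logging as richlogging

csl = rich_console.Console()
logging.basicConfig(
    format="%(message)s",
    handlers=[richlogging.RichHandler(console=csl)],
)
logging.getLogger("demo").warning("rendered by RichHandler")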
Example #12
    def rich_console(self):
        """rich_console is only set to stdout for now."""
        from rich import console

        return console.Console()
Example #13
# from network.pytorch.network import (NetworkConfig, LearningRateConfig,
#                                      ConvolutionConfig, GaugeNetwork,
#                                      NetworkOutputs)
# from utils.logger import Logger, in_notebook
# from lattice.pytorch.lattice import Lattice
# from utils.pytorch.history import Metrics, History, innerHistory
# from utils.step_timer import StepTimer

# from utils.data_containers import History, StepTimer, Metrics, innerHistory
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# logger = Logger()
if in_notebook:
    import rich.console as console
    logger = console.Console(width=135,
                             log_path=False,
                             log_time_format='[%x %X]',
                             color_system='truecolor')
else:
    logger = Logger()
# if in_notebook:
#     logger = logger.console

TWO_PI = 2. * PI

# NetworkOutputs: type = tuple[torch.Tensor, torch.Tensor, torch.Tensor]
# NetworkOutputs = "tuple[torch.Tensor, torch.Tensor, torch.Tensor]"


def rand_unif(shape: Union[tuple, list], a: float, b: float,
              requires_grad: bool):
    # Uniform samples between a and b: torch.rand is U[0, 1),
    # scaled by (a - b) and shifted by b.
    return (a - b) * torch.rand(shape, requires_grad=requires_grad) + b
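
A quick sanity check of the sampling range: torch.rand draws from U[0, 1), so (a - b) * r + b starts at b (when r == 0) and approaches but never reaches a:

import torch

x = rand_unif((10_000,), a=1.0, b=-1.0, requires_grad=False)
assert float(x.min()) >= -1.0  # b is attained at r == 0
assert float(x.max()) < 1.0    # a is never reached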
Example #14
    def __init__(self):
        """__init__."""
        self.console = rich_console.Console()
        self.executor = KubicExecutor()
        self.session = PromptSession()
        self.translator = KubicTranslator()
Example #15
# Third party library imports
import boto3
import botocore
from rich import console

from libs.display import print_report, print_summary
from libs.iam_scan import complete_source_arn, iam_display, iam_display_roles, iam_extract, iam_simulate
from libs.scan import aws_scan
from libs.tools import Cache, NoColor
from config import variables

# Debug
# from pdb import set_trace as st

CONSOLE = console.Console()
VERSION = '4.0.0'


def audit_handler(session, args, meta_types, cache):
    """
    Handle audit argument
    """
    assets = aws_scan(session,
                      cache,
                      iam_action_passlist=variables.IAM_ACTION_PASSLIST,
                      iam_rolename_passlist=variables.IAM_ROLENAME_PASSLIST,
                      public_only=False,
                      meta_types=meta_types,
                      name_filter=args.name,
                      console=CONSOLE)
Example #16
# NOTE: activate connection with `nc socket.cryptohack.org 13377`
# `nc` is netcat

import json

from pwn import *  # pip install pwntools
from Crypto.Util.number import long_to_bytes
import codecs
import base64
from rich import console

out = console.Console()

r = remote("socket.cryptohack.org", 13377, level="debug")


def json_recv():
    line = r.recvline()
    return json.loads(line.decode())


def json_send(hsh):
    request = json.dumps(hsh).encode()
    r.sendline(request)


def decode(encoded, encoding):
    # TODO: mv encoding steps into decoding
    if encoding == "base64":
        decoded = base64.b64decode(encoded).decode()
    elif encoding == "hex":
Example #17
class Widget(abc.ABC):
    title: t.Optional[r_console.RenderableType] = None
    tc_log: t.Optional[logger.CustomLogger] = None
    refresh_per_second: int = 10
    console: r_console.Console = r_console.Console(record=True)

    @property
    @abc.abstractmethod
    def renderable(self) -> r_console.RenderableType:
        ...

    def __post_init__(self):
        self._live = r_live.Live(
            self.renderable,
            console=self.console,
            refresh_per_second=self.refresh_per_second,
            transient=True,
        )

    def __enter__(self) -> "Widget":

        self._start_time = datetime.datetime.now()

        if self.tc_log is not None:
            _title = (self.title + ' ') if bool(self.title) else ''
            self.tc_log.info(msg=_title + "started ...")

        self._live = r_live.Live(
            self.renderable, refresh_per_second=self.refresh_per_second,
            console=self.console
        )

        self._live.start(refresh=False)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):

        self._live.stop()

        self.console.print("")

        if self.tc_log is not None:
            # todo: use `self.console.extract_*` methods to get console frame and log it
            #   via self.tc_log .... need to do this because the RichHandler is not able
            #   to write things to file like FileHandler ... explore later
            _secs = (datetime.datetime.now() - self._start_time).total_seconds()
            _title = (self.title + ' ') if bool(self.title) else ''
            _ct = self.console.export_text()
            self.tc_log.info(
                msg=_title + f"finished in {_secs} seconds ...\n---\n"
                # + _ct
            )

    def refresh(self, update_renderable: bool = False):
        """Refresh the live display.

        Args:
            update_renderable: Set this when you have updated any renderable
              components on the fly.
        """
        if self._live is not None:
            if update_renderable:
                self._live.update(
                    renderable=self.renderable, refresh=True
                )
            else:
                self._live.refresh()
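
The core rich mechanism the widget wraps is rich.live.Live. A minimal standalone sketch of the start/update/stop cycle, including the record=True export used in __exit__:

import time

from rich import console as r_console
from rich import live as r_live
from rich.text import Text

csl = r_console.Console(record=True)
with r_live.Live(Text("working ..."), console=csl,
                 refresh_per_second=10) as live:
    time.sleep(0.5)
    live.update(Text("still working ..."), refresh=True)
    time.sleep(0.5)
print(csl.export_text())  # record=True makes the frames retrievable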
Example #18
def main(argv):
    ##############################################################################
    # Initial Setup. Logging, Flags, Random seeds.
    ##############################################################################
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    absl_logging.use_python_logging()
    flags_dict = {
        flag.name: flag.value
        for flag in FLAGS.flags_by_module_dict()[argv[0]]
    }

    if FLAGS.use_subset:
        message = (f"{colorama.Back.RED}{colorama.Fore.WHITE}"
                   f"{colorama.Style.BRIGHT}USING A SUBSET OF THE DATASET"
                   f"{colorama.Style.RESET_ALL}")
        LOGGER.warning(message)

    utils.log_module_args(LOGGER, argv[0])
    if not FLAGS.output_dir.startswith("gs://"):
        utils.check_exists(FLAG_OUTPUT_DIR.value)
        if not tf.io.gfile.isdir(FLAG_OUTPUT_DIR.value):
            raise RuntimeError("Output dir needs to be a directory.")

    tf.random.set_seed(FLAG_RANDOM_SEED.value)
    np.random.seed(FLAG_RANDOM_SEED.value)

    # Prepare the instance output directory path and save the config there
    # Prepare the path
    folder_name = time.strftime(
        f"{FLAG_RUN_NAME.value}_{FLAG_APPROACH_TYPE.value}_%Y%m%d-%H%M%S")
    instance_output_dir = os.path.join(FLAG_OUTPUT_DIR.value,
                                       folder_name).strip()
    if not instance_output_dir.endswith("/"):
        instance_output_dir += "/"
    json_target = os.path.join(instance_output_dir, "training_params.json")

    # Make the folder if we're not on gcloud
    if not json_target.strip().startswith("gs://"):
        subprocess.check_call(["mkdir", "-p", instance_output_dir])

    # Save the config file
    utils.to_json_file(json_target, flags_dict)

    ##############################################################################
    # Initialization and Configuration of the Devices.
    ##############################################################################
    tpu_setup = None

    accel = tf_utils.current_accelerator_type()
    if FLAG_TPU_IS_LOCAL.value:
        assert accel == "TPU", accel
    if accel == "TPU":
        assert FLAG_TPU_IS_LOCAL.value, FLAG_TPU_IS_LOCAL.value

    if tf_utils.current_accelerator_type() in {"CPU", "TPU"}:
        tpu_setup = tf_utils.init_tpus(tpu_name=FLAG_TPU_NAME.value,
                                       local=FLAG_TPU_IS_LOCAL.value)

    LOGGER.debug("Devices we are computing on:\n%s",
                 utils.wrap_iterable(map(str, tf_utils.devices_to_use())))
    LOGGER.debug("All devices:")
    LOGGER.debug(tf_utils.device_mapping())

    if tf_utils.current_accelerator_type() == "GPU":
        tf.config.set_soft_device_placement(True)

    if tf_utils.current_accelerator_type() != "TPU":
        tf.debugging.set_log_device_placement(True)

    utils.check_operator(operator.ne, tf_utils.current_accelerator_type(),
                         "CPU")

    assert FLAG_TPU_NAME.value == socket.gethostname(), (
        "This is a configuration choice. You can remove this. "
        "There will be no side effects.")

    if FLAG_DISTRIBUTE_MODE.value in constants.PURE_DATA_PARALLEL_STRATEGIES:
        actual_num_replicas = len(tf_utils.devices_to_use())
    elif FLAG_DISTRIBUTE_MODE.value in constants.DATA_PARALLEL_DMC:
        actual_num_replicas = FLAG_NUM_REPLICAS.value
    else:
        actual_num_replicas = 1

    ##############################################################################
    # We load the retriever model if it is needed.
    ##############################################################################
    # Not currently used. See old commits.
    retriever = None

    ##############################################################################
    # Distributed training task
    ##############################################################################
    if FLAG_TASK.value == constants.TaskChoices.train:
        with utils.log_duration(LOGGER, "main", "Load model"):
            utils.print_mem("before loading model", LOGGER)
            model_specific = task_specific.load_model(
                FLAG_MODEL_KEY.value, FLAG_DISTRIBUTE_MODE.value, tpu_setup,
                FLAG_NUM_REPLICAS.value)
            utils.print_mem("after loading model", LOGGER)
            model = model_specific.model
            if isinstance(model, list):
                model: List[transformers.TFGPT2LMHeadModel]
            else:
                model: transformers.TFGPT2LMHeadModel

            tokenizer = model_specific.tokenizer

            def make_optimizer():
                if FLAG_OPTIMIZER_TYPE.value == constants.OptimizerTypes.adafactor:
                    return tensor2tensor.utils.adafactor.AdafactorOptimizer(
                        learning_rate=FLAG_LEARNING_RATE.value)
                elif FLAG_OPTIMIZER_TYPE.value == constants.OptimizerTypes.adam:
                    return tf.keras.optimizers.Adam(
                        learning_rate=FLAG_LEARNING_RATE.value)
                else:
                    raise ValueError(FLAG_OPTIMIZER_TYPE.value)

            if model_specific.strategy:
                with model_specific.strategy.scope():
                    optimizer = make_optimizer()
            else:
                optimizer = make_optimizer()

        ############################################################################
        # Prepare the dataset functions
        ############################################################################
        rg = np.random.default_rng(FLAG_RANDOM_SEED.value)

        def call_lm_preproc(repeat, split, random_seed):
            """Using functools.partial prevents the linter from doing its job."""
            if FLAG_DATASET_NAME.value == constants.DatasetNameChoices.kilt_eli5:
                return task_specific.create_lm_ds_kilt_eli5(
                    tokenizer=tokenizer,
                    context_window_size=model.config.n_positions,
                    dataset_name=FLAG_DATASET_NAME.value,
                    # Batches are split over the replicas:
                    batch_size=FLAG_BATCH_SIZE.value * actual_num_replicas,
                    db_path=FLAG_DB_PATH.value,
                    random_seed=random_seed,
                    use_subset=FLAG_USE_SUBSET.value,
                    subset_size=FLAG_SUBSET_SIZE.value,
                    use_helper_words=FLAG_USE_HELPER_WORDS.value,
                    approach_type=FLAG_APPROACH_TYPE.value,
                    num_retrievals=FLAG_NUM_RETRIEVALS.value,
                    retrieval_temperature=FLAG_RETRIEVAL_TEMPERATURE.value,
                    retriever=retriever,
                    repeat=repeat,
                    split=split,
                    enable_debug_checks=FLAG_DATASET_DEBUG.value,
                    retrieval_bank_size=FLAG_RETRIEVAL_BANK_SIZE.value,
                    dataset_type=FLAG_DATASET_TYPE.value,
                    qty_shuffle=FLAG_QTY_SHUFFLE.value,
                    tfr_prefix=FLAG_TFR_PREFIX.value,
                    max_length_generation=FLAG_MAX_LENGTH_GENERATION.value,
                )
            else:
                raise NotImplementedError(
                    f"FLAG_DATASET_NAME.value unsupported: `{FLAG_DATASET_NAME.value}`"
                )

        make_training_dataset: Callable[...,
                                        tf.data.Dataset] = functools.partial(
                                            call_lm_preproc,
                                            split="train",
                                            repeat=False,
                                        )
        make_eval_dataset: Callable[..., tf.data.Dataset] = functools.partial(
            call_lm_preproc,
            split="eval",
            repeat=True,
        )

        ############################################################################
        # Prepare the step functions
        ############################################################################
        utils.check_contained(FLAG_DISTRIBUTE_MODE.value,
                              constants.DistributeModeChoices.choices())
        tf_function_flags = dict(
            experimental_compile=FLAG_EXPERIMENTAL_COMPILE.value,
            experimental_relax_shapes=not FLAG_INPUT_FIXED_SIZE.value)

        training_step = build_regular_training_step(
            model,
            optimizer,
            strategy=model_specific.strategy,
            tf_function_kwargs=tf_function_flags)

        evaluation_step = build_evaluation_step(model, tf_function_flags)

        timestamp_last_ckpt_secs = time.time()
        # Model checkpoints are saved to the tmp_directory and then rsynced to GCS

        ############################################################################
        # Prepare the statistics and the logging facilities.
        ############################################################################
        # Tensorboard
        with model_specific.strategy.scope():
            checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
        saver = Saver(instance_output_dir, checkpoint)
        train_log_dir = os.path.join(instance_output_dir, "tensorboard",
                                     "train")
        eval_log_dir = os.path.join(instance_output_dir, "tensorboard", "eval")
        flags_log_dir = os.path.join(instance_output_dir, "tensorboard",
                                     "params")
        writers = dict(train=tf.summary.create_file_writer(train_log_dir),
                       eval=tf.summary.create_file_writer(eval_log_dir),
                       flags=tf.summary.create_file_writer(flags_log_dir))
        with writers["flags"].as_default():
            tf.summary.text(
                "Flags",
                # Tensorboard takes Markdown:
                json.dumps(flags_dict, indent=4).replace("\n", "\n\n"),
                step=0)

        # Different information to log.
        ma_loss = dict(train=utils.MovingAverage(0.9),
                       eval=utils.MovingAverage(0.9))
        step_counters = dict(train=0, eval=0)
        batch_counters = dict(train=0, eval=0)
        prev_batch_end = time.time()

        ############################################################################
        # Create the Eval DS object.
        # ==========================================================================
        # The eval ds has no real concept of epoch, repeats forever, shuffling
        # each time it reaches its end.
        ############################################################################
        # Create
        with utils.log_duration(LOGGER, "main", "All of make_eval_dataset"):
            eval_ds_instance = make_eval_dataset(random_seed=rg.integers(
                -2**63, 2**63 - 1), )
        # Maybe distribute
        LOGGER.debug("Distributing the eval dataset to the replicas.")
        if FLAG_DATASET_TYPE.value == "tfr":
            eval_ds_instance = (
                model_specific.strategy.experimental_distribute_dataset(
                    eval_ds_instance))
        # Start the iteration. We step by calling `next(...)`.
        LOGGER.debug("Done distributing the eval dataset to the replicas.")
        eval_ds_instance = iter(eval_ds_instance)
        step_function = dict(train=training_step, eval=evaluation_step)

        ############################################################################
        # Training Loop
        # ==========================================================================
        # Create a new training dataset object that lasts for one epoch.
        # This is different from the eval training dataset object, which loops
        # forever.
        ############################################################################
        for epoch in itertools.count():
            ##########################################################################
            # Epoch Setup
            ##########################################################################
            LOGGER.debug("EPOCH %d START", epoch)
            # Shuffle differently every epoch
            with utils.log_duration(LOGGER, "main",
                                    "All of make_training_dataset"):
                train_ds_instance = make_training_dataset(
                    random_seed=rg.integers(-2**63, 2**63 - 1), )
            LOGGER.debug(
                "Attempting to distribute the training dataset to the replicas."
            )
            if FLAG_DATASET_TYPE.value == "tfr":
                train_ds_instance = (
                    model_specific.strategy.experimental_distribute_dataset(
                        train_ds_instance))

            LOGGER.debug(
                "Done distributing the training dataset to the replicas.")
            train_ds_instance = iter(train_ds_instance)

            # To change splits, we take fixed-size slices (`toolz.take`) of
            # the dataset generators. When the training dataset generator is
            # exhausted, the following while loop runs again, but no training
            # batch is produced because we are slicing a generator that is
            # already done.
            did_at_least_one_training_batch = True
            split = "eval"
            while did_at_least_one_training_batch:
                utils.check_operator(operator.ne,
                                     tf_utils.current_accelerator_type(),
                                     "CPU")

                # Invert split
                if split == "train":
                    split = "eval"
                else:
                    split = "train"

                # Prepare to test if we did at least one training batch
                if split == "train":
                    did_at_least_one_training_batch = False

                ########################################################################
                # Take slices from the dataset iterator
                # ======================================================================
                # We only want to do a certain number of batches before switching splits
                # We do this by using an `itertools.islice` of the dataset iterators.
                ########################################################################
                if split == "train":
                    dataset_iterator = toolz.take(
                        FLAG_BATCHES_BETWEEN_EVALS.value, train_ds_instance)
                else:
                    # The evaluation dataset generator is infinite and
                    # reshuffles every time it reaches its end.
                    # Still, we take a fixed-size slice from that infinite
                    # generator.
                    dataset_iterator = toolz.take(
                        FLAG_NUMBER_EVAL_BATCHES.value, eval_ds_instance)

                LOGGER.debug("Batching")
                for batch in dataset_iterator:
                    if FLAG_LOG_SAMPLES.value:
                        ####################################################################
                        # Print elements of the dataset
                        ####################################################################
                        # Make ourselves resistant to values possibly being a PerReplica
                        # object
                        LOGGER.warning(
                            "%(red)sLOGGING SAMPLES. THIS IS VERY SLOW.%(reset)s",
                            dict(
                                red=colorama.Fore.RED,
                                reset=colorama.Style.RESET_ALL,
                            ))
                        is_distributed = isinstance(batch["input_ids"],
                                                    values.PerReplica)
                        for in_batch_idx in range(FLAG_BATCH_SIZE.value):
                            for replica_idx in (range(actual_num_replicas)
                                                if is_distributed else [0]):
                                if is_distributed:
                                    sample = {
                                        k: batch[k].values[replica_idx]
                                        for k in batch
                                    }
                                else:
                                    sample = batch

                                # input_sentence = tokenizer.decode(
                                #   [x for x in sample["input_ids"][i] if x != tokenizer.eos_token_id]
                                # )

                                # LOGGER.debug(
                                #   "%sInput [%d / %d]%s:\n\"%s\"",
                                #   colorama.Fore.GREEN,
                                #   replica_idx + 1,
                                #   actual_num_replicas,
                                #   colorama.Style.RESET_ALL,
                                #   input_sentence,
                                # )
                                #
                                # answer = tokenizer.decode(
                                #   [(x if x != -100 else 0) for x in sample["label_ids"][i]]
                                # )
                                # LOGGER.debug(
                                #   "%sLabel [%d / %d]%s:\n\"%s\"",
                                #   colorama.Fore.GREEN,
                                #   replica_idx + 1,
                                #   actual_num_replicas,
                                #   colorama.Style.RESET_ALL,
                                #   answer,
                                # )

                                cons = console.Console()
                                sentences = table.Table()
                                sentences.add_column("BPE Index",
                                                     justify="center")
                                sentences.add_column("Inputs",
                                                     justify="center")
                                sentences.add_column("Labels",
                                                     justify="center")
                                for bpe_idx, (x, y) in enumerate(
                                        itertools.zip_longest(
                                            sample["input_ids"][in_batch_idx].numpy(),
                                            sample["label_ids"][in_batch_idx].numpy(),
                                            fillvalue=None,
                                        )):
                                    x_w = tokenizer.decode([x]) if x >= 0 else f"[ {x} ]"
                                    y_w = tokenizer.decode([y]) if y >= 0 else f"[ {y} ]"
                                    sentences.add_row(str(bpe_idx), x_w, y_w)

                                cons.print(sentences)

                    # We only care about training epochs as, obviously, we
                    # don't train over eval samples; the number of eval samples
                    # seen only contributes to lowering the variance in the
                    # evaluation of when to do early stopping.
                    if split == "train":
                        did_at_least_one_training_batch = True

                    input_ids = batch["input_ids"]
                    label_ids = batch["label_ids"]

                    # Per-split step counter
                    step_counters[split] += (FLAG_BATCH_SIZE.value *
                                             actual_num_replicas)
                    batch_counters[split] += 1

                    ######################################################################
                    # Model step function.
                    ######################################################################
                    step_function_kwargs = dict(
                        input_ids=input_ids,
                        label_ids=label_ids,
                    )

                    utils.print_mem(f"[{split}] - Mem before `strategy.run`",
                                    LOGGER)
                    LOGGER.debug("[%s] - Calling `strategy.run`", split)
                    loss = model_specific.strategy.run(
                        step_function[split], kwargs=step_function_kwargs)
                    LOGGER.debug("[%s] - Done `strategy.run`", split)
                    utils.print_mem(f"[{split}] - Mem after `strategy.run`",
                                    LOGGER)

                    ####################################################################
                    # End of logging step code / Logging and saving the model.
                    ####################################################################
                    if (FLAG_DISTRIBUTE_MODE.value
                            in constants.PURE_DATA_PARALLEL_STRATEGIES):
                        utils.check_equal(len(loss.values),
                                          actual_num_replicas)
                        LOGGER.debug("[%s] - Real num replicas: %s", split,
                                     actual_num_replicas)
                        average_loss = float(
                            tf.math.reduce_mean(loss.values).numpy())

                        LOGGER.debug("[%s] - Loss: %s", str(split),
                                     str(average_loss))

                    else:
                        average_loss = float(loss.numpy())

                    tf.debugging.check_numerics(
                        loss.values if isinstance(loss, values.PerReplica) else
                        loss, "Numerics failed.")

                    now = time.time()
                    batch_duration = now - prev_batch_end
                    prev_batch_end = now
                    ma_loss[split].update(average_loss)

                    LOGGER.info("[%s] - Epoch: # %d", split, epoch)
                    LOGGER.info("[%s] - Tensorboard_dir: %s", split,
                                instance_output_dir)
                    LOGGER.info("[%s] - Batch: # %d", split,
                                batch_counters[split])
                    LOGGER.info("[%s] - Step:  # %d", split,
                                step_counters[split])
                    if FLAG_USE_SUBSET.value:
                        LOGGER.warning(">> USING A SUBSET OF THE DATASET <<")
                    LOGGER.info(
                        "[%(split)s] - Batch loss:           %(metric)f",
                        dict(split=split, metric=average_loss))
                    LOGGER.info(
                        "[%(split)s] - Moving average loss:  %(metric)f",
                        dict(split=split, metric=ma_loss[split].average))
                    LOGGER.info(
                        "[%(split)s] - Moving average ppl:   %(metric)f",
                        dict(split=split,
                             metric=np.exp(ma_loss[split].average)))
                    LOGGER.info(
                        "[%(split)s] - Batch duration:       %(duration)s",
                        dict(split=split,
                             duration=utils.TimeStamp.from_seconds(
                                 batch_duration).format()))

                    # Write to Tensorboard
                    with writers[split].as_default():
                        tf.summary.scalar(f"Loss/{split}", average_loss,
                                          step_counters[split])
                        tf.summary.scalar(f"PPL/{split}", np.exp(average_loss),
                                          step_counters[split])
                    writers[split].flush()

                    ######################################################################
                    # Save every `FLAG_SAVE_PERIOD_MIN.value` minutes.
                    ######################################################################
                    delta_sec = time.time() - timestamp_last_ckpt_secs
                    utils.check_operator(operator.gt, delta_sec, 0)
                    period_sec = 60 * FLAG_SAVE_PERIOD_MIN.value
                    utils.check_operator(operator.gt, period_sec, 0)
                    ratio = delta_sec / period_sec
                    LOGGER.info(
                        "[%(split)s] - RATIO:                  %(ratio)s",
                        dict(split=split, ratio=str(ratio)))
                    LOGGER.info(
                        "[%(split)s] - Target: %(target)s, Present: %(present)s",
                        dict(
                            split=split,
                            target=str(period_sec),
                            present=str(delta_sec),
                        ))

                    if ratio >= 1:
                        dur = delta_sec / 60
                        timestamp_last_ckpt_secs = time.time()
                        LOGGER.debug(
                            "SAVING MODEL - CAUSE: DURATION - %0.2f min", dur)
                        # checkpoint.save(ckpt_prefix)
                        saver.save_model(
                            train_steps=step_counters["train"],
                            model_or_replicas=model,
                            optimizer=optimizer,
                        )

        ############################################################################
        # Post Training Cleanup
        ############################################################################
        for writer in writers.values():
            writer.close()
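
The save-every-N-minutes logic near the end of the loop reduces to a small reusable pattern. A sketch with generic names (SAVE_PERIOD_MIN standing in for FLAG_SAVE_PERIOD_MIN.value):

import time

SAVE_PERIOD_MIN = 30.0  # hypothetical stand-in for FLAG_SAVE_PERIOD_MIN.value
timestamp_last_ckpt_secs = time.time()


def maybe_save(save_fn):
    """Call save_fn when at least SAVE_PERIOD_MIN minutes have elapsed."""
    global timestamp_last_ckpt_secs
    delta_sec = time.time() - timestamp_last_ckpt_secs
    period_sec = 60 * SAVE_PERIOD_MIN
    if delta_sec / period_sec >= 1:
        timestamp_last_ckpt_secs = time.time()
        save_fn()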
Example #19
from rich import console as _console, print, inspect
console = _console.Console()

from .config import config
from .script import *

import json
import os
import sys
import time


class spawn():
    def __init__(self, signature, *a, **k):
        self.signature = signature
        self.start = time.time()
        process(signature, *a, **k)
        self.log()

    def log(self, *a, **k):
        pipe = "┣"
        if (len(a) == 0): pipe = "┃"
        console.print(f"[dim]{pipe}", " ".join(a), **k, style="white")

    def warn(self, *a, **k):
        # pipe = "┃"
        warn("[dim white]", "".join(a), **k)

    def fail(self, msg="Fail"):
        self.log()
        self.done(msg, "red")