def output_console(self):
    """Print the extra data and the per-section walltimes as rich tables.

    Only the walltime (element ``0`` of every committed delta) is shown; no
    MPI-rank averaging is performed.  When no timings were committed a short
    notice is printed instead of the timing table.
    """
    from rich import box, console, table

    out = console.Console()

    # Optional key/value table: only rendered when extra data is present.
    if self.extra_data:
        extras = table.Table(
            show_header=True,
            header_style="bold blue",
            box=box.SIMPLE_HEAVY,
        )
        for heading in ("Extra", "Data"):
            extras.add_column(heading)
        for name, payload in self.extra_data.items():
            extras.add_row(name, str(payload))
        out.print(extras)

    # Nothing was timed: say so and stop.
    if not self._commited_deltas:
        out.print("No timings were recorded")
        return

    timings = table.Table(
        show_header=True,
        header_style="bold magenta",
        box=box.SIMPLE_HEAVY,
    )
    timings.add_column("Section")
    timings.add_column("Walltime (HH:MM:SS)", justify="right")
    for section, delta in self._commited_deltas.items():
        walltime = timedelta(seconds=delta[0])
        timings.add_row(section, str(walltime))
    out.print(timings)
def simple_progress(
    title: t.Optional[str] = None,
    refresh_per_second: int = 10,
    console: r_console.Console = r_console.Console(record=True),
    tc_log: logger.CustomLogger = None,
) -> "Progress":
    """Build a ``Progress`` with the standard set of columns.

    Args:
        title: Forwarded to ``Progress``; setting this to a str will add a
            panel.
        refresh_per_second: Forwarded to ``Progress``.
        console: Console to render on.  The default is a single recording
            console created when this function is defined, so it is shared
            by every call that does not pass its own.
        tc_log: Optional logger forwarded to ``Progress``.

    Returns:
        The configured ``Progress`` instance.
    """
    # Column order is significant: it is the left-to-right layout.
    columns = {
        "text": r_progress.TextColumn(
            "[progress.description]{task.description}"),
        "progress": r_progress.BarColumn(),
        "percentage": r_progress.TextColumn(
            "[progress.percentage]{task.percentage:>3.0f}%"),
        "time_elapsed": r_progress.TimeElapsedColumn(),
        "time_remaining": r_progress.TimeRemainingColumn(),
        "status": SpinnerColumn(
            start_state_key="start",
            finished_state_key="finished",
            states={
                "start": SpinnerType.dots,
                "finished": EMOJI["white_heavy_check_mark"],
            },
        ),
    }
    return Progress(
        title=title,
        columns=columns,
        console=console,
        refresh_per_second=refresh_per_second,
        tc_log=tc_log,
    )
def __init__(self):
    """Create the handler objects and expose a rich ``print`` callable."""
    self.db_obj = DbHandler()
    self.ext_obj = ExtFilesHandler()
    self.pass_obj = PasswordGenerator()
    # Only the bound ``print`` method of a fresh console is kept.
    self.print = console.Console().print
    self.db_name = ''
def print_info(info, verbosity=0):
    """Print every section of ``info`` as a heading followed by a field grid.

    Each section is a mapping with the keys ``"section"``, ``"help_text"`` and
    ``"config_items"``; each config item provides ``"field"``,
    ``"reportable"``, ``"status"``, ``"errors"`` and ``"help_text"``.

    Args:
        info: Iterable of section mappings.
        verbosity: When truthy, the section help text and a per-field help
            column are printed as well.
    """
    out = console.Console()
    for section in info:
        out.print()
        out.print(text.Text(section["section"]), style="bold")
        if verbosity:
            section_help = text.Text(section["help_text"] or "",
                                     style="italic")
            # Indent the help text by four columns on the left.
            out.print(padding.Padding(section_help, (0, 0, 0, 4)))

        grid = table.Table.grid(padding=(0, 1))
        for field in section["config_items"]:
            # One row per error message, nested inside the field's row.
            error_grid = table.Table.grid()
            for message in field["errors"]:
                error_grid.add_row(text.Text(message))

            cells = [
                field["field"],
                text.Text(field["reportable"]),
                field["status"],
                error_grid,
            ]
            if verbosity:
                cells.append(
                    text.Text(field["help_text"] or "", style="italic"))
            grid.add_row(*cells)
        out.print(grid)
def print_traceback():
    """
    Print the exception currently being handled, including frame locals.

    The output is preceded and followed by a call to ``_new_line()``.
    """
    _new_line()
    console.Console().print_exception(show_locals=True)
    _new_line()
def rich_console(self):
    """Return a new rich console that writes to ``self.output``."""
    from rich.console import Console

    # FIXME: an "IO operation on closed file" error shows up when testing
    # with capsys, so a new instance is built on every call as a temporary
    # workaround.
    return Console(file=self.output)
def spinner(func):
    '''
    Call ``func()`` while a bouncing-ball status spinner is displayed.

    The return value of ``func`` is discarded.

    >>> import time
    >>> def func(): time.sleep(10)
    >>> spinner(func)
    '''
    status_options = dict(
        status='',
        spinner='bouncingBall',
        spinner_style='royal_blue1',
        speed=0.4,
    )
    with console.Console().status(**status_options):
        func()
def python_exec(string: str, globals_: dict[str, Any] | None, locals_: dict[str, Any] | None) -> str: try: out = io.StringIO() with contextlib.redirect_stdout(out): exec(string, globals_, locals_) return out.getvalue() except: c = console.Console() t = traceback.Traceback() t.trace.stacks[0].frames = [ ] # IDK co jsem udělal, ale (asi) je to to co chci # Ok, asi to jsem tomu sebral funkcionalitu, ale alespoň jsou teď barvičky peepoHappy c.print(t) return ""
def start_loop(self):
    """Run the interactive prompt loop until an exit is requested.

    Repeatedly reads a line with ``self._prompt()`` and passes every
    non-empty line to ``self.proc_string``.  ``self._running`` is True while
    the loop is active; both ``self._running`` and ``self._should_exit`` are
    reset to False when the method finishes, however it finishes.

    Raises:
        DummyShellError: If ``self.IS_DUMMY_SHELL`` is truthy.
        Exception: Re-raised from a command when
            ``self.COMMAND_EXCEPTION_RERAISE`` is truthy.
        KeyboardInterrupt: Re-raised when ``self.RAISE_ON_CTRL_C`` is truthy.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the indentation (in particular
    # which `try` the KeyboardInterrupt handler belongs to).
    if self.IS_DUMMY_SHELL:
        raise DummyShellError
    self._sort_commands()
    self._running = True
    try:
        # with patch_stdout():
        while True:
            if self._should_exit:
                return
            inpt = self._prompt()
            if self._should_exit:
                # If something requested exit while we were already asking
                # the user for input, exit here to prevent executing the
                # next command.
                return
            if inpt != "":
                try:
                    self.proc_string(inpt)
                except Exception as e:
                    if self.COMMAND_EXCEPTION_TRACEBACK:
                        c = console.Console()
                        t = traceback.Traceback(
                            show_locals=self.COMMAND_EXCEPTION_TRACEBACK_LOCALS)
                        # Drop the first two frames of the first stack before
                        # printing (presumably the shell's own dispatch
                        # frames -- confirm).
                        t.trace.stacks[0].frames = t.trace.stacks[0].frames[2:]
                        c.print(t)
                    if self.COMMAND_EXCEPTION_RERAISE:
                        raise e
    except KeyboardInterrupt as keyboard_interrupt:
        # NOTE(review): with the handler at this level the loop has already
        # been left, and the `finally` below resets `_should_exit`, so
        # END_ON_CTRL_C has no lasting effect here -- confirm intent.
        if self.END_ON_CTRL_C:
            self._should_exit = True
        if self.RAISE_ON_CTRL_C:
            raise keyboard_interrupt
    finally:
        self._running = False
        self._should_exit = False
def error_console(self):
    """Return a new rich console that writes to standard error."""
    from rich.console import Console

    return Console(stderr=True)
from reddash.app.utils import ( register_blueprints, apply_themes, add_constants, initialize_babel, startup_message, ) # Logging and terminal set up log = logging.getLogger("werkzeug") dashlog = logging.getLogger("reddash") queuelog = logging.getLogger("waitress.queue") console = console.Console() oldexcepthook = rtb.install() progress_bar = progress.Progress( "{task.description}", progress.TextColumn("{task.fields[status]}\n") ) logging.basicConfig(format="%(message)s", handlers=[richlogging.RichHandler(console=progress_bar)]) queuelog.setLevel(logging.ERROR) # Base variable setup app = Flask("reddash", static_folder="app/base/static") lock = Lock() babel = Babel() def create_app(host, port, rpcport, interval, debug, dev):
def rich_console(self):
    """Return a new rich console with default settings (stdout for now)."""
    from rich.console import Console

    return Console()
# from network.pytorch.network import (NetworkConfig, LearningRateConfig, # ConvolutionConfig, GaugeNetwork, # NetworkOutputs) # from utils.logger import Logger, in_notebook # from lattice.pytorch.lattice import Lattice # from utils.pytorch.history import Metrics, History, innerHistory # from utils.step_timer import StepTimer # from utils.data_containers import History, StepTimer, Metrics, innerHistory DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' # logger = Logger() if in_notebook: import rich.console as console logger = console.Console(width=135, log_path=False, log_time_format='[%x %X]', color_system='truecolor') else: logger = Logger() # if in_notebook: # logger = logger.console TWO_PI = 2. * PI # NetworkOutputs: type = tuple[torch.Tensor, torch.Tensor, torch.Tensor] # NetworkOutputs = "tuple[torch.Tensor, torch.Tensor, torch.Tensor]" def rand_unif(shape: Union[tuple, list], a: float, b: float, requires_grad: bool): return (a - b) * torch.rand(shape, requires_grad=requires_grad) + b
def __init__(self):
    """Create the console, executor, prompt session and translator."""
    # Output goes through a rich console.
    self.console = rich_console.Console()
    # Collaborators, created in this order.
    self.executor = KubicExecutor()
    self.session = PromptSession()
    self.translator = KubicTranslator()
# Third party library imports import boto3 import botocore from rich import console from libs.display import print_report, print_summary from libs.iam_scan import complete_source_arn, iam_display, iam_display_roles, iam_extract, iam_simulate from libs.scan import aws_scan from libs.tools import Cache, NoColor from config import variables # Debug # from pdb import set_trace as st CONSOLE = console.Console() VERSION = '4.0.0' def audit_handler(session, args, meta_types, cache): """ Handle audit argument """ assets = aws_scan(session, cache, iam_action_passlist=variables.IAM_ACTION_PASSLIST, iam_rolename_passlist=variables.IAM_ROLENAME_PASSLIST, public_only=False, meta_types=meta_types, name_filter=args.name, console=CONSOLE)
# NOTE: activate connection with `nc socket.cryptohack.org 13377` # `nc` is netcat import json from pwn import * # pip install pwntools from Crypto.Util.number import long_to_bytes import codecs import base64 from rich import console out = console.Console() r = remote("socket.cryptohack.org", 13377, level="debug") def json_recv(): line = r.recvline() return json.loads(line.decode()) def json_send(hsh): request = json.dumps(hsh).encode() r.sendline(request) def decode(encoded, encoding): # TODO: mv encoding steps into decoding if encoding == "base64": decoded = base64.b64decode(encoded).decode() elif encoding == "hex":
class Widget(abc.ABC):
    """Abstract base for widgets shown through a live display.

    A widget is used as a context manager: entering it starts an
    ``r_live.Live`` display of ``renderable`` on ``console`` and leaving it
    stops the display.  When ``tc_log`` is set, a "started" and a "finished"
    message are logged through it.
    """

    # Optional title; also used as prefix of the log messages.
    title: t.Optional[r_console.RenderableType] = None
    # Optional logger for the start/finish messages.
    tc_log: logger.CustomLogger = None
    # Refresh rate handed to the live display.
    refresh_per_second: int = 10
    # Recording console; this default instance is created once with the class.
    console: r_console.Console = r_console.Console(record=True)

    @property
    @abc.abstractmethod
    def renderable(self) -> r_console.RenderableType:
        ...

    def __post_init__(self):
        self._live = r_live.Live(
            self.renderable,
            console=self.console,
            refresh_per_second=self.refresh_per_second,
            transient=True,
        )

    def __enter__(self) -> "Widget":
        self._start_time = datetime.datetime.now()

        if self.tc_log is not None:
            prefix = ''
            if bool(self.title):
                prefix = self.title + ' '
            self.tc_log.info(msg=prefix + "started ...")

        # A fresh live display is created for every entry.
        live = r_live.Live(
            self.renderable,
            refresh_per_second=self.refresh_per_second,
            console=self.console
        )
        self._live = live
        live.start(refresh=False)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._live.stop()
        self.console.print("")

        if self.tc_log is None:
            return

        # todo: use `self.console.extract_*` methods to get console frame and
        #   log it via self.tc_log .... need to do this because the
        #   RichHandler is not able to write things to file like FileHandler
        #   ... explore later
        elapsed = datetime.datetime.now() - self._start_time
        seconds = elapsed.total_seconds()
        prefix = ''
        if bool(self.title):
            prefix = self.title + ' '
        # The exported text is not logged yet, but the export call is kept:
        # with rich's default arguments it also clears the record buffer.
        captured = self.console.export_text()
        self.tc_log.info(
            msg=prefix + f"finished in {seconds} seconds ...\n---\n"
            # + captured
        )

    def refresh(self, update_renderable: bool = False):
        """Redraw the live display.

        Args:
            update_renderable: Pass True when renderable components were
                changed on the fly; the display is then updated with the
                current ``renderable`` instead of merely refreshed.
        """
        if self._live is None:
            return
        if not update_renderable:
            self._live.refresh()
            return
        self._live.update(
            renderable=self.renderable, refresh=True
        )
def main(argv):
  """Entry point: set up the devices and run the training/evaluation loop.

  The function validates the command line, seeds the random generators,
  creates a per-run output directory and stores the flag values there as
  JSON.  When ``FLAG_TASK`` is ``train`` it loads the model, builds the
  optimizer, datasets and step functions and then alternates between
  training and evaluation batches, logging to TensorBoard and saving the
  model every ``FLAG_SAVE_PERIOD_MIN`` minutes.

  Args:
    argv: Command-line arguments; ``argv[0]`` is used as the module key to
      look up the flags, and more than one element is rejected.

  Raises:
    app.UsageError: If more than one command-line argument is given.
    RuntimeError: If the output directory is not a directory.
    ValueError: If the optimizer type flag is not supported.
    NotImplementedError: If the dataset name flag is not supported.
  """
  # NOTE(review): the block nesting of this function was reconstructed from
  # a whitespace-collapsed source (using the widths of the banner comments
  # as a guide); confirm the indentation against the original file.

  ##############################################################################
  # Initial Setup. Logging, Flags, Random seeds.
  ##############################################################################
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")
  absl_logging.use_python_logging()
  # Snapshot of every flag of this module, saved to JSON and TensorBoard below.
  flags_dict = {
      flag.name: flag.value
      for flag in FLAGS.flags_by_module_dict()[argv[0]]
  }

  if FLAGS.use_subset:
    message = (f"{colorama.Back.RED}{colorama.Fore.WHITE}"
               f"{colorama.Style.BRIGHT}USING A SUBSET OF THE DATASET"
               f"{colorama.Style.RESET_ALL}")
    LOGGER.warning(message)

  utils.log_module_args(LOGGER, argv[0])
  # Paths starting with "gs://" are exempt from the local checks below.
  if not FLAGS.output_dir.startswith("gs://"):
    utils.check_exists(FLAG_OUTPUT_DIR.value)
    if not tf.io.gfile.isdir(FLAG_OUTPUT_DIR.value):
      raise RuntimeError("Output dir needs to be a directory.")

  tf.random.set_seed(FLAG_RANDOM_SEED.value)
  np.random.seed(FLAG_RANDOM_SEED.value)

  # Prepare the instance output directory path and save the config there
  # Prepare the path
  folder_name = time.strftime(
      f"{FLAG_RUN_NAME.value}_{FLAG_APPROACH_TYPE.value}_%Y%m%d-%H%M%S")
  instance_output_dir = os.path.join(FLAG_OUTPUT_DIR.value, folder_name).strip()
  if not instance_output_dir.endswith("/"):
    instance_output_dir += "/"
  json_target = os.path.join(instance_output_dir, "training_params.json")

  # Make the folder if we're not on gcloud
  if not json_target.strip().startswith("gs://"):
    subprocess.check_call(["mkdir", "-p", instance_output_dir])

  # Save the config file
  utils.to_json_file(json_target, flags_dict)

  ##############################################################################
  # Initialization and Configuration of the Devices.
  ##############################################################################
  tpu_setup = None

  # The "local TPU" flag and the detected accelerator must agree.
  accel = tf_utils.current_accelerator_type()
  if FLAG_TPU_IS_LOCAL.value:
    assert accel == "TPU", accel
  if accel == "TPU":
    assert FLAG_TPU_IS_LOCAL.value, FLAG_TPU_IS_LOCAL.value

  if tf_utils.current_accelerator_type() in {"CPU", "TPU"}:
    tpu_setup = tf_utils.init_tpus(tpu_name=FLAG_TPU_NAME.value,
                                   local=FLAG_TPU_IS_LOCAL.value)

  LOGGER.debug("Devices we are computing on:\n%s",
               utils.wrap_iterable(map(str, tf_utils.devices_to_use())))
  LOGGER.debug("All devices:")
  LOGGER.debug(tf_utils.device_mapping())

  if tf_utils.current_accelerator_type() == "GPU":
    tf.config.set_soft_device_placement(True)

  if tf_utils.current_accelerator_type() != "TPU":
    tf.debugging.set_log_device_placement(True)

  # Presumably fails when the accelerator type is "CPU" -- confirm against
  # `utils.check_operator`.
  utils.check_operator(operator.ne, tf_utils.current_accelerator_type(), "CPU")

  assert FLAG_TPU_NAME.value == socket.gethostname(), (
      "This is a configuration choice. You can remove this. "
      "There will be no side effects.")

  # Number of replicas a batch is split over; depends on the distribute mode.
  if FLAG_DISTRIBUTE_MODE.value in constants.PURE_DATA_PARALLEL_STRATEGIES:
    actual_num_replicas = len(tf_utils.devices_to_use())
  elif FLAG_DISTRIBUTE_MODE.value in constants.DATA_PARALLEL_DMC:
    actual_num_replicas = FLAG_NUM_REPLICAS.value
  else:
    actual_num_replicas = 1

  ##############################################################################
  # We load the retriever model if it is needed.
  ##############################################################################
  # Not currently used. See old commits.
  retriever = None

  ##############################################################################
  # Distributed training task
  ##############################################################################
  if FLAG_TASK.value == constants.TaskChoices.train:
    with utils.log_duration(LOGGER, "main", "Load model"):
      utils.print_mem("before loading model", LOGGER)
      model_specific = task_specific.load_model(
          FLAG_MODEL_KEY.value, FLAG_DISTRIBUTE_MODE.value, tpu_setup,
          FLAG_NUM_REPLICAS.value)
      utils.print_mem("after loading model", LOGGER)

    model = model_specific.model
    # Annotation-only statements: they document the two possible types.
    if isinstance(model, list):
      model: List[transformers.TFGPT2LMHeadModel]
    else:
      model: transformers.TFGPT2LMHeadModel

    tokenizer = model_specific.tokenizer

    def make_optimizer():
      """Build the optimizer selected by FLAG_OPTIMIZER_TYPE."""
      if FLAG_OPTIMIZER_TYPE.value == constants.OptimizerTypes.adafactor:
        return tensor2tensor.utils.adafactor.AdafactorOptimizer(
            learning_rate=FLAG_LEARNING_RATE.value)
      elif FLAG_OPTIMIZER_TYPE.value == constants.OptimizerTypes.adam:
        return tf.keras.optimizers.Adam(
            learning_rate=FLAG_LEARNING_RATE.value)
      else:
        raise ValueError(FLAG_OPTIMIZER_TYPE.value)

    # Create the optimizer inside the strategy scope when there is one.
    if model_specific.strategy:
      with model_specific.strategy.scope():
        optimizer = make_optimizer()
    else:
      optimizer = make_optimizer()

    ############################################################################
    # Prepare the dataset functions
    ############################################################################
    rg = np.random.default_rng(FLAG_RANDOM_SEED.value)

    def call_lm_preproc(repeat, split, random_seed):
      """Using functools.partial prevents the linter from doing its job."""
      if FLAG_DATASET_NAME.value == constants.DatasetNameChoices.kilt_eli5:
        return task_specific.create_lm_ds_kilt_eli5(
            tokenizer=tokenizer,
            context_window_size=model.config.n_positions,
            dataset_name=FLAG_DATASET_NAME.value,
            # Batches are split over the replicas:
            batch_size=FLAG_BATCH_SIZE.value * actual_num_replicas,
            db_path=FLAG_DB_PATH.value,
            random_seed=random_seed,
            use_subset=FLAG_USE_SUBSET.value,
            subset_size=FLAG_SUBSET_SIZE.value,
            use_helper_words=FLAG_USE_HELPER_WORDS.value,
            approach_type=FLAG_APPROACH_TYPE.value,
            num_retrievals=FLAG_NUM_RETRIEVALS.value,
            retrieval_temperature=FLAG_RETRIEVAL_TEMPERATURE.value,
            retriever=retriever,
            repeat=repeat,
            split=split,
            enable_debug_checks=FLAG_DATASET_DEBUG.value,
            retrieval_bank_size=FLAG_RETRIEVAL_BANK_SIZE.value,
            dataset_type=FLAG_DATASET_TYPE.value,
            qty_shuffle=FLAG_QTY_SHUFFLE.value,
            tfr_prefix=FLAG_TFR_PREFIX.value,
            max_length_generation=FLAG_MAX_LENGTH_GENERATION.value,
        )
      else:
        raise NotImplementedError(
            f"FLAG_DATASET_NAME.value unsupported: `{FLAG_DATASET_NAME.value}`"
        )

    # The training dataset does not repeat; the eval dataset does.
    make_training_dataset: Callable[..., tf.data.Dataset] = functools.partial(
        call_lm_preproc,
        split="train",
        repeat=False,
    )
    make_eval_dataset: Callable[..., tf.data.Dataset] = functools.partial(
        call_lm_preproc,
        split="eval",
        repeat=True,
    )

    ############################################################################
    # Prepare the step functions
    ############################################################################
    utils.check_contained(FLAG_DISTRIBUTE_MODE.value,
                          constants.DistributeModeChoices.choices())
    tf_function_flags = dict(
        experimental_compile=FLAG_EXPERIMENTAL_COMPILE.value,
        experimental_relax_shapes=not FLAG_INPUT_FIXED_SIZE.value)

    training_step = build_regular_training_step(
        model,
        optimizer,
        strategy=model_specific.strategy,
        tf_function_kwargs=tf_function_flags)

    evaluation_step = build_evaluation_step(model, tf_function_flags)

    timestamp_last_ckpt_secs = time.time()
    # Model checkpoints are saved to the tmp_directory and then rsynced to GCS

    ############################################################################
    # Prepare the statistics and the logging facilities.
    ############################################################################
    # Tensorboard
    # NOTE(review): the strategy is used unconditionally from here on, while
    # the optimizer creation above also handles a falsy strategy -- confirm.
    with model_specific.strategy.scope():
      checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    saver = Saver(instance_output_dir, checkpoint)
    train_log_dir = os.path.join(instance_output_dir, "tensorboard", "train")
    eval_log_dir = os.path.join(instance_output_dir, "tensorboard", "eval")
    flags_log_dir = os.path.join(instance_output_dir, "tensorboard", "params")
    writers = dict(train=tf.summary.create_file_writer(train_log_dir),
                   eval=tf.summary.create_file_writer(eval_log_dir),
                   flags=tf.summary.create_file_writer(flags_log_dir))
    with writers["flags"].as_default():
      tf.summary.text(
          "Flags",
          # Tensorboard takes Markdown:
          json.dumps(flags_dict, indent=4).replace("\n", "\n\n"),
          step=0)

    # Different information to log.
    ma_loss = dict(train=utils.MovingAverage(0.9),
                   eval=utils.MovingAverage(0.9))
    step_counters = dict(train=0, eval=0)
    batch_counters = dict(train=0, eval=0)
    prev_batch_end = time.time()

    ############################################################################
    # Create the Eval DS object.
    # ==========================================================================
    # The eval ds has no real concept of epoch, repeats forever, shuffling
    # each time it reaches its end.
    ############################################################################
    # Create
    with utils.log_duration(LOGGER, "main", "All of make_eval_dataset"):
      eval_ds_instance = make_eval_dataset(random_seed=rg.integers(
          -2**63, 2**63 - 1), )
    # Maybe distribute
    LOGGER.debug("Distributing the eval dataset to the replicas.")
    if FLAG_DATASET_TYPE.value == "tfr":
      eval_ds_instance = (
          model_specific.strategy.experimental_distribute_dataset(
              eval_ds_instance))
    # Start the iteration. We step by calling `next(...)`.
    LOGGER.debug("Done distributing the eval dataset to the replicas.")
    eval_ds_instance = iter(eval_ds_instance)
    step_function = dict(train=training_step, eval=evaluation_step)

    ############################################################################
    # Training Loop
    # ==========================================================================
    # Create a new training dataset object that lasts for one epoch.
    # This is different from the eval training dataset object, which loops
    # forever.
    ############################################################################
    # NOTE: `itertools.count()` never ends and no `break` appears in this
    # loop, so the loop only stops through an exception.
    for epoch in itertools.count():
      ##########################################################################
      # Epoch Setup
      ##########################################################################
      LOGGER.debug("EPOCH %d START", epoch)
      # Shuffle differently every epoch
      with utils.log_duration(LOGGER, "main", "All of make_training_dataset"):
        train_ds_instance = make_training_dataset(
            random_seed=rg.integers(-2**63, 2**63 - 1), )
      LOGGER.debug(
          "Attempting to distribute the training dataset to the replicas."
      )
      if FLAG_DATASET_TYPE.value == "tfr":
        train_ds_instance = (
            model_specific.strategy.experimental_distribute_dataset(
                train_ds_instance))
      LOGGER.debug(
          "Done distributing the training dataset to the replicas.")
      train_ds_instance = iter(train_ds_instance)

      # To change splits, we use `itertools.islice` over the dataset generator.
      # When the training dataset generator is done, a new loop of the following
      # while loop occurs, but no training batch is done because we are taking
      # an `islice` of a generator that is done.
      did_at_least_one_training_batch = True
      # Starts as "eval" so that the first inversion below selects "train".
      split = "eval"
      while did_at_least_one_training_batch:
        utils.check_operator(operator.ne,
                             tf_utils.current_accelerator_type(), "CPU")
        # Invert split
        if split == "train":
          split = "eval"
        else:
          split = "train"

        # Prepare to test if we did at least one training batch
        if split == "train":
          did_at_least_one_training_batch = False

        ########################################################################
        # Take slices from the dataset iterator
        # ======================================================================
        # We only want to do a certain number of batches before switching splits
        # We do this by using an `itertools.islice` of the dataset iterators.
        ########################################################################
        if split == "train":
          dataset_iterator = toolz.take(
              FLAG_BATCHES_BETWEEN_EVALS.value, train_ds_instance)
        else:
          # The evaluation dataset generator is infinite, reshuffles everytime
          # it gets to its end.
          # Still, we take a fixed size slice from that infinite generator.
          dataset_iterator = toolz.take(
              FLAG_NUMBER_EVAL_BATCHES.value, eval_ds_instance)

        LOGGER.debug("Batching")
        for batch in dataset_iterator:
          if FLAG_LOG_SAMPLES.value:
            ####################################################################
            # Print elements of the dataset
            ####################################################################
            # Make ourselves resistant to values possibly being a PerReplica
            # object
            LOGGER.warning(
                f"%(red)sLOGGING SAMPLES. THIS IS VERY SLOW.%(reset)s",
                dict(
                    red=colorama.Fore.RED,
                    reset=colorama.Style.RESET_ALL,
                ))
            is_distributed = isinstance(batch["input_ids"], values.PerReplica)
            for in_batch_idx in range(FLAG_BATCH_SIZE.value):
              for replica_idx in (range(actual_num_replicas)
                                  if is_distributed else [0]):
                if is_distributed:
                  sample = {
                      k: batch[k].values[replica_idx] for k in batch
                  }
                else:
                  sample = batch

                # input_sentence = tokenizer.decode(
                #   [x for x in sample["input_ids"][i] if x != tokenizer.eos_token_id]
                # )
                # LOGGER.debug(
                #   "%sInput [%d / %d]%s:\n\"%s\"",
                #   colorama.Fore.GREEN,
                #   replica_idx + 1,
                #   actual_num_replicas,
                #   colorama.Style.RESET_ALL,
                #   input_sentence,
                # )
                #
                # answer = tokenizer.decode(
                #   [(x if x != -100 else 0) for x in sample["label_ids"][i]]
                # )
                # LOGGER.debug(
                #   "%sLabel [%d / %d]%s:\n\"%s\"",
                #   colorama.Fore.GREEN,
                #   replica_idx + 1,
                #   actual_num_replicas,
                #   colorama.Style.RESET_ALL,
                #   answer,
                # )

                # One table row per token position: decoded input and label.
                cons = console.Console()
                sentences = table.Table()
                sentences.add_column("BPE Index", justify="center")
                sentences.add_column("Inputs", justify="center")
                sentences.add_column("Labels", justify="center")
                # NOTE(review): `zip_longest` pads the shorter sequence with
                # None, and `None >= 0` raises TypeError -- confirm that
                # input_ids and label_ids always have the same length.
                for bpe_idx, (x, y) in enumerate(
                    itertools.zip_longest(
                        sample["input_ids"][in_batch_idx].numpy(),
                        sample["label_ids"][in_batch_idx].numpy(),
                        fillvalue=None,
                    )):
                  # Negative ids are shown verbatim instead of being decoded.
                  x_w = tokenizer.decode(
                      [x]) if x >= 0 else f"[ {x} ]"
                  y_w = tokenizer.decode(
                      [y]) if y >= 0 else f"[ {y} ]"
                  sentences.add_row(str(bpe_idx), x_w, y_w)

                cons.print(sentences)

          # We only care about training epochs as, obviously, we don't train
          # over eval samples; the number of eval samples seen only
          # contributes to lowering the variance in the evaluation of when to
          # do early stopping.
          if split == "train":
            did_at_least_one_training_batch = True

          input_ids = batch["input_ids"]
          label_ids = batch["label_ids"]

          # Per split step counter
          step_counters[
              split] += FLAG_BATCH_SIZE.value * actual_num_replicas
          batch_counters[split] += 1

          ######################################################################
          # Model step function.
          ######################################################################
          step_function_kwargs = dict(
              input_ids=input_ids,
              label_ids=label_ids,
          )

          utils.print_mem(f"[{split}] - Mem before `strategy.run`", LOGGER)
          LOGGER.debug("[%s] - Calling `strategy.run`", split)
          loss = model_specific.strategy.run(
              step_function[split], kwargs=step_function_kwargs)
          LOGGER.debug("[%s] - Done `strategy.run`", split)
          utils.print_mem(f"[{split}] - Mem after `strategy.run`", LOGGER)

          ####################################################################
          # End of logging step code / Logging and saving the model.
          ####################################################################
          # In the pure data-parallel modes the loss is averaged over
          # `loss.values`; otherwise it is read directly.
          if (FLAG_DISTRIBUTE_MODE.value in
              constants.PURE_DATA_PARALLEL_STRATEGIES):
            utils.check_equal(len(loss.values), actual_num_replicas)
            LOGGER.debug("[%s] - Real num replicas: %s", split,
                         actual_num_replicas)
            average_loss = float(
                tf.math.reduce_mean(loss.values).numpy())
            LOGGER.debug("[%s] - Loss: %s", str(split), str(average_loss))
          else:
            average_loss = float(loss.numpy())

          tf.debugging.check_numerics(
              loss.values if isinstance(loss, values.PerReplica) else loss,
              "Numerics failed.")

          # Wall-clock time since the end of the previous batch.
          now = time.time()
          batch_duration = now - prev_batch_end
          prev_batch_end = now
          ma_loss[split].update(average_loss)

          LOGGER.info("[%s] - Epoch: # %d", split, epoch)
          LOGGER.info("[%s] - Tensorboard_dir: %s", split,
                      instance_output_dir)
          LOGGER.info("[%s] - Batch: # %d", split, batch_counters[split])
          LOGGER.info("[%s] - Step: # %d", split, step_counters[split])
          if FLAG_USE_SUBSET.value:
            LOGGER.warning(">> USING A SUBSET OF THE DATASET <<")
          LOGGER.info(
              "[%(split)s] - Batch loss: %(metric)f",
              dict(split=split, metric=average_loss))
          LOGGER.info(
              "[%(split)s] - Moving average loss: %(metric)f",
              dict(split=split, metric=ma_loss[split].average))
          LOGGER.info(
              "[%(split)s] - Moving average ppl: %(metric)f",
              dict(split=split, metric=np.exp(ma_loss[split].average)))
          LOGGER.info(
              "[%(split)s] - Batch duration: %(duration)s",
              dict(split=split,
                   duration=utils.TimeStamp.from_seconds(
                       batch_duration).format()))

          # Write to Tensorboard
          with writers[split].as_default():
            tf.summary.scalar(f"Loss/{split}", average_loss,
                              step_counters[split])
            tf.summary.scalar(f"PPL/{split}", np.exp(average_loss),
                              step_counters[split])
          writers[split].flush()

          ######################################################################
          # Save every `FLAG_SAVE_PERIOD_MIN.value` minutes.
          ######################################################################
          delta_sec = time.time() - timestamp_last_ckpt_secs
          utils.check_operator(operator.gt, delta_sec, 0)
          period_sec = 60 * FLAG_SAVE_PERIOD_MIN.value
          utils.check_operator(operator.gt, period_sec, 0)
          # A ratio >= 1 means a full save period has elapsed.
          ratio = delta_sec / period_sec
          LOGGER.info(
              "[%(split)s] - RATIO: %(ratio)s",
              dict(split=split, ratio=str(ratio)))
          LOGGER.info(
              "[%(split)s] - Target: %(target)s, Present: %(present)s",
              dict(
                  split=split,
                  target=str(period_sec),
                  present=str(delta_sec),
              ))

          if ratio >= 1:
            dur = delta_sec / 60
            timestamp_last_ckpt_secs = time.time()
            LOGGER.debug(
                "SAVING MODEL - CAUSE: DURATION - %0.2f min", dur)
            # checkpoint.save(ckpt_prefix)
            saver.save_model(
                train_steps=step_counters["train"],
                model_or_replicas=model,
                optimizer=optimizer,
            )

    ############################################################################
    # Post Training Cleanup
    ############################################################################
    for writer in writers.values():
      writer.close()
from rich import console as _console, print, inspect

# Module-wide console used by `spawn.log`.
console = _console.Console()

from .config import config
from .script import *

import json
import os
import sys
import time


# NOTE(review): `self.done` is called below but not defined in the visible
# part of the class; the class presumably continues past this point.
class spawn():

    def __init__(self, signature, *a, **k):
        """Record the signature and start time, then run ``process``.

        ``process`` is not defined in the visible part of this file
        (presumably provided by ``from .script import *`` -- confirm).
        """
        self.signature = signature
        self.start = time.time()
        process(signature, *a, **k)
        self.log()

    def log(self, *a, **k):
        """Print the space-joined ``a`` behind a dim pipe character."""
        # "┣" introduces a message; a call without arguments prints "┃" only.
        pipe = "┣"
        if (len(a) == 0):
            pipe = "┃"
        # NOTE(review): passing `style` in `k` would collide with the
        # explicit style="white" keyword here.
        console.print(f"[dim]{pipe}", " ".join(a), **k, style="white")

    def warn(self, *a, **k):
        """Forward the concatenated ``a`` to the module-level ``warn``."""
        # pipe = "┃"
        # Unlike `log`, the parts are joined without a separator.
        warn(f"[dim white]", "".join(a), **k)

    def fail(self, msg="Fail"):
        """Print an empty log line, then call ``self.done(msg, "red")``."""
        self.log()
        self.done(msg, "red")