def setup_logger(args):
    aggregator_dict = OrderedDict([
        ('loss', 'average'),
        ('weighted_loss', 'average'),
        ('tokens', ('average', 'performance')),
        ('updates', 'performance'),
        ('gnorm', 'average')
    ])
    os.makedirs(args.save_dir, exist_ok=True)
    log_path = os.path.join(args.save_dir, args.stat_file)

    if os.path.exists(log_path):
        for i in itertools.count():
            s_fname = args.stat_file.split('.')
            fname = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1] if len(s_fname) > 1 else args.stat_file + f'.{i}'
            log_path = os.path.join(args.save_dir, fname)
            if not os.path.exists(log_path):
                break

    if not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[
            JSONStreamBackend(verbosity=1, filename=log_path),
            AggregatorBackend(verbosity=0, agg_dict=aggregator_dict),
            TensorBoardBackend(verbosity=1, log_dir=args.save_dir)
        ])
    else:
        dllogger.init(backends=[])

    for k, v in vars(args).items():
        dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)

    container_setup_info = get_framework_env_vars()
    dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('val_loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'VAL'})
    dllogger.metadata('speed', {'unit': 'tokens/s', 'format': ':.3f', 'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('accuracy', {'unit': 'bleu', 'format': ':.2f', 'GOAL': 'MAXIMIZE', 'STAGE': 'VAL'})
def main():
    # setup params
    arguments = PARSER.parse_args()
    params = Namespace(**{**vars(CONFIG), **vars(arguments)})

    # setup logging
    # noinspection PyArgumentList
    logging.basicConfig(
        level=logging.DEBUG if params.verbose else logging.INFO,
        format='{asctime} {levelname:.1} {name:15} {message}',
        style='{'
    )
    # remove custom tf handler that logs to stderr
    logging.getLogger('tensorflow').setLevel(logging.WARNING)
    logging.getLogger('tensorflow').handlers.clear()

    # setup dllogger
    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=params.log_file),
        LoggingBackend(verbosity=dllogger.Verbosity.VERBOSE)
    ])
    dllogger.log(step='PARAMETER', data=vars(params))

    # setup dataset
    dataset = Dataset(params)

    if params.mode in ['train', 'train_and_eval']:
        run_training(dataset, params)
    if params.mode == 'eval':
        run_evaluation(dataset, params)
    if params.mode == 'infer':
        run_inference(dataset, params)
def setup_training(args): assert (torch.cuda.is_available()) if args.local_rank == -1: device = torch.device("cuda") args.n_gpu = torch.cuda.device_count() args.allreduce_post_accumulation = False args.allreduce_post_accumulation_fp16 = False else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='smddp', init_method='env://') args.n_gpu = 1 if args.gradient_accumulation_steps == 1: args.allreduce_post_accumulation = False args.allreduce_post_accumulation_fp16 = False if is_main_process(): dllogger.init(backends=[ dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=args.json_summary), dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE, step_format=format_step) ]) else: dllogger.init(backends=[]) print( "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}". format(device, args.n_gpu, bool(args.local_rank != -1), args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(args.gradient_accumulation_steps)) if args.train_batch_size % args.gradient_accumulation_steps != 0: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, batch size {} should be divisible" .format(args.gradient_accumulation_steps, args.train_batch_size)) args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps if not args.do_train: raise ValueError(" `do_train` must be True.") if not args.resume_from_checkpoint and os.path.exists( args.output_dir) and (os.listdir(args.output_dir) and any( [i.startswith('ckpt') for i in os.listdir(args.output_dir)])): raise ValueError( "Output directory ({}) already exists and is not empty.".format( args.output_dir)) if (not args.resume_from_checkpoint or not os.path.exists(args.output_dir)) and is_main_process(): os.makedirs(args.output_dir, exist_ok=True) return device, args
def init(log_fpath, log_dir, enabled=True, tb_subsets=[], **tb_kw):
    if enabled:
        backends = [
            JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(log_fpath)),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=stdout_step_format,
                          metric_format=stdout_metric_format)
        ]
    else:
        backends = []

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    for id_, pref in [('train', ''), ('train_avg', 'avg train '),
                      ('val', ' avg val '), ('val_ema', ' EMA val ')]:
        dllogger.metadata(f"{id_}_loss", {"name": f"{pref}loss", "format": ":>5.2f"})
        dllogger.metadata(f"{id_}_mel_loss", {"name": f"{pref}mel loss", "format": ":>5.2f"})
        dllogger.metadata(f"{id_}_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
        dllogger.metadata(f"{id_}_took", {"name": "took", "unit": "s", "format": ":>3.2f"})

    global tb_loggers
    tb_loggers = {s: TBLogger(enabled, log_dir, name=s, **tb_kw) for s in tb_subsets}
def init_dllogger(log_fpath=None, dummy=False):
    if dummy:
        DLLogger.init(backends=[])
        return
    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, log_fpath),
        StdOutBackend(Verbosity.VERBOSE,
                      step_format=stdout_step_format,
                      metric_format=stdout_metric_format)
    ])
    DLLogger.metadata("train_loss", {"name": "loss", "format": ":>5.2f"})
    DLLogger.metadata("train_mel_loss", {"name": "mel loss", "format": ":>5.2f"})
    DLLogger.metadata("avg_train_loss", {"name": "avg train loss", "format": ":>5.2f"})
    DLLogger.metadata("avg_train_mel_loss", {"name": "avg train mel loss", "format": ":>5.2f"})
    DLLogger.metadata("val_loss", {"name": " avg val loss", "format": ":>5.2f"})
    DLLogger.metadata("val_mel_loss", {"name": " avg val mel loss", "format": ":>5.2f"})
    DLLogger.metadata("val_ema_loss", {"name": " EMA val loss", "format": ":>5.2f"})
    DLLogger.metadata("val_ema_mel_loss", {"name": " EMA val mel loss", "format": ":>5.2f"})
    DLLogger.metadata("train_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata("avg_train_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata("val_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata("val_ema_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata("took", {"name": "took", "unit": "s", "format": ":>3.2f"})
    DLLogger.metadata("lrate_change", {"name": "lrate"})
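# Illustrative usage sketch (not part of the original code): how a training loop might
# emit metrics that match the metadata registered in init_dllogger above. The log path,
# step tuples, and metric values are placeholders chosen for the example.
init_dllogger("nvlog.json")
DLLogger.log(step=(1, 100), data={"train_loss": 4.73, "train_mel_loss": 3.12, "train_frames/s": 25000.0})
DLLogger.log(step=(1,), data={"val_loss": 4.51, "val_mel_loss": 2.98, "took": 12.4})
DLLogger.flush()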
def __init__(self, log_file, global_batch_size, warmup_steps: int = 0, profile: bool = False):
    logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_file),
                          StdOutBackend(Verbosity.VERBOSE)])
    self.warmup_steps = warmup_steps
    self.global_batch_size = global_batch_size
    self.step = 0
    self.profile = profile
    self.timestamps = []
def main(): """ Launches inference benchmark. Inference is executed on a single GPU. """ parser = argparse.ArgumentParser( description='PyTorch FastPitch Inference Benchmark') parser = parse_args(parser) args, _ = parser.parse_known_args() log_file = args.log_file DLLogger.init(backends=[ JSONStreamBackend(Verbosity.DEFAULT, args.log_file), StdOutBackend(Verbosity.VERBOSE) ]) for k, v in vars(args).items(): DLLogger.log(step="PARAMETER", data={k: v}) DLLogger.log(step="PARAMETER", data={'model_name': 'FastPitch_PyT'}) model = load_and_setup_model('FastPitch', parser, None, args.amp_run, 'cuda', unk_args=[], forward_is_infer=True, ema=False, jitable=True) # FIXME Temporarily disabled due to nn.LayerNorm fp16 casting bug in pytorch:20.02-py3 and 20.03 # model = torch.jit.script(model) warmup_iters = 3 iters = 1 gen_measures = MeasureTime() all_frames = 0 for i in range(-warmup_iters, iters): text_padded = torch.randint(low=0, high=148, size=(args.batch_size, 128), dtype=torch.long).to('cuda') input_lengths = torch.IntTensor([text_padded.size(1)] * args.batch_size).to('cuda') durs = torch.ones_like(text_padded).mul_(4).to('cuda') with torch.no_grad(), gen_measures: mels, *_ = model(text_padded, input_lengths, dur_tgt=durs) num_frames = mels.size(0) * mels.size(2) if i >= 0: all_frames += num_frames DLLogger.log(step=(i, ), data={"latency": gen_measures[-1]}) DLLogger.log(step=(i, ), data={"frames/s": num_frames / gen_measures[-1]}) measures = gen_measures[warmup_iters:] DLLogger.log(step=(), data={'avg latency': np.mean(measures)}) DLLogger.log(step=(), data={'avg frames/s': all_frames / np.sum(measures)}) DLLogger.flush()
def get_logger(params):
    backends = []
    if hvd.rank() == 0:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            backends += [JSONStreamBackend(Verbosity.VERBOSE, params.log_dir)]
    logger.init(backends=backends)
    return logger
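# Illustrative usage sketch (not part of the original code), assuming Horovod is already
# initialized and `params.log_dir` points at a writable JSON log file path.
logger = get_logger(params)
logger.log(step=(0,), data={"train_loss": 0.42})
logger.flush()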
def _initialize_dllogger(self, log_dir, filename, append):
    backends = [
        JSONStreamBackend(Verbosity.VERBOSE, os.path.join(log_dir, filename), append=append),
        StdOutBackend(Verbosity.VERBOSE),
    ]
    logger.init(backends=backends)
def __init__(self, print_interval, backends, verbose=False):
    self.epoch = -1
    self.iteration = -1
    self.val_iteration = -1
    self.metrics = OrderedDict()
    self.backends = backends
    self.print_interval = print_interval
    self.verbose = verbose
    dllogger.init(backends)
def __init__(self, log_dir, global_batch_size, mode, warmup, dim, profile):
    logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_dir),
                          StdOutBackend(Verbosity.VERBOSE)])
    self.warmup_steps = warmup
    self.global_batch_size = global_batch_size
    self.step = 0
    self.dim = dim
    self.mode = mode
    self.profile = profile
    self.timestamps = []
def main():
    args = parse_args()
    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
    ])
    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users, nb_items=args.n_items, mf_dim=args.factors,
                  mlp_layer_sizes=args.layers, dropout=args.dropout)
    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)

    if args.opt_level == "O2":
        model = amp.initialize(model, opt_level=args.opt_level,
                               keep_batchnorm_fp32=False, loss_scale='dynamic')
    model.eval()

    users = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_users)
    items = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        torch.cuda.synchronize()
        start = time.time()
        predictions = model(users, items, sigmoid=True)
        torch.cuda.synchronize()
        latencies.append(time.time() - start)

    dllogger.log(data={
        'batch_size': args.batch_size,
        'best_inference_throughput': args.batch_size / min(latencies),
        'best_inference_latency': min(latencies),
        'mean_inference_throughput': args.batch_size / np.mean(latencies),
        'mean_inference_latency': np.mean(latencies),
        'inference_latencies': latencies
    }, step=tuple())
    dllogger.flush()
    return
def setup_logger(args):
    aggregator_dict = OrderedDict([('loss', 'average'),
                                   ('weighted_loss', 'average'),
                                   ('tokens', ('average', 'performance')),
                                   ('updates', 'performance'),
                                   ('gnorm', 'average')])
    os.makedirs(args.save_dir, exist_ok=True)
    log_path = os.path.join(args.save_dir, args.stat_file)

    if not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[
            JSONStreamBackend(verbosity=1, filename=log_path),
            AggregatorBackend(verbosity=0, agg_dict=aggregator_dict),
            TensorBoardBackend(verbosity=1, log_dir=args.save_dir)
        ])
    else:
        dllogger.init(backends=[])

    for k, v in vars(args).items():
        dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)

    container_setup_info = {
        'NVIDIA_PYTORCH_VERSION': os.environ.get('NVIDIA_PYTORCH_VERSION'),
        'PYTORCH_VERSION': os.environ.get('PYTORCH_VERSION'),
        'CUBLAS_VERSION': os.environ.get('CUBLAS_VERSION'),
        'NCCL_VERSION': os.environ.get('NCCL_VERSION'),
        'CUDA_DRIVER_VERSION': os.environ.get('CUDA_DRIVER_VERSION'),
        'CUDNN_VERSION': os.environ.get('CUDNN_VERSION'),
        'CUDA_VERSION': os.environ.get('CUDA_VERSION'),
        'NVIDIA_PIPELINE_ID': os.environ.get('NVIDIA_PIPELINE_ID'),
        'NVIDIA_BUILD_ID': os.environ.get('NVIDIA_BUILD_ID'),
        'NVIDIA_TF32_OVERRIDE': os.environ.get('NVIDIA_TF32_OVERRIDE'),
    }
    dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('val_loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'VAL'})
    dllogger.metadata('speed', {'unit': 'tokens/s', 'format': ':.3f', 'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('accuracy', {'unit': 'bleu', 'format': ':.2f', 'GOAL': 'MAXIMIZE', 'STAGE': 'VAL'})
def get_logger(params):
    backends = []
    if params.worker_id == 0 or params.log_all_workers:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            os.makedirs(params.log_dir, exist_ok=True)
            log_file = f"{params.log_dir}/log.json"
            backends += [JSONStreamBackend(Verbosity.VERBOSE, log_file)]
    logger.init(backends=backends)
    return logger
def get_logger(params):
    backends = []
    worker_id = hvd_rank() if horovod_enabled() else 0
    if worker_id == 0:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            os.makedirs(params.log_dir, exist_ok=True)
            log_file = f"{params.log_dir}/log.json"
            backends += [JSONStreamBackend(Verbosity.VERBOSE, log_file)]
    logger.init(backends=backends)
    return logger
def setup_logger(args):
    os.makedirs(args.results, exist_ok=True)
    log_path = os.path.join(args.results, args.log_file)
    if os.path.exists(log_path):
        for i in itertools.count():
            s_fname = args.log_file.split('.')
            fname = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1] if len(s_fname) > 1 else args.log_file + f'.{i}'
            log_path = os.path.join(args.results, fname)
            if not os.path.exists(log_path):
                break

    def metric_format(metric, metadata, value):
        return "{}: {}".format(metric, f'{value:.5f}' if isinstance(value, float) else value)

    def step_format(step):
        if step == ():
            return "Finished |"
        elif isinstance(step, int):
            return "Step {0: <5} |".format(step)
        return "Step {} |".format(step)

    if not dist.is_initialized() or not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[
            JSONStreamBackend(verbosity=1, filename=log_path),
            TensorBoardBackend(verbosity=1, log_dir=args.results),
            StdOutBackend(verbosity=2,
                          step_format=step_format,
                          prefix_format=lambda x: "")  # metric_format=metric_format
        ])
    else:
        dllogger.init(backends=[])

    dllogger.log(step='PARAMETER', data=vars(args), verbosity=0)

    container_setup_info = {**get_framework_env_vars(), **get_system_info()}
    dllogger.log(step='ENVIRONMENT', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN', 'format': ':1f'})
    dllogger.metadata('val_loss', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'VAL', 'format': ':1f'})
    dllogger.metadata('test_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('test_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('test_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('throughput', {'GOAL': 'MAXIMIZE', 'STAGE': 'TEST', 'format': ':1f'})
    dllogger.metadata('latency_p90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('latency_p95', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('latency_p99', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
def setup_dllogger(enabled=True, filename=os.devnull, rank=0):
    if enabled and rank == 0:
        backends = [
            dllogger.JSONStreamBackend(
                dllogger.Verbosity.VERBOSE,
                filename,
            ),
        ]
        dllogger.init(backends)
    else:
        dllogger.init([])
def init_logging(log_path):
    json_backend = dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                              filename=log_path)
    stdout_backend = dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)

    stdout_backend._metadata['best_auc'].update({'format': '0:.5f'})
    stdout_backend._metadata['best_epoch'].update({'format': '0:.2f'})
    stdout_backend._metadata['average_train_throughput'].update({'format': ':.2e'})
    stdout_backend._metadata['average_test_throughput'].update({'format': ':.2e'})

    dllogger.init(backends=[json_backend, stdout_backend])
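# Illustrative usage sketch (not part of the original code): after init_logging, summary
# metrics matching the formats registered above could be emitted at the end of a run.
# The log path and metric values are placeholders.
init_logging("/tmp/train_log.json")
dllogger.log(step=tuple(), data={"best_auc": 0.8027, "best_epoch": 0.92,
                                 "average_train_throughput": 3.5e6,
                                 "average_test_throughput": 9.1e6})
dllogger.flush()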
def get_dllogger(params):
    backends = []
    if is_main_process():
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            backends += [JSONStreamBackend(Verbosity.VERBOSE, os.path.join(params.log_dir, "log.json"))]
    logger.init(backends=backends)
    return logger
def init_log(args):
    enabled = not dist.is_initialized() or dist.get_rank() == 0
    if enabled:
        fpath = args.log_file or os.path.join(args.output_dir, 'nvlog.json')
        backends = [
            JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(fpath)),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=stdout_step_format,
                          metric_format=stdout_metric_format)
        ]
    else:
        backends = []

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    for id_, pref in [('train', ''), ('train_avg', 'avg train '), ('dev_ema', ' dev ema ')]:
        dllogger.metadata(f"{id_}_loss", {"name": f"{pref}loss", "format": ":>7.2f"})
        dllogger.metadata(f"{id_}_wer", {"name": f"{pref}wer", "format": ":>6.2f"})
        dllogger.metadata(f"{id_}_pplx", {"name": f"{pref}pplx", "format": ":>6.2f"})
        dllogger.metadata(f"{id_}_throughput", {"name": f"{pref}utts/s", "format": ":>5.0f"})
        dllogger.metadata(f"{id_}_took", {"name": "took", "unit": "s", "format": ":>5.2f"})

    tb_subsets = ['train', 'dev_ema']
    global tb_loggers
    tb_loggers = {s: TBLogger(enabled, args.output_dir, name=s) for s in tb_subsets}
    log_parameters(vars(args), tb_subset='train')
def setup_dllogger(rank, enabled=True, filename='log.json'):
    if enabled and rank == 0:
        backends = [
            StdOutBackend(Verbosity.DEFAULT),
            JSONStreamBackend(
                Verbosity.VERBOSE,
                filename,
            ),
        ]
        DLLogger.init(backends)
    else:
        DLLogger.init([])
def get_logger(params): """ Get logger object :param params: Dict with additional parameters :return: logger """ backends = [] if hvd.rank() == 0: backends += [StdOutBackend(Verbosity.VERBOSE)] if params.log_dir: backends += [JSONStreamBackend(Verbosity.VERBOSE, params.log_dir)] logger.init(backends=backends) return logger
def setup_dllogger(enabled=True, filename=os.devnull):
    rank = utils.distributed.get_rank()
    if enabled and rank == 0:
        backends = [
            dllogger.JSONStreamBackend(
                dllogger.Verbosity.VERBOSE,
                filename,
            ),
        ]
        dllogger.init(backends)
    else:
        dllogger.init([])
def setup_logging(args):
    logging.basicConfig(level=logging.DEBUG,
                        format='{asctime}:{levelname}: {message}',
                        style='{')
    if hvd.rank() == 0:
        dllogger.init(backends=[
            dllogger.StdOutBackend(dllogger.Verbosity.DEFAULT, step_format=format_step),
            dllogger.JSONStreamBackend(dllogger.Verbosity.VERBOSE,
                                       os.path.join(args.workspace, args.dllogger_log)),
        ])
    else:
        dllogger.init([])
def setup_training(args):
    assert torch.cuda.is_available()

    global ort_supplement
    import ort_supplement.ort_supplement as ort_supplement
    device = ort_supplement.setup_onnxruntime_with_mpi(args)

    if is_main_process(args):
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=args.json_summary),
            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE, step_format=format_step)
        ])
    else:
        dllogger.init(backends=[])

    print("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, args.n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            args.gradient_accumulation_steps))
    if args.train_batch_size % args.gradient_accumulation_steps != 0:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, batch size {} should be divisible".format(
            args.gradient_accumulation_steps, args.train_batch_size))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    if not args.do_train:
        raise ValueError("`do_train` must be True.")

    if not args.resume_from_checkpoint and os.path.exists(args.output_dir) and (
            os.listdir(args.output_dir) and any(i.startswith('ckpt') for i in os.listdir(args.output_dir))):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))

    if (not args.resume_from_checkpoint or not os.path.exists(args.output_dir)) and is_main_process(args):
        os.makedirs(args.output_dir, exist_ok=True)

    return device, args
def main():
    args = parse_args()
    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
    ])
    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users, nb_items=args.n_items, mf_dim=args.factors,
                  mlp_layer_sizes=args.layers, dropout=args.dropout)
    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)

    if args.fp16:
        model.half()
    model.eval()

    batch_sizes = [int(s) for s in args.batch_sizes.split(',')]
    result_data = {}

    for batch_size in batch_sizes:
        print('benchmarking batch size: ', batch_size)
        users = torch.cuda.LongTensor(batch_size).random_(0, args.n_users)
        items = torch.cuda.LongTensor(batch_size).random_(0, args.n_items)

        latencies = []
        for _ in range(args.num_batches):
            torch.cuda.synchronize()
            start = time.time()
            _ = model(users, items, sigmoid=True)
            torch.cuda.synchronize()
            latencies.append(time.time() - start)

        result_data[f'batch_{batch_size}_mean_throughput'] = batch_size / np.mean(latencies)
        result_data[f'batch_{batch_size}_mean_latency'] = np.mean(latencies)
        # np.percentile expects percentages in [0, 100]
        result_data[f'batch_{batch_size}_p90_latency'] = np.percentile(latencies, 90)
        result_data[f'batch_{batch_size}_p95_latency'] = np.percentile(latencies, 95)
        result_data[f'batch_{batch_size}_p99_latency'] = np.percentile(latencies, 99)

    dllogger.log(data=result_data, step=tuple())
    dllogger.flush()
    return
def __init__(self, name, json_output=None, print_freq=20):
    self.name = name
    self.train_loss_logger = IterationAverageMeter("Training loss")
    self.train_epoch_time_logger = EpochMeter("Training 1 epoch time")
    self.val_acc_logger = EpochMeter("Validation accuracy")
    self.print_freq = print_freq

    backends = [DLLogger.StdOutBackend(DLLogger.Verbosity.DEFAULT)]
    if json_output:
        backends.append(DLLogger.JSONStreamBackend(DLLogger.Verbosity.VERBOSE, json_output))
    DLLogger.init(backends)

    self.epoch = 0
    self.train_iter = 0
    self.summary = {}
def setup_logger(args):
    if not args.no_dllogger:
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=1, filename=args.stat_file)
        ])
        for k, v in vars(args).items():
            dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)
        container_setup_info = log_helper.get_framework_env_vars()
        dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)
        dllogger.metadata('throughput', {'unit': 'tokens/s', 'format': ':.3f',
                                         'GOAL': 'MAXIMIZE', 'STAGE': 'INFER'})
    else:
        dllogger.init(backends=[])
def init_logger(args, full, logger):
    if full:
        logger.setLevel(logging.INFO)
        log_path = os.path.join(args.results_dir, args.log_filename)
        os.makedirs(args.results_dir, exist_ok=True)
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
        ])
        logger.warning('command line arguments: {}'.format(json.dumps(vars(args))))
        if not os.path.exists(args.results_dir):
            os.mkdir(args.results_dir)
        with open('{}/args.json'.format(args.results_dir), 'w') as f:
            json.dump(vars(args), f, indent=4)
    else:
        logger.setLevel(logging.ERROR)
        dllogger.init(backends=[])
    dllogger.log(data=vars(args), step='PARAMETER')
def setup_logger(args):
    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=1, filename=args.stat_file)
    ])
    for k, v in vars(args).items():
        dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)

    container_setup_info = {
        'NVIDIA_PYTORCH_VERSION': os.environ.get('NVIDIA_PYTORCH_VERSION'),
        'PYTORCH_VERSION': os.environ.get('PYTORCH_VERSION'),
        'CUBLAS_VERSION': os.environ.get('CUBLAS_VERSION'),
        'NCCL_VERSION': os.environ.get('NCCL_VERSION'),
        'CUDA_DRIVER_VERSION': os.environ.get('CUDA_DRIVER_VERSION'),
        'CUDNN_VERSION': os.environ.get('CUDNN_VERSION'),
        'CUDA_VERSION': os.environ.get('CUDA_VERSION')
    }
    dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)

    dllogger.metadata('throughput', {'unit': 'tokens/s', 'format': ':.3f',
                                     'GOAL': 'MAXIMIZE', 'STAGE': 'INFER'})
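# Illustrative usage sketch (not part of the original code): how setup_logger above might
# be driven from an inference script. The argparse.Namespace fields and the throughput
# value are placeholders, not the real script's arguments.
import argparse
args = argparse.Namespace(stat_file='inference_log.json')
setup_logger(args)
dllogger.log(step=(), data={'throughput': 12345.678}, verbosity=0)
dllogger.flush()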