def init(log_fpath, log_dir, enabled=True, tb_subsets=None, **tb_kw):
    """Initialise dllogger and the module-level TensorBoard loggers.

    Args:
        log_fpath: Path of the JSON log; it is passed through
            ``unique_log_fpath`` before being handed to the JSON backend.
        log_dir: Directory forwarded to every ``TBLogger``.
        enabled: When false, dllogger is initialised with no backends and
            ``enabled`` is forwarded as-is to every ``TBLogger``.
        tb_subsets: Iterable of subset names; each one becomes a key of the
            global ``tb_loggers`` dict and the ``name=`` of its ``TBLogger``.
            ``None`` (the default) creates no TensorBoard loggers.
        **tb_kw: Extra keyword arguments forwarded to ``TBLogger``.
    """
    # A None sentinel replaces the former mutable ``[]`` default argument.
    if tb_subsets is None:
        tb_subsets = ()

    if enabled:
        backends = [
            JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(log_fpath)),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=stdout_step_format,
                          metric_format=stdout_metric_format),
        ]
    else:
        backends = []

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    # Register the same four metrics for every stage; ``pref`` is the label
    # prefix used for the two loss metrics.
    for id_, pref in [('train', ''), ('train_avg', 'avg train '),
                      ('val', ' avg val '), ('val_ema', ' EMA val ')]:
        dllogger.metadata(f"{id_}_loss",
                          {"name": f"{pref}loss", "format": ":>5.2f"})
        dllogger.metadata(f"{id_}_mel_loss",
                          {"name": f"{pref}mel loss", "format": ":>5.2f"})
        dllogger.metadata(f"{id_}_frames/s",
                          {"name": None, "unit": "frames/s", "format": ":>10.2f"})
        dllogger.metadata(f"{id_}_took",
                          {"name": "took", "unit": "s", "format": ":>3.2f"})

    global tb_loggers
    tb_loggers = {s: TBLogger(enabled, log_dir, name=s, **tb_kw)
                  for s in tb_subsets}
def __init__(self, log_file, global_batch_size, warmup_steps: int = 0, profile: bool = False):
    """Initialise the dllogger backends and reset the per-run bookkeeping.

    Both a JSON stream backend (writing to ``log_file``) and a stdout backend
    are registered at verbose level.
    """
    json_sink = JSONStreamBackend(Verbosity.VERBOSE, log_file)
    console_sink = StdOutBackend(Verbosity.VERBOSE)
    logger.init(backends=[json_sink, console_sink])

    # Configuration supplied by the caller.
    self.warmup_steps = warmup_steps
    self.global_batch_size = global_batch_size

    # Mutable state, starting from scratch.
    self.step = 0
    self.profile = profile
    self.timestamps = []
def init_dllogger(log_fpath=None, dummy=False):
    """Initialise DLLogger and register display metadata for the metrics.

    With ``dummy=True`` DLLogger is initialised without any backend and no
    metadata is registered. Otherwise a JSON backend writing to ``log_fpath``
    and a stdout backend are installed.
    """
    if dummy:
        DLLogger.init(backends=[])
        return

    json_sink = JSONStreamBackend(Verbosity.DEFAULT, log_fpath)
    console_sink = StdOutBackend(Verbosity.VERBOSE,
                                 step_format=stdout_step_format,
                                 metric_format=stdout_metric_format)
    DLLogger.init(backends=[json_sink, console_sink])

    # (metric key, display name) for every loss-like metric.
    loss_labels = (
        ("train_loss", "loss"),
        ("train_mel_loss", "mel loss"),
        ("avg_train_loss", "avg train loss"),
        ("avg_train_mel_loss", "avg train mel loss"),
        ("val_loss", " avg val loss"),
        ("val_mel_loss", " avg val mel loss"),
        ("val_ema_loss", " EMA val loss"),
        ("val_ema_mel_loss", " EMA val mel loss"),
    )
    for metric, label in loss_labels:
        DLLogger.metadata(metric, {"name": label, "format": ":>5.2f"})

    # Throughput metrics share one layout and carry no display name.
    throughput_metrics = ("train_frames/s", "avg_train_frames/s",
                          "val_frames/s", "val_ema_frames/s")
    for metric in throughput_metrics:
        DLLogger.metadata(
            metric, {"name": None, "unit": "frames/s", "format": ":>10.2f"})

    DLLogger.metadata(
        "took", {"name": "took", "unit": "s", "format": ":>3.2f"})
    DLLogger.metadata("lrate_change", {"name": "lrate"})
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.

    Random token batches of length 128 are pushed through the model with a
    fixed duration target; three warm-up iterations run before the measured
    one. Per-iteration and average latency / frames-per-second are logged
    through DLLogger.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch FastPitch Inference Benchmark')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    # The previously unused local ``log_file`` was dropped; the path is read
    # straight from ``args`` below.
    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'FastPitch_PyT'})

    model = load_and_setup_model('FastPitch', parser, None, args.amp_run, 'cuda',
                                 unk_args=[], forward_is_infer=True,
                                 ema=False, jitable=True)

    # FIXME Temporarily disabled due to nn.LayerNorm fp16 casting bug in pytorch:20.02-py3 and 20.03
    # model = torch.jit.script(model)

    warmup_iters = 3
    iters = 1
    gen_measures = MeasureTime()
    all_frames = 0

    # Negative indices are warm-up iterations; only i >= 0 is accounted for.
    for i in range(-warmup_iters, iters):
        text_padded = torch.randint(low=0, high=148,
                                    size=(args.batch_size, 128),
                                    dtype=torch.long).to('cuda')
        input_lengths = torch.IntTensor(
            [text_padded.size(1)] * args.batch_size).to('cuda')
        durs = torch.ones_like(text_padded).mul_(4).to('cuda')

        with torch.no_grad(), gen_measures:
            mels, *_ = model(text_padded, input_lengths, dur_tgt=durs)
        num_frames = mels.size(0) * mels.size(2)

        if i >= 0:
            all_frames += num_frames
            DLLogger.log(step=(i, ), data={"latency": gen_measures[-1]})
            DLLogger.log(step=(i, ),
                         data={"frames/s": num_frames / gen_measures[-1]})

    # Skip the warm-up measurements when averaging.
    measures = gen_measures[warmup_iters:]
    DLLogger.log(step=(), data={'avg latency': np.mean(measures)})
    DLLogger.log(step=(), data={'avg frames/s': all_frames / np.sum(measures)})
    DLLogger.flush()
def get_logger(params):
    """Initialise the shared dllogger and return it.

    Rank 0 logs to stdout and, when ``params.log_dir`` is set, also to a JSON
    stream at that path. All other ranks get a logger without backends.
    """
    sinks = []
    if hvd.rank() == 0:
        sinks.append(StdOutBackend(Verbosity.VERBOSE))
        if params.log_dir:
            sinks.append(JSONStreamBackend(Verbosity.VERBOSE, params.log_dir))

    logger.init(backends=sinks)
    return logger
def _initialize_dllogger(self, log_dir, filename, append):
    """Initialise dllogger with a JSON file backend and a stdout backend.

    The JSON log is written to ``log_dir/filename``; ``append`` is forwarded
    to the JSON backend.
    """
    json_path = os.path.join(log_dir, filename)
    json_sink = JSONStreamBackend(Verbosity.VERBOSE, json_path, append=append)
    console_sink = StdOutBackend(Verbosity.VERBOSE)
    logger.init(backends=[json_sink, console_sink])
def log(logname, dice, results="/results"):
    """Log the mean and per-entry dice scores to ``results/logname`` and stdout.

    Every value is rounded to two decimals. Entries of ``dice`` are reported
    under the keys ``L1``, ``L2``, ... in iteration order.
    """
    json_sink = JSONStreamBackend(Verbosity.VERBOSE, os.path.join(results, logname))
    console_sink = StdOutBackend(Verbosity.VERBOSE, step_format=lambda step: "")
    dllogger = Logger(backends=[json_sink, console_sink])

    metrics = {"Mean dice": round(dice.mean().item(), 2)}
    for position, score in enumerate(dice, start=1):
        metrics[f"L{position}"] = round(score.item(), 2)

    dllogger.log(step=(), data=metrics)
    dllogger.flush()
def get_logger(params):
    """Initialise the shared dllogger and return it.

    Worker 0 (or the only worker when Horovod is disabled) logs to stdout and,
    when ``params.log_dir`` is set, to ``<log_dir>/log.json``; the directory
    is created if missing. Other workers get a logger without backends.
    """
    if horovod_enabled():
        worker_id = hvd_rank()
    else:
        worker_id = 0

    sinks = []
    if worker_id == 0:
        sinks.append(StdOutBackend(Verbosity.VERBOSE))
        if params.log_dir:
            os.makedirs(params.log_dir, exist_ok=True)
            json_path = f"{params.log_dir}/log.json"
            sinks.append(JSONStreamBackend(Verbosity.VERBOSE, json_path))

    logger.init(backends=sinks)
    return logger
def setup_logger(args):
    """Initialise dllogger for a run and register metric metadata.

    The results directory is created if needed. If ``args.log_file`` already
    exists there, a numbered variant (``name_<i>.ext`` or ``name.<i>``) that
    does not exist yet is used instead. Backends are only installed when
    torch.distributed is not initialised, the world size is not greater than
    one, or this is distributed rank 0; otherwise dllogger is initialised with
    no backends. Run parameters and environment information are logged, then
    GOAL/STAGE/format metadata is registered for every metric.

    Args:
        args: Namespace providing ``results``, ``log_file``,
            ``distributed_world_size`` and ``distributed_rank``.
    """
    os.makedirs(args.results, exist_ok=True)
    log_path = os.path.join(args.results, args.log_file)

    if os.path.exists(log_path):
        # The split does not depend on the counter, so do it once.
        s_fname = args.log_file.split('.')
        for i in itertools.count():
            if len(s_fname) > 1:
                fname = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1]
            else:
                # Fixed: this branch used the unrelated ``args.stat_file``;
                # an extension-less log file must derive from ``log_file``.
                fname = args.log_file + f'.{i}'
            log_path = os.path.join(args.results, fname)
            if not os.path.exists(log_path):
                break

    def metric_format(metric, metadata, value):
        # NOTE: currently not passed to StdOutBackend; kept for callers that
        # want to re-enable the compact float formatting.
        return "{}: {}".format(metric, f'{value:.5f}' if isinstance(value, float) else value)

    def step_format(step):
        if step == ():
            return "Finished |"
        elif isinstance(step, int):
            return "Step {0: <5} |".format(step)
        return "Step {} |".format(step)

    if not dist.is_initialized() or not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[
            JSONStreamBackend(verbosity=1, filename=log_path),
            TensorBoardBackend(verbosity=1, log_dir=args.results),
            StdOutBackend(verbosity=2,
                          step_format=step_format,
                          prefix_format=lambda x: ""),
        ])
    else:
        dllogger.init(backends=[])

    dllogger.log(step='PARAMETER', data=vars(args), verbosity=0)

    container_setup_info = {**get_framework_env_vars(), **get_system_info()}
    dllogger.log(step='ENVIRONMENT', data=container_setup_info, verbosity=0)

    # (metric, GOAL, STAGE, format) in registration order.
    # Fixed: the three latency metrics carried the misspelt goal 'MIMIMIZE'.
    metric_specs = [
        ('loss', 'MINIMIZE', 'TRAIN', ':5f'),
        ('P10', 'MINIMIZE', 'TRAIN', ':5f'),
        ('P50', 'MINIMIZE', 'TRAIN', ':5f'),
        ('P90', 'MINIMIZE', 'TRAIN', ':5f'),
        ('items/s', 'MAXIMIZE', 'TRAIN', ':1f'),
        ('val_loss', 'MINIMIZE', 'VAL', ':5f'),
        ('val_P10', 'MINIMIZE', 'VAL', ':5f'),
        ('val_P50', 'MINIMIZE', 'VAL', ':5f'),
        ('val_P90', 'MINIMIZE', 'VAL', ':5f'),
        ('val_items/s', 'MAXIMIZE', 'VAL', ':1f'),
        ('test_P10', 'MINIMIZE', 'TEST', ':5f'),
        ('test_P50', 'MINIMIZE', 'TEST', ':5f'),
        ('test_P90', 'MINIMIZE', 'TEST', ':5f'),
        ('throughput', 'MAXIMIZE', 'TEST', ':1f'),
        ('latency_p90', 'MINIMIZE', 'TEST', ':5f'),
        ('latency_p95', 'MINIMIZE', 'TEST', ':5f'),
        ('latency_p99', 'MINIMIZE', 'TEST', ':5f'),
    ]
    for metric, goal, stage, fmt in metric_specs:
        dllogger.metadata(metric, {'GOAL': goal, 'STAGE': stage, 'format': fmt})
def get_dllogger(params):
    """Initialise the shared dllogger and return it.

    The main process logs to stdout and, when ``params.log_dir`` is set, to
    ``log.json`` inside that directory. Other processes get no backends.
    """
    sinks = []
    if is_main_process():
        sinks.append(StdOutBackend(Verbosity.VERBOSE))
        if params.log_dir:
            json_path = os.path.join(params.log_dir, "log.json")
            sinks.append(JSONStreamBackend(Verbosity.VERBOSE, json_path))

    logger.init(backends=sinks)
    return logger
def init_log(args):
    """Initialise dllogger, its metric metadata and the TensorBoard loggers.

    Logging is enabled when torch.distributed is not initialised or this is
    rank 0. The JSON log goes to ``args.log_file`` or, if that is empty, to
    ``nvlog.json`` inside ``args.output_dir`` (made unique via
    ``unique_log_fpath``). Finally the run parameters are logged.
    """
    global tb_loggers

    enabled = not dist.is_initialized() or dist.get_rank() == 0

    backends = []
    if enabled:
        fpath = args.log_file or os.path.join(args.output_dir, 'nvlog.json')
        backends.append(
            JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(fpath)))
        backends.append(
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=stdout_step_format,
                          metric_format=stdout_metric_format))

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    # (key suffix, display label, format) for the prefixed metrics.
    prefixed_metrics = (
        ("loss", "loss", ":>7.2f"),
        ("wer", "wer", ":>6.2f"),
        ("pplx", "pplx", ":>6.2f"),
        ("throughput", "utts/s", ":>5.0f"),
    )
    stages = (('train', ''), ('train_avg', 'avg train '), ('dev_ema', ' dev ema '))
    for id_, pref in stages:
        for suffix, label, fmt in prefixed_metrics:
            dllogger.metadata(f"{id_}_{suffix}",
                              {"name": f"{pref}{label}", "format": fmt})
        # The timing metric keeps an un-prefixed label and carries a unit.
        dllogger.metadata(f"{id_}_took",
                          {"name": "took", "unit": "s", "format": ":>5.2f"})

    tb_loggers = {}
    for subset in ['train', 'dev_ema']:
        tb_loggers[subset] = TBLogger(enabled, args.output_dir, name=subset)

    log_parameters(vars(args), tb_subset='train')
def setup_dllogger(rank, enabled=True, filename='log.json'):
    """Initialise DLLogger; only an enabled rank 0 gets real backends.

    Rank 0 logs to stdout at default verbosity and to ``filename`` as a
    verbose JSON stream. Every other case initialises DLLogger with an empty
    backend list.
    """
    if not (enabled and rank == 0):
        DLLogger.init([])
        return

    console_sink = StdOutBackend(Verbosity.DEFAULT)
    json_sink = JSONStreamBackend(
        Verbosity.VERBOSE,
        filename,
    )
    DLLogger.init([console_sink, json_sink])
def get_logger(params):
    """ Build and return the shared dllogger

    Rank 0 logs to stdout and, when ``params.log_dir`` is set, to a JSON
    stream at that path; other ranks receive no backends.

    :param params: Object exposing a ``log_dir`` attribute
    :return: logger
    """
    sinks = []
    if hvd.rank() == 0:
        sinks.append(StdOutBackend(Verbosity.VERBOSE))
        if params.log_dir:
            sinks.append(JSONStreamBackend(Verbosity.VERBOSE, params.log_dir))

    logger.init(backends=sinks)
    return logger
def log(logname, dice, epoch=None, dice_tta=None):
    """Log dice scores (optionally with epoch and TTA scores) to file and stdout.

    The JSON log is written to ``logname`` inside ``args.results`` (``args``
    comes from the enclosing module scope). All values are rounded to two
    decimals. Keys are emitted in this order: ``Epoch`` (if given),
    ``Mean dice``, ``Mean TTA dice`` (if given), ``L1..Ln``, ``TTA_L1..TTA_Ln``.
    """
    json_path = os.path.join(args.results, logname)
    json_sink = JSONStreamBackend(Verbosity.VERBOSE, json_path)
    console_sink = StdOutBackend(Verbosity.VERBOSE, step_format=lambda step: "")
    dllogger = Logger(backends=[json_sink, console_sink])

    has_tta = dice_tta is not None

    metrics = {}
    if epoch is not None:
        metrics["Epoch"] = epoch
    metrics["Mean dice"] = round(dice.mean().item(), 2)
    if has_tta:
        metrics["Mean TTA dice"] = round(dice_tta.mean().item(), 2)
    for position, score in enumerate(dice, start=1):
        metrics[f"L{position}"] = round(score.item(), 2)
    if has_tta:
        for position, score in enumerate(dice_tta, start=1):
            metrics[f"TTA_L{position}"] = round(score.item(), 2)

    dllogger.log(step=(), data=metrics)
    dllogger.flush()
def main():
    """Exercise the LOGGER API end to end.

    Configures three backends, parses a dummy CLI argument, logs hardware and
    arguments, registers three metrics, and then runs the setup, preprocessing
    and run phases inside timed blocks before finishing the logger.
    """
    LOGGER.set_model_name('ResNet')
    # Three backends, each scoped to TRAIN_ITER; the compact and JSON backends
    # are given an explicit iteration interval.
    LOGGER.set_backends([
        StdOutBackend(log_file='std.out', logging_scope=Scope.TRAIN_ITER),
        CompactBackend(log_file=None, logging_scope=Scope.TRAIN_ITER, iteration_interval=5),
        JsonBackend(log_file='dummy.json', logging_scope=Scope.TRAIN_ITER, iteration_interval=4)
    ])

    parser = ArgumentParser()
    parser.add_argument('--dummy', type=str, default='default_dummy_value')
    args = parser.parse_args()

    LOGGER.log_hardware()
    LOGGER.log_args(args)
    LOGGER.log(tags.RUN_INIT)

    # 'loss' is averaged via AverageMeter; the other two use the defaults of
    # register_metric.
    LOGGER.register_metric('loss', meter=AverageMeter(), metric_scope=Scope.TRAIN_ITER)
    LOGGER.register_metric('epoch_nr', metric_scope=Scope.EPOCH)
    LOGGER.register_metric('epochs2')

    with LOGGER.timed_block(tags.SETUP_BLOCK):
        print("This is setup.")

    with LOGGER.timed_block(tags.PREPROC_BLOCK):
        print("This is preprocessing.")

    # NOTE(review): train() is assumed to belong inside the run block - confirm.
    with LOGGER.timed_block(tags.RUN_BLOCK):
        print("This is run.")
        train()

    print("This is the end.")

    LOGGER.log(tags.RUN_FINAL)
    LOGGER.finish()
def __init__(self, args):
    """Build the model, loss, metric and logger from ``args``.

    ``args.type == "pre"`` selects ``UNetLoc`` with two classes; any other
    value selects ``get_dmg_unet`` with five classes. Logs go to
    ``<args.results>/<args.logname>.json`` and to stdout.
    """
    super(Model, self).__init__()
    self.save_hyperparameters()
    self.args = args
    self.f1_score = F1(args)
    if args.type == "pre":
        self.model = UNetLoc(args)
    else:
        self.model = get_dmg_unet(args)
    self.loss = Loss(args)
    self.best_f1 = torch.tensor(0)
    self.best_epoch = 0
    self.tta_flips = [[2], [3], [2, 3]]
    self.lr = args.lr
    if self.args.type == "pre":
        self.n_class = 2
    else:
        self.n_class = 5
    self.softmax = nn.Softmax(dim=1)
    self.test_idx = 0

    json_path = os.path.join(args.results, f"{args.logname}.json")
    json_sink = JSONStreamBackend(Verbosity.VERBOSE, json_path)
    console_sink = StdOutBackend(Verbosity.VERBOSE,
                                 step_format=lambda step: f"Epoch: {step} ")
    self.dllogger = Logger(backends=[json_sink, console_sink])
def setup_logger(config):
    """Create a dllogger ``Logger``, log the framework environment, return it.

    The main process gets TensorBoard, JSON (``log.json``), aggregator and
    stdout backends rooted at ``config["log_path"]`` (current working
    directory when the key is absent). Other processes get no backends.
    Metadata for ``loss`` and ``val_loss`` is registered in every case.
    """
    log_path = config.get("log_path", os.getcwd())

    backends = []
    if is_main_process():
        verbose = dllogger.Verbosity.VERBOSE
        backends = [
            TensorBoardBackend(verbosity=verbose, log_dir=log_path),
            JSONStreamBackend(verbosity=verbose,
                              filename=os.path.join(log_path, "log.json")),
            AggregatorBackend(verbosity=verbose,
                              agg_dict={"loss": AverageMeter}),
            StdOutBackend(
                verbosity=dllogger.Verbosity.DEFAULT,
                step_format=empty_step_format,
                metric_format=no_string_metric_format,
                prefix_format=empty_prefix_format,
            ),
        ]
    logger = Logger(backends=backends)

    container_setup_info = get_framework_env_vars()
    logger.log(step="PARAMETER",
               data=container_setup_info,
               verbosity=dllogger.Verbosity.DEFAULT)

    # Both losses are minimised and measured in nats; only the stage differs.
    for metric, stage in (("loss", "TRAIN"), ("val_loss", "VAL")):
        logger.metadata(metric, {"unit": "nat", "GOAL": "MINIMIZE", "STAGE": stage})

    return logger
def __init__(self, args):
    """Build the network, loss, dice metric and bookkeeping from ``args``.

    A dllogger ``Logger`` writing to ``<args.results>/logs.json`` and stdout
    is only created when ``args.exec_mode`` is ``"train"`` or ``"evaluate"``.
    """
    super(NNUnet, self).__init__()
    self.args = args
    self.save_hyperparameters()
    # NOTE(review): self.n_class is read below but never assigned in this
    # method; presumably build_nnunet() sets it - confirm.
    self.build_nnunet()
    self.loss = Loss()
    self.dice = Dice(self.n_class)
    # Best-so-far trackers, all starting at zero; the list-valued ones hold
    # one entry per class.
    self.best_sum = 0
    self.eval_dice = 0
    self.best_sum_epoch = 0
    self.best_dice = self.n_class * [0]
    self.best_epoch = self.n_class * [0]
    self.best_sum_dice = self.n_class * [0]
    self.learning_rate = args.learning_rate
    if self.args.exec_mode in ["train", "evaluate"]:
        self.dllogger = Logger(backends=[
            JSONStreamBackend(Verbosity.VERBOSE,
                              os.path.join(args.results, "logs.json")),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=lambda step: f"Epoch: {step} "),
        ])

    # Flip-axis combinations: three for dim == 2, seven otherwise.
    self.tta_flips = ([[2], [3], [2, 3]] if self.args.dim == 2 else
                      [[2], [3], [4], [2, 3], [2, 4], [3, 4], [2, 3, 4]])
def main():
    """Run Tacotron 2 + WaveGlow inference through TensorRT engines.

    Loads the encoder, decoder, postnet and WaveGlow engines, optionally
    builds a denoiser from a WaveGlow PyTorch checkpoint, synthesises audio
    for every line of ``args.input``, writes one ``audio_<i>_trt.wav`` per
    utterance into ``args.output`` and logs the measured latencies through
    DLLogger. A one-line CSV summary is appended to ``log_bs1_<prec>.log``.

    Exits with status 1 when the input file cannot be read.
    """
    import os  # local import: used to build the audio output paths

    parser = argparse.ArgumentParser(
        description='TensorRT Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    # initialize CUDA state
    torch.cuda.init()

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    encoder = load_engine(args.encoder, TRT_LOGGER)
    decoder_iter = load_engine(args.decoder, TRT_LOGGER)
    postnet = load_engine(args.postnet, TRT_LOGGER)
    waveglow = load_engine(args.waveglow, TRT_LOGGER)

    if args.waveglow_ckpt != "":
        # setup denoiser using WaveGlow PyTorch checkpoint
        waveglow_ckpt = load_and_setup_model('WaveGlow', parser,
                                             args.waveglow_ckpt, True,
                                             forward_is_infer=True)
        denoiser = Denoiser(waveglow_ckpt).cuda()
        # after initialization, we don't need WaveGlow PyTorch checkpoint
        # anymore - deleting
        del waveglow_ckpt
        torch.cuda.empty_cache()

    # create TRT contexts for each engine
    encoder_context = encoder.create_execution_context()
    decoder_context = decoder_iter.create_execution_context()
    postnet_context = postnet.create_execution_context()
    waveglow_context = waveglow.create_execution_context()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.output+'/'+args.log_file),
        StdOutBackend(Verbosity.VERBOSE)])

    # Fixed: the file handle is now closed deterministically and only
    # read-related errors are turned into the exit (was a bare ``except:``).
    try:
        with open(args.input, 'r') as f:
            texts = f.readlines()
    except (OSError, UnicodeDecodeError):
        print("Could not read file")
        sys.exit(1)

    measurements = {}

    sequences, sequence_lengths = prepare_input_sequence(texts)
    sequences = sequences.to(torch.int32)
    sequence_lengths = sequence_lengths.to(torch.int32)
    with MeasureTime(measurements, "latency"):
        mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
                                               encoder_context, decoder_context,
                                               postnet_context, sequences,
                                               sequence_lengths, measurements,
                                               args.fp16)
        audios = infer_waveglow_trt(waveglow, waveglow_context, mel,
                                    measurements, args.fp16)

    # Enter and leave the execution contexts so they are released.
    with encoder_context, decoder_context, postnet_context, waveglow_context:
        pass

    audios = audios.float()
    if args.waveglow_ckpt != "":
        with MeasureTime(measurements, "denoiser"):
            audios = denoiser(audios,
                              strength=args.denoising_strength).squeeze(1)

    for i, audio in enumerate(audios):
        audio = audio[:mel_lengths[i]*args.stft_hop_length]
        audio = audio/torch.max(torch.abs(audio))
        # Fixed: plain concatenation dropped the path separator whenever
        # ``args.output`` had no trailing slash.
        audio_path = os.path.join(args.output, "audio_"+str(i)+"_trt.wav")
        write(audio_path, args.sampling_rate, audio.cpu().numpy())

    DLLogger.log(step=0, data={"tacotron2_encoder_latency": measurements['tacotron2_encoder_time']})
    DLLogger.log(step=0, data={"tacotron2_decoder_latency": measurements['tacotron2_decoder_time']})
    DLLogger.log(step=0, data={"tacotron2_postnet_latency": measurements['tacotron2_postnet_time']})
    DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']})
    DLLogger.log(step=0, data={"latency": measurements['latency']})

    if args.waveglow_ckpt != "":
        DLLogger.log(step=0, data={"denoiser": measurements['denoiser']})
    DLLogger.flush()

    prec = "fp16" if args.fp16 else "fp32"
    latency = measurements['latency']
    throughput = audios.size(1)/latency
    log_data = "1,"+str(sequence_lengths[0].item())+","+prec+","+str(latency)+","+str(throughput)+","+str(mel_lengths[0].item())+"\n"
    with open("log_bs1_"+prec+".log", 'a') as f:
        f.write(log_data)
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU or CPU.

    Runs ``args.num_iters`` iterations over a fixed text truncated to
    ``args.input_length`` characters and repeated ``args.batch_size`` times.
    Measurements of iterations at or past ``warmup_iters`` are logged through
    DLLogger and collected for ``print_stats``.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, unknown_args = parser.parse_known_args()

    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
                            StdOutBackend(Verbosity.VERBOSE)])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    measurements_all = {"pre_processing": [],
                        "tacotron2_latency": [],
                        "waveglow_latency": [],
                        "latency": [],
                        "type_conversion": [],
                        "data_transfer": [],
                        "storage": [],
                        "tacotron2_items_per_sec": [],
                        "waveglow_items_per_sec": [],
                        "num_mels_per_audio": [],
                        "throughput": []}

    print("args:", args, unknown_args)

    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
                                     args.amp_run, args.cpu_run,
                                     forward_is_infer=True)
    waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
                                    args.amp_run, args.cpu_run)

    if args.cpu_run:
        denoiser = Denoiser(waveglow, args.cpu_run)
    else:
        denoiser = Denoiser(waveglow, args.cpu_run).cuda()

    jitted_tacotron2 = torch.jit.script(tacotron2)

    texts = ["The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."]
    texts = [texts[0][:args.input_length]]
    texts = texts*args.batch_size

    warmup_iters = 3

    # Renamed from ``iter`` so the builtin is no longer shadowed.
    for iteration in range(args.num_iters):

        measurements = {}

        with MeasureTime(measurements, "pre_processing", args.cpu_run):
            sequences_padded, input_lengths = prepare_input_sequence(texts, args.cpu_run)

        with torch.no_grad():
            with MeasureTime(measurements, "latency", args.cpu_run):
                with MeasureTime(measurements, "tacotron2_latency", args.cpu_run):
                    mel, mel_lengths, _ = jitted_tacotron2(sequences_padded, input_lengths)

                with MeasureTime(measurements, "waveglow_latency", args.cpu_run):
                    audios = waveglow.infer(mel, sigma=args.sigma_infer)
                    audios = audios.float()
                    audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

        num_mels = mel.size(0)*mel.size(2)
        num_samples = audios.size(0)*audios.size(1)

        with MeasureTime(measurements, "type_conversion", args.cpu_run):
            audios = audios.float()

        with MeasureTime(measurements, "data_transfer", args.cpu_run):
            audios = audios.cpu()

        with MeasureTime(measurements, "storage", args.cpu_run):
            audios = audios.numpy()
            for i, audio in enumerate(audios):
                audio_path = "audio_"+str(i)+".wav"
                write(audio_path, args.sampling_rate,
                      audio[:mel_lengths[i]*args.stft_hop_length])

        measurements['tacotron2_items_per_sec'] = num_mels/measurements['tacotron2_latency']
        measurements['waveglow_items_per_sec'] = num_samples/measurements['waveglow_latency']
        measurements['num_mels_per_audio'] = mel.size(2)
        measurements['throughput'] = num_samples/measurements['latency']

        # Warm-up iterations are measured but neither logged nor aggregated.
        if iteration >= warmup_iters:
            for k, v in measurements.items():
                measurements_all[k].append(v)
                DLLogger.log(step=(iteration-warmup_iters), data={k: v})

    DLLogger.flush()

    print_stats(measurements_all)
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.

    Reads the texts from ``args.input``, optionally performs three warm-up
    passes on random input, synthesises audio with Tacotron 2, WaveGlow and
    the denoiser, logs throughput and latency through DLLogger and writes one
    ``audio_<i>.wav`` per utterance into ``args.output``.

    Exits with status 1 when the input file cannot be read.
    """
    import os  # local import: used to build the audio output paths

    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT,
                          args.output + '/' + args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
                                     args.amp_run, forward_is_infer=True)
    waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
                                    args.amp_run, forward_is_infer=True)
    denoiser = Denoiser(waveglow).cuda()

    jitted_tacotron2 = torch.jit.script(tacotron2)

    # Fixed: the file handle is now closed deterministically and only
    # read-related errors are turned into the exit (was a bare ``except:``).
    try:
        with open(args.input, 'r') as f:
            texts = f.readlines()
    except (OSError, UnicodeDecodeError):
        print("Could not read file")
        sys.exit(1)

    if args.include_warmup:
        sequence = torch.randint(low=0, high=148, size=(1, 50),
                                 dtype=torch.long).cuda()
        input_lengths = torch.IntTensor([sequence.size(1)]).cuda().long()
        for i in range(3):
            with torch.no_grad():
                mel, mel_lengths = jitted_tacotron2(sequence, input_lengths)
                _ = waveglow(mel)

    measurements = {}

    sequences_padded, input_lengths = prepare_input_sequence(texts)

    with torch.no_grad(), MeasureTime(measurements, "tacotron2_time"):
        mel, mel_lengths = jitted_tacotron2(sequences_padded, input_lengths)

    with torch.no_grad(), MeasureTime(measurements, "waveglow_time"):
        audios = waveglow(mel, sigma=args.sigma_infer)
        audios = audios.float()
        audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

    print("Stopping after", mel.size(2), "decoder steps")

    tacotron2_infer_perf = mel.size(0) * mel.size(
        2) / measurements['tacotron2_time']
    waveglow_infer_perf = audios.size(0) * audios.size(
        1) / measurements['waveglow_time']

    DLLogger.log(step=0,
                 data={"tacotron2_items_per_sec": tacotron2_infer_perf})
    DLLogger.log(step=0,
                 data={"tacotron2_latency": measurements['tacotron2_time']})
    DLLogger.log(step=0,
                 data={"waveglow_items_per_sec": waveglow_infer_perf})
    DLLogger.log(step=0,
                 data={"waveglow_latency": measurements['waveglow_time']})
    DLLogger.log(step=0, data={
        "latency": (measurements['tacotron2_time'] +
                    measurements['waveglow_time'])
    })

    for i, audio in enumerate(audios):
        audio = audio[:mel_lengths[i] * args.stft_hop_length]
        audio = audio / torch.max(torch.abs(audio))
        # Fixed: plain concatenation dropped the path separator whenever
        # ``args.output`` had no trailing slash.
        audio_path = os.path.join(args.output, "audio_" + str(i) + ".wav")
        write(audio_path, args.sampling_rate, audio.cpu().numpy())

    DLLogger.flush()
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.

    Either stage can be skipped by passing ``SKIP`` as ``args.fastpitch`` or
    ``args.waveglow``. Without a generator, mels are taken from the input
    batches. Per-batch metrics are only logged when ``args.repeats == 1``;
    the aggregate statistics at the end are always logged.

    Raises:
        ValueError: If unknown command-line options remain after the models
            consumed theirs.
    """
    parser = argparse.ArgumentParser(description='PyTorch FastPitch Inference',
                                     allow_abbrev=False)
    parser = parse_args(parser)
    args, unk_args = parser.parse_known_args()

    if args.p_arpabet > 0.0:
        cmudict.initialize(args.cmudict_path, keep_ambiguous=True)

    torch.backends.cudnn.benchmark = args.cudnn_benchmark

    if args.output is not None:
        Path(args.output).mkdir(parents=False, exist_ok=True)

    log_fpath = args.log_file or str(Path(args.output, 'nvlog_infer.json'))
    log_fpath = unique_log_fpath(log_fpath)
    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_fpath),
                            StdOutBackend(Verbosity.VERBOSE,
                                          metric_format=stdout_metric_format)])
    init_inference_metadata()
    # Plain loop instead of a list comprehension used only for side effects.
    for k, v in vars(args).items():
        DLLogger.log("PARAMETER", {k: v})

    device = torch.device('cuda' if args.cuda else 'cpu')

    if args.fastpitch != 'SKIP':
        generator = load_and_setup_model(
            'FastPitch', parser, args.fastpitch, args.amp, device,
            unk_args=unk_args, forward_is_infer=True, ema=args.ema,
            jitable=args.torchscript)

        if args.torchscript:
            generator = torch.jit.script(generator)
    else:
        generator = None

    if args.waveglow != 'SKIP':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            waveglow = load_and_setup_model(
                'WaveGlow', parser, args.waveglow, args.amp, device,
                unk_args=unk_args, forward_is_infer=True, ema=args.ema)
        denoiser = Denoiser(waveglow).to(device)
        # Prefer the model's ``infer`` entry point when it has one.
        waveglow = getattr(waveglow, 'infer', waveglow)
    else:
        waveglow = None

    if len(unk_args) > 0:
        raise ValueError(f'Invalid options {unk_args}')

    fields = load_fields(args.input)
    batches = prepare_input_sequence(
        fields, device, args.symbol_set, args.text_cleaners, args.batch_size,
        args.dataset_path, load_mels=(generator is None),
        p_arpabet=args.p_arpabet)

    # Use real data rather than synthetic - FastPitch predicts len
    for _ in tqdm(range(args.warmup_steps), 'Warmup'):
        with torch.no_grad():
            if generator is not None:
                b = batches[0]
                mel, *_ = generator(b['text'])
            if waveglow is not None:
                audios = waveglow(mel, sigma=args.sigma_infer).float()
                _ = denoiser(audios, strength=args.denoising_strength)

    gen_measures = MeasureTime(cuda=args.cuda)
    waveglow_measures = MeasureTime(cuda=args.cuda)

    gen_kw = {'pace': args.pace,
              'speaker': args.speaker,
              'pitch_tgt': None,
              'pitch_transform': build_pitch_transformation(args)}

    if args.torchscript:
        gen_kw.pop('pitch_transform')
        print('NOTE: Pitch transforms are disabled with TorchScript')

    all_utterances = 0
    all_samples = 0
    all_letters = 0
    all_frames = 0

    reps = args.repeats
    log_enabled = reps == 1

    def log(step, data):
        # ``log_enabled`` is looked up at call time, so flipping it after the
        # main loop re-enables logging for the summary statistics.
        if log_enabled:
            DLLogger.log(step=step, data=data)

    for rep in (tqdm(range(reps), 'Inference') if reps > 1 else range(reps)):
        for b in batches:
            if generator is None:
                # Fixed: the payload used to be a set literal; log data must
                # be a mapping of metric name to value.
                log(rep, {'info': 'Synthesizing from ground truth mels'})
                mel, mel_lens = b['mel'], b['mel_lens']
            else:
                with torch.no_grad(), gen_measures:
                    mel, mel_lens, *_ = generator(b['text'], **gen_kw)

                gen_infer_perf = mel.size(0) * mel.size(2) / gen_measures[-1]
                all_letters += b['text_lens'].sum().item()
                all_frames += mel.size(0) * mel.size(2)
                log(rep, {"fastpitch_frames/s": gen_infer_perf})
                log(rep, {"fastpitch_latency": gen_measures[-1]})

                if args.save_mels:
                    for i, mel_ in enumerate(mel):
                        m = mel_[:, :mel_lens[i].item()].permute(1, 0)
                        fname = b['output'][i] if 'output' in b else f'mel_{i}.npy'
                        mel_path = Path(args.output, Path(fname).stem + '.npy')
                        np.save(mel_path, m.cpu().numpy())

            if waveglow is not None:
                with torch.no_grad(), waveglow_measures:
                    audios = waveglow(mel, sigma=args.sigma_infer)
                    audios = denoiser(audios.float(),
                                      strength=args.denoising_strength
                                      ).squeeze(1)

                all_utterances += len(audios)
                all_samples += sum(audio.size(0) for audio in audios)
                waveglow_infer_perf = (
                    audios.size(0) * audios.size(1) / waveglow_measures[-1])

                log(rep, {"waveglow_samples/s": waveglow_infer_perf})
                log(rep, {"waveglow_latency": waveglow_measures[-1]})

                # Audio files are only written for single-repetition runs.
                if args.output is not None and reps == 1:
                    for i, audio in enumerate(audios):
                        audio = audio[:mel_lens[i].item() * args.stft_hop_length]

                        if args.fade_out:
                            fade_len = args.fade_out * args.stft_hop_length
                            fade_w = torch.linspace(1.0, 0.0, fade_len)
                            audio[-fade_len:] *= fade_w.to(audio.device)

                        audio = audio / torch.max(torch.abs(audio))
                        fname = b['output'][i] if 'output' in b else f'audio_{i}.wav'
                        audio_path = Path(args.output, fname)
                        write(audio_path, args.sampling_rate, audio.cpu().numpy())

            if generator is not None and waveglow is not None:
                log(rep, {"latency": (gen_measures[-1] + waveglow_measures[-1])})

    # Summary statistics are always logged, whatever the repeat count.
    log_enabled = True
    if generator is not None:
        gm = np.sort(np.asarray(gen_measures))
        rtf = all_samples / (all_utterances * gm.mean() * args.sampling_rate)
        log((), {"avg_fastpitch_letters/s": all_letters / gm.sum()})
        log((), {"avg_fastpitch_frames/s": all_frames / gm.sum()})
        log((), {"avg_fastpitch_latency": gm.mean()})
        log((), {"avg_fastpitch_RTF": rtf})
        log((), {"90%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.90) / 2) * gm.std()})
        log((), {"95%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.95) / 2) * gm.std()})
        log((), {"99%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.99) / 2) * gm.std()})
    if waveglow is not None:
        wm = np.sort(np.asarray(waveglow_measures))
        rtf = all_samples / (all_utterances * wm.mean() * args.sampling_rate)
        log((), {"avg_waveglow_samples/s": all_samples / wm.sum()})
        log((), {"avg_waveglow_latency": wm.mean()})
        log((), {"avg_waveglow_RTF": rtf})
        log((), {"90%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.90) / 2) * wm.std()})
        log((), {"95%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.95) / 2) * wm.std()})
        log((), {"99%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.99) / 2) * wm.std()})
    if generator is not None and waveglow is not None:
        # NOTE(review): gm and wm were sorted independently, so this sum pairs
        # measurements by rank rather than by batch - confirm this is intended.
        m = gm + wm
        rtf = all_samples / (all_utterances * m.mean() * args.sampling_rate)
        log((), {"avg_samples/s": all_samples / m.sum()})
        log((), {"avg_letters/s": all_letters / m.sum()})
        log((), {"avg_latency": m.mean()})
        log((), {"avg_RTF": rtf})
        log((), {"90%_latency": m.mean() + norm.ppf((1.0 + 0.90) / 2) * m.std()})
        log((), {"95%_latency": m.mean() + norm.ppf((1.0 + 0.95) / 2) * m.std()})
        log((), {"99%_latency": m.mean() + norm.ppf((1.0 + 0.99) / 2) * m.std()})
    DLLogger.flush()
def main(_):
    """Train an EfficientDet model with Horovod (`hvd`) and evaluate the result.

    The function:
      1. builds the training and evaluation configs from ``FLAGS``,
      2. initialises DLLogger (file/stdout backends on the main process only),
      3. compiles and fits the model, then saves the final weights,
      4. restores the newest EMA checkpoint found under
         ``<model_dir>/ema_weights`` (falling back to the final training
         weights when none is found),
      5. unless benchmarking, evaluates with ``coco_metric.EvaluationMetric``
         and prints the metrics, and
      6. logs the collected timing/loss statistics through DLLogger.

    Args:
        _: Unused positional argument.

    Raises:
        RuntimeError: If ``--training_file_pattern``, ``--val_file_pattern``
            or ``--val_json_file`` is not set.
    """
    # get e2e training time
    begin = time.time()
    logging.info("Training started at: {}".format(time.asctime()))

    hvd.init()

    # Parse and override hparams
    config = hparams_config.get_detection_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    if FLAGS.num_epochs:  # NOTE: remove this flag after updating all docs.
        config.num_epochs = FLAGS.num_epochs
    if FLAGS.lr:
        config.learning_rate = FLAGS.lr
    if FLAGS.warmup_value:
        config.lr_warmup_init = FLAGS.warmup_value
    if FLAGS.warmup_epochs:
        config.lr_warmup_epoch = FLAGS.warmup_epochs
    config.backbone_init = FLAGS.backbone_init
    config.mixed_precision = FLAGS.amp
    config.image_size = model_utils.parse_image_size(config.image_size)

    # get eval config
    eval_config = hparams_config.get_detection_config(FLAGS.model_name)
    eval_config.override(FLAGS.hparams)
    eval_config.val_json_file = FLAGS.val_json_file
    eval_config.val_file_pattern = FLAGS.val_file_pattern
    eval_config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS
    eval_config.drop_remainder = False  # eval all examples w/o drop.
    eval_config.image_size = model_utils.parse_image_size(
        eval_config['image_size'])

    # setup
    setup.set_flags(FLAGS, config, training=True)

    if FLAGS.debug:
        tf.config.experimental_run_functions_eagerly(True)
        tf.debugging.set_log_device_placement(True)
        tf.random.set_seed(111111)
        logging.set_verbosity(logging.DEBUG)

    # Check data path
    if (FLAGS.training_file_pattern is None
            or FLAGS.val_file_pattern is None
            or FLAGS.val_json_file is None):
        raise RuntimeError(
            'You must specify --training_file_pattern, --val_file_pattern and --val_json_file for training.'
        )

    # Ceil-divide the epoch size by the global (all-rank) batch size.
    global_batch_size = FLAGS.batch_size * get_world_size()
    steps_per_epoch = (FLAGS.num_examples_per_epoch + global_batch_size -
                       1) // global_batch_size
    if FLAGS.benchmark:
        # For ci perf training runs, run for a fixed number of iterations per epoch
        steps_per_epoch = FLAGS.benchmark_steps

    params = dict(config.as_dict(),
                  model_name=FLAGS.model_name,
                  model_dir=FLAGS.model_dir,
                  steps_per_epoch=steps_per_epoch,
                  checkpoint_period=FLAGS.checkpoint_period,
                  batch_size=FLAGS.batch_size,
                  num_shards=get_world_size(),
                  val_json_file=FLAGS.val_json_file,
                  testdev_dir=FLAGS.testdev_dir,
                  mode='train')
    logging.info('Training params: {}'.format(params))

    # make output dir if it does not exist
    tf.io.gfile.makedirs(FLAGS.model_dir)

    # dllogger setup: only the main process gets real backends, every other
    # rank initialises DLLogger with an empty backend list.
    backends = []
    if is_main_process():
        log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)
        backends += [
            JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]
    DLLogger.init(backends=backends)

    def get_dataset(is_training, params):
        """Build the train or validation input pipeline used by `model.fit`."""
        file_pattern = (FLAGS.training_file_pattern
                        if is_training else FLAGS.val_file_pattern)
        if not file_pattern:
            raise ValueError('No matching files.')

        return dataloader.InputReader(
            file_pattern,
            is_training=is_training,
            use_fake_data=FLAGS.use_fake_data,
            max_instances_per_image=config.max_instances_per_image,
            enable_map_parallelization=FLAGS.enable_map_parallelization)(
                params)

    # Per-rank share of the eval samples, then the number of eval batches
    # that share needs (both are ceil divisions).
    num_samples = (FLAGS.eval_samples + get_world_size() -
                   1) // get_world_size()
    num_samples = (num_samples + FLAGS.eval_batch_size -
                   1) // FLAGS.eval_batch_size
    eval_config.num_samples = num_samples

    def get_eval_dataset(eval_config):
        """Build this rank's shard of the evaluation dataset."""
        dataset = dataloader.InputReader(
            FLAGS.val_file_pattern,
            is_training=False,
            max_instances_per_image=eval_config.max_instances_per_image)(
                eval_config, batch_size=FLAGS.eval_batch_size)
        dataset = dataset.shard(get_world_size(), get_rank())
        dataset = dataset.take(num_samples)
        return dataset

    eval_dataset = get_eval_dataset(eval_config)

    # pick focal loss implementation
    focal_loss = train_lib.StableFocalLoss(
        params['alpha'],
        params['gamma'],
        label_smoothing=params['label_smoothing'],
        reduction=tf.keras.losses.Reduction.NONE)

    model = train_lib.EfficientDetNetTrain(params['model_name'], config)
    model.build((None, *config.image_size, 3))
    model.compile(
        optimizer=optimizer_builder.get_optimizer(params),
        loss={
            'box_loss':
                train_lib.BoxLoss(params['delta'],
                                  reduction=tf.keras.losses.Reduction.NONE),
            'box_iou_loss':
                train_lib.BoxIouLoss(params['iou_loss_type'],
                                     params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'],
                                     reduction=tf.keras.losses.Reduction.NONE),
            'class_loss':
                focal_loss,
            'seg_loss':
                tf.keras.losses.SparseCategoricalCrossentropy(
                    from_logits=True,
                    reduction=tf.keras.losses.Reduction.NONE)
        })
    train_from_epoch = util_keras.restore_ckpt(
        model,
        params['model_dir'],
        config.moving_average_decay,
        steps_per_epoch=steps_per_epoch)

    print("training_mode: {}".format(FLAGS.training_mode))
    callbacks = callback_builder.get_callbacks(params, FLAGS.training_mode,
                                               eval_config, eval_dataset,
                                               DLLogger, FLAGS.time_history,
                                               FLAGS.log_steps, FLAGS.lr_tb,
                                               FLAGS.benchmark)

    history = model.fit(
        get_dataset(True, params=params),
        epochs=params['num_epochs'],
        steps_per_epoch=steps_per_epoch,
        initial_epoch=train_from_epoch,
        callbacks=callbacks,
        verbose=1 if is_main_process() else 0,
        validation_data=get_dataset(False, params=params)
        if FLAGS.validate else None,
        validation_steps=(FLAGS.eval_samples // FLAGS.eval_batch_size)
        if FLAGS.validate else None)

    if is_main_process():
        model.save_weights(os.path.join(FLAGS.model_dir, 'ckpt-final'))

    # log final stats
    stats = {}
    for callback in callbacks:
        if (isinstance(callback, callback_builder.TimeHistory)
                and callback.epoch_runtime_log):
            stats['avg_fps_training'] = callback.average_examples_per_second
            stats['avg_fps_training_per_GPU'] = (
                callback.average_examples_per_second / get_world_size())
            stats['avg_latency_training'] = callback.average_time_per_iteration

    if history and history.history:
        train_hist = history.history
        # Gets final loss from training (averaged across ranks).
        stats['training_loss'] = float(
            hvd.allreduce(tf.constant(train_hist['loss'][-1],
                                      dtype=tf.float32),
                          average=True))

    # Find the newest EMA checkpoint. File names are expected to look like
    # 'emackpt-<epoch>.<suffix>'; the epoch number is parsed out of each one.
    ema_dir = os.path.join(FLAGS.model_dir, 'ema_weights')
    ema_epochs = []
    if os.path.exists(ema_dir):
        ema_epochs = [
            int(f.rsplit('.')[0].rsplit('-')[1])
            for f in os.listdir(ema_dir)
            if 'emackpt' in f
        ]

    if ema_epochs:
        ckpt_epoch = "%02d" % max(ema_epochs)
        ckpt = os.path.join(ema_dir, 'emackpt-' + ckpt_epoch)
        util_keras.restore_ckpt(model,
                                ckpt,
                                eval_config.moving_average_decay,
                                steps_per_epoch=0,
                                skip_mismatch=False,
                                expect_partial=True)
        if is_main_process():
            model.save(os.path.join(FLAGS.model_dir, 'emackpt-final'))
    else:
        if os.path.exists(ema_dir):
            # An empty ema_weights directory used to crash here with an
            # IndexError after training had already completed.
            logging.warning(
                "No 'emackpt' files found in %s; evaluating the final "
                "training weights instead.", ema_dir)
        ckpt_epoch = 'final'
        ckpt = os.path.join(FLAGS.model_dir, 'ckpt-' + ckpt_epoch)
        if is_main_process():
            model.save(ckpt)

    # Start evaluation of final ema checkpoint
    logging.set_verbosity(logging.WARNING)

    @tf.function
    def model_fn(images, labels):
        """Run one eval batch and feed its detections to `evaluator`.

        `evaluator` is a closure variable that is only assigned in the
        evaluation branch below, before this function is first called.
        """
        cls_outputs, box_outputs = model(images, training=False)
        detections = postprocess.generate_detections(eval_config, cls_outputs,
                                                     box_outputs,
                                                     labels['image_scales'],
                                                     labels['source_ids'])

        tf.numpy_function(evaluator.update_state, [
            labels['groundtruth_data'],
            postprocess.transform_detections(detections)
        ], [])

    if not FLAGS.benchmark and FLAGS.training_mode == 'train':
        # Evaluator for AP calculation.
        label_map = label_util.get_label_map(eval_config.label_map)
        evaluator = coco_metric.EvaluationMetric(
            filename=eval_config.val_json_file, label_map=label_map)

        evaluator.reset_states()

        # evaluate all images.
        pbar = tf.keras.utils.Progbar(num_samples)
        for i, (images, labels) in enumerate(eval_dataset):
            model_fn(images, labels)
            if is_main_process():
                # Report completed batches (i + 1) so the bar can reach its
                # target on the last batch instead of stopping one short.
                pbar.update(i + 1)

        # gather detections from all ranks
        evaluator.gather()

        if is_main_process():
            # compute the final eval results.
            metrics = evaluator.result()
            metric_dict = {
                name: metrics[i]
                for i, name in enumerate(evaluator.metric_names)
            }

            if label_map:
                # Per-class entries follow the aggregate metrics in `metrics`.
                offset = len(evaluator.metric_names)
                for i, cid in enumerate(sorted(label_map.keys())):
                    name = 'AP_/%s' % label_map[cid]
                    metric_dict[name] = metrics[i + offset]

            # csv format
            csv_metrics = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl']
            csv_format = ",".join(
                [str(ckpt_epoch)] +
                [str(round(metric_dict[key] * 100, 2)) for key in csv_metrics])
            print(FLAGS.model_name, metric_dict, "csv format:", csv_format)

        # Every rank takes this branch (it depends only on FLAGS), so all of
        # them reach the barrier.
        MPI.COMM_WORLD.Barrier()

    if is_main_process():
        stats['e2e_training_time'] = time.time() - begin
        DLLogger.log(step=(), data=stats)
def main(_):
    """Run V-Net training, evaluation and/or prediction with Horovod.

    Which phases run is decided by ``FLAGS.exec_mode`` (substring checks for
    ``'train'``, ``'evaluate'`` and ``'predict'``). Evaluation and prediction
    are executed on rank 0 only. Predictions are pickled to
    ``<model_dir>/predictions/predictions.pkl``; an existing ``predictions``
    directory is removed first.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: On rank 0, if evaluation is requested while
            ``FLAGS.train_split >= 1.0``.
    """
    tf.get_logger().setLevel(logging.ERROR)
    hvd.init()

    FLAGS = PARSER.parse_args()

    # Only rank 0 gets real logging backends; other ranks log to nothing.
    backends = []
    if hvd.rank() == 0:
        backends += [StdOutBackend(Verbosity.DEFAULT)]
        if FLAGS.log_dir:
            backends += [JSONStreamBackend(Verbosity.DEFAULT, FLAGS.log_dir)]

    DLLogger.init(backends=backends)

    # Record every parsed command-line parameter.
    for key, value in vars(FLAGS).items():
        DLLogger.log(step="PARAMETER", data={str(key): value})

    # Environment variables for the CUDA / Horovod / TensorFlow runtimes.
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    os.environ['TF_ADJUST_HUE_FUSED'] = '1'
    os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
    os.environ['TF_DISABLE_NVTX_RANGES'] = '1'

    dataset = MSDDataset(json_path=os.path.join(FLAGS.data_dir,
                                                'dataset.json'),
                         dst_size=FLAGS.input_shape,
                         seed=FLAGS.seed,
                         interpolator=FLAGS.resize_interpolator,
                         data_normalization=FLAGS.data_normalization,
                         batch_size=FLAGS.batch_size,
                         train_split=FLAGS.train_split,
                         split_seed=FLAGS.split_seed)

    # FLAGS is handed to the estimator as `params`, so the labels travel
    # with it.
    FLAGS.labels = dataset.labels

    gpu_options = tf.GPUOptions()
    config = tf.ConfigProto(gpu_options=gpu_options,
                            allow_soft_placement=True)

    if FLAGS.use_xla:
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    config.gpu_options.allow_growth = True
    # Restrict each process to the device matching its local rank.
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    if FLAGS.use_amp:
        config.graph_options.rewrite_options.auto_mixed_precision = 1

    run_config = tf.estimator.RunConfig(
        save_summary_steps=None,
        save_checkpoints_steps=None
        if FLAGS.benchmark else dataset.train_steps * FLAGS.train_epochs,
        save_checkpoints_secs=None,
        tf_random_seed=None,
        session_config=config,
        keep_checkpoint_max=1)

    estimator = tf.estimator.Estimator(
        model_fn=vnet_v2,
        model_dir=FLAGS.model_dir if hvd.rank() == 0 else None,
        config=run_config,
        params=FLAGS)

    train_hooks = [hvd.BroadcastGlobalVariablesHook(0)]

    if 'train' in FLAGS.exec_mode:
        steps = dataset.train_steps * FLAGS.train_epochs

        if FLAGS.benchmark:
            # Benchmark runs last twice the warm-up length.
            steps = FLAGS.warmup_steps * 2
            if hvd.rank() == 0:
                train_hooks += [
                    ProfilingHook(FLAGS.warmup_steps,
                                  FLAGS.batch_size * hvd.size(), DLLogger)
                ]
        else:
            if hvd.rank() == 0:
                train_hooks += [TrainHook(FLAGS.log_every, DLLogger)]

        estimator.train(input_fn=lambda: dataset.train_fn(FLAGS.augment),
                        steps=steps,
                        hooks=train_hooks)

    if 'evaluate' in FLAGS.exec_mode:
        if hvd.rank() == 0:
            if FLAGS.train_split >= 1.0:
                raise ValueError("Missing argument: --train_split < 1.0")
            result = estimator.evaluate(input_fn=dataset.eval_fn,
                                        steps=dataset.eval_steps,
                                        hooks=[])
            DLLogger.log(step=tuple(),
                         data={
                             'background_dice':
                                 str(result['background dice']),
                             'anterior_dice':
                                 str(result['Anterior dice']),
                             'posterior_dice':
                                 str(result['Posterior dice'])
                         })

    if 'predict' in FLAGS.exec_mode:
        count = 1
        hooks = []
        if hvd.rank() == 0:
            if FLAGS.benchmark:
                # Repeat the test set often enough to cover 2x warm-up steps.
                count = math.ceil(
                    (FLAGS.warmup_steps * 2) / dataset.test_steps)
                hooks += [
                    ProfilingHook(FLAGS.warmup_steps,
                                  FLAGS.batch_size * hvd.size(),
                                  DLLogger,
                                  training=False)
                ]
            predictions = estimator.predict(
                input_fn=lambda: dataset.test_fn(count=count), hooks=hooks)

            pred = [p['prediction'] for p in predictions]
            predict_path = os.path.join(FLAGS.model_dir, 'predictions')
            if os.path.exists(predict_path):
                shutil.rmtree(predict_path)

            os.makedirs(predict_path)

            # Use a context manager so the file is flushed and closed even
            # if pickling fails (the handle was previously never closed).
            with open(os.path.join(predict_path, 'predictions.pkl'),
                      'wb') as pred_file:
                pickle.dump(pred, pred_file)