def test_write_results():
    """BasicTimeProfiler.write_results produces a CSV with one row per
    iteration plus a header line.

    Fixes vs. previous version: the temp-folder cleanup now runs in a
    ``finally`` block so a failing assertion no longer leaks
    ``./test_log_folder`` into later runs; paths are built with
    ``os.path.join``; line counting uses ``sum`` instead of an ambiguous
    ``l`` loop variable.
    """
    true_event_handler_time = 0.125
    true_max_epochs = 3
    true_num_iters = 2

    test_folder = "./test_log_folder"
    # Start from a clean directory even if a previous run left one behind.
    if os.path.exists(test_folder):
        shutil.rmtree(test_folder)
    os.makedirs(test_folder)

    try:
        profiler = BasicTimeProfiler()
        dummy_trainer = Engine(_do_nothing_update_fn)
        profiler.attach(dummy_trainer)

        # Attach a fixed-duration handler to every profiled event so each
        # timing bucket receives a known, non-zero value.
        @dummy_trainer.on(Events.STARTED)
        def delay_start(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.COMPLETED)
        def delay_complete(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.EPOCH_STARTED)
        def delay_epoch_start(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.EPOCH_COMPLETED)
        def delay_epoch_complete(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.ITERATION_STARTED)
        def delay_iter_start(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.ITERATION_COMPLETED)
        def delay_iter_complete(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.GET_BATCH_STARTED)
        def delay_get_batch_started(engine):
            time.sleep(true_event_handler_time)

        @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
        def delay_get_batch_completed(engine):
            time.sleep(true_event_handler_time)

        dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)

        log_path = os.path.join(test_folder, "test_log.csv")
        profiler.write_results(log_path)
        assert os.path.isfile(log_path)

        # One header line plus one row per processed iteration.
        with open(log_path) as f:
            file_length = sum(1 for _ in f)
        assert file_length == (true_max_epochs * true_num_iters) + 1
    finally:
        # cleanup test log directory — runs even when an assertion fails
        shutil.rmtree(test_folder)
def test_profilers_wrong_inputs():
    """Both profiler flavors validate their inputs the same way: attaching to
    a non-Engine raises TypeError, and write_results without pandas available
    raises RuntimeError."""
    for profiler_cls in (BasicTimeProfiler, HandlersTimeProfiler):
        profiler = profiler_cls()

        with pytest.raises(TypeError, match=r"Argument engine should be ignite.engine.Engine"):
            profiler.attach(None)

        # Simulate a missing pandas installation for the duration of the call.
        with pytest.raises(RuntimeError, match=r"Need pandas to write results as files"):
            with patch.dict("sys.modules", {"pandas": None}):
                profiler.write_results("")
def test_write_results_basic_profiler(dirname):
    """BasicTimeProfiler.write_results writes a CSV containing a header line
    followed by one row per processed iteration."""
    event_handler_time = 0.125
    max_epochs = 3
    num_iters = 2

    profiler = BasicTimeProfiler()
    trainer = get_prepared_engine_for_basic_profiler(event_handler_time)
    profiler.attach(trainer)
    trainer.run(range(num_iters), max_epochs=max_epochs)

    csv_path = os.path.join(dirname, "test_log.csv")
    profiler.write_results(csv_path)
    assert os.path.isfile(csv_path)

    # Expect header + (epochs * iterations-per-epoch) data rows.
    with open(csv_path) as f:
        line_count = sum(1 for _ in f)
    assert line_count == (max_epochs * num_iters) + 1
def create_trainer(loader, model, opt, loss_fn, device, args):
    """Build and wire up the training/evaluation ignite Engines.

    Parameters
    ----------
    loader : dict of DataLoaders (at least 'train'; _evaluate consumes it).
    model : the network being trained.
    opt : optimizer for ``model``.
    loss_fn : loss applied to the softmaxed predictions.
    device : torch device, stored on engine state and used for batch transfer.
    args : namespace of hyperparameters/flags (logdir, devrun, nosave,
        model_summary, early_stopping, epoch_length, lr_scheduler, lr_warmup).

    Returns
    -------
    The trainer Engine; the evaluator is driven from the trainer's
    EPOCH_COMPLETED event and is not returned.
    """

    def _update(engine, batch):
        # One optimization step on a single batch.
        model.train()
        x = batch['x'].to(engine.state.device, non_blocking=True)
        y = batch['y'].to(engine.state.device, non_blocking=True)
        m = batch['m'].to(engine.state.device, non_blocking=True)
        opt.zero_grad()
        y_pred = model(x)
        # BUGFIX: was nn.Softmax() with no dim, which relies on deprecated
        # implicit-dim behavior and disagreed with _inference below (dim=1,
        # the channel axis). Normalize over channels here too.
        softmax = nn.Softmax(dim=1)
        masked_loss = softmax(y_pred)
        #masked_loss = y_pred*m
        loss = loss_fn(masked_loss, y)
        # Only step when the mask covers more than 70% of the batch volume.
        if m.sum().item() / m.numel() > 0.7:
            loss.backward()
            opt.step()
        masked_loss = (masked_loss > 0.5).float()
        acc = accuracy_segmentation(masked_loss[:, 1, :, :, :], y[:, 1, :, :, :])
        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc': acc,
        }

    def _inference(engine, batch):
        # Gradient-free forward pass mirroring _update's metric computation.
        model.eval()
        with th.no_grad():
            x = batch['x'].to(engine.state.device, non_blocking=True)
            y = batch['y'].to(engine.state.device, non_blocking=True)
            m = batch['m'].to(engine.state.device, non_blocking=True)
            y_pred = model(x)
            softmax = nn.Softmax(dim=1)
            masked_loss = softmax(y_pred)
            #masked_loss = y_pred*m
            loss = loss_fn(masked_loss, y)
            # NOTE(review): [-3:] keeps only the last 3 batch samples while y
            # is not sliced, so accuracy_segmentation sees mismatched batch
            # sizes unless batches have <= 3 samples — confirm intent.
            masked_loss = (masked_loss[-3:] > 0.5).float()
            acc = accuracy_segmentation(masked_loss[:, 1, :, :, :], y[:, 1, :, :, :])
        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc': acc,
        }

    #wandb.watch(model, log ='all')
    trainer = Engine(_update)
    evaluator = Engine(_inference)

    profiler = BasicTimeProfiler()
    profiler.attach(trainer)

    logdir = args.logdir
    save_ = (not args.devrun) and (not args.nosave)

    # initialize trainer state
    trainer.state.device = device
    trainer.state.hparams = args
    trainer.state.save = save_
    trainer.state.logdir = logdir
    trainer.state.df = defaultdict(dict)
    trainer.state.metrics = dict()
    trainer.state.val_metrics = dict()
    trainer.state.best_metrics = defaultdict(list)
    trainer.state.gradnorm = defaultdict(dict)

    # initialize evaluator state
    evaluator.logger = setup_logger('evaluator')
    evaluator.state.device = device
    evaluator.state.df = defaultdict(dict)
    evaluator.state.metrics = dict()

    # progress bars: persistent for training, transient for evaluation
    pbar = ProgressBar(persist=True)
    ebar = ProgressBar(persist=False)
    pbar.attach(trainer, ['loss'])
    ebar.attach(evaluator, ['loss'])
    pbar.attach(trainer, ['acc'])
    ebar.attach(evaluator, ['acc'])

    # model summary
    if args.model_summary:
        trainer.add_event_handler(
            Events.STARTED,
            print_model_summary, model
        )

    # terminate on nan
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        TerminateOnNan(lambda x: x['loss'])
    )

    # metrics: per-iteration accumulation, per-epoch/run averaging
    trainer.add_event_handler(Events.ITERATION_COMPLETED, _metrics)
    evaluator.add_event_handler(Events.ITERATION_COMPLETED, _metrics)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, _metrics_mean)
    evaluator.add_event_handler(Events.COMPLETED, _metrics_mean)
    trainer.add_event_handler(
        #Events.STARTED | Events.EPOCH_COMPLETED,
        Events.EPOCH_COMPLETED,
        _evaluate, evaluator, loader
    )

    # logging
    trainer.add_event_handler(Events.EPOCH_COMPLETED, _log_metrics)

    # early stopping on (negated) validation loss
    if args.early_stopping > 0:
        es_p = args.early_stopping
        es_s = lambda engine: -engine.state.metrics['loss']
        evaluator.add_event_handler(
            Events.COMPLETED,
            EarlyStopping(patience=es_p, score_function=es_s, trainer=trainer)
        )

    # lr schedulers
    if args.epoch_length is None:
        el = len(loader['train'])
    else:
        el = args.epoch_length
    if args.lr_scheduler is not None:
        lr_sched = create_lr_scheduler(opt, args, num_steps=el)
        if args.lr_scheduler != 'plateau':
            def _sched_fun(engine):
                lr_sched.step()
        else:
            # ReduceLROnPlateau-style schedulers need the monitored value.
            def _sched_fun(engine):
                e = engine.state.epoch
                v = engine.state.val_metrics[e]['nmse']
                lr_sched.step(v)
        if args.lr_scheduler == 'linearcycle':
            trainer.add_event_handler(Events.ITERATION_STARTED, lr_sched)
        else:
            trainer.add_event_handler(Events.EPOCH_COMPLETED, _sched_fun)

    # FIXME: warmup is modifying opt base_lr -> must create last
    if args.lr_warmup > 0:
        wsched = create_lr_scheduler(opt, args, 'warmup', num_steps=el)
        wsts = wsched.total_steps
        # Step the warmup schedule only for the first total_steps iterations.
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(event_filter=lambda _, i: i <= wsts),
            lambda _: wsched.step()
        )

    # saving
    if save_:
        to_save = {
            'model': model,
            'optimizer': opt,
            'trainer': trainer,
            'evaluator': evaluator
        }
        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            Checkpoint(to_save, DiskSaver(logdir), n_saved=3)
        )
        # NOTE: best-model Checkpoint handlers scored on val nmae/nmse/R2
        # previously lived here as commented-out code; recover from VCS if
        # they need to be re-enabled.
        # NOTE(review): original formatting was ambiguous; _save_metrics is
        # assumed to be guarded by save_ like the checkpoints — confirm.
        trainer.add_event_handler(Events.EPOCH_COMPLETED, _save_metrics)

    # timer: dump profiler timings when the run finishes or is terminated
    trainer.add_event_handler(
        Events.COMPLETED | Events.TERMINATE,
        lambda _: profiler.write_results(logdir + '/time.csv')
    )

    return trainer