optimizer = torch.optim.SGD(params_list,
                            lr=base_lr,
                            momentum=config.momentum,
                            weight_decay=config.weight_decay)

# config lr policy
total_iteration = config.nepochs * config.niters_per_epoch
lr_policy = PolyLR(base_lr, config.lr_power, total_iteration)

# wrap the model for distributed or single-process multi-GPU training
if engine.distributed:
    if torch.cuda.is_available():
        model.cuda()
        model = DistributedDataParallel(model)
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DataParallelModel(model, device_ids=engine.devices)
    model.to(device)

# register objects with the engine so checkpoints can save/restore them
engine.register_state(dataloader=train_loader, model=model,
                      optimizer=optimizer)
if engine.continue_state_object:
    engine.restore_checkpoint()

optimizer.zero_grad()
model.train()

for epoch in range(engine.state.epoch, config.nepochs):
    if engine.distributed:
        # reshuffle the distributed sampler with a per-epoch seed
        train_sampler.set_epoch(epoch)
    bar_format = '{desc}[{elapsed}<{remaining},{rate_fmt}]'
    pbar = tqdm(range(config.niters_per_epoch),
                bar_format=bar_format)
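# NOTE: PolyLR is constructed above but its definition is not part of this
# excerpt. The sketch below is an assumption: a polynomial-decay policy that
# matches the constructor arguments used here (base_lr, lr_power,
# total_iteration) and exposes a hypothetical get_lr(cur_iter) accessor.
class PolyLR(object):
    def __init__(self, start_lr, lr_power, total_iters):
        self.start_lr = start_lr
        self.lr_power = lr_power
        self.total_iters = float(total_iters)

    def get_lr(self, cur_iter):
        # decays smoothly from start_lr toward 0 over total_iters iterations:
        # lr = start_lr * (1 - cur_iter / total_iters) ** lr_power
        return self.start_lr * ((1 - cur_iter / self.total_iters) ** self.lr_power)

# In the per-iteration loop, the value returned by get_lr would typically be
# written back into each optimizer.param_groups[i]['lr'] before optimizer.step().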