def main():
    """Entry point: build data loaders, model, and Dice loss, then train."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    # Dataset path is machine-specific; the loaders also return the full volume
    # and its affine for later evaluation/visualization.
    training_generator, val_generator, full_volume, affine = medical_loaders.generate_datasets(
        args, path='/home/mulns/My_project/VV/MedicalZooPytorch/datasets')

    model, optimizer = medzoo.create_model(args)
    criterion = DiceLoss(classes=args.classes)

    if args.cuda:
        model = model.cuda()
        print("Model transferred in GPU.....")

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
    )
    print("START TRAINING...")
    trainer.training()
def main():
    """Entry point: set up data, model, and loss, then train with an
    additional batchgenerators-based augmented 3D training pipeline."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    # exist_ok=True avoids the check-then-create race of the original
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(args.save, exist_ok=True)

    training_generator, val_generator, full_volume, affine, dataset = \
        medical_loaders.generate_datasets(
            args, path='/data/hejy/MedicalZooPytorch_2cls/datasets')

    model, optimizer = medzoo.create_model(args)

    # Only move the class weights to GPU when CUDA is requested; the original
    # unconditional `.cuda()` crashed on CPU-only machines before the
    # `args.cuda` check was ever reached.
    weight = torch.tensor([1, 1])
    if args.cuda:
        weight = weight.cuda()
    criterion = DiceLoss(classes=2, skip_index_after=args.classes,
                         weight=weight, sigmoid_normalization=True)

    if args.cuda:
        model = model.cuda()

    # Augmented training pipeline: 3D patch loader + spatial train transforms,
    # run single-threaded (num_threads_in_multithreaded only tags the loader).
    dataloader_train = MICCAI2020_RIBFRAC_DataLoader3D(
        dataset, args.batchSz, args.dim, num_threads_in_multithreaded=2)
    tr_transforms = get_train_transform(args.dim)
    training_generator_aug = SingleThreadedAugmenter(dataloader_train, tr_transforms)

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
        dataset=dataset,
        train_data_loader_aug=training_generator_aug,
    )
    trainer.training()
def main():
    """Entry point: build loaders, model, and a 2-class Dice loss, then train."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    # exist_ok=True avoids the check-then-create race of the original
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(args.save, exist_ok=True)

    training_generator, val_generator, full_volume, affine, dataset = \
        medical_loaders.generate_datasets(
            args, path='/data/hejy/MedicalZooPytorch_2cls/datasets')

    model, optimizer = medzoo.create_model(args)

    # Only move the class weights to GPU when CUDA is requested; the original
    # unconditional `.cuda()` crashed on CPU-only machines before the
    # `args.cuda` check was ever reached.
    weight = torch.tensor([1, 1])
    if args.cuda:
        weight = weight.cuda()
    criterion = DiceLoss(classes=2, skip_index_after=args.classes,
                         weight=weight, sigmoid_normalization=True)

    if args.cuda:
        model = model.cuda()

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
    )
    trainer.training()
def main(args: argparse.Namespace):
    """Assemble model, transforms, and a video DataLoader from config, then train."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    network = model.utils.get_model(args).to(device)
    criterion = model.utils.get_criterion(args)
    settings = TrainSettingsDecoder().decode(args, network)

    # Normalization statistics feed the spatial transform pipeline; both
    # transform repositories are configured from the same JSON spec.
    mean, std = dataset.get_stats()
    spatial_transforms = dataset.SpatialTransformRepository(mean, std) \
        .get_transform_obj(args.transforms_json)
    temporal_transforms = dataset.TemporalTransformRepository() \
        .get_transform_obj(args.transforms_json)

    with open(args.config_json) as config_file:
        config: Mapping[str, Any] = json.load(config_file)

    video_dataset = dataset.get_dataset(
        args,
        spatial_transforms,
        temporal_transforms,
        **config,
    )
    train_loader = DataLoader(
        video_dataset,
        batch_size=config["batch_size"],
        shuffle=True,
        num_workers=args.num_workers,
        collate_fn=dataset.collate_data,
        drop_last=False,
    )

    trainer = train.Trainer(settings, network, criterion, device)
    trainer.train(train_loader)
def main():
    """Train a MedicalZoo model, then render a 3D visualization of predictions."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    training_generator, val_generator, full_volume, affine = medical_loaders.generate_datasets(
        args, path='.././datasets')
    model, optimizer = medzoo.create_model(args)
    criterion = DiceLoss(classes=args.classes)

    if args.cuda:
        model = model.cuda()
        print("Model transferred in GPU.....")

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
    )
    print("START TRAINING...")
    trainer.training()

    # Post-training: visualize predictions on the held-out full volume.
    visualize_3D_no_overlap_new(args, full_volume, affine, model, 10, args.dim)
def main():
    """Minimal training entry point: loaders, model, Dice loss, train."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    training_generator, val_generator, full_volume, affine = \
        medical_loaders.generate_datasets(args, path='.././datasets')

    model, optimizer = medzoo.create_model(args)
    criterion = DiceLoss(classes=args.classes)
    if args.cuda:
        model = model.cuda()

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
    )
    trainer.training()
def main(**kwargs):
  """ Main function for training ESRGAN model and exporting it
      as a SavedModel2.0
      Args:
        config: path to config yaml file.
        log_dir: directory to store summary for tensorboard.
        data_dir: directory to store / access the dataset.
        manual: boolean to denote if data_dir is a manual directory.
        model_dir: directory to store the model into.
  """
  # Grow GPU memory on demand instead of reserving it all up front.
  for physical_device in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(physical_device, True)

  sett = settings.Settings(kwargs["config"])
  stats = settings.Stats(os.path.join(sett.path, "stats.yaml"))
  summary_writer = tf.summary.create_file_writer(kwargs["log_dir"])
  profiler.start_profiler_server(6009)

  generator = model.RRDBNet(out_channel=3)
  discriminator = model.VGGArch()
  training = train.Trainer(
      summary_writer=summary_writer,
      settings=sett,
      data_dir=kwargs["data_dir"],
      manual=kwargs["manual"])

  # Requested phases arrive as an underscore-separated string, e.g. "phase1_phase2".
  phases = [p.strip() for p in kwargs["phases"].lower().split("_")]

  if "phase1" in phases and not stats["train_step_1"]:
    logging.info("starting phase 1")
    training.warmup_generator(generator)
    stats["train_step_1"] = True
  if "phase2" in phases and not stats["train_step_2"]:
    logging.info("starting phase 2")
    training.train_gan(generator, discriminator)
    stats["train_step_2"] = True

  if stats["train_step_1"] and stats["train_step_2"]:
    # Attempting to save "Interpolated" Model as SavedModel2.0
    interpolated_generator = utils.interpolate_generator(
        partial(model.RRDBNet, out_channel=3),
        discriminator,
        sett["interpolation_parameter"],
        sett["dataset"]["hr_dimension"])
    tf.saved_model.save(interpolated_generator, kwargs["model_dir"])
def main():
    """Entry point: build loaders, model, and Dice loss, then run training."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    training_generator, val_generator, full_volume, affine = \
        medical_loaders.generate_datasets(args, path='.././datasets')

    model, optimizer = medzoo.create_model(args)
    criterion = DiceLoss(classes=args.classes)

    if args.cuda:
        model = model.cuda()
        print("Model transferred in GPU.....")

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
    )
    print("START TRAINING...")
    trainer.training()
def main():
    """Entry point for (optionally distributed) training with apex DDP.

    When ``args.distributed`` is set, pins each process to its local GPU and
    joins the NCCL process group before any CUDA work happens.
    """
    args = get_arguments()

    if args.distributed:
        # Pin this process to its GPU before initializing the NCCL group.
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."
    torch.backends.cudnn.benchmark = True

    utils.reproducibility(args, seed)
    # exist_ok=True avoids the check-then-create race of the original
    # `if not os.path.exists(...): os.makedirs(...)` pattern (and is safe when
    # several distributed workers race to create the same directory).
    os.makedirs(args.save, exist_ok=True)

    training_generator, val_generator, full_volume, affine, dataset = \
        medical_loaders.generate_datasets(
            args, path='/data/hejy/MedicalZooPytorch_2cls/datasets')

    model, optimizer = medzoo.create_model(args)
    if args.sync_bn:
        model = apex.parallel.convert_syncbn_model(model)

    # Only move the class weights to GPU when CUDA is requested; the original
    # unconditional `.cuda()` crashed on CPU-only machines before the
    # `args.cuda` check was ever reached.
    weight = torch.tensor([1, 1])
    if args.cuda:
        weight = weight.cuda()
    criterion = DiceLoss(classes=2, skip_index_after=args.classes,
                         weight=weight, sigmoid_normalization=True)

    if args.cuda:
        model = model.cuda()
        if args.distributed:
            model = DDP(model, delay_allreduce=True)

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
    )
    trainer.training()
def main():
    """Entry point: prepare data and model, then start training."""
    args = get_arguments()
    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    training_generator, val_generator, full_volume, affine = \
        medical_loaders.generate_datasets(args, path='./datasets')

    model, optimizer = medzoo.create_model(args)
    criterion = DiceLoss(classes=args.classes)
    if args.cuda:
        model = model.cuda()

    print("start training...")
    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
    )
    trainer.training()
def main():
    """Entry point for (optionally distributed) multi-class training."""
    args = get_arguments()

    if args.distributed:
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."  # 1
    torch.backends.cudnn.benchmark = True

    utils.reproducibility(args, seed)
    utils.make_dirs(args.save)

    training_generator, val_generator, full_volume, affine = \
        medical_loaders.generate_datasets(args, path='.././datasets')

    model, optimizer = medzoo.create_model(args)
    # NOTE(review): classes=11 is hard-coded while skip_index_after uses
    # args.classes — presumably intentional for this dataset; confirm.
    criterion = DiceLoss(classes=11, skip_index_after=args.classes)

    if args.sync_bn:
        model = apex.parallel.convert_syncbn_model(model)
    if args.cuda:
        model = model.cuda()
        print("Model transferred in GPU.....")
        if args.distributed:
            model = DDP(model, delay_allreduce=True)

    trainer = train.Trainer(
        args, model, criterion, optimizer,
        train_data_loader=training_generator,
        valid_data_loader=val_generator,
        lr_scheduler=None,
    )
    print("START TRAINING...")
    trainer.training()
def main(**kwargs):
  """ Main function for training ESRGAN model and exporting it
      as a SavedModel2.0
      Args:
        config: path to config yaml file.
        log_dir: directory to store summary for tensorboard.
        data_dir: directory to store / access the dataset.
        manual: boolean to denote if data_dir is a manual directory.
        model_dir: directory to store the model into.
  """
  # Grow GPU memory on demand instead of reserving it all up front.
  for physical_device in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(physical_device, True)

  strategy = utils.SingleDeviceStrategy()
  scope = utils.assign_to_worker(kwargs["tpu"])
  sett = settings.Settings(kwargs["config"])
  Stats = settings.Stats(os.path.join(sett.path, "stats.yaml"))
  tf.random.set_seed(10)

  # Switch to a TPU strategy when a TPU address was supplied.
  if kwargs["tpu"]:
    cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        kwargs["tpu"])
    tf.config.experimental_connect_to_host(cluster_resolver.get_master())
    tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
    strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)

  with tf.device(scope), strategy.scope():
    summary_writer_1 = tf.summary.create_file_writer(
        os.path.join(kwargs["log_dir"], "phase1"))
    summary_writer_2 = tf.summary.create_file_writer(
        os.path.join(kwargs["log_dir"], "phase2"))
    # profiler.start_profiler_server(6009)

    discriminator = model.VGGArch(batch_size=sett["batch_size"], num_features=64)
    # NOTE(review): generator is only built when export_only is unset, yet the
    # training phases below reference it unconditionally — presumably the
    # phases are skipped in export-only runs; confirm with the CLI contract.
    if not kwargs["export_only"]:
      generator = model.RRDBNet(out_channel=3)
      logging.debug("Initiating Convolutions")
      # Warm-up call to build the generator's variables before training.
      generator.unsigned_call(tf.random.normal([1, 128, 128, 3]))

    training = train.Trainer(
        summary_writer=summary_writer_1,
        summary_writer_2=summary_writer_2,
        settings=sett,
        model_dir=kwargs["model_dir"],
        data_dir=kwargs["data_dir"],
        manual=kwargs["manual"],
        strategy=strategy)
    phases = [p.strip() for p in kwargs["phases"].lower().split("_")]

    if "phase1" in phases and not Stats["train_step_1"]:
      logging.info("starting phase 1")
      training.warmup_generator(generator)
      Stats["train_step_1"] = True
    if "phase2" in phases and not Stats["train_step_2"]:
      logging.info("starting phase 2")
      training.train_gan(generator, discriminator)
      Stats["train_step_2"] = True

    if Stats["train_step_1"] and Stats["train_step_2"]:
      # Attempting to save "Interpolated" Model as SavedModel2.0
      interpolated_generator = utils.interpolate_generator(
          partial(model.RRDBNet, out_channel=3, first_call=False),
          discriminator,
          sett["interpolation_parameter"],
          [720, 1080],
          basepath=kwargs["model_dir"])
      tf.saved_model.save(
          interpolated_generator, os.path.join(kwargs["model_dir"], "esrgan"))