def train_prepare(args):
    """Fill ``args`` with run paths, create the run folders and set up logging.

    A run name is derived from the experiment settings stored on ``args``; a
    timestamp is appended to obtain the run label. All folder and file paths
    for the run are written back onto ``args``, the folders are created via
    ``utils.ensure_dir``, the root logger is configured to write both to a
    log file inside the run folder and to stdout, and a ``TensorBoardLogger``
    is stored as ``args.tb_logger``.

    Args:
        args: Namespace-like object providing ``name``, ``subset_ratio``,
            ``train_ratio``, ``block_size``, ``in_channels``, ``message`` and
            ``layers``. It is mutated in place; nothing is returned.
    """
    # Naming: <name>_r<subset>t<train>b<block>ch<channels>m<message>l<layers>
    args.train_name = (
        f'{args.name}_r{args.subset_ratio:.2f}t{args.train_ratio:.2f}'
        f'b{args.block_size}ch{args.in_channels}m{args.message}l{args.layers}'
    )
    timestamp = time.strftime("%m%d-%H%M%S")
    args.run_label = f'{args.train_name}_{timestamp}'

    # Root folders shared by all runs.
    args.runs_folder = './runs'
    args.tb_logger_folder = './logger'

    # Per-run locations.
    args.this_run_folder = os.path.join(args.runs_folder, args.run_label)
    args.image_dir = os.path.join(args.this_run_folder, 'images')
    args.tb_logger_dir = os.path.join(args.tb_logger_folder, args.run_label)
    args.model_path = os.path.join(args.this_run_folder, 'trained-model.pth')

    # Parents first, then the per-run folders.
    for folder in (args.runs_folder,
                   args.tb_logger_folder,
                   args.this_run_folder,
                   args.image_dir,
                   args.tb_logger_dir):
        utils.ensure_dir(folder)

    log_file = os.path.join(args.this_run_folder, f'{args.train_name}.log')
    log_handlers = [
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stdout),
    ]
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=log_handlers)

    logging.info(
        f'Tensorboard is enabled. Creating logger at {args.tb_logger_dir}')
    args.tb_logger = TensorBoardLogger(args.tb_logger_dir)
train_x = train_x.astype("float32") test_x = test_x.astype("float32") train_mask = train_mask.astype("float32") train_mask_y = train_mask_y.astype("float32") train_cat_mask = train_cat_mask.astype("float32") train_x = train_x / 255.0 train_mask = train_mask / 255.0 test_x = test_x / 255.0 train_cat_mask = train_cat_mask / 255.0 # logs and callback logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir) tensorboard_logger = TensorBoardLogger(logdir + '/train_loss', test_x, test_y) callbacks = [tensorboard_callback, tensorboard_logger] model = RCNNModel(num_category=3) losses = { "objectness": loss.dice_loss, "bboxes": loss.masked_mae_loss, "category": loss.category_mask_loss } all_train_mask = np.concatenate([train_mask_y, train_mask], axis=-1) targets = { "objectness": train_mask, "bboxes": all_train_mask, "category": train_cat_mask
def main():
    """Parse the command line and start or resume a HiDDeN training run.

    Two sub-commands are supported:

    * ``new`` builds fresh ``TrainingOptions`` / ``HiDDenConfiguration``
      objects from the arguments, creates a run folder and pickles the
      options into ``options-and-config.pickle`` inside it.
    * ``continue`` reloads the options and the last checkpoint from an
      existing run folder, optionally overriding the data directory, the
      annotation directory and the number of epochs.

    In both cases the vocabulary is loaded, logging is configured, the noiser
    and the model are built and ``train`` is called.

    Changes relative to the previous revision:

    * ``continue`` now accepts ``--vocab-path`` too. Before, ``args.vocab_path``
      only existed for ``new`` and ``continue`` crashed with AttributeError.
    * ``--anno-dir`` is honoured instead of being parsed and ignored.
    * A missing sub-command produces an argparse usage error instead of an
      ``AssertionError``.
    """
    device = (torch.device('cuda')
              if torch.cuda.is_available() else torch.device('cpu'))

    parent_parser = argparse.ArgumentParser(
        description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command',
                                              help='Sub-parser for commands')

    # ---- "new" sub-command -------------------------------------------------
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir', '-d', required=True, type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument(
        '--anno-dir', '-a', type=str,
        help='The directory where the annotations are stored. Specify only '
        'if you have annotations in a different folder.')
    new_run_parser.add_argument('--batch-size', '-b', required=True, type=int,
                                help='The batch size.')
    new_run_parser.add_argument('--epochs', '-e', default=300, type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name', required=True, type=str,
                                help='The name of the experiment.')
    new_run_parser.add_argument(
        '--size', '-s', default=128, type=int,
        help='The size of the images (images are square so this is height '
        'and width).')
    new_run_parser.add_argument('--message', '-m', default=256, type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument(
        '--continue-from-folder', '-c', default='', type=str,
        help='The folder from where to continue a previous run. Leave blank '
        'if you are starting a new experiment.')
    new_run_parser.add_argument('--tensorboard', action='store_true',
                                help='Use to switch on Tensorboard logging.')
    new_run_parser.add_argument('--enable-fp16', dest='enable_fp16',
                                action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument(
        '--noise', nargs='*', action=NoiseArgParser,
        help="Noise layers configuration. Use quotes when specifying "
        "configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'")
    new_run_parser.set_defaults(tensorboard=False)
    new_run_parser.set_defaults(enable_fp16=False)
    new_run_parser.add_argument('--vocab-path', '-v', type=str,
                                default='./data/vocab.pkl',
                                help='load the vocab')

    # ---- "continue" sub-command --------------------------------------------
    continue_parser = subparsers.add_parser('continue',
                                            help='Continue a previous run')
    continue_parser.add_argument(
        '--folder', '-f', required=True, type=str,
        help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument(
        '--data-dir', '-d', required=False, type=str,
        help='The directory where the data is stored. Specify a value only '
        'if you want to override the previous value.')
    continue_parser.add_argument(
        '--anno-dir', '-a', required=False, type=str,
        help='The directory where the annotations are stored. Specify a '
        'value only if you want to override the previous value.')
    continue_parser.add_argument(
        '--epochs', '-e', required=False, type=int,
        help='Number of epochs to run the simulation. Specify a value only '
        'if you want to override the previous value.')
    # The vocabulary is needed when resuming as well, so expose the same
    # option (same default) here.
    continue_parser.add_argument('--vocab-path', '-v', type=str,
                                 default='./data/vocab.pkl',
                                 help='load the vocab')

    args = parent_parser.parse_args()
    if args.command is None:
        # Sub-parsers are optional by default; fail with a usage message.
        parent_parser.error('a command is required: "new" or "continue"')

    checkpoint = None
    loaded_checkpoint_file_name = None

    # NOTE: pickle can execute arbitrary code while loading; only point
    # --vocab-path at files you trust.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    if args.command == 'continue':
        this_run_folder = args.folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # Resume with the epoch following the one stored in the checkpoint.
        train_options.start_epoch = checkpoint['epoch'] + 1

        if args.data_dir is not None:
            train_options.train_folder = os.path.join(args.data_dir, 'train')
            train_options.validation_folder = os.path.join(
                args.data_dir, 'val')
        if args.anno_dir is not None:
            # Same file names as for a new run, just in the requested folder.
            train_options.ann_train = os.path.join(args.anno_dir,
                                                   'ann_train.json')
            train_options.ann_val = os.path.join(args.anno_dir,
                                                 'ann_val.json')
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(
                    f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                    f'already contains checkpoint for epoch = {train_options.start_epoch}.'
                )
                sys.exit(1)
    else:
        # argparse restricts the command to "new" or "continue".
        start_epoch = 1
        # Annotations live next to the data unless --anno-dir says otherwise.
        anno_dir = args.anno_dir if args.anno_dir is not None else args.data_dir
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            ann_train=os.path.join(anno_dir, 'ann_train.json'),
            ann_val=os.path.join(anno_dir, 'ann_val.json'),
            runs_folder=os.path.join('.', 'runs'),
            start_epoch=start_epoch,
            experiment_name=args.name)

        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3,
                                            vocab_size=len(vocab),
                                            enable_fp16=args.enable_fp16)

        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Dump order is train_options, noise_config, hidden_config.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(
                                    this_run_folder,
                                    f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    # New runs opt in with --tensorboard; resumed runs keep logging only if
    # the run folder already contains a tb-logs directory.
    if (args.command == 'new' and args.tensorboard) or \
            (args.command == 'continue' and os.path.isdir(os.path.join(this_run_folder, 'tb-logs'))):
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    noiser = Noiser(noise_config, device)
    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(
            f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)

    # "to_stirng" (sic) is the method name this code has always called.
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger, vocab)
def main():
    """Parse the command line and start or resume a HiDDeN training run.

    With ``--continue-from-folder`` the options and the last checkpoint are
    loaded from that folder. Otherwise new ``TrainingOptions`` and
    ``HiDDenConfiguration`` objects are built from the arguments, a run
    folder is created and the options are pickled into
    ``options-and-config.pickle`` inside it. Afterwards the noiser, the
    optional Tensorboard logger and the model are created, the configuration
    is printed and ``train`` is called.

    Change relative to the previous revision: ``--runs-folder`` is now passed
    to ``TrainingOptions``. It used to be parsed and then ignored in favour of
    a hard-coded ``./runs`` (which is still the default value).
    """
    device = (torch.device('cuda')
              if torch.cuda.is_available() else torch.device('cpu'))

    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--size', '-s', default=128, type=int)
    parser.add_argument('--data-dir', '-d', required=True, type=str)
    parser.add_argument('--runs-folder', '-sf',
                        default=os.path.join('.', 'runs'), type=str)
    parser.add_argument('--message', '-m', default=30, type=int)
    parser.add_argument('--epochs', '-e', default=400, type=int)
    parser.add_argument('--batch-size', '-b', required=True, type=int)
    parser.add_argument('--continue-from-folder', '-c', default='', type=str)
    # Tensorboard is on by default; --no-tensorboard switches it off.
    parser.add_argument('--tensorboard', dest='tensorboard',
                        action='store_true')
    parser.add_argument('--no-tensorboard', dest='tensorboard',
                        action='store_false')
    parser.set_defaults(tensorboard=True)
    args = parser.parse_args()

    checkpoint = None
    if args.continue_from_folder != '':
        this_run_folder = args.continue_from_folder
        train_options, hidden_config, noise_config = utils.load_options(
            this_run_folder)
        checkpoint = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        train_options.start_epoch = checkpoint['epoch']
    else:
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            # Honour --runs-folder (default is still ./runs).
            runs_folder=args.runs_folder,
            start_epoch=start_epoch)

        # No noise layers are configured here. An example entry would be:
        #   {'type': 'resize', 'resize_ratio': 0.4}
        noise_config = []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3)

        this_run_folder = utils.create_folder_for_run(train_options)
        # Dump order is train_options, noise_config, hidden_config.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    noiser = Noiser(noise_config, device)

    if args.tensorboard:
        print('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.continue_from_folder != '':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        utils.model_from_checkpoint(model, checkpoint)

    # "to_stirng" (sic) is the method name this code has always called.
    print('HiDDeN model: {}\n'.format(model.to_stirng()))
    print('Model Configuration:\n')
    pprint.pprint(vars(hidden_config))
    print('\nNoise configuration:\n')
    pprint.pprint(str(noise_config))
    print('\nTraining train_options:\n')
    pprint.pprint(vars(train_options))
    print()

    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger)
def main():
    """Parse the command line and start or resume a HiDDeN run with one noise type.

    Two sub-commands are supported:

    * ``new`` builds ``TrainingOptions`` / ``HiDDenConfiguration`` from the
      arguments, creates a run folder and pickles the options into
      ``options-and-config.pickle`` inside it.
    * ``continue`` reloads the options and the last checkpoint from an
      existing run folder, optionally overriding data directory and epochs.

    Afterwards logging, the noiser and the model are set up and
    ``train_own_noise`` is called with a short noise name derived from the
    first configured noise layer (``'jpeg'`` when there is none or it is not
    one of the known layers).

    NOTE: for ``new`` runs ``--data-dir`` is required by the parser but is
    then replaced by a host-specific path (selected by ``--hostname`` and
    ``--video_dataset``). That pre-existing behaviour is kept unchanged.

    Changes relative to the previous revision:

    * ``continue`` no longer crashes. It used to read attributes that only
      the ``new`` parser defines (``cover_dependent``, ``video_dataset``,
      ``hostname``, ``jpeg_type``, ``noise``). The host-specific data-dir
      rewrite is now applied to new runs only, ``continue`` gained
      ``--jpeg_type``, and the noise name is taken from the effective noise
      configuration.
    * Omitting ``--noise`` no longer raises on ``args.noise[0]``.
    * ``--save-dir`` is no longer ``required``, so its default is usable.
    * A missing sub-command produces an argparse usage error instead of an
      ``AssertionError``.
    """
    device = (torch.device('cuda')
              if torch.cuda.is_available() else torch.device('cpu'))

    parent_parser = argparse.ArgumentParser(
        description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command',
                                              help='Sub-parser for commands')

    # ---- "new" sub-command -------------------------------------------------
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir', '-d', required=True, type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument('--batch-size', '-b', required=True, type=int,
                                help='The batch size.')
    new_run_parser.add_argument('--epochs', '-e', default=300, type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name', required=True, type=str,
                                help='The name of the experiment.')
    new_run_parser.add_argument('--adv_loss', default=0, required=False,
                                type=float,
                                help='Coefficient of the adversarial loss.')
    new_run_parser.add_argument('--residual', default=0, required=False,
                                type=int,
                                help='If to use residual or not.')
    new_run_parser.add_argument('--video_dataset', default=0, required=False,
                                type=int,
                                help='If to use video dataset or not.')
    # Previously declared with both default='runs' and required=True, which
    # made the default unreachable; it is optional now.
    new_run_parser.add_argument(
        '--save-dir', '-sd', default='runs', required=False, type=str,
        help='The save directory where the result is stored.')
    new_run_parser.add_argument(
        '--size', '-s', default=128, type=int,
        help='The size of the images (images are square so this is height '
        'and width).')
    new_run_parser.add_argument('--message', '-m', default=30, type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument(
        '--continue-from-folder', '-c', default='', type=str,
        help='The folder from where to continue a previous run. Leave blank '
        'if you are starting a new experiment.')
    new_run_parser.add_argument('--tensorboard', action='store_true',
                                help='Use to switch on Tensorboard logging.')
    new_run_parser.add_argument('--enable-fp16', dest='enable_fp16',
                                action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument(
        '--noise', nargs='*', action=NoiseArgParser,
        help="Noise layers configuration. Use quotes when specifying "
        "configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'")
    new_run_parser.add_argument('--hostname', default=socket.gethostname(),
                                help='the host name of the running server')
    new_run_parser.add_argument(
        '--cover-dependent', default=1, required=False, type=int,
        help='If to use cover dependent architecture or not.')
    new_run_parser.add_argument('--jpeg_type', '-j', required=False, type=str,
                                default='jpeg',
                                help='Jpeg type used in the combined2 noise.')
    new_run_parser.set_defaults(tensorboard=False)
    new_run_parser.set_defaults(enable_fp16=False)

    # ---- "continue" sub-command --------------------------------------------
    continue_parser = subparsers.add_parser('continue',
                                            help='Continue a previous run')
    continue_parser.add_argument(
        '--folder', '-f', required=True, type=str,
        help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument(
        '--data-dir', '-d', required=False, type=str,
        help='The directory where the data is stored. Specify a value only '
        'if you want to override the previous value.')
    continue_parser.add_argument(
        '--epochs', '-e', required=False, type=int,
        help='Number of epochs to run the simulation. Specify a value only '
        'if you want to override the previous value.')
    # The noiser needs a jpeg type when resuming as well.
    continue_parser.add_argument('--jpeg_type', '-j', required=False,
                                 type=str, default='jpeg',
                                 help='Jpeg type used in the combined2 noise.')

    # Setting up a seed for debug
    seed = 123
    torch.manual_seed(seed)
    np.random.seed(seed)

    args = parent_parser.parse_args()
    if args.command is None:
        # Sub-parsers are optional by default; fail with a usage message.
        parent_parser.error('a command is required: "new" or "continue"')

    checkpoint = None
    loaded_checkpoint_file_name = None

    if args.command == 'new':
        print(args.cover_dependent)

        # Known machines keep their datasets at fixed locations; any other
        # host falls back to a default that depends on the dataset type.
        host_data_dirs = {
            'ee898-System-Product-Name':
            '/home/ee898/Desktop/chaoning/ImageNet',
            'DL178': '/media/user/SSD1TB-2/ImageNet',
        }
        if args.hostname in host_data_dirs:
            args.data_dir = host_data_dirs[args.hostname]
        elif args.video_dataset:
            args.data_dir = './oops_dataset/oops_video'
        else:
            args.data_dir = '/workspace/data_local/imagenet_pytorch'
        if args.hostname == 'ee898-System-Product-Name':
            args.hostname = 'ee898'

    if args.command == 'continue':
        this_run_folder = args.folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # Resume with the epoch following the one stored in the checkpoint.
        train_options.start_epoch = checkpoint['epoch'] + 1
        if args.data_dir is not None:
            train_options.train_folder = os.path.join(args.data_dir, 'train')
            train_options.validation_folder = os.path.join(
                args.data_dir, 'val')
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(
                    f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                    f'already contains checkpoint for epoch = {train_options.start_epoch}.'
                )
                sys.exit(1)
    else:
        # argparse restricts the command to "new" or "continue".
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            runs_folder=os.path.join('.', args.save_dir),
            start_epoch=start_epoch,
            experiment_name=args.name,
            video_dataset=args.video_dataset)

        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(
            H=args.size,
            W=args.size,
            message_length=args.message,
            encoder_blocks=4,
            encoder_channels=64,
            decoder_blocks=7,
            decoder_channels=64,
            use_discriminator=True,
            use_vgg=False,
            discriminator_blocks=3,
            discriminator_channels=64,
            decoder_loss=1,
            encoder_loss=0.7,
            adversarial_loss=args.adv_loss,
            cover_dependent=args.cover_dependent,
            residual=args.residual,
            enable_fp16=args.enable_fp16)

        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Dump order is train_options, noise_config, hidden_config.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(
                                    this_run_folder,
                                    f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    # New runs opt in with --tensorboard; resumed runs keep logging only if
    # the run folder already contains a tb-logs directory.
    if (args.command == 'new' and args.tensorboard) or \
            (args.command == 'continue' and os.path.isdir(os.path.join(this_run_folder, 'tb-logs'))):
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    noiser = Noiser(noise_config, device, args.jpeg_type)
    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(
            f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)

    # "to_stirng" (sic) is the method name this code has always called.
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    # Map the string form of the first noise layer to the short name that
    # train_own_noise expects; anything else (or no noise layer) is 'jpeg'.
    noise_names = {
        "WebP()": 'webp',
        "JpegCompression2000()": 'jpeg2000',
        "MPEG4()": 'mpeg4',
        "H264()": 'h264',
        "XVID()": 'xvid',
        "DiffQFJpegCompression2()": 'diff_qf_jpeg2',
        "DiffCorruptions()": 'diff_corruptions',
    }
    # noise_config equals args.noise for a new run and the stored
    # configuration when continuing.
    first_noise = str(noise_config[0]) if noise_config else None
    noise = noise_names.get(first_noise, 'jpeg')

    train_own_noise(model, device, hidden_config, train_options,
                    this_run_folder, tb_logger, noise)
test_x = test_x.astype("float32") train_mask = train_mask.astype("float32") train_mask_y = train_mask_y.astype("float32") train_x = train_x / 255.0 train_mask = train_mask / 255.0 test_x = test_x / 255.0 all_train_mask = np.concatenate([train_mask_y, train_mask], axis=-1) model = RCNNModel() # logs and callback logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir) tensorboard_logger = TensorBoardLogger(model, logdir + '/train_loss', test_x, test_y) callbacks = [tensorboard_callback, tensorboard_logger] def remove_none_grad(grads, var_list): return [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(var_list, grads)] def grad(model, inputs, targets): with tf.GradientTape() as t: model_output = model(inputs) current_dice_loss = loss_lib.dice_loss(targets[0], model_output[0]) current_bboxes_loss = loss_lib.masked_mae_loss(targets[1], model_output[1]) total_loss = current_dice_loss + current_bboxes_loss total_grad = t.gradient(total_loss, model.trainable_variables)
def main():
    """Parse the command line and start or resume a HiDDeN training run.

    With ``--continue-from-folder`` the options and the last checkpoint are
    loaded from that folder and training resumes at the epoch after the one
    stored in the checkpoint. Otherwise new ``TrainingOptions`` and
    ``HiDDenConfiguration`` objects are built from the arguments, a run
    folder is created and the options are pickled into
    ``options-and-config.pickle`` inside it. Afterwards logging, the noiser,
    the optional Tensorboard logger and the model are set up, the
    configuration is logged and ``train`` is called.

    Change relative to the previous revision: ``--runs-folder`` is now passed
    to ``TrainingOptions``. It used to be parsed and then ignored in favour of
    a hard-coded ``./runs`` (which is still the default value).
    """
    device = (torch.device('cuda')
              if torch.cuda.is_available() else torch.device('cpu'))

    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--data-dir', '-d', required=True, type=str,
                        help='The directory where the data is stored.')
    parser.add_argument('--batch-size', '-b', required=True, type=int,
                        help='The batch size.')
    parser.add_argument('--epochs', '-e', default=400, type=int,
                        help='Number of epochs to run the simulation.')
    parser.add_argument('--name', required=True, type=str,
                        help='The name of the experiment.')
    parser.add_argument(
        '--runs-folder', '-sf', default=os.path.join('.', 'runs'), type=str,
        help='The root folder where data about experiments are stored.')
    parser.add_argument(
        '--size', '-s', default=128, type=int,
        help='The size of the images (images are square so this is height '
        'and width).')
    parser.add_argument('--message', '-m', default=30, type=int,
                        help='The length in bits of the watermark.')
    parser.add_argument(
        '--continue-from-folder', '-c', default='', type=str,
        help='The folder from where to continue a previous run. Leave blank '
        'if you are starting a new experiment.')
    # Tensorboard is on by default; --no-tensorboard switches it off.
    parser.add_argument(
        '--tensorboard', dest='tensorboard', action='store_true',
        help='If specified, use adds a Tensorboard log. On by default')
    parser.add_argument('--no-tensorboard', dest='tensorboard',
                        action='store_false',
                        help='Use to switch off Tensorboard logging.')
    parser.add_argument(
        '--noise', nargs='*', action=NoiseArgParser,
        help="Noise layers configuration. Use quotes when specifying "
        "configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'")
    parser.set_defaults(tensorboard=True)
    args = parser.parse_args()

    checkpoint = None
    if args.continue_from_folder != '':
        this_run_folder = args.continue_from_folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # Resume with the epoch following the one stored in the checkpoint.
        train_options.start_epoch = checkpoint['epoch'] + 1
    else:
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            # Honour --runs-folder (default is still ./runs).
            runs_folder=args.runs_folder,
            start_epoch=start_epoch,
            experiment_name=args.name)

        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3)

        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Dump order is train_options, noise_config, hidden_config.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(this_run_folder,
                                             f'{args.name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    noiser = Noiser(noise_config, device)

    if args.tensorboard:
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.continue_from_folder != '':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        utils.model_from_checkpoint(model, checkpoint)

    # "to_stirng" (sic) is the method name this code has always called.
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger)
def main():
    """Parse the command line and start or resume a HiDDeN training run.

    Two sub-commands are supported:

    * ``new`` builds ``TrainingOptions`` / ``HiDDenConfiguration`` from the
      arguments, lets ``utils.create_folder_for_run`` prepare the run
      folders and pickles the options into ``train_options.options_file``.
    * ``continue`` reloads the options and the last checkpoint from an
      existing run folder (including ``best_epoch`` / ``best_cond`` from the
      checkpoint), optionally overriding data directory and epochs.

    Afterwards logging, the Tensorboard logger, the noiser and the model are
    set up, the configuration is logged and ``train`` is called.

    Changes relative to the previous revision:

    * ``continue --data-dir`` now overrides the stored data directory. The
      option was accepted but silently ignored.
    * A missing sub-command produces an argparse usage error instead of an
      ``AssertionError``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    parent_parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command', help='Sub-parser for commands')

    # ---- "new" sub-command -------------------------------------------------
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir', '-d', required=True, type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument('--batch-size', '-b', default=30, type=int, help='The batch size.')
    new_run_parser.add_argument('--epochs', '-e', default=300, type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name', required=True, type=str, help='The name of the experiment.')
    new_run_parser.add_argument('--size', '-s', default=128, type=int,
                                help='The size of the images (images are square so this is height and width).')
    new_run_parser.add_argument('--in_channels', default=3, type=int, help='input channel size')
    new_run_parser.add_argument('--message', '-m', default=32, type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument('--ratio', default=0.2, type=float, help='ratio of dataset.')
    new_run_parser.add_argument('--continue-from-folder', '-c', default='', type=str,
                                help='The folder from where to continue a previous run. Leave blank if you are starting a new experiment.')
    new_run_parser.add_argument('--enable-fp16', dest='enable_fp16', action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument('--noise', nargs='*', action=NoiseArgParser,
                                help="Noise layers configuration. Use quotes when specifying configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'")
    new_run_parser.set_defaults(enable_fp16=False)

    # ---- "continue" sub-command --------------------------------------------
    continue_parser = subparsers.add_parser('continue', help='Continue a previous run')
    continue_parser.add_argument('--folder', '-f', required=True, type=str,
                                 help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument('--data-dir', '-d', required=False, type=str,
                                 help='The directory where the data is stored. Specify a value only if you want to override the previous value.')
    continue_parser.add_argument('--epochs', '-e', required=False, type=int,
                                 help='Number of epochs to run the simulation. Specify a value only if you want to override the previous value.')

    args = parent_parser.parse_args()
    if args.command is None:
        # Sub-parsers are optional by default; fail with a usage message.
        parent_parser.error('a command is required: "new" or "continue"')

    checkpoint = None
    loaded_checkpoint_file_name = None

    if args.command == 'continue':
        options_file = os.path.join(args.folder, 'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(
            os.path.join(args.folder, 'checkpoints'))
        # Resume with the epoch following the one stored in the checkpoint.
        train_options.start_epoch = checkpoint['epoch'] + 1
        train_options.best_epoch = checkpoint['best_epoch']
        train_options.best_cond = checkpoint['best_cond']
        if args.data_dir is not None:
            # NOTE(review): assumes TrainingOptions stores the ``data_dir``
            # constructor argument under the same attribute name - confirm.
            train_options.data_dir = args.data_dir
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                      f'already contains checkpoint for epoch = {train_options.start_epoch}.')
                sys.exit(1)
    else:
        # argparse restricts the command to "new" or "continue".
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            data_ratio=args.ratio,
            data_dir=args.data_dir,
            runs_folder='./runs',
            tb_logger_folder='./logger',
            start_epoch=start_epoch,
            experiment_name=f'{args.name}_r{int(100*args.ratio):03d}b{args.size}ch{args.in_channels}m{args.message}')

        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size, W=args.size,
                                            input_channels=args.in_channels,
                                            message_length=args.message,
                                            encoder_blocks=4, encoder_channels=64,
                                            decoder_blocks=7, decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3, discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3,
                                            enable_fp16=args.enable_fp16
                                            )

        # Must run before the dump below: options_file (and this_run_folder /
        # tb_logger_dir read further down) are not passed to TrainingOptions
        # above, so presumably this call sets them - verify in utils.
        utils.create_folder_for_run(train_options)
        # Dump order is train_options, noise_config, hidden_config.
        with open(train_options.options_file, 'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(os.path.join(train_options.this_run_folder,
                                                             f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    logging.info(f'Tensorboard is enabled. Creating logger at {train_options.tb_logger_dir}')
    tb_logger = TensorBoardLogger(train_options.tb_logger_dir)

    noiser = Noiser(noise_config, device)
    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)

    # "to_stirng" (sic) is the method name this code has always called.
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    train(model, device, hidden_config, train_options, train_options.this_run_folder, tb_logger)