def load_model(config_file, device):
    """Restore a trained Hidden model from a named config folder.

    :param config_file: sub-folder name under 'config_folder' containing the
                        pickled options and the checkpoint file
    :param device: torch.device the model and its noiser should run on
    :return: the Hidden model with the checkpoint weights loaded
    """
    folder = os.path.join('config_folder', config_file)
    # The pickled options describe how to rebuild the network and the noiser.
    _, hidden_config, noise_config = utils.load_options(
        os.path.join(folder, 'options.pickle'))
    # Checkpoint tensors are mapped to CPU; `device` placement happens when
    # the model is constructed below.
    checkpoint = torch.load(os.path.join(folder, 'checkpoint.pyt'),
                            map_location='cpu')
    noiser = Noiser(noise_config, device)
    model = Hidden(hidden_config, device, noiser, tb_logger=None)
    utils.model_from_checkpoint(model, checkpoint)
    return model
def init_prepare(args):
    """Populate ``args`` in place with the device, output folder and a
    restored model, according to ``args.arch``.

    After this call ``args`` carries ``device``, ``output``, ``model`` and
    the model-geometry fields (``block_size``, ``message_length``,
    ``in_channels``, ...).  NOTE(review): if ``args.arch`` is neither
    'hidden' nor 'ms-hidden' no model is attached — confirm callers
    validate ``arch`` beforehand.
    """
    # Prefer CUDA unless it is explicitly disabled or unavailable.
    args.device = torch.device("cuda" if not args.disable_gpu
                               and torch.cuda.is_available() else "cpu")
    args.output = f'{args.run_folder}/output'
    utils.ensure_dir(args.output)
    if args.arch == 'hidden':
        # Plain HiDDeN: options and best-model checkpoint live in the run folder.
        args.options_file = f'{args.run_folder}/options-and-config.pickle'
        args.checkpoint_file = f'{args.run_folder}/checkpoints/hidden-best-model.pyt'
        train_options, hidden_config, noise_config = utils.load_options(
            args.options_file)
        noiser = Noiser(noise_config, device=args.device)
        checkpoint = torch.load(args.checkpoint_file, map_location=args.device)
        hidden_net = Hidden(hidden_config, args.device, noiser, None)
        utils.model_from_checkpoint(hidden_net, checkpoint)
        args.model = hidden_net
        # Geometry is taken from the network config; block size equals the
        # configured image height H here.
        args.block_size = hidden_config.H
        args.message_length = hidden_config.message_length
        args.hidden_config = hidden_config
        args.in_channels = hidden_config.input_channels
    elif args.arch == 'ms-hidden':
        # MS-HiDDeN: a single .pth bundles both options and weights.
        checkpoint = torch.load(f'{args.run_folder}/trained-model.pth')
        options = argparse.Namespace(**checkpoint['option'])
        options.device = args.device
        noiser = Noiser(options.noise, device=args.device)
        model = nets.MS_Hidden(options, noiser).to(args.device)
        model.load_state_dict(checkpoint['model'])
        args.model = model
        args.block_size = options.block_size
        args.message_length = options.message
        args.in_channels = options.in_channels
        args.layers = options.layers
def train(model: Hidden, device: torch.device,
          hidden_config: HiDDenConfiguration, train_options: TrainingOptions,
          this_run_folder: str, tb_logger):
    """
    Trains the HiDDeN model, tracking the best epoch by the sum of encoder
    MSE and bitwise error on validation data, and saving sample patches of
    original / encoded / noised images every epoch.
    :param model: The model
    :param device: torch.device object, usually this is GPU (if avaliable), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object which is a thin wrapper for TensorboardX logger.
                      Pass None to disable TensorboardX logging
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config, train_options)
    images_to_save = 8
    saved_images_size = (512, 512)
    # Best-so-far bookkeeping is resumed from the training options so that
    # continued runs keep their history.
    best_epoch = train_options.best_epoch
    best_cond = train_options.best_cond
    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        logging.info(
            f'\nStarting epoch {epoch}/{train_options.number_of_epochs} [{best_epoch}]'
        )
        training_losses = defaultdict(functions.AverageMeter)
        epoch_start = time.time()
        # ---- training pass ----
        for image, _ in tqdm(train_data, ncols=80):
            image = image.to(device)  #.squeeze(0)
            # A fresh random message per batch: one bit per message position.
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0], hidden_config.message_length))).to(device)
            losses, _ = model.train_on_batch([image, message])
            for name, loss in losses.items():
                training_losses[name].update(loss)
        train_duration = time.time() - epoch_start
        logging.info('Epoch {} training duration {:.2f} sec'.format(
            epoch, train_duration))
        logging.info('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'),
                           training_losses, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses('train_loss', training_losses, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)
            tb_logger.writer.flush()
        validation_losses = defaultdict(functions.AverageMeter)
        logging.info('Running validation for epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        # One randomly picked sample per validation batch is resized and kept
        # for the qualitative image dump below.
        val_image_patches = ()
        val_encoded_patches = ()
        val_noised_patches = ()
        # ---- validation pass ----
        for image, _ in tqdm(val_data, ncols=80):
            image = image.to(device)  #.squeeze(0)
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0], hidden_config.message_length))).to(device)
            losses, (encoded_images, noised_images,
                     decoded_messages) = model.validate_on_batch(
                         [image, message])
            for name, loss in losses.items():
                validation_losses[name].update(loss)
            if hidden_config.enable_fp16:
                # Cast back to fp32 so interpolate/save below behave normally.
                image = image.float()
                encoded_images = encoded_images.float()
            pick = np.random.randint(0, image.shape[0])
            val_image_patches += (F.interpolate(
                image[pick:pick + 1, :, :, :].cpu(),
                size=(hidden_config.W, hidden_config.H)), )
            val_encoded_patches += (F.interpolate(
                encoded_images[pick:pick + 1, :, :, :].cpu(),
                size=(hidden_config.W, hidden_config.H)), )
            val_noised_patches += (F.interpolate(
                noised_images[pick:pick + 1, :, :, :].cpu(),
                size=(hidden_config.W, hidden_config.H)), )
        if tb_logger is not None:
            tb_logger.save_losses('val_loss', validation_losses, epoch)
            tb_logger.writer.flush()
        val_image_patches = torch.stack(val_image_patches).squeeze(1)
        val_encoded_patches = torch.stack(val_encoded_patches).squeeze(1)
        val_noised_patches = torch.stack(val_noised_patches).squeeze(1)
        utils.save_images(val_image_patches[:images_to_save, :, :, :],
                          val_encoded_patches[:images_to_save, :, :, :],
                          val_noised_patches[:images_to_save, :, :, :],
                          epoch,
                          os.path.join(this_run_folder, 'images'),
                          resize_to=saved_images_size)
        # Model-selection criterion: lower encoder MSE + bitwise error is better.
        curr_cond = validation_losses['encoder_mse'].avg + validation_losses[
            'bitwise-error'].avg
        if best_cond is None or curr_cond < best_cond:
            best_cond = curr_cond
            best_epoch = epoch
        utils.log_progress(validation_losses)
        logging.info('-' * 40)
        # Checkpoint every epoch; best_epoch/best_cond are stored alongside
        # so a continued run can resume the selection state.
        utils.save_checkpoint(model, train_options.experiment_name, epoch,
                              best_epoch, best_cond,
                              os.path.join(this_run_folder, 'checkpoints'))
        logging.info(
            f'Current best epoch = {best_epoch}, loss = {best_cond:.6f}')
        utils.write_losses(os.path.join(this_run_folder, 'validation.csv'),
                           validation_losses, epoch,
                           time.time() - epoch_start)
def main():
    """Run a fixed validation sweep of a trained HiDDeN model over a set of
    WebP compression noises and append the losses to ``validation_run.csv``.
    """
    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--hostname',
                        default=socket.gethostname(),
                        help='the host name of the running server')
    # parser.add_argument('--size', '-s', default=128, type=int, help='The size of the images (images are square so this is height and width).')
    parser.add_argument('--data-dir',
                        '-d',
                        required=True,
                        type=str,
                        help='The directory where the data is stored.')
    parser.add_argument(
        '--runs_root',
        '-r',
        default=os.path.join('.', 'experiments'),
        type=str,
        help='The root folder where data about experiments are stored.')
    parser.add_argument('--batch-size',
                        '-b',
                        default=1,
                        type=int,
                        help='Validation batch size.')
    args = parser.parse_args()
    # NOTE(review): this hostname switch overwrites the required --data-dir
    # value in every branch, so the CLI argument is effectively ignored —
    # confirm this is intended for these known machines.
    if args.hostname == 'ee898-System-Product-Name':
        args.data_dir = '/home/ee898/Desktop/chaoning/ImageNet'
        args.hostname = 'ee898'
    elif args.hostname == 'DL178':
        args.data_dir = '/media/user/SSD1TB-2/ImageNet'
    else:
        args.data_dir = '/workspace/data_local/imagenet_pytorch'
    assert args.data_dir
    print_each = 25
    completed_runs = [
        o for o in os.listdir(args.runs_root)
        if os.path.isdir(os.path.join(args.runs_root, o))
        and o != 'no-noise-defaults'
    ]
    print(completed_runs)
    write_csv_header = True
    # The run folder itself (not the completed_runs entries) is evaluated.
    current_run = args.runs_root
    print(f'Run folder: {current_run}')
    options_file = os.path.join(current_run, 'options-and-config.pickle')
    train_options, hidden_config, noise_config = utils.load_options(
        options_file)
    # Both loaders are pointed at the 'val' split for this evaluation run.
    train_options.train_folder = os.path.join(args.data_dir, 'val')
    train_options.validation_folder = os.path.join(args.data_dir, 'val')
    train_options.batch_size = args.batch_size
    checkpoint, chpt_file_name = utils.load_last_checkpoint(
        os.path.join(current_run, 'checkpoints'))
    print(f'Loaded checkpoint from file {chpt_file_name}')
    noiser = Noiser(noise_config, device, 'jpeg')
    model = Hidden(hidden_config, device, noiser, tb_logger=None)
    utils.model_from_checkpoint(model, checkpoint)
    print('Model loaded successfully. Starting validation run...')
    _, val_data = utils.get_data_loaders(hidden_config, train_options)
    file_count = len(val_data.dataset)
    # Ceiling division: a partial final batch still counts as a step.
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1
    with torch.no_grad():
        # One full pass over the validation data per noise setting.
        noises = ['webp_10', 'webp_25', 'webp_50', 'webp_75', 'webp_90']
        for noise in noises:
            losses_accu = {}
            step = 0
            for image, _ in val_data:
                step += 1
                image = image.to(device)
                message = torch.Tensor(
                    np.random.choice(
                        [0, 1],
                        (image.shape[0],
                         hidden_config.message_length))).to(device)
                losses, (
                    encoded_images, noised_images,
                    decoded_messages) = model.validate_on_batch_specific_noise(
                        [image, message], noise=noise)
                if not losses_accu:  # dict is empty, initialize
                    for name in losses:
                        losses_accu[name] = AverageMeter()
                for name, loss in losses.items():
                    losses_accu[name].update(loss)
                if step % print_each == 0 or step == steps_in_epoch:
                    print(f'Step {step}/{steps_in_epoch}')
                    utils.print_progress(losses_accu)
                    print('-' * 40)
            # utils.print_progress(losses_accu)
            # The CSV header is written only once, before the first noise.
            write_validation_loss(os.path.join(args.runs_root,
                                               'validation_run.csv'),
                                  losses_accu,
                                  noise,
                                  checkpoint['epoch'],
                                  write_header=write_csv_header)
            write_csv_header = False
def _blocking_metrics(patches):
    """Measure blocking artifacts across a 4x4 grid of 32x32 image patches.

    :param patches: list of numpy arrays of shape (32, 32), laid out
                    row-major with 4 patches per grid row — assumes the
                    layout produced by cropImg(32, ...); TODO confirm.
    :return: tuple (v_average, h_average, v_count, h_count, total) where
             v_average/h_average are the mean absolute differences between
             the 10-pixel-wide borders of vertically/horizontally adjacent
             patches, v_count/h_count count directly-facing edge pixels
             differing by more than 0.5, and total is the number of pixel
             pairs examined.
    """
    total = 0
    v_count = 0
    h_count = 0
    v_average = 0.0
    h_average = 0.0
    # Vertical seams: neighbouring patches in the same grid row; every 4th
    # index ends a row, so pairs with (i + 1) % 4 == 0 are skipped.
    for i in range(len(patches) - 1):
        if (i + 1) % 4 != 0:
            left = patches[i]
            right = patches[i + 1]
            # Border sums are reset for every pair of patches. (The original
            # validation copy of this code forgot the reset in the horizontal
            # pass and accumulated sums across iterations.)
            sum_left = 0.0
            sum_right = 0.0
            for j in range(32):
                for k in range(10):
                    sum_left += left[j][31 - k]
                    sum_right += right[j][k]
            # 320 = 32 rows * 10 border columns.
            v_average += np.abs(sum_left - sum_right) / 320
            for j in range(32):
                total += 1
                if np.abs(left[j][31] - right[j][0]) > 0.5:
                    v_count += 1
    v_average /= 12  # 12 vertical seams in a 4x4 grid
    # Horizontal seams: the patch directly below sits 4 indices further on.
    for i in range(len(patches) - 4):
        top = patches[i]
        bottom = patches[i + 4]
        sum_top = 0.0
        sum_bottom = 0.0
        for j in range(32):
            for k in range(10):
                sum_top += top[31 - k][j]
                sum_bottom += bottom[k][j]
        h_average += np.abs(sum_top - sum_bottom) / 320
        for j in range(32):
            total += 1
            if np.abs(top[31][j] - bottom[0][j]) > 0.5:
                h_count += 1
    h_average /= 12  # 12 horizontal seams in a 4x4 grid
    return v_average, h_average, v_count, h_count, total


def train(model: Hidden, device: torch.device,
          hidden_config: HiDDenConfiguration, train_options: TrainingOptions,
          this_run_folder: str, tb_logger):
    """
    Trains the HiDDeN model on 32x32 crops of each input image, tracking a
    blocking-effect loss computed over the seams between encoded crops.
    :param model: The model
    :param device: torch.device object, usually this is GPU (if avaliable), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object which is a thin wrapper for TensorboardX logger.
                      Pass None to disable TensorboardX logging
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config, train_options)
    file_count = len(train_data.dataset)
    # Ceiling division: a partial final batch still counts as a step.
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1
    print_each = 10
    images_to_save = 8
    saved_images_size = (512, 512)
    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        logging.info('\nStarting epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        logging.info('Batch size = {}\nSteps in epoch = {}'.format(
            train_options.batch_size, steps_in_epoch))
        training_losses = defaultdict(AverageMeter)
        epoch_start = time.time()
        step = 1
        # ---- training pass ----
        for image, _ in train_data:
            image = image.to(device)
            # Crop the image into 32x32 blocks and train on each block.
            imgs = cropImg(32, image)
            bitwise_arr = []
            main_losses = None
            encoded_imgs = []
            for img in imgs:
                img = img.to(device)
                message = torch.Tensor(
                    np.random.choice(
                        [0, 1],
                        (img.shape[0],
                         hidden_config.message_length))).to(device)
                losses, (encoded_images, noised_images,
                         decoded_messages) = model.train_on_batch(
                             [img, message])
                # Keep the first channel of the first encoded sample of each
                # block for the blocking-effect metric.
                encoded_imgs.append(
                    encoded_images[0][0].cpu().detach().numpy())
                main_losses = losses  # losses of the last block processed
                for name, loss in losses.items():
                    # The trailing space matches the key emitted by the model.
                    if name == 'bitwise-error ':
                        bitwise_arr.append(loss)
            v_average, h_average, v_count, h_count, pair_total = \
                _blocking_metrics(encoded_imgs)
            # Alternative formulation: (v_count + h_count) / pair_total.
            blocking_loss = (h_average + v_average) / 2
            bitwise_avg = np.average(np.array(bitwise_arr))
            # Bitwise error is averaged over all blocks; blocking effect is
            # replaced by the seam metric; everything else uses the last
            # block's value.
            for name, loss in main_losses.items():
                if name == 'bitwise-error ':
                    training_losses[name].update(bitwise_avg)
                elif name == 'blocking_effect':
                    training_losses[name].update(blocking_loss)
                else:
                    training_losses[name].update(loss)
            if step % print_each == 0 or step == steps_in_epoch:
                logging.info('Epoch: {}/{} Step: {}/{}'.format(
                    epoch, train_options.number_of_epochs, step,
                    steps_in_epoch))
                utils.log_progress(training_losses)
                logging.info('-' * 40)
            step += 1
        train_duration = time.time() - epoch_start
        logging.info('Epoch {} training duration {:.2f} sec'.format(
            epoch, train_duration))
        logging.info('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'),
                           training_losses, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses(training_losses, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)
        first_iteration = True
        validation_losses = defaultdict(AverageMeter)
        logging.info('Running validation for epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        # ---- validation pass ----
        for image, _ in val_data:
            image = image.to(device)
            imgs = cropImg(32, image)
            bitwise_arr = []
            main_losses = None
            encoded_imgs = []
            blocking_imgs = []
            for img in imgs:
                img = img.to(device)
                message = torch.Tensor(
                    np.random.choice(
                        [0, 1],
                        (img.shape[0],
                         hidden_config.message_length))).to(device)
                losses, (encoded_images, noised_images,
                         decoded_messages) = model.validate_on_batch(
                             [img, message])
                # Full tensors are kept for re-assembling the image; numpy
                # patches feed the blocking metric.
                encoded_imgs.append(encoded_images)
                blocking_imgs.append(
                    encoded_images[0][0].cpu().detach().numpy())
                main_losses = losses
                for name, loss in losses.items():
                    if name == 'bitwise-error ':
                        bitwise_arr.append(loss)
            v_average, h_average, v_count, h_count, pair_total = \
                _blocking_metrics(blocking_imgs)
            blocking_loss = (h_average + v_average) / 2
            bitwise_avg = np.average(np.array(bitwise_arr))
            for name, loss in main_losses.items():
                if name == 'bitwise-error ':
                    validation_losses[name].update(bitwise_avg)
                elif name == 'blocking_effect':
                    validation_losses[name].update(blocking_loss)
                else:
                    validation_losses[name].update(loss)
            # Re-assemble the encoded blocks into full images for saving.
            encoded_images = concatImgs(encoded_imgs)
            if first_iteration:
                if hidden_config.enable_fp16:
                    image = image.float()
                    encoded_images = encoded_images.float()
                utils.save_images(
                    image.cpu()[:images_to_save, :, :, :],
                    encoded_images[:images_to_save, :, :, :].cpu(),
                    epoch,
                    os.path.join(this_run_folder, 'images'),
                    resize_to=saved_images_size)
                first_iteration = False
        utils.log_progress(validation_losses)
        logging.info('-' * 40)
        utils.save_checkpoint(model, train_options.experiment_name, epoch,
                              os.path.join(this_run_folder, 'checkpoints'))
        utils.write_losses(os.path.join(this_run_folder, 'validation.csv'),
                           validation_losses, epoch,
                           time.time() - epoch_start)
def main():
    """Entry point: parse the 'new'/'continue' sub-commands, build or restore
    the run configuration, construct the HiDDeN model and launch training.

    Fix: ``--vocab-path`` was only registered on the 'new' sub-parser, but
    ``args.vocab_path`` is read for both commands below, so 'continue'
    crashed with AttributeError. The option is now registered on both
    sub-parsers with the same default.
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    parent_parser = argparse.ArgumentParser(
        description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command',
                                              help='Sub-parser for commands')
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir',
                                '-d',
                                required=True,
                                type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument(
        '--anno-dir',
        '-a',
        type=str,
        help=
        'The directory where the annotations are stored. Specify only if you have annotations in a different folder.'
    )
    new_run_parser.add_argument('--batch-size',
                                '-b',
                                required=True,
                                type=int,
                                help='The batch size.')
    new_run_parser.add_argument('--epochs',
                                '-e',
                                default=300,
                                type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name',
                                required=True,
                                type=str,
                                help='The name of the experiment.')
    new_run_parser.add_argument(
        '--size',
        '-s',
        default=128,
        type=int,
        help=
        'The size of the images (images are square so this is height and width).'
    )
    new_run_parser.add_argument('--message',
                                '-m',
                                default=256,
                                type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument(
        '--continue-from-folder',
        '-c',
        default='',
        type=str,
        help=
        'The folder from where to continue a previous run. Leave blank if you are starting a new experiment.'
    )
    new_run_parser.add_argument('--tensorboard',
                                action='store_true',
                                help='Use to switch on Tensorboard logging.')
    new_run_parser.add_argument('--enable-fp16',
                                dest='enable_fp16',
                                action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument(
        '--noise',
        nargs='*',
        action=NoiseArgParser,
        help=
        "Noise layers configuration. Use quotes when specifying configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'"
    )
    new_run_parser.set_defaults(tensorboard=False)
    new_run_parser.set_defaults(enable_fp16=False)
    new_run_parser.add_argument('--vocab-path',
                                '-v',
                                type=str,
                                default='./data/vocab.pkl',
                                help='load the vocab')
    continue_parser = subparsers.add_parser('continue',
                                            help='Continue a previous run')
    continue_parser.add_argument(
        '--folder',
        '-f',
        required=True,
        type=str,
        help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument(
        '--data-dir',
        '-d',
        required=False,
        type=str,
        help=
        'The directory where the data is stored. Specify a value only if you want to override the previous value.'
    )
    continue_parser.add_argument(
        '--anno-dir',
        '-a',
        required=False,
        type=str,
        help=
        'The directory where the annotations are stored. Specify a value only if you want to override the previous value.'
    )
    continue_parser.add_argument(
        '--epochs',
        '-e',
        required=False,
        type=int,
        help=
        'Number of epochs to run the simulation. Specify a value only if you want to override the previous value.'
    )
    # FIX: args.vocab_path is read unconditionally below, so the option must
    # exist on the 'continue' command as well.
    continue_parser.add_argument('--vocab-path',
                                 '-v',
                                 type=str,
                                 default='./data/vocab.pkl',
                                 help='load the vocab')
    args = parent_parser.parse_args()
    checkpoint = None
    loaded_checkpoint_file_name = None
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    if args.command == 'continue':
        # Restore everything from the previous run's folder.
        this_run_folder = args.folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        train_options.start_epoch = checkpoint['epoch'] + 1
        if args.data_dir is not None:
            train_options.train_folder = os.path.join(args.data_dir, 'train')
            train_options.validation_folder = os.path.join(
                args.data_dir, 'val')
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(
                    f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                    f'already contains checkpoint for epoch = {train_options.start_epoch}.'
                )
                exit(1)
    else:
        assert args.command == 'new'
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            ann_train=os.path.join(args.data_dir, 'ann_train.json'),
            ann_val=os.path.join(args.data_dir, 'ann_val.json'),
            runs_folder=os.path.join('.', 'runs'),
            start_epoch=start_epoch,
            experiment_name=args.name)
        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3,
                                            vocab_size=len(vocab),
                                            enable_fp16=args.enable_fp16)
        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Persist the configuration so the run can later be continued.
        with open(
                os.path.join(this_run_folder, 'options-and-config.pickle'),
                'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(
                                    this_run_folder,
                                    f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])
    # Tensorboard is on when requested for a new run, or when a continued
    # run already has a tb-logs folder.
    if (args.command == 'new' and args.tensorboard) or \
            (args.command == 'continue' and os.path.isdir(os.path.join(this_run_folder, 'tb-logs'))):
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder,
                                                   'tb-logs'))
    else:
        tb_logger = None
    noiser = Noiser(noise_config, device)
    model = Hidden(hidden_config, device, noiser, tb_logger)
    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(
            f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)
    # to_stirng (sic) is the method name defined on the Hidden class.
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))
    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger, vocab)
def train(model: Hidden, device: torch.device,
          hidden_config: HiDDenConfiguration, train_options: TrainingOptions,
          this_run_folder: str, tb_logger):
    """
    Trains the HiDDeN model on entropy-weighted image blocks, tracking a
    blocking-effect metric over the seams between encoded blocks and
    optionally dumping the encoded validation images in the last epoch.
    :param model: The model
    :param device: torch.device object, usually this is GPU (if avaliable), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object which is a thin wrapper for TensorboardX logger.
                      Pass None to disable TensorboardX logging
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config, train_options)
    block_size = hidden_config.block_size
    # Number of blocks per image side (assumes H divisible by block_size —
    # TODO confirm).
    block_number = int(hidden_config.H/hidden_config.block_size)
    val_folder = train_options.validation_folder
    loss_type = train_options.loss_mode
    m_length = hidden_config.message_length
    alpha = train_options.alpha
    # Validation image names, sorted so saved outputs keep a stable order.
    img_names = listdir(val_folder+"/valid_class")
    img_names.sort()
    out_folder = train_options.output_folder
    default = train_options.default
    beta = train_options.beta
    crop_width = int(beta*block_size)
    file_count = len(train_data.dataset)
    # Ceiling division: a partial final batch still counts as a step.
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1
    print_each = 10
    images_to_save = 8
    saved_images_size = (512, 512)
    icount = 0
    plot_block = []
    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        logging.info('\nStarting epoch {}/{}'.format(epoch, train_options.number_of_epochs))
        logging.info('Batch size = {}\nSteps in epoch = {}'.format(train_options.batch_size, steps_in_epoch))
        training_losses = defaultdict(AverageMeter)
        epoch_start = time.time()
        step = 1
        #train
        for image, _ in train_data:
            image = image.to(device)
            #crop imgs into blocks
            imgs, modified_imgs, entropies = cropImg(block_size,image,crop_width,alpha)
            bitwise_arr=[]
            main_losses = None
            encoded_imgs = []
            batch = 0
            for img, modified_img, entropy in zip(imgs,modified_imgs, entropies):
                img=img.to(device)
                modified_img = modified_img.to(device)
                entropy = entropy.to(device)
                message = torch.Tensor(np.random.choice([0, 1], (img.shape[0], m_length))).to(device)
                losses, (encoded_images, noised_images, decoded_messages) = \
                    model.train_on_batch([img, message, modified_img, entropy,loss_type])
                encoded_imgs.append(encoded_images)
                batch = encoded_images.shape[0]
                #get loss in the last block
                # Average the per-block losses: the first block initialises
                # main_losses (aliasing and rescaling the returned dict in
                # place), later blocks accumulate their scaled values.
                if main_losses is None:
                    main_losses = losses
                    for k in losses:
                        main_losses[k] = losses[k]/len(imgs)
                else:
                    for k in main_losses:
                        main_losses[k] += losses[k]/len(imgs)
            #blocking effect loss calculation
            blocking_loss = blocking_value(encoded_imgs,batch,block_size,block_number)
            #update bitwise training loss
            # When `default` is off, the reported blocking_effect is replaced
            # by the seam metric computed above.
            for name, loss in main_losses.items():
                if(default == False and name == 'blocking_effect'):
                    training_losses[name].update(blocking_loss)
                else:
                    training_losses[name].update(loss)
            #statistic
            if step % print_each == 0 or step == steps_in_epoch:
                logging.info(
                    'Epoch: {}/{} Step: {}/{}'.format(epoch, train_options.number_of_epochs, step,
                                                      steps_in_epoch))
                utils.log_progress(training_losses)
                logging.info('-' * 40)
            step += 1
        train_duration = time.time() - epoch_start
        logging.info('Epoch {} training duration {:.2f} sec'.format(epoch, train_duration))
        logging.info('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'), training_losses, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses(training_losses, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)
        first_iteration = True
        validation_losses = defaultdict(AverageMeter)
        logging.info('Running validation for epoch {}/{}'.format(epoch, train_options.number_of_epochs))
        #validation
        ep_blocking = 0
        ep_total = 0
        for image, _ in val_data:
            image = image.to(device)
            #crop imgs
            imgs, modified_imgs, entropies = cropImg(block_size,image,crop_width,alpha)
            bitwise_arr=[]
            main_losses = None
            encoded_imgs = []
            batch = 0
            for img, modified_img, entropy in zip(imgs,modified_imgs, entropies):
                img=img.to(device)
                modified_img = modified_img.to(device)
                entropy = entropy.to(device)
                message = torch.Tensor(np.random.choice([0, 1], (img.shape[0], m_length))).to(device)
                # NOTE(review): validation calls train_on_batch, which in the
                # other variants of this file updates model weights — confirm
                # this is intended here.
                losses, (encoded_images, noised_images, decoded_messages) = \
                    model.train_on_batch([img, message, modified_img, entropy,loss_type])
                encoded_imgs.append(encoded_images)
                batch = encoded_images.shape[0]
                #get loss in the last block
                if main_losses is None:
                    main_losses = losses
                    for k in losses:
                        main_losses[k] = losses[k]/len(imgs)
                else:
                    for k in main_losses:
                        main_losses[k] += losses[k]/len(imgs)
            #blocking value for plotting
            blocking_loss = blocking_value(encoded_imgs,batch,block_size,block_number)
            ep_blocking = ep_blocking+ blocking_loss
            ep_total = ep_total+1
            for name, loss in main_losses.items():
                if(default == False and name == 'blocking_effect'):
                    validation_losses[name].update(blocking_loss)
                else:
                    validation_losses[name].update(loss)
            #concat image
            encoded_images = concatImgs(encoded_imgs,block_number)
            #save_image(encoded_images,"enc_img"+str(epoch)+".png")
            #save_image(image,"original_img"+str(epoch)+".png")
            if first_iteration:
                if hidden_config.enable_fp16:
                    image = image.float()
                    encoded_images = encoded_images.float()
                utils.save_images(image.cpu()[:images_to_save, :, :, :],
                                  encoded_images[:images_to_save, :, :, :].cpu(),
                                  epoch,
                                  os.path.join(this_run_folder, 'images'),
                                  resize_to=saved_images_size)
                first_iteration = False
            #save validation in the last epoch
            if(epoch == train_options.number_of_epochs):
                if(train_options.ats):
                    for i in range(0,batch):
                        image = encoded_images[i].cpu()
                        # Map from [-1, 1] back to [0, 1] for saving.
                        image = (image + 1) / 2
                        f_dst = out_folder+"/"+img_names[icount]
                        save_image(image,f_dst)
                        icount = icount+1
        #append block effect for plotting
        plot_block.append(ep_blocking/ep_total)
        utils.log_progress(validation_losses)
        logging.info('-' * 40)
        utils.save_checkpoint(model, train_options.experiment_name, epoch,
                              os.path.join(this_run_folder, 'checkpoints'))
        utils.write_losses(os.path.join(this_run_folder, 'validation.csv'),
                           validation_losses, epoch,
                           time.time() - epoch_start)
def train_own_noise(model: Hidden, device: torch.device,
                    hidden_config: HiDDenConfiguration,
                    train_options: TrainingOptions, this_run_folder: str,
                    tb_logger, noise):
    """
    Trains the HiDDeN model, validating against one specific noise setting.
    :param model: The model
    :param device: torch.device object, usually this is GPU (if avaliable), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object which is a thin wrapper for TensorboardX logger.
                      Pass None to disable TensorboardX logging
    :param noise: noise identifier forwarded to validate_on_batch_specific_noise;
                  also names the validation CSV file
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config, train_options)
    file_count = len(train_data.dataset)
    # Ceiling division: a partial final batch still counts as a step.
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1
    # NOTE(review): hard-coded override discards the computed value above and
    # caps every epoch at 313 steps — confirm this is intentional.
    steps_in_epoch = 313
    print_each = 10
    images_to_save = 8
    saved_images_size = (
        512, 512)  # for qualitative check purpose to use a larger size
    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        logging.info('\nStarting epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        logging.info('Batch size = {}\nSteps in epoch = {}'.format(
            train_options.batch_size, steps_in_epoch))
        training_losses = defaultdict(AverageMeter)
        if train_options.video_dataset:
            # Frame order is reshuffled every epoch for video datasets.
            random.shuffle(train_data.dataset)
        epoch_start = time.time()
        step = 1
        for image, _ in train_data:
            image = image.to(device)
            # Fresh random message per batch: one bit per message position.
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0], hidden_config.message_length))).to(device)
            losses, _ = model.train_on_batch([image, message])
            for name, loss in losses.items():
                training_losses[name].update(loss)
            if step % print_each == 0 or step == steps_in_epoch:
                #import pdb; pdb.set_trace()
                logging.info('Epoch: {}/{} Step: {}/{}'.format(
                    epoch, train_options.number_of_epochs, step,
                    steps_in_epoch))
                utils.log_progress(training_losses)
                logging.info('-' * 40)
            step += 1
            # Stop the epoch once the (capped) step budget is reached.
            if step == steps_in_epoch:
                break
        train_duration = time.time() - epoch_start
        logging.info('Epoch {} training duration {:.2f} sec'.format(
            epoch, train_duration))
        logging.info('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'),
                           training_losses, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses(training_losses, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)
        first_iteration = True
        validation_losses = defaultdict(AverageMeter)
        logging.info('Running validation for epoch {}/{} for noise {}'.format(
            epoch, train_options.number_of_epochs, noise))
        step = 1
        for image, _ in val_data:
            image = image.to(device)
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0], hidden_config.message_length))).to(device)
            losses, (
                encoded_images, noised_images,
                decoded_messages) = model.validate_on_batch_specific_noise(
                    [image, message], noise=noise)
            for name, loss in losses.items():
                validation_losses[name].update(loss)
            if first_iteration:
                if hidden_config.enable_fp16:
                    # Cast back to fp32 so the image saving below behaves normally.
                    image = image.float()
                    encoded_images = encoded_images.float()
                utils.save_images(
                    image.cpu()[:images_to_save, :, :, :],
                    encoded_images[:images_to_save, :, :, :].cpu(),
                    epoch,
                    os.path.join(this_run_folder, 'images'),
                    resize_to=saved_images_size)
                first_iteration = False
            step += 1
            # Validation uses a tenth of the training step budget.
            if step == steps_in_epoch // 10:
                break
        utils.log_progress(validation_losses)
        logging.info('-' * 40)
        utils.save_checkpoint(model, train_options.experiment_name, epoch,
                              os.path.join(this_run_folder, 'checkpoints'))
        # One CSV per noise setting, e.g. validation_jpeg.csv.
        utils.write_losses(
            os.path.join(this_run_folder, 'validation_' + noise + '.csv'),
            validation_losses, epoch,
            time.time() - epoch_start)
def train(model: Hidden, device: torch.device,
          hidden_config: HiDDenConfiguration, train_options: TrainingOptions,
          this_run_folder: str, tb_logger, vocab):
    """
    Trains the HiDDeN model on image/caption batches.

    Batches carry encryption/decryption keys (``ekeys``/``dkeys``) and a
    caption; validation additionally decodes the predicted caption tokens
    through ``vocab`` and prints one reconstructed sentence per batch.

    :param model: The model
    :param device: torch.device object, usually this is GPU (if available), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object, a thin wrapper for the TensorboardX
                      logger. Pass None to disable TensorboardX logging.
    :param vocab: Vocabulary object with an ``idx2word`` mapping; also passed
                  to the data-loader factory.
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config,
                                                  train_options, vocab)
    file_count = len(train_data.dataset)
    # ceil(file_count / batch_size)
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1

    print_each = 10
    images_to_save = 8
    saved_images_size = (512, 512)

    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        logging.info('\nStarting epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        logging.info('Batch size = {}\nSteps in epoch = {}'.format(
            train_options.batch_size, steps_in_epoch))
        training_losses = defaultdict(AverageMeter)
        epoch_start = time.time()
        step = 1
        for image, ekeys, dkeys, caption, length in train_data:
            image, caption, ekeys, dkeys = image.to(device), caption.to(
                device), ekeys.to(device), dkeys.to(device)
            losses, _ = model.train_on_batch(
                [image, ekeys, dkeys, caption, length])
            for name, loss in losses.items():
                training_losses[name].update(loss)
            if step % print_each == 0 or step == steps_in_epoch:
                logging.info('Epoch: {}/{} Step: {}/{}'.format(
                    epoch, train_options.number_of_epochs, step,
                    steps_in_epoch))
                utils.log_progress(training_losses)
                logging.info('-' * 40)
            step += 1

        train_duration = time.time() - epoch_start
        logging.info('Epoch {} training duration {:.2f} sec'.format(
            epoch, train_duration))
        logging.info('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'),
                           training_losses, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses(training_losses, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)

        first_iteration = True
        validation_losses = defaultdict(AverageMeter)
        logging.info('Running validation for epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        for image, ekeys, dkeys, caption, length in val_data:
            image, caption, ekeys, dkeys = image.to(device), caption.to(
                device), ekeys.to(device), dkeys.to(device)
            losses, (encoded_images, noised_images, decoded_messages, predicted_sents) = \
                model.validate_on_batch([image, ekeys, dkeys, caption, length])
            predicted_sents = predicted_sents.cpu().numpy()
            # Print the first decodable predicted sentence of the batch as a
            # qualitative check, then stop.
            # NOTE(review): iterates up to batch_size even for a smaller final
            # batch — the IndexError handler below also absorbs that case.
            for i in range(train_options.batch_size):
                try:
                    print("".join([
                        vocab.idx2word[int(idx)] + ' '
                        for idx in predicted_sents[i]
                    ]))
                    break
                except IndexError:
                    print(f'{i}th batch does not have enough length.')
            for name, loss in losses.items():
                validation_losses[name].update(loss)
            if first_iteration:
                # Save a qualitative cover/encoded comparison once per epoch.
                if hidden_config.enable_fp16:
                    image = image.float()
                    encoded_images = encoded_images.float()
                utils.save_images(
                    image.cpu()[:images_to_save, :, :, :],
                    encoded_images[:images_to_save, :, :, :].cpu(),
                    epoch,
                    os.path.join(this_run_folder, 'images'),
                    resize_to=saved_images_size)
                first_iteration = False

        utils.log_progress(validation_losses)
        logging.info('-' * 40)
        utils.save_checkpoint(model, train_options.experiment_name, epoch,
                              os.path.join(this_run_folder, 'checkpoints'))
        utils.write_losses(os.path.join(this_run_folder, 'validation.csv'),
                           validation_losses, epoch,
                           time.time() - epoch_start)
def train(model: Hidden, device: torch.device,
          hidden_config: HiDDenConfiguration, train_options: TrainingOptions,
          this_run_folder: str, tb_logger):
    """
    Trains the HiDDeN model.

    :param model: The model
    :param device: torch.device object, usually this is GPU (if available), otherwise CPU.
    :param hidden_config: The network configuration
    :param train_options: The training settings
    :param this_run_folder: The parent folder for the current training run to store
                            training artifacts/results/logs.
    :param tb_logger: TensorBoardLogger object, a thin wrapper for the TensorboardX
                      logger. Pass None to disable TensorboardX logging.
    :return:
    """
    train_data, val_data = utils.get_data_loaders(hidden_config, train_options)
    file_count = len(train_data.dataset)
    # ceil(file_count / batch_size)
    if file_count % train_options.batch_size == 0:
        steps_in_epoch = file_count // train_options.batch_size
    else:
        steps_in_epoch = file_count // train_options.batch_size + 1

    print_each = 10
    images_to_save = 8
    saved_images_size = (512, 512)

    for epoch in range(train_options.start_epoch,
                       train_options.number_of_epochs + 1):
        print('\nStarting epoch {}/{}'.format(epoch,
                                              train_options.number_of_epochs))
        print('Batch size = {}\nSteps in epoch = {}'.format(
            train_options.batch_size, steps_in_epoch))
        # Per-epoch training losses: name -> list of per-batch values.
        losses_accu = {}
        epoch_start = time.time()
        step = 1
        for image, _ in train_data:
            image = image.to(device)
            # Random binary watermark, one message row per image in the batch.
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0],
                     hidden_config.message_length))).to(device)
            losses, _ = model.train_on_batch([image, message])
            if not losses_accu:  # dict is empty, initialize
                for name in losses:
                    losses_accu[name] = []
            for name, loss in losses.items():
                losses_accu[name].append(loss)
            if step % print_each == 0 or step == steps_in_epoch:
                print('Epoch: {}/{} Step: {}/{}'.format(
                    epoch, train_options.number_of_epochs, step,
                    steps_in_epoch))
                utils.print_progress(losses_accu)
                print('-' * 40)
            step += 1

        train_duration = time.time() - epoch_start
        print('Epoch {} training duration {:.2f} sec'.format(
            epoch, train_duration))
        print('-' * 40)
        utils.write_losses(os.path.join(this_run_folder, 'train.csv'),
                           losses_accu, epoch, train_duration)
        if tb_logger is not None:
            tb_logger.save_losses(losses_accu, epoch)
            tb_logger.save_grads(epoch)
            tb_logger.save_tensors(epoch)

        first_iteration = True
        # BUG FIX: accumulate validation losses in a fresh dict. Previously the
        # training `losses_accu` was reused here (its "dict is empty" init was
        # dead code), so validation batches were appended onto the training
        # lists and both 'validation.csv' and the checkpoint statistics
        # reported a mixture of training and validation losses. The other
        # training-loop variants in this file all keep the two separate.
        validation_losses = {}
        print('Running validation for epoch {}/{}'.format(
            epoch, train_options.number_of_epochs))
        for image, _ in val_data:
            image = image.to(device)
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0],
                     hidden_config.message_length))).to(device)
            losses, (encoded_images, noised_images,
                     decoded_messages) = model.validate_on_batch(
                         [image, message])
            if not validation_losses:  # dict is empty, initialize
                for name in losses:
                    validation_losses[name] = []
            for name, loss in losses.items():
                validation_losses[name].append(loss)
            if first_iteration:
                # Save a qualitative cover/encoded comparison once per epoch.
                utils.save_images(
                    image.cpu()[:images_to_save, :, :, :],
                    encoded_images[:images_to_save, :, :, :].cpu(),
                    epoch,
                    os.path.join(this_run_folder, 'images'),
                    resize_to=saved_images_size)
                first_iteration = False

        utils.print_progress(validation_losses)
        print('-' * 40)
        utils.save_checkpoint(model, epoch, validation_losses,
                              os.path.join(this_run_folder, 'checkpoints'))
        utils.write_losses(os.path.join(this_run_folder, 'validation.csv'),
                           validation_losses, epoch,
                           time.time() - epoch_start)
def main():
    """Entry point: parse CLI arguments, build or restore a HiDDeN model and train it."""
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--size', '-s', default=128, type=int)
    parser.add_argument('--data-dir', '-d', required=True, type=str)
    parser.add_argument('--runs-folder',
                        '-sf',
                        default=os.path.join('.', 'runs'),
                        type=str)
    parser.add_argument('--message', '-m', default=30, type=int)
    parser.add_argument('--epochs', '-e', default=400, type=int)
    parser.add_argument('--batch-size', '-b', required=True, type=int)
    parser.add_argument('--continue-from-folder', '-c', default='', type=str)
    parser.add_argument('--tensorboard',
                        dest='tensorboard',
                        action='store_true')
    parser.add_argument('--no-tensorboard',
                        dest='tensorboard',
                        action='store_false')
    parser.set_defaults(tensorboard=True)
    args = parser.parse_args()

    checkpoint = None
    if args.continue_from_folder != '':
        this_run_folder = args.continue_from_folder
        # FIX: load the options from the pickle file inside the run folder.
        # Every other entry point in this file passes the
        # 'options-and-config.pickle' path to utils.load_options; passing the
        # bare folder here was inconsistent with those call sites.
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # FIX: resume AFTER the last saved epoch. The other main() variants in
        # this file all use checkpoint['epoch'] + 1; without the +1 the epoch
        # stored in the checkpoint would be trained (and logged) twice.
        train_options.start_epoch = checkpoint['epoch'] + 1
    else:
        # Fresh run: build options/configs and create a new run folder.
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            runs_folder=os.path.join('.', 'runs'),
            start_epoch=start_epoch)
        # No noise layers by default (e.g. [{'type': 'resize', 'resize_ratio': 0.4}]).
        noise_config = []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3)
        this_run_folder = utils.create_folder_for_run(train_options)
        # Persist the run configuration so 'continue' runs can restore it.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    noiser = Noiser(noise_config, device)

    if args.tensorboard:
        print('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.continue_from_folder != '':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        utils.model_from_checkpoint(model, checkpoint)

    # (sic: 'to_stirng' is the method name exposed by the project's Hidden class)
    print('HiDDeN model: {}\n'.format(model.to_stirng()))
    print('Model Configuration:\n')
    pprint.pprint(vars(hidden_config))
    print('\nNoise configuration:\n')
    pprint.pprint(str(noise_config))
    print('\nTraining train_options:\n')
    pprint.pprint(vars(train_options))
    print()

    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger)
def main():
    """Entry point for noise-specific HiDDeN training: parses a 'new'/'continue'
    sub-command, resolves the dataset location from the host name, builds or
    restores the model, and dispatches to train_own_noise()."""
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    parent_parser = argparse.ArgumentParser(
        description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command',
                                              help='Sub-parser for commands')

    # --- 'new' sub-command: start a run from scratch -------------------------
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir',
                                '-d',
                                required=True,
                                type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument('--batch-size',
                                '-b',
                                required=True,
                                type=int,
                                help='The batch size.')
    new_run_parser.add_argument('--epochs',
                                '-e',
                                default=300,
                                type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name',
                                required=True,
                                type=str,
                                help='The name of the experiment.')
    new_run_parser.add_argument('--adv_loss',
                                default=0,
                                required=False,
                                type=float,
                                help='Coefficient of the adversarial loss.')
    new_run_parser.add_argument('--residual',
                                default=0,
                                required=False,
                                type=int,
                                help='If to use residual or not.')
    new_run_parser.add_argument('--video_dataset',
                                default=0,
                                required=False,
                                type=int,
                                help='If to use video dataset or not.')
    new_run_parser.add_argument(
        '--save-dir',
        '-sd',
        default='runs',
        required=True,
        type=str,
        help='The save directory where the result is stored.')
    new_run_parser.add_argument(
        '--size',
        '-s',
        default=128,
        type=int,
        help=
        'The size of the images (images are square so this is height and width).'
    )
    new_run_parser.add_argument('--message',
                                '-m',
                                default=30,
                                type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument(
        '--continue-from-folder',
        '-c',
        default='',
        type=str,
        help=
        'The folder from where to continue a previous run. Leave blank if you are starting a new experiment.'
    )
    new_run_parser.add_argument('--tensorboard',
                                action='store_true',
                                help='Use to switch on Tensorboard logging.')
    new_run_parser.add_argument('--enable-fp16',
                                dest='enable_fp16',
                                action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument(
        '--noise',
        nargs='*',
        action=NoiseArgParser,
        help=
        "Noise layers configuration. Use quotes when specifying configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'"
    )
    new_run_parser.add_argument('--hostname',
                                default=socket.gethostname(),
                                help='the host name of the running server')
    new_run_parser.add_argument(
        '--cover-dependent',
        default=1,
        required=False,
        type=int,
        help='If to use cover dependent architecture or not.')
    new_run_parser.add_argument('--jpeg_type',
                                '-j',
                                required=False,
                                type=str,
                                default='jpeg',
                                help='Jpeg type used in the combined2 noise.')
    new_run_parser.set_defaults(tensorboard=False)
    new_run_parser.set_defaults(enable_fp16=False)

    # --- 'continue' sub-command: resume from a checkpoint --------------------
    continue_parser = subparsers.add_parser('continue',
                                            help='Continue a previous run')
    continue_parser.add_argument(
        '--folder',
        '-f',
        required=True,
        type=str,
        help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument(
        '--data-dir',
        '-d',
        required=False,
        type=str,
        help=
        'The directory where the data is stored. Specify a value only if you want to override the previous value.'
    )
    continue_parser.add_argument(
        '--epochs',
        '-e',
        required=False,
        type=int,
        help=
        'Number of epochs to run the simulation. Specify a value only if you want to override the previous value.'
    )

    # Setting up a seed for debug (fixed seed -> reproducible runs)
    seed = 123
    torch.manual_seed(seed)
    np.random.seed(seed)

    args = parent_parser.parse_args()
    checkpoint = None
    loaded_checkpoint_file_name = None
    # NOTE(review): the attributes accessed below (cover_dependent,
    # video_dataset, hostname, and later noise/jpeg_type) are only defined by
    # the 'new' sub-parser; running the 'continue' command would raise
    # AttributeError here. Presumably only 'new' is actually used — confirm.
    print(args.cover_dependent)
    # Resolve the dataset path from the machine we are running on.
    if not args.video_dataset:
        if args.hostname == 'ee898-System-Product-Name':
            args.data_dir = '/home/ee898/Desktop/chaoning/ImageNet'
            args.hostname = 'ee898'
        elif args.hostname == 'DL178':
            args.data_dir = '/media/user/SSD1TB-2/ImageNet'
        else:
            args.data_dir = '/workspace/data_local/imagenet_pytorch'
    else:
        if args.hostname == 'ee898-System-Product-Name':
            args.data_dir = '/home/ee898/Desktop/chaoning/ImageNet'
            args.hostname = 'ee898'
        elif args.hostname == 'DL178':
            args.data_dir = '/media/user/SSD1TB-2/ImageNet'
        else:
            args.data_dir = './oops_dataset/oops_video'
    assert args.data_dir

    if args.command == 'continue':
        this_run_folder = args.folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # Resume after the last completed epoch.
        train_options.start_epoch = checkpoint['epoch'] + 1
        if args.data_dir is not None:
            train_options.train_folder = os.path.join(args.data_dir, 'train')
            train_options.validation_folder = os.path.join(
                args.data_dir, 'val')
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(
                    f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                    f'already contains checkpoint for epoch = {train_options.start_epoch}.'
                )
                exit(1)
    else:
        assert args.command == 'new'
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            runs_folder=os.path.join('.', args.save_dir),
            start_epoch=start_epoch,
            experiment_name=args.name,
            video_dataset=args.video_dataset)
        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(
            H=args.size,
            W=args.size,
            message_length=args.message,
            encoder_blocks=4,
            encoder_channels=64,
            decoder_blocks=7,
            decoder_channels=64,
            use_discriminator=True,
            use_vgg=False,
            discriminator_blocks=3,
            discriminator_channels=64,
            decoder_loss=1,
            encoder_loss=0.7,
            adversarial_loss=args.adv_loss,
            cover_dependent=args.cover_dependent,
            residual=args.residual,
            enable_fp16=args.enable_fp16)
        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Persist the run configuration so 'continue' runs can restore it.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    # Mirror all log output to a per-experiment file and stdout.
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(
                                    this_run_folder,
                                    f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    # Continued runs re-enable Tensorboard if the run folder already has logs.
    if (args.command == 'new' and args.tensorboard) or \
            (args.command == 'continue' and os.path.isdir(os.path.join(this_run_folder, 'tb-logs'))):
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    noiser = Noiser(noise_config, device, args.jpeg_type)
    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(
            f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)

    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    # Map the first configured noise layer to the CSV/file-name tag used by
    # train_own_noise().
    # NOTE(review): args.noise[0] raises IndexError when --noise is given with
    # no layers (empty list) — confirm callers always pass at least one.
    if str(args.noise[0]) == "WebP()":
        noise = 'webp'
    elif str(args.noise[0]) == "JpegCompression2000()":
        noise = 'jpeg2000'
    elif str(args.noise[0]) == "MPEG4()":
        noise = 'mpeg4'
    elif str(args.noise[0]) == "H264()":
        noise = 'h264'
    elif str(args.noise[0]) == "XVID()":
        noise = 'xvid'
    elif str(args.noise[0]) == "DiffQFJpegCompression2()":
        noise = 'diff_qf_jpeg2'
    elif str(args.noise[0]) == "DiffCorruptions()":
        noise = 'diff_corruptions'
    else:
        noise = 'jpeg'

    train_own_noise(model, device, hidden_config, train_options,
                    this_run_folder, tb_logger, noise)
def main():
    """Batch-validation entry point: loads every completed run under a root
    folder, runs its model over the validation set on CPU, and appends the
    averaged losses to a shared 'validation_run.csv'."""
    # Forced to CPU (the CUDA branch is intentionally disabled here).
    device = torch.device('cpu')

    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--data-dir',
                        '-d',
                        required=True,
                        type=str,
                        help='The directory where the data is stored.')
    parser.add_argument(
        '--runs_root',
        '-r',
        default=os.path.join('.', 'experiments'),
        type=str,
        help='The root folder where data about experiments are stored.')
    args = parser.parse_args()

    print_each = 25
    # Every sub-folder of runs_root is a finished experiment, except the
    # 'no-noise-defaults' folder which is explicitly skipped.
    completed_runs = [
        o for o in os.listdir(args.runs_root)
        if os.path.isdir(os.path.join(args.runs_root, o))
        and o != 'no-noise-defaults'
    ]
    print(completed_runs)
    # Only the first run writes the CSV header row.
    write_csv_header = True
    for run_name in completed_runs:
        current_run = os.path.join(args.runs_root, run_name)
        print(f'Run folder: {current_run}')
        options_file = os.path.join(current_run, 'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        # Point both loaders at the validation split and use a small batch.
        train_options.train_folder = os.path.join(args.data_dir, 'val')
        train_options.validation_folder = os.path.join(args.data_dir, 'val')
        train_options.batch_size = 4
        checkpoint = utils.load_last_checkpoint(
            os.path.join(current_run, 'checkpoints'))

        noiser = Noiser(noise_config, device)
        model = Hidden(hidden_config, device, noiser, tb_logger=None)
        utils.model_from_checkpoint(model, checkpoint)

        print('Model loaded successfully. Starting validation run...')
        _, val_data = utils.get_data_loaders(hidden_config, train_options)
        file_count = len(val_data.dataset)
        # ceil(file_count / batch_size)
        if file_count % train_options.batch_size == 0:
            steps_in_epoch = file_count // train_options.batch_size
        else:
            steps_in_epoch = file_count // train_options.batch_size + 1

        # name -> list of per-batch loss values for this run.
        losses_accu = {}
        step = 0
        for image, _ in val_data:
            step += 1
            image = image.to(device)
            # Random binary watermark, one message row per image in the batch.
            message = torch.Tensor(
                np.random.choice(
                    [0, 1],
                    (image.shape[0],
                     hidden_config.message_length))).to(device)
            losses, (encoded_images, noised_images,
                     decoded_messages) = model.validate_on_batch(
                         [image, message])
            if not losses_accu:  # dict is empty, initialize
                for name in losses:
                    losses_accu[name] = []
            for name, loss in losses.items():
                losses_accu[name].append(loss)
            if step % print_each == 0:
                print(f'Step {step}/{steps_in_epoch}')
                utils.print_progress(losses_accu)
                print('-' * 40)

        utils.print_progress(losses_accu)
        # write_validation_loss is defined elsewhere in this module.
        write_validation_loss(os.path.join(args.runs_root,
                                           'validation_run.csv'),
                              losses_accu,
                              run_name,
                              checkpoint['epoch'],
                              write_header=write_csv_header)
        write_csv_header = False
def main():
    """Entry point (single-parser variant): parse CLI arguments, start a new
    named experiment or continue one from its folder, then train."""
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    parser.add_argument('--data-dir',
                        '-d',
                        required=True,
                        type=str,
                        help='The directory where the data is stored.')
    parser.add_argument('--batch-size',
                        '-b',
                        required=True,
                        type=int,
                        help='The batch size.')
    parser.add_argument('--epochs',
                        '-e',
                        default=400,
                        type=int,
                        help='Number of epochs to run the simulation.')
    parser.add_argument('--name',
                        required=True,
                        type=str,
                        help='The name of the experiment.')
    parser.add_argument(
        '--runs-folder',
        '-sf',
        default=os.path.join('.', 'runs'),
        type=str,
        help='The root folder where data about experiments are stored.')
    parser.add_argument(
        '--size',
        '-s',
        default=128,
        type=int,
        help=
        'The size of the images (images are square so this is height and width).'
    )
    parser.add_argument('--message',
                        '-m',
                        default=30,
                        type=int,
                        help='The length in bits of the watermark.')
    parser.add_argument(
        '--continue-from-folder',
        '-c',
        default='',
        type=str,
        help=
        'The folder from where to continue a previous run. Leave blank if you are starting a new experiment.'
    )
    parser.add_argument(
        '--tensorboard',
        dest='tensorboard',
        action='store_true',
        help='If specified, use adds a Tensorboard log. On by default')
    parser.add_argument('--no-tensorboard',
                        dest='tensorboard',
                        action='store_false',
                        help='Use to switch off Tensorboard logging.')
    parser.add_argument(
        '--noise',
        nargs='*',
        action=NoiseArgParser,
        help=
        "Noise layers configuration. Use quotes when specifying configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'"
    )
    parser.set_defaults(tensorboard=True)
    args = parser.parse_args()

    checkpoint = None
    if args.continue_from_folder != '':
        # Resume: restore options and the last checkpoint from the run folder.
        this_run_folder = args.continue_from_folder
        options_file = os.path.join(this_run_folder,
                                    'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(
            options_file)
        checkpoint = utils.load_last_checkpoint(
            os.path.join(this_run_folder, 'checkpoints'))
        # Resume after the last completed epoch.
        train_options.start_epoch = checkpoint['epoch'] + 1
    else:
        # Fresh run: build options/configs and create a new run folder.
        start_epoch = 1
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            train_folder=os.path.join(args.data_dir, 'train'),
            validation_folder=os.path.join(args.data_dir, 'val'),
            runs_folder=os.path.join('.', 'runs'),
            start_epoch=start_epoch,
            experiment_name=args.name)
        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3)
        this_run_folder = utils.create_folder_for_run(
            train_options.runs_folder, args.name)
        # Persist the run configuration so continued runs can restore it.
        with open(os.path.join(this_run_folder, 'options-and-config.pickle'),
                  'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    # Mirror all log output to a per-experiment file and stdout.
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(this_run_folder,
                                             f'{args.name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    noiser = Noiser(noise_config, device)

    if args.tensorboard:
        logging.info('Tensorboard is enabled. Creating logger.')
        from tensorboard_logger import TensorBoardLogger
        tb_logger = TensorBoardLogger(os.path.join(this_run_folder, 'tb-logs'))
    else:
        tb_logger = None

    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.continue_from_folder != '':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        utils.model_from_checkpoint(model, checkpoint)

    # (sic: 'to_stirng' is the method name exposed by the project's Hidden class)
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    train(model, device, hidden_config, train_options, this_run_folder,
          tb_logger)
def main():
    """Entry point (best-epoch variant): 'new'/'continue' sub-commands; the
    continue path also restores best_epoch/best_cond bookkeeping used by the
    train() loop at the head of this file."""
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    parent_parser = argparse.ArgumentParser(description='Training of HiDDeN nets')
    subparsers = parent_parser.add_subparsers(dest='command', help='Sub-parser for commands')

    # --- 'new' sub-command: start a run from scratch -------------------------
    new_run_parser = subparsers.add_parser('new', help='starts a new run')
    new_run_parser.add_argument('--data-dir', '-d', required=True, type=str,
                                help='The directory where the data is stored.')
    new_run_parser.add_argument('--batch-size', '-b', default=30, type=int,
                                help='The batch size.')
    new_run_parser.add_argument('--epochs', '-e', default=300, type=int,
                                help='Number of epochs to run the simulation.')
    new_run_parser.add_argument('--name', required=True, type=str,
                                help='The name of the experiment.')
    new_run_parser.add_argument('--size', '-s', default=128, type=int,
                                help='The size of the images (images are square so this is height and width).')
    new_run_parser.add_argument('--in_channels', default=3, type=int,
                                help='input channel size')
    new_run_parser.add_argument('--message', '-m', default=32, type=int,
                                help='The length in bits of the watermark.')
    new_run_parser.add_argument('--ratio', default=0.2, type=float,
                                help='ratio of dataset.')
    new_run_parser.add_argument('--continue-from-folder', '-c', default='', type=str,
                                help='The folder from where to continue a previous run. Leave blank if you are starting a new experiment.')
    new_run_parser.add_argument('--enable-fp16', dest='enable_fp16', action='store_true',
                                help='Enable mixed-precision training.')
    new_run_parser.add_argument('--noise', nargs='*', action=NoiseArgParser,
                                help="Noise layers configuration. Use quotes when specifying configuration, e.g. 'cropout((0.55, 0.6), (0.55, 0.6))'")
    new_run_parser.set_defaults(enable_fp16=False)

    # --- 'continue' sub-command: resume from a checkpoint --------------------
    continue_parser = subparsers.add_parser('continue', help='Continue a previous run')
    continue_parser.add_argument('--folder', '-f', required=True, type=str,
                                 help='Continue from the last checkpoint in this folder.')
    continue_parser.add_argument('--data-dir', '-d', required=False, type=str,
                                 help='The directory where the data is stored. Specify a value only if you want to override the previous value.')
    continue_parser.add_argument('--epochs', '-e', required=False, type=int,
                                 help='Number of epochs to run the simulation. Specify a value only if you want to override the previous value.')

    args = parent_parser.parse_args()
    checkpoint = None
    loaded_checkpoint_file_name = None

    if args.command == 'continue':
        options_file = os.path.join(args.folder, 'options-and-config.pickle')
        train_options, hidden_config, noise_config = utils.load_options(options_file)
        checkpoint, loaded_checkpoint_file_name = utils.load_last_checkpoint(os.path.join(args.folder, 'checkpoints'))
        # Resume after the last completed epoch and restore the best-model
        # bookkeeping that train() (at the head of this file) tracks.
        train_options.start_epoch = checkpoint['epoch'] + 1
        train_options.best_epoch = checkpoint['best_epoch']
        train_options.best_cond = checkpoint['best_cond']
        if args.epochs is not None:
            if train_options.start_epoch < args.epochs:
                train_options.number_of_epochs = args.epochs
            else:
                print(f'Command-line specifies of number of epochs = {args.epochs}, but folder={args.folder} '
                      f'already contains checkpoint for epoch = {train_options.start_epoch}.')
                exit(1)
    else:
        assert args.command == 'new'
        start_epoch = 1
        # Experiment name encodes dataset ratio, block size, channels and
        # message length, e.g. 'exp_r020b128ch3m32'.
        train_options = TrainingOptions(
            batch_size=args.batch_size,
            number_of_epochs=args.epochs,
            data_ratio=args.ratio,
            data_dir=args.data_dir,
            runs_folder='./runs',
            tb_logger_folder='./logger',
            start_epoch=start_epoch,
            experiment_name=f'{args.name}_r{int(100*args.ratio):03d}b{args.size}ch{args.in_channels}m{args.message}')
        noise_config = args.noise if args.noise is not None else []
        hidden_config = HiDDenConfiguration(H=args.size,
                                            W=args.size,
                                            input_channels=args.in_channels,
                                            message_length=args.message,
                                            encoder_blocks=4,
                                            encoder_channels=64,
                                            decoder_blocks=7,
                                            decoder_channels=64,
                                            use_discriminator=True,
                                            use_vgg=False,
                                            discriminator_blocks=3,
                                            discriminator_channels=64,
                                            decoder_loss=1,
                                            encoder_loss=0.7,
                                            adversarial_loss=1e-3,
                                            enable_fp16=args.enable_fp16
                                            )
        # create_folder_for_run is expected to set train_options.this_run_folder,
        # .options_file and .tb_logger_dir (used below) — TODO confirm in utils.
        utils.create_folder_for_run(train_options)
        # Persist the run configuration so 'continue' runs can restore it.
        with open(train_options.options_file, 'wb+') as f:
            pickle.dump(train_options, f)
            pickle.dump(noise_config, f)
            pickle.dump(hidden_config, f)

    # Mirror all log output to a per-experiment file and stdout.
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.FileHandler(os.path.join(train_options.this_run_folder,
                                                             f'{train_options.experiment_name}.log')),
                            logging.StreamHandler(sys.stdout)
                        ])

    # Tensorboard logging is always on in this variant.
    logging.info(f'Tensorboard is enabled. Creating logger at {train_options.tb_logger_dir}')
    tb_logger = TensorBoardLogger(train_options.tb_logger_dir)

    noiser = Noiser(noise_config, device)
    model = Hidden(hidden_config, device, noiser, tb_logger)

    if args.command == 'continue':
        # if we are continuing, we have to load the model params
        assert checkpoint is not None
        logging.info(f'Loading checkpoint from file {loaded_checkpoint_file_name}')
        utils.model_from_checkpoint(model, checkpoint)

    # (sic: 'to_stirng' is the method name exposed by the project's Hidden class)
    logging.info('HiDDeN model: {}\n'.format(model.to_stirng()))
    logging.info('Model Configuration:\n')
    logging.info(pprint.pformat(vars(hidden_config)))
    logging.info('\nNoise configuration:\n')
    logging.info(pprint.pformat(str(noise_config)))
    logging.info('\nTraining train_options:\n')
    logging.info(pprint.pformat(vars(train_options)))

    train(model, device, hidden_config, train_options, train_options.this_run_folder, tb_logger)