def __init__(self,
             model,
             loss=MeanAbsoluteError(),
             learning_rate=PiecewiseConstantDecay(boundaries=[200000],
                                                  values=[1e-4, 5e-5]),
             checkpoint_dir='./ckpt/edsr'):
    super().__init__(model, loss, learning_rate, checkpoint_dir)
def main():
    batch = 8
    epoch = 20
    loss = MeanAbsoluteError()
    learning_rate = PiecewiseConstantDecay(boundaries=[100000],
                                           values=[1e-4, 5e-5])
    res_blocks = [
        15, 15, 15, 15, 15, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3
    ]
    checkpoint_dir = './ckpt/edsr'

    # Load the data
    ds_train = VCTK(subset='train').dataset()
    ds_valid = VCTK(subset='valid').dataset()

    # Build the model
    edsr_model = edsr2(scale=4, res_blocks=res_blocks, res_block_scaling=0.7)

    # Train
    edsr_trainer = EDSRTrainer(model=edsr_model,
                               loss=loss,
                               learning_rate=learning_rate,
                               checkpoint_dir=checkpoint_dir)
    edsr_trainer.train(train_dataset=ds_train,
                       valid_dataset=ds_valid,
                       batch=batch,
                       epoch=epoch)
    edsr_model.save_weights(
        f'./weights/EDSR_16000_{len(res_blocks)}res_{batch}batch_{epoch}epochs_tanh_entropy_glorot_uniform.h5'
    )
def __init__(self, lr, lr_mode, lr_interval, lr_value, total_epochs,
             steps_per_epoch, initial_epoch):
    super(OptionalLearningRateSchedule, self).__init__()
    self.lr = lr
    self.lr_mode = lr_mode
    self.lr_interval = lr_interval
    self.lr_value = lr_value
    self.total_epochs = total_epochs
    self.steps_per_epoch = steps_per_epoch
    self.initial_epoch = initial_epoch

    if self.lr_mode == 'exponential':
        decay_epochs = [int(e) for e in self.lr_interval.split(',')]
        lr_values = [
            self.lr * (self.lr_value**k) for k in range(len(decay_epochs) + 1)
        ]
        self.lr_scheduler = PiecewiseConstantDecay(decay_epochs, lr_values)
    elif self.lr_mode == 'cosine':
        self.lr_scheduler = CosineDecay(self.lr, self.total_epochs)
    elif self.lr_mode == 'constant':
        self.lr_scheduler = lambda x: self.lr
    else:
        raise ValueError(self.lr_mode)
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            alpha=0.2)  # use 0.2*learning_rate as final minimum learning rate
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate,
                                       decay_steps=decay_steps,
                                       end_learning_rate=learning_rate / 100)
    elif decay_type == 'piecewise_constant':
        # apply a piecewise constant lr scheduler, including a warmup stage
        boundaries = [500, int(decay_steps * 0.9), decay_steps]
        values = [
            0.001, learning_rate, learning_rate / 10., learning_rate / 100.
        ]
        lr_scheduler = PiecewiseConstantDecay(boundaries=boundaries,
                                              values=values)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
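# A hedged usage sketch for get_lr_scheduler above; the concrete values
# (1e-3, 100 epochs x 1000 steps) are assumptions, not taken from the original code.
from tensorflow.keras.optimizers import Adam

lr_scheduler = get_lr_scheduler(learning_rate=1e-3,
                                decay_type='cosine',
                                decay_steps=100 * 1000)
optimizer = Adam(learning_rate=lr_scheduler)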
def main(train, eval, test_img_path=None, test_subtle=False):
    # Construct model
    input_tensor = Input(shape=(32, 32, 3))
    x = Lambda(lambda input_tensor: input_tensor)(input_tensor)
    for _ in range(4):
        x = Conv2D(64, 3, padding='same', activation='relu')(x)
    x = Conv2D(3, 3, padding='same')(x)
    model = Model(input_tensor, x)
    lr_schedule = PiecewiseConstantDecay([30 * 1563, 60 * 1563],
                                         [1e-4, 1e-5, 5e-6])
    model.compile(optimizer=Adam(lr_schedule), loss='mse')

    # Train
    if train:
        # Load data
        (x_train, _), (_, _) = cifar10.load_data()
        x_train = x_train.astype('float32') / 255.
        y_train = x_train.copy()
        x_train += np.random.normal(0, .1, x_train.shape)
        model.fit(x_train, y_train, batch_size=32, epochs=100)
        model.save_weights('./checkpoints/model1')
    else:
        model.load_weights('./checkpoints/model1')

    if eval:
        # Load data
        (_, _), (x_test, _) = cifar10.load_data()
        x_test = x_test.astype('float32') / 255.
        y_test = x_test.copy()
        x_test += np.random.normal(0, .1, x_test.shape)
        print('Evaluating: ')
        model.evaluate(x_test, y_test)

    # Test
    if test_img_path is not None:
        test_img = load_img(test_img_path)
        test_img = img_to_array(test_img).astype(np.float32) / 255.
        new_img = np.zeros_like(test_img)
        if test_subtle:
            i_end = test_img.shape[0] - 16
            j_end = test_img.shape[1] - 16
            for i in range(0, i_end, 16):
                for j in range(0, j_end, 16):
                    predicted = model.predict(
                        np.expand_dims(test_img[i:i + 32, j:j + 32], 0))
                    new_img[i+8*(i!=0) : i+32-8*(i!=i_end-16), j+8*(j!=0) : j+32-8*(j!=j_end-16)] \
                        = predicted[:, 8*(i!=0) : 32-8*(i!=i_end-16), 8*(j!=0) : 32-8*(j!=j_end-16)]
        else:
            for i in range(0, test_img.shape[0], 32):
                for j in range(0, test_img.shape[1], 32):
                    new_img[i:i + 32, j:j + 32] = model.predict(
                        np.expand_dims(test_img[i:i + 32, j:j + 32], 0))
        new_img = array_to_img(new_img)
        new_img.save('data/Model1.png')
        new_img.show()
def __init__(self,
             model,
             checkpoint_dir,
             learning_rate=PiecewiseConstantDecay(boundaries=[200000],
                                                  values=[1e-3, 5e-4])):
    super().__init__(model,
                     loss=MeanAbsoluteError(),
                     learning_rate=learning_rate,
                     checkpoint_dir=checkpoint_dir)
def get_optimizer(steps_per_epoch,
                  initial_lr=1e-4,
                  halve_epochs=[80, 120, 150]):
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.optimizers.schedules import PiecewiseConstantDecay

    lr_schedule = PiecewiseConstantDecay(
        [steps_per_epoch * epochs for epochs in halve_epochs],
        [initial_lr / (2**i) for i in range(len(halve_epochs) + 1)])
    optimizer = Adam(learning_rate=lr_schedule)
    return optimizer
def test_stepped_lr_schedule_output(self):
    step_boundaries = [2, 5, 10]
    lr_values = [0.1, 0.2, 0.3, 0.4]
    schedule = PiecewiseConstantDecay(step_boundaries, lr_values)

    step_boundaries = step_boundaries.copy()
    step_boundaries.insert(0, -1)
    step_boundaries.append(step_boundaries[-1] * 2)
    for i, lr_value in enumerate(lr_values):
        step = randint(step_boundaries[i] + 1, step_boundaries[i + 1])
        assert schedule(step) == lr_value
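# A standalone sketch (not part of the original test suite) that makes the
# boundary semantics asserted above explicit: PiecewiseConstantDecay returns
# values[i] for steps up to and including boundaries[i], and values[-1] afterwards.
from tensorflow.keras.optimizers.schedules import PiecewiseConstantDecay

schedule = PiecewiseConstantDecay(boundaries=[2, 5, 10],
                                  values=[0.1, 0.2, 0.3, 0.4])
for step in (0, 2, 3, 5, 6, 10, 11):
    print(step, float(schedule(step)))  # 0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4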
def train(self, train_data, train_target, epochs, steps_per_epoch, val_split,
          boundries, values):
    optim_edsr = Adam(learning_rate=PiecewiseConstantDecay(
        boundaries=boundries, values=values))
    self.model.compile(optimizer=optim_edsr, loss='mean_absolute_error')
    self.model.fit(x=train_data,
                   y=train_target,
                   epochs=epochs,
                   steps_per_epoch=steps_per_epoch,
                   batch_size=self.batch,
                   validation_split=val_split)
def __init__(self,
             generator,
             discriminator,
             cycle_loss_weight,
             identity_loss_weight,
             gradient_penalty_weight,
             learning_rate=PiecewiseConstantDecay(boundaries=[100],
                                                  values=[2e-4, 2e-5]),
             beta_1=0.5):
    self.A2B_G = generator
    self.B2A_G = generator
    self.A_D = discriminator
    self.B_D = discriminator
    self.generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
    self.discriminator_optimizer = Adam(learning_rate=learning_rate,
                                        beta_1=beta_1)
    self.cycle_loss_weight = cycle_loss_weight
    self.identity_loss_weight = identity_loss_weight
    self.gradient_penalty_weight = gradient_penalty_weight
    self.mean_squared_error = MeanSquaredError()
    self.mean_absolute_error = MeanAbsoluteError()
def __init__(self,
             model,
             loss,
             checkpoint_dir,
             learning_rate=PiecewiseConstantDecay(boundaries=[200000],
                                                  values=[1e-3, 5e-4])):
    if loss == 'MAE':
        loss = MeanAbsoluteError()
    elif loss == 'MSE':
        loss = MeanSquaredError()
    else:
        raise ValueError("loss specified incorrectly")
    super().__init__(model,
                     loss=loss,
                     learning_rate=learning_rate,
                     checkpoint_dir=checkpoint_dir)
def __init__(self,
             generator,
             discriminator,
             content_loss='VGG54',
             learning_rate=PiecewiseConstantDecay(boundaries=[100000],
                                                  values=[1e-4, 1e-5])):
    self.vgg = vgg_54()
    self.content_loss = content_loss
    self.generator = generator
    self.discriminator = discriminator
    self.generator_optimizer = Adam(learning_rate=learning_rate)
    self.discriminator_optimizer = Adam(learning_rate=learning_rate)
    self.binary_cross_entropy = BinaryCrossentropy(from_logits=False)
    self.mean_squared_error = MeanSquaredError()
def __init__(self,
             generator,
             discriminator,
             content_loss='VGG54',
             learning_rate=PiecewiseConstantDecay(boundaries=[100000],
                                                  values=[1e-4, 1e-5])):
    if content_loss == 'VGG22':
        self.vgg = srgan.vgg_22()
    elif content_loss == 'VGG54':
        self.vgg = srgan.vgg_54()
    else:
        raise ValueError("content_loss must be either 'VGG22' or 'VGG54'")

    self.content_loss = content_loss
    self.generator = generator
    self.discriminator = discriminator
    self.generator_optimizer = Adam(learning_rate=learning_rate)
    self.discriminator_optimizer = Adam(learning_rate=learning_rate)
    self.binary_cross_entropy = BinaryCrossentropy(from_logits=False)
    self.mean_squared_error = MeanSquaredError()
def setup_model():
    if FLAGS.ImageType == 'MNIST':
        Generator = Gen_Net(1)
        Discriminator = Dis_Net([FLAGS.DisInSize, FLAGS.DisInSize], 1)
    else:
        Generator = Gen_Net(3)
        Discriminator = Dis_Net([FLAGS.DisInSize, FLAGS.DisInSize], 3)

    if FLAGS.ImageType == 'MNIST':
        one_epoch_size = 60000 // FLAGS.BATCH_SIZE + 1
    else:
        one_epoch_size = 162769 // FLAGS.BATCH_SIZE + 1

    boundaries = [
        one_epoch_size * 15, one_epoch_size * 25, one_epoch_size * 28
    ]
    values = [FLAGS.lr, FLAGS.lr / 10, FLAGS.lr / 100, FLAGS.lr / 1000]
    lr_fn = PiecewiseConstantDecay(boundaries, values)

    G_opt = tf.keras.optimizers.Adam(lr_fn, 0.5)
    D_opt = tf.keras.optimizers.Adam(lr_fn, 0.5)
    return Generator, Discriminator, G_opt, D_opt
def create_model(w=19, D=500, initial_lr=0.001):
    """
    create a CNN for coronary artery centerline extraction
    :param initial_lr: initial learning rate for Adam optimizer
    :param w: one of the three input dimensions of the isotropic 3D image
    :param D: number of categories of directions
    :return: constructed model
    """
    if IMAGE_FORMAT == "channels_last":
        inputs = Input(shape=(w, w, w, 1), name="input")
    else:
        inputs = Input(shape=(1, w, w, w), name="input")

    x = conv_block(inputs, 32, (3, 3, 3), 1, idx=1)
    x = conv_block(x, 32, (3, 3, 3), 1, idx=2)
    x = conv_block(x, 32, (3, 3, 3), 2, idx=3)
    x = conv_block(x, 32, (3, 3, 3), 4, idx=4)

    # tracker
    x = conv_block(x, 64, (3, 3, 3), 1, idx=5)
    x = conv_block(x, 64, (1, 1, 1), 1, idx=6)
    x = Conv3D(D + 1, (1, 1, 1), dilation_rate=1, name="conv7")(x)
    x = Flatten(name="flatten")(x)
    x = final_activation(x)
    x = Concatenate(name="concatenate")(x)

    # # discriminator
    # x = conv_block(x, 64, (3, 3, 3), 1, idx=5)
    # x = conv_block(x, 64, (1, 1, 1), 1, idx=6)
    # x = Conv3D(D + 1, (1, 1, 1), dilation_rate=1, name="conv7")(x)
    # x = Flatten(name="flatten")(x)

    model = Model(inputs=inputs, outputs=x)
    schedule = PiecewiseConstantDecay(
        [i * 10000 for i in range(1, 6)],
        [initial_lr * (0.1**i) for i in range(0, 6)])
    optimizer = Adam(learning_rate=schedule)
    model.compile(optimizer=optimizer, loss=custom_loss)
    return model
        config.pretrained_type,
        checkpoint_dir=config.checkpoint_dir)
except Exception as e:
    print(e)
    print('The program is exiting...')
    sys.exit()

criterion = losses.create_losses(config.neg_ratio, NUM_CLASSES)

steps_per_epoch = train_length // config.batch_size

lr_fn = PiecewiseConstantDecay(boundaries=[
    int(steps_per_epoch * config.num_epochs * 2 / 3),
    int(steps_per_epoch * config.num_epochs * 5 / 6)
],
                               values=[
                                   config.initial_lr,
                                   config.initial_lr * 0.1,
                                   config.initial_lr * 0.01
                               ])
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_fn)
'''
optimizer = tf.keras.optimizers.SGD(
    learning_rate=lr_fn, momentum=config.momentum)
'''

train_log_dir = 'logs/train'
# val_log_dir = 'logs/val'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
# val_summary_writer = tf.summary.create_file_writer(val_log_dir)
w = bird_data.get_w(alpha=1)  # (50*150)
train_class_list, test_class_list = bird_data.get_class_split(mode="easy")
train_ds, test_ds = bird_data.load_gpu(batch_size=BATCH_SIZE)
# path_root = os.path.abspath(os.path.dirname(__file__))
# database = DataSet("/Volumes/Watermelon")  # path_root)
# PHI = database.get_phi()
# DS, DS_test = database.load_gpu(batch_size=5)  # image_batch, label_batch
modelaki = FinalModel()

# define loss and opt functions
loss_fun = Loss().final_loss
step = tf.Variable(0, trainable=False)
boundaries = [187 * 5, 187 * 10]
values = [0.05, 0.005, 0.0005]
learning_rate_fn = PiecewiseConstantDecay(boundaries, values)
# Later, whenever we perform an optimization step, we pass in the step.
learning_rate = learning_rate_fn(step)
opt_fun = tfa.optimizers.SGDW(learning_rate=learning_rate,
                              weight_decay=5 * 1e-4,
                              momentum=0.9)
# opt_fun = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

# define checkpoint settings
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=opt_fun, net=modelaki)
manager = tf.train.CheckpointManager(
    ckpt, path_root + '/tf_ckpts',
    max_to_keep=10)  # keep only the ten most recent checkpoints
ckpt.restore(
        perception_kernel_size=3),
    perception_kernel_type=watcher.rlog("neural_model",
                                        perception_kernel_type='sobel'),
    perception_kernel_norm_value=watcher.rlog(
        "neural_model", perception_kernel_norm_value=8),
    observation_angle=watcher.rlog("neural_model", observation_angle=0.0),
    last_conv_filters=watcher.rlog("neural_model", last_conv_filters=128),
    last_conv_kernel_size=watcher.rlog("neural_model",
                                       last_conv_kernel_size=1),
    stochastic_update=watcher.rlog("neural_model", stochastic_update=True))

lr = watcher.rlog("optimizer", learning_rate=2e-3)
lr_multiplier = watcher.rlog("optimizer", lr_multiplier=0.1)
boundaries_decay_values = [lr, lr * lr_multiplier]
lr_scheduler = PiecewiseConstantDecay(
    boundaries=[watcher.rlog("optimizer", boundaries=2000)],
    values=boundaries_decay_values)
model_optimizer = Adam(lr_scheduler)
model.compile(optimizer=model_optimizer, loss=l2_loss)

trainer = TFCATrainer(data_generator=data_generator,
                      model=model,
                      optimizer=model_optimizer,
                      watcher=watcher,
                      loss_function=l2_loss)
trainer.train(train_steps=watcher.rlog("training_process", train_steps=2000),
              batch_size=watcher.rlog("training_process", batch_size=8),
              grad_norm_value=watcher.rlog("training_process",
                                           grad_norm_value=1e-8),
ssd = SSD(num_classes=len(dataset.label_ids) + 1, input_shape=INPUT_SHAPE)
checkpoint = tf.train.Checkpoint(ssd)
ssd.summary()
_ = input("Press Enter to continue...")

## 4. Generate default boxes
print("\t4. Default boxes generation...")
fm_shapes = ssd.output_shape
aspect_ratios = ASPECT_RATIOS
scales = SCALES
default_boxes = Image.generate_default_boxes(fm_shapes, aspect_ratios, scales)

## 5. Learning initializations
print("\t5. Learning initialization...")
learning_rate = PiecewiseConstantDecay(boundaries=BOUNDARIES,
                                       values=LR_VALUES)
ssd_optimizer = SGD(learning_rate=learning_rate, momentum=MOMENTUM)
ssd_loss = SSD_Loss(default_boxes=default_boxes,
                    num_classes=ssd.num_classes,
                    regression_type=REGRESSION_TYPE,
                    hard_negative_ratio=3,
                    alpha=ALPHA)

## 6. Training initializations
print("\t6. Final training initializations...")
last_iter = 0
iterations = []
mb_losses, loc_losses, conf_losses = [], [], []
if TENSORBOARD_LOGS:
    writer = SummaryWriter(comment="SSD | __" + DATASET_NAME + DATASET_KEY + "__")
model.add(layers.Dense(10, activation=None, name='output'))  # None means no activation, i.e. linear
model.summary()

# In[7]:

from tensorflow.keras.optimizers.schedules import PiecewiseConstantDecay

batch_size = 64
num_instance = 50000
# Let's say we want to decay the learning rate at epochs 40, 80 and 160,
# so convert those epoch boundaries into optimizer-step boundaries.
boundary = (40 * num_instance // batch_size, 80 * num_instance // batch_size,
            160 * num_instance // batch_size)
value = (1e-3, 3e-4, 1e-4, 3e-5)
learning_rate = PiecewiseConstantDecay(boundary, value)
print(learning_rate)

# In the loss, from_logits=True because we use a linear activation at the last layer.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
print('Model compiled.')

# In[8]:

# Train the model
epochs = 60
print(x_train.shape)
print(y_train.shape)
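# A hypothetical helper (not in the original notebook) that performs the same
# epoch-to-step conversion used for `boundary` in the cell above, for arbitrary decay epochs.
def epochs_to_step_boundaries(decay_epochs, num_instance, batch_size):
    # One epoch spans roughly num_instance // batch_size optimizer steps,
    # so an epoch boundary e maps to (e * num_instance) // batch_size steps.
    return tuple((e * num_instance) // batch_size for e in decay_epochs)

# e.g. epochs_to_step_boundaries((40, 80, 160), num_instance, batch_size) == boundary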
DECAY_STEPS = (STEPS_PER_EPOCH * EPOCHS) // ACCUM_STEPS

E1 = 30 - CURR_EPOCH
E2 = 60 - CURR_EPOCH
E3 = 90 - CURR_EPOCH

S1 = (STEPS_PER_EPOCH * E1) // ACCUM_STEPS
S2 = (STEPS_PER_EPOCH * E2) // ACCUM_STEPS
S3 = (STEPS_PER_EPOCH * E3) // ACCUM_STEPS

print("--- LR decay --- \nstep {}: {} \nstep {}: {} \nstep {}: {}".format(
    S1, 1e-2, S2, 1e-3, S3, 1e-4))

learning_rate_fn = PiecewiseConstantDecay(
    boundaries=[S1, S2, S3],
    values=[0.1, 0.01, 0.001, 0.0001]
)

model.compile(
    optimizer=SGD(learning_rate=learning_rate_fn, momentum=0.9, decay=0.0001),
    loss=CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

callbacks = [
    ModelCheckpoint(
        MODEL_PATH,
        monitor='val_accuracy',
        mode='max',
    mode='train',
    augmentation=['flip'])  # the patching algorithm is currently causing a bottleneck sometimes

dummy = tf.random.normal((1, 300, 300, 3))

ssd = create_pre_ssd_mobilenetv1_lite(weights=None)

pretrained_type = 'specified'
checkpoint_path = CHECKPOINT_PATH
net = init_ssd(ssd, pretrained_type, checkpoint_path)

criterion = create_losses(NEG_RATIO, NUM_CLASSES)

steps_per_epoch = info['length'] // BATCH_SIZE

lr_fn = PiecewiseConstantDecay(
    boundaries=[int(steps_per_epoch * NUM_EPOCHS * 2 / 3),
                int(steps_per_epoch * NUM_EPOCHS * 5 / 6)],
    values=[INITIAL_LR, INITIAL_LR * 0.1, INITIAL_LR * 0.01])
optimizer = tf.keras.optimizers.SGD(
    learning_rate=lr_fn,  # use the piecewise constant schedule defined above
    momentum=MOMENTUM)

train_log_dir = 'logs/train'
val_log_dir = 'logs/val'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
val_summary_writer = tf.summary.create_file_writer(val_log_dir)


@tf.function
def train_step(imgs, gt_confs, gt_locs, ssd, criterion, optimizer):
    with tf.GradientTape() as tape:
def main(args):
    tf.random.set_seed(0)

    # Search for an existing checkpoint
    _ensure_exists(args.checkpoint_dir)
    initial_epoch = 0
    checkpoint = None
    for filename in os.listdir(args.checkpoint_dir):
        pieces = path.splitext(filename)[0].split('_')
        if args.name != '_'.join(pieces[:-1]):
            continue
        if pieces[-1] == 'best':
            # Don't overwrite any existing best model
            i = 0
            base = path.join(args.checkpoint_dir, '_'.join(pieces) + '{}.h5')
            while path.exists(base.format(i)):
                i += 1
            os.rename(base.format(''), base.format(i))
        elif 'best' not in pieces[-1]:
            epoch = int(pieces[-1])
            if epoch > initial_epoch:
                initial_epoch = epoch
                checkpoint = filename

    # Load or create the model
    if checkpoint is not None:
        model = load_model(path.join(args.checkpoint_dir, checkpoint))
        need_compile = False
        need_learning_rate = False
    elif args.model_file is not None:
        model = load_model(args.model_file)
        need_compile = False
        need_learning_rate = True
    else:
        model = mobilenet(input_size=tuple(args.size), l2_decay=args.l2_decay)
        if args.weight_file is not None:
            model.load_weights(args.weight_file)
        need_compile = True
        need_learning_rate = True

    # Load the dataset (train and validation splits)
    train_data, train_steps = imagenet('train',
                                       tuple(args.size),
                                       augment=not args.no_augment)
    val_data, val_steps = imagenet('val', tuple(args.size), augment=False)

    # Prepare the model for training
    if need_compile:
        model.compile(optimizer=getattr(tf.keras.optimizers, args.optimizer)(),
                      loss='categorical_crossentropy',
                      metrics=['accuracy', 'top_k_categorical_accuracy'])
    if need_learning_rate:
        if len(args.learning_rates) > 1:
            model.optimizer.learning_rate = PiecewiseConstantDecay(
                [x * train_steps for x in args.learning_rate_boundaries],
                args.learning_rates)
        else:
            model.optimizer.learning_rate = args.learning_rates[0]

    # Set up training callbacks (checkpointing and TensorBoard)
    best_filename = path.join(args.checkpoint_dir, args.name + '_best.h5')
    callbacks = [
        ModelCheckpoint(
            path.join(args.checkpoint_dir, args.name + '_{epoch:d}.h5')),
        ModelCheckpoint(best_filename, save_best_only=True)
    ]
    if args.tensorboard_dir != '':
        _ensure_exists(args.tensorboard_dir)
        now_str = datetime.now().strftime('_%Y-%m-%d_%H-%M-%S')
        callbacks.append(
            TensorBoard(log_dir=path.join(args.tensorboard_dir,
                                          args.name + now_str)))

    model.fit(x=train_data,
              epochs=args.epochs,
              verbose=args.verbosity,
              callbacks=callbacks,
              validation_data=val_data,
              initial_epoch=initial_epoch,
              steps_per_epoch=train_steps,
              validation_steps=val_steps)

    # Finalize and clean up
    _ensure_exists(args.model_dir)
    best_loss = np.inf
    for filename in os.listdir(args.checkpoint_dir):
        pieces = path.splitext(filename)[0].split('_')
        if args.name != '_'.join(pieces[:-1]):
            continue
        if 'best' in pieces[-1]:
            # Is this the best of the "best" models?
            model = load_model(path.join(args.checkpoint_dir, filename))
            loss = model.evaluate(x=val_data, steps=val_steps)[0]
            if loss < best_loss:
                best_loss = loss
                model.save(path.join(args.model_dir, args.name + '.h5'))
            os.remove(path.join(args.checkpoint_dir, filename))
classes_to_include = [
    'X4'
]

### initialize loaders ###
train_ds = dp.training_data_loader(
    base_dir="C:/Users/Noah Barrett/Desktop/School/Research 2020/code/main/scraps/super-res/super-resolution/.div2k/images/DIV2K_train_LR_bicubic/train_data")
test_ds = dp.testing_data_loader(
    base_dir="C:/Users/Noah Barrett/Desktop/School/Research 2020/code/main/scraps/super-res/super-resolution/.div2k/images/DIV2K_train_LR_bicubic/test_data")

### load data ###
train_ds.load_data(selected_classes=classes_to_include)
test_ds.load_data(selected_classes=classes_to_include)
train_ds = train_ds.get_dataset()
print(type(train_ds))

"""
train model
"""
# Create directory for saving model weights
weights_dir = 'weights/article'
os.makedirs(weights_dir, exist_ok=True)

# EDSR baseline as described in the EDSR paper (1.52M parameters)
model_edsr = edsr(scale=4, num_res_blocks=16)

# Adam optimizer with a scheduler that halves the learning rate after 200,000 steps
optim_edsr = Adam(learning_rate=PiecewiseConstantDecay(boundaries=[200000],
                                                       values=[1e-4, 5e-5]))

# Compile and train model for 300,000 steps with L1 pixel loss
model_edsr.compile(optimizer=optim_edsr, loss='mean_absolute_error')
model_edsr.fit(train_ds, epochs=300, steps_per_epoch=1000)

# Save model weights
model_edsr.save_weights(os.path.join(weights_dir, 'weights-edsr-16-x4.h5'))
def optimizer(self):
    self.opt = Adam(learning_rate=PiecewiseConstantDecay(
        boundaries=[40], values=[self.lr * 10, self.lr]))
def __init__(
    self,
    learning_rate,
    discount,
    actions_num,
    epsilon,
    batch_size,
    input_shape,
    stop_decay_after,
    epsilon_min,
    memory_size,
    update_every,
    model_filename,
    layers,
    neurons,
    architecture,
    # lr_decay_rate, lr_decay_steps,
    lr_bounds,
    lr_values,
    trained_model=None,
):
    self.action_space = [i for i in range(actions_num)]
    self.discount = discount
    self.epsilon = epsilon
    # self.epsilon_decay_rate = epsilon_decay_rate
    self.epsilon_decay_rate = math.exp(math.log(epsilon_min) / stop_decay_after)
    self.epsilon_min = epsilon_min
    self.update_every = update_every
    self.batch_size = batch_size
    self.learn_step_counter = 0
    self.memory = ReplayMemory(memory_size, input_shape)
    self.main_model = make_model(architecture,
                                 layers=layers,
                                 actions_num=actions_num,
                                 neurons=neurons)
    self.target_model = make_model(architecture,
                                   layers=layers,
                                   actions_num=actions_num,
                                   neurons=neurons)
    lr_schedule = PiecewiseConstantDecay(lr_bounds, lr_values)
    self.main_model.compile(optimizer=Adam(learning_rate=lr_schedule),
                            loss='mse')
    self.target_model.compile(optimizer=Adam(learning_rate=learning_rate),
                              loss='mse')
    self.model_file = trained_model
    if trained_model is not None:
        # The model has to be called once so that the weights are created
        self.main_model(tf.ones((1, *input_shape)))
        self.target_model(tf.ones((1, *input_shape)))
        self.load_trained_model()
        # self.weights_file = trained_model
        # self.load_trained_model()
    self.model_filename = model_filename
        args.pretrained_type,
        checkpoint_dir=args.checkpoint_dir,
        checkpoint_path=args.checkpoint_path)
except Exception as e:
    print(e)
    print('The program is exiting...')
    sys.exit()

criterion = create_losses(args.neg_ratio, NUM_CLASSES)

steps_per_epoch = info['length'] // args.batch_size

lr_fn = PiecewiseConstantDecay(boundaries=[
    int(steps_per_epoch * args.num_epochs * 2 / 3),
    int(steps_per_epoch * args.num_epochs * 5 / 6)
],
                               values=[
                                   args.initial_lr,
                                   args.initial_lr * 0.1,
                                   args.initial_lr * 0.01
                               ])
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_fn,
                                    momentum=args.momentum)

train_log_dir = 'logs/train'
val_log_dir = 'logs/val'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
val_summary_writer = tf.summary.create_file_writer(val_log_dir)

for epoch in range(args.num_epochs):
    avg_loss = 0.0
    avg_conf_loss = 0.0
    scale=scale,
    subset="residual_sr",
    noise_mean=noise_mean,
    noise_sigma=noise_sigmas[3],
    hr_shape=[64 * scale, 64 * scale],
    hr_model="difference")
test_ds = test_ds.dataset(batch_size=1, random_transform=False)

# -----------------------
#  Trainer
# -----------------------
trainer_sr = residual_sr_trainer(
    model=residual_sr_v3(scale=scale),
    checkpoint_dir=f'.ckpt/residual_sr_{scale}_v3_difference_01',
    learning_rate=PiecewiseConstantDecay(boundaries=[150000],
                                         values=[2e-4, 1e-6]))

for lr, hr in test_ds.take(10):
    sr = trainer_sr.SR_return(lr)
    output = tf.image.resize(lr, [hr.shape[1], hr.shape[2]],
                             method=tf.image.ResizeMethod.BICUBIC)
    hr = hr + output
    output = output + sr

for i in range(total_iter):
    # Location of model weights (needed for demo)
    weights_dir = f'weights/ircnn_denoise_{noise_sigmas[i] // 1}'
    weights_file = os.path.join(weights_dir, 'weights.h5')
    os.makedirs(weights_dir, exist_ok=True)

    # -----------------------
    #  Trainer
def train(model,
          train_dataset,
          test_dataset,
          epochs,
          patch_size,
          tensorboard_images,
          number_test_images=3):
    '''Training the model with the given datasets'''

    # Define filewriters for Tensorboard tracking.
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_loss_log_dir = 'logs/gradient_tape/' + current_time + '/train_loss'
    test_loss_log_dir = 'logs/gradient_tape/' + current_time + '/test_loss'
    psnr_log_dir = 'logs/gradient_tape/' + current_time + '/psnr'
    ssim_log_dir = 'logs/gradient_tape/' + current_time + '/ssim'
    img_log_dir = 'logs/gradient_tape/' + current_time + '/img'
    train_loss_summary_writer = tf.summary.create_file_writer(train_loss_log_dir)
    test_loss_summary_writer = tf.summary.create_file_writer(test_loss_log_dir)
    psnr_summary_writer = tf.summary.create_file_writer(psnr_log_dir)
    ssim_summary_writer = tf.summary.create_file_writer(ssim_log_dir)
    img_summary_writer = tf.summary.create_file_writer(img_log_dir)

    tf.keras.backend.clear_session()

    # Get example images from the dataset for Tensorboard process tracking.
    example_images = get_example_images(tensorboard_images, number_test_images,
                                        patch_size)

    # Hyperparameters
    num_epochs = epochs
    learning_rate = 1e-4
    running_average_factor = 0.95

    # Initialize L1 loss -> mean absolute error (the mean of the absolute
    # difference between label and prediction).
    loss = tf.keras.losses.MeanAbsoluteError()

    # The learning rate will be halved 4 times during training.
    optimizer = tf.keras.optimizers.Adam(learning_rate=PiecewiseConstantDecay(
        boundaries=[
            num_epochs // 5, (num_epochs // 5) * 2, (num_epochs // 5) * 3,
            (num_epochs // 5) * 4
        ],
        values=[
            learning_rate, learning_rate / 2, learning_rate / (2 * 2),
            learning_rate / (2 * 4), learning_rate / (2 * 8)
        ]),
                                         beta_1=0.9,
                                         beta_2=0.999)

    train_losses = []
    test_losses = []
    psnr_list = []
    ssim_list = []

    # Test once before we start training.
    train_loss, _, _ = test(model, train_dataset, loss, patch_size)
    train_losses.append(train_loss)
    test_loss, psnr, ssim = test(model, test_dataset, loss, patch_size)
    test_losses.append(test_loss)
    psnr_list.append(psnr)
    ssim_list.append(ssim)

    # Initialize time
    training_time = time.time()

    # Train for num_epochs
    for epoch in range(num_epochs):
        epoch_time = time.time()
        running_average = 0
        for i, (input, target) in enumerate(train_dataset):
            train_loss = train_step(model, input, target, loss, optimizer,
                                    patch_size)
            running_average = running_average_factor * running_average + (
                1 - running_average_factor) * train_loss

        # Append train losses
        train_losses.append(running_average)

        # Testing
        test_loss, psnr, ssim = test(model, test_dataset, loss, patch_size)

        # Append test losses
        test_losses.append(test_loss)
        psnr_list.append(psnr)
        ssim_list.append(ssim)

        with train_loss_summary_writer.as_default():
            tf.summary.scalar('loss', train_losses[-1], step=epoch)
        with test_loss_summary_writer.as_default():
            tf.summary.scalar('loss', test_losses[-1], step=epoch)
        with psnr_summary_writer.as_default():
            tf.summary.scalar('psnr', psnr_list[-1], step=epoch)
        with ssim_summary_writer.as_default():
            tf.summary.scalar('ssim', ssim_list[-1], step=epoch)

        # Load produced images into Tensorboard
        pred_images = []
        for image in example_images:
            pred_images.append(model(tf.expand_dims(image, axis=0)))
        with img_summary_writer.as_default():
            tf.summary.image("upscaled images",
                             tf.squeeze(pred_images),
                             step=epoch,
                             max_outputs=number_test_images)

        print(
            f'Epoch {str(epoch)}: training loss = {running_average}, test loss = {test_loss}, psnr = {psnr}, ssim = {ssim}, time {timing(epoch_time)} seconds'
        )