def _prepare_callback(self):
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0,
                                               patience=1000, verbose=1, mode='min')
    file_path = "ds_" + str(self.dataset_name) + "_ac_" + str(
        self.accuracy) + "_weights-{epoch:02d}-{loss:.5f}.h5"
    checkpoint = ModelCheckpoint(file_path, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    csv_logger = CSVLogger('log.csv', append=True, separator=';')
    clr = CyclicLR(mode=LearningConfig.CLR_METHOD,
                   base_lr=LearningConfig.MIN_LR,
                   max_lr=LearningConfig.MAX_LR,
                   step_size=LearningConfig.STEP_SIZE *
                   (self.SUM_OF_ALL_TRAIN_SAMPLES // self.BATCH_SIZE))
    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    # tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    # return [checkpoint, early_stop, csv_logger, clr, tensorboard_callback]
    return [checkpoint, early_stop, csv_logger, clr]
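# Hedged note on the step_size arithmetic above (the concrete numbers are assumptions,
# not values from this codebase): with LearningConfig.STEP_SIZE = 4, 20_000 training
# samples and a batch size of 64, the half-cycle length becomes
#   4 * (20_000 // 64) = 4 * 312 = 1248 iterations,
# i.e. STEP_SIZE epochs' worth of batches per half-cycle, which falls inside the
# 2-8x iterations-per-epoch range usually suggested for cyclical learning rates.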
def callbacks_clr(train_size, batch_size):
    train_steps = np.ceil(float(train_size) / float(batch_size))
    BEST_WEIGHTS = 'model_save/weight_fold_%s.hdf5' % (
        datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    TRAINING_LOG = 'logs/trainlog_fold_%s.csv' % (
        datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    clr = CyclicLR(base_lr=1e-7, max_lr=2e-4, step_size=4 * train_steps,
                   mode='exp_range', gamma=0.99994)
    early_stop = EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='max')
    save_checkpoint = ModelCheckpoint(BEST_WEIGHTS, monitor='val_acc', verbose=1,
                                      save_weights_only=True, save_best_only=True,
                                      mode='max')
    csv_logger = CSVLogger(TRAINING_LOG, append=False)
    callbacks = [early_stop, save_checkpoint, csv_logger, clr]
    return callbacks
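# Hedged usage sketch for callbacks_clr(): the toy model and random data below are
# illustrative assumptions, not part of the source; only the callbacks_clr() call itself
# comes from the function above. Assumes the same Keras version as the surrounding
# snippets, where the accuracy metric is logged as 'val_acc'.
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

os.makedirs('model_save', exist_ok=True)  # ModelCheckpoint / CSVLogger need these dirs
os.makedirs('logs', exist_ok=True)

x_toy = np.random.rand(1000, 20)
y_toy = np.random.randint(0, 2, size=(1000, 1))

toy_model = Sequential([Dense(16, activation='relu', input_shape=(20,)),
                        Dense(1, activation='sigmoid')])
toy_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

toy_model.fit(x_toy, y_toy,
              batch_size=32,
              epochs=5,
              validation_split=0.2,  # leaves 800 training samples
              callbacks=callbacks_clr(train_size=800, batch_size=32))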
def _train_model(self):
    clr_triangular = CyclicLR(mode='triangular')
    self.model.fit(self.X_train, self.labels,
                   batch_size=64, epochs=5, verbose=2,
                   validation_split=self.validation_split,
                   callbacks=[clr_triangular])  # pass the CLR callback so it actually runs
def fit(model, ds, loss="categorical_crossentropy", metrics=["acc"],
        epochs=3, finetune=False, verbose=1):
    optim = Nadam()
    base_lr = 0.001
    max_lr = 0.006
    clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=2000., mode='triangular')
    model.compile(optimizer=optim, loss=loss, metrics=metrics)
    if finetune:
        orig_epochs = epochs
        epochs //= 2
        model.fit_generator(generator=ds.train_gen,
                            steps_per_epoch=ds.train_steps,
                            epochs=epochs,
                            verbose=verbose,
                            callbacks=[clr],
                            validation_data=ds.val_gen,
                            validation_steps=ds.val_steps)
        # halve the cyclic range and rebuild the callback so the new bounds take effect
        base_lr /= 2
        max_lr /= 2
        clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=2000., mode='triangular')
        for layer in model.layers:
            layer.trainable = True
        model.compile(optimizer=optim, loss=loss, metrics=metrics)
        model.fit_generator(generator=ds.train_gen,
                            steps_per_epoch=ds.train_steps,
                            epochs=epochs,
                            verbose=verbose,
                            callbacks=[clr],
                            validation_data=ds.val_gen,
                            validation_steps=ds.val_steps)
        epochs = orig_epochs
    return model.fit_generator(
        generator=ds.train_gen,
        steps_per_epoch=ds.train_steps,
        epochs=epochs,
        verbose=verbose,
        callbacks=[clr],
        validation_data=ds.val_gen,
        validation_steps=ds.val_steps).history['val_loss'][-1]
def _train_model(self):
    bs = 64
    # Custom cyclical learning rate callback
    clr_triangular = CyclicLR(mode='triangular', step_size=6 * (len(self.X_train) / bs))
    self.model = self.model(self.vocab_size, self.seq_len)
    self.model.fit(self.X_train, self.y_train,
                   batch_size=bs, epochs=5, verbose=2,
                   # validation_split=self.validation_split,
                   validation_data=(self.X_val, self.y_val),
                   callbacks=[clr_triangular])
def fitWithCLR(model, ds, epochs=4, optim=SGD(nesterov=True), finetune=False):
    base_lr = 0.001
    max_lr = 0.006
    if finetune:
        base_lr /= 2
        max_lr /= 2
        epochs //= 2
    clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=2000., mode='triangular')
    model.compile(optimizer=optim, loss="categorical_crossentropy", metrics=["acc"])
    return model.fit_generator(generator=ds.train_generator,
                               steps_per_epoch=ds.train_steps,
                               epochs=epochs,
                               verbose=1,
                               callbacks=[clr],
                               validation_data=ds.val_generator,
                               validation_steps=ds.val_steps)
def model(observation, char_indices, indices_char, x, y):
    char_model = models.rnn_embedding_model(x, y)

    # Set up model training variables
    optimizer = RMSprop(lr=0.01)
    char_model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    batch_size = 4096
    num_epochs = 200
    if lib.get_conf('test_run'):
        num_epochs = 2

    # Set up callbacks
    tf_log_path = os.path.join(os.path.expanduser('~/log_dir'), lib.get_batch_name())
    logging.info('Using Tensorboard path: {}'.format(tf_log_path))
    mc_log_path = os.path.join(
        lib.get_conf('model_checkpoint_path'),
        lib.get_batch_name() + '_epoch_{epoch:03d}_loss_{loss:.2f}.h5py')
    logging.info('Using mc_log_path path: {}'.format(mc_log_path))
    sentence_generator = SentenceGenerator(verbose=1)
    clr_step_size = numpy.floor((float(x.shape[0]) / batch_size) * 4)
    clr = CyclicLR(base_lr=.005, max_lr=.02, mode='triangular2', step_size=clr_step_size)
    logging.info('Using CLR step size: {}'.format(clr_step_size))
    callbacks = [TensorBoard(log_dir=tf_log_path),
                 ModelCheckpoint(mc_log_path),
                 sentence_generator,
                 clr]

    # Train the model, output generated text after each iteration
    char_model.fit(x, y, batch_size=batch_size, epochs=num_epochs, callbacks=callbacks)
    print(sentence_generator.sentences)
def train(model: Model, optimizer: Optimizer, epochs: int, batch_size: int, patch_size: tuple,
          num_validation: int, name: str, loss: str, preprocessing_func, output_reshape_func,
          training_generator_threads, training_sample_cache, load_path=None):
    """
    Universal method to make training of different networks with different input_sizes easier
    and to minimize code duplication

    :param model: tf.keras model instance
    :param optimizer: tf.keras.optimizers.Optimizer
    :param epochs: number of epochs to train the model
    :param batch_size: batch size
    :param patch_size: network input shape
    :param num_validation: number of samples to validate on
    :param name: unique name to identify model
    :param loss: string like 'mae' or custom loss function
    :param preprocessing_func: callable to preprocess loaded data
    :param output_reshape_func: callable to reshape output to fit into the network
    :param training_generator_threads: number of threads the generator should run on
    :param training_sample_cache: number of samples to cache
    :param load_path: path to load model from
    :return: training history
    """
    model.compile(optimizer, loss=loss, metrics=['mse'])
    if load_path is not None:
        model.load_weights(load_path)

    save_path = 'models/{}/best'.format(name)
    model.save_weights(save_path)
    checkpointing = keras.callbacks.ModelCheckpoint(save_path, monitor='val_loss', verbose=1,
                                                    save_best_only=True, mode='min',
                                                    save_weights_only=False)
    clr = CyclicLR(base_lr=0.001, max_lr=0.01, step_size=2000.)
    tensorboard = keras.callbacks.TensorBoard(log_dir='./graphs/{}/graph'.format(name),
                                              histogram_freq=0, write_graph=True,
                                              write_images=True)
    es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='auto', patience=200, verbose=1)
    callbacks = [checkpointing, clr, tensorboard, es]

    train_generator, validation_generator = get_generators(patch_size, batch_size,
                                                           preprocessing_func,
                                                           output_reshape_func,
                                                           num_validation,
                                                           training_generator_threads,
                                                           training_sample_cache)
    hist = model.fit(train_generator, epochs=epochs, callbacks=callbacks,
                     validation_data=validation_generator, max_queue_size=0)
    model.save_weights('models/{}/final'.format(name))
    return hist
def __init__(self, name, d, model, lr, epochs, batch_size,
             use_cyclic_lr=True, use_random_eraser=True, use_mixup=True):
    self.name = name
    self.d = d
    self.model = model
    self.lr = lr
    self.epochs = epochs
    self.batch_size = batch_size
    self.use_random_eraser = use_random_eraser
    self.use_mixup = use_mixup
    self.callbacks = [
        ModelCheckpoint('%s/best.h5' % name, monitor='val_acc', verbose=1,
                        save_best_only=True, save_weights_only=True),
        keras.callbacks.TensorBoard(log_dir='%s/log%s' % (name, name),
                                    histogram_freq=0, write_graph=True,
                                    write_images=True)
    ]
    if use_cyclic_lr:
        self.callbacks.append(
            CyclicLR(base_lr=lr, max_lr=lr * 10,
                     step_size=d.X_train.shape[0] // batch_size,
                     mode='triangular'))
        print('using cyclic lr')
    else:
        print('not using cyclic lr')
    self.get_datagen()
    util.ensure_folder(name)
        batchsize=100000,
        on_epoch_end=False,
        publish=publishpath + "_event_" + str(ev),
        use_event=ev))

model, history = train.trainModel(nepochs=1,
                                  run_eagerly=True,
                                  batchsize=nbatch,
                                  batchsize_use_sum_of_squares=False,
                                  checkperiod=1,  # saves a checkpoint model every N epochs
                                  verbose=verbosity,
                                  backup_after_batches=100,
                                  additional_callbacks=callbacks +
                                  [CyclicLR(base_lr=learningrate,
                                            max_lr=learningrate * 5.,
                                            step_size=10)])

loss_config.energy_loss_weight = 1e-3
loss_config.position_loss_weight = 1e-3
loss_config.timing_loss_weight = 1e-5
learningrate = 3e-5
loss_config.beta_loss_scale = 1.

model, history = train.trainModel(nepochs=1 + 3,
                                  run_eagerly=True,
                                  batchsize=nbatch,
                                  batchsize_use_sum_of_squares=False,
                                  checkperiod=1,  # saves a checkpoint model every N epochs
                                  verbose=verbosity,
PRETRAINED_WEIGHTS = 'weights/pretrained_weights_fold%d_%s.hdf5' % (fold, ftype)

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for sub_fold, (train_index, valid_index) in enumerate(kf.split(x_valid, y_valid1)):
    x_train_fold, x_valid_fold = x_valid[train_index], x_valid[valid_index]
    y_train_fold, y_valid_fold = y_valid[train_index], y_valid[valid_index]

    WEIGHTS_BEST = 'weights/best_weights_fold%d_subfold%d_%s.hdf5' % (
        fold, sub_fold, ftype)
    clr = CyclicLR(base_lr=1e-8, max_lr=8e-5)
    early_stoping = EarlyStopping(monitor='val_acc', patience=20, verbose=1)
    save_checkpoint = ModelCheckpoint(WEIGHTS_BEST, monitor='val_acc', verbose=1,
                                      save_best_only=True, save_weights_only=True,
                                      mode='max')
    callbacks = [early_stoping, save_checkpoint, clr]

    model = Stacking_Model()
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=8e-5),
                  metrics=['accuracy'])
def main(data_module, model_module, optimizer_module, filename, config, use_val=False): """Patch everything together.""" batch_size = config['train']['batch_size'] nb_epoch = config['train']['epochs'] today = datetime.datetime.now() datestring = today.strftime('%Y%m%d-%H%M-%S') # The data, shuffled and split between train and test sets: data = data_module.load_data(config) print("Data loaded.") X_train, y_train = data['x_train'], data['y_train'] X_train = data_module.preprocess(X_train) if 'use_val' in config['train']: use_val = config['train']['use_val'] use_val = True if use_val: X_test, y_test = data['x_val'], data['y_val'] else: X_test, y_test = data['x_test'], data['y_test'] X_test = data_module.preprocess(X_test) # load hierarchy, if present if 'hierarchy_path' in config['dataset']: ret = handle_hierarchies(config, data_module, X_train, y_train, X_test, y_test) # hierarchy = ret['hierarchy'] X_train = ret['X_train'] y_train = ret['y_train'] X_test = ret['X_test'] y_test = ret['y_test'] nb_classes = data_module.n_classes logging.info("# classes = {}".format(data_module.n_classes)) img_rows = data_module.img_rows img_cols = data_module.img_cols img_channels = data_module.img_channels da = config['train']['data_augmentation'] # Convert class vectors to binary class matrices. Y_train = np_utils.to_categorical(y_train, nb_classes) Y_test = np_utils.to_categorical(y_test, nb_classes) # Y_train = Y_train.reshape((-1, 1, 1, nb_classes)) # For fcn # Y_test = Y_test.reshape((-1, 1, 1, nb_classes)) if 'smooth_train' in config['dataset']: Y_train = np.load(config['dataset']['smooth_train']) if 'smooth_test_path' in config['dataset']: Y_test = np.load(config['dataset']['smooth_test_path']) # Input shape depends on the backend if K.image_dim_ordering() == "th": input_shape = (img_channels, img_rows, img_cols) else: input_shape = (img_rows, img_cols, img_channels) model = model_module.create_model(nb_classes, input_shape, config) print("Model created") if 'initializing_model_path' in config['model']: init_model_path = config['model']['initializing_model_path'] if not os.path.isfile(init_model_path): logging.error( "initializing_model={} not found".format(init_model_path)) sys.exit(-1) init_model = load_model(init_model_path) layer_dict_init = dict([(layer.name, layer) for layer in init_model.layers]) layer_dict_model = dict([(layer.name, layer) for layer in model.layers]) for layer_name in layer_dict_model.keys(): if layer_name in layer_dict_init: print("\tLoad layer weights '{}'".format(layer_name)) weights = layer_dict_init[layer_name].get_weights() try: layer_dict_model[layer_name].set_weights(weights) except ValueError: print("\t\twrong shape - skip") logging.info("Done initializing") model.summary() optimizer = optimizer_module.get_optimizer(config) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"]) print("Finished compiling") print("Building model...") checkpoint_fname = os.path.basename(config['train']['artifacts_path']) if 'saveall' in config['train'] and config['train']['saveall']: checkpoint_fname = ("{}_{}.chk.{{epoch:02d}}.h5".format( checkpoint_fname, datestring)) save_best_only = False else: checkpoint_fname = "{}_{}.chk.h5".format(checkpoint_fname, datestring) save_best_only = True model_chk_path = os.path.join(config['train']['artifacts_path'], checkpoint_fname) model_chk_path = get_nonexistant_path(model_chk_path) checkpoint = ModelCheckpoint(model_chk_path, monitor="val_acc", save_best_only=save_best_only, save_weights_only=False) history_cb 
= History() callbacks = [checkpoint, history_cb] # remote, if 'tensorboard' in config['train'] and config['train']['tensorboard']: tensorboard = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True) callbacks.append(tensorboard) if 'remote' in config['train'] and config['train']['remote']: remote = RemoteMonitor(root='http://localhost:9000') callbacks.append(remote) if 'lr_reducer' in config['train'] and config['train']['lr_reducer']: lr_reducer = ReduceLROnPlateau(monitor='val_acc', factor=0.3, cooldown=0, patience=3, min_lr=0.5e-6, verbose=1) callbacks.append(lr_reducer) if 'clr' in config['train']: clr = CyclicLR(base_lr=config['train']['clr']['base_lr'], max_lr=config['train']['clr']['max_lr'], step_size=(config['train']['clr']['step_size'] * (X_train.shape[0] // batch_size)), mode=config['train']['clr']['mode']) callbacks.append(clr) X_train = np.append(X_train, X_test, axis=0) Y_train = np.append(Y_train, Y_test, axis=0) if not da: print('Not using data augmentation.') model.save(model_chk_path.format(epoch=0).replace('.00.', '.00.a.')) t0 = time.time() model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, validation_data=(X_test, Y_test), shuffle=True, callbacks=callbacks) t1 = time.time() t2 = t1 epochs_augmented_training = 0 else: print('Using real-time data augmentation.') if 'hue_shift' in da: hsv_augmentation = (da['hue_shift'], da['saturation_scale'], da['saturation_shift'], da['value_scale'], da['value_shift']) else: hsv_augmentation = None # This will do preprocessing and realtime data augmentation: datagen = ImageDataGenerator( # set input mean to 0 over the dataset featurewise_center=da['featurewise_center'], # set each sample mean to 0 samplewise_center=da['samplewise_center'], # divide inputs by std of the dataset featurewise_std_normalization=False, # divide each input by its std samplewise_std_normalization=da['samplewise_std_normalization'], zca_whitening=da['zca_whitening'], # randomly rotate images in the range (degrees, 0 to 180) rotation_range=da['rotation_range'], # randomly shift images horizontally (fraction of total width) width_shift_range=da['width_shift_range'], # randomly shift images vertically (fraction of total height) height_shift_range=da['height_shift_range'], horizontal_flip=da['horizontal_flip'], vertical_flip=da['vertical_flip'], hsv_augmentation=hsv_augmentation, zoom_range=da['zoom_range'], shear_range=da['shear_range'], channel_shift_range=da['channel_shift_range']) # Compute quantities required for featurewise normalization # (std, mean, and principal components if ZCA whitening is applied). datagen.fit(X_train, seed=0) # Apply normalization to test data for i in range(len(X_test)): X_test[i] = datagen.standardize(X_test[i]) # Fit the model on the batches generated by datagen.flow(). 
steps_per_epoch = X_train.shape[0] // batch_size model.save(model_chk_path.format(epoch=0).replace('.00.', '.00.a.')) t0 = time.time() model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch=steps_per_epoch, epochs=nb_epoch, validation_data=(X_test, Y_test), callbacks=callbacks) t1 = time.time() # Train one epoch without augmentation to make sure data distribution # is fit well loss_history = history_cb.history["loss"] epochs_augmented_training = len(loss_history) model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, validation_data=(X_test, Y_test), shuffle=True, callbacks=callbacks, initial_epoch=len(loss_history)) t2 = time.time() loss_history = history_cb.history["loss"] acc_history = history_cb.history["acc"] val_acc_history = history_cb.history["val_acc"] np_loss_history = np.array(loss_history) np_acc_history = np.array(acc_history) np_val_acc_history = np.array(val_acc_history) history_data = zip(list(range(1, len(np_loss_history) + 1)), np_loss_history, np_acc_history, np_val_acc_history) history_data = [(el[0], "%0.4f" % el[1], "%0.4f" % el[2], "%0.4f" % el[3]) for el in history_data] history_fname = os.path.basename(config['train']['artifacts_path']) history_fname = "{}_{}_history.csv".format(history_fname, datestring) csv_path = os.path.join(config['train']['artifacts_path'], history_fname) csv_path = get_nonexistant_path(csv_path) with open(csv_path, 'w') as fp: writer = csv.writer(fp, delimiter=',') writer.writerows([("epoch", "loss", "acc", "val_acc")]) writer.writerows(history_data) training_time = t1 - t0 readjustment_time = t2 - t1 print("wall-clock training time: {}s".format(training_time)) model_fn = os.path.basename(config['train']['artifacts_path']) model_fn = "{}_{}.h5".format(model_fn, datestring) model_fn = os.path.join(config['train']['artifacts_path'], model_fn) model_fn = get_nonexistant_path(model_fn) model.save(model_fn) # Store training meta data data = { 'training_time': training_time, 'readjustment_time': readjustment_time, 'HOST': platform.node(), 'epochs': len(history_data), 'epochs_augmented_training': epochs_augmented_training, 'config': config } meta_train_fname = os.path.join(config['train']['artifacts_path'], "train-meta_{}.json".format(datestring)) meta_train_fname = get_nonexistant_path(meta_train_fname) with open(meta_train_fname, 'w') as outfile: str_ = json.dumps(data, indent=4, sort_keys=True, separators=(',', ': '), ensure_ascii=False) outfile.write(str_)
def TrainModel(args):
    ''' train keras convolutional network '''
    os.makedirs(args.saveDir, exist_ok=True)

    # assemble model
    inputShape = (args.tileSize, args.tileSize, args.nChannel)
    model = CreateModel(inputShape, args)
    print('compiling model...')
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.adam(lr=0.0001, decay=1e-6),
                  metrics=['accuracy'])
    model.summary()
    plot_model(model, to_file=os.path.join(args.saveDir, 'model.png'), show_shapes=True)

    # instantiate data flow
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True,
                                       vertical_flip=True,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       rotation_range=360,
                                       brightness_range=(0.8, 1.2))
    train_generator = train_datagen.flow_from_directory(
        args.trainDir,
        target_size=(args.tileSize, args.tileSize),
        batch_size=args.batchSize,
        shuffle=True,
        class_mode='categorical')
    validation_generator = train_datagen.flow_from_directory(
        args.valDir,
        target_size=(args.tileSize, args.tileSize),
        batch_size=args.batchSize,
        shuffle=True,
        class_mode='categorical')

    # save class IDs for downstream analysis
    print('saving class IDs')
    classIDs = pd.DataFrame.from_dict(data=train_generator.class_indices, orient='index')
    classIDs.to_csv(os.path.join(args.saveDir, 'classIDs.csv'), header=False)

    # generate class weight matrix
    counter = Counter(train_generator.classes)
    max_val = float(max(counter.values()))
    class_weights = {
        class_id: max_val / num_images
        for class_id, num_images in counter.items()
    }
    print('class weights matrix computed as', class_weights)

    # instantiate callbacks
    csv_logger = CSVLogger(os.path.join(args.saveDir, 'training.log'))
    term_nan = TerminateOnNaN()
    earlystop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    checkpointer = ModelCheckpoint(os.path.join(args.saveDir, args.modelName),
                                   monitor='val_loss')
    clr = CyclicLR(base_lr=0.0001, max_lr=0.001,
                   step_size=5 * train_generator.samples // args.batchSize,
                   mode='triangular')

    # fit model
    print('fitting model')
    model.fit_generator(
        train_generator,
        validation_data=validation_generator,
        epochs=args.epochs,
        callbacks=[csv_logger, term_nan, earlystop, checkpointer, clr],
        class_weight=class_weights,
        steps_per_epoch=train_generator.samples // args.batchSize,
        validation_steps=validation_generator.samples // args.batchSize)

    # save model
    model.save(os.path.join(args.saveDir, args.modelName))
    save_weights_only=False,
    mode=mode,
    period=1)
reduce_lr = ReduceLROnPlateau(
    monitor=monitor,
    factor=0.1,
    patience=5,
    min_lr=1e-9,
    epsilon=0.00001,
    verbose=1,
    mode=mode)
clr = CyclicLR(
    base_lr=args.learning_rate / 4,
    max_lr=args.learning_rate,
    step_size=int(math.ceil(len(IDX_TRAIN_SPLIT) / args.batch_size)) * 1,
    mode='exp_range',
    gamma=0.99994)

callbacks = [save_checkpoint]
if args.cyclic_learning_rate:
    callbacks.append(clr)
else:
    callbacks.append(reduce_lr)

generator = dense_generator if args.dense else sparse_generator
model.fit_generator(
    generator=generator(args, IDX_TRAIN_SPLIT, train=True),
    steps_per_epoch=np.ceil(
train.compileModel(learningrate=learningrate, loss=None, metrics=None)

model, history = train.trainModel(nepochs=1,
                                  run_eagerly=True,
                                  batchsize=nbatch,
                                  extend_truth_list_by=len(train.keras_model.outputs_keys) - 2,  # just adapt truth list to avoid keras error (no effect on model)
                                  batchsize_use_sum_of_squares=False,
                                  checkperiod=1,  # saves a checkpoint model every N epochs
                                  verbose=verbosity,
                                  backup_after_batches=38,
                                  additional_callbacks=[CyclicLR(base_lr=learningrate / 3,
                                                                 max_lr=learningrate,
                                                                 step_size=50)] + cb)

print("freeze BN")
# Note the submodel here, it's not just train.keras_model
for l in train.keras_model.model.layers:
    if 'gooey_batch_norm' in l.name:
        l.max_viscosity = 1.
        l.fluidity_decay = 5e-4  # reaches constant 1 after about one epoch
    if 'FullOCLoss' in l.name:
        l.use_average_cc_pos = 0.1
        l.q_min = 0.1
        # also stop GravNetLLLocalClusterLoss* from being evaluated

learningrate /= 3.
nbatch = 150000
def trainModel(self, model, train_generator, validation_generator):
    os.makedirs(self.model_path)
    model_json = model.to_json()
    with open(self.model_path + '/model_architecture.json', 'w') as outfile:
        json.dump(model_json, outfile)

    # Callback definition
    checkpoint = ModelCheckpoint(self.model_path + "/model_weights_{epoch:04d}.h5py",
                                 monitor='val_acc', verbose=1, save_best_only=True,
                                 mode='auto')
    csv_logger = CSVLogger(self.model_path + '/csvLogger_' + self.date + '.csv',
                           separator=',', append=True)
    tboard = TensorBoard(log_dir=self.model_path, write_grads=True, write_images=True,
                         batch_size=self.batch_size, write_graph=False,
                         embeddings_freq=0, embeddings_layer_names=None,
                         embeddings_metadata=None, embeddings_data=None,
                         histogram_freq=0)
    clr = CyclicLR(base_lr=0.00001, max_lr=0.00006, step_size=8 * len(train_generator))
    early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=1,
                               mode='auto', baseline=None, restore_best_weights=True)
    callbacks_list = [checkpoint, csv_logger, tboard, clr, early_stop]

    # Training/Test phase using fit_generator
    start_time = time.time()
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=self.epochs,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=callbacks_list)
    training_time = time.time() - start_time  # store the duration; print() would return None
    print('Training duration: {}'.format(training_time))
    return history, training_time
# )
# # )
# # unfix
# train.keras_model = fixLayersContaining(train.keras_model, "batch_normalization")
# train.keras_model = fixLayersContaining(train.keras_model, "bn_")

train.compileModel(learningrate=1e-4, loss=None)

# print('frozen:')
# for l in train.keras_model.layers:
#     if not l.trainable:
#         print(l.name)
# 0/0
# train.saveModel('jan.h5')
# # 0/0

model, history = train.trainModel(
    nepochs=10,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks +
    [CyclicLR(base_lr=learningrate, max_lr=learningrate * 2., step_size=100)])
def get_lr_schedule(schedule, num_samples, batch_size, schedule_args={}):
    """ Creates a learning rate schedule.

    # Arguments:

    - schedule: Name of the schedule. Possible values:
        - 'sgd': Stochastic Gradient Descent with ReduceLROnPlateau or
                 LearningRateSchedule callback.
        - 'sgdr': Stochastic Gradient Descent with Cosine Annealing and Warm Restarts.
        - 'clr': Cyclical Learning Rates.
        - 'resnet-schedule': Hand-crafted schedule used by He et al. for training ResNet.

    - num_samples: Number of training samples.

    - batch_size: Number of samples per batch.

    - schedule_args: Further arguments for the specific learning rate schedule.
        'sgd' supports:
        - 'sgd_patience': Number of epochs without improvement before reducing the LR.
                          Default: 10.
        - 'sgd_min_lr': Minimum learning rate. Default: 1e-4.
        - 'sgd_schedule': Comma-separated list of `epoch:lr` pairs, defining a learning
                          rate schedule. The total number of epochs can be appended to
                          this list, separated by a comma as well. If this is specified,
                          the learning rate will not be reduced on plateaus automatically
                          and `sgd_patience` and `sgd_min_lr` will be ignored.
                          The following example would mean to train for 50 epochs,
                          starting with a learning rate of 0.1 and reducing it by a
                          factor of 10 after 30 and 40 epochs:
                          "1:0.1,31:0.01,41:0.001,50".
        'sgdr' supports:
        - 'sgdr_base_len': Length of the first cycle. Default: 12.
        - 'sgdr_mul': Factor multiplied with the length of the cycle after the end of
                      each one. Default: 2.
        - 'sgdr_max_lr': Initial learning rate at the beginning of each cycle. Default: 0.1.
        'clr' supports:
        - 'clr_step_len': Number of training epochs per half-cycle. Default: 12.
        - 'clr_min_lr': Minimum learning rate. Default: 1e-5.
        - 'clr_max_lr': Maximum learning rate. Default: 0.1.

    # Returns:
        - a list of callbacks for being passed to the fit function,
        - a suggested number of training epochs.
    """
    if schedule.lower() == 'sgd':

        if ('sgd_schedule' in schedule_args) and (
                schedule_args['sgd_schedule'] is not None) and (
                schedule_args['sgd_schedule'] != ''):

            def lr_scheduler(schedule, epoch, cur_lr):
                if schedule[0][0] > epoch:
                    return cur_lr
                for i in range(1, len(schedule)):
                    if schedule[i][0] > epoch:
                        return schedule[i - 1][1] if schedule[i - 1][1] is not None else cur_lr
                return schedule[-1][1] if schedule[-1][1] is not None else cur_lr

            schedule = [
                (int(point[0]) - 1, float(point[1]) if len(point) > 1 else None)
                for sched_tuple in schedule_args['sgd_schedule'].split(',')
                for point in [sched_tuple.split(':')]
            ]
            schedule.sort()
            return [
                keras.callbacks.LearningRateScheduler(
                    lambda ep, cur_lr: lr_scheduler(schedule, ep, cur_lr))
            ], schedule[-1][0] + 1

        else:
            if 'sgd_patience' not in schedule_args:
                schedule_args['sgd_patience'] = 10
            if 'sgd_min_lr' not in schedule_args:
                schedule_args['sgd_min_lr'] = 1e-4
            return [
                keras.callbacks.ReduceLROnPlateau(
                    'val_loss',
                    patience=schedule_args['sgd_patience'],
                    epsilon=1e-4,
                    min_lr=schedule_args['sgd_min_lr'],
                    verbose=True)
            ], 200

    elif schedule.lower() == 'sgdr':
        if 'sgdr_base_len' not in schedule_args:
            schedule_args['sgdr_base_len'] = 12
        if 'sgdr_mul' not in schedule_args:
            schedule_args['sgdr_mul'] = 2
        if 'sgdr_max_lr' not in schedule_args:
            schedule_args['sgdr_max_lr'] = 0.1
        return ([
            SGDR(1e-6, schedule_args['sgdr_max_lr'],
                 schedule_args['sgdr_base_len'], schedule_args['sgdr_mul'])
        ], sum(schedule_args['sgdr_base_len'] * (schedule_args['sgdr_mul'] ** i)
               for i in range(5)))

    elif schedule.lower() == 'clr':
        if 'clr_step_len' not in schedule_args:
            schedule_args['clr_step_len'] = 12
        if 'clr_min_lr' not in schedule_args:
            schedule_args['clr_min_lr'] = 1e-5
        if 'clr_max_lr' not in schedule_args:
            schedule_args['clr_max_lr'] = 0.1
        return ([
            CyclicLR(schedule_args['clr_min_lr'], schedule_args['clr_max_lr'],
                     schedule_args['clr_step_len'] * (num_samples // batch_size),
                     mode='triangular')
        ], schedule_args['clr_step_len'] * 20)

    elif schedule.lower() == 'resnet-schedule':

        def resnet_scheduler(epoch):
            if epoch >= 120:
                return 0.001
            elif epoch >= 80:
                return 0.01
            elif epoch >= 1:
                return 0.1
            else:
                return 0.01

        return [keras.callbacks.LearningRateScheduler(resnet_scheduler)], 164

    else:
        raise ValueError('Unknown learning rate schedule: {}'.format(schedule))
        samplefile=samplepath,
        after_n_batches=100,
        batchsize=100000,
        on_epoch_end=False,
        use_event=ev))

model, history = train.trainModel(
    nepochs=1,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks + [
        CyclicLR(
            base_lr=learningrate / 5.,
            max_lr=learningrate * 5.,
            step_size=20)
    ])

loss_config.energy_loss_weight = 0.01
loss_config.position_loss_weight = 0.01
learningrate = 3e-5

model, history = train.trainModel(
    nepochs=1 + 3,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks + [
def train(self):
    """ train VAE model """
    train_datagen = ImageDataGenerator(rescale=1. / (2 ** self.image_res - 1),
                                       horizontal_flip=True,
                                       vertical_flip=True)

    # colormode needs to be set depending on num_channels
    if self.nchannel == 1:
        train_generator = train_datagen.flow_from_directory(
            self.data_dir,
            target_size=(self.image_size, self.image_size),
            batch_size=self.batch_size,
            color_mode='grayscale',
            class_mode='input')
    elif self.nchannel == 3:
        print('using three channel generator!')
        train_generator = train_datagen.flow_from_directory(
            self.data_dir,
            target_size=(self.image_size, self.image_size),
            batch_size=self.batch_size,
            color_mode='rgb',
            class_mode='input')
    else:
        # expecting data saved as numpy array
        train_generator = NumpyDataGenerator(self.data_dir,
                                             batch_size=self.batch_size,
                                             image_size=self.image_size,
                                             nchannel=self.nchannel,
                                             image_res=self.image_res,
                                             shuffle=True)  # if files saved as single npy block

    # instantiate callbacks
    callbacks = []
    term_nan = TerminateOnNaN()
    callbacks.append(term_nan)
    csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), separator='\t')
    callbacks.append(csv_logger)
    checkpointer = ModelCheckpoint(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5'),
                                   verbose=1, save_best_only=True, save_weights_only=True)
    callbacks.append(checkpointer)

    if self.earlystop:
        earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=8)
        callbacks.append(earlystop)

    if self.use_clr:
        clr = CyclicLR(base_lr=self.learn_rate, max_lr=0.0001,
                       step_size=0.25 * self.steps_per_epoch, mode='triangular')
        callbacks.append(clr)

    if self.use_vaecb:
        vaecb = VAEcallback(self)
        callbacks.append(vaecb)

    self.history = self.vae.fit_generator(train_generator,
                                          epochs=self.epochs,
                                          callbacks=callbacks,
                                          steps_per_epoch=self.steps_per_epoch)

    print('saving model weights to', self.model_dir)
    self.vae.save_weights(os.path.join(self.model_dir, 'weights_vae.hdf5'))
    self.encoder.save_weights(os.path.join(self.model_dir, 'weights_encoder.hdf5'))
    self.decoder.save_weights(os.path.join(self.model_dir, 'weights_decoder.hdf5'))

    self.encode()
    print('done!')
def train_attention(hypa: ty.Dict[str, str], force_retrain: bool, use_validation: bool) -> None: """MAKEDOC: what is train_attention doing?""" logg = logging.getLogger(f"c.{__name__}.train_attention") # logg.setLevel("INFO") logg.debug("Start train_attention") # build the model name model_name = build_attention_name(hypa, use_validation) logg.debug(f"model_name: {model_name}") # save the trained model here model_folder = Path("trained_models") / "attention" if not model_folder.exists(): model_folder.mkdir(parents=True, exist_ok=True) model_path = model_folder / f"{model_name}.h5" placeholder_path = model_folder / f"{model_name}.txt" # check if this model has already been trained if placeholder_path.exists(): if force_retrain: logg.warn("\nRETRAINING MODEL!!\n") else: logg.debug("Already trained") return # save info regarding the model training in this folder info_folder = Path("info") / "attention" / model_name if not info_folder.exists(): info_folder.mkdir(parents=True, exist_ok=True) # get the word list words = words_types[hypa["words_type"]] num_labels = len(words) # load data processed_folder = Path("data_proc") processed_path = processed_folder / f"{hypa['dataset_name']}" data, labels = load_processed(processed_path, words) # concatenate train and val for final train val_data = None if use_validation: x = data["training"] y = labels["training"] val_data = (data["validation"], labels["validation"]) logg.debug("Using validation data") else: x = np.concatenate((data["training"], data["validation"])) y = np.concatenate((labels["training"], labels["validation"])) logg.debug("NOT using validation data") # the shape of each sample input_shape = data["training"][0].shape # from hypa extract model param model_param = get_model_param_attention(hypa, num_labels, input_shape) batch_size_types = {"01": 32, "02": 16} batch_size = batch_size_types[hypa["batch_size_type"]] epoch_num_types = {"01": 15, "02": 30, "03": 2, "04": 4} epoch_num = epoch_num_types[hypa["epoch_num_type"]] # magic to fix the GPUs setup_gpus() model = AttentionModel(**model_param) # model.summary() metrics = [ tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), ] learning_rate_types = { "01": "fixed01", "02": "fixed02", "03": "exp_decay_step_01", "04": "exp_decay_smooth_01", "05": "clr_triangular2_01", "06": "clr_triangular2_02", "07": "clr_triangular2_03", "08": "clr_triangular2_04", "09": "clr_triangular2_05", "10": "exp_decay_smooth_02", } learning_rate_type = hypa["learning_rate_type"] lr_value = learning_rate_types[learning_rate_type] # setup opt fixed lr values if lr_value.startswith("fixed"): if lr_value == "fixed01": lr = 1e-3 elif lr_value == "fixed02": lr = 1e-4 else: lr = 1e-3 optimizer_types = { "a1": Adam(learning_rate=lr), "r1": RMSprop(learning_rate=lr) } opt = optimizer_types[hypa["optimizer_type"]] model.compile( optimizer=opt, loss=tf.keras.losses.CategoricalCrossentropy(), metrics=metrics, ) # setup callbacks callbacks = [] # setup exp decay step / smooth if lr_value.startswith("exp_decay"): if lr_value == "exp_decay_step_01": exp_decay_part = partial(exp_decay_step, epochs_drop=5) elif lr_value == "exp_decay_smooth_01": exp_decay_part = partial(exp_decay_smooth, epochs_drop=5) elif lr_value == "exp_decay_smooth_02": exp_decay_part = partial(exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2) lrate = LearningRateScheduler(exp_decay_part) callbacks.append(lrate) # setup cyclic learning rate if lr_value.startswith("clr_triangular2"): base_lr = 1e-5 max_lr = 1e-3 # 
training iteration per epoch = num samples // batch size # step size suggested = 2~8 * iterations if lr_value == "clr_triangular2_01": step_factor = 8 step_size = step_factor * x.shape[0] // batch_size elif lr_value == "clr_triangular2_02": step_factor = 2 step_size = step_factor * x.shape[0] // batch_size # target_cycles = the number of cycles we want in those epochs # it_per_epoch = num_samples // batch_size # total_iterations = it_per_epoch * epoch_num # step_size = total_iterations // target_cycles elif lr_value == "clr_triangular2_03": # the number of cycles we want in those epochs target_cycles = 4 it_per_epoch = x.shape[0] // batch_size total_iterations = it_per_epoch * epoch_num step_size = total_iterations // (target_cycles * 2) elif lr_value == "clr_triangular2_04": # the number of cycles we want in those epochs target_cycles = 2 it_per_epoch = x.shape[0] // batch_size total_iterations = it_per_epoch * epoch_num step_size = total_iterations // (target_cycles * 2) elif lr_value == "clr_triangular2_05": # the number of cycles we want in those epochs target_cycles = 2 it_per_epoch = x.shape[0] // batch_size total_iterations = it_per_epoch * epoch_num step_size = total_iterations // (target_cycles * 2) # set bigger starting value max_lr = 1e-2 logg.debug(f"x.shape[0]: {x.shape[0]}") logg.debug(f"CLR is using step_size: {step_size}") mode = "triangular2" cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode) callbacks.append(cyclic_lr) # setup early stopping if learning_rate_type in ["01", "02", "03", "04"]: metric_to_monitor = "val_loss" if use_validation else "loss" early_stop = EarlyStopping( monitor=metric_to_monitor, patience=4, restore_best_weights=True, verbose=1, ) callbacks.append(early_stop) # model_checkpoint = ModelCheckpoint( # model_name, # monitor="val_loss", # save_best_only=True, # ) # a dict to recreate this training # FIXME this should be right before fit and have epoch_num/batch_size/lr info recap: ty.Dict[str, ty.Any] = {} recap["words"] = words recap["hypa"] = hypa recap["model_param"] = model_param recap["use_validation"] = use_validation recap["model_name"] = model_name recap["version"] = "001" # logg.debug(f"recap: {recap}") recap_path = info_folder / "recap.json" recap_path.write_text(json.dumps(recap, indent=4)) results = model.fit( x, y, validation_data=val_data, epochs=epoch_num, batch_size=batch_size, callbacks=callbacks, ) results_recap: ty.Dict[str, ty.Any] = {} results_recap["model_name"] = model_name results_recap["results_recap_version"] = "002" # eval performance on the various metrics eval_testing = model.evaluate(data["testing"], labels["testing"]) for metrics_name, value in zip(model.metrics_names, eval_testing): logg.debug(f"{metrics_name}: {value}") results_recap[metrics_name] = value # compute the confusion matrix y_pred = model.predict(data["testing"]) cm = pred_hot_2_cm(labels["testing"], y_pred, words) # logg.debug(f"cm: {cm}") results_recap["cm"] = cm.tolist() # compute the fscore fscore = analyze_confusion(cm, words) logg.debug(f"fscore: {fscore}") results_recap["fscore"] = fscore # save the histories results_recap["history_train"] = { mn: results.history[mn] for mn in model.metrics_names } if use_validation: results_recap["history_val"] = { f"val_{mn}": results.history[f"val_{mn}"] for mn in model.metrics_names } # plot the cm fig, ax = plt.subplots(figsize=(12, 12)) plot_confusion_matrix(cm, ax, model_name, words, fscore) plot_cm_path = info_folder / "test_confusion_matrix.png" fig.savefig(plot_cm_path) plt.close(fig) # save the results 
res_recap_path = info_folder / "results_recap.json" res_recap_path.write_text(json.dumps(results_recap, indent=4)) # if cyclic_lr was used save the history if lr_value.startswith("clr_triangular2"): logg.debug(f"cyclic_lr.history.keys(): {cyclic_lr.history.keys()}") clr_recap = {} for metric_name, values in cyclic_lr.history.items(): clr_recap[metric_name] = list(float(v) for v in values) clr_recap_path = info_folder / "clr_recap.json" clr_recap_path.write_text(json.dumps(clr_recap, indent=4)) # save the trained model model.save(model_path) placeholder_path.write_text(f"Trained. F-score: {fscore}")
# if schedule is not None:
#     callbacks = [LearningRateScheduler(schedule)]
#     decay = 0.0
#     if args["schedule"] == "standard":
#         print("[INFO] using 'keras standard' learning rate decay...")
#         decay = 1e-1 / epochs
#     elif schedule is None:
#         print("[INFO] no learning rate schedule being used")

stepSize = config.STEP_SIZE * (train_images.shape[0] // config.BATCH_SIZE)
file_path = "xception-hepatocyte.h5"
es, msave, reduce_lr, tb_log, log_cv = get_callbacks(file_path, top_model, patience=10)
clr = CyclicLR(mode=config.CLR_METHOD, base_lr=config.MIN_LR, max_lr=config.MAX_LR,
               step_size=stepSize)

print("[INFO] training network...")
H = top_model.fit_generator(
    train_datagen.flow(train_images, Y_train, batch_size=config.BATCH_SIZE),
    validation_data=valid_gen,
    steps_per_epoch=train_images.shape[0] // batch_size_for_generators,
    validation_steps=valid_images.shape[0] // batch_size_for_generators,
    epochs=config.NUM_EPOCHS,
    callbacks=[clr, msave, log_cv],
    verbose=1)

print("[INFO] evaluating network...")
predictions = top_model.predict(valid_images, batch_size=config.BATCH_SIZE)
print(
    classification_report(Y_valid.argmax(axis=1),
                          predictions.argmax(axis=1),
def main(path_imap, path_mmap, batch_size, num_epochs, model_name, num_imaps_per_mmap, hist_path=None, validation_split=0.2, no_validation=False, inputs_to_network="result", ground_truth="imap,mmap", resolution=128, gpu=0, load_weights=None, cyclic_lr=False, base_lr=1e-3, max_lr=6e-3): # change gpu id os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu) # should be 0 or 1 if not os.path.isdir(path_imap): print(f"{path_imap} not a valid directory") exit(-1) if not os.path.isdir(path_mmap): print(f"{path_mmap} not a valid directory") exit(-1) if num_imaps_per_mmap <= 0: print(f"ratio: num imaps {num_imaps_per_mmap} must be greater than 0") exit(-1) input_size = (resolution, resolution, 3) print(f'input size: {input_size}') # determines model name net = None if model_name == "janknet": net = JankNet(input_size=input_size) elif model_name == 'unet': net = UNet(input_size=input_size) elif model_name == 'simpleJanknet': net = SimpleJankNet(input_size=input_size) elif model_name == 'janknet2head': net = JankNet2Head(input_size=input_size) elif model_name == 'mikenet': net = MikeNet(input_size=input_size) elif model_name == "strongerJanknet": net = StrongerJankNet(input_size=input_size) elif model_name == "brucenet": net = BruceNet(input_size=input_size) elif model_name == "dualunet": net = DualUNet(input_size=input_size) elif model_name == "pmaxdualunet": net = PmaxDualUNet(input_size=input_size) elif model_name == "testJanknet": net = TestJankNet(input_size=input_size) else: print(f"model name {model_name} not found") exit(-1) print(f"model name is {model_name}") net.model.summary() # saves the model architecture if doesn't exist already # net.save_model_architecture(model_name, path=f'./models/{model_name}') if load_weights: net.load_weights(load_weights) curtime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") # make a directory for this current instance new_dir = f'./models/{model_name}/instance_{curtime}' # add additional naming convention for retraining models if load_weights: old_instance = re.findall( 'instance.+?(?=/)', load_weights)[0] # grabs the instance_{curtime} new_dir = f'./models/{model_name}/retrained_{old_instance}' if not os.path.isdir(new_dir): os.makedirs(new_dir) # pass in the names of files beforehand # assert that the path exists assert os.path.isdir(path_imap) and os.path.isdir(path_mmap) imap_files = [x for x in os.listdir(path_imap) if x.endswith('npy')] mmap_files = [x for x in os.listdir(path_mmap) if x.endswith('npy')] mmap_files = mmap_files * num_imaps_per_mmap LEN_DATA = min(len(imap_files), len(mmap_files)) # check that each element in input images is valid inputs_to_network = inputs_to_network.split(",") ground_truth = ground_truth.split(",") print("input types are", inputs_to_network) print("ground truth types are", ground_truth) valid_images = ['ambient', 'direct', 'imap', 'mmap', 'result'] for i in inputs_to_network: if i not in valid_images: raise Exception(f"{i} is not a valid type for input to network") for i in ground_truth: if i not in valid_images: raise Exception(f"{i} is not a valid type for ground truth") if no_validation: validation_split = 0 validation_len_data = int(validation_split * LEN_DATA) train_len_data = LEN_DATA - validation_len_data random.shuffle(imap_files) random.shuffle(mmap_files) imap_files_train = imap_files[validation_len_data:] imap_files_validation = imap_files[:validation_len_data] mmap_files_train = mmap_files[validation_len_data:] mmap_files_validation = 
mmap_files[:validation_len_data] VALID_LEN_DATA = train_len_data - train_len_data % batch_size VALID_VALIDATION_LEN_DATA = validation_len_data - validation_len_data % batch_size if no_validation: print("not using validation") else: print("using validation") print("[model_train.py] number of samples of training data", VALID_LEN_DATA) print("[model_train.py] number of samples of validation data", VALID_VALIDATION_LEN_DATA) # make the validation data the length of valid_validation_len_data imap_files_validation = imap_files_validation[:VALID_VALIDATION_LEN_DATA] mmap_files_validation = mmap_files_validation[:VALID_VALIDATION_LEN_DATA] # make the training data the length of valid_len_data imap_files_train = imap_files_train[:VALID_LEN_DATA] mmap_files_train = mmap_files_train[:VALID_LEN_DATA] assert (len(imap_files_train) == len(mmap_files_train)) assert (len(imap_files_validation) == len(mmap_files_validation)) assert (len(imap_files_validation) == VALID_VALIDATION_LEN_DATA) assert (len(imap_files_train) == VALID_LEN_DATA) # number of batch updates batch_updates_per_epoch = VALID_LEN_DATA / batch_size ####### SETUP CALLBACKS # checkpoint filepath = f"weights-{model_name}" + "-{epoch:02d}-{loss:.2f}" + ".hdf5" full_filepath = os.path.join(new_dir, filepath) # this checkpoint only saves losses that have improved checkpoint = ModelCheckpoint(full_filepath, monitor='loss', verbose=1, save_best_only=True, mode='min') # add a csv logger file that tracks metrics as training progresses csvlogger = CSVLogger( os.path.join(new_dir, f"rolling_log-{model_name}-{curtime}.csv")) # find a good learning rate if specified # whether or not to use cyclic learning rates # step size is the number of batch_updates per half cycle # Leslie Smith (author of cyclic policy paper suggests 2-8 * number of batch_updates), here we choose 4 if cyclic_lr: clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=4 * batch_updates_per_epoch, mode='triangular2') callbacks_list = [checkpoint, csvlogger, clr] else: callbacks_list = [checkpoint, csvlogger] ###### CALL TRAIN if no_validation: history_obj = net.train(VALID_LEN_DATA, batch_size, num_epochs, data_gen.generator(imap_files_train, mmap_files_train, path_mmap, path_imap, inputs_to_network, ground_truth, batch_size=batch_size, resolution=resolution), validation_gen=None, validation_len_data=None, callbacks=callbacks_list) else: # Fit the model history_obj = net.train(VALID_LEN_DATA, batch_size, num_epochs, data_gen.generator(imap_files_train, mmap_files_train, path_mmap, path_imap, inputs_to_network, ground_truth, batch_size=batch_size, resolution=resolution), validation_gen=data_gen.generator( imap_files_validation, mmap_files_validation, path_mmap, path_imap, inputs_to_network, ground_truth, batch_size=batch_size, resolution=resolution), validation_len_data=VALID_VALIDATION_LEN_DATA, callbacks=callbacks_list) # save the history object to a pickle file if not hist_path: hist_path = model_name json.dump(history_obj.history, open(os.path.join(new_dir, hist_path + "_" + curtime), "w")) final_epoch_fpath = os.path.join(new_dir, f"final_epoch_weights_{curtime}.hdf5") print(f"saving model to {final_epoch_fpath}") net.model.save(final_epoch_fpath)
def create_net(time_steps, input_length, output_length, num_layers=8):
    # going to try ~halving the history size, since it should be shorter in general
    # vector_size = whatever the flattened total board size is
    # so for example with 10 the number would be 9 + 8 I think
    # could try normalizing word vectors, but it will remove the notion of length
    # this could be an issue since some words (layer types) will occur more often and should
    # have more weight
    # https://stats.stackexchange.com/questions/177905/should-i-normalize-word2vecs-word-vectors-before-using-them
    # skips_input = Input(shape=(time_steps, vector_size))
    # non_skips_input = Input(shape=(time_steps, vector_size))
    inp = Input(shape=(time_steps, input_length))
    # x = conv_layer(x)
    x = inp
    for _ in range(num_layers):
        x = lstm_layer(x)
    policy = lstm_policy_head(x, output_length)
    value = lstm_value_head(x)
    # could try TimeDistributedDense instead of Flatten + Dense + softmax. Not sure which is better

    def AlphaZeroLoss(true_value, pred_value):
        # should have l2 regularization
        # note: mcts_probas and pred_probas must be available in the enclosing scope
        return K.pow(true_value - pred_value, 2) - K.dot(transpose(mcts_probas), K.log(pred_probas))

    model = Model(inputs=inp, outputs=[policy, value])

    optim = Nadam()
    base_lr = 0.001
    max_lr = 0.006
    clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=2000., mode='triangular')
    # compile() does not accept callbacks; build the model first, then pass [clr]
    # to model.fit(..., callbacks=[clr]) when training.
    model.compile(optimizer=optim, loss=AlphaZeroLoss)
    return model


# idea: conv filters selected by an LSTM
# progressively shrinking filters
# goal is to split up an image by its most important components
# maybe a conv2dlstm which does rotations, flips, and crops, shears, blurs, etc

# idea: send in last 8 game states (maybe 16) or it might be less since there
# are in general less game states in architecture search
# for the rollout net just do a certain size window, maybe 3x3 which would equal
# 9 spaces
# since I am using vectors rather than matrices, I think 1d Convs and LSTM's would be
# best

# Idea, stochastically drop some part of the graph between the end and the first layer
# (can't drop the first layer)
# this will create more training data
# more ways to try to create more data will be helpful

# can try randomly masking a couple of grid spots and see if that helps improve
# accuracy

# My goal is to make everything a lot smaller so I will be aiming for 1/8 of the size
# for everything (for now)

# from keras.layers import LSTM, Lambda
# from keras.layers.merge import add
# def make_residual_lstm_layers(input, rnn_width=17, rnn_depth=2, rnn_dropout=.2):
#     """
#     The intermediate LSTM layers return sequences, while the last returns a single element.
#     The input is also a sequence. In order to match the shape of input and output of the LSTM
#     to sum them we can do it only for all layers but the last.
#     """
#     x = input
#     for i in range(rnn_depth):
#         return_sequences = i < rnn_depth - 1
#         x_rnn = Bidirectional(LSTM(rnn_width, recurrent_dropout=rnn_dropout, dropout=rnn_dropout,
#                                    return_sequences=return_sequences, activation=None))(x)
#         x_rnn = BatchNormalization()(x_rnn)
#         if return_sequences:
#             # Intermediate layers return sequences, input is also a sequence.
#             if i > 0 or input.shape[-1] == rnn_width:
#                 x = add([x, x_rnn])
#             else:
#                 # Note that the input size and RNN output has to match, due to the sum operation.
#                 # If we want different rnn_width, we'd have to perform the sum from layer 2 on.
#                 x = x_rnn
#         else:
#             # Last layer does not return sequences, just the last element
#             # so we select only the last element of the previous output.
#             def slice_last(x):
#                 return x[..., -1, :]
#             x = add([Lambda(slice_last)(x), x_rnn])
#     x = Activation("tanh")(x)
#     return x
nbatch = 100000  # quick first training with simple examples = low # hits

train.compileModel(learningrate=learningrate, loss=None, metrics=None)

model, history = train.trainModel(
    nepochs=1,
    run_eagerly=True,
    batchsize=nbatch,
    extend_truth_list_by=len(train.keras_model.outputs) - 2,  # just adapt truth list to avoid keras error (no effect on model)
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=[
        CyclicLR(base_lr=learningrate / 10., max_lr=learningrate, step_size=20)
    ] + cb)

# print("freeze BN")
# for l in train.keras_model.layers:
#     if isinstance(l, BatchNormalization):
#         l.trainable = False
#     if 'GravNetLLLocalClusterLoss' in l.name:
#         l.active = False  # also stop GravNetLLLocalClusterLoss* from being evaluated

learningrate = learningrate / 10.
train.compileModel(learningrate=learningrate, loss=None, metrics=None)
model, history = train.trainModel(
early_stoping = EarlyStopping(monitor='val_acc', patience=8, verbose=1)
save_checkpoint = ModelCheckpoint(WEIGHTS_BEST, monitor='val_acc', verbose=1,
                                  save_best_only=True, save_weights_only=True, mode='max')
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=4,
                              min_lr=1e-8, verbose=1)
csv_logger = CSVLogger(TRAINING_LOG, append=True)
clr = CyclicLR(base_lr=1e-8, max_lr=4e-5, step_size=2000.,
               mode='exp_range', gamma=0.99994)

callbacks_warmup = [save_checkpoint, csv_logger]
callbacks_clr = [early_stoping, save_checkpoint, clr, csv_logger]
callbacks = [early_stoping, save_checkpoint, reduce_lr, csv_logger]

model = InceptionV3_Model()

# warm up
for layer in model.layers[0:-3]:
    layer.trainable = False
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=8e-5),
              metrics=['accuracy'])
# make sure we have a log directory
if not os.path.isdir(args.log_dir):
    os.mkdir(args.log_dir)

# instantiate callbacks
early = EarlyStopping(monitor='val_loss', patience=20, verbose=True)
tb = TensorBoard(log_dir=args.log_dir, histogram_freq=0, write_graph=False)
callbacks = [early, tb]

if args.cycle:
    if args.skip_test is not None:
        base_lr = args.lr_min_multiplier * args.skip_test
    else:
        base_lr = args.lr_min_multiplier * best_lr
    step_size = 4 * batches_per_epoch
    cycle = CyclicLR(base_lr=base_lr, max_lr=lr, step_size=step_size)
    callbacks.append(cycle)
    print('Cycling learning rate from {} to {} over {} steps'.format(
        base_lr, lr, step_size))
else:
    print('Starting training with lr={}'.format(lr))
    print('Learning rate will be reduced on plateau')
    rlr = ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=True)
    callbacks.append(rlr)

if args.save_model:
    if not os.path.isdir(os.path.join(args.log_dir, 'models')):
        os.mkdir(os.path.join(args.log_dir, 'models'))
    check = ModelCheckpoint(os.path.join(args.log_dir, 'models',
                                         args.dataset + '_epoch_{epoch:02d}.h5'))
    callbacks.append(check)
###############################################################################
# Train the model
###############################################################################
early_stopper = EarlyStopping(monitor='val_loss', verbose=1, patience=args.patience)
model_checkpoint = ModelCheckpoint(args.model_path, monitor='val_loss', mode='min',
                                   save_best_only=True, verbose=1)
callbacks = [early_stopper, model_checkpoint]
if args.cyclical_learning_rate:
    callbacks.append(
        CyclicLR(base_lr=0.0005, max_lr=0.006,
                 step_size=4 * STEPS_PER_EPOCH, mode='triangular2'))

kernel_size = (3, 3)
model = Sequential()
model.add(BatchNormalization(input_shape=(args.image_size, args.image_size, 3)))
model.add(
    Conv2D(64, kernel_size, padding='same', strides=1, activation='relu',
           input_shape=(args.image_size, args.image_size, 3)))
model.add(
    Conv2D(128, kernel_size, padding='same', strides=1, activation='relu'))
xtrain_fold = np.vstack((xtrain[tidxs, :], xtrain_flip[tidxs, :]))
ytrain_fold = np.vstack((ytrain[tidxs, :], ytrain[tidxs, :]))
xtrain_fold, ytrain_fold = shuffle(xtrain_fold, ytrain_fold)
xvalid_fold = xtrain[vidxs, :]
yvalid_fold = ytrain[vidxs, :]
train_steps = np.ceil(float(2 * len(tidxs)) / float(BATCH_SIZE))

WEIGHTS_BEST = 'weights/best_weight_part%d_fold%d.hdf5' % (part, fold)
clr = CyclicLR(base_lr=1e-7, max_lr=1e-3, step_size=6 * train_steps,
               mode='exp_range', gamma=0.99994)
early_stopping = EarlyStopping(monitor='val_acc', patience=20, verbose=1, mode='max')
save_checkpoint = ModelCheckpoint(WEIGHTS_BEST, monitor='val_acc', verbose=1,
                                  save_weights_only=True, save_best_only=True, mode='max')
callbacks = [save_checkpoint, early_stopping, clr]

model = Model()
from keras.layers.noise import *
from keras import backend as K
from numeraicb import ConsistencySecondLoss
from clr_callback import CyclicLR

batch_size = 64

cb1 = ConsistencySecondLoss(tournament, 'bernie', 0)
cb2 = ConsistencySecondLoss(tournament, 'charles', 1)
cb3 = ConsistencySecondLoss(tournament, 'elizabeth', 2)
cb4 = ConsistencySecondLoss(tournament, 'jordan', 3)
cb5 = ConsistencySecondLoss(tournament, 'ken', 4)
ch = ModelCheckpoint('nmr_mt.hdf5', verbose=1, save_best_only=True, monitor='val_loss')
es = EarlyStopping(monitor='val_loss', patience=15)
clr = CyclicLR(base_lr=0.00005, max_lr=0.0001,
               step_size=2 * (len(X_train) / batch_size), mode='triangular')

inputs = Input(shape=(50,))
c = Dense(100, activation='relu')(inputs)
c = Dropout(0.1)(c)
predictions_target_bernie = Dense(1, activation='sigmoid', name='target_bernie')(c)
predictions_target_charles = Dense(1, activation='sigmoid', name='target_charles')(c)
predictions_target_elizabeth = Dense(1, activation='sigmoid', name='target_elizabeth')(c)
predictions_target_jordan = Dense(1, activation='sigmoid', name='target_jordan')(c)
predictions_target_ken = Dense(1, activation='sigmoid', name='target_ken')(c)
model = Model(inputs=[inputs], outputs=[
    predictions_target_bernie,
    predictions_target_charles,