def train(model, data, hard_training, args):
    # unpacking the data
    (x_train, y_train), x_train2, (x_test, y_test), x_test2 = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log' + appendix + '.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug),
                               write_grads=False)
    checkpoint1 = CustomModelCheckpoint(model,
                                        args.save_dir + '/best_weights_1' + appendix + '.h5',
                                        monitor='val_capsnet_acc',
                                        save_best_only=False,
                                        save_weights_only=True,
                                        verbose=1)
    checkpoint2 = CustomModelCheckpoint(model,
                                        args.save_dir + '/best_weights_2' + appendix + '.h5',
                                        monitor='val_capsnet_acc',
                                        save_best_only=True,
                                        save_weights_only=True,
                                        verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * 0.5 ** (epoch // 10))

    if args.numGPU > 1:
        parallel_model = multi_gpu_model(model, gpus=args.numGPU)
    else:
        parallel_model = model

    if not hard_training:
        parallel_model.compile(optimizer=optimizers.Adam(lr=args.lr),
                               loss=[margin_loss, 'mse'],
                               loss_weights=[1, 0.4],
                               metrics={'capsnet': 'accuracy'})
    else:
        parallel_model.compile(optimizer=optimizers.Adam(lr=args.lr),
                               loss=[margin_loss_hard, 'mse'],
                               loss_weights=[1, 0.4],
                               metrics={'capsnet': 'accuracy'})

    # Begin: Training with data augmentation
    def train_generator(x1, x2, y, batch_size, shift_fraction=args.shift_fraction):
        # shift up to 2 pixels for MNIST; the original accepted shift_fraction
        # but never used it, so it is applied to both streams here
        train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                           height_shift_range=shift_fraction)
        # a shared seed keeps the two input streams and the labels aligned
        genX1 = train_datagen.flow(x1, y, batch_size=batch_size, seed=1)
        genX2 = train_datagen.flow(x2, y, batch_size=batch_size, seed=1)
        while True:
            x1_batch, y_batch = next(genX1)
            x2_batch, y_batch = next(genX2)
            yield ([x1_batch, x2_batch, y_batch], [y_batch, x1_batch])

    parallel_model.fit_generator(
        generator=train_generator(x_train, x_train2, y_train,
                                  args.batch_size, args.shift_fraction),
        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
        epochs=args.epochs,
        validation_data=([x_test, x_test2, y_test], [y_test, x_test]),
        callbacks=[lr_decay, log, checkpoint1, checkpoint2],
        initial_epoch=int(args.ep_num),
        shuffle=True)

    parallel_model.save(args.save_dir + '/trained_model_multi_gpu.h5')
    model.save(args.save_dir + '/trained_model.h5')
    return parallel_model
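# CustomModelCheckpoint is not a stock Keras callback. Below is a minimal
# sketch of what it might look like, assuming Keras 2.x: the constructor
# signature (model, filepath, ...) matches the calls above, and the assumed
# behavior is that it mirrors keras.callbacks.ModelCheckpoint but always
# saves the single-GPU template model, never the multi_gpu_model wrapper
# that actually runs fit_generator (the wrapper's weights are shared with
# the template, so the template's weights stay current).
import numpy as np
from keras.callbacks import Callback

class CustomModelCheckpoint(Callback):
    def __init__(self, model_to_save, filepath, monitor='val_loss',
                 save_best_only=False, save_weights_only=False, verbose=0):
        super(CustomModelCheckpoint, self).__init__()
        self.model_to_save = model_to_save
        self.filepath = filepath
        self.monitor = monitor
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.verbose = verbose
        self.best = -np.Inf  # assumes an 'acc'-style monitor (higher is better)

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        current = logs.get(self.monitor)
        if self.save_best_only and (current is None or current <= self.best):
            return
        if current is not None:
            self.best = max(self.best, current)
        if self.verbose:
            print('Epoch %d: saving model to %s' % (epoch + 1, self.filepath))
        if self.save_weights_only:
            self.model_to_save.save_weights(self.filepath, overwrite=True)
        else:
            self.model_to_save.save(self.filepath, overwrite=True)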
# Optimizer selection
optimizer = SGD(lr=LEARNING_RATE, momentum=0.9, decay=0.001, nesterov=True)
# optimizer = Adam(lr=LEARNING_RATE, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
# optimizer = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)

model.compile(loss="categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# autosave best Model
best_model_file = "./log/11_InceptionV3_best_vehicleModel.h5"

# Define several callbacks
# A multi-GPU model can only save its weights, not its structure; the full
# model can be re-saved after training finishes, see mutigpu_to_cpu.py
best_model = CustomModelCheckpoint(model, best_model_file, monitor_index=monitor_index)
reduce_lr = ReduceLROnPlateau(monitor=monitor_index,
                              factor=0.5,
                              patience=5,
                              verbose=1,
                              min_lr=0.00001)
early_stop = EarlyStopping(monitor=monitor_index, patience=20, verbose=1)

# Prepare the data
train_data_lines = open(train_path).readlines()
# Check if image path exists.
train_data_lines = [
    w.strip() for w in train_data_lines
    if os.path.exists(w.strip().split(' ')[0])
]
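# Hedged sketch: consuming the "image_path label" lines prepared above.
# The space-separated layout is inferred from the split(' ')[0] existence
# check; parse_line is a hypothetical helper, not part of the original script.
def parse_line(line):
    parts = line.split(' ')
    return parts[0], int(parts[1])

train_paths, train_labels = zip(*(parse_line(l) for l in train_data_lines))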
              optimizer=adam,
              metrics=['accuracy'])
model.summary()

# serialize the architecture and keep a copy of this script alongside it
model_json = phi_model.to_json()
with open(output_folder + "model.json", "w") as json_file:
    json_file.write(model_json)
copyfile(os.path.basename(__file__), output_folder + os.path.basename(__file__))

tensorboard = TensorBoard(log_dir=output_log)
checkpointer = CustomModelCheckpoint(
    model_for_saving=model,
    filepath=output_weight + "weights_{epoch:02d}_{val_loss:.2f}.h5",
    save_best_only=True,
    monitor='val_loss',
    save_weights_only=True)

generator_training = RotNetDataGenerator(input_shape=input_shape,
                                         batch_size=batch_size,
                                         one_hot=True,
                                         preprocess_func=preprocess_input,
                                         shuffle=True).generate(
                                             paths_train, labels_train, len(classes_focal))
generator_valid = RotNetDataGenerator(input_shape=input_shape,
                                      batch_size=batch_size,
                                      one_hot=True,
                                      preprocess_func=preprocess_input,
                                      shuffle=True).generate(
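# Hedged sketch: reloading the artifacts written above at inference time,
# assuming Keras 2.x. The weights filename shown is hypothetical (it would
# be whichever weights_{epoch:02d}_{val_loss:.2f}.h5 file ended up best),
# and any custom layers in the architecture would need to be supplied via
# custom_objects.
from keras.models import model_from_json

with open(output_folder + "model.json") as json_file:
    restored = model_from_json(json_file.read())
restored.load_weights(output_weight + "weights_10_0.35.h5")  # hypothetical filename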
def train(model, data, hard_training, args):
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log' + appendix + '.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug),
                               write_grads=False)
    checkpoint1 = CustomModelCheckpoint(model,
                                        args.save_dir + '/best_weights_1' + appendix + '.h5',
                                        monitor='val_capsnet_acc',
                                        save_best_only=False,
                                        save_weights_only=True,
                                        verbose=1)
    checkpoint2 = CustomModelCheckpoint(model,
                                        args.save_dir + '/best_weights_2' + appendix + '.h5',
                                        monitor='val_capsnet_acc',
                                        save_best_only=True,
                                        save_weights_only=True,
                                        verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * 0.5 ** (epoch // 10))

    if args.numGPU > 1:
        parallel_model = multi_gpu_model(model, gpus=args.numGPU)
    else:
        parallel_model = model

    if not hard_training:
        parallel_model.compile(optimizer=optimizers.Adam(lr=args.lr),
                               loss=[margin_loss, 'mse'],
                               loss_weights=[1, 0.4],
                               metrics={'capsnet': 'accuracy'})
    else:
        parallel_model.compile(optimizer=optimizers.Adam(lr=args.lr),
                               loss=[margin_loss_hard, 'mse'],
                               loss_weights=[1, 0.4],
                               metrics={'capsnet': 'accuracy'})

    # Begin: Training with data augmentation
    def train_generator(x, y, batch_size, shift_fraction=args.shift_fraction):
        # note: shift_fraction is accepted but unused here; the 0.1 shift and
        # zoom ranges below are hardcoded ("shift up to 2 pixels for MNIST")
        train_datagen = ImageDataGenerator(featurewise_center=False,
                                           samplewise_center=False,
                                           featurewise_std_normalization=False,
                                           samplewise_std_normalization=False,
                                           zca_whitening=False,
                                           zca_epsilon=1e-06,
                                           rotation_range=0.1,
                                           width_shift_range=0.1,
                                           height_shift_range=0.1,
                                           shear_range=0.0,
                                           zoom_range=0.1,
                                           channel_shift_range=0.0,
                                           fill_mode='nearest',
                                           cval=0.0,
                                           horizontal_flip=True,
                                           vertical_flip=False,
                                           rescale=None,
                                           preprocessing_function=None,
                                           data_format=None)
        train_datagen.fit(x)
        generator = train_datagen.flow(x, y, batch_size=batch_size, shuffle=True)
        while True:
            x_batch, y_batch = next(generator)
            yield ([x_batch, y_batch], [y_batch, x_batch])

    parallel_model.fit_generator(
        generator=train_generator(x_train, y_train,
                                  args.batch_size, args.shift_fraction),
        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
        epochs=args.epochs,
        validation_data=([x_test, y_test], [y_test, x_test]),
        callbacks=[lr_decay, log, checkpoint1, checkpoint2],
        initial_epoch=int(args.ep_num),
        shuffle=True)

    parallel_model.save(args.save_dir + '/trained_model_multi_gpu.h5')
    model.save(args.save_dir + '/trained_model.h5')
    return parallel_model
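# Quick check of the step-decay schedule used by the LearningRateScheduler
# above: the learning rate halves every 10 epochs. With a base lr of 0.001
# (a stand-in for args.lr) this prints 0.001 for epochs 0-9, 0.0005 for
# 10-19, 0.00025 for 20-29, and so on.
base_lr = 0.001  # stand-in for args.lr
for epoch in (0, 9, 10, 19, 20, 30):
    print(epoch, base_lr * 0.5 ** (epoch // 10))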
workspace="c00k1ez", project_name="low-resource-lm-research", experiment_name=exp_name) # get all config data to send in to comet.ml config_data = {} cfg_raw = config.get() for key in cfg_raw.keys(): config_data.update(dict(cfg_raw[key])) logger.experiment.log_parameters(config_data) model_name = args.model + '_' + config['dataloaders'][ 'tokenizer_type'].get() # setup my custom checkpoint callback checkpoint_callback = CustomModelCheckpoint( model_name=model_name, filepath=config['general']['checkpoint_path'].get(), save_top_k=1, verbose=True, monitor='val_loss', mode='min', prefix=args.model + '_') trainer = pl.Trainer(**config['trainer_params'].get(), checkpoint_callback=checkpoint_callback, print_nan_grads=True, profiler=True, logger=logger) trainer.fit(framework)
model.compile(loss="categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])
# model.compile(loss=amsoftmax_loss, optimizer=optimizer, metrics=["accuracy"])
# inception.summary()
print(len(inception.layers))

# autosave best Model
best_model_file = "./log/best_model.h5"

# Define several callbacks
# loging = TensorBoard(log_dir='./log')
# A multi-GPU model can only save its weights, not its structure; the full
# model can be re-saved after training finishes, see mutigpu_to_cpu.py
best_model = CustomModelCheckpoint(model, best_model_file)
reduce_lr = ReduceLROnPlateau(monitor='val_' + monitor_index,
                              factor=0.5,
                              patience=10,
                              verbose=1,
                              min_lr=0.000001)
early_stop = EarlyStopping(monitor='val_' + monitor_index, patience=30, verbose=1)

# Prepare the data
train_data_lines = open(train_path).readlines()
# Check if image path exists.
train_data_lines = [
    w.strip() for w in train_data_lines
    if os.path.exists(w.strip().split(' ')[0])
]
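# Hedged sketch of the amsoftmax_loss referenced in the commented-out
# compile call above. This follows a common AM-softmax formulation; the
# scale=30 / margin=0.35 defaults are typical values, not taken from this
# repo, and it assumes the model's final layer outputs raw cosine
# similarities (logits) rather than softmax probabilities.
import keras.backend as K

def amsoftmax_loss(y_true, y_pred, scale=30.0, margin=0.35):
    # subtract the additive margin from the target-class cosine only,
    # then rescale and apply the usual softmax cross-entropy
    y_pred = y_true * (y_pred - margin) + (1.0 - y_true) * y_pred
    y_pred *= scale
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)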