valid_gen = datagen.flow_from_dataframe(dataframe=DF_TRAIN,
                                        directory=TRAIN_PATH,
                                        x_col="filename",
                                        y_col="category",
                                        subset="validation",
                                        batch_size=40,
                                        # seed=1,
                                        color_mode='rgb',
                                        shuffle=False,
                                        class_mode="binary",
                                        target_size=(IMAGE_SIZE, IMAGE_SIZE))

vit_model = vit.vit_b32(image_size=IMAGE_SIZE,
                        activation='softmax',
                        pretrained=True,
                        include_top=False,
                        pretrained_top=False,
                        classes=2)

model = tf.keras.Sequential(
    [
        vit_model,
        tf.keras.layers.Flatten(),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(11, activation=tfa.activations.gelu),  # tf.keras.layers.ReLU
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(5, 'softmax')
    ],
    name='vision_transformer')
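A minimal sketch of compiling and training the model above with the generators; train_gen, the optimizer, loss, and epoch count are assumptions and not part of the original snippet (the loss must match the generator's class_mode and the size of the final Dense layer).

model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',  # assumes integer labels matching the softmax head
              metrics=['accuracy'])

history = model.fit(train_gen,              # hypothetical training generator (subset="training")
                    validation_data=valid_gen,
                    epochs=10)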
valGen = valdatagen.flow_from_directory(valid_dir,
                                        shuffle=True,
                                        target_size=(ImageSize, ImageSize),
                                        batch_size=BS)

if LabelNum == 2:
    activation = 'sigmoid'
else:
    activation = 'softmax'

model = vit.vit_b32(
    image_size=ImageSize,
    activation=activation,
    pretrained=True,
    include_top=True,
    pretrained_top=False,
    classes=LabelNum,
    weights="imagenet21k",
    dropout=0.2,
    mlp_dim=3072,    # hidden layer dimension
    num_heads=12,    # number of heads in the multi-head attention layer
    num_layers=12,   # number of transformer blocks
    hidden_size=768  # embedding dimension
)

filepath = os.path.sep.join([
    OUTPUT_PATH,
    MODEL + '-' + str(LabelNum) + "class - " + str(slice_num) + ".hdf5"
])
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')
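A minimal sketch of training this model with the checkpoint callback defined above; trainGen, the optimizer, the loss choice, and EPOCHS are assumptions and do not appear in the original snippet.

loss = 'binary_crossentropy' if LabelNum == 2 else 'categorical_crossentropy'
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss=loss,
              metrics=['accuracy'])

model.fit(trainGen,                  # hypothetical training generator built like valGen
          validation_data=valGen,    # val_accuracy is logged, so the checkpoint monitor resolves
          epochs=EPOCHS,             # assumed epoch count
          callbacks=[checkpoint])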
# Imports implied by the code below.
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from vit_keras import vit

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

# Note: this regularizer is created but never assigned, so the line has no effect.
tf.keras.regularizers.l2(l2=0.01)

datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True)

train_csv = pd.read_csv(r"F:\Pycharm_projects\Kaggle Cassava\data\train.csv")
train_csv["label"] = train_csv["label"].astype(str)

image_size = 512
base_model = vit.vit_b32(image_size=image_size,
                         activation="softmax",
                         pretrained=True,
                         include_top=True,
                         pretrained_top=True,
                         classes=5)

# 80/20 split of the training CSV.
train = train_csv.iloc[:int(len(train_csv) * 0.8), :]
test = train_csv.iloc[int(len(train_csv) * 0.8):, :]
print((len(train), len(test)))

base_model.trainable = False

fold_number = 0
n_splits = 5
oof_accuracy = []
batch_size = 17
first_decay_steps = 500
lr = tf.keras.experimental.CosineDecayRestarts(0.04, first_decay_steps)
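Since base_model is frozen above, one common pattern (assumed here, not shown in the original snippet) is to stack a small trainable head on top, build a generator from the split dataframe, and train with the cosine-restart schedule. The image directory, the image_id column, the head, and the loss are assumptions.

train_gen = datagen.flow_from_dataframe(
    dataframe=train,
    directory=r"F:\Pycharm_projects\Kaggle Cassava\data\train_images",  # hypothetical image folder
    x_col="image_id",                    # assumed filename column in train.csv
    y_col="label",
    target_size=(image_size, image_size),
    class_mode="categorical",
    batch_size=batch_size)

model = tf.keras.Sequential([
    base_model,                                                       # frozen ViT backbone
    tf.keras.layers.Dense(5, activation='softmax', dtype='float32')   # float32 head under mixed precision
])
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr),  # lr is the CosineDecayRestarts schedule above
    loss='categorical_crossentropy',
    metrics=['accuracy'])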
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset', '--dataset_dir', metavar='PATH',
                        default=config.DEFAULT_DATASET_DIR, help='Dataset directory.')
    parser.add_argument('--optimizer', default='sgd',
                        choices=['sgd', 'adam', 'rmsprop'], help='Optimizer.')
    parser.add_argument('-d', '--dtype', default='fp32',
                        choices=['fp32', 'bf16'], help='Data type.')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Global batch size.')
    parser.add_argument('--lr_sched', default='WarmupCosine',
                        choices=['linear', 'exp', 'steps', 'constant', 'WarmupCosine'],
                        help='Learning rate scheduler.')
    parser.add_argument('--initial_lr', type=float, default=6e-2,
                        help='Initial learning rate.')
    parser.add_argument('--final_lr', type=float, default=1e-5,
                        help='Final learning rate.')
    parser.add_argument('--warmup_steps', type=int, default=4000,
                        help='Warmup steps.')
    parser.add_argument('--epochs', type=int, default=10,
                        help='Total number of epochs for training.')
    parser.add_argument('--steps_per_epoch', type=int,
                        help='Number of steps for training per epoch, overrides default value.')
    parser.add_argument('--validation_steps', type=int,
                        help='Number of steps for validation, overrides default value.')
    parser.add_argument('--model', default='ViT-B_16',
                        choices=['ViT-B_16', 'ViT-L_16', 'ViT-B_32', 'ViT-L_32'],
                        help='Model.')
    parser.add_argument('--train_subset', default='train',
                        help='Pattern to detect train subset in dataset directory.')
    parser.add_argument('--val_subset', default='validation',
                        help='Pattern to detect validation subset in dataset directory.')
    parser.add_argument('--grad_accum_steps', type=int, default=8,
                        help='Gradient accumulation steps.')
    parser.add_argument('--resume_from_checkpoint_path', metavar='PATH',
                        help='Path to checkpoint to start from.')
    parser.add_argument('--resume_from_epoch', metavar='EPOCH_INDEX', type=int, default=0,
                        help='Initial epoch index.')
    parser.add_argument('--evaluate_checkpoint_path', metavar='PATH',
                        help='Checkpoint path for evaluating the model on --val_subset.')
    parser.add_argument('--weights_path', metavar='PATH',
                        help='Path to weights cache directory. ~/.keras is used if not set.')
    parser.add_argument('--deterministic', action='store_true', default=False,
                        help='Enable deterministic behavior; this also disables data augmentation. --seed must be set.')
    parser.add_argument('--seed', type=int,
                        help='Seed to be used by random functions.')
    parser.add_argument('--device', default='HPU',
                        choices=['CPU', 'HPU'], help='Device type.')
    parser.add_argument('--distributed', action='store_true', default=False,
                        help='Enable distributed training.')
    parser.add_argument('--base_tf_server_port', type=int, default=7850,
                        help='Rank 0 port used by tf.distribute.')
    parser.add_argument('--save_summary_steps', type=int, default=0,
                        help='Steps between saving summaries to TensorBoard.')
    parser.add_argument('--recipe_cache', default='/tmp/vit_recipe_cache',
                        help='Path to recipe cache directory. Set to empty to disable recipe cache. '
                             'Externally set \'TF_RECIPE_CACHE_PATH\' will override this setting.')
    parser.add_argument('--dump_config',
                        help='Side-by-side config file. Internal, do not use.')
    args = parser.parse_args()

    if args.weights_path is not None:
        config.WEIGHTS_DIR = args.weights_path

    if args.dtype == 'bf16':
        tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')

    if args.device == 'HPU':
        if args.distributed:
            os.environ['TF_HCCL_MEMORY_ALLOWANCE_MB'] = '500'
        from habana_frameworks.tensorflow import load_habana_module
        from habana_frameworks.tensorflow.ops.layer_norm import HabanaLayerNormalization
        load_habana_module()
        tf.keras.layers.LayerNormalization = HabanaLayerNormalization

        # Handle recipe caching.
        recipe_cache = args.recipe_cache
        if 'TF_RECIPE_CACHE_PATH' not in os.environ.keys() and recipe_cache:
            os.environ['TF_RECIPE_CACHE_PATH'] = recipe_cache

        # Clear previous recipe cache.
        if not args.distributed or comm_rank() == 0:
            if os.path.exists(recipe_cache) and os.path.isdir(recipe_cache):
                import shutil
                shutil.rmtree(recipe_cache)
        # Wait for rank 0 to remove cache.
        if args.distributed:
            from mpi4py import MPI
            MPI.COMM_WORLD.Barrier()

    # Handle determinism.
    config.DETERMINISTIC = args.deterministic
    config.SEED = args.seed
    if args.deterministic:
        assert args.seed is not None, "Deterministic behavior requires seed to be set."
        tf.config.threading.set_inter_op_parallelism_threads(1)
        tf.config.threading.set_intra_op_parallelism_threads(1)
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        config.DATA_AUGMENTATION = False
    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)
        tf.random.set_seed(args.seed)

    # Handle distribution strategy.
    if args.distributed:
        tf_distribute_config(args.base_tf_server_port)
        if args.device == 'HPU':
            os.environ['HBN_TF_REGISTER_DATASETOPS'] = '1'
            from habana_frameworks.tensorflow.distribute import HPUStrategy
            strategy = HPUStrategy()
        else:
            strategy = tf.distribute.MultiWorkerMirroredStrategy()
    else:
        strategy = tf.distribute.OneDeviceStrategy(f'device:{args.device}:0')

    if not args.distributed or comm_rank() == 0:
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    num_classes = 1000
    batch_size = args.batch_size
    nb_epoch = args.epochs
    dataset = args.dataset
    resume_from_checkpoint_path = args.resume_from_checkpoint_path
    resume_from_epoch = args.resume_from_epoch
    optim_name = args.optimizer
    initial_lr = args.initial_lr
    final_lr = args.final_lr
    lr_sched = args.lr_sched
    warmup_steps = args.warmup_steps
    model_name = args.model
    grad_accum_steps = args.grad_accum_steps

    ds_train = get_dataset(dataset, args.train_subset, batch_size,
                           is_training=True, distributed=args.distributed)
    ds_valid = get_dataset(dataset, args.val_subset, batch_size,
                           False, distributed=args.distributed)

    if args.dump_config is not None:
        vit.CONFIG_B['dropout'] = 0.0
        vit.CONFIG_L['dropout'] = 0.0

    # Load our model.
    with strategy.scope():
        image_size = 384
        if model_name == 'ViT-B_16':
            model = vit.vit_b16(
                image_size=image_size,
                activation='softmax',
                pretrained=True,
                include_top=True,
                pretrained_top=False,
                classes=num_classes,
                weights="imagenet21k")
        elif model_name == 'ViT-L_16':
            model = vit.vit_l16(
                image_size=image_size,
                activation='softmax',
                pretrained=True,
                include_top=True,
                pretrained_top=False,
                classes=num_classes,
                weights="imagenet21k")
        elif model_name == 'ViT-B_32':
            model = vit.vit_b32(
                image_size=image_size,
                activation='softmax',
                pretrained=True,
                include_top=True,
                pretrained_top=False,
                classes=num_classes,
                weights="imagenet21k")
        elif model_name == 'ViT-L_32':
            model = vit.vit_l32(
                image_size=image_size,
                activation='softmax',
                pretrained=True,
                include_top=True,
                pretrained_top=False,
                classes=num_classes,
                weights="imagenet21k")
"Model is not supported, please use either ViT-B_16 or ViT-L_16 or ViT-B_32 or ViT-L_32") exit(0) optimizer = get_optimizer( optim_name, initial_lr, accumulation_steps=grad_accum_steps, epsilon=1e-2) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'], run_eagerly=False) # Start training steps_per_epoch = 1281167 // batch_size if args.steps_per_epoch is not None: steps_per_epoch = args.steps_per_epoch validation_steps = 50000 // batch_size if args.validation_steps is not None: validation_steps = args.validation_steps total_steps = nb_epoch * steps_per_epoch resume_step = resume_from_epoch * steps_per_epoch lrate = get_lr_func(nb_epoch, lr_sched, initial_lr, final_lr, warmup_steps, resume_step, total_steps) save_name = model_name if not model_name.endswith('.h5') else \ os.path.split(model_name)[-1].split('.')[0].split('-')[0] model_ckpt = tf.keras.callbacks.ModelCheckpoint( os.path.join(config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5', monitor='train_loss') callbacks = [lrate, model_ckpt] if args.save_summary_steps > 0: callbacks += [TensorBoardWithHParamsV2( vars(args), log_dir=config.LOG_DIR, update_freq=args.save_summary_steps)] callbacks += [ExamplesPerSecondKerasHookV2( output_dir=config.LOG_DIR, every_n_steps=args.save_summary_steps, batch_size=args.batch_size)] if (args.evaluate_checkpoint_path is not None): model.load_weights(args.evaluate_checkpoint_path) results = model.evaluate(x=ds_valid, steps=validation_steps) print("Test loss, Test acc:", results) exit() if ((resume_from_epoch is not None) and (resume_from_checkpoint_path is not None)): model.load_weights(resume_from_checkpoint_path) with dump_callback(args.dump_config): model.fit(x=ds_train, y=None, steps_per_epoch=steps_per_epoch, callbacks=callbacks, initial_epoch=resume_from_epoch, epochs=nb_epoch, shuffle=not args.deterministic, verbose=1 if not args.distributed else comm_rank() == 0, validation_data=(ds_valid, None), validation_steps=validation_steps, ) if not args.distributed or comm_rank() == 0: model.save(f'{config.SAVE_DIR}/{save_name}-model-final.h5')