def load_model(self, n_classes, image_shape, growth_rate, nb_layers, reduction):
    config = [n_classes, image_shape, growth_rate, nb_layers, reduction]
    if config == self.initial_config:
        self.model.set_weights(self.initial_weights)
    else:
        self.model = densenet_model(classes=n_classes, shape=image_shape,
                                    growth_rate=growth_rate, nb_layers=nb_layers,
                                    reduction=reduction)
        self.initial_weights = self.model.get_weights()
        self.initial_config = config
def create_base_model(model_name=None, image_shape=None):
    if model_name == 'DenseNet':
        base_model = densenet_model(shape=image_shape, growth_rate=64,
                                    nb_layers=[6, 6], reduction=0.5,
                                    with_output_block=False)
    elif model_name == '':
        print('Warning: no base model selected.')
        base_model = None
    else:
        print('Error: wrong model name. DenseNet will be used as default.')
        base_model = densenet_model(shape=image_shape, growth_rate=64,
                                    nb_layers=[6, 6], reduction=0.5,
                                    with_output_block=False)
    return base_model
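A minimal usage sketch for create_base_model. The input shape is illustrative (not taken from the original code), and it assumes densenet_model accepts a (height, width, channels) tuple for its shape argument.

# Hypothetical call: build the headless DenseNet feature extractor for 64x64 RGB inputs.
base = create_base_model(model_name='DenseNet', image_shape=(64, 64, 3))
# base has no output block, so a classifier head is expected to be stacked on top of it.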
def eval(config):
    # Files path
    model_file_path = f"{config['model.path']}"

    data = load(config, datagen_flow=True)

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weighted_loss(loss_object, data["class_weights"])
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    model = densenet_model(classes=data["nb_classes"],
                           shape=data["image_shape"],
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    model.load_weights(model_file_path)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    _, _, test_step = steps(model, loss_object, optimizer,
                            train_loss=train_loss, train_accuracy=train_accuracy,
                            test_loss=test_loss, test_accuracy=test_accuracy,
                            engine=config['engine'])

    with tf.device(device_name):
        batches = 0
        for test_images, test_labels in data["test_gen"]:
            test_step(test_images, test_labels)
            batches += 1
            if batches >= data["test_size"] / config['data.batch_size']:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break

    print('Test Loss: {} Test Acc: {}'.format(test_loss.result(),
                                              test_accuracy.result() * 100))
def create_model(model_name=None, nb_classes=None, image_shape=None,
                 optimizer=None, loss_object=None, weights=None,
                 bm_trainable=False):
    if model_name not in models:
        print('Error: Wrong model name. VGG16 will be used as default.')
        model_name = 'VGG16'

    weights = None if weights == '' else weights
    w, h, c = image_shape

    if model_name == 'DenseNet':
        base_model = densenet_model(shape=(w, h, 3), growth_rate=64,
                                    nb_layers=[6, 12], reduction=0.5,
                                    with_output_block=False)
        if weights is not None:
            base_model.load_weights(weights, by_name=True)
    else:
        base_model = models[model_name](
            include_top=False, weights=weights, input_shape=(w, h, 3))

    base_model.trainable = bm_trainable

    global_average_layer = GlobalAveragePooling2D()
    hidden_dense_layer = Dense(1024, activation='relu')
    prediction_layer = Dense(nb_classes, activation='softmax')

    model = tf.keras.Sequential()
    if c < 3:
        # Project single-channel inputs to the 3 channels the base model expects.
        model.add(Input(shape=(w, h, 1)))
        model.add(Conv2D(3, (3, 3), padding='same'))
    model.add(base_model)
    model.add(global_average_layer)
    model.add(hidden_dense_layer)
    model.add(prediction_layer)

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer=optimizer, loss=loss_object)

    return model
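A hedged usage sketch for create_model with a grayscale input. It assumes the models registry maps names (including 'DenseNet') to constructors; the shape, class count, optimizer, and loss below are illustrative choices, not values from the original code.

# Hypothetical call: 64x64 grayscale images are first projected to 3 channels by the
# Conv2D stem added when c < 3, then fed to the frozen base model and the dense head.
clf = create_model(model_name='DenseNet',
                   nb_classes=10,
                   image_shape=(64, 64, 1),
                   optimizer=tf.keras.optimizers.Adam(),
                   loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
                   weights=None,
                   bm_trainable=False)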
def train_densenet(dataset_name="rwth", rotation_range=10, width_shift_range=0.10,
                   height_shift_range=0.10, horizontal_flip=True, growth_rate=128,
                   nb_layers=[6, 12], reduction=0.0, lr=0.001, epochs=400,
                   max_patience=25, batch_size=16, checkpoints=False,
                   weight_classes=False, train_size=None, test_size=None):
    # log
    log_freq = 1
    save_freq = 40
    models_directory = 'models/'
    results_directory = 'results/'
    config_directory = 'config/'
    general_directory = "./results/"
    save_directory = general_directory + "{}/dense-net/".format(dataset_name)
    results = 'epoch,loss,accuracy,test_loss,test_accuracy\n'

    date = datetime.now().strftime("%Y_%m_%d-%H:%M:%S")
    identifier = "{}-growth-{}-densenet-{}".format(
        '-'.join([str(i) for i in nb_layers]), growth_rate, dataset_name) + date
    summary_file = general_directory + 'summary.csv'

    # create summary file if it does not exist
    if not os.path.exists(summary_file):
        file = open(summary_file, 'w')
        file.write("datetime, model, config, min_loss, min_loss_accuracy\n")
        file.close()

    print("hyperparameters set")
    # print(tf.test.is_gpu_available())

    x, y = load(dataset_name)
    image_shape = np.shape(x)[1:]

    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        train_size=train_size,
                                                        test_size=test_size,
                                                        random_state=42,
                                                        stratify=y)
    x_train, x_test = x_train / 255.0, x_test / 255.0
    n_classes = len(np.unique(y))
    if weight_classes:
        class_weights = compute_class_weight(class_weight='balanced',
                                             classes=np.unique(y), y=y)

    print("data loaded")

    datagen = ImageDataGenerator(featurewise_center=True,
                                 featurewise_std_normalization=True,
                                 rotation_range=rotation_range,
                                 width_shift_range=width_shift_range,
                                 height_shift_range=height_shift_range,
                                 horizontal_flip=horizontal_flip,
                                 fill_mode='constant',
                                 cval=0)
    datagen.fit(x_train)

    test_datagen = ImageDataGenerator(featurewise_center=True,
                                      featurewise_std_normalization=True,
                                      fill_mode='constant',
                                      cval=0)
    test_datagen.fit(x_train)

    model = densenet_model(classes=n_classes, shape=image_shape,
                           growth_rate=growth_rate, nb_layers=nb_layers,
                           reduction=reduction)

    print("model created")

    if weight_classes:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

        def weightedLoss(originalLossFunc, weightsList):

            @tf.function
            def lossFunc(true, pred):
                axis = -1  # if channels last
                # axis = 1  # if channels first

                # argmax returns the index of the element with the greatest value
                # done in the class axis, it returns the class index
                classSelectors = tf.argmax(true, axis=axis, output_type=tf.int32)

                # considering weights are ordered by class, for each class
                # true(1) if the class index is equal to the weight index
                classSelectors = [tf.equal(i, classSelectors)
                                  for i in range(len(weightsList))]

                # casting boolean to float for calculations
                # each tensor in the list contains 1 where the ground-truth class equals its index
                # if you sum all of these, you get a tensor full of ones
                classSelectors = [tf.cast(x, tf.float32) for x in classSelectors]

                # for each of the selections above, multiply by the respective weight
                weights = [sel * w for sel, w in zip(classSelectors, weightsList)]

                # sum all the selections
                # the result is a tensor with the respective weight for each element in predictions
                weightMultiplier = weights[0]
                for i in range(1, len(weights)):
                    weightMultiplier = weightMultiplier + weights[i]

                # make sure your originalLossFunc only collapses the class axis;
                # the other axes must stay intact to multiply by the weights tensor
                loss = originalLossFunc(true, pred)
                loss = loss * weightMultiplier

                return loss
            return lossFunc

        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(tf.cast(images, tf.float32), training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        predictions = model(tf.cast(images, tf.float32), training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    # create summary writers
    train_summary_writer = tf.summary.create_file_writer(
        save_directory + 'summaries/train/' + identifier)
    test_summary_writer = tf.summary.create_file_writer(
        save_directory + 'summaries/test/' + identifier)

    # create data generators
    train_gen = datagen.flow(x_train, y_train, batch_size=batch_size)
    test_gen = test_datagen.flow(x_test, y_test, batch_size=batch_size,
                                 shuffle=False)

    print("starting training")

    min_loss = 100
    min_loss_acc = 0
    patience = 0

    for epoch in range(epochs):
        batches = 0
        for images, labels in train_gen:
            train_step(images, labels)
            batches += 1
            if batches >= len(x_train) / batch_size:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break

        batches = 0
        for test_images, test_labels in test_gen:
            test_step(test_images, test_labels)
            batches += 1
            if batches >= len(x_test) / batch_size:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break

        if epoch % log_freq == 0:
            results += '{},{},{},{},{}\n'.format(epoch,
                                                 train_loss.result(),
                                                 train_accuracy.result() * 100,
                                                 test_loss.result(),
                                                 test_accuracy.result() * 100)
            print('Epoch: {}, Train Loss: {}, Train Acc: {}, Test Loss: {}, Test Acc: {}'
                  .format(epoch,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))

            if test_loss.result() < min_loss:
                if not os.path.exists(save_directory + models_directory):
                    os.makedirs(save_directory + models_directory)
                # serialize weights to HDF5
                model.save_weights(save_directory + models_directory +
                                   "best{}.h5".format(identifier))
                min_loss = test_loss.result()
                min_loss_acc = test_accuracy.result()
                patience = 0
            else:
                patience += 1

        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)
        train_loss.reset_states()
        train_accuracy.reset_states()

        with test_summary_writer.as_default():
            tf.summary.scalar('loss', test_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)
        test_loss.reset_states()
        test_accuracy.reset_states()

        if checkpoints and epoch % save_freq == 0:
            if not os.path.exists(save_directory + models_directory):
                os.makedirs(save_directory + models_directory)
            # serialize weights to HDF5
            model.save_weights(save_directory + models_directory +
                               "{}_epoch{}.h5".format(identifier, epoch))

        if patience >= max_patience:
            break

    if not os.path.exists(save_directory + results_directory):
        os.makedirs(save_directory + results_directory)
    file = open(save_directory + results_directory + 'results-' + identifier + '.csv', 'w')
    file.write(results)
    file.close()

    if not os.path.exists(save_directory + config_directory):
        os.makedirs(save_directory + config_directory)
    config = {
        'data.dataset_name': dataset_name,
        'data.rotation_range': rotation_range,
        'data.width_shift_range': width_shift_range,
        'data.height_shift_range': height_shift_range,
        'data.horizontal_flip': horizontal_flip,
        'model.growth_rate': growth_rate,
        'model.nb_layers': nb_layers,
        'model.reduction': reduction,
        'train.lr': lr,
        'train.epochs': epochs,
        'train.max_patience': max_patience,
        'train.batch_size': batch_size,
    }
    file = open(save_directory + config_directory + identifier + '.json', 'w')
    file.write(json.dumps(config, indent=2))
    file.close()

    file = open(summary_file, 'a+')
    summary = "{}, {}, dense-net, {}, {}, {}\n".format(
        date, dataset_name,
        save_directory + config_directory + identifier + '.json',
        min_loss, min_loss_acc)
    file.write(summary)
    file.close()
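Since the weighted loss above multiplies each sample's loss by the weight of its class, here is a minimal standalone sketch (toy labels, not from the original code) of how the 'balanced' class weights it consumes are computed.

# Toy example: 'balanced' weights are n_samples / (n_classes * class_count),
# so rarer classes receive larger weights.
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y_toy = np.array([0, 0, 0, 1, 2, 2])
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(y_toy), y=y_toy)
# class_weights -> approximately [0.667, 2.0, 1.0]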
def eval(config):
    # Files path
    model_file_path = f"{config['model.path']}"
    data_dir = "data/"

    _, _, test, nb_classes, image_shape, class_weights = load(
        dataset_name=config['data.dataset'],
        batch_size=config['data.batch_size'],
        train_size=config['data.train_size'],
        test_size=config['data.test_size'],
        weight_classes=config['data.weight_classes'],
        datagen_flow=True,
    )

    (test_gen, test_len, _) = test

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    model = densenet_model(classes=nb_classes, shape=image_shape,
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    model.load_weights(model_file_path)
    model.summary()

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    _, test_step = steps(model, loss_object, optimizer,
                         test_loss=test_loss, test_accuracy=test_accuracy)

    print("Starting evaluation")

    batches = 0
    for test_images, test_labels in test_gen:
        test_step(test_images, test_labels)
        batches += 1
        if batches >= test_len / config['data.batch_size']:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break

    print('Test Loss: {} Test Acc: {}'.format(test_loss.result(),
                                              test_accuracy.result() * 100))
def main():
    parser = DenseNetArgumentParser(
        description=(
            "train.py is the main training/evaluation script for DenseNet. "
            "In order to run training on multiple Gaudi cards, use demo_densenet.py "
            "or run train.py with mpirun."))
    args, _ = parser.parse_known_args()

    strategy = None
    verbose = 1

    os.environ['ENABLE_EXPERIMENTAL_FLAGS'] = 'true'
    os.environ['RUN_TPC_FUSER'] = '******'

    if args.deterministic:
        if args.inputs is None:
            raise ValueError("Must provide inputs for deterministic mode")
        if args.resume_from_checkpoint_path is None:
            raise ValueError("Must provide checkpoint for deterministic mode")

    if args.dtype == 'bf16':
        os.environ['TF_BF16_CONVERSION'] = '1'

    if args.run_on_hpu:
        load_habana_module()
        if args.use_hpu_strategy:
            hls_addresses = str(os.environ.get(
                "MULTI_HLS_IPS", "127.0.0.1")).split(",")
            TF_BASE_PORT = 2410
            mpi_rank = comm_rank()
            mpi_size = comm_size()
            if mpi_rank > 0:
                verbose = 0

            # worker_hosts: comma-separated list of worker ip:port pairs.
            worker_hosts = []
            for address in hls_addresses:
                worker_hosts += [address + ':' + str(TF_BASE_PORT + rank)
                                 for rank in range(mpi_size // len(hls_addresses))]
            worker_hosts = ",".join(worker_hosts)
            task_index = mpi_rank

            # Configures cluster spec for distribution strategy.
            _ = distribution_utils.configure_cluster(worker_hosts, task_index)
            strategy = HPUStrategy()
            print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    else:
        strategy = tf.distribute.MultiWorkerMirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    if args.seed is not None:
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        random.seed(args.seed)
        np.random.seed(args.seed)
        tf.random.set_seed(args.seed)

    img_rows, img_cols = 224, 224  # Resolution of inputs
    channel = 3
    num_classes = 1000
    batch_size = args.batch_size
    nb_epoch = args.epochs
    dataset_dir = args.dataset_dir
    resume_from_checkpoint_path = args.resume_from_checkpoint_path
    resume_from_epoch = args.resume_from_epoch
    dropout_rate = args.dropout_rate
    weight_decay = args.weight_decay
    optim_name = args.optimizer
    initial_lr = args.initial_lr
    model_name = args.model
    save_summary_steps = args.save_summary_steps

    if model_name == "densenet121":
        growth_rate = 32
        nb_filter = 64
        nb_layers = [6, 12, 24, 16]
    elif model_name == "densenet161":
        growth_rate = 48
        nb_filter = 96
        nb_layers = [6, 12, 36, 24]
    elif model_name == "densenet169":
        growth_rate = 32
        nb_filter = 64
        nb_layers = [6, 12, 32, 32]
    else:
        print("model is not supported")
        exit(1)

    # Load our model
    if strategy:
        with strategy.scope():
            model = densenet_model(img_rows=img_rows, img_cols=img_cols,
                                   color_type=channel, dropout_rate=dropout_rate,
                                   weight_decay=weight_decay, num_classes=num_classes,
                                   growth_rate=growth_rate, nb_filter=nb_filter,
                                   nb_layers=nb_layers)

            optimizer = get_optimizer(model_name, optim_name, initial_lr,
                                      epsilon=1e-2)
            model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                          metrics=['accuracy'])
    else:
        model = densenet_model(img_rows=img_rows, img_cols=img_cols,
                               color_type=channel, dropout_rate=dropout_rate,
                               weight_decay=weight_decay, num_classes=num_classes,
                               growth_rate=growth_rate, nb_filter=nb_filter,
                               nb_layers=nb_layers)

        optimizer = get_optimizer(model_name, optim_name, initial_lr,
                                  epsilon=1e-2)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                      metrics=['accuracy'])

    # Start training
    steps_per_epoch = 1281167 // batch_size
    if args.steps_per_epoch is not None:
        steps_per_epoch = args.steps_per_epoch
    validation_steps = 50000 // batch_size
    if args.validation_steps is not None:
        validation_steps = args.validation_steps

    warmup_steps = args.warmup_epochs * steps_per_epoch
    lr_sched = {0: 1, 30: 0.1, 60: 0.01, 80: 0.001}
    lr_sched_steps = {
        epoch * steps_per_epoch: multiplier
        for (epoch, multiplier) in lr_sched.items()}

    lrate = StepLearningRateScheduleWithWarmup(initial_lr=initial_lr,
                                               initial_global_step=0,
                                               warmup_steps=warmup_steps,
                                               decay_schedule=lr_sched_steps,
                                               verbose=0)

    save_name = model_name if not model_name.endswith('.h5') else \
        os.path.split(model_name)[-1].split('.')[0].split('-')[0]

    model_ckpt = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(args.model_dir, config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5',
        monitor='train_loss')

    callbacks = [lrate, model_ckpt]

    if save_summary_steps is not None and save_summary_steps > 0:
        log_dir = os.path.join(args.model_dir, config.LOG_DIR)
        local_batch_size = batch_size

        if args.use_hpu_strategy:
            log_dir = os.path.join(log_dir, 'worker_' + str(comm_rank()))
            local_batch_size = batch_size // strategy.num_replicas_in_sync

        callbacks += [
            TensorBoardWithHParamsV2(
                args.__dict__, log_dir=log_dir,
                update_freq=save_summary_steps, profile_batch=0),
            ExamplesPerSecondKerasHookV2(
                save_summary_steps, output_dir=log_dir,
                batch_size=local_batch_size),
        ]

    if args.evaluate_checkpoint_path is not None:
        model.load_weights(args.evaluate_checkpoint_path)
        # build the validation dataset here, since evaluation runs before the
        # training branch below creates it
        ds_valid = get_dataset(dataset_dir, args.val_subset, batch_size)
        results = model.evaluate(x=ds_valid, steps=validation_steps)
        print("Test loss, Test acc:", results)
        exit()

    if ((resume_from_epoch is not None) and
            (resume_from_checkpoint_path is not None)):
        model.load_weights(resume_from_checkpoint_path)

    if args.deterministic:
        set_deterministic()
        if not os.path.isfile(args.dump_config):
            raise FileNotFoundError("wrong dump config path")

        import pickle
        x_path = os.path.join(args.inputs, "input")
        y_path = os.path.join(args.inputs, "target")
        x = pickle.load(open(x_path, 'rb'))
        y = pickle.load(open(y_path, 'rb'))

        with dump_callback(args.dump_config):
            model.fit(x=x, y=y,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      initial_epoch=resume_from_epoch,
                      epochs=nb_epoch,
                      shuffle=False,
                      verbose=verbose,
                      validation_data=None,
                      validation_steps=0,
                      )
    else:
        ds_train = get_dataset(dataset_dir, args.train_subset, batch_size)
        ds_valid = get_dataset(dataset_dir, args.val_subset, batch_size)

        model.fit(x=ds_train, y=None,
                  steps_per_epoch=steps_per_epoch,
                  callbacks=callbacks,
                  initial_epoch=resume_from_epoch,
                  epochs=nb_epoch,
                  shuffle=True,
                  verbose=verbose,
                  validation_data=(ds_valid, None),
                  validation_steps=validation_steps,
                  validation_freq=1,
                  )
def train(config):
    data = load(config, datagen_flow=True,
                with_datasets=config['engine'] == 'maml')

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weighted_loss(loss_object, data["class_weights"])
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    time_start = time.time()

    # Compiles a model, prints the model summary, and saves the model diagram
    # into a png file.
    model = densenet_model(classes=data["nb_classes"],
                           shape=data["image_shape"],
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    # model.summary()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='val_accuracy')

    with tf.device(device_name):
        train_engine.train(
            log_info=config,
            model=model,
            batch_size=config['data.batch_size'],
            epochs=config['train.epochs'],
            max_patience=config['train.patience'],
            engine=config['engine'],
            lr=config['train.lr'],
            train_loss=train_loss,
            train_accuracy=train_accuracy,
            test_loss=val_loss,
            test_accuracy=val_accuracy,
            val_loss=val_loss,
            val_accuracy=val_accuracy,
            optimizer=optimizer,
            loss_object=loss_object,
            **data,
        )

    time_end = time.time()

    elapsed = time_end - time_start
    hours = elapsed // 3600
    minutes = (elapsed % 3600) // 60
    seconds = elapsed % 60
    print(f"Training took: {hours:.2f}h {minutes:.2f}m {seconds:.2f}s!")
def eval_densenet(dataset_name="rwth", growth_rate=128, nb_layers=[6, 12],
                  reduction=0.0, max_patience=25, batch_size=16,
                  checkpoints=False, weight_classes=False, model_path="",
                  test_size=0.25, train_size=0.75):
    np.random.seed(2019)
    tf.random.set_seed(2019)

    # log
    log_freq = 1

    print("hyperparameters set")

    x, y = load(dataset_name)
    image_shape = np.shape(x)[1:]

    x_train, x_test, _, y_test = train_test_split(x, y,
                                                  test_size=test_size,
                                                  train_size=train_size,
                                                  stratify=y)
    x_train, x_test = x_train / 255.0, x_test / 255.0
    n_classes = len(np.unique(y))
    if weight_classes:
        class_weights = compute_class_weight(class_weight='balanced',
                                             classes=np.unique(y), y=y)

    print("data loaded")

    test_datagen = ImageDataGenerator(featurewise_center=True,
                                      featurewise_std_normalization=True,
                                      fill_mode='constant',
                                      cval=0)
    test_datagen.fit(x_train)

    model = densenet_model(classes=n_classes, shape=image_shape,
                           growth_rate=growth_rate, nb_layers=nb_layers,
                           reduction=reduction)
    model.load_weights(model_path)

    print("model created")

    if weight_classes:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

        def weightedLoss(originalLossFunc, weightsList):

            @tf.function
            def lossFunc(true, pred):
                axis = -1  # if channels last
                # axis = 1  # if channels first

                # argmax returns the index of the element with the greatest value
                # done in the class axis, it returns the class index
                classSelectors = tf.argmax(true, axis=axis, output_type=tf.int32)

                # considering weights are ordered by class, for each class
                # true(1) if the class index is equal to the weight index
                classSelectors = [tf.equal(i, classSelectors)
                                  for i in range(len(weightsList))]

                # casting boolean to float for calculations
                # each tensor in the list contains 1 where the ground-truth class equals its index
                # if you sum all of these, you get a tensor full of ones
                classSelectors = [tf.cast(x, tf.float32) for x in classSelectors]

                # for each of the selections above, multiply by the respective weight
                weights = [sel * w for sel, w in zip(classSelectors, weightsList)]

                # sum all the selections
                # the result is a tensor with the respective weight for each element in predictions
                weightMultiplier = weights[0]
                for i in range(1, len(weights)):
                    weightMultiplier = weightMultiplier + weights[i]

                # make sure your originalLossFunc only collapses the class axis;
                # the other axes must stay intact to multiply by the weights tensor
                loss = originalLossFunc(true, pred)
                loss = loss * weightMultiplier

                return loss
            return lossFunc

        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function
    def test_step(images, labels):
        predictions = model(tf.cast(images, tf.float32), training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    test_gen = test_datagen.flow(x_test, y_test, batch_size=batch_size,
                                 shuffle=False)

    print("starting evaluation")

    batches = 0
    for test_images, test_labels in test_gen:
        test_step(test_images, test_labels)
        batches += 1
        if batches >= len(x_test) / batch_size:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break

    print('Test Loss: {} Test Acc: {}'.format(test_loss.result(),
                                              test_accuracy.result() * 100))
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# reshape so that the data matches the network's expected input shape
x_train = x_train.reshape(x_train.shape[0], 32, 32, 3)
x_test = x_test.reshape(x_test.shape[0], 32, 32, 3)
x_train, x_test = x_train / 255.0, x_test / 255.0

image_gen_train = ImageDataGenerator(
    rotation_range=15,        # random rotation (degrees)
    width_shift_range=0.15,   # horizontal shift
    height_shift_range=0.15,  # vertical shift
    horizontal_flip=True,     # horizontal flip
)
image_gen_train.fit(x_train)

model = densenet_model(classes=10)
model.build(input_shape=(None, 32, 32, 3))

sgd = tf.keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/mnist.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_save_path,
    save_weights_only=True,
    rotation_range=rotation_range,
    width_shift_range=width_shift_range,
    height_shift_range=height_shift_range,
    horizontal_flip=horizontal_flip,
    fill_mode='constant',
    cval=0)
datagen.fit(x_train)

test_datagen = ImageDataGenerator(featurewise_center=True,
                                  featurewise_std_normalization=True,
                                  fill_mode='constant',
                                  cval=0)
test_datagen.fit(x_train)

model = densenet_model(classes=n_classes, shape=image_shape,
                       growth_rate=growth_rate, nb_layers=nb_layers,
                       reduction=reduction)

print("model created")

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
def train(config):
    np.random.seed(2020)
    tf.random.set_seed(2020)

    # Useful data
    now = datetime.now()
    now_as_str = now.strftime('%y_%m_%d-%H:%M:%S')

    # Output files
    checkpoint_path = f"{config['model.save_path']}"
    config_path = f"{config['output.config_path'].format(now_as_str)}"
    csv_output_path = f"{config['output.train_path'].format(now_as_str)}"
    train_summary_file_path = f"{config['summary.save_path'].format('train', config['data.dataset'], config['model.name'], config['model.type'], now_as_str)}"
    test_summary_file_path = f"{config['summary.save_path'].format('test', config['data.dataset'], config['model.name'], config['model.type'], now_as_str)}"
    summary_path = "results/summary.csv"

    # Output dirs
    data_dir = "data/"
    checkpoint_dir = checkpoint_path[:checkpoint_path.rfind('/')]
    config_dir = config_path[:config_path.rfind('/')]
    results_dir = csv_output_path[:csv_output_path.rfind('/')]

    # Create folder for model
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Create output for train process
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    file = open(f"{csv_output_path}", 'w')
    file.write("")
    file.close()

    # Create folder for config
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)

    # generate config file
    file = open(config_path, 'w')
    file.write(json.dumps(config, indent=2))
    file.close()

    # create summary file if it does not exist
    if not os.path.exists(summary_path):
        file = open(summary_path, 'w')
        file.write("datetime, model, config, min_loss, min_loss_accuracy\n")
        file.close()

    # Data loader
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    train, val, _, nb_classes, image_shape, class_weights = load(
        dataset_name=config['data.dataset'],
        dataset_dir=config['data.dataset_dir'],
        batch_size=config['data.batch_size'],
        train_size=config['data.train_size'],
        test_size=config['data.test_size'],
        weight_classes=config['data.weight_classes'],
        rotation_range=config['data.rotation_range'],
        width_shift_range=config['data.width_shift_range'],
        height_shift_range=config['data.height_shift_range'],
        horizontal_flip=config['data.horizontal_flip'],
        datagen_flow=True,
    )

    (train_gen, train_len, _) = train
    (val_gen, val_len, _) = val

    # Determine device
    if config['data.cuda']:
        cuda_num = config['data.gpu']
        device_name = f'GPU:{cuda_num}'
    else:
        device_name = 'CPU:0'

    if config['data.weight_classes']:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        loss_object = weightedLoss(loss_object, class_weights)
    else:
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    optimizer = tf.keras.optimizers.Adam()

    time_start = time.time()

    # Compile the model, print the model summary, and save the model diagram
    # to a png file.
    model = densenet_model(classes=nb_classes, shape=image_shape,
                           growth_rate=config['model.growth_rate'],
                           nb_layers=config['model.nb_layers'],
                           reduction=config['model.reduction'])
    model.summary()

    tf.keras.utils.plot_model(model, "{}/model.png".format(results_dir),
                              show_shapes=True)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='val_accuracy')

    train_step, test_step = steps(model, loss_object, optimizer,
                                  train_loss, train_accuracy,
                                  val_loss, val_accuracy)

    # create summary writers
    train_summary_writer = tf.summary.create_file_writer(train_summary_file_path)
    val_summary_writer = tf.summary.create_file_writer(test_summary_file_path)

    print("Starting training")

    loss, acc = train_engine.train(
        model=model,
        batch_size=config['data.batch_size'],
        epochs=config['train.epochs'],
        max_patience=config['train.patience'],
        train_gen=train_gen,
        train_len=train_len,
        val_gen=val_gen,
        val_len=val_len,
        train_loss=train_loss,
        train_accuracy=train_accuracy,
        val_loss=val_loss,
        val_accuracy=val_accuracy,
        train_step=train_step,
        test_step=test_step,
        checkpoint_path=checkpoint_path,
        train_summary_writer=train_summary_writer,
        val_summary_writer=val_summary_writer,
        csv_output_file=csv_output_path,
    )

    time_end = time.time()

    summary = "{}, {}, {}, {}, {}, {}\n".format(now_as_str,
                                                config['data.dataset'],
                                                config['model.name'],
                                                config_path, loss, acc)
    print(summary)

    file = open(summary_path, 'a+')
    file.write(summary)
    file.close()

    model_path = tf.train.latest_checkpoint(checkpoint_dir,
                                            latest_filename=checkpoint_path)
    if not model_path:
        print("Skipping evaluation. No checkpoint found in: {}".format(checkpoint_dir))
    else:
        model_from_saved = tf.keras.models.load_model(model_path)
        model_from_saved.summary()

        # Run test data through the reloaded model to make sure the results are the same.
        predictions_from_saved = model_from_saved.predict(val_gen)

    elapsed = time_end - time_start
    hours = elapsed // 3600
    minutes = (elapsed % 3600) // 60
    seconds = elapsed % 60
    print(f"Training took: {hours:.2f}h {minutes:.2f}m {seconds:.2f}s!")
def main():
    args = parse_arguments()

    lr = args.lr
    batch_size = args.batch_size
    epochs = args.epochs
    print_every_n_steps = args.print_every
    keep_prob = args.keep_prob
    growth_rate = args.growth_rate
    reg = args.reg
    logdir = args.log_dir
    cifar10_dir = args.cifar10_dir

    if logdir is None:
        raise ValueError(
            "Please specify where to save the TensorBoard summaries and models "
            "using --log_dir.")
    if cifar10_dir is None:
        raise ValueError(
            "Please specify the cifar10 dataset directory using --cifar10_dir.")

    model = densenet_model(growth_rate)

    start = dt.datetime.now()
    train_logdir = os.path.join(logdir, "dn-train-" + start.strftime("%Y%m%d-%H%M%S"))
    train_writer = tf.summary.FileWriter(train_logdir)
    test_logdir = os.path.join(logdir, "dn-test-" + start.strftime("%Y%m%d-%H%M%S"))
    test_writer = tf.summary.FileWriter(test_logdir)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        global_step = 0
        num_batches_per_epoch = math.ceil(50000 / batch_size)

        for epoch in range(1, epochs + 1):
            if epoch == 150 or epoch == 225:
                lr /= 10.

            total_train_loss, total_train_accuracy = train(
                sess, global_step, epoch, args, model, train_writer)
            total_test_loss, total_test_accuracy = test(
                sess, global_step, epoch, args, model, test_writer)

            global_step += num_batches_per_epoch

            print("{} : step={} epoch={} train_loss={:.4f} train_acc={:.4f} "
                  "test_loss={:.4f} test_acc={:.4f}"
                  .format(dt.datetime.now(), global_step, epoch,
                          total_train_loss, total_train_accuracy,
                          total_test_loss, total_test_accuracy),
                  flush=True)

            saver.save(sess, os.path.join(train_logdir, 'densenet-cifar10'),
                       global_step=epoch)

    train_writer.close()
    test_writer.close()