def train(network_backbone,
          pre_trained_model=None,
          trainset_filename='data/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt',
          valset_filename='data/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt',
          images_dir='data/datasets/VOCdevkit/VOC2012/JPEGImages/',
          labels_dir='data/datasets/VOCdevkit/VOC2012/SegmentationClass/',
          trainset_augmented_filename='data/datasets/SBD/train_noval.txt',
          images_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/img/',
          labels_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/cls/',
          model_dir=None,
          log_dir='data/logs/deeplab/'):

    if not model_dir:
        model_dir = 'data/models/deeplab/{}_voc2012/'.format(network_backbone)

    # Hyperparameters
    num_classes = 21
    ignore_label = 255
    num_epochs = 1000
    minibatch_size = 8  # Unable to do minibatch_size = 12 :(
    random_seed = 0
    learning_rate = 1e-5
    weight_decay = 5e-4
    batch_norm_decay = 0.99
    image_shape = [513, 513]
    # validation_scales = [0.5, 1, 1.5]
    validation_scales = [1]

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Prepare datasets
    train_dataset = Dataset(dataset_filename=trainset_filename, images_dir=images_dir, labels_dir=labels_dir, image_extension='.jpg', label_extension='.png')
    valid_dataset = Dataset(dataset_filename=valset_filename, images_dir=images_dir, labels_dir=labels_dir, image_extension='.jpg', label_extension='.png')

    # Calculate image channel means
    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=train_dataset.image_filenames, recalculate=False)

    voc2012_preprocessor = DataPreprocessor(channel_means=channel_means, output_size=image_shape, min_scale_factor=0.5, max_scale_factor=2.0)

    # Prepare dataset iterators
    train_iterator = Iterator(dataset=train_dataset, minibatch_size=minibatch_size, process_func=voc2012_preprocessor.preprocess, random_seed=random_seed, scramble=True, num_jobs=1)
    valid_iterator = Iterator(dataset=valid_dataset, minibatch_size=minibatch_size, process_func=voc2012_preprocessor.preprocess, random_seed=None, scramble=False, num_jobs=1)

    # Prepare augmented dataset (SBD)
    train_augmented_dataset = Dataset(dataset_filename=trainset_augmented_filename, images_dir=images_augmented_dir, labels_dir=labels_augmented_dir, image_extension='.jpg', label_extension='.mat')

    channel_augmented_means = save_load_means(means_filename='channel_augmented_means.npz', image_filenames=train_augmented_dataset.image_filenames, recalculate=False)

    voc2012_augmented_preprocessor = DataPreprocessor(channel_means=channel_augmented_means, output_size=image_shape, min_scale_factor=0.5, max_scale_factor=2.0)

    train_augmented_iterator = Iterator(dataset=train_augmented_dataset, minibatch_size=minibatch_size, process_func=voc2012_augmented_preprocessor.preprocess, random_seed=random_seed, scramble=True, num_jobs=1)

    model = DeepLab(network_backbone, num_classes=num_classes, ignore_label=ignore_label, batch_norm_momentum=batch_norm_decay, pre_trained_model=pre_trained_model, log_dir=log_dir)

    best_mIoU = 0

    for i in range(num_epochs):
        print('Epoch number: {}'.format(i))

        print('Start validation...')
        valid_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        # Multi-scale inputs prediction
        for _ in trange(valid_iterator.dataset_size):
            image, label = valid_iterator.next_raw_data()
            image = subtract_channel_means(image=image, channel_means=channel_means)

            output, valid_loss = multiscale_single_validate(image=image, label=label, input_scales=validation_scales, validator=model.validate)
            valid_loss_total += valid_loss

            prediction = np.argmax(output, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=[np.squeeze(label, axis=-1)], predictions=[prediction], num_classes=num_classes, ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_single_demo(image=image, label=np.squeeze(label, axis=-1), prediction=prediction, demo_dir=os.path.join(results_dir, 'validation_demo'), filename=str(_))

        mean_IOU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total, num_pixels_intersection=num_pixels_intersection_total)
        valid_loss_ave = valid_loss_total / valid_iterator.dataset_size

        print('Validation loss: {:.4f} | mIoU: {:.4f}'.format(valid_loss_ave, mean_IOU))

        if mean_IOU > best_mIoU:
            best_mIoU = mean_IOU
            model_savename = '{}_{:.4f}.ckpt'.format(network_backbone, best_mIoU)
            print('New best mIoU achieved, model saved as {}.'.format(model_savename))
            model.save(model_dir, model_savename)

        print('Start training...')
        train_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        print('Training using VOC2012...')
        for _ in trange(np.ceil(train_iterator.dataset_size / minibatch_size).astype(int)):
            images, labels = train_iterator.next_minibatch()
            # Scale weight decay by the fraction of pixels that are not ignored
            balanced_weight_decay = weight_decay * np.sum(labels != ignore_label) / labels.size
            outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0], target_width=image_shape[1], learning_rate=learning_rate, weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes, ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)

        train_iterator.shuffle_dataset()

        print('Training using SBD...')
        for _ in trange(np.ceil(train_augmented_iterator.dataset_size / minibatch_size).astype(int)):
            images, labels = train_augmented_iterator.next_minibatch()
            balanced_weight_decay = weight_decay * np.sum(labels != ignore_label) / labels.size
            outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0], target_width=image_shape[1], learning_rate=learning_rate, weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes, ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)

        train_augmented_iterator.shuffle_dataset()

        mIoU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total, num_pixels_intersection=num_pixels_intersection_total)
        train_loss_ave = train_loss_total / (train_iterator.dataset_size + train_augmented_iterator.dataset_size)
        print('Training loss: {:.4f} | mIoU: {:.4f}'.format(train_loss_ave, mIoU))

    model.close()
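# No entry point for the VOC 2012 + SBD training routine appears in this section, so the
# launcher below is only a minimal sketch of how train() might be invoked. The backbone
# name 'resnet_101' matches the checkpoints used elsewhere in these scripts, but the
# pre-trained checkpoint path is an assumption and should point to whatever ImageNet
# checkpoint is actually available locally.
if __name__ == '__main__':
    train(network_backbone='resnet_101',
          pre_trained_model='data/models/pretrained/resnet_101/resnet_v2_101.ckpt')  # assumed path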
import os.path as osp
from glob import glob

import numpy as np

from model import DeepLab
from utils import (save_load_means, subtract_channel_means, single_demo, read_image)
import gpu_limit

if __name__ == '__main__':

    demo_dir = 'data/demos/deeplab/hello/'
    models_dir = 'data/models/deeplab/resnet_101_voc2012/'
    model_filename = 'resnet_101_0.0040.ckpt'

    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=None, recalculate=False)

    deeplab = DeepLab('resnet_101', training=False)
    deeplab.load(osp.join(models_dir, model_filename))

    files = glob(demo_dir + '*.jpg')
    for image_filename in files:
        filename = osp.basename(image_filename).split('.')[0]
        image = read_image(image_filename=image_filename)
        image_input = subtract_channel_means(image=image, channel_means=channel_means)

        output = deeplab.test(inputs=[image_input], target_height=image.shape[0], target_width=image.shape[1])[0]

        single_demo(image, np.argmax(output, axis=-1), demo_dir, filename)
import os.path as osp

from model import DeepLab
from tqdm import trange
from utils import (Dataset, Iterator, save_load_means, subtract_channel_means, validation_single_demo_collage)

if __name__ == '__main__':

    data_dir = '/content/Data_Camera_SanTennis_Labeled/'
    testset_filename = osp.join(data_dir, 'valid.txt')
    images_dir = osp.join(data_dir, 'RGBs/')
    labels_dir = osp.join(data_dir, 'Labels/')
    demo_dir = 'data/demos/deeplab/resnet_101_voc2012/'
    models_dir = '/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/Models/'
    model_filename = 'resnet_101_0.7076.ckpt'

    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=None)

    minibatch_size = 16

    test_dataset = Dataset(dataset_filename=testset_filename, images_dir=images_dir, labels_dir=labels_dir, image_extension='.png', label_extension='.png')
    test_iterator = Iterator(dataset=test_dataset, minibatch_size=minibatch_size, process_func=None, random_seed=None, scramble=False, num_jobs=1)

    deeplab = DeepLab('resnet_101', training=False, num_classes=5)
    deeplab.load(osp.join(models_dir, model_filename))

    n_samples = 8
    for i in trange(n_samples):
        image, label = test_iterator.next_raw_data()
        # image_input = subtract_channel_means(image=image, channel_means=channel_means)
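# The evaluation loop above ends with the mean-subtraction step commented out and the rest
# of its body missing from this section. Purely as a sketch, the loop might continue along
# the lines below, mirroring the inference pattern of the demo script and the
# validation_single_demo_collage() call used in the training script that follows; numpy
# would also need to be imported as np for the argmax step.
#
#         image_input = subtract_channel_means(image=image, channel_means=channel_means)
#         output = deeplab.test(inputs=[image_input], target_height=image.shape[0], target_width=image.shape[1])[0]
#         prediction = np.argmax(output, axis=-1)
#         validation_single_demo_collage(image=image, label=np.squeeze(label, axis=-1), prediction=prediction, demo_dir=demo_dir, val_no=str(i))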
def train(network_backbone,
          pre_trained_model=None,
          trainset_filename='/content/Data_Camera_SanTennis_Labeled/train.txt',
          valset_filename='/content/Data_Camera_SanTennis_Labeled/valid.txt',
          images_dir='/content/Data_Camera_SanTennis_Labeled/RGBs/',
          labels_dir='/content/Data_Camera_SanTennis_Labeled/Labels/',
          trainset_augmented_filename='data/datasets/SBD/train_noval.txt',
          images_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/img/',
          labels_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/cls/',
          model_dir=None,
          log_dir='data/logs/deeplab/'):

    if not model_dir:
        model_dir = '/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/Models/'

    # Hyperparameters
    num_classes = 5
    ignore_label = 255
    num_epochs = 1000
    minibatch_size = 4  # Unable to do minibatch_size = 12 :(
    random_seed = 0
    learning_rate = 1e-3
    weight_decay = 5e-4
    batch_norm_decay = 0.99
    image_shape = [480, 640]
    # validation_scales = [0.5, 1, 1.5]
    validation_scales = [1]

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Prepare datasets
    train_dataset = Dataset(dataset_filename=trainset_filename, images_dir=images_dir, labels_dir=labels_dir, image_extension='.png', label_extension='.png')
    valid_dataset = Dataset(dataset_filename=valset_filename, images_dir=images_dir, labels_dir=labels_dir, image_extension='.png', label_extension='.png')

    # Calculate image channel means
    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=train_dataset.image_filenames, recalculate=False)

    voc2012_preprocessor = DataPreprocessor(channel_means=channel_means, output_size=image_shape, min_scale_factor=0.5, max_scale_factor=2.0)

    # Prepare dataset iterators
    train_iterator = Iterator(dataset=train_dataset, minibatch_size=minibatch_size, process_func=voc2012_preprocessor.preprocess, random_seed=random_seed, scramble=True, num_jobs=1)
    valid_iterator = Iterator(dataset=valid_dataset, minibatch_size=minibatch_size, process_func=voc2012_preprocessor.preprocess, random_seed=None, scramble=False, num_jobs=1)

    # Prepare augmented dataset (SBD augmentation is disabled for this custom dataset)
    # train_augmented_dataset = Dataset(dataset_filename=trainset_augmented_filename, images_dir=images_augmented_dir, labels_dir=labels_augmented_dir, image_extension='.jpg', label_extension='.mat')
    # channel_augmented_means = save_load_means(means_filename='channel_augmented_means.npz', image_filenames=train_augmented_dataset.image_filenames, recalculate=False)
    # voc2012_augmented_preprocessor = DataPreprocessor(channel_means=channel_augmented_means, output_size=image_shape, min_scale_factor=0.5, max_scale_factor=2.0)
    # train_augmented_iterator = Iterator(dataset=train_augmented_dataset, minibatch_size=minibatch_size, process_func=voc2012_augmented_preprocessor.preprocess, random_seed=random_seed, scramble=True, num_jobs=1)

    model = DeepLab(network_backbone, num_classes=num_classes, ignore_label=ignore_label, batch_norm_momentum=batch_norm_decay, pre_trained_model=pre_trained_model, log_dir=log_dir)

    best_mIoU = 0

    # CSV-style log strings; the loss logs are currently disabled (commented out below)
    train_loss = ","
    train_mIoU = ","
    valid_loss = ","
    valid_mIoU = ","

    for i in range(num_epochs):
        print('Epoch number: {}'.format(i))

        print('Start validation...')
        valid_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        # Pick one random validation sample per epoch for a demo collage
        rand = np.random.randint(0, valid_iterator.dataset_size - 1)
        count = 0

        # Multi-scale inputs prediction
        for _ in trange(valid_iterator.dataset_size):
            image, label = valid_iterator.next_raw_data()
            # image = subtract_channel_means(image=image, channel_means=channel_means)

            output, valid_loss = multiscale_single_validate(image=image, label=label, input_scales=validation_scales, validator=model.validate)
            valid_loss_total += valid_loss

            prediction = np.argmax(output, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=[np.squeeze(label, axis=-1)], predictions=[prediction], num_classes=num_classes, ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            if count == rand:
                validation_single_demo_collage(image=image, label=np.squeeze(label, axis=-1), prediction=prediction, demo_dir=os.path.join('/content/CustomDeeplabv3/data/demos/deeplab/resnet_101_voc2012/', 'validation_demo'), val_no=str(i))
            count += 1

        mean_IOU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total, num_pixels_intersection=num_pixels_intersection_total)
        valid_loss_ave = valid_loss_total / valid_iterator.dataset_size

        print('Validation loss: {:.4f} | mIoU: {:.4f}'.format(valid_loss_ave, mean_IOU))
        # valid_loss += str(train_loss_total / train_iterator.dataset_size) + ","
        valid_mIoU += str(mean_IOU) + ","

        if mean_IOU > best_mIoU and mean_IOU > 0.25:
            best_mIoU = mean_IOU
            model_savename = '{}_{:.4f}.ckpt'.format(network_backbone, best_mIoU)
            print('New best mIoU achieved, model saved as {}.'.format(model_savename))
            model.save(model_dir, model_savename)

        print('Start training...')
        train_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        print('Training using our own dataset...')
        for _ in trange(np.ceil(train_iterator.dataset_size / minibatch_size).astype(int)):
            images, labels = train_iterator.next_minibatch()
            # Scale weight decay by the fraction of pixels that are not ignored
            balanced_weight_decay = weight_decay * np.sum(labels != ignore_label) / labels.size
            outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0], target_width=image_shape[1], learning_rate=learning_rate, weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes, ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # Saves one training collage per epoch (the file is overwritten on each minibatch)
            validation_demo_collage(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join('/content/CustomDeeplabv3/data/demos/deeplab/resnet_101_voc2012/', 'training_demo'), batch_no=i)

        train_iterator.shuffle_dataset()

        # print('Training using SBD...')
        # for _ in trange(np.ceil(train_augmented_iterator.dataset_size / minibatch_size).astype(int)):
        #     images, labels = train_augmented_iterator.next_minibatch()
        #     balanced_weight_decay = weight_decay * np.sum(labels != ignore_label) / labels.size
        #     outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0], target_width=image_shape[1], learning_rate=learning_rate, weight_decay=balanced_weight_decay)
        #     train_loss_total += train_loss
        #
        #     predictions = np.argmax(outputs, axis=-1)
        #     num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes, ignore_label=ignore_label)
        #
        #     num_pixels_union_total += num_pixels_union
        #     num_pixels_intersection_total += num_pixels_intersection
        #
        #     # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)
        # train_augmented_iterator.shuffle_dataset()

        mIoU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total, num_pixels_intersection=num_pixels_intersection_total)
        # train_loss_ave = train_loss_total / (train_iterator.dataset_size + train_augmented_iterator.dataset_size)
        train_loss_ave = train_loss_total / train_iterator.dataset_size
        print('Training loss: {:.4f} | mIoU: {:.4f}'.format(train_loss_ave, mIoU))
        # train_loss += str(train_loss_total / train_iterator.dataset_size) + ","
        train_mIoU += str(mIoU) + ","

        # Rewrite the mIoU log on Google Drive at the end of every epoch
        # loss_log = open("/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/loss_log.txt", "w")
        mIoU_log = open("/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/mIoU_log.txt", "w")
        # loss_log.write(train_loss + "\n" + valid_loss)
        mIoU_log.write(train_mIoU + "\n" + valid_mIoU)
        mIoU_log.close()

    model.close()
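# mIoU_log.txt written by train() above contains two comma-prefixed CSV lines: the first
# holds the training mIoU per epoch and the second the validation mIoU per epoch. The helper
# below is a minimal sketch for reading that log back, e.g. to plot learning curves in the
# Colab notebook; the use of matplotlib is an assumption and not part of the original scripts.
import matplotlib.pyplot as plt


def plot_miou_log(log_path='/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/mIoU_log.txt'):
    with open(log_path) as f:
        train_line, valid_line = f.read().splitlines()[:2]
    # Drop the empty field produced by the leading "," in the log strings.
    train_miou = [float(v) for v in train_line.split(',') if v]
    valid_miou = [float(v) for v in valid_line.split(',') if v]
    plt.plot(train_miou, label='train mIoU')
    plt.plot(valid_miou, label='valid mIoU')
    plt.xlabel('Epoch')
    plt.ylabel('mIoU')
    plt.legend()
    plt.show()
    return train_miou, valid_miou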