def __init__(self, input_path, image_path, image_path_val, validation=0.25,
             test=0, verbose=False, included_folders=None, image_size=224,
             only_val=False):
    """Record the dataset settings and validate the requested data split.

    Args:
        input_path: Stored unchanged as ``self.input``.
        image_path: Stored unchanged as ``self.image_path``.
        image_path_val: Stored unchanged as ``self.image_path_val``.
        validation: Validation share of the data; must lie in [0, 1].
        test: Test share of the data; must lie in [0, 1].
        verbose: When truthy, a dataset banner is printed.
        included_folders: Stored unchanged as ``self.included_folders``.
            ``None`` (the default) gives each instance its own new empty
            list instead of one list shared by every instance.
        image_size: Stored unchanged as ``self.image_size``.
        only_val: Stored unchanged as ``self.only_val``.

    Raises:
        ValueError: If ``validation`` or ``test`` is outside [0, 1], or if
            the remaining training share, rounded to two decimals, is
            negative.
    """
    self.input = input_path
    self.image_path = image_path
    self.image_path_val = image_path_val
    self.validation = validation
    self.test = test
    self.verbose = verbose
    # A literal ``[]`` default would be created once and shared by all
    # instances; build a fresh list per instance instead.
    self.included_folders = [] if included_folders is None else included_folders
    self.image_size = image_size
    self.only_val = only_val

    # Reject an impossible split before constructing anything else.
    if self.validation < 0 or self.validation > 1:
        raise ValueError(
            'Error, validation must be a float between 0 and 1')

    if self.test < 0 or self.test > 1:
        raise ValueError('Error, test must be a float between 0 and 1')

    # Whatever is not validation or test is training data; rounding hides
    # floating-point noise such as 0.7499999.
    self.train_split = round(1 - (self.validation + self.test), 2)

    if self.train_split < 0:
        raise ValueError(
            'Error, validation and test can\'t add to more than 1')

    # Image preprocessors
    # NOTE(review): the target size is fixed at 224x224 and does not follow
    # ``image_size`` -- confirm this is intentional.
    self.preprocessors = [
        AspectAwarePreprocessor(224, 224),
        ImageToArrayPreprocessor(),
    ]

    print("Input split: train {}%, validation {}%, test {}%".format(
        self.train_split * 100, self.validation * 100, self.test * 100))

    if self.verbose:
        print("===== Dataset =====")
"submission_at_%s.csv" % args["start_epoch"]]) # load encoded_class to category_id mapping... mapping_dict = json.loads(open(ID_MAPPING, "r").read()) encodedLabel_to_className = mapping_dict["encodedLabel_to_className"] className_to_categoryID = mapping_dict["className_to_categoryID"] # load submission.csv & reset 0 submission = pd.read_csv("./sample_submission.csv") submission["Category"] = [0] * submission.shape[0] print("[INFO] sample_sumission\n") print(submission.head()) print("[INFO] expect to predict =", submission.shape) ## augmentation aap = AspectAwarePreprocessor(64, 64) iap = ImageToArrayPreprocessor() means = json.loads(open(DATASET_MEAN).read()) mp = MeanPreprocessor(means["R"], means["G"], means["B"]) sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test") # load in images print("[INFO] loading test images....") imagePaths = list(paths.list_images(args["dataset"])) print("[INFO] fetched %d images to test" % len(imagePaths)) data, names = sdl.load(imagePaths, verbose=1e4) testX = data.astype("float") / 255.0 imageIds = [name.split(".")[0] for name in names] ## load in models & predict
# every test path gets the placeholder label -1
testLabels = [-1] * len(testPaths)

# Pair each split name with its image paths, its labels and the HDF5 file
# it is written to (four datasets in total).
datasets = {
    "train": [trainPaths, trainLabels, config.TRAIN_HDF5],
    "trainval": [trainvalPaths, trainvalLabels, config.TRAINVAL_HDF5],
    "val": [valPaths, valLabels, config.VAL_HDF5],
    "test": [testPaths, testLabels, config.TEST_HDF5],
}

# Preprocess the images: one aspect-aware 256x256 preprocessor plus three
# lists that start empty, one per colour channel.
aap = AspectAwarePreprocessor(256, 256)
R, G, B = [], [], []

# walk over the splits one by one
for dtype, dinfo in datasets.items():
    paths, labels, hdfpath = dinfo
    print(
        "[INFO] building dataset = ",
        dtype,
        ", labels distribution =",
        Counter(labels),
    )

    # one HDF5 writer per split, sized for len(paths) images of 256x256x3
    writer = HDF5DatasetWriter(hdfpath, (len(paths), 256, 256, 3))

    # visit every image of the split, with a progress bar
    for i in tqdm(range(len(paths))):
        path = paths[i]
        label = labels[i]
        image = cv2.imread(path)
# HDF5 files TRAIN_HDF5 = config.TRAIN_HDF5 VAL_HDF5 = config.VAL_HDF5 TEST_HDF5 = config.TEST_HDF5 # construct a list pairing the training, validation, and testing # image paths along with their corresponding labels and output HDF5 # files datasets = [("train", trainPaths, trainLabels, TRAIN_HDF5), ("val", valPaths, valLabels, VAL_HDF5), ("test", testPaths, testLabels, TEST_HDF5)] # initialize the image pre-processor and the lists of RGB channel # averages import datetime print(datetime.datetime.now()) aap = AspectAwarePreprocessor(224, 224) (R, G, B) = ([], [], []) # loop over the dataset tuples for (dType, paths, labels, outputPath) in datasets: # create HDF5 writer print("[INFO] building {}...".format(outputPath)) writer = HDF5DatasetWriter((len(paths), 224, 224, 3), outputPath) # initialize the progress bar #widgets = ["Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()] #pbar = progressbar.ProgressBar(maxval=len(paths),widgets=widgets).start() # loop over the image paths for (i, (path, label)) in enumerate(zip(paths, labels)): # load the image and process it
"test3" : load_model("./output/test3_patch_imgtoarr_version2aug/model-resnet50_new_head-003-0.0464-14179.hdf5"), "test4" : load_model("./output/test4_simple_imgtoarr_version2aug/model-resnet50_new_head-004-0.0673-2515.hdf5"), "test5" : load_model("./output/test5_simple_meansub_imgtoarr_version2aug/model-resnet50_new_head-003-0.0933-5503.hdf5"), "test6" : load_model("./output/test6_aspect_meansub_imgtoarr_version2aug/model-resnet50_new_head-002-0.0974-11163.hdf5"), } model = ModelBanks[modelname] ## initialize preprocessors sp = SimplePreprocessor(224, 224) aap = AspectAwarePreprocessor(224, 224) iap = ImageToArrayPreprocessor() cp1 = CropPreprocessor(224, 224) # 10-crops TTA trainmeans = json.loads(open("./output/dogs_vs_cats_mean.json").read()) mp = MeanPreprocessor(trainmeans["R"], trainmeans["G"], trainmeans["B"]) print("[INFO] using %s model..." % modelname) predictions = [] ids = [] submission = pd.read_csv("./sample_submission.csv") # columns = [id/int, label/float] # preprocess batch images & do prediction if useTTA == "True": print("[INFO] applying TTA..")
# USAGE
# python build_dataset.py

# import the necessary packages
import config
from imutils import paths
import random
import shutil
import os
from aspectawarepreprocessor import AspectAwarePreprocessor
#from imagetoarraypreprocessor import ImageToArrayPreprocessor
import cv2

# aspect-aware preprocessor targeting 128x128
aap = AspectAwarePreprocessor(128, 128)

# Collect every image path under the original input directory in sorted
# order, then shuffle with a fixed seed so the split is reproducible.
imagePaths = sorted(paths.list_images('/floyd/home/datasets/orig/DATASET'))
random.seed(42)
random.shuffle(imagePaths)

# First cut: the leading TRAIN_SPLIT share is training data, the remainder
# is test data.
i = int(len(imagePaths) * config.TRAIN_SPLIT)
trainPaths, testPaths = imagePaths[:i], imagePaths[i:]

# Second cut: the leading VAL_SPLIT share of the training data becomes the
# validation set; what is left stays as training data.
i = int(len(trainPaths) * config.VAL_SPLIT)
valPaths, trainPaths = trainPaths[:i], trainPaths[i:]
help="if apply 10_crops TTA while evaluating") args = vars(parser.parse_args()) ## cache vars B = 128 modelname = args["model"] ModelBanks = { "alexnet": load_model("./output/model-alexnet-075-0.2944_without_padding_10283.hdf5"), "alexnet2": load_model("./output/model-alexnet2-075-0.2972_with_padding_9299.hdf5"), } model = ModelBanks[modelname] aap = AspectAwarePreprocessor(256, 256) iap = ImageToArrayPreprocessor() cp1 = CropPreprocessor(227, 227) # 10-crops TTA ## list & sort imagePaths in testset #imagePaths = sorted(list(paths.list_images("./data/test1"))) imagePaths = sorted(list(paths.list_images("./data/redux-edition/test"))) N = len(imagePaths) useTTA = args["TTA"] # MUST be str!!! print("[INFO] using %s model..." % modelname) predictions = [] submission = pd.read_csv("./sample_submission.csv") # columns = [id,label] # preprocess batch images & do prediction if useTTA == "True":
## cached settings
NUM_CLASSES = 2
TRAIN_HDF5 = "./data/train.hdf5"
TRAINVAL_HDF5 = "./data/trainval.hdf5"
VAL_HDF5 = "./data/val.hdf5"
TEST_HDF5 = "./data/test.hdf5"
DATASET_MEAN = "./output/dogs_vs_cats_mean.json"
OUTPUT_PATH = "./output"
BATCH_SIZE = 16

## set up the image preprocessors
sp = SimplePreprocessor(224, 224)
pp = PatchPreprocessor(224, 224)
iap = ImageToArrayPreprocessor()
aap = AspectAwarePreprocessor(256, 256)

# Per-channel means are written inline here; the JSON load they replace is
# kept below for reference.
#trainmeans = json.loads(open("./output/dogs_vs_cats_mean.json").read())
trainmeans = {"R": 124.96, "G": 115.97, "B": 106.13}
mp = MeanPreprocessor(
    trainmeans["R"],
    trainmeans["G"],
    trainmeans["B"],
)

# NOTE(review): this rebinds ``paths`` from the module that provides
# ``list_images`` to a plain list, so the module is unreachable under this
# name afterwards -- confirm nothing later needs it.
paths = list(paths.list_images("./data/train"))
random.shuffle(paths)
print(paths[:5])

# show the first ten shuffled images
for path in paths[:10]:
    # blank 900x900x3 array (currently unused: the paste below is disabled)
    cvs = np.zeros((900, 900, 3))
    image = cv2.imread(path)
    cv2.imshow("org", image)
    #cvs[:image.shape[0], :image.shape[1], :] = image