def __init__(self,
                 input_path,
                 image_path,
                 image_path_val,
                 validation=0.25,
                 test=0,
                 verbose=False,
                 included_folders=None,
                 image_size=224,
                 only_val=False):
        """Store the dataset configuration and validate the split fractions.

        Args:
            input_path: Stored unchanged as ``self.input``.
            image_path: Stored unchanged as ``self.image_path``.
            image_path_val: Stored unchanged as ``self.image_path_val``.
            validation: Validation share of the split, a number in [0, 1].
            test: Test share of the split, a number in [0, 1].
            verbose: When true, print a dataset banner after the split
                summary.
            included_folders: Optional list stored as
                ``self.included_folders``. A new empty list is created when
                it is omitted or ``None``.
            image_size: Value passed as both arguments of
                ``AspectAwarePreprocessor``.
            only_val: Stored unchanged as ``self.only_val``.

        Raises:
            ValueError: If ``validation`` or ``test`` lies outside [0, 1],
                or if the remaining training share (rounded to two
                decimals) is negative.
        """
        self.input = input_path
        self.image_path = image_path
        self.image_path_val = image_path_val
        self.validation = validation
        self.test = test
        self.verbose = verbose
        # Build a fresh list per instance: a literal [] default would be one
        # object shared by every instance created without this argument.
        self.included_folders = ([] if included_folders is None
                                 else included_folders)
        self.image_size = image_size
        self.only_val = only_val

        # Image preprocessors. The resize target follows image_size so the
        # parameter is honoured instead of a fixed 224.
        self.preprocessors = [
            AspectAwarePreprocessor(self.image_size, self.image_size),
            ImageToArrayPreprocessor()
        ]

        if self.validation < 0 or self.validation > 1:
            raise ValueError(
                'Error, validation must be a float between 0 and 1')
        if self.test < 0 or self.test > 1:
            raise ValueError('Error, test must be a float between 0 and 1')

        # Whatever is not validation or test is training data. Rounding to
        # two decimals removes floating-point noise from the subtraction.
        self.train_split = round(1 - (self.validation + self.test), 2)
        if self.train_split < 0:
            raise ValueError(
                'Error, validation and test can\'t add to more than 1')

        print("Input split: train {}%, validation {}%, test {}%".format(
            self.train_split * 100, self.validation * 100, self.test * 100))
        if self.verbose:
            print("===== Dataset =====")
Beispiel #2
0
     "submission_at_%s.csv" % args["start_epoch"]])

# Load the label mappings. The JSON file at ID_MAPPING must contain the two
# keys read below.
with open(ID_MAPPING, "r") as mapping_file:
    mapping_dict = json.load(mapping_file)
encodedLabel_to_className = mapping_dict["encodedLabel_to_className"]
className_to_categoryID = mapping_dict["className_to_categoryID"]

# Load the sample submission and reset every "Category" entry to 0 so it can
# be filled with predictions.
submission = pd.read_csv("./sample_submission.csv")
submission["Category"] = [0] * submission.shape[0]
print("[INFO] sample_sumission\n")
print(submission.head())
print("[INFO] expect to predict =", submission.shape)

# Image preprocessors (not augmentation): aspect-aware resize with (64, 64),
# a mean preprocessor built from the stored R/G/B values, then array
# conversion. The loader applies them in list order: aap, mp, iap.
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
with open(DATASET_MEAN) as means_file:
    means = json.load(means_file)
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test")

# Collect the test image paths.
print("[INFO] loading test images....")
imagePaths = list(paths.list_images(args["dataset"]))
print("[INFO] fetched %d images to test" % len(imagePaths))

# In "test" mode the loader returns the image data together with names.
data, names = sdl.load(imagePaths, verbose=1e4)
# NOTE(review): the division by 255 runs after the mean preprocessor has
# already been applied -- confirm this matches the training pipeline.
testX = data.astype("float") / 255.0
# Image id = the part of each name before its first dot.
imageIds = [name.split(".")[0] for name in names]
## load in models & predict
Beispiel #3
0
# Placeholder labels for the test split: one -1 per test path.
testLabels = [-1] * len(testPaths)
"""
- construct a dict to pairing image paths, labels, output HDF5 of 4 datasets
"""
# Each entry maps a split name to [image paths, labels, output HDF5 path].
datasets = {
    "train": [trainPaths, trainLabels, config.TRAIN_HDF5],
    "trainval": [trainvalPaths, trainvalLabels, config.TRAINVAL_HDF5],
    "val": [valPaths, valLabels, config.VAL_HDF5],
    "test": [testPaths, testLabels, config.TEST_HDF5],
}
"""
- use image preprocessors to preprocess images
"""
# Initialize the image preprocessor and three empty per-channel lists.
# NOTE(review): R, G and B are only created in this excerpt; the code that
# fills them is not visible here.
aap = AspectAwarePreprocessor(256, 256)
R, G, B = ([], [], [])

## loop over datasets
for dtype, dinfo in datasets.items():
    # Unpack the [paths, labels, HDF5 path] triple for this split.
    # NOTE(review): this rebinds the name `paths`; if the file also imports
    # a module called `paths`, it is shadowed from here on -- confirm.
    paths, labels, hdfpath = dinfo
    print("[INFO] building dataset = ", dtype, ", labels distribution =",
          Counter(labels))

    # Build an HDF5 writer with one (256, 256, 3) slot per image path.
    writer = HDF5DatasetWriter(hdfpath, (len(paths), 256, 256, 3))

    # Loop over the image paths by index and read each image from disk.
    # NOTE(review): the rest of the loop body is cut off in this excerpt.
    for i in tqdm(range(len(paths))):
        path, label = paths[i], labels[i]
        image = cv2.imread(path)
Beispiel #4
0
# Output HDF5 paths, copied from the config module into local constants.
TRAIN_HDF5 = config.TRAIN_HDF5
VAL_HDF5 = config.VAL_HDF5
TEST_HDF5 = config.TEST_HDF5
# Construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files. Each tuple is (split name, paths, labels, output path).
datasets = [("train", trainPaths, trainLabels, TRAIN_HDF5),
            ("val", valPaths, valLabels, VAL_HDF5),
            ("test", testPaths, testLabels, TEST_HDF5)]

# Initialize the image pre-processor and the lists of RGB channel
# averages. The current time is printed first.
# NOTE(review): `import datetime` sits mid-script here; consider moving it
# to the top-of-file import block.
import datetime
print(datetime.datetime.now())
aap = AspectAwarePreprocessor(224, 224)
(R, G, B) = ([], [], [])

# Loop over the dataset tuples.
for (dType, paths, labels, outputPath) in datasets:
    # Create the HDF5 writer. Here the dimensions tuple comes first and the
    # output path second; one (224, 224, 3) slot per image path.
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 224, 224, 3), outputPath)

    # Progress bar (currently disabled).
    #widgets = ["Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
    #pbar = progressbar.ProgressBar(maxval=len(paths),widgets=widgets).start()

    # Loop over the image paths together with their labels.
    # NOTE(review): the loop body is cut off in this excerpt.
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image and process it
Beispiel #5
0
        "test3" :
        load_model("./output/test3_patch_imgtoarr_version2aug/model-resnet50_new_head-003-0.0464-14179.hdf5"),
        "test4" : 
        load_model("./output/test4_simple_imgtoarr_version2aug/model-resnet50_new_head-004-0.0673-2515.hdf5"),
        "test5" :
        load_model("./output/test5_simple_meansub_imgtoarr_version2aug/model-resnet50_new_head-003-0.0933-5503.hdf5"),
        "test6" :
        load_model("./output/test6_aspect_meansub_imgtoarr_version2aug/model-resnet50_new_head-002-0.0974-11163.hdf5"),

        }
# Pick the already-loaded model registered under the requested name.
model = ModelBanks[modelname]


## initialize preprocessors
sp = SimplePreprocessor(224, 224)
aap = AspectAwarePreprocessor(224, 224)
iap = ImageToArrayPreprocessor()
cp1 = CropPreprocessor(224, 224) # 10-crops TTA

# Read the stored per-channel training means. The context manager closes
# the file handle as soon as the JSON has been parsed.
with open("./output/dogs_vs_cats_mean.json") as means_file:
    trainmeans = json.load(means_file)
mp = MeanPreprocessor(trainmeans["R"], trainmeans["G"], trainmeans["B"])


print("[INFO] using %s model..." % modelname)
# Accumulators for the predictions and image ids, plus the submission
# template.
predictions = []
ids = []
submission = pd.read_csv("./sample_submission.csv")  # columns = [id/int, label/float]

# preprocess batch images & do prediction
# useTTA is compared against the string "True", not a boolean.
if useTTA == "True":
    print("[INFO] applying TTA..") 
Beispiel #6
0
# USAGE
# python build_dataset.py

# import the necessary packages
import config
from imutils import paths
import random
import shutil
import os
from aspectawarepreprocessor import AspectAwarePreprocessor
#from imagetoarraypreprocessor import ImageToArrayPreprocessor
import cv2

# Aspect-aware preprocessor configured with (128, 128).
aap = AspectAwarePreprocessor(128, 128)

# Gather every image path under the original dataset directory in sorted
# order, then shuffle with a fixed seed so the split is reproducible.
imagePaths = sorted(
    paths.list_images('/floyd/home/datasets/orig/DATASET'))
random.seed(42)
random.shuffle(imagePaths)

# The first TRAIN_SPLIT share of the shuffled paths is training data and
# the remainder is testing data.
i = int(len(imagePaths) * config.TRAIN_SPLIT)
trainPaths, testPaths = imagePaths[:i], imagePaths[i:]

# Carve the validation set off the front of the training paths. Both
# slices are taken from the old trainPaths before the names are rebound.
i = int(len(trainPaths) * config.VAL_SPLIT)
valPaths, trainPaths = trainPaths[:i], trainPaths[i:]
Beispiel #7
0
        help="if apply 10_crops TTA while evaluating")
# Parse the command-line arguments into a plain dict.
args = vars(parser.parse_args())

## cache vars
# NOTE(review): B is presumably the prediction batch size; its use is not
# visible in this excerpt -- confirm.
B = 128
modelname = args["model"]

# Registry of model name -> loaded model.
# NOTE(review): every entry is loaded from disk when this dict is built,
# although only ModelBanks[modelname] is selected below.
ModelBanks = {
    "alexnet":
    load_model("./output/model-alexnet-075-0.2944_without_padding_10283.hdf5"),
    "alexnet2":
    load_model("./output/model-alexnet2-075-0.2972_with_padding_9299.hdf5"),
}
model = ModelBanks[modelname]

# Preprocessors: aspect-aware (256, 256), image-to-array, and a (227, 227)
# crop preprocessor.
aap = AspectAwarePreprocessor(256, 256)
iap = ImageToArrayPreprocessor()
cp1 = CropPreprocessor(227, 227)  # 10-crops TTA

## list & sort imagePaths in testset
#imagePaths = sorted(list(paths.list_images("./data/test1")))
imagePaths = sorted(list(paths.list_images("./data/redux-edition/test")))
N = len(imagePaths)
# The flag is kept as a string because it is compared against "True" below.
useTTA = args["TTA"]  # MUST be str!!!

print("[INFO] using %s model..." % modelname)
# Accumulator for predictions, plus the submission template.
predictions = []
submission = pd.read_csv("./sample_submission.csv")  # columns = [id,label]

# preprocess batch images & do prediction
# NOTE(review): the body of this branch is cut off in this excerpt.
if useTTA == "True":
Beispiel #8
0
## cache variables
# Class count, HDF5 dataset locations, mean-file location, output directory
# and batch size used by this script.
NUM_CLASSES = 2
TRAIN_HDF5 = "./data/train.hdf5"
TRAINVAL_HDF5 = "./data/trainval.hdf5"
VAL_HDF5 = "./data/val.hdf5"
TEST_HDF5 = "./data/test.hdf5"
DATASET_MEAN = "./output/dogs_vs_cats_mean.json"
OUTPUT_PATH = "./output"
BATCH_SIZE = 16

## initiate image preprocessors
sp = SimplePreprocessor(224, 224)
pp = PatchPreprocessor(224, 224)
iap = ImageToArrayPreprocessor()
aap = AspectAwarePreprocessor(256, 256)

# The channel means are hard-coded here; the line that loads them from the
# JSON file is disabled.
#trainmeans = json.loads(open("./output/dogs_vs_cats_mean.json").read())
trainmeans = {"R": 124.96, "G": 115.97, "B": 106.13}
mp = MeanPreprocessor(trainmeans["R"], trainmeans["G"], trainmeans["B"])

# NOTE(review): this rebinds `paths` from the object providing list_images
# to a plain list, so `paths.list_images` cannot be called again after this
# line.
paths = list(paths.list_images("./data/train"))
# Shuffle in place (no seed is set in this excerpt) and print a small sample.
random.shuffle(paths)
print(paths[:5])

# Display the first ten shuffled images.
# NOTE(review): the rest of the loop body is cut off in this excerpt.
for path in paths[:10]:
    # 900x900x3 zero array; in this excerpt it is only referenced by the
    # disabled line below.
    cvs = np.zeros(shape=[900, 900, 3])
    image = cv2.imread(path)
    cv2.imshow("org", image)
    #cvs[:image.shape[0], :image.shape[1], :] = image