Example #1
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-d",
                    "--dataset",
                    required=True,
                    help="path to input dataset")
    ap.add_argument("-o",
                    "--output",
                    required=True,
                    help="path to output directory")
    args = vars(ap.parse_args())

    # get filenames from all subdirectories
    imagePaths = list(paths.list_images(args["dataset"]))

    ## instantiate preprocessor and data loader
    print("[INFO] resizing images...\n")
    sp = SimplePreprocessor(64, 64)
    sdl = SimpleDatasetLoader(preprocessors=[sp])

    ## load and resize data
    (data, labels) = sdl.load(imagePaths, verbose=500)

    # write to the output directory using the label and index as the filename
    for (i, (image, label)) in enumerate(zip(data, labels)):
        fname = os.path.join(args["output"], f"{label}_{i}.png")
        print(
            f"[INFO] writing a resized {label[:-1]} to {os.path.basename(fname)}"
        )
        imwrite(fname, image)
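
All of the examples on this page lean on the SimplePreprocessor and SimpleDatasetLoader helper classes without showing them. Below is a minimal sketch of what they presumably look like, assuming OpenCV-based resizing and a /dataset/{class}/{image}.jpg directory layout; the actual classes in each project may differ.

import os
import cv2
import numpy as np

class SimplePreprocessor:
    def __init__(self, width, height, inter=cv2.INTER_AREA):
        # target spatial dimensions; aspect ratio is ignored
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        return cv2.resize(image, (self.width, self.height),
                          interpolation=self.inter)

class SimpleDatasetLoader:
    def __init__(self, preprocessors=None):
        self.preprocessors = preprocessors if preprocessors is not None else []

    def load(self, imagePaths, verbose=-1):
        data, labels = [], []
        for (i, imagePath) in enumerate(imagePaths):
            image = cv2.imread(imagePath)
            # assumption: the class label is the parent directory name
            label = imagePath.split(os.path.sep)[-2]
            for p in self.preprocessors:
                image = p.preprocess(image)
            data.append(image)
            labels.append(label)
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, len(imagePaths)))
        return (np.array(data), np.array(labels))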
Example #2
def main():
	ap = argparse.ArgumentParser()
	ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
	args = vars(ap.parse_args())

	print("[INFO] loading images...")
	# grab the file paths of the dataset
	imagePaths = list(paths.list_images(args["dataset"]))
	# preprocess the images under the dataset folder, resizing them all to 32x32
	sp = SimplePreprocessor(32, 32)
	sdl = SimpleDatasetLoader(preprocessors=[sp])
	# flatten the three RGB color channels into a 1-D matrix
	(data, labels) = sdl.load(imagePaths, verbose=500)
	data = data.reshape((data.shape[0], 3072))

	le = LabelEncoder()
	labels = le.fit_transform(labels)

	(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=5)

	for r in (None, "l1", "l2"):
		print("[INFO] training model with '{}' penalty".format(r))
		model = SGDClassifier(loss="log", penalty=r, max_iter=50, learning_rate="constant", eta0=0.001, random_state=42)
		model.fit(trainX, trainY)

		acc = model.score(testX, testY)
		print("[INFO] '{}' penalty accuracy:{:.3f}%".format(r, acc * 100))
Example #3
def get_images_and_labels():
	# grab the list of images that we'll be describing
	print("[INFO] loading images...")
	imagePaths = list(paths.list_images(args["dataset"]))

	# initialize the image preprocessor, load the dataset from disk,
	# and reshape the data matrix
	sp = SimplePreprocessor(32, 32)
	sdl = SimpleDatasetLoader(preprocessors=[sp])
	(images, labels) = sdl.load(imagePaths, verbose=500)
	#image = image.reshape((image.shape[0], 3072))
	# resize the image to be 32x32 pixels, ignoring aspect ratio,
	# and then perform Contrast Limited Adaptive Histogram
	# Equalization (CLAHE)
	resized_images = []
	for image in images:
		#print("image shape = ", image.shape[1::-1])
		r_image = transform.resize(image, (32, 32))
		r_image = exposure.equalize_adapthist(r_image, clip_limit=0.1)
		resized_images.append(r_image)

	# show some information on memory consumption of the images
	#print("[INFO] features matrix: {:.1f}MB".format(resized_images.nbytes / (1024 * 1024.0)))

	# convert the data and labels to NumPy arrays
	resized_images = np.array(resized_images)
	labels = np.array(labels)

	return resized_images, labels
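
A hedged usage sketch for get_images_and_labels(); note that exposure.equalize_adapthist returns float images in [0, 1], so the returned matrix is float64 rather than uint8 (the printed shape is illustrative):

images, labels = get_images_and_labels()
print("[INFO] images: {}, dtype: {}".format(images.shape, images.dtype))
print("[INFO] features matrix: {:.1f}MB".format(images.nbytes / (1024 * 1024.0)))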
Example #4
encodedLabel_to_className = mapping_dict["encodedLabel_to_className"]
className_to_categoryID = mapping_dict["className_to_categoryID"]

# load submission.csv & reset 0
submission = pd.read_csv("./sample_submission.csv")
submission["Category"] = [0] * submission.shape[0]
print("[INFO] sample_sumission\n")
print(submission.head())
print("[INFO] expect to predict =", submission.shape)

## preprocessing
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
means = json.loads(open(DATASET_MEAN).read())
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test")

# load in images
print("[INFO] loading test images....")
imagePaths = list(paths.list_images(args["dataset"]))
print("[INFO] fetched %d images to test" % len(imagePaths))

data, names = sdl.load(imagePaths, verbose=1e4)
testX = data.astype("float") / 255.0
imageIds = [name.split(".")[0] for name in names]

## load in models & predict
with tf.device("/cpu:0"):
    model = load_model(MODEL, custom_objects={"f1_score": f1_score})

# create a distribution strategy for TF 2.x
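
The snippet stops at the comment above; a minimal sketch of the TF 2.x distribution strategy it alludes to, assuming tensorflow is imported as tf and MODEL / f1_score / testX are defined as in the snippet:

strategy = tf.distribute.MirroredStrategy()
print("[INFO] replicas in sync: {}".format(strategy.num_replicas_in_sync))
with strategy.scope():
    model = load_model(MODEL, custom_objects={"f1_score": f1_score})
preds = model.predict(testX, batch_size=64)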
Example #5
imagePaths = []
validExtensions = ['jpg', 'jpeg', 'png', 'bmp']
for pathName, folderNames, fileNames in os.walk(dataset_path):
	for fileName in fileNames:
		if fileName.split(".")[-1] in validExtensions:
			imagePaths.append(os.path.join(pathName, fileName))

# print("imagePaths:",imagePaths)

new_width = cmd_dict['width']
new_height = cmd_dict['height']


sp = SimplePreprocessor(new_width, new_height)
sfp = SimpleFlattenPreprocessor()	
sdl = SimpleDatasetLoader(preprocessors = [sp,sfp]) # It is an ordered sequence. Order matters. First we resize then flatten.

# After every 500 iterations we would want to see the progress.
(data, labels) = sdl.load(imagePaths, verbose=500)
#print("data.shape", data.shape)
#print("Example string labels",labels[0:5])

# Information about the memory consumption of the image.
print("[INFO] feature matrix : {:.3f}MB".format(data.nbytes/(1024*1000.0))) # 3 digits after the decimal
# Map the string labels (class name) to integers.
le = LabelEncoder()
labels = le.fit_transform(labels) # le.classes_ attribute will have the corresponding string labels.
#print("Example integer labels", labels[0:5])


# partition the data into training and testing. 
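
Example #5 is cut off at the comment above. Separately, SimpleFlattenPreprocessor is not defined anywhere on this page; a minimal sketch of what it presumably does, given that it is chained after the resize step:

class SimpleFlattenPreprocessor:
    def preprocess(self, image):
        # collapse (H, W, C) into a single 1-D feature vector
        return image.flatten()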
Example #6
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os

print("[INFO] loading images...")
imagePaths = list(paths.list_images('flowers17'))
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype("float")/255.0

(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25,  random_state=42)

trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)

print("[INFO] compiling model...")
opt = SGD(lr=0.005)
model = MiniVGGNet.build(width=64, height=64, depth=3, classes=len(classNames))
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

print("[INFO] training network...")
H = model.fit(trainX,  trainY, validation_data=(testX, testY), batch_size=32, epochs=100, verbose=1)
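
Since matplotlib.pyplot is already imported in this snippet, a short follow-up sketch for plotting the history returned by model.fit(); note the accuracy keys are "acc"/"val_acc" in older Keras releases and "accuracy"/"val_accuracy" in newer ones:

plt.style.use("ggplot")
plt.figure()
plt.plot(H.history["loss"], label="train_loss")
plt.plot(H.history["val_loss"], label="val_loss")
plt.plot(H.history["accuracy"], label="train_acc")
plt.plot(H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.show()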
Example #7
                required=True,
                help="path to the trained model")
ap.add_argument(
    "-l", "--labelbin", required=True, help="path to the saved labels"
)  # We need it to get the name of classes (ex: whether the image passed belongs to class 1 or class 2 or class 3 etc.)
# I don't know why we would need the labels; see the sketch after this example
args = vars(ap.parse_args())

#pre-process the image for classification
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)
#iap = ImageToArrayPreprocessor()
edp = ExpandDimPreprocessor(axis=0)  # add a batch dimension; at test time the model expects a single image as (1, H, W, C)

preprocessors = [srp, nop, edp]
sdl = SimpleDatasetLoader(preprocessors)
preprocessed_image = sdl.loadTest(
    args["imagePath"]
)  # Path of a single image at a time and NOT entire dataset (unlike sdl.load())

print("Preprocessed image for keras to get classified: ",
      preprocessed_image.shape)

print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print(model.summary())
print("trained model loaded.")

print("[INFO] loading labels ...")
lb = pickle.loads(open(args["labelbin"], "rb").read())
print("labels loaded.")
Example #8
from keras.utils import to_categorical
from keras import layers
from keras import models
from keras import regularizers
from sklearn.model_selection import train_test_split
from simpledatasetloader import SimpleDatasetLoader
import numpy as np
from matplotlib import pylab as pl

dloader = SimpleDatasetLoader()

(data_x, data_y) = dloader.load('../SMILEsmileD/SMILEs/positives/positives7',
                                '../SMILEsmileD/SMILEs/positives/laplacian', 1)

dat = []
N = data_y.shape[0]
for n in range(N):
    dat.append(data_y[n].reshape((64 * 64)))
data_y = np.array(dat)
del dat

(train_x, test_x, train_y, test_y) = train_test_split(data_x,
                                                      data_y,
                                                      test_size=0.40)

model = models.Sequential()
#kernel_regularizer = regularizers.l2(10.1),
model.add(layers.Conv2D(10, (3, 3), activation='relu',
                        input_shape=(64, 64, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(20, (3, 3), activation='relu'))
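
The model definition is cut off above. One plausible, hedged completion, given that data_y was flattened to 64*64 regression targets earlier in the snippet:

model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64 * 64, activation='sigmoid'))  # one output per target pixel
model.compile(optimizer='adam', loss='mse')
model.fit(train_x, train_y, epochs=5, batch_size=32,
          validation_data=(test_x, test_y))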
Example #9
                    help="# of cores kNN classifier (-1 uses all cores)")
"""
vars takes an object as a parameter; 
e.g{'dataset': '../animal_image_dog_cat_and_panda/', 'neighbors': 5, 'jobs': -1}
"""
args = vars(parser.parse_args())

## load images
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
#print(imagePaths[:10])
#e.g.= '../animal_image_dog_cat_and_panda/panda/panda_00528.jpg'

# initialize the image preprocessor and set a fixed image size
simpro = SimplePreprocessor(32, 32)
simloader = SimpleDatasetLoader(preprocessors=[simpro])
data, labels = simloader.load(imagePaths, verbose=500)
data = data.reshape(data.shape[0], 32 * 32 * 3)

# show information about memory consumption
print("[INFO] features matrix consumes %.1f MB" % (data.nbytes /
                                                   (1024 * 1000.0)))

## encoder & split dataset
le = LabelEncoder()
labels = le.fit_transform(labels)

# split dataset into train & test
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
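
The train_test_split call above is cut off; a hedged sketch of how the split and the k-NN classifier (which the "neighbors" and "jobs" arguments feed) would typically be finished:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

trainX, testX, trainY, testY = train_test_split(data, labels,
                                                test_size=0.25, random_state=42)
model = KNeighborsClassifier(n_neighbors=args["neighbors"], n_jobs=args["jobs"])
model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX),
                            target_names=le.classes_))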
Example #10
    '--jobs',
    type=int,
    default=-1,
    help='# of jobs for k-NN distance (-1 uses all available cores)')
args = ap.parse_args()
# args = vars(ap.parse_args())
# print(args)

# grab the list of images that we'll be describing
print('[INFO] loading images...')
imagesPaths = list(paths.list_images(args.dataset))

# initialize the image preprocessor, load the dataset from disk
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(
    imagesPaths, verbose=500
)  # verbose controls how often progress is reported while loading
data = data.reshape(
    (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))

# show some information on the memory consumption of the images
print('[INFO] features matrix: {:.1f}MB'.format(data.nbytes / (1024 * 1000.0)))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
Example #11
File: KNN.py  Project: RodIba/NN
ap.add_argument("-j",
                "--jobs",
                type=int,
                default=-1,
                help="# of jobs for knn distance")

args = vars(ap.parse_args())

print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))  # get paths to images

# initialize the image processor, load dataset from disk
# and reshape the data matrix

sp = SimplePreprocessor(32, 32)  # rescale all images to 32x32 pixels
sdl = SimpleDatasetLoader(preprocessors=[sp])  # initialize loader
(data, labels) = sdl.load(
    imagePaths,
    verbose=500)  # load images - returns 2-tuple with images and labels
data = data.reshape(
    (data.shape[0], 3072))  # flatten images into a 3000 x 3072 NumPy array
# 3072 = 32x32x3
print("[INFO] features matrix: {:.1f}MB".format(data.nbytes /
                                                (1024 * 1000.00)))

# build training and testing splits
#encode labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
Example #12
random.shuffle(imagePaths)

print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print("trained model loaded.")


#pre-process the image for classification
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)

# iap = ImageToArrayPreprocessor() # We don't really need it. Since we are passing numpy array itself as input
eap = ExpandDimPreprocessor(axis=0)  # even with the whole dataset loaded, images are passed to the model one at a time
eccf = ExtractCNNCodeAsFeatures(model, layer_index=25)  # layer index 25 is hard-coded; inspect model.summary() to choose an appropriate layer
preprocessors = [srp, nop, eap, eccf] 
sdl = SimpleDatasetLoader(preprocessors)
print("[INFO] loading images to disk ... ")
usefulImagePaths, feature_vectors, labels = sdl.load(imagePaths, verbose=100)

print("no of useful images: ", feature_vectors.shape)
print("no of useful labels: ", labels.shape)
#print("feature_vectors[0]")
#print(feature_vectors[0])
#print("feature_vectors[0].shape")
#print(feature_vectors[0].shape)
#print("labels[0]")
#print(labels[0])

labels = np.expand_dims(labels, axis=1)
# prepare the data to write to a CSV
feature_vectors_with_corresponding_labels = np.hstack((feature_vectors, labels))
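
A hedged follow-up for the CSV-writing step the comment above announces; np.hstack on a float feature matrix and string labels upcasts everything to strings, hence fmt="%s" (the filename is hypothetical):

np.savetxt("cnn_features.csv", feature_vectors_with_corresponding_labels,
           delimiter=",", fmt="%s")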
                "--model",
                required=True,
                help="path to save the train model")
args = vars(ap.parse_args())

classLabels = ["cat", "dog", "panda"]

print("[INFO] sampling images...")
imagePaths = np.array(list(paths.list_images(args["dataset"])))
idxs = np.random.randint(0, len(imagePaths), size=(10, ))
imagePaths = imagePaths[idxs]

sp = SimplePreprocessor(32, 32)
iap = ImageToArrayPreprocessor()

sdl = SimpleDatasetLoader(preprocessors=[sp, iap])
(data, labels) = sdl.load(imagePaths)
data = data.astype("float") / 255.0  # scale pixel intensities to [0, 1]

print("[INFO] loading pre-trained network...")
model = load_model(args["model"])

print("[INFO] predicting...")
preds = model.predict(data, batch_size=32).argmax(axis=1)

for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath)
    cv2.putText(image, "label: {}".format(classLabels[preds[i]]), (0, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    cv2.imshow("Image", image)
    cv2.waitKey(0)
Example #14
from imutils import paths
import argparse

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='path to the input dataset')
args = vars(ap.parse_args())

# grab the list of image paths
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))

# initialize the image preprocessor, load the dataset from the disk,
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.reshape((data.shape[0], 3072))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# splitting the data into train and test set
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25)

# loop over our set of regularizers
for r in (None, 'l1', 'l2', 'elasticnet'):
    # train the SGD classifier using a softmax loss function and the
    # specified regularization function for 10 epochs
    print("[INFO] training model with '{}' penalty".format(r))
Example #15
        if fileName.split(".")[-1] in validExtensions:
            #print(pathName, fileName)
            imagePath = os.path.join(pathName, fileName)  # portable; avoids the Windows-specific "\\" separator
            imagePaths.append(imagePath)

print("imagePaths[0:5]", imagePaths[0:5])

random.seed(42)
random.shuffle(imagePaths)

srp = SimpleResizePreprocessor(width=96, height=96)
#iap = ImageToArrayPreprocessor() # keras specific preprocessor
#edp = ExpandDimPreprocessor(axis=0) # Not required for training!
nop = NormalizePreprocessor(normalizing_factor=255.)
preprocessors = [srp, nop]
sdl = SimpleDatasetLoader(preprocessors)

print("[INFO] loading images to disk ... ")
usefulImagePaths, data, labels = sdl.load(imagePaths, verbose=100)

print("no of useful images: ", data.shape)
print("no of useful labels: ", labels.shape)

le = LabelEncoder()
integer_labels = le.fit_transform(labels)

print("[INFO] one hot encoding labels ... ")
lb = LabelBinarizer()
one_hot_encoded_labels = lb.fit_transform(labels)
print("labels encoded.")
# 75% of the data for training
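
The snippet trails off at the comment above; a hedged sketch of the 75/25 split it was heading toward:

from sklearn.model_selection import train_test_split

(trainX, testX, trainY, testY) = train_test_split(
    data, one_hot_encoded_labels, test_size=0.25, random_state=42)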