def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dataset", required=True,
                    help="path to input dataset")
    ap.add_argument("-o", "--output", required=True,
                    help="path to output directory")
    args = vars(ap.parse_args())

    # get filenames from all subdirectories
    imagePaths = list(paths.list_images(args["dataset"]))

    ## instantiate preprocessor and data loader
    print("[INFO] resizing images...\n")
    sp = SimplePreprocessor(64, 64)
    sdl = SimpleDatasetLoader(preprocessors=[sp])

    ## load and resize data
    (data, labels) = sdl.load(imagePaths, verbose=500)

    # write to the output directory, using the label and index as the filename
    for (i, (image, label)) in enumerate(zip(data, labels)):
        fname = os.path.join(args["output"], f"{label}_{i}.png")
        print(f"[INFO] writing a resized {label[:-1]} to {os.path.basename(fname)}")
        # imwrite is imported elsewhere in this script (e.g. cv2.imwrite)
        imwrite(fname, image)
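# The SimplePreprocessor used above is defined outside this snippet. A
# minimal sketch of what such a class typically looks like (assumed
# implementation, not necessarily the one imported here): it just resizes to
# a fixed width and height with OpenCV, ignoring aspect ratio.
import cv2

class SimplePreprocessor:
    def __init__(self, width, height, inter=cv2.INTER_AREA):
        # store the target image width, height, and interpolation method
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        # resize to a fixed size, ignoring the aspect ratio
        return cv2.resize(image, (self.width, self.height),
                          interpolation=self.inter)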
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dataset", required=True,
                    help="path to input dataset")
    args = vars(ap.parse_args())

    print("[INFO] loading images...")
    # grab the file paths of the images in the dataset
    imagePaths = list(paths.list_images(args["dataset"]))

    # preprocess every image under the dataset folder, resizing to 32x32
    sp = SimplePreprocessor(32, 32)
    sdl = SimpleDatasetLoader(preprocessors=[sp])

    # flatten the three RGB channels into a single 3072-dim vector (32*32*3)
    (data, labels) = sdl.load(imagePaths, verbose=500)
    data = data.reshape((data.shape[0], 3072))

    le = LabelEncoder()
    labels = le.fit_transform(labels)

    (trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                      test_size=0.25,
                                                      random_state=5)

    for r in (None, "l1", "l2"):
        print("[INFO] training model with '{}' penalty".format(r))
        # note: scikit-learn >= 1.1 spells this loss "log_loss"
        model = SGDClassifier(loss="log", penalty=r, max_iter=50,
                              learning_rate="constant", eta0=0.001,
                              random_state=42)
        model.fit(trainX, trainY)
        acc = model.score(testX, testY)
        print("[INFO] '{}' penalty accuracy: {:.3f}%".format(r, acc * 100))
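# The SimpleDatasetLoader shared by these scripts is also external. A minimal
# sketch of the common pattern (assumed implementation; the variants used in
# later snippets add extra modes and return values, e.g. mode="test" or a
# loadTest() helper). The class name is taken from the parent directory of
# each image, i.e. /path/to/dataset/{class}/{image}.jpg.
import os
import cv2
import numpy as np

class SimpleDatasetLoader:
    def __init__(self, preprocessors=None):
        # store the preprocessors, applied in order to every image
        self.preprocessors = preprocessors if preprocessors is not None else []

    def load(self, imagePaths, verbose=-1):
        data, labels = [], []
        for (i, imagePath) in enumerate(imagePaths):
            # load the image and extract the label from the directory name
            image = cv2.imread(imagePath)
            label = imagePath.split(os.path.sep)[-2]
            for p in self.preprocessors:
                image = p.preprocess(image)
            data.append(image)
            labels.append(label)
            # show progress every `verbose` images
            if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, len(imagePaths)))
        return (np.array(data), np.array(labels))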
def get_images_and_labels():
    # grab the list of images that we'll be describing
    # (relies on the globally parsed `args`)
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(args["dataset"]))

    # initialize the image preprocessor and load the dataset from disk
    sp = SimplePreprocessor(32, 32)
    sdl = SimpleDatasetLoader(preprocessors=[sp])
    (images, labels) = sdl.load(imagePaths, verbose=500)

    # resize each image to 32x32 pixels, ignoring the aspect ratio, and then
    # perform Contrast Limited Adaptive Histogram Equalization (CLAHE)
    resized_images = []
    for image in images:
        r_image = transform.resize(image, (32, 32))
        r_image = exposure.equalize_adapthist(r_image, clip_limit=0.1)
        resized_images.append(r_image)

    # convert the data and labels to NumPy arrays
    resized_images = np.array(resized_images)
    labels = np.array(labels)
    return resized_images, labels
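# A quick standalone sanity check of the CLAHE step used above ("sample.jpg"
# is a placeholder path; scikit-image's equalize_adapthist handles color
# images by equalizing the value channel internally):
from skimage import exposure, transform
import cv2

image = cv2.imread("sample.jpg")
r_image = transform.resize(image, (32, 32))  # returns floats in [0, 1]
r_image = exposure.equalize_adapthist(r_image, clip_limit=0.1)
print(r_image.min(), r_image.max())  # values stay within [0, 1]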
encodedLabel_to_className = mapping_dict["encodedLabel_to_className"]
className_to_categoryID = mapping_dict["className_to_categoryID"]

# load submission.csv and reset all predictions to 0
submission = pd.read_csv("./sample_submission.csv")
submission["Category"] = [0] * submission.shape[0]
print("[INFO] sample_submission\n")
print(submission.head())
print("[INFO] expect to predict =", submission.shape)

## preprocessing pipeline
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
means = json.loads(open(DATASET_MEAN).read())
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test")

# load in images
print("[INFO] loading test images....")
imagePaths = list(paths.list_images(args["dataset"]))
print("[INFO] fetched %d images to test" % len(imagePaths))
data, names = sdl.load(imagePaths, verbose=1e4)
testX = data.astype("float") / 255.0
imageIds = [name.split(".")[0] for name in names]

## load in the model & predict
with tf.device("/cpu:0"):
    model = load_model(MODEL, custom_objects={"f1_score": f1_score})
    # create distribute strategy for TF 2.0
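# The MeanPreprocessor above subtracts per-channel means computed over the
# training set (loaded here from DATASET_MEAN). A minimal sketch of the
# assumed implementation; OpenCV loads images in BGR order, hence the
# split/merge below:
import cv2

class MeanPreprocessor:
    def __init__(self, rMean, gMean, bMean):
        # store the per-channel means from the training set
        self.rMean = rMean
        self.gMean = gMean
        self.bMean = bMean

    def preprocess(self, image):
        # split into channels, subtract the means, and merge back
        (B, G, R) = cv2.split(image.astype("float32"))
        R -= self.rMean
        G -= self.gMean
        B -= self.bMean
        return cv2.merge([B, G, R])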
imagePaths = []
validExtensions = ['jpg', 'jpeg', 'png', 'bmp']
for pathName, folderNames, fileNames in os.walk(dataset_path):
    for fileName in fileNames:
        if fileName.split(".")[-1] in validExtensions:
            imagePaths.append(os.path.join(pathName, fileName))

new_width = cmd_dict['width']
new_height = cmd_dict['height']

# The preprocessors form an ordered sequence, and order matters: first we
# resize, then we flatten.
sp = SimplePreprocessor(new_width, new_height)
sfp = SimpleFlattenPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[sp, sfp])

# report progress every 500 images
(data, labels) = sdl.load(imagePaths, verbose=500)

# information about the memory consumption of the images
print("[INFO] feature matrix : {:.3f}MB".format(data.nbytes / (1024 * 1000.0)))

# map the string labels (class names) to integers; the le.classes_ attribute
# keeps the corresponding string labels
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing.
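# The SimpleFlattenPreprocessor used above is the second stage of the
# pipeline. A minimal sketch of the assumed implementation: it collapses the
# (height, width, channels) image into a single 1-D feature vector.
class SimpleFlattenPreprocessor:
    def preprocess(self, image):
        # flatten e.g. (32, 32, 3) into a 3072-dim vector
        return image.flatten()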
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from keras.optimizers import SGD
# AspectAwarePreprocessor, ImageToArrayPreprocessor, SimpleDatasetLoader,
# and MiniVGGNet are project-local modules (import paths omitted here)

print("[INFO] loading images...")
# (originally run in Colab: %cd /content/drive/My\ Drive/Colab_Work)
imagePaths = list(paths.list_images('flowers17'))

# the class name is the parent directory of each image path
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype("float") / 255.0

(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.25,
                                                  random_state=42)
trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)

print("[INFO] compiling model...")
opt = SGD(lr=0.005)  # newer Keras versions spell this learning_rate=0.005
model = MiniVGGNet.build(width=64, height=64, depth=3,
                         classes=len(classNames))
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])

print("[INFO] training network...")
H = model.fit(trainX, trainY, validation_data=(testX, testY),
              batch_size=32, epochs=100, verbose=1)
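# The AspectAwarePreprocessor resizes while preserving aspect ratio and then
# center-crops to the target size. A minimal sketch of the assumed
# implementation:
import cv2
import imutils

class AspectAwarePreprocessor:
    def __init__(self, width, height, inter=cv2.INTER_AREA):
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        (h, w) = image.shape[:2]
        # resize along the shorter dimension, then compute the crop offsets
        if w < h:
            image = imutils.resize(image, width=self.width, inter=self.inter)
            dH = int((image.shape[0] - self.height) / 2.0)
            dW = 0
        else:
            image = imutils.resize(image, height=self.height, inter=self.inter)
            dW = int((image.shape[1] - self.width) / 2.0)
            dH = 0
        # center-crop, then force the exact target size to absorb rounding
        (h, w) = image.shape[:2]
        image = image[dH:h - dH, dW:w - dW]
        return cv2.resize(image, (self.width, self.height),
                          interpolation=self.inter)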
                required=True, help="path to the trained model")
ap.add_argument("-l", "--labelbin", required=True,
                help="path to the saved labels")
# the saved label binarizer lets us map a predicted class index back to a
# human-readable class name
args = vars(ap.parse_args())

# pre-process the image for classification
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)
edp = ExpandDimPreprocessor(axis=0)  # add a batch dimension for inference
preprocessors = [srp, nop, edp]
sdl = SimpleDatasetLoader(preprocessors)

# loadTest() takes the path of a single image at a time, NOT an entire
# dataset (unlike sdl.load())
preprocessed_image = sdl.loadTest(args["imagePath"])
print("Preprocessed image for keras to get classified: ",
      preprocessed_image.shape)

print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print(model.summary())
print("trained model loaded.")

print("[INFO] loading labels ...")
lb = pickle.loads(open(args["labelbin"], "rb").read())
print("labels loaded.")
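# The snippet stops before the actual classification. A plausible
# continuation (sketch), assuming `lb` is a scikit-learn LabelBinarizer so
# that `lb.classes_` maps the argmax index back to a class name:
proba = model.predict(preprocessed_image)[0]
idx = proba.argmax()
print("[INFO] predicted: {} ({:.2f}%)".format(lb.classes_[idx],
                                              proba[idx] * 100))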
from keras.utils import to_categorical
from keras import layers
from keras import models
from keras import regularizers
from sklearn.model_selection import train_test_split
from simpledatasetloader import SimpleDatasetLoader
import numpy as np
from matplotlib import pylab as pl

dloader = SimpleDatasetLoader()
(data_x, data_y) = dloader.load('../SMILEsmileD/SMILEs/positives/positives7',
                                '../SMILEsmileD/SMILEs/positives/laplacian',
                                1)

# flatten each 64x64 target image into a 4096-dim vector
dat = []
N = data_y.shape[0]
for n in range(N):
    dat.append(data_y[n].reshape((64 * 64)))
data_y = np.array(dat)
del dat

(train_x, test_x, train_y, test_y) = train_test_split(data_x, data_y,
                                                      test_size=0.40)

model = models.Sequential()
# kernel_regularizer=regularizers.l2(10.1),
model.add(layers.Conv2D(10, (3, 3), activation='relu',
                        input_shape=(64, 64, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(20, (3, 3), activation='relu'))
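# Note: the per-sample reshape loop above can be replaced by a single
# vectorized call with the same result (assuming data_y has shape (N, 64, 64)):
#   data_y = data_y.reshape((data_y.shape[0], 64 * 64))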
help="# of cores kNN classifier (-1 uses all cores)") """ vars takes an object as a parameter; e.g{'dataset': '../animal_image_dog_cat_and_panda/', 'neighbors': 5, 'jobs': -1} """ args = vars(parser.parse_args()) ## load images print("[INFO] loading images...") imagePaths = list(paths.list_images(args["dataset"])) #print(imagePaths[:10]) #e.g.= '../animal_image_dog_cat_and_panda/panda/panda_00528.jpg' # initiate the image preprocessor, set fixed_image size simpro = SimplePreprocessor(32, 32) simloader = SimpleDatasetLoader(preprocessors=[simpro]) data, labels = simloader.load(imagePaths, verbose=500) data = data.reshape(data.shape[0], 32 * 32 * 3) # show information about memory consumption print("[INFO] features matrix consumes %.1f MB" % (data.nbytes / (1024 * 1000.0))) ## encoder & split dataset le = LabelEncoder() labels = le.fit_transform(labels) # split dataset into train & test trainX, testX, trainY, testY = train_test_split(data, labels, test_size=0.25,
                '--jobs', type=int, default=-1,
                help='# of jobs for k-NN distance (-1 uses all available cores)')
args = ap.parse_args()

# grab the list of images that we'll be describing
print('[INFO] loading images...')
imagesPaths = list(paths.list_images(args.dataset))

# initialize the image preprocessor, load the dataset from disk,
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sd1 = SimpleDatasetLoader(preprocessors=[sp])
# verbose reports progress on the ongoing load every 500 images
(data, labels) = sd1.load(imagesPaths, verbose=500)
data = data.reshape(
    (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))

# show some information on the memory consumption of the images
print('[INFO] features matrix: {:.1f}MB'.format(data.nbytes / (1024 * 1000.0)))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of the data
# for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.25)
ap.add_argument("-j", "--jobs", type=int, default=-1, help="# of jobs for knn distance") args = vars(ap.parse_args()) print("[INFO] loading images...") imagePaths = list(paths.list_images(args["dataset"])) # get paths to images # initialize the image processor, load dataset from disk # and reshape the data matrix sp = SimplePreprocessor(32, 32) #rescale all images to 32X32 pixels sdl = SimpleDatasetLoader(preprocessors=[sp]) # initialize loader (data, labels) = sdl.load( imagePaths, verbose=500) # load images - returns 2-tuple with images and labels data.reshape( (data.shape[0], 3072)) # flatten images into a 3000 x 3072 numpy array # 3072 = 32x32x3 print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1000.00))) # build training and testing splits #encode labels as integers le = LabelEncoder() lables = le.fit_transform(labels) #partition the data into training and testing splits using 75% of
random.shuffle(imagePaths)

print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print("trained model loaded.")

# pre-process the images for feature extraction
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)
# ImageToArrayPreprocessor is not needed here, since we already pass a
# NumPy array as input
# even though we have the entire dataset, we still pass one image at a time
# to the model, hence the extra batch dimension
eap = ExpandDimPreprocessor(axis=0)
# layer_index=25 is hard-coded; inspect model.summary() to choose an
# appropriate layer
eccf = ExtractCNNCodeAsFeatures(model, layer_index=25)
preprocessors = [srp, nop, eap, eccf]
sdl = SimpleDatasetLoader(preprocessors)

print("[INFO] loading images from disk ... ")
usefulImagePaths, feature_vectors, labels = sdl.load(imagePaths, verbose=100)
print("no of useful images: ", feature_vectors.shape)
print("no of useful labels: ", labels.shape)

# prepare the data to write to a csv: append each label as the last column
# of its feature vector
labels = np.expand_dims(labels, axis=1)
feature_vectors_with_corresponding_labels = np.hstack((feature_vectors, labels))
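# ExtractCNNCodeAsFeatures is a custom preprocessor. A minimal sketch of the
# idea (assumed implementation, assuming tf.keras): build a truncated Model
# whose output is the chosen intermediate layer, and use its activations as
# a feature vector.
from tensorflow.keras.models import Model

class ExtractCNNCodeAsFeatures:
    def __init__(self, model, layer_index):
        # truncated model: same input, output taken at `layer_index`
        self.feature_extractor = Model(
            inputs=model.input,
            outputs=model.layers[layer_index].output)

    def preprocess(self, image):
        # the image is already batched by ExpandDimPreprocessor; return the
        # flattened CNN code for this single image
        return self.feature_extractor.predict(image).flatten()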
"--model", required=True, help="path to save the train model") args = vars(ap.parse_args()) classLabels = ["cat", "dog", "panda"] print("[INFO] sampling images...") imagePaths = np.array(list(paths.list_images(args["dataset"]))) idxs = np.random.randint(0, len(imagePaths), size=(10, )) imagePaths = imagePaths[idxs] sp = SimplePreprocessor(32, 32) iap = ImageToArrayPreprocessor() sdl = SimpleDatasetLoader(preprocessors=[sp, iap]) (data, labels) = sdl.load(imagePaths) data = data.astype("float") print("[INFO] loading pre-trained network...") model = load_model(args["model"]) print("[INFO] predicting...") preds = model.predict(data, batch_size=32).argmax(axis=1) for (i, imagePath) in enumerate(imagePaths): image = cv2.imread(imagePath) cv2.putText(image, "label:{}".format(classLabels[preds[i]]), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) cv2.imshow("Image", image) cv2.waitKey(0)
from imutils import paths
import argparse
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
# SimplePreprocessor and SimpleDatasetLoader are project-local modules
# (import paths omitted here)

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True,
                help='path to the input dataset')
args = vars(ap.parse_args())

# grab the list of image paths
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))

# initialize the image preprocessor, load the dataset from disk,
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sd1 = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sd1.load(imagePaths, verbose=500)
data = data.reshape((data.shape[0], 3072))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# split the data into train and test sets
(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.25)

# loop over our set of regularizers
for r in (None, 'l1', 'l2', 'elasticnet'):
    # train the SGD classifier using a softmax loss function and the
    # specified regularization function for 10 epochs
    print("[INFO] training model with '{}' penalty".format(r))
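    # The loop body stops here in the original; given the comment above
    # (softmax loss, 10 epochs), a plausible continuation (sketch; the eta0
    # and random_state values are assumptions, and scikit-learn >= 1.1
    # spells the softmax loss "log_loss"):
    model = SGDClassifier(loss="log_loss", penalty=r, max_iter=10,
                          learning_rate="constant", eta0=0.01,
                          random_state=42)
    model.fit(trainX, trainY)
    acc = model.score(testX, testY)
    print("[INFO] '{}' penalty accuracy: {:.2f}%".format(r, acc * 100))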
        if fileName.split(".")[-1] in validExtensions:
            # use os.path.join rather than a hard-coded Windows "\\" separator
            imagePath = os.path.join(pathName, fileName)
            imagePaths.append(imagePath)
print("imagePaths[0:5]", imagePaths[0:5])

random.seed(42)
random.shuffle(imagePaths)

srp = SimpleResizePreprocessor(width=96, height=96)
# ImageToArrayPreprocessor is a Keras-specific preprocessor, not needed here
# ExpandDimPreprocessor is not required for training
nop = NormalizePreprocessor(normalizing_factor=255.)
preprocessors = [srp, nop]
sdl = SimpleDatasetLoader(preprocessors)

print("[INFO] loading images from disk ... ")
usefulImagePaths, data, labels = sdl.load(imagePaths, verbose=100)
print("no of useful images: ", data.shape)
print("no of useful labels: ", labels.shape)

le = LabelEncoder()
integer_labels = le.fit_transform(labels)

print("[INFO] one hot encoding labels ... ")
lb = LabelBinarizer()
one_hot_encoded_labels = lb.fit_transform(labels)
print("labels encoded.")

# 75% of the data for training
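# The snippet ends at the split comment above; a plausible continuation
# (sketch, assuming scikit-learn's train_test_split; random_state mirrors
# the random.seed(42) used earlier but is an assumption):
from sklearn.model_selection import train_test_split

(trainX, testX, trainY, testY) = train_test_split(
    data, one_hot_encoded_labels, test_size=0.25, random_state=42)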