def main():
    """Resize every image of a dataset to 64x64 and write the results to disk.

    Command-line arguments:
        -d / --dataset: path to the input dataset.
        -o / --output:  path to the output directory (created if missing).

    Each image is written as ``<label>_<index>.png`` inside the output
    directory, where ``index`` is the image's position in the loaded data.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dataset", required=True,
                    help="path to input dataset")
    ap.add_argument("-o", "--output", required=True,
                    help="path to output directory")
    args = vars(ap.parse_args())

    # get filenames from all subdirectories
    imagePaths = list(paths.list_images(args["dataset"]))

    # instantiate preprocessor and data loader
    print("[INFO] resizing images...\n")
    sp = SimplePreprocessor(64, 64)
    sdl = SimpleDatasetLoader(preprocessors=[sp])

    # load and resize data
    (data, labels) = sdl.load(imagePaths, verbose=500)

    # Make sure the destination exists before writing; otherwise every write
    # below targets a missing directory.
    os.makedirs(args["output"], exist_ok=True)

    # write to output directory using the label and order as filename
    for (i, (image, label)) in enumerate(zip(data, labels)):
        fname = os.path.join(args["output"], f"{label}_{i}.png")
        # label[:-1] drops the last character of the label for the log line
        # (presumably turning a plural folder name into a singular -- confirm).
        print(
            f"[INFO] writing a resized {label[:-1]} to {os.path.basename(fname)}"
        )
        imwrite(fname, image)
def main():
    """Compare SGD classifiers trained with no, L1 and L2 penalty.

    Command-line arguments:
        -d / --dataset: path to the input dataset.

    The images are resized to 32x32, flattened to 3072 values each, split
    75/25 into train/test sets, and the test accuracy of each penalty
    setting is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dataset", required=True,
                        help="path to input dataset")
    cli = vars(parser.parse_args())

    print("[INFO] loading images...")
    # collect the file paths of the dataset
    image_paths = list(paths.list_images(cli["dataset"]))

    # preprocess every image under the dataset folder to a uniform 32x32 size
    resizer = SimplePreprocessor(32, 32)
    loader = SimpleDatasetLoader(preprocessors=[resizer])

    # flatten the RGB colour channels of each image into one 1-D row
    data, labels = loader.load(image_paths, verbose=500)
    sample_count = data.shape[0]
    data = data.reshape((sample_count, 3072))

    # map the labels to integers
    labels = LabelEncoder().fit_transform(labels)

    trainX, testX, trainY, testY = train_test_split(
        data, labels, test_size=0.25, random_state=5)

    # NOTE(review): newer scikit-learn releases renamed loss="log" to
    # "log_loss" -- confirm against the pinned version.
    for penalty in (None, "l1", "l2"):
        print("[INFO] training model with '{}' penalty".format(penalty))
        model = SGDClassifier(
            loss="log",
            penalty=penalty,
            max_iter=50,
            learning_rate="constant",
            eta0=0.001,
            random_state=42,
        )
        model.fit(trainX, trainY)
        accuracy = model.score(testX, testY)
        print("[INFO] '{}' penalty accuracy:{:.3f}%".format(penalty, accuracy * 100))
def get_images_and_labels():
    """Load the dataset and return contrast-equalized images with their labels.

    Reads the dataset location from the module-level ``args["dataset"]``.
    Every image is loaded through a 32x32 ``SimplePreprocessor``, resized to
    32x32 again with ``transform.resize`` and passed through
    ``exposure.equalize_adapthist`` (CLAHE) with ``clip_limit=0.1``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays.
    """
    # grab the list of images that we'll be describing
    print("[INFO] loading images...")
    image_paths = list(paths.list_images(args["dataset"]))

    # initialize the image preprocessor and load the dataset from disk
    loader = SimpleDatasetLoader(preprocessors=[SimplePreprocessor(32, 32)])
    raw_images, raw_labels = loader.load(image_paths, verbose=500)

    def _resize_and_equalize(picture):
        # resize to 32x32 pixels, ignoring aspect ratio, then apply Contrast
        # Limited Adaptive Histogram Equalization (CLAHE)
        shrunk = transform.resize(picture, (32, 32))
        return exposure.equalize_adapthist(shrunk, clip_limit=0.1)

    # convert the data and labels to NumPy arrays
    equalized = np.array([_resize_and_equalize(picture) for picture in raw_images])
    return equalized, np.array(raw_labels)
print(submission.head())
print("[INFO] expect to predict =", submission.shape)

## augmentation
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
# Read the per-channel means with a context manager so the file handle is
# closed right away instead of being left to the garbage collector.
with open(DATASET_MEAN) as mean_file:
    means = json.load(mean_file)
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test")

# load in images
print("[INFO] loading test images....")
imagePaths = list(paths.list_images(args["dataset"]))
print("[INFO] fetched %d images to test" % len(imagePaths))
data, names = sdl.load(imagePaths, verbose=1e4)
testX = data.astype("float") / 255.0
# image id = file name up to its first "."
imageIds = [name.split(".")[0] for name in names]

## load in models & predict
with tf.device("/cpu:0"):
    model = load_model(MODEL, custom_objects={"f1_score": f1_score})

# create distribute strategy for TF2.0
# NOTE(review): multi_gpu_model is used together with MirroredStrategy here;
# confirm both are intended for the TensorFlow version in use.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    parallel_model = multi_gpu_model(model, gpus=2)
    #parallel_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=METRICS)

predictions = parallel_model.predict(testX, batch_size=BATCH)
# index of the highest-scoring class for every test image
pred_labels = predictions.argmax(axis=1)
# Keep only the files whose extension (text after the last ".") is accepted.
for fileName in fileNames:
    if fileName.split(".")[-1] in validExtensions:
        imagePaths.append(pathName+"/"+fileName)
# print("imagePaths:",imagePaths)

new_width = cmd_dict['width']
new_height = cmd_dict['height']

sp = SimplePreprocessor(new_width, new_height)
sfp = SimpleFlattenPreprocessor()
# The preprocessors form an ordered sequence and the order matters:
# first we resize, then we flatten.
sdl = SimpleDatasetLoader(preprocessors = [sp,sfp])

# After every 500 iterations we would want to see the progress.
(data, labels) = sdl.load(imagePaths, verbose=500)
#print("data.shape", data.shape)
#print("Example string labels",labels[0:5])

# Information about the memory consumption of the images, with 3 digits
# after the decimal point. The divisor is 1024 * 1024 so the figure is a
# consistent unit (the previous 1024 * 1000 mixed binary and decimal units).
print("[INFO] feature matrix : {:.3f}MB".format(data.nbytes / (1024 * 1024.0)))

# Map the string labels (class names) to integers.
le = LabelEncoder()
labels = le.fit_transform(labels)
# le.classes_ attribute will have the corresponding string labels.
#print("Example integer labels", labels[0:5])

# Partition the data into training and testing.
# Generally, 75 percent is kept for training and 25 percent for testing.
# Since it is the vanilla implementation, training and testing is done on the
# images directly. In practice, we extract features from the images and the
# training and testing data consist of feature vectors.
print(category_df.head()) print(category_df.tail()) category_id_mapping = category_df.groupby("name")["id"].apply( list).to_dict() # {'acinonyx jubatus': [122]} print("[INFO] loading images....") imagePaths = list(paths.list_images(args["dataset"])) #imagePaths = imagePaths[:1000] classNames = [path.split(os.path.sep)[-2] for path in imagePaths] classes = len(set(classNames)) print("[INFO] fetched %d classes and %d images in total" % (classes, len(imagePaths))) ## prepare images & labels print("[INFO] loading data...") data, labels = sdl.load(imagePaths, verbose=1e4) data = data.astype("float") / 255.0 labels = np.array(labels) lb = LabelBinarizer() labels = lb.fit_transform(labels) # serialize encoded label => category_id in annotation files print("[INFO] serizaling encoded_class to category_id mapping...") encoded_class = lb.classes_ # the order of set(classNames) to be encoded encoded_label_mapping = {} for i, name in enumerate(encoded_class): encoded_label_mapping[str(i)] = name mapping_dict = { "encodedLabel_to_className": encoded_label_mapping,
from keras.utils import to_categorical
from keras import layers
from keras import models
from keras import regularizers
from sklearn.model_selection import train_test_split
from simpledatasetloader import SimpleDatasetLoader
import numpy as np
from matplotlib import pylab as pl

# Load the inputs (data_x) and targets (data_y) from the two image folders.
dloader = SimpleDatasetLoader()
(data_x, data_y) = dloader.load(
    '../SMILEsmileD/SMILEs/positives/positives7',
    '../SMILEsmileD/SMILEs/positives/laplacian',
    1)

# Flatten every target into a vector of 64 * 64 values.
N = data_y.shape[0]
data_y = np.array([data_y[n].reshape((64 * 64)) for n in range(N)])

# Hold out 40% of the samples for testing.
(train_x, test_x, train_y, test_y) = train_test_split(
    data_x, data_y, test_size=0.40)

# Convolutional network on 64x64 single-channel inputs.
model = models.Sequential()
#kernel_regularizer = regularizers.l2(10.1),
model.add(
    layers.Conv2D(10, (3, 3), activation='relu', input_shape=(64, 64, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(20, (3, 3), activation='relu'))
""" vars takes an object as a parameter; e.g{'dataset': '../animal_image_dog_cat_and_panda/', 'neighbors': 5, 'jobs': -1} """
args = vars(parser.parse_args())

## load images
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
#print(imagePaths[:10])
# e.g. '../animal_image_dog_cat_and_panda/panda/panda_00528.jpg'

# initiate the image preprocessor, set fixed image size
simpro = SimplePreprocessor(32, 32)
simloader = SimpleDatasetLoader(preprocessors=[simpro])
data, labels = simloader.load(imagePaths, verbose=500)
# flatten every image into a single row of 32 * 32 * 3 values
data = data.reshape(data.shape[0], 32 * 32 * 3)

# Show information about memory consumption. The divisor is 1024 * 1024 so
# the figure is a consistent unit (the previous 1024 * 1000 mixed binary and
# decimal units).
print("[INFO] features matrix consumes %.1f MB" %
      (data.nbytes / (1024 * 1024.0)))

## encoder & split dataset
le = LabelEncoder()
labels = le.fit_transform(labels)

# split dataset into train & test (75% / 25%)
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                random_state=42)
default=-1, help='# of jobs for k-NN distance (-1 uses all available cores)') args = ap.parse_args() # args = vars(ap.parse_args()) # print(args) # grab the list of images that we'll be describing print('[INFO] loading images...') imagesPaths = list(paths.list_images(args.dataset)) # initialze the image processor, load the dataset from disk # and resape the data matrix sp = SimplePreprocessor(32, 32) sd1 = SimpleDatasetLoader(preprocessors=[sp]) (data, labels) = sd1.load( imagesPaths, verbose=500 ) # verbose is used for yielding more information about the on going process. data = data.reshape( (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) # show some information on the memory consumption of the images print('[INFO] features matrix: {:.1f}MB'.format(data.nbytes / (1024 * 1000.0))) # encode the labels as integers le = LabelEncoder() labels = le.fit_transform(labels) # partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing (trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25,
type=int, default=-1, help="# of jobs for knn distance") args = vars(ap.parse_args()) print("[INFO] loading images...") imagePaths = list(paths.list_images(args["dataset"])) # get paths to images # initialize the image processor, load dataset from disk # and reshape the data matrix sp = SimplePreprocessor(32, 32) #rescale all images to 32X32 pixels sdl = SimpleDatasetLoader(preprocessors=[sp]) # initialize loader (data, labels) = sdl.load( imagePaths, verbose=500) # load images - returns 2-tuple with images and labels data.reshape( (data.shape[0], 3072)) # flatten images into a 3000 x 3072 numpy array # 3072 = 32x32x3 print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1000.00))) # build training and testing splits #encode labels as integers le = LabelEncoder() lables = le.fit_transform(labels) #partition the data into training and testing splits using 75% of #the data for training and the remianing 25% for testing
print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print("trained model loaded.")

# Pre-process the image for classification. The preprocessors are applied in
# the order they appear in the list below.
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)
# iap = ImageToArrayPreprocessor()
# ^ Not really needed, since a numpy array itself is passed as input.

# Although the entire dataset is available, images are still passed to the
# model one at a time.
eap = ExpandDimPreprocessor(axis=0)

# The layer index 25 is hard-coded; an appropriate value can be read from
# model.summary().
eccf = ExtractCNNCodeAsFeatures(model, layer_index=25)

preprocessors = [srp, nop, eap, eccf]
sdl = SimpleDatasetLoader(preprocessors)

print("[INFO] loading images to disk ... ")
usefulImagePaths, feature_vectors, labels = sdl.load(imagePaths,
                                                     verbose=100)
print("no of useful images: ", feature_vectors.shape)
print("no of useful labels: ", labels.shape)
#print("feature_vectors[0]")
#print(feature_vectors[0])
#print("feature_vectors[0].shape")
#print(feature_vectors[0].shape)
#print("labels[0]")
#print(labels[0])

# Give the labels an extra axis at position 1 before stacking them next to
# the feature vectors.
labels = np.expand_dims(labels, axis=1)

''' Preparing data to write to a csv '''
feature_vectors_with_corresponding_labels = np.hstack(
    (feature_vectors, labels))
print("feature_vectors_with_corresponding_labels.shape")
required=True, help="path to save the train model") args = vars(ap.parse_args()) classLabels = ["cat", "dog", "panda"] print("[INFO] sampling images...") imagePaths = np.array(list(paths.list_images(args["dataset"]))) idxs = np.random.randint(0, len(imagePaths), size=(10, )) imagePaths = imagePaths[idxs] sp = SimplePreprocessor(32, 32) iap = ImageToArrayPreprocessor() sdl = SimpleDatasetLoader(preprocessors=[sp, iap]) (data, labels) = sdl.load(imagePaths) data = data.astype("float") print("[INFO] loading pre-trained network...") model = load_model(args["model"]) print("[INFO] predicting...") preds = model.predict(data, batch_size=32).argmax(axis=1) for (i, imagePath) in enumerate(imagePaths): image = cv2.imread(imagePath) cv2.putText(image, "label:{}".format(classLabels[preds[i]]), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) cv2.imshow("Image", image) cv2.waitKey(0)
imagePaths.append(imagePath) print("imagePaths[0:5]", imagePaths[0:5]) random.seed(42) random.shuffle(imagePaths) srp = SimpleResizePreprocessor(width=96, height=96) #iap = ImageToArrayPreprocessor() # keras specific preprocessor #edp = ExpandDimPreprocessor(axis=0) # Not required for training! nop = NormalizePreprocessor(normalizing_factor=255.) preprocessors = [srp, nop] sdl = SimpleDatasetLoader(preprocessors) print("[INFO] loading images to disk ... ") usefulImagePaths, data, labels = sdl.load(imagePaths, verbose=100) print("no of useful images: ", data.shape) print("no of useful labels: ", labels.shape) le = LabelEncoder() integer_labels = le.fit_transform(labels) print("[INFO] one hot encoding labels ... ") lb = LabelBinarizer() one_hot_encoded_labels = lb.fit_transform(labels) print("labels encoded.") # 75% of the data for training # 25% of the data for testing print("[INFO] spliting the dataset to train and test ... ") (trainX, testX, trainY, testY) = train_test_split(data,