Python SimpleDatasetLoader.load 예제들, simpledatasetloader.SimpleDatasetLoader.load Python 예제들

예제 #1

0

파일 보기

def main():
    """Resize every image of a dataset and write the results as PNG files.

    Command-line arguments:
        -d/--dataset: path to the input dataset.
        -o/--output: directory that receives the resized images. It is
            created when it does not exist yet.

    Each output file is named ``<label>_<index>.png``, where ``<index>`` is
    the position of the image in the loaded dataset.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-d",
                    "--dataset",
                    required=True,
                    help="path to input dataset")
    ap.add_argument("-o",
                    "--output",
                    required=True,
                    help="path to output directory")
    args = vars(ap.parse_args())

    # get filenames from all subdirectories
    imagePaths = list(paths.list_images(args["dataset"]))

    # make sure the destination exists before any image is written; an empty
    # path means "current directory" and needs no creation
    if args["output"]:
        os.makedirs(args["output"], exist_ok=True)

    ## instantiate preprocessor and data loader
    print("[INFO] resizing images...\n")
    sp = SimplePreprocessor(64, 64)
    sdl = SimpleDatasetLoader(preprocessors=[sp])

    ## load and resize data
    (data, labels) = sdl.load(imagePaths, verbose=500)

    # write to output directory using the label and order as filename
    for (i, (image, label)) in enumerate(zip(data, labels)):
        fname = os.path.join(args["output"], f"{label}_{i}.png")
        # label[:-1] drops the label's last character for the log message
        # (presumably turning a plural class folder such as "cats" into
        # "cat" - TODO confirm)
        print(
            f"[INFO] writing a resized {label[:-1]} to {os.path.basename(fname)}"
        )
        imwrite(fname, image)

예제 #2

0

파일 보기

def main():
	"""Compare SGD classifiers trained with different penalties.

	Reads the dataset path from the ``-d/--dataset`` command-line argument,
	loads the images resized to 32x32, flattens each one into a row of 3072
	values, and splits the data 75/25 into training and test sets. One
	``SGDClassifier`` is then trained per penalty (``None``, ``"l1"``,
	``"l2"``) and its test accuracy is printed.
	"""
	parser = argparse.ArgumentParser()
	parser.add_argument(
		"-d",
		"--dataset",
		required=True,
		help="path to input dataset",
	)
	cli = vars(parser.parse_args())

	print("[INFO] loading images...")
	# collect the file paths of the dataset
	image_paths = list(paths.list_images(cli["dataset"]))

	# preprocess the images in the dataset folder to a uniform 32x32 size
	loader = SimpleDatasetLoader(preprocessors=[SimplePreprocessor(32, 32)])
	(data, labels) = loader.load(image_paths, verbose=500)

	# flatten the three RGB channels of every image into one 1-D row
	data = data.reshape((data.shape[0], 3072))

	# encode the labels as integers
	labels = LabelEncoder().fit_transform(labels)

	(trainX, testX, trainY, testY) = train_test_split(
		data,
		labels,
		test_size=0.25,
		random_state=5,
	)

	for penalty in (None, "l1", "l2"):
		print("[INFO] training model with '{}' penalty".format(penalty))
		model = SGDClassifier(
			loss="log",
			penalty=penalty,
			max_iter=50,
			learning_rate="constant",
			eta0=0.001,
			random_state=42,
		)
		model.fit(trainX, trainY)

		accuracy = model.score(testX, testY)
		print("[INFO] '{}' penalty accuracy:{:.3f}%".format(penalty, accuracy * 100))

예제 #3

0

파일 보기

def get_images_and_labels():
	"""Load the dataset and return contrast-equalized 32x32 images with labels.

	The dataset location is read from the module-level ``args["dataset"]``.

	Returns:
		A pair ``(resized_images, label)`` of NumPy arrays: the processed
		images and the labels reported by the dataset loader.
	"""
	print("[INFO] loading images...")
	image_paths = list(paths.list_images(args["dataset"]))

	# load the dataset from disk through the 32x32 image preprocessor
	loader = SimpleDatasetLoader(preprocessors=[SimplePreprocessor(32, 32)])
	(images, label) = loader.load(image_paths, verbose=500)

	# resize each image to 32x32 pixels, ignoring aspect ratio, and then
	# perform Contrast Limited Adaptive Histogram Equalization (CLAHE)
	equalized = [
		exposure.equalize_adapthist(
			transform.resize(image, (32, 32)),
			clip_limit=0.1,
		)
		for image in images
	]

	# convert the data and labels to NumPy arrays
	return np.array(equalized), np.array(label)

예제 #4

0

파일 보기

파일: predict.py 프로젝트: zlyin/Kaggle_iWildCam

print(submission.head())
print("[INFO] expect to predict =", submission.shape)

## build the preprocessing pipeline for the test images
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
# Load the per-channel means from the JSON file. The context manager closes
# the file deterministically instead of leaving the open handle to the
# garbage collector.
with open(DATASET_MEAN) as mean_file:
    means = json.load(mean_file)
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
sdl = SimpleDatasetLoader(preprocessors=[aap, mp, iap], mode="test")

# load in images
print("[INFO] loading test images....")
imagePaths = list(paths.list_images(args["dataset"]))
print("[INFO] fetched %d images to test" % len(imagePaths))

# the second value returned by the loader is used as the image file names
data, names = sdl.load(imagePaths, verbose=1e4)
testX = data.astype("float") / 255.0
# image id = file name up to its first "."
imageIds = [name.split(".")[0] for name in names]

## load in models & predict
with tf.device("/cpu:0"):
    model = load_model(MODEL, custom_objects={"f1_score": f1_score})

# create distribute strategy for TF2.0
# NOTE(review): multi_gpu_model is deprecated in newer TensorFlow releases and
# is combined here with a MirroredStrategy scope - confirm against the
# TensorFlow version this script targets.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    parallel_model = multi_gpu_model(model, gpus=2)

predictions = parallel_model.predict(testX, batch_size=BATCH)
# index of the highest-scoring output for every image
pred_labels = predictions.argmax(axis=1)

예제 #5

0

파일 보기

	# Keep only the files whose extension (the text after the last ".") is
	# listed in validExtensions, and record their path as
	# "<pathName>/<fileName>".
	# NOTE(review): the enclosing loop that provides pathName and fileNames
	# is not visible in this excerpt.
	for fileName in fileNames:
		if fileName.split(".")[-1] in validExtensions:
			imagePaths.append(pathName+"/"+fileName)

# print("imagePaths:",imagePaths)

# Target size that every image is resized to, taken from the command dict.
new_width = cmd_dict['width']
new_height = cmd_dict['height']

# The preprocessor list is an ordered sequence and the order matters:
# first resize, then flatten.
sp = SimplePreprocessor(new_width, new_height)
sfp = SimpleFlattenPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[sp, sfp])

# Load the dataset; progress is reported after every 500 iterations.
(data, labels) = sdl.load(imagePaths, verbose=500)

# Report the memory consumption of the feature matrix with 3 digits after the
# decimal point. One megabyte is 1024 * 1024 bytes; the previous divisor of
# 1024 * 1000 mixed binary and decimal units and overstated the size.
print("[INFO] feature matrix : {:.3f}MB".format(data.nbytes / (1024 * 1024.0)))

# Map the string labels (class names) to integers. The le.classes_ attribute
# keeps the corresponding string labels.
le = LabelEncoder()
labels = le.fit_transform(labels)
#print("Example integer labels", labels[0:5])


# partition the data into training and testing. 
# Generally, 75 percent is kept for training and 25 percent for testing.
# Since it is the Vanilla implementation, training and testing is done on the images directly. In practice, we extract features from the images
# and the training and testing data consists of featureVectors.

예제 #6

0

파일 보기

파일: train_parallel.py 프로젝트: zlyin/Kaggle_iWildCam

# preview both ends of the category table
print(category_df.head())
print(category_df.tail())

# category name -> list of its ids, e.g. {'acinonyx jubatus': [122]}
category_id_mapping = category_df.groupby("name")["id"].apply(list).to_dict()

print("[INFO] loading images....")
imagePaths = list(paths.list_images(args["dataset"]))
# the class name is the second-to-last component of each image path
classNames = [path.split(os.path.sep)[-2] for path in imagePaths]
classes = len(set(classNames))
print("[INFO] fetched %d classes and %d images in total" % (classes, len(imagePaths)))

## prepare images & labels
print("[INFO] loading data...")
data, labels = sdl.load(imagePaths, verbose=1e4)
# scale the pixel data by 1/255
data = data.astype("float") / 255.0

# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(np.array(labels))

# serialize encoded label => category_id in annotation files
print("[INFO] serizaling encoded_class to category_id mapping...")
# lb.classes_ gives the order in which the class names were encoded
encoded_class = lb.classes_
encoded_label_mapping = {}
for i, name in enumerate(encoded_class):
    # keys are the stringified positions of the class names
    encoded_label_mapping[str(i)] = name

mapping_dict = {
    "encodedLabel_to_className": encoded_label_mapping,

예제 #7

0

파일 보기

from keras.utils import to_categorical
from keras import layers
from keras import models
from keras import regularizers
from sklearn.model_selection import train_test_split
from simpledatasetloader import SimpleDatasetLoader
import numpy as np
from matplotlib import pylab as pl

dloader = SimpleDatasetLoader()

# load the two data sets; data_x is used as model input and data_y as target
# NOTE(review): the meaning of the trailing argument 1 is defined by the
# loader and is not visible here
(data_x, data_y) = dloader.load(
    '../SMILEsmileD/SMILEs/positives/positives7',
    '../SMILEsmileD/SMILEs/positives/laplacian',
    1,
)

# flatten every target sample into a vector of 64 * 64 values
N = data_y.shape[0]
dat = []
for n in range(N):
    dat.append(data_y[n].reshape((64 * 64)))
data_y = np.array(dat)
del dat

# hold out 40% of the samples for testing
(train_x, test_x, train_y, test_y) = train_test_split(
    data_x, data_y, test_size=0.40)

# convolutional model taking 64x64 single-channel input
model = models.Sequential()
model.add(
    layers.Conv2D(10, (3, 3), activation='relu', input_shape=(64, 64, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(20, (3, 3), activation='relu'))

예제 #8

0

파일 보기

파일: knn.py 프로젝트: zlyin/Orca

"""
vars takes an object as a parameter; 
e.g{'dataset': '../animal_image_dog_cat_and_panda/', 'neighbors': 5, 'jobs': -1}
"""
# parse the command line into a plain dict of argument values
args = vars(parser.parse_args())

## load images
print("[INFO] loading images...")
# e.g. '../animal_image_dog_cat_and_panda/panda/panda_00528.jpg'
imagePaths = list(paths.list_images(args["dataset"]))

# initiate the image preprocessor with a fixed 32x32 image size, load the
# dataset, and flatten every image into one row of 32 * 32 * 3 values
simpro = SimplePreprocessor(32, 32)
simloader = SimpleDatasetLoader(preprocessors=[simpro])
data, labels = simloader.load(imagePaths, verbose=500)
data = data.reshape(data.shape[0], 32 * 32 * 3)

# show information about memory consumption; one megabyte is 1024 * 1024
# bytes (the previous divisor of 1024 * 1000 mixed binary and decimal units)
print("[INFO] features matrix consumes %.1f MB" % (data.nbytes /
                                                   (1024 * 1024.0)))

## encoder & split dataset
# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# split dataset into train & test (75% / 25%) with a fixed random state
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                random_state=42)

예제 #9

0

파일 보기

파일: knn.py 프로젝트: hafiz031/pyimage-Learning

    default=-1,
    help='# of jobs for k-NN distance (-1 uses all available cores)')
# arguments are kept as a Namespace here, so they are read as attributes
args = ap.parse_args()

# grab the list of images that we'll be describing
print('[INFO] loading images...')
imagesPaths = list(paths.list_images(args.dataset))

# initialize the image preprocessor, load the dataset from disk
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sd1 = SimpleDatasetLoader(preprocessors=[sp])
# verbose makes the loader report more information about the ongoing process
(data, labels) = sd1.load(imagesPaths, verbose=500)
# flatten every image: (n, d1, d2, d3) -> (n, d1 * d2 * d3)
data = data.reshape(
    (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))

# show some information on the memory consumption of the images; one
# megabyte is 1024 * 1024 bytes (the previous divisor of 1024 * 1000 mixed
# binary and decimal units)
print('[INFO] features matrix: {:.1f}MB'.format(data.nbytes / (1024 * 1024.0)))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,

예제 #10

0

파일 보기

파일: KNN.py 프로젝트: RodIba/NN

                type=int,
                default=-1,
                help="# of jobs for knn distance")

args = vars(ap.parse_args())

print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))  # get paths to images

# initialize the image processor, load dataset from disk
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)  # rescale all images to 32x32 pixels
sdl = SimpleDatasetLoader(preprocessors=[sp])  # initialize loader
# load images - returns a 2-tuple with images and labels
(data, labels) = sdl.load(imagePaths, verbose=500)

# Flatten every image into a row of 3072 values (3072 = 32 x 32 x 3).
# ndarray.reshape returns a new array and leaves the original untouched, so
# the result has to be assigned back; previously it was discarded and the
# data stayed unflattened.
data = data.reshape((data.shape[0], 3072))

# one megabyte is 1024 * 1024 bytes (the previous divisor of 1024 * 1000
# mixed binary and decimal units)
print("[INFO] features matrix: {:.1f}MB".format(data.nbytes /
                                                (1024 * 1024.0)))

# build training and testing splits
# encode labels as integers; the result used to be stored only under the
# misspelled name "lables", which left "labels" unencoded
le = LabelEncoder()
labels = le.fit_transform(labels)
# keep the misspelled name as an alias in case code further down refers to it
lables = labels

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing

예제 #11

0

파일 보기

파일: extract_feature_vectors.py 프로젝트: PollenJain/Implementation-of-VGG-Net

print("[INFO] loading trained model ... ")
model = load_model(args["model"])
print("trained model loaded.")

# pre-processing chain applied to every image, in list order
srp = SimpleResizePreprocessor(width=96, height=96)
nop = NormalizePreprocessor(normalizing_factor=255.)
# An ImageToArrayPreprocessor is not needed because the input is already a
# NumPy array. Images are passed to the model one at a time even though the
# entire dataset is available, hence the extra leading axis.
eap = ExpandDimPreprocessor(axis=0)
# The layer index 25 is hard-coded; choose it from model.summary().
eccf = ExtractCNNCodeAsFeatures(model, layer_index=25)
preprocessors = [srp, nop, eap, eccf]
sdl = SimpleDatasetLoader(preprocessors)

print("[INFO] loading images to disk ... ")
usefulImagePaths, feature_vectors, labels = sdl.load(imagePaths, verbose=100)

print("no of useful images: ", feature_vectors.shape)
print("no of useful labels: ", labels.shape)

# Preparing data to write to a csv: give the labels a second axis and append
# them to the feature vectors as extra column(s).
labels = np.expand_dims(labels, axis=1)
feature_vectors_with_corresponding_labels = np.hstack(
    (feature_vectors, labels))

print("feature_vectors_with_corresponding_labels.shape")

예제 #12

0

파일 보기

파일: shallownet_animals-test.py 프로젝트: ibrahim85/pyimagesearch_code

                required=True,
                help="path to save the train model")
args = vars(ap.parse_args())

# class names, indexed by the predicted class id
classLabels = ["cat", "dog", "panda"]

# draw 10 random indexes into the list of image paths and keep those images
print("[INFO] sampling images...")
imagePaths = np.array(list(paths.list_images(args["dataset"])))
idxs = np.random.randint(0, len(imagePaths), size=(10, ))
imagePaths = imagePaths[idxs]

# load the sampled images through the 32x32 resize and image-to-array steps
sp = SimplePreprocessor(32, 32)
iap = ImageToArrayPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[sp, iap])
(data, labels) = sdl.load(imagePaths)
data = data.astype("float")

print("[INFO] loading pre-trained network...")
model = load_model(args["model"])

# the predicted class of an image is the index of its largest output
print("[INFO] predicting...")
preds = model.predict(data, batch_size=32).argmax(axis=1)

# show every sampled image with its predicted label drawn on top and wait
# for a key press before moving on
for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath)
    caption = "label:{}".format(classLabels[preds[i]])
    cv2.putText(image, caption, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                (0, 255, 0), 2)
    cv2.imshow("Image", image)
    cv2.waitKey(0)

예제 #13

0

파일 보기

            imagePaths.append(imagePath)

# Show the first five collected image paths as a sanity check.
print("imagePaths[0:5]", imagePaths[0:5])

# Shuffle the paths in place; the fixed seed makes the order reproducible.
random.seed(42)
random.shuffle(imagePaths)

# Preprocessing chain handed to the loader: resize to 96x96, then normalize
# with a factor of 255.
srp = SimpleResizePreprocessor(width=96, height=96)
#iap = ImageToArrayPreprocessor() # keras specific preprocessor
#edp = ExpandDimPreprocessor(axis=0) # Not required for training!
nop = NormalizePreprocessor(normalizing_factor=255.)
preprocessors = [srp, nop]
sdl = SimpleDatasetLoader(preprocessors)

# The loader returns three values here: the image paths it kept, the image
# data and the labels.
# NOTE(review): the message says "to disk" although the call loads the images
# - confirm the intended wording.
print("[INFO] loading images to disk ... ")
usefulImagePaths, data, labels = sdl.load(imagePaths, verbose=100)

print("no of useful images: ", data.shape)
print("no of useful labels: ", labels.shape)

# Integer encoding of the labels.
le = LabelEncoder()
integer_labels = le.fit_transform(labels)

# Binarized encoding of the same labels, fitted independently of the
# integer encoding above.
print("[INFO] one hot encoding labels ... ")
lb = LabelBinarizer()
one_hot_encoded_labels = lb.fit_transform(labels)
print("labels encoded.")
# 75% of the data for training
# 25% of the data for testing
print("[INFO] spliting the dataset to train and test ... ")
(trainX, testX, trainY, testY) = train_test_split(data,