def load_data(datasetpath, preprocessors):
  # grab the list of images that we’ll be describing
  print("[INFO] loading images...")
  imagePaths = list(paths.list_images(datasetpath))
  
  # load the dataset from disk then scale the raw pixel intensities
  # to the range [0, 1]
  sdl = SimpleDatasetLoader(preprocessors=preprocessors)
  (data, labels) = sdl.load(imagePaths, verbose=500)
  data = data.astype("float") / 255.0
  
  # partition the data into training and testing splits using 75% of
  # the data for training and the remaining 25% for testing
  (trainX, testX, trainY, testY) = train_test_split(
      data, labels, test_size=0.25, random_state=42)
  
  # convert the labels from integers to vectors
  lb = LabelBinarizer()
  lb.fit(labels)
  trainY = lb.transform(trainY)
  testY = lb.transform(testY)
  classNames = lb.classes_
  # LabelBinarizer yields a single column for two-class problems, so stack
  # its complement to obtain a proper one-hot matrix
  if len(classNames) < 3:
    trainY = np.hstack((trainY, 1 - trainY))
    testY = np.hstack((testY, 1 - testY))

  return ((trainX, trainY), (testX, testY), classNames)
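
# A minimal usage sketch for load_data() above (an illustration, not part of
# the original listing). SimplePreprocessor and ImageToArrayPreprocessor are
# the preprocessors used elsewhere in this listing; the dataset path is a
# placeholder.
sp = SimplePreprocessor(32, 32)
iap = ImageToArrayPreprocessor()
((trainX, trainY), (testX, testY), classNames) = load_data(
    "path/to/dataset/train", preprocessors=[sp, iap])
print("[INFO] classes: {}".format(classNames))
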
def split_17flowers(traindir):
  # create one sub-directory per flower class
  for dir_id in range(17):
    os.makedirs(os.path.join(traindir, 'dir_' + str(dir_id)), exist_ok=True)

  # pair each image path with its base filename, then use the 4-digit index
  # embedded in the filename to pick the destination class directory
  # (the 17 Category Flower dataset stores 80 images per class)
  imagepaths = [(f, os.path.basename(f)) for f in paths.list_images(traindir)]
  imagepaths = [(f, os.path.join(traindir, 'dir_' + str((int(i) - 1) // 80), n))
                for (f, n) in imagepaths
                for i in re.findall(r'(\d{4})', n)]
  
  for (f, fn) in imagepaths:
    os.rename(f, fn)
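
# A quick sanity check of the mapping above (illustration only; assumes the
# 17 Category Flower filename convention image_0001.jpg ... image_1360.jpg
# and reuses the `re` module imported for split_17flowers):
for name in ('image_0001.jpg', 'image_0080.jpg', 'image_0081.jpg', 'image_1360.jpg'):
  idx = int(re.findall(r'(\d{4})', name)[0])
  print('{} -> dir_{}'.format(name, (idx - 1) // 80))
# prints dir_0, dir_0, dir_1, dir_16 respectively
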
def split_dog_cat_image_files(traindir):
  catdir = os.path.join(traindir, 'cat')
  dogdir = os.path.join(traindir, 'dog')
  os.makedirs(catdir, exist_ok=True)
  os.makedirs(dogdir, exist_ok=True)
  
  # pair each image path with its base filename, then route it to the dog/
  # or cat/ sub-directory based on the filename prefix
  imagepaths = [(f, os.path.basename(f)) for f in paths.list_images(traindir)]
  imagepaths = [(f, os.path.join(dogdir if n.startswith('dog') else catdir, n))
                for (f, n) in imagepaths]
  
  for (f, fn) in imagepaths:
    os.rename(f, fn)
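
# Illustrative usage (paths are placeholders): both split helpers rearrange a
# flat training folder into one sub-directory per class, which is the layout
# the feature-extraction step below relies on, since the class label is taken
# from each image's parent directory.
# split_dog_cat_image_files('path/to/dogs_vs_cats/train')
# split_17flowers('path/to/17flowers/train')
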
def extract_features(dataset_path, output_file, buffer_size, batch_size):
    bs = batch_size
    imagePaths = list(paths.list_images(dataset_path))
    random.shuffle(imagePaths)
    print("[INFO] number of images... {}".format(len(imagePaths)))

    # extract the class labels from the image paths then encode the
    # labels
    labels = [p.split(os.path.sep)[-2] for p in imagePaths]
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    # initialize the HDF5 dataset writer, then store the class label
    # names in the dataset
    feature_size = 512 * 7 * 7
    dataset = HDF5DatasetWriter((len(imagePaths), feature_size),
                                output_file,
                                dataKey="features",
                                bufSize=buffer_size)
    dataset.storeClassLabels(le.classes_)

    # load the VGG16 network
    print("[INFO] loading network...")
    model = VGG16(weights="imagenet", include_top=False)

    # VGG16 expects 224 x 224 RGB input images
    image_size = (224, 224)
    for i, (batchLabels, batchImages) in enumerate(
            get_data_(imagePaths, labels, bs, image_size)):
        # pass the images through the network and use the outputs as
        # our actual features
        # the final batch may contain fewer than `batch_size` images
        curBatchSize = len(batchImages)
        print("[INFO] processing batch... {}/{}".format(
            i + 1, int(np.ceil(len(imagePaths) / float(bs)))))
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=curBatchSize)

        # reshape the features so that each image is represented by
        # a flattened feature vector of the ‘MaxPooling2D‘ outputs
        features = features.reshape((features.shape[0], feature_size))

        # add the features and labels to our HDF5 dataset
        dataset.add(features, batchLabels)

    # close the dataset
    dataset.close()
    print("[INFO] processing completed...")
  def getData(self, datasetpath):
    # initialize the class labels
    self.classLabels = ["cat", "dog", "panda"]

    # grab the list of images in the dataset then randomly sample
    # indexes into the image paths list
    print("[INFO] sampling images...")
    self.imagePaths = np.array(list(paths.list_images(datasetpath)))
    idxs = np.random.randint(0, len(self.imagePaths), size=(10,))
    self.imagePaths = self.imagePaths[idxs]

    # initialize the image preprocessors
    sp = SimplePreprocessor(32, 32)
    iap = ImageToArrayPreprocessor()

    # load the dataset from disk then scale the raw pixel intensities
    # to the range [0, 1]
    sdl = SimpleDatasetLoader(preprocessors=[sp, iap])
    (self.data, self.labels) = sdl.load(self.imagePaths)
    self.data = self.data.astype("float") / 255.0
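
  # An illustrative follow-up method (assumed, not from the original listing):
  # once getData() has sampled and preprocessed the images, a trained Keras
  # model could classify them and the predictions could be mapped back to the
  # class labels initialized above.
  def predictSampled(self, model):
    # predict a class index for each sampled image and report it alongside
    # the image path, using the human-readable class labels
    preds = model.predict(self.data, batch_size=32).argmax(axis=1)
    for (path, pred) in zip(self.imagePaths, preds):
      print("[INFO] {}: {}".format(path, self.classLabels[pred]))
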
Example #6
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tnmlearn.preprocessing import SimplePreprocessor
from tnmlearn.datasets import SimpleDatasetLoader
from tnmlearn.other import paths
import argparse

# construct the argument parse and parse the arguments
# C:\Users\hbad483\Documents\Anaconda\datasets\animals\train
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
args = vars(ap.parse_args())

# grab the list of image paths
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))

sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.reshape((data.shape[0], 3072))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,