def get_datasets(self):
    print("Prepare datasets loading:")
    if self.run_local:
        self.train_dir = os.path.join(path0, 'raw_data', 'test_VGG16', 'Train')
        self.test_dir = os.path.join(path0, 'raw_data', 'test_VGG16', 'Test')
    else:
        # self.train_dir = f"gs://{BUCKET_NAME}/{BUCKET_DATA_PATH}/Train"
        # self.test_dir = f"gs://{BUCKET_NAME}/{BUCKET_DATA_PATH}/Test"
        self.train_dir = local_dir + BUCKET_DATA_PATH + "/Train/Top_12"
        self.test_dir = local_dir + BUCKET_DATA_PATH + "/Test/Top_12"
    train_dataset = image_dataset_from_directory(self.train_dir,
                                                 shuffle=True,
                                                 batch_size=BATCH_SIZE,
                                                 image_size=IMG_SIZE,
                                                 label_mode='categorical')
    test_dataset = image_dataset_from_directory(self.test_dir,
                                                shuffle=True,
                                                batch_size=BATCH_SIZE,
                                                image_size=IMG_SIZE,
                                                label_mode='categorical')
    train_batches = cardinality(train_dataset)
    validation_dataset = train_dataset.take(train_batches // 5)
    train_train_dataset = train_dataset.skip(train_batches // 5)
    self.train_train_dataset = train_train_dataset.prefetch(buffer_size=AUTOTUNE)
    self.validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
    self.test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)
    self.class_names = train_dataset.class_names
    self.n_artist = len(self.class_names)
    print(f'Number of detected classes: {self.n_artist}')
    print(f'Number of train/val/test batches: '
          f'{cardinality(train_train_dataset)}/{cardinality(validation_dataset)}/{cardinality(test_dataset)}')
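# Note on the take/skip split above: slicing a shuffled dataset can mix the
# train/validation subsets if the source reshuffles between epochs. A minimal
# alternative sketch (assuming the same BATCH_SIZE/IMG_SIZE constants) that
# lets Keras carve out the validation subset deterministically:
#
#   train_train_dataset = image_dataset_from_directory(
#       self.train_dir, validation_split=0.2, subset="training", seed=42,
#       batch_size=BATCH_SIZE, image_size=IMG_SIZE, label_mode='categorical')
#   validation_dataset = image_dataset_from_directory(
#       self.train_dir, validation_split=0.2, subset="validation", seed=42,
#       batch_size=BATCH_SIZE, image_size=IMG_SIZE, label_mode='categorical')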
def loadData(self,
             imgDir,
             valimgDir='',
             randomseed=1993,
             image_size=(256, 256),
             batch_size=32,
             split=[0.8, 0.2]):
    if valimgDir == '':
        # No separate validation directory: split imgDir (8:2 by default).
        self.train_ds = image_dataset_from_directory(
            imgDir,
            validation_split=split[1],
            subset="training",
            seed=randomseed,
            image_size=image_size,
            batch_size=batch_size,
            label_mode='categorical')
        self.val_ds = image_dataset_from_directory(
            imgDir,
            validation_split=split[1],
            subset="validation",
            seed=randomseed,
            image_size=image_size,
            batch_size=batch_size,
            label_mode='categorical')
    else:
        self.train_ds = image_dataset_from_directory(
            imgDir,
            image_size=image_size,
            batch_size=batch_size,
            shuffle=True,
            label_mode='categorical')
        self.val_ds = image_dataset_from_directory(
            valimgDir,
            image_size=image_size,
            batch_size=batch_size,
            shuffle=True,
            label_mode='categorical')
    self.image_size = image_size
    self.batch_size = batch_size
    newClassNames = self.train_ds.class_names

    # Configure the dataset for performance
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    self.train_ds = self.train_ds.prefetch(buffer_size=AUTOTUNE)
    self.val_ds = self.val_ds.prefetch(buffer_size=AUTOTUNE)

    if self.classNames is None:
        self.classNames = newClassNames
    print('The names of the classes are: ', newClassNames)
    if newClassNames != self.classNames:
        print('Error. Folder names {} do not match the predefined classNames: {}'.format(
            newClassNames, self.classNames))
    return
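# Usage sketch for loadData (paths are illustrative; assumes the enclosing
# class initialises self.classNames to None before the first call):
#
#   loader.loadData('data/train', valimgDir='data/val',
#                   image_size=(256, 256), batch_size=32)
#   print(loader.classNames)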
def load_dataset(self, data_path: str = r"Dataset") -> None:
    # Class names and count (sorted, to match the alphanumeric class order
    # that image_dataset_from_directory infers)
    self.CLASS_NAMES = sorted(os.listdir(os.path.join(data_path, "Test")))
    self.CLASS_COUNT = len(self.CLASS_NAMES)
    print("class count: " + str(self.CLASS_COUNT))
    print(self.CLASS_NAMES)

    with open("NetworkInfo.txt", "w") as f:
        f.write("Output layer node count: " + str(self.CLASS_COUNT) + "\n\n")
        f.writelines(self.CLASS_NAMES)

    # Load data and squeeze images to the bounding box
    self.train_ds = image_dataset_from_directory(
        directory=os.path.join(data_path, "Train"),
        labels='inferred',
        label_mode='categorical',
        batch_size=self.BATCH_SIZE,
        image_size=(self.IMAGE_WIDTH, self.IMAGE_HEIGHT),
        color_mode="grayscale",
    )
    self.test_ds = image_dataset_from_directory(
        directory=os.path.join(data_path, "Test"),
        labels='inferred',
        label_mode='categorical',
        batch_size=self.BATCH_SIZE,
        image_size=(self.IMAGE_WIDTH, self.IMAGE_HEIGHT),
        color_mode="grayscale",
    )
    # The datasets are already batched by image_dataset_from_directory, so no
    # extra .batch() call is needed (and its return value would have to be
    # reassigned to take effect anyway).
def preprocess():
    data_dir = pathlib.Path("images")
    batch_size = 32
    img_height = 256
    img_width = 256
    train_ds = preprocessing.image_dataset_from_directory(
        data_dir,
        label_mode='int',
        color_mode='grayscale',
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size)
    val_ds = preprocessing.image_dataset_from_directory(
        data_dir,
        label_mode='int',
        color_mode='grayscale',
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size)
    return train_ds, val_ds
def create_dataset(img_dir,
                   mask_dir,
                   img_prep,
                   mask_prep,
                   datagen_args,
                   num_parallel_calls=AUTO):
    """create_dataset

    Loads images and segmentation masks from the specified directories and
    preprocesses them with the specified functions.

    Args:
        img_dir: directory for image files
        mask_dir: directory for mask files
        img_prep: function for image preprocessing
        mask_prep: function for mask preprocessing
        datagen_args: kwargs passed to image_dataset_from_directory
        num_parallel_calls: number of map calls to run in parallel

    Returns:
        (image_ds, mask_ds), a tuple of the preprocessed image and mask
        datasets
    """
    # Load image data
    print("Loading image data...")
    image_ds = image_dataset_from_directory(img_dir, **datagen_args)
    image_ds = image_ds.map(img_prep, num_parallel_calls=num_parallel_calls)

    # Load segmentation data
    print("\nLoading segmentation data...")
    mask_ds = image_dataset_from_directory(mask_dir, **datagen_args)
    mask_ds = mask_ds.map(mask_prep, num_parallel_calls=num_parallel_calls)

    return image_ds, mask_ds
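# Usage sketch for create_dataset: the returned datasets can be zipped into
# (image, mask) pairs for segmentation training. The paths and preprocessing
# functions below are illustrative assumptions; shuffle=False keeps images
# and masks aligned by sorted file order:
#
#   datagen_args = dict(label_mode=None, image_size=(256, 256),
#                       batch_size=8, shuffle=False)
#   image_ds, mask_ds = create_dataset('data/images', 'data/masks',
#                                      img_prep, mask_prep, datagen_args)
#   dataset = tf.data.Dataset.zip((image_ds, mask_ds))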
def create_data(
        dataset_url='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
        fname='flower_photos',
        validation_split=0.2,
        seed=123,
        batch_size=32,
        img_height=180,
        img_width=180):
    data_dir = pathlib.Path(get_file(origin=dataset_url, fname=fname, untar=True))
    train_ds = image_dataset_from_directory(data_dir,
                                            validation_split=validation_split,
                                            subset='training',
                                            seed=seed,
                                            image_size=(img_height, img_width),
                                            batch_size=batch_size)
    val_ds = image_dataset_from_directory(data_dir,
                                          validation_split=validation_split,
                                          subset="validation",
                                          seed=seed,
                                          image_size=(img_height, img_width),
                                          batch_size=batch_size)
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    return (train_ds.cache().prefetch(buffer_size=AUTOTUNE),
            val_ds.cache().prefetch(buffer_size=AUTOTUNE),
            train_ds.class_names)
def load_dataset(url, BATCH_SIZE, IMG_SIZE):
    path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip',
                                          origin=url,
                                          extract=True)
    PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
    train_dir = os.path.join(PATH, 'train')
    validation_dir = os.path.join(PATH, 'validation')

    train_dataset = image_dataset_from_directory(train_dir,
                                                 shuffle=True,
                                                 batch_size=BATCH_SIZE,
                                                 image_size=IMG_SIZE)
    validation_dataset = image_dataset_from_directory(validation_dir,
                                                      shuffle=True,
                                                      batch_size=BATCH_SIZE,
                                                      image_size=IMG_SIZE)
    class_names = train_dataset.class_names
    show_images(train_dataset, class_names)

    # Carve a fifth of the validation batches out as a test set
    val_batches = tf.data.experimental.cardinality(validation_dataset)
    test_dataset = validation_dataset.take(val_batches // 5)
    validation_dataset = validation_dataset.skip(val_batches // 5)

    AUTOTUNE = tf.data.AUTOTUNE
    train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
    validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
    test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

    return (train_dataset, validation_dataset, test_dataset, class_names)
def loadStanfordDatasets():
    """Example of use

    training, validation, testing = loadStanfordDatasets()
    print(training)
    from tensorflow.data.experimental import cardinality
    print(cardinality(training).numpy())
    print(training.class_names)
    """
    rand_seed = int(datetime.now().timestamp())
    training = image_dataset_from_directory(
        STANFORD_TRAINING,
        labels="inferred",
        label_mode="int",
        class_names=None,
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        shuffle=True,
        seed=rand_seed,
        validation_split=VAL_SPLIT,
        subset="training",
        interpolation="bilinear",
        follow_links=False,
    )
    validation = image_dataset_from_directory(
        STANFORD_TRAINING,
        labels="inferred",
        label_mode="int",
        class_names=None,
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        shuffle=True,
        seed=rand_seed,
        validation_split=VAL_SPLIT,
        subset="validation",
        interpolation="bilinear",
        follow_links=False,
    )
    testing = image_dataset_from_directory(
        STANFORD_TESTING,
        labels="inferred",
        label_mode="int",
        class_names=None,
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        shuffle=True,
        seed=None,
        validation_split=None,
        subset=None,
        interpolation="bilinear",
        follow_links=False,
    )
    return training, validation, testing
def get_data():
    train_data = image_dataset_from_directory('data',
                                              image_size=(32, 32),
                                              seed=123,
                                              subset="training",
                                              validation_split=0.3)
    test_data = image_dataset_from_directory('data',
                                             image_size=(32, 32),
                                             seed=123,
                                             subset="validation",
                                             validation_split=0.3)
    return train_data, test_data
def get_dataset(train_dir, test_dir, test_size=32):
    # a normalisation layer
    normalization_layer = Rescaling(1. / 255)

    # load the training images with the default batch size (i.e., 32)
    train_dataset = image_dataset_from_directory(train_dir,
                                                 color_mode='grayscale',
                                                 label_mode=None)
    # load the testing images with a specified batch size
    test_dataset = image_dataset_from_directory(test_dir,
                                                color_mode='grayscale',
                                                label_mode=None,
                                                batch_size=test_size)

    # normalise the training images
    normalized_train = train_dataset.map(lambda x: normalization_layer(x))
    # normalise the testing images
    normalized_test = test_dataset.map(lambda x: normalization_layer(x))

    # return the training and testing datasets
    return normalized_train, normalized_test
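# Usage sketch for get_dataset (paths are illustrative): the returned datasets
# yield unlabelled image batches rescaled to [0, 1]:
#
#   train_ds, test_ds = get_dataset('data/train', 'data/test', test_size=64)
#   batch = next(iter(train_ds))
#   print(batch.shape, float(tf.reduce_min(batch)), float(tf.reduce_max(batch)))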
def train(self, epochs=5, batch_size=16):
    model_save = ModelCheckpoint('./results/best_model.h5',
                                 save_best_only=True,
                                 save_weights_only=True,
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1)
    early_stop = EarlyStopping(monitor='val_loss',
                               min_delta=0.001,
                               patience=5,
                               mode='min',
                               verbose=1,
                               restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.3,
                                  patience=2,
                                  min_delta=0.001,
                                  mode='min',
                                  verbose=1)
    img_height = 800
    img_width = 600
    train_set = image_dataset_from_directory(self.training_path,
                                             validation_split=0.2,
                                             subset="training",
                                             seed=123,
                                             image_size=(img_height, img_width),
                                             batch_size=batch_size)
    validation_set = image_dataset_from_directory(self.training_path,
                                                  validation_split=0.2,
                                                  subset="validation",
                                                  seed=123,
                                                  image_size=(img_height, img_width),
                                                  batch_size=batch_size)
    """
    Yey.. tf.data.AUTOTUNE isn't available in 2.3.1, or something is not
    right on the dev machine. We are not yet ready to make tiny_v01 crash
    and burn, so we will improve this in a later version.
    """
    # train_set = train_set.cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
    # validation_set = validation_set.cache().prefetch(tf.data.AUTOTUNE)
    tiny_model = define_learning_model()
    # fit() accepts tf.data datasets directly; the deprecated fit_generator
    # is not needed here
    history = tiny_model.fit(train_set,
                             validation_data=validation_set,
                             epochs=epochs,
                             callbacks=[model_save, early_stop, reduce_lr])
    summary(epochs, tiny_model, history, validation_set)
def create_data_set(data_path,
                    label_mode='categorical',
                    validation_split=0.2,
                    image_size=(224, 224),
                    batch_size=32,
                    random_seed=1):
    """Build the training and validation datasets.

    Args:
        data_path: directory containing the images, with each class stored in
            its own subfolder whose name is the class name.
        label_mode:
            - 'int' for 'sparse_categorical_crossentropy';
            - 'categorical' for 'categorical_crossentropy';
            - 'binary' for 'binary_crossentropy'
        validation_split: validation fraction, 0.2 by default; a value <= 0
            or None means no validation split is made.
        image_size:
        batch_size:
        random_seed: random seed

    Returns:
        (ds_train, ds_val), or ds_train alone when no split is made.
    """
    from tensorflow.keras.preprocessing import image_dataset_from_directory

    if validation_split and validation_split <= 0:
        validation_split = None

    if validation_split is None:
        # subset/seed must be omitted when validation_split is not set;
        # passing subset alone makes image_dataset_from_directory raise a
        # ValueError
        return image_dataset_from_directory(data_path,
                                            label_mode=label_mode,
                                            image_size=image_size,
                                            batch_size=batch_size)

    ds_train = image_dataset_from_directory(
        data_path,
        label_mode=label_mode,
        validation_split=validation_split,
        subset="training",
        seed=random_seed,
        image_size=image_size,
        batch_size=batch_size,
    )
    ds_val = image_dataset_from_directory(
        data_path,
        label_mode=label_mode,
        validation_split=validation_split,
        subset="validation",
        seed=random_seed,
        image_size=image_size,
        batch_size=batch_size,
    )
    return ds_train, ds_val
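# Usage sketch for create_data_set (path is illustrative); note the return
# type depends on validation_split:
#
#   ds_train, ds_val = create_data_set('data/flowers', validation_split=0.2)
#   ds_all = create_data_set('data/flowers', validation_split=None)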
def _load_hoh_dataset(tmpdirname):
    # load dataset from archive; return dataset tuple for training and testing
    archive = get_path_to(IMAGE_DATASET)
    with ZipFile(archive, 'r') as zf:
        zf.extractall(tmpdirname)

    # scale pixels from [0, 255] to [-1, 1]
    norm = Rescaling(1. / 127.5, offset=-1)

    path = os.path.join(tmpdirname, 'train')
    train_ds = image_dataset_from_directory(path, image_size=(32, 32), seed=42)
    train_ds = train_ds.map(lambda x, y: (norm(x), tf.one_hot(y, depth=2)))

    path = os.path.join(tmpdirname, 'test')
    val_ds = image_dataset_from_directory(path, image_size=(32, 32), seed=42)
    val_ds = val_ds.map(lambda x, y: (norm(x), tf.one_hot(y, depth=2)))

    return (train_ds, val_ds)
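# Note: for the two-class case above, mapping tf.one_hot over the default
# 'int' labels matches what one-hot labels would give directly; a minimal
# alternative sketch:
#
#   train_ds = image_dataset_from_directory(path, image_size=(32, 32),
#                                           seed=42, label_mode='categorical')
#   train_ds = train_ds.map(lambda x, y: (norm(x), y))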
def main():
    """
    main method
    :return:
    """
    # compute_learning_curves("test")
    # get_failed_training_images()
    # get_training_and_test_accuracy()
    test_data = image_dataset_from_directory(
        "Data/Crewmate Identifier/Test Data",
        shuffle=False,
        image_size=constants.crewmate_dimensions)
    model = tf.keras.models.load_model(constants.crewmate_identifier)
    labels = np.concatenate([labels for images, labels in test_data])
    predictions = np.argmax(model.predict(test_data), axis=1)
    # classification_report expects (y_true, y_pred) in that order
    print(classification_report(labels, predictions,
                                target_names=constants.crewmate_color_ids))
def get_training_and_test_accuracy():
    """
    gets the training and test accuracy of a model
    """
    model = tf.keras.models.load_model("Game Classifier.h5")
    training_data = image_dataset_from_directory(
        "Data/Game Classifier/Training Data", image_size=constants.dimensions)
    test_data = image_dataset_from_directory(
        "Data/Game Classifier/Test Data", image_size=constants.dimensions)
    model.evaluate(training_data)
    model.evaluate(test_data)
def create_data_generator(self):
    data_generator = {}
    for split in self.samples_dir:
        data_generator[split] = preprocessing.image_dataset_from_directory(
            self.samples_dir[split],
            labels="inferred",
            label_mode="categorical",
            class_names=[
                "Neutral", "Anger", "Disgust", "Fear", "Happiness",
                "Sadness", "Surprise"
            ],
            color_mode="rgb",
            batch_size=32,
            image_size=(self.IMG_HEIGHT, self.IMG_WIDTH),
            shuffle=True,
            validation_split=None,
            subset=None,
            interpolation="gaussian",
            follow_links=False)
        # Optimize the dataset using buffered prefetching to avoid blocking I/O
        data_generator[split] = data_generator[split].prefetch(buffer_size=32)
    return data_generator
def get_training_and_test_accuracy():
    """
    gets the training and test accuracy of a model
    """
    model = tf.keras.models.load_model(constants.crewmate_identifier)
    training_data = image_dataset_from_directory(
        "Data/Crewmate Identifier/Training Data",
        image_size=constants.crewmate_dimensions)
    test_data = image_dataset_from_directory(
        "Data/Crewmate Identifier/Test Data",
        image_size=constants.crewmate_dimensions)
    model.evaluate(training_data)
    model.evaluate(test_data)
def load_all_dataset(directory: str,
                     batch_size=16,
                     img_size=(640, 640),
                     seed=42) -> tf.data.Dataset:
    return image_dataset_from_directory(directory=directory,
                                        batch_size=batch_size,
                                        image_size=img_size,
                                        seed=seed).map(_img_as_float)
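# _img_as_float is defined elsewhere; a plausible sketch (an assumption, not
# the original helper) that rescales uint8 pixels to [0, 1] and passes the
# integer labels through unchanged:
#
#   def _img_as_float(image, label):
#       return tf.cast(image, tf.float32) / 255.0, label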
def get_val_ds():
    # returns validation tf.data.Dataset
    val_ds = image_dataset_from_directory(
        "Datasets/val/",
        shuffle=True,
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE).prefetch(AUTOTUNE)
    return val_ds
def get_train_ds():
    # returns train tf.data.Dataset
    # cache() before prefetch() so cached elements feed the prefetch buffer
    train_ds = (image_dataset_from_directory(
        "Datasets/train/",
        shuffle=True,
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE).cache().prefetch(AUTOTUNE))
    return train_ds
def execute(self):
    self.training_dataset = image_dataset_from_directory(
        self.training_dataset_directory,
        seed=1337,
        image_size=(100, 100),
        batch_size=32,
    )
    # prefetch returns a new dataset; the result must be kept
    self.training_dataset = self.training_dataset.prefetch(buffer_size=32)
    self.dataset_model.set_training_dataset(self.training_dataset)
def compute_learning_curves(name):
    """
    computes the learning curves of the current architecture and outputs them
    into a text file
    :param name: name of the learning curve
    :return: None
    """
    # initialize the training data
    training_data = image_dataset_from_directory(
        "Data/Game Classifier/Training Data")
    test_data = image_dataset_from_directory("Data/Game Classifier/Test Data")

    # number of training batches (cardinality of a batched dataset)
    N = training_data.cardinality().numpy()

    # file heading
    file = open(name + constants.learning_curve_extension, "w+")
    file.write("Data Size" + constants.delimiter + name +
               " training accuracy" + constants.delimiter + name +
               " test accuracy\n")

    # iterate over all the different dataset fractions
    for dataset_fraction in constants.dataset_fractions:
        # repeat the training the specified number of times
        sample_size = int(dataset_fraction * N)
        sample = training_data.take(sample_size)
        for i in range(constants.test_repeats):
            model = trainer.train_model(sample)
            # evaluate() returns [loss, accuracy] for a model compiled with
            # an accuracy metric; it does not accept a metrics argument
            _, training_acc = model.evaluate(sample)
            _, test_acc = model.evaluate(test_data)
            file.write(str(sample_size) + ", " + str(training_acc) + ", " +
                       str(test_acc) + "\n")
    file.close()
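# Note: cardinality() and take() above count batches, not images. A sketch for
# sampling by example count instead (assumes the default batch size of 32 used
# by image_dataset_from_directory):
#
#   n_examples = int(dataset_fraction * N * 32)
#   sample = training_data.unbatch().take(n_examples).batch(32)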
def main():
    """
    main method
    :return: None
    """
    # print(os.path.exists("Data/Game Classifier/Training Data"))
    training_data = image_dataset_from_directory(
        "Data/Game Classifier/Training Data", image_size=constants.dimensions)
    model = train_model(training_data)
    test_data = image_dataset_from_directory(
        "Data/Game Classifier/Test Data", image_size=constants.dimensions)
    model.evaluate(test_data)
    model.save(constants.game_classifier)
def execute(self):
    self.logger.log(str(self.validation_dataset_directory))
    self.validation_dataset = image_dataset_from_directory(
        self.validation_dataset_directory,
        seed=1337,
        image_size=(100, 100),
        batch_size=32,
    )
    # prefetch returns a new dataset; the result must be kept
    self.validation_dataset = self.validation_dataset.prefetch(buffer_size=32)
    self.dataset_model.set_validation_dataset(self.validation_dataset)
def preprocessing(dir_path):
    print("qiu_dir_path", dir_path)
    picture_data = image_dataset_from_directory(dir_path,
                                                shuffle=True,
                                                batch_size=BATCH_SIZE,
                                                image_size=IMG_SIZE)
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    picture_data = picture_data.prefetch(buffer_size=AUTOTUNE)
    image, _ = next(iter(picture_data))
    return image
def data_split(val_split=0.2, seed=8):
    '''
    Method to generate train, validation and test image datasets

    Class arguments:
    val_split -- percentage of data from training directory to be used for
                 validation
    seed -- int used to shuffle and split data for training/validation

    Returns:
    Tuple with three tf.data.Datasets for training, validation and test data,
    respectively
    '''
    train_dir = DATA_PATH + ('TrainingNoLake' if EXCLUDE_LAKE else 'Training')
    test_dir = DATA_PATH + 'Test'
    print(f'Using seed {seed}...')

    print('\nLoading training data...')
    train_data = image_dataset_from_directory(train_dir,
                                              label_mode='binary',
                                              batch_size=BATCH_SIZE,
                                              image_size=(DIM, DIM),
                                              validation_split=val_split,
                                              seed=seed,
                                              subset="training")
    print('\nLoading validation data...')
    val_data = image_dataset_from_directory(train_dir,
                                            label_mode='binary',
                                            batch_size=BATCH_SIZE,
                                            image_size=(DIM, DIM),
                                            validation_split=val_split,
                                            seed=seed,
                                            subset="validation")
    print('\nLoading test data...')
    test_data = image_dataset_from_directory(test_dir,
                                             label_mode='binary',
                                             batch_size=BATCH_SIZE,
                                             image_size=(DIM, DIM))
    return train_data, val_data, test_data
def __init__(self, train_path, validation_path, batch_size, image_size):
    """
    Reads in different paths to a data.Dataset

    :param train_path: string of the path to the training dataset
    :param validation_path: string of the path to the validation dataset
    :param batch_size: int containing the batch size to use
    :param image_size: tuple of size 2 containing the size to resize the
        images to
    """
    self.train_dataset = image_dataset_from_directory(train_path,
                                                      shuffle=True,
                                                      batch_size=batch_size,
                                                      image_size=image_size)
    self.validation_dataset = image_dataset_from_directory(
        validation_path,
        shuffle=True,
        batch_size=batch_size,
        image_size=image_size)
def get_data(data_dir=DATA_DIR):
    train_ds = image_dataset_from_directory(data_dir,
                                            color_mode=COLOR_MODE,
                                            validation_split=VALIDATION_SPLIT,
                                            subset='training',
                                            image_size=IMAGE_SIZE,
                                            batch_size=BATCH_SIZE,
                                            label_mode=LABEL_MODE,
                                            seed=SEED)
    val_ds = image_dataset_from_directory(data_dir,
                                          color_mode=COLOR_MODE,
                                          validation_split=VALIDATION_SPLIT,
                                          subset='validation',
                                          image_size=IMAGE_SIZE,
                                          batch_size=BATCH_SIZE,
                                          label_mode=LABEL_MODE,
                                          seed=SEED)
    class_names = train_ds.class_names
    return (train_ds, val_ds), class_names
def create_datasets(dataset_dir):
    train_ds = image_dataset_from_directory(
        dataset_dir,
        batch_size=batch_size,
        image_size=(crop_size, crop_size),
        validation_split=0.2,
        subset="training",
        seed=1337,
        label_mode=None,
    )
    valid_ds = image_dataset_from_directory(
        dataset_dir,
        batch_size=batch_size,
        image_size=(crop_size, crop_size),
        validation_split=0.2,
        subset="validation",
        seed=1337,
        label_mode=None,
    )
    return train_ds, valid_ds
def train(model):
    # Note: monitoring 'val_loss' only takes effect if validation data is
    # passed to fit(); without it the checkpoint falls back with a warning
    checkpoint = ModelCheckpoint(save_name,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=True,
                                 mode='auto',
                                 save_freq='epoch')
    specDir = os.path.join("output", "specs")
    dataset = image_dataset_from_directory(specDir,
                                           labels='inferred',
                                           image_size=(129, 1))
    history = model.fit(dataset, epochs=10, callbacks=[checkpoint])