def load_dataset_in_memory_and_resize(data_access, set, division, dataset_path, targets_path, tmp_size, final_size, batch_size):
    """
    Load a whole dataset split in memory and resize its images if needed.

    :param data_access: "in-memory" or "fuel"
    :param set: name of the split to load; forwarded to the dataset class and
        used in the timer messages (the name shadows the builtin, it is kept
        for backward compatibility with keyword callers)
    :param division: forwarded to the dataset class
    :param dataset_path: forwarded to InMemoryDataset (only used when
        data_access == "in-memory")
    :param targets_path: forwarded to InMemoryDataset as source_targets (only
        used when data_access == "in-memory")
    :param tmp_size: size handed to FuelDataset; also compared to final_size
        to decide whether a resize pass is needed
    :param final_size: target size, indexed as final_size[0], final_size[1],
        final_size[2] to build the output array
    :param batch_size: forwarded to FuelDataset (only used when
        data_access == "fuel")
    :return: tuple(images, targets). When tmp_size != final_size, images is a
        new float32 array of shape
        (n_images, final_size[0], final_size[1], final_size[2]); otherwise the
        loaded data is returned untouched.
    :raises ValueError: if data_access is neither "in-memory" nor "fuel"
    """
    if data_access == "in-memory":
        with timer("Loading %s data" % set):
            dataset = InMemoryDataset(set, dataset_path, source_targets=targets_path,
                                      division=division)
            # Copy the arrays so that the dataset object itself can be released.
            draw_data = np.copy(dataset.dataset)
            targets = np.copy(dataset.targets)
            del dataset
    elif data_access == "fuel":
        with timer("Loading %s data" % set):
            dataset = FuelDataset(set, tmp_size, batch_size=batch_size, shuffle=False,
                                  division=division)
            draw_data, targets = dataset.return_whole_dataset()
            del dataset
    else:
        # ValueError is still an Exception, so existing `except Exception`
        # handlers keep working.
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    # NOTE: plain != comparison, so a list and a tuple holding the same values
    # are considered different and trigger a resize pass.
    if tmp_size != final_size:
        # Resize every loaded image to the final size.
        n_images = draw_data.shape[0]
        out = np.zeros((n_images, final_size[0], final_size[1], final_size[2]),
                       dtype="float32")
        with timer("Resizing %s images" % set):
            for i in range(n_images):
                out[i] = resize_pil(draw_data[i], final_size[0:2])
        # Drop the reference to the un-resized data before returning.
        del draw_data
        return out, targets

    return draw_data, targets
def check_preprocessed_data(data_access, dataset, targets, batch_size, tmp_size, final_size, preprocessing_func, preprocessing_args, n=10):
    """
    Preprocess a single batch, print its shape/dtype and display a few images.

    Note: this file defines check_preprocessed_data a second time right after
    this definition; the later definition is the one bound at import time.
    One of the two should be removed.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed to InMemoryDataset as source (not used when
        data_access == "fuel")
    :param targets: passed to InMemoryDataset as source_targets (not used when
        data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (only used when
        data_access == "fuel")
    :param final_size: size of a preprocessed image; the processed batch is
        allocated as (n_samples, final_size[2], final_size[0], final_size[1])
    :param preprocessing_func: called as
        preprocessing_func(image, *preprocessing_args); its result is
        transposed with (2, 0, 1) before being stored
    :param preprocessing_args: extra positional arguments of preprocessing_func
    :param n: maximum number of images to display
    :raises ValueError: if data_access is neither "in-memory" nor "fuel"
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        # The fuel branch always inspects the unshuffled "test" set of the
        # "leaderboard" division.
        train_dataset = FuelDataset("test", tmp_size, batch_size=batch_size,
                                    division="leaderboard", shuffle=False)
    else:
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    # Compute only one batch
    start = time.time()
    batch, batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    # Use the number of samples actually returned: the buffer below is sized
    # from it, so iterating over batch_size would overflow on a short batch.
    n_samples = batch.shape[0]
    processed_batch = np.zeros((n_samples, final_size[2], final_size[0], final_size[1]),
                               dtype="float32")
    for k in range(n_samples):
        processed_batch[k] = preprocessing_func(batch[k], *preprocessing_args).transpose(2, 0, 1)
    end = time.time()

    # Single-argument print calls: same output with the Python 2 print
    # statement and the Python 3 print function.
    print("Batch Shape =  %s with dtype = %s" % (processed_batch.shape, processed_batch.dtype))
    print("Targets Shape = %s with dtype = %s" % (batch_targets.shape, batch_targets.dtype))

    # Axis 1 of processed_batch is the channel axis (final_size[2]); decide
    # on it rather than on the raw batch, whose layout is not known here.
    is_rgb = processed_batch.shape[1] == 3
    # Never try to display more images than the batch contains.
    for i in range(min(n, n_samples)):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)" % (batch_targets[i][0], batch_targets[i][1]))
        if is_rgb:
            # imshow is given a channel-last image here
            plt.imshow(processed_batch[i].transpose(1, 2, 0))
        else:
            plt.imshow(processed_batch[i, 0])
        plt.show()

    print("Processing 1 batch took : %.5f" % (end - start))
def check_preprocessed_data(data_access, dataset, targets, batch_size, tmp_size, final_size, preprocessing_func, preprocessing_args, n=10):
    """
    Preprocess a single batch, print its shape/dtype and display a few images.

    Note: this is the second definition of check_preprocessed_data in this
    file; it replaces the one defined just before it, which should be removed.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed to InMemoryDataset as source (not used when
        data_access == "fuel")
    :param targets: passed to InMemoryDataset as source_targets (not used when
        data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (only used when
        data_access == "fuel")
    :param final_size: size of a preprocessed image; the processed batch is
        allocated as (n_samples, final_size[2], final_size[0], final_size[1])
    :param preprocessing_func: called as
        preprocessing_func(image, *preprocessing_args); its result is
        transposed with (2, 0, 1) before being stored
    :param preprocessing_args: extra positional arguments of preprocessing_func
    :param n: maximum number of images to display
    :raises ValueError: if data_access is neither "in-memory" nor "fuel"
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        # The fuel branch always inspects the unshuffled "test" set of the
        # "leaderboard" division.
        train_dataset = FuelDataset("test", tmp_size, batch_size=batch_size,
                                    division="leaderboard", shuffle=False)
    else:
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    # Compute only one batch
    start = time.time()
    batch, batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    # Use the number of samples actually returned: the buffer below is sized
    # from it, so iterating over batch_size would overflow on a short batch.
    n_samples = batch.shape[0]
    processed_batch = np.zeros((n_samples, final_size[2], final_size[0], final_size[1]),
                               dtype="float32")
    for k in range(n_samples):
        processed_batch[k] = preprocessing_func(batch[k], *preprocessing_args).transpose(2, 0, 1)
    end = time.time()

    # Single-argument print calls: same output with the Python 2 print
    # statement and the Python 3 print function.
    print("Batch Shape =  %s with dtype = %s" % (processed_batch.shape, processed_batch.dtype))
    print("Targets Shape = %s with dtype = %s" % (batch_targets.shape, batch_targets.dtype))

    # Axis 1 of processed_batch is the channel axis (final_size[2]); decide
    # on it rather than on the raw batch, whose layout is not known here.
    is_rgb = processed_batch.shape[1] == 3
    # Never try to display more images than the batch contains.
    for i in range(min(n, n_samples)):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)" % (batch_targets[i][0], batch_targets[i][1]))
        if is_rgb:
            # imshow is given a channel-last image here
            plt.imshow(processed_batch[i].transpose(1, 2, 0))
        else:
            plt.imshow(processed_batch[i, 0])
        plt.show()

    print("Processing 1 batch took : %.5f" % (end - start))
def features_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size, bagging_iterator, multiple_input, preprocessing_func, preprocessing_args, pretrained_model):
    """
    Endless generator yielding features computed by a pretrained model.

    Each iteration fetches a preprocessed batch with get_next_batch and feeds
    it to pretrained_model.predict.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed to InMemoryDataset as source (not used when
        data_access == "fuel")
    :param targets: passed to InMemoryDataset as source_targets (not used when
        data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (only used when
        data_access == "fuel")
    :param final_size: forwarded to get_next_batch
    :param bagging_size: forwarded to FuelDataset as bagging (only used when
        data_access == "fuel")
    :param bagging_iterator: forwarded to FuelDataset (only used when
        data_access == "fuel")
    :param multiple_input: when 1 the batch is given to predict as is,
        otherwise predict receives a list holding the same batch
        multiple_input times
    :param preprocessing_func: forwarded to get_next_batch
    :param preprocessing_args: forwarded to get_next_batch
    :param pretrained_model: object exposing predict()
    :return: yields tuple(features, labels)
    :raises ValueError: on the first iteration, if data_access is neither
        "in-memory" nor "fuel"
    """
    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size,
                                    bagging=bagging_size, bagging_iterator=bagging_iterator)
    else:
        # ValueError is still an Exception, so existing `except Exception`
        # handlers keep working.
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    # Generator loop
    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size,
                                                 preprocessing_func, preprocessing_args)
        if multiple_input == 1:
            model_input = processed_batch
        else:
            # The same array object is referenced multiple_input times.
            model_input = [processed_batch] * multiple_input
        yield pretrained_model.predict(model_input), labels
def multi_features_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size, bagging_iterator, multiple_input, preprocessing_func, preprocessing_args, pretrained_models, mode="concat"):
    """
    Endless generator yielding the features of several pretrained models.

    Each iteration fetches a preprocessed batch with get_next_batch and runs
    every model of pretrained_models on it.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed to InMemoryDataset as source (not used when
        data_access == "fuel")
    :param targets: passed to InMemoryDataset as source_targets (not used when
        data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (only used when
        data_access == "fuel")
    :param final_size: forwarded to get_next_batch
    :param bagging_size: forwarded to FuelDataset as bagging (only used when
        data_access == "fuel")
    :param bagging_iterator: forwarded to FuelDataset (only used when
        data_access == "fuel")
    :param multiple_input: must be 1, multiple inputs are not supported here
    :param preprocessing_func: forwarded to get_next_batch
    :param preprocessing_args: forwarded to get_next_batch
    :param pretrained_models: iterable of objects exposing predict()
    :param mode: "concat" concatenates the per-model features along axis 1;
        any other value yields the list of per-model features unchanged
    :return: yields tuple(features, labels)
    :raises ValueError: on the first iteration, if data_access is neither
        "in-memory" nor "fuel", or if multiple_input != 1
    """
    # Reject bad arguments before touching any data: the multiple_input check
    # used to run only after a dataset was built and a batch preprocessed.
    if data_access not in ("in-memory", "fuel"):
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)
    if multiple_input != 1:
        raise ValueError("Generator does not work with multiple inputs")

    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size,
                                        source_targets=targets)
    else:
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size,
                                    bagging=bagging_size, bagging_iterator=bagging_iterator)

    # Generator loop
    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size,
                                                 preprocessing_func, preprocessing_args)
        # NOTE(review): predict runs with batch_size=1 - presumably to bound
        # memory usage; confirm before raising it.
        features = [model.predict(processed_batch, batch_size=1)
                    for model in pretrained_models]
        if mode == "concat":
            features = np.concatenate(features, axis=1)
        yield features, labels
def images_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size, bagging_iterator, multiple_input, division, preprocessing_func, preprocessing_args):
    """
    Endless generator of preprocessed training batches, meant to be handed to
    a generator-based training loop such as the Keras function 'fit_generator'.
    Works with InMemoryDataset and FuelDataset.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed to InMemoryDataset as source (not used when
        data_access == "fuel")
    :param targets: passed to InMemoryDataset as source_targets (not used when
        data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (only used when
        data_access == "fuel")
    :param final_size: size of the images used for the training, forwarded to
        get_next_batch
    :param bagging_size: forwarded to FuelDataset as bagging (only used when
        data_access == "fuel")
    :param bagging_iterator: forwarded to FuelDataset (only used when
        data_access == "fuel")
    :param multiple_input: when 1 the batch is yielded as is, otherwise a list
        holding the same batch multiple_input times is yielded
    :param division: forwarded to the dataset class
    :param preprocessing_func: function applied to each training batch,
        forwarded to get_next_batch
    :param preprocessing_args: arguments of the preprocessing function,
        forwarded to get_next_batch
    :return: yields tuple(batch, targets)
    :raises ValueError: on the first iteration, if data_access is neither
        "in-memory" nor "fuel"
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size,
                                        source_targets=targets, division=division)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size,
                                    bagging=bagging_size, bagging_iterator=bagging_iterator,
                                    division=division)
    else:
        # ValueError is still an Exception, so existing `except Exception`
        # handlers keep working.
        raise ValueError(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size,
                                                 preprocessing_func, preprocessing_args)
        if multiple_input == 1:
            yield processed_batch, labels
        else:
            # The same array object is referenced multiple_input times.
            yield [processed_batch] * multiple_input, labels