### ADD GENERATED IMAGES
from glob import glob
from data_utils import GENERATED_DATA, to_set, get_label

gen_train_jpg_files = glob(os.path.join(GENERATED_DATA, "train", "jpg", "*.jpg"))
# Recover the image id by stripping "<dir>/gen_train_" and the ".jpg" extension.
_jpg_prefix_len = len(os.path.join(GENERATED_DATA, "train", "jpg")) + 1 + len('gen_train_')
gen_train_jpg_ids = [s[_jpg_prefix_len:-4] for s in gen_train_jpg_files]
gen_id_type_list = [(image_id, "Generated_Train_jpg") for image_id in gen_train_jpg_ids]
# BUG FIX: the original filter called get_label(*gen_id_type_list[0], ...),
# reading the label of the FIRST generated image for every candidate id, so the
# filter kept either all ids or none. Query each image's own label instead.
class_index_gen_train_jpg_ids = [
    image_id for image_id in gen_train_jpg_ids
    if np.sum(get_label(image_id, "Generated_Train_jpg", class_index=class_index)) > 0
]
class_index_gen_id_type_list = [(image_id, "Generated_Train_jpg")
                                for image_id in class_index_gen_train_jpg_ids]
trainval_id_type_list = trainval_id_type_list + class_index_gen_id_type_list
### ADD GENERATED IMAGES
def image_class_labels_provider(image_id_type_list, image_size, class_index,
                                channels_first=True,
                                test_mode=False,
                                seed=None,
                                cache=None,
                                verbose=0, **kwargs):
    """Infinite generator of (image, label) pairs for one target class.

    Endlessly reshuffles `image_id_type_list` and yields, for each
    (image_id, image_type) pair, the image scaled to [0, 1] float32 and its
    label for `class_index` (or ``label=None`` when `class_index` is None).

    Parameters
    ----------
    image_id_type_list : iterable of (image_id, image_type) tuples.
    image_size : 2-tuple target size passed to cv2.resize.
        NOTE(review): the cache branch compares shapes against
        image_size[::-1] while the disk branch compares against image_size
        as-is — these disagree unless image_size is square; confirm which
        (width, height) orientation callers pass.
    class_index : class selector forwarded to get_label, or None for no label.
    channels_first : if True, images are yielded as (C, H, W).
    test_mode : if True, also yield the (image_id, image_type) key and stop
        after a single pass over the list.
    seed : optional numpy RNG seed applied once at start-up.
    cache : optional DataCache-like object with get/put/``in`` support;
        filled only during the first pass (counter == 0).
    verbose : > 0 prints per-item progress and cache hits/misses.
    **kwargs : ignored (accepted for provider-interface compatibility).

    Yields
    ------
    (img, label), or (img, label, (image_id, image_type)) in test mode.
    """
    if seed is not None:
        np.random.seed(seed)
    counter = 0
    # Materialize so the list can be shuffled in place each epoch.
    image_id_type_list = list(image_id_type_list)
    while True:
        np.random.shuffle(image_id_type_list)
        for i, (image_id, image_type) in enumerate(image_id_type_list):
            if verbose > 0:
                print("Image id/type:", image_id, image_type, "| counter=", i)
            key = (image_id, image_type)
            if cache is not None and key in cache:
                if verbose > 0:
                    print("-- Load from RAM")
                img, label = cache.get(key)
                # Cached entries may be stored at another size: resize via a
                # HWC round-trip because cv2.resize expects channels-last.
                if channels_first:
                    if img.shape[1:] != image_size[::-1]:
                        img = img.transpose([1, 2, 0])
                        img = cv2.resize(img, dsize=image_size[::-1])
                        img = img.transpose([2, 0, 1])
                else:
                    if img.shape[:2] != image_size[::-1]:
                        img = cv2.resize(img, dsize=image_size[::-1])
            else:
                if verbose > 0:
                    print("-- Load from disk")
                img = get_image_data(image_id, image_type)
                if img.shape[:2] != image_size:
                    img = cv2.resize(img, dsize=image_size)
                if channels_first:
                    img = img.transpose([2, 0, 1])
                # Normalize 8-bit pixel values into [0, 1].
                img = img.astype(np.float32) / 255.0
                if class_index is not None:
                    label = get_label(image_id, image_type, class_index=class_index)
                else:
                    label = None
                # fill the cache only at first time:
                if cache is not None and counter == 0:
                    cache.put(key, (img, label))
            if test_mode:
                yield img, label, (image_id, image_type)
            else:
                yield img, label
        if test_mode:
            # Single deterministic pass in test mode.
            return
        counter += 1
seed = 2017
np.random.seed(seed)

cache = DataCache(10000)  # !!! CHECK BEFORE LOAD TO FLOYD

class_index = 0
trainval_id_type_list = get_id_type_list_for_class(class_index, 'Train_tif')

### ADD GENERATED IMAGES
from glob import glob
from data_utils import GENERATED_DATA, to_set, get_label

gen_train_files = glob(os.path.join(GENERATED_DATA, "train", "tif", "*.tif"))
# Recover the image id by stripping "<dir>/gen_train_" and the ".tif" extension.
_tif_prefix_len = len(os.path.join(GENERATED_DATA, "train", "tif")) + 1 + len('gen_train_')
gen_train_ids = [s[_tif_prefix_len:-4] for s in gen_train_files]
gen_id_type_list = [(image_id, "Generated_Train_tif") for image_id in gen_train_ids]
# BUG FIX: the original filter called get_label(*gen_id_type_list[0], ...),
# reading the label of the FIRST generated image for every candidate id, so the
# filter kept either all ids or none. Query each image's own label instead.
class_index_gen_train_ids = [
    image_id for image_id in gen_train_ids
    if np.sum(get_label(image_id, "Generated_Train_tif", class_index=class_index)) > 0
]
class_index_gen_id_type_list = [(image_id, "Generated_Train_tif")
                                for image_id in class_index_gen_train_ids]
trainval_id_type_list = trainval_id_type_list + class_index_gen_id_type_list
### ADD GENERATED IMAGES

# Balance the target class with a quota of samples drawn from every other class.
class_indices = list(equalized_data_classes.keys())
class_indices.remove(class_index)

n_other_samples = int(len(trainval_id_type_list) * 1.0 / len(class_indices) / len(equalized_data_classes[class_index]))
for index in class_indices:
    id_type_list = np.array(get_id_type_list_for_class(index, 'Train_tif'))
    # Drop items already selected, then sample n_other_samples at random.
    id_type_list = list(to_set(id_type_list) - to_set(trainval_id_type_list))
    np.random.shuffle(id_type_list)
    trainval_id_type_list.extend(id_type_list[:n_other_samples])
def tif_image_label_provider(image_id_type_list, image_size,
                             channels_first=True,
                             test_mode=False,
                             seed=None,
                             cache=None,
                             with_label=True,
                             class_index=None,
                             tag=None,
                             verbose=0, **kwargs):
    """Infinite generator of 7-channel images derived from 4-band TIFs.

    For each (image_id, image_type) the raw [r, g, b, nir] TIF is loaded,
    resized to `image_size`, scaled to [0, 1] float32 and extended with
    three derived bands — NDVI, NDWI and lightness — producing a
    (H, W, 7) array.

    Parameters
    ----------
    image_id_type_list : iterable of (image_id, image_type) tuples.
    image_size : 2-tuple target size passed to cv2.resize.
        NOTE(review): the cache branch compares shapes against
        image_size[::-1], the disk branch against image_size as-is —
        inconsistent unless image_size is square; confirm orientation.
    channels_first : transposes cached images / the raw TIF to (C, H, W).
        NOTE(review): with channels_first=True the freshly loaded tif_img
        becomes (4, H, W) but is then assigned into img[:, :, :4], which
        expects channels-last — this combination looks broken; confirm.
    test_mode : if True, also yield the (image_id, image_type) key and
        stop after a single pass over the list.
    seed : optional numpy RNG seed applied once at start-up.
    cache : optional DataCache-like object, filled only on the first pass.
    with_label : when False, yields ``label=None``.
    class_index : per-class-group label mode; the label gets one extra
        trailing slot that is set when no class in the group is present.
    tag : single-tag label mode; mutually exclusive with class_index.
    verbose : > 0 prints per-item progress and cache hits/misses.
    **kwargs : ignored (accepted for provider-interface compatibility).

    Yields
    ------
    (img, label), or (img, label, (image_id, image_type)) in test mode.
    """
    # class_index and tag select mutually exclusive labelling modes.
    assert not (class_index is not None and tag is not None), "Either class_index or either tag, not both"
    if seed is not None:
        np.random.seed(seed)
    counter = 0
    # Materialize so the list can be shuffled in place each epoch.
    image_id_type_list = list(image_id_type_list)
    while True:
        np.random.shuffle(image_id_type_list)
        for i, (image_id, image_type) in enumerate(image_id_type_list):
            if verbose > 0:
                print("Image id/type:", image_id, image_type, "| counter=", i)
            key = (image_id, image_type)
            if cache is not None and key in cache:
                if verbose > 0:
                    print("-- Load from RAM")
                img, label = cache.get(key)
                # Cached entries may be stored at another size: resize via a
                # HWC round-trip because cv2.resize expects channels-last.
                if channels_first:
                    if img.shape[1:] != image_size[::-1]:
                        img = img.transpose([1, 2, 0])
                        img = cv2.resize(img, dsize=image_size[::-1])
                        img = img.transpose([2, 0, 1])
                else:
                    if img.shape[:2] != image_size[::-1]:
                        img = cv2.resize(img, dsize=image_size[::-1])
            else:
                if verbose > 0:
                    print("-- Load from disk")
                tif_img = get_image_data(image_id, image_type)
                if tif_img.shape[:2] != image_size:
                    tif_img = cv2.resize(tif_img, dsize=image_size)
                if channels_first:
                    tif_img = tif_img.transpose([2, 0, 1])
                # Normalize 8-bit band values into [0, 1].
                tif_img = tif_img.astype(np.float32) / 255.0
                # [rgb + nir (original tif)] + ndvi + ndwi + lightness
                img = np.zeros(image_size + (7, ), dtype=np.float32)
                img[:, :, :4] = tif_img
                img[:, :, 4] = to_ndvi(tif_img)
                img[:, :, 5] = to_ndwi(tif_img)
                img[:, :, 6] = to_lightness(tif_img)
                if with_label:
                    if class_index is not None:
                        label = get_label(image_id, image_type, class_index=class_index)
                        # Append an extra "none of the above" slot, set when
                        # no class of the group matched this image.
                        label = np.concatenate((label, [
                            0,
                        ]))
                        if np.sum(label) < 1:
                            label[-1] = 1
                    elif tag is not None:
                        label = get_label(image_id, image_type, tag=tag)
                    else:
                        label = get_label(image_id, image_type)
                else:
                    label = None
                # fill the cache only at first time:
                if cache is not None and counter == 0:
                    cache.put(key, (img, label))
            if test_mode:
                yield img, label, (image_id, image_type)
            else:
                yield img, label
        if test_mode:
            # Single deterministic pass in test mode.
            return
        counter += 1