def load_data_raw_images(serialization_path='data.pickle', train_images_folder='data_road/roadC621/'):
    """Load color road-scene images (3 channels) and binary road labels.

    On the first call the images under ``$DATA_PATH/<train_images_folder>``
    are read, converted to feature / label arrays, and cached as
    ``<serialization_path>.npz``; later calls load straight from that cache
    (which implies the same training/test split as before).

    Parameters
    ----------
    serialization_path : str
        Base path of the ``.npz`` cache file (``.npz`` is appended).
    train_images_folder : str
        Folder below ``$DATA_PATH`` containing ``image_2/`` (data) and
        ``gt_image_2/`` (ground truth) subfolders of PNG files.

    Returns
    -------
    tuple : (featurevector list, label list)
        ``xs_colored`` — array of color feature images,
        ``yl`` — array of float {0.0, 1.0} label masks of matching size.
    """
    logging.info("Start loading data...")
    data_source = serialization_path + ".npz"
    if not os.path.exists(data_source):
        # Build lists of files which will be read.
        path_data = os.path.join(os.environ['DATA_PATH'],
                                 train_images_folder,
                                 "image_2/")
        files_data = [os.path.join(path_data, f)
                      for f in sorted(os.listdir(path_data))
                      if f.endswith('.png')]
        path_gt = os.path.join(os.environ['DATA_PATH'],
                               train_images_folder,
                               "gt_image_2/")
        files_gt = [os.path.join(path_gt, f)
                    for f in sorted(os.listdir(path_gt))
                    if f.endswith('.png')]
        if not os.path.isfile('training.pickle') or \
           not os.path.isfile('testing.pickle'):
            logging.info("Write training.pickle and testing.pickle")
            write_files(files_data, files_gt)
        # Only the training split is loaded here; the full directory
        # listings above are discarded in favor of the persisted split.
        filelist_tuples = list(read_filelist('training.pickle'))
        files_data = [data_file for data_file, _ in filelist_tuples]
        files_gt = [gt_file for _, gt_file in filelist_tuples]

        # Read files (data first).
        print("Start reading images: ", end='')
        colored_image_features = []
        for img_path in files_data:
            print('.', end='')
            ac = utils.load_color_image_features(img_path)
            if(ac.shape[0] == 188):  # TODO: Why is this skipped?
                colored_image_features.append(ac)
        print('')
        xs_colored = np.array(colored_image_features, copy=False)

        # Read grayscale groundtruth.
        yl = []
        for f in files_gt:
            # NOTE(review): scipy.misc.imread was removed in SciPy >= 1.2;
            # this requires an old SciPy (or switching to imageio.imread).
            img = scipy.misc.imread(f)
            if(img.shape[0] != 188):  # TODO: Why is this skipped?
                continue
            # Binary mask: 1.0 where the gray value is exactly 105
            # (the "road" label), 0.0 elsewhere. Vectorized instead of a
            # quadratic per-pixel Python loop; dtype float64 matches the
            # former np.zeros(img.shape) buffer.
            yl.append((img == 105).astype(float))
        yl = np.array(yl)
        assert len(xs_colored) == len(yl), "len(xs_colored) != len(yl)"
        for i, (X, y) in enumerate(zip(xs_colored, yl), start=1):
            logging.info("Get labels (%i/%i)...", i, len(yl))
            assert X.shape[:2] == y.shape, \
                ("X.shape[1:]=%s and y.shape=%s" % (X.shape[:2], y.shape))
            # Sanity check: every mask contains both classes.
            assert y.min() == 0.0, ("min(y)=%s" % str(y.min()))
            assert y.max() == 1.0, ("max(y)=%s" % str(y.max()))
        # np.savez appends ".npz" itself, so this writes exactly
        # `data_source` as tested above.
        np.savez(serialization_path, xs_colored, yl)
    else:
        logging.info("!! Loaded pickled data" + "!" * 80)
        logging.info("Data source: %s", data_source)
        logging.info("This implies same test / training split as before.")
        npzfile = np.load(data_source)
        xs_colored = npzfile['arr_0']
        yl = npzfile['arr_1']
    return (xs_colored, yl)
# NOTE(review): fragment — `data_paths`, `label_paths`, `training_ratio`,
# `data_prefix` and `files_data` are defined outside this view (presumably
# the enclosing function's parameters); confirm against the full file.

# Shuffle the (data, label) pairs together so the split is random but the
# pairing is preserved, then split by `training_ratio`.
zipped = list(zip(data_paths, label_paths))
random.shuffle(zipped)
assert 0.1 <= training_ratio <= 1.0, 'wrong training ratio'
split_to = int(len(zipped) * training_ratio)
training_files = zipped[:split_to]
testing_files = zipped[split_to:]
# NOTE(review): the next expression is a no-op (its value is discarded) —
# looks like notebook/debugging residue; candidate for removal.
np.array(training_files).shape
training_data=[x[0] for x in training_files]
img_path = os.path.join(data_prefix,training_data[0])
ac = utils.load_color_image_features(img_path)
# read files (data first)
# NOTE(review): this loop duplicates the image-reading loop in
# load_data_raw_images and iterates `files_data`, not the freshly computed
# `training_data` — possibly stale; verify which list is intended.
print("Start reading images: ", end='')
colored_image_features = []
for img_path in files_data:
    print('.', end='')
    ac = utils.load_color_image_features(img_path)
    if(ac.shape[0] == 188):  # TODO: Why is this skipped?
        colored_image_features.append(ac)
print('')
xs_colored = np.array(colored_image_features, copy=False)