def extract_dataset(option, file_path):
    """Run the layout check that corresponds to *option*.

    'option1' calls ``find_image_files_folder_per_class`` and 'option2' calls
    ``find_image_files_from_file``; any other option performs no check.

    NOTE(review): ``data_dir`` and ``info_file`` are empty strings and
    ``file_path`` is never read in this function - this looks like a
    placeholder; confirm the intended inputs.

    Args:
        option: Option identifier string ('option1', 'option2', 'option3', ...).
        file_path: Currently unused.

    Returns:
        False if the selected check raises AssertionError, True otherwise.
    """
    data_dir = ''
    info_file = ''

    # One guard around the dispatch: only AssertionError means "not valid".
    try:
        if option == 'option1':
            find_image_files_folder_per_class(data_dir)
        elif option == 'option2':
            find_image_files_from_file(data_dir, info_file)
    except AssertionError:
        return False
    return True
def test_upload(self, request):
    """Save an uploaded test set, detect its layout and tag it with a marker file.

    The upload is stored in a directory derived from the dataset path (with
    'train' replaced by 'test') plus the filename stem. An empty marker file
    records which layout was recognised:

    * ``.option0`` - numpy ``.npz`` file
    * ``.option1`` - only images
    * ``.option2`` - one folder per class
    * ``.option3`` - images plus a labels file (renamed to ``labels.txt``)

    Args:
        request: Request object exposing ``files['input_file']`` with
            ``filename`` and ``save(path)``.

    Returns:
        'ok' on success, otherwise a human-readable error message. When the
        contents are rejected as invalid, the test directory is removed.
    """
    invalid_msg = "The file contents are not valid."

    test_file = request.files['input_file']
    # NOTE(review): the client-supplied filename is joined into paths below
    # without sanitising - confirm callers guarantee it is safe.
    filename = test_file.filename
    dataset_test_path = os.path.join(
        self._dataset.get_dataset_path().replace('train', 'test'),
        filename.split('.')[0])
    path_file = os.path.join(dataset_test_path, filename)
    os.makedirs(dataset_test_path, exist_ok=True)
    test_file.save(path_file)

    def write_marker(name):
        # Create/truncate the empty marker and close the handle immediately
        # (the bare open() used previously leaked the file descriptor).
        with open(os.path.join(dataset_test_path, name), 'w'):
            pass

    try:
        if path_file.endswith('.npz'):
            try:
                _, test_data = find_dataset_from_numpy(
                    path_file, requires_y=False, only_test=True)
                if test_data is None:
                    tree_remove(dataset_test_path)
                    return invalid_msg
                np.savez(path_file, x=test_data[0], y=test_data[1])
                write_marker('.option0')  # NUMPY FILE
                return 'ok'
            except KeyError:
                tree_remove(dataset_test_path)
                return invalid_msg
        else:
            if not unzip(path_file, dataset_test_path):
                return "The file contents already exists."
            os.remove(path_file)

            if find_images_test_file(dataset_test_path):
                write_marker('.option1')  # ONLY IMAGES
            else:
                expected_classes = None
                try:
                    _, _, classes = find_image_files_folder_per_class(
                        dataset_test_path, require_all=False)
                    expected_classes = len(self.get_target_labels())
                    # Explicit raise instead of `assert`: asserts are stripped
                    # under `python -O`, which would silently skip validation.
                    if len(classes) != expected_classes:
                        raise AssertionError(
                            'class count does not match target labels')
                    write_marker('.option2')  # FOLDER PER CLASS
                except AssertionError:
                    # Not folder-per-class: fall back to the labels-file layout.
                    try:
                        info_file = [
                            entry for entry in os.listdir(dataset_test_path)
                            if entry.startswith(('test.', 'labels.'))
                        ]
                        if len(info_file) != 1:
                            raise AssertionError(
                                'expected exactly one labels file')
                        info_path = os.path.join(dataset_test_path,
                                                 info_file[0])
                        _, _, classes = find_image_files_from_file(
                            dataset_test_path, info_path, require_all=False)
                        if len(classes) != len(self.get_target_labels()):
                            raise AssertionError(
                                'class count does not match target labels')
                        write_marker('.option3')  # LABELS.TXT
                        os.rename(
                            info_path,
                            os.path.join(dataset_test_path, 'labels.txt'))
                    except AssertionError:
                        tree_remove(dataset_test_path)
                        return invalid_msg
    except ValueError:
        tree_remove(dataset_test_path)
        return invalid_msg
    return 'ok'
def new_image_dataset(USER_ROOT, username, option, file):
    """Create a new image dataset for *username* from an uploaded file.

    A dataset directory is obtained from ``check_dataset_path`` and populated
    with ``train`` and ``test`` sub-directories. The upload is handled
    according to *option*:

    * 'option1' - zip with one folder per class (optional ``train``/``test``
      top-level folders).
    * 'option2' - zip with images plus a ``labels.*``/``train.*`` file and an
      optional ``test.*`` file.
    * 'option3' - numpy file processed by ``find_dataset_from_numpy``.

    Args:
        USER_ROOT: Root passed through to ``check_dataset_path``.
        username: Owner passed through to ``check_dataset_path``.
        option: Key into ``option_map``; selects the layout handling above.
        file: Uploaded file object exposing ``filename`` and ``save(path)``.

    Returns:
        False if *file* is a ``str``; otherwise the dataset name returned by
        ``check_dataset_path``.

    Raises:
        ValueError: ``check_zip_file`` rejected the upload ('Invalid file.').
        AssertionError: The zip layout failed validation; the dataset
            directory is removed before re-raising.
        Exception: Any failure while importing an 'option3' upload is
            re-raised after the dataset directory is removed.
    """
    if isinstance(file, str):
        return False

    dataset_name = file.filename.split('.')[0]
    dataset_name, dataset_path = check_dataset_path(USER_ROOT, username,
                                                    dataset_name)
    # Empty marker recording the chosen option; `with` closes the handle
    # right away (a bare open() would leak the descriptor).
    with open(os.path.join(dataset_path, option_map[option]), 'w'):
        pass

    dataset_test_path = os.path.join(dataset_path, 'test')
    os.makedirs(dataset_test_path, exist_ok=True)
    train_path = os.path.join(dataset_path, 'train')
    os.makedirs(train_path, exist_ok=True)

    filename = secure_filename(file.filename)
    path_file = os.path.join(dataset_path, filename)
    file.save(path_file)

    if option == 'option3':
        try:
            train_data, test_data = find_dataset_from_numpy(path_file)
            np.savez(os.path.join(train_path, filename),
                     x=train_data[0], y=train_data[1])
            # NOTE(review): truthiness check kept as-is - confirm
            # find_dataset_from_numpy never returns a bare ndarray here.
            if test_data:
                numpy_test_dir = os.path.join(dataset_test_path, dataset_name)
                os.makedirs(numpy_test_dir, exist_ok=True)
                np.savez(os.path.join(numpy_test_dir, dataset_name + '.npz'),
                         x=test_data[0], y=test_data[1])
                with open(os.path.join(numpy_test_dir, '.option0'), 'w'):
                    pass  # NUMPY FILE
            return dataset_name
        except Exception:
            tree_remove(dataset_path)
            raise

    if not check_zip_file(path_file):
        tree_remove(dataset_path)
        raise ValueError('Invalid file.')
    unzip(path_file, train_path)

    try:
        if option == 'option1':
            if 'train' in os.listdir(train_path):
                rename(os.path.join(train_path, 'train'), train_path)
            # NOTE(review): this runs before any 'test' folder is moved out
            # of train_path - confirm the finder is meant to see it.
            find_image_files_folder_per_class(train_path)
            if 'test' in os.listdir(train_path):
                dataset_test_path = os.path.join(dataset_test_path,
                                                 dataset_name)
                os.makedirs(dataset_test_path, exist_ok=True)
                rename(os.path.join(train_path, 'test'), dataset_test_path)
                find_image_files_folder_per_class(dataset_test_path,
                                                  require_all=False)
                with open(os.path.join(dataset_test_path, '.option2'), 'w'):
                    pass  # FOLDER PER CLASS
        elif option == 'option2':
            info_file = [entry for entry in os.listdir(train_path)
                         if entry.startswith(('labels.', 'train.'))]
            # Explicit raise instead of `assert`: asserts are stripped under
            # `python -O`, which would turn this into an uncaught IndexError
            # and skip the cleanup below.
            if len(info_file) != 1:
                raise AssertionError('expected exactly one labels/train file')
            labels_path = os.path.join(train_path, 'labels.txt')
            os.rename(os.path.join(train_path, info_file[0]), labels_path)
            find_image_files_from_file(train_path, labels_path)

            info_test_file = [entry for entry in os.listdir(train_path)
                              if entry.startswith('test.')]
            if len(info_test_file) == 1:
                test_info_path = os.path.join(train_path, info_test_file[0])
                find_image_files_from_file(train_path, test_info_path,
                                           require_all=False)
                dataset_test_path = os.path.join(dataset_test_path,
                                                 dataset_name)
                os.makedirs(dataset_test_path, exist_ok=True)

                args = {}
                if not has_header(test_info_path):
                    args['header'] = None
                df = pd.read_csv(test_info_path, sep=None, engine='python',
                                 **args)
                # First column holds the image file names.
                filenames = df[df.columns[0]].values
                if not os.path.isfile(filenames[0]):
                    # Names are not resolvable as given: take them relative
                    # to the train directory.
                    filenames = [os.path.join(train_path, name)
                                 for name in filenames]
                # Move every listed test image into the test directory.
                for image_path in filenames:
                    os.rename(image_path,
                              os.path.join(dataset_test_path,
                                           os.path.basename(image_path)))
                os.rename(test_info_path,
                          os.path.join(dataset_test_path, 'labels.txt'))
                with open(os.path.join(dataset_test_path, '.option3'), 'w'):
                    pass  # LABELS.TXT
    except AssertionError:
        tree_remove(dataset_path)
        raise
    return dataset_name