def enumerate_files():
    """
    Return the relative, forward-slash-separated paths of the image files
    under *input_base*.

    *input_base* and *file_list_cache_file* are read from the enclosing scope.
    When *file_list_cache_file* names an existing file, the list is loaded from
    it (one path per line, surrounding whitespace stripped).  Otherwise
    *input_base* is enumerated recursively with find_images() and, if a cache
    file name is configured, the resulting list is written to it.
    """

    cache_exists = (file_list_cache_file is not None) and \
        os.path.isfile(file_list_cache_file)

    # Fast path: reuse a previously written listing
    if cache_exists:
        with open(file_list_cache_file, 'r') as cache:
            relative_paths = [line.strip() for line in cache.readlines()]
        print('Read a list of {} files'.format(len(relative_paths)))
        return relative_paths

    # Slow path: walk the input folder, then make every path relative to it
    # with forward slashes regardless of platform
    relative_paths = [
        os.path.relpath(full_path, input_base).replace('\\', '/')
        for full_path in find_images(input_base, recursive=True)
    ]

    if file_list_cache_file is not None:
        with open(file_list_cache_file, 'w') as cache:
            for relative_path in relative_paths:
                cache.write(relative_path + '\n')

    print('Enumerated {} files'.format(len(relative_paths)))
    return relative_paths
def enumerate_images(dirName, outputFileName=None):
    """
    List the image files found directly in *dirName* (non-recursive) by
    base name, optionally writing them to *outputFileName*, one per line.

    Intended for producing a file list after true positives have been removed
    from the image directory.  Nothing in this module calls it; it exists so
    callers have a consistent way to build a list in the format this module
    expects.

    Returns the list of file names.
    """

    file_names = []
    for full_path in path_utils.find_images(dirName):
        file_names.append(os.path.basename(full_path))

    # No output file requested: just hand back the list
    if outputFileName is None:
        return file_names

    with open(outputFileName, 'w') as out:
        out.writelines(name + '\n' for name in file_names)

    return file_names
new_ann = ann.copy() new_ann['image_id'] = associated_image['id'] new_ann['id'] = str(uuid.uuid1()) annotations_replicated.append(new_ann) print('\nCreated {} replicated annotations from {} original annotations'.format(len(annotations_replicated), len(annotations))) annotations = annotations_replicated #%% See what files are on disk but not annotated print('Listing images from disk...') start_time = time.time() image_files = path_utils.find_images(project_base,bRecursive=True) elapsed = time.time() - start_time print('Finished listing {} files in {}'.format(len(image_files),humanfriendly.format_timespan(elapsed))) files_not_in_db = [] for fn in tqdm(image_files): id = os.path.relpath(fn,project_base).replace('\\','/').replace('.JPG','') if id not in im_id_to_image: files_not_in_db.append(fn) print('{} files not in the database (of {})'.format(len(files_not_in_db),len(image_files))) del fn #%% Sanity-check image and annotation uniqueness
if False:

    #%% Scratch: print the species on every row of one multiply-annotated file

    fn = filenames_with_multiple_annotations[1000]
    rows = filenames_to_rows[fn]
    assert(len(rows) > 1)
    for i_row in rows:
        print(input_metadata.iloc[i_row]['Species'])


#%% Check for images that aren't included in the metadata file

# Enumerate every image under the image directory
image_full_paths = find_images(image_directory, bRecursive=True)
n_images_on_disk = len(image_full_paths)

# Collect the relative path of each image that has no entry in filenames_to_rows.
#
# NOTE(review): relpath() yields OS-native separators and they are not
# normalized here, so the keys of filenames_to_rows presumably use the same
# separator - confirm.
unannotated_images = []
for iImage, image_path in tqdm(enumerate(image_full_paths), total=n_images_on_disk):
    relative_path = os.path.relpath(image_path, image_directory)
    if relative_path in filenames_to_rows:
        continue
    unannotated_images.append(relative_path)

print('Finished checking {} images to make sure they\'re in the metadata, found {} unannotated images'.format(
    n_images_on_disk, len(unannotated_images)))


#%% Create CCT dictionaries

images = []
# Working folders for the extracted frames, the frames with rendered
# detections, and the re-assembled videos
frame_folder_base = r'e:\video_test\frames'
detected_frame_folder_base = r'e:\video_test\detected_frames'
rendered_videos_folder_base = r'e:\video_test\rendered_videos'

results_file = r'results.json'

# Only the two output folders are created here
os.makedirs(detected_frame_folder_base, exist_ok=True)
os.makedirs(rendered_videos_folder_base, exist_ok=True)


#%% Split videos into frames

frame_filenames_by_video, fs_by_video = video_folder_to_frames(
    input_folder, frame_folder_base, recursive=True)


#%% List image files, break into folders

# Enumerate every frame (second positional argument enables recursion) and
# normalize to forward slashes in one pass
frame_files = [
    frame_path.replace('\\', '/')
    for frame_path in path_utils.find_images(frame_folder_base, True)
]
print('Enumerated {} total frames'.format(len(frame_files)))

# NOTE(review): presumably a hard-coded frame rate for the rendered videos - confirm
Fs = 30.01

# Find unique folders
folders = set()

# fn = frame_files[0]
for fn in frame_files:
    folder_name = os.path.dirname(fn)
    folders.add(folder_name)

# Convert the set to a list of forward-slash paths
folders = [folder.replace('\\', '/') for folder in folders]

print('Found {} folders for {} files'.format(len(folders), len(frame_files)))


#%% Load detector output
output_encoding = 'utf-8'
read_image_sizes = True

# Dataset-level "info" block
#
# NOTE(review): 'Auckaland' in the description looks like a typo for
# 'Auckland'; it is left unchanged because the string is emitted as data.
info = {
    'year': 2020,
    'version': '1.0',
    'description': 'Auckaland DOC Camera Traps (test)',
    'contributor': 'Auckland DOC',
    'date_created': str(datetime.date.today()),
}


#%% Enumerate files

print('Enumerating files from {}'.format(input_base_dir))
absolute_image_paths = find_images(input_base_dir, recursive=True)
print('Enumerated {} images'.format(len(absolute_image_paths)))

# Express each path relative to the input folder, with forward slashes
relative_image_paths = []
for fn in absolute_image_paths:
    relative_fn = os.path.relpath(fn, input_base_dir)
    relative_image_paths.append(relative_fn.replace('\\', '/'))

# Every relative path is expected to be unique
relative_image_paths_set = set(relative_image_paths)
assert len(relative_image_paths) == len(relative_image_paths_set)


#%% Create unique identifier for each image

# The ground truth doesn't have full paths in it; create unique identifiers for each image
# based on the camera name and filename.
annotations.append(ann) categories = list(categoryIDToCategories.values()) elapsed = time.time() - startTime print('Finished verifying file loop in {}, {} images, {} missing images, {} repeat labels'.format( humanfriendly.format_timespan(elapsed), len(images), len(missingFiles), len(duplicateImageIDs))) #%% Check for images that aren't included in the metadata file # Enumerate all images # list(relativePathToImage.keys())[0] imageFullPaths = path_utils.find_images(image_base,bRecursive=True) unmatchedFiles = [] for iImage,imagePath in enumerate(imageFullPaths): fn = os.path.relpath(imagePath,image_base) if fn not in relativePathToImage: unmatchedFiles.append(fn) print('Finished checking {} images to make sure they\'re in the metadata, found {} mismatches'.format( len(imageFullPaths),len(unmatchedFiles))) #%% Create info struct info = {}
os.makedirs(output_base_dir, exist_ok=True)

output_encoding = 'utf-8'
read_image_sizes = True

# Dataset-level "info" block
#
# NOTE(review): 'Auckaland' in the description looks like a typo for
# 'Auckland'; it is left unchanged because the string is emitted as data.
info = {
    'year': 2019,
    'version': '1.0',
    'description': 'Auckaland DOC Camera Traps',
    'contributor': 'Auckland DOC',
    'date_created': str(datetime.date.today()),
}


#%% Enumerate files

print('Enumerating files from {}'.format(input_base_dir))
image_files = find_images(input_base_dir, bRecursive=True)
print('Enumerated {} images'.format(len(image_files)))


#%% Assemble dictionaries

# Output collections, all initially empty
images = []
annotations = []
categories = []

# Lookup tables, all initially empty
image_id_to_image = {}
category_name_to_category = {}
category_id_to_category = {}

# Force the empty category to be ID 0
empty_category = {'name': 'empty'}
# local folders
filename_base = os.path.join(base_output_folder_name, base_task_name)
combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
postprocessing_output_folder = os.path.join(filename_base, 'postprocessing')

# Create the task folder first, then its two subfolders
for output_folder in (filename_base,
                      combined_api_output_folder,
                      postprocessing_output_folder):
    os.makedirs(output_folder, exist_ok=True)

# Drop a single trailing forward slash from the input path
if input_path.endswith('/'):
    input_path = input_path[:-1]


#%% Enumerate files

all_images = path_utils.find_images(input_path, recursive=True)

print('Enumerated {} image files in {}'.format(len(all_images), input_path))


#%% Divide images into chunks

def split_list(L, n):
    """
    Split the sequence *L* into *n* contiguous slices whose lengths differ by
    at most one; the longer slices come first.  Returns a list of slices.
    """

    # Every chunk gets base_size items; the first n_longer chunks get one extra
    base_size, n_longer = divmod(len(L), n)

    chunks = []
    start = 0
    for i_chunk in range(n):
        end = start + base_size + (1 if i_chunk < n_longer else 0)
        chunks.append(L[start:end])
        start = end
    return chunks


folder_chunks = split_list(all_images, n_jobs)


#%% Estimate total time
metadata_fullpath = os.path.join(input_base, fn) print('Reading {}'.format(metadata_fullpath)) df = pd.read_csv(metadata_fullpath) assert list(df.columns) == expected_columns df['DirName'] = dirname all_input_metadata.append(df) # Concatenate into a giant data frame input_metadata = pd.concat(all_input_metadata) print('Read {} rows total'.format(len(input_metadata))) #%% List files print('Listing images...') image_full_paths = path_utils.find_images(input_base, bRecursive=True) print('Finished listing {} images'.format(len(image_full_paths))) image_relative_paths = [] for s in image_full_paths: image_relative_paths.append(os.path.relpath(s, input_base)) image_relative_paths = set(image_relative_paths) image_relative_paths_lower = set() for s in image_relative_paths: image_relative_paths_lower.add(s.lower()) #%% Main loop over labels (prep) start_time = time.time()
reader = csv.reader(f) csvInfo = list(list(item) for item in csv.reader(f, delimiter=',')) for iRow in range(len(csvInfo)): csvInfo[iRow][2] = int(csvInfo[iRow][2]) csvInfo[iRow][3] = int(csvInfo[iRow][3]) fileInfo = csvInfo print('Finished reading list of {} files'.format(len(fileInfo))) else: print('Enumerating files from {} to {}'.format(baseDir, outputCsvFilename)) image_files = find_images(baseDir, bRecursive=True) print('Enumerated {} images'.format(len(image_files))) with io.open(outputCsvFilename, "w", encoding=outputEncoding) as outputFileHandle: for fname in image_files: nFiles = nFiles + 1 if maxFiles >= 0 and nFiles > maxFiles: print('Warning: early break at {} files'.format(maxFiles)) break fullPath = fname relativePath = os.path.relpath(fullPath, baseDir)
} target_fields = ['species_count','group_count','behaviour'] #%% Enumerate images # Load from file if we've already enumerated if os.path.isfile(file_list_file): with open(file_list_file,'r') as f: files = f.readlines() files = [s.strip() for s in files] image_full_paths = files print('Loaded {} images from {}'.format(len(image_full_paths),file_list_file)) else: image_full_paths = find_images(input_base, recursive=True) with open(file_list_file,'w') as f: for line in image_full_paths: f.write(line + '\n') print('Enumerated {} images from {}'.format(len(image_full_paths),input_base)) image_full_paths_set = set(image_full_paths) image_relative_paths = [os.path.relpath(fn,input_base) for fn in image_full_paths] image_relative_paths_set = set(image_relative_paths) #%% Create CCT dictionaries annotations = [] image_ids_to_images = {} category_name_to_category = {}
#%% Interactive driver if False: pass #%% # List images in a test folder base_dir = r'c:\temp\test_images' image_list_file = os.path.join(base_dir, 'images.json') relative_image_list_file = os.path.join(base_dir, 'images_relative.json') image_size_file = os.path.join(base_dir, 'image_sizes.json') import path_utils image_names = path_utils.find_images(base_dir, recursive=True) with open(image_list_file, 'w') as f: json.dump(image_names, f, indent=2) relative_image_names = [] for s in image_names: relative_image_names.append(os.path.relpath(s, base_dir)) with open(relative_image_list_file, 'w') as f: json.dump(relative_image_names, f, indent=2) #%% # process_list_file(image_list_file,image_size_file,image_prefix=base_dir) process_list_file(relative_image_list_file,