import asyncio
import sys

from FileManager import FileManager
from DownloadManager import DownloadManager

# Input plain file path argument; None when the script is started without one,
# so that merely importing this module does not raise IndexError.
INPUT_FILE_PATH = sys.argv[1] if len(sys.argv) > 1 else None
OUTPUT_DIRECTORY_PATH = '/images'  # Directory of output images
NUM_OF_SEMAPHORE = 5  # The number of semaphores
URL_CHUNK_SIZE = 10  # The chunk containing the number of URLs to download once

if __name__ == '__main__':
    if INPUT_FILE_PATH is None:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: {} INPUT_FILE_PATH".format(sys.argv[0]))

    # Create the loop explicitly and register it as the current loop BEFORE the
    # managers are built: asyncio.get_event_loop() without a current loop is
    # deprecated, and any asyncio primitive created in the constructors on
    # older interpreters binds to whatever the current loop is at that time.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        fileManager = FileManager(INPUT_FILE_PATH, URL_CHUNK_SIZE)
        downloadManager = DownloadManager(OUTPUT_DIRECTORY_PATH, NUM_OF_SEMAPHORE)

        url_generator = fileManager.get_image_url()
        if url_generator is not None:
            # Each item yielded by the generator is one chunk of URLs; a chunk
            # is downloaded concurrently and finished before the next starts.
            for urls in url_generator:
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+), so wrap every awaitable in a task/future
                # bound to our loop first.
                tasks = [
                    asyncio.ensure_future(downloadManager.download(url), loop=loop)
                    for url in urls
                ]
                # asyncio.wait() raises ValueError on an empty collection.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
    finally:
        # Always release the loop, even if a chunk fails part-way through.
        loop.close()
# Ask whether existing resources should be destroyed and rebuilt from scratch.
# The answer is normalised once (surrounding whitespace stripped, upper-cased),
# so inputs such as "y" or " N " are accepted and later comparisons are plain
# equality checks against 'Y' / 'N'.
clean_run = input(
    "Destroy existing resources and train from scratch "
    + "(WARNING: this is memory intensive and may take considerable time)? Enter Y/N: "
).strip().upper()

# Keep prompting until the answer is one of the two accepted values.
while clean_run not in ('Y', 'N'):
    clean_run = input(
        "Please enter Y (clean run) or N (retrain existing resources): "
    ).strip().upper()

if clean_run == 'Y':
    # Clean run: reset via the file manager, fetch the source data, then
    # extract the archive and sort the .png files into the two categories.
    file_manager = FileSystemManager(image_directory, model_directory)
    file_manager.clean_run()

    download_manager = DownloadManager(source_data)
    download_manager.download()

    extract_dir = file_manager.extract_archive(source_archive)
    file_manager.remove_files_except('.png')
    file_manager.data_science_fs(category0='benign', category1='malignant')
    file_manager.organise_files(
        extract_dir,
        category_rules={
            'benign': 'SOB_B_.*.png',
            'malignant': 'SOB_M_.*.png',
        },
    )
# 'N' requires no preparation here. The validation loop above only exits with
# 'Y' or 'N', so no further fallback branch is reachable or needed.