def download_model(algo, model_dir):
    # Resolve the zip path inside the passed-in model_dir
    zip_fpath = realpath(join(model_dir, algo + '.zip'))
    # Download and unzip model
    print('[grabmodels] Downloading model_dir=%s' % zip_fpath)
    dropbox_link = MODEL_URLS[algo]
    utool.download_url(dropbox_link, zip_fpath)
    utool.unzip_file(zip_fpath)
    # Cleanup
    utool.delete(zip_fpath)
def _download_model(algo, algo_modeldir):
    """ Downloads and overwrites models """
    zip_fpath = realpath(join(algo_modeldir, algo + '.zip'))
    # Download and unzip model
    logger.info('[grabmodels] Downloading model_dir=%s' % zip_fpath)
    model_link = MODEL_URLS[algo]
    ut.download_url(model_link, zip_fpath)
    ut.unzip_file(zip_fpath)
    # Cleanup
    ut.delete(zip_fpath)
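# A minimal usage sketch for the downloader above. MODEL_URLS and the 'rf'
# algo key mirror names already used in this file; the destination directory
# is a hypothetical example path, not one taken from the source.
algo = 'rf'
algo_modeldir = realpath(join('.', 'models', algo))
ut.ensuredir(algo_modeldir)  # make sure the destination exists first
_download_model(algo, algo_modeldir)  # fetches, unzips, then deletes the zip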
def ensure_inno_isinstalled():
    """ Ensures that the current machine has INNO installed. Returns the path
    to the executable """
    assert ut.WIN32, 'Can only build INNO on windows'
    inno_fpath = ut.search_in_dirs(r'Inno Setup 5\ISCC.exe', ut.get_install_dirs())
    # Make sure INNO is installed
    if inno_fpath is None:
        print('WARNING: cannot find inno_fpath')
        AUTO_FIXIT = ut.WIN32
        print('Inno seems to not be installed. AUTO_FIXIT=%r' % AUTO_FIXIT)
        if AUTO_FIXIT:
            print('Automatically trying to download and install INNO')
            # Download INNO Installer
            inno_installer_url = 'http://www.jrsoftware.org/download.php/ispack.exe'
            inno_installer_fpath = ut.download_url(inno_installer_url)
            print('Automatically trying to install INNO')
            # Install INNO Installer
            ut.cmd(inno_installer_fpath)
        else:
            inno_homepage_url = 'http://www.jrsoftware.org/isdl.php'
            ut.open_url_in_browser(inno_homepage_url)
            raise AssertionError('Cannot find INNO and AUTO_FIXIT is False')
        # Ensure that it has now been installed
        inno_fpath = ut.search_in_dirs(r'Inno Setup 5\ISCC.exe', ut.get_install_dirs())
        assert ut.checkpath(inno_fpath, verbose=True, info=True), (
            'inno installer is still not installed!')
    return inno_fpath
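# Hedged usage sketch: on a Windows build machine, resolve ISCC.exe before
# compiling an installer script. 'setup.iss' is a hypothetical script name
# used for illustration only.
if ut.WIN32:
    iscc_exe = ensure_inno_isinstalled()
    ut.cmd(iscc_exe, 'setup.iss')  # compile the (assumed) Inno Setup script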
def download_sharks(XMLdata, number):
    """
    cd ~/work/WS_ALL
    python -m ibeis.scripts.getshark

    >>> from ibeis.scripts.getshark import *  # NOQA
    >>> url = 'www.whaleshark.org/listImages.jsp'
    >>> XMLdata = ut.url_read(url)
    >>> number = None
    """
    # Prepare the output directory for writing, if it doesn't exist
    output_dir = 'sharkimages'
    ut.ensuredir(output_dir)
    dom = parseString(XMLdata)

    # Download files
    if number:
        maxCount = min(number, len(dom.getElementsByTagName('img')))
    else:
        maxCount = len(dom.getElementsByTagName('img'))
    parsed_info = dict(
        img_url_list=[],
        localid_list=[],
        nameid_list=[],
        orig_fname_list=[],
        new_fname_list=[],
    )
    print('Preparing to fetch %i files...' % maxCount)
    for shark in dom.getElementsByTagName('shark'):
        localCount = 0
        for imageset in shark.getElementsByTagName('imageset'):
            for img in imageset.getElementsByTagName('img'):
                localCount += 1
                img_url = img.getAttribute('href')
                orig_fname = split(img_url)[1]
                ext = splitext(orig_fname)[1].lower()
                nameid = shark.getAttribute('number')
                new_fname = '%s-%i%s' % (nameid, localCount, ext)
                parsed_info['img_url_list'].append(img_url)
                parsed_info['nameid_list'].append(nameid)
                parsed_info['localid_list'].append(localCount)
                parsed_info['orig_fname_list'].append(orig_fname)
                parsed_info['new_fname_list'].append(new_fname)
                print('Parsed %i / %i files.' % (len(parsed_info['orig_fname_list']), maxCount))
                if number is not None and len(parsed_info['orig_fname_list']) == number:
                    break
    parsed_info['new_fpath_list'] = [join(output_dir, _fname)
                                     for _fname in parsed_info['new_fname_list']]

    print('Filtering parsed images')
    # Filter based on image type (keep only jpgs)
    ext_flags = [_fname.endswith('.jpg') or _fname.endswith('.jpeg')
                 for _fname in parsed_info['new_fname_list']]
    parsed_info = {key: ut.compress(list_, ext_flags)
                   for key, list_ in parsed_info.items()}

    # Filter to only images matching the appropriate tags
    from ibeis import tag_funcs
    parsed_info['tags_list'] = parse_shark_tags(parsed_info['orig_fname_list'])
    tag_flags = tag_funcs.filterflags_general_tags(
        parsed_info['tags_list'],
        has_any=['view-left'],
        none_match=['qual.*', 'view-top', 'part-.*', 'cropped'],
    )
    parsed_info = {key: ut.compress(list_, tag_flags)
                   for key, list_ in parsed_info.items()}
    print('Tags in chosen images:')
    print(ut.dict_hist(ut.flatten(parsed_info['tags_list'])))

    # Download selected subset
    print('Downloading selected subset')
    _iter = list(zip(parsed_info['img_url_list'],
                     parsed_info['new_fpath_list']))
    _iter = ut.ProgressIter(_iter, lbl='downloading sharks')
    for img_url, new_fpath in _iter:
        if not exists(new_fpath):
            ut.download_url(img_url, new_fpath)

    # Remove corrupted or ill-formatted images
    print('Checking for corrupted images')
    import vtool as vt
    noncorrupt_flags = vt.filterflags_valid_images(parsed_info['new_fpath_list'])
    parsed_info = {key: ut.compress(list_, noncorrupt_flags)
                   for key, list_ in parsed_info.items()}

    print('Removing small images')
    import numpy as np
    imgsize_list = np.array([vt.open_image_size(gpath)
                             for gpath in parsed_info['new_fpath_list']])
    sqrt_area_list = np.sqrt(np.prod(imgsize_list, axis=1))
    areq_flags_list = sqrt_area_list >= 750
    parsed_info = {key: ut.compress(list_, areq_flags_list)
                   for key, list_ in parsed_info.items()}

    # Keep only names that have at least two images
    grouped_idxs = ut.group_items(list(range(len(parsed_info['nameid_list']))),
                                  parsed_info['nameid_list'])
    keep_idxs = sorted(ut.flatten([idxs for key, idxs in grouped_idxs.items()
                                   if len(idxs) >= 2]))
    parsed_info = {key: ut.take(list_, keep_idxs)
                   for key, list_ in parsed_info.items()}

    print('Moving images to secondary directory')
    named_outputdir = 'named-left-sharkimages'
    # Build names
    parsed_info['namedir_fpath_list'] = [
        join(named_outputdir, _nameid, _fname)
        for _fname, _nameid in zip(parsed_info['new_fname_list'],
                                   parsed_info['nameid_list'])]
    # Create directories
    ut.ensuredir(named_outputdir)
    named_dirs = ut.unique_ordered(list(map(dirname, parsed_info['namedir_fpath_list'])))
    for dir_ in named_dirs:
        ut.ensuredir(dir_)
    # Copy
    ut.copy_files_to(src_fpath_list=parsed_info['new_fpath_list'],
                     dst_fpath_list=parsed_info['namedir_fpath_list'])
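# Driver sketch following the function's own docstring: fetch the listing
# XML and run the full pipeline. Passing None for number means every listed
# image is parsed.
if __name__ == '__main__':
    url = 'www.whaleshark.org/listImages.jsp'
    XMLdata = ut.url_read(url)      # fetch the image-listing XML
    download_sharks(XMLdata, None)  # parse, filter, download, and organize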
import utool as ut
import vtool as vt
import random
import uuid
import tqdm
import cv2
import numpy as np  # needed for np.inf below
from os.path import abspath  # needed for abspath below

ibs = None
MIN_AIDS = 1
MAX_AIDS = np.inf
MAX_NAMES = np.inf
PADDING = 32

url = 'https://wildbookiarepository.azureedge.net/random/humpback.crc.csv'
local_filepath = ut.download_url(url)
filepath = abspath(local_filepath)

with open(filepath, 'r') as file:
    # header = file.readline()
    lines = file.readlines()

header = ['acmID', 'individualID']
line_list = []
for line in lines:
    line = line.strip()
    if len(line) == 0:
        continue
    line = line.split(',')
    line_dict = dict(zip(header, line))
    line_list.append(line_dict)
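# A minimal sketch of one plausible next step, assuming nothing beyond the
# line_list built above: group annotation ids (acmID) by individualID, then
# keep only individuals with at least MIN_AIDS annotations.
from collections import defaultdict

name_to_acmids = defaultdict(list)
for line_dict in line_list:
    name_to_acmids[line_dict['individualID']].append(line_dict['acmID'])
name_to_acmids = {name: acmids for name, acmids in name_to_acmids.items()
                  if len(acmids) >= MIN_AIDS}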