def _autojit_cython(pyx_fpath, verbose=1):
    """
    The idea is that given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx
    has not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.
    """
    import shutil
    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = False
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            return False
        else:
            if so_fname is False:
                # We can compute what the so_fname will be if it doesn't exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            depends = [ub.hash_file(pyx_fpath, hasher="sha1")]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if stamp.expired():
                ub.cmd("cythonize -i {}".format(pyx_fpath),
                       verbose=verbose, check=True)
                stamp.renew()
            return True
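
# The snippet above keys its stamp on the file's content hash. A minimal
# sketch of that invalidation style in isolation; the target file here is
# hypothetical and written on the fly so the sketch runs.
import ubelt as ub

fpath = 'mymodule.pyx'           # hypothetical source file
ub.writeto(fpath, '# contents')  # stand-in content for the sketch
depends = [ub.hash_file(fpath, hasher='sha1')]  # key changes with content
stamp = ub.CacheStamp('rebuild_stamp', dpath='.', depends=depends)
if stamp.expired():
    # the rebuild step (e.g. the cythonize call above) would go here
    stamp.renew()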
def test_2(self):
    # Note: 取运行目录() means "get the run directory"
    dpath = 取运行目录() + r"\cache"
    # You must specify a directory, unlike in Cacher where it is optional
    self = ub.CacheStamp('name', dpath=dpath, cfgstr='dependencies')
    if self.expired():
        compute_many_files(dpath)
        self.renew()
    assert not self.expired()
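
# For context, a minimal self-contained version of the idiom test_2
# exercises, using the newer ``depends`` spelling (``cfgstr`` is the older
# name for the same argument); ``expensive_write`` is a hypothetical helper.
import os
import tempfile
import ubelt as ub

def expensive_write(dpath):
    # hypothetical stand-in for a costly multi-file computation
    with open(os.path.join(dpath, 'out.txt'), 'w') as f:
        f.write('result')

dpath = tempfile.mkdtemp()
stamp = ub.CacheStamp('demo_stamp', dpath=dpath, depends='v1')
if stamp.expired():        # True on the first run or when depends changes
    expensive_write(dpath)
    stamp.renew()          # records completion in a small stamp file
assert not stamp.expired()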
def demo(cls, key='astro', dsize=None):
    """
    Ignore:
        >>> from ndsampler.utils.util_gdal import *  # NOQA
        >>> self = LazyGDalFrameFile.demo(dsize=(6600, 4400))
    """
    cache_dpath = ub.ensure_app_cache_dir('ndsampler/demo')
    fpath = join(cache_dpath, key + '.cog.tiff')
    depends = ub.odict(dsize=dsize)
    cfgstr = ub.hash_data(depends)
    stamp = ub.CacheStamp(fname=key, cfgstr=cfgstr, dpath=cache_dpath,
                          product=[fpath])
    if stamp.expired():
        import kwimage
        img = kwimage.grab_test_image(key, dsize=dsize)
        kwimage.imwrite(fpath, img, backend='gdal')
        stamp.renew()
    self = cls(fpath)
    return self
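
# demo hashes an ordered config dict into the stamp key; that pattern
# generalizes to any configuration. A minimal sketch with placeholder keys:
import ubelt as ub

config = ub.odict(dsize=(512, 512), interpolation='linear')
cfgstr = ub.hash_data(config)  # stable hash of the configuration
print(cfgstr[0:8])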
def grab_coco_camvid():
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> dset = grab_coco_camvid()
        >>> print('dset = {!r}'.format(dset))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> plt.clf()
        >>> dset.show_image(gid=1)

    Ignore:
        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()
    """
    import kwcoco
    cache_dpath = ub.ensure_app_cache_dir('kwcoco', 'camvid')
    coco_fpath = join(cache_dpath, 'camvid.mscoco.json')

    # Need to manually bump this if you make a change to loading
    SCRIPT_VERSION = 'v4'

    # Ubelt's stamp-based caches are super cheap and let you take control of
    # the data format.
    stamp = ub.CacheStamp('camvid_coco', cfgstr=SCRIPT_VERSION,
                          dpath=cache_dpath, product=coco_fpath,
                          hasher='sha1', verbose=3)
    if stamp.expired():
        camvid_raw_info = grab_raw_camvid()
        dset = convert_camvid_raw_to_coco(camvid_raw_info)
        with ub.Timer('dumping MS-COCO dset to: {}'.format(coco_fpath)):
            dset.dump(coco_fpath)
        # Mark this process as completed by saving a small file containing the
        # hash of the "product" you are stamping.
        stamp.renew()

    # We can also cache the index build step independently. This uses
    # ubelt.Cacher, which is pickle based, and writes the actual object to
    # disk. Each type of caching has its own uses and tradeoffs.
    cacher = ub.Cacher('prebuilt-coco', cfgstr=SCRIPT_VERSION,
                       dpath=cache_dpath, verbose=3)
    dset = cacher.tryload()
    if dset is None:
        print('Reading coco_fpath = {!r}'.format(coco_fpath))
        dset = kwcoco.CocoDataset(coco_fpath, tag='camvid')
        # Directly save the file to disk.
        dset._build_index()
        dset._build_hashid()
        cacher.save(dset)

    camvid_dset = dset
    print('Loaded camvid_dset = {!r}'.format(camvid_dset))
    return camvid_dset
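
# The two cache types used above are worth isolating. A rough sketch,
# assuming only ubelt; the paths and computed object are placeholders.
import os
import ubelt as ub

dpath = ub.ensure_app_cache_dir('demo_cache')
product = os.path.join(dpath, 'table.csv')

# CacheStamp: we control the on-disk format; the stamp only records that
# the product was produced for this version of the config.
stamp = ub.CacheStamp('table_stamp', dpath=dpath, depends='v1',
                      product=product)
if stamp.expired():
    with open(product, 'w') as f:
        f.write('a,b\n1,2\n')
    stamp.renew()

# Cacher: pickles the in-memory object itself; convenient, but the format
# is opaque and tied to pickle compatibility.
cacher = ub.Cacher('table_obj', depends='v1', dpath=dpath)
rows = cacher.tryload()
if rows is None:
    with open(product, 'r') as f:
        rows = [line.split(',') for line in f.read().splitlines()]
    cacher.save(rows)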
def prepare(self, gids=None, workers=0, use_stamp=True):
    """
    Precompute the cached frame conversions

    Args:
        gids (List[int] | None): specific image ids to prepare.
            If None prepare all images.
        workers (int, default=0): number of parallel threads for this
            io-bound task

    Example:
        >>> from ndsampler.abstract_frames import *
        >>> workdir = ub.ensure_app_cache_dir('ndsampler/tests/test_cog_precomp')
        >>> print('workdir = {!r}'.format(workdir))
        >>> ub.delete(workdir)
        >>> ub.ensuredir(workdir)
        >>> self = SimpleFrames.demo(backend='npy', workdir=workdir)
        >>> print('self = {!r}'.format(self))
        >>> print('self.cache_dpath = {!r}'.format(self.cache_dpath))
        >>> #_ = ub.cmd('tree ' + workdir, verbose=3)
        >>> self.prepare()
        >>> self.prepare()
        >>> #_ = ub.cmd('tree ' + workdir, verbose=3)
        >>> _ = ub.cmd('ls ' + self.cache_dpath, verbose=3)

    Example:
        >>> from ndsampler.abstract_frames import *
        >>> import ndsampler
        >>> workdir = ub.get_app_cache_dir('ndsampler/tests/test_cog_precomp2')
        >>> ub.delete(workdir)
        >>> # TEST NPY
        >>> #
        >>> sampler = ndsampler.CocoSampler.demo(workdir=workdir, backend='npy')
        >>> self = sampler.frames
        >>> ub.delete(self.cache_dpath)  # reset
        >>> self.prepare()  # serial, miss
        >>> self.prepare()  # serial, hit
        >>> ub.delete(self.cache_dpath)  # reset
        >>> self.prepare(workers=3)  # parallel, miss
        >>> self.prepare(workers=3)  # parallel, hit
        >>> #
        >>> ## TEST COG
        >>> # xdoctest: +REQUIRES(module:osgeo)
        >>> sampler = ndsampler.CocoSampler.demo(workdir=workdir, backend='cog')
        >>> self = sampler.frames
        >>> ub.delete(self.cache_dpath)  # reset
        >>> self.prepare()  # serial, miss
        >>> self.prepare()  # serial, hit
        >>> ub.delete(self.cache_dpath)  # reset
        >>> self.prepare(workers=3)  # parallel, miss
        >>> self.prepare(workers=3)  # parallel, hit
    """
    if self.cache_dpath is None:
        print('Frames backend is None, skip prepare')
        return

    ub.ensuredir(self.cache_dpath)
    # Note: this usually accesses the hashid attribute of util.HashIdentifiable
    hashid = getattr(self, 'hashid', None)

    # TODO:
    # Add some image preprocessing ability here?
    stamp = ub.CacheStamp('prepare_frames_stamp_v2', dpath=self.cache_dpath,
                          depends=hashid, verbose=3)
    stamp.cacher.enabled = bool(hashid) and bool(use_stamp) and gids is None

    if stamp.expired() or hashid is None:
        from ndsampler.utils import util_futures
        from concurrent import futures
        # Use thread mode, because we are mostly doing io.
        executor = util_futures.Executor(mode='thread', max_workers=workers)
        with executor as executor:
            if gids is None:
                gids = self.image_ids

            missing_cache_infos = []
            for gid in ub.ProgIter(gids, desc='lookup missing cache paths'):
                pathinfo = self._lookup_pathinfo(gid)
                for chan in pathinfo['channels'].values():
                    if not exists(chan['cache']):
                        missing_cache_infos.append((gid, chan['channels']))

            prog = ub.ProgIter(missing_cache_infos,
                               desc='Frames: submit prepare jobs')
            job_list = [
                executor.submit(self.load_image, image_id,
                                channels=channels, cache=True, noreturn=True)
                for image_id, channels in prog
            ]

            for job in ub.ProgIter(futures.as_completed(job_list),
                                   total=len(job_list), adjust=False, freq=1,
                                   desc='Frames: collect prepare jobs'):
                job.result()
        stamp.renew()
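
# The submit/as_completed pattern in prepare stands alone; a minimal sketch
# with the stdlib ThreadPoolExecutor (load_one is a hypothetical io-bound
# work function).
from concurrent import futures
import ubelt as ub

def load_one(item):
    return item  # hypothetical io-bound work

items = list(range(10))
with futures.ThreadPoolExecutor(max_workers=3) as executor:
    job_list = [executor.submit(load_one, item) for item in items]
    for job in ub.ProgIter(futures.as_completed(job_list),
                           total=len(job_list), desc='collect jobs'):
        job.result()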
def test_disabled_cache_stamp():
    stamp = ub.CacheStamp('foo', 'bar', enabled=False)
    assert stamp.expired() is True, 'disabled cache stamps are always expired'
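
# A sketch of why that behavior is useful: routing a force-style flag
# (hypothetical) through ``enabled`` makes every run recompute without
# changing the surrounding cache logic.
import ubelt as ub

def build(dpath, force=False):
    # with force=True the stamp always reports expired, so the body reruns
    stamp = ub.CacheStamp('build_stamp', dpath=dpath, depends='v1',
                          enabled=not force)
    if stamp.expired():
        # ... do the expensive work here ...
        stamp.renew()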
def ensure_sqlite_csv_conn(collection_file, fields, table_create_cmd,
                           tablename='unnamed_table1', index_cols=[],
                           overwrite=False):
    """
    Returns a connection to a cache of a csv file
    """
    sql_fpath = collection_file + '.v001.sqlite'
    overwrite = False
    if os.path.exists(sql_fpath):
        sql_stat = os.stat(sql_fpath)
        col_stat = os.stat(collection_file)
        # CSV file has a newer modified time, we have to update
        if col_stat.st_mtime > sql_stat.st_mtime:
            overwrite = True
    else:
        overwrite = True

    stamp_dpath = ubelt.ensuredir(
        (os.path.dirname(collection_file), '.stamps'))
    base_name = os.path.basename(collection_file)

    stamp = ubelt.CacheStamp(base_name, dpath=stamp_dpath,
                             depends=[fields, table_create_cmd, tablename],
                             verbose=3)
    if stamp.expired():
        overwrite = True

    if overwrite:
        # Update the SQL cache if the CSV file was modified.
        print('Computing (or recomputing) an sql cache')
        ubelt.delete(sql_fpath, verbose=3)
        print('Initial connection to sql_fpath = {!r}'.format(sql_fpath))
        conn = sqlite3.connect(sql_fpath)
        cur = conn.cursor()
        try:
            print('(SQL) >')
            print(table_create_cmd)
            cur.execute(table_create_cmd)

            keypart = ','.join(fields)
            valpart = ','.join('?' * len(fields))
            insert_statement = ubelt.codeblock(
                '''
                INSERT INTO {tablename}({keypart})
                VALUES({valpart})
                ''').format(keypart=keypart, valpart=valpart,
                            tablename=tablename)

            if index_cols:
                index_cols_str = ', '.join(index_cols)
                indexname = 'noname_index'
                # TODO: Can we make an efficient date index with sqlite?
                create_index_cmd = ubelt.codeblock(
                    '''
                    CREATE INDEX {indexname} ON {tablename} ({index_cols_str});
                    ''').format(index_cols_str=index_cols_str,
                                tablename=tablename, indexname=indexname)
                print('(SQL) >')
                print(create_index_cmd)
                _ = cur.execute(create_index_cmd)

            import tqdm
            print('convert to sqlite collection_file = {!r}'.format(
                collection_file))
            with open(collection_file, 'r') as csvfile:
                # Read the total number of bytes in the CSV file
                csvfile.seek(0, 2)
                total_nbytes = csvfile.tell()

                # Read the header information
                csvfile.seek(0)
                header = csvfile.readline()
                header_nbytes = csvfile.tell()

                # Approximate the number of lines in the file
                # Measure the bytes in the first N lines and take the average
                num_lines_to_measure = 100
                csvfile.seek(0, 2)
                content_nbytes = total_nbytes - header_nbytes
                csvfile.seek(header_nbytes)
                for _ in range(num_lines_to_measure):
                    csvfile.readline()
                first_content_bytes = csvfile.tell() - header_nbytes
                approx_bytes_per_line = first_content_bytes / num_lines_to_measure
                approx_num_rows = int(content_nbytes / approx_bytes_per_line)

                # Select the indexes of the columns we want
                csv_fields = header.strip().split(',')
                field_to_idx = {
                    field: idx for idx, field in enumerate(csv_fields)
                }
                col_indexes = [field_to_idx[k] for k in fields]

                prog = tqdm.tqdm(
                    iter(csvfile),
                    desc='insert csv rows into sqlite cache',
                    total=approx_num_rows,
                    mininterval=1,
                    maxinterval=15,
                    position=0,
                    leave=True,
                )
                # Note: Manual iteration is 1.5x faster than DictReader
                for line in prog:
                    cols = line[:-1].split(',')
                    # Select the values to insert into the SQLite database
                    # Note: if this fails with an index error, it's possible
                    # the CSV file was not fully downloaded
                    vals = [cols[idx] for idx in col_indexes]
                    cur.execute(insert_statement, vals)
                conn.commit()
        except Exception:
            raise
        else:
            GLOBAL_SQLITE_CONNECTIONS[sql_fpath] = conn
            stamp.renew()
        finally:
            cur.close()

    # cache SQLite connections
    if sql_fpath in GLOBAL_SQLITE_CONNECTIONS:
        conn = GLOBAL_SQLITE_CONNECTIONS[sql_fpath]
    else:
        conn = sqlite3.connect(sql_fpath)
        GLOBAL_SQLITE_CONNECTIONS[sql_fpath] = conn
    return conn
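
# A hypothetical call to make the moving parts concrete; the CSV path,
# field names, and schema below are made up.
table_create_cmd = '''
    CREATE TABLE images (
        name TEXT NOT NULL,
        date TEXT,
        value REAL
    )
'''
conn = ensure_sqlite_csv_conn(
    '/path/to/collection.csv',  # hypothetical CSV with a header row
    ['name', 'date', 'value'], table_create_cmd,
    tablename='images', index_cols=['date'])
cur = conn.cursor()
cur.execute('SELECT COUNT(*) FROM images')
print(cur.fetchone()[0])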
def _autojit_cython(pyx_fpath, verbose=1, recompile=False, annotate=False):
    """
    The idea is that given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx
    has not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.

    recompile : bool
        if True, force recompilation even if the stamp is not expired.

    annotate : bool
        if True, pass ``-a`` to cythonize to produce an annotated report.
    """
    import shutil

    if verbose > 3:
        print('_autojit_cython')

    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        if verbose > 3:
            print('pyx_dpath = {!r}'.format(pyx_dpath))

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = False
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        if verbose > 3:
            print('so_fname = {!r}'.format(so_fname))

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            if verbose > 3:
                print('return false, no ubelt')
            return False
        else:
            if so_fname is False:
                # We can compute what the so_fname will be if it doesn't exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            content = ub.readfrom(pyx_fpath)
            mtime = os.stat(pyx_fpath).st_mtime

            depends = [ub.hash_data(content, hasher="sha1"), mtime]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if verbose > 3:
                print('stamp = {!r}'.format(stamp))
            if recompile or stamp.expired():
                # Heuristic to try and grab the numpy include dir or not
                cythonize_args = ['cythonize']
                cythonize_env = os.environ.copy()
                needs_numpy = 'numpy' in content
                if needs_numpy:
                    import numpy as np
                    import pathlib
                    numpy_include_dpath = pathlib.Path(np.get_include())
                    numpy_dpath = (numpy_include_dpath / '../..').resolve()
                    # cythonize_env['CPATH'] = numpy_include_dpath + ':' + cythonize_env.get('CPATH', '')
                    cythonize_env['CFLAGS'] = ' '.join([
                        '-I{}'.format(numpy_include_dpath),
                    ]) + cythonize_env.get('CFLAGS', '')

                    cythonize_env['LDFLAGS'] = ' '.join([
                        '-L{} -lnpyrandom'.format(numpy_dpath / 'random/lib'),
                        '-L{} -lnpymath'.format(numpy_dpath / 'core/lib'),
                    ]) + cythonize_env.get('LDFLAGS', '')
                if annotate:
                    cythonize_args.append('-a')
                cythonize_args.append('-i {}'.format(pyx_fpath))
                cythonize_cmd = ' '.join(cythonize_args)
                if needs_numpy:
                    print('CFLAGS="{}" '.format(cythonize_env['CFLAGS']) +
                          'LDFLAGS="{}" '.format(cythonize_env['LDFLAGS']) +
                          cythonize_cmd)
                ub.cmd(cythonize_cmd, verbose=verbose, check=True,
                       env=cythonize_env)
                stamp.renew()
            return True
    else:
        if verbose > 2:
            print('Cythonize not found!')
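
# A hypothetical driver for the function above, assuming a sibling
# mymodule.pyx sits next to the calling module.
import os

pyx_fpath = os.path.join(os.path.dirname(__file__), 'mymodule.pyx')
if _autojit_cython(pyx_fpath, verbose=1, annotate=True):
    import mymodule  # the compiled extension is now importable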