Example #1
def _autojit_cython(pyx_fpath, verbose=1):
    """
    The idea is that, given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx has
    not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.
    """
    import os
    import shutil
    from os.path import basename, dirname, join, splitext

    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = False
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            return False
        else:
            if so_fname is False:
                # We can compute what the so_fname will be if it doesn't exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            depends = [ub.hash_file(pyx_fpath, hasher="sha1")]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if stamp.expired():
                ub.cmd("cythonize -i {}".format(pyx_fpath), verbose=verbose, check=True)
                stamp.renew()
            return True
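The stamp idiom above generalizes to any expensive artifact: hash the inputs, check expired(), rebuild on a miss, then renew(). A minimal sketch, assuming ubelt is installed; build_product and the paths are hypothetical:

import ubelt as ub
from os.path import join

def build_product(fpath):
    # Hypothetical expensive step that writes its result to fpath
    with open(fpath, 'w') as file:
        file.write('expensive result')

dpath = ub.ensure_app_cache_dir('autojit_demo')
product = join(dpath, 'result.txt')
stamp = ub.CacheStamp('result.stamp', dpath=dpath, product=product,
                      depends=['v1'])
if stamp.expired():
    build_product(product)
    stamp.renew()  # records the product hash in the stamp file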
Example #2
    def test_2(self):
        dpath = 取运行目录() + r"\cache"  # 取运行目录() means "get the run directory"
        # You must specify a directory, unlike in Cacher where it is optional
        self = ub.CacheStamp('name', dpath=dpath, cfgstr='dependencies')
        if self.expired():
            compute_many_files(dpath)

            self.renew()
        assert not self.expired()
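Note that newer ubelt releases deprecate the cfgstr argument used here in favor of depends, which several of the later examples already use. A minimal sketch of the equivalent call with the newer spelling, assuming a writable cache directory:

import ubelt as ub

dpath = ub.ensure_app_cache_dir('cachestamp_demo')
# `depends` is the newer name for `cfgstr`
self = ub.CacheStamp('name', dpath=dpath, depends='dependencies')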
Example #3
    @classmethod
    def demo(cls, key='astro', dsize=None):
        """
        Ignore:
            >>> from ndsampler.utils.util_gdal import *  # NOQA
            >>> self = LazyGDalFrameFile.demo(dsize=(6600, 4400))
        """
        cache_dpath = ub.ensure_app_cache_dir('ndsampler/demo')
        fpath = join(cache_dpath, key + '.cog.tiff')
        depends = ub.odict(dsize=dsize)
        cfgstr = ub.hash_data(depends)
        stamp = ub.CacheStamp(fname=key,
                              cfgstr=cfgstr,
                              dpath=cache_dpath,
                              product=[fpath])
        if stamp.expired():
            import kwimage
            img = kwimage.grab_test_image(key, dsize=dsize)
            kwimage.imwrite(fpath, img, backend='gdal')
            stamp.renew()
        self = cls(fpath)
        return self
Example #4
def grab_coco_camvid():
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> dset = grab_coco_camvid()
        >>> print('dset = {!r}'.format(dset))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> plt.clf()
        >>> dset.show_image(gid=1)

    Ignore:
        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()
    """
    import kwcoco
    cache_dpath = ub.ensure_app_cache_dir('kwcoco', 'camvid')
    coco_fpath = join(cache_dpath, 'camvid.mscoco.json')

    # Need to manually bump this if you make a change to loading
    SCRIPT_VERSION = 'v4'

    # Ubelt's stamp-based caches are super cheap and let you take control of
    # the data format.
    stamp = ub.CacheStamp('camvid_coco',
                          cfgstr=SCRIPT_VERSION,
                          dpath=cache_dpath,
                          product=coco_fpath,
                          hasher='sha1',
                          verbose=3)
    if stamp.expired():
        camvid_raw_info = grab_raw_camvid()
        dset = convert_camvid_raw_to_coco(camvid_raw_info)
        with ub.Timer('dumping MS-COCO dset to: {}'.format(coco_fpath)):
            dset.dump(coco_fpath)
        # Mark this process as completed by saving a small file containing the
        # hash of the "product" you are stamping.
        stamp.renew()

    # We can also cache the index build step independently. This uses
    # ubelt.Cacher, which is pickle based, and writes the actual object to
    # disk. Each type of caching has its own uses and tradeoffs.
    cacher = ub.Cacher('prebuilt-coco',
                       cfgstr=SCRIPT_VERSION,
                       dpath=cache_dpath,
                       verbose=3)
    dset = cacher.tryload()
    if dset is None:
        print('Reading coco_fpath = {!r}'.format(coco_fpath))
        dset = kwcoco.CocoDataset(coco_fpath, tag='camvid')
        # Directly save the file to disk.
        dset._build_index()
        dset._build_hashid()
        cacher.save(dset)

    camvid_dset = dset
    print('Loaded camvid_dset = {!r}'.format(camvid_dset))
    return camvid_dset
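The tryload/save pair above can also be collapsed with Cacher.ensure, which calls a function on a cache miss and saves its result. A minimal sketch with a hypothetical loader standing in for the CocoDataset build:

import ubelt as ub

def load_dataset():
    # Hypothetical stand-in for the expensive dataset construction above
    return {'images': [], 'annotations': []}

cacher = ub.Cacher('prebuilt-demo', depends='v4', verbose=3)
dset = cacher.ensure(load_dataset)  # tryload on a hit, call + save on a miss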
Example #5
    def prepare(self, gids=None, workers=0, use_stamp=True):
        """
        Precompute the cached frame conversions

        Args:
            gids (List[int] | None): specific image ids to prepare.
                If None, prepare all images.
            workers (int, default=0): number of parallel threads for this
                io-bound task
            use_stamp (bool, default=True): if False, ignore any existing
                completion stamp and recheck which frames need conversion

        Example:
            >>> from ndsampler.abstract_frames import *
            >>> workdir = ub.ensure_app_cache_dir('ndsampler/tests/test_cog_precomp')
            >>> print('workdir = {!r}'.format(workdir))
            >>> ub.delete(workdir)
            >>> ub.ensuredir(workdir)
            >>> self = SimpleFrames.demo(backend='npy', workdir=workdir)
            >>> print('self = {!r}'.format(self))
            >>> print('self.cache_dpath = {!r}'.format(self.cache_dpath))
            >>> #_ = ub.cmd('tree ' + workdir, verbose=3)
            >>> self.prepare()
            >>> self.prepare()
            >>> #_ = ub.cmd('tree ' + workdir, verbose=3)
            >>> _ = ub.cmd('ls ' + self.cache_dpath, verbose=3)

        Example:
            >>> from ndsampler.abstract_frames import *
            >>> import ndsampler
            >>> workdir = ub.get_app_cache_dir('ndsampler/tests/test_cog_precomp2')
            >>> ub.delete(workdir)
            >>> # TEST NPY
            >>> #
            >>> sampler = ndsampler.CocoSampler.demo(workdir=workdir, backend='npy')
            >>> self = sampler.frames
            >>> ub.delete(self.cache_dpath)  # reset
            >>> self.prepare()  # serial, miss
            >>> self.prepare()  # serial, hit
            >>> ub.delete(self.cache_dpath)  # reset
            >>> self.prepare(workers=3)  # parallel, miss
            >>> self.prepare(workers=3)  # parallel, hit
            >>> #
            >>> ## TEST COG
            >>> # xdoctest: +REQUIRES(module:osgeo)
            >>> sampler = ndsampler.CocoSampler.demo(workdir=workdir, backend='cog')
            >>> self = sampler.frames
            >>> ub.delete(self.cache_dpath)  # reset
            >>> self.prepare()  # serial, miss
            >>> self.prepare()  # serial, hit
            >>> ub.delete(self.cache_dpath)  # reset
            >>> self.prepare(workers=3)  # parallel, miss
            >>> self.prepare(workers=3)  # parallel, hit
        """
        if self.cache_dpath is None:
            print('Frames backend is None, skip prepare')
            return

        ub.ensuredir(self.cache_dpath)
        # Note: this usually accesses the hashid attribute of util.HashIdentifiable
        hashid = getattr(self, 'hashid', None)

        # TODO:
        #     Add some image preprocessing ability here?
        stamp = ub.CacheStamp('prepare_frames_stamp_v2',
                              dpath=self.cache_dpath,
                              depends=hashid,
                              verbose=3)
        stamp.cacher.enabled = (
            bool(hashid) and bool(use_stamp) and gids is None)

        if stamp.expired() or hashid is None:
            from ndsampler.utils import util_futures
            from concurrent import futures
            # Use thread mode, because we are mostly doing io.
            executor = util_futures.Executor(mode='thread',
                                             max_workers=workers)
            with executor as executor:
                if gids is None:
                    gids = self.image_ids

                missing_cache_infos = []
                for gid in ub.ProgIter(gids,
                                       desc='lookup missing cache paths'):
                    pathinfo = self._lookup_pathinfo(gid)
                    for chan in pathinfo['channels'].values():
                        if not exists(chan['cache']):
                            missing_cache_infos.append((gid, chan['channels']))

                prog = ub.ProgIter(missing_cache_infos,
                                   desc='Frames: submit prepare jobs')
                job_list = [
                    executor.submit(self.load_image,
                                    image_id,
                                    channels=channels,
                                    cache=True,
                                    noreturn=True)
                    for image_id, channels in prog
                ]

                for job in ub.ProgIter(futures.as_completed(job_list),
                                       total=len(job_list),
                                       adjust=False,
                                       freq=1,
                                       desc='Frames: collect prepare jobs'):
                    job.result()
            stamp.renew()
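The stamp.cacher.enabled assignment above gates the whole stamp: when the dependencies are unknown the stamp is disabled, and a disabled stamp always reports expired (see Example #6). A minimal sketch of that gating with hypothetical names:

import ubelt as ub

hashid = None  # hypothetical: the data fingerprint is unknown here
dpath = ub.ensure_app_cache_dir('prepare_demo')
stamp = ub.CacheStamp('prepare_stamp', dpath=dpath, depends=hashid)
stamp.cacher.enabled = bool(hashid)  # disable when deps are unknown
assert stamp.expired(), 'a disabled stamp always reports expired'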
Example #6
def test_disabled_cache_stamp():
    stamp = ub.CacheStamp('foo', 'bar', enabled=False)
    assert stamp.expired() is True, 'disabled cache stamps are always expired'
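For contrast, a minimal sketch of the enabled case, where the stamp is expired only until it is renewed; the test name and dpath are hypothetical:

import ubelt as ub

def test_enabled_cache_stamp():
    dpath = ub.ensure_app_cache_dir('ubelt/tests/enabled_stamp_demo')
    ub.delete(dpath)    # reset so the first check is a clean miss
    ub.ensuredir(dpath)
    stamp = ub.CacheStamp('foo', dpath=dpath, depends='bar')
    assert stamp.expired(), 'no stamp file exists yet'
    stamp.renew()
    assert not stamp.expired(), 'the stamp is now fresh'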
Example #7
def ensure_sqlite_csv_conn(collection_file,
                           fields,
                           table_create_cmd,
                           tablename='unnamed_table1',
                           index_cols=None,
                           overwrite=False):
    """
    Returns a connection to a cache of a csv file
    """
    sql_fpath = collection_file + '.v001.sqlite'
    if os.path.exists(sql_fpath):
        sql_stat = os.stat(sql_fpath)
        col_stat = os.stat(collection_file)
        # CSV file has a newer modified time, we have to update
        if col_stat.st_mtime > sql_stat.st_mtime:
            overwrite = True
    else:
        overwrite = True

    stamp_dpath = ubelt.ensuredir(
        (os.path.dirname(collection_file), '.stamps'))
    base_name = os.path.basename(collection_file)

    stamp = ubelt.CacheStamp(base_name,
                             dpath=stamp_dpath,
                             depends=[fields, table_create_cmd, tablename],
                             verbose=3)
    if stamp.expired():
        overwrite = True

    if overwrite:
        # Update the SQL cache if the CSV file was modified.
        print('Computing (or recomputing) an sql cache')

        ubelt.delete(sql_fpath, verbose=3)
        print('Initial connection to sql_fpath = {!r}'.format(sql_fpath))
        conn = sqlite3.connect(sql_fpath)
        cur = conn.cursor()
        try:
            print('(SQL) >')
            print(table_create_cmd)
            cur.execute(table_create_cmd)

            keypart = ','.join(fields)
            valpart = ','.join('?' * len(fields))
            insert_statement = ubelt.codeblock('''
                INSERT INTO {tablename}({keypart})
                VALUES({valpart})
                ''').format(keypart=keypart,
                            valpart=valpart,
                            tablename=tablename)

            if index_cols:
                index_cols_str = ', '.join(index_cols)
                indexname = 'noname_index'
                # TODO: Can we make an efficient date index with sqlite?
                create_index_cmd = ubelt.codeblock('''
                    CREATE INDEX {indexname} ON {tablename} ({index_cols_str});
                    ''').format(index_cols_str=index_cols_str,
                                tablename=tablename,
                                indexname=indexname)
                print('(SQL) >')
                print(create_index_cmd)
                _ = cur.execute(create_index_cmd)

            import tqdm
            print('convert to sqlite collection_file = {!r}'.format(
                collection_file))
            with open(collection_file, 'r') as csvfile:

                # Read the total number of bytes in the CSV file
                csvfile.seek(0, 2)
                total_nbytes = csvfile.tell()

                # Read the header information
                csvfile.seek(0)
                header = csvfile.readline()
                header_nbytes = csvfile.tell()

                # Approximate the number of lines in the file
                # Measure the bytes in the first N lines and take the average
                num_lines_to_measure = 100
                csvfile.seek(0, 2)
                content_nbytes = total_nbytes - header_nbytes
                csvfile.seek(header_nbytes)
                for _ in range(num_lines_to_measure):
                    csvfile.readline()
                first_content_bytes = csvfile.tell() - header_nbytes
                approx_bytes_per_line = first_content_bytes / num_lines_to_measure
                approx_num_rows = int(content_nbytes / approx_bytes_per_line)
                # Seek back so the measured rows are re-read and inserted
                csvfile.seek(header_nbytes)

                # Select the indexes of the columns we want
                csv_fields = header.strip().split(',')
                field_to_idx = {
                    field: idx
                    for idx, field in enumerate(csv_fields)
                }
                col_indexes = [field_to_idx[k] for k in fields]

                prog = tqdm.tqdm(
                    iter(csvfile),
                    desc='insert csv rows into sqlite cache',
                    total=approx_num_rows,
                    mininterval=1,
                    maxinterval=15,
                    position=0,
                    leave=True,
                )
                # Note: Manual iteration is 1.5x faster than DictReader
                for line in prog:
                    cols = line[:-1].split(',')
                    # Select the values to insert into the SQLite database
                    # Note: if this fails with an index error, it's possible
                    # the CSV file was not fully downloaded
                    vals = [cols[idx] for idx in col_indexes]
                    cur.execute(insert_statement, vals)

            conn.commit()
        except Exception:
            # Re-raise; the bare except exists so the else clause below
            # only runs on success (finally still closes the cursor).
            raise
        else:
            GLOBAL_SQLITE_CONNECTIONS[sql_fpath] = conn
            stamp.renew()
        finally:
            cur.close()

    # cache SQLite connections
    if sql_fpath in GLOBAL_SQLITE_CONNECTIONS:
        conn = GLOBAL_SQLITE_CONNECTIONS[sql_fpath]
    else:
        conn = sqlite3.connect(sql_fpath)
        GLOBAL_SQLITE_CONNECTIONS[sql_fpath] = conn

    return conn
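The mtime comparison at the top of this function is independent of the stamp machinery; a minimal sketch of that check factored into a helper (the name is hypothetical):

import os

def csv_newer_than_cache(csv_fpath, cache_fpath):
    # True when the cache is missing or older than its source CSV,
    # mirroring the overwrite logic above
    if not os.path.exists(cache_fpath):
        return True
    return os.stat(csv_fpath).st_mtime > os.stat(cache_fpath).st_mtime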
Example #8
def _autojit_cython(pyx_fpath, verbose=1, recompile=False, annotate=False):
    """
    The idea is that, given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx has
    not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.

    recompile : bool
        if True, recompile even if the stamp is still fresh.

    annotate : bool
        if True, pass ``-a`` to cythonize to produce an annotated HTML report.
    """
    import os
    import shutil
    from os.path import basename, dirname, join, splitext
    if verbose > 3:
        print('_autojit_cython')

    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        if verbose > 3:
            print('pyx_dpath = {!r}'.format(pyx_dpath))

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = False
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        if verbose > 3:
            print('so_fname = {!r}'.format(so_fname))

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            if verbose > 3:
                print('return false, no ubelt')
            return False
        else:
            if so_fname is False:
                # We can compute what the so_fname will be if it doesn't exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            content = ub.readfrom(pyx_fpath)
            mtime = os.stat(pyx_fpath).st_mtime

            depends = [ub.hash_data(content, hasher="sha1"), mtime]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if verbose > 3:
                print('stamp = {!r}'.format(stamp))
            if recompile or stamp.expired():
                # Heuristic to try and grab the numpy include dir or not
                cythonize_args = ['cythonize']
                cythonize_env = os.environ.copy()
                needs_numpy = 'numpy' in content
                if needs_numpy:
                    import numpy as np
                    import pathlib
                    numpy_include_dpath = pathlib.Path(np.get_include())
                    numpy_dpath = (numpy_include_dpath / '../..').resolve()
                    # cythonize_env['CPATH'] = numpy_include_dpath + ':' + cythonize_env.get('CPATH', '')
                    cythonize_env['CFLAGS'] = ' '.join([
                        '-I{}'.format(numpy_include_dpath),
                    ]) + ' ' + cythonize_env.get('CFLAGS', '')

                    cythonize_env['LDFLAGS'] = ' '.join([
                        '-L{} -lnpyrandom'.format(numpy_dpath / 'random/lib'),
                        '-L{} -lnpymath'.format(numpy_dpath / 'core/lib'),
                    ]) + ' ' + cythonize_env.get('LDFLAGS', '')
                if annotate:
                    cythonize_args.append('-a')
                cythonize_args.append('-i {}'.format(pyx_fpath))
                cythonize_cmd = ' '.join(cythonize_args)
                if needs_numpy:
                    print('CFLAGS="{}" '.format(cythonize_env['CFLAGS']) +
                          'LDFLAGS="{}" '.format(cythonize_env['LDFLAGS']) +
                          cythonize_cmd)
                ub.cmd(cythonize_cmd,
                       verbose=verbose,
                       check=True,
                       env=cythonize_env)
                stamp.renew()
            return True
    else:
        if verbose > 2:
            print('Cythonize not found!')
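The numpy flag heuristic can be inspected in isolation; a minimal sketch that prints the include and library flags the code above derives (exact paths depend on the local numpy install):

import pathlib
import numpy as np

numpy_include_dpath = pathlib.Path(np.get_include())
numpy_dpath = (numpy_include_dpath / '../..').resolve()
print('CFLAGS  -> -I{}'.format(numpy_include_dpath))
print('LDFLAGS -> -L{} -lnpyrandom'.format(numpy_dpath / 'random/lib'))
print('LDFLAGS -> -L{} -lnpymath'.format(numpy_dpath / 'core/lib'))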