Example #1
def preprocess_augment(task, modes=None):
    assert task.enable_augment
    if modes is None:
        modes = task.base_modes
    for scene in ub.ProgIter(task.scene_ids, label='preproc augment scene', verbose=3):
        for mode in ub.ProgIter(modes, label=' * mode', verbose=0):
            task.make_augment_scene(mode, scene, rng='determ')
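The snippet above nests a verbose outer ProgIter around a silent inner one. As a rough standalone sketch of the same pattern (the scene and mode lists here are made up):

import ubelt as ub

scenes = ['0000', '0001', '0002']    # hypothetical scene ids
modes = ['lowres', 'part-scale1']    # hypothetical augmentation modes

for scene in ub.ProgIter(scenes, desc='preproc augment scene', verbose=3):
    for mode in ub.ProgIter(modes, desc=' * mode', verbose=0):
        pass  # the per-scene augmentation work would go here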
Example #2
def _cached_class_frequency(dset):
    import copy
    import numpy as np
    import ubelt as ub
    import torch.utils.data as torch_data
    # Copy the dataset so we can muck with it
    dset_copy = copy.copy(dset)

    dset_copy._build_sliders(input_overlap=0)
    dset_copy.augmenter = None

    cfgstr = '_'.join([dset_copy.sampler.dset.hashid, 'v1'])
    cacher = ub.Cacher('class_freq', cfgstr=cfgstr)
    total_freq = cacher.tryload()
    if total_freq is None:

        total_freq = np.zeros(len(dset_copy.classes), dtype=np.int64)
        if True:  # parallel path via a DataLoader; set to False for the serial fallback
            loader = torch_data.DataLoader(dset_copy, batch_size=16,
                                           num_workers=7, shuffle=False,
                                           pin_memory=True)

            prog = ub.ProgIter(loader, desc='computing (par) class freq')
            for batch in prog:
                class_idxs = batch['class_idxs'].data.numpy()
                item_freq = np.histogram(class_idxs, bins=len(dset_copy.classes))[0]
                total_freq += item_freq
        else:
            prog = ub.ProgIter(range(len(dset_copy)), desc='computing (ser) class freq')
            for index in prog:
                item = dset_copy[index]
                class_idxs = item['class_idxs'].data.numpy()
                item_freq = np.histogram(class_idxs, bins=len(dset_copy.classes))[0]
                total_freq += item_freq
        cacher.save(total_freq)
    return total_freq
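The function above is built around the ub.Cacher try-load / compute / save pattern. A minimal sketch of just that pattern, with a stand-in computation:

import ubelt as ub
import numpy as np

cfgstr = 'v1'  # any string that identifies the inputs / configuration
cacher = ub.Cacher('class_freq_demo', cfgstr=cfgstr)
total_freq = cacher.tryload()
if total_freq is None:
    total_freq = np.zeros(10, dtype=np.int64)  # the expensive computation stands in here
    cacher.save(total_freq)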
Example #3
def _devcheck_manage_monitor(workdir, dry=True):

    all_sessions = collect_sessions(workdir)

    # Get all the images in the monitor directories
    # (this is a convention and not something netharn does by default)

    all_files = []
    # factor = 100
    max_keep = 300

    def _choose_action(file_infos):
        import kwarray
        file_infos = kwarray.shuffle(file_infos, rng=0)
        n_keep = max_keep
        # n_keep = (len(file_infos) // factor) + 1
        # n_keep = min(max_keep, n_keep)

        for info in file_infos[:n_keep]:
            info['action'] = 'keep'
        for info in file_infos[n_keep:]:
            info['action'] = 'delete'

    for session in ub.ProgIter(all_sessions, desc='checking monitor files'):
        dpaths = [
            join(session.dpath, 'monitor', 'train', 'batch'),
            join(session.dpath, 'monitor', 'vali', 'batch'),
            join(session.dpath, 'monitor', 'train'),
            join(session.dpath, 'monitor', 'vali'),
        ]
        exts = ['*.jpg', '*.png']
        for dpath in dpaths:
            for ext in exts:
                fpaths = list(glob.glob(join(dpath, ext)))
                file_infos = [{
                    'size': os.stat(p).st_size,
                    'fpath': p
                } for p in fpaths]
                _choose_action(file_infos)
                all_files.extend(file_infos)

    grouped_actions = ub.group_items(all_files, lambda x: x['action'])

    for key, group in grouped_actions.items():
        size = byte_str(sum([s['size'] for s in group]))
        print('{:>4} images:  {:>4}, size={}'.format(key.capitalize(),
                                                     len(group), size))

    if dry:
        print('Dry run')
    else:
        delete = grouped_actions.get('delete', [])
        delete_fpaths = [item['fpath'] for item in delete]
        for p in ub.ProgIter(delete_fpaths, desc='deleting'):
            ub.delete(p)
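The keep/delete bookkeeping above relies on ub.group_items to bucket the file records by their chosen action. A tiny illustration of that call with made-up records:

import ubelt as ub

files = [
    {'fpath': 'a.png', 'action': 'keep'},
    {'fpath': 'b.png', 'action': 'delete'},
    {'fpath': 'c.png', 'action': 'keep'},
]
grouped = ub.group_items(files, lambda x: x['action'])
for action, group in grouped.items():
    print('{}: {} files'.format(action, len(group)))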
Example #4
    def parallel_refine(cls,
                        pfiles,
                        step_idx,
                        mode='serial',
                        max_workers=6,
                        verbose=0):
        """
        Refines the hashids of multiple files

        Ignore:
            >>> # Create a directory filled with random files
            >>> #fpaths = _demodata_files(
            >>> #        num_files=1, size_pool=[30], pool_size=2)
            >>> fpaths = _demodata_files()
            >>> pfiles = [ProgressiveFile(f) for f in fpaths]
            >>> with ub.Timer('step'):
            >>>     step_idx = 2
            >>>     ProgressiveFile.parallel_refine(pfiles, step_idx)
        """
        from kwcoco.util.util_futures import JobPool  # NOQA
        # jobs = JobPool(mode='thread', max_workers=2)

        jobs = JobPool(mode=mode, max_workers=max_workers)

        for pfile in ub.ProgIter(pfiles,
                                 desc='submit hash jobs',
                                 verbose=verbose):
            # only submit the job if we need to
            parts = pfile._parts
            if pfile.can_refine and (step_idx == 'next'
                                     or len(parts) <= step_idx):
                hasher = pfile._hasher
                fpath = pfile.fpath
                pos = pfile._pos
                size = pfile._size
                curr_blocks = pfile._curr_blocks

                job = jobs.submit(progressive_refine_worker, hasher, fpath,
                                  parts, pos, curr_blocks, step_idx, size)
                job.pfile = pfile

        for job in ub.ProgIter(jobs.as_completed(),
                               total=len(jobs),
                               desc='collect hash jobs',
                               verbose=verbose):
            pfile = job.pfile
            result = job.result()
            if result is not None:
                hasher, next_parts, pos, curr_blocks, size = result
                pfile._hasher = hasher
                pfile._parts.extend(next_parts)
                pfile._pos = pos
                pfile._size = size
                pfile._curr_blocks = curr_blocks
Example #5
def test_tqdm_compatibility():
    prog = ProgIter(range(20), total=20, miniters=17, show_times=False)
    assert prog.pos == 0
    assert prog.freq == 17
    for _ in prog:
        pass

    import ubelt as ub
    with ub.CaptureStdout() as cap:
        ProgIter.write('foo')
    assert cap.text.strip() == 'foo'

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=False)
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=True)
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description_str('new desc')
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    import ubelt as ub
    with ub.CaptureStdout() as cap:
        prog = ub.ProgIter(show_times=False)
        prog.set_postfix({'foo': 'bar'}, baz='biz', x=object(), y=2)
        prog.begin()
    assert prog.length is None
    assert 'foo=bar' in cap.text.strip()
    assert 'baz=biz' in cap.text.strip()
    assert 'y=2' in cap.text.strip()
    assert 'x=<object' in cap.text.strip()

    import ubelt as ub
    with ub.CaptureStdout() as cap:
        prog = ub.ProgIter(show_times=False)
        prog.set_postfix_str('bar baz', refresh=False)
    assert 'bar baz' not in cap.text.strip()
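The test above exercises the tqdm-style methods that ProgIter supports. A small sketch of using that compatibility API directly (the loop body is illustrative):

import ubelt as ub

prog = ub.ProgIter(range(100), desc='work', show_times=False)
prog.set_postfix_str('phase=warmup', refresh=False)
for i in prog:
    if i == 50:
        prog.set_description('work (second half)', refresh=False)
        ub.ProgIter.write('halfway there')   # print a line without clobbering the bar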
Example #6
def _demodata_files(dpath=None, num_files=10, pool_size=3, size_pool=None):
    def _random_data(rng, num):
        return ''.join([rng.choice(string.hexdigits) for _ in range(num)])

    def _write_random_file(dpath, part_pool, size_pool, rng):
        namesize = 16
        # Choose 1, 4, or 16 parts of data
        num_parts = rng.choice(size_pool)
        chunks = [rng.choice(part_pool) for _ in range(num_parts)]
        contents = ''.join(chunks)
        fname_noext = _random_data(rng, namesize)
        ext = ub.hash_data(contents)[0:4]
        fname = '{}.{}'.format(fname_noext, ext)
        fpath = join(dpath, fname)
        with open(fpath, 'w') as file:
            file.write(contents)
        return fpath

    if size_pool is None:
        size_pool = [1, 4, 16]

    if dpath is None:
        dpath = ub.ensure_app_cache_dir('pfile/random')
    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = 65536
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]
    # Write num_files random files that have a reasonable collision probability
    fpaths = [
        _write_random_file(dpath, part_pool, size_pool, rng)
        for _ in ub.ProgIter(range(num_files), desc='write files')
    ]

    for fpath in fpaths:
        assert exists(fpath)
    return fpaths
Example #7
    def rebase_groundtruth(task, fullres, force=False):
        """
        Inplace / lazy modification of groundtruth labels

        hacky.
        """

        # Remap the original three labels to [0, 1, 2]
        orig_labels = [2, 6, 65]
        mapping = np.full(max(orig_labels) + 1, fill_value=-1)
        mapping[orig_labels] = np.arange(len(orig_labels))

        datadir = ub.ensuredir((task.workdir, 'data'))
        dpath = ub.ensuredir((datadir, 'gt', 'full'))

        new_gt_paths = []
        for ix in ub.ProgIter(range(len(fullres.paths['gt'])), label='rebase'):
            path = fullres.paths['gt'][ix]
            name = fullres.dump_im_names[ix]
            out_dpath = join(dpath, name)
            # Hacky cache
            if force or not exists(out_dpath):
                in_data = imutil.imread(path)
                out_data = mapping[in_data]
                imutil.imwrite(out_dpath, out_data)
            new_gt_paths.append(out_dpath)
        fullres.paths['gt'] = new_gt_paths
        return fullres
Example #8
    def find_connecting_edges(infr):
        """
        Searches for a small set of edges, which if reviewed as positive would
        ensure that each PCC is k-connected. Note that in some cases this is
        not possible.
        """
        label = 'name_label'
        node_to_label = infr.get_node_attrs(label)
        label_to_nodes = ub.group_items(node_to_label.keys(),
                                        node_to_label.values())

        # k = infr.params['redun.pos']
        k = 1
        new_edges = []
        prog = ub.ProgIter(list(label_to_nodes.keys()),
                           desc='finding connecting edges',
                           enabled=infr.verbose > 0)
        for nid in prog:
            nodes = set(label_to_nodes[nid])
            G = infr.pos_graph.subgraph(nodes, dynamic=False)
            impossible = nxu.edges_inside(infr.neg_graph, nodes)
            impossible |= nxu.edges_inside(infr.incomp_graph, nodes)

            candidates = set(nx.complement(G).edges())
            candidates.difference_update(impossible)

            aug_edges = nxu.k_edge_augmentation(G, k=k, avail=candidates)
            new_edges += aug_edges
        prog.ensure_newline()
        return new_edges
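Note the enabled= flag and the ensure_newline() call above, which keep the progress display from interleaving badly with other output. A toy sketch of both:

import ubelt as ub

verbose = True
prog = ub.ProgIter(range(5), desc='working', enabled=verbose)
for i in prog:
    if i == 2:
        prog.ensure_newline()      # finish the progress line before printing
        print('an ordinary print that now lands on its own line')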
Example #9
    def stage_files(self):
        import xdev
        self.staging_infos = []
        for info in ub.ProgIter(self.template_infos, desc='staging'):
            tags = info.get('tags', None)
            if tags:
                tags = set(tags.split(','))
                if not set(self.config['tags']).issuperset(tags):
                    continue
            stage_fpath = self.staging_dpath / info['fname']
            if info.get('dynamic', ''):
                text = getattr(self, info.get('dynamic', ''))()
                stage_fpath.write_text(text)
            else:
                raw_fpath = self.template_dpath / info['fname']
                stage_fpath.parent.ensuredir()
                shutil.copy2(raw_fpath, stage_fpath)
                if info['template']:
                    xdev.sedfile(stage_fpath,
                                 'PYPKG',
                                 self.repo_name,
                                 verbose=0)

            info['stage_fpath'] = stage_fpath
            info['repo_fpath'] = self.repo_dpath / info['fname']
            self.staging_infos.append(info)

        if 1:
            import pandas as pd
            df = pd.DataFrame(self.staging_infos)
            print(df)
Example #10
def run_benchmark():
    import ubelt as ub

    data_dim = 128
    num_dpts = 1000000
    num_qpts = 25000
    num_neighbs = 5
    random_seed = 42
    rng = np.random.RandomState(0)

    dataset = rand_vecs(num_dpts, data_dim, rng)
    testset = rand_vecs(num_qpts, data_dim, rng)
    # Build deterministic flann object
    flann = pyflann.FLANN()
    print('building dataset for %d vecs' % (len(dataset)))

    with ub.Timer(label='building kdtrees', verbose=True) as t:
        params = flann.build_index(
            dataset,
            algorithm='kdtree',
            trees=6,
            random_seed=random_seed,
            cores=6,
        )

    print(params)

    qvec_chunks = list(ub.chunks(testset, 1000))
    times = []
    for qvecs in ub.ProgIter(qvec_chunks, label='find nn'):
        with ub.Timer(verbose=0) as t:
            _ = flann.nn_index(qvecs, num_neighbs)  # NOQA
        times.append(t.elapsed)
    print(np.mean(times))
Example #11
def parse_mscoco():
    # Test that our implementation can handle the real mscoco data
    root = ub.expandpath('~/data/standard_datasets/mscoco/')

    fpath = join(root, 'annotations/instances_val2014.json')
    img_root = normpath(ub.ensuredir((root, 'images', 'val2014')))

    # fpath = join(root, 'annotations/stuff_val2017.json')
    # img_root = normpath(ub.ensuredir((root, 'images', 'val2017')))

    import ujson
    dataset = ujson.load(open(fpath, 'rb'))

    import ndsampler
    dset = ndsampler.CocoDataset(dataset)
    dset.img_root = img_root

    gid_iter = iter(dset.imgs.keys())

    gid = ub.peek(gid_iter)

    for gid in ub.ProgIter(gid_iter):
        img = dset.imgs[gid]
        ub.grabdata(img['coco_url'], dpath=img_root, verbose=0)
        anns = [dset.anns[aid] for aid in dset.gid_to_aids[gid]]
        dset.show_image(gid=gid)

    ann = anns[0]

    segmentation = ann['segmentation']

    from PIL import Image
    gpath = join(dset.img_root, img['file_name'])
    with Image.open(gpath) as pil_img:
        np_img = np.array(pil_img)
    def dump_selection(dset, gid_list):
        from matplotlib import pyplot as plt
        for gid in ub.ProgIter(gid_list, verbose=3):
            fig = plt.figure(6)
            fig.clf()
            dset.show_annotation(gid=gid)
            name = os.path.basename(
                os.path.dirname(dset.imgs[gid]['file_name']))
            ax = plt.gca()
            plt.gca().set_title(name)
            ax.set_xticks([])
            ax.set_yticks([])
            plt.gca().grid('off')
            fig.canvas.draw()
            dpi = 96
            fig.set_dpi(dpi)
            fig.set_size_inches(1920 / dpi, 1080 / dpi)
            img = nh.util.mplutil.render_figure_to_image(fig, dpi=dpi)
            # print('img = {!r}'.format(img.shape))

            if dset.tag:
                out_fname = dset.tag + '_' + '_'.join(
                    dset.imgs[gid]['file_name'].split('/')[-2:])
            else:
                out_fname = '_'.join(
                    dset.imgs[gid]['file_name'].split('/')[-2:])
            fpath = join(output_dpath, out_fname)
            print('fpath = {!r}'.format(fpath))
            nh.util.imwrite(fpath, img)
Example #13
    def _configure(self):
        logger.debug(' ----- ' + self.__class__.__name__ + ' configure')
        config = tmp_smart_cast_config(self)

        logger.info('triangulator config = {}'.format(ub.repr2(config, nl=2)))
        self.measurement_file = config.pop('measurement_file')
        self.calibration_file = config.pop('calibration_file')
        self.triangulator = ctalgo.StereoLengthMeasurments(**config)

        # Load camera calibration data here.
        if not os.path.exists(self.calibration_file):
            raise KeyError('must specify a valid camera calibration path')

        self.cal = ctalgo.StereoCalibration.from_file(self.calibration_file)
        logger.info('self.cal = {!r}'.format(self.cal))

        self.headers = [
            'current_frame', 'fishlen', 'range', 'error', 'dz', 'box_pts1',
            'box_pts2'
        ]

        if self.measurement_file:
            self.output_file = open(self.measurement_file, 'w')
            self.output_file.write(','.join(self.headers) + '\n')
            self.output_file.close()

            self.output_file = open(self.measurement_file, 'a')

        self._base_configure()

        self.prog = ub.ProgIter(verbose=3)
        self.prog.begin()

        self.frame_id = 0
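This _configure sets up a ProgIter with no iterable and calls begin(), presumably stepping it once per incoming frame. A minimal standalone sketch of that manual-stepping usage (the frame loop is hypothetical):

import ubelt as ub

prog = ub.ProgIter(desc='frames', verbose=3)
prog.begin()
for frame_id in range(1000):   # stands in for frames arriving from a stream
    # ... process one frame ...
    prog.step()
prog.end()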
Example #14
    def predict(harn, have_true=True):
        # Import the right version of caffe
        print(ub.color_text('[segnet] begin prediction', 'blue'))
        harn.prepare_test_model(force=False)
        harn.test.make_dumpsafe_names()
        net = harn.make_net()
        assert harn.test_batch_size == 1
        # have_true = bool(harn.test.gt_paths)

        if not have_true:

            def load_batch_data(bx):
                """
                bx = 0
                """
                offset = bx * harn.test_batch_size
                blob_data = net.blobs['data'].data
                for jx in range(harn.test_batch_size):
                    # push data into the network
                    ix = offset + jx
                    im_hwc = util.imread(harn.test.im_paths[ix])
                    im_hwc = im_hwc[:, :, ::-1]
                    im_chw = np.transpose(im_hwc, (2, 0, 1)).astype(np.float32)
                    blob_data[jx, :, :, :] = im_chw

        n_iter = int(harn.test.n_input / harn.test_batch_size)
        for bx in ub.ProgIter(range(n_iter), label='forward batch', freq=1):
            if not have_true:
                load_batch_data(bx)
            net.forward()
            blobs = net.blobs
            harn.dump_predictions(blobs, bx, have_true=have_true)
Example #15
    def find_pos_redun_candidate_edges(infr, k=None, verbose=False):
        """
        Searches for augmenting edges that would make PCCs k-positive redundant

        CommandLine:
            python -m graphid.core.mixin_dynamic _RedundancyAugmentation.find_pos_redun_candidate_edges

        Doctest:
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4, 5), (7, 8, 9, 10)], pos_redun=1)
            >>> infr.add_feedback((2, 5), POSTV)
            >>> infr.add_feedback((1, 5), INCMP)
            >>> infr.params['redun.pos'] = 2
            >>> candidate_edges = list(infr.find_pos_redun_candidate_edges())
            >>> result = ('candidate_edges = ' + ub.repr2(candidate_edges, nl=0))
            >>> print(result)
            candidate_edges = [(1, 4), (3, 5), (7, 10)]
        """
        # Add random edges between existing non-redundant PCCs
        if k is None:
            k = infr.params['redun.pos']
        # infr.find_non_pos_redundant_pccs(k=k, relax=True)
        pcc_gen = list(infr.positive_components())
        prog = ub.ProgIter(pcc_gen, enabled=verbose, freq=1, adjust=False)
        for pcc in prog:
            if not infr.is_pos_redundant(pcc, k=k, relax=True,
                                         assume_connected=True):
                for edge in infr.find_pos_augment_edges(pcc, k=k):
                    yield nxu.e_(*edge)
Example #16
    def _configure(self):
        logger.debug(' ----- ' + self.__class__.__name__ + ' configure')
        config = tmp_smart_cast_config(self)

        logger.info('triangulator config = {}'.format(ub.repr2(config, nl=2)))
        output_fpath = config.pop('output_fpath')
        cal_fpath = config.pop('cal_fpath')
        self.triangulator = ctalgo.FishStereoMeasurments(**config)

        # Camera loading process is not working correctly.
        # Load camera calibration data here for now.
        #
        if not os.path.exists(cal_fpath):
            raise KeyError('must specify a valid camera calibration path')
        self.cal = ctalgo.StereoCalibration.from_file(cal_fpath)
        logger.info('self.cal = {!r}'.format(self.cal))

        self.headers = [
            'current_frame', 'fishlen', 'range', 'error', 'dz', 'box_pts1',
            'box_pts2'
        ]
        self.output_file = open(output_fpath, 'w')
        self.output_file.write(','.join(self.headers) + '\n')
        self.output_file.close()

        self.output_file = open(output_fpath, 'a')
        self._base_configure()

        self.prog = ub.ProgIter(verbose=3)
        self.prog.begin()
Example #17
def batch_convert_to_cog(src_fpaths,
                         dst_fpaths,
                         mode='process',
                         max_workers=0,
                         cog_config=None):
    """
    Converts many input images to COGs and verifies that the outputs are
    correct

    Args:
        src_fpaths (List[str]): source image filepaths
        dst_fpaths (List[str]): corresponding destination image filepaths
        mode (str, default='process'): either process, thread, or serial
        max_workers (int, default=0): number of processes / threads to use
        cog_config (dict):
            config options for COG files
            (e.g. compress, blocksize, overviews, etc).
    """
    if cog_config is None:
        cog_config = {
            'compress': 'LZW',
            'blocksize': 256,
        }
    from ndsampler.utils import util_futures
    jobs = util_futures.JobPool(mode, max_workers=max_workers)
    for src_fpath, dst_fpath in zip(src_fpaths, dst_fpaths):
        jobs.submit(_convert_to_cog_worker,
                    src_fpath,
                    dst_fpath,
                    cog_config=cog_config)
    for job in ub.ProgIter(jobs.as_completed(),
                           total=len(jobs),
                           desc='converting to cog'):
        job.result()
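A hypothetical call to batch_convert_to_cog might look like the following; the file paths and COG options are illustrative only:

src_fpaths = ['scene1.tif', 'scene2.tif']
dst_fpaths = ['scene1.cog.tif', 'scene2.cog.tif']
batch_convert_to_cog(
    src_fpaths, dst_fpaths,
    mode='process', max_workers=4,
    cog_config={'compress': 'LZW', 'blocksize': 256},
)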
Example #18
def make_pycocotools_compliant(fpath):
    import kwcoco
    import kwimage
    import ubelt as ub

    print('Reading fpath = {!r}'.format(fpath))
    dset = kwcoco.CocoDataset(fpath)

    dset._ensure_imgsize(workers=8)

    for ann in ub.ProgIter(dset.dataset['annotations'], desc='update anns'):
        if 'iscrowd' not in ann:
            ann['iscrowd'] = False

        if 'ignore' not in ann:
            ann['ignore'] = ann.get('weight', 1.0) < .5

        if 'area' not in ann:
            # Use segmentation if available
            if 'segmentation' in ann:
                poly = kwimage.Polygon.from_coco(ann['segmentation'][0])
                ann['area'] = float(poly.to_shapely().area)
            else:
                x, y, w, h = ann['bbox']
                ann['area'] = w * h

    dset.dump(dset.fpath, newlines=True)
Example #19
def _query_sentinel2_with_csv(collection_file,
                              cc_limit,
                              date_start,
                              date_end,
                              tile,
                              latest=False):
    cc_values = []
    all_urls = []
    all_acqdates = []
    with open(collection_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in ubelt.ProgIter(reader, desc='searching S2'):
            year_acq = int(row['SENSING_TIME'][0:4])
            month_acq = int(row['SENSING_TIME'][5:7])
            day_acq = int(row['SENSING_TIME'][8:10])
            acqdate = datetime.datetime(year_acq, month_acq, day_acq)
            if row['MGRS_TILE'] == tile and float(row['CLOUD_COVER']) <= cc_limit \
                    and date_start < acqdate < date_end:
                all_urls.append(row['BASE_URL'])
                cc_values.append(float(row['CLOUD_COVER']))
                all_acqdates.append(acqdate)

    if latest and all_urls:
        return [sort_url_list(cc_values, all_acqdates, all_urls).pop()]
    return sort_url_list(cc_values, all_acqdates, all_urls)
Example #20
    def main(cls, cmdline=True, **kw):
        """
        Example:
            >>> from kwcoco.cli.coco_validate import *  # NOQA
            >>> kw = {'src': 'special:shapes8'}
            >>> cmdline = False
            >>> cls = CocoValidateCLI
            >>> cls.main(cmdline, **kw)
        """
        import kwcoco
        config = cls.CLIConfig(kw, cmdline=cmdline)
        print('config = {}'.format(ub.repr2(dict(config), nl=1)))

        if config['src'] is None:
            raise Exception('must specify source: {}'.format(config['src']))

        if isinstance(config['src'], str):
            fpaths = [config['src']]
        else:
            fpaths = config['src']

        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')

        fix_strat = set()
        if config['fix'] is not None:
            fix_strat = {c.lower() for c in config['fix'].split('+')}

        for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
            print('reading fpath = {!r}'.format(fpath))
            dset = kwcoco.CocoDataset.coerce(fpath)

            config_ = ub.dict_diff(config, {'src', 'dst', 'fix'})
            result = dset.validate(**config_)

            if 'missing' in result:
                if 'remove' in fix_strat:
                    missing = result['missing']
                    bad_gids = [t[2] for t in missing]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if 'corrupted' in result:
                if 'remove' in fix_strat:
                    corrupted = result['corrupted']
                    bad_gids = [t[2] for t in corrupted]
                    status = dset.remove_images(bad_gids, verbose=1)
                    print('status = {}'.format(ub.repr2(status, nl=1)))

            if config['dst']:
                if len(fpaths) != 1:
                    raise Exception('can only specify 1 dataset in fix mode')
                dset.dump(config['dst'], newlines=True)

            errors = result['errors']
            if errors:
                print('result = {}'.format(ub.repr2(result, nl=-1)))
                raise Exception('\n'.join(errors))
Example #21
    def run_epoch(harn, loader, tag, learn=False):
        # Use exponentially weighted or windowed moving averages across epochs
        run_metrics = harn._run_metrics[tag]
        # Use simple moving average within an epoch
        batch_metrics = metrics.CumMovingAve()

        # train batch
        if not harn.dry:
            # Flag if model is training (influences batch-norm / dropout)
            if harn.model.training != learn or learn:
                harn.model.train(learn)

        display_interval = harn.intervals['display_' + tag]

        prog = ub.ProgIter(label=tag,
                           length=len(loader),
                           verbose=1,
                           clearline=True)
        with prog:
            for bx, input_batch in enumerate(loader):
                iter_idx = (harn.epoch * len(loader) + bx)

                input_batch = harn.xpu.to_xpu_var(*input_batch)

                # Core learning / backprop
                *inputs, label = input_batch
                output, loss = harn.run_batch(inputs, label, learn=learn)

                # Measure train accuracy and other informative metrics
                cur_metrics = harn._call_metric_hooks(output, label, loss)

                if 1:
                    harn._tensorboard_extra(inputs, output, label, tag,
                                            iter_idx, loader)

                # Accumulate measures
                batch_metrics.update(cur_metrics)
                run_metrics.update(cur_metrics)

                # display_train training info
                if (bx + 1) % display_interval == 0:
                    ave_metrics = run_metrics.average()

                    msg = harn.batch_msg({'loss': ave_metrics['loss']},
                                         loader.batch_size)
                    prog.set_extra(msg)

                    for key, value in ave_metrics.items():
                        # harn.log_value(tag + ' ' + key, value, iter_idx)
                        # TODO: use this one:
                        harn.log_value(tag + ' iter ' + key, value, iter_idx)

                    prog.step(harn.intervals['display_' + tag])

        # Record a true average for the entire batch
        final_metrics = batch_metrics.average()
        for key, value in final_metrics.items():
            harn.log_value(tag + ' epoch ' + key, value, harn.epoch)
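The epoch loop above drives ProgIter manually: it is given a length up front, used as a context manager, and advanced with step() while set_extra() appends a metrics message. A stripped-down sketch of that pattern (the loss value is made up):

import ubelt as ub

num_batches = 1000
prog = ub.ProgIter(desc='train epoch', total=num_batches, verbose=1, clearline=True)
with prog:
    for bx in range(num_batches):
        # ... run one batch ...
        if (bx + 1) % 100 == 0:
            prog.set_extra(' loss=0.123')   # illustrative extra message
            prog.step(100)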
Example #22
def missing_images(dset):
    import os
    bad_paths = []
    for index in ub.ProgIter(range(len(dset.dataset['images']))):
        img = dset.dataset['images'][index]
        gpath = join(dset.img_root, img['file_name'])
        if not os.path.exists(gpath):
            bad_paths.append((index, gpath))
    return bad_paths
Example #23
def _nh_data_nh_map(harn, num=10):
    with torch.no_grad():
        postprocess = harn.model.module.postprocess
        # postprocess.conf_thresh = 0.001
        # postprocess.nms_thresh = 0.5
        batch_confusions = []
        moving_ave = nh.util.util_averages.CumMovingAve()
        loader = harn.loaders['test']
        prog = ub.ProgIter(iter(loader), desc='')
        for bx, batch in enumerate(prog):
            inputs, labels = harn.prepare_batch(batch)
            inp_size = np.array(inputs.shape[-2:][::-1])
            outputs = harn.model(inputs)

            loss = harn.criterion(outputs, labels['targets'],
                                  gt_weights=labels['gt_weights'],
                                  seen=1000000000)
            moving_ave.update(ub.odict([
                ('loss', float(loss.sum())),
                ('coord', harn.criterion.loss_coord),
                ('conf', harn.criterion.loss_conf),
                ('cls', harn.criterion.loss_cls),
            ]))

            average_losses = moving_ave.average()
            desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
            prog.set_description(desc, refresh=False)

            postout = postprocess(outputs)
            for y in harn._measure_confusion(postout, labels, inp_size):
                batch_confusions.append(y)

            # batch_output.append((outputs.cpu().data.numpy().copy(), inp_size))
            # batch_labels.append([x.cpu().data.numpy().copy() for x in labels])
            if num is not None and bx >= num:
                break

        average_losses = moving_ave.average()
        print('average_losses {}'.format(ub.repr2(average_losses)))

    if False:
        from netharn.util import mplutil
        mplutil.qtensure()  # xdoc: +SKIP
        harn.visualize_prediction(batch, outputs, postout, thresh=.1)

    y = pd.concat([pd.DataFrame(c) for c in batch_confusions])
    precision, recall, ap = nh.metrics.detections._multiclass_ap(y)

    ln_test = ub.import_module_from_path(ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
    num_classes = len(ln_test.LABELS)
    cls_labels = list(range(num_classes))

    aps = nh.metrics.ave_precisions(y, cls_labels, use_07_metric=True)
    aps = aps.rename(dict(zip(cls_labels, ln_test.LABELS)), axis=0)
    # return ap
    return ap, aps
Example #24
    def draw_data_overlay(task, sl=None):
        """
            >>> from clab.tasks import *
            >>> import clab
            >>> task = DivaV1(clean=2)
            >>> arch = 'segnet_proper'

            >>> # Use external dataset to increase the amount of training data
            >>> tutorial_dir = './SegNet-Tutorial'
            >>> task.extend_data_from(clab.tasks.CamVid(tutorial_dir))
            >>> task.draw_data_overlay()
        """
        keys = task._preprocessing_keys()
        scenes = task.scene_ids[:]
        keys = keys + ['extern']
        for key in ub.ProgIter(keys, label='overlay', verbose=3):
            scene_overlay_dir = task.datasubdir('overlay', key)
            if key == 'extern':
                # HACK
                im_paths = task.extern_train_im_paths
                gt_paths = task.extern_train_gt_paths
            else:
                im_paths, gt_paths = task._scene_data_subset(scenes, [key])
            gt_paths = fnameutil.align_paths(im_paths, gt_paths)

            overlay_fnames = list(fnameutil.dumpsafe(im_paths))

            if sl is not None:
                im_paths = im_paths[sl]
                gt_paths = gt_paths[sl]
                overlay_fnames = overlay_fnames[sl]

            prog = ub.ProgIter(zip(im_paths, gt_paths, overlay_fnames),
                               length=len(im_paths),
                               label='overlay key={}'.format(key))
            for impath, gtpath, safename in prog:
                # Make a nice visualization
                fpath = join(scene_overlay_dir, safename)
                gt_img = cv2.imread(gtpath, cv2.IMREAD_UNCHANGED)
                im_img = cv2.imread(impath, cv2.IMREAD_UNCHANGED)
                gt_color = task.colorize(gt_img)
                gt_overlay = imutil.overlay_colorized(gt_color, im_img)
                cv2.imwrite(fpath, gt_overlay)
Example #25
    def _ensure_imgsize(self):
        from PIL import Image
        for img in ub.ProgIter(list(self.imgs.values())):
            gpath = join(self.img_root, img['file_name'])
            if 'width' not in img:
                pil_img = Image.open(gpath)
                w, h = pil_img.size
                pil_img.close()
                img['width'] = w
                img['height'] = h
Example #26
def _coco_to_dets(coco_dset, desc=''):
    # Note: gids, verbose, classes, and kwimage come from the enclosing
    # scope in the original code; this is a nested helper.
    for gid in ub.ProgIter(gids, desc=desc, verbose=verbose):
        img = coco_dset.imgs[gid]
        gid = img['id']
        imgname = img['file_name']
        aids = coco_dset.gid_to_aids[gid]
        annots = [coco_dset.anns[aid] for aid in aids]
        dets = kwimage.Detections.from_coco_annots(
            annots, dset=coco_dset, classes=classes)
        yield dets, imgname, gid
Example #27
    def _nh_loop(harn):
        # Reset
        harn.current_tag = tag = 'test'

        dmet = harn.dmets[tag]
        dmet.pred.remove_all_annotations()
        dmet.true.remove_all_annotations()
        dmet.true._build_index()
        dmet.pred._build_index()

        moving_ave = nh.util.util_averages.CumMovingAve()
        loader = harn.loaders[tag]
        loader.num_workers = 4
        prog = ub.ProgIter(iter(loader), desc='')
        with torch.no_grad():
            for bx, batch in enumerate(prog):
                inputs, labels = harn.prepare_batch(batch)
                outputs = harn.model(inputs)
                loss = harn.criterion(outputs, labels['targets'],
                                      gt_weights=labels['gt_weights'],
                                      seen=1000000000)
                moving_ave.update(ub.odict([
                    ('loss', float(loss.sum())),
                    ('coord', harn.criterion.loss_coord),
                    ('conf', harn.criterion.loss_conf),
                    ('cls', harn.criterion.loss_cls),
                ]))
                average_losses = moving_ave.average()
                desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
                prog.set_description(desc, refresh=False)

                postout = harn.model.module.postprocess(outputs, nms_mode=2)

                inputs, labels = batch
                inp_size = np.array(inputs.shape[-2:][::-1])
                pred_anns = list(harn._postout_to_pred_ann(
                    inp_size, labels, postout,
                    _aidbase=len(dmet.pred.dataset['annotations']) + 1
                ))
                dmet.pred.add_annotations(pred_anns)

                true_anns = list(harn._labels_to_true_ann(
                    inp_size, labels,
                    _aidbase=len(dmet.true.dataset['annotations']) + 1
                ))
                dmet.true.add_annotations(true_anns)

            average_losses = moving_ave.average()
            print('average_losses {}'.format(ub.repr2(average_losses)))
        print('netharn voc_mAP = {}'.format(dmet.score_voc()['mAP']))
        print('netharn nh_mAP = {}'.format(dmet.score_netharn()['mAP']))
        # Reset
        dmet.pred.remove_all_annotations()
        dmet.true.remove_all_annotations()
Example #28
def count_ubelt_usage():
    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'xdoctest',
        'netharn',
        'xdev',
        'xinspect',
        'ndsampler',
        'kwil',
        'kwarray',
        'kwimage',
        'kwplot',
        'scriptconfig',
    ]

    all_fpaths = []
    for name in names:
        repo_fpath = ub.expandpath(join('~/code', name))
        fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
        for fpath in fpaths:
            all_fpaths.append((name, fpath))

    import re
    pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    import ubelt as ub

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in ub.__all__:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in ub.__all__:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(
            sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])

    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    print(ub.repr2(pkg_to_hist, nl=2))
    print(ub.repr2(usage, nl=1))
Example #29
    def as_completed(self, timeout=None, desc=None, progkw=None):
        """
        Generates completed jobs in an arbitrary order

        Args:
            timeout (float | None):
                Specify the maximum number of seconds to wait for a job.

            desc (str | None):
                if specified, reports progress with a
                :class:`ubelt.progiter.ProgIter` object.

            progkw (dict | None):
                extra keyword arguments to :class:`ubelt.progiter.ProgIter`.

        Yields:
            concurrent.futures.Future:
                The completed future object containing the results of a job.

        CommandLine:
            xdoctest -m ubelt.util_futures JobPool.as_completed

        Example:
            >>> import ubelt as ub
            >>> pool = ub.JobPool('thread', max_workers=8)
            >>> text = ub.paragraph(
            ...     '''
            ...     UDP is a cool protocol, check out the wiki:
            ...
            ...     UDP-based Data Transfer Protocol (UDT), is a high-performance
            ...     data transfer protocol designed for transferring large
            ...     volumetric datasets over high-speed wide area networks. Such
            ...     settings are typically disadvantageous for the more common TCP
            ...     protocol.
            ...     ''')
            >>> for word in text.split(' '):
            ...     pool.submit(print, word)
            >>> for _ in pool.as_completed():
            ...     pass
            >>> pool.shutdown()
        """
        import ubelt as ub
        job_iter = as_completed(self.jobs)
        if desc is not None:
            if progkw is None:
                progkw = {}
            job_iter = ub.ProgIter(job_iter,
                                   desc=desc,
                                   total=len(self.jobs),
                                   **progkw)
        for job in job_iter:
            yield job
Example #30
    def make_augment_scene(task, mode, scene, rng=None):
        """
        Augments data in a scene of a specific "mode"

        mode = 'part-scale1'
        scene = '0000'
        rng = 'determ'

        gtdir = task.datasubdir('gtpart', scene)
        imdir = task.datasubdir('impart', scene)
        """
        assert task.enable_augment

        if rng == 'determ':
            # Make a deterministic seed based on the scene and mode
            seed = int(hashutil.hash_data([scene, mode], alphabet='hex'), 16)
            seed = seed % (2**32 - 1)
            rng = np.random.RandomState(seed)

        auger = augment.SSegAugmentor(rng=rng, ignore_label=task.ignore_label)
        auger.params = task.aug_params

        # rng = np.random.RandomState(0)
        imdir = task.datasubdir('im' + mode, scene)
        gtdir = task.datasubdir('gt' + mode, scene)
        im_fpaths = sorted(glob.glob(join(imdir, '*.png')))
        gt_fpaths = sorted(glob.glob(join(gtdir, '*.png')))

        # Define the output path for the augmentation of this mode
        key = mode + '-aug'
        scene_imout_dpath = task.datasubdir('im' + key, scene)
        scene_gtout_dpath = task.datasubdir('gt' + key, scene)

        # Start fresh. Remove existing files
        ub.delete(scene_gtout_dpath, verbose=False)
        ub.delete(scene_imout_dpath, verbose=False)
        ub.ensuredir(scene_gtout_dpath)
        ub.ensuredir(scene_imout_dpath)

        for impath, gtpath in ub.ProgIter(
                list(zip(im_fpaths, gt_fpaths)),
                label='   * augment mode={}'.format(mode)):
            fname_we = splitext(basename(impath))[0]
            im = cv2.imread(impath, flags=cv2.IMREAD_UNCHANGED)
            gt = cv2.imread(gtpath, flags=cv2.IMREAD_UNCHANGED)
            aug_gen = auger.augment(im, gt)
            for augx, aug_data in enumerate(aug_gen):
                (im_aug, gt_aug) = aug_data[0:2]
                fname = '{}_aug{:0=4d}.png'.format(fname_we, augx)
                cv2.imwrite(join(scene_imout_dpath, fname), im_aug)
                cv2.imwrite(join(scene_gtout_dpath, fname), gt_aug)
        return scene_imout_dpath, scene_gtout_dpath