Example 1
    def update_registry(drive):
        print('Updating registered files in %r' % (drive,))
        # Update existing files
        fpath_exists_list = list(map(exists, ut.ProgIter(drive.fpath_list, 'checkexist fpath', freq=1000)))
        dpath_exists_list = list(map(exists, ut.ProgIter(drive.dpath_list, 'checkexist dpath', freq=1000)))
        if all(fpath_exists_list):
            print('No change in file structure')
        else:
            print('%d/%d files no longer exist' % (
                len(drive.fpath_list) - sum(fpath_exists_list),
                len(drive.fpath_list)))
            removed_fpaths = ut.compress(drive.fpath_list, ut.not_list(fpath_exists_list))
            print('removed_fpaths = %s' % (ut.list_str(removed_fpaths),))
        if all(dpath_exists_list):
            print('No change in dpath structure')
        else:
            print('%d/%d dirs no longer exist' % (
                len(drive.dpath_list) - sum(dpath_exists_list),
                len(drive.dpath_list)))
            removed_dpaths = ut.compress(
                drive.dpath_list,
                ut.not_list(dpath_exists_list))
            print('removed_dpaths = %s' % (ut.list_str(removed_dpaths),))

        drive.fpath_list = ut.compress(drive.fpath_list, fpath_exists_list)
        drive.dpath_list = ut.compress(drive.dpath_list, dpath_exists_list)
        drive.cache.save('fpath_list', drive.fpath_list)
        drive.cache.save('dpath_list', drive.dpath_list)
Example 2
    def update_registry(drive):
        print('Updating registered files in %r' % (drive, ))
        # Update existing files
        fpath_exists_list = list(
            map(exists,
                ut.ProgIter(drive.fpath_list, 'checkexist fpath', freq=1000)))
        dpath_exists_list = list(
            map(exists,
                ut.ProgIter(drive.dpath_list, 'checkexist dpath', freq=1000)))
        if all(fpath_exists_list):
            print('No change in file structure')
        else:
            print('%d/%d files no longer exist' %
                  (len(drive.fpath_list) - sum(fpath_exists_list),
                   len(drive.fpath_list)))
            removed_fpaths = ut.compress(drive.fpath_list,
                                         ut.not_list(fpath_exists_list))
            print('removed_fpaths = %s' % (ut.repr2(removed_fpaths), ))
        if all(dpath_exists_list):
            print('No change in dpath structure')
        else:
            print('%d/%d dirs no longer exist' %
                  (len(drive.dpath_list) - sum(dpath_exists_list),
                   len(drive.dpath_list)))
            removed_dpaths = ut.compress(drive.dpath_list,
                                         ut.not_list(dpath_exists_list))
            print('removed_dpaths = %s' % (ut.repr2(removed_dpaths), ))

        drive.fpath_list = ut.compress(drive.fpath_list, fpath_exists_list)
        drive.dpath_list = ut.compress(drive.dpath_list, dpath_exists_list)
        drive.cache.save('fpath_list', drive.fpath_list)
        drive.cache.save('dpath_list', drive.dpath_list)
Example 3
 def get_cards_in_hand(player, valid_types=None, invert=False):
     card_list = player.hand
     if valid_types is None:
         valid_cards = card_list
     else:
         flags = [ut.is_superset(c.types, valid_types) for c in card_list]
         if invert:
             flags = ut.not_list(flags)
         valid_cards = ut.compress(card_list, flags)
     return valid_cards
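All of these snippets share one idiom: build a list of booleans parallel to the data, optionally invert it with ut.not_list, and keep the matching items with ut.compress. A minimal, self-contained sketch of that idiom (assuming utool is importable as ut; plain-Python equivalents are shown in the comments):

    import utool as ut

    items = ['a.txt', 'b.txt', 'c.txt', 'd.txt']
    flags = [True, False, True, False]

    kept = ut.compress(items, flags)                   # [x for x, f in zip(items, flags) if f]
    dropped = ut.compress(items, ut.not_list(flags))   # ut.not_list(flags) == [not f for f in flags]

    assert kept == ['a.txt', 'c.txt']
    assert dropped == ['b.txt', 'd.txt']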
Example 4
    def hack_remove_pystuff(self):
        import utool as ut
        # Hack of a method
        new_lines = []
        for lines in self.found_lines_list:
            # remove comment results
            flags = [not line.strip().startswith('# ') for line in lines]
            lines = ut.compress(lines, flags)

            # remove doctest results
            flags = [not line.strip().startswith('>>> ') for line in lines]
            lines = ut.compress(lines, flags)

            # remove cmdline tests
            import re
            flags = [
                not re.search('--test-' + self.extended_regex_list[0], line)
                for line in lines
            ]
            lines = ut.compress(lines, flags)

            flags = [
                not re.search('--exec-' + self.extended_regex_list[0], line)
                for line in lines
            ]
            lines = ut.compress(lines, flags)

            flags = [
                not re.search(
                    r'--exec-[a-zA-Z]*\.' + self.extended_regex_list[0], line)
                for line in lines
            ]
            lines = ut.compress(lines, flags)

            flags = [
                not re.search(
                    r'--test-[a-zA-Z]*\.' + self.extended_regex_list[0], line)
                for line in lines
            ]
            lines = ut.compress(lines, flags)

            # remove func defs
            flags = [
                not re.search('def ' + self.extended_regex_list[0], line)
                for line in lines
            ]
            lines = ut.compress(lines, flags)
            new_lines += [lines]
        self.found_lines_list = new_lines

        # compress self
        flags = [len(lines_) > 0 for lines_ in self.found_lines_list]
        idxs = ut.list_where(ut.not_list(flags))
        del self[idxs]
Example 5
 def download_image_urls(image_url_info_list):
     # Find ones that we already have
     print('Requested %d downloaded images' % (len(image_url_info_list)))
     full_gpath_list = [join(image_dir, basename(gpath)) for gpath in image_url_info_list]
     exists_list = [ut.checkpath(gpath) for gpath in full_gpath_list]
     image_url_info_list_ = ut.compress(image_url_info_list, ut.not_list(exists_list))
     print('Already have %d/%d downloaded images' % (
         len(image_url_info_list) - len(image_url_info_list_), len(image_url_info_list)))
     print('Need to download %d images' % (len(image_url_info_list_)))
     #import sys
     #sys.exit(0)
     # Download the rest
     imgurl_prefix = 'https://snapshotserengeti.s3.msi.umn.edu/'
     image_url_list = [imgurl_prefix + suffix for suffix in image_url_info_list_]
     for img_url in ut.ProgressIter(image_url_list, lbl='Downloading image'):
         ut.grab_file_url(img_url, download_dir=image_dir)
     return full_gpath_list
Example 6
    def hack_remove_pystuff(self):
        import utool as ut
        # Hack of a method
        new_lines = []
        for lines in self.found_lines_list:
            # remove comment results
            flags = [not line.strip().startswith('# ') for line in lines]
            lines = ut.compress(lines, flags)

            # remove doctest results
            flags = [not line.strip().startswith('>>> ') for line in lines]
            lines = ut.compress(lines, flags)

            # remove cmdline tests
            import re
            flags = [not re.search('--test-' + self.extended_regex_list[0], line) for line in lines]
            lines = ut.compress(lines, flags)

            flags = [not re.search('--exec-' + self.extended_regex_list[0], line) for line in lines]
            lines = ut.compress(lines, flags)

            flags = [not re.search(r'--exec-[a-zA-Z]*\.' + self.extended_regex_list[0], line) for line in lines]
            lines = ut.compress(lines, flags)

            flags = [not re.search(r'--test-[a-zA-Z]*\.' + self.extended_regex_list[0], line) for line in lines]
            lines = ut.compress(lines, flags)

            # remove func defs
            flags = [not re.search('def ' + self.extended_regex_list[0], line) for line in lines]
            lines = ut.compress(lines, flags)
            new_lines += [lines]
        self.found_lines_list = new_lines

        # compress self
        flags = [len(lines_) > 0 for lines_ in self.found_lines_list]
        idxs = ut.list_where(ut.not_list(flags))
        del self[idxs]
Example 7
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Args
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as  hsdb)

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut

    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(
        hsdir
    ), 'not a hotspotter database. cannot even force convert: hsdir=%r' % (
        hsdir, )
    assert not is_succesful_convert(dbdir), 'dbdir=%r is already converted' % (
        dbdir, )
    #print('FORCE DELETE: %r' % (hsdir,))
    #ibsfuncs.delete_ibeis_database(hsdir)
    imgdir = join(hsdir, 'images')

    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    name_text_list = ['____']
    name_hs_nid_list = [0]
    with open(nametbl_fpath, 'r') as nametbl_file:
        name_reader = csv.reader(nametbl_file)
        for ix, row in enumerate(name_reader):
            #if ix >= 3:
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_nid = int(row[0])
                name = row[1].strip()
                name_text_list.append(name)
                name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    with open(imgtbl_fpath, 'r') as imgtb_file:
        image_reader = csv.reader(imgtb_file)
        for ix, row in enumerate(image_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[0])
                gname_ = row[1].strip()
                # aif in hotspotter is equivalent to reviewed in IBEIS
                reviewed = bool(row[2])
                image_hs_gid_list.append(hs_gid)
                image_gname_list.append(gname_)
                image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    #print(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = []
    for image_gpath, flag in zip(image_gpath_list, image_exist_flags):
        if not flag:
            missing_images.append(image_gpath)
            print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d images exist' %
              (sum(image_exist_flags), len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images
        from os.path import basename
        subfiles = ut.glob(hsdir,
                           '*',
                           recursive=True,
                           fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname, ))
                pass
            else:
                existing = basename_to_existing[gname]
                can_choose = True
                if len(existing) > 1:
                    if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)):
                        can_choose = False
                if can_choose:
                    found = existing[0]
                    can_copy_list.append((found, gpath))
                else:
                    print(existing)

        src, dst = ut.listT(can_copy_list)
        ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    with open(chiptbl_fpath, 'r') as chiptbl_file:
        chip_reader = csv.reader(chiptbl_file)
        for ix, row in enumerate(chip_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[1])
                hs_nid = int(row[2])
                bbox_text = row[3]
                theta = float(row[4])
                notes = '<COMMA>'.join([item.strip() for item in row[5:]])

                bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
                bbox_text = re.sub('  *', ' ', bbox_text)
                bbox_strlist = bbox_text.split(' ')
                bbox = tuple(map(int, bbox_strlist))
                #bbox = [int(item) for item in bbox_strlist]
                chip_hs_nid_list.append(hs_nid)
                chip_hs_gid_list.append(hs_gid)
                chip_bbox_list.append(bbox)
                chip_theta_list.append(theta)
                chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.not_list(images['exists']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependant chips
        images = images.remove(missing_gxs)
        chips = chips.remove(missing_cxs)
        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    assert all(images['exists']), 'some images do not exist'

    # if gid is None:
    #     print('Not adding the ix=%r-th Chip. Its image is corrupted image.' % (ix,))
    #     # continue
    # # Build mappings to new indexes
    # names_nid_to_nid  = {names_nid: nid for (names_nid, nid) in zip(hs_nid_list, nid_list)}
    # names_nid_to_nid[1] = names_nid_to_nid[0]  # hsdb unknown is 0 or 1
    # images_gid_to_gid = {images_gid: gid for (images_gid, gid) in zip(hs_gid_list, gid_list)}

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir,
                                               check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(
        images['gpath'])  # any failed gids will be None

    if True:
        # Remove corrupted images
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependant chips
        chips = chips.remove(missing_cxs)
        images = images.remove(missing_gxs)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    # Index chips using new ibs rowids
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    try:
        chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid'])
    except KeyError:
        chips['ibs_gid'] = [
            ibs_gid_lookup.get(index, None) for index in chips['hs_gid']
        ]
    try:
        chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid'])
    except KeyError:
        chips['ibs_nid'] = [
            ibs_nid_lookup.get(index, None) for index in chips['hs_nid']
        ]

    ibs.add_annots(chips['ibs_gid'],
                   bbox_list=chips['bbox'],
                   theta_list=chips['theta'],
                   nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # aid_list = ibs.get_valid_aids()
    # flag_list = [True] * len(aid_list)
    # ibs.set_annot_exemplar_flags(aid_list, flag_list)
    # assert(all(ibs.get_annot_exemplar_flags(aid_list))), 'exemplars not set correctly'

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir, ))
    print('finished ingest')
    return ibs
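The try/except around ut.take in the chip re-indexing step above is a lookup-with-fallback pattern: ut.dzip builds a dict from two parallel lists, ut.take indexes it strictly, and the KeyError handler degrades to dict.get so that unmapped ids become None instead of aborting the conversion. A small sketch of just that step, with hypothetical ids:

    import utool as ut

    lookup = ut.dzip([1, 2, 3], ['a', 'b', 'c'])    # {1: 'a', 2: 'b', 3: 'c'}
    keys = [1, 2, 9]                                # 9 has no mapping
    try:
        vals = ut.take(lookup, keys)                # strict: raises KeyError on 9
    except KeyError:
        vals = [lookup.get(key, None) for key in keys]
    assert vals == ['a', 'b', None]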
Example 8
    def parse_column_tuples(self,
                            col_name_list,
                            col_types_dict,
                            col_getter_dict,
                            col_bgrole_dict,
                            col_ider_dict,
                            col_setter_dict,
                            editable_colnames,
                            sortby,
                            sort_reverse=True,
                            strict=False,
                            **kwargs):
        """
        parses simple lists into information suitable for making guitool headers
        """
        # Unpack the column tuples into names, getters, and types
        if not strict:
            # sloppy colname definitions
            flag_list = [colname in col_getter_dict for colname in col_name_list]
            if not all(flag_list):
                invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
                print('[api_item_widget] Warning: colnames=%r have no getters' % (invalid_colnames,))
                col_name_list = ut.compress(col_name_list, flag_list)
            # sloppy type inference
            for colname in col_name_list:
                getter_ = col_getter_dict[colname]
                if colname not in col_types_dict:
                    type_ = ut.get_homogenous_list_type(getter_)
                    if type_ is not None:
                        col_types_dict[colname] = type_
        # sloppy kwargs.
        # FIXME: explicitly list col_nice_dict
        col_nice_dict = kwargs.get('col_nice_dict', {})
        self.col_nice_list = [col_nice_dict.get(name, name) for name in col_name_list]

        self.col_name_list = col_name_list
        self.col_type_list = [col_types_dict.get(colname, str) for colname in col_name_list]
        self.col_getter_list = [col_getter_dict.get(colname, str) for colname in col_name_list]  # First col is always a getter
        # Get number of rows / columns
        self.nCols = len(self.col_getter_list)
        self.nRows = 0 if self.nCols == 0 else len(self.col_getter_list[0])  # FIXME
        # Init iders to default and then overwrite based on dict inputs
        self.col_ider_list = ut.alloc_nones(self.nCols)
        for colname, ider_colnames in six.iteritems(col_ider_dict):
            try:
                col = self.col_name_list.index(colname)
                # Col iders might have tuple input
                ider_cols = ut.uinput_1to1(self.col_name_list.index, ider_colnames)
                col_ider  = ut.uinput_1to1(lambda c: partial(self.get, c), ider_cols)
                self.col_ider_list[col] = col_ider
                del col_ider
                del ider_cols
                del col
                del colname
            except Exception as ex:
                ut.printex(ex, keys=['colname', 'ider_colnames', 'col', 'col_ider', 'ider_cols'])
                raise
        # Init setters to data, and then overwrite based on dict inputs
        self.col_setter_list = list(self.col_getter_list)
        for colname, col_setter in six.iteritems(col_setter_dict):
            col = self.col_name_list.index(colname)
            self.col_setter_list[col] = col_setter
        # Init bgrole_getters to None, and then overwrite based on dict inputs
        self.col_bgrole_getter_list = [col_bgrole_dict.get(colname, None) for colname in self.col_name_list]
        # Mark editable columns
        self.col_edit_list = [name in editable_colnames for name in col_name_list]
        # Mark the sort column index
        if ut.is_str(sortby):
            self.col_sort_index = self.col_name_list.index(sortby)
        else:
            self.col_sort_index = sortby
        self.col_sort_reverse = sort_reverse
Example 9
def compute_and_write_probchip(ibs, aid_list, config2_=None, lazy=True):
    """ Computes probability chips using pyrf

    CommandLine:
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:0 --show
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:1
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:2 --show --cnn

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> config2_ = None
        >>> lazy = True
        >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)[0:4]
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4)
        >>> ut.show_if_requested()

    Example1:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> lazy = False
        >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)

    Example2:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = ibs.new_query_params({'featweight_detector': 'cnn'})
        >>> lazy = True
        >>> aid_list = ibs.get_valid_aids()
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4)
        >>> ut.show_if_requested()

    Dev::
        #ibs.delete_annot_chips(aid_list)
        #probchip_fpath_list = get_annot_probchip_fpath_list(ibs, aid_list)
    """
    # Get probchip dest information (output path)
    # TODO: properly ungroup output
    grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(
        ibs, aid_list, config2_)
    nSpecies = len(unique_species)
    nTasks = len(aid_list)
    print(('[preproc_probchip.compute_and_write_probchip] '
          'Preparing to compute %d probchips of %d species')
          % (nTasks, nSpecies))
    cachedir = ibs.get_probchip_dir()
    ut.ensuredir(cachedir)

    grouped_probchip_fpath_list = []
    if ut.VERBOSE:
        print('[preproc_probchip] +--------------------')
    for aids, species in zip(grouped_aids, unique_species):
        if ut.VERBOSE:
            print('[preproc_probchip] Computing probchips for species=%r' % species)
            print('[preproc_probchip] |--------------------')
        if len(aids) == 0:
            continue
        probchip_fpaths = get_annot_probchip_fpath_list(ibs, aids,
                                                        config2_=config2_,
                                                        species=species)

        if lazy:
            # Filter out probchips that are already on disk
            # pyrf used to do this, now we need to do it
            # caching should be implicit due to using the visual_annot_uuid in
            # the filename
            isdirty_list = ut.not_list(map(exists, probchip_fpaths))
            dirty_aids = ut.compress(aids, isdirty_list)
            dirty_probchip_fpath_list = ut.compress(probchip_fpaths, isdirty_list)
            print(('[preproc_probchip.compute_and_write_probchip]'
                  ' Lazy compute: need to compute %d/%d of species=%s') %
                  (len(dirty_aids), len(aids), species))
        else:
            # No filtering
            dirty_aids  = aids
            dirty_probchip_fpath_list = probchip_fpaths

        if len(dirty_aids) > 0:
            write_dirty_aids(ibs, dirty_probchip_fpath_list, dirty_aids, config2_, species)

        grouped_probchip_fpath_list.append(probchip_fpaths)
    if ut.VERBOSE:
        print('[preproc_probchip] Done computing probability images')
        print('[preproc_probchip] L_______________________')

    probchip_fpath_list = vt.invert_apply_grouping2(
        grouped_probchip_fpath_list, groupxs, dtype=object)
    return probchip_fpath_list
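The lazy branch above is the dirty-checking variant of the same flag idiom: flag the outputs that are missing on disk, then restrict both the inputs and the output paths to the flagged subset before recomputing. Stripped down to just that step (hypothetical inputs and paths; exists is os.path.exists):

    from os.path import exists
    import utool as ut

    inputs = [1, 2, 3]
    fpaths = ['/tmp/out_1.png', '/tmp/out_2.png', '/tmp/out_3.png']

    # an output is "dirty" when its cached file does not exist yet
    isdirty_list = ut.not_list(map(exists, fpaths))
    dirty_inputs = ut.compress(inputs, isdirty_list)
    dirty_fpaths = ut.compress(fpaths, isdirty_list)
    # ...recompute only dirty_inputs, writing results to dirty_fpaths...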
Example 10
    def add_rows_from_parent(table, parent_rowids, config=None, verbose=True,
                             return_num_dirty=False):
        """
        Lazy addition
        """
        try:
            # Get requested configuration id
            config_rowid = table.get_config_rowid(config)
            # Find leaf rowids that need to be computed
            initial_rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                                config=config)
            # Get corresponding "dirty" parent rowids
            isdirty_list = ut.flag_None_items(initial_rowid_list)
            dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
            num_dirty = len(dirty_parent_rowids)
            num_total = len(parent_rowids)
            if num_dirty > 0:
                if verbose:
                    fmtstr = 'adding %d / %d new props to %r for config_rowid=%r'
                    print(fmtstr % (num_dirty, num_total, table.tablename,
                                    config_rowid))
                args = zip(*dirty_parent_rowids)
                if table._asobject:
                    # Convenience
                    args = [table.depc.get_obj(parent, rowids)
                            for parent, rowids in zip(table.parents, args)]
                # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
                proptup_gen = table.preproc_func(table.depc, *args, config=config)

                #proptup_gen = list(proptup_gen)

                if len(table._nested_idxs) > 0:
                    # TODO: rewrite
                    nested_nCols = len(table.data_colnames)
                    idxs1 = table._nested_idxs
                    mask1 = ut.index_to_boolmask(idxs1, nested_nCols)
                    mask2 = ut.not_list(mask1)
                    idxs2 = ut.where(mask2)
                    def unnest_data(data):
                        unnested_cols = list(zip(ut.take(data, idxs2)))
                        nested_cols = ut.take(data, idxs1)
                        grouped_items = [nested_cols, unnested_cols]
                        groupxs = [idxs1, idxs2]
                        unflat = ut.ungroup(grouped_items, groupxs, nested_nCols - 1)
                        return tuple(ut.flatten(unflat))
                    # Hack when a sql schema has tuples defined in it
                    proptup_gen = (unnest_data(data) for data in proptup_gen)

                #proptup_gen = list(proptup_gen)

                dirty_params_iter = (
                    parent_rowids + (config_rowid,) + data_cols
                    for parent_rowids, data_cols in zip(dirty_parent_rowids, proptup_gen))
                #dirty_params_iter = list(dirty_params_iter)
                #print('dirty_params_iter = %s' % (ut.repr2(dirty_params_iter, nl=1),))
                CHUNKED_ADD = table.chunksize is not None
                if CHUNKED_ADD:
                    for dirty_params_chunk in ut.ichunks(dirty_params_iter,
                                                         chunksize=table.chunksize):
                        table.db._add(table.tablename, table._table_colnames,
                                      dirty_params_chunk,
                                      nInput=len(dirty_params_chunk))
                else:
                    nInput = num_dirty
                    table.db._add(table.tablename, table._table_colnames,
                                  dirty_params_iter, nInput=nInput)
                # Now that the dirty params are added get the correct order of rowids
                rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                            config=config)
            else:
                rowid_list = initial_rowid_list
            if return_num_dirty:
                return rowid_list, num_dirty
            else:
                return rowid_list
        except Exception as ex:
            ut.printex(ex, 'error in add_rowids', keys=[
                'table', 'parent_rowids', 'config', 'args',
                'dirty_parent_rowids', 'table.preproc_func'])
            raise
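The ut.flag_None_items call above drives the same dirty-row selection from the database side: a None rowid marks a row that has not been computed yet, so compressing the parent rowids by those flags yields exactly the work that remains. In isolation (hypothetical rowids):

    import utool as ut

    initial_rowid_list = [10, None, 12, None]   # None == not in the table yet
    parent_rowids = ['p0', 'p1', 'p2', 'p3']

    isdirty_list = ut.flag_None_items(initial_rowid_list)   # [False, True, False, True]
    dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
    assert dirty_parent_rowids == ['p1', 'p3']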
Example 11
    def fix_empty_dirs(drive):
        """
        # --- FIND EMPTY DIRECTORIES ---
        """
        print('Fixing Empty Dirs in %r' % (drive,))
        fidxs_list = ut.dict_take(drive.dpath_to_fidx, drive.dpath_list)
        isempty_flags = [len(fidxs) == 0 for fidxs in fidxs_list]
        empty_dpaths = ut.compress(drive.dpath_list, isempty_flags)

        def is_cplat_link(path_):
            # Broken Windows junctions raise OSError on listdir; treat them
            # as links that should not be deleted
            try:
                if islink(path_):
                    return True
                os.listdir(path_)
                return False
            except OSError:
                return True
        valid_flags = [not is_cplat_link(d) for d in empty_dpaths]
        if not all(valid_flags):
            print('Filtered windows links %r / %r' % (
                len(empty_dpaths) - sum(valid_flags), len(empty_dpaths)))
            #print(ut.list_str(empty_dpaths[0:10]))
            empty_dpaths = ut.compress(empty_dpaths, valid_flags)

        print('Found %r / %r empty_dpaths' % (len(empty_dpaths), len(drive.dpath_list)))
        print(ut.list_str(empty_dpaths[0:10]))

        # Ensure actually still empty
        current_contents = [ut.glob(d, with_dirs=False)
                            for d in ut.ProgIter(empty_dpaths, 'checking empty status')]
        current_lens = list(map(len, current_contents))
        assert not any(current_lens), 'some dirs are not empty'

        # n ** 2 check to get only the base directories
        isbase_dir = [
            not any([d.startswith(dpath_) and d != dpath_
                        for dpath_ in empty_dpaths])
            for d in ut.ProgIter(empty_dpaths, 'finding base dirs')
        ]
        base_empty_dirs = ut.compress(empty_dpaths, isbase_dir)
        def list_only_files(dpath):
            # glob is too slow
            for root, dirs, fpaths in os.walk(dpath):
                for fpath in fpaths:
                    yield fpath
        base_current_contents = [
            list(list_only_files(d))
            for d in ut.ProgIter(base_empty_dirs, 'checking emptyness', freq=10)]
        is_actually_empty = [len(fs) == 0 for fs in base_current_contents]
        not_really_empty = ut.compress(base_empty_dirs, ut.not_list(is_actually_empty))
        print('%d dirs are not actually empty' % (len(not_really_empty),))
        print('not_really_empty = %s' % (ut.list_str(not_really_empty[0:10]),))
        truly_empty_dirs = ut.compress(base_empty_dirs, is_actually_empty)

        def list_all(dpath):
            # glob is too slow
            for root, dirs, fpaths in os.walk(dpath):
                for dir_ in dirs:
                    yield dir_
                for fpath in fpaths:
                    yield fpath

        exclude_base_dirs = [join(drive.root_dpath, 'AppData')]
        exclude_end_dirs = ['__pycache__']
        # A single comprehension already tests every excluded prefix and
        # suffix, so no outer loop is needed
        truly_empty_dirs1 = [
            d for d in truly_empty_dirs
            if (
                not any(d.startswith(ed) for ed in exclude_base_dirs) and
                not any(d.endswith(ed) for ed in exclude_end_dirs)
            )
        ]
        # Ensure actually still empty (with recursive checks for hidden files)
        print('truly_empty_dirs1[::5] = %s' % (
            ut.list_str(truly_empty_dirs1[0::5], strvals=True),))
        #print('truly_empty_dirs1 = %s' % (ut.list_str(truly_empty_dirs1, strvals=True),))

        if not dryrun:
            # FIX PART
            #from os.path import normpath
            #for d in ut.ProgIter(truly_empty_dirs):
            #    break
            #    if ut.WIN32:
            #        # http://www.sevenforums.com/system-security/53095-file-folder-read-only-attribute-wont-disable.html
            #        ut.cmd('attrib', '-r', '-s', normpath(d), verbose=False)
            #x = ut.remove_fpaths(truly_empty_dirs, strict=False)

            print('Deleting %d truly_empty_dirs1' % (len(truly_empty_dirs1),))

            for d in ut.ProgIter(truly_empty_dirs1, 'DELETE empty dirs', freq=1000):  # NOQA
                ut.delete(d, quiet=True)

            if ut.WIN32 and False:
                # remove file that failed removing
                flags = list(map(exists, truly_empty_dirs1))
                truly_empty_dirs1 = ut.compress(truly_empty_dirs1, flags)
                for d in ut.ProgIter(truly_empty_dirs1, 'rming', freq=1000):
                    ut.cmd('rmdir', d)
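The quadratic isbase_dir check above reduces a whole empty subtree to its topmost directory: a directory is kept only if no other empty directory is a proper prefix of it. The same reduction in isolation (hypothetical paths; note that a plain startswith would also match sibling names like '/a/bc', so this sketch compares with a trailing separator):

    empty_dpaths = ['/a/b', '/a/b/c', '/a/b/c/d', '/x/y']
    isbase_dir = [
        not any(d.startswith(other + '/') for other in empty_dpaths if other != d)
        for d in empty_dpaths
    ]
    base_empty_dirs = [d for d, flag in zip(empty_dpaths, isbase_dir) if flag]
    assert base_empty_dirs == ['/a/b', '/x/y']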
Example 12
def compute_and_write_probchip(ibs, aid_list, config2_=None, lazy=True):
    """ Computes probability chips using pyrf

    CommandLine:
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:0 --show
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:1
        python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:2 --show --cnn

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> config2_ = None
        >>> lazy = True
        >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)[0:4]
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4)
        >>> ut.show_if_requested()

    Example1:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> lazy = False
        >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)

    Example2:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = ibs.new_query_params({'fw_detector': 'cnn'})
        >>> lazy = True
        >>> aid_list = ibs.get_valid_aids()
        >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy)
        >>> result = ut.list_str(probchip_fpath_list_)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4)
        >>> ut.show_if_requested()

    Dev::
        #ibs.delete_annot_chips(aid_list)
        #probchip_fpath_list = get_annot_probchip_fpath_list(ibs, aid_list)
    """
    # Get probchip dest information (output path)
    # TODO: properly ungroup output
    grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(
        ibs, aid_list, config2_)
    nSpecies = len(unique_species)
    nTasks = len(aid_list)
    print(('[preproc_probchip.compute_and_write_probchip] '
           'Preparing to compute %d probchips of %d species') %
          (nTasks, nSpecies))
    cachedir = ibs.get_probchip_dir()
    ut.ensuredir(cachedir)

    grouped_probchip_fpath_list = []
    if ut.VERBOSE:
        print('[preproc_probchip] +--------------------')
    for aids, species in zip(grouped_aids, unique_species):
        if ut.VERBOSE:
            print('[preproc_probchip] Computing probchips for species=%r' %
                  species)
            print('[preproc_probchip] |--------------------')
        if len(aids) == 0:
            continue
        probchip_fpaths = get_annot_probchip_fpath_list(ibs,
                                                        aids,
                                                        config2_=config2_,
                                                        species=species)

        if lazy:
            # Filter out probchips that are already on disk
            # pyrf used to do this, now we need to do it
            # caching should be implicit due to using the visual_annot_uuid in
            # the filename
            isdirty_list = ut.not_list(map(exists, probchip_fpaths))
            dirty_aids = ut.compress(aids, isdirty_list)
            dirty_probchip_fpath_list = ut.compress(probchip_fpaths,
                                                    isdirty_list)
            print(('[preproc_probchip.compute_and_write_probchip]'
                   ' Lazy compute: need to compute %d/%d of species=%s') %
                  (len(dirty_aids), len(aids), species))
        else:
            # No filtering
            dirty_aids = aids
            dirty_probchip_fpath_list = probchip_fpaths

        if len(dirty_aids) > 0:
            write_dirty_aids(ibs, dirty_probchip_fpath_list, dirty_aids,
                             config2_, species)

        grouped_probchip_fpath_list.append(probchip_fpaths)
    if ut.VERBOSE:
        print('[preproc_probchip] Done computing probability images')
        print('[preproc_probchip] L_______________________')

    probchip_fpath_list = vt.invert_apply_grouping2(
        grouped_probchip_fpath_list, groupxs, dtype=object)
    return probchip_fpath_list
Example 13
    def parse_column_tuples(self,
                            col_name_list,
                            col_types_dict,
                            col_getter_dict,
                            col_bgrole_dict,
                            col_ider_dict,
                            col_setter_dict,
                            editable_colnames,
                            sortby,
                            sort_reverse=True,
                            strict=False,
                            **kwargs):
        """
        parses simple lists into information suitable for making guitool headers
        """
        # Unpack the column tuples into names, getters, and types
        if not strict:
            # sloppy colname definitions
            flag_list = [
                colname in col_getter_dict for colname in col_name_list
            ]
            if not all(flag_list):
                invalid_colnames = ut.compress(col_name_list,
                                               ut.not_list(flag_list))
                print(
                    '[api_item_widget] Warning: colnames=%r have no getters' %
                    (invalid_colnames, ))
                col_name_list = ut.compress(col_name_list, flag_list)
            # sloppy type inference
            for colname in col_name_list:
                getter_ = col_getter_dict[colname]
                if colname not in col_types_dict:
                    type_ = ut.get_homogenous_list_type(getter_)
                    if type_ is not None:
                        col_types_dict[colname] = type_
        # sloppy kwargs.
        # FIXME: explicitly list col_nice_dict
        col_nice_dict = kwargs.get('col_nice_dict', {})
        self.col_nice_list = [
            col_nice_dict.get(name, name) for name in col_name_list
        ]

        self.col_name_list = col_name_list
        self.col_type_list = [
            col_types_dict.get(colname, str) for colname in col_name_list
        ]
        self.col_getter_list = [
            col_getter_dict.get(colname, str) for colname in col_name_list
        ]  # First col is always a getter
        # Get number of rows / columns
        self.nCols = len(self.col_getter_list)
        self.nRows = 0 if self.nCols == 0 else len(
            self.col_getter_list[0])  # FIXME
        # Init iders to default and then overwrite based on dict inputs
        self.col_ider_list = ut.alloc_nones(self.nCols)
        for colname, ider_colnames in six.iteritems(col_ider_dict):
            try:
                col = self.col_name_list.index(colname)
                # Col iders might have tuple input
                ider_cols = ut.uinput_1to1(self.col_name_list.index,
                                           ider_colnames)
                col_ider = ut.uinput_1to1(lambda c: partial(self.get, c),
                                          ider_cols)
                self.col_ider_list[col] = col_ider
                del col_ider
                del ider_cols
                del col
                del colname
            except Exception as ex:
                ut.printex(ex,
                           keys=[
                               'colname', 'ider_colnames', 'col', 'col_ider',
                               'ider_cols'
                           ])
                raise
        # Init setters to data, and then overwrite based on dict inputs
        self.col_setter_list = list(self.col_getter_list)
        for colname, col_setter in six.iteritems(col_setter_dict):
            col = self.col_name_list.index(colname)
            self.col_setter_list[col] = col_setter
        # Init bgrole_getters to None, and then overwrite based on dict inputs
        self.col_bgrole_getter_list = [
            col_bgrole_dict.get(colname, None)
            for colname in self.col_name_list
        ]
        # Mark editable columns
        self.col_edit_list = [
            name in editable_colnames for name in col_name_list
        ]
        # Mark the sort column index
        if ut.is_str(sortby):
            self.col_sort_index = self.col_name_list.index(sortby)
        else:
            self.col_sort_index = sortby
        self.col_sort_reverse = sort_reverse
Example 14
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #    '__class__': 'ImageSet',
    #    'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #    for imgdict in key_imgs:
    #        url = imgdict['url']
    #        encid = imgdict['correspondingEncounterNumber']
    #        # Make structure
    #        encdict = encounters[encid]
    #        encdict['__class__'] = 'Encounter'
    #        imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #        imgdict['__class__'] = 'Image'
    #        cat = key_to_cat[key]
    #        annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #        annotdict['__class__'] = 'Annotation'

    #        # Ensure structures exist
    #        encdict['images'] = encdict.get('images', [])
    #        imgdict['annots'] = imgdict.get('annots', [])

    #        # Add an image to this encounter
    #        encdict['images'].append(imgdict)
    #        # Add an annotation to this image
    #        imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =

    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs',
                                  freq=1):
        fpath = ut.grab_file_url(url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #    info = ut.dict_accum(*info_)
    #    info = ut.map_dict_vals(ut.flatten, info)
    #    x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #    if len(x) > 1:
    #        info = info.copy()
    #        del info['keys']
    #        logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #    # FIX
    #    for fpath, fname in zip(fpath_list, fname_list):
    #        if ut.checkpath(fpath):
    #            ut.move(fpath, join(dirname(fpath), fname))
    #            logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,  # NOTE: this kwarg is spelled 'visualize' in newer scikit-image
        )
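        # `fd` is the flattened HOG feature vector; `hog_image` renders the
        # dominant gradient orientation per cell for visualization.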

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax2.set_adjustable('box-forced')
        pt.plt.show()
Example n. 15
def split_analysis(ibs):
    """
    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    import numpy as np
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1),))
    print('%d annots on day 2' % (len(aids2),))
    print('%d annots overall' % (len(all_annots),))
    print('%d names overall' % (len(ut.unique(all_annots.nids)),))

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])
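    # max_speeds[i] is the fastest implied travel speed (km/h) between any two
    # sightings of name i; nan means there was no comparable pair and inf
    # typically means two distinct GPS locations with no time difference.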

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    all_good_edges = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    print('num_good_edges = %r' % (len(all_good_edges),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = all_good_edges
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrivial Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
Example n. 16
    def parse_column_tuples(
        self,
        col_name_list,
        col_types_dict,
        col_getter_dict,
        col_bgrole_dict,
        col_ider_dict,
        col_setter_dict,
        editable_colnames,
        sortby,
        sort_reverse=True,
        strict=False,
        **kwargs,
    ):
        """
        parses simple lists into information suitable for making guitool headers
        """
        # Unpack the column tuples into names, getters, and types
        if not strict:
            # sloppy colname definitions
            flag_list = [colname in col_getter_dict for colname in col_name_list]
            if not all(flag_list):
                invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
                logger.info(
                    '[api_item_widget] Warning: colnames=%r have no getters'
                    % (invalid_colnames,)
                )
                col_name_list = ut.compress(col_name_list, flag_list)
            # sloppy type inference
            for colname in col_name_list:
                getter_ = col_getter_dict[colname]
                if colname not in col_types_dict:
                    type_ = ut.get_homogenous_list_type(getter_)
                    if type_ is not None:
                        col_types_dict[colname] = type_
        # sloppy kwargs.
        # FIXME: explicitly list col_nice_dict
        col_nice_dict = kwargs.get('col_nice_dict', {})
        self.col_nice_list = [col_nice_dict.get(name, name) for name in col_name_list]

        self.col_name_list = col_name_list
        self.col_type_list = [
            col_types_dict.get(colname, str) for colname in col_name_list
        ]
        # First col is always a getter
        self.col_getter_list = [
            col_getter_dict.get(colname, str) for colname in col_name_list
        ]
        # Get number of rows / columns
        self.nCols = len(self.col_getter_list)
        if self.nCols == 0:
            self.nRows = 0
        else:
            for getter in self.col_getter_list:
                if ut.isiterable(getter):
                    break
                getter = None
            # FIXME
            assert getter is not None, 'at least one getter must be an array/list'
            self.nRows = len(getter)

        # self.nRows = 0 if self.nCols == 0 else len(self.col_getter_list[0])  # FIXME
        # Init iders to default and then overwrite based on dict inputs
        self.col_ider_list = [None] * self.nCols  # ut.alloc_nones(self.nCols)
        # for colname, ider_colnames in six.iteritems(col_ider_dict):
        # import utool
        # utool.embed()
        colname2_colx = ut.make_index_lookup(self.col_name_list)
        for colname, ider_colnames in six.iteritems(col_ider_dict):
            if colname not in colname2_colx:
                continue
            # for colname in self.col_name_list:
            ider_colnames = col_ider_dict[colname]
            try:
                colx = colname2_colx[colname]
                # Col iders might have tuple input
                ider_cols = self._uinput_1to1(self.col_name_list.index, ider_colnames)
                col_ider = self._uinput_1to1(lambda c: ut.partial(self.get, c), ider_cols)
                self.col_ider_list[colx] = col_ider
                del col_ider
                del ider_cols
                del colx
                del colname
            except Exception as ex:
                ut.printex(
                    ex,
                    keys=['colname', 'ider_colnames', 'colx', 'col_ider', 'ider_cols'],
                )
                raise
        # Init setters to data, and then overwrite based on dict inputs
        self.col_setter_list = list(self.col_getter_list)
        for colname, col_setter in six.iteritems(col_setter_dict):
            colx = colname2_colx[colname]
            self.col_setter_list[colx] = col_setter
        # Init bgrole_getters to None, and then overwrite based on dict inputs
        self.col_bgrole_getter_list = [
            col_bgrole_dict.get(colname, None) for colname in self.col_name_list
        ]
        # Mark editable columns
        self.col_edit_list = [name in editable_colnames for name in col_name_list]
        # Mark the sort column index
        if sortby is None:
            self.col_sort_index = 0
        elif ut.is_str(sortby):
            self.col_sort_index = self.col_name_list.index(sortby)
        else:
            self.col_sort_index = sortby
        self.col_sort_reverse = sort_reverse

        # Hacks for tree widget
        self._iders = kwargs.get('iders', None)
        col_level_dict = kwargs.get('col_level_dict', None)
        if col_level_dict is None:
            self.col_level_list = None
        else:
            self.col_level_list = ut.take(col_level_dict, col_name_list)
Example n. 17
    def fix_empty_dirs(drive):
        """
        # --- FIND EMPTY DIRECTORIES ---
        """
        print('Fixing Empty Dirs in %r' % (drive, ))
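        # drive.dpath_to_fidx maps each directory to the indexes of the files
        # registered under it; directories that map to zero files are
        # candidates for removal.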
        fidxs_list = ut.dict_take(drive.dpath_to_fidx, drive.dpath_list)
        isempty_flags = [len(fidxs) == 0 for fidxs in fidxs_list]
        empty_dpaths = ut.compress(drive.dpath_list, isempty_flags)

        def is_cplat_link(path_):
            try:
                if islink(path_):
                    return True
                os.listdir(path_)
                return False
            except OSError:
                return True

        valid_flags = [not is_cplat_link(d) for d in empty_dpaths]
        if not all(valid_flags):
            print('Filtered windows links %r / %r' %
                  (len(empty_dpaths) - sum(valid_flags), len(empty_dpaths)))
            #print(ut.repr2(empty_dpaths[0:10]))
            empty_dpaths = ut.compress(empty_dpaths, valid_flags)

        print('Found %r / %r empty_dpaths' %
              (len(empty_dpaths), len(drive.dpath_list)))
        print(ut.repr2(empty_dpaths[0:10]))

        # Ensure actually still empty
        current_contents = [
            ut.glob(d, with_dirs=False)
            for d in ut.ProgIter(empty_dpaths, 'checking empty status')
        ]
        current_lens = list(map(len, current_contents))
        assert not any(current_lens), 'some dirs are not empty'

        # n ** 2 check to get only the base directories
        isbase_dir = [
            not any([
                d.startswith(dpath_) and d != dpath_ for dpath_ in empty_dpaths
            ]) for d in ut.ProgIter(empty_dpaths, 'finding base dirs')
        ]
        base_empty_dirs = ut.compress(empty_dpaths, isbase_dir)
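        # Only base dirs (empty dirs not nested inside another empty dir) need
        # handling; removing a base also removes its empty children.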

        def list_only_files(dpath):
            # glob is too slow
            for root, dirs, fpaths in os.walk(dpath):
                for fpath in fpaths:
                    yield fpath

        base_current_contents = [
            list(list_only_files(d)) for d in ut.ProgIter(
                base_empty_dirs, 'checking emptiness', freq=10)
        ]
        is_actually_empty = [len(fs) == 0 for fs in base_current_contents]
        not_really_empty = ut.compress(base_empty_dirs,
                                       ut.not_list(is_actually_empty))
        print('%d dirs are not actually empty' % (len(not_really_empty), ))
        print('not_really_empty = %s' % (ut.repr2(not_really_empty[0:10]), ))
        truly_empty_dirs = ut.compress(base_empty_dirs, is_actually_empty)

        def list_all(dpath):
            # glob is too slow
            for root, dirs, fpaths in os.walk(dpath):
                for dir_ in dirs:
                    yield dir_
                for fpath in fpaths:
                    yield fpath

        exclude_base_dirs = [join(drive.root_dpath, 'AppData')]
        exclude_end_dirs = ['__pycache__']
        truly_empty_dirs1 = [
            d for d in truly_empty_dirs
            if (not any(d.startswith(ed) for ed in exclude_base_dirs)
                and not any(d.endswith(ed) for ed in exclude_end_dirs))
        ]
        # Ensure actually still empty (with recursive checks for hidden files)
        print('truly_empty_dirs1[::5] = %s' %
              (ut.repr2(truly_empty_dirs1[0::5], strvals=True), ))
        #print('truly_empty_dirs1 = %s' % (ut.repr2(truly_empty_dirs1, strvals=True),))

        if not dryrun:  # NOTE: dryrun is assumed to be defined in the enclosing scope
            # FIX PART
            #from os.path import normpath
            #for d in ut.ProgIter(truly_empty_dirs):
            #    break
            #    if ut.WIN32:
            #        # http://www.sevenforums.com/system-security/53095-file-folder-read-only-attribute-wont-disable.html
            #        ut.cmd('attrib', '-r', '-s', normpath(d), verbose=False)
            #x = ut.remove_fpaths(truly_empty_dirs, strict=False)

            print('Deleting %d truly_empty_dirs1' % (len(truly_empty_dirs1), ))

            for d in ut.ProgIter(truly_empty_dirs1,
                                 'DELETE empty dirs',
                                 freq=1000):  # NOQA
                ut.delete(d, quiet=True)

            if ut.WIN32 and False:
                # remove file that failed removing
                flags = list(map(exists, truly_empty_dirs1))
                truly_empty_dirs1 = ut.compress(truly_empty_dirs1, flags)
                for d in ut.ProgIter(truly_empty_dirs1, 'rming', freq=1000):
                    ut.cmd('rmdir', d)
Example n. 18
    def fix_chktex():
        """
        ./texfix.py --fixcite --fix-chktex
        """
        import parse
        fpaths = testdata_fpaths()
        print('Running chktex')
        output_list = [
            ut.cmd('chktex', fpath, verbose=False)[0] for fpath in fpaths
        ]

        fixcite = ut.get_argflag('--fixcite')
        fixlbl = ut.get_argflag('--fixlbl')
        fixcmdterm = ut.get_argflag('--fixcmdterm')

        for fpath, output in zip(fpaths, output_list):
            text = ut.readfrom(fpath)
            buffer = text.split('\n')
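            # Split chktex output into one chunk per warning; the lookahead
            # keeps the leading 'Warning' text attached to each chunk so every
            # one matches the parse template below.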
            pat = '\n' + ut.positive_lookahead('Warning')
            warn_list = list(
                filter(lambda x: x.startswith('Warning'),
                       re.split(pat, output)))
            delete_linenos = []

            if not (fixcmdterm or fixlbl or fixcite):
                print(' CHOOSE A FIX ')

            modified_lines = []

            for warn in warn_list:
                warnlines = warn.split('\n')
                pres = parse.parse(
                    'Warning {num} in {fpath} line {lineno}: {warnmsg}',
                    warnlines[0])
                if pres is not None:
                    fpath_ = pres['fpath']
                    lineno = int(pres['lineno']) - 1
                    warnmsg = pres['warnmsg']
                    try:
                        assert fpath == fpath_, ('%r != %r' % (fpath, fpath_))
                    except AssertionError:
                        continue
                    if 'No errors printed' in warn:
                        #print('Cannot fix')
                        continue
                    if lineno in modified_lines:
                        print('Skipping modified line')
                        continue
                    if fixcmdterm and warnmsg == 'Command terminated with space.':
                        print('Fix command termination')
                        errorline = warnlines[1]  # NOQA
                        carrotline = warnlines[2]
                        pos = carrotline.find('^')
                        if 0:
                            print('pos = %r' % (pos, ))
                            print('lineno = %r' % (lineno, ))
                            print('errorline = %r' % (errorline, ))
                        modified_lines.append(lineno)
                        line = buffer[lineno]
                        pre_, post_ = line[:pos], line[pos + 1:]
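                        # Insert an empty group '{}' so the macro is
                        # explicitly terminated and the trailing space
                        # survives (the fix chktex suggests for 'Command
                        # terminated with space').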
                        newline = (pre_ + '{} ' + post_).rstrip(' ')
                        #print('newline   = %r' % (newline,))
                        buffer[lineno] = newline
                    elif fixlbl and warnmsg == 'Delete this space to maintain correct pagereferences.':
                        print('Fix label newline')
                        fpath_ = pres['fpath']
                        errorline = warnlines[1]  # NOQA
                        new_prevline = buffer[
                            lineno - 1].rstrip() + errorline.lstrip(' ')
                        buffer[lineno - 1] = new_prevline
                        modified_lines.append(lineno)
                        delete_linenos.append(lineno)
                    elif fixcite and re.match(
                            'Non-breaking space \\(.~.\\) should have been used',
                            warnmsg):
                        #print(warnmsg)
                        #print('\n'.join(warnlines))
                        print('Fix citation space')
                        carrotline = warnlines[2]
                        pos = carrotline.find('^')
                        modified_lines.append(lineno)
                        line = buffer[lineno]
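                        # Replace the space before the citation with a tie '~'
                        # so TeX cannot break the line there, e.g. 'foo~\cite{x}'.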
                        if line[pos] == ' ':
                            pre_, post_ = line[:pos], line[pos + 1:]
                            newline = (pre_ + '~' + post_).rstrip(' ')
                        else:
                            pre_, post_ = line[:pos + 1], line[pos + 1:]
                            newline = (pre_ + '~' + post_).rstrip(' ')
                            print(warn)
                            print(line[pos])
                            assert False
                            #assert line[pos] == ' ', '%r' % line[pos]
                            break
                        if len(pre_.strip()) == 0:
                            new_prevline = buffer[
                                lineno - 1].rstrip() + newline.lstrip(' ')
                            buffer[lineno - 1] = new_prevline
                            delete_linenos.append(lineno)
                        else:
                            #print('newline   = %r' % (newline,))
                            buffer[lineno] = newline
                    #print(warn)

            if len(delete_linenos) > 0:
                mask = ut.index_to_boolmask(delete_linenos, len(buffer))
                buffer = ut.compress(buffer, ut.not_list(mask))
            newtext = '\n'.join(buffer)

            #ut.dump_autogen_code(fpath, newtext, 'tex', fullprint=False)
            ut.print_difftext(
                ut.get_textdiff(text, newtext, num_context_lines=4))
            if ut.get_argflag('-w'):
                ut.writeto(fpath, newtext)
            else:
                print('Specify -w to finalize change')
Example n. 19
def get_imageset_isoccurrence(ibs, imgsetid_list):
    flags = ut.not_list(ibs.is_special_imageset(imgsetid_list))
    #imgset_texts = ibs.get_imageset_text(imgsetid_list)
    #flags = [text.lower().startswith('occurrence') for text in imgset_texts]
    return flags
Example n. 20
    def mark_unreviewed_above_score_as_correct(qres_wgt):
        selected_qtindex_list = qres_wgt.selectedRows()
        if len(selected_qtindex_list) == 1:
            qtindex = selected_qtindex_list[0]
            # aid1, aid2 = qres_wgt.get_aidpair_from_qtindex(qtindex)
            thresh = qtindex.model().get_header_data('score', qtindex)
            logger.info('thresh = %r' % (thresh, ))

            rows = qres_wgt.review_api.ider()
            scores_ = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('score'), rows)
            valid_rows = ut.compress(rows, scores_ >= thresh)
            aids1 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('qaid'), valid_rows)
            aids2 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('aid'), valid_rows)
            # ibs = qres_wgt.ibs
            ibs = qres_wgt.ibs
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(
                aids1, aids2)
            reviewed = ibs.get_annotmatch_reviewed(am_rowids)
            unreviewed = ut.not_list(reviewed)

            valid_rows = ut.compress(valid_rows, unreviewed)
            aids1 = ut.compress(aids1, unreviewed)
            aids2 = ut.compress(aids2, unreviewed)

            import networkx as nx

            graph = nx.Graph()
            graph.add_edges_from(list(zip(aids1, aids2)),
                                 user_thresh_match=True)
            review_groups = [
                graph.subgraph(c).copy()
                for c in nx.connected_components(graph)
            ]

            changing_aids = list(graph.nodes())
            nids = ibs.get_annot_nids(changing_aids)
            nid2_aids = ut.group_items(changing_aids, nids)
            for nid, aids in nid2_aids.items():
                # Connect all original names in the database to denote merges
                for u, v in ut.itertwo(aids):
                    graph.add_edge(u, v)
            dbside_groups = [
                graph.subgraph(c).copy()
                for c in nx.connected_components(graph)
            ]
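            # dbside_groups joins the thresholded matches with existing
            # database names, so each group spans every annotation that will
            # share the final merged name.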

            options = [
                'Accept',
                # 'Review More'
            ]
            msg = (ut.codeblock("""
                There are %d names and %d annotations in this mass review set.
                Mass review has discovered %d internal groups.
                Accepting will induce a database grouping of %d names.
                """) % (
                len(nid2_aids),
                len(changing_aids),
                len(review_groups),
                len(dbside_groups),
            ))

            reply = gt.user_option(msg=msg, options=options)

            if reply == options[0]:
                # This is not the smartest way to group names.
                # Ideally what will happen here, is that reviewed edges will go into
                # the new graph name inference algorithm.
                # then the chosen point will be used as the threshold. Then
                # the graph cut algorithm will be applied.
                logger_ = qres_wgt.logger
                logger_.debug(msg)
                logger_.info('START MASS_THRESHOLD_MERGE')
                logger_.info('num_groups=%d thresh=%r' % (
                    len(dbside_groups),
                    thresh,
                ))
                for count, subgraph in enumerate(dbside_groups):
                    thresh_aid_pairs = [
                        edge for edge, flag in nx.get_edge_attributes(
                            graph, 'user_thresh_match').items() if flag
                    ]
                    thresh_uuid_pairs = ibs.unflat_map(ibs.get_annot_uuids,
                                                       thresh_aid_pairs)
                    aids = list(subgraph.nodes())
                    nids = ibs.get_annot_name_rowids(aids)
                    flags = ut.not_list(ibs.is_aid_unknown(aids))
                    previous_names = ibs.get_name_texts(nids)
                    valid_nids = ut.compress(nids, flags)
                    if len(valid_nids) == 0:
                        merge_nid = ibs.make_next_nids(num=1)[0]
                        type_ = 'new'
                    else:
                        merge_nid = min(valid_nids)
                        type_ = 'existing'

                    # Need to find other non-exemplar / query names that may
                    # need merging
                    other_aids = ibs.get_name_aids(valid_nids)
                    other_aids = set(ut.flatten(other_aids)) - set(aids)
                    other_auuids = ibs.get_annot_uuids(other_aids)
                    other_previous_names = ibs.get_annot_names(other_aids)

                    merge_name = ibs.get_name_texts(merge_nid)
                    annot_uuids = ibs.get_annot_uuids(aids)
                    ###
                    # Set as reviewed (so we don't see them again), but mark it
                    # with a different code to denote that it was a MASS review
                    aid1_list = ut.take_column(thresh_aid_pairs, 0)
                    aid2_list = ut.take_column(thresh_aid_pairs, 1)
                    am_rowids = ibs.add_annotmatch_undirected(
                        aid1_list, aid2_list)
                    ibs.set_annotmatch_reviewer(
                        am_rowids, ['algo:lnbnn_thresh'] * len(am_rowids))

                    logger_.info('START GROUP %d' % (count, ))
                    logger_.info(
                        'GROUP BASED ON %d ANNOT_PAIRS WITH SCORE ABOVE (thresh=%r)'
                        % (
                            len(thresh_uuid_pairs),
                            thresh,
                        ))
                    logger_.debug('(uuid_pairs=%r)' % (thresh_uuid_pairs))
                    logger_.debug('(merge_name=%r)' % (merge_name))
                    logger_.debug(
                        'CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(annot_uuids),
                            annot_uuids,
                            previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.debug(
                        'ADDITIONAL CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(other_auuids),
                            other_auuids,
                            other_previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.info('END GROUP %d' % (count, ))
                    new_nids = [merge_nid] * len(aids)
                    ibs.set_annot_name_rowids(aids, new_nids)
                logger_.info('END MASS_THRESHOLD_MERGE')
        else:
            logger.info('[context] Multiple %d selection' %
                        (len(selected_qtindex_list), ))