Exemple #1
0
def get_annotmatch_rowid_from_undirected_superkey(ibs, aids1, aids2):
    """
    Look up annotmatch rowids for annotation pairs in either direction.

    Every pair is first queried as ``(aid1, aid2)``. Pairs whose lookup
    produced ``None`` are queried a second time as ``(aid2, aid1)`` and the
    result of that second query replaces the ``None``.

    Args:
        ibs: controller providing ``get_annotmatch_rowid_from_superkey``
        aids1: first annotation id of each pair
        aids2: second annotation id of each pair

    Returns:
        The sequence returned by the forward lookup, patched in place with
        the reverse-lookup results (so it must support item assignment).
    """
    # The underlying superkey is directed, hence the two-pass lookup.
    rowids = ibs.get_annotmatch_rowid_from_superkey(aids1, aids2)
    # Positions that had no forward match
    unmatched = ut.where([rowid is None for rowid in rowids])
    unmatched_aids1 = ut.take(aids1, unmatched)
    unmatched_aids2 = ut.take(aids2, unmatched)
    # Retry only the misses with the pair order swapped
    reverse_rowids = ibs.get_annotmatch_rowid_from_superkey(
        unmatched_aids2, unmatched_aids1)
    for position, reverse_rowid in zip(unmatched, reverse_rowids):
        rowids[position] = reverse_rowid
    return rowids
Exemple #2
0
    def expand_input(inputs, index, inplace=False):
        """
        Expand one rootmost input into the inputs on its parent level.

        Args:
            index (int or str): position in ``inputs.rmi_list``, or a
                tablename. A tablename is resolved to the first input with
                that name; when no input matches, position 0 is used.
            inplace (bool): if True ``inputs`` itself is updated and
                returned, otherwise a new ``TableInput`` is created.

        Returns:
            TableInput: inputs with the selected entry expanded. When the
            selected entry has no parents the list is only copied.

        CommandLine:
            python -m dtool.input_helpers expand_input

        Example:
            >>> # ENABLE_DOCTEST
            >>> from dtool.input_helpers import *  # NOQA
            >>> from dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc4()
            >>> inputs = depc['smk_match'].rootmost_inputs
            >>> inputs = depc['neighbs'].rootmost_inputs
            >>> print('(pre-expand)  inputs  = %r' % (inputs,))
            >>> index = 'indexer'
            >>> inputs2 = inputs.expand_input(index)
            >>> print('(post-expand) inputs2 = %r' % (inputs2,))
            >>> assert 'indexer' in str(inputs), 'missing indexer1'
            >>> assert 'indexer' not in str(inputs2), (
            >>>     '(2) unexpected indexer in %s' % (inputs2,))
        """
        if isinstance(index, six.string_types):
            # Translate a tablename into a list position
            matching_positions = ut.where(
                [node.tablename == index for node in inputs.rmi_list])
            index = matching_positions[0] if len(matching_positions) > 0 else 0

        selected = inputs.rmi_list[index]
        parents = selected.parent_level()
        if len(parents) > 0:
            # presumably splices the parents in at ``index`` -- see
            # ut.insert_values; duplicates are then dropped
            expanded = ut.insert_values(inputs.rmi_list, index, parents,
                                        inplace)
            expanded = ut.unique(expanded)
        else:
            # Nothing to expand, work on a shallow copy
            expanded = inputs.rmi_list[:]

        if not inplace:
            return TableInput(expanded, inputs.exi_graph, inputs.table)
        inputs.rmi_list = expanded
        return inputs
def purge_ensure_one_annot_per_images(ibs):
    """
    Name the annotations of every image after the image's parent directory.

    Requires the third-party ``pipe`` package (``pip install Pipe``).

    Steps performed by the visible code:
        1. Collect the aids of every annotation that is not the
           largest-bbox annotation of its image (``small_aids``).
        2. Log the indices of images that have zero annotations.
        3. Derive one name per image from the basename of the directory of
           its original uri.
        4. Assert that all images have the same number of annotations and
           assign the derived names to the flattened annotations.

    NOTE(review): ``small_aids`` is computed but never used in this
    function -- nothing is actually purged here. Confirm whether a delete
    call is missing.
    """
    # Purge all but one annotation
    images = ibs.images()
    # images.aids
    groups = images._annot_groups
    import numpy as np

    # Take all but the largest annotations per images
    # (np.argmax raises on an empty sequence, so an image without
    # annotations would fail here -- TODO confirm this cannot happen)
    large_masks = [
        ut.index_to_boolmask([np.argmax(x)], len(x)) for x in groups.bbox_area
    ]
    small_masks = ut.lmap(ut.not_list, large_masks)
    # Remove all but the largest annotation
    small_aids = ut.zipcompress(groups.aid, small_masks)
    small_aids = ut.flatten(small_aids)

    # Fix any empty images
    # (only reports them; the images are re-queried from the controller)
    images = ibs.images()
    empty_images = ut.where(np.array(images.num_annotations) == 0)
    logger.info('empty_images = %r' % (empty_images, ))

    # list(map(basename, map(dirname, images.uris_original)))

    def VecPipe(func):
        # Wrap ``func`` so it can be applied element-wise with the ``|``
        # operator provided by the ``pipe`` package.
        import pipe

        @pipe.Pipe
        def wrapped(sequence):
            return map(func, sequence)
            # return (None if item is None else func(item) for item in sequence)

        return wrapped

    # Name of the directory that directly contains each original image
    name_list = list(images.uris_original | VecPipe(dirname)
                     | VecPipe(basename))
    aids_list = images.aids
    # Every image must have the same number of annotations.
    # NOTE(review): name_list has one entry per image, so the assignment
    # below presumably expects exactly one annotation per image -- confirm.
    ut.assert_all_eq(list(aids_list | VecPipe(len)))
    annots = ibs.annots(ut.flatten(aids_list))
    annots.names = name_list
Exemple #4
0
def add_annotmatch_undirected(ibs, aids1, aids2, **kwargs):
    """
    Get the annotmatch rowid of each annotation pair, creating missing ones.

    The pairs are first normalized with ``nx_utils.e_`` and looked up with
    the undirected superkey. Every pair without a rowid is then added via
    ``ibs.add_annotmatch``.

    Args:
        ibs: controller object
        aids1: first annotation id of each pair
        aids2: second annotation id of each pair
        **kwargs: accepted but not used by this function

    Returns:
        list: one annotmatch rowid per pair (``[]`` when both inputs are
        empty)
    """
    if not (len(aids1) != 0 or len(aids2) != 0):
        return []
    pairs = list(zip(aids1, aids2))
    from ibeis.algo.graph import nx_utils as nxu
    # Enforce new undirected constraint
    pairs = ut.estarmap(nxu.e_, pairs)
    aids1, aids2 = list(zip(*pairs))

    rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1, aids2)
    # Positions of the pairs that do not exist yet
    absent = ut.where([rowid is None for rowid in rowids])
    absent_aids1 = ut.take(aids1, absent)
    absent_aids2 = ut.take(aids2, absent)
    # Create anything that is None
    # NOTE(review): the new rows are added as (aid2, aid1), i.e. reversed
    # relative to the normalized pair order -- confirm this is intended.
    created_rowids = ibs.add_annotmatch(absent_aids2, absent_aids1)
    for position, created_rowid in zip(absent, created_rowids):
        rowids[position] = created_rowid
    return rowids
Exemple #5
0
 def compress(self, flags):
     """Return ``self.take`` of the positions selected by ``ut.where(flags)``."""
     return self.take(ut.where(flags))
Exemple #6
0
 def fuzzy_find_colxs(self, pat):
     """
     Return the column indices whose header tags match ``pat``.

     Matching is delegated to ``ut.filterflags_general_tags`` with
     ``in_any=[pat]``.
     """
     import utool as ut
     matched_flags = ut.filterflags_general_tags(self.header_tags,
                                                 in_any=[pat])
     return ut.where(matched_flags)
Exemple #7
0
    def get_col(table, tbl_rowids, colnames=None):
        """
        Read one or more columns of this table for the given rowids.

        Args:
            tbl_rowids: rowids of this table to read
            colnames: ``None`` (use ``table.data_colnames``), a single
                column name, or a sequence of column names,
                e.g. ``colnames = ('mask', 'size')``

        Returns:
            list: one entry per row returned by the sql read. Each entry is
            a tuple with one item per requested column, or the bare item
            when a single column name was passed as a string.

        Column kinds handled:
            * external columns (``table.external_to_internal``): the stored
              uri is joined to ``table.depc.cache_dpath`` and loaded with
              the column's read function
            * nested columns (``table.nested_to_flat``): the flat sql
              columns are regrouped into one tuple per row
            * everything else is read as-is

        FIXME; unpacking is confusing with sql controller
        """
        # print('Get prop of %r, colnames=%r' % (table, colnames))
        try:
            request_unpack = False
            if colnames is None:
                colnames = table.data_colnames
                #table._internal_data_colnames
            else:
                # NOTE(review): only six.text_type is treated as a single
                # column name; presumably a py2 byte ``str`` would be
                # iterated character by character -- confirm.
                if isinstance(colnames, six.text_type):
                    request_unpack = True
                    colnames = (colnames,)
            # print('* colnames = %r' % (colnames,))

            eager = True
            nInput = None

            # Running count of flat (sql-level) columns requested so far
            total = 0
            # Per requested column: list of the internal sql column names
            intern_colnames = []
            # (flat column index, read function) for each external column
            extern_resolve_colxs = []
            # Per requested column: flat index, or list of flat indices for
            # a nested column
            nesting_xs = []

            for c in colnames:
                if c in table.external_to_internal:
                    intern_colnames.append([table.external_to_internal[c]])
                    read_func = table.extern_read_funcs[c]
                    extern_resolve_colxs.append((total, read_func))
                    nesting_xs.append(total)
                    total += 1
                elif c in table.nested_to_flat:
                    nest = table.nested_to_flat[c]
                    nesting_xs.append(list(range(total, total + len(nest))))
                    intern_colnames.append(nest)
                    total += len(nest)
                else:
                    nesting_xs.append(total)
                    intern_colnames.append([c])
                    total += 1

            flat_intern_colnames = tuple(ut.flatten(intern_colnames))

            # do sql read
            # FIXME: understand unpack_scalars and keepwrap
            raw_prop_list = table.get_internal_columns(
                tbl_rowids, flat_intern_colnames, eager, nInput,
                unpack_scalars=True, keepwrap=True)
            # unpack_scalars=not
            # request_unpack)
            # print('depth(raw_prop_list) = %r' % (ut.depth_profile(raw_prop_list),))

            # Transpose rows -> columns so columns can be processed as a whole
            prop_listT = list(zip(*raw_prop_list))
            # Replace stored uris of external columns with the loaded data
            for extern_colx, read_func in extern_resolve_colxs:
                data_list = []
                for uri in prop_listT[extern_colx]:
                    try:
                        # FIXME: only do this for a localpath
                        uri1 = ut.unixjoin(table.depc.cache_dpath, uri)
                        data = read_func(uri1)
                    except Exception as ex:
                        # Report and re-raise; a failed load is not
                        # replaced by a placeholder value
                        ut.printex(ex, 'failed to load external data', iswarning=False)
                        raise
                        # FIXME
                        #data = None
                    data_list.append(data)
                prop_listT[extern_colx] = data_list

            # Regroup the flat columns according to the requested columns
            nested_proplistT = ut.list_unflat_take(prop_listT, nesting_xs)

            # For nested columns, zip the member columns into per-row tuples
            for tx in ut.where([isinstance(xs, list) for xs in nesting_xs]):
                nested_proplistT[tx] = list(zip(*nested_proplistT[tx]))

            # Transpose back: columns -> one tuple per row
            prop_list = list(zip(*nested_proplistT))

            if request_unpack:
                # A single column was requested by name; strip the 1-tuple
                prop_list = [None if p is None else p[0] for p in prop_list]
        except Exception as ex:
            # The keys name local variables of this function by string;
            # keep them in sync when renaming locals.
            ut.printex(ex, 'failed in get col', keys=[
                'table.tablename',
                'request_unpack',
                'tbl_rowids',
                'colnames',
                'raw_prop_list',
                (ut.depth_profile, 'raw_prop_list'),
                'prop_listT',
                (ut.depth_profile, 'prop_listT'),
                'nesting_xs',
                'nested_proplistT',
                'prop_list'])
            raise
        return prop_list
Exemple #8
0
    def add_rows_from_parent(table, parent_rowids, config=None, verbose=True,
                             return_num_dirty=False):
        """
        Lazily compute and add the rows that do not exist yet.

        Looks up the rowid of every parent-rowid tuple under ``config``.
        Tuples without a rowid ("dirty") are passed to
        ``table.preproc_func``, the generated data is written to the
        database, and the rowids are looked up again.

        Args:
            parent_rowids: sequence of parent rowid tuples, one per row
            config: configuration used to resolve ``config_rowid``
            verbose (bool): print how many rows are being added
            return_num_dirty (bool): also return the number of dirty rows

        Returns:
            The rowid list aligned with ``parent_rowids``, or
            ``(rowid_list, num_dirty)`` when ``return_num_dirty`` is True.
        """
        try:
            # Get requested configuration id
            config_rowid = table.get_config_rowid(config)
            # Find leaf rowids that need to be computed
            initial_rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                                config=config)
            # Get corresponding "dirty" parent rowids
            isdirty_list = ut.flag_None_items(initial_rowid_list)
            dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
            num_dirty = len(dirty_parent_rowids)
            num_total = len(parent_rowids)
            if num_dirty > 0:
                if verbose:
                    fmtstr = 'adding %d / %d new props to %r for config_rowid=%r'
                    print(fmtstr % (num_dirty, num_total, table.tablename,
                                    config_rowid))
                # Transpose: one sequence of rowids per parent table
                args = zip(*dirty_parent_rowids)
                if table._asobject:
                    # Convenience: hand objects instead of raw rowids to
                    # the preproc function
                    args = [table.depc.get_obj(parent, rowids)
                            for parent, rowids in zip(table.parents, args)]
                # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
                proptup_gen = table.preproc_func(table.depc, *args, config=config)

                #proptup_gen = list(proptup_gen)

                if len(table._nested_idxs) > 0:
                    # TODO: rewrite
                    nested_nCols = len(table.data_colnames)
                    # idxs1: nested data columns, idxs2: all other columns
                    idxs1 = table._nested_idxs
                    mask1 = ut.index_to_boolmask(idxs1, nested_nCols)
                    mask2 = ut.not_list(mask1)
                    idxs2 = ut.where(mask2)
                    def unnest_data(data):
                        # Wrap each plain column in a 1-tuple so that all
                        # columns can be flattened uniformly below
                        unnested_cols = list(zip(ut.take(data, idxs2)))
                        nested_cols = ut.take(data, idxs1)
                        grouped_items = [nested_cols, unnested_cols]
                        groupxs = [idxs1, idxs2]
                        unflat = ut.ungroup(grouped_items, groupxs, nested_nCols - 1)
                        return tuple(ut.flatten(unflat))
                    # Hack when a sql schema has tuples defined in it
                    proptup_gen = (unnest_data(data) for data in proptup_gen)

                #proptup_gen = list(proptup_gen)

                # Lazily build the full sql parameter tuple of each new row.
                # (``parent_rowids`` inside the generator is the loop
                # variable, not the function argument.)
                dirty_params_iter = (
                    parent_rowids + (config_rowid,) + data_cols
                    for parent_rowids, data_cols in zip(dirty_parent_rowids, proptup_gen))
                #dirty_params_iter = list(dirty_params_iter)
                #print('dirty_params_iter = %s' % (ut.repr2(dirty_params_iter, nl=1),))
                CHUNKED_ADD = table.chunksize is not None
                if CHUNKED_ADD:
                    # Write in chunks so results are committed as they are
                    # generated instead of all at the end
                    for dirty_params_chunk in ut.ichunks(dirty_params_iter,
                                                         chunksize=table.chunksize):
                        table.db._add(table.tablename, table._table_colnames,
                                      dirty_params_chunk,
                                      nInput=len(dirty_params_chunk))
                else:
                    nInput = num_dirty
                    table.db._add(table.tablename, table._table_colnames,
                                  dirty_params_iter, nInput=nInput)
                # Now that the dirty params are added get the correct order of rowids
                rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                            config=config)
            else:
                rowid_list = initial_rowid_list
            if return_num_dirty:
                return rowid_list, num_dirty
            else:
                return rowid_list
        except Exception as ex:
            # The keys name local variables by string; some (e.g. 'args')
            # may not be bound yet if the failure happened early.
            ut.printex(ex, 'error in add_rowids', keys=[
                'table', 'parent_rowids', 'config', 'args',
                'dirty_parent_rowids', 'table.preproc_func'])
            raise
Exemple #9
0
def _iter_hs_table_rows(table_fpath):
    """Yield the data rows of a HotSpotter csv table, skipping blank rows and '#' comment rows."""
    with open(table_fpath, 'r') as table_file:
        for row in csv.reader(table_file):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            yield row


def _parse_hs_flag(text):
    """Interpret a HotSpotter csv flag cell; empty, '0' and 'false' are False."""
    # bool(text) would be True for any non-empty string, including '0'
    return text.strip().lower() not in ('', '0', 'false')


def _remove_images_and_chips(images, chips, remove_gxs):
    """Return copies of (images, chips) without the images at remove_gxs and the chips that depend on them."""
    import utool as ut
    remove_gids = ut.take(images['hs_gid'], remove_gxs)
    gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
    # An image may legitimately have no chips, so do not index directly
    remove_cxs = ut.flatten([gid_to_cxs.get(gid, []) for gid in remove_gids])
    return images.remove(remove_gxs), chips.remove(remove_cxs)


def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Convert a HotSpotter database into a new IBEIS database.

    Reads the name, image and chip csv tables of the HotSpotter database,
    drops images that are missing on disk or fail to be added (together
    with their chips), adds names / images / annotations to the IBEIS
    controller and finally writes a flag file marking the conversion as
    successful.

    Args:
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as  hsdb)
        **kwargs: forwarded to ``IBEISControl.request_IBEISController``

    Returns:
        the IBEIS controller of the converted database

    Raises:
        AssertionError: if ``hsdir`` is not a HotSpotter database, if
            ``dbdir`` was already converted, or if the target database is
            not empty

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut

    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(
        hsdir
    ), 'not a hotspotter database. cannot even force convert: hsdir=%r' % (
        hsdir, )
    # Report the directory that was actually checked
    assert not is_succesful_convert(dbdir), 'dbdir=%r is already converted' % (
        dbdir, )
    imgdir = join(hsdir, 'images')

    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    # The unknown name is always present with hs_nid 0
    name_text_list = ['____']
    name_hs_nid_list = [0]
    for row in _iter_hs_table_rows(nametbl_fpath):
        hs_nid = int(row[0])
        name = row[1].strip()
        name_text_list.append(name)
        name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    for row in _iter_hs_table_rows(imgtbl_fpath):
        hs_gid = int(row[0])
        gname_ = row[1].strip()
        # aif in hotspotter is equivalent to reviewed in IBEIS
        reviewed = _parse_hs_flag(row[2])
        image_hs_gid_list.append(hs_gid)
        image_gname_list.append(gname_)
        image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = [
        image_gpath
        for image_gpath, flag in zip(image_gpath_list, image_exist_flags)
        if not flag
    ]
    for image_gpath in missing_images:
        print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d image exist' %
              (sum(image_exist_flags), len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images
        from os.path import basename
        subfiles = ut.glob(hsdir,
                           '*',
                           recursive=True,
                           fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname, ))
                continue
            existing = basename_to_existing[gname]
            # Several candidates are only usable if they are the same file
            can_choose = (len(existing) <= 1 or
                          ut.allsame(ut.lmap(ut.get_file_uuid, existing)))
            if can_choose:
                found = existing[0]
                can_copy_list.append((found, gpath))
            else:
                print(existing)

        # Transposing an empty list cannot be unpacked into (src, dst)
        if can_copy_list:
            src, dst = ut.listT(can_copy_list)
            ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    for row in _iter_hs_table_rows(chiptbl_fpath):
        hs_gid = int(row[1])
        hs_nid = int(row[2])
        bbox_text = row[3]
        theta = float(row[4])
        # Notes may themselves contain commas, which csv split into cells
        notes = '<COMMA>'.join([item.strip() for item in row[5:]])

        # bbox is stored like "[x  y  w  h]" with variable spacing
        bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
        bbox_text = re.sub(r' +', ' ', bbox_text)
        bbox_strlist = bbox_text.split(' ')
        bbox = tuple(map(int, bbox_strlist))
        chip_hs_nid_list.append(hs_nid)
        chip_hs_gid_list.append(hs_gid)
        chip_bbox_list.append(bbox)
        chip_theta_list.append(theta)
        chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.not_list(images['exists']))
        # Remove missing images and dependant chips
        images, chips = _remove_images_and_chips(images, chips, missing_gxs)
        # Keep only names that are still referenced (plus the unknown name)
        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    assert all(images['exists']), 'some images dont exist'

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir,
                                               check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(
        images['gpath'])  # any failed gids will be None

    # Remove corrupted images (those that could not be added)
    print('pre')
    print('chips = %r' % (chips, ))
    print('images = %r' % (images, ))
    print('names = %r' % (names, ))
    missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
    # Remove missing images and dependant chips
    images, chips = _remove_images_and_chips(images, chips, missing_gxs)
    print('post')
    print('chips = %r' % (chips, ))
    print('images = %r' % (images, ))
    print('names = %r' % (names, ))

    # Index chips using new ibs rowids; unknown ids map to None
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    chips['ibs_gid'] = [
        ibs_gid_lookup.get(hs_gid, None) for hs_gid in chips['hs_gid']
    ]
    chips['ibs_nid'] = [
        ibs_nid_lookup.get(hs_nid, None) for hs_nid in chips['hs_nid']
    ]

    ibs.add_annots(chips['ibs_gid'],
                   bbox_list=chips['bbox'],
                   theta_list=chips['theta'],
                   nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir, ))
    print('finished ingest')
    return ibs
Exemple #10
0
 def fuzzy_find_colxs(self, pat):
     """
     Return the column indices whose header tags match ``pat``.

     Matching is delegated to ``ut.filterflags_general_tags`` with
     ``in_any=[pat]``.
     """
     import utool as ut
     tag_flags = ut.filterflags_general_tags(self.header_tags, in_any=[pat])
     return ut.where(tag_flags)