def group_daids_for_indexing_by_name(ibs, daid_list, num_indexers=8,
                                     verbose=True):
    """ returns groups with only one annotation per name in each group """
    tup = ibs.group_annots_by_known_names(daid_list)
    aidgroup_list, invalid_aids = tup
    largest_groupsize = max(map(len, aidgroup_list))
    num_bins = min(largest_groupsize, num_indexers)
    if verbose or ut.VERYVERBOSE:
        print('[mindex] num_indexers = %d' % (num_indexers,))
        print('[mindex] largest_groupsize = %d' % (largest_groupsize,))
        print('[mindex] num_bins = %d' % (num_bins,))
    # Group annotations for indexing according to the split criteria
    aids_list, overflow_aids = ut.sample_zip(
        aidgroup_list, num_bins, allow_overflow=True, per_bin=1)
    if __debug__:
        # All annotations within a group must share the same name
        nidgroup_list = ibs.unflat_map(ibs.get_annot_name_rowids, aidgroup_list)
        for nidgroup in nidgroup_list:
            assert ut.allsame(nidgroup), 'bad name grouping'
    if __debug__:
        # Each subsequent indexer must be a subset (in name/identity space)
        # of the previous one
        nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)
        prev_ = None
        for nids in nids_list:
            if prev_ is None:
                prev_ = set(nids)
            else:
                assert prev_.issuperset(nids), 'bad indexer grouping'
    return aids_list, overflow_aids, num_bins
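# A minimal, self-contained sketch (not the utool source) of the assumed
# semantics of ut.sample_zip(groups, num_bins, allow_overflow=True,
# per_bin=1): bin k takes the k-th annotation of every name that has one,
# so each bin holds at most one aid per name, and later bins cover a
# name-wise subset of earlier bins, which is what the __debug__ superset
# assertion above checks. Annotations beyond num_bins spill into overflow.
def sample_zip_sketch(groups, num_bins):
    bins = [[] for _ in range(num_bins)]
    overflow = []
    for group in groups:
        for k, item in enumerate(group):
            if k < num_bins:
                bins[k].append(item)
            else:
                overflow.append(item)
    return bins, overflow

# Example: three names with 3, 2, and 1 annotations packed into 2 bins
bins, overflow = sample_zip_sketch([[1, 2, 3], [4, 5], [6]], num_bins=2)
assert bins == [[1, 4, 6], [2, 5]]
assert overflow == [3]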
def __init__(split_index, ibs, daid_list, num_forests=8):
    print('[nnsindex] make HOTSMultiIndex over %d annots' % (len(daid_list),))
    # Remove unknown names
    aid_list = daid_list
    known_aids_list, unknown_aids = ibsfuncs.group_annots_by_known_names(ibs, aid_list)
    num_bins = min(max(map(len, known_aids_list)), num_forests)
    # Put one name per forest
    forest_aids, overflow_aids = utool.sample_zip(
        known_aids_list, num_bins, allow_overflow=True, per_bin=1)
    forest_indexes = []
    extra_indexes = []
    for tx, aids in enumerate(forest_aids):
        print('[nnsindex] building forest %d/%d with %d aids' %
              (tx + 1, num_bins, len(aids)))
        if len(aids) > 0:
            hsindex = HOTSIndex(ibs, aids)
            forest_indexes.append(hsindex)
    if len(overflow_aids) > 0:
        print('[nnsindex] building overflow forest')
        overflow_index = HOTSIndex(ibs, overflow_aids)
        extra_indexes.append(overflow_index)
    if len(unknown_aids) > 0:
        print('[nnsindex] building unknown forest')
        unknown_index = HOTSIndex(ibs, unknown_aids)
        extra_indexes.append(unknown_index)
    #print('[nnsindex] building normalizer forest')  # TODO
    split_index.forest_indexes = forest_indexes
    split_index.extra_indexes = extra_indexes
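# Hypothetical usage of the constructor above (an assumption for
# illustration; `ibs` is an IBEIS controller and HOTSMultiIndex is the
# class this __init__ belongs to):
#   daid_list = ibs.get_valid_aids()
#   mxer = HOTSMultiIndex(ibs, daid_list, num_forests=8)
#   # one HOTSIndex per non-empty bin, plus overflow/unknown extras
#   assert len(mxer.forest_indexes) <= 8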
def __init__(split_index, ibs, daid_list, num_forests=8):
    print('[nnsindex] make NNSplitIndex over %d annots' % (len(daid_list),))
    aid_list = daid_list
    nid_list = ibs.get_annot_nids(aid_list)
    #flag_list = ibs.get_annot_exemplar_flag(aid_list)
    nid2_aids = utool.group_items(aid_list, nid_list)
    key_list = list(nid2_aids.keys())
    aids_list = list(nid2_aids.values())
    isunknown_list = ibs.is_nid_unknown(key_list)
    known_aids = utool.filterfalse_items(aids_list, isunknown_list)
    unknown_aids = utool.flatten(utool.filter_items(aids_list, isunknown_list))
    num_forests_ = min(max(map(len, aids_list)), num_forests)
    # Put one name per forest
    forest_aids, overflow_aids = utool.sample_zip(
        known_aids, num_forests_, allow_overflow=True, per_bin=1)
    forest_indexes = []
    extra_indexes = []
    for tx, aids in enumerate(forest_aids):
        print('[nnsindex] building forest %d/%d with %d aids' %
              (tx + 1, num_forests_, len(aids)))
        if len(aids) > 0:
            nn_index = NNIndex(ibs, aids)
            forest_indexes.append(nn_index)
    if len(overflow_aids) > 0:
        print('[nnsindex] building overflow forest')
        overflow_index = NNIndex(ibs, overflow_aids)
        extra_indexes.append(overflow_index)
    if len(unknown_aids) > 0:
        print('[nnsindex] building unknown forest')
        unknown_index = NNIndex(ibs, unknown_aids)
        extra_indexes.append(unknown_index)
    #print('[nnsindex] building normalizer forest')  # TODO
    split_index.forest_indexes = forest_indexes
    split_index.extra_indexes = extra_indexes
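# A minimal sketch (plain Python, not utool) of the known/unknown split
# performed above: group annotation ids by name id, keep the per-name
# groups whose name is known, and flatten the unknown-name annotations
# into one list. The nid <= 0 unknown test is an assumption for
# illustration, not the IBEIS convention.
from collections import defaultdict

def split_known_unknown(aid_list, nid_list, is_unknown):
    nid2_aids = defaultdict(list)
    for aid, nid in zip(aid_list, nid_list):
        nid2_aids[nid].append(aid)
    known_aids = [aids for nid, aids in nid2_aids.items()
                  if not is_unknown(nid)]
    unknown_aids = [aid for nid, aids in nid2_aids.items()
                    if is_unknown(nid) for aid in aids]
    return known_aids, unknown_aids

# Example: four annotations, two known names, one unknown (nid <= 0)
known, unknown = split_known_unknown(
    [1, 2, 3, 4], [7, 7, -1, 8], lambda nid: nid <= 0)
assert known == [[1, 2], [4]] and unknown == [3]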