def demodata_mtest_infr(state='empty'):
    import ibeis
    import utool as ut
    ibs = ibeis.opendb(db='PZ_MTEST')
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())
    ut.shuffle(names, rng=321)
    test_aids = ut.flatten(names[1::2])
    infr = ibeis.AnnotInference(ibs, test_aids, autoinit=True)
    infr.reset(state=state)
    return infr
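
# Hedged usage sketch for the demo constructor above. It assumes ibeis is
# installed and can fetch the PZ_MTEST demo database on first use; the
# `_demo_` name is illustrative, not part of the library.
def _demo_demodata_mtest_infr():
    # Build a fresh AnnotInference object with no review state
    infr = demodata_mtest_infr(state='empty')
    # The graph is now ready for simulated review loops
    print('infr = {!r}'.format(infr))
    return infr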
def randomized_ibeis_dset(dbname, dim=224):
    """
    Ignore:
        >>> from clab.live.siam_train import *
        >>> datasets = randomized_ibeis_dset('PZ_MTEST')
        >>> ut.qtensure()
        >>> self = datasets['train']
        >>> self.augment = True
        >>> self.show_sample()
    """
    # from clab.live.siam_train import *
    # dbname = 'PZ_MTEST'
    import math
    import ubelt as ub
    import utool as ut
    from ibeis.algo.verif import vsone

    # pblm = vsone.OneVsOneProblem.from_empty('PZ_MTEST')
    pblm = vsone.OneVsOneProblem.from_empty(dbname)
    pccs = list(pblm.infr.positive_components())
    pcc_freq = list(map(len, pccs))
    freq_grouped = ub.group_items(pccs, pcc_freq)

    # Simpler very randomized sample strategy
    train_pccs = []
    vali_pccs = []
    test_pccs = []

    # vali_frac = .1
    test_frac = .1
    vali_frac = 0

    for i, group in freq_grouped.items():
        group = ut.shuffle(group, rng=432232 + i)
        n_test = 0 if len(group) == 1 else math.ceil(len(group) * test_frac)
        test, learn = group[:n_test], group[n_test:]
        # carve validation out of the remaining learn split, not the full
        # group, so no test PCC can leak back into train
        n_vali = 0 if len(group) == 1 else math.ceil(len(learn) * vali_frac)
        vali, train = learn[:n_vali], learn[n_vali:]
        train_pccs.extend(train)
        test_pccs.extend(test)
        vali_pccs.extend(vali)

    test_dataset = RandomBalancedIBEISSample(pblm, test_pccs, dim=dim)
    train_dataset = RandomBalancedIBEISSample(pblm, train_pccs, dim=dim)
    vali_dataset = RandomBalancedIBEISSample(pblm, vali_pccs, dim=dim)
    train_dataset.augment = True

    datasets = {
        'train': train_dataset,
        # 'vali': vali_dataset,
        'test': test_dataset,
    }
    return datasets
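
# Hedged usage sketch: how the returned datasets might feed a torch
# DataLoader. Assumes torch is installed and that RandomBalancedIBEISSample
# behaves like a torch Dataset (indexable, with __len__); the batch size is
# illustrative.
def _demo_randomized_ibeis_dset(dbname='PZ_MTEST'):
    import torch.utils.data
    datasets = randomized_ibeis_dset(dbname, dim=224)
    loader = torch.utils.data.DataLoader(
        datasets['train'], batch_size=32, shuffle=True)
    print('train size = {}'.format(len(datasets['train'])))
    return loader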
def _precollect(self):
    """
    Sets up an ibs object with an aids_pool

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> self = Chap3('humpbacks_fb')
        >>> self = Chap3('GZ_Master1')
        >>> self = Chap3('GIRM_Master1')
        >>> self = Chap3('PZ_MTEST')
        >>> self = Chap3('PZ_PB_RF_TRAIN')
        >>> self = Chap3('PZ_Master1')
        >>> self = Chap3('RotanTurtles')
        >>> self._precollect()

        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('PZ_Master1')
        >>> self._precollect()
    """
    import wbia
    from wbia.init import main_helpers

    self.dbdir = wbia.sysres.lookup_dbdir(self.dbname)
    ibs = wbia.opendb(dbdir=self.dbdir)
    if ibs.dbname.startswith('PZ_PB_RF_TRAIN'):
        aids = ibs.get_valid_aids()
    elif ibs.dbname.startswith('LF_ALL'):
        aids = ibs.get_valid_aids()
    elif ibs.dbname.startswith('PZ_Master'):
        # PZ_Master is too big to run in full. Select a smaller sample.
        # Be sure to include photobomb and incomparable cases.
        aids = ibs.filter_annots_general(
            require_timestamp=True, species='primary', is_known=True,
            minqual='poor')
        infr = wbia.AnnotInference(ibs=ibs, aids=aids)
        infr.reset_feedback('staging', apply=True)
        minority_ccs = find_minority_class_ccs(infr)
        minority_aids = set(ut.flatten(minority_ccs))

        # We need to do our best to select a small sample here
        flags = ['left' in text for text in ibs.annots(aids).viewpoint_code]
        left_aids = ut.compress(aids, flags)

        majority_aids = set(ibs.filter_annots_general(
            left_aids, require_timestamp=True, species='primary',
            minqual='poor', require_quality=True, min_pername=2,
            max_pername=15))
        # This produces 5720 annotations
        aids = sorted(majority_aids.union(minority_aids))
    else:
        aids = ibs.filter_annots_general(
            require_timestamp=True, is_known=True, species='primary',
            minqual='poor')
    if ibs.dbname.startswith('MantaMatcher'):
        # Remove some of the singletons for this db
        annots = ibs.annots(aids)
        names = annots.group2(annots.nids)
        multis = [name_aids for name_aids in names if len(name_aids) > 1]
        singles = [name_aids for name_aids in names if len(name_aids) == 1]
        rng = np.random.RandomState(3988708794)
        aids = ut.flatten(multis)
        aids += ut.shuffle(ut.flatten(singles), rng=rng)[0:358]

    # ibs.print_annot_stats(aids, prefix='P')
    main_helpers.monkeypatch_encounters(ibs, aids, minutes=30)
    logger.info('post monkey patch')
    # if False:
    #     ibs.print_annot_stats(aids, prefix='P')
    self.ibs = ibs
    self.aids_pool = aids
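
# Illustrative sketch (plain Python, no wbia required) of the PZ_Master
# sampling strategy above: minority-class CCs are kept in full and unioned
# with the filtered left-viewpoint majority sample. The aids are made up.
def _sketch_minority_majority_union():
    minority_aids = {1, 2, 3}        # aids from photobomb / incomparable CCs
    majority_aids = {3, 4, 5, 6}     # filtered left-viewpoint sample
    aids = sorted(majority_aids.union(minority_aids))
    assert aids == [1, 2, 3, 4, 5, 6]
    return aids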
def att_faces_datasets(dim=224):
    """
    https://github.com/harveyslash/Facial-Similarity-with-Siamese-Networks-in-Pytorch

    >>> from clab.live.siam_train import *
    >>> train_dataset, vali_dataset, test_dataset = att_faces_datasets()

    train_dataset[0][0].shape
    fpath = train_dataset.img1_fpaths[0]
    """
    import os
    from os.path import splitext
    import ubelt as ub
    import utool as ut

    def ensure_att_dataset():
        def unzip(zip_fpath, dpath=None, verbose=1):
            """
            Extracts all members of a zipfile.

            Args:
                zip_fpath (str): path of zip file to unzip.
                dpath (str): directory to unzip to. If not specified, it
                    defaults to a folder parallel to the zip file (excluding
                    the extension).
                verbose (int): verbosity level
            """
            import zipfile
            from ubelt import progiter
            if dpath is None:
                dpath = splitext(zip_fpath)[0]
            with zipfile.ZipFile(zip_fpath, 'r') as zf:
                members = zf.namelist()
                prog = progiter.ProgIter(members, verbose=verbose,
                                         label='unzipping')
                for zipinfo in prog:
                    zf.extract(zipinfo, path=dpath, pwd=None)
            return dpath

        faces_zip_fpath = ub.grabdata(
            'http://www.cl.cam.ac.uk/Research/DTG/attarchive/pub/data/att_faces.zip')
        dpath = splitext(faces_zip_fpath)[0]
        if not os.path.exists(dpath):
            dpath = unzip(faces_zip_fpath, dpath=dpath)
        return dpath

    # Download the data if you don't have it
    dpath = ensure_att_dataset()

    import torchvision.datasets
    torchvision.datasets.folder.IMG_EXTENSIONS += ['.pgm']
    im_dset = torchvision.datasets.ImageFolder(root=dpath)
    class_to_id = ub.group_items(*zip(*im_dset.imgs))

    names = sorted(list(class_to_id.keys()))
    names = ut.shuffle(names, rng=10)
    learn, test = names[:40], names[40:]
    train, vali = learn[:35], learn[35:]
    print('train = {!r}'.format(len(train)))
    print('vali = {!r}'.format(len(vali)))
    print('test = {!r}'.format(len(test)))

    train_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, train)), dim=dim)
    vali_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, vali)), dim=dim)
    test_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, test)), dim=dim)
    print('train_dataset = {!r}'.format(len(train_dataset)))
    print('vali_dataset = {!r}'.format(len(vali_dataset)))
    print('test_dataset = {!r}'.format(len(test_dataset)))
    return train_dataset, vali_dataset, test_dataset
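
# Hedged usage sketch mirroring the docstring above: build the three AT&T
# face splits and inspect one sample. Assumes torchvision is installed and
# the dataset download succeeds; the indexing convention follows the
# docstring.
def _demo_att_faces_datasets():
    train_dataset, vali_dataset, test_dataset = att_faces_datasets(dim=224)
    # Per the docstring, items index as pairs of images (plus a label)
    print('first train item shape = {!r}'.format(train_dataset[0][0].shape))
    return train_dataset, vali_dataset, test_dataset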
def find_neg_augment_edges(infr, cc1, cc2, k=None):
    """
    Finds enough edges between two PCCs to make them k-negative complete.
    The two CCs should be disjoint and not have any positive edges between
    them.

    Args:
        cc1 (set): nodes in one PCC
        cc2 (set): nodes in another positive-disjoint PCC
        k (int): redundancy level (if None uses infr.params['redun.neg'])

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.graph import demo
        >>> k = 2
        >>> cc1, cc2 = {1}, {2, 3}
        >>> # --- return an augmentation if feasible
        >>> infr = demo.demodata_infr(ccs=[cc1, cc2], ignore_pair=True)
        >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
        >>> assert edges == {(1, 2), (1, 3)}
        >>> # --- if infeasible return a partial augmentation
        >>> infr.add_feedback((1, 2), INCMP)
        >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
        >>> assert edges == {(1, 3)}
    """
    if k is None:
        k = infr.params['redun.neg']
    assert cc1 is not cc2, 'CCs should be disjoint (but they are the same)'
    assert len(cc1.intersection(cc2)) == 0, 'CCs should be disjoint'

    existing_edges = set(nxu.edges_cross(infr.graph, cc1, cc2))
    reviewed_edges = {
        edge: state
        for edge, state in zip(existing_edges,
                               infr.edge_decision_from(existing_edges))
        if state != UNREV
    }

    # Find how many negative edges we already have
    num = sum([state == NEGTV for state in reviewed_edges.values()])

    if num < k:
        # Find k random negative edges
        check_edges = existing_edges - set(reviewed_edges)
        # Check the existing but unreviewed edges first
        for edge in check_edges:
            num += 1
            yield edge
            if num >= k:
                return

        # Check non-existing edges next
        seed = 2827295125
        try:
            seed += sum(cc1) + sum(cc2)
        except Exception:
            pass
        rng = np.random.RandomState(seed)
        cc1 = ut.shuffle(list(cc1), rng=rng)
        cc2 = ut.shuffle(list(cc2), rng=rng)
        for edge in it.starmap(nxu.e_, nxu.diag_product(cc1, cc2)):
            if edge not in existing_edges:
                num += 1
                yield edge
                if num >= k:
                    return
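
# Hedged usage sketch mirroring the doctest above. Assumes wbia is installed;
# the demo graph has two PCCs {1} and {2, 3} with no reviews between them.
def _demo_find_neg_augment_edges():
    from wbia.algo.graph import demo
    infr = demo.demodata_infr(ccs=[{1}, {2, 3}], ignore_pair=True)
    # To be 2-negative complete, {1} and {2, 3} need both cross edges
    edges = set(infr.find_neg_augment_edges({1}, {2, 3}, k=2))
    print('edges = {!r}'.format(edges))  # per the doctest: {(1, 2), (1, 3)}
    return edges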