def subset_metadata(dataset, key='full'):
    metadata_fpath = dataset.fpath_dict[key].get('metadata', None)
    if metadata_fpath is not None:
        flat_metadata = ut.load_data(metadata_fpath, verbose=True)
    else:
        flat_metadata = None
    return flat_metadata


def subset_data(dataset, key='full'):
    data_fpath = dataset.fpath_dict[key]['data']
    data = ut.load_data(data_fpath, verbose=True)
    if len(data.shape) == 3:
        # add channel dimension for implicit grayscale
        data.shape = data.shape + (1,)
    return data


def ensure_results(self, expt_name=None, nocompute=None):
    """
    Subclasses must obey the measure_<expt_name>, draw_<expt_name> contract
    """
    if nocompute is None:
        nocompute = ut.get_argflag('--nocompute')

    if expt_name is None and exists(self.dpath):
        # Load all
        fpaths = ut.glob(str(self.dpath), '*.pkl')
        expt_names = [splitext(basename(fpath))[0] for fpath in fpaths]
        for fpath, expt_name in zip(fpaths, expt_names):
            self.expt_results[expt_name] = ut.load_data(fpath)
    else:
        # expt_name = splitext(basename(fpath))[0]
        fpath = join(str(self.dpath), expt_name + '.pkl')
        # fpath = ut.truepath(fpath)
        if not exists(fpath):
            ut.cprint(
                'Experiment results {} do not exist'.format(expt_name), 'red')
            ut.cprint('First re-setup to check if it is a path issue', 'red')
            if nocompute:
                raise Exception(
                    str(expt_name) + ' does not exist for ' + str(self.dbname))

            if self.ibs is None:
                self._precollect()
            ut.cprint('Checking new fpath', 'yellow')
            fpath = join(str(self.dpath), expt_name + '.pkl')
            logger.info('fpath = %r' % (fpath,))
            if not exists(fpath):
                ut.cprint('Results still missing need to re-measure', 'red')
                # assert False
                # self._setup()
                getattr(self, 'measure_' + expt_name)()
            else:
                ut.cprint('Re-setup fixed it', 'green')
        else:
            logger.info('Experiment results {} exist'.format(expt_name))
        self.expt_results[expt_name] = ut.load_data(fpath)
        return self.expt_results[expt_name]
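

# Illustrative sketch (added for clarity, not part of the original module):
# demonstrates the measure_<expt_name> / draw_<expt_name> naming contract
# that ensure_results() relies on when it falls back to
# getattr(self, 'measure_' + expt_name)(). The class name, experiment name,
# and payload below are hypothetical.
def _example_measure_contract():
    class ToyExpt(object):
        def __init__(self):
            self.expt_results = {}

        def measure_dbstats(self):
            # A real subclass would compute results and pickle them under dpath
            self.expt_results['dbstats'] = {'n_annots': 100}

        def ensure_results(self, expt_name):
            if expt_name not in self.expt_results:
                # Same dynamic dispatch used by ensure_results() above
                getattr(self, 'measure_' + expt_name)()
            return self.expt_results[expt_name]

    expt = ToyExpt()
    assert expt.ensure_results('dbstats')['n_annots'] == 100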


def load_latest_classifiers(infr, dpath):
    from ibeis.algo.verif import deploy
    task_clf_fpaths = deploy.Deployer(dpath).find_latest_local()
    classifiers = {}
    for task_key, fpath in task_clf_fpaths.items():
        clf_info = ut.load_data(fpath)
        assert clf_info['metadata']['task_key'] == task_key, (
            'bad saved clf at fpath={}'.format(fpath))
        classifiers[task_key] = clf_info
    infr.verifiers = classifiers


def ensure(self, task_key):
    _, fname = self._make_deploy_metadata(task_key=task_key)
    fpath = join(self.dpath, fname)
    if exists(fpath):
        deploy_info = ut.load_data(fpath)
        assert bool(deploy_info['clf']), 'must have clf'
    else:
        deploy_info = self.deploy(task_key=task_key)
        assert exists(fpath), 'must now exist'
    verif = verifier.Verifier(self.pblm.infr.ibs, deploy_info=deploy_info)
    assert verif.metadata['task_key'] == task_key, (
        'bad saved clf at fpath={}'.format(fpath))
    return verif


def load(dataset):
    dataset.ensure_dirs()
    dataset.ensure_symlinked()
    if not exists(dataset.info_fpath):
        raise IOError('dataset info manifest cache miss')
    else:
        dataset._info = ut.load_data(dataset.info_fpath)
    if not exists(dataset.data_fpath):
        raise IOError('dataset data cache miss')
    dataset.load_splitsets()
    # Hack: mark metadata as unavailable if its file does not exist
    if not exists(dataset.fpath_dict['full']['metadata']):
        dataset.fpath_dict['full']['metadata'] = None


def load(data_fpath, labels_fpath=None):
    # Load X matrix (data)
    data = ut.load_data(data_fpath)
    # Load y vector (labels)
    labels = ut.load_data(labels_fpath) if labels_fpath is not None else None
    # TODO: This should be part of data preprocessing
    # Ensure that data is 4-dimensional
    if len(data.shape) == 3:
        # add channel dimension for implicit grayscale
        data.shape = data.shape + (1,)
    # Return data
    return data, labels
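

# Minimal sketch (not from the original module) of the grayscale reshape that
# subset_data() and load() perform above: a stack of (H, W) grayscale images
# gains a trailing channel axis so downstream code can assume 4-D (N, H, W, C)
# input. Only numpy is assumed; the shapes are hypothetical.
def _example_add_channel_dim():
    import numpy as np
    data = np.zeros((10, 64, 64), dtype=np.uint8)  # 10 implicit-grayscale images
    if len(data.shape) == 3:
        # equivalent to the in-place `data.shape = data.shape + (1,)` trick
        data = data.reshape(data.shape + (1,))
    assert data.shape == (10, 64, 64, 1)
    return data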


def load_oxford_2013():
    """
    Found this data in the README of the SMK publication
    https://hal.inria.fr/hal-00864684/document
    http://people.rennes.inria.fr/Herve.Jegou/publications.html
    with download script

    CommandLine:
        # Download oxford13 data
        cd ~/work/Oxford
        mkdir -p smk_data_iccv_2013
        cd smk_data_iccv_2013
        wget -nH --cut-dirs=4 -r -Pdata/ ftp://ftp.irisa.fr/local/texmex/corpus/iccv2013/

    This dataset has 5063 images whereas 07 has 5062
    This dataset seems to contain an extra junk image: ashmolean_000214

    # Remember that matlab is 1 indexed!
    # DONT FORGET TO CONVERT TO 0 INDEXING!
    """
    from yael.ynumpy import fvecs_read
    from yael.yutils import load_ext
    import scipy.io
    import vtool as vt
    from os.path import join

    dbdir = ut.truepath('/raid/work/Oxford/')
    datadir = dbdir + '/smk_data_iccv_2013/data/'

    # we are not retraining, so this is unused
    # # Training data descriptors for Paris6k dataset
    # train_sift_fname = join(datadir, 'paris_sift.uint8')  # NOQA
    # # File storing visual words of Paris6k descriptors used in our ICCV paper
    # train_vw_fname = join(datadir, 'clust_preprocessed/oxford_train_vw.int32')

    # Pre-learned quantizer used in ICCV paper (used if docluster=false)
    codebook_fname = join(datadir, 'clust_preprocessed/oxford_codebook.fvecs')

    # Files storing descriptors/geometry for Oxford5k dataset
    test_sift_fname = join(datadir, 'oxford_sift.uint8')
    test_geom_fname = join(datadir, 'oxford_geom_sift.float')
    test_nf_fname = join(datadir, 'oxford_nsift.uint32')

    # File storing visual words of Oxford5k descriptors used in our ICCV paper
    test_vw_fname = join(datadir, 'clust_preprocessed/oxford_vw.int32')

    # Ground-truth for Oxford dataset
    gnd_fname = join(datadir, 'gnd_oxford.mat')

    oxford_vecs = load_ext(test_sift_fname, ndims=128, verbose=True)
    oxford_nfeats = load_ext(test_nf_fname, verbose=True)
    oxford_words = fvecs_read(codebook_fname)
    oxford_wids = load_ext(test_vw_fname, verbose=True) - 1

    test_geom_invV_fname = test_geom_fname + '.invV.pkl'
    try:
        all_kpts = ut.load_data(test_geom_invV_fname)
        logger.info('loaded invV keypoints')
    except IOError:
        oxford_kptsZ = load_ext(test_geom_fname, ndims=5, verbose=True)
        logger.info('converting to invV keypoints')
        all_kpts = vt.convert_kptsZ_to_kpts(oxford_kptsZ)
        ut.save_data(test_geom_invV_fname, all_kpts)

    gnd_ox = scipy.io.loadmat(gnd_fname)
    imlist = [x[0][0] for x in gnd_ox['imlist']]
    qx_to_dx = gnd_ox['qidx'] - 1

    data_uri_order = imlist
    query_uri_order = ut.take(data_uri_order, qx_to_dx)

    offset_list = np.hstack(([0], oxford_nfeats.cumsum())).astype(np.int64)

    # query_gnd = gnd_ox['gnd'][0][0]
    # bboxes = query_gnd[0]
    # qx_to_ok_gtidxs1 = [x[0] for x in query_gnd[1][0]]
    # qx_to_junk_gtidxs2 = [x[0] for x in query_gnd[2][0]]
    # # ut.depth_profile(qx_to_gtidxs1)
    # # ut.depth_profile(qx_to_gtidxs2)

    assert sum(oxford_nfeats) == len(oxford_vecs)
    assert offset_list[-1] == len(oxford_vecs)
    assert len(oxford_wids) == len(oxford_vecs)
    assert oxford_wids.max() == len(oxford_words) - 1

    data = {
        'offset_list': offset_list,
        'all_kpts': all_kpts,
        'all_vecs': oxford_vecs,
        'words': oxford_words,
        'idx_to_wx': oxford_wids,
        'data_uri_order': data_uri_order,
        'query_uri_order': query_uri_order,
    }
    return data
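

# Minimal sketch (not from the original module) of how the offset_list built
# above is used: cumulative per-image feature counts partition the stacked
# descriptor array, so image i owns rows offset_list[i]:offset_list[i + 1].
# Only numpy is assumed; the counts and descriptor dimension are hypothetical.
def _example_offset_list_partition():
    import numpy as np
    nfeats = np.array([3, 0, 2])                   # features per image
    all_vecs = np.arange(5 * 128).reshape(5, 128)  # stacked descriptors
    offset_list = np.hstack(([0], nfeats.cumsum())).astype(np.int64)
    assert offset_list[-1] == len(all_vecs)
    # Descriptors belonging to image 2
    vecs2 = all_vecs[offset_list[2]:offset_list[3]]
    assert vecs2.shape == (2, 128)
    return offset_list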


def load_oxford_2007():
    """
    Loads data from
    http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf

    >>> from wbia.algo.smk.script_smk import *  # NOQA
    """
    from os.path import join, basename, splitext
    import pandas as pd
    import vtool as vt

    dbdir = ut.truepath('/raid/work/Oxford/')
    data_fpath0 = join(dbdir, 'data_2007.pkl')

    if ut.checkpath(data_fpath0):
        data = ut.load_data(data_fpath0)
        return data
    else:
        word_dpath = join(dbdir, 'word_oxc1_hesaff_sift_16M_1M')
        _word_fpath_list = ut.ls(word_dpath)
        imgid_to_word_fpath = {
            splitext(basename(word_fpath))[0]: word_fpath
            for word_fpath in _word_fpath_list
        }
        readme_fpath = join(dbdir, 'README2.txt')
        imgid_order = ut.readfrom(readme_fpath).split('\n')[20:-1]
        data_uri_order = [x.replace('oxc1_', '') for x in imgid_order]

        imgid_to_df = {}
        for imgid in ut.ProgIter(imgid_order, label='reading kpts'):
            word_fpath = imgid_to_word_fpath[imgid]
            row_gen = (map(float, line.strip('\n').split(' '))
                       for line in ut.read_lines_from(word_fpath)[2:])
            rows = [(int(word_id), x, y, e11, e12, e22)
                    for (word_id, x, y, e11, e12, e22) in row_gen]
            df = pd.DataFrame(
                rows, columns=['word_id', 'x', 'y', 'e11', 'e12', 'e22'])
            imgid_to_df[imgid] = df

        df_list = ut.take(imgid_to_df, imgid_order)

        nfeat_list = [len(df_) for df_ in df_list]
        offset_list = [0] + ut.cumsum(nfeat_list)
        shape = (offset_list[-1], 128)
        # shape = (16334970, 128)
        sift_fpath = join(dbdir, 'OxfordSIFTDescriptors',
                          'feat_oxc1_hesaff_sift.bin')
        try:
            file_ = open(sift_fpath, 'rb')
            with ut.Timer('Reading SIFT binary file'):
                nbytes = np.prod(shape)
                # np.frombuffer replaces the deprecated np.fromstring here
                all_vecs = np.frombuffer(file_.read(nbytes), dtype=np.uint8)
                all_vecs = all_vecs.reshape(shape)
        finally:
            file_.close()

        kpts_list = [
            df_.loc[:, ('x', 'y', 'e11', 'e12', 'e22')].values
            for df_ in df_list
        ]
        wordid_list = [df_.loc[:, 'word_id'].values for df_ in df_list]
        kpts_Z = np.vstack(kpts_list)
        idx_to_wx = np.hstack(wordid_list)
        # assert len(np.unique(idx_to_wx)) == 1E6

        # Read the standard query order
        query_files = sorted(
            ut.glob(dbdir + '/oxford_groundtruth', '*_query.txt'))
        query_uri_order = []
        for qpath in query_files:
            text = ut.readfrom(qpath, verbose=0)
            query_uri = text.split(' ')[0].replace('oxc1_', '')
            query_uri_order.append(query_uri)

        logger.info('converting to invV')
        all_kpts = vt.convert_kptsZ_to_kpts(kpts_Z)

        data = {
            'offset_list': offset_list,
            'all_kpts': all_kpts,
            'all_vecs': all_vecs,
            'idx_to_wx': idx_to_wx,
            'data_uri_order': data_uri_order,
            'query_uri_order': query_uri_order,
        }
        ut.save_data(data_fpath0, data)
    return data
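

# Minimal sketch (not from the original module) of reading a packed uint8
# descriptor file the way load_oxford_2007() reads feat_oxc1_hesaff_sift.bin.
# The file name and shape are hypothetical; only numpy is assumed, and
# np.frombuffer stands in for the deprecated np.fromstring.
def _example_read_packed_descriptors(tmp_path='_example_sift.bin'):
    import os
    import numpy as np
    shape = (4, 128)
    # Write a small dummy file so the example is self-contained
    np.random.randint(0, 256, size=shape, dtype=np.uint8).tofile(tmp_path)
    with open(tmp_path, 'rb') as file_:
        nbytes = int(np.prod(shape))
        vecs = np.frombuffer(file_.read(nbytes), dtype=np.uint8).reshape(shape)
    os.remove(tmp_path)
    assert vecs.shape == shape
    return vecs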


def _make_verifier(self, ibs, deploy_fpath, task_key):
    """
    Ignore:
        # py3 side
        clf = deploy_info['clf']
        a = clf.estimators_[0]
        b = a.tree_
        ut.save_data('_tree.pkl', b)
        c = b.__getstate__()
        d = c['nodes']
        ut.save_data('_nodes.pkl', d)

        a.estimators_[0].tree_.__getstate__()['nodes']

    Ignore:
        # py2 side
        ut.load_data('_tree.pkl')
        ut.load_data('_nodes.pkl')

        >>> from wbia.algo.verif.vsone import *  # NOQA
        >>> params = dict(sample_method='random')
        >>> pblm = OneVsOneProblem.from_empty('PZ_MTEST', **params)
        >>> pblm.setup(with_simple=False)
        >>> task_key = pblm.primary_task_key
        >>> self = Deployer(dpath='.', pblm=pblm)
        >>> deploy_info = self.deploy()

        a = deploy_info['clf']
        d = a.estimators_[0].tree_.__getstate__()['nodes']

    Ignore:
        I'm having a similar issue when trying to use python2 to load a
        sklearn RandomForestClassifier that I saved in python3. I created a
        MWE.

        In python 3:

            import numpy as np
            import pickle
            data = np.array(
                [( 1, 26, 69,  5.32214928e+00, 0.69562945, 563, 908.,  1),
                 ( 2,  7, 62,  1.74883020e+00, 0.33854101, 483, 780.,  1),
                 (-1, -1, -2, -2.00000000e+00, 0.76420451,   7,   9., -2),
                 (-1, -1, -2, -2.00000000e+00, 0.        ,  62, 106., -2)],
                dtype=[('left_child', '<i8'), ('right_child', '<i8'),
                       ('feature', '<i8'), ('threshold', '<f8'),
                       ('impurity', '<f8'), ('n_node_samples', '<i8'),
                       ('weighted_n_node_samples', '<f8'),
                       ('missing_direction', '<i8')])

            # Save using pickle
            with open('data.pkl', 'wb') as file_:
                # Use protocol 2 to support python2 and 3
                pickle.dump(data, file_, protocol=2)

            # Save with numpy directly
            np.save('data.npy', data)

        Then in python 2:

            # Load with pickle
            import pickle
            with open('data.pkl', 'rb') as file_:
                data = pickle.load(file_)
            # This results in
            # `ValueError: non-string names in Numpy dtype unpickling`

            # Load with numpy directly
            data = np.load('data.npy')
            # This works

        However this still doesn't make sklearn play nice between 2 and 3.
        So, how can we get pickle to load this numpy object correctly?

        Here is the fix suggested in the link:

            from lib2to3.fixes.fix_imports import MAPPING
            import sys
            import pickle

            # MAPPING maps Python 2 names to Python 3 names.
            # We want this in reverse.
            REVERSE_MAPPING = {}
            for key, val in MAPPING.items():
                REVERSE_MAPPING[val] = key

            # We can override the Unpickler and loads
            class Python_3_Unpickler(pickle.Unpickler):
                def find_class(self, module, name):
                    if module in REVERSE_MAPPING:
                        module = REVERSE_MAPPING[module]
                    __import__(module)
                    mod = sys.modules[module]
                    klass = getattr(mod, name)
                    return klass

            with open('data.pkl', 'rb') as file_:
                data = Python_3_Unpickler(file_).load()

        This still doesn't work.

        https://stackoverflow.com/questions/41720952/unpickle-sklearn-tree-descisiontreeregressor-in-python-2-from-python3
    """
    deploy_info = ut.load_data(deploy_fpath)
    verif = verifier.Verifier(ibs, deploy_info=deploy_info)
    if task_key is not None:
        assert verif.metadata['task_key'] == task_key, (
            'bad saved clf at fpath={}'.format(deploy_fpath))
    return verif


def subset_labels(dataset, key='full'):
    labels_fpath = dataset.fpath_dict[key]['labels']
    labels = (None if labels_fpath is None
              else ut.load_data(labels_fpath, verbose=True))
    return labels


def merge_datasets(dataset_list):
    """
    Merges a list of dataset objects into a single combined dataset.
    """

    def consensus_check_factory():
        """
        Returns a temporary function used to check that all incoming values
        with the same key are consistent
        """
        from collections import defaultdict
        past_values = defaultdict(lambda: None)

        def consensus_check(value, key):
            assert past_values[key] is None or past_values[key] == value, (
                'key=%r with value=%r does not agree with past_value=%r' %
                (key, value, past_values[key]))
            past_values[key] = value
            return value
        return consensus_check

    total_num_labels = 0
    total_num_data = 0

    input_alias_list = [dataset.alias_key for dataset in dataset_list]

    alias_key = 'combo_' + ut.hashstr27(repr(input_alias_list), hashlen=8)
    training_dpath = ut.ensure_app_resource_dir('ibeis_cnn', 'training',
                                                alias_key)
    data_fpath = ut.unixjoin(training_dpath, alias_key + '_data.hdf5')
    labels_fpath = ut.unixjoin(training_dpath, alias_key + '_labels.hdf5')

    try:
        # Try and short circuit cached loading
        merged_dataset = DataSet.from_alias_key(alias_key)
        return merged_dataset
    except (Exception, AssertionError) as ex:
        ut.printex(ex, 'alias definitions have changed. alias_key=%r' %
                   (alias_key,), iswarning=True)

    # Build the dataset
    consensus_check = consensus_check_factory()

    for dataset in dataset_list:
        print(ut.get_file_nBytes_str(dataset.data_fpath))
        print(dataset.data_fpath_dict['full'])
        print(dataset.num_labels)
        print(dataset.data_per_label)
        total_num_labels += dataset.num_labels
        total_num_data += (dataset.data_per_label * dataset.num_labels)
        # check that all data_dims agree
        data_shape = consensus_check(dataset.data_shape, 'data_shape')
        data_per_label = consensus_check(dataset.data_per_label,
                                         'data_per_label')

    # hack record this
    import numpy as np
    data_dtype = np.uint8
    label_dtype = np.int32

    data = np.empty((total_num_data,) + data_shape, dtype=data_dtype)
    labels = np.empty(total_num_labels, dtype=label_dtype)

    data_left = 0
    data_right = None
    labels_left = 0
    labels_right = None
    for dataset in ut.ProgressIter(dataset_list, lbl='combining datasets',
                                   freq=1):
        X_all, y_all = dataset.subset('full')
        labels_right = labels_left + y_all.shape[0]
        data_right = data_left + X_all.shape[0]
        data[data_left:data_right] = X_all
        labels[labels_left:labels_right] = y_all
        data_left = data_right
        labels_left = labels_right

    ut.save_data(data_fpath, data)
    ut.save_data(labels_fpath, labels)

    # Reload labels to record num_labels
    labels = ut.load_data(labels_fpath)
    num_labels = len(labels)

    merged_dataset = DataSet.new_training_set(
        alias_key=alias_key,
        data_fpath=data_fpath,
        labels_fpath=labels_fpath,
        metadata_fpath=None,
        training_dpath=training_dpath,
        data_shape=data_shape,
        data_per_label=data_per_label,
        output_dims=1,
        num_labels=num_labels,
    )
    return merged_dataset
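

# Minimal sketch (not from the original module) of the preallocate-and-slice
# copy pattern merge_datasets() uses to combine per-dataset arrays without an
# intermediate np.concatenate. Only numpy is assumed; the chunk shapes are
# hypothetical.
def _example_preallocated_merge():
    import numpy as np
    chunks = [np.ones((2, 4, 4, 1), dtype=np.uint8),
              np.zeros((3, 4, 4, 1), dtype=np.uint8)]
    total = sum(len(chunk) for chunk in chunks)
    data = np.empty((total, 4, 4, 1), dtype=np.uint8)
    left = 0
    for chunk in chunks:
        # advance the [left, right) window for each incoming chunk
        right = left + chunk.shape[0]
        data[left:right] = chunk
        left = right
    assert data.shape[0] == 5
    return data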


def get_ibeis_part_siam_dataset(**kwargs):
    """
    PARTS based network data

    CommandLine:
        python -m ibeis_cnn.ingest_data --test-get_ibeis_part_siam_dataset --show
        python -m ibeis_cnn.ingest_data --test-get_ibeis_part_siam_dataset --show --db PZ_Master1 --acfg_name timectrl
        python -m ibeis_cnn.ingest_data --test-get_ibeis_part_siam_dataset --show --db PZ_MTEST --acfg_name unctrl --dryrun

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.ingest_data import *  # NOQA
        >>> from ibeis_cnn import draw_results
        >>> import ibeis
        >>> kwargs = {}  # ut.argparse_dict({'max_examples': None, 'num_top': 3})
        >>> dataset = get_ibeis_part_siam_dataset(**kwargs)
        >>> ut.quit_if_noshow()
        >>> dataset.interact(ibs=dataset.getprop('ibs'))
        >>> ut.show_if_requested()
    """
    import ibeis
    datakw = ut.argparse_dict(
        {
            'colorspace': 'gray',
            'acfg_name': 'ctrl',
            # 'db': None,
            'db': 'PZ_MTEST',
        },
        alias_dict={'acfg_name': ['acfg']},
        verbose=True)

    datakw.update(kwargs)

    print('\n\n[get_ibeis_part_siam_dataset] START')
    alias_key = ut.dict_str(datakw, nl=False, explicit=True)

    dbname = datakw.pop('db')

    try:
        if NOCACHE_DATASET:
            raise Exception('forced cache off')
        # Try and short circuit cached loading
        dataset = DataSet.from_alias_key(alias_key)
        dataset.setprop('ibs', lambda: ibeis.opendb(db=dbname))
        return dataset
    except Exception as ex:
        ut.printex(ex, 'alias definitions have changed. alias_key=%r' %
                   (alias_key,), iswarning=True)

    with ut.Indenter('[LOAD IBEIS DB]'):
        ibs = ibeis.opendb(db=dbname)

    # Nets dir is the root dir for all training on this data
    training_dpath = ibs.get_neuralnet_dir()
    ut.ensuredir(training_dpath)

    with ut.Indenter('[BuildDS]'):
        # Get training data pairs
        colorspace = datakw.pop('colorspace')
        (aid_pairs, label_list,
         flat_metadata) = ingest_ibeis.get_aidpairs_partmatch(ibs, **datakw)
        # Extract and cache the data, labels, and metadata
        if ut.get_argflag('--dryrun'):
            print('exiting due to dry run')
            import sys
            sys.exit(0)
        tup = ingest_ibeis.cached_part_match_training_data_fpaths(
            ibs, aid_pairs, label_list, flat_metadata, colorspace=colorspace)
        data_fpath, labels_fpath, metadata_fpath, training_dpath, data_shape = tup
        print('\n[get_ibeis_part_siam_dataset] FINISH\n\n')

    # hack for caching num_labels
    labels = ut.load_data(labels_fpath)
    num_labels = len(labels)

    dataset = DataSet.new_training_set(
        alias_key=alias_key,
        data_fpath=data_fpath,
        labels_fpath=labels_fpath,
        metadata_fpath=metadata_fpath,
        training_dpath=training_dpath,
        data_shape=data_shape,
        data_per_label=2,
        output_dims=1,
        num_labels=num_labels,
    )
    dataset.setprop('ibs', ibs)
    return dataset


def get_ibeis_patch_siam_dataset(**kwargs):
    """
    CommandLine:
        python -m ibeis_cnn.ingest_data --test-get_ibeis_patch_siam_dataset --show
        python -m ibeis_cnn.ingest_data --test-get_ibeis_patch_siam_dataset --show --db PZ_Master1 --acfg_name default
        python -m ibeis_cnn.ingest_data --test-get_ibeis_patch_siam_dataset --show --db PZ_Master1 --acfg_name timectrl
        python -m ibeis_cnn.ingest_data --test-get_ibeis_patch_siam_dataset --show --db PZ_MTEST --acfg_name unctrl --dryrun

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.ingest_data import *  # NOQA
        >>> from ibeis_cnn import draw_results
        >>> import ibeis
        >>> kwargs = {}  # ut.argparse_dict({'max_examples': None, 'num_top': 3})
        >>> dataset = get_ibeis_patch_siam_dataset(**kwargs)
        >>> ut.quit_if_noshow()
        >>> dataset.interact()
        >>> ut.show_if_requested()
    """
    datakw = ut.argparse_dict(
        {
            # 'db': 'PZ_MTEST',
            'max_examples': None,
            # 'num_top': 3,
            'num_top': None,
            'min_featweight': .8 if not ut.WIN32 else None,
            'controlled': True,
            'colorspace': 'gray',
            'acfg_name': None,
        },
        alias_dict={'acfg_name': ['acfg', 'a']},
        verbose=True)

    datakw.update(kwargs)

    # ut.get_func_kwargs(ingest_ibeis.get_aidpairs_and_matches)

    if datakw['acfg_name'] is not None:
        del datakw['controlled']
    if datakw['max_examples'] is None:
        del datakw['max_examples']
    if datakw['num_top'] is None:
        del datakw['num_top']

    with ut.Indenter('[LOAD IBEIS DB]'):
        import ibeis
        dbname = ut.get_argval('--db', default='PZ_MTEST')
        ibs = ibeis.opendb(dbname=dbname, defaultdb='PZ_MTEST')

    # Nets dir is the root dir for all training on this data
    training_dpath = ibs.get_neuralnet_dir()
    ut.ensuredir(training_dpath)
    print('\n\n[get_ibeis_patch_siam_dataset] START')
    # log_dir = join(training_dpath, 'logs')
    # ut.start_logging(log_dir=log_dir)

    alias_key = ibs.get_dbname() + ';' + ut.dict_str(datakw, nl=False,
                                                     explicit=True)
    try:
        if NOCACHE_DATASET:
            raise Exception('forced cache off')
        # Try and short circuit cached loading
        dataset = DataSet.from_alias_key(alias_key)
        dataset.setprop('ibs', lambda: ibeis.opendb(db=dbname))
        return dataset
    except Exception as ex:
        ut.printex(ex, 'alias definitions have changed. alias_key=%r' %
                   (alias_key,), iswarning=True)

    with ut.Indenter('[BuildDS]'):
        # Get training data pairs
        colorspace = datakw.pop('colorspace')
        patchmatch_tup = ingest_ibeis.get_aidpairs_and_matches(ibs, **datakw)
        (aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list,
         metadata_lists) = patchmatch_tup
        # Extract and cache the data
        # TODO: metadata
        if ut.get_argflag('--dryrun'):
            print('exiting due to dry run')
            import sys
            sys.exit(0)
        tup = ingest_ibeis.cached_patchmetric_training_data_fpaths(
            ibs, aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list,
            metadata_lists, colorspace=colorspace)
        data_fpath, labels_fpath, metadata_fpath, training_dpath, data_shape = tup
        print('\n[get_ibeis_patch_siam_dataset] FINISH\n\n')

    # hack for caching num_labels
    labels = ut.load_data(labels_fpath)
    num_labels = len(labels)

    dataset = DataSet.new_training_set(
        alias_key=alias_key,
        data_fpath=data_fpath,
        labels_fpath=labels_fpath,
        metadata_fpath=metadata_fpath,
        training_dpath=training_dpath,
        data_shape=data_shape,
        data_per_label=2,
        output_dims=1,
        num_labels=num_labels,
    )
    dataset.setprop('ibs', ibs)
    return dataset


def grab_liberty_siam_dataset(pairs=250000):
    """
    References:
        http://www.cs.ubc.ca/~mbrown/patchdata/patchdata.html
        https://github.com/osdf/datasets/blob/master/patchdata/dataset.py

    Notes:
        "info.txt" contains the match information

        Each row of info.txt corresponds to a separate patch, with the patches
        ordered from left to right and top to bottom in each bitmap image.

        3 types of metadata files:

        info.txt - contains patch ids that correspond with the order of
            patches in the bmp images
            In the format:
                pointid, unused

        interest.txt - interest points corresponding to patches with patchids
            has same number of rows as info.txt
            In the format:
                reference image id, x, y, orientation, scale (in log2 units)

        m50_<d>_<d>_0.txt - matches files
            In the format:
                patchID1  3DpointID1  unused1  patchID2  3DpointID2  unused2

    CommandLine:
        python -m ibeis_cnn.ingest_data --test-grab_liberty_siam_dataset --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.ingest_data import *  # NOQA
        >>> pairs = 500
        >>> dataset = grab_liberty_siam_dataset(pairs)
        >>> ut.quit_if_noshow()
        >>> from ibeis_cnn import draw_results
        >>> #ibsplugin.rrr()
        >>> flat_metadata = {}
        >>> data, labels = dataset.subset('full')
        >>> ut.quit_if_noshow()
        >>> warped_patch1_list = data[::2]
        >>> warped_patch2_list = data[1::2]
        >>> dataset.interact()
        >>> ut.show_if_requested()
    """
    datakw = {
        'detector': 'dog',
        'pairs': pairs,
    }

    assert datakw['detector'] in ['dog', 'harris']
    assert pairs in [500, 50000, 100000, 250000]

    liberty_urls = {
        'dog': 'http://www.cs.ubc.ca/~mbrown/patchdata/liberty.zip',
        'harris': 'http://www.cs.ubc.ca/~mbrown/patchdata/liberty_harris.zip',
    }
    url = liberty_urls[datakw['detector']]
    ds_path = ut.grab_zipped_url(url)

    ds_name = splitext(basename(ds_path))[0]
    alias_key = 'liberty;' + ut.dict_str(datakw, nl=False, explicit=True)
    cfgstr = ','.join([str(val) for key, val in ut.iteritems_sorted(datakw)])

    # TODO: allow a move of the base data prefix
    training_dpath = ut.ensure_app_resource_dir('ibeis_cnn', 'training',
                                                ds_name)
    if ut.get_argflag('--vtd'):
        ut.vd(training_dpath)
    ut.ensuredir(training_dpath)

    data_fpath = join(training_dpath, 'liberty_data_' + cfgstr + '.pkl')
    labels_fpath = join(training_dpath, 'liberty_labels_' + cfgstr + '.pkl')

    if not ut.checkpath(data_fpath, verbose=True):
        data, labels = ingest_helpers.extract_liberty_style_patches(
            ds_path, pairs)
        ut.save_data(data_fpath, data)
        ut.save_data(labels_fpath, labels)

    # hack for caching num_labels
    labels = ut.load_data(labels_fpath)
    num_labels = len(labels)

    dataset = DataSet.new_training_set(
        alias_key=alias_key,
        data_fpath=data_fpath,
        labels_fpath=labels_fpath,
        metadata_fpath=None,
        training_dpath=training_dpath,
        data_shape=(64, 64, 1),
        data_per_label=2,
        output_dims=1,
        num_labels=num_labels,
    )
    return dataset
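

# Minimal sketch (not from the original module) of parsing one row of the
# m50_<d>_<d>_0.txt matches format described in the docstring above:
# patchID1 3DpointID1 unused1 patchID2 3DpointID2 unused2. Two patches are a
# match when their 3D point ids agree. The sample line here is hypothetical.
def _example_parse_liberty_match_line(line='13 7 0 42 7 0'):
    patch_id1, point_id1, _, patch_id2, point_id2, _ = map(int, line.split())
    is_match = point_id1 == point_id2
    return patch_id1, patch_id2, is_match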