Code example #1
    def get_image_features(self, task, batched_lmap_speed_thresh=None):
        if batched_lmap_speed_thresh is None:
            batched_lmap_speed_thresh = self.batched_lmap_speed_thresh
        images = task.images
        try:
            rval, _images, cdict = self.image_features[images]
            # -- currently it is true that all tasks should be indexing into
            # -- the same set of images. Later when this is not the case,
            # -- delete this over-strict check.
            assert _images is images
        except KeyError:
            feature_lmap = self.get_image_features_lmap(task.images,
                    batched_lmap_speed_thresh)

            rval = cache_memmap(
                feature_lmap,
                self.memmap_name + '_image_features_' + task.name,
                del_atexit=self.memmap_del_atexit)

            foobar.append_ndarray_signature(rval[0],
                'get_image_features features 0', task.name)
            foobar.append_ndarray_signature(rval[100],
                'get_image_features features 100', task.name)

            cdict = {}
            self.image_features[images] = rval, images, cdict
        return rval, cdict
Code example #2
    def get_image_features(self, task, batched_lmap_speed_thresh=None):
        if batched_lmap_speed_thresh is None:
            batched_lmap_speed_thresh = self.batched_lmap_speed_thresh
        images = task.images
        try:
            rval, _images, cdict = self.image_features[images]
            # -- currently it is true that all tasks should be indexing into
            # -- the same set of images. Later when this is not the case,
            # -- delete this over-strict check.
            assert _images is images
        except KeyError:
            feature_lmap = self.get_image_features_lmap(
                task.images, batched_lmap_speed_thresh)

            rval = cache_memmap(feature_lmap,
                                self.memmap_name + '_image_features_' +
                                task.name,
                                del_atexit=self.memmap_del_atexit)

            foobar.append_ndarray_signature(rval[0],
                                            'get_image_features features 0',
                                            task.name)
            foobar.append_ndarray_signature(rval[100],
                                            'get_image_features features 100',
                                            task.name)

            cdict = {}
            self.image_features[images] = rval, images, cdict
        return rval, cdict
Code example #3
File: lfw.py  Project: yamins81/eccv12
def slm_memmap(desc, X, name, basedir=None):
    """
    Return a cache_memmap object representing the features of the entire
    set of images.
    """
    if basedir is None:
        basedir = os.getcwd()
    feat_fn = SLMFunction(desc, X.shape[1:])
    feat = larray.lmap(feat_fn, X)
    rval = larray.cache_memmap(feat, name, basedir=basedir)
    return rval
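
A hypothetical call to slm_memmap might look like the sketch below; the model description desc and the fake image stack are placeholders, not values from the yamins81/eccv12 project.

import numpy as np

# Placeholder inputs: a fake image stack and a stand-in for an SLM model
# description in whatever form SLMFunction expects (both are assumptions).
images = np.random.rand(100, 64, 64).astype('float32')
desc = {}  # placeholder model description (assumption)

# Features for all 100 images, lazily computed and cached to a memmap
# named 'example_features' under the current working directory.
features = slm_memmap(desc, images, 'example_features')
first = features[0]  # indexing forces computation of that image's features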
Code example #4
File: lfw.py  Project: yamins81/eccv12
def pairs_memmap(pair_labels, X, comparison_name, name, basedir=None):
    """
    pair_labels     - something like what comes out of verification_pairs
    X               - feature vectors to be combined
    comparison_name - names the pairwise comparison used to combine X[i]
                      and X[j] into a 1-D feature vector
    """
    if basedir is None:
        basedir = os.getcwd()
    lidxs, ridxs, matches = pair_labels
    pf = larray.lmap(
            PairFeaturesFn(X, comparison_name),
            lidxs,
            ridxs)
    pf_cache = larray.cache_memmap(pf, name, basedir=basedir)
    return pf_cache, np.asarray(matches)
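
A sketch of how pairs_memmap might be invoked, assuming pair_labels comes from verification_pairs as the docstring suggests; the feature matrix and the comparison name 'mult' are placeholders.

import numpy as np

# (lidxs, ridxs, matches) for one LFW fold, as suggested by the docstring.
pair_labels = verification_pairs('fold_0', test=None)

# Hypothetical per-image feature matrix (one row per image).
X = np.random.rand(13233, 256).astype('float32')

# 'mult' is a placeholder comparison name understood by PairFeaturesFn.
pair_feats, matches = pairs_memmap(pair_labels, X, 'mult', 'fold_0_pairs')
print(pair_feats[0].shape, matches.shape)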
Code example #5
File: data.py  Project: dicarlolab/archconvnets
def get_stimarray(marray, mname, perm, perm_id, cache_type, base_dir, read_mode='r'):
    reorder = Reorder2(marray)
    lmap = larray.lmap(reorder, perm, f_map = reorder)
    if cache_type == 'hdf5':
        new_name = mname + '_' + perm_id + '_hdf5'
        print('Getting stimuli from cache hdf5 at %s/%s ' % (base_dir, new_name))
        return larray.cache_hdf5(lmap,
                              name=new_name,
                              basedir=base_dir,
                              mode=read_mode)
    elif cache_type == 'memmap':
        new_name = mname + '_' + perm_id + '_memmap'
        print('Getting stimuli from cache memmap at %s/%s ' % (base_dir, new_name))
        return larray.cache_memmap(lmap,
                                   name=new_name,
                                   basedir=base_dir)
    else:
        # Fail loudly instead of silently returning None for an unknown cache_type.
        raise ValueError('unrecognized cache_type: %r' % cache_type)
Code example #6
File: test_fg11.py  Project: yamins81/eccv12
    def get_fg11_features(suffix, expected_shape):
        dataset = skdata.lfw.Aligned()
        paths, identities = dataset.raw_classification_task()
        def load_path(path):
            basename = os.path.basename(path)
            name = basename[:-9]  # cut off the digits and the .jpg
            # -- touch the jpg to make sure it's there
            new_path = os.path.join(
                feature_root,
                name,
                basename)
            feature_path = new_path + suffix
            print('loading %s' % feature_path)
            data = scipy.io.loadmat(feature_path)['data']
            assert data.shape == expected_shape
            return np.asarray(data, dtype='float32')
        # -- apply decorator manually here in nested scope
        load_path = larray.lmap_info(
            shape=expected_shape,
            dtype='float32')(load_path)

        rval = larray.lmap(load_path, paths)
        rval = larray.cache_memmap(rval, 'fcache_' + suffix, basedir=os.getcwd())
        return rval
Code example #7
File: pyll_slm.py  Project: Rt0220/hyperopt-convnet
def larray_cache_memmap(obj, name, basedir=None, msg=None):
    return larray.cache_memmap(obj, name, basedir=basedir, msg=msg)
Code example #8
File: data.py  Project: dicarlolab/archconvnets
    def __init__(self, data_dir, batch_range, init_epoch=1,
                       init_batchnum=None, dp_params=None, test=False,
                       read_mode='r', cache_type='memmap'):

        #load dataset and meta
        modulename, attrname = dp_params['dataset_name']
        module = importlib.import_module(modulename)
        self.dp_params = dp_params
        print('DP_PARAMS', dp_params)
        dataset_obj = getattr(module, attrname)
        print(module, attrname)
        dataset_data = dp_params.get('dataset_data', None)
        if dataset_data is not None:
            dset = dataset_obj(data=dataset_data)
        else:
            dset = dataset_obj()
        self.dset = dset        
        
        perm_type = dp_params.get('perm_type')
        perm, perm_id = self.get_perm()        
        self.perm = perm
        self.perm_id = perm_id
        if 'subslice' in dp_params:
            subslice_method, subslice_kwargs = self.subslice = dp_params['subslice']
            subslice = getattr(self.dset, subslice_method)(**subslice_kwargs).nonzero()[0]
            if perm is not None:
                self.subslice = fast.isin(perm, subslice).nonzero()[0]
            else:
                self.subslice = subslice

        metacol = self.metacol = self.get_metacol()
        if hasattr(metacol, 'keys'):
            mlen = len(metacol.values()[0])
        else:
            mlen = len(metacol)

        #compute number of batches
        batch_size = self.batch_size = dp_params['batch_size']
        num_batches = self.num_batches = int(math.ceil(mlen / float(batch_size)))
        num_batches_for_meta = self.num_batches_for_meta = dp_params['num_batches_for_mean']

        images = dset.get_images(preproc=dp_params['preproc'])
        if hasattr(images, 'dirname'):
            base_dir, orig_name = os.path.split(images.dirname)
        else:
            base_dir = dset.home('cache')
            orig_name = 'images_cache_' + get_id(dp_params['preproc'])

        reorder = Reorder(images)
        lmap = larray.lmap(reorder, self.perm, f_map=reorder)
        if cache_type == 'hdf5':
            new_name = orig_name + '_' + self.perm_id + '_hdf5'
            print('Getting stimuli from cache hdf5 at %s/%s ' % (base_dir, new_name))
            self.stimarray = larray.cache_hdf5(lmap,
                                  name=new_name,
                                  basedir=base_dir,
                                  mode=read_mode)
        elif cache_type == 'memmap':
            new_name = orig_name + '_' + self.perm_id + '_memmap'
            print('Getting stimuli from cache memmap at %s/%s ' % (base_dir, new_name))
            self.stimarray = larray.cache_memmap(lmap,
                                  name=new_name,
                                  basedir=base_dir)


        #default data location
        if data_dir == '':
            pstring = hashlib.sha1(repr(dp_params['preproc'])).hexdigest() + '_%d' % dp_params['batch_size']
            data_dir = dset.home('convnet_batches', pstring)
        if not os.path.exists(data_dir):
            print('data_dir %s does not exist, creating' % data_dir)
            os.makedirs(data_dir)
            
        if hasattr(self, 'subslice'):
            hashval = get_id(tuple(subslice.tolist()))
            metafile = os.path.join(data_dir, 'batches_%s.meta' % hashval)
        else:
            metafile = os.path.join(data_dir, 'batches.meta')
        self.metafile = metafile

        if os.path.exists(metafile):
            print('Meta file at %s exists, loading' % metafile)
            bmeta = cPickle.load(open(metafile, 'rb'))
            #assertions checking that the things that need to be the same
            #for these batches to make sense are in fact the same
            assert dp_params['batch_size'] == bmeta['num_cases_per_batch'], (dp_params['batch_size'], bmeta['num_cases_per_batch'])
            if 'subslice' in bmeta or 'subslice' in dp_params:
                assert dp_params['subslice'] == bmeta['subslice']
            if 'dataset_name' in bmeta:
                assert dp_params['dataset_name'] == bmeta['dataset_name'], (dp_params['dataset_name'], bmeta['dataset_name'])
            if 'preproc' in bmeta:
                assert dp_params['preproc'] == bmeta['preproc'], (dp_params['preproc'], bmeta['preproc'])
                #pass
            if 'dataset_data' in bmeta:
                assert dataset_data == bmeta['dataset_data'], (dataset_data, bmeta['dataset_data'])
        else:
            print('Making batches.meta at %s ...' % metafile)
            imgs_mean = None
            isf = 0
            for bn in range(num_batches_for_meta):
                print('Meta batch %d' % bn)
                #get stimuli and put in the required format
                stims = self.get_stims(bn, batch_size)
                print('Got stims', stims.shape, stims.nbytes)
                if 'float' in repr(stims.dtype):
                    stims = n.uint8(n.round(255 * stims))
                print('Converted to uint8', stims.nbytes)
                d = dldata_to_convnet_reformatting(stims, None)
                #add to the mean
                if imgs_mean is None:
                    imgs_mean = n.zeros((d['data'].shape[0],))
                dlen = d['data'].shape[0]
                fr = isf / (isf + float(dlen))
                imgs_mean *= fr
                imgs_mean += (1 - fr) * d['data'].mean(axis=1)
                isf += dlen

            #write out batches.meta
            outdict = {'num_cases_per_batch': batch_size,
                       'label_names': self.labels_unique,
                       'num_vis': d['data'].shape[0],
                       'data_mean': imgs_mean,
                       'dataset_name': dp_params['dataset_name'],
                       'dataset_data': dataset_data,
                       'preproc': dp_params['preproc']}
            if 'subslice' in dp_params:
                outdict['subslice'] = dp_params['subslice']
            with open(metafile, 'wb') as _f:
                cPickle.dump(outdict, _f)

        self.batch_meta = cPickle.load(open(metafile, 'rb'))

        LabeledDataProvider.__init__(self, data_dir, batch_range,
                                 init_epoch, init_batchnum, dp_params, test)
Code example #9
    def train_indexed_image_classification(self, train, valid=None):

        if valid is None:
            train_name = train.name
            valid_name = 'None'
        else:
            train_name = train.name
            valid_name = valid.name
            assert train.all_images is valid.all_images
            assert train.all_labels is valid.all_labels

        info('train_indexed_image_classification: %s/%s' %
             (train_name, valid_name))

        normed_features, xmean, xstd, avg_nrm = \
            self.normalized_image_features(
                train.all_images, None, None, None, flatten=True)

        assert train.name is not None

        if hasattr(self, 'cmemmap'):
            assert train.all_images is self.cmemmap_all_images
        else:
            self.cmemmap_all_images = train.all_images
            self.cmemmap = cache_memmap(normed_features,
                                        self.memmap_name,
                                        del_atexit=True)

        if not hasattr(self, 'history'):
            self.load_ensemble_history(fields=[])

        svm = self.load_svm(train_name, valid_name, self.cmemmap.shape[1],
                            train.n_classes, self.pipeline['l2_reg'])
        svm.feature_xmean = xmean
        svm.feature_xstd = xstd
        svm.feature_avg_nrm = avg_nrm
        svm.train_name = train_name
        svm.valid_name = valid_name

        prev_xw_trn = self.load_prev_xw(train_name,
                                        train_name,
                                        valid_name,
                                        use_history='using_history')

        info('train_indexed_image_classification: Fitting SVM with prev_xw')
        svm.fit(self.cmemmap[train.idxs], train.all_labels[train.idxs],
                prev_xw_trn)

        info('-> loaded alpha %s' % str(svm.alpha))
        info('-> loaded prvl2 %s' % str(svm.prev_l2_regularization))
        info('-> loaded prvw2 %s' % str(svm.prev_w_l2_sqr))

        if valid is None:
            # -- XXX: it is currently a hack to use the existence
            #    of the validation set to decide when to compute
            #    an svm without the history features... it currently
            #    so happens that for the fit/val split we have a validation
            #    set and we want to train both ways, and for the sel/test
            #    split we do not have a validation set and we only want the
            #    fit-with-history training.
            assert train.name == 'sel'
            svm0 = None
        else:
            svm0 = copy.deepcopy(svm)
            if (prev_xw_trn is not None) and prev_xw_trn.size:
                info('Fitting SVM without prev_xw')
                svm0.fit(self.cmemmap[train.idxs],
                         train.all_labels[train.idxs],
                         np.zeros_like(prev_xw_trn))
        self.add_results(
            [
                'train_indexed_image_classification',
                train_name,
                valid_name,
            ], {
                'train_name': train_name,
                'valid used': (valid is not None),
                'valid_name': valid_name,
            }, {
                'model0': svm0,
                'model': svm,
            })

        self.loss_indexed_image_classification(svm, train)
        if valid is not None:
            self.loss_indexed_image_classification(svm, valid)
            self.loss_indexed_image_classification(
                svm0, valid, use_history='not_using_history')

        return svm
Code example #10
def larray_cache_memmap(obj, name, basedir=None, msg=None):
    return larray.cache_memmap(obj, name, basedir=basedir, msg=msg)
Code example #11
File: lfw.py  Project: yamins81/eccv12
def train_view2(namebases, basedirs, test=None, use_libsvm=False,
                trace_normalize=False, model_kwargs=None):
    """To use use precomputed kernels with libsvm, do
    use_libsvm = {'kernel': 'precomputed'}
    otherwise, use_libsvm = True will use 'linear'
    """
    pair_features = [
        [larray.cache_memmap(None,
                             name=view2_filename(nb, snum),
                             basedir=bdir)
         for snum in range(10)]
        for nb, bdir in zip(namebases, basedirs)]

    split_data = [verification_pairs('fold_%d' % split_num, test=test) for split_num in range(10)]
    
    train_errs = []
    test_errs = []
    if model_kwargs is None:
        model_kwargs = {}
    
    for ind in range(10):
        train_inds = [_ind for _ind in range(10) if _ind != ind]
        print ('Constructing stuff for split %d ...' % ind)
        test_X = [pf[ind][:] for pf in pair_features]

        test_y = split_data[ind][2]
        train_X = [np.vstack([pf[_ind][:] for _ind in train_inds])
                             for pf in pair_features]
        train_y = np.concatenate([split_data[_ind][2] for _ind in train_inds])
        train_decisions = np.zeros(len(train_y))
        test_decisions = np.zeros(len(test_y))
        
        #train_Xyd_n, test_Xyd_n = toyproblem.normalize_Xcols(
        #    (np.hstack(train_X), train_y, train_decisions,),
        #    (np.hstack(test_X), test_y, test_decisions,))
        
        normalized = [dan_normalize((t0, t1),
                       trace_normalize=trace_normalize,
                       data=None) for t0, t1 in zip(train_X, test_X)]
        train_X = np.hstack([n[0] for n in normalized])
        test_X = np.hstack([n[1] for n in normalized])
        
        train_Xyd_n = (train_X, train_y, train_decisions)
        test_Xyd_n = (test_X, test_y, test_decisions)
        
        print ('Training split %d ...' % ind)
        if use_libsvm:
            if hasattr(use_libsvm, 'keys'):
                kernel = use_libsvm.get('kernel', 'linear')
            else:
                kernel = 'linear'
            if kernel == 'precomputed':
                (_Xtrain, _ytrain, _dtrain) = train_Xyd_n
                print ('Computing training kernel ...')
                Ktrain = np.dot(_Xtrain, _Xtrain.T)
                print ('... computed training kernel of shape', Ktrain.shape)
                train_Xyd_n = (Ktrain, _ytrain, _dtrain)
                train_data = (Ktrain, _ytrain, _dtrain)
                print ('Computing testtrain kernel ...')
                (_Xtest, _ytest, _dtest) = test_Xyd_n
                Ktest = np.dot(_Xtest, _Xtrain.T)
                print ('... computed testtrain kernel of shape', Ktest.shape)
                test_Xyd_n = (Ktest, _ytest, _dtest)

            model_kwargs['kernel'] = kernel
            svm, _ = train_scikits(train_Xyd_n,
                                labelset=[-1, 1],
                                model_type='svm.SVC',
                                model_kwargs=model_kwargs,
                                normalization=False
                                )
        else:
            svm = toyproblem.train_svm(train_Xyd_n,
                l2_regularization=1e-3,
                max_observations=20000)

        #train_decisions = svm_decisions_lfw(svm, train_Xyd_n)
        #test_decisions = svm_decisions_lfw(svm, test_Xyd_n)
        
        #train_predictions = predictions_from_decisions(train_decisions)
        #test_predictions = predictions_from_decisions(test_decisions)

        train_predictions = svm.predict(train_Xyd_n[0])
        test_predictions = svm.predict(test_Xyd_n[0])
        train_err = (train_predictions != train_y).mean()
        test_err = (test_predictions != test_y).mean()

        print('split %d train err %f' % (ind, train_err))
        print('split %d test err %f' % (ind, test_err))
        
        train_errs.append(train_err)
        test_errs.append(test_err)
        
    train_err_mean = np.mean(train_errs)
    print('train err mean %f' % train_err_mean)
    test_err_mean = np.mean(test_errs)
    print('test err mean %f' % test_err_mean)
    
    return train_err_mean, test_err_mean
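
Per the docstring, use_libsvm=True selects a linear-kernel SVC while use_libsvm={'kernel': 'precomputed'} routes through the precomputed-kernel branch above. A hedged invocation sketch follows; the name bases and cache directories are placeholders, not values from the project.

# Placeholder cache names/directories for two feature sets.
namebases = ['model_a_features', 'model_b_features']
basedirs = ['/tmp/cache_a', '/tmp/cache_b']

# Linear-kernel libsvm over the ten View 2 folds.
train_err, test_err = train_view2(namebases, basedirs, use_libsvm=True)

# Precomputed-kernel variant described in the docstring.
train_err, test_err = train_view2(
    namebases, basedirs, use_libsvm={'kernel': 'precomputed'})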
Code example #12
    def train_indexed_image_classification(self, train, valid=None):

        if valid is None:
            train_name = train.name
            valid_name = 'None'
        else:
            train_name = train.name
            valid_name = valid.name
            assert train.all_images is valid.all_images
            assert train.all_labels is valid.all_labels

        info('train_indexed_image_classification: %s/%s' % (
            train_name, valid_name))

        normed_features, xmean, xstd, avg_nrm = \
            self.normalized_image_features(
                train.all_images, None, None, None, flatten=True)

        assert train.name is not None

        if hasattr(self, 'cmemmap'):
            assert train.all_images is self.cmemmap_all_images
        else:
            self.cmemmap_all_images = train.all_images
            self.cmemmap = cache_memmap(
                normed_features,
                self.memmap_name,
                del_atexit=True)

        if not hasattr(self, 'history'):
            self.load_ensemble_history(fields=[])

        svm = self.load_svm(
            train_name, valid_name, self.cmemmap.shape[1],
            train.n_classes, self.pipeline['l2_reg'])
        svm.feature_xmean = xmean
        svm.feature_xstd = xstd
        svm.feature_avg_nrm = avg_nrm
        svm.train_name = train_name
        svm.valid_name = valid_name

        prev_xw_trn = self.load_prev_xw(
            train_name, train_name, valid_name, use_history='using_history')

        info('train_indexed_image_classification: Fitting SVM with prev_xw')
        svm.fit(self.cmemmap[train.idxs],
                train.all_labels[train.idxs],
                prev_xw_trn)

        info('-> loaded alpha %s' % str(svm.alpha))
        info('-> loaded prvl2 %s' % str(svm.prev_l2_regularization))
        info('-> loaded prvw2 %s' % str(svm.prev_w_l2_sqr))

        if valid is None:
            # -- XXX: it is currently a hack to use the existence
            #    of the validation set to decide when to compute
            #    an svm without the history features... it currently
            #    so happens that for the fit/val split we have a validation
            #    set and we want to train both ways, and for the sel/test
            #    split we do not have a validation set and we only want the
            #    fit-with-history training.
            assert train.name == 'sel'
            svm0 = None
        else:
            svm0 = copy.deepcopy(svm)
            if (prev_xw_trn is not None) and prev_xw_trn.size:
                info('Fitting SVM without prev_xw')
                svm0.fit(self.cmemmap[train.idxs],
                         train.all_labels[train.idxs],
                         np.zeros_like(prev_xw_trn))
        self.add_results(
            [
            'train_indexed_image_classification',
            train_name,
            valid_name,
            ],
            {
            'train_name': train_name,
            'valid used': (valid is not None),
            'valid_name': valid_name,
            },
            {
            'model0': svm0,
            'model': svm,
            })

        self.loss_indexed_image_classification(svm, train)
        if valid is not None:
            self.loss_indexed_image_classification(svm, valid)
            self.loss_indexed_image_classification(svm0, valid,
                use_history='not_using_history')

        return svm
Code example #13
    def test_memmap_cache(self):
        self.battery(lambda obj: larray.cache_memmap(obj, 'name_foo'))
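
All of the snippets above share one pattern: build a lazy array with larray.lmap (optionally annotating the mapped function with larray.lmap_info, as in code example #6) and wrap it in larray.cache_memmap so mapped values are computed once and persisted to a disk-backed memmap. The sketch below illustrates that pattern with a made-up per-row function; the names, shapes, and cache file name are illustrative assumptions, and it presumes the same larray module used in the examples.

import os
import numpy as np
import larray

def double_row(x):
    # Stand-in for a real feature extractor: doubles one input row.
    return 2.0 * x

# Annotate the per-element output shape/dtype, mirroring code example #6.
double_row = larray.lmap_info(shape=(2,), dtype='float32')(double_row)

X = np.arange(10, dtype='float32').reshape(5, 2)
lazy = larray.lmap(double_row, X)                      # nothing computed yet
cached = larray.cache_memmap(lazy, 'demo_cache', basedir=os.getcwd())
print(cached[0])    # first row computed and written to the memmap cache
print(cached[:])    # remaining rows computed on demand, then read from disk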