def test_ule():
    """Smoke-test loading of the ULE dataset with its transfer labels.

    Checks that the transfer-label array has one row per training example.
    """
    skip_if_no_data()
    # Request the extra transfer split alongside the usual three.
    splits = utlc.load_ndarray_dataset("ule", normalize=True, transfer=True)
    train_set, transfer_set = splits[0], splits[3]
    assert train_set.shape[0] == transfer_set.shape[0]
def test_all_utlc():
    """Load each small UTLC dataset and sanity-check the returned splits.

    For every dataset, asserts that train/valid/test are numpy ndarrays
    and that all three share the same feature dimension.
    """
    skip_if_no_data()
    # not testing rita, because it requires a lot of memory and is slow
    for name in ['avicenna', 'harry', 'ule']:
        # Use print() (not the Python-2 print statement) so this runs on
        # both Python 2 and 3; "%s" keeps the output a single string.
        print("Loading %s" % name)
        train, valid, test = utlc.load_ndarray_dataset(name, normalize=True)
        print("dtype, max, min, mean, std")
        print(train.dtype, train.max(), train.min(), train.mean(),
              train.std())
        assert isinstance(train, numpy.ndarray), \
            "train is not an ndarray in %s dataset" % name
        assert isinstance(valid, numpy.ndarray), \
            "valid is not an ndarray in %s dataset" % name
        assert isinstance(test, numpy.ndarray), \
            "test is not an ndarray in %s dataset" % name
        assert train.shape[1] == test.shape[1] == valid.shape[1], \
            "shapes of datasets does not match for %s" % name
def test_all_utlc():
    """Load each small UTLC dataset and sanity-check the returned splits.

    Asserts that train/valid/test are numpy ndarrays sharing the same
    feature dimension.
    """
    skip_if_no_data()
    # not testing rita, because it requires a lot of memory and is slow
    for name in ['avicenna', 'harry', 'ule']:
        # print() function instead of the Python-2-only print statement,
        # matching the rest of the file's Python-3-compatible style.
        print("Loading %s" % name)
        train, valid, test = utlc.load_ndarray_dataset(name, normalize=True)
        print("dtype, max, min, mean, std")
        print(train.dtype, train.max(), train.min(), train.mean(),
              train.std())
        assert isinstance(train, numpy.ndarray)
        assert isinstance(valid, numpy.ndarray)
        assert isinstance(test, numpy.ndarray)
        assert train.shape[1] == test.shape[1] == valid.shape[1]
def test_all_utlc():
    """Load each small UTLC dataset and sanity-check the returned splits.

    Verifies that every split is a numpy ndarray and that the three
    splits agree on the number of features.
    """
    skip_if_no_data()
    # rita is deliberately skipped: loading it needs a lot of memory
    # and is slow.
    for dataset_name in ['avicenna', 'harry', 'ule']:
        print("Loading ", dataset_name)
        train, valid, test = utlc.load_ndarray_dataset(dataset_name,
                                                       normalize=True)
        print("dtype, max, min, mean, std")
        print(train.dtype, train.max(), train.min(), train.mean(),
              train.std())
        # Every split must be a plain ndarray with a matching width.
        for split in (train, valid, test):
            assert isinstance(split, numpy.ndarray)
        assert train.shape[1] == test.shape[1] == valid.shape[1]
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : dict
        Configuration dictionary. Keys read here: ``'dataset'`` (name of
        the dataset), ``'sparse'`` (bool, load via the sparse loader),
        ``'normalize_on_the_fly'`` (bool, return raw arrays),
        ``'normalize'`` (bool, store as floatX shared variables), plus
        any keyword arguments accepted by the underlying loader.

    Returns
    -------
    list
        ``[train, valid, test]`` (and optionally a fourth transfer
        element). Sparse / on-the-fly data is returned as-is; otherwise
        each split is wrapped in a Theano shared variable.
    """
    logger.info('... loading dataset')

    # Special case for sparse format
    if conf.get('sparse', False):
        # Forward only the conf keys the sparse loader actually accepts.
        expected = inspect.getargspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first
        # row in valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = inspect.getargspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            return theano.shared(theano._asarray(data_x), borrow=True)

    # Build an eager list: under Python 3, map() would return a lazy
    # iterator, inconsistent with the list returned by the sparse branch.
    return [shared_dataset(data_x) for data_x in data]
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : dict
        Configuration dictionary. Keys read here: ``'dataset'``,
        ``'sparse'``, ``'normalize_on_the_fly'``, ``'normalize'``, plus
        any keyword arguments accepted by the underlying loader.

    Returns
    -------
    list
        ``[train, valid, test]`` (and optionally a fourth transfer
        element). Sparse / on-the-fly data is returned as-is; otherwise
        each split is wrapped in a Theano shared variable.
    """
    # print() call form is valid on both Python 2 and 3 (the bare
    # print statement is a Python-3 syntax error).
    print('... loading dataset')

    # Special case for sparse format
    if conf.get('sparse', False):
        # Forward only the conf keys the sparse loader actually accepts.
        expected = inspect.getargspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first
        # row in valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = inspect.getargspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            return theano.shared(theano._asarray(data_x), borrow=True)

    # Build an eager list: under Python 3, map() would return a lazy
    # iterator, inconsistent with the list returned by the sparse branch.
    return [shared_dataset(data_x) for data_x in data]
def __init__(self, which_set, standardize):
    """Load one split of the Avicenna UTLC dataset.

    Parameters
    ----------
    which_set : str
        One of 'train', 'valid' or 'test'; selects which split is
        stored in ``self.X``.
    standardize : bool
        If True, subtract the per-feature mean and divide by the
        per-feature std, both computed over the union of all three
        splits (std clamped below at 1e-3 to avoid blow-up).

    Raises
    ------
    ValueError
        If ``which_set`` is not one of the three split names.
    """
    train, valid, test = utlc.load_ndarray_dataset('avicenna')
    if which_set == 'train':
        self.X = train
    elif which_set == 'valid':
        self.X = valid
    elif which_set == 'test':
        self.X = test
    else:
        # Raise instead of `assert False`: asserts are stripped under
        # python -O, which would leave self.X silently unset.
        raise ValueError("Unrecognized which_set: %s" % which_set)

    if standardize:
        union = N.concatenate([train, valid, test], axis=0)
        # Accumulate mean and std in float64 so we don't lose
        # numerical precision when the data is stored in float32.
        self.X -= union.mean(axis=0, dtype='float64')
        std = union.std(axis=0, dtype='float64')
        # Clamp tiny stds so near-constant features don't explode.
        std[std < 1e-3] = 1e-3
        self.X /= std
def __init__(self, which_set, standardize):
    """Load one split of the Avicenna UTLC dataset.

    Parameters
    ----------
    which_set : str
        One of 'train', 'valid' or 'test'; selects which split is
        stored in ``self.X``.
    standardize : bool
        If True, subtract the per-feature mean and divide by the
        per-feature std, both computed (in float64) over the union of
        all three splits; stds below 1e-3 are clamped to 1e-3.
    """
    train, valid, test = utlc.load_ndarray_dataset('avicenna')
    splits = {'train': train, 'valid': valid, 'test': test}
    assert which_set in splits
    self.X = splits[which_set]

    if standardize:
        stacked = N.concatenate([train, valid, test], axis=0)
        # perform mean and std in float64 to avoid losing
        # too much numerical precision
        center = stacked.mean(axis=0, dtype='float64')
        spread = stacked.std(axis=0, dtype='float64')
        # Guard against division blow-up on near-constant features.
        spread[spread < 1e-3] = 1e-3
        self.X -= center
        self.X /= spread