def __init__(self, which_set, standardize):
    """
    Loads one split of the Avicenna dataset.

    Parameters
    ----------
    which_set : str
        One of 'train', 'valid' or 'test'.
    standardize : bool
        If True, subtract the mean and divide by the standard deviation,
        both computed over the union of all three splits.
    """
    if 'PYLEARN2_DATA_PATH' not in os.environ:
        raise NoDataPathError()
    if not os.path.exists(os.path.join(os.environ['PYLEARN2_DATA_PATH'],
                                       'avicenna')):
        raise NotInstalledError()

    train, valid, test = utlc.load_ndarray_dataset('avicenna')

    if which_set == 'train':
        self.X = train
    elif which_set == 'valid':
        self.X = valid
    elif which_set == 'test':
        self.X = test
    else:
        raise ValueError("which_set must be 'train', 'valid' or 'test', "
                         "got " + str(which_set))

    if standardize:
        # Statistics are computed over the union of all splits so that
        # train, valid and test are standardized consistently.
        union = N.concatenate([train, valid, test], axis=0)
        self.X -= union.mean(axis=0)
        std = union.std(axis=0)
        # Clip tiny standard deviations to avoid blowing up
        # near-constant features when dividing.
        std[std < 1e-3] = 1e-3
        self.X /= std
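A minimal usage sketch, assuming this `__init__` belongs to the `Avicenna` dataset class and that `PYLEARN2_DATA_PATH` points at a directory containing an `avicenna` subdirectory; the class name and the path value below are assumptions for illustration:

```python
# Hypothetical usage; the class name Avicenna and the data path
# are assumptions, not confirmed by the code above.
import os
os.environ.setdefault('PYLEARN2_DATA_PATH', '/path/to/pylearn2/data')

train_set = Avicenna(which_set='train', standardize=True)
valid_set = Avicenna(which_set='valid', standardize=True)
print train_set.X.shape  # (n_examples, n_features)
```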
def __init__(self, which_set, standardize): """ .. todo:: WRITEME """ #train, valid, test = N.random.randn(50,50), N.random.randn(50,50), N.random.randn(50,50) #print "avicenna hacked to load small random data instead of actual data" train, valid, test = utlc.load_ndarray_dataset('avicenna') if which_set == 'train': self.X = train elif which_set == 'valid': self.X = valid elif which_set == 'test': self.X = test else: assert False if standardize: union = N.concatenate([train,valid,test],axis=0) self.X -= union.mean(axis=0) std = union.std(axis=0) std[std < 1e-3] = 1e-3 self.X /= std
def load_data(conf): """ Loads a specified dataset according to the parameters in the dictionary Parameters ---------- conf : WRITEME Returns ------- WRITEME """ print '... loading dataset' # Special case for sparse format if conf.get('sparse', False): expected = inspect.getargspec(load_sparse_dataset)[0][1:] data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected)) valid, test = data[1:3] # Sparse TERRY data on LISA servers contains an extra null first row in # valid and test subsets. if conf['dataset'] == 'terry': valid = valid[1:] test = test[1:] assert valid.shape[0] == test.shape[0] == 4096, \ 'Sparse TERRY data loaded has wrong number of examples' if len(data) == 3: return [data[0], valid, test] else: return [data[0], valid, test, data[3]] # Load as the usual ndarray expected = inspect.getargspec(load_ndarray_dataset)[0][1:] data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected)) # Special case for on-the-fly normalization if conf.get('normalize_on_the_fly', False): return data # Allocate shared variables def shared_dataset(data_x): """Function that loads the dataset into shared variables""" if conf.get('normalize', True): return sharedX(data_x, borrow=True) else: return theano.shared(theano._asarray(data_x), borrow=True) return map(shared_dataset, data)
def load_data(conf): """ Loads a specified dataset according to the parameters in the dictionary """ print '... loading dataset' # Special case for sparse format if conf.get('sparse', False): expected = inspect.getargspec(load_sparse_dataset)[0][1:] data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected)) valid, test = data[1:3] # Sparse TERRY data on LISA servers contains an extra null first row in # valid and test subsets. if conf['dataset'] == 'terry': valid = valid[1:] test = test[1:] assert valid.shape[0] == test.shape[0] == 4096, \ 'Sparse TERRY data loaded has wrong number of examples' if len(data) == 3: return [data[0], valid, test] else: return [data[0], valid, test, data[3]] # Load as the usual ndarray expected = inspect.getargspec(load_ndarray_dataset)[0][1:] data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected)) # Special case for on-the-fly normalization if conf.get('normalize_on_the_fly', False): return data # Allocate shared variables def shared_dataset(data_x): """Function that loads the dataset into shared variables""" if conf.get('normalize', True): return sharedX(data_x, borrow=True) else: return theano.shared(theano._asarray(data_x), borrow=True) return map(shared_dataset, data)