Example #1
0
    def __init__(self, which_set, standardize):
        """
        Load one split of the 'avicenna' dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            Which split to expose: 'train', 'valid' or 'test'.
        standardize : bool
            If true, subtract the per-column mean and divide by the
            per-column standard deviation, both computed over the
            concatenation of all three splits. The selected split array
            is modified in place.

        Raises
        ------
        NoDataPathError
            If the PYLEARN2_DATA_PATH environment variable is not set.
        NotInstalledError
            If no 'avicenna' entry exists under PYLEARN2_DATA_PATH.
        ValueError
            If `which_set` is not one of the recognized split names.
        """
        if 'PYLEARN2_DATA_PATH' not in os.environ:
            raise NoDataPathError()
        data_root = os.environ['PYLEARN2_DATA_PATH']
        if not os.path.exists(os.path.join(data_root, 'avicenna')):
            raise NotInstalledError()  # XXX: check path

        # Reject a bad split name with a real exception (an `assert` would
        # be stripped under `python -O`), and do so before paying for the
        # dataset load.
        split_names = ('train', 'valid', 'test')
        if which_set not in split_names:
            raise ValueError("Unrecognized which_set value %r. Valid values "
                             "are %r." % (which_set, split_names))

        train, valid, test = utlc.load_ndarray_dataset('avicenna')
        self.X = dict(zip(split_names, (train, valid, test)))[which_set]

        if standardize:
            # N.concatenate builds a new array, so the statistics below are
            # not disturbed by the in-place updates applied to self.X.
            union = N.concatenate([train, valid, test], axis=0)
            self.X -= union.mean(axis=0)
            std = union.std(axis=0)
            # Floor tiny deviations at 1e-3 to avoid dividing by ~zero.
            std[std < 1e-3] = 1e-3
            self.X /= std
Example #2
0
    def __init__(self, which_set, standardize):
        """
        Load one split of the 'avicenna' dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            Which split to expose: 'train', 'valid' or 'test'.
        standardize : bool
            If true, subtract the per-column mean and divide by the
            per-column standard deviation, both computed over the
            concatenation of all three splits. The selected split array
            is modified in place.

        Raises
        ------
        ValueError
            If `which_set` is not one of the recognized split names.
        """
        # Reject a bad split name with a real exception (an `assert` would
        # be stripped under `python -O`), and do so before paying for the
        # dataset load.
        split_names = ('train', 'valid', 'test')
        if which_set not in split_names:
            raise ValueError("Unrecognized which_set value %r. Valid values "
                             "are %r." % (which_set, split_names))

        train, valid, test = utlc.load_ndarray_dataset('avicenna')
        self.X = dict(zip(split_names, (train, valid, test)))[which_set]

        if standardize:
            # N.concatenate builds a new array, so the statistics below are
            # not disturbed by the in-place updates applied to self.X.
            union = N.concatenate([train, valid, test], axis=0)
            self.X -= union.mean(axis=0)
            std = union.std(axis=0)
            # Floor tiny deviations at 1e-3 to avoid dividing by ~zero.
            std[std < 1e-3] = 1e-3
            self.X /= std
Example #3
0
def load_data(conf):
    """
    Load the dataset described by a configuration dictionary.

    Parameters
    ----------
    conf : dict
        Configuration. Keys read directly by this function:

        - 'dataset' : name passed as the first argument to the loader.
        - 'sparse' (default False) : if true, load through
          `load_sparse_dataset` instead of `load_ndarray_dataset`.
        - 'normalize_on_the_fly' (default False) : if true, return the
          loaded ndarray data as-is, without creating shared variables.
        - 'normalize' (default True) : selects how each array is wrapped
          in a shared variable (`sharedX` vs. `theano.shared`).

        In addition, the names of the loader's parameters (all but the
        first) are handed to `subdict` together with `conf` to build the
        loader's keyword arguments (presumably the matching entries of
        `conf`; see `subdict`).

    Returns
    -------
    list or loader result
        - Sparse case: ``[train, valid, test]``, plus the loader's fourth
          element when it returned more than three.
        - 'normalize_on_the_fly' case: whatever `load_ndarray_dataset`
          returned, unchanged.
        - Otherwise: a list with one shared variable per loaded array.
    """
    # Parenthesized single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print('... loading dataset')

    # inspect.getargspec no longer exists on recent Python 3 releases;
    # prefer getfullargspec when available. Both put the positional
    # argument names at index 0.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    def loader_kwargs(loader):
        """Keyword arguments for `loader`, built from conf by subdict."""
        # Skip the first parameter: it is supplied positionally below.
        expected = getargspec(loader)[0][1:]
        return subdict(conf, expected)

    # Special case for sparse format
    if conf.get('sparse', False):
        data = load_sparse_dataset(conf['dataset'],
                                   **loader_kwargs(load_sparse_dataset))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    data = load_ndarray_dataset(conf['dataset'],
                                **loader_kwargs(load_ndarray_dataset))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            # NOTE(review): confirm theano._asarray accepts a single
            # argument (no dtype) in the Theano version in use.
            return theano.shared(theano._asarray(data_x), borrow=True)

    # A list comprehension (not map) so that a real list is returned on
    # Python 3 as well, where map() yields a lazy iterator.
    return [shared_dataset(data_x) for data_x in data]
Example #4
0
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : dict
        Configuration. Keys read directly by this function:

        - 'dataset' : name passed as the first argument to the loader.
        - 'sparse' (default False) : if true, load through
          `load_sparse_dataset` instead of `load_ndarray_dataset`.
        - 'normalize_on_the_fly' (default False) : if true, return the
          loaded ndarray data as-is, without creating shared variables.
        - 'normalize' (default True) : selects how each array is wrapped
          in a shared variable (`sharedX` vs. `theano.shared`).

        In addition, the names of the loader's parameters (all but the
        first) are handed to `subdict` together with `conf` to build the
        loader's keyword arguments (presumably the matching entries of
        `conf`; see `subdict`).

    Returns
    -------
    list or loader result
        - Sparse case: ``[train, valid, test]``, plus the loader's fourth
          element when it returned more than three.
        - 'normalize_on_the_fly' case: whatever `load_ndarray_dataset`
          returned, unchanged.
        - Otherwise: a list with one shared variable per loaded array.
    """
    # Parenthesized single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print('... loading dataset')

    # inspect.getargspec no longer exists on recent Python 3 releases;
    # prefer getfullargspec when available. Both put the positional
    # argument names at index 0.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    def loader_kwargs(loader):
        """Keyword arguments for `loader`, built from conf by subdict."""
        # Skip the first parameter: it is supplied positionally below.
        expected = getargspec(loader)[0][1:]
        return subdict(conf, expected)

    # Special case for sparse format
    if conf.get('sparse', False):
        data = load_sparse_dataset(conf['dataset'],
                                   **loader_kwargs(load_sparse_dataset))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    data = load_ndarray_dataset(conf['dataset'],
                                **loader_kwargs(load_ndarray_dataset))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            # NOTE(review): confirm theano._asarray accepts a single
            # argument (no dtype) in the Theano version in use.
            return theano.shared(theano._asarray(data_x), borrow=True)

    # A list comprehension (not map) so that a real list is returned on
    # Python 3 as well, where map() yields a lazy iterator.
    return [shared_dataset(data_x) for data_x in data]