Example #1
0
def test_ule():
    """
    Check that the "ule" dataset can be loaded together with its
    transfer data, and that the training and transfer arrays have the
    same length along their first axis.
    """
    skip_if_no_data()
    # Ask for the normalized data including the transfer split
    splits = utlc.load_ndarray_dataset("ule", normalize=True, transfer=True)
    train, valid, test, transfer = splits
    n_train = train.shape[0]
    n_transfer = transfer.shape[0]
    assert n_train == n_transfer
Example #2
0
def test_ule():
    """
    Load the "ule" dataset with normalization and transfer data
    enabled, then verify that the first dimension of the training
    array equals the first dimension of the transfer array.
    """
    skip_if_no_data()
    load_options = dict(normalize=True, transfer=True)
    # Four values come back when transfer data is requested
    train, valid, test, transfer = utlc.load_ndarray_dataset("ule",
                                                             **load_options)
    assert train.shape[0] == transfer.shape[0]
Example #3
0
def test_all_utlc():
    """
    Load each of the small UTLC datasets and sanity-check the result.

    For every dataset name the train/valid/test arrays are loaded with
    normalization enabled, a few statistics of the training array are
    printed, and the three arrays are checked to be numpy arrays that
    agree on the size of their second dimension.
    """
    skip_if_no_data()
    # 'rita' is not tested, because it requires a lot of memory and is slow
    for name in ['avicenna', 'harry', 'ule']:
        # Every print below receives one pre-formatted string, so the output
        # is the same whether `print` is a statement (Python 2) or a
        # function (Python 3). The double space reproduces what
        # `print "Loading ", name` used to emit.
        print("Loading  %s" % name)
        train, valid, test = utlc.load_ndarray_dataset(name, normalize=True)
        print("dtype, max, min, mean, std")
        print("%s %s %s %s %s" % (train.dtype, train.max(), train.min(),
                                  train.mean(), train.std()))
        assert isinstance(train, numpy.ndarray), "train is not an ndarray in %s dataset" % name
        assert isinstance(valid, numpy.ndarray), "valid is not an ndarray in %s dataset" % name
        assert isinstance(test, numpy.ndarray), "test is not an ndarray in %s dataset" % name
        assert train.shape[1] == test.shape[1] == valid.shape[1], "shapes of datasets does not match for %s" % name
Example #4
0
def test_all_utlc():
    """
    Load each of the small UTLC datasets and sanity-check the result.

    For every dataset name the train/valid/test arrays are loaded with
    normalization enabled, a few statistics of the training array are
    printed, and the three arrays are checked to be numpy arrays that
    agree on the size of their second dimension.
    """
    skip_if_no_data()
    # not testing rita, because it requires a lot of memory and is slow
    for name in ['avicenna', 'harry', 'ule']:
        # Each message is built as a single string before printing so that
        # this test runs unchanged on Python 2 (print statement) and on
        # Python 3 (print function) with identical output. The double
        # space matches the output of the former `print "Loading ", name`.
        print("Loading  %s" % name)
        train, valid, test = utlc.load_ndarray_dataset(name, normalize=True)
        print("dtype, max, min, mean, std")
        stats = (train.dtype, train.max(), train.min(), train.mean(),
                 train.std())
        print(" ".join(str(value) for value in stats))
        assert isinstance(train, numpy.ndarray)
        assert isinstance(valid, numpy.ndarray)
        assert isinstance(test, numpy.ndarray)
        assert train.shape[1] == test.shape[1] == valid.shape[1]
Example #5
0
def test_all_utlc():
    """
    Smoke-test the loading of the small UTLC datasets.

    Each dataset is loaded with normalization enabled; statistics of the
    training array are printed and the train/valid/test arrays are
    checked to be numpy arrays sharing the same second dimension.
    """
    skip_if_no_data()
    # 'rita' is left out on purpose: it needs a lot of memory and is slow
    dataset_names = ['avicenna', 'harry', 'ule']
    for name in dataset_names:
        print("Loading ", name)
        loaded = utlc.load_ndarray_dataset(name, normalize=True)
        train, valid, test = loaded
        print("dtype, max, min, mean, std")
        print(train.dtype,
              train.max(),
              train.min(),
              train.mean(),
              train.std())
        # Same order as the individual checks: train, then valid, then test
        for split in (train, valid, test):
            assert isinstance(split, numpy.ndarray)
        assert train.shape[1] == test.shape[1] == valid.shape[1]
Example #6
0
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : dict
        Loading options. The keys read directly by this function are:

        * ``'dataset'`` (required): dataset name, passed as the first
          argument of the loader.
        * ``'sparse'`` (default False): if true, `load_sparse_dataset`
          is used instead of `load_ndarray_dataset`.
        * ``'normalize_on_the_fly'`` (default False): if true, the result
          of `load_ndarray_dataset` is returned untouched.
        * ``'normalize'`` (default True): selects how each loaded array
          is wrapped in a shared variable (`sharedX` when true,
          `theano.shared` otherwise).

        The parameter names of the selected loader (all but the first)
        are handed to `subdict` together with `conf`, and the result is
        passed to the loader as keyword arguments.

    Returns
    -------
    list or loader result
        * Sparse case: a list ``[data[0], valid, test]``, with the
          loader's fourth item appended when it returned more than three.
        * ``normalize_on_the_fly`` case: whatever `load_ndarray_dataset`
          returned, unchanged.
        * Otherwise: a list holding one shared variable per loaded item.
    """
    logger.info('... loading dataset')

    # inspect.getargspec was removed in Python 3.11; getfullargspec is its
    # replacement and also keeps the positional argument names at index 0.
    # `or` is used (rather than a getattr default) so that getargspec is
    # only looked up when getfullargspec is missing.
    argspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    # Special case for sparse format
    if conf.get('sparse', False):
        # Skip the first parameter: it is supplied positionally below
        expected = argspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            # presumably the fourth item is the transfer data -- confirm
            # against load_sparse_dataset
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = argspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            # NOTE(review): theano._asarray is called without a dtype here;
            # confirm that the Theano version in use accepts that.
            return theano.shared(theano._asarray(data_x), borrow=True)

    # A list comprehension (not `map`) so that a real list is returned on
    # Python 3 as well, consistent with the sparse branch above.
    return [shared_dataset(data_x) for data_x in data]
Example #7
0
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : dict
        Loading options. The keys read directly by this function are:

        * ``'dataset'`` (required): dataset name, passed as the first
          argument of the loader.
        * ``'sparse'`` (default False): if true, `load_sparse_dataset`
          is used instead of `load_ndarray_dataset`.
        * ``'normalize_on_the_fly'`` (default False): if true, the result
          of `load_ndarray_dataset` is returned untouched.
        * ``'normalize'`` (default True): selects how each loaded array
          is wrapped in a shared variable (`sharedX` when true,
          `theano.shared` otherwise).

        The parameter names of the selected loader (all but the first)
        are handed to `subdict` together with `conf`, and the result is
        passed to the loader as keyword arguments.

    Returns
    -------
    list or loader result
        * Sparse case: a list ``[data[0], valid, test]``, with the
          loader's fourth item appended when it returned more than three.
        * ``normalize_on_the_fly`` case: whatever `load_ndarray_dataset`
          returned, unchanged.
        * Otherwise: a list holding one shared variable per loaded item.
    """
    # A single parenthesized string prints identically whether `print` is
    # a statement (Python 2) or a function (Python 3).
    print('... loading dataset')

    # inspect.getargspec was removed in Python 3.11; getfullargspec is its
    # replacement and also keeps the positional argument names at index 0.
    # `or` is used (rather than a getattr default) so that getargspec is
    # only looked up when getfullargspec is missing.
    argspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    # Special case for sparse format
    if conf.get('sparse', False):
        # Skip the first parameter: it is supplied positionally below
        expected = argspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            # presumably the fourth item is the transfer data -- confirm
            # against load_sparse_dataset
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = argspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            # NOTE(review): theano._asarray is called without a dtype here;
            # confirm that the Theano version in use accepts that.
            return theano.shared(theano._asarray(data_x), borrow=True)

    # A list comprehension (not `map`) so that a real list is returned on
    # Python 3 as well, consistent with the sparse branch above.
    return [shared_dataset(data_x) for data_x in data]
Example #8
0
    def __init__(self, which_set, standardize):
        """
        Load one split of the 'avicenna' dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            Which split to keep: 'train', 'valid' or 'test'.
        standardize : bool
            If true, subtract the mean and divide by the standard
            deviation, both taken along axis 0 of the concatenation of
            the three splits.

        Raises
        ------
        AssertionError
            If `which_set` is not one of the three accepted names.
        """
        train, valid, test = utlc.load_ndarray_dataset('avicenna')

        if which_set == 'train':
            self.X = train
        elif which_set == 'valid':
            self.X = valid
        elif which_set == 'test':
            self.X = test
        else:
            # Raised explicitly rather than via `assert False`: assert
            # statements are removed under `python -O`, which would leave
            # self.X unset. The exception type is kept for callers.
            raise AssertionError(
                "which_set must be 'train', 'valid' or 'test', got %r"
                % (which_set,))

        if standardize:
            # Statistics come from all three splits together
            union = N.concatenate([train, valid, test], axis=0)
            # perform mean and std in float64 to avoid losing
            # too much numerical precision
            self.X -= union.mean(axis=0, dtype='float64')
            std = union.std(axis=0, dtype='float64')
            # Floor the deviation so the division below stays bounded
            std[std < 1e-3] = 1e-3
            self.X /= std
Example #9
0
    def __init__(self, which_set, standardize):
        """
        Load one split of the 'avicenna' dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            Which split to keep: 'train', 'valid' or 'test'.
        standardize : bool
            If true, subtract the mean and divide by the standard
            deviation, both taken along axis 0 of the concatenation of
            the three splits.

        Raises
        ------
        AssertionError
            If `which_set` is not one of the three accepted names.
        """
        train, valid, test = utlc.load_ndarray_dataset('avicenna')

        if which_set == 'train':
            self.X = train
        elif which_set == 'valid':
            self.X = valid
        elif which_set == 'test':
            self.X = test
        else:
            # Raised explicitly rather than via `assert False`: assert
            # statements are removed under `python -O`, which would leave
            # self.X unset. The exception type is kept for callers.
            raise AssertionError(
                "which_set must be 'train', 'valid' or 'test', got %r"
                % (which_set,))

        if standardize:
            # Statistics come from all three splits together
            union = N.concatenate([train, valid, test], axis=0)
            # perform mean and std in float64 to avoid losing
            # too much numerical precision
            self.X -= union.mean(axis=0, dtype='float64')
            std = union.std(axis=0, dtype='float64')
            # Floor the deviation so the division below stays bounded
            std[std < 1e-3] = 1e-3
            self.X /= std
Example #10
0
    def __init__(self, which_set, standardize):
        """
        Load one split of the 'avicenna' dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            Which split to keep: 'train', 'valid' or 'test'.
        standardize : bool
            If true, subtract the mean and divide by the standard
            deviation, both taken along axis 0 of the concatenation of
            the three splits.

        Raises
        ------
        AssertionError
            If `which_set` is not one of the three accepted names.
        """
        train, valid, test = utlc.load_ndarray_dataset('avicenna')

        # Pick the requested split; the for/else falls through to the
        # error only when no name matched.
        candidates = (('train', train), ('valid', valid), ('test', test))
        for split_name, split in candidates:
            if which_set == split_name:
                self.X = split
                break
        else:
            # Raised explicitly rather than via `assert False`: assert
            # statements are removed under `python -O`, which would leave
            # self.X unset. The exception type is kept for callers.
            raise AssertionError(
                "which_set must be 'train', 'valid' or 'test', got %r"
                % (which_set,))

        if standardize:
            # Statistics come from all three splits together
            union = N.concatenate([train, valid, test], axis=0)
            # perform mean and std in float64 to avoid losing
            # too much numerical precision
            self.X -= union.mean(axis=0, dtype='float64')
            std = union.std(axis=0, dtype='float64')
            # Floor the deviation so the division below stays bounded
            std[std < 1e-3] = 1e-3
            self.X /= std