Example #1
# Imports needed to run this snippet standalone (module paths as in pylearn2).
from pylearn2.datasets import utlc
from pylearn2.testing.skip import skip_if_no_data


def test_sparse_ule():
    skip_if_no_data()
    # Test loading of transfer data
    train, valid, test, transfer = utlc.load_sparse_dataset("ule",
                                                            normalize=True,
                                                            transfer=True)
    assert train.shape[0] == transfer.shape[0]
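
Without transfer=True the same loader returns just the three standard splits; a minimal sketch reusing the "ule" name and normalize=True from the test above (this is also the call the next example makes):

train, valid, test = utlc.load_sparse_dataset("ule", normalize=True)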
Example #2
# Imports needed to run this snippet standalone (module paths as in pylearn2).
import numpy
import scipy.sparse

from pylearn2.datasets import utlc
from pylearn2.testing.skip import skip_if_no_data


def test_all_sparse_utlc():
    skip_if_no_data()
    for name in ['harry', 'terry', 'ule']:
        print("Loading sparse", name)
        train, valid, test = utlc.load_sparse_dataset(name, normalize=True)
        nb_elem = numpy.prod(train.shape)
        # Clamp the range to include zero, since implicit zeros are elements too.
        mi = min(0, train.data.min())
        ma = max(0, train.data.max())
        mean = float(train.data.sum()) / nb_elem
        print(name, "dtype, max, min, mean, nb non-zero, nb element, %sparse")
        print(train.dtype, ma, mi, mean, train.nnz, nb_elem,
              (nb_elem - float(train.nnz)) / nb_elem)
        print(name, "max, min, mean, std (all stats on non-zero elements)")
        print(train.data.max(), train.data.min(),
              train.data.mean(), train.data.std())
        assert scipy.sparse.issparse(train), \
            "train is not sparse for %s dataset" % name
        assert scipy.sparse.issparse(valid), \
            "valid is not sparse for %s dataset" % name
        assert scipy.sparse.issparse(test), \
            "test is not sparse for %s dataset" % name
        assert train.shape[1] == test.shape[1] == valid.shape[1], \
            "shapes of sparse %s dataset do not match" % name
Example #3
# Excerpt from pylearn2: helpers such as subdict, sharedX, logger and the
# load_*_dataset functions are defined in the surrounding module.
def load_data(conf):
    """
    Load the dataset specified by the parameters in the ``conf`` dictionary.

    Parameters
    ----------
    conf : dict
        Experiment configuration; the keys read here are 'dataset',
        'sparse', 'normalize' and 'normalize_on_the_fly', plus any
        keyword arguments accepted by the underlying load_*_dataset
        function (e.g. 'transfer').

    Returns
    -------
    list
        The train, valid and test sets (plus a transfer set when the
        loader returns one), wrapped in Theano shared variables unless
        the data is sparse or normalized on the fly.
    """
    logger.info('... loading dataset')

    # Special case for sparse format
    if conf.get('sparse', False):
        expected = inspect.getargspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = inspect.getargspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            return theano.shared(theano._asarray(data_x), borrow=True)

    # list() so the result is indexable under Python 3 as well
    return list(map(shared_dataset, data))
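
A minimal sketch of driving load_data: 'dataset', 'sparse' and 'normalize' are the keys read above, while 'transfer' is an assumption carried over from the load_sparse_dataset calls in the tests.

conf = {
    'dataset': 'ule',     # which UTLC dataset to load
    'sparse': True,       # take the sparse branch of load_data
    'normalize': True,    # forwarded to load_sparse_dataset
    'transfer': True,     # assumption: also return the transfer set
}
train, valid, test, transfer = load_data(conf)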
Example #4
# Imports needed to run this snippet standalone (module paths as in pylearn2).
import numpy
import scipy.sparse

from pylearn2.datasets import utlc
from pylearn2.testing.skip import skip_if_no_data


def test_all_sparse_utlc():
    skip_if_no_data()
    for name in ['harry', 'terry', 'ule']:
        print("Loading sparse", name)
        train, valid, test = utlc.load_sparse_dataset(name, normalize=True)
        nb_elem = numpy.prod(train.shape)
        mi = min(0, train.data.min())
        ma = max(0, train.data.max())
        su = train.data.sum()
        mean = float(su) / nb_elem
        print(name, "dtype, max, min, mean, nb non-zero, nb element, %sparse")
        print(train.dtype, ma, mi, mean, train.nnz,
              nb_elem, (nb_elem - float(train.nnz)) / nb_elem)
        print(name, "max, min, mean, std (all stats on non-zero elements)")
        print(train.data.max(), train.data.min(),
              train.data.mean(), train.data.std())
        assert scipy.sparse.issparse(train)
        assert scipy.sparse.issparse(valid)
        assert scipy.sparse.issparse(test)
        assert train.shape[1] == test.shape[1] == valid.shape[1]
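
For context, skip_if_no_data turns these tests into no-ops on machines without the datasets installed. A hypothetical re-implementation of such a guard, assuming pylearn2's PYLEARN2_DATA_PATH convention:

import os
from unittest import SkipTest

def skip_if_no_data():
    # Hypothetical sketch, not pylearn2's actual implementation: skip
    # when the data root is unset, so the UTLC files cannot be located.
    if 'PYLEARN2_DATA_PATH' not in os.environ:
        raise SkipTest('PYLEARN2_DATA_PATH not set; UTLC data unavailable')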