Python Features Exemples, skl_groups.features.Features Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_features.py Projet : cimor/skl-groups

def test_feats_add():
    """Check ``+`` on Features with Features, lists of bags, and bad operands."""
    bags = [np.random.normal(size=(np.random.randint(10, 100), 10))
            for _ in xrange(20)]
    labels = np.arange(20)

    head = Features(bags[:15], labels=labels[:15])
    tail = Features(bags[15:], labels=labels[15:])

    # Features + Features
    combined = head + tail
    assert len(combined) == 20
    assert combined[:15] == head

    # Features + plain list of bags: the result's ``meta`` must be falsy
    with_list = head + bags[15:]
    assert len(with_list) == 20
    assert not with_list.meta
    assert np.all(with_list[16] == bags[16])

    # Features + single-element list
    with_one = head + [bags[18]]
    assert np.all(with_one[15] == bags[18])

    # reflected operand order: list + Features
    list_first = bags[15:] + head
    assert np.all(list_first[0] == bags[15])

    one_first = [bags[15]] + head
    assert np.all(one_first[0] == bags[15])
    assert one_first[1:] == head.bare()

    # a non-bag operand is rejected on either side
    wrong_type(lambda: head + 12)
    wrong_type(lambda: 12 + head)

    # an ndarray built from the bag list is rejected with ValueError
    assert_raises(ValueError, lambda: head + np.asarray(bags))

Exemple #2

0

Afficher le fichier

Fichier : test_features.py Projet : cimor/skl-groups

def test_copy_constructor():
    """Check constructing a Features object from another Features object."""
    bags = [np.random.normal(size=(np.random.randint(10, 100), 10))
            for _ in xrange(20)]
    source = Features(bags, label=np.arange(20))

    # default copy-construction reuses the label and bag arrays
    clone = Features(source)
    assert clone.label is source.label
    assert clone.features[0] is source.features[0]
    assert clone == source

    # bare=True: the result compares equal to the raw bag list
    bare_clone = Features(source, bare=True)
    assert bare_clone == bags
    assert bare_clone.bare() == bare_clone

    # an explicit label keyword overrides the source's label
    relabeled = Features(source, label=np.ones(20))
    assert np.all(relabeled.label == 1)

    # bare=True plus the same label again equals the source
    bare_relabeled = Features(source, bare=True, label=np.arange(20))
    assert bare_relabeled == source

    # copy=True must not share memory with the source bags
    duplicated = Features(source, copy=True)
    assert duplicated == source
    assert not np.may_share_memory(duplicated.features[0],
                                   source.features[0])

    # copy-construction from a stacked object
    stacked = source.copy()
    stacked.make_stacked()
    stacked_clone = Features(stacked)
    assert stacked_clone == stacked

Exemple #3

0

Afficher le fichier

Fichier : test_preprocessing.py Projet : zshwuhan/skl-groups

def test_pca():
    """Check BagPCA output dimension, round-tripping, and argument validation."""
    bags = [
        np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
        for _ in xrange(50)
    ]
    reference = Features(bags, stack=True)

    # fit and transform as separate calls, keeping 3 components
    three_pca = BagPCA(k=3)
    three_pca.fit(bags)
    reduced = three_pca.transform(bags)
    assert reduced.dim == 3

    # just make sure the varfrac code path runs
    BagPCA(varfrac=.3).fit_transform(bags)

    # keeping all 20 dimensions, inverse_transform must recover the input
    full_pca = BagPCA(k=20)
    projected = full_pca.fit_transform(bags)
    recovered = full_pca.inverse_transform(projected)
    recovered.make_stacked()
    assert np.allclose(reference.stacked_features, recovered.stacked_features)

    randomized = BagPCA(k=5, randomize=True).fit_transform(bags)
    assert randomized.dim == 5

    # argument combinations the constructor is expected to reject
    assert_raises(TypeError, lambda: BagPCA(randomize=True))
    assert_raises(TypeError, lambda: BagPCA(mle_components=True, k=12))
    assert BagPCA(mle_components=True)

Exemple #4

0

Afficher le fichier

def test_bagofwords_basic():
    """Check BagOfWords with KMeans and MiniBatchKMeans quantizers.

    For each quantizer the transformed output must have one row per bag and
    one column per codeword, be non-negative, and have row sums equal to the
    number of points in the corresponding bag.
    """
    n_codewords = 10
    dim = 5
    kmeans = KMeans(n_clusters=n_codewords, max_iter=100, n_init=3,
                    random_state=47)
    bow = BagOfWords(kmeans)

    np.random.seed(42)
    bags = [np.random.randn(np.random.randint(30, 100), dim)
            for _ in xrange(50)]
    bag_sizes = [b.shape[0] for b in bags]

    bowed = bow.fit_transform(bags)
    assert bowed.shape == (len(bags), n_codewords)
    assert bow.codewords_.shape == (n_codewords, dim)
    assert np.all(bowed >= 0)
    assert np.all(np.sum(bowed, 1) == bag_sizes)

    # fitting on a Features object and then transforming the raw list must
    # reproduce the fit_transform result
    bow.fit(Features(bags))
    bowed2 = bow.transform(bags)
    assert np.all(bowed == bowed2)
    assert bow.codewords_.shape == (n_codewords, dim)

    minikmeans = MiniBatchKMeans(n_clusters=n_codewords, max_iter=100,
                                 random_state=47)
    minibow = BagOfWords(minikmeans)
    # transforming before fitting must fail
    assert_raises(AttributeError, lambda: minibow.transform(bags))
    minibowed = minibow.fit_transform(bags)
    assert minibowed.shape == bowed.shape
    # These checks previously re-tested ``bowed`` (copy-paste slip), leaving
    # the MiniBatchKMeans output unverified.
    assert np.all(minibowed >= 0)
    assert np.all(np.sum(minibowed, 1) == bag_sizes)

Exemple #5

0

Afficher le fichier

Fichier : test_divs_knn.py Projet : zshwuhan/skl-groups

def test_knn_memory():
    """Check KNNDivergenceEstimator results are stable when ``memory`` is set.

    The estimator is given a temporary directory as ``memory``. A second
    ``transform`` must emit no INFO-level log records, and a second
    ``fit_transform`` must not log any message starting with
    "Getting divergences"; both must reproduce the first result exactly.
    """
    import shutil  # local import: only needed to remove the temp directory

    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features(
        [np.random.randn(np.random.randint(30, 100), dim) for _ in xrange(n)])

    tdir = tempfile.mkdtemp()
    try:
        div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
        Ks = (3, 4)
        est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks, memory=tdir)
        res1 = est.fit_transform(bags)

        with LogCapture('skl_groups.divergences.knn',
                        level=logging.INFO) as l:
            res2 = est.transform(bags)
            assert len(l.records) == 0
        assert np.all(res1 == res2)

        with LogCapture('skl_groups.divergences.knn',
                        level=logging.INFO) as l:
            res3 = est.fit_transform(bags)
            for r in l.records:
                assert not r.message.startswith("Getting divergences")
        assert np.all(res1 == res3)
    finally:
        # mkdtemp() leaves cleanup to the caller; without this every test run
        # leaked a directory, even when an assertion failed.
        shutil.rmtree(tdir, ignore_errors=True)

Exemple #6

0

Afficher le fichier

Fichier : test_features.py Projet : cimor/skl-groups

def test_copying():
    """Check copy(), deepcopy(), .copy(stack=True) and pickling of Features.

    For both unstacked and stacked objects the copies must compare equal to
    the originals and keep the ``stacked`` flag. Shallow copies must share the
    ``label`` array, while deep copies and pickle round-trips must not.
    """
    bags = [np.random.normal(size=(np.random.randint(10, 100), 10))
            for _ in xrange(20)]

    unstacked = Features(bags, copy=False, stack=False, label=np.arange(20))
    stacked = Features(bags, stack=True, label=np.arange(20))

    assert unstacked == stacked

    # shallow copy of unstacked: shares label, but not the bag data
    unstacked_copy = copy(unstacked)
    assert not unstacked_copy.stacked
    assert stacked == unstacked_copy == unstacked
    assert unstacked_copy.label is unstacked.label
    assert not np.may_share_memory(unstacked[0], unstacked_copy[0])

    unstacked_deep = deepcopy(unstacked)
    assert not unstacked_deep.stacked
    assert stacked == unstacked_deep == unstacked
    assert unstacked_deep.label is not unstacked.label

    stacked_copy = copy(stacked)
    assert stacked_copy.stacked
    assert stacked == stacked_copy == unstacked
    assert stacked_copy.label is stacked.label

    stacked_deep = deepcopy(stacked)
    assert stacked_deep.stacked
    assert stacked == stacked_deep == unstacked
    assert stacked_deep.label is not stacked.label

    unstacked_stacked = unstacked.copy(stack=True)
    assert unstacked_stacked.stacked
    # Previously compared against ``stacked`` on both sides; every sibling
    # check compares against both representations, so do the same here.
    assert stacked == unstacked_stacked == unstacked
    assert unstacked_stacked.label is unstacked.label

    # pickle round-trips
    unstacked_pickled = pickle.loads(pickle.dumps(unstacked))
    assert unstacked == unstacked_pickled
    assert not unstacked_pickled.stacked
    assert unstacked_pickled.label is not unstacked.label

    stacked_pickled = pickle.loads(pickle.dumps(stacked))
    assert stacked == stacked_pickled
    assert stacked_pickled.stacked
    assert stacked_pickled.label is not stacked.label

Exemple #7

0

Afficher le fichier

def distribution_divergence(X_s, X_l, k=10, n_jobs=4, max_dim=50):
    """
    Compute the l2 and js divergences from samples of two distributions.

    The implementation uses `skl-groups` (``KNNDivergenceEstimator``), which
    implements non-parametric estimation of divergences. The estimator is
    fitted on ``X_s`` and then applied to ``X_l``.

    Args:
        + X_s: a 2-D numpy array containing the point cloud in state space
        + X_l: a 2-D numpy array containing the point cloud in latent space
        + k: the single K passed to the estimator as ``Ks=[k]``
        + n_jobs: forwarded to the estimator (defaults to the previously
          hard-coded value 4)
        + max_dim: when ``X_s`` has more columns than this, nothing is
          estimated and both divergences are reported as -1 (defaults to the
          previously hard-coded value 50)

    Returns:
        A dict with keys 'l2_divergence' and 'js_divergence'.

    Raises:
        ValueError: if the two point clouds have a different number of
            columns (only checked when estimation is actually attempted).
    """

    # We discard cases with too large dimensions
    if X_s.shape[1] > max_dim:
        return {'l2_divergence': -1., 'js_divergence': -1.}

    # Fail early with a clear message instead of deep inside the estimator
    if X_s.shape[1] != X_l.shape[1]:
        raise ValueError(
            'X_s and X_l must have the same number of columns, got %d and %d'
            % (X_s.shape[1], X_l.shape[1]))

    # We instantiate the divergence object
    div = KNNDivergenceEstimator(div_funcs=['l2', 'js'],
                                 Ks=[k],
                                 n_jobs=n_jobs,
                                 clamp=True)

    # We turn both data to float32
    X_s = X_s.astype(np.float32)
    X_l = X_l.astype(np.float32)

    # We generate Features: each point cloud is a single bag holding all rows
    f_s = Features(X_s, n_pts=[X_s.shape[0]])
    f_l = Features(X_l, n_pts=[X_l.shape[0]])

    # We create the knn graph
    div.fit(X=f_s)

    # We compute the divergences; after squeeze() the two remaining entries
    # follow the order of div_funcs above
    l2, js = div.transform(X=f_l).squeeze()

    # We construct the returned dictionary
    return {'l2_divergence': l2, 'js_divergence': js}

Exemple #8

0

Afficher le fichier

Fichier : nongauss.py Projet : manodeep/nonGaussLike

def kNNdiv_Kernel(X_white,
                  kernel,
                  Knn=3,
                  div_func='renyi:.5',
                  Nref=None,
                  compwise=True,
                  njobs=1,
                  W_ica_inv=None):
    ''' `div_func` kNN divergence estimate between some data X_white and a
    distribution specified by `kernel`.

    Parameters
    ----------
    X_white : 2-D array, one row per sample.
    kernel : object with a ``sample(n)`` method, or (when `compwise`) a
        sequence of such objects, one per column of `X_white`.
    Knn : int or sequence of int. The K value(s) handed to the estimator.
    div_func : divergence specification string passed to the estimator.
    Nref : number of reference samples to draw from the kernel(s).
        Required when `compwise` is True.
    compwise : if True, each column of the reference sample is drawn
        independently from ``kernel[icomp]``.
    njobs : forwarded to the estimator as ``n_jobs``.
    W_ica_inv : optional matrix; when given, the reference sample is
        right-multiplied by its transpose.

    Returns
    -------
    A scalar when a single K is used, otherwise a float array with one entry
    per K. Each value is entry ``[0][i][0][1]`` of the estimator output
    (first divergence function, i-th K, bag 0 = X_white, bag 1 = reference).

    Raises
    ------
    TypeError : `Knn` is neither an integer nor a sequence.
    ValueError : `compwise` is set and the number of kernels does not match
        the number of columns, or `Nref` is missing.
    '''
    # Normalise Knn up front; previously any type other than int/list left
    # `Knns` unbound and surfaced later as a confusing name error.
    if isinstance(Knn, (int, np.integer)):
        Knns = [int(Knn)]
    elif isinstance(Knn, (list, tuple, np.ndarray)):
        Knns = list(Knn)
    else:
        raise TypeError('Knn must be an int or a sequence of ints, got %r' %
                        type(Knn).__name__)

    n_comp = X_white.shape[1]

    # construct reference "bag"
    if compwise:
        # component wise: there should be one kernel per component
        if n_comp != len(kernel):
            raise ValueError(
                'compwise=True needs one kernel per component: got %d '
                'kernels for %d components' % (len(kernel), n_comp))
        if Nref is None:
            raise ValueError('Nref must be specified when compwise=True')
        ref_dist = np.zeros((Nref, n_comp))
        for icomp in range(n_comp):
            samp = kernel[icomp].sample(Nref)
            # some sample() implementations return a tuple; only its first
            # element is used (presumably the samples -- TODO confirm)
            if isinstance(samp, tuple):
                samp = samp[0]
            ref_dist[:, icomp] = samp.flatten()
    else:
        samp = kernel.sample(Nref)
        ref_dist = samp[0] if isinstance(samp, tuple) else samp
    if W_ica_inv is not None:
        ref_dist = np.dot(ref_dist, W_ica_inv.T)

    # estimate divergence
    kNN = KNNDivergenceEstimator(div_funcs=[div_func],
                                 Ks=Knns,
                                 version='slow',
                                 clamp=False,
                                 n_jobs=njobs)
    feat = Features([X_white, ref_dist])
    div_knn = kNN.fit_transform(feat)
    if len(Knns) == 1:
        return div_knn[0][0][0][1]
    # one value per K: iterate the K axis of the first divergence function
    return np.array([per_k[0][1] for per_k in div_knn[0]], dtype=float)

Exemple #9

0

Afficher le fichier

Fichier : test_preprocessing.py Projet : zshwuhan/skl-groups

def test_basic():
    """Check the bag-level standardizer, min-max scaler, normalizer and the
    generic BagPreprocesser wrapper."""
    bags = [
        np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
        for _ in xrange(50)
    ]
    stacked_input = Features(bags, stack=True)

    # standardizing: overall mean 0 and std 1 across the stacked points
    standardizer = BagStandardizer()
    standardized = standardizer.fit_transform(bags)
    standardized.make_stacked()
    std_points = standardized.stacked_features
    assert np.allclose(np.mean(std_points), 0)
    assert np.allclose(np.std(std_points), 1)

    # transforming a subset must agree with the same slice of the full result
    head = standardizer.transform(bags[:5])
    assert head == standardized[:5]

    # min-max scaling into [3, 7], checked per column
    scaler = BagMinMaxScaler([3, 7])
    scaled = scaler.fit_transform(stacked_input)
    scaled.make_stacked()
    scaled_points = scaled.stacked_features
    assert np.allclose(np.min(scaled_points, 0), 3)
    assert np.allclose(np.max(scaled_points, 0), 7)

    # l1 normalization: every point's absolute values sum to 1
    l1_normalizer = BagNormalizer('l1')
    l1_normed = l1_normalizer.fit_transform(Features(bags))
    l1_normed.make_stacked()
    row_l1_norms = np.sum(np.abs(l1_normed.stacked_features), 1)
    assert np.allclose(row_l1_norms, 1)

    class GetMean(BaseEstimator, TransformerMixin):
        """Transformer whose output has a single row regardless of input."""

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.mean(axis=1)[None, :]

    # wrapping such a transformer must be rejected at transform time
    wrapper = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: wrapper.transform(bags))

Exemple #10

0

Afficher le fichier

Fichier : test_divs_knn.py Projet : zshwuhan/skl-groups

def test_knn_sanity_slow():
    """Smoke-test KNNDivergenceEstimator: output shape, finiteness, and the
    error/warning paths exercised below."""
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bag_list = [np.random.randn(np.random.randint(30, 100), dim)
                for _ in xrange(n)]
    bags = Features(bag_list)

    # just make sure it runs
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks)
    res = est.fit_transform(bags)
    expected_shape = (len(div_funcs), len(Ks), n, n)
    assert res.shape == expected_shape
    assert np.all(np.isfinite(res))

    # test that JS blows up when there's a huge difference in bag sizes
    # (so that K is too low)
    with_huge_bag = bags + [np.random.randn(1000, dim)]
    assert_raises(ValueError, partial(est.fit_transform, with_huge_bag))

    # test fit() and then transform() with JS, with different-sized test bags
    est = KNNDivergenceEstimator(div_funcs=('js', ), Ks=(5, ))
    est.fit(bags, get_rhos=True)
    with LogCapture('skl_groups.divergences.knn',
                    level=logging.WARNING) as captured:
        res = est.transform([np.random.randn(300, dim)])
        assert res.shape == (1, 1, 1, len(bags))
        # exactly one warning is expected
        assert len(captured.records) == 1
        first_message = captured.records[0].message
        assert first_message.startswith('Y_rhos had a lower max_K')

    # test that passing div func more than once raises
    def fit_with_duplicate(df):
        duplicated = KNNDivergenceEstimator(div_funcs=[df, df])
        return duplicated.fit(bags)

    assert_raises(ValueError, fit_with_duplicate, 'kl')
    assert_raises(ValueError, fit_with_duplicate, 'renyi:.8')
    assert_raises(ValueError, fit_with_duplicate, 'l2')

Exemple #11

0

Afficher le fichier

def kNNdiv_general(
    X,
    Y,
    Knn=3,
    div_func='kl',
    alpha=None,
    njobs=1,
):  #renyi:.5
    """
    kNN divergence estimate for samples drawn from any two arbitrary distributions.

    Parameters
    ----------
    X, Y : 2-D arrays with the same number of columns, one row per sample.
    Knn : int or sequence of int. The K value(s) handed to the estimator.
    div_func : divergence name passed to the estimator.
    alpha : optional; when given, ``':<alpha>'`` is appended to `div_func`
        (e.g. ``div_func='renyi', alpha=.5`` becomes ``'renyi:0.5'``).
    njobs : forwarded to the estimator as ``n_jobs``.

    Returns
    -------
    A scalar when a single K is used, otherwise a float array with one entry
    per K. Each value is entry ``[0][i][0][1]`` of the estimator output
    (first divergence function, i-th K, bag 0 = X, bag 1 = Y).

    Raises
    ------
    ValueError : X and Y have a different number of columns.
    TypeError : `Knn` is neither an integer nor a sequence.
    """
    if Y.shape[1] != X.shape[1]:
        # the old message talked about "X_white" and a Gaussian reference,
        # which does not describe this function's inputs
        raise ValueError(
            'dimensions of X and Y do not match: %d vs %d'
            % (X.shape[1], Y.shape[1]))

    # Previously any Knn type other than int/list left `Knns` unbound.
    if isinstance(Knn, (int, np.integer)):
        Knns = [int(Knn)]
    elif isinstance(Knn, (list, tuple, np.ndarray)):
        Knns = list(Knn)
    else:
        raise TypeError('Knn must be an int or a sequence of ints, got %r' %
                        type(Knn).__name__)

    if alpha is not None:
        div_func = div_func + ':%s' % alpha

    kNN = KNNDivergenceEstimator(div_funcs=[div_func],
                                 Ks=Knns,
                                 version='slow',
                                 clamp=False,
                                 n_jobs=njobs)
    feat = Features([X, Y])
    div_knn = kNN.fit_transform(feat)

    if len(Knns) == 1:
        return div_knn[0][0][0][1]

    # one value per K: iterate the K axis of the first divergence function
    return np.array([per_k[0][1] for per_k in div_knn[0]], dtype=float)

Exemple #12

0

Afficher le fichier

Fichier : nongauss.py Projet : manodeep/nonGaussLike

def kNNdiv_gauss(X_white,
                 cov_X,
                 Knn=3,
                 div_func='renyi:.5',
                 gauss=None,
                 Nref=None,
                 njobs=1):
    ''' `div_func` kNN divergence estimate between X_white and a
    reference Gaussian with covariance matrix cov_X.

    Parameters
    ----------
    X_white : 2-D array, one row per sample.
    cov_X : covariance matrix of the zero-mean reference Gaussian; only used
        when `gauss` is not supplied.
    Knn : int or sequence of int. The K value(s) handed to the estimator.
    div_func : divergence specification string passed to the estimator.
    gauss : optional pre-drawn reference sample (2-D array). When None,
        `Nref` samples are drawn with ``np.random.multivariate_normal``.
    Nref : number of reference samples to draw; required if `gauss` is None.
    njobs : forwarded to the estimator as ``n_jobs``.

    Returns
    -------
    A scalar when a single K is used, otherwise a float array with one entry
    per K. Each value is entry ``[0][i][0][1]`` of the estimator output
    (first divergence function, i-th K, bag 0 = X_white, bag 1 = reference).

    Raises
    ------
    ValueError : neither `gauss` nor `Nref` is given, or the reference sample
        and X_white have a different number of columns.
    TypeError : `Knn` is neither an integer nor a sequence.
    '''
    n_dim = X_white.shape[1]
    if gauss is None:
        if Nref is None:
            raise ValueError(
                'either a reference sample `gauss` or `Nref` must be given')
        # Gaussian reference distribution
        gauss = np.random.multivariate_normal(np.zeros(n_dim), cov_X,
                                              size=Nref)
    if gauss.shape[1] != n_dim:
        raise ValueError(
            'dimension between X_white and Gaussian reference distribution do not match'
        )

    # Previously any Knn type other than int/list left `Knns` unbound.
    if isinstance(Knn, (int, np.integer)):
        Knns = [int(Knn)]
    elif isinstance(Knn, (list, tuple, np.ndarray)):
        Knns = list(Knn)
    else:
        raise TypeError('Knn must be an int or a sequence of ints, got %r' %
                        type(Knn).__name__)

    kNN = KNNDivergenceEstimator(div_funcs=[div_func],
                                 Ks=Knns,
                                 version='slow',
                                 clamp=False,
                                 n_jobs=njobs)
    feat = Features([X_white, gauss])
    div_knn = kNN.fit_transform(feat)
    if len(Knns) == 1:
        return div_knn[0][0][0][1]
    # one value per K: iterate the K axis of the first divergence function
    return np.array([per_k[0][1] for per_k in div_knn[0]], dtype=float)

Exemple #13

0

Afficher le fichier

Fichier : generateSimulationData.py Projet : sumedhasingla/COPDImageAnalysis

def computePairwiseSimilarities2(patients, y):
    """
    Compute pairwise similarities between bags (patients) with skl-groups.

    Inputs:
    - patients: the collection of patient features
    - y: labels (number of abnormal nodes) for each patient; attached to the
         Features object as ``labels``

    Returns:
    - sims: the output of the pipeline below applied to the bags
    * Note: per the original author, sims is a NxN symmetric matrix, where N
      is the number of patients
    """
    # wrap the bags; the labels ride along for convenience only
    bagged = Features(patients, labels=y)

    # kl divergence with 3 nearest neighbours
    kl_estimator = KNNDivergenceEstimator(div_funcs=['kl'],
                                          Ks=[3],
                                          n_jobs=-1,
                                          version='fast')

    # Stages: estimate divergences, pick entry (0, 0) (single div_func,
    # single K), symmetrize, turn into an RBF kernel scaled by the median,
    # then project with ProjectPSD.
    # TODO (from the original author): rewrite this to actually use
    # PairwisePicker correctly next time
    stages = [
        ('divs', kl_estimator),
        ('pick', PairwisePicker((0, 0))),
        ('symmetrize', Symmetrize()),
        ('rbf', RBFize(gamma=1, scale_by_median=True)),
        ('project', ProjectPSD()),
    ]

    return Pipeline(stages).fit_transform(bagged)

Exemple #14

0

Afficher le fichier

Fichier : test_divs_knn.py Projet : zshwuhan/skl-groups

def test_knn_version_consistency():
    """Check that the 'fast' and 'slow' estimator versions agree, and that
    running with n_jobs=-1 reproduces the default-n_jobs results exactly."""
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")
    if not have_accel:
        raise SkipTest("No skl-groups-accel, so skipping version consistency.")

    n = 20
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    make_estimator = partial(KNNDivergenceEstimator,
                             div_funcs=div_funcs, Ks=Ks)
    expected_shape = (len(div_funcs), len(Ks), n, n)

    for dim in [1, 7]:
        np.random.seed(47)
        bags = Features([
            np.random.randn(np.random.randint(30, 100), dim) for _ in xrange(n)
        ])

        by_version = {}
        for version in ('fast', 'slow', 'best'):
            out = make_estimator(version=version).fit_transform(bags)
            by_version[version] = out
            assert out.shape == expected_shape
            assert np.all(np.isfinite(out))

        # compare fast vs slow one divergence function at a time;
        # js is only compared to 1 decimal
        # TODO: debug JS differences
        for df, fast, slow in zip(div_funcs,
                                  by_version['fast'], by_version['slow']):
            precision = 1 if df == 'js' else 5
            assert_array_almost_equal(fast,
                                      slow,
                                      decimal=precision,
                                      err_msg="({}, dim {})".format(df, dim))

        # n_jobs=-1 must give bit-identical output
        parallel_fast = make_estimator(version='fast',
                                       n_jobs=-1).fit_transform(bags)
        assert np.all(by_version['fast'] == parallel_fast)

        parallel_slow = make_estimator(version='slow',
                                       n_jobs=-1).fit_transform(bags)
        assert np.all(by_version['slow'] == parallel_slow)

Exemple #15

0

Afficher le fichier

def _symmetrized_knn_divergences(feats, div_func, num_neighbors):
    """Estimate pairwise `div_func` kNN divergences between bags and symmetrize them."""
    # TODO (from the original author): rewrite this to actually use
    # PairwisePicker correctly next time
    pipeline = Pipeline([
        ('divs',
         KNNDivergenceEstimator(div_funcs=[div_func],
                                Ks=[num_neighbors],
                                n_jobs=-1,
                                version='fast')),
        # single div_func and single K, so pick entry (0, 0)
        ('pick', PairwisePicker((0, 0))),
        ('symmetrize', Symmetrize()),
    ])
    return pipeline.fit_transform(feats)


def _rbf_psd_kernel(sims):
    """RBFize `sims` (gamma=1, no median scaling) and apply ProjectPSD to the result."""
    rbfed = RBFize(gamma=1, scale_by_median=False).fit_transform(sims)
    return ProjectPSD().fit_transform(rbfed)


def computeSubjSubjKernel(subjects, div='KL', numNeighbors=3):
    """
    Start by computing the pairwise similarities between subjects
    using skl-groups. Then, for HE and KL, symmetrize, RBFize,
    and project the similarities onto a positive semi-definite space.

    Inputs:
    - subjects: the collection of patient features
    - div: which divergence to use. Options are
            - 'KL': Kullback-Leibler divergence, 'kl' in the estimator (default).
                    The symmetrized divergences are divided by their median
                    (upper triangle, diagonal included) before the RBF step.
            - 'HE': Hellinger divergence, 'hellinger' in the estimator.
                    The RBF step is applied to the unscaled divergences.
            - 'MMD': Maximum Mean Discrepancy, computed with mmd.rbf_mmd
    - numNeighbors: how many neighbors to look at. Default is 3

    Returns:
    - kernel: the kernel calculated using the pairwise similarities between
      each subject, or -1 (after printing an error) if `div` is not one of
      the options above
    * Note: per the original author, kernel is a NxN symmetric matrix, where
      N is the number of subjects
    """
    # Validate the cheap argument first so a typo in `div` does not cost the
    # construction of the Features object.
    if div not in ('KL', 'HE', 'MMD'):
        print("Error: divergence entered is not valid.")
        return -1

    feats = Features(subjects)

    if div == 'KL':
        sims = _symmetrized_knn_divergences(feats, 'kl', numNeighbors)
        # scale by the median by hand, then RBFize and project
        simsMedian = np.median(sims[np.triu_indices_from(sims)])
        return _rbf_psd_kernel(sims / simsMedian)

    if div == 'HE':
        sims = _symmetrized_knn_divergences(feats, 'hellinger', numNeighbors)
        # no median scaling for Hellinger (the scaling was disabled in the
        # original code; the median it still computed was unused)
        return _rbf_psd_kernel(sims)

    # div == 'MMD'
    # start by getting the median pairwise squared distance between points of
    #   a random subset (at most 2000 rows), used as a heuristic for choosing
    #   the bandwidth of the inner RBF kernel
    subset = np.vstack(feats)
    subset = subset[np.random.choice(subset.shape[0],
                                     min(2000, subset.shape[0]),
                                     replace=False)]
    subsetSquaredDists = euclidean_distances(subset, squared=True)
    # NOTE(review): `k` here is the diagonal offset of the upper triangle,
    # not a neighbour count; passing numNeighbors looks unintended (k=1 would
    # skip only the diagonal) -- confirm before changing.
    featsMedianSquaredDist = np.median(
        subsetSquaredDists[np.triu_indices_from(subsetSquaredDists,
                                                k=numNeighbors)],
        overwrite_input=True)

    # gamma (scaling factor, inverse of sigma) is computed by hand; the
    #   original author suspected the library multiplies where it should divide
    firstGamma = 1 / featsMedianSquaredDist

    # calculate the mmds (the second return value is not needed here)
    mmds, _x_diag = mmd.rbf_mmd(feats,
                                gammas=firstGamma,
                                squared=True,
                                ret_X_diag=True)

    # turn the squared MMD distances into a kernel: symmetrize, then apply
    #   exp(-d / median)
    mmds = Symmetrize().fit_transform(mmds)
    # NOTE(review): same diagonal-offset question as above
    mmdMedianSquaredDist = np.median(mmds[np.triu_indices_from(
        mmds, k=numNeighbors)])
    return np.exp(np.multiply(mmds, -1 / mmdMedianSquaredDist))

Exemple #16

0

Afficher le fichier

Fichier : test_features.py Projet : cimor/skl-groups

def test_features_basic():
    """Exercise basic Features behaviour: construction, indexing, equality,
    stacking, copying, and rejection of malformed input.

    Statement order matters: several checks rely on in-place writes made by
    earlier statements (``feats`` is built with ``copy=False``).
    """
    bags = [np.random.normal(size=(np.random.randint(10, 100), 10))
            for _ in xrange(20)]

    # repr of a tiny single-bag object must be non-empty
    assert repr(Features([[[8, 9], [12, 12]]]))

    feats = Features(bags, copy=False, stack=False, label=np.arange(20))
    assert len(feats) == 20
    assert feats.total_points == sum(bag.shape[0] for bag in bags)
    assert np.all(feats[3] == bags[3])
    assert np.all(feats.label == np.arange(20))
    assert repr(feats)
    assert feats.dtype == np.float64
    # with a label attached, feats differs from the raw list; bare() does not
    assert feats != bags
    assert feats.bare() == bags

    # indexing with a list of positions selects bags and their labels
    sub = feats[[5, 2]]
    assert np.all(sub.label == [5, 2])
    assert np.all(sub[0] == feats[5])
    assert np.all(sub[1] == feats[2])
    assert repr(sub)

    # copy=False: writing through feats must be visible in the input bags
    feats[4][0, 0] = 1000
    assert bags[4][0, 0] == 1000

    # the same checks must hold after stacking in place
    feats.make_stacked()
    assert feats != bags
    assert feats.bare() == bags
    assert len(feats) == 20
    assert feats.total_points == sum(bag.shape[0] for bag in bags)
    assert np.all(feats[3] == bags[3])
    assert np.all(feats.label == np.arange(20))
    # a write through indexing must show up in both .features and
    # .stacked_features
    feats[0][0, 0] = -800
    assert feats.features[0][0, 0] == -800
    assert feats.stacked_features[0, 0] == -800
    assert repr(feats)

    # string keys are not valid indices
    wrong_type(lambda: feats['label'])
    wrong_type(lambda: feats[['label']])
    wrong_type(lambda: feats[[3, 'label']])

    # copy() keeps the stacked state and is independent of the original
    cop = feats.copy()
    assert cop == feats
    assert cop.stacked
    cop.make_stacked()
    assert cop == feats
    cop[0][0, 0] = 12
    assert cop != feats
    assert repr(cop)

    # fs(...) builds a zero-argument callable that constructs Features lazily,
    # so the construction happens inside wrong_type / assert_raises
    fs = lambda *a, **kw: partial(Features, *a, **kw)

    # a single 2-D array (10 x 5) with missing or invalid second argument
    bags = np.random.normal(size=(10, 5))
    wrong_type(fs(bags))
    wrong_type(fs(bags, [[4], [12]]))
    wrong_type(fs(bags, []))
    wrong_type(fs(bags, [-3, 13]))
    wrong_type(fs(bags, [7.5, 2.5]))
    wrong_type(fs(bags, [7, 2]))
    wrong_type(fs(np.zeros((10, 0)), [7, 3]))

    # a list of bags combined with a second positional argument is rejected
    bags = [np.random.normal(size=(5, 8)), np.random.normal(size=(6, 8))]
    wrong_type(fs(bags, [5, 6]))
    # a 1-D bag is treated as a single point (1 x 2)
    assert np.all(
        Features([[5, 6], [[7, 9], [0, 0]]])[0] == np.reshape([5, 6], (1, 2)))
    wrong_type(fs([ [[[5]]] ]))
    wrong_type(fs([["hello", "there"]]))
    wrong_type(fs([[np.arange(10, dtype=int)], [np.arange(10, dtype=float)]]))
    wrong_type(fs([np.random.randn(8, 7), np.random.randn(0, 7)]))

    assert np.all(
        Features([[[1, 2]], [[3, 4]]], stack=True).stacked_features
        == [[1, 2], [3, 4]])

    # metadata whose length does not match the number of bags
    assert_raises(ValueError, fs(bags, labels=np.arange(3)))

    # passing total_points as a keyword must emit exactly one warning
    with warnings.catch_warnings(record=True) as w:
        Features(bags, total_points=[1, 2])
        assert len(w) == 1