Exemple #1
0
def test_tree_destructor_with_node_destructor():
    """Run ``check_destructor`` on a tree destructor for two node destructors.

    The tree destructor is built once with an ``IdentityDestructor`` at the
    nodes and once with a histogram-based ``IndependentDestructor``.
    """
    histogram = HistogramUnivariateDensity(bins=10, alpha=100, bounds=[0, 1])
    histogram_node = IndependentDestructor(
        independent_density=IndependentDensity(univariate_estimators=histogram)
    )

    candidates = (IdentityDestructor(), histogram_node)
    for candidate in candidates:
        # Same random tree configuration for every candidate; only the
        # node destructor varies.
        density = TreeDensity(
            tree_estimator=RandomTreeEstimator(max_leaf_nodes=3,
                                               random_state=0),
            node_destructor=candidate,
            uniform_weight=0.9,
        )
        assert check_destructor(TreeDestructor(tree_density=density))
def _get_pair_canonical_destructor(model_name):
    if model_name == 'image-pairs-tree':
        return TreeDestructor(tree_density=TreeDensity(
            tree_estimator=MlpackDensityTreeEstimator(
                max_depth=None,
                min_samples_leaf=100,
                max_leaf_nodes=50,
            ),
            get_tree=None,
            node_destructor=None,
            uniform_weight=0.5,
        ))
    elif model_name == 'image-pairs-copula':
        return _get_copula_destructor()
    else:
        raise ValueError('Invalid model name "%s"')
Exemple #3
0
def test_inverse_canonical_destructor():
    """Run ``check_destructor`` on inverse canonical destructors.

    Three fitted destructors are used as the starting point: an identity
    destructor, a tree destructor and a histogram-based independent
    destructor. Each is fitted on a fresh ``(10, 2)`` sample drawn from a
    random state seeded with 0.
    """
    rng = check_random_state(0)

    # Case 1: identity destructor.
    identity_fitted = IdentityDestructor().fit(rng.rand(10, 2))
    assert check_destructor(get_inverse_canonical_destructor(identity_fitted))

    # Case 2: tree destructor. The fitted tree is inverted once before it
    # is handed to get_inverse_canonical_destructor again, so the object
    # being checked is the inverse of an inverse.
    # Original author's note: the smoothing must be high to pass the
    # identity test (here uniform_weight=0.99).
    tree_fitted = TreeDestructor(TreeDensity(uniform_weight=0.99)).fit(
        rng.rand(10, 2)
    )
    inverted_tree = get_inverse_canonical_destructor(tree_fitted)
    assert check_destructor(get_inverse_canonical_destructor(inverted_tree))

    # Case 3: histogram-based independent destructor.
    # Original author's note: alpha must be high to pass the identity test.
    histogram = HistogramUnivariateDensity(bins=10, alpha=1000, bounds=[0, 1])
    independent = IndependentDestructor(
        independent_density=IndependentDensity(univariate_estimators=histogram)
    )
    independent_fitted = independent.fit(rng.rand(10, 2))
    assert check_destructor(
        get_inverse_canonical_destructor(independent_fitted)
    )
def _get_toy_destructors_and_names():
    """Build the toy-data destructors together with their display names.

    Four groups are assembled, in this order: baseline shallow destructors,
    random-linear deep destructors, Gaussian-mixture deep destructors and
    tree-based deep destructors.

    Returns
    -------
    destructors : list
        The destructor objects, in the order described above.
    destructor_names : list of str
        One display name per destructor, in the same order.

    Notes
    -----
    ``cv`` is not defined inside this function; it is read from the
    enclosing scope when a destructor exposes a ``cv`` parameter.
    """

    def _histogram_destructor(bins, alpha):
        # Independent destructor using a histogram density bounded to [0, 1].
        histogram = HistogramUnivariateDensity(
            bins=bins, alpha=alpha, bounds=[0, 1]
        )
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=histogram
            )
        )

    def _deep(canonical, n_extend):
        # Wrap a canonical destructor in a DeepDestructorCV that starts from
        # an independent destructor.
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # ---- Baseline shallow destructors ----
    pca_projector = LinearProjector(linear_estimator=PCA(), orthogonal=False)
    gaussian_full = CompositeDestructor(
        destructors=[pca_projector, IndependentDestructor()],
    )

    mixture_20 = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            covariance_type='spherical', n_components=20,
        )
    )

    random_tree_density = TreeDensity(
        tree_estimator=RandomTreeEstimator(min_samples_leaf=20, max_leaf_nodes=50),
        node_destructor=_histogram_destructor(bins=10, alpha=10),
    )
    random_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(tree_density=random_tree_density),
        ]
    )

    mlpack_tree_density = TreeDensity(
        tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
        uniform_weight=0.001,
    )
    density_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(tree_density=mlpack_tree_density),
        ]
    )

    baseline_destructors = [gaussian_full, mixture_20, random_tree, density_tree]
    baseline_names = ['Gaussian', 'Mixture', 'SingleRandTree', 'SingleDensityTree']

    # ---- Linear destructors ----
    linear_alphas = [10]  # [1, 10, 100]
    # A single projector instance is shared by every linear destructor.
    shared_random_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(), orthogonal=True
    )
    linear_destructors = []
    for alpha in linear_alphas:
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),  # Project to inf real space
            shared_random_projector,  # Random linear projector
            IndependentDestructor(),  # Project to canonical space
            # Histogram destructor in canonical space
            _histogram_destructor(bins=20, alpha=alpha),
        ])
        # Need to extend since random projections
        linear_destructors.append(_deep(canonical, n_extend=20))
    linear_names = ['RandLin (%g)' % alpha for alpha in linear_alphas]

    # ---- Mixture destructors ----
    mixture_weights = [0.5]  # [0.1, 0.5, 0.9]
    mixture_destructors = []
    for weight in mixture_weights:
        mixture_density = FirstFixedGaussianMixtureDensity(
            covariance_type='spherical',
            n_components=20,
            fixed_weight=weight,
        )
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            AutoregressiveDestructor(density_estimator=mixture_density),
        ])
        mixture_destructors.append(_deep(canonical, n_extend=5))
    mixture_names = ['GausMix (%.2g)' % weight for weight in mixture_weights]

    # ---- Tree destructors ----
    # Random trees with histogram node destructors
    tree_alphas = [10]  # [1, 10, 100]
    canonical_trees = [
        TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
                node_destructor=_histogram_destructor(bins=10, alpha=alpha),
            )
        )
        for alpha in tree_alphas
    ]
    # Density trees using mlpack
    tree_uniform_weights = [0.5]  # [0.1, 0.5, 0.9]
    canonical_trees += [
        TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                uniform_weight=weight,
            )
        )
        for weight in tree_uniform_weights
    ]
    tree_names = (
        ['RandTree (%g)' % alpha for alpha in tree_alphas]
        + ['DensityTree (%.2g)' % weight for weight in tree_uniform_weights]
    )

    tree_destructors = []
    for tree, name in zip(canonical_trees, tree_names):
        # Add a random rotation in front of each tree destructor
        rotated = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            IndependentDestructor(),
            tree,
        ])
        # Density trees don't need to extend as much as random trees
        n_extend = 50 if 'Rand' in name else 5
        tree_destructors.append(_deep(rotated, n_extend=n_extend))

    # ---- Collect everything and set the CV-related parameters ----
    destructors = (
        baseline_destructors + linear_destructors
        + mixture_destructors + tree_destructors
    )
    destructor_names = baseline_names + linear_names + mixture_names + tree_names
    for estimator in destructors:
        if 'cv' in estimator.get_params():
            estimator.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in estimator.get_params():
            estimator.set_params(max_canonical_destructors=1)

    return destructors, destructor_names
def test_mlpack_density_tree_destructor():
    """Run ``check_destructor`` on a tree destructor backed by an mlpack density tree."""
    estimator = MlpackDensityTreeEstimator(max_leaf_nodes=10)
    density = TreeDensity(tree_estimator=estimator)
    assert check_destructor(TreeDestructor(tree_density=density))