コード例 #1
0
def test_pca_destructor():
    """Check a PCA linear projection followed by an independent destructor.

    The composite is validated with ``check_destructor`` using
    ``is_canonical=False``.
    """
    pca_projection = LinearProjector(linear_estimator=PCA(), orthogonal=False)
    stages = [pca_projection, IndependentDestructor()]
    composite = CompositeDestructor(destructors=stages)
    assert check_destructor(composite, is_canonical=False)
コード例 #2
0
def test_random_linear_householder_destructor():
    """Check a one-component random orthogonal projection plus independent destructor.

    The composite is validated with ``check_destructor`` using
    ``is_canonical=False``.
    """
    # Since n_components=1, a Householder matrix will be used
    householder_estimator = RandomOrthogonalEstimator(n_components=1)
    projection = LinearProjector(
        linear_estimator=householder_estimator,
        orthogonal=False,
    )
    composite = CompositeDestructor(
        destructors=[projection, IndependentDestructor()],
    )
    assert check_destructor(composite, is_canonical=False)
コード例 #3
0
def get_rbig_model(bins="auto", bounds=0.1, alpha=1e-10):
    """Assemble a three-stage composite destructor.

    The stages, in order, are:

    1. Marginal uniformization -- an ``IndependentDestructor`` built on an
       ``IndependentDensity`` whose univariate estimator is a histogram
       (intended to map the data X to uniform U(0, 1)).
    2. Marginal Gaussianization -- ``IndependentInverseCdf``.
    3. Rotation -- a ``LinearProjector`` driven by ``PCA``.

    Parameters
    ----------
    bins, bounds, alpha
        Passed through unchanged to ``HistogramUnivariateDensity``.

    Returns
    -------
    CompositeDestructor
        Composite holding a ``clone`` of each of the three stages.
    """
    # Stage 1: histogram-based marginal uniformization ("D_theta_1").
    histogram = HistogramUnivariateDensity(bins=bins, bounds=bounds, alpha=alpha)
    uniformize = IndependentDestructor(
        IndependentDensity(univariate_estimators=histogram))

    # Stage 2: marginal Gaussianization ("D_theta_2").
    gaussianize = IndependentInverseCdf()

    # Stage 3: PCA rotation of the features ("D_theta_3").
    rotate = LinearProjector(linear_estimator=PCA())

    # The composite receives clones, not the stage objects built above.
    stages = [clone(stage) for stage in (uniformize, gaussianize, rotate)]
    return CompositeDestructor(stages)
コード例 #4
0
def _get_toy_destructors_and_names():
    """Build the toy-experiment destructors together with their labels.

    Four families are assembled, in this order: shallow baselines, deep
    random-linear models, deep Gaussian-mixture models and deep tree models.
    Afterwards every model whose parameters include ``cv`` has it set, and
    every model whose parameters include ``max_canonical_destructors`` has it
    set to 1.

    NOTE(review): ``cv`` is not assigned inside this function; it is resolved
    from an enclosing (presumably module-level) scope -- confirm it exists.

    Returns
    -------
    destructors : list
        The configured destructor objects.
    destructor_names : list of str
        Labels aligned index-by-index with ``destructors``.
    """

    def histogram_destructor(bins, alpha):
        # Independent destructor using a histogram univariate density with
        # bounds [0, 1].
        histogram = HistogramUnivariateDensity(bins=bins, alpha=alpha, bounds=[0, 1])
        return IndependentDestructor(
            independent_density=IndependentDensity(univariate_estimators=histogram)
        )

    def deep_cv(canonical, n_extend):
        # DeepDestructorCV that starts from a default IndependentDestructor.
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # ---- Baseline shallow destructors ----
    pca_gaussian = CompositeDestructor(destructors=[
        LinearProjector(linear_estimator=PCA(), orthogonal=False),
        IndependentDestructor(),
    ])
    gaussian_mixture = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            n_components=20,
            covariance_type='spherical',
        ),
    )
    single_random_tree = CompositeDestructor(destructors=[
        IndependentDestructor(),
        TreeDestructor(tree_density=TreeDensity(
            tree_estimator=RandomTreeEstimator(min_samples_leaf=20, max_leaf_nodes=50),
            node_destructor=histogram_destructor(bins=10, alpha=10),
        )),
    ])
    single_density_tree = CompositeDestructor(destructors=[
        IndependentDestructor(),
        TreeDestructor(tree_density=TreeDensity(
            tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
            uniform_weight=0.001,
        )),
    ])
    models = [pca_gaussian, gaussian_mixture, single_random_tree, single_density_tree]
    labels = ['Gaussian', 'Mixture', 'SingleRandTree', 'SingleDensityTree']

    # ---- Deep random-linear destructors ----
    linear_alphas = [10]  # [1, 10, 100]
    # A single projector instance is placed in every linear composite below.
    shared_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(), orthogonal=True,
    )
    for alpha in linear_alphas:
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),  # Project to inf real space
            shared_projector,  # Random linear projector
            IndependentDestructor(),  # Project to canonical space
            histogram_destructor(bins=20, alpha=alpha),  # Histogram in canonical space
        ])
        # Need to extend since random projections
        models.append(deep_cv(canonical, n_extend=20))
        labels.append('RandLin (%g)' % alpha)

    # ---- Deep Gaussian-mixture destructors ----
    mixture_weights = [0.5]  # [0.1, 0.5, 0.9]
    for weight in mixture_weights:
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            AutoregressiveDestructor(
                density_estimator=FirstFixedGaussianMixtureDensity(
                    covariance_type='spherical',
                    n_components=20,
                    fixed_weight=weight,
                ),
            ),
        ])
        models.append(deep_cv(canonical, n_extend=5))
        labels.append('GausMix (%.2g)' % weight)

    # ---- Deep tree destructors ----
    tree_specs = []  # (label, bare tree destructor) pairs

    # Random trees
    tree_alphas = [10]  # [1, 10, 100]
    for alpha in tree_alphas:
        bare_tree = TreeDestructor(tree_density=TreeDensity(
            tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
            node_destructor=histogram_destructor(bins=10, alpha=alpha),
        ))
        tree_specs.append(('RandTree (%g)' % alpha, bare_tree))

    # Density trees using mlpack
    density_tree_weights = [0.5]  # [0.1, 0.5, 0.9]
    for weight in density_tree_weights:
        bare_tree = TreeDestructor(tree_density=TreeDensity(
            tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
            uniform_weight=weight,
        ))
        tree_specs.append(('DensityTree (%.2g)' % weight, bare_tree))

    for label, bare_tree in tree_specs:
        # Add a random rotation in front of each tree destructor.
        rotated_tree = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            IndependentDestructor(),
            bare_tree,
        ])
        # Density trees don't need to extend as much as random trees
        extend_by = 50 if 'Rand' in label else 5
        models.append(deep_cv(rotated_tree, n_extend=extend_by))
        labels.append(label)

    # ---- Apply the CV setting to every model that accepts it ----
    for model in models:
        if 'cv' in model.get_params():
            model.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in model.get_params():
            model.set_params(max_canonical_destructors=1)

    return models, labels