def test_pca_destructor():
    """Check a PCA linear projection followed by an independent destructor.

    The composite is validated with ``check_destructor`` using
    ``is_canonical=False``.
    """
    pca_projection = LinearProjector(linear_estimator=PCA(), orthogonal=False)
    stages = [pca_projection, IndependentDestructor()]
    composite = CompositeDestructor(destructors=stages)
    assert check_destructor(composite, is_canonical=False)
def test_random_linear_householder_destructor():
    """Check a random orthogonal projection followed by an independent destructor.

    The composite is validated with ``check_destructor`` using
    ``is_canonical=False``.
    """
    # Since n_components=1, a Householder matrix will be used
    householder_estimator = RandomOrthogonalEstimator(n_components=1)
    random_projection = LinearProjector(
        linear_estimator=householder_estimator,
        orthogonal=False,
    )
    composite = CompositeDestructor(
        destructors=[random_projection, IndependentDestructor()],
    )
    assert check_destructor(composite, is_canonical=False)
def get_rbig_model(bins="auto", bounds=0.1, alpha=1e-10):
    """Assemble a three-stage composite destructor (uniformize, gaussianize, rotate).

    Stages, in the order they are placed in the composite:

    1. Marginal uniformization: an ``IndependentDestructor`` wrapping an
       ``IndependentDensity`` whose univariate estimator is a
       ``HistogramUnivariateDensity``.
    2. Marginal Gaussianization: an ``IndependentInverseCdf``.
    3. Rotation: a ``LinearProjector`` driven by ``PCA``.

    Parameters
    ----------
    bins, bounds, alpha
        Forwarded unchanged to ``HistogramUnivariateDensity``.

    Returns
    -------
    CompositeDestructor
        Composite built from clones of the three stages above.
    """
    # Stage I: histogram estimator used to map each feature to U(0, 1).
    histogram = HistogramUnivariateDensity(bounds=bounds, bins=bins, alpha=alpha)
    uniformize = IndependentDestructor(
        IndependentDensity(univariate_estimators=histogram)
    )

    # Stage II: marginal Gaussianization.
    gaussianize = IndependentInverseCdf()

    # Stage III: linear projection that rotates the features (PCA).
    rotate = LinearProjector(linear_estimator=PCA())

    # Each stage is cloned before being handed to the composite.
    stages = (uniformize, gaussianize, rotate)
    return CompositeDestructor([clone(stage) for stage in stages])
def _get_toy_destructors_and_names():
    """Build the toy-experiment destructors and their display names.

    Four families are assembled and concatenated in this order: shallow
    baselines, deep random-linear destructors, deep Gaussian-mixture
    destructors, and deep tree destructors (random trees, then mlpack density
    trees).

    Returns
    -------
    destructors : list
        The configured destructor objects.
    destructor_names : list of str
        One label per destructor, in the same order.
    """

    def _histogram_destructor(n_bins, smoothing):
        # Independent histogram destructor on the unit interval.
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=HistogramUnivariateDensity(
                    bins=n_bins, alpha=smoothing, bounds=[0, 1],
                ),
            ),
        )

    def _rotated(projector, canonical):
        # inverse CDF -> linear projection -> back to canonical space -> canonical
        return CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            projector,
            IndependentDestructor(),
            canonical,
        ])

    def _deep(canonical, n_extend):
        # Wrap a canonical destructor in a cross-validated deep destructor.
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # ---------------- Baseline shallow destructors ----------------
    pca_gaussian = CompositeDestructor(
        destructors=[
            LinearProjector(linear_estimator=PCA(), orthogonal=False),
            IndependentDestructor(),
        ],
    )
    spherical_mixture = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            covariance_type='spherical',
            n_components=20,
        ),
    )
    single_random_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=RandomTreeEstimator(
                        min_samples_leaf=20, max_leaf_nodes=50,
                    ),
                    node_destructor=_histogram_destructor(10, 10),
                ),
            ),
        ],
    )
    single_density_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                    uniform_weight=0.001,
                ),
            ),
        ],
    )
    shallow_models = [
        pca_gaussian, spherical_mixture, single_random_tree, single_density_tree,
    ]
    shallow_labels = ['Gaussian', 'Mixture', 'SingleRandTree', 'SingleDensityTree']

    # ---------------- Linear destructors ----------------
    linear_alphas = [10]  # [1, 10, 100]
    # A single projector instance is shared by every linear deep destructor.
    shared_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(),
        orthogonal=True,
    )
    canonical_histograms = [
        _histogram_destructor(20, smoothing) for smoothing in linear_alphas
    ]
    linear_models = []
    for canonical in canonical_histograms:
        # Need to extend since random projections
        linear_models.append(
            _deep(_rotated(shared_projector, canonical), n_extend=20)
        )
    linear_labels = ['RandLin (%g)' % smoothing for smoothing in linear_alphas]

    # ---------------- Mixture destructors ----------------
    first_component_weights = [0.5]  # [0.1, 0.5, 0.9]
    canonical_mixtures = []
    for weight in first_component_weights:
        canonical_mixtures.append(
            CompositeDestructor(destructors=[
                IndependentInverseCdf(),
                AutoregressiveDestructor(
                    density_estimator=FirstFixedGaussianMixtureDensity(
                        covariance_type='spherical',
                        n_components=20,
                        fixed_weight=weight,
                    ),
                ),
            ])
        )
    mixture_models = [
        _deep(canonical, n_extend=5) for canonical in canonical_mixtures
    ]
    mixture_labels = ['GausMix (%.2g)' % weight for weight in first_component_weights]

    # ---------------- Tree destructors ----------------
    # Random trees
    random_tree_alphas = [10]  # [1, 10, 100]
    canonical_trees = [
        TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
                node_destructor=_histogram_destructor(10, smoothing),
            ),
        )
        for smoothing in random_tree_alphas
    ]
    tree_labels = ['RandTree (%g)' % smoothing for smoothing in random_tree_alphas]

    # Density trees using mlpack
    density_tree_weights = [0.5]  # [0.1, 0.5, 0.9]
    for weight in density_tree_weights:
        canonical_trees.append(
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                    uniform_weight=weight,
                ),
            )
        )
    tree_labels.extend(
        ['DensityTree (%.2g)' % weight for weight in density_tree_weights]
    )

    # Add a random rotation in front of each tree destructor; unlike the linear
    # family, every tree gets its own freshly constructed projector.
    rotated_trees = [
        _rotated(
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            canonical,
        )
        for canonical in canonical_trees
    ]

    # Make deep destructors.
    tree_models = []
    for canonical, label in zip(rotated_trees, tree_labels):
        # Density trees don't need to extend as much as random trees
        extend_by = 50 if 'Rand' in label else 5
        tree_models.append(_deep(canonical, n_extend=extend_by))

    # ---------------- Collect everything and set CV parameters ----------------
    destructors = shallow_models + linear_models + mixture_models + tree_models
    destructor_names = shallow_labels + linear_labels + mixture_labels + tree_labels
    for model in destructors:
        if 'cv' in model.get_params():
            # NOTE(review): `cv` is not bound inside this function; presumably a
            # module-level name -- confirm it is defined in this file.
            model.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in model.get_params():
            model.set_params(max_canonical_destructors=1)
    return destructors, destructor_names