def test_tree_destructor_with_node_destructor():
    """Run ``check_destructor`` on tree destructors with two node destructors.

    The tree density is built once with an ``IdentityDestructor`` at the
    nodes and once with a histogram-based ``IndependentDestructor``.
    """
    histogram_density = HistogramUnivariateDensity(
        bins=10, alpha=100, bounds=[0, 1],
    )
    histogram_node_destructor = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=histogram_density,
        ),
    )

    candidates = (IdentityDestructor(), histogram_node_destructor)
    for candidate in candidates:
        tree_density = TreeDensity(
            tree_estimator=RandomTreeEstimator(max_leaf_nodes=3, random_state=0),
            node_destructor=candidate,
            uniform_weight=0.9,
        )
        assert check_destructor(TreeDestructor(tree_density=tree_density))
def _get_pair_canonical_destructor(model_name): if model_name == 'image-pairs-tree': return TreeDestructor(tree_density=TreeDensity( tree_estimator=MlpackDensityTreeEstimator( max_depth=None, min_samples_leaf=100, max_leaf_nodes=50, ), get_tree=None, node_destructor=None, uniform_weight=0.5, )) elif model_name == 'image-pairs-copula': return _get_copula_destructor() else: raise ValueError('Invalid model name "%s"')
def test_inverse_canonical_destructor():
    """Run ``check_destructor`` on inverses of three fitted destructors.

    Each fitted destructor is trained on a fresh ``rng.rand(10, 2)`` draw
    from a random state seeded with 0; the draws happen in the same order
    as the three cases below.
    """
    def _check_inverse_of(fitted):
        # Invert the fitted destructor and run the generic destructor check.
        inverse = get_inverse_canonical_destructor(fitted)
        assert check_destructor(inverse)

    rng = check_random_state(0)

    # Case 1: identity destructor.
    _check_inverse_of(IdentityDestructor().fit(rng.rand(10, 2)))

    # Case 2: tree destructor, inverted once before being handed to the
    # helper (which inverts it again). uniform_weight is kept at 0.99; the
    # original note says a high value is needed to pass the identity test.
    fitted_tree = TreeDestructor(TreeDensity(uniform_weight=0.99)).fit(
        rng.rand(10, 2)
    )
    _check_inverse_of(get_inverse_canonical_destructor(fitted_tree))

    # Case 3: histogram-based independent destructor; alpha must be high
    # to pass the identity test.
    histogram = HistogramUnivariateDensity(bins=10, alpha=1000, bounds=[0, 1])
    fitted_histogram = IndependentDestructor(
        independent_density=IndependentDensity(univariate_estimators=histogram)
    ).fit(rng.rand(10, 2))
    _check_inverse_of(fitted_histogram)
def _get_toy_destructors_and_names():
    """Build the list of toy destructors and the parallel list of names.

    The result is ordered as: four shallow baselines, the random-linear
    deep destructor(s), the Gaussian-mixture deep destructor(s), then the
    tree-based deep destructors (random tree first, density tree second).

    After construction, every destructor exposing a ``cv`` parameter gets
    ``cv`` set from the name ``cv``, which is not defined in this function
    and must be resolvable from an enclosing/module scope. Every destructor
    exposing ``max_canonical_destructors`` gets it set to 1.

    Returns
    -------
    destructors : list
        The constructed (unfitted) destructors.
    destructor_names : list of str
        Display names, aligned index-by-index with ``destructors``.
    """
    def _histogram_destructor(bins, alpha):
        # Independent destructor over histogram densities bounded to [0, 1].
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=HistogramUnivariateDensity(
                    bins=bins, alpha=alpha, bounds=[0, 1],
                ),
            ),
        )

    def _deep_cv(canonical_destructor, n_extend):
        # Deep destructor that starts from a plain independent destructor.
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical_destructor,
            n_extend=n_extend,
        )

    # ---- Baseline shallow destructors ----
    pca_gaussian = CompositeDestructor(destructors=[
        LinearProjector(linear_estimator=PCA(), orthogonal=False),
        IndependentDestructor(),
    ])
    gaussian_mixture = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            covariance_type='spherical', n_components=20,
        ),
    )
    single_random_tree = CompositeDestructor(destructors=[
        IndependentDestructor(),
        TreeDestructor(tree_density=TreeDensity(
            tree_estimator=RandomTreeEstimator(
                min_samples_leaf=20, max_leaf_nodes=50,
            ),
            node_destructor=_histogram_destructor(bins=10, alpha=10),
        )),
    ])
    single_density_tree = CompositeDestructor(destructors=[
        IndependentDestructor(),
        TreeDestructor(tree_density=TreeDensity(
            tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
            uniform_weight=0.001,
        )),
    ])
    all_destructors = [
        pca_gaussian, gaussian_mixture, single_random_tree, single_density_tree,
    ]
    all_names = ['Gaussian', 'Mixture', 'SingleRandTree', 'SingleDensityTree']

    # ---- Linear destructors ----
    # A single projector instance is shared by every linear destructor.
    shared_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(), orthogonal=True,
    )
    for alpha in [10]:  # alternatives: [1, 10, 100]
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),  # Project to inf real space
            shared_projector,  # Random linear projector
            IndependentDestructor(),  # Project to canonical space
            _histogram_destructor(bins=20, alpha=alpha),  # Histogram destructor
        ])
        # Need to extend since random projections
        all_destructors.append(_deep_cv(canonical, n_extend=20))
        all_names.append('RandLin (%g)' % alpha)

    # ---- Mixture destructors ----
    for weight in [0.5]:  # alternatives: [0.1, 0.5, 0.9]
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            AutoregressiveDestructor(
                density_estimator=FirstFixedGaussianMixtureDensity(
                    covariance_type='spherical',
                    n_components=20,
                    fixed_weight=weight,
                ),
            ),
        ])
        all_destructors.append(_deep_cv(canonical, n_extend=5))
        all_names.append('GausMix (%.2g)' % weight)

    # ---- Tree destructors ----
    named_trees = []
    # Random trees
    for alpha in [10]:  # alternatives: [1, 10, 100]
        random_tree = TreeDestructor(tree_density=TreeDensity(
            tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
            node_destructor=_histogram_destructor(bins=10, alpha=alpha),
        ))
        named_trees.append(('RandTree (%g)' % alpha, random_tree))
    # Density trees using mlpack
    for weight in [0.5]:  # alternatives: [0.1, 0.5, 0.9]
        density_tree = TreeDestructor(tree_density=TreeDensity(
            tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
            uniform_weight=weight,
        ))
        named_trees.append(('DensityTree (%.2g)' % weight, density_tree))

    for tree_name, tree in named_trees:
        # Add a random rotation in front of each tree destructor.
        rotated = CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            IndependentDestructor(),
            tree,
        ])
        # Density trees don't need to extend as much as random trees
        extend_by = 50 if 'Rand' in tree_name else 5
        all_destructors.append(_deep_cv(rotated, n_extend=extend_by))
        all_names.append(tree_name)

    # ---- Apply shared parameters ----
    for estimator in all_destructors:
        if 'cv' in estimator.get_params():
            estimator.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in estimator.get_params():
            estimator.set_params(max_canonical_destructors=1)

    return all_destructors, all_names
def test_mlpack_density_tree_destructor():
    """Run ``check_destructor`` on a tree destructor using the mlpack estimator."""
    mlpack_estimator = MlpackDensityTreeEstimator(max_leaf_nodes=10)
    density = TreeDensity(tree_estimator=mlpack_estimator)
    assert check_destructor(TreeDestructor(tree_density=density))