# We collect the various base boosting (hyper)parameters. options_with_lad = dict(n_regressors=n_regressors, boosting_loss=ls_loss, line_search_options=line_search_options_with_lad) options_with_huber = dict(n_regressors=n_regressors, boosting_loss=huber_loss, line_search_options=line_search_options_with_huber) print('Building scoring DataFrame for each single-target regression subtask.') test_error = [] for index in range(5): if index in [0, 1, 4]: # We make an instance of Regressor with our choice of stacking # for the single-target regression subtasks: 1, 2, and 5. reg = Regressor(regressor_choice=stacking_basis_fn, params=paper_params(index), target_index=index, stacking_options=dict(layers=stack), base_boosting_options=options_with_huber) elif index == 3: # We make an instance of Regressor with our choice of stacking # for the single-target regression subtask: 4. reg = Regressor(regressor_choice=stacking_basis_fn, params=paper_params(index), target_index=index, stacking_options=dict(layers=stack), base_boosting_options=options_with_lad) else: # We make an instance of Regressor with our choice of ridge # regression for the single-target regression subtask: 3. # The parameter alpha denotes the regularization strength
# Gather the base boosting (hyper)parameters into a single mapping.
base_boosting_options = {
    'n_regressors': n_regressors,
    'boosting_loss': boosting_loss,
    'line_search_options': line_search_options,
}

# Work on the third single-target regression subtask, which is the
# least difficult subtask for the base regressor. The index is
# zero-based, so subtask 3 corresponds to index 2.
index = 2

# Training data sizes for the curve: 40 evenly spaced fractions, from a
# quarter of the training data up to the full training set.
learning_curve_sizes = np.linspace(0.25, 1.0, 40)

# Keyword arguments that configure the regressor itself: the parameters
# chosen for this subtask, the stacking layers, and the base boosting
# options collected above.
regressor_kwargs = {
    'pipeline_transform': 'quantilenormal',
    'pipeline_memory': None,
    'chain_order': None,
    'params': paper_params(index),
    'target_index': index,
    'stacking_options': {'layers': stack},
    'base_boosting_options': base_boosting_options,
}

# Keyword arguments that configure the figure; the plot is not saved.
figure_kwargs = {
    'title': 'Augmented learning curve',
    'ylabel': 'Mean absolute error',
    'alpha': 0.1,
    'train_color': 'b',
    'cv_color': 'orange',
    'y_ticks_step': 0.15,
    'fill_std': False,
    'legend_loc': 'best',
    'save_plot': False,
    'path': None,
}

# Generate the augmented learning curve with our choice of stacking for
# single-target regression subtask 3, using 5-fold cross-validation.
plot_learning_curve(
    regressor_choice=basis_fn,
    X=X_train,
    y=y_train,
    verbose=1,
    cv=5,
    train_sizes=learning_curve_sizes,
    return_incumbent_score=True,
    **figure_kwargs,
    **regressor_kwargs,
)