# Initial weighting: uniform
original_weights_train = numpy.ones(len(original_train))
original_weights_test = numpy.ones(len(original_test))

# Plot settings
hist_settings = {'bins': 50, 'density': True, 'alpha': 0.7}

################
# Plot initial #
################
draw_distributions('initial.png', columns, original, target, original_weights,
                   filename_as_title=True,
                   xlim=((0, 50), (0, 12)),
                   ylim=((0, 0.07), (0, 1.7)),
                   nrows=1, ncols=2,
                   hist_settings=hist_settings)
draw_distributions('initial_train.png', columns, original_train, target_train,
                   original_weights_train,
                   filename_as_title=True,
                   xlim=((0, 50), (0, 12)),
                   ylim=((0, 0.07), (0, 1.7)),
                   nrows=1, ncols=2,
                   hist_settings=hist_settings)
draw_distributions('initial_test.png', columns, original_test, target_test,
                   original_weights_test,
                   filename_as_title=True,
                   xlim=((0, 50), (0, 12)),
                   ylim=((0, 0.07), (0, 1.7)),
                   nrows=1, ncols=2,
                   hist_settings=hist_settings)
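# Optional quantitative cross-check (illustrative sketch, assuming `original`
# and `target` support column indexing, e.g. pandas DataFrames over the names
# in `columns`): hep_ml ships a weighted two-sample Kolmogorov-Smirnov
# distance, which summarises the agreement seen in the plots above as a
# single number per variable.
from hep_ml.metrics_utils import ks_2samp_weighted

for column in columns:
    ks = ks_2samp_weighted(original[column], target[column],
                           weights1=original_weights,
                           weights2=numpy.ones(len(target), dtype=float))
    print('Initial KS distance for %s: %.4f' % (column, ks))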
###############################
reweighter = reweight.GBReweighter(n_estimators=500,
                                   learning_rate=0.1,
                                   max_depth=3,
                                   min_samples_leaf=1000,
                                   gb_args={'subsample': 0.4})
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)

# Validate the reweighting rule on the test part by comparing 1d projections
draw_distributions('gbr5_validate_ConeMult.png', [columns[0]],
                   original_test, target_test, gb_weights_test,
                   filename_as_title=True,
                   nrows=1, ncols=1,
                   hist_settings=hist_settings)
draw_distributions('gbr5_validate_VtxMult.png', [columns[1]],
                   original_test, target_test, gb_weights_test,
                   filename_as_title=True,
                   nrows=1, ncols=1,
                   hist_settings=hist_settings)
draw_distributions('gbr5_validate_ConePtAsym.png', [columns[2]],
                   original_test, target_test, gb_weights_test,
                   filename_as_title=True,
                   nrows=1, ncols=1,
                   hist_settings=hist_settings)
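# Optional cross-check (illustrative sketch): train a classifier to separate
# the reweighted original test sample from the target test sample. If the
# reweighting works, the classifier cannot tell the samples apart and the
# ROC AUC stays close to 0.5. Assumes both samples are pandas DataFrames over
# `columns`; the classifier settings below are illustrative only.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

check_data = numpy.concatenate([original_test[columns], target_test[columns]])
check_labels = numpy.concatenate([numpy.zeros(len(original_test)),
                                  numpy.ones(len(target_test))])
# Rescale the predicted weights so both classes carry the same total weight
check_weights = numpy.concatenate([gb_weights_test / gb_weights_test.sum() * len(target_test),
                                   numpy.ones(len(target_test))])

X_tr, X_ts, y_tr, y_ts, w_tr, w_ts = train_test_split(
    check_data, check_labels, check_weights, random_state=42)
check_clf = GradientBoostingClassifier(n_estimators=50, subsample=0.6, max_depth=3)
check_clf.fit(X_tr, y_tr, sample_weight=w_tr)
auc = roc_auc_score(y_ts, check_clf.predict_proba(X_ts)[:, 1], sample_weight=w_ts)
print('Classifier AUC after reweighting (0.5 is ideal): %.3f' % auc)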
# Divide target samples into training and test parts
target_train, target_test = train_test_split(target)
original_weights_train = numpy.ones(len(original_train))
original_weights_test = numpy.ones(len(original_test))

# Note that we actually have very little data
print('Length of original data: %s\nLength of target data: %s'
      % (len(original), len(target)))

#####################################################
# Plot the unmodified original and target data sets #
#####################################################
draw_distributions('initial.png', columns, original, target, original_weights)
print_statistics(columns, original, target, original_weights)

# Train parts of the original and target distributions
draw_distributions('initial_train.png', columns, original_train, target_train,
                   original_weights_train)
# Test parts of the original and target distributions
draw_distributions('initial_test.png', columns, original_test, target_test,
                   original_weights_test)

###############################
# Gradient boosted Reweighter #
###############################
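# GBReweighter learns a multiplicative per-event weight from a gradient-boosted
# ensemble of regression trees: n_estimators sets the number of trees,
# learning_rate how much each tree contributes, max_depth the depth of each
# tree, and min_samples_leaf the minimum number of events per leaf (kept large
# here so the weights stay smooth given the limited statistics). gb_args is
# forwarded to the underlying gradient boosting; 'subsample' is the fraction
# of events drawn for each tree.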
reweighter = reweight.GBReweighter(n_estimators=50,
                                   learning_rate=0.1,
                                   max_depth=3,
                                   min_samples_leaf=1000,
                                   gb_args={'subsample': 0.4})
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)

# Validate reweighting rule on the test part comparing 1d projections
draw_distributions('gb_weights_test.png', columns, original_test, target_test,
                   gb_weights_test,
                   filename_as_title=True,
                   xlim=((0, 50), (0, 12)),
                   ylim=((0, 0.10), (0, 3.0)),
                   nrows=1, ncols=2,
                   hist_settings=hist_settings)

######################
# Folding Reweighter #
######################
# Define base reweighter
reweighter_base = reweight.GBReweighter(n_estimators=50,
                                        learning_rate=0.1,
                                        max_depth=3,
                                        min_samples_leaf=1000,
                                        )
reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)

# No need to divide the data into train/test parts
reweighter.fit(original, target, target_weight=target_weights)

# Predict weights for the input file
folding_weights = reweighter.predict_weights(toReweight)

draw_distributions('GBR4_validate.png', columns, toReweight, target,
                   folding_weights,
                   filename_as_title=True,
                   # yscale=('log',),
                   nrows=2, ncols=2,
                   hist_settings=hist_settings)
draw_distributions('GBR4_before_after.png', columns, toReweight, toReweight,
                   None, folding_weights,
                   filename_as_title=True,
                   yscale=('linear', 'linear', 'linear', 'linear'),
                   nrows=2,