コード例 #1
0
# Initial weighting: one unit weight per entry of the original train and
# test samples
original_weights_train, original_weights_test = (
    numpy.ones(len(sample)) for sample in (original_train, original_test))

# Histogram options passed to the plotting calls as `hist_settings`
hist_settings = dict(bins=50, density=True, alpha=0.7)


################
# Plot initial #
################

# Keyword options common to the initial-distribution plots
initial_plot_options = dict(
    filename_as_title=True,
    xlim=((0, 50), (0, 12)),
    ylim=((0, 0.07), (0, 1.7)),
    nrows=1,
    ncols=2,
    hist_settings=hist_settings,
)

# Full original sample against the full target sample
draw_distributions('initial.png', columns, original, target,
                   original_weights, **initial_plot_options)

# Training parts only
draw_distributions('initial_train.png', columns, original_train, target_train,
                   original_weights_train, **initial_plot_options)

draw_distributions('initial_test.png',
                   columns, original_test, target_test, original_weights_test,
                   filename_as_title=True,
                   xlim=((0, 50), (0, 12)),
                   ylim=((0, 0.07), (0, 1.7)),
コード例 #2
0
###############################

# Settings handed to the gradient-boosted reweighter
gb_reweighter_options = {
    'n_estimators': 500,
    'learning_rate': 0.1,
    'max_depth': 3,
    'min_samples_leaf': 1000,
    'gb_args': {'subsample': 0.4},
}
reweighter = reweight.GBReweighter(**gb_reweighter_options)

# Fit on the training parts, then predict weights for the test part of the
# original sample
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)

# Validate reweighting rule on the test part comparing 1d projections;
# each call below draws a single column into its own figure
validation_plot_options = dict(
    filename_as_title=True,
    nrows=1,
    ncols=1,
    hist_settings=hist_settings,
)

draw_distributions('gbr5_validate_ConeMult.png', [columns[0]],
                   original_test, target_test, gb_weights_test,
                   **validation_plot_options)

draw_distributions('gbr5_validate_VtxMult.png', [columns[1]],
                   original_test, target_test, gb_weights_test,
                   **validation_plot_options)

draw_distributions('gbr5_validate_ConePtAsym.png', [columns[2]],
コード例 #3
0
# Divide target samples into training and test parts
target_train, target_test = train_test_split(target)

# Unit weights for the training and test parts of the original sample
original_weights_train, original_weights_test = (
    numpy.ones(len(sample)) for sample in (original_train, original_test))

# Pay attention: we actually have very little data
sample_sizes = (len(original), len(target))
print('Length of original data: %s\nLength of target data: %s' % sample_sizes)

######################################################
# Plot the unmodified original and target data sets  #
######################################################

# Full samples: draw the distributions of `columns` to 'initial.png', then
# print statistics for the same inputs.
# NOTE(review): `original_weights` is not assigned in the visible code;
# presumably it is defined earlier in the file -- confirm.
draw_distributions('initial.png', columns, original, target, original_weights)
print_statistics(columns, original, target, original_weights)

# Training parts of the original and target samples
draw_distributions('initial_train.png', columns, original_train, target_train,
                   original_weights_train)

# Test parts of the original and target samples
draw_distributions('initial_test.png', columns, original_test, target_test,
                   original_weights_test)

###############################
# Gradient boosted Reweighter #
###############################

reweighter = reweight.GBReweighter(n_estimators=50,
コード例 #4
0
# Settings handed to the gradient-boosted reweighter
gb_reweighter_options = dict(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=3,
    min_samples_leaf=1000,
    gb_args=dict(subsample=0.4),
)
reweighter = reweight.GBReweighter(**gb_reweighter_options)

# Fit on the training parts, then predict weights for the test part of the
# original sample
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)

# Validate reweighting rule on the test part comparing 1d projections
gb_test_plot_options = dict(
    filename_as_title=True,
    xlim=((0, 50), (0, 12)),
    ylim=((0, 0.10), (0, 3.0)),
    nrows=1,
    ncols=2,
    hist_settings=hist_settings,
)
draw_distributions('gb_weights_test.png', columns, original_test, target_test,
                   gb_weights_test, **gb_test_plot_options)

######################
# Folding Reweighter #
######################

# Define base reweighter
reweighter_base = reweight.GBReweighter(n_estimators=50,
                                        learning_rate=0.1,
                                        max_depth=3,
                                        min_samples_leaf=1000,
コード例 #5
0
)

# Wrap the base reweighter in a folding reweighter configured with 2 folds
reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)

# No need to divide data into train/test parts
reweighter.fit(original, target, target_weight=target_weights)

# Predict weights for the input file
folding_weights = reweighter.predict_weights(toReweight)

# Draw the input sample, weighted with the folding weights, against the
# target on a 2x2 grid
folding_plot_options = dict(
    filename_as_title=True,
    # yscale=('log',),
    nrows=2,
    ncols=2,
    hist_settings=hist_settings,
)
draw_distributions('GBR4_validate.png', columns, toReweight, target,
                   folding_weights, **folding_plot_options)

draw_distributions('GBR4_before_after.png',
                   columns,
                   toReweight,
                   toReweight,
                   None,
                   folding_weights,
                   filename_as_title=True,
                   yscale=('linear', 'linear', 'linear', 'linear'),
                   nrows=2,