def reweightermodel(ioriginal, itarget, ioriginal_weights, itarget_weights, args):
    """Build a folding gradient-boosted reweighter and fit it.

    ``args`` is read positionally:

    * ``args[0]`` -> ``n_estimators``
    * ``args[1]`` -> ``learning_rate``
    * ``args[2]`` -> ``max_depth``
    * ``args[3]`` -> ``min_samples_leaf``
    * ``args[4]`` -> ``subsample`` (forwarded inside ``gb_args``)
    * ``args[5]`` -> seed, used both for ``numpy.random.seed`` and as the
      ``random_state`` of the folding reweighter

    The folding reweighter is created with ``n_folds=3`` and ``verbose=False``
    and fitted on ``ioriginal`` / ``itarget`` with the given weights.

    Returns the fitted ``reweight.FoldingReweighter``.
    """
    seed = args[5]
    # Seed numpy's global RNG before any estimator is built.
    numpy.random.seed(seed)

    booster_settings = {
        'n_estimators': args[0],
        'learning_rate': args[1],
        'max_depth': args[2],
        'min_samples_leaf': args[3],
        'gb_args': {'subsample': args[4]},
    }
    base_estimator = reweight.GBReweighter(**booster_settings)

    folding = reweight.FoldingReweighter(
        base_estimator,
        random_state=seed,
        n_folds=3,
        verbose=False,
    )
    folding.fit(ioriginal, itarget, ioriginal_weights, itarget_weights)
    return folding
##                    'original_weights_test.png')
##
### Gradient boosted Reweighter
##reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
##                                   max_depth=3, min_samples_leaf=1000,
##                                   gb_args={'subsample': 0.4})
##reweighter.fit(original_train, target_train)
##gb_weights_test = reweighter.predict_weights(original_test)
##
### Validate reweighting rule on the test part comparing 1d projections
##draw_distributions(original_test, target_test, gb_weights_test,
##                   'gb_weights_test.png')
##

# ---------------------------------------------------------------------------
# Folding reweighter
# ---------------------------------------------------------------------------
# Base estimator wrapped by the folding reweighter below.
reweighter_base = reweight.GBReweighter(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=2,
    min_samples_leaf=1000,
    gb_args={'subsample': 0.4},
)
reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)

# The full samples are used directly: no explicit train/test split is made
# here. Only the target sample carries weights (the sWeights).
reweighter.fit(original, target, target_weight=target_sWeights)
folding_weights = reweighter.predict_weights(original)

# The sWeights are converted to float before being handed to the plotting
# helper.
cast_target_sWeights = target_sWeights.astype(float)
draw_distributions_weighted(
    original,
    target,
    folding_weights,
    cast_target_sWeights,
    'FoldingReweight.png',
)
# Unweighted-target variant kept for reference:
#draw_distributions(original, target, folding_weights,
#                   'FoldingReweight.png')
# Unit weights for every target entry.
weights_target = numpy.ones(dtype='float64', shape=len(target))

## now train the BDT reweighter
# NOTE: print is called with a single pre-formatted string so the same text
# is produced under both Python 2 (print statement) and Python 3 (function).
print('... starting the reweighting')
rnd_seed = 123456
# Seed numpy's global RNG; the same value is reused as the folding
# reweighter's random_state below.
numpy.random.seed(rnd_seed)
reweighter_base = reweight.GBReweighter(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=4,
    min_samples_leaf=400,
    gb_args={'subsample': 0.5},
)
reweighter = reweight.FoldingReweighter(
    reweighter_base,
    random_state=rnd_seed,
    n_folds=2,
    verbose=True,
)
reweighter.fit(origin, target, weights_origin, weights_target)
print('... reweighting fit done')

# The third positional argument is the vote function; here it takes the mean
# along axis 0 (presumably across the per-fold predictions -- confirm against
# the reweighter's documentation).
ws = reweighter.predict_weights(
    origin, weights_origin, lambda x: numpy.mean(x, axis=0))
weights = numpy.multiply(ws, transfer_factor)
# Ratio of the number of target rows to the sum of the scaled weights.
factor = float(float(len(target.index)) / weights.sum())

print(" == Summary of the reweighting ==")
print(" ================================")
# The double space after '=' reproduces the separator that the original
# comma-separated print emitted.
print(" - The transfer factor =  {}".format(transfer_factor))
print(" - The sum of target weights =  {} +/- {}".format(
    weights_target.sum(),
    math.sqrt(numpy.square(weights_target).sum())))
def reweightermodel(original, target, original_weights, target_weights, args):
    """Create a 2-fold gradient-boosted reweighter and fit it.

    ``args`` is read positionally:

    * ``args[0]`` -> ``n_estimators``
    * ``args[1]`` -> ``learning_rate``
    * ``args[2]`` -> ``max_depth``
    * ``args[3]`` -> ``min_samples_leaf``
    * ``args[4]`` -> ``subsample`` (forwarded inside ``gb_args``)

    The folding reweighter always uses ``random_state=2019``, ``n_folds=2``
    and ``verbose=True``.

    Returns the fitted ``reweight.FoldingReweighter``.
    """
    booster_settings = {
        'n_estimators': args[0],
        'learning_rate': args[1],
        'max_depth': args[2],
        'min_samples_leaf': args[3],
        'gb_args': {'subsample': args[4]},
    }
    base_estimator = reweight.GBReweighter(**booster_settings)

    folding = reweight.FoldingReweighter(
        base_estimator,
        random_state=2019,
        n_folds=2,
        verbose=True,
    )
    folding.fit(original, target, original_weights, target_weights)
    return folding
gb_args={'subsample': 0.4, 'max_features' : 6, 'min_samples_split' : 201}) gb_reweighter.fit(original_train[i], target_train[i]) gb_weights_test[i] = gb_reweighter.predict_weights(original_test[i]) #Check weighted distributions on the test splits draw_distributions(original_test[1], target_test[1], gb_weights_test[1]) #Folding Reweighter folding_weights = np.empty(sets,dtype=object) for i in range(sets): #Gradient boosted decision tree as base reweighter_gb = reweight.GBReweighter( learning_rate=0.1, n_estimators=64, max_depth=32, min_samples_leaf=200, gb_args={'subsample': 0.4,}) folding_gb = reweight.FoldingReweighter(reweighter_gb, n_folds=5) #Give full datasets to the reweighter folding_gb.fit(allCollisions[i,0].drop(['p3','p4','p3_phi','p4_phi'], axis=1).to_numpy().tolist(), allCollisions[i,1].drop(['p3','p4','p3_phi','p4_phi'], axis=1).to_numpy().tolist()) #folding_weights[i] = folding_reweighter.predict_weights(allCollisions[i,0].drop(['p3','p4','p3_phi','p4_phi'], axis=1).to_numpy().tolist(),vote_function = lambda x: np.mean(x, axis=0)) #calculate weights for each point folding_weights[i] = folding_gb.predict_weights(allCollisions[i,0].drop(['p3','p4','p3_phi','p4_phi'], axis=1).to_numpy().tolist()) draw_distributions(allCollisions[0,0], allCollisions[0,1], folding_weights[0]) """# Model Evaluation"""