import numpy as np

from pyltr.metrics import NDCG
from pyltr.util.group import get_groups


def lambdarank_ref(qids, y, y_pred, metric):
    # `calc_lambdas` is assumed to be defined elsewhere in this module.
    n_samples = y.shape[0]
    ranknet_cost = 0
    lambdarank_cost = 0
    discrete_metric = 0
    pair_count = 0
    lambdas = np.zeros(qids.shape)
    metric = NDCG(k=7)  # note: this overrides the `metric` argument with NDCG@7
    n_queries = 0
    for qid, a, b in get_groups(qids):
        (r, l, d, lambdas[a:b], _, _, p) = calc_lambdas(
            qid, y[a:b], y_pred[a:b], metric)
        ranknet_cost += r
        lambdarank_cost += l
        discrete_metric += d
        pair_count += p
        n_queries += 1
    # Scale the costs and lambdas by n_samples; average the metric per query.
    lambdarank_cost *= 100.0 / n_samples
    ranknet_cost *= 100.0 / n_samples
    discrete_metric /= n_queries
    lambdas *= 100.0 / n_samples
    print(pair_count, n_queries, ranknet_cost, lambdarank_cost, discrete_metric)
    # print(lambdas)
    return (ranknet_cost, lambdarank_cost, discrete_metric, lambdas)
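# Quick illustration of how `get_groups` segments a contiguous qid array into
# (qid, start, end) slices, which is what drives the per-query loop above.
# The toy array is fabricated for demonstration.
import numpy as np
from pyltr.util.group import get_groups

qids_demo = np.array([1, 1, 1, 2, 2, 3])
for qid, a, b in get_groups(qids_demo):
    print(qid, a, b)  # -> (1, 0, 3), (2, 3, 5), (3, 5, 6)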
def train_lambda_mart(df_train):
    # Rows are assumed to be grouped by srch_id, as pyltr expects.
    x_train = df_train.drop(["target_score", "srch_id", "prop_id"], axis=1)
    y_train = df_train["target_score"]
    query_ids = df_train["srch_id"].copy()
    print("Fitting LambdaMART...")
    model = LambdaMART(metric=NDCG(len(df_train)), n_estimators=100, verbose=1)
    model.fit(x_train, y_train, query_ids)
    # `print_feature_importances` is a helper defined elsewhere in this project.
    print_feature_importances(x_train, model)
    return model
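# A hypothetical invocation of `train_lambda_mart`; the file path is an
# assumption, and the frame must carry target_score, srch_id, and prop_id
# columns alongside the features.
import pandas as pd

df_train = pd.read_csv("data/train/train_features.csv").sort_values("srch_id")
model = train_lambda_mart(df_train)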
from sklearn.ensemble import GradientBoostingRegressor


def getmodel(grad, X_train, y_train, query_ids):
    if grad:
        # Pointwise baseline: plain gradient-boosted regression on the labels.
        model = GradientBoostingRegressor(n_estimators=100, verbose=1)
        model.fit(X_train, y_train)
    else:
        # X_train = X_train.drop(["srch_id"], axis=1)
        query_ids = query_ids.copy()
        model = LambdaMART(metric=NDCG(len(X_train)), n_estimators=100, verbose=1)
        model.fit(X_train, y_train, query_ids)
    return model
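# Example calls contrasting the two branches; the variables are placeholders
# for whatever feature matrix, labels, and query ids are in scope.
baseline = getmodel(True, X_train, y_train, query_ids)   # pointwise GBM
ranker = getmodel(False, X_train, y_train, query_ids)    # listwise LambdaMART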
'LambdaMart'

from pyltr.models import LambdaMART
from pyltr.metrics import NDCG
from pyltr.models.monitors import ValidationMonitor

# Relevance labels: 5 for a booking plus 1 for a click, so 5 is the intended
# maximum. A value of 6 can only occur when both click_bool and booking_bool
# are 1; hence, cap at 5.
train_['rel'] = train_['booking_bool'] * 5 + train_['click_bool']
train_.loc[train_['rel'] == 6, 'rel'] = 5

# Add the srch_id as the index to the new train values.
m2_rnn.append('srch_id')
LambdaMART_train = train_[m2_rnn].set_index('srch_id').sort_index()
qids_val = LambdaMART_train.index

metric = NDCG(k=100)
monitor = ValidationMonitor(LambdaMART_train, train_['rel'], qids_val.values,
                            metric=metric)
model = LambdaMART(metric=metric, max_depth=10, n_estimators=1000,
                   learning_rate=.1, verbose=1, max_features=0.5,
                   query_subsample=0.5, max_leaf_nodes=10, min_samples_leaf=64)
model.fit(train_[m2_rnn], train_['rel'], qids_val, monitor=monitor)
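# A possible follow-up step (not in the original cell): score a held-out
# frame and rank properties within each search. `test_` and its columns are
# assumptions; `predict` is pyltr's documented scoring call.
preds = model.predict(test_[m2_rnn])
ranking = (test_.assign(score=preds)
                .sort_values(['srch_id', 'score'], ascending=[True, False]))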
def calculate_ndcg(truth, prediction):
    print("Calculating score...")
    return NDCG(k=len(truth)).calc_mean(truth["srch_id"].values,
                                        truth["target_score"].values,
                                        prediction["result"].values)
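# Toy sanity check for `calculate_ndcg`; the two frames are fabricated and
# must be aligned row-for-row, with each query's rows grouped together.
import pandas as pd
from pyltr.metrics import NDCG

truth = pd.DataFrame({"srch_id": [1, 1, 2, 2],
                      "target_score": [5, 0, 1, 0]})
prediction = pd.DataFrame({"result": [0.9, 0.1, 0.2, 0.8]})
print(calculate_ndcg(truth, prediction))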
import pickle

import pandas as pd
from pyltr.models.lambdamart import LambdaMART
from pyltr.metrics import NDCG

print("Reading train_0.csv ...")
train = pd.read_csv("data/train/train_0.csv").sort_values("srch_id")
print("Finished reading")

qids = train["srch_id"].copy()
train_Y = train["target"]
# Drop the label plus the train-only columns that are absent from the test set.
train_X = train.drop([
    "target", "srch_id", "click_bool", "booking_bool",
    "gross_bookings_usd", "position"
], axis=1)

model = LambdaMART(metric=NDCG(len(train)), n_estimators=200,
                   min_samples_leaf=6, max_depth=10, max_leaf_nodes=7,
                   verbose=1)
del train

print("Fitting lambdaMART ...")
model.fit(train_X, train_Y, qids)

outfile = "data/lambdamart_EST10_SL20_D10_LN7"
print("Dumping model ...")
with open(outfile, 'wb') as f:
    pickle.dump(model, f)
print("Model dumped")

print("Calculating feature importances ...")
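# A plausible continuation (the original snippet stops at the print above),
# assuming the fitted LambdaMART exposes sklearn-style `feature_importances_`.
for name, imp in sorted(zip(train_X.columns, model.feature_importances_),
                        key=lambda t: t[1], reverse=True):
    print("%s: %.4f" % (name, imp))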
# In[3]:

y_train = np.asarray(y_train)
qids_train = np.asarray(qids_train)
y_val = np.asarray(y_val)
qids_val = np.asarray(qids_val)
y_test = np.asarray(y_test)
qids_test = np.asarray(qids_test)


# In[4]:

metric = NDCG(k=40)
monitor = ValidationMonitor(x_val, y_val, qids_val, metric=metric,
                            stop_after=50)


# In[5]:

model = LambdaMART(metric=metric, max_depth=4, n_estimators=450,
                   learning_rate=.04, verbose=1, max_features=25,
                   min_samples_split=1000, min_samples_leaf=200,
                   max_leaf_nodes=20)
model.fit(x_train, y_train, qids_train, monitor=monitor)
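# In[6]:

# Held-out evaluation following pyltr's documented usage; the test split is
# assumed to be prepared the same way as the train/validation splits above.
pred_test = model.predict(x_test)
print('Random ranking:', metric.calc_mean_random(qids_test, y_test))
print('Our model:', metric.calc_mean(qids_test, y_test, pred_test))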