Example 1
def hgb_objective_map(params):
    """
	objective function for HistGradientBoostingRegressor.
	"""

    # hyperopt casts as float
    params['max_iter'] = int(params['max_iter'])
    params['max_leaf_nodes'] = int(params['max_leaf_nodes'])

    model = HistGradientBoostingRegressor(**params)
    model.fit(train, y_train)
    preds = model.predict(X_valid)

    df_eval['interest'] = preds
    df_ranked = df_eval.sort_values(['user_id_hash', 'interest'],
                                    ascending=[False, False])
    df_ranked = (df_ranked.groupby('user_id_hash')['coupon_id_hash'].apply(
        list).reset_index())
    recomendations_dict = pd.Series(df_ranked.coupon_id_hash.values,
                                    index=df_ranked.user_id_hash).to_dict()

    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(interactions_valid_dict[k]))
        pred.append(list(recomendations_dict[k]))

    result = mapk(actual, pred)
    print("INFO: iteration {} MAP {:.3f}".format(lgb_objective_map.i, result))

    hgb_objective_map.i += 1

    return 1 - result
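The int casts and the `.i` counter only make sense next to the hyperopt driver that calls this objective. A minimal sketch, assuming a `quniform` search space (the names and ranges here are illustrative, not the original ones):

from hyperopt import fmin, tpe, hp, Trials

# quniform samples floats, which is why the objective casts back to int
hgb_space = {
    'max_iter': hp.quniform('max_iter', 100, 1000, 50),
    'max_leaf_nodes': hp.quniform('max_leaf_nodes', 16, 255, 4),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
}

hgb_objective_map.i = 0  # iteration counter used in the INFO print above
best = fmin(fn=hgb_objective_map,
            space=hgb_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=Trials())

Since hyperopt minimises, the objective returns 1 - MAP rather than MAP itself.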
Example 2
def compute_mapk(interactions_dict, recomendations_dict):
    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(interactions_dict[k]))
        pred.append(list(recomendations_dict[k]))
    return mapk(actual, pred)
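A toy call, assuming `mapk` is the usual `ml_metrics`-style MAP@k over lists of lists; both dicts must map the same user ids to iterables of coupon ids:

interactions = {'u1': ['c1', 'c2'], 'u2': ['c3']}
recommendations = {'u1': ['c2', 'c9'], 'u2': ['c3', 'c1']}
print(compute_mapk(interactions, recommendations))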
Example 3
def lgb_objective_map(params):
    """
	objective function for lightgbm.
	"""

    # hyperopt casts as float
    params['num_boost_round'] = int(params['num_boost_round'])
    params['num_leaves'] = int(params['num_leaves'])

    # need to be passed as parameter
    params['verbose'] = -1
    params['seed'] = 1

    cv_result = lgb.cv(
        params,
        lgtrain,
        nfold=3,
        metrics='rmse',
        num_boost_round=params['num_boost_round'],
        early_stopping_rounds=20,
        stratified=False,
    )
    # use the CV early-stopped length as the effective number of boosting rounds
    early_stop_dict[lgb_objective_map.i] = len(cv_result['rmse-mean'])
    params['num_boost_round'] = len(cv_result['rmse-mean'])

    model = lgb.LGBMRegressor(**params)
    model.fit(train,
              y_train,
              feature_name=all_cols,
              categorical_feature=cat_cols)
    preds = model.predict(X_valid)

    df_eval['interest'] = preds
    df_ranked = df_eval.sort_values(['user_id_hash', 'interest'],
                                    ascending=[False, False])
    df_ranked = (df_ranked.groupby('user_id_hash')['coupon_id_hash'].apply(
        list).reset_index())
    recomendations_dict = pd.Series(df_ranked.coupon_id_hash.values,
                                    index=df_ranked.user_id_hash).to_dict()

    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(interactions_valid_dict[k]))
        pred.append(list(recomendations_dict[k]))

    result = mapk(actual, pred)
    print("INFO: iteration {} MAP {:.3f}".format(lgb_objective_map.i, result))

    lgb_objective_map.i += 1

    return 1 - result
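The function closes over a handful of globals (`lgtrain`, `early_stop_dict`, the iteration counter). A sketch of how they might be set up, assuming the variable names used above; note that passing `early_stopping_rounds` directly to `lgb.cv` assumes an older lightgbm release (newer versions expect an early-stopping callback):

import lightgbm as lgb

lgtrain = lgb.Dataset(train,
                      label=y_train,
                      feature_name=all_cols,
                      categorical_feature=cat_cols,
                      free_raw_data=False)
early_stop_dict = {}  # iteration -> number of rounds chosen by CV
lgb_objective_map.i = 0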
Example 4
def xl_objective(params):

    start = time()

    xl_objective.i += 1

    params['task'] = 'reg'
    params['metric'] = 'rmse'
    params['stop_window'] = 3

    # remember hyperopt casts as floats
    params['epoch'] = int(params['epoch'])
    params['k'] = int(params['k'])

    xl_model = xl.create_ffm()
    xl_model.setTrain(train_data_file)
    # xl_model.setValidate(valid_data_file_opt)
    xl_model.setTest(valid_data_file)
    # xl_model.setQuiet()
    xl_model.fit(params, xlmodel_fname_tmp)
    xl_model.predict(xlmodel_fname_tmp, xlpreds_fname_tmp)

    preds = np.loadtxt(xlpreds_fname_tmp)
    df_preds['interest'] = preds

    df_ranked = df_preds.sort_values(['user_id_hash', 'interest'],
        ascending=[False, False])
    df_ranked = (df_ranked
        .groupby('user_id_hash')['coupon_id_hash']
        .apply(list)
        .reset_index())
    recomendations_dict = pd.Series(df_ranked.coupon_id_hash.values,
        index=df_ranked.user_id_hash).to_dict()

    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(interactions_valid_dict[k]))
        pred.append(list(recomendations_dict[k]))

    score = mapk(actual, pred)
    end = round((time() - start) / 60., 2)

    print("INFO: iteration {} was completed in {} min. Score {:.3f}".format(xl_objective.i, end, score))

    return 1 - score
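`setTrain`/`setTest` point xlearn at files on disk; with `xl.create_ffm()` these are expected in the standard libffm text format, one row per user-coupon pair with the interest label first. A hypothetical pair of rows (field and feature indices are made up):

# label field:feature:value ...
1 0:12:1 1:34:1 2:105:0.5
0 0:7:1 1:34:1 2:212:1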
Example 5
def mapk_similarity(alpha, at_random=False):

    mpv = user_mean_purchase_vector_valid.copy()
    feat_cols = [c for c in mpv.columns if 'id_hash' not in c]
    mvv = user_mean_visit_vector_valid.copy()
    mvv[feat_cols] = alpha * mvv[feat_cols]

    user_vector = (pd.concat([mpv, mvv])
        .groupby('user_id_hash')
        .sum()
        .reset_index())

    user_ids = user_vector.user_id_hash.values
    item_ids = df_coupons_valid_feat_oh.coupon_id_hash.values
    # ensure the same column order
    user_cols = ['user_id_hash'] + [c for c in user_vector.columns if 'id_hash' not in c]
    item_cols = ['coupon_id_hash'] + [c for c in user_vector.columns if 'id_hash' not in c]
    user_feat = user_vector[user_cols[1:]].values
    item_feat = df_coupons_valid_feat_oh[item_cols[1:]].values

    # euclidean_distances returns distances, so an ascending argsort ranks
    # the closest coupons first
    user_item_dist = euclidean_distances(user_feat, item_feat)
    top_n_idx = np.apply_along_axis(np.argsort, 1, user_item_dist)

    if at_random:
        item_ids_rnd = item_ids.copy()
        recomendations_dict = {}
        for user, idx in zip(user_ids, top_n_idx):
            np.random.shuffle(item_ids_rnd)
            # copy: without it every user would alias the same shuffled array
            recomendations_dict[user] = item_ids_rnd.copy()
    else:
        recomendations_dict = {}
        for user, idx in zip(user_ids, top_n_idx):
            recomendations_dict[user] = [item_ids[i] for i in idx]

    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(interactions_valid_dict[k]))
        pred.append(list(recomendations_dict[k]))

    return mapk(actual, pred)
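A sketch of how the function might be driven: scan the visit-vector weight alpha and compare against the shuffled baseline (the alpha grid is illustrative):

for alpha in np.arange(0., 1.1, 0.1):
    print("alpha: {:.1f} MAP: {:.4f}".format(alpha, mapk_similarity(alpha)))
print("random baseline MAP: {:.4f}".format(mapk_similarity(1., at_random=True)))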
Example 6
# NOTE: the snippet starts mid-statement; the dangling lines below close a
# groupby-aggregation that builds df_interactions_valid
#     'coupon_id_hash': 'unique'}).reset_index())
tmp_valid_dict = pd.Series(df_interactions_valid.coupon_id_hash.values,
                           index=df_interactions_valid.user_id_hash).to_dict()

# keep users that have interacted at least with one validation coupon
keep_users = []
for user, coupons in tmp_valid_dict.items():
    if np.intersect1d(valid_coupon_ids, coupons).size != 0:
        keep_users.append(user)
# out of 6924, we end up with 6071, so not bad
interactions_valid_dict = {
    k: v
    for k, v in tmp_valid_dict.items() if k in keep_users
}

coupon_id_rn = valid_coupon_ids.copy()
recomendations_dict = {}
for user, _ in interactions_valid_dict.items():
    np.random.shuffle(coupon_id_rn)
    # copy: without it every user would alias the same shuffled array
    recomendations_dict[user] = coupon_id_rn.copy()

actual = []
pred = []
for k, _ in recomendations_dict.items():
    actual.append(list(interactions_valid_dict[k]))
    pred.append(list(recomendations_dict[k]))

print(mapk(actual, pred))
Example 7
    # NOTE: the snippet starts mid-function; this is the tail of
    # build_recommendations(user), after a k-NN query produced dist and nnidx
    dist, nnidx = dist.ravel(), nnidx.ravel()
    ranked_dist = np.argsort(dist)
    ranked_cp_idxs = nnidx[ranked_dist][:50]
    ranked_cp_ids = [idx_item_dict[i] for i in ranked_cp_idxs]
    ranked_cp_idxs_valid = [
        train_to_valid_most_similar[c] for c in ranked_cp_ids
    ]
    return (user, ranked_cp_idxs_valid)


start = time()

cores = multiprocessing.cpu_count()

pool = Pool(cores)
all_users = list(interactions_valid_dict.keys())
recommend_coupons = pool.map(build_recommendations, all_users)
pool.close()

# recommend_coupons = Parallel(cores)(delayed(build_recommendations)(user) for user,_ in user_items_tuple)

print(time() - start)

recommendations_dict = {k: v for k, v in recommend_coupons}
actual = []
pred = []
for k, _ in recommendations_dict.items():
    actual.append(list(interactions_valid_dict[k]))
    pred.append(list(recommendations_dict[k]))

result = mapk(actual, pred)
print(result)
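For reference, the joblib alternative left commented out above would read roughly like this (using `all_users` as built above, since `user_items_tuple` is not defined in the snippet):

from joblib import Parallel, delayed

recommend_coupons = Parallel(n_jobs=cores)(
    delayed(build_recommendations)(user) for user in all_users)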
Example 8
    # NOTE: the snippet starts mid elif-chain over set_up_name;
    # identity checks ("is") on strings are fragile, use equality instead
    elif set_up_name == 'set_up_4':
        lr_scheduler = MultiStepLR(optimizer, milestones=[3, 8], gamma=0.1)
    elif set_up_name == 'set_up_5':
        lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4], gamma=0.1)

    model.fit(train_loader,
              criterion,
              optimizer,
              n_epochs=n_epochs,
              eval_loader=eval_loader,
              lr_scheduler=lr_scheduler)
    preds = model.predict(test_loader)

    df_all_interactions['interest'] = preds
    df_ranked = df_all_interactions.sort_values(['user_id_hash', 'interest'],
                                                ascending=[False, False])
    df_ranked = (df_ranked.groupby('user_id_hash')['coupon_id_hash'].apply(
        list).reset_index())
    recomendations_dict = pd.Series(df_ranked.coupon_id_hash.values,
                                    index=df_ranked.user_id_hash).to_dict()
    true_valid_interactions = wd_interactions['true_valid_interactions']

    actual = []
    pred = []
    for k, _ in recomendations_dict.items():
        actual.append(list(true_valid_interactions[k]))
        pred.append(list(recomendations_dict[k]))
    print("Mean Average Precission: {}".format(mapk(actual, pred)))
    results[set_up_name] = mapk(actual, pred)
    del (model, optimizer, criterion)