import pickle

import numpy as np
import scipy.sparse as sps
from scipy.spatial import distance
from sklearn.preprocessing import MinMaxScaler
from hyperopt import fmin as hyperopt_fmin, hp, tpe

# Local helpers assumed from the rest of the repository:
# prepare_similarity() builds the sparse user/coupon feature matrices,
# mapr provides mapk() (mean average precision at k).


def optimise_similarity():
    """ Optimise similarity weights

    Use hyperopt to minimise a desired target (here: the negative of the
    worst weekly MAP@10 score)
    """

    d_uchar = {}
    d_test = {}
    d_user_full = {}
    d_coupon = {}
    d_user_pred = {}
    d_user_purchase = {}

    metric = "correlation"
    var_choice = "1"
    list_week = ["week51", "week52"]
    for week_ID in list_week:
        test_sparse, uchar_sparse, list_user, list_coupon, user_pred = prepare_similarity(
            week_ID, var_choice, metric)
        d_uchar[week_ID] = uchar_sparse
        d_test[week_ID] = test_sparse
        d_user_full[week_ID] = list_user
        d_coupon[week_ID] = list_coupon
        d_user_pred[week_ID] = user_pred
        # Get the actual purchase dict (pickle files are binary)
        with open("../Data/Validation/" + week_ID +
                  "/dict_purchase_validation_" + week_ID + ".pickle",
                  "rb") as fp:
            d_user_purchase[week_ID] = pickle.load(fp)

        # Users who registered during the validation week have no
        # prediction yet; give them an empty one
        for key in d_user_purchase[week_ID]:
            d_user_pred[week_ID].setdefault(key, [])

    print "Loading OK"

    def objective_function(x_int):
        objective_function.n_iterations += 1

        list_score = []

        # Parameters to optimise
        gnr, disc, disp, large, small, val, us_sum, sex = x_int

        # Build a sparse block-diagonal matrix of weights (one block per feature group)
        Wm = sps.block_diag(
            (gnr * np.eye(13), disc * np.eye(1), disp * np.eye(1),
             large * np.eye(9), small * np.eye(55), val * np.eye(2),
             us_sum * np.eye(1), sex * np.eye(2)))
        Wm_sparse = sps.csr_matrix(Wm)

        for week_ID in list_week:

            WmT = Wm_sparse.dot(d_test[week_ID])
            # Inverse correlation distance between this week's user
            # characteristics and the weighted coupon features
            score = 1. / distance.cdist(d_uchar[week_ID].todense(),
                                        WmT.T.todense(), metric)

            # Store this week's predictions in a fresh dict
            d_user_pred[week_ID] = {}

            # Keep the 10 highest-scoring coupons for each user
            for i, user in enumerate(d_user_full[week_ID]):
                list_pred = np.ravel(score[i, :])
                list_index_top10 = list_pred.argsort()[-10:][::-1]
                d_user_pred[week_ID][user] = d_coupon[week_ID][
                    list_index_top10]

            # Users absent from the similarity matrices get an empty prediction
            for key in d_user_purchase[week_ID]:
                d_user_pred[week_ID].setdefault(key, [])

            list_user = d_user_purchase[week_ID].keys()
            list_actual = [d_user_purchase[week_ID][key] for key in list_user]
            list_pred = [d_user_pred[week_ID][key] for key in list_user]

            list_score.append(mapr.mapk(list_actual, list_pred))

        list_score = np.array(list_score)

        print(objective_function.n_iterations,
              "gnr, disc, disp, large, small, val, us_sum, sex =",
              gnr, disc, disp, large, small, val, us_sum, sex,
              "\nMean of MAP =", np.mean(list_score),
              "\nStd of MAP =", np.std(list_score))
        # hyperopt minimises, so return the negative of the worst weekly MAP
        return -np.min(list_score)

    objective_function.n_iterations = 0
    best = hyperopt_fmin(objective_function,
                         space=(hp.uniform('gnr', 0, 5),
                                hp.uniform('disc', 0, 5),
                                hp.uniform('disp', 0, 5),
                                hp.uniform('large', 0, 5),
                                hp.uniform('small', 0, 5),
                                hp.uniform('val', 0, 5),
                                hp.uniform('us_sum', 0, 5),
                                hp.uniform('sex', 0, 5)),
                         algo=tpe.suggest,
                         max_evals=10)

    print(best)
    # Re-run the objective with the best parameters found
    objective_function(tuple(
        best[key] for key in
        ["gnr", "disc", "disp", "large", "small", "val", "us_sum", "sex"]))
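
The objective above scores the top-10 recommendations with MAP@10 through mapr.mapk, whose implementation is not part of this excerpt. For reference, a minimal self-contained sketch of the usual mean-average-precision-at-k definition (assuming mapr follows the same convention as the ml_metrics package, with k defaulting to 10):

def apk(actual, predicted, k=10):
    """Average precision at k for a single user (sketch)."""
    predicted = list(predicted)[:k]
    score, n_hits = 0.0, 0.0
    for i, p in enumerate(predicted):
        # Count a hit only the first time a relevant item appears
        if p in actual and p not in predicted[:i]:
            n_hits += 1
            score += n_hits / (i + 1.0)
    if len(actual) == 0:
        return 0.0
    return score / min(len(actual), k)

def mapk(list_actual, list_predicted, k=10):
    """Mean average precision at k over all users (sketch)."""
    scores = [apk(a, p, k) for a, p in zip(list_actual, list_predicted)]
    return sum(scores) / float(len(scores))
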
Example #2
def optimise_blend(list_week_ID):
    """ Optimise blending weights

    args: list_week_ID = list of validation weeks
    """
    # Initialise the dicts used below
    d_user_pred = {}
    d_blend_pred = {}
    d_user_purchase = {}
    d_coupon = {}
    d_user = {}
    for week_ID in list_week_ID:
        d_user_pred[week_ID] = {}
        d_blend_pred[week_ID] = {}

    # Loop over validation weeks
    for week_ID in list_week_ID:
        # Get the actual purchases (pickle files are binary)
        with open("../Data/Validation/" + week_ID + "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase[week_ID] = pickle.load(fp)

        # Open the individual predictors; each pickle stores
        # "list_coupon" and a "d_user_pred" dict of per-user scores
        def load_pred(name):
            path = "../Data/Validation/%s/d_pred_%s_%s.pickle" % (
                week_ID, name, week_ID)
            with open(path, "rb") as f:
                return pickle.load(f)

        pred_lightfm = load_pred("lightfm")
        pred_sim = load_pred("similarity")
        pred_xgb_rank = load_pred("xgb_rank_pairwise")
        pred_xgb_reg = load_pred("xgb_reg_linear")
        pred_SVM = load_pred("SVM")

        list_coupon = np.array(pred_sim["list_coupon"])
        list_user = list(pred_sim["d_user_pred"].keys())
        d_coupon[week_ID] = list_coupon
        d_user[week_ID] = list_user

        # Sanity checks
        assert(pred_lightfm["list_coupon"] == pred_sim["list_coupon"])
        assert(pred_lightfm["list_coupon"] == pred_xgb_rank["list_coupon"])
        assert(pred_lightfm["list_coupon"] == pred_xgb_reg["list_coupon"])
        assert(pred_lightfm["list_coupon"] == pred_SVM["list_coupon"])
        assert(set(d_user_purchase[week_ID].keys()) == set(list_user))

        for user in list_user:
            d_user_pred[week_ID][user] = {
                "sim": np.array(pred_sim["d_user_pred"][user]),
                "xgb_rank": np.array(pred_xgb_rank["d_user_pred"][user]),
                "xgb_reg": np.array(pred_xgb_reg["d_user_pred"][user]),
                "xgb_SVM": np.array(pred_SVM["d_user_pred"][user]),
                "lightfm": np.array(pred_lightfm["d_user_pred"][user]),
            }

            # Scale each predictor to [0, 1] so the blend weights are
            # comparable (MinMaxScaler expects a 2-D array, hence the reshape)
            for name in d_user_pred[week_ID][user]:
                raw = d_user_pred[week_ID][user][name].astype(float)
                d_user_pred[week_ID][user][name] = \
                    MinMaxScaler().fit_transform(raw.reshape(-1, 1)).ravel()


    def objective_function(x_int):
        objective_function.n_iterations += 1
        list_score = []
        # Parameters to optimise: one blend weight per predictor
        w_sim, w_rank, w_reg, w_SVM, w_lightfm = x_int

        for week_ID in list_week_ID:
            for user in d_user[week_ID]:
                # Weighted sum of the scaled individual predictors
                blend = (d_user_pred[week_ID][user]["sim"] * w_sim
                         + d_user_pred[week_ID][user]["xgb_rank"] * w_rank
                         + d_user_pred[week_ID][user]["xgb_reg"] * w_reg
                         + d_user_pred[week_ID][user]["xgb_SVM"] * w_SVM
                         + d_user_pred[week_ID][user]["lightfm"] * w_lightfm)

                # Keep the 10 highest-scoring coupons, best first
                top_k = np.argsort(-blend)[:10]
                d_blend_pred[week_ID][user] = d_coupon[week_ID][top_k]

            list_user = d_user_purchase[week_ID].keys()
            list_actual = [d_user_purchase[week_ID][key] for key in list_user]
            list_pred = [d_blend_pred[week_ID][key] for key in list_user] 

            list_score.append(mapr.mapk(list_actual, list_pred))

        list_score = np.array(list_score)

        print(objective_function.n_iterations,
              "w_sim, w_rank, w_reg, w_SVM, w_lightfm =",
              w_sim, w_rank, w_reg, w_SVM, w_lightfm,
              "\nList_score =", list_score,
              "\nMean of MAP =", np.mean(list_score),
              "\nStd of MAP =", np.std(list_score))

        # hyperopt minimises, so return the negative median weekly MAP
        return -np.median(list_score)

    objective_function.n_iterations = 0
    best = hyperopt_fmin(objective_function,
        space=(hp.uniform('w_sim', 0, 1), 
               hp.uniform('w_rank', 0, 1),
               hp.uniform('w_reg', 0, 1),
               hp.uniform('w_SVM', 0, 1),
               hp.uniform('w_lightfm', 0, 1)),
        algo=tpe.suggest,
        max_evals=10)

    print(best)
    # Re-run the objective with the best weights found
    objective_function(tuple(
        best[key] for key in ["w_sim", "w_rank", "w_reg", "w_SVM", "w_lightfm"]))
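
To make the scale-and-blend step concrete, here is a small self-contained toy example of the same logic, with made-up scores for three coupons from two predictors; the weights and values are purely illustrative, not taken from the repository:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Hypothetical raw scores for 3 coupons from two predictors
scores_sim = np.array([0.2, 1.5, 0.9])
scores_lightfm = np.array([-3.0, 4.0, 1.0])

def scale(x):
    # Same per-user scaling as above: map each predictor onto [0, 1]
    return MinMaxScaler().fit_transform(x.reshape(-1, 1)).ravel()

# Illustrative blend weights (these are what hyperopt tunes above)
w_sim, w_lightfm = 0.7, 0.3
blend = w_sim * scale(scores_sim) + w_lightfm * scale(scores_lightfm)

# Coupon indices ranked by blended score, best first: [1 2 0]
print(np.argsort(-blend))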