import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor, plot_importance


def validation(df, validation_params):
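    """Expanding-window validation driven by `validation_params`.

    Expected keys: 'model_name' ('lr', 'xgb', or 'lr_xgb_mix'),
    'target' (name of the label column), 'train_blocks' (month indices
    to validate on), 'serialize' (falsy to score folds, or an output
    directory to fit on all prior data and pickle the model),
    'plot_feature_importances', and 'return_feature_fraction'.

    Returns the union over folds of the top fraction of features by XGB
    gain (empty for models other than 'xgb').
    """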
    model_name = validation_params['model_name']
    target = validation_params['target']
    train_blocks = validation_params['train_blocks']
    serialize = validation_params['serialize']
    plot_feature_importances = validation_params['plot_feature_importances']
    f_frac = validation_params['return_feature_fraction']

    print('Validating ...')

    rmse_folds = []
    best_alphas = []
    n_features = len(df.columns) - 1  # feature count excludes the target
    important_features = set()
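
    # Expanding-window scheme: fold i trains on all months strictly
    # before train_blocks[i] and validates on train_blocks[i] itself.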
    for i, cur_block_num in enumerate(train_blocks):
        print(f'\tTraining fold {i + 1}...')
        train_mask = df['date_block_num'] < cur_block_num
        X_train_i = df.loc[train_mask].drop(columns=[target])
        y_train_i = df.loc[train_mask, [target]]
        if not serialize:
            val_mask = df['date_block_num'] == cur_block_num
            X_validation_i = df.loc[val_mask].drop(columns=[target])
            y_validation_i = df.loc[val_mask, [target]]

        if model_name == 'lr':
            # 1. fit a plain linear regression baseline
            model = LinearRegression(n_jobs=-1)
            model.fit(X_train_i.values, y_train_i)
            if not serialize:
                pred_lr_i = np.clip(model.predict(X_validation_i.values), 0,
                                    20)
                # squared=False yields RMSE (parameter removed in
                # sklearn 1.6; use root_mean_squared_error there)
                score = mean_squared_error(y_validation_i,
                                           pred_lr_i,
                                           squared=False)
                rmse_folds.append(score)
                print('\tLR RMSE: ', score)
        elif model_name == 'xgb':
            model = XGBRegressor(max_depth=8,
                                 n_estimators=100,
                                 min_child_weight=300,
                                 colsample_bytree=0.8,
                                 subsample=0.8,
                                 eta=0.3,
                                 seed=42,
                                 nthread=-1)
            # Early stopping monitors the training fold itself here, so
            # it guards against a score plateau, not overfitting.
            model.fit(X_train_i,
                      y_train_i,
                      eval_metric="rmse",
                      eval_set=[(X_train_i, y_train_i)],
                      verbose=False,
                      early_stopping_rounds=10)
            if not serialize:
                pred_xgb_i = model.predict(X_validation_i).clip(0, 20)
                score = mean_squared_error(y_validation_i,
                                           pred_xgb_i,
                                           squared=False)
                rmse_folds.append(score)
                print('\tXGB RMSE: ', score)
                if plot_feature_importances:
                    plot_importance(booster=model)
                    plt.show()
                # Collect this fold's top fraction of features by gain.
                fi_i = model.get_booster().get_score(importance_type='gain')
                top_pairs = sorted(fi_i.items(),
                                   key=lambda kv: kv[1],
                                   reverse=True)[:int(f_frac * n_features)]
                important_features.update(name for name, _ in top_pairs)
        elif model_name == 'lr_xgb_mix':
            # Incomplete: still needs one-hot encoding of categorical
            # features and scaling of numerical features.
            model_lr = LinearRegression(n_jobs=-1, fit_intercept=True)
            model_lr.fit(X_train_i.values, y_train_i.values)
            # Bind `model` too so the serialization branch below pickles
            # the XGB half of the mix.
            model_xgb = model = XGBRegressor(max_depth=8,
                                             n_estimators=100,
                                             min_child_weight=300,
                                             colsample_bytree=0.8,
                                             subsample=0.8,
                                             eta=0.3,
                                             seed=42,
                                             nthread=-1)
            model_xgb.fit(X_train_i,
                          y_train_i,
                          eval_metric="rmse",
                          eval_set=[(X_train_i, y_train_i)],  # same caveat as above
                          verbose=False,
                          early_stopping_rounds=10)
            if not serialize:
                pred_lr_i = np.clip(
                    model_lr.predict(X_validation_i.values), 0, 20).ravel()
                pred_xgb_i = np.clip(model_xgb.predict(X_validation_i), 0, 20)
                # Grid-search the convex-combination weight alpha.
                best_score = np.inf
                best_alpha = 0.0
                for alpha in np.linspace(0, 1, 100, endpoint=True):
                    composite = alpha * pred_lr_i + (1 - alpha) * pred_xgb_i
                    score = mean_squared_error(y_validation_i,
                                               composite,
                                               squared=False)
                    if score < best_score:
                        best_score = score
                        best_alpha = alpha
                print('\tBest convex-mix RMSE: ', best_score)
                rmse_folds.append(best_score)
                best_alphas.append(best_alpha)

    if serialize:
        print('\n\tSerializing model...')
        filename = ''.join([serialize, '/model_', model_name, '.sav'])
        with open(filename, 'wb') as f:  # 'wb': write binary
            pickle.dump(model, f)
    else:
        print('\n\tRMSE min, max, average: ', np.min(rmse_folds),
              np.max(rmse_folds), np.average(rmse_folds))
        if model_name == 'lr_xgb_mix':
            print('\n\tAlphas min, max, average: ', np.min(best_alphas),
                  np.max(best_alphas), np.average(best_alphas))

    return important_features
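

# A minimal smoke test of the interface (a sketch: the synthetic frame,
# feature names, and 'item_cnt_month' target are assumptions; real usage
# would pass the engineered monthly-sales feature table instead).
if __name__ == '__main__':
    rng = np.random.default_rng(42)
    demo = pd.DataFrame({
        'date_block_num': np.repeat(np.arange(6), 50),
        'feat_a': rng.normal(size=300),
        'feat_b': rng.normal(size=300),
        'item_cnt_month': rng.integers(0, 5, size=300).astype(float),
    })
    params = {
        'model_name': 'lr',            # or 'xgb' / 'lr_xgb_mix'
        'target': 'item_cnt_month',
        'train_blocks': [4, 5],        # validate on months 4 and 5
        'serialize': None,             # falsy -> score folds instead of saving
        'plot_feature_importances': False,
        'return_feature_fraction': 0.5,
    }
    validation(demo, params)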