def test_data_ndcg(model_path, test_path):
    """Evaluate the average NDCG of the test data.

    :param model_path: path to a saved LightGBM model file
    :param test_path: path to a LETOR-format test file
    """
    with open(test_path, 'r', encoding='utf-8') as testfile:
        test_X, test_y, test_qids, comments = letor.read_dataset(testfile)
    booster = lgb.Booster(model_file=model_path)
    predictions = booster.predict(test_X)
    # Average NDCG@60 over every qid in the test set.
    average_ndcg, _ = ndcg.validate(test_qids, test_y, predictions, 60)
    print("all qid average ndcg: ", average_ndcg)
    print("job done!")
def model(train_data, test_data, train_x, test_x, train_y, test_y, objective, metric):
    """Train a LightGBM booster, print train/test accuracy and a CV score.

    :param train_data: lgb.Dataset used for training
    :param test_data: lgb.Dataset used as the validation set
    :param train_x: raw training feature matrix (for prediction)
    :param test_x: raw test feature matrix (for prediction)
    :param train_y: training labels
    :param test_y: test labels
    :param objective: overrides the default objective in the params
    :param metric: overrides the default metric; also selects the CV column
    """
    # NOTE(review): the original opened "train_model.txt" and never wrote to
    # it or closed it (a handle leak).  Keep the touch-the-file side effect
    # but release the handle immediately.
    with open("train_model.txt", "a+"):
        pass
    param = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'num_leaves': 16,
        'num_trees': 100,
        'objective': 'multiclassova',
        'metric': 'multi_error',
        'max_bin': 255,
        'learning_rate': 0.05,
        'early_stopping': 10,
    }
    param.update({'objective': objective})
    param.update({'metric': metric})
    print(param)
    print("multierror")
    num_round = 10
    bst = lgb.train(param, train_data, num_round, valid_sets=[test_data])
    bst.save_model('model.txt')
    # (Removed an unused reload of 'model.txt' into a local that was never
    # read; the save/load round-trip had no observable effect.)
    ypred_train = bst.predict(train_x)
    ypred_test = bst.predict(test_x)
    print("ypred_train", ypred_train)
    print("ypred_test", ypred_test)
    # Binarize the predicted probabilities at 0.5.
    train_pred = [1 if p > 0.5 else 0 for p in ypred_train]
    test_pred = [1 if p > 0.5 else 0 for p in ypred_test]
    # NOTE(review): the messages say "error rate" but accuracy_score is what
    # is printed; messages kept as-is to avoid changing observable output.
    print('The train error rate of prediction is:', (accuracy_score(train_y, train_pred)))
    print('The test error rate of prediction is:', (accuracy_score(test_y, test_pred)))
    cv_results = lgb.cv(param, train_data, num_round, nfold=5)
    name = metric + "-mean"
    print('best n_estimators:', (len(cv_results[name])))
    print('best cv score:', (pd.Series(cv_results[name]).min()), '\n')
def test_5model():
    """Score the held-out 5-league file with the chosen model and report accuracy."""
    # Observed accuracies of the candidate models:
    # model_goss_noall_0.889: 0.658, model_gbdt_0.947: 0.612,
    # model_goss_0.93: 0.642, model_dart_0.921: 0.637,
    # model_dart_5league0.953: 0.676, model_gbdt_5league0.953: 0.651
    model_file = root + 'usemodel\\model_gbdt_5league0.953.txt'
    booster = lgb.Booster(model_file=model_file)
    print('开始预测...')
    frame = pd.read_table(root + 'predict_0531_5league.txt')
    testlabel = np.array(frame.label)
    testdata = np.array(frame.drop("label", axis=1))
    probs = booster.predict(testdata, num_iteration=booster.best_iteration)
    # Arg-max over the 3-way probability vector gives the class label.
    y_preds = [list(row).index(max(row)) for row in probs]  # 3 value
    get_accuracy(y_preds, testlabel)
def predict():
    """Score the 2020-08-01 file with the dart model and apply the 0.6 limit."""
    model_file = root + 'usemodel\\model_dart_10000.675.txt'  # model_dart_10000.675
    booster = lgb.Booster(model_file=model_file)
    print('开始预测...')
    frame = pd.read_table(root + 'predict_zcw2020-08-01.txt')  # predict0619
    features = np.array(frame)
    probabilities = booster.predict(features, num_iteration=booster.best_iteration)
    # print(probabilities)
    # y_preds = [1 if i >= 0.5 else 0 for i in probabilities]  # 2 value
    # y_preds = [list(x).index(max(x)) for x in probabilities]  # 3 value
    pred_limit(probabilities, 0.6)
def predict(): model_file = root + 'usemodel\\model_gbdt_score_1.0.txt' # model_goss_0.889 gbm = lgb.Booster(model_file=model_file) print('开始预测...') y_predt = gbm.predict(predictdata, num_iteration=gbm.best_iteration) #print(y_predt) # y_preds = [ 1 if i >=0.5 else 0 for i in y_predt] # 2 value y_preds = [list(x).index(max(x)) for x in y_predt] # 3 value print(y_preds) dt = pd.DataFrame(y_preds) dt.to_csv(root + "result10624.csv", encoding='utf_8_sig')
def lgbPredict(predictInput, modelFile):
    """Predict the A-board results with an already-trained model.

    :param predictInput: CSV file with a USRID column plus feature columns
    :param modelFile: model file name under ./model/
    """
    booster = lgb.Booster(model_file='./model/' + modelFile)  # init model
    dfData = pd.read_csv(predictInput)
    # Drop the uid column: USRID is an identifier, not a feature.
    features = dfData.drop(labels='USRID', axis=1)
    scores = pd.DataFrame(booster.predict(features))
    scores.columns = ['RST']
    result = pd.concat([dfData[['USRID']], scores], axis=1)
    result.to_csv('./model/test_result.csv', sep='\t', index=None)
def load_models(self, directory):
    """Load every LightGBM model file found directly inside *directory*.

    Appends each loaded booster to ``self.best_model`` and its
    ``best_iteration`` to ``self.best_round_list``.

    :param directory: folder containing saved LightGBM model files
    :raises IOError: if the directory does not exist
    """
    if not os.path.exists(directory):
        # Fixed the original message " is not exists" (broken grammar).
        raise IOError(directory + ' does not exist')
    print('loading models from ', directory, ' ...................')
    # Simply read every model file in the folder; this may change later.
    for files in os.listdir(directory):
        model_file_path = os.path.join(directory, files)
        if os.path.isfile(model_file_path):
            best_model = lgb.Booster(model_file=model_file_path)
            self.best_model.append(best_model)
            self.best_round_list.append(best_model.best_iteration)
    print('completed loading models from ', directory, ' ..........')
def _preprocess(self, data):
    """Turn the incoming file map into model-ready input arrays.

    For each CSV file, predict with the LightGBM model matching its
    'Frequency Band' (2585.0 / 2604.8 / 2624.6); for any other band, average
    the predictions of all three band-specific models.

    :param data: mapping of {key: {file_name: file_content}} — presumably
        file-like objects readable by pd.read_csv; confirm against caller
    :return: {'inputs': float32 array of shape (-1, 1)}
    """
    preprocessed_data = {}
    filesDatas = []
    for k, v in data.items():
        for file_name, file_content in v.items():
            test_data = pd.read_csv(file_content)
            test_set = feat(test_data)
            if test_data['Frequency Band'][0] == 2585.0:
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2585.0')
                clf = lgb.Booster(model_file=lgb_model_path)
                pb_data = clf.predict(test_set, num_iteration=clf.best_iteration)
            elif test_data['Frequency Band'][0] == 2604.8:
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2604.8')
                clf = lgb.Booster(model_file=lgb_model_path)
                pb_data = clf.predict(test_set, num_iteration=clf.best_iteration)
            elif test_data['Frequency Band'][0] == 2624.6:
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2624.6')
                clf = lgb.Booster(model_file=lgb_model_path)
                pb_data = clf.predict(test_set, num_iteration=clf.best_iteration)
            else:  # unknown band: average the three models
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2585.0')
                clf1 = lgb.Booster(model_file=lgb_model_path)
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2604.8')
                clf2 = lgb.Booster(model_file=lgb_model_path)
                lgb_model_path = os.path.join(self.model_path, 'lgb_model_2624.6')
                clf3 = lgb.Booster(model_file=lgb_model_path)
                pb_data1 = clf1.predict(test_set, num_iteration=clf1.best_iteration).reshape(-1)
                pb_data2 = clf2.predict(test_set, num_iteration=clf2.best_iteration).reshape(-1)
                pb_data3 = clf3.predict(test_set, num_iteration=clf3.best_iteration).reshape(-1)
                pb_data = (pb_data1 + pb_data2 + pb_data3) / 3
                # Earlier fallback, kept for reference:
                # print('other frequency! use model 2585.0')
                # lgb_model_path = os.path.join(self.model_path, 'lgb_model_2585.0')
                # clf = lgb.Booster(model_file=lgb_model_path)
                # if test_data['Frequency Band'][0] == 2604.8:
                #     lgb_model_path = os.path.join(self.model_path, 'lgb_model_2604.8')
                # else:
                #     lgb_model_path = os.path.join(self.model_path, 'lgb_model_2624.6')
                # clf = lgb.Booster(model_file=lgb_model_path)
                # test_set = feat(test_data)
                # pb_data = clf.predict(test_set, num_iteration=clf.best_iteration)
            input_data = np.array(pb_data.reshape(-1, 1))
            print(file_name, input_data.shape)
            filesDatas.append(input_data)
    filesDatas = np.array(filesDatas, dtype=np.float32).reshape(-1, 1)
    preprocessed_data['inputs'] = filesDatas
    print("preprocessed_data[\'inputs\'].shape = ", preprocessed_data['inputs'].shape)
    return preprocessed_data
def test(self):
    """End-to-end Booster test: train, eval, predict, save/load round-trip."""
    # Use the keyword form: positional arguments to sklearn dataset loaders
    # were deprecated and later removed, so load_breast_cancer(True) breaks
    # on modern scikit-learn.
    X_train, X_test, y_train, y_test = train_test_split(
        *load_breast_cancer(return_X_y=True), test_size=0.1, random_state=1)
    train_data = lgb.Dataset(X_train, max_bin=255, label=y_train)
    valid_data = train_data.create_valid(X_test, label=y_test)
    params = {
        "objective": "binary",
        "metric": "auc",
        "min_data": 1,
        "num_leaves": 15,
        "verbose": -1
    }
    bst = lgb.Booster(params, train_data)
    bst.add_valid(valid_data, "valid_1")
    for i in range(30):
        bst.update()
        if i % 10 == 0:
            print(bst.eval_train(), bst.eval_valid())
    bst.save_model("model.txt")
    pred_from_matr = bst.predict(X_test)
    # Write the features to a temp CSV and predict from the file path.
    with tempfile.NamedTemporaryFile() as f:
        tname = f.name
    with open(tname, "w+b") as f:
        np.savetxt(f, X_test, delimiter=',')
    pred_from_file = bst.predict(tname)
    os.remove(tname)
    self.assertEqual(len(pred_from_matr), len(pred_from_file))
    for preds in zip(pred_from_matr, pred_from_file):
        self.assertAlmostEqual(*preds, places=15)
    # check saved model persistence
    bst = lgb.Booster(params, model_file="model.txt")
    pred_from_model_file = bst.predict(X_test)
    self.assertEqual(len(pred_from_matr), len(pred_from_model_file))
    for preds in zip(pred_from_matr, pred_from_model_file):
        self.assertEqual(*preds)
def predict_with_lgbm_building(test_df, row_ids, model_filepath): """" Takes a given directory which contains n folders (one for each building) and then predicts the rows with the respective models :param test_df: DataFrame containing the test data :param row_ids: A vector with the matching row ids for the predicted labels :param model_filepath: Directory that contains the trained model :return: Vector containing the predicted labels for the test data """ buildings_in_dir = sorted(os.listdir(model_filepath), key=int) test_df["row_id"] = row_ids test_df = test_df.drop(columns=["site_id"], axis=1) test_df = test_df.groupby("building_id") predictions_by_building = [] row_id_by_building = [] for b in buildings_in_dir: test_by_building = test_df.get_group(int(b)) test_by_building = test_by_building.reset_index(drop=True) rows_grouped = list(test_by_building["row_id"]) test_by_building = test_by_building.drop(columns=["building_id"], axis=1) models_in_dir = os.listdir(model_filepath + "/" + b) num_models = len(models_in_dir) predictions_group = np.zeros(len(rows_grouped)) i = 0 for model in models_in_dir: i += 1 click.echo("Predicting Building " + b + " [" + str(i) + "/" + str(num_models) + "]") lgbm_model = lgb.Booster(model_file=model_filepath + "/" + b + "/" + model) predictions_current = lgbm_model.predict(test_by_building) predictions_group += np.expm1(predictions_current) predictions_group = predictions_group / num_models predictions_by_building.extend(list(predictions_group)) row_id_by_building.extend(rows_grouped) # Order the predictions by merging them to the original row ids pred_df = pd.DataFrame({ "row_id": row_id_by_building, "pred": predictions_by_building }) pred_df = pred_df.sort_values("row_id") predictions = pred_df["pred"].copy(deep=True) predictions[predictions < 0] = 0 return predictions
def predict(model_path, X_test, is_lgbm=False, is_catboost=False, is_cnn=False, maxlen=400, lgbm_threshold=0.5):
    """Load a persisted model and predict on unseen data.

    :param model_path: path of the saved model
    :param X_test: feature matrix (already preprocessed/padded as needed)
    :param is_lgbm: load a LightGBM booster and binarize at lgbm_threshold
    :param is_catboost: load a CatBoost classifier
    :param is_cnn: load a Keras model and return arg-max class labels
    :param maxlen: kept for API compatibility (CNN padding length, unused here)
    :param lgbm_threshold: probability cut-off for the LightGBM binary case
    :return: numpy array of labels (lgbm/cnn) or raw model predictions
    """
    print('\n === predict === \n')
    if is_lgbm:
        # lightgbm
        model = lgb.Booster(model_file=model_path)
    elif is_catboost:
        model = CatBoostClassifier()
        model = model.load_model(model_path)
    elif is_cnn:
        model = load_model(model_path)
    else:
        # sklearn / xgboost
        model = joblib.load(model_path)
    # y_pred = model.predict_prob(X_test)
    y_pred = model.predict(X_test)
    if is_lgbm:
        # Binarize probabilities; comprehension replaces the manual
        # append loop of the original.
        return np.array([1 if y > lgbm_threshold else 0 for y in y_pred])
    elif is_cnn:
        # X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
        # NOTE: the original predicts a second time in this branch;
        # behavior preserved.
        y_pred = model.predict(X_test)
        return np.array([np.argmax(y) for y in y_pred])
    else:
        return y_pred
def evaluate_params(
        trial: optuna.trial.Trial,
        train_data: lgb.Dataset,
        validation_data: lgb.Dataset,
) -> float:
    """Compute out-of-sample performance for a parameter set.

    Suggests a LightGBM hyper-parameter set from *trial*, trains one boosting
    round at a time, reports the validation loss to Optuna after every round
    (so unpromising trials can be pruned), and returns the final loss.

    NOTE(review): this function reads ``self.objective`` and
    ``self.num_class`` but takes no ``self`` parameter — presumably it was a
    method, or relies on an outer-scope ``self``; confirm before reuse.
    Return annotation corrected: the function returns the float validation
    loss, never None or a dict.
    """
    params = {}
    params["num_iterations"] = trial.suggest_int(
        "num_iterations", 8, 128)
    params["learning_rate"] = trial.suggest_uniform(
        "learning_rate", 2**-5, 0.5)
    params["num_leaves"] = trial.suggest_int("num_leaves", 8, 256)
    params["max_depth"] = trial.suggest_int("max_depth", 4, 32)
    params["min_data_in_leaf"] = trial.suggest_int(
        "min_data_in_leaf", 4, 512)
    params["min_sum_hessian_in_leaf"] = trial.suggest_uniform(
        "min_sum_hessian_in_leaf", 2**-5, 0.25)
    params["bagging_freq"] = trial.suggest_int("bagging_freq", 0, 1)
    params["bagging_fraction"] = trial.suggest_uniform(
        "bagging_fraction", 0.5, 1)
    params["feature_fraction"] = trial.suggest_uniform(
        "feature_fraction", 0.5, 1)
    params["lambda_l1"] = trial.suggest_uniform("lambda_l1", 0, 64)
    params["lambda_l2"] = trial.suggest_uniform("lambda_l2", 0, 64)
    params["min_gain_to_split"] = trial.suggest_uniform(
        "min_gain_to_split", 0, 0.25)
    params["min_data_per_group"] = trial.suggest_int(
        "min_data_per_group", 1, 512)
    params["max_cat_threshold"] = trial.suggest_int(
        "max_cat_threshold", 1, 512)
    params["cat_l2"] = trial.suggest_uniform("cat_l2", 0, 64)
    params["cat_smooth"] = trial.suggest_uniform("cat_smooth", 0, 2048)
    params["max_cat_to_onehot"] = trial.suggest_int(
        "max_cat_to_onehot", 1, 64)
    params["max_bin"] = trial.suggest_int("max_bin", 32, 1024)
    params["min_data_in_bin"] = trial.suggest_int(
        "min_data_in_bin", 1, 64)
    params["objective"] = self.objective
    params["num_class"] = self.num_class
    params["verbosity"] = -1
    booster = lgb.Booster(params=params, train_set=train_data)
    booster.add_valid(validation_data, "validation_set")
    for step in range(params["num_iterations"]):
        booster.update()
        # eval_valid() -> [(dataset_name, metric_name, value, is_higher_better)]
        validation_loss = booster.eval_valid()[0][2]
        trial.report(validation_loss, step)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return validation_loss
def get_leaf_index(data, model_path):
    """Predict leaf indices for *data* and one-hot encode them.

    :param data: feature matrix to run through the model
    :param model_path: path of the saved LightGBM model
    """
    booster = lgb.Booster(model_file=model_path)
    leaf_indices = booster.predict(data, pred_leaf=True)
    encoder = OneHotEncoder()
    one_hot = encoder.fit_transform(leaf_indices)
    print(one_hot.shape)
    print(one_hot.toarray())
def lgb_predict(logger, model_file, test_file, output_file, predictors):
    """Score *test_file* with a saved model and write a submission file.

    Writes gzip-compressed CSV when the output file name ends in "gz".
    """
    logger.print_log('loading test data...')
    test_df = pd.read_csv(test_file, dtype=default_config.dtypes)
    sub = pd.DataFrame()
    sub['click_id'] = test_df['click_id'].astype('int')
    logger.print_log("Predicting...")
    booster = lgb.Booster(model_file=model_file)
    sub['is_attributed'] = booster.predict(test_df[predictors])
    logger.print_log("writing to file <" + output_file + ">...")
    if output_file.endswith("gz"):
        sub.to_csv(output_file, index=False, compression='gzip')
    else:
        sub.to_csv(output_file, index=False)
    logger.print_log("done.")
def select_model(feats):
    """Pick the funding-round model matching the most advanced round in *feats*.

    :param feats: mapping of feature name -> value (possibly string numbers)
    :return: (model_id, loaded lgb.Booster)
    """
    model_id = 0
    model_folder_path = './models/'
    models = ['clf_pre.txt', 'clf_A.txt', 'clf_B.txt', 'clf_C.txt']
    fundings = [
        'PreSeries_post_money_valuation_usd_augmented',
        'RoundA_post_money_valuation_usd_augmented',
        'RoundB_post_money_valuation_usd_augmented',
        'RoundC_post_money_valuation_usd_augmented',
    ]
    # The LAST round with a positive valuation wins (deliberately no break),
    # so the most advanced funding round selects the model.
    # Renamed the loop variable: `round` shadowed the builtin.
    for i, round_key in enumerate(fundings):
        if feats[round_key] and float(feats[round_key]) > 0:
            model_id = i
    return model_id, lgb.Booster(model_file=model_folder_path + models[model_id])
def get_fold_mae(save_path_orig, split, fold, num_folds, train_val_split, features, targets):
    """Load one fold's model and report its out-of-fold (or train) MAE.

    When ``fold == num_folds`` the model is evaluated on every row
    (train error) instead of the fold's validation split.
    """
    fold_description = get_fold_description(fold, num_folds)
    model_path = '{}-{}-{}.txt'.format(save_path_orig, split, fold_description)
    booster = lgb.Booster(model_file=model_path)
    if fold < num_folds:
        valid_ids = train_val_split[split][fold][1]
    else:
        valid_ids = np.arange(features.shape[0])
    x_valid = features.iloc[valid_ids]
    y_valid = targets[valid_ids]
    oof_mae = np.abs(booster.predict(x_valid) - y_valid).mean()
    error_description = "OOF MAE" if fold < num_folds else "Train error"
    print('{}: {}'.format(error_description, np.round(oof_mae, 3)))
    return oof_mae, valid_ids.size
def mypredict(testInputs, predictDate, ROUND):
    """Average the predictions of ROUND bagged models for one date.

    :param testInputs: feature matrix to score
    :param predictDate: date tag used in the saved model file names
    :param ROUND: number of bagged models to average
    :return: element-wise mean of the ROUND models' predictions
    """
    average = None
    for i in range(ROUND):
        model_save_path = os.path.join(
            LOCALDATAPATH, 'lightgbmModel2',
            str(predictDate) + '_' + str(i + 1) + '.txt')
        gbm = lgb.Booster(model_file=model_save_path)
        predict = gbm.predict(testInputs)
        # Accumulate the per-model scores, divide once at the end.
        average = predict if average is None else average + predict
    average = average / ROUND
    return average
    # Removed an unreachable `pass` that followed the return statement.
def test_add_features_same_booster_behaviour(self):
    """A booster trained on a column-wise merged Dataset must produce the
    exact same saved model as one trained on the full matrix."""
    X = np.random.random((1000, 5))
    X[:, [1, 3]] = 0
    names = ['col_%d' % (i, ) for i in range(5)]
    # Split the matrix at column j and glue the halves back together; the
    # d1 from the final iteration (j=4) is the one trained below.
    for j in range(1, 5):
        d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct()
        d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
        d1.add_features_from(d2)
    d = lgb.Dataset(X, feature_name=names).construct()
    y = np.random.random(1000)
    d1.set_label(y)
    d.set_label(y)
    merged_booster = lgb.Booster(train_set=d1)
    full_booster = lgb.Booster(train_set=d)
    for _ in range(10):
        full_booster.update()
        merged_booster.update()
    full_name = self.tempFileName()
    merged_name = self.tempFileName()
    merged_booster.save_model(merged_name)
    full_booster.save_model(full_name)
    self.assertFilesEqual(merged_name, full_name)
    os.remove(merged_name)
    os.remove(full_name)
def lgb_predict(model_filename, test):
    """Predict service labels for *test* and write the submission CSV."""
    print(model_filename)
    booster = lgb.Booster(model_file=path + '/models/' + model_filename)
    print("Loading Model ...")
    print("Predicting ... ")
    submit = np.argmax(booster.predict(test), axis=1)
    print("Predict over ...")
    # Map each predicted class index back to its service label.
    for i, v in enumerate(submit):
        submit[i] = service_label[v]
    data = pd.read_csv('./data/submit_sample.csv')
    data['current_service'] = submit
    data['current_service'] = data['current_service'].astype(int)
    data.to_csv('./temp/lgb_prediction-new.csv', index=False)
def import_models():
    """Load every LightGBM model under BASE/lgb_model.

    :return: (stations, models) where stations is an array of
        [station, id] pairs parsed from the file names, and models is a
        parallel list of single-element [Booster] lists.
    """
    stations = []
    models = []
    # model_files = glob.glob(BASE + "/data/*.pk")
    for file in glob.glob(BASE + "/lgb_model/*.txt"):
        stem = file.replace(BASE + "/lgb_model/", "").replace(".txt", "")
        model_station, model_id = stem.split("_")
        stations.append([model_station, model_id])
        # NOTE(review): each booster is wrapped in its own one-element list —
        # callers presumably index models[i][0]; confirm before flattening.
        models.append([lgb.Booster(model_file=file)])
    stations = np.array(stations)
    return stations, models
def lgb_cv_predict(logger, model_files, test_file, output_file, predictors):
    """Average the predictions of several CV models and write a submission."""
    logger.print_log('loading test data...')
    test_df = pd.read_csv(test_file, dtype=default_config.dtypes)
    sub = pd.DataFrame()
    sub['click_id'] = test_df['click_id'].astype('int')
    logger.print_log("Predicting...")
    sub['is_attributed'] = 0
    # Sum each model's scores, then divide by the model count.
    for model_file in model_files:
        booster = lgb.Booster(model_file=model_file)
        sub['is_attributed'] += booster.predict(test_df[predictors])
    sub['is_attributed'] = sub['is_attributed'] / len(model_files)
    logger.print_log("writing to file <" + output_file + ">...")
    sub.to_csv(output_file, index=False)
    logger.print_log("done.")
def build_gbm():
    """Return the deepface ensemble Booster, downloading the weights file on
    first use.

    :return: lgb.Booster loaded from ~/.deepface/weights
    """
    home = str(Path.home())
    # Hoisted the repeated path literal; replaced `!= True` with `not`.
    ensemble_model_path = home + '/.deepface/weights/face-recognition-ensemble-model.txt'
    if not os.path.isfile(ensemble_model_path):
        print("face-recognition-ensemble-model.txt will be downloaded...")
        url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt'
        gdown.download(url, ensemble_model_path, quiet=False)
    deepface_ensemble = lgb.Booster(model_file=ensemble_model_path)
    return deepface_ensemble
def post_init(self):
    """Load the LightGBM booster and validate its feature count.

    :raises ValueError: if the model's feature count does not match the
        configured query + match feature names
    :raises PretrainedModelFileDoesNotExist: if model_path is missing
    """
    super().post_init()
    if self.model_path and os.path.exists(self.model_path):
        import lightgbm
        self.booster = lightgbm.Booster(model_file=self.model_path)
        model_num_features = self.booster.num_feature()
        expected_num_features = len(self.query_feature_names + self.match_feature_names)
        if model_num_features != expected_num_features:
            # Fixed the original message, whose adjacent f-string fragments
            # concatenated without a space ("...differentthan...").
            raise ValueError(
                f'The number of features expected by the LightGBM model {model_num_features} is different '
                f'than the ones provided in input {expected_num_features}')
    else:
        raise PretrainedModelFileDoesNotExist(
            f'model {self.model_path} does not exist')
def _load(self, models_dir: pathlib.Path):
    """Load the nfold x len(seeds) grid of boosters from *models_dir*."""
    import lightgbm as lgb
    seeds = [123] if self.seeds is None else self.seeds
    grid = []
    for fold in range(self.nfold):
        row = [
            lgb.Booster(model_file=str(models_dir / f"model.fold{fold}.seed{seed}.txt"))
            for seed in seeds
        ]
        grid.append(row)
    self.gbms_ = np.array(grid)
def pred_lgbm(X_test, categorical_features, feature_name, fold_id, lgb_params, fit_params,
              model_name, score_func, calc_importances=True):
    """Load the fold's saved model, predict X_test, and clip negatives to zero.

    Only *X_test*, *fold_id* and *model_name* are used here; the remaining
    parameters are kept for signature compatibility with the training helper.
    """
    booster = lgb.Booster(model_file='{}_fold{}.txt'.format(model_name, fold_id))
    predictions = booster.predict(X_test)
    predictions[predictions < 0] = 0
    return predictions
def get_predictions_per_era(df=None, num_models=1, prefix=None, folder_name=None, era_idx=[], model_type='xgb', rank_average=False):
    """Average the per-era predictions of a folder of CV models.

    NOTE(review): ``era_idx=[]`` is a mutable default argument — it is only
    passed through here, never mutated, but callers should still pass it
    explicitly; consider changing the default to None.

    :param df: dataframe with the features used to train and predict
    :param num_models: number of models in the folder
    :param prefix: prefix to choose specific models from the folder - use it only
        if you had run a CV scheme for many different targets or something
    :param folder_name: name of the folder
    :param era_idx: indices of dataframe
    :param model_type: xgb or lgb
    :param rank_average: True - rank the predictions per era or False - total
        ranks in the whole dataframe
    :return: final predictions with proper dimensions for further use
    """
    model_lst = bf.get_model_lst(num_models=num_models, prefix=prefix, folder_name=folder_name)
    predictions_total = []
    X_test = df
    for cv_num in range(num_models):
        if model_type == 'lgb':
            model = lgb.Booster(model_file=model_lst[cv_num])
        if model_type == 'xgb':
            model = bf.create_model(model_type='xgb')
            model.load_model(model_lst[cv_num])
        predictions = predict_in_era_batch(model=model,
                                           df=X_test,
                                           era_idx=era_idx,
                                           rank_per_era=rank_average)
        predictions_total.append(predictions)
    if rank_average:
        # Rescale the averaged ranks into [0, 1].
        scaler = MinMaxScaler(feature_range=(0, 1))
        predictions_final = scaler.fit_transform(
            X=np.mean(predictions_total, axis=0).reshape(-1, 1))
    else:
        predictions_final = np.mean(predictions_total, axis=0)
    return predictions_final.squeeze()
def test_pandas_categorical(self):
    """Exercise pandas categorical-feature handling for LiteMORT or LightGBM,
    including a save/load round-trip check in the LightGBM branch."""
    params = {
        # TODO: needs more detailed tests (translated from the original note)
        "objective": "binary",
        "metric": "logloss",
        'early_stop': 5,
        'num_boost_round': 50,
        "verbosity": 1,
    }
    import pandas as pd
    X = pd.DataFrame({"A": np.random.permutation(['a', 'b', 'c', 'd'] * 75),  # str
                      "B": np.random.permutation([1, 2, 3] * 100),  # int
                      "C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60),  # float
                      "D": np.random.permutation([True, False] * 150)})  # bool
    y = np.random.permutation([0, 1] * 150)
    # Test frame deliberately contains an unseen category 'e'.
    X_test = pd.DataFrame({"A": np.random.permutation(['a', 'b', 'e'] * 20),
                           "B": np.random.permutation([1, 3] * 30),
                           "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
                           "D": np.random.permutation([True, False] * 30)})
    if True:
        X, X_test = Mort_Preprocess.OrdinalEncode_(X, X_test)
        for col in ["A", "B", "C", "D"]:
            X[col] = X[col].astype('category')
            X_test[col] = X_test[col].astype('category')
    # trn_data = lgb.Dataset(X, label=y)
    if isMORT:
        mort0 = LiteMORT(params).fit(X, y)
        pred0 = list(mort0.predict(X_test))
        mort1 = LiteMORT(params).fit(X, y, categorical_feature=[0])
        pred1 = list(mort1.predict(X_test))
        mort2 = LiteMORT(params).fit(X, y, categorical_feature=['A'])
        pred2 = list(mort2.predict(X_test))
        mort3 = LiteMORT(params).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
        pred3 = list(mort3.predict(X_test))
    else:
        clf = lgb.sklearn.LGBMClassifier()
        gbm_ = clf.fit(X, y)
        gbm0 = lgb.sklearn.LGBMClassifier().fit(X, y)
        pred0 = list(gbm0.predict(X_test))
        gbm1 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=[0])
        pred1 = list(gbm1.predict(X_test))
        gbm2 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A'])
        pred2 = list(gbm2.predict(X_test))
        gbm3 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
        pred3 = list(gbm3.predict(X_test))
        # Round-trip: the reloaded booster must reproduce gbm0's probabilities.
        gbm3.booster_.save_model('categorical.model')
        gbm4 = lgb.Booster(model_file='categorical.model')
        pred4 = list(gbm4.predict(X_test))
        pred_prob = list(gbm0.predict_proba(X_test)[:, 1])
        np.testing.assert_almost_equal(pred_prob, pred4)
    input("...")
def predict():
    """Score predict.txt with the goss model and dump arg-max labels to CSV."""
    model_file = root + 'usemodel\\goss_73_model0621.txt'
    booster = lgb.Booster(model_file=model_file)
    print('开始预测...')
    frame = pd.read_table(root + 'predict.txt')  # predict0619
    features = np.array(frame)
    probs = booster.predict(features, num_iteration=booster.best_iteration)
    # print(probs)
    # labels = [1 if p >= 0.5 else 0 for p in probs]  # 2 value
    labels = [list(row).index(max(row)) for row in probs]  # 3 value
    print(labels)
    pd.DataFrame(labels).to_csv(root + "result1.csv", encoding='utf_8_sig')
def load_parameters(self, params):
    """Restore the model and feature/target config from base64 payloads."""
    encoded_model = params.get('h5_model_base64', None)
    encoded_config = params.get('data_config_base64', None)
    # The feature list and target name travel as a pickled, base64 pair.
    config_bytes = base64.b64decode(encoded_config.encode('utf-8'))
    self._features, self._target = pickle.loads(config_bytes)
    with tempfile.NamedTemporaryFile() as tmp:
        # Decode the model payload, write it to the temp file, and let
        # LightGBM load it back from disk.
        model_bytes = base64.b64decode(encoded_model.encode('utf-8'))
        with open(tmp.name, 'wb') as f:
            f.write(model_bytes)
        self._model = lgb.Booster(model_file=tmp.name)
def load_pickle(self):
    """Load the persisted encoders/scalers and the four fold models."""
    self.le_color = joblib.load("le_color.pkl")
    self.le_fuel = joblib.load("le_fuel.pkl")
    self.le_trans = joblib.load("le_trans.pkl")
    self.st_price = joblib.load("st_price.pkl")
    self.st_weight = joblib.load("st_weight.pkl")
    self.st_age = joblib.load("st_age.pkl")
    self.st_km = joblib.load("st_km.pkl")
    self.st_cc = joblib.load("st_cc.pkl")
    self.st_hp = joblib.load("st_hp.pkl")
    # One booster per fold: model_1.txt .. model_4.txt.
    for fold in range(1, 5):
        model_path = "model_{i}.txt".format(i=fold)
        print(model_path)
        booster = lgb.Booster(model_file=model_path)
        print(type(booster))
        self.models.append(booster)