def model_test(self, test_file):
    test_data = load_data(test_file)

    # xgboost
    xgb_clf = XGB(xgb_model_name)
    xgb_clf.test_model(test_data)

    # lr (expects tab-separated data)
    lr_clf = LR(lr_model_name)
    x, y = load_lr_data(test_file, '\t')
    lr_clf.test_model(x, y)
def model_train(self, train_file):
    train_data = load_data(train_file)

    # xgboost
    print('train a single xgb model...')
    xgb_clf = XGB(xgb_model_name)
    xgb_clf.train_model(train_data)
    print('train a single xgb model done.\n')

    # lr
    print('train a single lr model...')
    lr_clf = LR(lr_model_name)
    x, y = load_lr_data(train_file, '\t')
    lr_clf.train_model(x, y)
    print('train a single LR model done.\n')
def model_test(self, test_file):
    test_x, test_y = load_sample_data(test_file)

    # load the fitted tfidf + feature-selection models and transform the test set
    features = Feature(tfidf_model_name, best_feature_model_name)
    features.load_model()
    model_test_x_feature = features.transform(test_x)

    # xgboost
    xgb_clf = XGB(xgb_model_name)
    xgb_clf.test_model(model_test_x_feature, test_y)

    # lr
    lr_clf = LR(lr_model_name)
    lr_clf.test_model(model_test_x_feature, test_y)

    # xgboost+lr
    xgb_lr_clf = XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name,
                       one_hot_encoder_model_name)
    xgb_lr_clf.test_model(model_test_x_feature, test_y)
def model_test(test_file):
    test_x, test_y = load_sample_data(test_file, sep=sep, has_pos=True)

    # load the fitted tfidf + feature-selection models and transform the test set
    features = Feature(tfidf_model_name, best_feature_model_name)
    features.load_model()
    model_test_x_feature = features.transform(test_x)

    # xgboost
    xgb_clf = XGB(xgb_model_name)
    xgb_preds = xgb_clf.test_model(model_test_x_feature, test_y)

    # lr
    lr_clf = LR(lr_model_name)
    lr_preds = lr_clf.test_model(model_test_x_feature, test_y)

    # xgboost+lr
    xgb_lr_clf = XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name,
                       one_hot_encoder_model_name)
    xgb_lr_preds = xgb_lr_clf.test_model(model_test_x_feature, test_y)

    # persist predictions for each model
    save(xgb_preds, pred_save_path=xgb_pred_name)
    save(lr_preds, pred_save_path=lr_pred_name)
    save(xgb_lr_preds, pred_save_path=xgblr_pred_name)
import torch


def trainLR(model_id, train_features, train_label, val_features, val_label):
    '''
    @Description : train a logistic regression model on the GPU indexed by model_id
    @Time        : 2020/07/24 16:23:41
    @Author      : sam.qi
    @Param       : model_id, train/validation features and labels (torch tensors)
    @Return      : None
    '''
    print(str.format("LR-{} start training", model_id))

    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        device = torch.device(str.format('cuda:{}', model_id))
    else:
        device = torch.device('cpu')

    clf = LR(120, 20, device=device)
    clf.to(device)

    # move data to the same device as the model
    train_features = train_features.to(device)
    train_label = train_label.to(device)
    val_features = val_features.to(device)
    val_label = val_label.to(device)

    clf.fit(train_features, train_label, val_features, val_label, train_iters=2000)
def model_train(self, train_file):
    train_x, train_y = load_sample_data(train_file)

    # fit the tfidf + feature-selection pipeline on the training data
    features = Feature(tfidf_model_name, best_feature_model_name)
    features.fit(max_feature_cnt, feature_max_df, feature_min_df,
                 ngram_range, train_x, train_y)
    model_train_x_feature = features.transform(train_x)

    # xgboost
    print('train a single xgb model...')
    xgb_clf = XGB(xgb_model_name)
    xgb_clf.train_model(model_train_x_feature, train_y)
    print('train a single xgb model done.\n')

    # lr
    print('train a single lr model...')
    lr_clf = LR(lr_model_name)
    lr_clf.train_model(model_train_x_feature, train_y)
    print('train a single LR model done.\n')

    # xgboost+lr
    print('train a xgboost+lr model...')
    xgb_lr_clf = XGBLR(xgblr_xgb_model_name, xgblr_lr_model_name,
                       one_hot_encoder_model_name)
    xgb_lr_clf.train_model(model_train_x_feature, train_y)
    print('train a xgboost+lr model done.\n')
import pickle
import sys

with open('../dataset.pkl', 'rb') as f:
    train_set = pickle.load(f)
    test_set = pickle.load(f)
    cate_list = pickle.load(f)
    user_count, item_count, cate_count = pickle.load(f)

# pick the model implementation from the command line
if sys.argv[1] == 'deepfm':
    from deepfm import DeepFM
    model = DeepFM(user_count=user_count, item_count=item_count,
                   cate_count=cate_count, cate_list=cate_list)
elif sys.argv[1] == 'lr':
    from lr import LR
    model = LR(user_count=user_count, item_count=item_count,
               cate_count=cate_count, cate_list=cate_list)
elif sys.argv[1] == 'lrcross':
    from lrcross import LR
    model = LR(user_count=user_count, item_count=item_count,
               cate_count=cate_count, cate_list=cate_list)
elif sys.argv[1] == 'dnn':
    from dnn import DNN
    model = DNN(user_count=user_count, item_count=item_count,
                cate_count=cate_count, cate_list=cate_list)
elif sys.argv[1] == 'widedeep':
    from widedeep import WideDeep
cv = CountVectorizer()
X = cv.fit_transform(corpus).toarray()

##############################################################
# Training the model
print("Training the model with train_set=80% & test_set=20%")

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fitting Logistic Regression to the Training set
model = LR(X_train, y_train)
y_pred = model.predictor(X_test)

# Calculating Accuracy
accuracy = model.accuracy(y_test, y_pred)
print("Model Accuracy : ", accuracy)

# Retraining the model with complete dataset
model = LR(X, y)
print("Model Retrained With the Complete DataSet")

##############################################################
# For Testing New Sentences :
##############################################################
while True: