Esempio n. 1
0
def process_training_model():
    """Train several candidate models so the best one can be picked later.

    The function runs three stages in order:

    1. Load the training set through ``myload.DataLoader``.
    2. Clean the data with ``myprepare.PrepareData`` and read back the
       prepared features and labels.
    3. Train a series of models (three with default parameters, one
       hand-tuned random forest, one grid search) by calling
       ``train_model()`` on each trainer.

    Nothing is returned; progress is reported through ``myprint``. The
    final messages tell the user that the trained models are saved in the
    'trainmodels' folder under the names given here.
    """
    # Stage 1: fetch and load the training data set.
    myprint("Ready to load train data set ...")
    raw_train = myload.DataLoader().load_train_set()
    myprint("Load train data complete.")

    # Stage 2: clean the data (driven by the feature analysis results and
    # expected to be tuned according to model performance).
    myprint("Ready to prepare Data ... ")
    cleaner = myprepare.PrepareData(raw_train)
    cleaner.prepare_data()
    features, labels = cleaner.train_prepared, cleaner.train_label
    myprint("Prepare data complete.")

    # Stage 3: train repeatedly with different settings and record each
    # model's score in order to find the best one.
    myprint("Ready to training models and compare performance score ... ")

    # Baseline models: default parameters, trained and cross-validated
    # (cross-validation per the original author's note).
    baseline_runs = (
        ("lin_model_1", "linear"),
        ("decision_model_1", "decisiontree"),
        ("random_model_1", "randomforest"),
    )
    for model_name, model_kind in baseline_runs:
        mytrain.TrainModel(
            model_name, get_default_model(model_kind), features, labels
        ).train_model()

    # Random forest with hand-picked parameters.
    custom_forest = RandomForestRegressor(n_estimators=10, max_features=4, bootstrap=False)
    mytrain.TrainModel("random_model_s1", custom_forest, features, labels).train_model()

    # Grid search over random-forest parameters (the original author notes
    # this takes one to two minutes).
    search_space = [
        {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
        {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
    ]
    grid_trainer = mytrain.GridSearchModel(
        "random_grid_1", get_default_model("randomforest"),
        search_space, features, labels,
    )
    # Per the original note: passing True would display the score of every
    # parameter combination.
    grid_trainer.train_model()

    closing_lines = (
        "",
        "Training and compare models complete. ",
        "All trained models are saved in folder 'trainmodels' with the same name you provided. ",
        "Please choose the best model for actual predict. Thanks ! ",
        "",
    )
    for line in closing_lines:
        myprint(line)
Esempio n. 2
0
def main():
    """Run the interactive console menu of the face-monitoring tool.

    Opens (creating it if needed) the SQLite database ``FACE_DB.db`` in the
    current working directory, makes sure the ``Participants`` table
    exists, and then loops over a four-entry text menu until the user
    selects "Quit".

    Menu actions:
        1. ``fd.RegisterFace(participantCount)`` - its return value becomes
           the new participant count.
        2. ``tm.TrainModel()`` - its two return values are kept as
           ``names`` and ``faces``.
        3. ``mn.StartMonitoring(names, faces)`` - uses the values from the
           last training run (empty lists if none happened yet).
        4. Leave the loop.

    Any other input, including non-numeric text, prints an "invalid
    choice" message and shows the menu again.

    Returns:
        None.
    """
    participantCount = 0
    names = []
    faces = []
    conn = sqlite3.connect('FACE_DB.db')
    try:
        c = conn.cursor()
        # IF NOT EXISTS is required: the database file persists between
        # runs, so a plain CREATE TABLE raises sqlite3.OperationalError
        # ("table Participants already exists") on every run but the first.
        c.execute(
            '''CREATE TABLE IF NOT EXISTS Participants ([ID] integer PRIMARY KEY,[Name] text)''')
        conn.commit()
        while True:
            print('1. Register Participant')
            print('2. Train Model')
            print('3. Start Monitoring')
            print('4. Quit')
            try:
                choice = int(input('\nEnter your choice: '))
            except ValueError:
                # Non-numeric input is just another invalid choice; it must
                # not terminate the whole menu with a traceback.
                print('\nInvalid Choice! Try again!!')
                continue
            if choice == 1:
                participantCount = fd.RegisterFace(participantCount)
            elif choice == 2:
                names, faces = tm.TrainModel()
            elif choice == 3:
                mn.StartMonitoring(names, faces)
            elif choice == 4:
                break
            else:
                print('\nInvalid Choice! Try again!!')
    finally:
        # Release the database handle even if a menu action raises.
        conn.close()
Esempio n. 3
0
def process_predict():
    """Predict on the test set with a previously trained model.

    Loads the test data through ``myload.DataLoader``, runs it through
    ``myprepare.PrepareData``, then loads the trainer registered under the
    name 'random_grid_1' and calls its ``predict`` with the prepared
    features and labels. Finally prints labels next to predictions for the
    first 20 rows, and features, labels and predictions side by side for
    the first 5 rows.

    Nothing is returned; progress is reported through ``myprint``.
    """
    # Stage 1: load the raw test set (or the real data to predict on).
    myprint("Ready to load test data set ...")
    raw_test = myload.DataLoader().load_test_set()

    # TODO (from the original author, still open): if a text field in the
    # test data has a different number of categories than in the training
    # set, the number of feature columns differs. This does not matter on
    # large data, but predicting on a few dozen rows or a single row
    # raises an error.
    myprint("Load test data complete.")

    # Stage 2: clean the data.
    myprint("Ready to prepare Data ... ")
    cleaner = myprepare.PrepareData(raw_test)
    cleaner.prepare_data()
    # PrepareData exposes its output under the "train_*" attribute names
    # even when it was fed the test set.
    features = cleaner.train_prepared
    labels = cleaner.train_label
    myprint("Prepare data complete.")

    # Stage 3: predict with the chosen model.
    myprint("Ready to predict.")
    chosen_model = 'random_grid_1'
    predictor = mytrain.TrainModel(chosen_model)
    # The second returned value (named an RMSE score by the original
    # author) is not used here.
    predictions, _rmse = predictor.predict(features, labels)
    myprint("Predict complete.")
    myprint("")

    # Stage 4: show the result so it can be used by downstream steps.
    myprint("Show or use predict result : ")
    label_vs_pred = np.c_[labels[:20], predictions[:20]]
    print(label_vs_pred)
    full_preview = np.c_[features[:5], labels[:5], predictions[:5]]
    print(full_preview)
Esempio n. 4
0
def case_Two(sorted_data_path):
    """Ask for the output locations, then train a model on the sorted data.

    The user is prompted for three distinct locations: the model output
    path, the label-binarizer output path and the directory for the
    accuracy/loss plot. A prompt is repeated, after an error message, for
    as long as its answer equals one of the previously entered locations.
    The plot directory is then created and training is started through
    ``train_model.TrainModel(...).handle_train()``.

    Args:
        sorted_data_path: path to the updated, sorted data set; passed
            unchanged to ``train_model.TrainModel``.

    Returns:
        A ``(model_path, lb_path)`` tuple: the model output path and the
        label-binarizer output path exactly as entered by the user.
    """
    ask_new_dir = Cheak_Dir.GetDirectory.get_New_Dir
    lb_prompt = "Enter the full path (with the name) to output label binarizer: "
    plot_prompt = "Enter the folder directory to output accuracy/loss plot: "
    clash_message = "Error - file will be override"

    model_path = ask_new_dir(
        "Enter the full path (with the name) to output model: ")

    # The label binarizer must not share its path with the model.
    lb_path = ask_new_dir(lb_prompt)
    while lb_path == model_path:
        PrintsForUser.printError(clash_message)
        lb_path = ask_new_dir(lb_prompt)

    # The plot directory must differ from both file paths chosen above.
    plot_dir = ask_new_dir(plot_prompt)
    while plot_dir in (model_path, lb_path):
        PrintsForUser.printError(clash_message)
        plot_dir = ask_new_dir(plot_prompt)

    os.mkdir(plot_dir)

    trainer = train_model.TrainModel(
        sorted_data_path, model_path, lb_path, plot_dir)
    trainer.handle_train()

    return model_path, lb_path