Esempio n. 1
0
def train_script_2():
    """Fit the RandomForestModel grid search and plot a validation confusion matrix.

    The first two entries of the reader's training signals/text are used for
    fitting; the remaining entries are used for validation. The best estimator
    found by the grid search is stored on the model wrapper, printed and
    saved, and the reader itself is dumped to ``dbreader.mdl``.
    """

    def _split_target(frame):
        # Separate the 'command' target column from the feature columns.
        target = frame['command']
        return frame.drop(columns=['command']), target

    reader = DbReader(PATH, split_size=ONE_PERSON_SPLIT)

    fit_commands = getting_commands_from_signals(
        reader.training_signals[:2], reader.training_text[:2])
    holdout_commands = getting_commands_from_signals(
        reader.training_signals[2:], reader.training_text[2:])

    fit_frame = simple_mfcc(fit_commands)
    holdout_frame = simple_mfcc(holdout_commands)

    X_train, y_train = _split_target(fit_frame)
    X_valid, y_valid = _split_target(holdout_frame)

    forest = RandomForestModel()
    search = forest.gridsearchCV()
    search.fit(X_train, y_train)

    # Keep the winning estimator on the wrapper before persisting it.
    forest.set_internal_model(search.best_estimator_)
    print(search.best_estimator_)
    forest.save_model()

    joblib.dump(reader, "dbreader.mdl")

    plot_confusion_matrix(y_valid, forest.predict(X_valid))
Esempio n. 2
0
def evaluating_script():
    """Score the saved RandomForestModel on the ``.test`` CSV files.

    Loads both test CSVs, builds and preprocesses the evaluation dataset,
    restricts it to the feature selection stored in
    ``selected_best_features.data`` and prints the model class name followed
    by a classification report.
    """
    reader = DbReader()
    hyper_rows = reader.load_csv("../allhyper.test")
    hypo_rows = reader.load_csv("../allhypo.test")

    features, targets = create_dataset_for_evaluation(hyper_rows, hypo_rows)
    features = preprocess_the_data(features)

    forest = RandomForestModel()
    estimator = forest.load_model()

    # The feature selection is read from a pickle file in the working directory.
    with open('selected_best_features.data', 'rb') as feature_file:
        selected = pickle.load(feature_file)

    predictions = estimator.predict(features[selected])

    print(type(forest).__name__)
    print(classification_report(targets, predictions))
def train_script(training_X, training_y):
    """Grid-search every candidate model on the given data and save the winners.

    Args:
        training_X: Feature data passed to each grid search's ``fit``.
        training_y: Target data passed to each grid search's ``fit``.
    """
    # All wrappers are built up front, in this order, before any fitting.
    candidates = (
        LinearRegressionModel(),
        SVM_Model(),
        LogisticRegressionModel(),
        RandomForestModel(),
    )

    for candidate in candidates:
        search = candidate.gridsearchCV()
        search.fit(training_X, training_y)
        print(search.best_score_)

        # Store the best estimator on the wrapper, then persist it.
        candidate.set_internal_model(search.best_estimator_)
        candidate.save_model()
Esempio n. 4
0
def evaluating_script(test_X, test_y):
    """Print a classification report for every saved model.

    Args:
        test_X: Feature data passed to each loaded model's ``predict``.
        test_y: Expected targets compared against the predictions.
    """
    # All wrappers are built up front, in this order, before any loading.
    candidates = (
        LinearRegressionModel(),
        SVM_Model(),
        LogisticRegressionModel(),
        RandomForestModel(),
    )

    for candidate in candidates:
        estimator = candidate.load_model()
        predictions = estimator.predict(test_X)

        print(type(candidate).__name__)
        print(classification_report(test_y, predictions))
def train_script():
    """Fit the RandomForestModel grid search on all training data and save it.

    Features come from ``simple_mfcc`` applied to the commands extracted from
    the reader's training signals and text; the ``command`` column is the
    target. The best estimator is stored on the wrapper, printed and saved,
    and the reader is dumped to ``dbreader.mdl``.
    """
    reader = DbReader(PATH, split_size=ONE_PERSON_SPLIT)
    extracted = getting_commands_from_signals(reader.training_signals,
                                              reader.training_text)
    frame = simple_mfcc(extracted)

    # 'command' is the target; every other column is a feature.
    y_train = frame['command']
    X_train = frame.drop(columns=['command'])

    forest = RandomForestModel()
    search = forest.gridsearchCV()
    search.fit(X_train, y_train)

    forest.set_internal_model(search.best_estimator_)
    print(search.best_estimator_)
    forest.save_model()

    joblib.dump(reader, "dbreader.mdl")
def train_script():
    """Select features, grid-search the RandomForestModel and save the result.

    Loads both ``.data`` CSVs, builds and preprocesses the training dataset,
    runs ``feature_selection`` with the wrapper's internal model, writes the
    selection to ``selected_best_features.data`` and fits the grid search on
    the selected columns only. Best score, best parameters and the selection
    are printed before the best estimator is stored and saved.
    """
    reader = DbReader()
    hyper_rows = reader.load_csv("../allhyper.data")
    hypo_rows = reader.load_csv("../allhypo.data")

    features, targets = create_dataset_for_training(hyper_rows, hypo_rows)
    features = preprocess_the_data(features)

    forest = RandomForestModel()
    selected = feature_selection(features, targets, forest.internal_model)

    # Persist the selection before fitting, so it exists even if the search fails.
    with open('selected_best_features.data', 'wb') as feature_file:
        pickle.dump(selected, feature_file)

    search = forest.gridsearchCV()
    search.fit(features[selected], targets)

    for report_item in (search.best_score_, search.best_params_, selected):
        print(report_item)

    forest.set_internal_model(search.best_estimator_)
    forest.save_model()
Esempio n. 7
0
from Model import RandomForestModel

## Test 1
# Smoke test: fit a single-estimator RandomForestModel on two samples and
# check that the predictions for X_test match the expected labels.
model = RandomForestModel(
    X_train=[[1, 2, 3], [11, 12, 13]],
    y_train=[0, 1],
    X_test=[[3, 4, 1], [14, 11, 17]],
    n_estimators=1,
)

model.fit()

out = list(model.predict())
desired_out = [0, 1]

print("Desired out:" + "\t" + str(desired_out))
print("Actual out:" + "\t" + str(out))

# Compare the whole lists so a prediction of the wrong length also counts as a
# failure (an index loop over `out` would miss a short result and crash on a
# long one).
if out != desired_out:
    print("Test 1 failed")
    # Exit with a non-zero status so callers (shell, CI) can see the failure.
    raise SystemExit(1)
print("Test 1 passed")
Esempio n. 8
0
def main():
    """Evaluate the saved models on the test split and draw the plots.

    Each model is loaded with ``model.load()``; its MAE and MSE on the test
    data are printed and, together with ``model.score``, appended to the file
    at ``stats_file_path``. Afterwards the plotter draws its heatmap, KDE plot
    and pair plot, followed by a confusion matrix and a classification report
    for the last model in the list.
    """
    reader = DbReader()
    plotter = Plotter(reader)

    # Only the test part of the six-way split is needed here.
    # Current choice: both kinds, 3 classes.
    _, _, test_X, _, _, test_y = reader.get_packed_data()
    # Alternative loaders that unpack into the same six values:
    #   both kinds, 10 classes:    reader.get_splitted_data()
    #   red only, 3 / 10 classes:  reader.get_red_packed_data() / reader.get_red_data()
    #   white only, 3 / 10 classes: reader.get_white_packed_data() / reader.get_white_data()

    # Labels handed to the classification report.
    # Alternative (presumably for the 10-class loaders): list(range(1, 11))
    labels = [0, 1, 2]

    models = [
        LinearRegressionModel(),
        LogisticRegressionModel(),
        SVMModel(),
        RandomForestModel(),
    ]
    model_names = ('LinRegModel', 'LogRegModel', 'SVMModel', 'RandomForestModel')

    # Append MAE, MSE and score for the test split to the stats file.
    with open(stats_file_path, "a") as stat_file:
        stat_file.write("Model errors:\n")
        for i, model in enumerate(models):
            model.load()
            mae = model.get_mae(test_X, test_y)
            mse = model.get_mse(test_X, test_y)
            print(
                f"Model name: {model.name:27}   MAE: {mae:{6}.{4}}    MSE: {mse:{6}.{4}}"
            )
            stat_file.write(f"\t{model_names[i]:{22}}: ")
            stat_file.write(f"MAE = {mae:{8}.{4}}  ")
            stat_file.write(f"MSE = {mse:{8}.{4}}  ")
            stat_file.write(f"SCORE = {model.score(test_X, test_y)}\n")

    plotter.heatmap()
    plotter.kdeplot()
    plotter.pairplot()

    # The prediction-based plots use the last model of the list.
    last_model = models[-1]
    plotter.confusion_matrix(test_y, last_model.predict(test_X))
    plotter.classification_report(test_y, last_model.predict(test_X), labels)
Esempio n. 9
0
def main():
    """Grid-search, refit, time and save the four models.

    For every model a 5-fold ``GridSearchCV`` (``refit=False``) is run on the
    training split and its best parameters, best score and worst mean test
    score are printed. Each model is then configured with its best
    parameters, fitted (timed), evaluated with MAE/MSE on the validation split
    and saved. Finally the fitting durations are appended to the file at
    ``stats_file_path``.
    """
    # Install a blanket "ignore" filter; the warning raised right after it is
    # therefore suppressed as well.
    warnings.simplefilter("ignore")
    warnings.warn("deprecated", DeprecationWarning)

    # INIT
    reader = DbReader()
    plotter = Plotter(reader)

    # Only the training and validation parts of the six-way split are used.
    # Current choice: both kinds, 3 classes.
    train_X, val_X, _, train_y, val_y, _ = reader.get_packed_data()
    # Alternative loaders that unpack into the same six values:
    #   both kinds, 10 classes:    reader.get_splitted_data()
    #   red only, 3 / 10 classes:  reader.get_red_packed_data() / reader.get_red_data()
    #   white only, 3 / 10 classes: reader.get_white_packed_data() / reader.get_white_data()

    models = [
        LinearRegressionModel(),
        LogisticRegressionModel(),
        SVMModel(),
        RandomForestModel(),
    ]
    model_names = ('LinRegModel', 'LogRegModel', 'SVMModel', 'RandomForestModel')

    # One parameter grid per model, in the same order as `models`.
    # The trailing notes are the original author's remarks on each grid.
    params = [
        {
            "fit_intercept": [True, False],
            "normalize": [True, False],
        },  # true, 1, false
        {
            "tol": [1e-3, 1e-4, 1e-5],
            "C": [1, 10, 20, 30],
            "fit_intercept": [True, False],
            "warm_start": [True, False],
        },  # 1e-4, 20, true, true
        {
            "C": [2, 5, 10, 20, 30, 50],
            "gamma": [0.1, 0.01, 0.0001, 0.00001],
        },  # gamma:rb, pl, sim -- 20, 1e-5
        {
            "n_estimators": [50, 100, 165, 200, 300, 500, 700],
            "max_depth": [10, 20, 33, 40],
        },  # 33, 165
    ]
    best_params = []
    times = []

    # GRID SEARCH: only the best parameters are kept (refit=False).
    for i, model in enumerate(models):
        clf = GridSearchCV(model.model, params[i], cv=5, refit=False)
        clf.fit(train_X, train_y)
        print(f"{model_names[i]}")
        print(f"Best params: {clf.best_params_}")
        print(f"Best score:  {clf.best_score_}")
        print(f"Worst score: {clf.cv_results_['mean_test_score'].min()}\n")
        best_params.append(clf.best_params_)

    # LEARN WITH BEST_PARAMS: the timing covers the fit call only.
    for i, model in enumerate(models):
        model.set_estimator(best_params[i])
        t_start = time()
        model.fit(train_X, train_y)
        t_end = time()
        mae = model.get_mae(val_X, val_y)
        mse = model.get_mse(val_X, val_y)
        times.append(t_end - t_start)
        model.save()
        print(
            f"Model name: {model.name:27}  MAE: {mae:{6}.{4}}  MSE: {mse:{6}.{4}}  t: {times[i]}"
        )

    # SAVE LOGS TO FILE
    with open(stats_file_path, "a") as stat_file:
        stat_file.write("Duration times of model fitting:\n")
        for i, _ in enumerate(model_names):
            stat_file.write(f"\t{model_names[i]:{22}}: {times[i]}\n")
        stat_file.write('\n')