Esempio n. 1
0
def trainWithHotEncoding(hot_encoded_train_features, hot_encoded_train_labels,
                         hot_encoded_test_features, hot_encoded_test_labels,
                         results, algorithms, isTesting):
    """Train, save and evaluate a random forest on the hot-encoded data set.

    The model is fitted on the training split, dumped to ``RF-HOT.model``
    in the current working directory, and its curves are drawn for both the
    training and the test split.

    Args:
        hot_encoded_train_features: Training features, handed unchanged to
            ``RandomForest.train`` and ``RandomForest.drawCurves``.
        hot_encoded_train_labels: Training labels. Must provide
            ``.flatten()`` (presumably a NumPy array -- confirm with caller).
        hot_encoded_test_features: Test features used for the final curves.
        hot_encoded_test_labels: Test labels used for the final curves.
        results: Mapping updated in place; the value returned by
            ``drawCurves`` on the test split is stored under
            ``"RANDOM FOREST HOT ENCODING"``.
        algorithms: Mapping updated in place; the trained wrapper is stored
            under ``"RANDOM FOREST HOT ENCODING"``.
        isTesting: When truthy, an empty parameter grid is used so that only
            one model is trained.

    Returns:
        The tuple ``(rf_hot_encoding, results, algorithms)``.
    """
    # Flatten once and reuse for both the class list and the weight estimate.
    flat_train_labels = hot_encoded_train_labels.flatten()

    # `classes` and `y` are passed by keyword: current scikit-learn releases
    # make them keyword-only, and older releases accept the same names.
    # NOTE(review): assumes `class_weight` is sklearn.utils.class_weight.
    class_weights = class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(flat_train_labels),
        y=flat_train_labels)

    if isTesting:
        print("TESTING MODE RF: ONLY TRAINING 1 MODEL")
        param_grid = {}
    else:
        # Single candidate per hyper-parameter; extend the lists (e.g. with
        # None / "sqrt" or 1000 / 3000) to widen the search.
        param_grid = {
            'max_features': ["log2"],
            'n_estimators': [2000],
        }

    # Train the random forest.
    rf_hot_encoding = RandomForest(class_weights=class_weights,
                                   param_grid=param_grid)
    rf_hot_encoding.title = "RANDOM FOREST HOT ENCODING TRAIN"
    rf_hot_encoding.train(hot_encoded_train_features, hot_encoded_train_labels)

    model_name = 'RF-HOT.model'
    print("SAVING MODEL: ", model_name)
    try:
        joblib.dump(rf_hot_encoding.model, model_name)
    except Exception as e:
        # Saving is best effort: a failed dump is reported but must not
        # discard the model that was just trained.
        print("Cannot save {} because: \n\n".format(model_name), str(e))

    algorithms["RANDOM FOREST HOT ENCODING"] = rf_hot_encoding

    # Curves on the training split (return value intentionally unused).
    rf_hot_encoding.drawCurves(X=hot_encoded_train_features,
                               y=hot_encoded_train_labels)

    # Curves on the test split; this result is what gets recorded.
    rf_hot_encoding.title = "RANDOM FOREST HOT ENCODING TEST"
    results["RANDOM FOREST HOT ENCODING"] = rf_hot_encoding.drawCurves(
        X=hot_encoded_test_features, y=hot_encoded_test_labels)
    return rf_hot_encoding, results, algorithms
Esempio n. 2
0
def trainWithFrecuencies(tetra_freq_train_features, tetra_freq_train_labels,
                         tetra_freq_test_features, tetra_freq_test_labels,
                         results, algorithms, isTesting):
    """Train, save and evaluate a random forest on tetra-nucleotide frequencies.

    The model is fitted on the training split, dumped to ``RF-TETRA.model``
    in the current working directory, and its curves are drawn for both the
    training and the test split.

    Args:
        tetra_freq_train_features: Training features, handed unchanged to
            ``RandomForest.train`` and ``RandomForest.drawCurves``.
        tetra_freq_train_labels: Training labels. Must provide
            ``.flatten()`` (presumably a NumPy array -- confirm with caller).
        tetra_freq_test_features: Test features used for the final curves.
        tetra_freq_test_labels: Test labels used for the final curves.
        results: Mapping updated in place; the value returned by
            ``drawCurves`` on the test split is stored under
            ``"RANDOM FOREST"``.
        algorithms: Mapping updated in place; the trained wrapper is stored
            under ``"RANDOM FOREST"``.
        isTesting: When truthy, an empty parameter grid is used so that only
            one model is trained.

    Returns:
        The tuple ``(rf, results, algorithms)``.
    """
    # Flatten once and reuse for both the class list and the weight estimate.
    flat_train_labels = tetra_freq_train_labels.flatten()

    # `classes` and `y` are passed by keyword: current scikit-learn releases
    # make them keyword-only, and older releases accept the same names.
    # NOTE(review): assumes `class_weight` is sklearn.utils.class_weight.
    class_weights = class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(flat_train_labels),
        y=flat_train_labels)

    if isTesting:
        print("TESTING MODE RF: ONLY TRAINING 1 MODEL")
        param_grid = {}
    else:
        # Single candidate per hyper-parameter; extend the lists (e.g. with
        # None / "sqrt" or 1000 / 3000) to widen the search.
        param_grid = {
            'max_features': ["log2"],
            'n_estimators': [2000],
        }

    rf = RandomForest(class_weights=class_weights, param_grid=param_grid)
    rf.title = "RANDOM FOREST TETRA NUCLEOTIDE FREQUENCY TRAIN"
    rf.train(tetra_freq_train_features, tetra_freq_train_labels)

    model_name = 'RF-TETRA.model'
    print("SAVING MODEL USING JOBLIB: ", model_name)
    try:
        joblib.dump(rf.model, model_name)
    except Exception as e:
        # Saving is best effort: a failed dump is reported but must not
        # discard the model that was just trained.
        print("Cannot save {} because: \n\n".format(model_name), str(e))

    algorithms["RANDOM FOREST"] = rf

    # Curves on the training split (return value intentionally unused).
    rf.drawCurves(X=tetra_freq_train_features, y=tetra_freq_train_labels)

    # Curves on the test split; this result is what gets recorded.
    rf.title = "RANDOM FOREST TETRA NUCLEOTIDE FREQUENCY TEST"
    results["RANDOM FOREST"] = rf.drawCurves(X=tetra_freq_test_features,
                                             y=tetra_freq_test_labels)
    return rf, results, algorithms