Example #1
0
def main():
    from pandas.io.parsers import read_csv
    from utils import divided_data, divided_y, datamini, data_visualize
    A = read_csv("Data_on_year.csv", index_col=0).values
    X = A[:, :13]
    y = A[:, 13]
    y = divided_y(y)
    X_train, X_test, y_train, y_test = divided_data(X, y, standard=False)
    model = datamini(X_train, X_test, y_train)
    y_pred = model.xgboost()
    threshold_portion=y_train.mean()
    plot=data_visualize(y_test,y_pred,threshold_portion,'XGboost',divided_by_threshold=True)
    plot.indicators_table()
Example #2
0
def main():
    from pandas.io.parsers import read_csv
    from utils import divided_data, divided_y, datamini, data_visualize
    A = read_csv("Data_on_year.csv", index_col=0).values
    X = A[:, :13]
    y = A[:, 13]
    y = divided_y(y)
    X_train, X_test, y_train, y_test = divided_data(X, y)
    model = datamini(X_train, X_test, y_train, use_threshold=False)
    y_pred = model.FNNetwork()
    threshold_portion = y_train.mean()
    plot = data_visualize(y_test, y_pred, threshold_portion, 'FNN')
    plot.ROC_curve()
    plot.PR_curve()
    plot.COST_curve()
    plot.indicators_table()
Example #3
0
"""
This is a example to run model svm with threshold to classify
"""
from pandas.io.parsers import read_csv
from utils import divided_data, divided_y, datamini, data_visualize
import numpy as np
A = read_csv("Data_on_year.csv", index_col=0).values
random_list = np.random.choice(len(A), 5000)
A_sample = A[random_list]
X = A_sample[:, :13]
y = A_sample[:, 13]
y = divided_y(y)
X_train, X_test, y_train, y_test = divided_data(X, y, standard_num=13)
model = datamini(X_train, X_test, y_train)
y_pred = model.svm()
threshold_portion = y_train.mean()
plot = data_visualize(y_test,
                      y_pred,
                      threshold_portion,
                      'svm',
                      divided_by_threshold=True)
plot.indicators_table()
Example #4
0
"""
This is a example to run model randomforest without threshold to classify
"""
from pandas.io.parsers import read_csv
from utils import divided_data, divided_y, datamini, data_visualize
A = read_csv("Data_on_year.csv", index_col=0).values
X = A[:, :13]
y = A[:, 13]
y = divided_y(y)
X_train, X_test, y_train, y_test = divided_data(X, y, standard=False)
model = datamini(X_train, X_test, y_train, use_threshold=False)
y_pred = model.ranforest()
threshold_portion = y_train.mean()
plot = data_visualize(y_test, y_pred, threshold_portion, 'RandomForest')
plot.ROC_curve()
plot.PR_curve()
plot.COST_curve()
plot.indicators_table()