Beispiel #1
0
def draw(clf, ds: DataSet, step):
    """Show the classifier's predictions on a grid together with the samples.

    A grid with spacing ``step`` is laid over the bounding box of the two
    sample coordinates, every grid node is handed to ``clf.predict_single``
    through ``pm_predict``, and the results are rendered as a colour mesh.
    Samples labelled ``-1`` are drawn in red, samples labelled ``1`` in blue,
    and the samples selected by ``clf.support_indices`` are marked with white
    crosses.  The figure is displayed with ``plt.show()``.

    Args:
        clf: Object providing ``predict_single`` and ``support_indices``.
        ds: Data set providing samples via ``get_X()`` and labels via
            ``get_y()``; the samples must have exactly two columns.
        step: Distance between neighbouring grid nodes on both axes.
    """
    samples = ds.get_X()
    labels = ds.get_y()

    # Bounding box of the samples; np.arange is half-open, so the grid stops
    # before the per-axis maxima.
    x_min, y_min = np.amin(samples, axis=0)
    x_max, y_max = np.amax(samples, axis=0)
    x_ticks = np.arange(x_min, x_max, step)
    y_ticks = np.arange(y_min, y_max, step)
    xx, yy = np.meshgrid(x_ticks, y_ticks)

    # One row per grid node, predicted and folded back into the mesh shape.
    nodes = np.c_[xx.ravel(), yy.ravel()]
    node_predictions = pm_predict(clf.predict_single, nodes, name='predict')
    predict_z = np.array(node_predictions).reshape(xx.shape)

    neg_x, neg_y = samples[labels == -1].T
    pos_x, pos_y = samples[labels == 1].T
    sup_x, sup_y = samples[clf.support_indices].T

    plt.figure(figsize=(10, 10))
    plt.pcolormesh(xx, yy, predict_z,
                   cmap=plt.get_cmap('seismic'),
                   shading='auto')
    plt.scatter(neg_x, neg_y, color='red', s=100)
    plt.scatter(pos_x, pos_y, color='blue', s=100)
    # Highlight the support samples on top of the class markers.
    plt.scatter(sup_x, sup_y, color='white', marker='x', s=60)
    plt.show()
Beispiel #2
0
def choose_best(ds: DataSet):
    """Grid-search ``SMO`` parameters on ``ds`` and return the best combination.

    Runs ``GridSearchCV`` over ``GRID`` with ``cv=4`` and ``'accuracy'``
    scoring, passes the collected ``cv_results_`` to ``draw_metrics``, prints
    the best score with its parameters, and returns those parameters.

    Args:
        ds: Data set providing samples via ``get_X()`` and labels via
            ``get_y()``.

    Returns:
        The ``best_params_`` attribute of the fitted search.
    """
    search_options = {
        'estimator': SMO(),
        'param_grid': GRID,
        'cv': 4,
        'scoring': 'accuracy',
        'verbose': 1,
        'n_jobs': -1,
    }
    search = GridSearchCV(**search_options)
    search.fit(ds.get_X(), ds.get_y())

    draw_metrics(search.cv_results_)

    best_score = search.best_score_
    best_params = search.best_params_
    print(f'Got best score {best_score} with params {best_params}')
    return best_params
Beispiel #3
0
def test(ds: DataSet, name):
    """Fit ``AdaBoost`` on a split of ``ds`` and plot accuracy per callback call.

    The data set is split with ``test_size=0.33``.  A callback passed to
    ``AdaBoost`` records the accuracy on the test and train parts each time it
    is invoked, and the collected series are handed to ``metric_plot`` with
    x values ``1..STEPS``.

    Args:
        ds: Data set offering ``test_train_split``; both resulting parts must
            expose ``X`` and ``y``.
        name: Label inserted into the plot title.
    """
    print("Metric")
    train_ds, test_ds = ds.test_train_split(test_size=0.33)

    metric_data = {"test": [], "train": []}
    # Evaluation order matters only for readability: test first, then train.
    tracked_parts = (("test", test_ds), ("train", train_ds))

    def clbck(clf, step):
        # `step` is accepted because the booster supplies it; it is not used.
        for part_name, part in tracked_parts:
            score = accuracy_score(part.y, clf.predict(part.X))
            metric_data[part_name].append(score)

    booster = AdaBoost(n_estimator=STEPS, callback=clbck, verbose=True)
    booster.fit(train_ds.X, train_ds.y)

    step_numbers = list(range(1, STEPS + 1))
    metric_plot(metric_data,
                x_label='Steps',
                x_values=step_numbers,
                title=f'Accuracy for {name}',
                default_color=True)
Beispiel #4
0
def read_dataset(filename) -> DataSet:
    """Load a CSV file into a ``DataSet`` with labels encoded as 1 / -1.

    All columns except the last are used as the feature matrix.  The last
    column is the class label: the value ``'P'`` becomes ``1`` and every other
    value becomes ``-1``.

    Args:
        filename: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A ``DataSet`` built from the feature matrix and the integer label
        vector.  A file with a header but no rows yields empty arrays instead
        of raising.
    """
    # Convert the frame once; slicing the same array avoids a second copy.
    table = pd.read_csv(filename).to_numpy()
    X = table[:, :-1]

    # Object dtype guarantees an element-wise comparison with the string 'P'
    # regardless of the dtype pandas inferred for the frame.
    raw_labels = table[:, -1].astype(object)

    # np.where handles zero-length input, unlike np.vectorize without
    # `otypes`, which raises ValueError on an empty array.
    y = np.where(raw_labels == 'P', 1, -1)
    return DataSet(X, y)
Beispiel #5
0
import os
import csv
from utils import common
from config import configer
from utils.data_set import DataSet, get_important_subject


def get_run_path():
    """Return the directory that contains this file, with symlinks resolved."""
    resolved_file = os.path.realpath(__file__)
    containing_dir, _file_name = os.path.split(resolved_file)
    return containing_dir


if __name__ == "__main__":
    # Script entry point: load the configuration, read three cohort files and
    # write the subjects returned by get_important_subject to a text file.
    configer.load_data(get_run_path())

    # Fetch the major-specific data for each cohort file.
    cohort_files = (
        "2014-2015_计科.csv",
        "2016-2017_计科.csv",
        "2018-2019_计科.csv",
    )
    A, B, C = (DataSet(cohort_file) for cohort_file in cohort_files)
    for cohort in (A, B, C):
        cohort.get_special_data()

    # Compute the intersection of courses from the "ZWMC" column of each cohort.
    course_columns = [cohort.get_column("ZWMC") for cohort in (A, B, C)]
    important = get_important_subject(*course_columns)

    output_path = os.path.join(configer.get_value("dataset_path"),
                               "csv/important_sub.txt")
    with open(output_path, "w+") as w:
        w.write(str(important))
    print("ok")