예제 #1
0
from sklearn.model_selection import cross_val_score
import configparser

from preprocess import load_arff, fill_miss


# Script entry point: for each of ten configured datasets, start one row of a
# markdown table in 'bagging_acc.md' and build a bagging ensemble for each of
# five base learners.
# NOTE(review): os, time, BaggingClassifier, GaussianNB and
# DecisionTreeClassifier are used below, but their imports are not visible in
# this excerpt -- confirm they are imported at the top of the file.
# NOTE(review): this excerpt ends in the middle of the if/elif chain; the
# remaining classifiers and the evaluation/output code are not visible here.
if __name__ == '__main__':
    config = configparser.ConfigParser()
    # Relative path: resolved against the current working directory.
    config.read('../config.ini')
    with open(os.path.join(config['path']['OUTPUT_PATH'], 'bagging_acc.md'), 'w') as of:
        # Markdown table header: an empty first cell followed by one
        # centre-aligned column per base learner.
        of.write('||naive bayes|decision tree|KNN|MLP|LinearSVM|\n|:---:|:---:|:---:|:---:|:---:|:---:|\n')
        for i in range(10):
            # Dataset paths are read from keys 'df0' .. 'df9' of the [path] section.
            fpath = config['path']['df' + f'{i}']

            data = load_arff(fpath)
            X, Y = fill_miss(data)
            # First cell of the row: the part of fpath between its last '/'
            # and its last '.'.
            of.write(f"|{fpath[fpath.rfind('/')+1:fpath.rfind('.')]}|")
            # NOTE(review): this inner loop reuses the name `i` from the outer
            # loop. The outer `for` rebinds `i` on every iteration and fpath
            # is read before this point, so the visible code still works, but
            # a distinct name would be clearer.
            for i in range(5):
                # Progress line: dataset name and the algorithm label from
                # config['alg']['alg<i>'], tab-separated, with a trailing tab
                # instead of a newline.
                print(fpath[fpath.rfind('/') + 1:fpath.rfind('.')] + '\t' + config['alg']['alg' + f'{i}'], end='\t')
                # Timestamp taken before the classifier is built; where it is
                # consumed is not visible in this excerpt.
                start_time = time.time()
                # Select the base learner by index. Every visible ensemble
                # uses the same settings: n_estimators=10, max_samples=0.5,
                # max_features=0.5.
                # NOTE(review): recent scikit-learn releases renamed the
                # `base_estimator` keyword to `estimator` -- confirm against
                # the installed version.
                if i == 0:
                    clf = BaggingClassifier(base_estimator=GaussianNB(),
                                            n_estimators=10,
                                            max_samples=0.5,
                                            max_features=0.5)
                elif i == 1:
                    clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=0, criterion='gini'),
                                            n_estimators=10,
                                            max_samples=0.5,
                                            max_features=0.5)
                elif i == 2:
예제 #2
0
import configparser
from preprocess import load_arff, fill_miss

# Script entry point: for each of ten configured datasets, fit a gini
# decision tree under 10-fold stratified cross-validation and collect per-fold
# accuracy and ROC AUC for a markdown table in 'decision_tree_gini.md'.
# NOTE(review): os, time, np, DecisionTreeClassifier, StratifiedKFold and
# roc_auc_score are used below, but their imports are not visible in this
# excerpt -- confirm they are imported at the top of the file.
# NOTE(review): this excerpt ends at a dangling `else:`; the multi-class AUC
# branch and the code that writes the table rows are not visible here.
if __name__ == '__main__':
    config = configparser.ConfigParser()
    # Relative path: resolved against the current working directory.
    config.read('../config.ini')
    with open(
            os.path.join(config['path']['OUTPUT_PATH'],
                         'decision_tree_gini.md'), 'w') as of:
        # Markdown table header: a blank first cell, then accuracy and auc
        # columns, all centre-aligned.
        of.write('| |accuracy|auc|\n|:---:|:---:|:---:|\n')
        for i in range(10):
            # Timestamp taken at the start of each dataset; where it is
            # consumed is not visible in this excerpt.
            start_time = time.time()
            # Dataset paths are read from keys 'df0' .. 'df9' of the [path] section.
            fpath = config['path']['df' + f'{i}']
            # Progress line: the part of fpath between its last '/' and its
            # last '.', followed by a tab instead of a newline.
            print(fpath[fpath.rfind('/') + 1:fpath.rfind('.')], end='\t')
            data = load_arff(fpath)
            X, y = fill_miss(data)
            clf = DecisionTreeClassifier(random_state=0, criterion='gini')
            # Alternative split criterion, currently disabled:
            # clf = DecisionTreeClassifier(random_state=0, criterion='entropy')
            skf = StratifiedKFold(n_splits=10)
            # Per-fold results: skf_accuracy1 receives clf.score() values;
            # skf_accuracy2 receives roc_auc_score() values (despite its name).
            skf_accuracy1 = []
            skf_accuracy2 = []
            # Array 0 .. k-1 where k is the number of distinct labels in y;
            # only its .size is used in the visible code.
            n_classes = np.arange(np.unique(y).size)
            # NOTE(review): X[train] / y[train] index with the arrays yielded
            # by skf.split -- presumably X and y are numpy arrays; confirm
            # against fill_miss.
            for train, test in skf.split(X, y):
                clf.fit(X[train], y[train])
                skf_accuracy1.append(clf.score(X[test], y[test]))
                # Fewer than three classes: score AUC from column 1 of
                # predict_proba.
                if n_classes.size < 3:
                    skf_accuracy2.append(
                        roc_auc_score(y[test],
                                      clf.predict_proba(X[test])[:, 1],
                                      average='micro'))
                else: