Esempio n. 1
0
def accuracy(train, test):
    """Average entity/emotion F1 over several random train/test splits.

    Args:
        train: object exposing ``train_ents()``.
        test: object exposing ``test()``.

    Returns:
        Tuple ``(mean_entity_f1, mean_emotion_f1)`` over the rounds.
    """
    rounds = 4
    entity_scores = []
    emotion_scores = []
    for round_no in range(rounds):
        # Re-split the raw data for this round.
        data_split('../coreEntityEmotion_baseline/data',
                   'coreEntityEmotion_train.txt')
        # Train on the new split, then run the test pass.
        train.train_ents()
        test.test()
        # Score the predictions against the gold file.
        entityScore, emotionScore = computeF1Score(
            '../coreEntityEmotion_baseline/data/coreEntityEmotion_train.txt',
            '../coreEntityEmotion_baseline/data/2_coreEntityEmotion_train_result.txt'
        )
        print('turn:', round_no + 1, 'entityScore:', entityScore, 'emotionScore:',
              emotionScore)
        entity_scores.append(entityScore)
        emotion_scores.append(emotionScore)
    # Report and return the per-round averages.
    avg_entity = sum(entity_scores) / rounds
    avg_emotion = sum(emotion_scores) / rounds
    print('平均entityScore:', avg_entity, '平均emotionScore:', avg_emotion)
    return avg_entity, avg_emotion
Esempio n. 2
0
def accuracy(train, test, feature_ents_func):
    """Average entity/emotion F1 over several random train/test splits.

    Args:
        train: object exposing ``train_ents()``.
        test: object exposing ``test()``.
        feature_ents_func: currently unused; kept so the call signature
            stays compatible with existing callers.

    Returns:
        Tuple ``(mean_entity_f1, mean_emotion_f1)`` over the rounds.
    """
    n_rounds = 4
    entity_total = 0
    emotion_total = 0
    for round_idx in range(n_rounds):
        # Re-split the raw data for this round.
        data_split('../coreEntityEmotion_baseline/data',
                   'coreEntityEmotion_train.txt')
        # Train on the fresh split, then run the test pass.
        train.train_ents()
        test.test()
        # Score the predictions against the gold file.
        entityScore, emotionScore = computeF1Score(
            '../coreEntityEmotion_baseline/data/coreEntityEmotion_train.txt',
            '../coreEntityEmotion_baseline/data/2_coreEntityEmotion_train_result.txt'
        )
        print('turn:', round_idx + 1, 'entityScore:', entityScore, 'emotionScore:',
              emotionScore)
        # Accumulate for the final averages.
        entity_total += entityScore
        emotion_total += emotionScore
    print('平均entityScore:', entity_total / n_rounds, '平均emotionScore:',
          emotion_total / n_rounds)
    return entity_total / n_rounds, emotion_total / n_rounds
Esempio n. 3
0
    # smoothen step values due to more fluctuation
    valid_acc_steps = sp.savgol_filter(valid_acc_steps, 3, 1)
    train_acc_steps = sp.savgol_filter(train_acc_steps, 3, 1)
    plot_data_steps(valid_acc_steps, train_acc_steps, max_loc_steps)

    ######


if __name__ == "__main__":
    # Reproducibility: a seed of 6 was found to be most optimal in prior runs.
    seed = 6
    # Fraction of the data held out for validation by data_split().
    split_size = 0.2
    # One-time preprocessing, kept for reference:
    # convert_to_numpy()        # commented out since files do not need be constantly converted
    # normalize_data()          # commented out since files do not need be constantly normalized
    data_split(seed, split_size)
    # Load the pre-split arrays written by data_split().
    train_data = np.load('./data/train_data.npy')
    train_label = np.load('./data/train_label.npy')
    val_data = np.load('./data/val_data.npy')
    val_label = np.load('./data/val_label.npy')

    # Hyperparameters — presumably read as module globals by train(),
    # since train() takes no arguments; TODO confirm against its definition.
    lr = 0.005175
    steps = 100
    epochs = 500
    bs = 32

    # Run training (train() appears to pick up the data/hyperparameters
    # defined above at module level — verify).
    train()
Esempio n. 4
0
from sprint2.test import Test
from data_split import data_split
from f1_score import computeF1Score
from sprint2.train import Train

if __name__ == '__main__':
    from time import time

    # Wall-clock start of the evaluation run (elapsed time usage not visible here).
    start = time()
    test = Test()
    # Number of random-split evaluation rounds.
    turn = 5
    entityScoreSum = 0
    emotionScoreSum = 0
    for i in range(turn):
        # Re-split the training data for this round.
        data_split('../coreEntityEmotion_baseline/data', 'coreEntityEmotion_train.txt')
        # Training is skipped in this run; models are presumably already
        # trained on disk — TODO confirm.
        # trainer = Train()
        # trainer.trainCoreEntity()
        # trainer.trainEmotion()
        # Predict core entities on the held-out split.
        test.testCoreEntity('../coreEntityEmotion_baseline/data/2_coreEntityEmotion_train.txt',
                            '../coreEntityEmotion_baseline/data/2_coreEntityEmotion_train_result.txt')
        # Compute entity/emotion F1 against the gold annotations.
        entityScore, emotionScore = computeF1Score('../coreEntityEmotion_baseline/data/coreEntityEmotion_train.txt',
                                                   '../coreEntityEmotion_baseline/data/2_coreEntityEmotion_train_result.txt')
        print('turn:', i + 1, 'entityScore:', entityScore, 'emotionScore:', emotionScore)
        # Accumulate F1 for averaging.
        entityScoreSum += entityScore
        emotionScoreSum += emotionScore
    # Print the averages (the statement itself is truncated in this file).
Esempio n. 5
0
def main_loop(config_list, final_table, count):
    """Walk-forward train/predict loop for one classifier configuration.

    For every day ``x`` after the tuning window, the data is re-split, the
    configured classifier produces a one-step prediction, and running
    accuracy / F-measure are accumulated into ``online_measures``.

    Args:
        config_list: configuration mapping (``chosen_method``,
            ``optimize_method``, ``randomized_search``, ...).
        final_table: DataFrame dumped to Excel when documentation is enabled.
        count: run index, used to name the Excel output files.

    Returns:
        ``(online_measures, series_frame, pos_neg_total)``.  The latter two
        are ``None`` unless ``config.documentation == 1`` (the original code
        raised ``NameError`` in that case).

    NOTE(review): depends on module-level ``X``, ``y``, ``tune_size``,
    ``timeseries``, ``dataset_input``, ``df``, ``start_time`` and on the
    ``ds`` / ``ml_*`` / ``config`` / ``documentation`` modules — confirm
    they are in scope where this runs.
    """
    print(
        " chosen_method: %s \n expanding_windows: %d \n optimize_method: %d \n randomizedsearch: %d \n"
        % (config_list['chosen_method'], config_list['expanding_window_ml'],
           config_list['optimize_method'], config_list['randomized_search']))
    online_measures = pd.DataFrame({
        'day': [],
        'accuracy': [],
        'f-measure': []
    })
    opt_modulo_params_list = []

    # Per-day confusion-matrix membership flags.
    tp_list = []
    fp_list = []
    tn_list = []
    fn_list = []
    y_pred_list = []

    # Safe defaults so the return statement works even when the loop never
    # runs or documentation is disabled (previously unbound -> NameError).
    series_frame = None
    pos_neg_total = None
    tp = fp = tn = fn = 0

    opt_modulo_params = 0
    # +10: warm-up margin after the tuning window before scoring starts.
    for x in range(tune_size + 10, len(X) - 1):

        # Fresh train/test split at every day x (walk-forward evaluation).
        X_train, X_test, y_train, y_test = ds.data_split(
            X, y, x, timeseries, config_list)

        # Dispatch to the classifier selected in the config.
        if config_list['chosen_method'] == config.method[0]:
            y_pred, opt_modulo_params = ml_nb.classifier(
                X_train, y_train, X_test, x, tune_size, config_list,
                opt_modulo_params)
        elif config_list['chosen_method'] == config.method[1]:
            y_pred, opt_modulo_params = ml_rf.classifier(
                X_train, y_train, X_test, x, tune_size, config_list,
                opt_modulo_params)
        elif config_list['chosen_method'] == config.method[2]:
            y_pred, opt_modulo_params = ml_svc.classifier(
                X_train, y_train, X_test, x, tune_size, config_list,
                opt_modulo_params)
        elif config_list['chosen_method'] == config.method[3]:
            y_pred, opt_modulo_params = ml_ann.classifier(
                X_train, y_train, X_test, x, tune_size, config_list,
                opt_modulo_params)
        else:
            print("Select a classifier in the config file!")

        # Periodically record the tuned hyper-parameters.
        if opt_modulo_params != 0 and x % 200 == 0:
            opt_modulo_params_w_day = opt_modulo_params.copy()
            opt_modulo_params_w_day['day'] = x
            opt_modulo_params_w_day['chosen_method'] = config_list[
                'chosen_method']
            opt_modulo_params_list.append(opt_modulo_params_w_day)

        # Coerce the prediction and label to plain ints when possible.
        # BUG FIX: the original read an undefined name ``test_predictions``
        # and a bare ``except:`` silently swallowed the resulting NameError,
        # so neither conversion ever happened.
        try:
            y_pred = int(y_pred)
            y_test = int(y_test)
        except (TypeError, ValueError):
            pass

        y_pred_list.append(y_pred)
        tp_list.append(y_pred == +1 and y_test == +1)
        fp_list.append(y_pred == +1 and y_test == -1)
        tn_list.append(y_pred == -1 and y_test == -1)
        fn_list.append(y_pred == -1 and y_test == +1)

        # Zero defaults survive a division-by-zero below.
        accuracy = 0
        f_measures = 0

        tp = tp_list.count(True)
        fp = fp_list.count(True)
        tn = tn_list.count(True)
        fn = fn_list.count(True)

        try:
            # Accuracy over all days scored so far.
            accuracy = (tp + tn) / (tp + fp + tn + fn)

            # Positive-class precision / recall and F-measure.
            pos_prec = tp / (tp + fp)
            pos_recall = tp / (tp + fn)
            f_measures = (2 * pos_prec * pos_recall) / (pos_prec + pos_recall)

            # Negative-class precision / recall (computed but not reported).
            neg_prec = tn / (tn + fn)
            neg_recall = tn / (tn + fp)
        except ZeroDivisionError:
            # Early days can lack one class entirely; keep the zero defaults.
            print("Division by zero")

        current_measures = pd.DataFrame({
            'day': [x],
            'accuracy': [accuracy],
            'f-measure': [f_measures]
        })
        print(current_measures)
        online_measures = online_measures.append(current_measures)

    ###
    # Documentation / export of the run
    ###
    if config.documentation == 1:

        documentation.save_doc(config_list, online_measures, dataset_input, df,
                               count, opt_modulo_params_list)
        series_frame = documentation.concat_results(online_measures,
                                                    config_list, dataset_input,
                                                    start_time)
        pos_neg_total = pd.DataFrame({
            'tp': [tp],
            'fp': [fp],
            'tn': [tn],
            'fn': [fn]  # was a bare scalar; wrapped for consistency
        })

        if config_list['optimize_method'] == 1:
            print(opt_modulo_params_list)
            opt_modulo_params_list = pd.DataFrame(opt_modulo_params_list)
            opt_modulo_params_list.to_excel('%d_opt_modulo_params_list.xlsx' %
                                            count)
        final_table.to_excel('%d_final_table.xlsx' % count)
    print("Mainloop was executed")
    return online_measures, series_frame, pos_neg_total
Esempio n. 6
0
# ? 因为完全依赖于距离的计算,对于维度大的数据,有维度灾难的问题。 百万级别的维度会不能处理。
# =============================================================================
from data_split import data_split
from data_split import calc_ac
from knnmy import KNNCLF
from scaling import scaling
# =============================================================================
from sklearn import datasets

# Smoke-test of the custom KNN classifier on the iris dataset.

iris = datasets.load_iris()

# Fixed seed so the random split is reproducible.
# NOTE: with seed=123 scaling hurts accuracy; with seed=456 it helps.
Xtr, Ytr, Xt, Yt = data_split(iris.data, iris.target.reshape(-1,1), seed=123)
sc = scaling()
sc.fit(Xtr)
Xtr1 = sc.transform(Xtr)
# Deliberately override the scaled features with the raw ones, i.e. scaling
# is disabled for this run (see the seed note above) — confirm this is still
# the intended experiment before relying on the reported accuracy.
Xtr1 = Xtr
knnmy = KNNCLF()

knnmy.fit(Xtr1,Ytr)
Xt1 = sc.transform(Xt)
# Same override for the test set: use the unscaled features.
Xt1 = Xt
y_pred = knnmy.predict2all(Xt1)

# Accuracy of the custom KNN against the held-out labels.
ac1 = calc_ac(y_pred, Yt)

#test sklearn