def main_lr(dataset, range_th):
    """Sweep the convergence threshold of logistic regression on *dataset*.

    For each candidate value in ``range_th`` (used as ``th / 1000``), run
    k-fold cross-validation with the project's logistic regression and
    record the mean test accuracy.

    Parameters
    ----------
    dataset : str
        Base name of a ``.npy`` file under ``Datasets/``; the last column is
        the integer class label, the remaining columns are features.
    range_th : iterable of int
        Candidate thresholds; each is divided by 1000 before use.

    Returns
    -------
    list of float
        Mean k-fold accuracy for each threshold, in iteration order.
    """
    data = np.load('Datasets/' + dataset + '.npy')
    np.random.shuffle(data)
    data_x = data[:, :-1]
    data_y = data[:, -1]

    # Invariant across thresholds -- hoisted out of the sweep loop.
    # k_fold is a module-level constant (not visible in this chunk).
    step = data_x.shape[0] // k_fold
    num_of_class = int(np.max(data_y) + 1)

    accs = []
    for th in range_th:
        acc = 0.
        for i in range(k_fold):
            # BUG FIX: the original sliced folds as data_x[i:i+step], which
            # made the k test folds overlap almost completely.  Use
            # contiguous, disjoint folds instead.
            start, end = i * step, (i + 1) * step
            test_x, test_y = data_x[start:end], data_y[start:end]
            train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
            train_y = myutility.convertToOneHot(
                np.concatenate((data_y[:start], data_y[end:]), axis=0),
                num_of_class)

            LR = logistic.logistic_regression(train_x.shape[1],
                                              train_y.shape[1],
                                              thres=th / 1000)
            LR.fit(train_x, train_y)
            predict_y = LR.predict(test_x)
            acc += np.sum(predict_y == test_y) / test_y.shape[0]
        accs.append(acc / k_fold)

    return accs
# Beispiel #2 (separator from the scraped example listing; kept as a comment
# so the file remains valid Python)
def main_lr(dataset):
    """Learning curve: k-fold CV accuracy of logistic regression vs. sample count.

    Loads ``Datasets/<dataset>.npy`` (last column = integer class label),
    shuffles it, and for every prefix of 10, 20, ... samples runs k-fold
    cross-validation with the project's logistic regression.

    Parameters
    ----------
    dataset : str
        Base name of a ``.npy`` file under ``Datasets/``.

    Returns
    -------
    (list of float, list of int)
        Mean k-fold accuracies and the matching prefix sizes.
    """
    data = np.load('Datasets/' + dataset + '.npy')
    np.random.shuffle(data)
    num_samples = data.shape[0]

    acclr_model = []
    size_of_dataset = []
    # Prefix sizes 10, 20, ... strictly below num_samples.  Replaces the
    # original scan over every index guarded by `(m % 10 == 0) & (m != 0)`
    # (which also used bitwise `&` where `and` was meant).
    for m in range(10, num_samples, 10):
        data_x = data[:m, :-1]
        data_y = data[:m, -1]
        step = data_x.shape[0] // k_fold  # k_fold: module-level constant
        num_of_class = int(np.max(data_y) + 1)

        acc = 0.
        for i in range(k_fold):
            # BUG FIX: folds were sliced as data_x[i:i+step], making the
            # test folds overlap; use contiguous, disjoint folds.
            start, end = i * step, (i + 1) * step
            test_x, test_y = data_x[start:end], data_y[start:end]
            train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
            train_y = myutility.convertToOneHot(
                np.concatenate((data_y[:start], data_y[end:]), axis=0),
                num_of_class)

            LR = logistic.logistic_regression(train_x.shape[1],
                                              train_y.shape[1])
            LR.fit(train_x, train_y)
            predict_y = LR.predict(test_x)
            acc += np.sum(predict_y == test_y) / test_y.shape[0]

        acclr_model.append(acc / k_fold)
        size_of_dataset.append(m)

    return acclr_model, size_of_dataset
# Beispiel #3 (separator from the scraped example listing; kept as a comment
# so the file remains valid Python)
def main_lr(dataset):
    """Compare the project's logistic regression against scikit-learn's.

    Runs k-fold cross-validation twice on ``Datasets/<dataset>.npy`` -- once
    with ``logistic.logistic_regression`` (one-hot targets) and once with
    sklearn's ``LogisticRegression`` -- printing the mean accuracy and a
    classification report for each (the report covers the LAST fold only,
    since ``test_y``/``predict_y`` hold the final fold's values).

    Parameters
    ----------
    dataset : str
        Base name of a ``.npy`` file under ``Datasets/``; the last column is
        the integer class label.

    Returns
    -------
    (ndarray, ndarray, int)
        Predictions and true labels of the last sklearn fold, and the
        number of classes.
    """
    data = np.load('Datasets/' + dataset + '.npy')
    np.random.shuffle(data)
    data_x = data[:, :-1]
    data_y = data[:, -1]

    step = data_x.shape[0] // k_fold  # k_fold: module-level constant
    num_of_class = int(np.max(data_y) + 1)

    # --- project model, one-hot targets ---
    acc = 0.
    for i in range(k_fold):
        # BUG FIX: folds were data_x[i:i+step] (overlapping test folds);
        # use contiguous, disjoint folds.
        start, end = i * step, (i + 1) * step
        test_x, test_y = data_x[start:end], data_y[start:end]
        train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
        train_y = myutility.convertToOneHot(
            np.concatenate((data_y[:start], data_y[end:]), axis=0),
            num_of_class)

        LR = logistic.logistic_regression(train_x.shape[1], train_y.shape[1])
        LR.fit(train_x, train_y)
        predict_y = LR.predict(test_x)
        acc += np.sum(predict_y == test_y) / test_y.shape[0]
    print(acc / k_fold)
    print(metrics.classification_report(test_y, predict_y))

    # --- sklearn baseline, integer targets ---
    acc = 0.
    for i in range(k_fold):
        start, end = i * step, (i + 1) * step
        test_x, test_y = data_x[start:end], data_y[start:end]
        train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
        train_y = np.concatenate((data_y[:start], data_y[end:]), axis=0)

        LR2 = LogisticRegression(solver='lbfgs', multi_class='auto')
        LR2.fit(train_x, train_y)
        predict_y = LR2.predict(test_x)
        acc += np.sum(predict_y == test_y) / test_y.shape[0]
    print(acc / k_fold)
    print(metrics.classification_report(test_y, predict_y))

    return predict_y, test_y, num_of_class
# Beispiel #4 (separator from the scraped example listing; kept as a comment
# so the file remains valid Python)
def main_nb(dataset):
    """Compare the project's Naive Bayes against scikit-learn's MultinomialNB.

    Runs k-fold cross-validation twice on ``Datasets/<dataset>.npy`` -- once
    with ``naivebayes.NaiveBayes`` (one-hot targets) and once with sklearn's
    ``MultinomialNB`` -- printing the mean accuracy and a classification
    report for each (the report covers the LAST fold only).

    Parameters
    ----------
    dataset : str
        Base name of a ``.npy`` file under ``Datasets/``; the last column is
        the integer class label.
    """
    data = np.load('Datasets/' + dataset + '.npy')
    np.random.shuffle(data)
    data_x = data[:, :-1]
    data_y = data[:, -1]

    step = data_x.shape[0] // k_fold  # k_fold: module-level constant
    num_of_class = int(np.max(data_y) + 1)

    # --- project model, one-hot targets ---
    acc = 0.
    for i in range(k_fold):
        # BUG FIX: folds were data_x[i:i+step] (overlapping test folds);
        # use contiguous, disjoint folds.
        start, end = i * step, (i + 1) * step
        test_x, test_y = data_x[start:end], data_y[start:end]
        train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
        train_y = myutility.convertToOneHot(
            np.concatenate((data_y[:start], data_y[end:]), axis=0),
            num_of_class)

        NB = naivebayes.NaiveBayes(train_x.shape[1], train_y.shape[1])
        NB.fit(train_x, train_y)
        predict_y = NB.predict(test_x)
        acc += np.sum(predict_y == test_y) / test_y.shape[0]
    print(acc / k_fold)
    print(metrics.classification_report(test_y, predict_y))

    # --- sklearn baseline, integer targets ---
    acc = 0.
    for i in range(k_fold):
        start, end = i * step, (i + 1) * step
        test_x, test_y = data_x[start:end], data_y[start:end]
        train_x = np.concatenate((data_x[:start], data_x[end:]), axis=0)
        train_y = np.concatenate((data_y[:start], data_y[end:]), axis=0)

        NB2 = MultinomialNB()
        NB2.fit(train_x, train_y)
        predict_y = NB2.predict(test_x)
        acc += np.sum(predict_y == test_y) / test_y.shape[0]
    print(acc / k_fold)
    print(metrics.classification_report(test_y, predict_y))
# Beispiel #5 (separator from the scraped example listing; kept as a comment
# so the file remains valid Python)
 data_y = data[:num_samples, -1]
 iteration = 200
 k_fold = 5
 step = data_x.shape[0] // k_fold
 num_of_class = int(np.max(data_y) + 1)
 train_acc_all = [0] * iteration
 test_acc_all = [0] * iteration
 train_accs_avg = [] * len(datasets)
 test_accs_avg = [] * len(datasets)
 for i in range(k_fold):
     train_accs = []
     test_accs = []
     test_x, test_y = data_x[i:i + step], data_y[i:i + step]
     train_x, train_y = np.concatenate((data_x[0:i], \
       data_x[i+step:]), axis=0), \
      myutility.convertToOneHot(np.concatenate((data_y[0:i], \
       data_y[i+step:]), axis=0), num_of_class)
     LR = logistic.logistic_regression(train_x.shape[1],
                                       train_y.shape[1],
                                       max_iter=1000,
                                       thres=0.05,
                                       lr=0.0001)
     for i in range(iteration):
         LR.fit(train_x, train_y)
         train_predict_y = LR.predict(train_x)
         # if i == 0:
         # 	print(train_predict_y.shape)
         # 	print(train_y.shape)
         # 	print(train_predict_y == train_y)
         test_predict_y = LR.predict(test_x)
         train_acc = np.sum(np.argmax(train_y, axis=1) ==
                            train_predict_y) / train_y.shape[0]