예제 #1
0
def main():
    """Load the 5k training split and the dev split via ``get_binary_features``.

    The training file is read first with no extra argument. The third value
    returned by that call (``features``) is then handed back in as the second
    argument when the dev file is read. Nothing is returned; all results stay
    in local variables.
    """
    # First call: no existing feature vector is supplied.
    train_path = "../machine_learning/income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(train_path)

    # Second call: compute the dev data against the feature vector obtained
    # from the training call.
    dev_path = "../machine_learning/income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_path, features)
예제 #2
0
def main():
    """Train several perceptron variants on the income data and print scores.

    The train and dev files are loaded with ``get_binary_features`` and their
    shapes printed. Every ``test`` call below is made against the training
    data (``X``, ``Y``); the dev data is only loaded, not evaluated.

    Sequence of experiments:
      * one online pass (``train_online``), then a test print;
      * ``average_train`` with ``maxIter=5`` (no test print follows it);
      * ``naive_average_train`` with ``maxIter=10``, then a test print;
      * MIRA (``mira_aggro=0.0``), 10 passes of ``train_mira``, then a test
        print;
      * ``train_mira_average`` with ``maxIter=5``, then a test print.
    """
    train_path = "../income-data/income.train.txt"
    X, Y, features = get_binary_features(train_path)
    print(X.shape)

    dev_path = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_path, features)
    print(X_dev.shape)

    # Sizes are taken from a row slice / column slice exactly as before.
    n_features = len(X[0, :])
    n_samples = len(X[:, 0])

    perceptron = Perceptron(feature_size=n_features)

    # A batch-training experiment (perceptron.train with before/after test
    # prints) used to sit here; it is disabled.

    perceptron.reset()

    # Exactly one online pass over the training rows.
    for row in range(n_samples):
        perceptron.train_online(X[row, :], Y[row])

    print("After single training: ")
    print(perceptron.test(X, Y))

    print("Average")
    perceptron.reset()
    perceptron.average_train(X, Y, maxIter=5)

    print("Naive average (with maximum iterations)")
    perceptron.reset()
    perceptron.naive_average_train(X, Y, maxIter=10)

    print(perceptron.test(X, Y))
    print("MIRA")
    mira = Perceptron(feature_size=n_features, mira_aggro=0.0)
    for _ in range(10):
        for row in range(n_samples):
            mira.train_mira(X[row, :], Y[row])

    print(mira.test(X, Y))

    print("MIRA Average")
    mira.reset()
    mira.train_mira_average(X, Y, maxIter=5)
    print(mira.test(X, Y))
예제 #3
0
def main():
    """Compare perceptron variants on the income dev set and print results.

    Steps, in order:
      1. Load the train and dev files with ``get_binary_features`` and print
         both shapes.
      2. Online perceptron: 5 epochs of ``train_online``, scoring on the dev
         set after every 1000 updates.
      3. ``naive_average_train`` with ``maxIter=5``, scored on dev.
      4. ``average_train`` with ``maxIter=5``, scored on dev.
      5. MIRA: a separate ``Perceptron(mira_aggro=0.0)`` trained for 5 passes
         of ``train_mira``, scored on dev.

    Nothing is returned; all results are printed.
    """
    rule = "---------------------------------------------------------------"

    train_path = "../income-data/income.train.txt"
    X, Y, features = get_binary_features(train_path)
    print(X.shape)

    dev_path = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_path, features)
    print(X_dev.shape)

    # Other extractors were previously swapped in here with the same
    # train/dev call pattern: get_numbered_features,
    # get_numbered_binary_features, get_binned_features, get_num_ed_features.

    print(rule)
    print(rule)

    perceptron = Perceptron(feature_size=len(X[0, :]))
    perceptron.reset()

    print("START PERCEPTRON")
    _train_online_with_dev_tracking(perceptron, X, Y, X_dev, Y_dev, epochs=5)
    print("END PERCEPTRON")

    print(rule)

    print("START NAIVE AVERAGE PERCEPTRON")
    perceptron.reset()
    perceptron.naive_average_train(X, Y, maxIter=5)
    _print_dev_percentages(perceptron, X_dev, Y_dev)
    print("END NAIVE AVERAGE PERCEPTRON")

    print(rule)

    print("START SMART AVERAGE PERCEPTRON")
    perceptron.reset()
    perceptron.average_train(X, Y, maxIter=5)
    _print_dev_percentages(perceptron, X_dev, Y_dev)
    print("END SMART AVERAGE PERCEPTRON")

    print(rule)

    print("START MIRA")
    # Kept from the original flow. NOTE(review): `mira` below is a separate
    # instance, so this reset presumably has no effect on it -- confirm and
    # drop if so.
    perceptron.reset()

    mira = Perceptron(feature_size=len(X[0, :]), mira_aggro=0.0)
    n_samples = len(X[:, 0])
    for _ in range(5):
        for i in range(n_samples):
            mira.train_mira(X[i, :], Y[i])
    _print_dev_percentages(mira, X_dev, Y_dev)
    print("END MIRA")

    # An "AVERAGE MIRA" section (mira.reset(), then
    # mira.train_mira_average(X, Y, maxIter=5), scored on dev) is disabled.

    print(rule)
    print(rule)


def _print_dev_percentages(model, X_dev, Y_dev):
    """Print ``model.test(X_dev, Y_dev)`` as a percentage with its error rate."""
    score = model.test(X_dev, Y_dev) * 100
    err_rate = 100 - score
    print("Score: ", score, "Error Rate: ", err_rate)


def _train_online_with_dev_tracking(perceptron, X, Y, X_dev, Y_dev, epochs,
                                    eval_every=1000):
    """Run online training and report dev-set progress while doing so.

    After every ``eval_every`` calls to ``train_online`` the model is scored
    on the dev data and the score / error rate are printed. At the end of
    each epoch the average of all error rates recorded so far (across epochs,
    not just the current one) is printed. Finally the best score, its error
    rate and the fractional epoch at which it occurred are printed.
    """
    n_samples = len(X[:, 0])
    updates = 0
    max_score = 0
    best_err_rate = 0
    max_score_epoch = 0
    # A plain list avoids re-copying the whole array on every evaluation,
    # which is what repeated np.append calls do.
    err_rates = []

    for j in range(epochs):
        print("EPOCH ", j + 1)
        for i in range(n_samples):
            perceptron.train_online(X[i, :], Y[i])
            updates += 1
            if updates % eval_every != 0:
                continue

            score = perceptron.test(X_dev, Y_dev)
            err_rate = (1.0 - score) * 100
            err_rates.append(err_rate)
            # Position expressed as "epochs completed + fraction of this one".
            epoch_v = (1.0 * j) + ((1.0 * i) / n_samples)
            print("Epoch: ", epoch_v, "Score: ", score, "Error Rate: ",
                  err_rate)
            if max_score < score:
                max_score = score
                best_err_rate = err_rate
                max_score_epoch = epoch_v

        avg_err_rate = np.average(err_rates)
        print("Average Error Rate: ", avg_err_rate)

    print("Max Score: ", max_score)
    print("Best Error Rate: ", best_err_rate)
    print("At Epoch: ", max_score_epoch)
예제 #4
0
from data_reader import get_binary_features

if __name__ == "__main__":
    # Script entry point: load the 5k training split, then the dev split.
    # Indentation is four spaces throughout; the earlier mix of a tab and
    # spaces inside this block is rejected by Python 3 at compile time.

    # If you do not have an existing feature vector
    data_set = "../income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(data_set)

    # If you have an existing feature vector you want to compute against
    data_set = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(data_set, features)
예제 #5
0
def main(C=1):
    """Fit a linear-kernel SVC on the 5k income split and print per-support-vector values.

    The train and dev files are loaded with ``get_binary_features``, an
    ``svm.SVC(kernel='linear')`` is fitted on the training data, and for each
    support vector the quantity ``1 - y * <w, x>`` is computed and printed.

    Args:
        C: Passed through to ``svm.SVC`` as its ``C`` argument (default 1).
    """
    # Local import: only needed for the print-options call further down.
    import sys

    # If you do not have an existing feature vector
    data_set = "../machine_learning/income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(data_set)

    # If you have an existing feature vector you want to compute against
    data_set = "../machine_learning/income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(data_set, features)

    # Flatten the label arrays to 1-D before handing them to the classifier.
    Y = np.ravel(Y)
    Y_dev = np.ravel(Y_dev)

    # -----------------------------------------------------------------------------

    # Time only the fit itself.
    start = time.time()

    clf = svm.SVC(kernel='linear', C=C)
    clf.fit(X, Y)

    end = time.time()

    # -----------------------------------------------------------------------------

    supp_vec = clf.support_vectors_
    num_supp_vec = clf.n_support_

    train_score = clf.score(X, Y)
    train_error = 1.0 - train_score

    dev_score = clf.score(X_dev, Y_dev)
    dev_error = 1.0 - dev_score

    t = end - start

    # -----------------------------------------------------------------------------

    w = clf.coef_
    a = clf.dual_coef_

    sum_xi = 0.0

    # One row per support vector.
    xi_array = np.zeros((len(supp_vec[:, 0]), 1), dtype=np.float64)

    # clf.support_[i] is the index into X / Y of the i-th support vector.
    # NOTE(review): the expression uses only <w, x>; clf.intercept_ is not
    # added -- confirm that omitting the bias term is intended.
    for i in range(len(supp_vec[:, 0])):
        xi = 1 - (1.0 * Y[clf.support_[i]] * np.inner(w, X[clf.support_[i], :]))
        xi_array[i, 0] = xi

    print("------------------Xi-------------------")
    print(xi_array.shape)
    print(xi_array)

    # Print arrays in full from here on. NumPy rejects NaN as a threshold;
    # sys.maxsize is the documented value for "never summarize".
    np.set_printoptions(threshold=sys.maxsize)

    # Transposed to a single row so argsort orders the support vectors by |xi|.
    abs_xi_array = np.transpose(abs(xi_array))
    print(abs_xi_array.argsort())

    # print(abs_xi_array)

    # NOTE(review): hard-coded row indices; these raise IndexError when the
    # fit yields fewer than 874 support vectors.
    print(xi_array[723, 0])
    print(xi_array[232, 0])
    print(xi_array[873, 0])
    print(xi_array[107, 0])
    print(xi_array[699, 0])