Ejemplo n.º 1
0
from hw5.libs.common.dataset import Dataset
from hw5.libs.common.util import plot_digit_data
import matplotlib.pyplot as plt

def index_digit_ones(Y_train):
    """Return the positions of the labels in ``Y_train`` that equal 1.

    Each label is converted with ``int()`` before the comparison, so values
    such as ``1``, ``1.0`` and ``"1"`` all count as a match.

    Args:
        Y_train: Iterable of labels, each convertible with ``int()``.

    Returns:
        List of zero-based indices, in ascending order, whose label is 1.
        Empty when no label matches or ``Y_train`` is empty.
    """
    return [idx for idx, label in enumerate(Y_train) if int(label) == 1]

if __name__ == '__main__':
    # Script entry point: load a dataset, project the training features with
    # PCA, print the shapes before/after, and collect the indices of the
    # training labels equal to 1.
    # Alternative dataset sizes kept from earlier experiments:
    # dataset = Dataset(train_data=1000, test_data=100)
    # dataset = Dataset(train_data=40, test_data=10)
    dataset = Dataset(train_data=80, test_data=20)
    # dataset = Dataset()
    X_train, Y_train, X_test, Y_test = dataset.get_dataset()

    print("before PCA = ", X_train.shape)
    # Dimensionality reduction; other component counts tried earlier:
    # pca = PCA(n_components=2, whiten=False)
    # pca = PCA(n_components=64, whiten=False)
    # NOTE(review): PCA is not imported in the visible part of this file --
    # presumably sklearn.decomposition.PCA; confirm the import exists.
    pca = PCA(n_components=80, whiten=False)
    # X_train is overwritten with its PCA-transformed version from here on.
    X_train = pca.fit_transform(X_train)
    print("after PCA = ", X_train.shape)

    # print(X_train[1].shape)
    print(Y_train)

    # Positions in Y_train whose label converts to the integer 1.
    idx_ones = index_digit_ones(Y_train)
Ejemplo n.º 2
0
# https://deeplearningcourses.com/c/data-science-supervised-machine-learning-in-python
# https://www.udemy.com/data-science-supervised-machine-learning-in-python
# Code: https://github.com/lazyprogrammer/machine_learning_examples/blob/master/supervised_class/knn.py

import matplotlib.pyplot as plt
from hw5.libs.algo.knn import KNN
from hw5.libs.common.dataset import Dataset
from hw5.libs.common.util import int_to_tuple, save_to_csv
from datetime import datetime

if __name__ == '__main__':
    # Script entry point: build a KNN model for every k from 1 to K.
    # Smaller dataset sizes kept from earlier experiments:
    # dataset = Dataset(train_data=80, test_data=20)
    # dataset = Dataset(train_data=800, test_data=200)
    dataset = Dataset()
    X_train, Y_train, X_test, Y_test = dataset.get_dataset()
    # Result accumulators; they are filled by code beyond this excerpt --
    # presumably one entry per k; TODO confirm against the loop body.
    train_scores = []
    test_scores = []
    exec_times = []
    best_test_score = 0
    stored_accuracy = [0, 0] # [<acc>, <k-th>]

    # Define number of iteration (K): k takes the values 1..K
    K = 50
    ks = int_to_tuple(K) # used to plot the results

    # Start KNN: Scratch
    print("Evaluating Scratch KNN")
    for i in range(K):
        # range() is zero-based, so shift by one to start at k = 1.
        k = i+1
        print("\nk =", k)
        knn = KNN(k)
Ejemplo n.º 3
0
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # for plot styling
import numpy as np

from hw6.libs.algo.kmeans import MyKMeans

if __name__ == '__main__':
    # Script entry point: set up the data and parameters for a repeated
    # clustering simulation over several cluster counts.
    # NOTE(review): Dataset and int_to_tuple are not imported in the visible
    # part of this file -- confirm the imports exist.
    # Alternative dataset sizes kept from earlier experiments:
    # dataset = Dataset(train_data=1000, test_data=100)
    # dataset = Dataset(train_data=20, test_data=10)
    # dataset = Dataset(train_data=40, test_data=10)
    # dataset = Dataset(train_data=80, test_data=20)
    dataset = Dataset(train_data=800, test_data=200)
    # dataset = Dataset()
    # X_train, Y_train, X_test, Y_test = dataset.get_dataset()
    # Only the training split is kept; the test split is discarded.
    init_X_train, init_Y_train, _, _ = dataset.get_dataset()
    acc_scores = []  # ONLY collect the highest accuracy!

    # Simulate clustering in K times.
    K = 50
    # K = 5
    ks = int_to_tuple(K)  # used to plot the results

    # simulate different number of clusters
    n_clusters = [2, 3, 4, 5, 6, 7, 8, 9, 10]  # from n=2 ~ n=10 (max)
    # n_clusters = [2, 3] # from n=2 ~ n=10 (max)

    # Start simulation ...
Ejemplo n.º 4
0
    # Build K datasets of increasing size for the clustering simulation.
    # Each dataset is split into train/test parts according to ratio_train.
    # K = 9
    # K = 1
    K = 4  # total number of simulations
    dataset_size = 200  # size of the first dataset
    # dataset_step_size = 100
    dataset_step_size = 200  # growth per simulation: sizes 200, 400, 600, 800
    # The two ratios should sum to 1; with the values above the
    # (train, test) splits are (160, 40), (320, 80), (480, 120), (640, 160).
    ratio_train, ratio_test = 0.8, 0.2

    datasets = []
    ds = []  # dataset size used in each iteration
    for i in range(K):
        train_data = int(dataset_size * ratio_train)
        # The test split takes the remainder, so both parts always add up to
        # dataset_size even when the train size is truncated by int().
        test_data = dataset_size - train_data
        dataset = Dataset(train_data=train_data, test_data=test_data)
        X_train, Y_train, X_test, Y_test = dataset.get_dataset()
        datasets.append({
            "X_train": X_train,
            "Y_train": Y_train,
            "X_test": X_test,
            # Store the test labels here; the previous code put X_train
            # under this key by mistake.
            "Y_test": Y_test,
        })
        ds.append(dataset_size)
        dataset_size += dataset_step_size

    # Start KMeans: sklearn
    print("Evaluating sklearn KMeans")
Ejemplo n.º 5
0
Based on the method used in "Learning To Remember Rare Events"
by Lukasz Kaiser, Ofir Nachum, Aurko Roy, and Samy Bengio
Paper: https://openreview.net/pdf?id=SJTQLdqlg
'''

import numpy as np
import theano
import theano.tensor as T
import time
import os

from hw5.libs.common.dataset import Dataset
from hw5.libs.common.util import int_to_tuple

if __name__ == '__main__':
    # Script entry point: load a very small dataset, print its sizes and the
    # test labels, and set the iteration count used later in the script.
    dataset = Dataset(train_data=10, test_data=3)
    # dataset = Dataset()
    X_train, Y_train, X_test, Y_test = dataset.get_dataset()
    # Score accumulators; they are filled by code beyond this excerpt.
    train_scores = []
    test_scores = []

    # Sanity output: number of samples in each split.
    print(" > Total X_train = ", len(X_train))
    print(" > Total X_test = ", len(X_test))

    print(">>> Y_test = ", Y_test)

    # Define number of iteration (K)
    K = 20
    ks = int_to_tuple(K)  # used to plot the results

    def l2_normalize(x, dim, epsilon=1e-12):
Ejemplo n.º 6
0
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # for plot styling
import numpy as np
from datetime import datetime
from hw6.libs.algo.gmm import MyGMM

if __name__ == '__main__':
    # Script entry point: load the training split, restrict it to a few
    # selected digits, and set the parameters of the repeated simulation.
    # NOTE(review): Dataset, int_to_tuple and filter_dataset are not imported
    # or defined in the visible part of this file -- confirm they exist.
    # Alternative dataset sizes kept from earlier experiments:
    # dataset = Dataset(train_data=1000, test_data=100)
    # dataset = Dataset(train_data=20, test_data=10)
    # dataset = Dataset(train_data=40, test_data=10)
    # dataset = Dataset(train_data=80, test_data=20)
    dataset = Dataset(train_data=800, test_data=200)
    # dataset = Dataset()
    # Only the training split is kept; the test split is discarded.
    X_train, Y_train, _, _ = dataset.get_dataset()
    acc_scores = []

    # Simulate clustering in K times.
    K = 50
    # K = 5
    ks = int_to_tuple(K)  # used to plot the results

    # to make it easier to analyze, take only digit={0, 1, ...}
    selected_digits = [1, 2, 3]
    # selected_digits = [1, 2]
    # X_train / Y_train are overwritten with the filtered result --
    # presumably only samples whose label is in selected_digits; TODO confirm.
    X_train, Y_train = filter_dataset(selected_digits, X_train, Y_train)

    # Used for visualization only