Example #1
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# fetch_data, cross_validate, normalise, chisquare and
# plot_confusion_matrix are project-local helpers (module paths assumed)


def main(CV=False, PLOT=True):
    """Entry Point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data, 10)
    else:
        method = 'l2'
        params = {'n_neighbors': 1, 'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = KNeighborsClassifier(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:

        y_hat = classifier.predict(X_test)

        cnf_matrix = confusion_matrix(y_test, y_hat)

        # confusion_matrix orders its rows by sorted label, so keep the
        # class names in the same (sorted) order
        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='K-Nearest-Neighbours\nConfusion Matrix',
                              cmap=plt.cm.Greens)

        plt.savefig('data/out/knn_cnf_matrix.pdf',
                    format='pdf',
                    dpi=300,
                    transparent=True)

        neighbors_matrix = classifier.kneighbors_graph(X_test)

        plot_kneighbors_graph(neighbors_matrix, title='Neighbours Graph')

        plt.savefig('data/out/knn_neighbours.pdf',
                    format='pdf',
                    dpi=300,
                    transparent=True)
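
The `chisquare` metric handed to `KNeighborsClassifier` above is one of the project-local helpers (scipy's `chisquare` returns a test-statistic object, which sklearn would not accept). A minimal sketch of a chi-square histogram distance usable as a callable metric, with an assumed `eps` guard against empty bins:

import numpy as np

def chisquare(x, y, eps=1e-10):
    """Chi-square distance between two non-negative feature vectors."""
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    return 0.5 * np.sum((x - y) ** 2 / (x + y + eps))

Any non-negative features (e.g. histograms) work with this form; sklearn calls it with two 1-D vectors and expects a single float back.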
Example #2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPClassifier

# fetch_data, cross_validate, normalise and plot_confusion_matrix are
# project-local helpers (see Example #1)


def main(CV=False, PLOT=True):
    """Entry Point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        method = 'robust'
        params = {'activation': 'logistic', 'hidden_layer_sizes': (25, )}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = MLPClassifier(learning_rate="adaptive",
                               max_iter=5000,
                               solver='adam',
                               random_state=42,
                               alpha=0.01,
                               **params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:

        y_hat = classifier.predict(X_test)

        cnf_matrix = confusion_matrix(y_test, y_hat)

        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='Multi-Layer-Perceptron\nConfusion Matrix',
                              cmap=plt.cm.Reds)

        plt.savefig('data/out/mlp_cnf_matrix.pdf',
                    format='pdf',
                    dpi=300,
                    transparent=True)
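
`cross_validate` itself is not part of these excerpts; judging from its return value (a normalisation method plus a parameter dict), a sketch built on sklearn's GridSearchCV might look like this (the candidate methods and the search grid are assumptions):

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

def cross_validate(data, folds=10):
    """Pick a normalisation method and classifier parameters by CV score."""
    grid = {'activation': ['logistic', 'relu'],
            'hidden_layer_sizes': [(25,), (50,), (100,)]}
    best = (None, None, -1.0)
    for method in ('l2', 'standard', 'robust'):
        X_train, y_train = normalise(data, method)['train']
        search = GridSearchCV(MLPClassifier(max_iter=5000), grid, cv=folds)
        search.fit(X_train, y_train)
        if search.best_score_ > best[2]:
            best = (method, search.best_params_, search.best_score_)
    return best[0], best[1]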
Example #3
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import NearestCentroid

# fetch_data, cross_validate, normalise, chisquare and
# plot_confusion_matrix are project-local helpers (see Example #1)


def main(CV=False, PLOT=True):
    """Entry Point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        method = 'l2'
        params = {'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = NearestCentroid(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:

        y_hat = classifier.predict(X_test)

        cnf_matrix = confusion_matrix(y_test, y_hat)

        plot_confusion_matrix(cnf_matrix,
                              classes=sorted(set(y_test)),
                              title='Nearest Centroid\nConfusion Matrix',
                              cmap=plt.cm.Blues)

        plt.savefig('data/out/nc_cnf_matrix.pdf',
                    format='pdf',
                    dpi=300,
                    transparent=True)
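
`normalise` is also project-local, but the method strings used across Examples #1-#3 and #10 ('l2', 'robust', 'standard', and so on) map directly onto sklearn's preprocessing module, so a minimal sketch under that assumption could be:

from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler, Normalizer,
                                   RobustScaler, StandardScaler)

def normalise(data, method):
    """Scale train/test splits with a scaler fitted on the training set."""
    if method == 'none':
        return data
    scaler = {'l1': Normalizer(norm='l1'),
              'l2': Normalizer(norm='l2'),
              'max': Normalizer(norm='max'),
              'standard': StandardScaler(),
              'maxabs': MaxAbsScaler(),
              'minmax': MinMaxScaler(),
              'robust': RobustScaler()}[method]
    X_train, y_train = data['train']
    X_test, y_test = data['test']
    return {'train': (scaler.fit_transform(X_train), y_train),
            'test': (scaler.transform(X_test), y_test)}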
Example #4
import argparse

import tensorflow as tf

# data loading, as in Examples #8 and #9
from reader import fetch_data
from utils import root_mean_square_error

# models
from bias_sgd import BiasSGD
from embeddings import Embeddings
from autoencoder import Autoencoder
from iterative_svd import IterativeSVD

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print('GPU device not found')
    print('Be sure you want to continue...')
else:
    print('Found GPU at: {}'.format(device_name))

dataloader = fetch_data(train_size=1)
number_of_users, number_of_movies = 10000, 1000

IDs, users, movies, ratings, _ = dataloader['train']


def create_parser():
    parser = argparse.ArgumentParser(
        description="Run cross validation for model")
    parser.add_argument("--verbose", "-v", action="store_true")

    parser.add_argument("--splits-num", type=int, default=10)
    parser.add_argument("--shuffle", action="store_true", default=False)
    parser.add_argument("--model", "-m", type=str)

    # BSGD parameters
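    # (hypothetical continuation: the excerpt cuts off here; these argument
    # names and defaults mirror the BiasSGD configuration in Example #9)
    parser.add_argument("--hidden-size", type=int, default=12)
    parser.add_argument("--lr", type=float, default=0.04)
    parser.add_argument("--reg-matrix", type=float, default=0.08)
    parser.add_argument("--reg-vector", type=float, default=0.04)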
Example #5
    # parse arguments
    argv = parser.parse_args()
    # get log level
    _level = argv.log or ''

    logger = logging.getLogger(os.path.basename(__file__).replace('.py', ''))

    if _level.upper() == 'INFO':
        coloredlogs.install(level='INFO', logger=logger)
    elif _level.upper() == 'DEBUG':
        coloredlogs.install(level='DEBUG', logger=logger)
    else:
        coloredlogs.install(level='WARNING', logger=logger)

    logger.info('Fetching data...')
    data = fetch_data()

    X_train, y_train = data['train']

    D, N = X_train.shape
    logger.debug('Number of features: D=%d' % D)
    logger.debug('Number of train data: N=%d' % N)

    # mean face
    mean_face = X_train.mean(axis=1).reshape(-1, 1)

    logger.info('Plotting mean face...')
    plt.imshow(mean_face.reshape(SHAPE).T,
               cmap=plt.get_cmap('gray'),
               vmin=0,
               vmax=255)
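    # a matching save step, mirroring Examples #1-#3 (the output filename
    # is an assumption)
    plt.savefig('data/out/mean_face.pdf',
                format='pdf',
                dpi=300,
                transparent=True)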
Example #6
    # get flag of standardization; fall back to True only when unset
    standard = argv.standard if argv.standard is not None else True
    # get flag of cross validation
    cv = argv.cross_validation or False

    logger = logging.getLogger(os.path.basename(__file__).replace('.py', ''))

    if _level.upper() == 'INFO':
        coloredlogs.install(level='INFO', logger=logger)
    elif _level.upper() == 'DEBUG':
        coloredlogs.install(level='DEBUG', logger=logger)
    else:
        coloredlogs.install(level='WARNING', logger=logger)

    logger.info('Fetching data...')
    data = fetch_data(ratio=0.8)

    X_train, y_train = data['train']

    D, N = X_train.shape

    pca = PCA(n_comps=M, standard=standard, logger=logger)
    logger.info('Applying PCA with M=%d' % M)

    # normalise data
    W_train = pca.fit(X_train)
    logger.debug('W_train.shape=%s' % (W_train.shape,))

    X_test, y_test = data['test']
    I, K = X_test.shape
    assert I == D, logger.error(
Example #7
    return parser


if __name__ == "__main__":
    parser = create_parser()
    args = parser.parse_args()

    device_name = tf.test.gpu_device_name()
    if device_name != '/device:GPU:0':
        print('GPU device not found')
        print('Be sure you want to continue...')
    else:
        print('Found GPU at: {}'.format(device_name))

    dataloader = fetch_data(train_size=args.train_size)
    number_of_users, number_of_movies = 10000, 1000

    # Training
    train_IDs, train_users, train_movies, train_ratings, A_train = dataloader[
        'train']

    # Validation
    valid_IDs, valid_users, valid_movies, valid_ratings, A_valid = dataloader[
        'valid']

    # Testing
    test_IDs, test_users, test_movies = dataloader['test']

    known_train = ~np.isnan(A_train)
    known_validation = ~np.isnan(A_valid)
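
A short sketch of how these masks are typically consumed, assuming `root_mean_square_error` (imported in Example #4) takes `(y_true, y_pred)` arrays and with `predictions` standing in for any dense user-by-movie matrix produced by the models above:

    # naive mean-rating baseline, purely for illustration
    predictions = np.full_like(A_valid, np.nanmean(A_train))

    rmse = root_mean_square_error(A_valid[known_validation],
                                  predictions[known_validation])
    print('Validation RMSE: {:.4f}'.format(rmse))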
Example #8
# helper data preprocessor
from reader import fetch_data
# custom PCA transformer
from pca import PCA
# KNN Classifier
from sklearn.neighbors import KNeighborsClassifier

M = 121
standard = False

data = fetch_data('../src/face.mat', ratio=0.8)

X_train, y_train = data['train']

D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)

W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape

W_test = pca.transform(X_test)

nn = KNeighborsClassifier(n_neighbors=1)
nn.fit(W_train.T, y_train.T.ravel())
acc = nn.score(W_test.T, y_test.T.ravel())
print('Accuracy = %.2f%%' % (acc * 100))
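
The project-local `PCA` class is not included in the excerpts; from its usage here (column-major D x N data, `fit` returning the projected training set, `transform` reusing the fitted basis) a minimal sketch might look like:

import numpy as np

class PCA:
    """Sketch of the project-local transformer, inferred from its usage."""

    def __init__(self, n_comps, standard=False, logger=None):
        self.n_comps = n_comps
        self.standard = standard

    def fit(self, X):
        # X is D x N: one column per sample
        self.mean_ = X.mean(axis=1, keepdims=True)
        A = X - self.mean_
        if self.standard:
            self.std_ = A.std(axis=1, keepdims=True) + 1e-12
            A = A / self.std_
        # thin SVD: the left singular vectors span the principal axes
        U, _, _ = np.linalg.svd(A, full_matrices=False)
        self.W_ = U[:, :self.n_comps]  # D x M basis
        return self.W_.T @ A           # M x N projected training data

    def transform(self, X):
        A = X - self.mean_
        if self.standard:
            A = A / self.std_
        return self.W_.T @ A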
Example #9
import numpy as np
import pandas as pd

# from svdplus import SVDplus
from bias_sgd import BiasSGD
from reader import fetch_data
from utils import root_mean_square_error

N_USERS = 10000
N_MOVIES = 1000
N_ITERS = 1000000

#data = fetch_data(train_size=0.88, train_file="../data/data_train.csv",
#                  test_file="../data/sampleSubmission.csv")
data = fetch_data(train_size=0.88)

# Training
_, train_users, train_movies, train_ratings, A_train = data['train']

# Validation
_, valid_users, valid_movies, valid_ratings, A_valid = data['valid']


def predict_with_config(args,
                        hidden_size=12,
                        lr=0.04,
                        reg_matrix=0.08,
                        reg_vector=0.04):
    predictor = BiasSGD(N_USERS,
                        N_MOVIES,
Example #10
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

# fetch_data and normalise are project-local helpers; sns_blue, sns_green
# and sns_red are project-local colour constants (assumed)

sns.set_style("ticks")
plt.rcParams['figure.figsize'] = [6.0, 12.0]

fig, axes = plt.subplots(nrows=4, ncols=2)

tuples = [(axes[0, 0], 'none', 'Raw'), (axes[0, 1], 'l2', 'L2 Normalised'),
          (axes[1, 0], 'l1', 'L1 Normalised'),
          (axes[1, 1], 'max', r'$L_{\infty}$ Normalised'),
          (axes[2, 0], 'standard', 'Standardised'),
          (axes[2, 1], 'maxabs', 'Maximum Absolute Value Scaled'),
          (axes[3, 0], 'minmax', 'Minimum to Maximum Values Scaled'),
          (axes[3, 1], 'robust', 'IQR and Median Scaled')]

for ax, method, title in tuples:

    data = normalise(data=fetch_data(), method=method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    pca = PCA(n_components=2)

    W_train = pca.fit_transform(X_train)
    W_test = pca.transform(X_test)

    _drawn = [False, False, False]
    col = [sns_blue, sns_green, sns_red]

    for w, y in zip(W_train, y_train):
        if not _drawn[y - 1]:
            ax.scatter(w[0], w[1], c=col[y - 1], label='%s' % (y + 1))