Example No. 1
def knn():
    train_data, train_labels = load_train()

    #for validation
    valid_data, valid_labels = load_valid()

    #for test
    #valid_data, valid_labels = load_test()

    # candidate values of k and the classification accuracy achieved by each
    values = [1, 3, 5, 7, 9]
    ratio = []
    for k in values:
        c = 0  # number of correctly classified examples
        prediction_labels = run_knn(k, train_data, train_labels, valid_data)

        for i in range(len(valid_labels)):
            if valid_labels[i] == prediction_labels[i]:
                c += 1
        ratio.append(float(c) / len(prediction_labels))

    plt.plot(values, ratio)

    #for validation
    plt.axis([1, 9, 0.81, 0.87])

    #for test
    #plt.axis([1, 9, 0.87, 0.95])

    plt.show()
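This snippet, like several below, assumes helper functions that are not shown. A minimal sketch of the imports it relies on, with module names inferred from the other examples in this collection (the locations are assumptions, not part of the original snippet):

import matplotlib.pyplot as plt

# assumed module layout, following the other examples here
from utils import load_train, load_valid, load_test
from run_knn import run_knn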
Example No. 3
def run_logistic_regression():
    train_inputs, train_targets = load_train()
    valid_inputs, valid_targets = load_valid()

    # TODO: initialize parameters
    parameters = {
        'learning_rate': 0.01,
        'weight_regularization': 0,
        'num_iterations': 10
    }

    # logistic regression weights: start the even-indexed weights (and the
    # bias) at 0.01 and zero out the odd-indexed ones
    dimension = 28 * 28
    z = np.ones([dimension + 1, 1]) / 100.0
    for i in xrange(dimension):
        if i % 2 == 1:
            z[i] = 0

    weights = z

    # Verify that your logistic function produces the right gradient.
    # diff should be very close to 0.
    #run_check_grad(parameters)

    # Begin learning with gradient descent
    for t in xrange(parameters['num_iterations']):

        # TODO: you will need to modify this loop to create plots, etc.

        # find the negative log likelihood and derivatives w.r.t. weights
        f, df, frac_correct_train = logistic(weights,
                                             train_inputs,
                                             train_targets,
                                             parameters)

        _, _, frac_correct_valid = logistic(weights,
                                            valid_inputs,
                                            valid_targets,
                                            parameters)
        
        if np.isnan(f) or np.isinf(f):
            raise ValueError("nan/inf error")

        # gradient descent step on the negative log likelihood,
        # with a small L2 weight-decay term
        weights = weights - parameters['learning_rate'] * (df + 0.001 * weights)

        # print some stats
        print("ITERATION:{:4d}   LOGL:{:4.2f}   "
              "TRAIN FRAC:{:2.2f}   VALID FRAC:{:2.2f}".format(
                  t + 1, f, frac_correct_train * 100, frac_correct_valid * 100))
Example No. 4
def run_logistic_regression(hyperparameters):

    # TODO specify training data
    train_inputs, train_targets = load_train()

    valid_inputs, valid_targets = load_valid()

    # N is number of examples; M is the number of features per example.
    N, M = train_inputs.shape

    # Logistic regression weights
    # TODO:Initialize to random weights here.
    #weights = np.random.normal(0, 0.2, (train_inputs.shape[1]+1,1))
    weights = np.zeros((M + 1, 1))

    # Verify that your logistic function produces the right gradient.
    # diff should be very close to 0.
    run_check_grad(hyperparameters)

    # Begin learning with gradient descent
    logging = np.zeros((hyperparameters['num_iterations'], 5))
    for t in xrange(hyperparameters['num_iterations']):

        # Find the negative log likelihood and its derivatives w.r.t. the weights.
        f, df, predictions = logistic(weights, train_inputs, train_targets,
                                      hyperparameters)

        # Evaluate the prediction.
        cross_entropy_train, frac_correct_train = evaluate(
            train_targets, predictions)

        if np.isnan(f) or np.isinf(f):
            raise ValueError("nan/inf error")

        # update parameters
        weights = weights - hyperparameters['learning_rate'] * df / N

        # Make a prediction on the valid_inputs.
        predictions_valid = logistic_predict(weights, valid_inputs)

        # Evaluate the prediction.
        cross_entropy_valid, frac_correct_valid = evaluate(
            valid_targets, predictions_valid)

        # print some stats
        print("ITERATION:{:4d}  TRAIN NLOGL:{:4.2f}  TRAIN CE:{:.6f} "
              "TRAIN FRAC:{:2.2f}  VALID CE:{:.6f}  VALID FRAC:{:2.2f}".format(
                  t + 1, f / N, cross_entropy_train, frac_correct_train * 100,
                  cross_entropy_valid, frac_correct_valid * 100))
        logging[t] = [
            f / N, cross_entropy_train, frac_correct_train * 100,
            cross_entropy_valid, frac_correct_valid * 100
        ]
    return logging
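A hedged driver for the function above, showing how the returned `logging` array can be plotted; the hyperparameter values are illustrative, and the column indices follow the `logging[t] = [...]` assignment in the loop.

import matplotlib.pyplot as plt

hyperparameters = {
    'learning_rate': 0.1,         # illustrative value
    'weight_regularization': 0.0,
    'num_iterations': 100,
}

log = run_logistic_regression(hyperparameters)
iters = range(1, hyperparameters['num_iterations'] + 1)
plt.plot(iters, log[:, 1], label='train cross entropy')  # column 1
plt.plot(iters, log[:, 3], label='valid cross entropy')  # column 3
plt.xlabel('iteration')
plt.legend()
plt.show()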
Example No. 5
def run_knn(k, train_data, train_labels, valid_data):
    # signature reconstructed from the calls in the main block below;
    # predicts each validation point's label as the majority vote of
    # its k nearest training points
    dist = l2_distance(valid_data.T, train_data.T)
    nearest = np.argsort(dist, axis=1)[:,:k]

    train_labels = train_labels.reshape(-1)
    valid_labels = train_labels[nearest]

    # note this only works for binary labels
    valid_labels = (np.mean(valid_labels, axis=1) >= 0.5).astype(int)
    valid_labels = valid_labels.reshape(-1,1)

    return valid_labels

if __name__ == '__main__':
    train_inputs, train_targets = utils.load_train()
    valid_inputs, valid_targets = utils.load_valid()
    test_inputs, test_targets = utils.load_test()
    
    set_k = [1,3,5,7,9]
    
    accuracy_valid_output = {}
    accuracy_test_output = {}
   
    length_valid = len(valid_inputs)
    length_test = len(test_inputs)
    
    for k in set_k:
        valid_outputs = run_knn(k, train_inputs, train_targets, valid_inputs)
        test_outputs = run_knn(k, train_inputs, train_targets, test_inputs)

        count_valid = np.sum(valid_outputs == valid_targets)
Example No. 6
def run_logistic_regression(hyperparameters):
    # specify training data
    xIn = False
    while not xIn:
        x = raw_input('Training Set LARGE or SMALL? ')
        print(x)
        if x == 'LARGE':
            print("HELLO")
            train_inputs, train_targets = load_train()
            xIn = True
        elif x == 'SMALL':
            print("hello")
            train_inputs, train_targets = load_train_small()
            xIn = True
        else:
            print("Please input LARGE or SMALL")

    valid_inputs, valid_targets = load_valid()
    test_inputs, test_targets = load_test()

    # N is number of examples; M is the number of features per example.
    N, M = train_inputs.shape
    print("N:", N, "  M:", M)

    # Logistic regression weights
    # Initialize to random weights here.
    weights = np.random.normal(0, 0.001, (M+1, 1))

    # Verify that your logistic function produces the right gradient.
    # diff should be very close to 0.
    run_check_grad(hyperparameters)

    # Begin learning with gradient descent
    logging = np.zeros((hyperparameters['num_iterations'], 5))
    for t in xrange(hyperparameters['num_iterations']):

        # Find the negative log likelihood and its derivatives w.r.t. the weights.
        f, df, predictions = logistic(weights, train_inputs, train_targets, hyperparameters)

        # Evaluate the prediction.
        cross_entropy_train, frac_correct_train = evaluate(train_targets, predictions)

        if np.isnan(f) or np.isinf(f):
            raise ValueError("nan/inf error")

        # update parameters
        weights = weights - hyperparameters['learning_rate'] * df / N

        # Make a prediction on the valid_inputs.
        predictions_valid = logistic_predict(weights, valid_inputs)

        # Evaluate the prediction.
        cross_entropy_valid, frac_correct_valid = evaluate(valid_targets, predictions_valid)

        # print some stats
        print("ITERATION:{:4d}  TRAIN NLOGL:{:4.2f}  TRAIN CE:{:.6f} "
              "TRAIN FRAC:{:2.2f}  VALID CE:{:.6f}  VALID FRAC:{:2.2f}".format(
                  t + 1, f / N, cross_entropy_train, frac_correct_train * 100,
                  cross_entropy_valid, frac_correct_valid * 100))
        logging[t] = [f / N, cross_entropy_train, frac_correct_train*100, cross_entropy_valid, frac_correct_valid*100]
    return logging
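The test set is loaded above but never evaluated. A hedged sketch of a final test-set evaluation to append after the training loop, reusing `logistic_predict` and `evaluate` exactly as they are called above:

    # after the training loop, inside run_logistic_regression (illustrative)
    predictions_test = logistic_predict(weights, test_inputs)
    cross_entropy_test, frac_correct_test = evaluate(test_targets, predictions_test)
    print("TEST CE:{:.6f}  TEST FRAC:{:2.2f}".format(
        cross_entropy_test, frac_correct_test * 100))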
Example No. 7
# -*- coding: utf-8 -*-

from utils import load_train, load_valid

from run_knn import run_knn

(train_inputs, train_targets) = load_train()
(valid_inputs, valid_targets) = load_valid()

for k in [1, 3, 5, 7, 9]:
    print(run_knn(k, train_inputs, train_targets, valid_inputs))
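Printing the raw prediction vectors is hard to read; a small hedged variant that reports validation accuracy per k instead, assuming the targets are 0/1 column vectors as in the other examples:

import numpy as np

for k in [1, 3, 5, 7, 9]:
    predictions = run_knn(k, train_inputs, train_targets, valid_inputs)
    accuracy = np.mean(predictions == valid_targets)  # fraction correct
    print("k={}: validation accuracy {:.4f}".format(k, accuracy))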
Example No. 8
"""
Created on Feb 26 2017
Author: Weiping Song
"""
import os, sys
import tensorflow as tf
import numpy as np
import argparse, random

from model import GRU4Rec
from utils import load_train, load_valid

unfold_max = 20
error_during_training = False

train_x, train_y, n_items = load_train(unfold_max)
valid_x, valid_y, _ = load_valid(unfold_max)


class Args():
    is_training = True
    layers = 1
    rnn_size = 100
    n_epochs = 10
    batch_size = 50
    keep_prob = 1
    learning_rate = 0.001
    decay = 0.98
    decay_steps = 2 * 1e3
    sigma = 0.0001
    init_as_normal = False
    grad_cap = 0
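A brief, hedged sketch of how a plain config class like `Args` is typically consumed; attaching the dataset-dependent `n_items` mirrors the pattern in Example No. 11, while the model constructor itself is not shown in this snippet.

# illustrative usage; GRU4Rec's exact constructor is not shown here
args = Args()
args.n_items = n_items  # attach the dataset-dependent field, as in Example No. 11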
Example No. 9
import utils
from run_knn import run_knn 
import plot_digits
import numpy as np
import matplotlib.pyplot as plt



if __name__ == "__main__":
    

    # load the training set
    train_data, train_labels = utils.load_train()

    # load the validation set
    valid_data, valid_labels = utils.load_valid()


    # vector of candidate k values
    K = np.array([1, 3, 5, 7, 9])

    # dictionary mapping each k to its validation accuracy
    results = {}

    for k in K:

        # predicted labels for the validation set
        prediction = run_knn(k, train_data, train_labels, valid_data)

        # compute the classification accuracy
        results[k] = np.mean(prediction == valid_labels)
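A short follow-on sketch that turns the `results` dictionary into the accuracy-versus-k plot the matplotlib import suggests; the plot settings are illustrative.

    # plot validation accuracy against k (illustrative settings)
    plt.plot(K, [results[kk] for kk in K], marker='o')
    plt.xlabel('k')
    plt.ylabel('validation accuracy')
    plt.show()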
Example No. 10
def run_knn(k, train_data, train_labels, valid_data):
    # same majority-vote k-NN helper as in Example No. 5
    # (signature reconstructed from the calls below)
    dist = l2_distance(valid_data.T, train_data.T)
    nearest = np.argsort(dist, axis=1)[:, :k]

    train_labels = train_labels.reshape(-1)
    valid_labels = train_labels[nearest]

    # note this only works for binary labels
    valid_labels = (np.mean(valid_labels, axis=1) >= 0.5).astype(int)
    valid_labels = valid_labels.reshape(-1, 1)

    return valid_labels


if (__name__ == "__main__"):
    train_x, train_y = utils.load_train()
    valid_x, valid_y = utils.load_valid()
    k = [1, 3, 5, 7, 9]
    y1 = []
    for i in k:
        knn = run_knn(i, train_x, train_y, valid_x)
        # error rate: fraction of validation labels that the k-NN
        # prediction gets wrong (binary 0/1 labels assumed)
        result = np.mean(knn != valid_y)
        y1.append(result)
        print("Error Rate " + str(result))
    test_x, test_y = utils.load_test()
    y2 = []
    for j in k:
Example No. 11
def train_RCNN(args):
    train_x, train_y, n_items = load_train(args.max_len)
    args.n_items = n_items
    data = list(zip(train_x, train_y))
    random.shuffle(data)
    train_x, train_y = zip(*data)
    num_batches = len(train_x) // args.batch_size
    global valid_x
    global valid_y
    valid_x, valid_y, _ = load_valid(args.max_len)

    print('#Items: {}'.format(n_items))
    print('#Training Nums: {}'.format(len(train_x)))

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        model = RCNN(args)
        if args.is_store:
            saver = tf.train.Saver(tf.global_variables())
            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Restore model from {} successfully!'.format(
                    ckpt.model_checkpoint_path))
            else:
                print('Restore model from {} failed!'.format(
                    args.checkpoint_dir))
                return
        else:
            sess.run(tf.global_variables_initializer())
        best_epoch = -1
        best_step = -1
        best_loss = np.inf
        best_HR = -np.inf  # best hit rate seen so far (higher is better)

        max_stay, stay_cnt = 20, 0
        losses = 0.0
        for epoch in range(args.epochs):
            for i in range(num_batches):
                x = train_x[i * args.batch_size:(i + 1) * args.batch_size]
                y = train_y[i * args.batch_size:(i + 1) * args.batch_size]
                fetches = [
                    model.sum_loss, model.global_step, model.lr, model.train_op
                ]
                feed_dict = {model.X: x, model.Y: y}
                loss, step, lr, _ = sess.run(fetches, feed_dict)
                losses += loss
                if step % 50 == 0:
                    print('Epoch-{}\tstep-{}\tlr:{:.6f}\tloss: {:.6f}'.format(
                        epoch + 1, step, lr, losses / 50))
                    losses = 0.0
                if step % 1000 == 0:
                    valid_loss, HR, NDCG, MRR = eval_validation(
                        model, sess, args.batch_size)
                    print(
                        'step-{}\teval_validation\tloss:{:6f}\tHR@{}:{:.6f}\tNDCG@{}:{:.6f}\tMRR@{}:{:.6f}'
                        .format(step, valid_loss, args.top_k, HR, args.top_k,
                                NDCG, args.top_k, MRR))
                    if HR > best_HR or (valid_loss < best_loss and HR > 0.0):
                        best_HR = HR
                        best_loss = valid_loss
                        best_epoch = epoch + 1
                        best_step = step
                        stay_cnt = 0
                        ckpt_path = args.checkpoint_dir + 'model.ckpt'
                        model.saver.save(sess, ckpt_path, global_step=step)
                        print("model saved to {}".format(ckpt_path))
                    else:
                        stay_cnt += 1
                        if stay_cnt >= max_stay:
                            break
            if stay_cnt >= max_stay:
                break
        print("best model at:epoch-{}\tstep-{}\tloss:{:.6f}\tHR@{}:{:.6f}".
              format(best_epoch, best_step, best_loss, args.top_k, best_HR))
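`train_RCNN` expects an `args` namespace carrying at least the fields read above (`max_len`, `batch_size`, `epochs`, `checkpoint_dir`, `is_store`, `top_k`, plus whatever `RCNN` itself consumes). A hedged argparse sketch covering just the visible fields; all defaults are illustrative.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--max_len', type=int, default=20)    # illustrative defaults
parser.add_argument('--batch_size', type=int, default=50)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--top_k', type=int, default=20)
parser.add_argument('--checkpoint_dir', type=str, default='./ckpt/')
parser.add_argument('--is_store', action='store_true')

if __name__ == '__main__':
    train_RCNN(parser.parse_args())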
Example No. 12
from l2_distance import l2_distance
# the data loaders are assumed to live in utils, as in the other examples here
from utils import load_train, load_train_small, load_valid, load_test
import numpy as np
from run_knn import run_knn
import matplotlib.pyplot as plt
"""
CSC 2515 - Assignment 1
Tausif Sharif

Notes:
	- Runs the run_knn.py functions from here
	- Will show and save relevant plots
"""

trainInputs, trainTargets = load_train()
smallInputs, smallTargets = load_train_small()
validInputs, validTargets = load_valid()
testInputs, testTargets = load_test()

kList = [1, 3, 5, 7, 9]
classRates = [0.0] * len(kList)   # validation classification rates
classRatesT = [0.0] * len(kList)  # test classification rates
listCount = 0

for k in kList:
    correctCount = 0
    validLabels = run_knn(k, trainInputs, trainTargets, validInputs)
    for i in xrange(len(validLabels)):
        if validLabels[i] == validTargets[i]:
            correctCount += 1
    classRates[listCount] = correctCount / float(len(validLabels))
    listCount += 1
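The docstring above promises plots, but the snippet ends before producing them; a hedged sketch of the plotting step (the filename and styling are assumptions):

# illustrative plotting step; the filename is an assumption
plt.plot(kList, classRates, marker='o')
plt.xlabel('k')
plt.ylabel('validation classification rate')
plt.savefig('knn_validation_rates.png')
plt.show()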