import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score

from MlcLinReg import MlcLinReg
from helpers import shuffle_dataset, split_train_test, tic, toc, load_delicious
"""
This script is used to compare performance of MlcLinReg and sklearn's SGDClassifier.
"""
# Argument handed to load_delicious to select which delicious data to load.
feature = 1

# Mini-batch size and iteration count used for the comparison runs.
batch_size = 2048
iterations = 200

# Load the data, shuffle the training portion, then re-split it. Note that the
# re-split rebinds X_test / y_test, replacing the ones load_delicious returned.
X_train, y_train, X_test, y_test = load_delicious(feature)
X_train_s, y_train_s = shuffle_dataset(X_train, y_train)
X_train, y_train, X_test, y_test = split_train_test(X_train_s, y_train_s)

# Grid of values for the comparison (the code consuming N is not shown here).
# Previously tried grids, kept for reference:
#   np.array(range(100, 1000, 10))
#   np.array([50, 100, 200, 300, 400, 600])
#   np.array([0.001, 0.005, 0.025, 0.05, 0.1])
N = np.array([5, 32, 64, 100, 128, 200, 256, 350, 512, 700, 1024, 1500, 2048])

# Accumulators for the results: one score list and one timing list per model
# (MlcLinReg and, with the _sgd suffix, SGDClassifier).
scores, scores_sgd = [], []
times, times_sgd = [], []
# Example #2
# 0
"""
This script runs a randomised grid search on all features of delicious dataset
"""
# Suppress every warning for the whole run.
warnings.filterwarnings("ignore")

# Distributions the randomised search samples MlcLinReg's parameters from.
# NOTE(review): assuming `st` is scipy.stats, uniform(loc, scale) samples from
# [loc, loc + scale], i.e. learning_rate in [0.001, 0.401] and l_one in
# [0.01, 0.51] -- confirm that is the intended range.
param_dist = {
    "learning_rate": st.uniform(0.001, 0.4),
    "iterations": sp_randint(50, 1000),
    "batch_size": sp_randint(2, 2000),
    "l_one": st.uniform(0.01, 0.5),
}

# Number of features searched, and number of parameter settings sampled for
# each one.
n_features = 501
n_iter_search = 60

# One row per feature, one column per searched parameter.
best_params = np.zeros((n_features, len(param_dist)))

# run randomized search
for feature in tqdm.tqdm(range(n_features)):
    X_train, y_train, X_test, y_test = helpers.load_delicious(feature)
    clf = MlcLinReg()
    random_search = RandomizedSearchCV(clf,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search)

    # The data is densified with .toarray() before being handed to the search.
    random_search.fit(X_train.toarray(), y_train.toarray())
    conf = helpers.report_params(random_search.cv_results_, n_top=1)
    # A dict view is not a sequence numpy can broadcast into a numeric row
    # (on Python 3 the assignment raises TypeError), so materialise it first.
    best_params[feature, :] = list(conf.values())

# 'delicious_best_params' has the following columns :
# learning_rate l_one iterations batch_size
# NOTE(review): the actual column order is whatever order conf.values() yields;
# verify it against helpers.report_params before relying on the line above.
np.savetxt("delicious_best_params.txt", best_params)
# Example #3
# 0
import MlcLinReg
import helpers

# Plot the ROC curve for a single MlcLinReg model (batch_size=256) on the
# delicious data loaded with argument 2, passing savefig=False.
#
# A disabled variant looped `for i in range(100, 100)` (an empty range) and
# called helpers.plot_roc_curve(MlcLinReg.MlcLinReg(batch_size=i), savefig=True).
classifier = MlcLinReg.MlcLinReg(batch_size=256)
delicious_data = helpers.load_delicious(2)
helpers.plot_roc_curve(classifier, dataset=delicious_data, savefig=False)