Ejemplo n.º 1
0
def _run(parameters):
    """Load a dataset, fit one SGD model on it and return the labelled result.

    Everything arrives packed in a single argument so the function can be
    called with one positional value.

    Args:
        parameters: A pair ``(dataset_params, (label, params))``.
            ``dataset_params`` is a mapping of keyword arguments for
            ``logistic.PyDataset``; ``label`` identifies the run in the
            printed progress messages and in the return value; ``params`` is
            a mapping of keyword arguments for ``logistic.PygisticSGD``.

    Returns:
        A tuple ``(label, res)`` where ``res`` is whatever ``model.fit``
        returned for the loaded dataset.
    """
    # Echo the raw input first, before any unpacking can fail.
    print(parameters)
    dataset_kwargs, (label, model_kwargs) = parameters

    data = logistic.PyDataset(**dataset_kwargs)
    data.load()

    sgd = logistic.PygisticSGD(**model_kwargs)
    print('run model {}'.format(label))
    outcome = sgd.fit(data)
    print('done model {}'.format(label))
    return label, outcome
Ejemplo n.º 2
0
import os
import logistic
import pickle
import numpy as np

# Output directory for the result files written by this script.
directory = "results"
# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first (another process could create the
# directory between the check and the makedirs call).
os.makedirs(directory, exist_ok=True)

# Create the dataset handles (nothing is loaded here).
# Arguments are positional; judging by the values they appear to be
# (name, input file, sample count, feature count, sparse flag) --
# TODO confirm against the logistic.PyDataset signature.
madelon = logistic.PyDataset(
    "madelon",
    "/Users/jb/data/madelon.txt",
    2000,
    500,
    False,
)
rcv1 = logistic.PyDataset(
    "rcv1",
    "/mlodata1/jb/data/rcv1_train.binary",
    20242,
    47236,
    True,
)
rcv1_test = logistic.PyDataset(
    "rcv1_test",
    "/mlodata1/jb/data/rcv1_test.binary",
    677399,
    47236,
    True,
)
epsilon = logistic.PyDataset(
    "epsilon",
    "/mlodata1/jb/data/epsilon_normalized",
    400000,
    2000,
    True,
)

# """
# RCV1-test theory
# """
# if not rcv1_test.is_loaded():
#   rcv1_test.load()
#
# num_samples = 677399
# num_features = 47236
# dataset = rcv1_test
# dataset_name = "RCV1"
# file = "rcv1-th.pickle"
Ejemplo n.º 3
0
#
# CREATE PARAMS FOR DATASET
#

# Sizes for the rcv1_test dataset, defined once so the dataset description
# and anything derived from them use the same numbers.
num_samples = 677399
num_features = 47236

# Keyword arguments describing the dataset (expanded into logistic.PyDataset).
dataset_params = {
    "name": "rcv1_test",
    "inputFile": "/mlodata1/jb/data/rcv1_test.binary",
    "numSamples": num_samples,
    "numFeatures": num_features,
    "is_sparse": True,
}

# Dataset handle built from dataset_params; it is not loaded at this point.
dataset = logistic.PyDataset(**dataset_params)

#
# CREATE PARAMS FOR EXPERIMENT
#

# Base settings for the experiment (presumably shared by all model variants --
# verify against the code that consumes this dict). tau scales with the
# feature count and lambda_ is the reciprocal of the sample count.
common_params = {
    "numEpochs": 20,
    "lrType": "optimal",
    "lr": 1.0,
    "tau": 10 * num_features,
    "lambda_": 1.0 / num_samples,
    "printPerEpoch": 20,
    "weightingScheme": "final",
}
Ejemplo n.º 4
0
from sklearn.linear_model import SGDClassifier
from sklearn.datasets import load_svmlight_file
import time
import numpy as np

# Number of repetitions; also the length of the `res` array created below.
repeat = 3

# Alternative configuration (RCV1), currently disabled:
# rcv1_test = logistic.PyDataset("rcv1_test", "/mlodata1/jb/data/rcv1_test.binary", 677399, 47236, True)
# svm_path = "/mlodata1/jb/data/rcv1_test.binary"
# dataset = rcv1_test
# num_samples = 677399
# print("RUN ON RCV1")

# Active configuration: epsilon.
# The path and sample count are defined once and reused for the dataset handle.
svm_path = "/mlodata1/jb/data/epsilon_normalized"
num_samples = 400000
epsilon = logistic.PyDataset("epsilon", svm_path, num_samples, 2000, True)
dataset = epsilon
print("RUN ON epsilon")

dataset.load()

# One slot per repetition (presumably filled by the loop that follows).
res = np.zeros(repeat)
for i in range(repeat):
    model = logistic.PygisticSGD(numEpochs=1,
                                 lrType="bottou",
                                 lr=.1,
                                 tau=1.,
                                 lambda_=1. / num_samples,
                                 weightingScheme='final',