Example #1
import numpy as np
import joblib
from torch.utils.data import TensorDataset, DataLoader
# prepareData, LR, Trainer, eye_loss, model_auc, calcAP, sweepS1 and
# sparsity are project-specific helpers assumed to be in scope.

def trainData(name, data, regularization=eye_loss, alpha=0.01, n_epochs=300,
              learning_rate=1e-3, batch_size=4000, r=None, test=False):
    '''
    Return validation AUC, average precision, score1 and sparsity.
    If test is True, combine train and val and report performance on the
    test set.
    '''
    m = data

    if test:
        name = 'test' + name
        xtrain = np.vstack([m.xtrain, m.xval])
        xval = m.xte
        ytrain = np.hstack([m.ytrain, m.yval])
        yval = m.yte
    else:
        xtrain = m.xtrain
        xval = m.xval
        ytrain = m.ytrain
        yval = m.yval

    # note: for cross-validation, split the data into n folds and build the
    # train/validation sets from those folds (see the k-fold sketch after
    # this example); not done here for simplicity
    d = m.r.size(0)
    train_set = TensorDataset(*map(lambda x: x.data, prepareData(xtrain, ytrain)))
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    val_set = TensorDataset(*map(lambda x: x.data, prepareData(xval, yval)))
    val_loader = DataLoader(val_set, batch_size=4000, shuffle=True)

    n_output = 2 # binary classification task 
    model = LR(d, n_output)
    reg_parameters = model.i2o.weight

    t = Trainer(model, lr=learning_rate, risk_factors=m.r, alpha=alpha,
                regularization=regularization, reg_parameters=reg_parameters,
                name=name)
    losses, vallosses = t.fit(train_loader, n_epochs=n_epochs, print_every=1,
                              valdata=val_loader)

    # report statistics
    val_auc = model_auc(model, val_loader)
    ap = calcAP(m.r.data.numpy(), (reg_parameters[1] - reg_parameters[0]).data.numpy())
    thresh, s1 = sweepS1(model, val_loader)  # renamed: don't shadow the Trainer t
    sp = sparsity((reg_parameters[1] - reg_parameters[0]).data.numpy())
    joblib.dump((val_auc, ap, s1, sp), 'models/' + name + '.pkl')
    return val_auc, ap, s1, sp
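
The cross-validation mentioned in the comment inside trainData is left out there. Below is a minimal sketch of the fold splitting, assuming plain NumPy arrays; kfold_splits is an illustrative name, not a project function:

import numpy as np

def kfold_splits(x, y, n_folds=5, seed=0):
    # shuffle the indices once, cut them into n_folds chunks, and let each
    # chunk serve as the validation set exactly once
    rng = np.random.RandomState(seed)
    folds = np.array_split(rng.permutation(len(y)), n_folds)
    for i, val_idx in enumerate(folds):
        train_idx = np.hstack([folds[j] for j in range(n_folds) if j != i])
        yield x[train_idx], y[train_idx], x[val_idx], y[val_idx]

Each (xtrain, ytrain, xval, yval) tuple can then replace m.xtrain, m.ytrain, m.xval, m.yval in a trainData-style run, averaging the returned metrics over folds.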
Example #2
def __init__(self, data, n_cpus=10):
    self.tasks = []
    self.hyperparams = []
    self.n_cpus = n_cpus
    self.data = data
    val_set = TensorDataset(*map(lambda x: x.data,
                                 prepareData(data.xval, data.yval)))
    self.valdata = DataLoader(val_set, batch_size=4000, shuffle=True)
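
Every snippet here calls prepareData, whose definition is not shown. Judging from the callers, it must return torch Variables (each result's .data feeds TensorDataset). A hypothetical stand-in under that assumption; prepare_data_sketch is not the project's actual implementation:

import numpy as np
import torch
from torch.autograd import Variable

def prepare_data_sketch(x, y):
    # wrap NumPy arrays as Variables so .data yields plain tensors;
    # float features, long labels for classification losses
    x_t = Variable(torch.from_numpy(np.asarray(x, dtype=np.float32)))
    y_t = Variable(torch.from_numpy(np.asarray(y, dtype=np.int64)))
    return x_t, y_t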
Example #3
def __init__(self, data, p):  # p is a param parser
    self.tasks = []
    self.hyperparams = []
    self.n_cpus = p.n_cpus
    self.data = data
    val_set = TensorDataset(
        *map(lambda x: x.data, prepareData(data.xval, data.yval)))
    self.valdata = DataLoader(val_set,
                              batch_size=p.batch_size,
                              shuffle=True)
    self.p = p
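
The parser p is only read for n_cpus and batch_size here. A minimal argparse setup that supplies those attributes might look as follows; the flag names and defaults are assumptions:

import argparse

def build_parser():
    # hypothetical parser exposing the two attributes this __init__ reads
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_cpus', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=4000)
    return parser

p = build_parser().parse_args([])  # [] uses the defaults; pass real argv in practice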
Example #4
import numpy as np
from sklearn.metrics import accuracy_score
# prepareData and to_np are project-specific helpers assumed to be in scope.

def model_acc(model, x, y):
    # prepareData wraps x (and y) as torch Variables; score against raw y
    x, _ = prepareData(x, y)
    yhat = np.argmax(to_np(model(x)), 1)
    return accuracy_score(y, yhat)
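
Usage on raw NumPy arrays, assuming a trained model and test data such as those in Example #5 below:

print('test accuracy: %.3f' % model_acc(model, xte, yte))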
Example #5

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
# to_var, prepareData, LR, Trainer and eye_loss are project-specific helpers.

n, d = 1000, 2


def gendata():
    x = np.random.randn(n, d)
    y = (x.sum(1) > 0).astype(int)  # np.int was removed in NumPy 1.24+
    return x, y


xtr, ytr = gendata()
xte, yte = gendata()

r = to_var(torch.FloatTensor([0, 1]))
train_data = TensorDataset(*map(lambda x: x.data, prepareData(xtr, ytr)))
data = DataLoader(train_data, batch_size=100, shuffle=True)

n_output = 2  # binary classification task
model = LR(d, n_output)
learning_rate = 0.01
alpha = 0.08  # regularization strength

reg_parameters = model.i2o.weight
t = Trainer(model,
            lr=learning_rate,
            risk_factors=r,
            alpha=alpha,
            regularization=eye_loss,
            reg_parameters=reg_parameters)
t.fit(data, n_epochs=60, print_every=50)
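
After fitting, the toy model can be scored on the held-out sample with the model_acc helper from Example #4:

print('train accuracy:', model_acc(model, xtr, ytr))
print('test accuracy:', model_acc(model, xte, yte))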