Esempio n. 1
0
    def new_trainer(self, pars, data):
        """Create a ``Trainer`` for a newly built, randomly initialised ``Mlp``.

        ``pars`` is read as a mapping with the keys ``'n_hidden'``,
        ``'hidden_transfer'``, ``'optimizer'`` and ``'par_std'``.  ``data`` is
        passed through to the ``Trainer`` unchanged.

        The stop criterion is "10000 iterations reached" or
        ``NotBetterThanAfter(1e-1, 5000, key='val_loss')``; the pause
        criterion is ``ModuloNIterations(100)``.
        """
        # Project helpers, called before the model is built.
        git_log(['theano', 'breze', 'climin', 'alchemie'])
        copy_theanorc()

        net = Mlp(
            2, [pars['n_hidden']], 1,
            hidden_transfers=[pars['hidden_transfer']],
            out_transfer='sigmoid',
            loss='bern_ces',
            optimizer=pars['optimizer'])
        # Parameters are drawn from a normal with mean 0 and std par_std.
        climin.initialize.randomize_normal(
            net.parameters.data, 0, pars['par_std'])

        report_every = 100

        # The criteria are created in the same order in which the original
        # call evaluated them.
        stop_criterion = climin.stops.Any([
            climin.stops.AfterNIterations(10000),
            climin.stops.NotBetterThanAfter(1e-1, 5000, key='val_loss'),
        ])
        pause_criterion = climin.stops.ModuloNIterations(report_every)
        printer = OneLinePrinter(
            keys=['n_iter', 'runtime', 'train_loss', 'val_loss'],
            spaces=[6, '10.2f', '15.8f', '15.8f'])
        interrupt_criterion = climin.stops.OnSignal()

        return Trainer(
            model=net,
            data=data,
            stop=stop_criterion,
            pause=pause_criterion,
            report=printer,
            interrupt=interrupt_criterion,
        )
Esempio n. 2
0
def new_trainer(pars, data):
    """Build an ``Mlp`` from ``pars`` and wrap it in a ``Trainer``.

    ``pars`` is read as a mapping with the keys ``'n_hidden'``,
    ``'hidden_transfer'``, ``'optimizer'`` and ``'par_std'``.  After the
    trainer has been created, ``make_data_dict(t, data)`` is called on it.

    The stop criterion fires after 10000 iterations, on ``SIGTERM``, or when
    ``NotBetterThanAfter(1e-1, 500, key='train_loss')`` fires.
    """
    net = Mlp(
        2, [pars['n_hidden']], 1,
        hidden_transfers=[pars['hidden_transfer']],
        out_transfer='sigmoid',
        loss='bern_ces',
        optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(
        net.parameters.data, 0, pars['par_std'])

    report_every = 100

    on_interrupt = climin.stops.OnSignal()
    # Debug output: lists the names available in climin.stops.
    print(dir(climin.stops))
    stop_criterion = climin.stops.Any([
        climin.stops.AfterNIterations(10000),
        climin.stops.OnSignal(signal.SIGTERM),
        climin.stops.NotBetterThanAfter(1e-1, 500, key='train_loss'),
    ])

    pause_criterion = climin.stops.ModuloNIterations(report_every)
    printer = KeyPrinter(['n_iter', 'train_loss'])

    trainer = Trainer(
        net,
        stop=stop_criterion,
        pause=pause_criterion,
        report=printer,
        interrupt=on_interrupt)

    make_data_dict(trainer, data)

    return trainer
Esempio n. 3
0
def test_mlp_fit():
    """Smoke test: ``Mlp.fit`` runs on a small random data set."""
    n_samples, n_inpt, n_output = 10, 2, 1
    inputs = np.random.standard_normal((n_samples, n_inpt))
    targets = np.random.standard_normal((n_samples, n_output))
    inputs, targets = theano_floatx(inputs, targets)

    net = Mlp(n_inpt, [10], n_output, ['tanh'], 'identity', 'squared',
              max_iter=10)
    net.fit(inputs, targets)
Esempio n. 4
0
def test_mlp_iter_fit():
    """Smoke test: ``Mlp.iter_fit`` can be iterated for a few steps."""
    n_samples, n_inpt, n_output = 10, 2, 1
    inputs = np.random.standard_normal((n_samples, n_inpt))
    targets = np.random.standard_normal((n_samples, n_output))
    inputs, targets = theano_floatx(inputs, targets)

    net = Mlp(n_inpt, [10], n_output, ['tanh'], 'identity', 'squared',
              max_iter=10)

    # Leave the loop once the item following the tenth one has been pulled.
    n_seen = 0
    for _ in net.iter_fit(inputs, targets):
        if n_seen >= 10:
            break
        n_seen += 1
Esempio n. 5
0
def test_mlp_fit_with_imp_weight():
    """Smoke test: ``Mlp.fit`` accepts a third array when ``imp_weight=True``."""
    n_samples, n_inpt, n_output = 10, 2, 1
    inputs = np.random.standard_normal((n_samples, n_inpt))
    targets = np.random.standard_normal((n_samples, n_output))
    # Boolean mask from uniform draws; cast together with the other arrays.
    imp_weights = np.random.random((n_samples, n_output)) > 0.5

    inputs, targets, imp_weights = theano_floatx(inputs, targets, imp_weights)

    net = Mlp(n_inpt, [10], n_output, ['tanh'], 'identity', 'squared',
              max_iter=10, imp_weight=True)
    net.fit(inputs, targets, imp_weights)
Esempio n. 6
0
def test_mlp_pickle():
    """A fitted ``Mlp`` predicts the same after a ``cPickle`` round trip."""
    n_samples, n_inpt, n_output = 10, 2, 1
    inputs = np.random.standard_normal((n_samples, n_inpt))
    targets = np.random.standard_normal((n_samples, n_output))
    inputs, targets = theano_floatx(inputs, targets)

    net = Mlp(n_inpt, [10], n_output, ['tanh'], 'identity', 'squared',
              max_iter=2)
    climin.initialize.randomize_normal(net.parameters.data, 0, 1)
    net.fit(inputs, targets)

    predicted_before = net.predict(inputs)

    # Serialise and immediately restore the model.
    restored = cPickle.loads(cPickle.dumps(net))
    predicted_after = restored.predict(inputs)

    assert np.allclose(predicted_before, predicted_after)
Esempio n. 7
0
def test_mlp_predict():
    """Smoke test: ``Mlp.predict`` runs on an unfitted model."""
    n_samples, n_inpt, n_output = 10, 2, 1
    # theano_floatx returns a sequence; unpack its single element.
    (inputs,) = theano_floatx(np.random.standard_normal((n_samples, n_inpt)))
    net = Mlp(n_inpt, [10], n_output, ['tanh'], 'identity', 'squared',
              max_iter=10)
    net.predict(inputs)
#max_iter = max_passes * X.shape[ 0] / batch_size
max_iter = 75000000
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

optimizer = 'gd', {'step_rate': 0.001, 'momentum': 0}

typ = 'plain'
if typ == 'plain':
    m = Mlp(2099, [800, 800],
            15,
            X,
            Z,
            hidden_transfers=['tanh', 'tanh'],
            out_transfer='identity',
            loss='squared',
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter)
elif typ == 'fd':
    m = FastDropoutNetwork(2099, [800, 800],
                           15,
                           X,
                           Z,
                           TX,
                           TZ,
                           hidden_transfers=['tanh', 'tanh'],
                           out_transfer='identity',
                           loss='squared',
                           p_dropout_inpt=.1,
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels, step_rate, momentum,
                decay, c_wd, counter, opt):
    """Train one MLP with the given hyperparameters and return its test MAE.

    The network is ``Mlp(2100, [400, 100], 1, ...)`` trained with the ``'gd'``
    optimizer.  A weight-decay term (sum of the squared weights of every
    layer, divided by the number of input rows and scaled by ``c_wd``) is
    added to the loss; the undecayed loss stays available as
    ``m.exprs['true_loss']``.

    Side effects
    ------------
    * Progress rows are printed and appended to ``result.txt``.
    * After every reported step ``(n_iter, best_pars)`` is written to
      ``pars_hp_<opt><counter>.pkl``.  If that file exists when the function
      starts, its parameters are loaded into the model and its iteration
      count is added to every reported ``n_iter``.
    * After every reported step ``apsis_pars_<opt><counter>.pkl`` is read and
      rewritten with the current iteration count and test MAE.  That file
      must already exist.

    Parameters
    ----------
    X, Z :
        Training inputs and targets, passed to ``Mlp`` and ``powerfit``.
    TX, TZ :
        Second data pair passed to ``powerfit``.  ``TX`` is first replaced by
        the mean of ten ``m.transformedData(TX)`` results.
    test_labels, train_labels :
        Targets used for the MAE/RMSE figures.  The model output is rescaled
        with the standard deviation and mean of ``train_labels`` before it is
        compared with them.
    step_rate, momentum, decay : float
        Settings forwarded to the ``'gd'`` optimizer.
    c_wd : float
        Factor applied to the weight-decay term.
    counter, opt :
        Used to build the checkpoint file names; ``opt`` is concatenated as a
        string.

    Returns
    -------
    tuple
        ``(mae_test, n_iter)`` taken from the last ``info`` that was handled.
    """
    seed = 3453
    np.random.seed(seed)
    batch_size = 25
    max_iter = 5000000
    # Explicit floor division: the report interval has to be an integer.
    n_report = X.shape[0] // batch_size
    optimizer = 'gd', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay
    }

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100],
            1,
            X,
            Z,
            hidden_transfers=['tanh', 'tanh'],
            out_transfer='identity',
            loss='squared',
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter)

    # Transform the test data: average ten transformedData() results.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print('max iter %s' % (max_iter,))

    m.init_weights()

    # Weight decay over all layers instead of three hard-coded indices.  The
    # terms are added left to right, so a three-layer net yields the same
    # expression as before.
    squared_norms = [(m.parameters[layer.weights] ** 2).sum()
                     for layer in m.mlp.layers]
    weight_decay = squared_norms[0]
    for term in squared_norms[1:]:
        weight_decay = weight_decay + term

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # Undo the label normalisation before measuring the errors.
    label_std = np.std(train_labels)
    label_mean = np.mean(train_labels)
    rescaled_output = m.exprs['output'] * label_std + label_mean

    mae = T.abs_(rescaled_output - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square(rescaled_output - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = ('#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train',
            'mae_test', 'rmse_test')
    header = '\t'.join(keys)
    row_format = '\t'.join([
        '%(n_iter)i', '%(time)g', '%(loss)f', '%(val_loss)f',
        '%(mae_train)g', '%(rmse_train)g', '%(mae_test)g', '%(rmse_test)g'])
    print(header)
    print('-' * len(header))
    with open('result.txt', 'a') as results:
        results.write(header + '\n')
        results.write('-' * len(header) + '\n')
        results.write("%f %f %f %f %s" % (step_rate, momentum, decay, c_wd,
                                          opt))
        results.write('\n')

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp_" + opt + str(counter) + ".pkl")
    n_iter = 0

    # Resume from an earlier checkpoint when there is one.
    if os.path.isfile(base_path):
        with open(base_path, 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
            m.parameters.data[...] = best_pars

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        if math.isnan(info['loss']):
            # Diverged: record the test error and give up.
            # NOTE(review): the returned n_iter is not offset by a resumed
            # checkpoint's count on this path - confirm that is intended.
            info.update({'mae_test': f_mae(TX, test_labels)})
            break
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })
        info['n_iter'] += n_iter
        row = row_format % info
        with open('result.txt', 'a') as results:
            print(row)
            results.write(row + '\n')
        with open("pars_hp_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((info['n_iter'], info['best_pars']), fp)
        # The stored tuple rebinds opt, counter and the hyperparameters, so
        # later iterations use the values read from the file.  The stored
        # iteration count and result are replaced by the current ones.
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'rb') as fp:
            (LAss, opt, step_rate, momentum, decay, c_wd, counter,
             _, _) = dill.load(fp)
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter,
                       info['n_iter'], info['mae_test']), fp)

    return info['mae_test'], info['n_iter']
Esempio n. 10
0
def run_mlp(arch, func, step, batch, X, Z, TX, TZ, wd, opt):
    """Train an MLP with weight decay and report train/test MAE and RMSE.

    A weight-decay term (sum of the squared weights of every layer, divided
    by the number of input rows and scaled by ``wd``) is added to the loss;
    the undecayed loss stays available as ``m.exprs['true_loss']``.

    Side effects: progress rows and a final summary are printed and appended
    to ``result.txt``; the best parameters are pickled to ``best_pars.pkl``.
    The function returns ``None``.

    Parameters
    ----------
    arch :
        Hidden layer sizes, forwarded to ``Mlp``.
    func :
        Hidden transfer functions, forwarded as ``hidden_transfers``.
    step : float
        Optimizer ``step_rate``.
    batch : int
        Batch size; also determines the report interval.
    X, Z :
        Training inputs and targets.  ``Z`` is also used (before the type
        cast) as the labels for the training error.
    TX, TZ :
        Second data pair passed to ``powerfit``.  ``TX`` is first replaced by
        the mean of ten ``m.transformedData(TX)`` results; ``TZ`` is also used
        (before the type cast) as the labels for the test error.
    wd : float
        Factor applied to the weight-decay term.
    opt :
        Optimizer identifier, forwarded to ``Mlp``.
    """
    batch_size = batch
    max_iter = 100000
    # Explicit floor division: the report interval has to be an integer.
    n_report = X.shape[0] // batch_size
    input_size = len(X[0])
    # Bound before X/Z/TX/TZ are cast below, so the error figures are
    # computed against the labels exactly as they were passed in.
    train_labels = Z
    test_labels = TZ

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    optimizer = opt, {'step_rate': step}

    # Manual switch between the plain MLP and the fast-dropout variant.
    typ = 'plain'
    if typ == 'plain':
        m = Mlp(input_size, arch, 1, X, Z,
                hidden_transfers=func,
                out_transfer='identity',
                loss='squared',
                optimizer=optimizer,
                batch_size=batch_size,
                max_iter=max_iter)
    elif typ == 'fd':
        m = FastDropoutNetwork(2099, [400, 100], 1, X, Z, TX, TZ,
                               hidden_transfers=['tanh', 'tanh'],
                               out_transfer='identity',
                               loss='squared',
                               p_dropout_inpt=.1,
                               p_dropout_hiddens=.2,
                               optimizer=optimizer,
                               batch_size=batch_size,
                               max_iter=max_iter)

    climin.initialize.randomize_normal(m.parameters.data, 0,
                                       1 / np.sqrt(m.n_inpt))

    # Transform the test data: average ten transformedData() results.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)

    print('max iter %s' % (max_iter,))

    m.init_weights()

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i)
                    for i in (X, Z, TX, TZ)]

    # Weight decay over every layer.  The previous code indexed exactly four
    # weight matrices, which failed for shallower ``arch`` values and ignored
    # the extra layers of deeper ones.  Terms are added left to right, so a
    # four-layer net yields the same expression as before.
    squared_norms = [(m.parameters[layer.weights] ** 2).sum()
                     for layer in m.mlp.layers]
    weight_decay = squared_norms[0]
    for term in squared_norms[1:]:
        weight_decay = weight_decay + term

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # Undo the label normalisation before measuring the errors.
    label_std = np.std(train_labels)
    label_mean = np.mean(train_labels)
    rescaled_output = m.exprs['output'] * label_std + label_mean

    mae = T.abs_(rescaled_output - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square(rescaled_output - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = ('#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train',
            'mae_test', 'rmse_test')
    header = '\t'.join(keys)
    row_format = '\t'.join([
        '%(n_iter)i', '%(time)g', '%(loss)f', '%(val_loss)f',
        '%(mae_train)g', '%(rmse_train)g', '%(mae_test)g', '%(rmse_test)g'])
    print(header)
    print('-' * len(header))
    with open('result.txt', 'a') as results:
        results.write(header + '\n')
        results.write('-' * len(header) + '\n')

    for info in m.powerfit((X, Z), (TX, TZ), stop, pause):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })

        row = row_format % info
        with open('result.txt', 'a') as results:
            print(row)
            results.write(row + '\n')

    # Restore the best parameters and persist them.  The file is opened in
    # binary mode and closed deterministically.
    m.parameters.data[...] = info['best_pars']
    with open('best_pars.pkl', 'wb') as pars_file:
        cp.dump(info['best_pars'], pars_file)

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * label_std + label_mean
    output_test = TY * label_std + label_mean

    # Each figure is computed once and reused for the console and the file.
    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** .5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** .5

    print('TRAINING SET\n')
    print('MAE:  %5.2f kcal/mol' % mae_train)
    print('RMSE: %5.2f kcal/mol' % rmse_train)

    print('TESTING SET\n')
    print('MAE:  %5.2f kcal/mol' % mae_test)
    print('RMSE: %5.2f kcal/mol' % rmse_test)

    with open('result.txt', 'a') as results:
        results.write('Training set:\n')
        results.write('MAE:\n')
        results.write("%5.2f" % mae_train)
        results.write('\nRMSE:\n')
        results.write("%5.2f" % rmse_train)
        results.write('\nTesting set:\n')
        results.write('MAE:\n')
        results.write("%5.2f" % mae_test)
        results.write('\nRMSE:\n')
        results.write("%5.2f" % rmse_test)
def do_one_eval(X, Z, VX, VZ, step_rate, momentum, decay, c_wd,
                verbose=False):
    """
    Does one evaluation of a neural network with the above parameters.

    Parameters
    ----------
    X, Z : matrix
        Feature and Target matrices of the training set, one-hot encoded.
    VX, VZ : matrix
        Feature and Target matrices of the validation set, one-hot encoded.
    step_rate : float
        The step-rate/learning rate of the rmsprop-algorithm
    momentum : float
        The momentum of the rmsprop.
    decay : float
        The step-rate decay
    c_wd : float
        Penalty term for the weight
    verbose : bool, optional
        If true, print a header and one tab-separated progress row per
        report.  Defaults to False (no output).

    Returns
    -------
    val_emp : float
        The fraction (between 0 and 1) of wrongly classified validation
        samples at the last report.
    """

    max_passes = 100
    batch_size = 250
    # Explicit floor division: iteration counts have to be integers.
    max_iter = max_passes * X.shape[0] // batch_size
    n_report = X.shape[0] // batch_size
    optimizer = 'rmsprop', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay
    }

    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(784, [800],
            10,
            hidden_transfers=['sigmoid'],
            out_transfer='softmax',
            loss='cat_ce',
            optimizer=optimizer,
            batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)

    # Weight decay: squared weights divided by the number of input rows,
    # added to the loss.  The undecayed loss is kept as 'true_loss'.
    weight_decay = ((m.parameters.in_to_hidden**2).sum() +
                    (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # Fraction of samples whose arg-max prediction differs from the target.
    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)
    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    start = time.time()
    if verbose:
        # Set up a nice printout.
        keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
        header = '\t'.join(keys)
        print(header)
        print('-' * len(header))

    row_format = '\t'.join([
        '%(n_iter)i', '%(time)g', '%(loss)g', '%(val_loss)g',
        '%(train_emp)g', '%(val_emp)g'])

    for info in m.powerfit((X, Z), (VX, VZ), stop, pause):
        passed = time.time() - start
        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })
        if verbose:
            print(row_format % info)
    return info["val_emp"]