Esempio n. 1
0
def test_mlp_iter_fit():
    """Smoke-test Mlp.iter_fit: consume at most 10 fit iterations."""
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    # Cast to theano's float type, consistent with every other Mlp test in
    # this file; without it the fit can fail on a float32-configured theano.
    X, Z = theano_floatx(X, Z)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    for i, info in enumerate(mlp.iter_fit(X, Z)):
        if i >= 10:
            break
Esempio n. 2
0
def test_mlp_fit():
    """Smoke-test Mlp.fit on a tiny random regression problem."""
    inputs = np.random.standard_normal((10, 2))
    targets = np.random.standard_normal((10, 1))

    inputs, targets = theano_floatx(inputs, targets)

    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    model.fit(inputs, targets)
Esempio n. 3
0
def test_mlp_fit_with_imp_weight():
    """Smoke-test Mlp.fit with per-sample importance weights."""
    inputs = np.random.standard_normal((10, 2))
    targets = np.random.standard_normal((10, 1))
    # Boolean mask used as 0/1 importance weights.
    weights = np.random.random((10, 1)) > 0.5

    inputs, targets, weights = theano_floatx(inputs, targets, weights)

    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10, imp_weight=True)
    model.fit(inputs, targets, weights)
Esempio n. 4
0
def test_mlp_iter_fit():
    """Smoke-test Mlp.iter_fit: stop after at most 10 iterations."""
    inputs = np.random.standard_normal((10, 2))
    targets = np.random.standard_normal((10, 1))
    inputs, targets = theano_floatx(inputs, targets)

    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    for iteration, _ in enumerate(model.iter_fit(inputs, targets)):
        if iteration >= 10:
            break
Esempio n. 5
0
def run_mlp(n_job, pars):
    """Train an MLP on the eigenvalue dataset and log progress to a file.

    Parameters
    ----------
    n_job : int
        Job index; used to name the training log and the parameter dump.
    pars : dict
        Hyper parameters: 'n_hidden', 'hidden_transfer', 'optimizer',
        'par_std'.
    """
    # BUG FIX: the original rebound ``f`` from the HDF5 handle to the log
    # file, leaking the dataset handle. Use distinct names and close both.
    data = h5.File('../../../datasets/eigdata.hdf5', 'r')
    X = data['matrices'][...]
    Z = data['eigvals'][...]
    data.close()

    log = open('mlp_training_%d' % n_job, 'w')

    max_passes = 100
    batch_size = 2000
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    m = Mlp(20000, pars['n_hidden'], 1,
            hidden_transfers=[pars['hidden_transfer']] * len(pars['n_hidden']),
            out_transfer='identity', loss='squared',
            optimizer=pars['optimizer'], batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])

    losses = []
    log.write('max iter: %d \n' % max_iter)

    # L2 weight decay on both weight matrices, scaled by the batch size;
    # the undecayed loss stays available as 'true_loss'.
    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = 0.001
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val_loss'
    header = '\t'.join(i for i in keys)
    log.write(header + '\n')
    log.write(('-' * len(header)) + '\n')

    # Train; the training set doubles as the validation set here.
    for i, info in enumerate(m.powerfit((X, Z), (X, Z), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))

        info.update({
            'time': passed})
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g' % info
        # BUG FIX: rows were written without a newline, producing a single
        # unreadable line in the log file.
        log.write(row + '\n')

    log.write('best val_loss: %f \n' % info['best_loss'])
    log.close()

    # BUG FIX: close the pickle file instead of leaking the handle.
    with open('best_pars_%d.pkl' % n_job, 'w') as dump_f:
        cp.dump(info['best_pars'], dump_f)
def new_trainer(pars, data):
    """Build a Trainer for a softmax MLP on the hand-crafted features.

    Parameters
    ----------
    pars : dict
        Hyper parameters: 'batch_size', 'n_hidden', 'hidden_transfers',
        'optimizer', 'par_std' and 'L2' (weight-decay coefficient).
    data : object
        Dataset handed to ``make_data_dict``.

    Returns
    -------
    Trainer
        Configured with stop, pause, report and interrupt criteria.
    """
    # 132 for the hand-crafted features
    input_size = 156
    # 13 as there are 12 fields
    output_size = 13
    batch_size = pars["batch_size"]
    m = Mlp(
        input_size,
        pars["n_hidden"],
        output_size,
        hidden_transfers=pars["hidden_transfers"],
        out_transfer="softmax",
        loss="cat_ce",
        batch_size=batch_size,
        optimizer=pars["optimizer"],
    )
    climin.initialize.randomize_normal(m.parameters.data, 0, pars["par_std"])

    # L2 weight decay over all weight matrices; the undecayed loss stays
    # available as 'true_loss'.
    weight_decay = (
        (m.parameters.in_to_hidden ** 2).sum()
        + (m.parameters.hidden_to_hidden_0 ** 2).sum()
        + (m.parameters.hidden_to_out ** 2).sum()
    )
    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    c_wd = pars["L2"]
    m.exprs["loss"] = m.exprs["loss"] + c_wd * weight_decay

    # length of dataset should be 270000 (for no time-integration)
    n_report = 270000 / batch_size
    max_iter = n_report * 100

    interrupt = climin.stops.OnSignal()
    # (removed leftover debug ``print dir(climin.stops)``)
    stop = climin.stops.Any(
        [
            climin.stops.AfterNIterations(max_iter),
            climin.stops.OnSignal(signal.SIGTERM),
            # climin.stops.NotBetterThanAfter(1e-1,500,key='train_loss'),
        ]
    )

    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(["n_iter", "train_loss", "val_loss"])

    t = Trainer(m, stop=stop, pause=pause, report=reporter, interrupt=interrupt)

    make_data_dict(t, data)

    return t
Esempio n. 7
0
    def new_trainer(self, pars, data):
        """Create a Trainer for a small sigmoid MLP with Bernoulli loss."""
        # Record the provenance of the run before training starts.
        git_log(['theano', 'breze', 'climin', 'alchemie'])
        copy_theanorc()

        model = Mlp(2, [pars['n_hidden']],
                    1,
                    hidden_transfers=[pars['hidden_transfer']],
                    out_transfer='sigmoid',
                    loss='bern_ces',
                    optimizer=pars['optimizer'])
        climin.initialize.randomize_normal(
            model.parameters.data, 0, pars['par_std'])

        report_every = 100

        # Stop on an iteration budget or when validation stalls.
        stop_criterion = climin.stops.Any([
            climin.stops.AfterNIterations(10000),
            climin.stops.NotBetterThanAfter(1e-1, 5000, key='val_loss'),
        ])
        printer = OneLinePrinter(
            keys=['n_iter', 'runtime', 'train_loss', 'val_loss'],
            spaces=[6, '10.2f', '15.8f', '15.8f'])

        return Trainer(
            model=model,
            data=data,
            stop=stop_criterion,
            pause=climin.stops.ModuloNIterations(report_every),
            report=printer,
            interrupt=climin.stops.OnSignal(),
        )
Esempio n. 8
0
def new_trainer(pars, data):
    """Build a Trainer for a small sigmoid MLP with Bernoulli cross-entropy.

    Parameters
    ----------
    pars : dict
        Hyper parameters: 'n_hidden', 'hidden_transfer', 'optimizer',
        'par_std'.
    data : object
        Dataset handed to ``make_data_dict``.
    """
    m = Mlp(2, [pars['n_hidden']],
            1,
            hidden_transfers=[pars['hidden_transfer']],
            out_transfer='sigmoid',
            loss='bern_ces',
            optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])

    n_report = 100

    interrupt = climin.stops.OnSignal()
    # (removed leftover debug ``print dir(climin.stops)``)
    stop = climin.stops.Any([
        climin.stops.AfterNIterations(10000),
        climin.stops.OnSignal(signal.SIGTERM),
        climin.stops.NotBetterThanAfter(1e-1, 500, key='train_loss'),
    ])

    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(['n_iter', 'train_loss'])

    t = Trainer(m,
                stop=stop,
                pause=pause,
                report=reporter,
                interrupt=interrupt)

    make_data_dict(t, data)

    return t
def new_trainer(pars, data):
    """Build a Trainer for a softmax MLP on the binned representation.

    Parameters
    ----------
    pars : dict
        Hyper parameters: 'batch_size', 'n_hidden', 'hidden_transfers',
        'optimizer', 'par_std' and 'L2' (weight-decay coefficient).
    data : object
        Dataset handed to ``make_data_dict``.
    """
    # 3700 for binning
    input_size = 3700
    # 13 as there are 12 fields
    output_size = 13
    batch_size = pars['batch_size']
    m = Mlp(input_size, pars['n_hidden'], output_size,
            hidden_transfers=pars['hidden_transfers'], out_transfer='softmax',
            loss='cat_ce', batch_size=batch_size,
            optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])

    # L2 weight decay over all weight matrices; the undecayed loss stays
    # available as 'true_loss'.
    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_hidden_0**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = pars['L2']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # length of dataset should be 270000 (for no time-integration)
    n_report = 40000/batch_size
    max_iter = n_report * 100

    # (removed leftover debug ``print m.exprs`` and ``print dir(climin.stops)``)
    interrupt = climin.stops.OnSignal()
    stop = climin.stops.Any([
        climin.stops.Patience('val_loss', max_iter, 1.2),
        climin.stops.OnSignal(signal.SIGTERM),
        #climin.stops.NotBetterThanAfter(1e-1,500,key='train_loss'),
    ])

    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(['n_iter', 'train_loss', 'val_loss'])

    t = Trainer(
        m,
        stop=stop, pause=pause, report=reporter,
        interrupt=interrupt)

    make_data_dict(t, data)

    return t
Esempio n. 10
0
def test_mlp_pickle():
    """An Mlp must survive a pickle round-trip and predict identically."""
    inputs = np.random.standard_normal((10, 2))
    targets = np.random.standard_normal((10, 1))

    inputs, targets = theano_floatx(inputs, targets)

    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=2)

    climin.initialize.randomize_normal(model.parameters.data, 0, 1)
    model.fit(inputs, targets)

    before = model.predict(inputs)

    # Round-trip through cPickle and compare predictions.
    restored = cPickle.loads(cPickle.dumps(model))
    after = restored.predict(inputs)

    assert np.allclose(before, after)
Esempio n. 11
0
def test_mlp_pickle():
    """Pickling must preserve a fitted Mlp's predictions exactly."""
    inputs = np.random.standard_normal((10, 2))
    targets = np.random.standard_normal((10, 1))

    inputs, targets = theano_floatx(inputs, targets)

    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=2)

    climin.initialize.randomize_normal(model.parameters.data, 0, 1)
    model.fit(inputs, targets)

    expected = model.predict(inputs)

    blob = cPickle.dumps(model)
    clone = cPickle.loads(blob)
    actual = clone.predict(inputs)

    assert np.allclose(expected, actual)
#max_iter = max_passes * X.shape[ 0] / batch_size
max_iter = 75000000
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

optimizer = 'gd', {'step_rate': 0.001, 'momentum': 0}

typ = 'plain'
if typ == 'plain':
    m = Mlp(2099, [800, 800],
            15,
            X,
            Z,
            hidden_transfers=['tanh', 'tanh'],
            out_transfer='identity',
            loss='squared',
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter)
elif typ == 'fd':
    m = FastDropoutNetwork(2099, [800, 800],
                           15,
                           X,
                           Z,
                           TX,
                           TZ,
                           hidden_transfers=['tanh', 'tanh'],
                           out_transfer='identity',
                           loss='squared',
                           p_dropout_inpt=.1,
Esempio n. 13
0
def do_one_eval(X, Z, VX, VZ, step_rate, momentum, decay, c_wd):
    """
    Does one evaluation of a neural network with the above parameters.

    Parameters
    ----------
    X, Z : matrix
        Feature and Target matrices of the training set, one-hot encoded.
    VX, VZ : matrix
        Feature and Target matrices of the validation set, one-hot encoded.
    step_rate : float
        The step-rate/learning rate of the rmsprop-algorithm
    momentum : float
        The momentum of the rmsprop.
    decay : float
        The step-rate decay
    c_wd : float
        Penalty term for the weight

    Returns
    -------
    val_emp : float
        The percentage of wrongly classified samples.
    """

    max_passes = 100
    batch_size = 250
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size
    optimizer = 'rmsprop', {'step_rate': step_rate, 'momentum': momentum, 'decay': decay}

    # This defines our NN. Since BayOpt does not support categorical data, we just
    # use a fixed hidden layer length and transfer functions.
    m = Mlp(784, [800], 10, hidden_transfers=['sigmoid'], out_transfer='softmax', loss='cat_ce',
            optimizer=optimizer, batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)
    losses = []
    # L2 weight decay on both weight matrices; the undecayed loss stays
    # available as 'true_loss'. (Removed the original's no-op
    # ``c_wd = c_wd`` self-assignment and unused ``max_len``.)
    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay
    # Fraction of misclassified samples, compiled as a theano function.
    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1), T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)
    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    start = time.time()
    # Set up a nice printout (kept silent; uncomment the prints for live
    # progress).
    keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
    header = '\t'.join(i for i in keys)
    #print header
    #print '-' * len(header)

    for i, info in enumerate(m.powerfit((X, Z), (VX, VZ), stop, pause)):
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))

        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g\t%(train_emp)g\t%(val_emp)g' % info

        # Comment in this row if you want updates during the computation.
        #print row
    return info["val_emp"]
Esempio n. 14
0
def run_mlp(arch, func, step, batch, init, X, Z, VX, VZ, wd):

    max_passes = 200
    batch_size = batch
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size

    input_size = len(X[0])

    stop = climin.stops.after_n_iterations(max_iter)
    pause = climin.stops.modulo_n_iterations(n_report)

    #optimizer = 'rmsprop', {'steprate': 0.0001, 'momentum': 0.95, 'decay': 0.8}
    optimizer = 'gd', {'steprate': step}

    m = Mlp(input_size, arch, 2, hidden_transfers=func, out_transfer='softmax', loss='cat_ce',
            optimizer=optimizer, batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, init)

    losses = []
    print 'max iter', max_iter

    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum()
                    + (m.parameters.hidden_to_hidden_0**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1), T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('results.txt','a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    for i, info in enumerate(m.powerfit((X, Z), (VX, VZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))

        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })

        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g\t%(train_emp)g\t%(val_emp)g' % info
        results = open('results.txt','a')
        print row
        results.write(row + '\n')
        results.close()

    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'],open('best_%s_%s_%s_%s_%s.pkl' %(arch,func,step,batch,init),'w'))
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels, step_rate, momentum,
                decay, c_wd, counter, opt):
    """Train an MLP regressor with checkpoint/resume and log MAE/RMSE.

    Trains ``Mlp(2100, [400, 100], 1)`` with gradient descent, resumes from
    a dill checkpoint ``pars_hp_<opt><counter>.pkl`` if one exists, appends
    progress rows to ``result.txt`` and keeps an apsis bookkeeping pickle in
    sync. Training aborts early if the loss becomes NaN.

    Returns
    -------
    (float, int)
        Test MAE and iteration count of the last reported step.
    """
    seed = 3453
    np.random.seed(seed)
    max_passes = 200
    batch_size = 25
    max_iter = 5000000
    n_report = X.shape[0] / batch_size
    weights = []
    optimizer = 'gd', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay
    }

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    # This defines our NN. Since BayOpt does not support categorical data, we just
    # use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100],
            1,
            X,
            Z,
            hidden_transfers=['tanh', 'tanh'],
            out_transfer='identity',
            loss='squared',
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter)

    #climin.initialize.randomize_normal(m.parameters.data, 0, 1e-3)

    # Transform the test data
    #TX = m.transformedData(TX)
    # Average 10 stochastic transformations of the test set to reduce noise.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    losses = []
    print 'max iter', max_iter

    m.init_weights()

    # Collect the weight matrices of every layer for the decay term.
    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])

    # L2 weight decay over all three weight matrices; the undecayed loss
    # stays available as 'true_loss'.
    weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum() +
                    (weights[2]**2).sum())

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    # NOTE(review): no-op self-assignment, kept verbatim.
    c_wd = c_wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # MAE/RMSE are computed on de-standardised outputs (undo the label
    # normalisation using the training-label statistics).
    mae = T.abs_((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(
        T.square((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.write("%f %f %f %f %s" % (step_rate, momentum, decay, c_wd, opt))
    results.write('\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp_" + opt + str(counter) + ".pkl")
    n_iter = 0

    # Resume from a previous checkpoint for this (opt, counter) pair, if any.
    if os.path.isfile(base_path):
        with open("pars_hp_" + opt + str(counter) + ".pkl", 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        # Abort on divergence, but still record a test MAE for the caller.
        if math.isnan(info['loss']) == True:
            info.update({'mae_test': f_mae(TX, test_labels)})
            n_iter = info['n_iter']
            break
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })
        # Offset by the iteration count restored from the checkpoint.
        info['n_iter'] += n_iter
        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()
        # Persist the checkpoint and update the apsis bookkeeping pickle.
        with open("pars_hp_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((info['n_iter'], info['best_pars']), fp)
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'rb') as fp:
            LAss, opt, step_rate, momentum, decay, c_wd, counter, n_iter1, result1 = dill.load(
                fp)
        n_iter1 = info['n_iter']
        result1 = info['mae_test']
        with open("apsis_pars_" + opt + str(counter) + ".pkl", 'wb') as fp:
            dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter,
                       n_iter1, result1), fp)

    return info['mae_test'], info['n_iter']
def do_one_eval(X, Z, VX, VZ, step_rate, momentum, decay, c_wd):
    """
    Does one evaluation of a neural network with the above parameters.

    Parameters
    ----------
    X, Z : matrix
        Feature and Target matrices of the training set, one-hot encoded.
    VX, VZ : matrix
        Feature and Target matrices of the validation set, one-hot encoded.
    step_rate : float
        The step-rate/learning rate of the rmsprop-algorithm
    momentum : float
        The momentum of the rmsprop.
    decay : float
        The step-rate decay
    c_wd : float
        Penalty term for the weight

    Returns
    -------
    val_emp : float
        The percentage of wrongly classified samples.
    """

    max_passes = 100
    batch_size = 250
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size
    optimizer = 'rmsprop', {
        'step_rate': step_rate,
        'momentum': momentum,
        'decay': decay
    }

    # This defines our NN. Since BayOpt does not support categorical data, we just
    # use a fixed hidden layer length and transfer functions.
    m = Mlp(784, [800],
            10,
            hidden_transfers=['sigmoid'],
            out_transfer='softmax',
            loss='cat_ce',
            optimizer=optimizer,
            batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)
    losses = []
    # L2 weight decay on both weight matrices; the undecayed loss stays
    # available as 'true_loss'. (Removed the original's no-op
    # ``c_wd = c_wd`` self-assignment and unused ``max_len``.)
    weight_decay = ((m.parameters.in_to_hidden**2).sum() +
                    (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay
    # Fraction of misclassified samples, compiled as a theano function.
    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)
    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    start = time.time()
    # Set up a nice printout (kept silent; uncomment the prints for live
    # progress).
    keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
    header = '\t'.join(i for i in keys)
    #print header
    #print '-' * len(header)

    for i, info in enumerate(m.powerfit((X, Z), (VX, VZ), stop, pause)):
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))

        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g\t%(train_emp)g\t%(val_emp)g' % info

        # Comment in this row if you want updates during the computation.
        #print row
    return info["val_emp"]
Esempio n. 17
0
    def __init__(self):
        """Load runtime configuration, build the prediction model and start
        listening for samples.

        Reads ``config.txt`` for ``mode=`` ('cnn' or 'crafted') and
        ``robust=`` ('majority', 'markov' or 'markov_2nd') lines, sets up
        the matching robustness state and model, loads pre-trained
        parameters from pickle files, warms up the compiled prediction
        function, and finally enters the topic listener loop.
        """
        with open('config.txt', 'r') as config_f:
            for line in config_f:
                # str.find() returns 0 when the prefix starts the line, so
                # ``not line.find(...)`` is True exactly for lines beginning
                # with that prefix (fragile but intentional idiom).
                if not line.find('mode='):
                    self.mode = line.replace('mode=', '').replace('\n', '')
                if not line.find('robust='):
                    self.robust = line.replace('robust=', '').replace('\n', '')
        print 'mode=%s\nrobustness=%s' %(self.mode, self.robust)

        # State for the chosen robustness (prediction smoothing) strategy.
        if self.robust == 'majority':
            self.pred_count = 0
            self.predictions = np.zeros((13,))
        if self.robust == 'markov':
            self.markov = Markov_Chain()
            self.last_state = 0
            self.current_state = 0
        if self.robust == 'markov_2nd':
            self.markov = Markov_Chain_2nd()
            self.pre_last_state = 0
            self.last_state = 0
            self.current_state = 0

        self.sample_count = 0
        self.sample = []

        if self.mode == 'cnn':
            # Binning geometry of the input volume, in centimetres.
            self.bin_cm = 10
            self.max_x_cm = 440
            self.min_x_cm = 70
            self.max_y_cm = 250
            self.max_z_cm = 200
            self.nr_z_intervals = 2
            self.x_range = (self.max_x_cm - self.min_x_cm)/self.bin_cm
            self.y_range = self.max_y_cm*2/self.bin_cm
            self.z_range = self.nr_z_intervals
            self.input_size = 3700
            self.output_size = 13
            self.n_channels = 2
            self.im_width = self.y_range
            self.im_height = self.x_range

            print 'initializing cnn model.'
            self.model = Cnn(self.input_size, [16, 32], [200, 200], self.output_size, ['tanh', 'tanh'], ['tanh', 'tanh'],
                        'softmax', 'cat_ce', image_height=self.im_height, image_width=self.im_width,
                        n_image_channel=self.n_channels, pool_size=[2, 2], filter_shapes=[[5, 5], [5, 5]], batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_cnn_pars.pkl', 'rb'))

        if self.mode == 'crafted':
            self.input_size = 156
            self.output_size = 13
            # Normalisation statistics for the hand-crafted features.
            self.means = cp.load(open('means_crafted.pkl', 'rb'))
            self.stds = cp.load(open('stds_crafted.pkl', 'rb'))

            print 'initializing crafted features model.'
            self.model = Mlp(self.input_size, [1000, 1000], self.output_size, ['tanh', 'tanh'], 'softmax', 'cat_ce',
                             batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_crafted_pars.pkl', 'rb'))

        # this is just a trick to make the internal C-functions compile before the first real sample arrives
        compile_sample = np.random.random((1,self.input_size))
        self.model.predict(compile_sample)

        print 'starting to listen to topic.'
        self.listener()
Esempio n. 18
0
def test_mlp_predict():
    """Smoke-test Mlp.predict on random input."""
    inputs = np.random.standard_normal((10, 2))
    inputs, = theano_floatx(inputs)
    model = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    model.predict(inputs)
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels, step_rate, momentum, decay, c_wd, counter, opt):
    """Train an MLP regressor with checkpoint/resume and log MAE/RMSE.

    Trains ``Mlp(2100, [400, 100], 1)`` with gradient descent, resumes from
    a dill checkpoint ``pars_hp_<opt><counter>.pkl`` if one exists, appends
    progress rows to ``result.txt`` and keeps an apsis bookkeeping pickle in
    sync. Training aborts early if the loss becomes NaN.

    Returns
    -------
    (float, int)
        Test MAE and iteration count of the last reported step.
    """
    seed = 3453
    np.random.seed(seed)
    max_passes = 200
    batch_size = 25
    max_iter = 5000000
    n_report = X.shape[0] / batch_size
    weights = []
    optimizer = 'gd', {'step_rate': step_rate, 'momentum': momentum, 'decay': decay}


    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    # This defines our NN. Since BayOpt does not support categorical data, we just
    # use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100], 1, X, Z, hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared',
            optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)

    #climin.initialize.randomize_normal(m.parameters.data, 0, 1e-3)

    # Transform the test data
    #TX = m.transformedData(TX)
    # Average 10 stochastic transformations of the test set to reduce noise.
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    losses = []
    print 'max iter', max_iter

    m.init_weights()

    # Collect the weight matrices of every layer for the decay term.
    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])


    # L2 weight decay over all three weight matrices; the undecayed loss
    # stays available as 'true_loss'.
    weight_decay = ((weights[0]**2).sum()
                        + (weights[1]**2).sum()
                        + (weights[2]**2).sum())

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    # NOTE(review): no-op self-assignment, kept verbatim.
    c_wd = c_wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # MAE/RMSE are computed on de-standardised outputs (undo the label
    # normalisation using the training-label statistics).
    mae = T.abs_((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.write("%f %f %f %f %s" %(step_rate, momentum, decay, c_wd, opt))
    results.write('\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp_"+opt+str(counter)+".pkl")
    n_iter = 0

    # Resume from a previous checkpoint for this (opt, counter) pair, if any.
    if os.path.isfile(base_path):
        with open("pars_hp_"+opt+str(counter)+".pkl", 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        # Abort on divergence, but still record a test MAE for the caller.
        if math.isnan(info['loss']) == True:
            info.update({'mae_test': f_mae(TX, test_labels)})
            n_iter = info['n_iter']
            break
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)

        })
        # Offset by the iteration count restored from the checkpoint.
        info['n_iter'] += n_iter
        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result.txt','a')
        print row
        results.write(row + '\n')
        results.close()
        # Persist the checkpoint and update the apsis bookkeeping pickle.
        with open("pars_hp_"+opt+str(counter)+".pkl", 'wb') as fp:
            dill.dump((info['n_iter'], info['best_pars']), fp)
        with open("apsis_pars_"+opt+str(counter)+".pkl", 'rb') as fp:
            LAss, opt, step_rate, momentum, decay, c_wd, counter, n_iter1, result1 = dill.load(fp)
        n_iter1 = info['n_iter']
        result1 = info['mae_test']
        with open("apsis_pars_"+opt+str(counter)+".pkl", 'wb') as fp:
            dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter, n_iter1, result1), fp)


    return info['mae_test'], info['n_iter']
Esempio n. 20
0
nets = [ f for f in listdir(path) if isfile(join(path,f)) and not f.find('best') ]

best_error = np.inf
best_net = ''

for net in nets:
    file = net
    net = net.replace('.pkl','')
    net = net.replace('best_','')
    net = net.replace('[','')
    net = net.replace(']','')
    net = net.split('_')
    arch = [int(n) for n in net[0].split(',')]
    func = [n.replace(' ','')[1:-1] for n in net[1].split(',')]
    batch_size = int(net[3])
    optimizer = 'gd', {'steprate': 0.1}
    m = Mlp(input_size, arch, 2, hidden_transfers=func, out_transfer='softmax', loss='cat_ce',
            optimizer=optimizer, batch_size=batch_size)
    best_pars = cp.load(open(file,'r'))
    m.parameters.data[...] = best_pars
    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1), T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)
    error = f_n_wrong(VX,VZ)
    if error < best_error:
        best_error = error
        best_net = net
    print 'loaded best parameters from file %s' % net
    print 'percentage of misclassified samples on validation/test set: %f' % error

print 'the best net found was ' + str(net) + ' with an error of %f ' % error
Esempio n. 21
0
def run_mlp(arch, func, step, batch, X, Z, TX, TZ, wd, opt):
    batch_size = batch
    #max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 100000
    n_report = X.shape[0] / batch_size
    weights = []
    input_size = len(X[0])
    train_labels = Z
    test_labels = TZ

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)


    optimizer = opt, {'step_rate': step}

    typ = 'plain'
    if typ == 'plain':
        m = Mlp(input_size, arch, 1, X, Z, hidden_transfers=func, out_transfer='identity', loss='squared', optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)

    elif typ == 'fd':
        m = FastDropoutNetwork(2099, [400, 100], 1, X, Z, TX, TZ,
                hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared',
                p_dropout_inpt=.1,
                p_dropout_hiddens=.2,
                optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)


    climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))


    # Transform the test data
    #TX = m.transformedData(TX)
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)

    losses = []
    print 'max iter', max_iter

    m.init_weights()

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)]

    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])


    weight_decay = ((weights[0]**2).sum()
                        + (weights[1]**2).sum()
                        + (weights[2]**2).sum()
			+ (weights[3]**2).sum()
			)


    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay


    '''
    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                        + (m.parameters.hidden_to_out**2).sum()
                        + (m.parameters.hidden_to_hidden_0**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = 0.1
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay
    '''

    mae = T.abs_((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels))- m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)



    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()



    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)

        })

        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result.txt','a')
        print row
        results.write(row + '\n')
        results.close()


    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'], open('best_pars.pkl', 'w'))

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)


    print 'TRAINING SET\n'
    print('MAE:  %5.2f kcal/mol'%np.abs(output_train - train_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol'%np.square(output_train - train_labels).mean(axis=0) ** .5)


    print 'TESTING SET\n'
    print('MAE:  %5.2f kcal/mol'%np.abs(output_test - test_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol'%np.square(output_test - test_labels).mean(axis=0) ** .5)


    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** .5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** .5


    results = open('result.txt', 'a')
    results.write('Training set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" %mae_train)
    results.write('\nRMSE:\n')
    results.write("%5.2f" %rmse_train)
    results.write('\nTesting set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" %mae_test)
    results.write('\nRMSE:\n')
    results.write("%5.2f" %rmse_test)


    results.close()
# NOTE(review): this top-level region is a truncated paste -- the
# FastDropoutNetwork(...) call below is cut off mid-argument-list (its
# parenthesis is never closed before `batch_size = 25` starts a new
# fragment), so this span is not syntactically valid as-is.  Code kept
# byte-identical; comments only.
stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)


optimizer = "gd", {"step_rate": 0.001, "momentum": 0}

typ = "plain"
if typ == "plain":
    m = Mlp(
        2099,
        [400, 100],
        1,
        X,
        Z,
        hidden_transfers=["tanh", "tanh"],
        out_transfer="identity",
        loss="squared",
        optimizer=optimizer,
        batch_size=batch_size,
        max_iter=max_iter,
    )
elif typ == "fd":
    # TRUNCATED: the remaining keyword arguments and closing paren of this
    # call are missing from the file.
    m = FastDropoutNetwork(
        2099,
        [400, 100],
        1,
        X,
        Z,
        TX,
        TZ,
# Start of the next (independent) script fragment: a 15-output regression
# MLP trained with plain gradient descent.
batch_size = 25
#max_iter = max_passes * X.shape[ 0] / batch_size
max_iter = 75000000
n_report = X.shape[0] / batch_size


stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)


optimizer = 'gd', {'step_rate': 0.001, 'momentum': 0}

typ = 'plain'
if typ == 'plain':
    m = Mlp(2099, [800, 800], 15, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared', optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)
elif typ == 'fd':
    m = FastDropoutNetwork(2099, [800, 800], 15, X, Z, TX, TZ,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared',
            p_dropout_inpt=.1,
            p_dropout_hiddens=.2,
            optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)


#climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))


m.init_weights()
# Average ten stochastic transformations of the test data for stability.
#Transform the test data
#TX = m.transformedData(TX)
TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
def run_mlp(func, step, momentum, X, Z, TX, TZ, wd, opt, counter):
    """Train an MLP regressor with checkpoint/resume support.

    Parameters
    ----------
    func : list of str -- transfer function per hidden layer.
    step : float -- optimizer step rate.
    momentum : float -- optimizer momentum.
    X, Z : training inputs and (standardized) targets.
    TX, TZ : test inputs and targets.
    wd : float -- weight decay coefficient.
    opt : str -- climin optimizer identifier, e.g. 'gd'.
    counter : int -- run id used in the checkpoint file names.

    Side effects: appends progress rows to 'result_hp.txt', checkpoints
    the best parameters to 'pars_hp<counter>.pkl' and the hyperparameters
    to 'hps<counter>.pkl', and writes 'best_pars.pkl' at the end.

    NOTE(review): `train_labels` and `test_labels` are read from module
    globals, not from the arguments -- confirm the caller's module
    defines them before invoking this function.
    """

    print func, step, momentum, wd, opt, counter
    # Fixed seed so repeated runs of the same hyperparameter point match.
    seed = 3453
    np.random.seed(seed)
    batch_size = 25
    # max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 25000000
    n_report = X.shape[0] / batch_size
    weights = []
    input_size = len(X[0])

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    optimizer = opt, {"step_rate": step, "momentum": momentum}

    typ = "plain"
    if typ == "plain":
        m = Mlp(
            input_size,
            [400, 100],
            1,
            X,
            Z,
            hidden_transfers=func,
            out_transfer="identity",
            loss="squared",
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    elif typ == "fd":
        m = FastDropoutNetwork(
            2099,
            [400, 100],
            1,
            X,
            Z,
            TX,
            TZ,
            hidden_transfers=["tanh", "tanh"],
            out_transfer="identity",
            loss="squared",
            p_dropout_inpt=0.1,
            p_dropout_hiddens=0.2,
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    # climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))

    # Average ten stochastic transformations of the test data to get a
    # stable test-set representation.
    # TX = m.transformedData(TX)
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print TX.shape

    losses = []
    print "max iter", max_iter

    m.init_weights()

    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)]

    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])

    # L2 weight decay over the three weight matrices
    # (input->hidden1, hidden1->hidden2, hidden2->output), scaled by batch size.
    weight_decay = (weights[0] ** 2).sum() + (weights[1] ** 2).sum() + (weights[2] ** 2).sum()

    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    c_wd = wd
    m.exprs["loss"] = m.exprs["loss"] + c_wd * weight_decay

    # Undo the target standardization before computing the error metrics.
    mae = T.abs_((m.exprs["output"] * np.std(train_labels) + np.mean(train_labels)) - m.exprs["target"]).mean()
    f_mae = m.function(["inpt", "target"], mae)

    rmse = T.sqrt(
        T.square((m.exprs["output"] * np.std(train_labels) + np.mean(train_labels)) - m.exprs["target"]).mean()
    )
    f_rmse = m.function(["inpt", "target"], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = "#", "seconds", "loss", "val loss", "mae_train", "rmse_train", "mae_test", "rmse_test"
    max_len = max(len(i) for i in keys)
    header = "\t".join(i for i in keys)
    print header
    print "-" * len(header)
    results = open("result_hp.txt", "a")
    results.write(header + "\n")
    results.write("-" * len(header) + "\n")
    results.close()

    # Resume from a previous checkpoint of this run (same counter), if any.
    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl")
    n_iter = 0

    if os.path.isfile(base_path):
        with open("pars_hp" + str(counter) + ".pkl", "rb") as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        # Only report every n_report-th iteration.
        if info["n_iter"] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info["loss"], info["val_loss"]))
        info.update(
            {
                "time": passed,
                "mae_train": f_mae(m.transformedData(X), train_labels),
                "rmse_train": f_rmse(m.transformedData(X), train_labels),
                "mae_test": f_mae(TX, test_labels),
                "rmse_test": f_rmse(TX, test_labels),
            }
        )

        # Offset by the iterations already done before the resume.
        info["n_iter"] += n_iter

        row = (
            "%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g"
            % info
        )
        results = open("result_hp.txt", "a")
        print row
        results.write(row + "\n")
        results.close()
        # Checkpoint the best parameters and the hyperparameter record
        # every report interval so an interrupted run can resume.
        with open("pars_hp" + str(counter) + ".pkl", "wb") as fp:
            cp.dump((info["n_iter"], info["best_pars"]), fp)
        with open("hps" + str(counter) + ".pkl", "wb") as tp:
            cp.dump((func, step, momentum, wd, opt, counter, info["n_iter"]), tp)

    m.parameters.data[...] = info["best_pars"]
    cp.dump(info["best_pars"], open("best_pars.pkl", "wb"))

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    # Map the standardized predictions back to the original label scale.
    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print "TRAINING SET\n"
    print ("MAE:  %5.2f kcal/mol" % np.abs(output_train - train_labels).mean(axis=0))
    print ("RMSE: %5.2f kcal/mol" % np.square(output_train - train_labels).mean(axis=0) ** 0.5)

    print "TESTING SET\n"
    print ("MAE:  %5.2f kcal/mol" % np.abs(output_test - test_labels).mean(axis=0))
    print ("RMSE: %5.2f kcal/mol" % np.square(output_test - test_labels).mean(axis=0) ** 0.5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** 0.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** 0.5

    results = open("result_hp.txt", "a")
    results.write("Training set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_train)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_train)
    results.write("\nTesting set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_test)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_test)

    results.close()
Esempio n. 25
0
# MNIST classification fragment.  NOTE(review): relies on X, TZ and the
# one_hot helper being defined earlier in the (out-of-view) script.
TZ = one_hot(TZ, 10)

image_dims = 28, 28

max_passes = 150
batch_size = 250
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

#optimizer = 'rmsprop', {'steprate': 0.0001, 'momentum': 0.95, 'decay': 0.8}
optimizer = 'gd', {'steprate': 0.1}

# 784 = 28*28 input pixels, one hidden layer of 800 units, 10 classes.
m = Mlp(784, [800], 10, hidden_transfers=['sigmoid'], out_transfer='softmax', loss='cat_ce',
        optimizer=optimizer, batch_size=batch_size)
climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)

losses = []
print 'max iter', max_iter

# L2 penalty over both weight matrices, scaled by the batch size.
weight_decay = ((m.parameters.in_to_hidden**2).sum()
                + (m.parameters.hidden_to_out**2).sum())
weight_decay /= m.exprs['inpt'].shape[0]
m.exprs['true_loss'] = m.exprs['loss']
c_wd = 0.001
m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

# Misclassification rate: fraction of argmax mismatches between output and target.
n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1), T.argmax(m.exprs['target'], axis=1)).mean()
f_n_wrong = m.function(['inpt', 'target'], n_wrong)
Esempio n. 26
0
class Predictor:

    # initialize the object
    def __init__(self):
        """Read config.txt, set up the robustness strategy and the
        prediction model (CNN on occupancy grids or MLP on hand-crafted
        features), then block listening to the point-cloud topic."""
        # `not line.find(key)` is true only when str.find returns 0,
        # i.e. the config line *starts* with the key.
        with open('config.txt', 'r') as config_f:
            for line in config_f:
                if not line.find('mode='):
                    self.mode = line.replace('mode=', '').replace('\n', '')
                if not line.find('robust='):
                    self.robust = line.replace('robust=', '').replace('\n', '')
        print 'mode=%s\nrobustness=%s' %(self.mode, self.robust)

        # Robustness strategies: majority vote over the last 3 predictions,
        # or a first-/second-order Markov prior over state transitions.
        if self.robust == 'majority':
            self.pred_count = 0
            self.predictions = np.zeros((13,))
        if self.robust == 'markov':
            self.markov = Markov_Chain()
            self.last_state = 0
            self.current_state = 0
        if self.robust == 'markov_2nd':
            self.markov = Markov_Chain_2nd()
            self.pre_last_state = 0
            self.last_state = 0
            self.current_state = 0

        # Buffer for the point-cloud parts of the current sample.
        self.sample_count = 0
        self.sample = []

        if self.mode == 'cnn':
            # Occupancy-grid geometry (all sizes in cm).
            self.bin_cm = 10
            self.max_x_cm = 440
            self.min_x_cm = 70
            self.max_y_cm = 250
            self.max_z_cm = 200
            self.nr_z_intervals = 2
            self.x_range = (self.max_x_cm - self.min_x_cm)/self.bin_cm
            self.y_range = self.max_y_cm*2/self.bin_cm
            self.z_range = self.nr_z_intervals
            self.input_size = 3700
            self.output_size = 13
            self.n_channels = 2
            self.im_width = self.y_range
            self.im_height = self.x_range

            print 'initializing cnn model.'
            self.model = Cnn(self.input_size, [16, 32], [200, 200], self.output_size, ['tanh', 'tanh'], ['tanh', 'tanh'],
                        'softmax', 'cat_ce', image_height=self.im_height, image_width=self.im_width,
                        n_image_channel=self.n_channels, pool_size=[2, 2], filter_shapes=[[5, 5], [5, 5]], batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_cnn_pars.pkl', 'rb'))

        if self.mode == 'crafted':
            # 156 = 12 areas x 13 hand-crafted statistics per area.
            self.input_size = 156
            self.output_size = 13
            # Standardization statistics saved at training time.
            self.means = cp.load(open('means_crafted.pkl', 'rb'))
            self.stds = cp.load(open('stds_crafted.pkl', 'rb'))

            print 'initializing crafted features model.'
            self.model = Mlp(self.input_size, [1000, 1000], self.output_size, ['tanh', 'tanh'], 'softmax', 'cat_ce',
                             batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_crafted_pars.pkl', 'rb'))

        # this is just a trick to make the internal C-functions compile before the first real sample arrives
        compile_sample = np.random.random((1,self.input_size))
        self.model.predict(compile_sample)

        print 'starting to listen to topic.'
        self.listener()

    # accumulate arriving point-cloud parts; a full sample is six parts
    def build_samples(self, sample_part):
        """Append the points of one incoming message to the buffer and,
        once six parts have arrived, run the configured predictor and
        reset the buffer."""
        self.sample.extend(read_points(sample_part))
        self.sample_count += 1

        if self.sample_count != 6:
            return

        if self.mode == 'cnn':
            self.cnn_predict()
        if self.mode == 'crafted':
            self.crafted_predict()
        self.sample = []
        self.sample_count = 0

    # start listening to the point cloud topic
    def listener(self):
        """Register for /USArray_pc PointCloud2 messages and block in the
        ROS event loop; build_samples runs once per incoming message."""
        rospy.init_node('listener', anonymous=True)
        rospy.Subscriber("/USArray_pc", PointCloud2, self.build_samples)
        # Blocks until the node is shut down.
        rospy.spin()

    # let the model predict the output
    def cnn_predict(self):
        """Rasterize the buffered point cloud into a (z, x, y) grid that
        keeps the strongest echo per cell, flatten it and feed it to the
        CNN model."""
        grid = np.zeros((self.z_range, self.x_range, self.y_range))

        for point in self.sample:
            # Drop points outside the monitored x/y region
            # (point coordinates are in meters, grid limits in cm).
            if point[0]*100 < self.min_x_cm or point[0]*100 > self.max_x_cm-1 or point[1]*100 > self.max_y_cm-1 or point[1]*100 < -self.max_y_cm:
                continue

            # Explicit floor division keeps the bin indices integral
            # (identical to `/` on Python 2 ints, safe on Python 3 too).
            x = (int(point[0]*100) - self.min_x_cm) // self.bin_cm
            y = (int(point[1]*100) + self.max_y_cm) // self.bin_cm
            z = int(point[2]*100) > (self.max_z_cm // self.nr_z_intervals)
            # Renamed from `pow` -- do not shadow the builtin.
            power = point[4]

            # Keep the strongest echo observed for this cell.
            if grid[z][x][y] != 0:
                if grid[z][x][y] < power:
                    grid[z][x][y] = power
            else:
                grid[z][x][y] = power

        grid = np.reshape(grid,(1,-1))

        self.output_prediction(self.model.predict(grid))


    # let the model predict the output
    def crafted_predict(self):
        """Compute 13 hand-crafted statistics for each of the 12 spatial
        areas -- point count, x/y/z means, x/y/z variances, xy/xz/yz
        covariances, highest point, highest power, mean power -- then
        standardize the 156-dim vector and feed it to the MLP."""
        vec = np.zeros((156,), dtype=np.float32)
        area_points = [[] for _ in np.arange(12)]
        area_counts = np.zeros(12)
        area_x_means = np.zeros(12)
        area_y_means = np.zeros(12)
        area_z_means = np.zeros(12)
        area_highest = np.zeros(12)
        area_highest_pow = np.zeros(12)
        area_pow_means = np.zeros(12)
        area_x_vars = np.zeros(12)
        area_y_vars = np.zeros(12)
        area_z_vars = np.zeros(12)
        area_xy_covars = np.zeros(12)
        area_xz_covars = np.zeros(12)
        area_yz_covars = np.zeros(12)

        for qpoint in self.sample:
            # need to subtract 1 since determine_label counts areas from 1
            label = determine_label((float(qpoint[0]), float(qpoint[1]), float(qpoint[2])))-1
            area_points[label].append(qpoint)
            area_counts[label] += 1
            if float(qpoint[2]) > area_highest[label]:
                area_highest[label] = float(qpoint[2])
            if float(qpoint[4]) > area_highest_pow[label]:
                area_highest_pow[label] = float(qpoint[4])

        for area in np.arange(12):
            for point in area_points[area]:
                area_x_means[area] += float(point[0])
                area_y_means[area] += float(point[1])
                area_z_means[area] += float(point[2])
                area_pow_means[area] += float(point[4])
            if area_counts[area] > 0:
                area_x_means[area] /= area_counts[area]
                area_y_means[area] /= area_counts[area]
                area_z_means[area] /= area_counts[area]
                # BUG FIX: mean power was divided by itself (always 1.0,
                # or NaN for an all-zero sum); divide by the point count.
                area_pow_means[area] /= area_counts[area]

            for point in area_points[area]:
                area_x_vars[area] += (float(point[0]) - area_x_means[area])**2
                area_y_vars[area] += (float(point[1]) - area_y_means[area])**2
                area_z_vars[area] += (float(point[2]) - area_z_means[area])**2
            # if there is only one point, we assume the uncorrected estimator and implicitly divide by one
            if area_counts[area] > 1:
                area_x_vars[area] *= 1/(area_counts[area]-1)
                area_y_vars[area] *= 1/(area_counts[area]-1)
                area_z_vars[area] *= 1/(area_counts[area]-1)

            for point in area_points[area]:
                area_xy_covars[area] += (float(point[0]) - area_x_means[area])*(float(point[1]) - area_y_means[area])
                area_xz_covars[area] += (float(point[0]) - area_x_means[area])*(float(point[2]) - area_z_means[area])
                area_yz_covars[area] += (float(point[1]) - area_y_means[area])*(float(point[2]) - area_z_means[area])
            # if there is only one point, we assume the uncorrected estimator and implicitly divide by one
            if area_counts[area] > 1:
                area_xy_covars[area] *= 1/(area_counts[area]-1)
                area_xz_covars[area] *= 1/(area_counts[area]-1)
                area_yz_covars[area] *= 1/(area_counts[area]-1)

        # 13 features per area -> stride 13 (12 * 13 == 156 == len(vec)).
        # BUG FIX: the stride was 11 although 13 features are written, so
        # neighbouring areas overwrote each other's highest/highest-power
        # slots and the last entries of `vec` were never filled.
        # NOTE(review): confirm the training-time extractor that produced
        # means_crafted.pkl / stds_crafted.pkl uses this stride-13 layout.
        for area in np.arange(12):
            base = area * 13
            vec[base] = area_counts[area]
            vec[base+1] = area_x_means[area]
            vec[base+2] = area_y_means[area]
            vec[base+3] = area_z_means[area]
            vec[base+4] = area_x_vars[area]
            vec[base+5] = area_y_vars[area]
            vec[base+6] = area_z_vars[area]
            vec[base+7] = area_xy_covars[area]
            vec[base+8] = area_xz_covars[area]
            vec[base+9] = area_yz_covars[area]
            vec[base+10] = area_highest[area]
            vec[base+11] = area_highest_pow[area]
            vec[base+12] = area_pow_means[area]

        # standardize with the statistics stored at training time
        vec = np.reshape(vec, (1, 156))
        vec -= self.means
        vec /= self.stds

        self.output_prediction(self.model.predict(vec))

    # create the output
    def output_prediction(self, probabilites):
        if self.robust == 'majority':
            prediction = np.argmax(probabilites)
            # majority vote among the last three predictions
            self.predictions[prediction] += 1
            self.pred_count += 1
            if self.pred_count == 3:
                print 'majority prediction: %d' %np.argmax(self.predictions)
                self.pred_count = 0
                self.predictions = np.zeros((13,))
        if self.robust == 'markov':
            markov_probs = self.markov.transition_table[self.last_state]
            probabilites *= markov_probs
            probabilites /= np.sum(probabilites)
            prediction = np.argmax(probabilites)
            print 'markov prediction: %d' %prediction
            self.last_state = prediction
        if self.robust == 'markov_2nd':
            markov_probs = self.markov.transition_table[self.pre_last_state][self.last_state]
            probabilites *= markov_probs
            probabilites /= np.sum(probabilites)
            prediction = np.argmax(probabilites)
            print 'markov 2nd order prediction: %d' %prediction
            self.pre_last_state = self.last_state
            self.last_state = prediction
        if self.robust == 'off':
            prediction = np.argmax(probabilites)
            print 'fast prediction: %d' %prediction