Esempio n. 1
0
def _write_fold_libsvm(store, name, out_path, bs):
    """Stream one table of ``store`` into ``out_path`` in chunks of ``bs`` rows.

    For every chunk the 'Expected' column is split off as the target,
    clipped to the range [0, 70] and cast to int; all remaining columns are
    cast to float and used as features. Each (features, target) pair is
    handed to ``RainCompetition.to_libsvm`` together with the open output
    file (presumably it appends libsvm-formatted rows -- confirm against
    that helper).

    Args:
        store: Open ``pd.HDFStore`` to read from.
        name: Key of the table inside ``store``.
        out_path: Path of the output file; opened in 'wb' mode, so an
            existing file is overwritten.
        bs: Number of rows read per chunk.
    """
    nrows = store.get_storer(name).nrows
    # The bar counts chunks, not rows.
    pb = progressbar.ProgressBar(maxval=nrows // bs).start()
    with open(out_path, 'wb') as out:
        chunks = store.select(name, iterator=True, chunksize=bs)
        for j, chunk in enumerate(chunks):
            pb.update(j)
            X = chunk.drop('Expected', axis=1).values.astype('float')
            y = chunk['Expected'].values.clip(0, 70).astype('int')
            RainCompetition.to_libsvm(X, y, out)
        pb.finish()


def do_fold_libsvm(bs = 2**15):
    """Export the 10 train/validation folds from HDF5 to per-fold text files.

    Reads the tables 'train_fold_<i>' and 'valid_fold_<i>' (i = 0..9) from
    'data/train_folds.h5' and writes them to 'train_folds_<i>.txt' and
    'valid_folds_<i>.txt' in the current working directory. Data is
    processed chunk by chunk to keep memory usage low.

    Args:
        bs: Number of rows read from the store per chunk.
    """
    print('Loading')
    read = pd.HDFStore('data/train_folds.h5', mode='r')
    try:
        for i in range(10):
            print('Fold {}'.format(i))

            print('Training set')
            _write_fold_libsvm(read, 'train_fold_{}'.format(i),
                               'train_folds_{}.txt'.format(i), bs)

            print('Validation set')
            _write_fold_libsvm(read, 'valid_fold_{}'.format(i),
                               'valid_folds_{}.txt'.format(i), bs)
    finally:
        # Always release the HDF5 file handle, even if an export fails.
        read.close()
Esempio n. 2
0
File: nn.py Progetto: PKostya/kaggle
                               ('dense1', DenseLayer),
                               ('dropout1', DropoutLayer),
                               ('dense2', DenseLayer),
                               ('dropout2', DropoutLayer),
                               ('dense3', DenseLayer),
                               ('output', DenseLayer)],
             input_shape=(None, num_features),
             dense1_num_units=512,
             dropout1_p=0.5,
             dense2_num_units=512,
             dropout2_p=0.5,
             dense3_num_units=512,
             output_num_units=num_classes,
             output_nonlinearity=softmax,
             update=nesterov_momentum,
             eval_size=0.2,
             verbose=1,
             update_learning_rate=theano.shared(float32(0.01)),
             update_momentum=theano.shared(float32(0.9)),
             on_epoch_finished=[
                     AdjustVariable('update_learning_rate', start=0.01, stop=0.00001),
                     AdjustVariable('update_momentum', start=0.9, stop=0.999),
                     EarlyStopping(),
             ],
             max_epochs=10000,)
    net0.initialize()
#    do_fit(net0, 'data/train_impu_norm_shuf.csv', n_iter=1)
    net0.load_weights_from('nn_weights')
    RainCompetition.do_predict(net0, RainCompetition.__data__['test_normalized'], 'data/rain_nn_pred.csv')