def train_test(train, labels, test, **parameters):
    """Fit the feature pipeline plus neural net on `train` and predict `test`.

    The pipeline chains the sparse-feature generators, a log transform, a
    min-max scaler to the range (0, 3) and finally an `NNet` estimator.

    Any keyword arguments in `parameters` override the default `NNet`
    settings listed below (or add new ones).

    Returns the result of the fitted pipeline's `predict_proba(test)`.
    """
    defaults = dict(
        name=name_from_file(),
        max_epochs=600,
        auto_stopping=True,
        adaptive_weight_decay=False,
        save_snapshots_stepsize=None,
        epoch_steps=None,
        momentum_scaling=1200,
        dense1_nonlinearity='rectify',
        dense1_init='glorot_uniform',
        dense2_nonlinearity='rectify',
        dense2_init='glorot_uniform',
        batch_size=128,
        learning_rate=0.0003,
        learning_rate_scaling=1000,
        momentum=0.98,
        dense1_size=700,
        dense2_size=550,
        dense3_size=400,
        dropout0_rate=0.,
        dropout1_rate=0.1,
        dropout2_rate=0.45,
        dropout3_rate=0.58,
    )
    # Caller-supplied settings take precedence over the defaults.
    nnet_settings = dict(defaults, **parameters)

    steps = [('row', PositiveSparseRowFeatureGenerator())]

    # One sparse-feature generator per group of difficult classes.
    generator_specs = (
        ('gen23', (2, 3), 40),
        ('gen234', (2, 3, 4), 40),
        ('gen19', (1, 9), 63),
    )
    for step_name, classes, n_extra in generator_specs:
        generator = PositiveSparseFeatureGenerator(
            difficult_classes=classes,
            extra_features=n_extra,
        )
        steps.append((step_name, generator))

    # The log transform goes after the integer features but before any
    # distance features (none are currently enabled).
    steps.append(('log', LogTransform()))
    # Scaling should apply to both the integer and the float features.
    steps.append(('scale03', MinMaxScaler(feature_range=(0, 3))))
    steps.append(('nn', NNet(**nnet_settings)))

    model = Pipeline(steps)
    model.fit(train, labels)
    return model.predict_proba(test)
# Example #2
0
from nnet.prepare import LogTransform
from nnet.scikit import NNet
from settings import SUBMISSIONS_DIR, PRIORS, BASE_DIR
from utils.features import PositiveSparseFeatureGenerator, PositiveSparseRowFeatureGenerator
from utils.ioutil import makeSubmission
from utils.loading import get_testing_data
from utils.postprocess import scale_to_priors


def _data_file(filename):
    """Return the path of `filename` inside the `data` directory of BASE_DIR."""
    return join(BASE_DIR, 'data', filename)


# Training labels, plus the training and validation matrices with everything
# before column index 1 sliced off (assumes 2-D arrays -- TODO confirm).
labels = load(_data_file('trainclas.npy'))
train = load(_data_file('trainmat.npy'))[:, 1:]
valid = load(_data_file('testmat.npy'))[:, 1:]
# Only the first element returned by get_testing_data() is used.
test = get_testing_data()[0]

# Feature-generation steps, in the order they are applied.
_feature_steps = [
    ('row', PositiveSparseRowFeatureGenerator()),
    (
        'gen23',
        PositiveSparseFeatureGenerator(
            difficult_classes=(2, 3),
            extra_features=40,
        ),
    ),
    (
        'gen234',
        PositiveSparseFeatureGenerator(
            difficult_classes=(2, 3, 4),
            extra_features=40,
        ),
    ),
    (
        'gen19',
        PositiveSparseFeatureGenerator(
            difficult_classes=(1, 9),
            extra_features=63,
        ),
    ),
    # The log transform goes after the integer features but before any
    # distance features (none are currently enabled).
    ('log', LogTransform()),
    # Scaling should apply to both the integer and the float features.
    ('scale03', MinMaxScaler(feature_range=(0, 3))),
]
pipe = Pipeline(_feature_steps)

# Fit the pipeline on the training data only, then reuse the fitted
# transformers for the validation and test sets.
train = pipe.fit_transform(train, labels)
valid = pipe.transform(valid)
test = pipe.transform(test)


# Identifiers of the stored models/runs, in their original order.
names = [
    'final_final1_351',
    'final_final2_5019',
    'final_final1_4969',
    'final_final2_5530',
    'final_final2_2247',
    'final_final1_8594',
    'final_final1_1717',
    'final_final4_3641',
    'final_final2_9535',
    'final_final2_2066',
    'final_final2_5878',
    'final_final4_7076',
    'final_final3_6441',
    'final_final3_5475',
]