# Example 1
def train_test(train, labels, test, **parameters):
    """Train the sparse-feature pipeline + neural net and predict test probabilities.

    Any keyword passed in ``parameters`` overrides the matching default below.
    Returns the class-probability matrix for ``test``.
    """
    config = dict(
        name=name_from_file(),
        max_epochs=600,
        auto_stopping=True,
        adaptive_weight_decay=False,
        save_snapshots_stepsize=None,
        epoch_steps=None,
        momentum_scaling=1200,
        dense1_nonlinearity='rectify',
        dense1_init='glorot_uniform',
        dense2_nonlinearity='rectify',
        dense2_init='glorot_uniform',
        batch_size=128,
        learning_rate=0.0003,
        learning_rate_scaling=1000,
        momentum=0.98,
        dense1_size=700,
        dense2_size=550,
        dense3_size=400,
        dropout0_rate=0.,
        dropout1_rate=0.1,
        dropout2_rate=0.45,
        dropout3_rate=0.58,
    )
    config.update(parameters)
    model = Pipeline([
        ('row', PositiveSparseRowFeatureGenerator()),
        ('gen23', PositiveSparseFeatureGenerator(difficult_classes=(2, 3), extra_features=40)),
        ('gen234', PositiveSparseFeatureGenerator(difficult_classes=(2, 3, 4), extra_features=40)),
        ('gen19', PositiveSparseFeatureGenerator(difficult_classes=(1, 9), extra_features=63)),
        # log-transform comes after the integer features but before any distance features
        ('log', LogTransform()),
        # scaling covers both the integer and float features
        ('scale03', MinMaxScaler(feature_range=(0, 3))),
        ('nn', NNet(**config)),
    ])
    model.fit(train, labels)
    return model.predict_proba(test)
# Example 2
	estimator = Pipeline([
		#('gen23', PositiveSparseFeatureGenerator(difficult_classes = (2, 3), extra_features = 40)),
		#('gen234', PositiveSparseFeatureGenerator(difficult_classes = (2, 3, 4), extra_features = 40)),
		#('gen19', PositiveSparseFeatureGenerator(difficult_classes = (1, 9), extra_features = 63)),
		('log', LogTransform()),
		('scale03', MinMaxScaler(feature_range = (0, 3))),
		('nn', NNet(**{
			'dense1_nonlinearity': 'rectify',
			'dense1_init': 'glorot_normal',
			'max_epochs': 200,
			'learning_rate': 0.000676,
			'learning_rate_scaling': 30,
			'momentum': 0.9,
			'momentum_scaling': 10,
			'dense1_size': 35,#
			#'dense2_size': 463,
			#'dense3_size': 329,
			#'dropout0_rate': 0.06,
			#'dropout1_rate': 0.56,
			#'dropout2_rate': 0.19,
			#'dropout3_rate': 0.04,
			'adaptive_weight_decay': False,
			'auto_stopping': True, ##
			'save_snapshots_stepsize': None,
			'epoch_steps': None,
		})),
	]),
	param_distributions = {
		'nn__name': ['nn{0:03d}'.format(k) for k in range(10000)],
		#'nn__batch_size': binom(n = 256, p = 0.5),
		#'nn__learning_rate': norm(0.0005, 0.0005),
		#'nn__learning_rate_scaling': [1, 10, 100, 1000],
# Example 3
	#('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)),
	#('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)),
	('scale03', MinMaxScaler(feature_range = (0, 3))), # scale should apply to int and float feats
])

# Fit the preprocessing pipeline on the training split, then apply the
# fitted transform (without refitting) to the validation and test splits.
train = pipe.fit_transform(train, labels)
valid = pipe.transform(valid)
test = pipe.transform(test)


# Identifiers of the saved final networks that are loaded and ensembled below.
names = ['final_final1_351', 'final_final2_5019', 'final_final1_4969', 'final_final2_5530', 'final_final2_2247', 'final_final1_8594', 'final_final1_1717', 'final_final4_3641', 'final_final2_9535', 'final_final2_2066', 'final_final2_5878', 'final_final4_7076', 'final_final3_6441', 'final_final3_5475']
# Running sums of per-network probability predictions (int 0 seeds the += below).
totalvalid = totaltest = 0

for name in names:

	net = NNet.load(filepath = join(BASE_DIR, 'results', 'nets', name))

	for nm, val in net.get_params().iteritems():
		print '{0:s} = {1:}'.format(nm, val)

	#for nm, data in [('val', valid), ('tst', test)]:
		#probs = net.predict_proba(data)
		#save(join(SUBMISSIONS_DIR, '{0}_{1}_raw.npy'.format(name, nm)), probs)
		#makeSubmission(probs, fname = join(SUBMISSIONS_DIR, '{0}_{1}_rescale.csv'.format(name, nm)), digits = 8)
	probs = net.predict_proba(valid)
	probs = scale_to_priors(probs, priors = PRIORS)
	save(join(SUBMISSIONS_DIR, '{0}_valid.npy'.format(name)), probs)
	totalvalid += probs

	probs = net.predict_proba(test)
	probs = scale_to_priors(probs, priors = PRIORS)
# Example 4
     #('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)),
     #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)),
     ('scale03', MinMaxScaler(feature_range=(0, 3))
      ),  # scale should apply to int and float feats
     (
         'nn',
         NNet(
             **{
                 'max_epochs': 800,
                 'auto_stopping': True,
                 'adaptive_weight_decay': False,
                 'save_snapshots_stepsize': None,
                 'epoch_steps': None,
                 'dense1_size': 400,
                 'dense2_size': 300,
                 'dense3_size': 300,
                 'dense1_nonlinearity': 'rectify',
                 'dense1_init':
                 'glorot_uniform',  # uniform to reduce randomness
                 'momentum': 0.98,
                 'momentum_scaling': 10,
                 'learning_rate_scaling': 500,
                 'batch_size': 128,
                 'dropout0_rate': 0,
             })),
 ]),
 param_distributions={
     'nn__name': [
         'final_{0:s}_{1:03d}'.format(name_from_file(), k)
         for k in range(10000)
     ],
from nnet.scikit import NNet
from utils.loading import get_training_data

train, labels = get_training_data()[:2]
#test = get_testing_data()[0]
#train, labels = expand_from_test(train, labels, test, confidence = 0.9)
#gen = PositiveSparseRowFeatureGenerator()
#train = gen.fit_transform(train, labels)
#test = gen.transform(test, labels)

#random = RandomState()

opt = RandomizedSearchCV(
    estimator=NNet(
        name=name_from_file(),
        auto_stopping=True,
        max_epochs=1500,  # binom(n = 4000, p = 0.25)
    ),
    param_distributions={
        'dense1_size': randint(low=100, high=1200),
        'dense2_size': randint(low=50, high=900),
        'dense3_size': randint(low=25, high=700),
    },
    fit_params={},
    n_iter=600,
    n_jobs=cpus - 1,
    scoring=log_loss_scorer,
    refit=False,
    pre_dispatch=3,
    cv=ShuffleSplit(
        n=train.shape[0],
# Example 6
      PositiveSparseFeatureGenerator(difficult_classes=(1, 9),
                                     extra_features=63)),
     ('log',
      LogTransform()),  # log should be after integer feats but before dist
     #('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)),
     #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)),
     ('scale03', MinMaxScaler(feature_range=(0, 3))
      ),  # scale should apply to int and float feats
     (
         'nn',
         NNet(
             **{
                 #name = name_from_file(),
                 'max_epochs': 1200,
                 'auto_stopping': True,
                 'adaptive_weight_decay': False,
                 'save_snapshots_stepsize': None,
                 'epoch_steps': None,
                 'dense3_size': 0,
                 'momentum_scaling': 1200,
             })),
 ]),
 param_distributions={
     'nn__name': ['nn{0:03d}'.format(k) for k in range(10000)],
     'nn__dense1_nonlinearity': nonlinearities.keys(),
     'nn__dense1_init': initializers.keys(),
     'nn__dense2_nonlinearity': nonlinearities.keys(),
     'nn__dense2_init': initializers.keys(),
     'nn__batch_size': binom(n=256, p=0.5),
     'nn__learning_rate': norm(0.0005, 0.0002),
     'nn__learning_rate_scaling': [10, 100, 1000],
# Example 7
     ('scale03', MinMaxScaler(feature_range=(0, 3))
      ),  # scale should apply to int and float feats
     (
         'nn',
         NNet(
             **{
                 'max_epochs': 800,
                 'auto_stopping': True,
                 'adaptive_weight_decay': False,
                 'save_snapshots_stepsize': None,
                 'epoch_steps': None,
                 'dense1_size': 1000,
                 'dense2_size': 1500,
                 'dense3_size': 1000,
                 'dense1_nonlinearity': 'leaky20',
                 'dense1_init':
                 'glorot_uniform',  # uniform to reduce randomness
                 'momentum': 0.9,
                 'momentum_scaling': 10,
                 'learning_rate': 0.02,
                 'learning_rate_scaling': 500,
                 'batch_size': 64,
                 'dropout0_rate': 0.1,
                 'dropout1_rate': 0.6,
                 'dropout2_rate': 0.6,
                 'dropout3_rate': 0.6,
             })),
 ]),
 param_distributions={
     'nn__name': [
         'final_{0:s}_{1:03d}'.format(name_from_file(), k)
# Example 8
    ('log', LogTransform()),
    ('scale03', MinMaxScaler(feature_range=(0, 3))),
]),
                                       expand_confidence=0.9)

# Single fixed-configuration network: two hidden layers (100 -> 50) with no
# dropout and adaptive weight decay enabled. Values look hand-picked from
# earlier hyperparameter searches -- TODO confirm.
net = NNet(
    name=name_from_file(),  # run name derived from this script's filename
    dense1_nonlinearity='rectify',
    dense1_init='glorot_normal',
    auto_stopping=True,  # presumably stops training when validation stalls -- confirm in NNet
    max_epochs=1000,
    batch_size=256,
    learning_rate=0.0005,
    learning_rate_scaling=100,
    momentum=0.9,
    momentum_scaling=100,
    dense1_size=100,
    dense2_size=50,
    dense3_size=None,  # None presumably disables the third dense layer -- confirm in NNet
    dropout0_rate=0,
    dropout1_rate=0,
    dropout2_rate=0,
    dropout3_rate=0,
    weight_decay=0.001,
    adaptive_weight_decay=True,
)

cv = ShuffleSplit(
    n=train.shape[0],
    n_iter=5,
    test_size=0.2,
# Example 9
# Due to this error:
#  ValueError: Loading weights from a list of parameter values is no longer supported.
#  Please send me something like the return value of 'net.get_all_param_values()' instead.
# testing new method
#
# Smoke test (Python 2: bare `print`): train a 1-epoch net, round-trip the
# estimator through save/load, then round-trip its weights through get/load.

import warnings
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from nnet.prepare import LogTransform
from nnet.scikit import NNet
from utils.loading import get_preproc_data

# Silence deprecation chatter during this quick test run.
warnings.filterwarnings("ignore")

# Preprocess: log-transform, then scale features into [0, 3].
# expand_confidence=0.9 presumably folds confidently-classified test rows into
# the training set -- TODO confirm against get_preproc_data.
train, labels, test = get_preproc_data(Pipeline([
    ('log', LogTransform()),
    ('scale03', MinMaxScaler(feature_range=(0, 3))),
]),
                                       expand_confidence=0.9)

# One epoch is enough to produce saveable weights for the round-trip check.
nn = NNet(max_epochs=1)
nn.fit(train, labels)

# Round-trip the whole estimator through disk.
nn.save(filepath='/tmp/test')
nn = NNet.load(filepath='/tmp/test')

# NOTE(review): the quoted error message says 'get_all_param_values()' but the
# call below is 'get_all_params_values()' (extra 's'). nolearn's NeuralNet does
# spell it with the extra 's', so this is likely intentional -- verify against
# the installed nolearn version before "fixing" it.
w = nn.net.get_all_params_values()
print w
nn.net.load_params_from(w)