def train_test(train, labels, test, **parameters):
    """
    Fit the feature-generation + neural-network pipeline and predict on `test`.

    :param train: training feature matrix, passed to `Pipeline.fit`.
    :param labels: training targets, passed to `Pipeline.fit`.
    :param test: feature matrix handed to `Pipeline.predict_proba`.
    :param parameters: keyword overrides for the default `NNet` settings
        below; a key that is not among the defaults is forwarded to `NNet`
        as an additional keyword argument.
    :return: whatever `predict_proba` of the fitted pipeline yields for `test`.
    """
    # Default network configuration; anything in `parameters` wins.
    # Earlier experiments (now disabled) drew dense3_size, dropout3_rate and
    # weight_decay from random distributions instead of fixing them here.
    nnet_settings = dict({
        'name': name_from_file(),
        'max_epochs': 600,
        'auto_stopping': True,
        'adaptive_weight_decay': False,
        'save_snapshots_stepsize': None,
        'epoch_steps': None,
        'momentum_scaling': 1200,
        'dense1_nonlinearity': 'rectify',
        'dense1_init': 'glorot_uniform',
        'dense2_nonlinearity': 'rectify',
        'dense2_init': 'glorot_uniform',
        'batch_size': 128,
        'learning_rate': 0.0003,
        'learning_rate_scaling': 1000,
        'momentum': 0.98,
        'dense1_size': 700,
        'dense2_size': 550,
        'dense3_size': 400,
        'dropout0_rate': 0.0,
        'dropout1_rate': 0.1,
        'dropout2_rate': 0.45,
        'dropout3_rate': 0.58,
    }, **parameters)

    # (step name, difficult classes, number of extra features) for each
    # class-specific sparse feature generator, in pipeline order.
    sparse_generator_specs = (
        ('gen23', (2, 3), 40),
        ('gen234', (2, 3, 4), 40),
        ('gen19', (1, 9), 63),
    )

    steps = [('row', PositiveSparseRowFeatureGenerator())]
    for step_name, classes, extra in sparse_generator_specs:
        steps.append((
            step_name,
            PositiveSparseFeatureGenerator(difficult_classes=classes, extra_features=extra),
        ))
    # The log transform has to come after the integer features are generated
    # but before any distance features (distance generators are currently
    # disabled and would be inserted right here).
    steps.append(('log', LogTransform()))
    # Scaling is applied last so it covers both integer and float features.
    steps.append(('scale03', MinMaxScaler(feature_range=(0, 3))))
    steps.append(('nn', NNet(**nnet_settings)))

    model = Pipeline(steps)
    model.fit(train, labels)
    return model.predict_proba(test)
# ('row', PositiveSparseRowFeatureGenerator()), # ('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)), # ('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)), #]), expand_confidence = 0.9) train, labels = get_training_data()[:2] test = get_testing_data()[0] #cpus = max(cpu_count() - 1, 1) #random = RandomState() opt = RandomizedSearchCV( estimator = Pipeline([ #('gen23', PositiveSparseFeatureGenerator(difficult_classes = (2, 3), extra_features = 40)), #('gen234', PositiveSparseFeatureGenerator(difficult_classes = (2, 3, 4), extra_features = 40)), #('gen19', PositiveSparseFeatureGenerator(difficult_classes = (1, 9), extra_features = 63)), ('log', LogTransform()), ('scale03', MinMaxScaler(feature_range = (0, 3))), ('nn', NNet(**{ 'dense1_nonlinearity': 'rectify', 'dense1_init': 'glorot_normal', 'max_epochs': 200, 'learning_rate': 0.000676, 'learning_rate_scaling': 30, 'momentum': 0.9, 'momentum_scaling': 10, 'dense1_size': 35,# #'dense2_size': 463, #'dense3_size': 329, #'dropout0_rate': 0.06, #'dropout1_rate': 0.56, #'dropout2_rate': 0.19,
from nnet.prepare import LogTransform from utils.loading import get_training_data from utils.shuffling import shuffle from nndist.distance import DistanceFeatureGenerator N = 500 train, labels = get_training_data()[:2] train = train[logical_and(5 <= labels, labels <= 7), :] labels = labels[logical_and(5 <= labels, labels <= 7)] - 4 train, labels = shuffle(train, labels)[:2] train = train[:N, :] labels = labels[:N] print train.shape, train.dtype train = LogTransform().fit_transform(train) print train.shape, train.dtype gen = DistanceFeatureGenerator(n_neighbors=3, distance_p=2, nr_classes=3) train = gen.fit_transform(train, labels) print train.shape, train.dtype #train = MinMaxScaler(feature_range = (0, 3)).fit_transform(train) #print train.shape, train.dtype fig, ax = subplots(figsize=(7, 6)) colors = ['r', 'g', 'b'] for cls in range(1, 4): ax.scatter(train[labels == cls, -2], train[labels == cls, -1], c=colors[cls - 1],
from utils.ioutil import makeSubmission from utils.loading import get_testing_data from utils.postprocess import scale_to_priors labels = load(join(BASE_DIR, 'data', 'trainclas.npy')) train = load(join(BASE_DIR, 'data', 'trainmat.npy'))[:, 1:] valid = load(join(BASE_DIR, 'data', 'testmat.npy'))[:, 1:] test = get_testing_data()[0] pipe = Pipeline([ ('row', PositiveSparseRowFeatureGenerator()), ('gen23', PositiveSparseFeatureGenerator(difficult_classes = (2, 3), extra_features = 40)), ('gen234', PositiveSparseFeatureGenerator(difficult_classes = (2, 3, 4), extra_features = 40)), ('gen19', PositiveSparseFeatureGenerator(difficult_classes = (1, 9), extra_features = 63)), ('log', LogTransform()), # log should be after integer feats but before dist #('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)), #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)), ('scale03', MinMaxScaler(feature_range = (0, 3))), # scale should apply to int and float feats ]) train = pipe.fit_transform(train, labels) valid = pipe.transform(valid) test = pipe.transform(test) names = ['final_final1_351', 'final_final2_5019', 'final_final1_4969', 'final_final2_5530', 'final_final2_2247', 'final_final1_8594', 'final_final1_1717', 'final_final4_3641', 'final_final2_9535', 'final_final2_2066', 'final_final2_5878', 'final_final4_7076', 'final_final3_6441', 'final_final3_5475'] totalvalid = totaltest = 0 for name in names: