def train_test_NN(train, labels, test, use_rescale_priors=False, normalize_log=True, extra_feature_count=0, extra_feature_seed=0, **parameters):
	""" Train a network on `train`/`labels` and return class probabilities for `test`.

	Intended for use inside a (parallel) optimizer, so `parameters` must contain
	no iterables. When `use_rescale_priors` is set, the predicted probabilities
	are rescaled to the empirical class frequencies of `labels`.
	"""
	net, train, test = train_NN(
		train, labels, test,
		use_rescale_priors = use_rescale_priors,
		normalize_log = normalize_log,
		extra_feature_count = extra_feature_count,
		extra_feature_seed = extra_feature_seed,
		**parameters
	)
	probabilities = net.predict_proba(test)
	if not use_rescale_priors:
		return probabilities
	# empirical class frequencies; labels appear to be 1-based, hence the [1:] slice
	empirical_priors = bincount(labels)[1:] / float64(len(labels))
	return scale_to_priors(probabilities, priors = empirical_priors)
def predict(parameters, networkfile, data): """ Calculate probabilities for the data. :param parameters: parameters for the network (must match networkfile) :param networkfile: .net.npz file for the network :param data: ndarray with data (train or test) :return: probabilities Doesn't work with added test data or with outlier removal. """ parameters = copy(parameters) parameters.update({'verbosity': False, 'pretrain': networkfile}) net = train_NN(data, labels = None, test = None, test_only = True, **parameters)[0] load_knowledge(net, networkfile) prediction = net.predict_proba(data) scale_to_priors(prediction, priors = normalized_sum([1929, 16122, 8004, 2691, 2739, 14135, 2839, 8464, 4955])) print 'predicted {0:d} samples'.format(prediction.shape[0]) return prediction
def train_test(train, labels, test, n_neighbors, distance_p, use_log = False, use_autoscale = False, use_calibration = False):
	""" Fit a distance classifier on `train`/`labels` and return prior-scaled probabilities for `test`.

	:param n_neighbors: passed to DistanceClassifier
	:param distance_p: Minkowski distance exponent for DistanceClassifier
	:param use_log: apply log10(1 + x) to features first
	:param use_autoscale: divide each feature column by its maximum
	:param use_calibration: wrap the classifier in 3-fold CalibratedClassifierCV
	:return: predicted class probabilities for `test`, rescaled to priors
	"""
	if use_log:
		train, test = log10(1 + train), log10(1 + test)
	if use_autoscale:
		# BUG FIX: the original used in-place `/=`, which mutated the caller's
		# arrays whenever use_log was False (log10 had not reallocated them).
		train = train / train.max(0)
		test = test / test.max(0)
	clf = DistanceClassifier(n_neighbors = n_neighbors, distance_p = distance_p)
	if use_calibration:
		clf = CalibratedClassifierCV(clf, cv = 3)
	clf.fit(train, labels)
	probs = clf.predict_proba(test)
	probs = scale_to_priors(probs)
	return probs
def train_test_NN(train, labels, test, use_rescale_priors=False, outlier_frac=0, outlier_method='OCSVM', normalize_log=True, use_calibration=False, pretrained_path='results/nnets/optimize_new.log_1000.net.npz', **parameters):
	""" Build a network, load pretrained weights and return test probabilities.

	:param use_rescale_priors: rescale predictions to the empirical class frequencies of `labels`
	:param normalize_log: passed to conormalize_data (log-normalization of features)
	:param pretrained_path: .net.npz file with pretrained weights (previously hard-coded)
	:param parameters: forwarded to make_net
	:return: predicted class probabilities for `test`

	NOTE(review): outlier_frac, outlier_method and use_calibration are accepted
	for signature compatibility but are not used by this implementation.
	"""
	net = make_net(**parameters)
	train, test = conormalize_data(train, test, use_log=normalize_log)
	load_knowledge(net, pretrained_path)
	prediction = net.predict_proba(test)
	if use_rescale_priors:
		# labels appear to be 1-based, hence the [1:] slice of bincount
		prediction = scale_to_priors(prediction, priors=bincount(labels)[1:] / float64(len(labels)))
	return prediction
names = ['final_final1_351', 'final_final2_5019', 'final_final1_4969', 'final_final2_5530', 'final_final2_2247', 'final_final1_8594', 'final_final1_1717', 'final_final4_3641', 'final_final2_9535', 'final_final2_2066', 'final_final2_5878', 'final_final4_7076', 'final_final3_6441', 'final_final3_5475'] totalvalid = totaltest = 0 for name in names: net = NNet.load(filepath = join(BASE_DIR, 'results', 'nets', name)) for nm, val in net.get_params().iteritems(): print '{0:s} = {1:}'.format(nm, val) #for nm, data in [('val', valid), ('tst', test)]: #probs = net.predict_proba(data) #save(join(SUBMISSIONS_DIR, '{0}_{1}_raw.npy'.format(name, nm)), probs) #makeSubmission(probs, fname = join(SUBMISSIONS_DIR, '{0}_{1}_rescale.csv'.format(name, nm)), digits = 8) probs = net.predict_proba(valid) probs = scale_to_priors(probs, priors = PRIORS) save(join(SUBMISSIONS_DIR, '{0}_valid.npy'.format(name)), probs) totalvalid += probs probs = net.predict_proba(test) probs = scale_to_priors(probs, priors = PRIORS) save(join(SUBMISSIONS_DIR, '{0}_test.npy'.format(name)), probs) makeSubmission(probs, fname = join(SUBMISSIONS_DIR, '{0}_test.csv'.format(name)), digits = 8) totaltest += probs save(join(SUBMISSIONS_DIR, 'total_valid.npy'), totalvalid) save(join(SUBMISSIONS_DIR, 'total_valid.npy'), totaltest) makeSubmission(totaltest, fname = join(SUBMISSIONS_DIR, 'total_test.csv'), digits = 8) print 'saved predictions'
if not isfile(pretrain): print '>> pretraining network' make_pretrain(pretrain, train, labels, extra_feature_count = extra_feature_count, **params) print '>> loading pretrained network' load_knowledge(net, pretrain) print '>> training network' out = net.fit(train, labels - 1) print '>> saving network' save_knowledge(net, join(NNET_STATE_DIR, 'single_trained.net.npz')) print '>> calculating train error' prediction = net.predict_proba(train) prediction = scale_to_priors(prediction, priors = bincount(labels)[1:] / float64(len(labels))) print 'train loss: {0:.4f} / {0:.4f} (unscaled / scaled)'.format(calc_logloss(prediction, labels)) print '>> predicting test data' prediction = net.predict_proba(test) print '>> scaling to priors' prediction = scale_to_priors(prediction, priors = bincount(labels)[1:] / float64(len(labels))) print '>> making submission file' make_submission(prediction, fname = join(SUBMISSIONS_DIR, 'single.csv'), digits = 8) print '>> plotting training progress' fig, ax = show_train_progress(net) print '>> done!'