コード例 #1
0
ファイル: train_test.py プロジェクト: EdwardBetts/kaggle_otto
def train_test_NN(train,
                  labels,
                  test,
                  use_rescale_priors=False,
                  normalize_log=True,
                  extra_feature_count=0,
                  extra_feature_seed=0,
                  **parameters):
    """
    Train a neural network and return its class probabilities for the test data.

    All preprocessing and network options are forwarded unchanged to
    ``train_NN``; the (train, test) data it hands back is what the network
    is evaluated on. Intended for use in a (parallel) optimizer, so
    ``parameters`` should contain no iterables.

    When ``use_rescale_priors`` is true, the probabilities are passed through
    ``scale_to_priors`` using the class frequencies observed in ``labels``
    (index 0 of the bincount is dropped, so labels are presumably 1-based --
    confirm against the data loader).
    """
    net, train, test = train_NN(
        train, labels, test,
        use_rescale_priors=use_rescale_priors,
        normalize_log=normalize_log,
        extra_feature_count=extra_feature_count,
        extra_feature_seed=extra_feature_seed,
        **parameters
    )
    probabilities = net.predict_proba(test)
    if not use_rescale_priors:
        return probabilities
    # Empirical frequency of each class among the training labels.
    class_priors = bincount(labels)[1:] / float64(len(labels))
    return scale_to_priors(probabilities, priors=class_priors)
コード例 #2
0
ファイル: predict.py プロジェクト: EdwardBetts/kaggle_otto
def predict(parameters, networkfile, data):
	"""
		Calculate probabilities for the data using a stored network.

		:param parameters: parameters for the network (must match networkfile)
		:param networkfile: .net.npz file for the network
		:param data: ndarray with data (train or test)
		:return: probabilities, rescaled to the hard-coded class priors

		Doesn't work with added test data or with outlier removal.
	"""
	# Work on a copy so the caller's parameter dict is not modified by update().
	parameters = copy(parameters)
	parameters.update({'verbosity': False, 'pretrain': networkfile})
	# test_only is forwarded to train_NN; only the network (first return value) is used here.
	net = train_NN(data, labels = None, test = None, test_only = True, **parameters)[0]
	load_knowledge(net, networkfile)
	prediction = net.predict_proba(data)
	# Keep the return value: every other caller uses the result of scale_to_priors,
	# so discarding it here risked returning unscaled probabilities.
	prediction = scale_to_priors(prediction, priors = normalized_sum([1929, 16122, 8004, 2691, 2739, 14135, 2839, 8464, 4955]))
	print('predicted {0:d} samples'.format(prediction.shape[0]))
	return prediction
コード例 #3
0
def train_test(train, labels, test, n_neighbors, distance_p, use_log = False, use_autoscale = False, use_calibration = False):
	"""
		Fit a DistanceClassifier on the train data and return prior-scaled class probabilities for the test data.

		:param train: train data (array-like supporting ``.max(0)`` and arithmetic)
		:param labels: class labels for the train data
		:param test: test data, same layout as train
		:param n_neighbors: passed to DistanceClassifier
		:param distance_p: passed to DistanceClassifier
		:param use_log: if true, replace both data sets by log10(1 + x)
		:param use_autoscale: if true, divide each data set by its own per-column maximum
		:param use_calibration: if true, wrap the classifier in CalibratedClassifierCV with cv = 3
		:return: test probabilities after scale_to_priors (with its default priors)
	"""
	if use_log:
		train, test = log10(1 + train), log10(1 + test)
	if use_autoscale:
		# Out-of-place float division: the previous in-place '/=' overwrote the
		# caller's arrays (when use_log is off) and misbehaved on integer data
		# (floor division or a casting error, depending on the numpy version).
		# NOTE(review): a column whose maximum is 0 still yields nan/inf, as before.
		train = train / train.max(0).astype(float)
		test = test / test.max(0).astype(float)
	clf = DistanceClassifier(n_neighbors = n_neighbors, distance_p = distance_p)
	if use_calibration:
		clf = CalibratedClassifierCV(clf, cv = 3)
	clf.fit(train, labels)
	probs = clf.predict_proba(test)
	probs = scale_to_priors(probs)
	return probs
コード例 #4
0
ファイル: playground.py プロジェクト: EdwardBetts/kaggle_otto
def train_test_NN(train,
                  labels,
                  test,
                  use_rescale_priors=False,
                  outlier_frac=0,
                  outlier_method='OCSVM',
                  normalize_log=True,
                  use_calibration=False,
                  **parameters):
    """
    Predict test probabilities with a network restored from a fixed file.

    No fitting is done here: the network is built from ``parameters`` via
    ``make_net`` and its state is loaded from a hard-coded .net.npz path.
    Train and test data are normalized together by ``conormalize_data``;
    only the normalized test data is used afterwards.

    ``outlier_frac``, ``outlier_method`` and ``use_calibration`` are accepted
    but not used by this function.

    When ``use_rescale_priors`` is true, the probabilities are passed through
    ``scale_to_priors`` using the class frequencies observed in ``labels``.
    """
    net = make_net(**parameters)
    _, test = conormalize_data(train, test, use_log=normalize_log)
    load_knowledge(net, 'results/nnets/optimize_new.log_1000.net.npz')
    probabilities = net.predict_proba(test)
    if not use_rescale_priors:
        return probabilities
    # Empirical frequency of each class among the given labels.
    class_priors = bincount(labels)[1:] / float64(len(labels))
    return scale_to_priors(probabilities, priors=class_priors)
コード例 #5
0
ファイル: predict.py プロジェクト: EdwardBetts/kaggle_otto
# Stored networks whose predictions are saved individually and summed into totals.
names = ['final_final1_351', 'final_final2_5019', 'final_final1_4969', 'final_final2_5530', 'final_final2_2247', 'final_final1_8594', 'final_final1_1717', 'final_final4_3641', 'final_final2_9535', 'final_final2_2066', 'final_final2_5878', 'final_final4_7076', 'final_final3_6441', 'final_final3_5475']
# Running sums of the prior-scaled probabilities over all networks
# (start as 0 and become arrays after the first addition).
totalvalid = totaltest = 0

for name in names:

	net = NNet.load(filepath = join(BASE_DIR, 'results', 'nets', name))

	# Show the configuration of the loaded network.
	for nm, val in net.get_params().items():
		print('{0:s} = {1:}'.format(nm, val))

	# Validation set: scale to priors, store per network, add to the total.
	probs = net.predict_proba(valid)
	probs = scale_to_priors(probs, priors = PRIORS)
	save(join(SUBMISSIONS_DIR, '{0}_valid.npy'.format(name)), probs)
	totalvalid += probs

	# Test set: same, plus a per-network submission file.
	probs = net.predict_proba(test)
	probs = scale_to_priors(probs, priors = PRIORS)
	save(join(SUBMISSIONS_DIR, '{0}_test.npy'.format(name)), probs)
	makeSubmission(probs, fname = join(SUBMISSIONS_DIR, '{0}_test.csv'.format(name)), digits = 8)
	totaltest += probs


save(join(SUBMISSIONS_DIR, 'total_valid.npy'), totalvalid)
# Write the test totals to their own file; this used to reuse the
# 'total_valid.npy' name and overwrote the validation totals saved just above.
save(join(SUBMISSIONS_DIR, 'total_test.npy'), totaltest)
makeSubmission(totaltest, fname = join(SUBMISSIONS_DIR, 'total_test.csv'), digits = 8)
print('saved predictions')
コード例 #6
0
# Create the pretrained network file only when it does not exist yet.
if not isfile(pretrain):
	print('>> pretraining network')
	make_pretrain(pretrain, train, labels, extra_feature_count = extra_feature_count, **params)

print('>> loading pretrained network')
load_knowledge(net, pretrain)

print('>> training network')
# The network is fitted on labels shifted down by one
# (presumably the raw labels are 1-based -- confirm against the data loader).
out = net.fit(train, labels - 1)

print('>> saving network')
save_knowledge(net, join(NNET_STATE_DIR, 'single_trained.net.npz'))

# Class frequencies observed in the training labels (bincount index 0 is dropped);
# used for both the train and the test rescaling below.
class_priors = bincount(labels)[1:] / float64(len(labels))

print('>> calculating train error')
prediction = net.predict_proba(train)
# Measure the loss before rescaling so that both numbers in the message are real;
# previously the scaled loss was printed twice under the "unscaled / scaled" label.
unscaled_loss = calc_logloss(prediction, labels)
prediction = scale_to_priors(prediction, priors = class_priors)
print('train loss: {0:.4f} / {1:.4f} (unscaled / scaled)'.format(unscaled_loss, calc_logloss(prediction, labels)))

print('>> predicting test data')
prediction = net.predict_proba(test)

print('>> scaling to priors')
prediction = scale_to_priors(prediction, priors = class_priors)

print('>> making submission file')
make_submission(prediction, fname = join(SUBMISSIONS_DIR, 'single.csv'), digits = 8)

print('>> plotting training progress')
fig, ax = show_train_progress(net)

print('>> done!')