Example #1
def predict_300_v1():
	data, labels = get_training_data()[:2]
	params = {
		'dense1_nonlinearity': 'rectify',
		'dense1_init': 'glorot_normal',
		'dense1_size': 300,
		'dense2_size': 0,
		'dense3_size': None,
		'dropout1_rate': 0.5,
		'dropout2_rate': None,
		'dropout3_rate': None,
		'extra_feature_count': 0,
	}
	probs = predict(params, 'results/pretrain/single_pretrain_300_0_0.net.npz', data)
	print 'logloss', calc_logloss(probs, labels)
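
The calc_logloss helper itself is not listed on this page. A minimal sketch of a compatible multi-class log loss, assuming an NxC probability array and 1-based integer class labels (as bincount(labels)[1:] in Example #8 suggests), could look like this:

import numpy as np

def calc_logloss(probs, labels, epsilon=1e-15):
	# Hypothetical sketch: clip and renormalise the probabilities, then take the
	# mean negative log probability assigned to the true (1-based) class.
	probs = np.clip(probs, epsilon, 1 - epsilon)
	probs = probs / probs.sum(axis=1, keepdims=True)
	rows = np.arange(probs.shape[0])
	return -np.mean(np.log(probs[rows, np.asarray(labels) - 1]))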
Example #2
def predict_ensemble():
	test = load('/home/mark/testmat.npy')
	train = load('/home/mark/trainmat.npy')
	train_labels = load('/home/mark/trainclas.npy')
	test_labels = load('/home/mark/testclas.npy')
	params = {
		'dense1_nonlinearity': 'rectify',
		'dense1_init': 'glorot_normal',
		'dense1_size': 300,
		'dense2_size': 0,
		'dense3_size': None,
		'dropout1_rate': 0.5,
		'dropout2_rate': None,
		'dropout3_rate': None,
		'extra_feature_count': 0,
	}
	probs = predict(params, 'results/pretrain/single_pretrain_300_0_0.net.npz', train)
	print 'logloss', calc_logloss(probs, train_labels)
Example #3
def geneticEnsemble(predictions, trueclasses, precision=20):

    Q, N, C = np.shape(predictions)

    P = 100  #population size
    pc = 0.7  #crossover probability
    pm = 1.0 / C  #mutation probability
    maxiterations = 1000

    if precision is None:
        r = lambda: np.random.rand()
    else:
        r = lambda: np.random.randint(precision)

    Population = np.array([[r() for j in range(Q)] for i in range(P)])
    print Population[5, :]
    iterations = 1
    bestFitness = [1000, None]
    fitnessTracer = np.zeros(maxiterations)

    while iterations < maxiterations:
        if iterations % 10 == 0:
            print 'At iteration ', iterations
        fitness = np.array([
            calc_logloss(mean_ensemble(predictions, Population[i, :]),
                         trueclasses) for i in range(P)
        ])
        fitnessTracer[iterations] = fitness.min()
        if fitness.min() < bestFitness[0]:
            bestFitness = [fitness.min(), Population[np.argmin(fitness), :]]

        Population = np.array(
            [getChild(Population, pc, fitness) for i in range(P)])
        Population = np.array([mutate(p, pm, r) for p in Population])
        iterations += 1

    print fitnessTracer
    return bestFitness
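
geneticEnsemble depends on three helpers that this example does not show: mean_ensemble, getChild and mutate. The following are hypothetical reconstructions based only on how they are called above (weighted averaging of the Q member predictions, fitness-based parent selection with crossover probability pc, and per-gene mutation with probability pm):

import numpy as np

def mean_ensemble(predictions, weights):
    # Weighted average of Q member predictions (Q x N x C) into one N x C array.
    weights = np.asarray(weights, dtype=float)
    weights = weights / max(weights.sum(), 1e-12)
    return np.tensordot(weights, predictions, axes=(0, 0))

def getChild(population, pc, fitness):
    # Pick two parents with probability inversely proportional to their logloss,
    # then apply one-point crossover with probability pc.
    scores = 1.0 / (fitness + 1e-12)
    parents = population[np.random.choice(len(population), 2, p=scores / scores.sum())]
    if np.random.rand() < pc:
        cut = np.random.randint(1, population.shape[1])
        return np.concatenate([parents[0][:cut], parents[1][cut:]])
    return parents[0].copy()

def mutate(child, pm, r):
    # Replace each weight by a fresh random value with probability pm.
    child = child.copy()
    for j in range(len(child)):
        if np.random.rand() < pm:
            child[j] = r()
    return child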
Example #4
	def add_prediction(self, prediction, _force_round = None):
		"""
			Register a classification result for scoring.

			:param prediction: SxC array with predicted probabilities, with each row corresponding to a test data sample and each column corresponding to a class.
			:return: (logloss, accuracy, duration) tuple of floats

			You should never need the _force_round parameter.
		"""
		duration = clock() - self.yield_time
		#assert prediction.shape[1] == NCLASSES, 'There should be a probability for each class.'
		assert len(self.results) < len(self.samples), 'There is already a prediction for each sample generated.'
		test_classes = self.samples[len(self.results)]
		logloss = calc_logloss(prediction, test_classes)
		accuracy = calc_accuracy(prediction, test_classes)
		if VERBOSITY >= 1 and not len(self.results):
			stdout.write('  #   loss   accuracy  time\n')
		confusion = confusion_matrix(prediction, test_classes)
		size_mismatch = average_size_mismatch(prediction, test_classes)
		self.results.append((logloss, accuracy, duration, confusion, size_mismatch))
		if VERBOSITY >= 1:
			stdout.write('{0:-3d}  {1:6.3f}  {2:5.2f}%  {3:6.3f}s\n'.format(_force_round or len(self.results), logloss, 100 * accuracy, duration))
		return logloss, accuracy, duration
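
calc_accuracy (along with confusion_matrix and average_size_mismatch) is a project helper that is not listed here. A minimal sketch of the accuracy part, under the same 1-based label assumption as above, might be:

import numpy as np

def calc_accuracy(prediction, labels):
	# Hypothetical sketch: fraction of rows whose most probable column matches
	# the true class (labels assumed 1-based, columns 0-based).
	return float(np.mean(prediction.argmax(axis=1) + 1 == np.asarray(labels)))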
Example #5
def make_pretrain(pretrain_path,
                  data,
                  labels,
                  minimum_train_loss=0.7,
                  **params):
    """
		Make a pretrain file given parameters. If there are iterable parameters, a 'random' one is chosen.
	"""
    if not pretrain_path or isfile(pretrain_path):
        return
    print 'pretraining file not found, pretraining a network now'
    pretrain_params = {
        'dense1_nonlinearity': 'leaky20',
        'dense1_init': 'glorot_uniform',
        'dense1_size': params['dense1_size'],
        'dense2_size': params['dense2_size'],
        'dense3_size': params['dense3_size'],
        'learning_rate': params['learning_rate'],
        'learning_rate_scaling': 10,
        'momentum': 0.9,
        'momentum_scaling': 10,
        'dropout1_rate': 0.5 if params['dense1_size'] else 0,
        'dropout2_rate': 0.5 if params['dense2_size'] else 0,
        'dropout3_rate': 0.5 if params['dense3_size'] else 0,
        'weight_decay': params['weight_decay'],
        'max_epochs': 1000,
        'extra_feature_count': params['extra_feature_count'],
    }
    for key, val in pretrain_params.items():
        if is_nonstr_iterable(val):
            pretrain_params[key] = val[0]
    net, train, duplicate = train_NN(data, labels, None, **pretrain_params)
    train_err = calc_logloss(net.predict_proba(train), labels)
    assert train_err < minimum_train_loss, 'Pre-training did not converge ({0:.4f} >= {1:.4f})'.format(
        train_err, minimum_train_loss)
    save_knowledge(net, pretrain_path)
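
is_nonstr_iterable is another helper that is not shown; given how it is used above (taking val[0] from parameter lists), a plausible sketch is:

def is_nonstr_iterable(obj):
    # Hypothetical sketch: anything iterable except a string counts as a list of
    # candidate values, so the loop above can pick val[0] as the 'random' choice.
    return hasattr(obj, '__iter__') and not isinstance(obj, str)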
Example #6
    'dense2_size': 0,
    'dense3_size': None,
    'learning_rate': 0.001,
    'learning_rate_scaling': 100,
    'momentum': 0.9,
    'momentum_scaling': 10,
    'dropout1_rate': 0.5,
    'dropout2_rate': None,
    'dropout3_rate': None,
    'weight_decay': 0,
    'max_epochs': 1500,
    'auto_stopping': False,
    'extra_feature_count': 0,
    'pretrain': False,  # keep this OFF
    'save_snapshots_stepsize': 500,
    'name': name_from_file(),
    'outlier_method': 'EE',
    'outlier_frac': None,
    'normalize_log': True,
    'use_calibration': False,
    'use_rescale_priors': True,
    'extra_feature_seed': 0,
    'test_data_confidence': None,
}

prediction = train_test_NN(train, train_labels, test, **params)

# do things with prediction
print calc_logloss(prediction, test_labels)
save('nnpred.npy', prediction)
Example #7
    'weight_decay': 0,  # constrain the weights to avoid overfitting
    'max_epochs': 30,  # it terminates when overfitting or increasing, so just leave high
    'output_nonlinearity': 'softmax',  # just keep softmax
    'auto_stopping': True,  # stop training automatically if it seems to be failing
    'pretrain': pretrain,  # use pretraining? (True for automatic, filename for specific)
    'outlier_method': 'OCSVM',  # method for outlier removal ['OCSVM', 'EE']
    'outlier_frac': None,  # which fraction of each class to remove as outliers
    'normalize_log': True,  # use logarithm for normalization
    'use_calibration': False,  # use calibration of probabilities
    'use_rescale_priors': False,  # rescale predictions to match priors
}

print calc_logloss(
    train_test_NN(train_data, true_labels, train_data, **params), true_labels)
exit()

validator = SampleCrossValidator(train_data,
                                 true_labels,
                                 rounds=1,
                                 test_frac=0.2,
                                 use_data_frac=1)
optimizer = ParallelGridOptimizer(train_test_func=train_test_NN,
                                  validator=validator,
                                  use_caching=False,
                                  **params).readygo(topprint=20,
                                                    save_fig_basename=name,
                                                    log_name=name + '.log',
                                                    only_show_top=True)
Example #8
	print '>> pretraining network'
	make_pretrain(pretrain, train, labels, extra_feature_count = extra_feature_count, **params)

print '>> loading pretrained network'
load_knowledge(net, pretrain)

print '>> training network'
out = net.fit(train, labels - 1)

print '>> saving network'
save_knowledge(net, join(NNET_STATE_DIR, 'single_trained.net.npz'))

print '>> calculating train error'
prediction = net.predict_proba(train)
unscaled_loss = calc_logloss(prediction, labels)
prediction = scale_to_priors(prediction, priors = bincount(labels)[1:] / float64(len(labels)))
print 'train loss: {0:.4f} / {1:.4f} (unscaled / scaled)'.format(unscaled_loss, calc_logloss(prediction, labels))

print '>> predicting test data'
prediction = net.predict_proba(test)

print '>> scaling to priors'
prediction = scale_to_priors(prediction, priors = bincount(labels)[1:] / float64(len(labels)))

print '>> making submission file'
make_submission(prediction, fname = join(SUBMISSIONS_DIR, 'single.csv'), digits = 8)

print '>> plotting training progress'
fig, ax = show_train_progress(net)

print '>> done!'
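
scale_to_priors is likewise not listed on this page. Assuming it reweights each class column so that the average predicted probabilities match the given priors and then renormalises each row, a rough sketch could be:

import numpy as np

def scale_to_priors(prediction, priors):
	# Hypothetical sketch: boost or damp each class in proportion to how far its
	# mean predicted probability is from the target prior, then renormalise rows.
	scaled = prediction * (np.asarray(priors) / prediction.mean(axis=0))
	return scaled / scaled.sum(axis=1, keepdims=True)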