Example #1
import json

import numpy as np
from sklearn.decomposition import IncrementalPCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

import generate_extra_data as gd


def extra_data():
    # AddBiasTerm (a custom transformer), the tuned best_* hyperparameters,
    # and the Xtrain_full/Ytrain_full/Xtest/Ytest arrays are defined
    # elsewhere in the project.
    pca = ("pca", IncrementalPCA(n_components=500))
    scale = ("scale", StandardScaler())
    bias = ("bias", AddBiasTerm())
    svc = ("svc", SVC(kernel="poly", degree=best_degree, C=best_c, gamma=best_gamma))
    p = Pipeline([pca, scale, bias, svc])

    n = 50000
    num_slices = 50
    N, M = Xtrain_full.shape

    scores_perturb = []
    scores_extra = []

    extra_data, extra_targets = gd.generate_extra_data(n)
    perturbed_data, perturbed_targets = gd.perturb_modified_digits(Xtrain_full, Ytrain_full, n)

    # i = -1 is the baseline with no extra examples; each later iteration
    # appends one more slice of n // num_slices additional examples.
    for i in range(-1, num_slices):
        print("Iteration " + str(i + 2))
        k = (i + 1) * n // num_slices  # size of the augmented portion

        Xp = np.vstack((Xtrain_full, extra_data[0:k]))
        Yp = np.vstack((Ytrain_full, extra_targets[0:k]))
        p.fit(Xp, Yp)
        scores_extra.append({"train_score": p.score(Xp, Yp),
                             "test_score": p.score(Xtest, Ytest),
                             "num_examples": N + k, "i": i})

        Xp = np.vstack((Xtrain_full, perturbed_data[0:k]))
        Yp = np.vstack((Ytrain_full, perturbed_targets[0:k]))
        p.fit(Xp, Yp)
        scores_perturb.append({"train_score": p.score(Xp, Yp),
                               "test_score": p.score(Xtest, Ytest),
                               "num_examples": N + k, "i": i})

    d = {"scores_perturb": scores_perturb, "scores_extra": scores_extra}
    with open("ignore/extra_data_graph.json", "w") as f:
        json.dump(d, f)
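
The JSON file written at the end of extra_data() is meant for plotting learning curves. A minimal reading/plotting sketch, assuming matplotlib is available; the file path and key names are exactly those used above:

import json

import matplotlib.pyplot as plt

with open("ignore/extra_data_graph.json") as f:
    d = json.load(f)

# One learning curve per augmentation strategy: test accuracy vs. training size.
for key, label in [("scores_extra", "generated digits"),
                   ("scores_perturb", "perturbed digits")]:
    sizes = [s["num_examples"] for s in d[key]]
    scores = [s["test_score"] for s in d[key]]
    plt.plot(sizes, scores, marker="o", label=label)

plt.xlabel("number of training examples")
plt.ylabel("test accuracy")
plt.legend()
plt.show()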
Example #2
import numpy as np

import generate_extra_data as ged

# Load the flattened 48x48 training digits and their integer labels.
X = np.load('./data/train_inputs.npy')
Y = np.load('./data/train_outputs.npy')
# Y = Y.reshape(Y.shape[0], 1)
Y = Y.astype(np.int32)

PIXELS = 48

print('Original Dataset size:')
print(X.shape)

# Augment the training set with 500,000 randomly perturbed digits.
x_new, y_new = ged.perturb_modified_digits(X, Y, 500000)
X = np.vstack((X, x_new))
Y = np.hstack((Y, y_new))

print('New dataset size:')
print(X.shape, Y.shape)

# Reshape flat pixel rows into (batch, channel, height, width) images.
X = X.reshape((-1, 1, PIXELS, PIXELS))

# validate_split and max_split are fractions defined earlier in the script.
validation_division = int(len(X) * validate_split)
top = int(len(X) * max_split)

X_train, X_val = X[:validation_division], X[validation_division:top]
Y_train, Y_val = Y[:validation_division], Y[validation_division:top]
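
validate_split and max_split come from earlier in the script and are not shown in this listing; note that, despite its name, validate_split bounds the training slice. A sketch with purely hypothetical values (not the authors' settings):

# Hypothetical values -- the real script defines these elsewhere.
validate_split = 0.9  # first 90% of the (augmented) data trains the model
max_split = 1.0       # everything up to this fraction is used at all

n_total = 600000  # stand-in for len(X) after augmentation
validation_division = int(n_total * validate_split)  # 540000: X[:540000] -> train
top = int(n_total * max_split)                       # 600000: X[540000:600000] -> validation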
Example #3
# In[6]:

import numpy as np

import generate_extra_data as ged

X = np.load('./data/train_inputs.npy')
Y = np.load('./data/train_outputs.npy')
# Y = Y.reshape(Y.shape[0], 1)
Y = Y.astype(np.int32)

PIXELS = 48

print('Original Dataset size:')
print(X.shape)

# Augment the training set with 500,000 randomly perturbed digits.
x_new, y_new = ged.perturb_modified_digits(X, Y, 500000)
X = np.vstack((X, x_new))
Y = np.hstack((Y, y_new))

print('New dataset size:')
print(X.shape, Y.shape)

# Reshape flat pixel rows into (batch, channel, height, width) images.
X = X.reshape((-1, 1, PIXELS, PIXELS))

# validate_split and max_split are fractions defined earlier in the notebook.
validation_division = int(len(X) * validate_split)
top = int(len(X) * max_split)

X_train, X_val = X[:validation_division], X[validation_division:top]
Y_train, Y_val = Y[:validation_division], Y[validation_division:top]
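
generate_extra_data is project code that is not shown in these listings. Purely as an illustration of what a helper like perturb_modified_digits might do, here is a hypothetical augmentation sketch (small random shifts and rotations of 48x48 digits, using scipy.ndimage); the real module may differ:

import numpy as np
from scipy.ndimage import rotate, shift

def perturb_digits_sketch(X, Y, n, pixels=48, seed=0):
    """Hypothetical stand-in for perturb_modified_digits: draw n random
    training digits and apply a small random shift and rotation to each."""
    rng = np.random.default_rng(seed)
    idx = rng.integers(0, len(X), size=n)
    out = np.empty((n, pixels * pixels), dtype=X.dtype)
    for j, i in enumerate(idx):
        img = X[i].reshape(pixels, pixels)
        img = shift(img, rng.uniform(-2, 2, size=2))            # translate up to 2 px
        img = rotate(img, rng.uniform(-15, 15), reshape=False)  # rotate up to 15 degrees
        out[j] = img.ravel()
    return out, Y[idx]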