Example #1
# Imports assumed by this example (helpers is a local utility module from this project)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston

import helpers

boston_data = load_boston()

train_data = np.array(boston_data.data)
train_labels = np.array(boston_data.target)

num_features = boston_data.data.shape[1]
unique_labels = np.unique(train_labels)
num_classes = len(unique_labels)  # NOTE: the Boston target is continuous, so these are distinct target values rather than true classes

print("The Boston dataset has " + str(num_features) + " features")
print(boston_data.feature_names)

# Put everything into a Pandas DataFrame
data = pd.DataFrame(data=train_data, columns=boston_data.feature_names)
# print(tabulate(data, headers='keys', tablefmt='psql'))

# Compute the covariance matrix
cov_mat_boston = np.cov(train_data.T)
print("Covariance matrix")
print(cov_mat_boston)

# Normalize the data and then recompute the covariance matrix
normalized_train_data = helpers.normalize_data(train_data)
normalized_cov_mat_boston = np.cov(normalized_train_data.T)
print("Normalized data covariance matrix")
print(normalized_cov_mat_boston)
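# NOTE: after standardization, the covariance matrix of the data equals the
# correlation matrix of the original features (unit variances on the diagonal)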

# Create a scatterplot matrix (CRIM is continuous, so it is not usable as a categorical hue grouping)
fig = sns.pairplot(data=data)

plt.show()
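
The helpers.normalize_data call above comes from a local utility module that is not shown on this page. A minimal sketch, assuming it standardizes each feature to zero mean and unit variance:

def normalize_data(X):
    # Standardize each column: subtract the mean, divide by the standard deviation
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std[std == 0] = 1  # guard against constant features
    return (X - mean) / std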
Example #2
            # Do a majority vote among the k neighbors and set the prediction to the class receiving the most votes
            label = self._majority_vote(k_nearest_neighbors, classes)
            y_pred.append(label)
        return np.array(y_pred)
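
    # The _majority_vote helper used above is defined elsewhere in this class and
    # not shown here; a minimal sketch, assuming a simple plurality vote:
    def _majority_vote(self, neighbor_labels, classes):
        # Count how often each class appears among the k nearest neighbors
        counts = np.zeros(len(classes))
        for label in neighbor_labels:
            class_index = np.where(classes == label)[0][0]
            counts[class_index] += 1
        # The predicted class is the one with the most votes
        return classes[np.argmax(counts)]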


# Imports assumed by this example (ml_helpers is a local utility module from this project)
import numpy as np
from sklearn import datasets, decomposition

import ml_helpers

# Get the training data
# Import the Iris flower dataset
iris = datasets.load_iris()
train_data = np.array(iris.data)
train_labels = np.array(iris.target)
num_features = train_data.shape[1]

# Normalize the training data
train_data = ml_helpers.normalize_data(train_data)

# Apply PCA to the data to reduce its dimensionality
pca = decomposition.PCA(n_components=2)
pca.fit(train_data)
train_data = pca.transform(train_data)

X_train, X_test, y_train, y_test = ml_helpers.train_test_split(train_data,
                                                               train_labels,
                                                               test_size=0.5)
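
# ml_helpers.train_test_split is a local helper not shown on this page; a
# minimal sketch of what it presumably does (shuffle, then split at the ratio):
def _train_test_split_sketch(X, y, test_size=0.5):
    # Shuffle the indices, then cut at the requested ratio
    idx = np.random.permutation(X.shape[0])
    split = int(X.shape[0] * (1 - test_size))
    return X[idx[:split]], X[idx[split:]], y[idx[:split]], y[idx[split:]]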

# *********************************************
# Apply the KNN Classifier MANUALLY
# *********************************************
clf = KNN(k=5)
predicted_labels = clf.predict(X_test, X_train, y_train)
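
# The example stops right after prediction; one simple way to score it,
# assuming y_test from the split above:
accuracy = np.mean(predicted_labels == y_test)
print("Manual KNN Classification Accuracy = ", accuracy)

Example #3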
# Imports assumed by this example; ml_helpers, ClassificationTree and
# RegressionTree are local code defined elsewhere in this project.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

import ml_helpers


def main():

	# **************************************************************
	# Apply the Decision Tree for Classification Manually
	# **************************************************************
	# Get the training data
	# Import the Iris flower dataset
	iris = datasets.load_iris()
	train_data = np.array(iris.data)
	train_labels = np.array(iris.target)
	num_features = train_data.shape[1]

	# Randomly shuffle the data
	train_data, train_labels = ml_helpers.shuffle_data(train_data, train_labels)

	# Apply PCA to the data to reduce its dimensionality
	pca = decomposition.PCA(n_components=4)
	pca.fit(train_data)
	train_data = pca.transform(train_data)
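	# NOTE: n_components=4 keeps all four Iris features, so this step only decorrelates the data rather than reducing its dimensionality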


	X_train, X_test, y_train, y_test = ml_helpers.train_test_split(train_data, train_labels, test_size=0.4)

	clf = ClassificationTree()

	clf.fit(X_train, y_train)

	predicted_labels = clf.predict(X_test)

	# Compute the testing accuracy
	Accuracy = 0
	for index in range(len(predicted_labels)):
		current_label = y_test[index]
		predicted_label = predicted_labels[index]

		if current_label == predicted_label:
			Accuracy += 1

	Accuracy /= len(y_test)  # average over the test set, not the full data set

	# Print stuff
	print("Manual Decision Tree Classification Accuracy = ", Accuracy)


	# **************************************************************
	# Apply the Decision Tree for Classification using Sklearn
	# **************************************************************

	clf = DecisionTreeClassifier(criterion="gini", splitter="best")

	clf.fit(X=X_train, y=y_train)

	predicted_labels = clf.predict(X_test)

	# Compute the testing accuracy
	Accuracy = 0
	for index in range(len(predicted_labels)):
		current_label = y_test[index]
		predicted_label = predicted_labels[index]

		if current_label == predicted_label:
			Accuracy += 1

	Accuracy /= len(y_test)  # average over the test set, not the full data set

	# Print stuff
	print("Sklearn Decision Tree Classification Accuracy = ", Accuracy)


	# **************************************************************
	# Apply the Decision Tree for Regression Manually
	# **************************************************************
	# Load the Boston housing data set for regression training
	# NOTE that this loads as a dictionary-like Bunch object
	boston_dataset = load_boston()

	train_data = np.array(boston_dataset.data)
	train_labels = np.array(boston_dataset.target)
	num_features = boston_dataset.data.shape[1]

	# Randomly shuffle the data
	train_data, train_labels = ml_helpers.shuffle_data(train_data, train_labels)

	# Normalize the data to have zero-mean and unit variance
	train_data = ml_helpers.normalize_data(train_data)

	X_train, X_test, y_train, y_test = ml_helpers.train_test_split(train_data, train_labels, test_size=0.4)

	clf = RegressionTree()

	clf.fit(X_train, y_train)

	predicted_values = clf.predict(X_test)

	mse = ml_helpers.mean_squared_error(y_test, predicted_values)

	print ("Manual Decision Tree Regression Mean Squared Error:", mse)

	# Now plot the manual Decision Tree Regression results
	g = plt.figure(1)
	plt.plot(y_test, predicted_values,'ro')
	plt.plot([0,50],[0,50], 'g-')
	plt.xlabel('real')
	plt.ylabel('predicted')
	g.show()

	# **************************************************************
	# Apply the Decision Tree for Regression using Sklearn
	# **************************************************************
	clf = DecisionTreeRegressor(criterion="mse", splitter="best")  # NOTE: "mse" was renamed to "squared_error" in newer scikit-learn releases

	clf.fit(X_train, y_train)

	predicted_values = clf.predict(X_test)

	mse = ml_helpers.mean_squared_error(y_test, predicted_values)

	print ("Sklearn Decision Tree Regression Mean Squared Error:", mse)

	# Now plot the Sklearn Decision Tree Regression results
	g = plt.figure(2)
	plt.plot(y_test, predicted_values,'ro')
	plt.plot([0,50],[0,50], 'g-')
	plt.xlabel('real')
	plt.ylabel('predicted')
	g.show()

	# Keep the plots alive until we get a user input
	print("Press any key to exit")
	input()
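
The ml_helpers utilities used throughout (shuffle_data, mean_squared_error) are local code not shown on this page; two minimal sketches under conventional assumptions, followed by the usual entry-point guard that the script presumably ends with:

def shuffle_data(X, y):
	# Shuffle samples and labels in unison (sketch of the assumed helper)
	idx = np.random.permutation(X.shape[0])
	return X[idx], y[idx]

def mean_squared_error(y_true, y_pred):
	# Average squared difference between targets and predictions (sketch of the assumed helper)
	return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)


if __name__ == "__main__":
	main()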