from DataInterface import get_pendigits_dataset, get_car_dataset, split_dataset
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    # Hold out a fixed 25% test set, then train the RBF-kernel SVM on
    # increasingly large slices of the remaining data to build a learning curve.
    dataset = get_car_dataset()
    data = split_dataset(dataset, 0.25)
    train, test = data
    features_test, labels_test = test

    training_accuracy = []
    testing_accuracy = []
    for train_size in range(6, 99, 2):
        train_size /= 100.0
        data = split_dataset(train, 1 - train_size)
        train2, test2 = data
        features_train, labels_train = train2

        rbf_kernel_svm_clf = SVC(kernel='rbf', gamma='auto', C=10)
        rbf_kernel_svm_clf.fit(features_train, labels_train)

        predictions = rbf_kernel_svm_clf.predict(features_train)
        training_accuracy.append(accuracy_score(labels_train, predictions))

        predictions = rbf_kernel_svm_clf.predict(features_test)
        testing_accuracy.append(accuracy_score(labels_test, predictions))
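# --- Illustrative sketch, not part of the original project -------------------
# split_dataset is imported from DataInterface but not shown here. A minimal
# sketch of what it might look like, assuming each dataset is a
# (features, labels) pair and the second argument is the held-out fraction
# (the name, signature, and return layout are assumptions, not the project's
# actual code):
from sklearn.model_selection import train_test_split

def split_dataset(dataset, test_fraction):
    """Split a (features, labels) dataset into (train, test) pairs."""
    features, labels = dataset
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=test_fraction)
    return (features_train, labels_train), (features_test, labels_test)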
num_iter = 10
print(
    "Now training and testing decision tree on car dataset for 10 runs of train/test split:\n"
)
features_name = [
    "buying", "maint", "doors", "persons", "lug_boot", "safety"
]
dataset = get_car_dataset()
accuracies = 0
max_depth = 10
start_time = time.time()
print('To prune the tree, the maximum depth is set to ' + str(max_depth))

# Average the accuracy over num_iter independent 75/25 train/test splits.
for _ in range(num_iter):
    data = split_dataset(dataset, 0.25)
    train, test = data
    features_train, labels_train = train
    dt = train_decision_tree(data, max_depth)
    accuracy = test_decision_tree(data, dt)
    accuracies += accuracy
duration = time.time() - start_time

print("Average accuracy is {0:.3f}.\n".format(accuracies / num_iter))
print("The run time is " + str(duration) + " sec.")
print("\nComplete.\n")
print(
    "-----------------------------------------------------------------\n")
print(
    "Now training and testing decision tree on pen digits dataset for 10 runs of train/test split:\n"
)
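# --- Illustrative sketch, not part of the original project -------------------
# train_decision_tree and test_decision_tree are defined elsewhere in this
# project. A plausible minimal version, assuming they wrap scikit-learn's
# DecisionTreeClassifier and accept the (train, test) pair produced by
# split_dataset (names and signatures are assumptions):
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def train_decision_tree(data, max_depth):
    """Fit a depth-limited decision tree on the training half of data."""
    (features_train, labels_train), _ = data
    dt = DecisionTreeClassifier(max_depth=max_depth)
    dt.fit(features_train, labels_train)
    return dt

def test_decision_tree(data, dt):
    """Return accuracy of the fitted tree on the test half of data."""
    _, (features_test, labels_test) = data
    return accuracy_score(labels_test, dt.predict(features_test))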
if __name__ == '__main__':
    num_iter = 10  # number of train/test splits

    print("\nNow training and testing on the car dataset with " +
          str(num_iter) + " runs of train/test splits:\n")
    data = get_car_dataset()
    hidden_layers = (10, 10, 10, 10)
    print("The neural network has {} hidden layers, each layer has size: "
          .format(len(hidden_layers)), end="")
    for layer in hidden_layers:
        print(layer, end=" ")
    print('\n')

    accuracies = 0
    start_time = time.time()
    for _ in range(num_iter):
        train, test = split_dataset(data, 0.25)
        mlp = training(train, hidden_layers)
        accuracy = testing(mlp, test)
        accuracies += accuracy
    duration = time.time() - start_time

    print("The average classification rate is {0:.3f}.\n".format(accuracies / num_iter))
    print("The run time is " + str(duration) + " sec.")
    print("\nComplete\n----------------------------------\n")

    print("Now training and testing on the pen digits dataset with " +
          str(num_iter) + " runs of train/test splits:\n")
    data = get_pendigits_dataset()
    hidden_layers = (10, 10)
    print("The neural network has {} hidden layers, each layer has size: "
          .format(len(hidden_layers)), end="")
    for layer in hidden_layers:
        print(layer, end=" ")
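# --- Illustrative sketch, not part of the original project -------------------
# training and testing are helper functions defined elsewhere in this project.
# A minimal sketch, assuming they wrap scikit-learn's MLPClassifier and accept
# the (features, labels) pairs produced by split_dataset (these names and
# signatures are assumptions):
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

def training(train, hidden_layers):
    """Fit a multilayer perceptron with the given hidden-layer sizes."""
    features_train, labels_train = train
    mlp = MLPClassifier(hidden_layer_sizes=hidden_layers, max_iter=1000)
    mlp.fit(features_train, labels_train)
    return mlp

def testing(mlp, test):
    """Return the classification accuracy of the fitted network on test data."""
    features_test, labels_test = test
    return accuracy_score(labels_test, mlp.predict(features_test))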
print( "\nNow training and testing boosted version of decision tree on car dataset with train/test split for 10 times:\n" ) dataset = get_car_dataset() feature_names = [ "buying", "maint", "doors", "persons", "lug_boot", "safety" ] max_depth = 4 print("To prune the decision tree, the maximum depth is set to " + str(max_depth)) start_time = time.time() accuracies = 0 for _ in range(10): data = split_dataset(dataset, 0.3) train, test = data features_train, labels_train = train clf = train_boosting(data, max_depth) accuracy = test_boosting(data, clf) accuracies += accuracy # save_trees_as_png(clf, feature_names, 'car') # print("Trees visualization written to the current folder.") duration = time.time() - start_time print("The average training accuracy over 10 runs is {0:.3f}.\n".format( accuracies / 10)) print("The run time is " + str(duration) + " sec.") print("\nComplete.\n") print( "-----------------------------------------------------------------\n")