def tune_svm_using_10_fold():
    """Tune the SVM with 10-fold cross-validation on the engineered train set.

    For each fold: select features on the training portion, project the
    tuning portion onto the same selected features, train an Svm
    (C=200, gamma=1, degree-like arg 2 -- semantics defined by Svm; verify
    against its constructor), evaluate, and finally print the mean accuracy
    across all folds.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, all_features, all_labels = dh.get_numeric_data_set()
    data_sets = dh.get_cross_validation_data_sets(10, all_features, all_labels)

    accuracies = []
    # Iterate folds directly over (key, value) pairs instead of a second
    # dict lookup per key.
    for data_set_number, data_set in data_sets.items():
        training_set, tuning_set = data_set[0], data_set[1]

        fold_train_features = training_set["data_points"]
        fold_train_labels = training_set["labels"]
        # Feature selection on the training portion only; the tuning portion
        # is projected onto the same selected columns below.
        fold_train_features, selected_features = dh.get_k_best_features(
            len(fold_train_features[0]), fold_train_features, fold_train_labels)

        fold_test_features = tuning_set["data_points"]
        fold_test_labels = tuning_set["labels"]
        fold_test_features = dh.get_new_feature_vec(fold_test_features, selected_features)

        svm = Svm(fold_train_features, fold_train_labels, 200, 1, 2)
        svm.train()

        eval_metrics = EvaluationMetrics(svm, fold_test_features, fold_test_labels)
        # 'metrics' (not 'eval') -- avoid shadowing the builtin eval().
        metrics = eval_metrics.evaluate()
        accuracies.append(metrics['accuracy'])

    average_accuracy = sum(accuracies) / len(accuracies)
    # print() form is valid in both Python 2 and Python 3.
    print(average_accuracy)
def evaluate_svm():
    """Train an SVM on the engineered train set and evaluate on the test set.

    Feature selection is fitted on the training data; the test features are
    projected onto the same selected columns before scoring. Plots the
    AUC curve and computes AU-ROC via EvaluationMetrics.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()
    # Feature selection fitted on training data only.
    train_features, selected_features = dh.get_k_best_features(
        len(train_features[0]), train_features, train_prediction_labels)

    svm = Svm(train_features, train_prediction_labels, 20, 0)
    svm.train()

    dh_test = DataHandler('data/test-set-feature-engineered.csv', 'prediction_label')
    headers, test_features, test_prediction_labels = dh_test.get_numeric_data_set()
    # Project test features onto the columns selected from the training set.
    test_features = dh_test.get_new_feature_vec(test_features, selected_features)

    eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
    # 'metrics' (not 'eval') -- avoid shadowing the builtin eval().
    metrics = eval_metrics.evaluate()
    eval_metrics.compute_and_plot_auc(metrics['predicted'], test_prediction_labels)
    eval_metrics.compute_au_roc(metrics['predicted'], test_prediction_labels)
def main(argv):
    """Train Perceptron/SVM/PA classifiers and print their predictions.

    Args:
        argv: command-line argument list (e.g. sys.argv). Expected layout:
            argv[1] -- path to training examples file
            argv[2] -- path to training labels file
            argv[3] -- path to test examples file

    Bug fixed: the original body ignored its `argv` parameter and read
    `sys.argv` directly; callers passing sys.argv see identical behavior.
    """
    train_x = read_from_file(argv[1])
    train_x = one_hot_encode(train_x).astype(float)
    train_y = read_from_file(argv[2])
    train_y = train_y.astype(float).astype(int)
    # Number of distinct class labels in the training targets.
    num_of_labels = len(set(train_y))

    # ---------- prediction inputs ----------
    test_x = read_from_file(argv[3])
    test_x = one_hot_encode(test_x).astype(float)

    # Normalize: fit stats on the training set, apply the same stats to test.
    train_x_z_score, mean, std_dev = z_score_norm(train_x)
    train_x_min_max, min_train, max_train = min_max_norm(train_x)
    test_x_z_score = z_score_norm_by_mean_std(test_x, mean, std_dev)
    test_x_min_max = min_max_norm_by_min_max(test_x, min_train, max_train)

    # ---------- training ----------
    # Perceptron and SVM train on min-max data, PA on z-score data
    # (choice presumably from prior cross-validation runs -- TODO confirm).
    perceptron = Perceptron(train_x_min_max, train_y, num_of_labels)
    svm = Svm(train_x_min_max, train_y, num_of_labels)
    pa = Pa(train_x_z_score, train_y, num_of_labels)
    perceptron.train()
    svm.train()
    pa.train()

    predict_perceptron = []
    predict_svm = []
    predict_pa = []
    # Each model predicts on the normalization it was trained with.
    for test_min_max, test_z_score in zip(test_x_min_max, test_x_z_score):
        predict_perceptron.append(perceptron.predict(test_min_max))
        predict_svm.append(svm.predict(test_min_max))
        predict_pa.append(pa.predict(test_z_score))

    print_predict(predict_perceptron, predict_svm, predict_pa)