def Additional_tests():
    """Run 2-fold cross-validation for the ID3 and Perceptron classifiers
    and write one CSV row per classifier: (id, avg_accuracy, avg_error),
    where id 1 is ID3 and id 2 is the perceptron."""
    rows = []
    for row_id, factory in ((1, id3_factory), (2, perceptron_factory)):
        clf = factory()
        avg_accuracy, avg_error = evaluate(clf, 2)
        rows.append([row_id, avg_accuracy, avg_error])
    with open("experiments12.csv", "w") as csv_file:
        csv.writer(csv_file, delimiter=',', lineterminator='\n').writerows(rows)
def KNN_test():
    """Run 2-fold cross-validation for kNN at several values of k and
    write one CSV row per setting: (k, avg_accuracy, avg_error)."""
    rows = [[k] + list(evaluate(knn_factory(k), 2)) for k in (1, 3, 5, 7, 13)]
    with open("experiments6.csv", "w") as csv_file:
        writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
        writer.writerows(rows)
def run_pass(cfg, imagecache='images', n_images=100):
    '''
    pipeline.run_pass(cfg) -> metrics

    Given a set of parameters, 'cfg', of the format defined below, run a
    single pass through the pipeline. This includes loading a dataset,
    downloading the images, filtering undesirable images, training the CNN
    classifier, and producing the resulting scoring metrics.

    Returns a dictionary containing the scoring metrics, keyed by
    'train', 'val' and 'test'.

    The format of the input parameters is a dictionary. An example of the
    structure is given below:

        'dataset_filename': '100marvelcharacters.csv',
        'base_search_term': 'Marvel Comic Character',
        'search_options': {'style': 'lineart'},
        'optimizer': ('SGD', {'lr': 0.0001, 'momentum': 0.9}),
        'test_size': 0.3,
        'val_size': 0.1,

    'dataset_filename' is the path to the dataset to load.
    'base_search_term' is a string appended to the character name when
    executing the Google image search.
    'search_options' is the kwargs dictionary passed to
    download.generate_search_url().
    'optimizer' is a tuple containing the name of the optimizer in
    torch.optim, and the kwargs dictionary passed to its constructor.
    'test_size' is the fraction of the data to hold out from the training
    set for testing.
    'val_size' is the fraction of the data to hold out from the training
    set for validation.  (Previously undocumented even though the code
    reads it below.)
    '''
    dataset = pandas.read_csv(cfg['dataset_filename'])
    # Download and filter the image set for every row of the dataset.
    imgdir = prepare_imageset(dataset, cfg['base_search_term'],
                              cfg['search_options'], imagecache,
                              download_count=n_images)
    # Three-way split: train / validation / test directories.
    splitdir = train_test_split(imgdir, cfg['test_size'], cfg['val_size'])
    opt_name, opt_kwargs = cfg['optimizer']
    stats_trn, stats_val, stats_tst = classifier.evaluate(
        splitdir, opt_name, opt_kwargs)
    return {'train': stats_trn, 'val': stats_val, 'test': stats_tst}
def main(args):
    """Evaluate generated text against the source corpus.

    Reports two metrics:
      * classification accuracy of a pretrained CNN classifier on the
        target (generated) file, and
      * corpus BLEU of the target sentences against the source sentences.

    `args` must provide `data_name` (dataset directory / checkpoint stem)
    and `target_path` (path to the generated text file).
    """
    data_pth = "data/%s" % args.data_name
    train_pth = os.path.join(data_pth, "train_data.txt")
    # The training file is only loaded to rebuild the vocabulary.
    train_data = MonoTextData(train_pth, True, vocab=100000)
    vocab = train_data.vocab
    source_pth = os.path.join(data_pth, "test_data.txt")
    target_pth = args.target_path
    eval_data = MonoTextData(target_pth, True, vocab=vocab)
    source = pd.read_csv(source_pth, names=['label', 'content'], sep='\t')
    target = pd.read_csv(target_pth, names=['label', 'content'], sep='\t')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Classification Accuracy
    model = CNNClassifier(len(vocab), 300, [1, 2, 3, 4, 5], 500, 0.5).to(device)
    model.load_state_dict(
        torch.load("checkpoint/%s-classifier.pt" % args.data_name))
    model.eval()
    eval_data, eval_label = eval_data.create_data_batch_labels(
        64, device, batch_first=True)
    acc = 100 * evaluate(model, eval_data, eval_label)
    print("Acc: %.2f" % acc)

    # BLEU Score
    # BUG FIX: compute_bleu was previously called inside the loop on the
    # *growing* sources/targets lists and its outputs summed.  That is
    # O(n^2) in the corpus size and the sum of partial corpus BLEUs is not
    # a BLEU score.  Compute corpus BLEU once over all sentence pairs.
    sources = []
    targets = []
    for i in range(source.shape[0]):
        sources.append([source.content[i].split()])  # list of references
        targets.append(target.content[i].split())    # hypothesis
    total_bleu = 100 * compute_bleu(sources, targets)[0]
    print("Bleu: %.2f" % total_bleu)
#scheduler.step() # <- update schedular epoch_loss = running_loss / n_s_train # <- calculate epoch loss print("Epoch %4d\tLoss : %s" % (epoch_idx + 1, epoch_loss)) if math.isnan(epoch_loss): continue # if loss is NAN, skip! if (epoch_idx + 1) % 1 == 0: clf.eval() # Classifier evaluation mode: ON # ----------------------------------------------------------------------------------------------- # # ZERO-SHOT ACCURACY acc_zsl = evaluate(model=clf, x=x_u_test, y=y_u_test_ix, attrs=u_attr) # ------------------------------------------------------- # # * ----- * ----- * ----- * ----- * ----- * ----- * ----- * # ------------------------------------------------------- # # GENERALIZED SEEN ACCURACY acc_g_seen = evaluate(model=clf, x=x_s_test, y=y_s_test, attrs=attr) # ------------------------------------------------------- # # * ----- * ----- * ----- * ----- * ----- * ----- * ----- * # ------------------------------------------------------- # # GENERALIZED UNSEEN ACCURACY acc_g_unseen = evaluate(model=clf, x=x_u_test,
# question 3.2: load the data and split it once into two folds.
patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# question 5.1: 2-fold cross-validation of kNN for several k values,
# writing one "k,accuracy,error" row per setting.
k_list = [1, 3, 5, 7, 13]
accuracy_list = []
file_name = 'experiments6.csv'
with open(file_name, 'wb') as file:
    for k in k_list:
        accuracy, error = evaluate(knn_factory(k), 2)
        accuracy_list.append(accuracy)
        row = ",".join([str(k), str(accuracy), str(error)]) + "\n"
        file.write(row.encode())

# question 5.2: plot average accuracy as a function of k.
plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
plt.title('Part B, question 5.2')
plt.show()

# questions 7.1, 7.2
file_name = 'experiments12.csv'
(train_data, test_data )) # concatenate the train and test data (for structure exploitation) test_labels_none = -1 * np.ones([ test_labels.shape[0], ]) # the label of the test_data is set to -1 y_all = np.concatenate( (train_labels, test_labels_none)) # concatenate the train labels and -1 test labels consist_model = LabelSpreading(gamma=4, max_iter=60) consist_model.fit(x_all, y_all) clf.evaluate_sub('consistency model', test_labels, consist_model.predict(test_data)) lgr_model = clf.classifier('LGR', train_data, train_labels) clf.evaluate('LGR', lgr_model, test_data, test_labels) knn_model = clf.classifier('KNN', train_data, train_labels) clf.evaluate('KNN', knn_model, test_data, test_labels) bnb_model = clf.classifier('BNB', train_data, train_labels) clf.evaluate('BNB', bnb_model, test_data, test_labels) svm_model = clf.classifier('SVM', train_data, train_labels) clf.evaluate('SVM', svm_model, test_data, test_labels) dtc_model = clf.classifier('DTC', train_data, train_labels) clf.evaluate('DTC', dtc_model, test_data, test_labels) model_to_attack = clf.classifier('MLP', train_data, train_labels) # the number of adversarial examples that models can resist to
data = []
data.append(examples)
data.append(labels)

# Keep only the 100 best features (ANOVA F-test) for the experiments below.
data_new = []
data_new.append(SelectKBest(f_classif, 100).fit_transform(examples, labels))
data_new.append(labels)
classifier.split_crosscheck_groups(data_new, 2)
print("using CUT data\n")

# NOTE(review): despite its name, `decision_tree` wraps a RandomForestClassifier.
decision_tree = classifier.sklearn_factory_wrapper(RandomForestClassifier())
perceptron = classifier.sklearn_factory_wrapper(Perceptron())
knn = classifier.knn_factory(7)

print("knn and perceptron: \n")
ensemble = classifier.ensemble_factory([knn, perceptron])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("knn and decision tree: \n")
# BUG FIX: this experiment previously built ensemble_factory([knn, perceptron])
# again, so the "knn and decision tree" numbers were a copy of the line above.
ensemble = classifier.ensemble_factory([knn, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("all three: \n")
ensemble = classifier.ensemble_factory([knn, perceptron, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("knn alone: \n")
ensemble = classifier.ensemble_factory([knn])
accuracy, error = classifier.evaluate(ensemble, 2)
# --------------- Produce accuracy rates and graphs ------------------
clf_names_and_results = []
for make_clf in classifier_factory_list:
    accuracies = []
    clf_name = make_clf().to_string()
    for num_features in best_num_list:
        # Reduce the data to the `num_features` best features (ANOVA F-test)
        # before cross-validating this classifier.
        selector = SelectKBest(score_func=f_classif, k=num_features)
        selector.fit(patients, labels)
        reduced = selector.transform(patients)
        split_crosscheck_groups((reduced, labels), num_folds_num)
        accuracy, error = evaluate(make_clf(), num_folds_num)
        accuracies.append(accuracy)
    clf_names_and_results.append((clf_name, accuracies))
    # accuracy_list_for_all_classifiers.append(accuracies)
present_graphs(best_num_list, clf_names_and_results)

# ------ Code that produces the dot file for the ID3 tree presented in the report ------
clf = tree.DecisionTreeClassifier()
clf = clf.fit(patients, labels)
tree.export_graphviz(clf, out_file='tree.dot')
def main():
    """Run the KNN / decision-tree / perceptron experiments (gated by
    debug switches) and produce the competition prediction file."""
    # Debug switches: set to False to re-run the corresponding experiment.
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # The dataset was split once into two folds (see commented call below).
    folds = 2
    # split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluate KNN for several values of k and log results to CSV.
        ks = [1, 3, 5, 7, 13]
        accuracies = []
        errors = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in ks:
                err, acc = evaluate(knn_factory(k), folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                accuracies.append(acc)
                errors.append(err)

        # Plot accuracy (top) and error (bottom) against k.
        plt.subplot(2, 1, 1)
        plt.plot(ks, accuracies, '--', color='g')
        plt.plot(ks, accuracies, 'bo')
        plt.ylabel("Accuracy")
        plt.xlabel("k")
        plt.xticks(ks)
        plt.subplot(2, 1, 2)
        plt.plot(ks, errors, '--', color='r')
        plt.plot(ks, errors, 'bo')
        plt.ylabel("Error")
        plt.xlabel("k")
        plt.xticks(ks)
        plt.tight_layout()
        plt.show()

    # The tree and perceptron experiments share one output file.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)
        if not skip_tree:
            # Decision Tree experiment
            dt_clf = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(dt_clf, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])
        if not skip_perc:
            # Perceptron experiment
            perc_clf = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(perc_clf, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: classify test_features with the combined model.
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    write_prediction(my_model.final_predict(preprocessing.scale(test_features)))
def main(): ###### # mat = get_data.read_data("C:/Users/Furkan/Desktop/Bitirme/dataset/video4.mp4") ###### print("Dataset has been reading...") data = read_data() labels = read_labels() print("Dataset has been read.") print('Applying contrast streching manipulation...') contrasted_data, contrasted_labels = data_manipulation.apply_contrast( data, labels) print('Size of contrasted data: %d' % len(contrasted_data)) print('Applying rotation manipulation...') rotated_data, rotated_labels = data_manipulation.apply_rotation( contrasted_data, labels) print('Size of rotated data: %d' % len(rotated_data)) print('Applying shifting manipulation...') shifted_data, shifted_labels = data_manipulation.apply_shifting( contrasted_data, labels) print('Size of shifted data: %d' % len(shifted_data)) print('Applying flipping manipulation...') flipped_data, flipped_labels = data_manipulation.apply_horizontal_flip( contrasted_data, labels) print('Size of shifted data: %d' % len(flipped_data)) print('Concatenating manipulated data') concat_data = rotated_data + shifted_data + contrasted_data + flipped_data # concat_data = data data For 2.5k sized Original data. 
print("Reshaping images...") reshaped_concat_data = reshape_list(concat_data) print('Shape of data: %s' % str(reshaped_concat_data[0].shape)) print("PCA has been applying...") data_pca = data_manipulation.pca(reshaped_concat_data) print("PCA has been applied.") data = data_pca concat_labels = labels print("Spliting dataset into training and test set...") X_train, X_test, y_train, y_test = split_dataset(data, concat_labels[:len(data)]) start = datetime.now() print("Appyling K-Nearest Neighbours Classifier...") knn_labels = classifier.knn_classifier(X_train, y_train, X_test) print("Evaluating accuracy...") classifier.evaluate(y_test, knn_labels) cm_knn = confusion_matrix(y_test, knn_labels) print("Confusion matrix: %s \n\t" % str(cm_knn)) print("Running time: %s" % str(datetime.now() - start)) start = datetime.now() print("Appyling Support Vector Machines Classifier...") svm_labels = classifier.svm_classifier(X_train, y_train, X_test) print("Evaluating accuracy...") classifier.evaluate(y_test, svm_labels) cm_svm = confusion_matrix(y_test, svm_labels) print("Confusion matrix: %s \n\t" % str(cm_svm)) print("Running time: %s" % str(datetime.now() - start)) start = datetime.now() print("Appyling Naive Bayes Classifier...") nbc_labels = classifier.naive_bayes_classifier(X_train, y_train, X_test) print("Evaluating accuracy...") classifier.evaluate(y_test, nbc_labels) cm_nbc = confusion_matrix(y_test, nbc_labels) print("Confusion matrix: %s \n\t" % str(cm_nbc)) print("Running time: %s" % str(datetime.now() - start)) start = datetime.now() print("Appyling Decision Tree Classifier...") dtc_labels = classifier.decision_tree_classifier(X_train, y_train, X_test) print("Evaluating accuracy...") classifier.evaluate(y_test, dtc_labels) cm_dtc = confusion_matrix(y_test, dtc_labels) print("Confusion matrix: %s \n\t" % str(cm_dtc)) print("Running time: %s" % str(datetime.now() - start)) start = datetime.now() print("Appyling Random Forest Classifier...") rfc_labels = 
classifier.random_forest_classifier(X_train, y_train, X_test) print("Evaluating accuracy...") classifier.evaluate(y_test, rfc_labels) cm_rfc = confusion_matrix(y_test, rfc_labels) print("Confusion matrix: %s \n\t" % str(cm_rfc)) print("Running time: %s" % str(datetime.now() - start)) """print("Applying K-Means Clustering...")
from sklearn.feature_selection import f_classif
from sklearn.ensemble import RandomForestClassifier

examples, labels, test = load_data()
data = []
data.append(examples)
data.append(labels)

# Sweep the number of selected features and compare three classifiers
# (random forest, perceptron, kNN with k=7) under 2-fold cross-validation.
for feature_num in range(5, 180, 8):
    print("data cut to: %d\n" % feature_num)
    data_new = []
    data_new.append(
        SelectKBest(f_classif, feature_num).fit_transform(examples, labels))
    data_new.append(labels)
    classifier.split_crosscheck_groups(data_new, 2)

    forest = classifier.sklearn_factory_wrapper(RandomForestClassifier())
    perceptron = classifier.sklearn_factory_wrapper(Perceptron())
    knn = classifier.knn_factory(7)

    for header, factory in (("forest perf:\n", forest),
                            ("perc perf:\n", perceptron),
                            ("knn perf:\n", knn)):
        print(header)
        accuracy, error = classifier.evaluate(factory, 2)
        print("%.3f, %.3f\n" % (accuracy, error))
    print("-----------------------------------\n")