def main():
    # preprocs = ["wav", "normalized", "bandpass", "highpass"]
    # coefficients = ["mfccs", "chroma", "mel", "contrast", "all"]
    # subsegmentLengths = ["0.2", "0.05", "0.01"]
    # chunkLengths = ["1", "2", "3"]
    preprocs = ["normalized"]
    coefficients = ["mel"]
    subsegmentLengths = ["0.2"]
    chunkLengths = ["2"]
    numNodes = 20
    for preproc in preprocs:
        for coefficientType in coefficients:
            for subsegmentLength in subsegmentLengths:
                for chunkLength in chunkLengths:
                    argv = []
                    argv.append("")
                    argv.append(preproc)
                    argv.append(coefficientType)
                    argv.append(subsegmentLength)
                    argv.append(chunkLength)
                    X, Y = fetchDataMulti.getData(argv)
                    start = time.time()
                    knn.knn(X, Y)
                    svm.svm(X, Y)
    return
def hack(img_name):
    '''
    HACK  Recognize a CAPTCHA image
      Inputs:
          img_name: filename of image
      Outputs:
          digits: 1x5 matrix, the 5 digits in the input CAPTCHA image.
    '''
    data = np.load('hack_data.npz')
    x = extract_image.extract_image(img_name)
    # YOUR CODE HERE (you can delete the following code as you wish)
    x_train = data['x_train']
    y_train = data['y_train']
    number = len(x_train)
    # begin answer
    # hold out half of the labeled data to pick k by validation accuracy
    x_train, x_valid = x_train[:number // 2], x_train[number // 2:]
    y_train, y_valid = y_train[:number // 2], y_train[number // 2:]
    best_acc, best_k = 0.0, 1
    for k in range(1, 101):
        y = knn.knn(x_valid, x_train, y_train, k)
        acc = np.sum(y == y_valid) / len(y)
        print("K =", k, " ACC =", acc)
        if acc > best_acc:
            best_acc = acc
            best_k = k
    print("Choose", best_k, "as K.")
    digits = knn.knn(x, x_train, y_train, best_k)
    # end answer
    return digits
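# The knn.knn(x, x_train, y_train, k) calls above are assumed to return one
# predicted label per row of x. For reference, a minimal NumPy sketch with
# that signature; the name knn_predict is illustrative, not the project's
# actual implementation:
import numpy as np

def knn_predict(x, x_train, y_train, k):
    x = np.atleast_2d(x)
    # squared Euclidean distance from every query row to every training row
    d = ((x[:, None, :] - x_train[None, :, :]) ** 2).sum(axis=2)
    nearest = np.argsort(d, axis=1)[:, :k]  # indices of k closest training rows
    preds = []
    for row in y_train[nearest]:
        vals, counts = np.unique(row, return_counts=True)
        preds.append(vals[np.argmax(counts)])  # majority vote
    return np.array(preds)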
def lvq2(dataset, prototypesPerClass, learningRate, k, w):
    classes = []
    prototypes = []
    minArg = []
    maxArg = []
    trainSet = []
    prototypes, classes, minArg, maxArg, trainSet = lvq1(
        dataset, prototypesPerClass, learningRate, k)
    actualIndex = 0
    totalIndex = int((len(trainSet) * (1 - learningRate)) / 2)
    while actualIndex < totalIndex:
        # samples are consumed in pairs, so this assumes an even-length trainSet
        i = 0
        while i < len(trainSet):
            knnClassification, neighbors = knn(
                classes, prototypes, minArg, maxArg, trainSet[i], k)
            knnClassification1, neighbors1 = knn(
                classes, prototypes, minArg, maxArg, trainSet[i + 1], k)
            j = 0
            while j < len(neighbors):
                if window(neighbors[j][1], trainSet[i], trainSet[i + 1], w, minArg, maxArg):
                    if knnClassification != knnClassification1:
                        if knnClassification == neighbors[j][1].classification:
                            neighbors[j][1].adjustParam(trainSet[i], False, actualIndex, totalIndex)
                            neighbors1[j][1].adjustParam(trainSet[i], True, actualIndex, totalIndex)
                        else:
                            neighbors[j][1].adjustParam(trainSet[i], True, actualIndex, totalIndex)
                            neighbors1[j][1].adjustParam(trainSet[i], False, actualIndex, totalIndex)
                j += 1
            i += 2
        actualIndex += 1
    return prototypes, classes, minArg, maxArg
def performance(df):
    execution_times = pd.DataFrame(
        index=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        columns=[
            'method1_content', 'method1_content+books',
            'method2',
            'method3_content', 'method3_content+books',
            'method4_content_k1', 'method4_content+books_k1',
            'method4_content_k2', 'method4_content+books_k2',
            'method4_content_k3', 'method4_content+books_k3',
            'method4_content_k4', 'method4_content+books_k4',
            'method4_content_k5', 'method4_content+books_k5'
        ])
    for i in xrange(10, 101, 10):
        # Method 1
        sample_df = df.sample(frac=i / 100.0)
        start = time.time()
        multi_label_classification(sample_df, False, test_size=1.0 / len(sample_df))
        end = time.time()
        execution_times["method1_content"][i] = end - start
        execution_times.to_csv("performance_test.csv")

        start = time.time()
        multi_label_classification(sample_df, True, test_size=1.0 / len(sample_df))
        end = time.time()
        execution_times["method1_content+books"][i] = end - start
        execution_times.to_csv("performance_test.csv")

        # Method 2
        start = time.time()
        word_frequencies(sample_df, one_run=True)
        end = time.time()
        execution_times["method2"][i] = end - start
        execution_times.to_csv("performance_test.csv")

        # Method 3
        start = time.time()
        nearest_centroid(sample_df, False, test_size=1.0 / len(sample_df))
        end = time.time()
        execution_times["method3_content"][i] = end - start
        execution_times.to_csv("performance_test.csv")

        start = time.time()
        nearest_centroid(sample_df, True, test_size=1.0 / len(sample_df))
        end = time.time()
        execution_times["method3_content+books"][i] = end - start
        execution_times.to_csv("performance_test.csv")

        # Method 4
        for k in range(5):
            start = time.time()
            knn(sample_df, k + 1, False, one_run=True)
            end = time.time()
            execution_times["method4_content_k" + str(k + 1)][i] = end - start
            execution_times.to_csv("performance_test.csv")

            start = time.time()
            knn(sample_df, k + 1, True, one_run=True)
            end = time.time()
            execution_times["method4_content+books_k" + str(k + 1)][i] = end - start
            execution_times.to_csv("performance_test.csv")
def max_rule(data_set, view1, view2, dists, classes, labels, ks):
    L = 3
    num_classes = len(classes)
    number_rows = data_set.shape[0]  # the same for all views
    num_variables1 = data_set.shape[1]
    num_variables2 = view1.shape[1]
    num_variables3 = view2.shape[1]
    rates = numpy.zeros(30)
    for i in range(0, 30):
        kf = StratifiedKFold(n_splits=10, shuffle=True)
        folds = kf.split(data_set, labels)
        rate = 0.0
        for train, test in folds:
            class_probs1, means1, inv_cov_matrices1 = train_bayesian_classifier(
                data_set, classes, train)
            class_probs2, means2, inv_cov_matrices2 = train_bayesian_classifier(
                view1, classes, train)
            class_probs3, means3, inv_cov_matrices3 = train_bayesian_classifier(
                view2, classes, train)
            for x in test:
                x1 = data_set.iloc[x]
                x2 = view1.iloc[x]
                x3 = view2.iloc[x]
                probs1 = bayes_probability(num_variables1, x1, class_probs1,
                                           means1, inv_cov_matrices1)
                probs2 = bayes_probability(num_variables2, x2, class_probs2,
                                           means2, inv_cov_matrices2)
                probs3 = bayes_probability(num_variables3, x3, class_probs3,
                                           means3, inv_cov_matrices3)
                pred_class1, knn_probs_1 = knn.knn(data_set, dists[0], train,
                                                   classes, x, ks[0])
                pred_class2, knn_probs_2 = knn.knn(view1, dists[1], train,
                                                   classes, x, ks[1])
                # third k-NN vote runs on view2, matching dists[2] and ks[2]
                pred_class3, knn_probs_3 = knn.knn(view2, dists[2], train,
                                                   classes, x, ks[2])
                class_votes = numpy.zeros(num_classes)
                for j in range(0, num_classes):
                    class_votes[j] = (1 - L) * class_probs1[j] + L * max(
                        probs1[j], probs2[j], probs3[j],
                        knn_probs_1[j], knn_probs_2[j], knn_probs_3[j])
                predicted_class = numpy.argmax(class_votes)
                if classes[predicted_class] == x1.name:
                    rate += 1.0
        rate /= number_rows
        rates[i] = rate
    mean_confidence_interval(rates)
    proportion_confidence_interval(rates)
    return rates
def runCode():
    pd.set_option('display.max_columns', 50)
    pd.set_option('display.expand_frame_repr', False)
    dataset = pd.read_csv('ted_main.csv')

    # formatting dates
    dataset['film_date'] = dataset['film_date'].apply(
        lambda x: datetime.datetime.fromtimestamp(int(x)).strftime('%d-%m-%Y'))
    dataset['published_date'] = dataset['published_date'].apply(
        lambda x: datetime.datetime.fromtimestamp(int(x)).strftime('%d-%m-%Y'))
    dataset["published_year"] = dataset["published_date"].apply(lambda x: x.split("-")[2])
    dataset = dataset.sort_values('views', ascending=False)
    pyp.pubYearPlot(dataset)

    # printing presenters' occupations and their counts
    print(dataset["speaker_occupation"].value_counts().head(10), "\n")

    # printing average views per talk for five common occupations
    print("Occupation: Views")
    for occupation in ["Writer", "Designer", "Artist", "Journalist", "Entrepreneur"]:
        subset = dataset[dataset["speaker_occupation"] == occupation]
        print(occupation + ": ", int(subset["views"].sum() / len(subset)))

    # plotting views for each tag
    ttp.tagsTalksPlot(dataset)

    # yearly tag counts (CAN USE GUI HERE)
    for year in ["2015", "2016", "2017"]:
        print("\nMost popular Tags for year " + year)
        print("====" * 7)
        ttp.tagsCountYearly(dataset, year, listTags)

    # #Ratings vs count plot
    # counter = {'Funny': 0, 'Beautiful': 0, 'Ingenious': 0, 'Courageous': 0,
    #            'Longwinded': 0, 'Confusing': 0, 'Informative': 0,
    #            'Fascinating': 0, 'Unconvincing': 0, 'Persuasive': 0,
    #            'Jaw-dropping': 0, 'OK': 0, 'Obnoxious': 0, 'Inspiring': 0}
    # neg_descriptors = {"Confusing", "Unconvincing", "Longwinded", "Obnoxious", "OK"}
    # rp.ratingsPlot(dataset, counter, neg_descriptors)

    k.knn(dataset, listTags, durationEntry, languageEntry)
    twty.searchTweets()
def onclick(event):
    # Create a new point and find its k nearest neighbours
    new = sample.Sample('', [event.xdata, event.ydata], '')
    knn.knn(new, data, K)
    data.append(new)
    pylab.scatter([new.getFeatures()[0]],
                  [new.getFeatures()[1]],
                  label=new.getLabel(),
                  marker=MARKERS[LABELS.index(new.getLabel())],
                  color=COLORS[LABELS.index(new.getLabel())])
    pylab.draw()
def knnmodel(self):
    _translate = QtCore.QCoreApplication.translate
    knn.knn(self.path)
    self.modelclass = 2
    # label text: "KNN模型训练完毕!" ("KNN model training finished!")
    self.label.setText(
        _translate(
            "MainWindow",
            "<html><head/><body><p><span style=\" font-size:10pt; font-weight:600;\">"
            + " KNN模型训练完毕!" +
            "</span></p></body></html>"))
    # model name: "KNN模型" ("KNN model")
    self.model.setText(
        _translate(
            "MainWindow",
            "<html><head/><body><p><span style=\" font-size:10pt; font-weight:600;\">"
            + "KNN模型 " +
            "</span></p></body></html>"))
def runCode():
    pd.set_option('display.max_columns', 50)
    pd.set_option('display.expand_frame_repr', False)
    dataset = pd.read_csv('ted_main.csv')

    # formatting dates
    dataset['film_date'] = dataset['film_date'].apply(
        lambda x: datetime.datetime.fromtimestamp(int(x)).strftime('%d-%m-%Y'))
    dataset['published_date'] = dataset['published_date'].apply(
        lambda x: datetime.datetime.fromtimestamp(int(x)).strftime('%d-%m-%Y'))
    dataset["published_year"] = dataset["published_date"].apply(lambda x: x.split("-")[2])
    dataset = dataset.sort_values('views', ascending=False)

    # call to create graph of Number of Talks vs Published Year
    pyp.pubYearPlot(dataset)

    # printing presenters' occupations and their counts
    print(dataset["speaker_occupation"].value_counts().head(10), "\n")

    # printing average views per talk for five common occupations
    print("Occupation: Views")
    for occupation in ["Writer", "Designer", "Artist", "Journalist", "Entrepreneur"]:
        subset = dataset[dataset["speaker_occupation"] == occupation]
        print(occupation + ": ", int(subset["views"].sum() / len(subset)))

    # plotting views for each tag
    ttp.tagsTalksPlot(dataset)

    # yearly tag counts (CAN USE GUI HERE)
    for year in ["2015", "2016", "2017"]:
        print("\nMost popular Tags for year " + year)
        print("====" * 7)
        ttp.tagsCountYearly(dataset, year, listTags)

    # call to KNN algorithm
    k.knn(dataset, listTags, durationEntry, languageEntry)
    twty.searchTweets()
def train_knn(x_train, y_train, x_test, k):
    trainer = knn(k)
    trainer.fit(x_train, y_train)
    y_predict = np.zeros(len(x_test))
    for j, x in enumerate(x_test):
        y_predict[j] = trainer.predict(x)
    return y_predict
def hack(img_name):
    '''
    hack  Recognize a CAPTCHA image
      Inputs:
          img_name: filename of image
      Outputs:
          digits: 4 digits in the input CAPTCHA image, shape (4,).
    '''
    # hack_data.npz contains 100 images with labels,
    # i.e., 400 digits with labels
    data = np.load('hack_data.npz')
    # YOUR CODE HERE (you can delete the following code as you wish)
    x_train = data['x_train']
    y_train = data['y_train']
    # begin answer
    N = x_train.shape[0]  # number of training digits, 400 in this case
    k = 20
    # test matrix, 4-by-144
    x_test = extract_image(img_name)
    digits = knn.knn(x_test, x_train, y_train, k)
    # end answer
    return digits
def requirement1():
    global min_range
    global max_range
    ds = [100, 500, 1000, 10000]
    b = 100
    h = 0.1
    k = 10
    xs = np.linspace(min_range, max_range, 200)

    # Histogram as example
    legends = []
    plot_true_distribution(1000)
    legends.append('True distribution')
    for d in ds:
        data = get_data(d)
        plt.hist(data, density=True, bins=b, alpha=0.4)
        legends.append('#bin = ' + str(b) + ', #data = ' + str(d))
    plt.legend(legends)
    plt.title('Requirement 1-1')
    plt.savefig('req1-1', dpi=300)
    plt.show()

    # KDE as example
    plt.figure()
    legends = []
    plot_true_distribution(1000)
    legends.append('True distribution')
    for d in ds:
        data = get_data(d)
        density = kde(data)
        density.set_bandwidth(h)
        plt.plot(xs, density(xs))
        legends.append('h = ' + str(h) + ', #data = ' + str(d))
    plt.legend(legends)
    plt.title('Requirement 1-2')
    plt.savefig('req1-2', dpi=300)
    plt.show()

    # KNN as example
    plt.figure()
    legends = []
    plot_true_distribution(1000)
    legends.append('True distribution')
    for d in ds:
        data = get_data(d)
        density = knn(data, k)
        plt.plot(xs, density(xs))
        legends.append('k = ' + str(k) + ', #data = ' + str(d))
    plt.legend(legends)
    plt.ylim([0, 0.4])
    plt.title('Requirement 1-3')
    plt.savefig('req1-3', dpi=300)
    plt.show()
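# Here knn(data, k) returns a density estimator rather than a classifier. As
# a point of reference, a minimal sketch of 1-D k-NN density estimation under
# the usual rule p(x) ~= k / (2 * N * d_k(x)), where d_k(x) is the distance
# from x to its k-th nearest sample. The name knn_density is illustrative,
# not the project's implementation:
import numpy as np

def knn_density(data, k):
    data = np.asarray(data, dtype=float)
    n = len(data)

    def density(xs):
        xs = np.atleast_1d(np.asarray(xs, dtype=float))
        # distance from each query point to every sample
        d = np.abs(xs[:, None] - data[None, :])
        # k-th smallest distance per query point, clamped away from zero
        dk = np.maximum(np.sort(d, axis=1)[:, k - 1], 1e-12)
        return k / (2.0 * n * dk)

    return density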
def run_ml(k=1):
    # `data`, `labels`, and `res` are module-level globals in this script
    total = len(data)
    num_test = int(total * 0.3)
    test_data = []
    test_labels = []
    train_data = data[:]
    train_labels = labels[:]
    n = total
    for i in range(num_test):
        index = int(random.random() * n)
        n -= 1
        test_data.append(train_data.pop(index))
        test_labels.append(train_labels.pop(index))

    # train
    import nn
    classifier_nn = nn.nn(train_data, train_labels)
    import knn
    classifier_knn = knn.knn(train_data, train_labels)

    # test
    for i in range(len(test_data)):
        d = test_data[i]
        res.append(
            [classifier_nn.test(d), classifier_knn.test(d, k), test_labels[i]])
def test():
    features = np.load('train_features.npy')
    # create test matrix
    test_matrix = create_matrix(100, 100)
    # find the index of the nearest stored graph with the knn model
    idx = knn(features, RW(test_matrix))

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("fruchterman_reingold")
    draw(np.load('matrices/synthetic' + str(idx) + '.npy'), 'estimated_layout', False, ax)
    draw(test_matrix, 'real_layout', True, ax)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Kamada-Kawai")
    draw2(np.load('matrices/synthetic' + str(idx) + '.npy'), 'estimated_layout2', False, ax)
    draw2(test_matrix, 'real_layout2', True, ax)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Spectral method")
    draw3(np.load('matrices/synthetic' + str(idx) + '.npy'), 'estimated_layout3', False, ax)
    draw3(test_matrix, 'real_layout3', True, ax)
def train(k, xx, yy, data):
    ptxm = []
    ptym = []
    for i in data:
        ptxm.append([i[0], i[1]])
        ptym.append(i[2])
    nn = knn(k)
    ptxm = np.array(ptxm)
    ptym = np.array(ptym)
    nn.train(ptxm, ptym)

    # predict a label for every grid point in (xx, yy)
    x_len, y_len = np.shape(xx)
    tptx = []
    for i in range(x_len):
        for j in range(y_len):
            tptx.append([xx[i][j], yy[i][j]])
    z = nn.predict(np.array(tptx))
    z = np.reshape(z, (x_len, y_len))
    return z
def predict(self, test_data):
    test_data_num = test_data.shape[0]
    self.rtl = np.zeros((test_data_num, self.train_lables_num))
    self.predict_labels = np.zeros((test_data_num, self.train_lables_num))
    for i in range(test_data_num):
        # get the indices of the k nearest neighbors in the training data
        knn_index, knn_distances = knn.knn(test_data[i], self.train_data, self.k)
        for j in range(self.train_lables_num):
            # count how many neighbors carry label j
            temp = 0
            for index in knn_index:
                if self.train_labels[index][j] == 1:
                    temp = temp + 1
            # posterior odds of label j given `temp` positive neighbors
            y1 = self.PH1[j] * self.PEH1[j][temp]
            y0 = self.PH0[j] * self.PEH0[j][temp]
            self.rtl[i][j] = y1 / (y1 + y0)
            self.predict_labels[i][j] = 1 if y1 > y0 else 0
    return self.predict_labels
def fit(self):
    # estimate the label priors P(H1), P(H0) with Laplace smoothing
    for i in range(self.train_lables_num):
        y = 0
        for j in range(self.train_data_num):
            if self.train_labels[j][i] == 1:
                y += 1
        self.PH1[i] = (self.s + y) / (self.s * 2 + self.train_data_num)
    self.PH0 = 1 - self.PH1

    # estimate the likelihoods P(E|H1), P(E|H0), where the event E is
    # "exactly `temp` of the k nearest neighbors carry the label"
    for i in range(self.train_lables_num):
        c1 = np.zeros((self.k + 1,))
        c0 = np.zeros((self.k + 1,))
        for j in range(self.train_data_num):
            temp = 0
            # query k+1 neighbors and drop the first (the point itself)
            knn_index, knn_distances = knn.knn(self.train_data[j],
                                               self.train_data, self.k + 1)
            knn_index = knn_index[1:]
            for index in knn_index:
                if self.train_labels[index][i] == 1:
                    temp += 1
            if self.train_labels[j][i] == 1:
                c1[temp] = c1[temp] + 1
            else:
                c0[temp] = c0[temp] + 1
        for l in range(self.k + 1):
            self.PEH1[i][l] = (self.s + c1[l]) / (self.s * (self.k + 1) + c1.sum())
            self.PEH0[i][l] = (self.s + c0[l]) / (self.s * (self.k + 1) + c0.sum())
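# The fit/predict pair above follows the ML-KNN scheme: for each label,
# combine a smoothed prior P(H1) with the likelihood of seeing `temp`
# positive neighbors, then take the MAP decision. A self-contained
# illustration of that posterior computation with made-up counts (not the
# project's data):
s, k, n = 1.0, 3, 10           # smoothing, neighbors, training examples
y = 4                          # training examples carrying the label
c1 = [0, 1, 2, 1]              # neighbor-count histogram among positives
c0 = [3, 2, 1, 0]              # neighbor-count histogram among negatives

PH1 = (s + y) / (s * 2 + n)
PH0 = 1 - PH1
temp = 2                       # observed positive neighbors at test time
PEH1 = (s + c1[temp]) / (s * (k + 1) + sum(c1))
PEH0 = (s + c0[temp]) / (s * (k + 1) + sum(c0))
posterior = PH1 * PEH1 / (PH1 * PEH1 + PH0 * PEH0)
print(posterior > 0.5)         # MAP decision for this label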
def test_knn():
    dataset = pickle.load(open("dataset.obj", "rb"))
    n_classes = len(dataset.get_classes())
    start = time.time()
    predictions = knn.knn(dataset)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time using knn {0}...".format(elapsed_time))
    print("predictions = \n{0}".format(predictions))
    utils.write_list(predictions, "results/knn-predictions.txt")
    # Example output:
    # predictions = [
    #     [1, 1, 0, 2, 4, 3, 2, 0, 2, 4, 0, 3, 2, 1, 1],
    #     [1, 2, 4, 2, 1, 0, 4, 1, 3, 2, 2, 2, 1, 2, 1],
    #     [2, 3, 4, 2, 2, 0, 2, 0, 3, 3, 1, 2, 2, 2, 3],
    #     [0, 1, 3, 3, 3, 3, 1, 3, 3, 3, 2, 2, 3, 0, 1],
    #     [3, 0, 2, 1, 4, 2, 1, 0, 2, 4, 1, 1, 4, 2, 3]
    # ]
    hist = np.zeros((n_classes, n_classes), dtype=np.uint16)
    for i in range(len(predictions)):
        for j in range(len(predictions[i])):
            c = predictions[i][j]
            hist[i][c] += 1
    print("hist = \n{0}".format(hist))
    np.savetxt("results/knn-hist.csv", hist, fmt="%i", delimiter=",")
    confusion_matrix = hist / 25.0
    print("conf mat = \n{0}".format(confusion_matrix))
    values = [confusion_matrix[i][i] for i in range(n_classes)]
    precision = np.average(values)
    print("precision = {0}".format(precision))
    plt.matshow(confusion_matrix)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.show()
def knn(self, predictData=None, trainData=None):
    h = hp()
    k = knn()
    accuracy = []
    precision = []
    recall = []
    f_score = []
    mean, stdDev = h.normalizeData(trainData)
    nn = int(input("Enter the number of closest neighbors to consider: "))
    h.normalizeEvaluationSet(predictData, mean, stdDev)
    # leave-one-out evaluation over the training data
    for i in range(len(trainData)):
        predictData = trainData[i]
        tmp = [lt for j, lt in enumerate(trainData) if j != i]
        td = h.convertToList(tmp)
        k.classify(td, predictData, nn)
        truePositives, trueNegatives, falsePositives, falseNegatives = h.findParams(
            predictData)
        accuracy.append(
            h.findAccuracy(truePositives, trueNegatives, falsePositives,
                           falseNegatives))
        tmpPrecision = h.findPrecision(truePositives, trueNegatives,
                                       falsePositives, falseNegatives)
        tmpRecall = h.findRecall(truePositives, trueNegatives, falsePositives,
                                 falseNegatives)
        precision.append(tmpPrecision)
        recall.append(tmpRecall)
        f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))
    return accuracy, precision, recall, f_score
def run_all():
    import pandas as pd
    from ds import decision_tree
    from knn import knn
    from logreg import logreg
    from sv import sv
    from ensemble_methods import ensemble_methods
    from xg import xgb

    print('Running All')
    acc_ds = decision_tree()
    acc_knn = knn()
    acc_log = logreg()
    acc_xg = xgb()
    acc_svc = sv()
    acc_rf, acc_ab, acc_gb = ensemble_methods()

    # Model performance
    models = pd.DataFrame({
        'Model': [
            'XGBoost', 'Logistic Regression', 'KNN', 'Support Vector Machines',
            'Gradient Boosting', 'Random Forest', 'Decision Tree', 'ADABoost'
        ],
        'Score': [acc_xg, acc_log, acc_knn, acc_svc, acc_gb, acc_rf, acc_ds, acc_ab]
    })
    print(models.sort_values(by='Score', ascending=False))
def compare_errors(k_vals, input_data_file):
    # read in the input data
    initial_data = create_data(input_data_file)

    # create plots of the data (this saves the images in the current directory)
    plot_data(initial_data)

    # integerize the data labels
    integerized_data, label_dict = integerize_labels(initial_data)

    # split the data into train and test
    train, test = split(integerized_data)

    # compute the errors
    errors = {}
    for k in k_vals:
        predicted_labels = knn(train, test, k)
        error_rate = calculate_error_rate(predicted_labels, test)
        errors[k] = error_rate

    # BONUS: weighting
    for k in k_vals:
        weighted_predicted_labels = weighted_knn(train, test, k)
        weighted_error_rate = calculate_error_rate(weighted_predicted_labels, test)
        print("Weighted error value for k = %d was %f" % (k, weighted_error_rate))

    return errors
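# weighted_knn above is not shown; a common variant weights each neighbor's
# vote by inverse distance instead of counting votes equally. A minimal
# sketch under that assumption for a single query point (names are
# illustrative, not the project's implementation):
import numpy as np

def weighted_knn_predict(x, x_train, y_train, k, eps=1e-8):
    d = np.sqrt(((x_train - x) ** 2).sum(axis=1))  # distance to each sample
    nearest = np.argsort(d)[:k]
    votes = {}
    for idx in nearest:
        # closer neighbors contribute larger weights
        votes[y_train[idx]] = votes.get(y_train[idx], 0.0) + 1.0 / (d[idx] + eps)
    return max(votes, key=votes.get)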
def digit_recognizer(train_data_file, test_data_file, test_label_file,
                     test_result_file, knn_k):
    log("Start get train data & label.")
    train_data, train_label = load_train_data(train_data_file)
    log("Start get test data.")
    test_data = load_test_data(test_data_file)
    log("Start get test label.")
    test_label = load_test_result(test_label_file)
    log("test label: {}".format(test_label))
    m, n = shape(test_data)  # numpy.shape; test_label is a 1-by-m matrix
    error_count = 0
    result = []
    for idx in range(m):
        log("main iter: {}".format(idx))
        classifier_result = knn(test_data[idx], train_data, train_label, knn_k)
        result.append(classifier_result)
        log("the class result: {}, the true answer: {}".format(
            classifier_result, test_label[0, idx]))
        if classifier_result != test_label[0, idx]:
            error_count += 1
    log("error count: {}".format(error_count))
    log("error rate: {}".format(error_count / float(m)))
    save_result(result, test_result_file)
def move(self):
    a = []
    Pregnancies = self.lineEdit.text()
    Glucose = self.lineEdit_2.text()
    BloodPressure = self.lineEdit_3.text()
    SkinThickness = self.lineEdit_4.text()
    Insulin = self.lineEdit_5.text()
    Bmi = self.lineEdit_6.text()
    DiabetesPedigreeFunction = self.lineEdit_7.text()
    Age = self.lineEdit_8.text()
    if (len(Pregnancies) != 0 and len(Glucose) != 0 and len(BloodPressure) != 0
            and len(SkinThickness) != 0 and len(Insulin) != 0 and len(Bmi) != 0
            and len(DiabetesPedigreeFunction) != 0 and len(Age) != 0):
        a.append(float(Pregnancies))
        a.append(float(Glucose))
        a.append(float(BloodPressure))
        a.append(float(SkinThickness))
        a.append(float(Insulin))
        a.append(float(Bmi))
        a.append(float(DiabetesPedigreeFunction))
        a.append(float(Age))
        model = knn('data_diabetes.csv')
        output = model.predict([a])
        print(output)
        self.openWindow(output[0])
    else:
        # "Cảnh báo" = "Warning", "Bạn nhập sai" = "Invalid input"
        self.warning("Cảnh báo", "Bạn nhập sai")
def generate_output(patient_index):
    distances, indices = knn()
    neighbours_indices = neighbours_of_index(patient_index, distances, indices)
    export_anomaly_and_neighbours('Reports/anomalies/', patient_index,
                                  neighbours_indices)
    data = []
    with open('Reports/anomalies/' + str(patient_index) + '.csv') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',')
        for row in spamreader:
            data.append(row)
    patient_input = [float(i) for i in data[1][1:]]
    all_inputs = [[float(j) for j in i[1:]] for i in data[1:]]
    for input_idx in range(30):
        inputs = get_inputs(input_idx, all_inputs)
        std = np.std(inputs)
        mean = np.mean(inputs)
        # z-score of the patient against the neighbourhood
        z = abs((patient_input[input_idx] - mean) / std)
        z = float("{0:.3f}".format(z))
        above_mean = patient_input[input_idx] >= mean
        t1 = ("for " + color.DARKCYAN + Y_LABEL[input_idx].title() + color.END
              + " the patient is ")
        t2 = color.PURPLE + str(z) + color.END
        t3 = " standard deviations below the mean"
        if z > 1:
            t2 = color.RED + str(z) + color.END
        if above_mean:
            t3 = " standard deviations above the mean"
        print(t1 + t2 + t3)
def assess_knn(name, point_info, categories, labels,
               actions=['save', 'load', 'evaluate']):
    '''
    name: name of graph being assessed
    point_info: coords (list of coordinates), nids (list of node ids),
                category_map (map of node ids to categories),
                nodes (dataframe of all this info)
    categories: categories to assess
    labels: correct and incorrect high-density nodes of a given category
    actions: [evaluate/graph/save] -> what to do
    '''
    print >> sys.stderr, 'assessing knn...'
    coords, nids, category_map, nodes = point_info
    pos = {}
    for nid, coord in zip(nids, coords):
        pos[nid] = (coord[0], coord[1])
    ks = list(range(4, 20))
    results = {cat: {} for cat in categories}
    if 'load' in actions:
        for k in ks:
            for c in categories:
                graphfile = 'src/clustering/graphs/{}-{}-knn-{:02}.csv'.format(
                    name, c, k)
                if os.path.isfile(graphfile):
                    edges = pd.read_csv(graphfile, ',', header=0)
                    edges['r1'] = edges['r1'].apply(str)
                    edges['r2'] = edges['r2'].apply(str)
                    cat_graph = nx.from_pandas_edgelist(edges, source='r1',
                                                        target='r2')
                    if 'evaluate' in actions:
                        res = evaluate('knn,k={}'.format(k), c,
                                       set(cat_graph.nodes()), labels)
                        results[c][k] = res
    for i, k in enumerate(ks):
        # skip ks whose results were already loaded for every category
        if all([k in results[c] for c in categories]):
            continue
        graph = knn.knn(nodes, k)
        for c in categories:
            cat_graph = knn.split(graph, c, category_map)
            cat_graph = cutoff.filter_connected_components(cat_graph)
            if 'evaluate' in actions:
                res = evaluate('knn,k={}'.format(k), c,
                               set(cat_graph.nodes()), labels)
                results[c][k] = res
            if 'graph' in actions:
                draw_graph(
                    cat_graph, pos, 'knn with filter and k={}'.format(k),
                    'src/clustering/figures/{}-{}-knn-{:02}.png'.format(name, c, k))
            if 'save' in actions:
                save_graph(
                    cat_graph,
                    'src/clustering/graphs/{}-{}-knn-{:02}.csv'.format(name, c, k))
    return results
def cross_validation(data, target, k):
    num_samples = data.shape[0]
    perm = np.arange(num_samples)
    np.random.shuffle(perm)
    data = data[perm]
    target = target[perm]
    batch_sz = num_samples // 10  # roughly 10-fold cross-validation
    errs = 0
    for batch in range(0, num_samples, batch_sz):
        batch_train_X = np.concatenate([data[:batch], data[(batch + batch_sz):]])
        batch_train_Y = np.concatenate([target[:batch], target[(batch + batch_sz):]])
        batch_test_X = data[batch:batch + batch_sz]
        batch_test_Y = target[batch:batch + batch_sz]
        pred = knn(batch_train_X, batch_test_X, batch_train_Y, k)
        errs += np.sum(pred != batch_test_Y)
    return errs / num_samples
def match(self, tree, query, k=None, radius=None):
    dist_list, idx_list = knn.knn(tree, query, k)
    dist_list = dist_list.T
    # knn returns 1-based indices; shift to 0-based
    idx_list = idx_list.T - 1
    return dist_list, idx_list
def get_nearest_neighbour(pt_cloud, bev_width, bev_length,
                          image_downsampling_factor, P2, parts=4):
    if pt_cloud.shape[0] != 3:
        pt_cloud = pt_cloud.T
    world_pts = []
    # one world point per BEV cell
    for i in range(bev_length):
        for j in range(bev_width):
            world_pts.append(bev2world(j, i, bev_width, bev_length, 80, 70))
    # query the point cloud in `parts` chunks to bound memory use
    all_inds = []
    part_sz = len(world_pts) // parts
    for i in range(parts):
        cur_part = np.array(world_pts[i * part_sz:(i + 1) * part_sz]).T
        _, inds = knn.knn(cur_part.astype(np.float32),
                          pt_cloud.astype(np.float32), 1)
        inds = np.squeeze(inds) - 1  # 1-based to 0-based
        all_inds = all_inds + inds.tolist()
    world_pts = np.array(world_pts).T
    nearest = pt_cloud[:, all_inds]
    return world_pts, nearest
def procYear(player, kNeighbors=4, year=''):
    """
    Takes as input the dataframe from a year-specific FanGraphs leaderboard
    (blank year = all years), a player, and the number of neighbors in the
    cluster; returns the cluster of players.
    """
    # load the knn clustering class object
    knnObj = knn.knn()
    knnObj.k = kNeighbors
    knnObj.procList = ['CPU']  # choose CPU or GPU distance calculations

    # read the FanGraphs csv into a dataframe
    # downloaded from https://www.fangraphs.com/
    df = pd.read_csv('FanGraphsLeaderboard' + str(year) + '.csv')

    # remove the UTF-8 BOM artifact from the name column
    df = df.rename(columns={'\xef\xbb\xbf"Name"': 'name'})

    # handle % in floating point values
    for col in df.columns:
        if "%" in col:
            # chop off ' %'
            df[col] = df.apply(lambda x: float(x[col][:-2]), axis=1)

    # use player name as id
    dataCl = np.array(df.name)

    # columns to remove from clustering
    drop_cols = ['Team', 'playerid', 'name']
    data = np.column_stack(
        [df[col] for col in df.columns if col not in drop_cols])

    # handle wide data edge case
    if len(data) <= knnObj.k:
        return [0]

    # feature scaling
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    nPts = len(data)
    if player not in dataCl:
        return [0]
    point = list(dataCl).index(player)
    dataPoint = data[point]

    # subtract off the test point
    knnObj.data = np.append(data[0:point], data[point + 1:nPts], axis=0)
    knnObj.dataCl = np.append(dataCl[0:point], dataCl[point + 1:nPts], axis=0)
    knnObj.testPt = dataPoint
    result = knnObj.getCluster()
    return result
def lvq_3(prots):
    prots_3 = prots[:]
    for r in range(repetitions):
        for x in dataset:
            closest_prototypes = nn(x, 2, prots_3)
            m = closest_prototypes[0]['elem']
            n = closest_prototypes[1]['elem']
            m_class = closest_prototypes[0]['class']
            n_class = closest_prototypes[1]['class']
            x_class = x[len(x) - 1]
            if m_class == x_class:
                same_class = m
                other_class = n
            elif n_class == x_class:
                same_class = n
                other_class = m
            else:
                same_class = False
            if window_rule(x, m, n) and same_class:
                if m_class != n_class:
                    # LVQ2.1-style update: attract the same-class prototype,
                    # repel the other
                    movement(same_class, x, True)
                    movement(other_class, x, False)
                else:
                    # LVQ3 correction: both prototypes share x's class, so
                    # move both toward x, damped by epsilon
                    movement(same_class, x, True, e=e)
                    movement(other_class, x, True, e=e)
    print "LVQ 3 RESULTS:"
    return knn(k, prots_3, evaluation)
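# window_rule above is not shown; in Kohonen's LVQ2.1/LVQ3 the update only
# fires when x falls in a window around the midplane of the two closest
# prototypes: min(d_m/d_n, d_n/d_m) > (1 - w) / (1 + w). A sketch under that
# assumption; euclidean() and the vector layout of m and n are illustrative:
import math

def window_rule(x, m, n, w=0.3):
    def euclidean(a, b):
        return math.sqrt(sum((ai - bi) ** 2 for ai, bi in zip(a, b)))
    d_m = euclidean(x[:-1], m)  # x's last entry is its class label
    d_n = euclidean(x[:-1], n)
    if d_m == 0 or d_n == 0:
        return False
    return min(d_m / d_n, d_n / d_m) > (1 - w) / (1 + w)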
def run(source, method, k):
    tags = [1, 1, 1, 1, 1, 1]
    if method == 'knn' and k is None:
        raise ValueError('Argument k is mandatory for KnnClassifier')
    if source == 'chemistry':
        glossary = get_chemistry() if tags[5] else None
        filepath = 'files/chemistry.xml'
    else:
        glossary = get_graphicdesign() if tags[5] else None
        filepath = 'files/graphic-design.xml'
    matrix, tag_names = preprocessing(filepath, tags, glossary)
    print(tag_names)
    if method == 'knn':
        k = int(math.fabs(int(k) or 5))
        r = knn(matrix, k)
    else:
        r = logistic_regression(matrix)
    print(r)
def runtest_knn(train_data, test_data, k=1):
    train_t, train_zips = cookdata_knn(train_data)
    test_t, test_zips = cookdata_knn(test_data)
    correct = 0
    for i in range(len(test_zips)):
        res = knn.knn(train_zips, train_t, k, test_zips[i])
        if res == test_t[i]:
            correct += 1
    # return the error rate (1 - accuracy)
    return 1. - (float(correct) / len(test_zips))
def loocv(training_data, k, current_features):
    # leave-one-out cross-validation: hold out each example in turn
    num_examples = len(training_data)
    num_correct = 0
    for leave_out in range(0, num_examples):
        test = [training_data.pop(leave_out)]
        num_correct += knn.knn(training_data, test, k, current_features)
        training_data.insert(leave_out, test[0])
    acc = 100.0 * num_correct / num_examples
    return [num_correct, num_examples, acc]
def mnist_block(train_set, valid_set, test_set, knn_data, mis):
    dataset = train_set
    n = int(mis * 28)

    # mask: blank out n random 28-pixel blocks per image
    train_mask = np.ones_like(train_set)
    valid_mask = np.ones_like(valid_set)
    test_mask = np.ones_like(test_set)
    block = [0] * 28
    for row in range(train_mask.shape[0]):
        ran = np.random.randint(100, 700, size=n)
        for r in ran:
            train_mask[row, r:r + 28] = block
    data = (train_set * train_mask, valid_set * valid_mask, test_set * test_mask)
    mask = train_mask, valid_mask, test_mask

    # knn imputation
    knn_mask = np.split(train_mask, 10)[0]
    t0 = time.time()
    knn_result = knn(knn_data, knn_mask, k=50)
    tknn = time.time() - t0

    # sda imputation
    t0 = time.time()
    gather = Gather_sda(dataset, data,
                        problem='class',
                        available_mask=mask,
                        method='nes_mom',
                        pretraining_epochs=10,
                        pretrain_lr=0.0005,
                        training_epochs=100,
                        finetune_lr=0.0005,
                        batch_size=200,  # 300
                        hidden_size=[1000, 1000, 100],
                        dA_initiall=True,
                        error_known=True)
    gather.finetuning()
    tsda = time.time() - t0
    print('time_knn', tknn, 'time_sda', tsda)

    # reconstruction error on the masked-out entries only
    sda_er = np.mean(np.sum((1 - train_mask) * ((train_set - gather.gather_out()) ** 2), axis=1))
    kn_er = np.mean(np.sum((1 - knn_mask) * ((knn_data - knn_result) ** 2), axis=1))
    return (sda_er, kn_er)
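# Several snippets in this family score imputation by summing squared error
# over only the masked-out entries (mask == 1 means observed, 0 means
# hidden). A standalone sketch of that metric, with illustrative names:
import numpy as np

def masked_mse(x, x_reconstructed, mask):
    # (1 - mask) keeps only the hidden entries in the per-row error sum
    return np.mean(np.sum((1 - mask) * (x - x_reconstructed) ** 2, axis=1))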
def test():
    print("Starting int test.")
    k = knn.knn((
        knn.TrainingEntry((0, 0), 'red'),
        knn.TrainingEntry((5, 5), 'blue'),
    ))
    assert k.predict((1, 1)) == 'red'
    assert k.predict((4, 4)) == 'blue'
    for vec in [(0, 0), (2, 4), (5, 5), (7, 9), (-2, -4)]:
        print("Regress[{0}] = {1}".format(vec, k.regress(vec)))
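# The regress() call above suggests this knn object also supports k-NN
# regression, typically the mean of the k nearest training values. A
# standalone sketch of that idea, not the module's actual code:
def knn_regress(query, entries, k=2):
    # entries: list of (vector, value) pairs
    def dist(a, b):
        return sum((x - y) ** 2 for x, y in zip(a, b)) ** 0.5
    neighbors = sorted(entries, key=lambda e: dist(query, e[0]))[:k]
    # average the values of the k nearest neighbors
    return sum(v for _, v in neighbors) / len(neighbors)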
def setupClassifier(path):
    data = []
    label = []
    l = p.getTrialList(path)
    for x in l:
        features = f.Features(x, path[-2])
        data.append(features.feature)
        if x.head.target == 'good':
            label.append(1)
        else:
            label.append(0)
    return knn.knn(data, label)
def leave_one_out(examples, k):
    conf_matr = ConfusionMatrix()
    for ex in examples:
        # disable only this example
        ex.active = False
        # run the k-Nearest-Neighbor algorithm
        rank_list = knn.knn(k, examples, ex)
        # check the voting for correctness
        outcome = knn.voting(rank_list)
        conf_matr.inc_according_to(outcome, ex.outcome)
        ex.active = True
    # return the computed confusion matrix
    return conf_matr
def datingClassTest():
    hoRatio = 0.80
    datingDataMat, datingLabels = file2matrix(
        r'C:\Users\Daniel.Lee\Desktop\machinelearninginaction-master\Ch02\datingTestSet.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = knn(normMat[i, :], normMat[numTestVecs:m, :],
                               datingLabels[numTestVecs:m], 3)
        print "the classifier came back with : %d , the real answer is : %d" % (
            classifierResult, datingLabels[i])
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print "the total error rate is : %f" % (float(errorCount) / float(numTestVecs))
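# autoNorm above returns (normMat, ranges, minVals), which matches the usual
# min-max scaling helper from "Machine Learning in Action". A vectorized
# sketch under that assumption:
import numpy as np

def autoNorm(dataSet):
    minVals = dataSet.min(axis=0)
    maxVals = dataSet.max(axis=0)
    ranges = maxVals - minVals
    # scale every column into [0, 1]
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals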
def str_test(nr_train):
    print("Starting str test ({0} sample[s]).".format(nr_train))

    def label(s):
        return "short" if len(s) < 5 else "long"

    k = knn.knn(dist=knn.util.dist_string)
    # k = knn.knn(dist=lambda l, r: abs(len(l) - len(r)))
    for word, count in entries[:nr_train]:
        k.trainer.append(knn.TrainingEntry(word, label(word)))
    errors = 0
    for word, count in entries:
        errors += int(k.predict(word) != label(word))
    print("Error rate: {0}".format(float(errors) / len(entries)))
def leave_one_out(examples, k):
    right_classified = 0
    for ex in examples:
        # disable only this example
        ex.active = False
        # run the k-Nearest-Neighbor algorithm
        rank_list = knn.knn(k, examples, ex)
        # check the voting for correctness
        outcome = knn.voting(rank_list)
        if outcome == ex.outcome:
            right_classified += 1
        ex.active = True
    # return the share of correctly classified examples
    return right_classified / float(len(examples.examples))
def test_knn(filepath, glossary, k=5):
    bits = functions.gen_bitlist(6)[1:]
    total = len(bits)
    best = 0
    best_tags = []
    counter = 1
    for tags in bits:
        matrix, tag_names = preprocessing(filepath, tags, glossary)
        r = knn(matrix, k)
        print(str(counter) + "/" + str(total), end='\r')
        if r > best:
            best = r
            best_tags = tag_names
        counter += 1
    print("---Best---")
    print("Score: " + str(best))
    print("Tag names: " + str(best_tags))
def setUp(self):
    self.vector = vectorization.vector()
    self.training = self.vector.vectorize(iter(training_movies))
    self.knn = knn.knn()
from Preprocess import call
from knn import knn
from naivebayes import main_def
from dtree import decisionTree

if __name__ == "__main__":
    raw = raw_input("Data reduction technique? \n"
                    "1. PCA 2. Correlation filter 3. Variance Filter 4. Without Reduction\n")
    call(int(raw))
    alg = raw_input("Algorithm for classification? \n1. KNN 2.Naive-Bayes 3.Decision Tree\n")
    if int(alg) == 1:
        knn(int(raw))
    elif int(alg) == 2:
        main_def(int(raw))
    else:
        decisionTree(int(raw))
def train(d, l):
    import nn
    classifier_nn = nn.nn(d, l)
    import knn
    classifier_knn = knn.knn(d, l)
    return classifier_nn, classifier_knn
                    finetune_lr = 0.001,
                    batch_size = 50,  # 10
                    hidden_size = [170, 168, 130],  # [3000, 1000, 450, 168]
                    corruption_da = [0.2, 0.2, .2, 0.1, 0.1],
                    drop = [0.2, 0.3, 0.3, 0.1, 0.2, 0.],
                    dA_initiall = True,
                    error_known = True,
                    activ_fun = T.tanh,  # T.nnet.sigmoid
                    regu_l1 = 0,
                    regu_l2 = 0)
gather.finetuning()

########### define number of neighbors K ###############
k_neib = 20
print('... Knn calculation with {} neighbor'.format(k_neib))
knn_result = knn(test_set, test_mask, k=k_neib)

######### run the result for test
sda_error.append(MSE(test_set, gather.gather_out(), test_mask))
mean_error.append(MSE(dataset, dataset.mean(axis=0), available_mask))
knn_error.append(MSE(test_set, knn_result, test_mask))
print('sda_error= ', sda_error[-1])
print('knn_error= ', knn_error[-1])
print('mean_error= ', mean_error[-1])

print('sda_error= ', sda_error)
print('knn_error= ', knn_error)
import reader
import tablestr
import zeror
import xval
import nb
import knn
import uxval

if __name__ == "__main__":
    filename = 'data/weather2.csv'
    table = tablestr.Table()          # create raw data structure
    reader.readcsv(filename, table)   # read the .csv data set
    f = '%4.2f'                       # set the formatting for the output
    tables = reader.klasses(table)
    b = x = 2
    kn = 5
    k = 1
    m = 2
    uxvaltables = uxval.uxvals(tables, x, b)
    knn_acc = []
    nb_acc = []
    for s in range(b * x):
        s += 1
        acc = knn.knn(uxvaltables[s]['test'], uxvaltables[s]['train'], tables, kn)
        # acc2 = nb.nb(xvaltables[s]['test'], xvaltables[s]['train'], tables['names'], k, m)
        knn_acc += [f % acc]
        # nb_acc += [f % acc2]
    print 'knn_acc =', knn_acc
    # print 'nb_acc =', nb_acc
data = []
label = []
for x in l:
    features = f.Features(x)
    data.append(features.feature)
    if x.head.target == 'good':
        label.append(1)
    else:
        label.append(0)
print len(data)
print len(label)

# leave-one-out evaluation of the knn classifier
n = len(data)
import knn
correct = 0
for i in range(n):
    test_data = data.pop(i)
    test_label = label.pop(i)
    cla = knn.knn(data, label)
    if cla.test(test_data, 10) == test_label:
        correct += 1
    data.insert(i, test_data)
    label.insert(i, test_label)
print correct * 1.0 / n
            if neighbor_point not in clustered:
                new_cluster.append(neighbor_point)
                clustered.append(neighbor_point)
    print '%d clusters formed.' % len(clusters)
    print '%d(%.2f%%) noise points found.' % (
        len(noise), (float(len(noise)) / len(points)) * 100)
    # print 'Noise: %s' % ', '.join([str(point).replace('[', '(').replace(']', ')') for point in noise])
    return [point for point in points if point not in noise]

# Read files, denoise the training data with DBSCAN, then sweep k for knn
test = []
train = []
for cl in 'rgb':
    train_file = open('./TrainingData-original/W%s_train.txt' % cl, 'r')
    train_data = [[float(x.strip()) for x in line.split()]
                  for line in train_file.read().strip().split('\n')]
    print '- Class: %s' % cl
    filtered_data = dbscan(train_data, 4, 2)
    train += [(point, cl) for point in filtered_data]
    new_train_file = open('./TrainingData/W%s_train.txt' % cl, 'w')
    new_train_file.write('\n'.join(['\t'.join([str(item) for item in row])
                                    for row in filtered_data]))
    new_train_file.close()
    test_file = open('./TestData/W%s_test.txt' % cl, 'r')
    test += [([float(x.strip()) for x in line.split()], cl)
             for line in test_file.read().strip().split('\n')]

for i in range(1, 30):
    knn.knn(i, test, train)
import matplotlib.mlab
import numpy

import knn

data = []
f = open(r'DRD.txt', 'r')
for s in f:
    d = s.split(',')
    d = [float(d2) for d2 in d]
    data.append(d)
f.close()
data = numpy.array(data)
row, colu = numpy.shape(data)

# use the first 90% of rows for training, the rest as test samples
t_n = int(row * 0.9)
tra = data[0:t_n, 0:-1]
sam = data[t_n:, 0:-1]
lab = data[0:t_n, -1]
true = numpy.array(data[t_n:, -1])
k = 21
result, label = knn.knn(tra, lab, sam, k)
label = numpy.array(label)
result_list = []
for r in result:
    # pick the class whose vote share is at least 0.5
    if r[0] >= 0.5:
        result_list.append(label[0])
    else:
        result_list.append(label[1])
result_list = numpy.array(result_list)
print "correct rate:"
print len(matplotlib.mlab.find((result_list == true) == True)) / float(len(true))
                    problem = 'regression',
                    available_mask = mask,
                    method = 'adam',
                    pretraining_epochs = 10,
                    pretrain_lr = 0.0001,
                    training_epochs = 100,
                    finetune_lr = 0.0001,
                    batch_size = 200,
                    hidden_size = [1000, 100, 2],
                    corruption_da = [0.1, 0.1, 0.1],
                    dA_initiall = True,
                    error_known = True)
gather.finetuning()

########### define number of neighbors K ###############
knn_result = knn(dataset, available_mask, k=1000)

######### run the result for test
def MAE(x, xr, mas):
    # mean absolute error over the masked-out entries only
    return np.mean(np.sum((1 - mas) * np.abs(x - xr), axis=1))

sda_error.append(MAE(test_set, gather.gather_out(), test_mask))
mean_error.append(MAE(dataset, dataset.mean(axis=0), available_mask))
knn_error.append(MAE(dataset, knn_result, available_mask))
## logger will go here
# return articles[0]

polishArticles = getFromDir("pl")
englishArticles = getFromDir("en")
deutschArticles = getFromDir("de")
frenchArticles = getFromDir("fr")
spanishArticles = getFromDir("es")
articles = [
    polishArticles, englishArticles, deutschArticles, frenchArticles,
    spanishArticles
]
sortToTrainAndTest(articles)
print "train set: " + str(len(trainSet))
print "test set: " + str(len(testSet))

network = net.NeuralNetwork(trainSet, testSet)
network.initializeNetwork()
network.testNetwork()

knn = k.knn(trainSet, testSet)
knn.initializeAlgorithm()
knn.testkNN()
        Xtest = Xtest[1:]
        Ytest = Ytest[1:]
        """
        dataset.X = Xtrain
        dataset.Y = Ytrain
        dataset.Xte = Xtest
        dataset.Yte = Ytest
        """
        # print len(Xtrain), len(Ytrain), len(Xtest), len(Ytest)
        print("================================================================")
        print("KNN (Feature %d, TestSet %d)" % (testFeatureSize, testSetIndex))
        print("================================================================")
        knn.knn(3, testFeatureSize, Xtrain, Ytrain, Xtest, Ytest)
    # end for testSetIndex
# end for testFeatureSize

###################################
# supervised
###################################
"""
f_tr = "tool.train"
f_tm = "tool.test"
f_mm = "tool.model"
f_pm = "tool.predict"
Xtr, Ytr, Xte, Yte = util.splitTrainTest(X0, Y0, 5)
def setUp(self):
    self.knn = knn.knn()
def __init__(self, db):
    self.k = knn.knn()
    self.training = db['training']
    self.classified = db['classified']
    self.gold = db['gold']
    self.golden = {}
def train(d, l, w, th):
    classifier_nn = nn.nn(d, l, w, th)
    classifier_knn = knn.knn(d, l)
    return classifier_nn, classifier_knn
                    method = 'adam',
                    pretraining_epochs = 200,
                    pretrain_lr = 0.0001,
                    training_epochs = 300,
                    finetune_lr = 0.0001,
                    batch_size = 5,
                    hidden_size = [300, 100, 3],  # (1388, 8) PCA --> 3
                    corruption_da = [0.2, .2, 0.1, .1, 0.1, .1],
                    dA_initiall = True,
                    error_known = True,
                    activ_fun = T.tanh)
gather.finetuning()

########### define number of neighbors K ###############
k_neib = 25
print('... Knn calculation with {} neighbor'.format(k_neib))
knn_result = knn(dataset, available_mask, k=k_neib)

######### run the result for test
def MAE(x, xr, mas):
    # mean absolute error over the masked-out entries only
    return np.mean(np.sum((1 - mas) * np.abs(x - xr), axis=1))

sda_error.append(MAE(test_set, gather.gather_out(), test_mask))
mean_error.append(MAE(dataset, dataset.mean(axis=0), available_mask))
knn_error.append(MAE(dataset, knn_result, available_mask))
print('sda_error= ', sda_error[-1])
print('knn_error= ', knn_error[-1])
print('mean_error= ', mean_error[-1])
                    problem = 'regression',
                    available_mask = mask,
                    method = 'adam',
                    pretraining_epochs = 100,
                    pretrain_lr = 0.0001,
                    training_epochs = 200,
                    finetune_lr = 0.0001,
                    batch_size = 100,
                    hidden_size = [100, 20, 2],
                    corruption_da = [0.1, 0.1, 0.1],
                    dA_initiall = True,
                    error_known = True)
gather.finetuning()

knn_result = knn(dataset, available_mask)

######### run the result for test
dd_mask = test_mask
dd = test_set
b_error.append(sum((1 - dd_mask) * ((dd - gather.gather_out()) ** 2), axis=1).mean())
mean_error.append(sum((1 - available_mask) * ((dataset - dataset.mean(axis=0)) ** 2), axis=1).mean())
knn_error.append(sum((1 - available_mask) * ((dataset - knn_result) ** 2), axis=1).mean())
plot(mis, b_error[-1], 'ro')
plot(mis, mean_error[-1], 'bo')
plot(mis, knn_error[-1], 'g*')

#### SDA with corruption in training
train_mask = rest_mask[:percent_valid]
data = (train_set * train_mask, valid_set * valid_mask, test_set * test_mask)
    attrtable_test = attrselect.attrtable(test_table, attrlst)
    # test_table = projections.projections(uxvaltables[s]['test'])
    pcaT_all, tmp1 = tiles.tiles(table, numdim, outfile0)  # pca projection and clustering
    pcaT_info, tmp2 = tiles.tiles(attrtable_train, numdim, outfile0)
    # fastT_all = tiles.tilesv2(table, numdim, outfile0)
    fastT_info, tmp3 = tiles.tilesv2(attrtable_train, numdim, outfile0)
    pcaT_all_cen += [tmp1]
    pcaT_info_cen += [tmp2]
    fastT_info_cen += [tmp3]
    # tablestr.tableprint(centroid_attr)
    tables = reader.klasses(table)
    kt = 1
    mt = 2
    acc1, f1, prec1, pd1 = nb.nb(xvaltables[s]['test'], xvaltables[s]['train'],
                                 tables['names'], kt, mt)
    acc2, f2, prec2, pd2 = knn.knn(uxvaltables[s]['test'], uxvaltables[s]['train'], kn)
    # PCA infer methods
    acc3, f3, prec3, pd3 = newknn.knn(test_table, pcaT_all[0], kn, threshold)
    acc4, f4, prec4, pd4 = newknn.knn(attrtable_test, pcaT_info[0], kn, threshold)
    # Fastmap infer methods
    # acc5, f5, prec5, pd5 = newknn.knn(test_table, fastT_all[0], kn, threshold)
    acc6, f6, prec6, pd6 = newknn.knn(attrtable_test, fastT_info[0], kn, threshold)
    break

acc_pcaT_all += [acc3]
acc_pcaT_infogain += [acc4]
acc_nb += [acc1]
acc_knn += [acc2]
# acc_fastT_all += [acc5]
acc_fastT_infogain += [acc6]