def main(args):
    """Run the top-k video retrieval experiment, naively and with LSH.

    Writes one result directory per method under ``args.output``: the k
    best-matching videos, their distances to the probe, the query time,
    and the pickled database size.
    """
    database = parse_database(args.database)
    probe = np.load(args.probe)
    os.makedirs(args.output, exist_ok=True)
    copy_video(video_id_from_name(args.probe), args.videos,
               os.path.join(args.output, 'probe.avi'))

    # --- exact (naive) top-k search -----------------------------------
    start = timer()
    k_naive = top_k_naive(database, probe, args.k)
    end = timer()
    naive_dir = os.path.join(args.output, 'naive')
    os.makedirs(naive_dir, exist_ok=True)
    with open(os.path.join(naive_dir, 'results.txt'), 'w') as f:
        # enumerate instead of range(len(...)); unpack (vector, video_id).
        for i, (vec, video_id) in enumerate(k_naive):
            d = np.linalg.norm(vec - probe)
            f.write('Video ID: {}, Distance: {}\n'.format(video_id, d))
            copy_video(video_id, args.videos,
                       os.path.join(naive_dir, '{}.avi'.format(i)))
        f.write('Time: {} s\n'.format(end - start))
        f.write('DB Memory: {}\n'.format(
            human_size(sys.getsizeof(pickle.dumps(database)))))

    # --- LSH top-k search, one run per requested basis size -----------
    for basis_size in args.lsh:
        basis = lsh.generate_basis(basis_size, database[0][0].shape[0])
        probe_lsh = lsh.lsh(probe, basis)
        database_lsh = lsh_database(database, basis)
        start = timer()
        k_l = top_k_lsh(database_lsh, probe_lsh, args.k)
        end = timer()
        # Hoist the full result path; the original re-joined it per use.
        result_dir = os.path.join(args.output, 'lsh_{}'.format(basis_size))
        os.makedirs(result_dir, exist_ok=True)
        with open(os.path.join(result_dir, 'results.txt'), 'w') as f:
            for i, (sig, video_id) in enumerate(k_l):
                d = lsh.hamming_distance(sig, probe_lsh)
                f.write('Video ID: {}, Distance: {}\n'.format(video_id, d))
                copy_video(video_id, args.videos,
                           os.path.join(result_dir, '{}.avi'.format(i)))
            f.write('Time: {} s\n'.format(end - start))
            f.write('DB Memory: {}\n'.format(
                human_size(sys.getsizeof(pickle.dumps(database_lsh)))))
def __init__(self):
    # Load the raw train/test splits shipped with the project.
    self.train_dataframe = pd.read_csv('data/training.csv', header=0)
    self.test_dataframe = pd.read_csv('data/test.csv', header=0)
    # Keep the test-set RefId column aside — presumably so predictions can be
    # written out keyed by RefId after preprocessing; verify against callers.
    self.test_dataframe_refId = self.test_dataframe['RefId']
    self.preprocess_data()
    # Initialise classifiers
    # Skip column 0 — assumed to be the target/ID column; TODO confirm.
    self.attributes = list(self.train_dataframe.columns.values)[1:]
    self.lsh_neighbours = 2000
    self.initialise_knn()
    self.initialise_pca()
    self.initialise_svm()
    self.initialise_nn()
    # Build the LSH index over the (already preprocessed) training frame.
    self.lsh = lsh.lsh(self.train_dataframe)
def __init__(self):
    # Load the raw train/test splits shipped with the project.
    # NOTE(review): this is a near-verbatim duplicate of another __init__ in
    # this file (differing only in quote style) — consider deduplicating.
    self.train_dataframe = pd.read_csv("data/training.csv", header=0)
    self.test_dataframe = pd.read_csv("data/test.csv", header=0)
    # Keep the test-set RefId column aside before preprocessing runs.
    self.test_dataframe_refId = self.test_dataframe["RefId"]
    self.preprocess_data()
    # Initialise classifiers
    # Skip column 0 — assumed to be the target/ID column; TODO confirm.
    self.attributes = list(self.train_dataframe.columns.values)[1:]
    self.lsh_neighbours = 2000
    self.initialise_knn()
    self.initialise_pca()
    self.initialise_svm()
    self.initialise_nn()
    # Build the LSH index over the (already preprocessed) training frame.
    self.lsh = lsh.lsh(self.train_dataframe)
def compute_hashes(domains, n, num_perms=32, max_items=100, hash_function=lsh.md5hash):
    """Build an LSH index of MinHash digests for every domain.

    ``domains`` is a dict of domain objects keyed by domain name.  Each
    object's ``ngrams[n]`` set is digested; the digest is stored back on
    the object and inserted into the index under the domain's name.
    Returns the populated LSH index.
    """
    # Create the LSH index.
    index = lsh.lsh(num_perms, hash_function)
    # Compute the MinHash digest for each domain and register it.
    for name, domain in domains.items():
        digest = index.digest(domain.ngrams[n])
        domain.digest = digest
        index.insert(name, digest)
    return index
from org.apache.lucene.search.highlight import QueryScorer
import datetime
import parser
import web
import jieba
import lsh
import extract_faces
#import extract_features
from flask import Flask, redirect, render_template, request, url_for

app = Flask(__name__, static_url_path='/static')
search_result = []  # stores the search results
dic1 = dict()
dic2 = dict()
# Build LSH indexes once at startup: one over image features, one over faces.
LSH_1 = lsh.lsh('features')
LSH_1.generate_hash()
LSH_2 = lsh.lsh('faces_features')
LSH_2.generate_hash()


###############
# Keyword-based search
###############
def search(keyword):
    # Attach this thread to the Lucene JVM before touching the index.
    STORE_DIR = "sports_index"
    vm_env.attachCurrentThread()
    directory = SimpleFSDirectory(File(STORE_DIR).toPath())
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = StandardAnalyzer()
    # Delegate the actual query to get_res; presumably returns (count, hits).
    res_cnt, res_list = get_res(searcher, analyzer, keyword)
def lsh_database(database, basis):
    """Hash every database entry with *basis*.

    *database* is a sequence of (representation, video_id) pairs; the
    result is a new list of (lsh_signature, video_id) pairs.  The input
    is not modified.
    """
    hashed = []
    for representation, video_id in database:
        hashed.append((lsh.lsh(representation, basis), video_id))
    return hashed
def twitter():
    """Train and evaluate a hand-rolled multinomial naive Bayes classifier
    on the Twitter data set, then run the custom LSH and nearest-neighbour
    classifiers on the same bag-of-words train/test split."""
    # sklearn moved train_test_split from cross_validation to
    # model_selection in version 0.18.
    if int((sklearn.__version__).split(".")[1]) < 18:
        from sklearn.cross_validation import train_test_split
    else:
        from sklearn.model_selection import train_test_split
    #Twitter Data Set
    ip_file_path = 'data/twitter/'
    ip_file_name = 'twitter.txt'
    out = 'twitter'
    ip_label_file_name = 'twitter_label.txt'
    test_file_name = 'twitter_test.txt'
    test_label_file_name = 'twitter_test_label.txt'
    vocabulary = {}
    # NOTE(review): input_file is never closed in this function; a
    # with-statement would be safer.
    input_file = open(ip_file_path + ip_file_name, 'r+')
    file_lines = input_file.readlines()
    # One tweet per line, whitespace-tokenised into a list of words.
    word_matrix = []
    for line in file_lines:
        line = line.strip()
        sentence = line.split(" ")
        word_matrix.append(sentence)
    label_matrix = np.genfromtxt(ip_file_path + ip_label_file_name, delimiter=' ')
    (train_data, test_data, train_labels, test_labels) = train_test_split(
        word_matrix, label_matrix, test_size=0.20, random_state=42)
    #prior_dict = prior_density(train_labels)
    # Word -> count vocabulary, built from the training split only.
    for sentence in train_data:
        for word in sentence:
            if (word not in vocabulary):
                vocabulary[word] = 1
            else:
                vocabulary[word] = vocabulary[word] + 1
    #orderedDictionary = collections.OrderedDict(sorted(vocabulary.items(), key=lambda x: x[1]))
    # Drop rare words (frequency <= 3).
    updated_voc = {k: v for k, v in vocabulary.items() if v > 3}
    # Bag-of-words count matrices for train and test.
    data_matrix = np.zeros((len(train_data), len(updated_voc)))
    test_matrix = np.zeros((len(test_data), len(updated_voc)))
    i = 0
    for sentence in train_data:
        for word in sentence:
            if (word not in updated_voc):
                continue
            # NOTE(review): list(updated_voc.keys()).index(word) is O(V)
            # per word; a precomputed word->index dict would be much faster.
            index = list(updated_voc.keys()).index(word)
            data_matrix[i][index] = data_matrix[i][index] + 1
        i = i + 1
    i = 0
    for sentence in test_data:
        for word in sentence:
            if (word not in updated_voc):
                continue
            index = list(updated_voc.keys()).index(word)
            test_matrix[i][index] = test_matrix[i][index] + 1
        i = i + 1
    # Group training rows by class and compute the class priors.
    class_partition_set = class_partition(data_matrix, train_labels)
    prior_prob_set = {}
    for class_value, features in class_partition_set.items():
        prior_prob_set[class_value] = len(features) / len(train_data)
    #Bayes On Twitter
    # Laplace-smoothed per-word likelihoods for each class.
    class_densities_words = {}
    for class_value, features in class_partition_set.items():
        voc_size = len(features[0])
        matrix = np.matrix(features)
        sum_elements = np.sum(matrix)
        deno = voc_size + sum_elements
        c_sum = matrix.sum(axis=0).tolist()
        densities = []
        for i in range(0, len(c_sum[0])):
            num = c_sum[0][i] + 1
            prob = (num) / deno
            densities.append(prob)
        class_densities_words[class_value] = densities
    predictions = []
    accuracy = 0
    # Classify each test row: argmax over class of prior * product of
    # word likelihoods (counted words contribute density ** count).
    for i in range(len(test_matrix)):
        test_vector = np.zeros((len(updated_voc)))
        #testVector = list(testData[0])
        for word in test_data[i]:
            if (word not in updated_voc):
                continue
            index = list(updated_voc.keys()).index(word)
            test_vector[index] += 1
        probabilities = {}
        for class_value, densities in class_densities_words.items():
            prob = prior_prob_set[class_value]
            # NOTE(review): this inner loop reuses (and clobbers) the outer
            # loop variable `i`.  It is harmless here because the outer
            # `for` reassigns `i` each iteration and `test_data[i]` is read
            # before the clobber, but it deserves a rename.
            for i in range(0, len(test_vector)):
                if (test_vector[i] != 0):
                    if (test_vector[i] == 1):
                        prob = prob * densities[i]
                    else:
                        prob = prob * np.power(densities[i], test_vector[i])
            probabilities[class_value] = prob
        predictions.append(max(probabilities, key=probabilities.get))
    for x in range(len(test_data)):
        if test_labels[x] == predictions[x]:
            accuracy += 1
    accuracy_mycode = (accuracy / len(test_data)) * 100.0
    print("Accuracy without using library ", accuracy_mycode)
    # Run the project's LSH and nearest-neighbour classifiers on the same
    # matrices for comparison (results are printed by those functions).
    lsh.lsh(data_matrix, train_labels, test_matrix, test_labels)
    nn.nearest_neighbour(data_matrix, train_labels, test_matrix, test_labels)
def main(csv_txt, write_flag, path, label_path):
    """Classify test points via LSH-bucketed k-NN and print accuracy/F1.

    csv_txt selects the input format forwarded to read_csv_file (0 = csv,
    1 = txt).  write_flag is currently unused but kept for interface
    compatibility with existing callers.
    """
    # Read data + labels; the third argument selects the file format.
    if csv_txt == 0:
        input_data, input_label = read_csv_file(path, label_path, 0)
    elif csv_txt == 1:
        input_data, input_label = read_csv_file(path, label_path, 1)
    split_ratio = 0.80
    input_train, input_train_label, input_test, input_test_label = split_input_data(
        input_data, input_label, split_ratio)
    k = 3
    number_hash_function = 1
    orginal_dim = len(input_data[0])
    new_dim = 5
    # Build the LSH hash tables over the training split.
    hash_tables, all_hash_functions = lsh.lsh(
        number_hash_function, orginal_dim, new_dim, input_train, input_train_label)
    input_test = np.asarray(input_test)
    # Project the test data with the (single) hash function the tables use.
    projected_test_data = np.dot(input_test, all_hash_functions[0])
    predict_label = []
    for row in range(len(projected_test_data)):
        test_vector = projected_test_data[row]
        all_neighbour = np.asarray(
            lsh.get_all_localiy_sensetive_elements(hash_tables, test_vector))
        if len(all_neighbour.shape) == 0:
            # Bucket was empty: fall back to class 0.
            cur_class = 0
        else:
            # Last column of each neighbour row is its label.
            input_train = all_neighbour[:, 0:-1]
            input_train_label = all_neighbour[:, -1]
            if len(input_train_label) >= k:
                top_k_neighbour = nn.get_top_k_neighbours(
                    k, input_train, test_vector, input_train_label)
                cur_class = nn.find_class(np.atleast_2d(top_k_neighbour))
            else:
                # Fewer than k neighbours: take the first one's label.
                cur_class = input_train_label[0]
        predict_label.append(cur_class)
    test_accuracy, macro, micro, score = nn.calculate_test_accuracy(
        input_test_label, predict_label)
    # BUG FIX: the original called print('...').format(...), which raises
    # AttributeError because print() returns None — format the string first.
    print(
        'Test Accuracy :: {0} %\n\nTest Macro F1-score :: {1}\n\nTest Micro F1-score :: {2}\n\nTest weighted F1 score :: {3}'
        .format(test_accuracy, macro * 100, micro * 100, score * 100))
def main(): """ This functions is main function in your folder which should run first calls other functions and calculate time taken to execute for evry function call and serializing all Dataframes obtained Parametrs --------- similar_docs: Python Dictionary() Dictionary of similar documents to Query Document query: string string representig query document's file name jscore : float User threshold for jaccard similarity score between two documents Returns ------- Precision_count : int Returns number of documents from input similar_docs dictionary that have higher jaccard score than User threshold """ shingle_length = 4 #set shingle length if os.path.exists("./shingle_pickle4.py") == False: time_start = time.time() preprocess.preprocess( './temp') #Time calculation for pre processing files time_end = time.time() print(time_end - time_start) # 0.01327657699584961 if os.path.exists("./shingle_pickle4.py") == False: time_start = time.time() shingleDf = shingle.get_shingles( shingle_length) #Time calculation fro shingling time_end = time.time() print(time_end - time_start) # 207.01369958496123463 # shingleDf.to_pickle("./shingle_pickle4.py") # shingleDf.to_pickle("./shingle_pickle.py") # un_pickle_df=pd.read_pickle("./shingle_pickle.py") if os.path.exists("./sig_nat.pickle") == False: time_start = time.time() signatureDf = minhashing.generate_signature_matrix( shingleDf, 100) # Time calculation for min hashing time_end = time.time() print(time_end - time_start) # 2041.65769958496173297 # signatureDf.to_pickle("./signature_pickle4shingles.py") # un_pickle_df=pd.read_pickle("./signature_pickle.py") output0 = "Please Enter document number : " # taking user input query and threshold jaccard score print(output0) query = (int)(input()) output1 = "Please Enter threshold jscore: " print(output1) sig_matrix = pd.read_pickle("./sig_matc_4shigles.pickle") jscore = (float)(input()) for i in range(query, query + 1): similar_documents = lsh.lsh(sig_matrix, sig_matrix.columns[i], 100) 
# applying Lsh on signature matrix print(similar_documents) if (len(similar_documents) == 0): print("No similar Documents found") else: p_count = lsh.precision( similar_documents, sig_matrix.columns[i], jscore=0.1) #calculating precision for retreivs documents print("precision is = " + (str)(p_count / len(similar_documents))) r_count = lsh.recall( sig_matrix.columns, sig_matrix.columns[i], jscore=0.1) #calculating recall for retreived documents print("recall is = " + (str)(p_count / r_count))
# Optional CLI override of the similarity threshold.
# NOTE(review): if no argument is given, `threshold` must have been assigned
# earlier in the file, otherwise the calls below raise NameError — verify.
if len(sys.argv) > 1:
    threshold = float(sys.argv[1])
(row, b) = findLSHParameters(numHashFunctions, threshold)
# Get the data
documents = []
unProcessedDocuments = readDataJson()
# Element [1] of each record is presumably the document text — TODO confirm.
for i in range(0, len(unProcessedDocuments)):
    documents.append(constructShingles(unProcessedDocuments[i][1], shingleLength))
# Generate signature matrix
print(len(documents))
sm = minHash(documents, numHashFunctions)
# Get candidate pairs
candidatePairs = findCandidatePairs(lsh(sm, b, row))
# Get similar pairs by comparing all signatures
similarDocuments = findSimilarDocumentsCompareToOne(sm, threshold, unProcessedDocuments)
# NOTE(review): printElements aliases similarDocuments, so the sort below
# also reorders similarDocuments in place.
printElements = similarDocuments
printElements.sort(key=takeThird, reverse=True)
for doc in printElements:
    print(doc)
print(len(similarDocuments))
allSimilarDocuments = []
for doc in similarDocuments:
    # NOTE(review): the dict() assignment is immediately overwritten, and
    # nothing in this chunk appends docDic to allSimilarDocuments — the loop
    # body presumably continues past this excerpt.
    docDic = dict()
    docDic = {"id1": doc[0], "id2": doc[1], "similarity": doc[2]}
f.write("Bayes Test Accuracy " + str(accuracyBayes)) f.write("\n") f.write("Bayes Test Macro F1-score " + str(f1_score_macroBayes)) f.write("\n") f.write("Bayes Test Micro F1-score " + str(f1_score_microBayes)) f.write("\n") f.write("\n") # endregion # region Task-VI:LSH Custom Code print("Custom LSH Classifier Statistcs:") f.write("Custom LSH Classifier Statistcs:") f.write("\n") f.write("\n") accuracyLSH, f1_score_macroLSH, f1_score_microLSH = lsh.lsh( trainData, trainLabels, testData, testLabels) print("LSH Test Accuracy ", str(accuracyLSH)) print("LSH Test Macro F1-score ", str(f1_score_macroLSH)) print("LSH Test Micro F1-score ", str(f1_score_microLSH)) f.write("Original Dimension :" + str(noOfColumns)) f.write("\n") f.write("LSH Test Accuracy " + str(accuracyLSH)) f.write("\n") f.write("LSH Test Macro F1-score " + str(f1_score_macroLSH)) f.write("\n") f.write("LSH Test Micro F1-score " + str(f1_score_microLSH)) f.write("\n") f.write("\n") # endregion
def twitter(testMatrix, testlabelMatrix):
    """Evaluate the custom KNN, Bayes and LSH classifiers on the Twitter set.

    testMatrix / testlabelMatrix carry the raw test sentences and labels;
    metrics are printed and written to Twitter_stat.txt.
    NOTE(review): the ``testMatrix`` parameter is shadowed below by the
    multinomial numpy matrix of the same name — consider renaming one.
    """
    vocabulary = {}
    Accuracy = {}
    F1ScoreMacro = {}
    F1ScoreMicro = {}
    # region handle older versions of sklearn
    if int((sklearn.__version__).split(".")[1]) < 18:
        from sklearn.cross_validation import train_test_split
    # otherwise we're using at lease version 0.18
    else:
        from sklearn.model_selection import train_test_split
    # endregion
    # region Twitter Data Set
    outputFileName = "Twitter_stat.txt"
    # NOTE(review): neither this file nor inputFile below is closed on an
    # exception path; with-statements would be safer.
    f = open(outputFileName, "w")
    print("Processing Twitter DataSet.......")
    f.write("Processing Twitter DataSet.......")
    f.write("\n")
    # region Input File INformation
    inputFilePath = '../data/twitter/'
    inputFileName = 'twitter.txt'
    out = 'twitter'
    inputLabelFileName = 'twitter_label.txt'
    labelMatrix = np.genfromtxt(inputFilePath + inputLabelFileName, delimiter=' ')
    # endregion
    # region Generate Word Matrix from txt file
    inputFile = open(inputFilePath + inputFileName, 'r+')
    fileLines = inputFile.readlines()
    wordMatrix = []
    for line in fileLines:
        line = line.strip()
        sentence = line.split(" ")
        wordMatrix.append(sentence)
    # endregion
    # region Original Dimension Analysis KNN/Bayes/LSH
    trainData = wordMatrix
    trainLabels = labelMatrix
    testData = testMatrix
    testLabels = testlabelMatrix
    # region Generate Vocubulary Dictionary
    # Word -> count, built from the training sentences only.
    for sentence in trainData:
        for word in sentence:
            if (word not in vocabulary):
                vocabulary[word] = 1
            else:
                vocabulary[word] = vocabulary[word] + 1
    # Keep only words occurring more than twice.
    updatedVoc = {k: v for k, v in vocabulary.items() if v > 2}
    # endregion
    # region Convert Word Matrix into MultiNomial Form
    dataMatrix = np.zeros((len(trainData), len(updatedVoc)))
    testMatrix = np.zeros((len(testData), len(updatedVoc)))
    i = 0
    for sentence in trainData:
        for word in sentence:
            if (word not in updatedVoc):
                continue
            # NOTE(review): list(...).index(word) is O(V) per word; a
            # precomputed word->index dict would be much faster.
            index = list(updatedVoc.keys()).index(word)
            dataMatrix[i][index] = dataMatrix[i][index] + 1
        i = i + 1
    i = 0
    for sentence in testData:
        for word in sentence:
            if (word not in updatedVoc):
                continue
            index = list(updatedVoc.keys()).index(word)
            testMatrix[i][index] = testMatrix[i][index] + 1
        i = i + 1
    # endregion
    noOfColumns = len(dataMatrix[0])
    # region K-Nearest Neighbour Classifier
    print("Custom K-NN Classifier Statistcs:")
    print("K-Value Taken:5")
    f.write("Custom K-NN Classifier Statistcs:")
    f.write("\n")
    f.write("K-Value Taken:5")
    f.write("\n")
    accuracyNN, f1_score_macroNN, f1_score_microNN = nn.nearestNeighbour(
        dataMatrix, trainLabels, testMatrix, testLabels)
    print("KNN Test Accuracy ", str(accuracyNN))
    print("KNN Test Macro F1-score ", str(f1_score_macroNN))
    print("KNN Test Micro F1-score ", str(f1_score_microNN))
    f.write("\n")
    f.write("KNN Test Accuracy " + str(accuracyNN))
    f.write("\n")
    f.write("KNN Test Macro F1-score " + str(f1_score_macroNN))
    f.write("\n")
    f.write("KNN Test Micro F1-score " + str(f1_score_microNN))
    f.write("\n")
    f.write("\n")
    # endregion
    # region Bayes Classifier
    print("Custom Bayes Classifier Statistcs:")
    f.write("Custom Bayes Classifier Statistcs:")
    f.write("\n")
    accuracyBayes, f1_score_macroBayes, f1_score_microBayes = naiveBayesMultiNomial(
        dataMatrix, trainLabels, testMatrix, testLabels, updatedVoc)
    print("Bayes Test Accuracy ", str(accuracyBayes))
    print("Bayes Test Macro F1-score ", str(f1_score_macroBayes))
    print("Bayes Test Micro F1-score ", str(f1_score_microBayes))
    f.write("Original Dimension :" + str(noOfColumns))
    f.write("\n")
    f.write("Bayes Test Accuracy " + str(accuracyBayes))
    f.write("\n")
    f.write("Bayes Test Macro F1-score " + str(f1_score_macroBayes))
    f.write("\n")
    f.write("Bayes Test Micro F1-score " + str(f1_score_microBayes))
    f.write("\n")
    f.write("\n")
    # endregion
    # region LSH Custom Code
    print("Custom LSH Classifier Statistcs:")
    f.write("Custom LSH Classifier Statistcs:")
    f.write("\n")
    f.write("\n")
    accuracyLSH, f1_score_macroLSH, f1_score_microLSH = lsh.lsh(
        dataMatrix, trainLabels, testMatrix, testLabels)
    print("LSH Test Accuracy ", str(accuracyLSH))
    print("LSH Test Macro F1-score ", str(f1_score_macroLSH))
    print("LSH Test Micro F1-score ", str(f1_score_microLSH))
    f.write("Original Dimension :" + str(noOfColumns))
    f.write("\n")
    f.write("LSH Test Accuracy " + str(accuracyLSH))
    f.write("\n")
    f.write("LSH Test Macro F1-score " + str(f1_score_macroLSH))
    f.write("\n")
    f.write("LSH Test Micro F1-score " + str(f1_score_microLSH))
    f.write("\n")
    f.write("\n")
    # endregion
    # endregion
    # region Random Projection
    # The remaining regions are disabled experiment code kept as string
    # literals (never executed); preserved as-is apart from formatting.
    '''
    K = 2
    while (K <= int(noOfColumns / 2)):
        Accuracy[K] = 0
        F1ScoreMacro[K] = 0
        F1ScoreMicro[K] = 0
        randomDataMatrix,randomTestMatrix = projections.randomProjectionTwitter(dataMatrix,testMatrix, noOfColumns, K, inputFilePath, out)
        #randomTestMatrix = projections.randomProjection(testMatrix, noOfColumns, K, inputFilePath, out)
        #accuracyOfMyCode, f1_score_macro, f1_score_micro = nn.nearestNeighbour(randomDataMatrix, trainLabels,randomTestMatrix,testLabels)
        accuracyOfMyCode, f1_score_macro, f1_score_micro = naiveBayesMultiNomial(randomDataMatrix, trainLabels,randomTestMatrix, testLabels,updatedVoc)
        print("Reduced Dimension :", K)
        f.write("Reduced Dimension :" + str(K))
        f.write("\n")
        print("Test Accuracy ", str(accuracyOfMyCode))
        print("Test Macro F1-score ", str(f1_score_macro))
        print("Test Micro F1-score ", str(f1_score_micro))
        f.write("Test Accuracy " + str(accuracyOfMyCode))
        f.write("\n")
        f.write("Test Macro F1-score " + str(f1_score_macro))
        f.write("\n")
        f.write("Test Micro F1-score " + str(f1_score_micro))
        f.write("\n")
        f.write("\n")
        Accuracy[K] = accuracyOfMyCode
        F1ScoreMacro[K] = f1_score_macro
        F1ScoreMicro[K] = f1_score_micro
        K = K * 2
    '''
    # endregion
    # region Plot and Save Data
    '''
    index = []
    accuracyList = []
    for k, accuracy in Accuracy.items():
        index.append(k)
        accuracyList.append(accuracy)
    #title = "Accuracy:Different D values Of custom K-NN for Twitter Data Set."
    #figname = "custom_K-NN_Accuracy_Twitter.png"
    #title = "Accuracy:Different D values Of Sklearn K-NN for Twitter Data Set."
    #figname = "Sklearn_K-NN_Accuracy_Twitter.png"
    #title = "Accuracy: Different D values Of custom Bayes for Twitter Data Set."
    #figname="custom_Bayes_Accuracy_Twitter.png"
    # title = "Accuracy: Different D values Of Sklearn Bayes for Twitter Data Set."
    # figname="Sklearn_Bayes_Accuracy_Twitter.png"
    plotData(index, accuracyList, 'D-Value', 'Accuracy', title, figname, np.arange(0, 100, step=10))
    index = []
    F1MacroList = []
    for k, f1_score_macro in F1ScoreMacro.items():
        index.append(k)
        F1MacroList.append(f1_score_macro)
    #title = "F1-Score(Macro):Different D values Of custom K-NN for Twitter Data Set."
    #figname = "custom_K-NN_F1-Score(Macro)_Twitter.png"
    #title = "F1-Score(Macro):Different D values Of Sklearn K-NN for Twitter Data Set."
    #figname = "Sklearn_K-NN_F1-Score(Macro)_Twitter.png"
    #title = "F1-Score(Macro): Different D values Of custom Bayes for Twitter Data Set."
    #figname = "custom_Bayes_F1-Score(Macro)_Twitter.png"
    # title = "F1-Score(Macro): Different D values Of Sklearn Bayes for Twitter Data Set."
    # figname = "Sklearn_Bayes_F1-Score(Macro)_Twitter.png"
    plotData(index, F1MacroList, 'D-Value', 'F1-Score(Macro)', title, figname, np.arange(0, 1, step=0.1))
    index = []
    F1MicroList = []
    for k, f1_score_micro in F1ScoreMicro.items():
        index.append(k)
        F1MicroList.append(f1_score_micro)
    #title = "F1-Score(Micro):Different D values Of custom K-NN for Twitter Data Set."
    #figname = "custom_K-NN_F1-Score(Micro)_Twitter.png"
    #title = "F1-Score(Micro):Different D values Of Sklearn K-NN for Twitter Data Set."
    #figname = "Sklearn_K-NN_F1-Score(Micro)_Twitter.png"
    #title = "F1-Score(Micro): Different D values Of custom Bayes for Twitter Data Set."
    #figname = "custom Bayes_F1-Score(Micro)_Twitter.png"
    # title = "F1-Score(Micro): Different D values Of Sklearn Bayes for Twitter Data Set."
    # figname = "Sklearn Bayes_F1-Score(Micro)_Twitter.png"
    plotData(index, F1MicroList, 'D-Value', 'F1-Score(Micro)', title, figname, np.arange(0, 1, step=0.1))
    '''
    # endregion
    # region Task-VII PCA Analysis:Twitter
    '''
    K = 2
    while (K <= int(noOfColumns / 2)):
        pca = PCA(n_components=K).fit(dataMatrix)
        data_reduce = pca.transform(dataMatrix)
        pca2 = PCA(n_components=K).fit(testMatrix)
        test_reduce = pca2.transform(testMatrix)
        #accuracyOfMyCode, f1_score_macro, f1_score_micro = nn.nearestNeighbour(data_reduce, trainLabels, test_reduce,testLabels)
        accuracyOfMyCode, f1_score_macro, f1_score_micro = naiveBayesMultiNomial(data_reduce, trainLabels, test_reduce,testLabels,updatedVoc)
        print("PCA Reduced Dimension :", K)
        print("Test Accuracy ", str(accuracyOfMyCode))
        print("Test Macro F1-score ", str(f1_score_macro))
        print("Test Micro F1-score ", str(f1_score_micro))
        f.write("Reduced Dimension :" + str(K))
        f.write("\n")
        f.write("Test Accuracy " + str(accuracyOfMyCode))
        f.write("\n")
        f.write("Test Macro F1-score " + str(f1_score_macro))
        f.write("\n")
        f.write("Test Micro F1-score " + str(f1_score_micro))
        f.write("\n")
        f.write("\n")
        Accuracy[K] = accuracyOfMyCode
        F1ScoreMacro[K] = f1_score_macro
        F1ScoreMicro[K] = f1_score_micro
        K = K * 2
    index = []
    accuracyList = []
    for k, accuracy in Accuracy.items():
        index.append(k)
        accuracyList.append(accuracy)
    #title = "Accuracy:Different D values Of PCA K-NN for Twitter Data Set."
    #figname = "PCA_K-NN_Accuracy_Twitter.png"
    title = "Accuracy: Different D values Of PCA Bayes for Twitter Data Set."
    figname = "PCA_Bayes_Accuracy_Twitter.png"
    plotData(index, accuracyList, 'D-Value', 'Accuracy', title, figname, np.arange(0, 100, step=10))
    index = []
    F1MacroList = []
    for k, f1_score_macro in F1ScoreMacro.items():
        index.append(k)
        F1MacroList.append(f1_score_macro)
    #title = "F1-Score(Macro):Different D values Of PCA K-NN for Twitter Data Set."
    #figname = "PCA_K-NN_F1-Score(Macro)_Twitter.png"
    title = "F1-Score(Macro): Different D values Of PCA Bayes for Twitter Data Set."
    figname = "PCA_Bayes_F1-Score(Macro)_Twitter.png"
    plotData(index, F1MacroList, 'D-Value', 'F1-Score(Macro)', title, figname, np.arange(0, 1, step=0.1))
    index = []
    F1MicroList = []
    for k, f1_score_micro in F1ScoreMicro.items():
        index.append(k)
        F1MicroList.append(f1_score_micro)
    #title = "F1-Score(Micro):Different D values Of PCA K-NN for Twitter Data Set."
    #figname = "PCA_K-NN_F1-Score(Micro)_Twitter.png"
    title = "F1-Score(Micro): Different D values Of PCA Bayes for Twitter Data Set."
    figname = "PCA Bayes_F1-Score(Micro)_Twitter.png"
    plotData(index, F1MicroList, 'D-Value', 'F1-Score(Micro)', title, figname, np.arange(0, 1, step=0.1))
    '''
    # endregion
    f.close()
#figname="task_3_Bayes_Accuracy_"+str(data_set_name.upper())+".png" #plot_data(index,accuracy_list,'Dimension','Accuracy',title,figname,np.arange(0,100,step=10)) index = [] F1MacroList = [] for k, f1_score_macro in F1ScoreMacro.items(): index.append(k) F1MacroList.append(f1_score_macro) title = "F1-Score(Macro) v/s Dimension for nearest neighbor classifier for " + str(data_set_name) + " Data Set." figname = "task_3_NN_F1-Score(Macro)_" + str(data_set_name) + ".png" #title = "F1-Score(Macro) v/s Dimension for Bayes classifier for " + str(data_set_name.upper()) + " Data Set." #figname = "task_3_Bayes_F1-Score(Macro)_" + str(data_set_name.upper()) + ".png" plot_data(index, F1MacroList, 'Dimension', 'F1-Score(Macro)', title, figname,np.arange(0,1,step=0.1)) index = [] F1MicroList = [] for k, f1_score_micro in F1ScoreMicro.items(): index.append(k) F1MicroList.append(f1_score_micro) title = "F1-Score(Micro) v/s Dimension for nearest neighbor classifier for " + str(data_set_name) + " Data Set." figname = "task_3_NN_F1-Score(Micro)_" + str(data_set_name) + ".png" #title = "F1-Score(Micro) v/s Dimension for Bayes classifier for " + str(data_set_name.upper()) + " Data Set." #figname = "task_3_Bayes_F1-Score(Micro)_" + str(data_set_name.upper()) + ".png" plot_data(index, F1MicroList, 'Dimension', 'F1-Score(Micro)', title, figname,np.arange(0,1,step=0.1)) #prior_dict=prior_density(train_labels) #accuracy_mycode, f1_score_macro, f1_score_micro=nn.nearest_neighbour(train_data,train_labels,test_data,test_labels) lsh.lsh(train_data, train_labels, test_data, test_labels) #bayes.bayes_classifier(data_matrix,label_matrix)