# Build the k-nearest-neighbours classifier from the TF-IDF results computed
# earlier, using the neighbour count `k` and the distance `metric` chosen above.
knn = KNN(tf_idf_calculator.results, k, metric)

# confusion_matrix[A][B] = how many times a document of class A was assigned
# to class B.
topics = ['baseball', 'christian', 'guns']
confusion_matrix = {topic: dict.fromkeys(topics, 0) for topic in topics}

print_log = False  # flip to True to dump every classified document
i = 0              # running count of classified test documents
ytrue = []         # expected label of each test document, in visit order
ypred = []         # label returned by the classifier, same order as ytrue

for topic in topics:
    for doc in reader.test[topic]:
        ytrue.append(topic)

        # Classify the test document.
        words = parser.process_sent(doc)
        query = tf_idf_calculator.generate_tf_vector(words)
        result = knn.classify(query)

        confusion_matrix[topic][result] += 1
        ypred.append(result)
        i += 1

        if print_log:
            # One value per line, preceded by a blank separator line.
            print('', i, doc, words, query, result, sep='\n')

# ...and print the results.
print('#' * 40)
s = '{0}{1}{0}'.format('#' * 10, ' K=%d || dist=%s ' % (k, metric))