def process_batch(models, folder):
    """Classify every file in ``folder`` and print the overall accuracy.

    The expected language of a file is read from its name: the base name
    without extension, cut at the first ``-``.  Each file is turned into an
    n-gram profile, classified with ``identify_language`` against ``models``,
    and the prediction is printed next to the expected language.

    Args:
        models: Language models, passed unchanged to ``identify_language``.
        folder: Directory whose files (as listed by
            ``ioutils.list_files_only``) are evaluated.

    Returns:
        None.  Results are reported on standard output only.
    """
    total = 0
    no_correct = 0
    # expected language -> {predicted language -> number of misclassified files}
    # NOTE(review): the matrix is collected but neither printed nor returned.
    confusion_matrix = dict()

    for path in ioutils.list_files_only(folder):
        total += 1
        with open(path, 'r') as f:
            # The trailing comma makes the Python 2 print statement stay on
            # the same line, so the prediction below is appended to it.
            print('Process file %s' % path),
            document = ngramprofile.build_profile(f)
            document.language = os.path.splitext(
                os.path.basename(path))[0].split('-')[0]
            predicted_language = identify_language(models, document)

        print('Predict: %s expect: %s'
              % (predicted_language, document.language))

        if predicted_language == document.language:
            no_correct += 1
        else:
            # Count inside the row of the expected language.  Looking the
            # prediction up in the outer dict never accumulated and could
            # even return a whole row (dict + 1 -> TypeError).
            row = confusion_matrix.setdefault(document.language, dict())
            row[predicted_language] = row.get(predicted_language, 0) + 1

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero below.
        print('Accuracy: n/a (no files found)')
        return

    # Multiply by 1.0 to force float division under Python 2.
    print('Accuracy: %s' % (no_correct * 1.0 / total))
def read_models(folder):
    """Load one profile per file found in ``folder``.

    Args:
        folder: Directory listed with ``ioutils.list_files_only``.

    Returns:
        A list holding the result of ``ngramprofile.load_profile`` for each
        listed path, in listing order.
    """
    return [
        ngramprofile.load_profile(profile_path)
        for profile_path in ioutils.list_files_only(folder)
    ]