class Benchmarker:
    """Leave-one-out accuracy benchmark for a classification method.

    Every stored record is classified against all the other records, and the
    share of correct predictions is reported per class name and per feature.
    """

    def __init__(self):
        # Collection wrapper; only ``get_all_records()`` is used by this class.
        self.mongoCollection = MongoCollection()

    def benchmark(self, classes, classification_method, fields):
        """Print the accuracy ranking of ``fields`` for every class name.

        Args:
            classes: iterable of class names to benchmark.
            classification_method: callable invoked as
                ``method(record, class_name, feature, other_records)``; its
                result is compared with ``record["classes"][class_name]``.
            fields: iterable of feature names to benchmark.

        Returns:
            None. The accumulated result is printed to stdout. Nothing is
            printed when fewer than two records are available.
        """
        all_records = self.mongoCollection.get_all_records()
        # Leave-one-out needs one record to classify plus at least one to
        # compare against. The count must be checked, not the collection
        # itself: ``records < 2`` never triggers on Python 2.
        if len(all_records) < 2:
            return

        rankings = {}
        for class_name in classes:
            ranking = self.create_accuracy_ranking(
                all_records, fields, class_name, classification_method)
            # Project helper; presumably stores ``ranking`` under
            # ``class_name`` and returns the mapping -- TODO confirm.
            rankings = update_and_return_json(rankings, class_name, ranking)
        # Single-argument form prints identically on Python 2 and Python 3.
        print(rankings)

    def create_accuracy_ranking(self, records, features_to_benchmark,
                                class_name, delivered_classification_method):
        """Compute the leave-one-out accuracy of each feature.

        Args:
            records: sequence of records; each must provide
                ``record["classes"][class_name]``.
            features_to_benchmark: iterable of feature names.
            class_name: name of the class whose label is being predicted.
            delivered_classification_method: callable invoked as
                ``method(record, class_name, feature, other_records)``.

        Returns:
            The value accumulated through ``update_and_return_json`` with one
            entry per feature, starting from an empty dict. Each accuracy is
            a float: correct predictions divided by the number of records
            tested (0.0 when ``records`` is empty).
        """
        total = len(records)
        ranking = {}
        for feature in features_to_benchmark:
            hits = 0
            for record in records:
                expected = record["classes"][class_name]
                # Hold the tested record out of the comparison set.
                others = [x for x in records if x != record]
                predicted = delivered_classification_method(
                    record, class_name, feature, others)
                if expected == predicted:
                    hits += 1
            # float() avoids Python 2 integer truncation. The divisor is the
            # number of records actually tested, so the result stays in
            # the range 0.0-1.0.
            accuracy = float(hits) / total if total else 0.0
            ranking = update_and_return_json(ranking, feature, accuracy)
        return ranking
class Benchmarker:
    """Leave-one-out accuracy benchmark for a classification method.

    Every stored record is classified against all the other records, and the
    share of correct predictions is reported per class name and per feature.
    """

    def __init__(self):
        # Collection wrapper; only ``get_all_records()`` is used by this class.
        self.mongoCollection = MongoCollection()

    def benchmark(self, classes, classification_method, fields):
        """Print the accuracy ranking of ``fields`` for every class name.

        Args:
            classes: iterable of class names to benchmark.
            classification_method: callable invoked as
                ``method(record, class_name, feature, other_records)``; its
                result is compared with ``record["classes"][class_name]``.
            fields: iterable of feature names to benchmark.

        Returns:
            None. The accumulated result is printed to stdout. Nothing is
            printed when fewer than two records are available.
        """
        all_records = self.mongoCollection.get_all_records()
        # Leave-one-out needs one record to classify plus at least one to
        # compare against. The count must be checked, not the collection
        # itself: ``records < 2`` never triggers on Python 2.
        if len(all_records) < 2:
            return

        rankings = {}
        for class_name in classes:
            ranking = self.create_accuracy_ranking(
                all_records, fields, class_name, classification_method)
            # Project helper; presumably stores ``ranking`` under
            # ``class_name`` and returns the mapping -- TODO confirm.
            rankings = update_and_return_json(rankings, class_name, ranking)
        # Single-argument form prints identically on Python 2 and Python 3.
        print(rankings)

    def create_accuracy_ranking(self, records, features_to_benchmark,
                                class_name, delivered_classification_method):
        """Compute the leave-one-out accuracy of each feature.

        Args:
            records: sequence of records; each must provide
                ``record["classes"][class_name]``.
            features_to_benchmark: iterable of feature names.
            class_name: name of the class whose label is being predicted.
            delivered_classification_method: callable invoked as
                ``method(record, class_name, feature, other_records)``.

        Returns:
            The value accumulated through ``update_and_return_json`` with one
            entry per feature, starting from an empty dict. Each accuracy is
            a float: correct predictions divided by the number of records
            tested (0.0 when ``records`` is empty).
        """
        total = len(records)
        ranking = {}
        for feature in features_to_benchmark:
            hits = 0
            for record in records:
                expected = record["classes"][class_name]
                # Hold the tested record out of the comparison set.
                others = [x for x in records if x != record]
                predicted = delivered_classification_method(
                    record, class_name, feature, others)
                if expected == predicted:
                    hits += 1
            # float() avoids Python 2 integer truncation. The divisor is the
            # number of records actually tested, so the result stays in
            # the range 0.0-1.0.
            accuracy = float(hits) / total if total else 0.0
            ranking = update_and_return_json(ranking, feature, accuracy)
        return ranking
class ClassifierAggregator: def __init__(self): self.mongoCollection = MongoCollection() def full_classification(self, to_detect_position): def proceed_classification_for_class_name(class_name): print "Classifying for", class_name with_class_records = self.mongoCollection.get_positons_with_given_class(class_name) # class_names = map(lambda x: x["classes"][class_name], with_class_records) t0 = time() numerical_classification = NumericalFeaturesClassifier.classify(to_detect_position, class_name, numerical_characteristics, with_class_records) print "Numerical classification counted in %fs" % (time() - t0) t0 = time() parts_of_speech_classification = PartsOfSpeechClassifier.classify(to_detect_position, class_name, "parts_of_speech_frequencies", with_class_records) print "Parts of speech classification counted in %fs" % (time() - t0) t0 = time() # most_common_words_class = CommonWordsClassifier.classify(to_detect_position, # "top_words", # class_name, # with_class_records) print "Common words classification counted in %fs" % (time() - t0) return { class_name: { "numerical_classification": numerical_classification[0], "parts_of_speech_frequencies_classification": parts_of_speech_classification[0], "top_common_words": "no be defined " } } return map(proceed_classification_for_class_name, to_detect_position["classes"])
def __init__(self):
    """Attach a newly created ``MongoCollection`` as ``self.mongoCollection``."""
    collection = MongoCollection()
    self.mongoCollection = collection
from parser.MongoCollection import MongoCollection
from parser.classification.ClassifierAggregator import ClassifierAggregator

# Collection handle created once, when this module is imported.
mongoCollection = MongoCollection()


def test_classification():
    """Run the full classification on the first record from the database.

    The record's "classes" entry is overwritten with a fixed set of four
    labels before it is passed to ``ClassifierAggregator``.

    Returns:
        Whatever ``ClassifierAggregator.full_classification`` returns.
    """
    to_detect = mongoCollection.get_first_from_db()
    class_labels = {
        "author": "true",
        "type": "true",
        "age": "false",
        "male": "false",
    }
    to_detect["classes"] = class_labels
    aggregator = ClassifierAggregator()
    return aggregator.full_classification(to_detect)