class Benchmarker:
    """Scores how well a classification method predicts each class from each field.

    Every stored record is classified against all the other records and the
    hit counts are turned into one accuracy figure per (class name, field).
    """

    def __init__(self):
        # Data-access handle; benchmark() loads the full record set through it.
        self.mongoCollection = MongoCollection()

    def benchmark(self, classes, classification_method, fields):
        """Print the accuracy ranking of ``fields`` for every name in ``classes``.

        Args:
            classes: iterable of class names to benchmark.
            classification_method: callable forwarded unchanged to
                ``create_accuracy_ranking`` (see there for its call shape).
            fields: iterable of feature names to score.

        Returns:
            None. The aggregated result is printed, not returned. Nothing is
            printed when fewer than two records are available, because a
            record needs at least one other record to be classified against.
        """
        all_records = self.mongoCollection.get_all_records()
        # Compare the record *count*. The previous ``all_records < 2`` compared
        # the container itself with an int, which for a list is always False
        # under CPython 2 (and a TypeError under Python 3), so the guard was dead.
        if len(all_records) < 2:
            return

        ranking = {}
        for class_name in classes:
            ranking = update_and_return_json(
                ranking,
                class_name,
                self.create_accuracy_ranking(all_records, fields, class_name, classification_method),
            )
        # A single parenthesised argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print(ranking)

    def create_accuracy_ranking(self, records, features_to_benchmark, class_name, delivered_classification_method):
        """Compute, per feature, the share of records whose class is predicted correctly.

        Each record is classified with ``delivered_classification_method(record,
        class_name, feature, others)``, where ``others`` is every element of
        ``records`` that is not equal to that record. A hit is counted when the
        result equals ``record["classes"][class_name]``.

        Args:
            records: sized collection of records that can be iterated
                repeatedly; each record must provide
                ``record["classes"][class_name]``.
            features_to_benchmark: iterable of feature names.
            class_name: key looked up in each record's "classes" mapping.
            delivered_classification_method: callable with the call shape
                described above.

        Returns:
            The accumulator obtained by chaining ``update_and_return_json`` over
            the features, starting from an empty dict (presumably a mapping
            feature -> accuracy; the exact shape depends on that helper).

        Raises:
            ZeroDivisionError: if ``records`` holds exactly one element and at
                least one feature is benchmarked.
        """

        def is_correctly_classified(record, feature):
            # Leave-one-out: the record under test is excluded from the reference set.
            return record["classes"][class_name] == delivered_classification_method(
                record, class_name, feature, [x for x in records if x != record]
            )

        # NOTE(review): every one of the len(records) records is tested, yet the
        # hit count is divided by len(records) - 1, so a perfect run yields a
        # value above 1.0. Kept as in the original; confirm the intended denominator.
        denominator = len(records) - 1

        ranking = {}
        for feature in features_to_benchmark:
            hits = sum(1 for record in records if is_correctly_classified(record, feature))
            # float() forces true division; the plain ``/`` floor-divides two
            # ints under Python 2 and collapsed every accuracy to a whole number.
            ranking = update_and_return_json(ranking, feature, float(hits) / denominator)
        return ranking
class Benchmarker:
    """Scores how well a classification method predicts each class from each field.

    Every stored record is classified against all the other records and the
    hit counts are turned into one accuracy figure per (class name, field).
    """

    def __init__(self):
        # Data-access handle; benchmark() loads the full record set through it.
        self.mongoCollection = MongoCollection()

    def benchmark(self, classes, classification_method, fields):
        """Print the accuracy ranking of ``fields`` for every name in ``classes``.

        Args:
            classes: iterable of class names to benchmark.
            classification_method: callable forwarded unchanged to
                ``create_accuracy_ranking`` (see there for its call shape).
            fields: iterable of feature names to score.

        Returns:
            None. The aggregated result is printed, not returned. Nothing is
            printed when fewer than two records are available, because a
            record needs at least one other record to be classified against.
        """
        all_records = self.mongoCollection.get_all_records()
        # Compare the record *count*. The previous ``all_records < 2`` compared
        # the container itself with an int, which for a list is always False
        # under CPython 2 (and a TypeError under Python 3), so the guard was dead.
        if len(all_records) < 2:
            return

        ranking = {}
        for class_name in classes:
            ranking = update_and_return_json(
                ranking,
                class_name,
                self.create_accuracy_ranking(all_records, fields, class_name,
                                             classification_method))
        # A single parenthesised argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print(ranking)

    def create_accuracy_ranking(self, records, features_to_benchmark,
                                class_name, delivered_classification_method):
        """Compute, per feature, the share of records whose class is predicted correctly.

        Each record is classified with ``delivered_classification_method(record,
        class_name, feature, others)``, where ``others`` is every element of
        ``records`` that is not equal to that record. A hit is counted when the
        result equals ``record['classes'][class_name]``.

        Args:
            records: sized collection of records that can be iterated
                repeatedly; each record must provide
                ``record['classes'][class_name]``.
            features_to_benchmark: iterable of feature names.
            class_name: key looked up in each record's 'classes' mapping.
            delivered_classification_method: callable with the call shape
                described above.

        Returns:
            The accumulator obtained by chaining ``update_and_return_json`` over
            the features, starting from an empty dict (presumably a mapping
            feature -> accuracy; the exact shape depends on that helper).

        Raises:
            ZeroDivisionError: if ``records`` holds exactly one element and at
                least one feature is benchmarked.
        """

        def is_correctly_classified(record, feature):
            # Leave-one-out: the record under test is excluded from the reference set.
            return record['classes'][class_name] == delivered_classification_method(
                record, class_name, feature,
                [x for x in records if x != record])

        # NOTE(review): every one of the len(records) records is tested, yet the
        # hit count is divided by len(records) - 1, so a perfect run yields a
        # value above 1.0. Kept as in the original; confirm the intended denominator.
        denominator = len(records) - 1

        ranking = {}
        for feature in features_to_benchmark:
            hits = sum(1 for record in records
                       if is_correctly_classified(record, feature))
            # float() forces true division; the plain ``/`` floor-divides two
            # ints under Python 2 and collapsed every accuracy to a whole number.
            ranking = update_and_return_json(ranking, feature,
                                             float(hits) / denominator)
        return ranking
class ClassifierAggregator:
  def __init__(self):
    self.mongoCollection = MongoCollection()

  def full_classification(self, to_detect_position):

    def proceed_classification_for_class_name(class_name):
      print "Classifying for", class_name
      with_class_records = self.mongoCollection.get_positons_with_given_class(class_name)
      # class_names = map(lambda x: x["classes"][class_name], with_class_records)
      t0 = time()
      numerical_classification = NumericalFeaturesClassifier.classify(to_detect_position,
                                                                      class_name,
                                                                      numerical_characteristics,
                                                                      with_class_records)
      print "Numerical classification counted in  %fs" % (time() - t0)
      t0 = time()
      parts_of_speech_classification = PartsOfSpeechClassifier.classify(to_detect_position,
                                                                        class_name,
                                                                        "parts_of_speech_frequencies",
                                                                        with_class_records)
      print "Parts of speech classification counted in  %fs" % (time() - t0)
      t0 = time()
      # most_common_words_class = CommonWordsClassifier.classify(to_detect_position,
      #                                                          "top_words",
      #                                                          class_name,
      #                                                          with_class_records)
      print "Common words classification counted in  %fs" % (time() - t0)

      return {
        class_name: {
          "numerical_classification": numerical_classification[0],
          "parts_of_speech_frequencies_classification": parts_of_speech_classification[0],
          "top_common_words": "no be defined "
        }
      }

    return map(proceed_classification_for_class_name, to_detect_position["classes"])
 def __init__(self):
     """Create a MongoCollection and keep it on the instance as ``mongoCollection``."""
     self.mongoCollection = MongoCollection()
# Example #5
# 0
from parser.MongoCollection import MongoCollection
from parser.classification.ClassifierAggregator import ClassifierAggregator

# Module-level collection handle, created at import time; test_classification()
# reads its sample record through it.
mongoCollection = MongoCollection()


def test_classification():
    """Run the full classification on one stored record with fixed class labels.

    Takes whatever ``mongoCollection.get_first_from_db()`` returns, replaces
    its "classes" entry with the four labels below, and returns the result of
    ``ClassifierAggregator.full_classification`` for it.
    """
    sample = mongoCollection.get_first_from_db()
    # Overwrite any labels the record carried; classification runs once per key here.
    fixed_labels = {
        "author": "true",
        "type": "true",
        "age": "false",
        "male": "false"
    }
    sample["classes"] = fixed_labels
    aggregator = ClassifierAggregator()
    return aggregator.full_classification(sample)
 def __init__(self):
     """Create a MongoCollection and keep it on the instance as ``mongoCollection``."""
     self.mongoCollection = MongoCollection()