Exemple #1
0
    def process(self, new_features):
        """Store every incoming feature and periodically summarize its group.

        For each feature this inserts it via ``insert_feature``, lets the
        ``Meta`` object for the feature's name learn from it and saves that
        meta object. Whenever the number of stored features with that name is
        a multiple of the meta object's merge frequency, the features are
        turned into characteristics and ``merge_characteristics`` is called
        repeatedly until it reports that nothing changed.

        Args:
            new_features: iterable of feature objects; each must expose a
                ``name`` attribute.

        Returns:
            Always ``True``.
        """
        for item in new_features:
            self.insert_feature(item)

            # Refresh the per-name meta object with this feature's attributes.
            meta_info = Meta(item.name, self.db)
            meta_info.learn_from_attributes(item)
            meta_info.save()

            same_named = self.db.features.find({"name": item.name})
            if same_named.count() % meta_info.get_merge_frequency() != 0:
                # Not at a summarization point yet for this name.
                continue

            logging.debug("Summarizing %s" % item.name)

            # Insert the new features as "characteristics".
            self.update_characteristics(item.name, meta_info.get_merge_frequency())

            # Keep merging until a pass reports that nothing was merged.
            while self.merge_characteristics(item.name, meta_info.get_merge_threshold()):
                pass
        return True
Exemple #2
0
    def merge_characteristics(self, name, merge_threshold):
        """Run one merge pass over the stored characteristics called *name*.

        Each characteristic is paired with its nearest neighbour among the
        other characteristics whose ``ident`` is disjoint from its own (as
        decided by ``idents_disjoint``). A pair is merged only when its
        distance does not exceed ``1 - merge_threshold``: the first member
        absorbs the second, is saved back to ``self.db.characteristics`` and
        the second document is removed. A characteristic takes part in at most
        one merge per pass.

        Args:
            name: value of the ``"name"`` field used to select characteristics.
            merge_threshold: pairs farther apart than ``1 - merge_threshold``
                are left untouched.

        Returns:
            ``True`` if at least one pair was actually merged during this
            pass, ``False`` otherwise. Callers loop on this value, so it must
            never be ``True`` when nothing changed.
        """
        logging.debug("Merging characterics")
        characteristics = self.db.characteristics.find({"name": name})
        meta = Meta(name, self.db)

        chars = list(characteristics)
        if len(chars) < 2:
            return False  # nothing to pair up

        max_distance = 1 - merge_threshold

        # Loop-invariant work hoisted out of the O(n^2) comparison below.
        attr_ranges = meta.get_attr_ranges()
        candidates = [Feature.from_db(entry) for entry in chars]

        # best_fits[i] is None or (index of nearest neighbour, distance).
        best_fits = [None] * len(chars)
        for first_i, first in enumerate(chars):
            for second_i, second in enumerate(chars):
                # Compare ids by value; identity is not a reliable equality test.
                if first['_id'] == second['_id']:
                    continue
                if not idents_disjoint(first['ident'], second['ident']):
                    continue
                distance = candidates[first_i].distance_to(candidates[second_i], attr_ranges)
                current = best_fits[first_i]
                if current is None or distance < current[1]:
                    best_fits[first_i] = (second_i, distance)

        consumed = set()  # indices that already took part in a merge this pass
        merged_any = False
        for first_i, fit in enumerate(best_fits):
            if fit is None or first_i in consumed:
                continue  # no partner, or first has been merged before
            second_i, distance = fit
            if best_fits[second_i] is None or second_i in consumed:
                continue  # second has no fit or has been merged before
            if distance > max_distance:
                continue  # too far apart to be merged

            # Re-create both objects from the raw documents before merging.
            first = Feature.from_db(chars[first_i])
            second = Feature.from_db(chars[second_i])

            first.merge(second)
            self.db.characteristics.save(first.db_entry())
            self.db.characteristics.remove({"_id": second._id})
            consumed.add(first_i)
            consumed.add(second_i)
            merged_any = True

            # TODO recalculate best_fits for entries whose partner was consumed

        return merged_any