Ejemplo n.º 1
0
    def __refresh_statistics(self):
        '''
        Recompute the coverage, target statistics and score of this rule.

        Ranked targets get ``mean`` and ``sd`` (via ``avg`` and ``std``);
        any other target type gets ``distribution``, which maps every
        score of a covered example to the number of times it occurs.
        '''
        self.coverage = self.covered_examples.count()

        kb = self.kb
        covered_scores = [
            kb.get_score(example_idx)
            for example_idx in kb.bits_to_indices(self.covered_examples)
        ]

        is_ranked = self.target_type == Example.Ranked
        if is_ranked:
            self.mean = avg(covered_scores)
            self.sd = std(covered_scores)
        else:
            histogram = self.distribution = defaultdict(int)
            for value in covered_scores:
                histogram[value] += 1

        # The scoring function is handed the rule only after the
        # statistics above have been updated.
        self.score = kb.score_fun(self)
Ejemplo n.º 2
0
    def __refresh_statistics(self):
        '''
        Bring the cached statistics of this rule up to date.

        Sets ``coverage`` from the covered-examples bitset, then either
        ``mean``/``sd`` (ranked targets) or a score -> count
        ``distribution`` (all other targets), and finally ``score``.
        '''
        self.coverage = self.covered_examples.count()

        knowledge_base = self.kb
        example_indices = knowledge_base.bits_to_indices(self.covered_examples)
        scores = [knowledge_base.get_score(i) for i in example_indices]

        ranked = self.target_type == Example.Ranked
        if ranked:
            self.mean = avg(scores)
            self.sd = std(scores)
        else:
            counts = self.distribution = defaultdict(int)
            for observed in scores:
                counts[observed] += 1

        # Scored last, once the statistics for the current coverage are set.
        self.score = knowledge_base.score_fun(self)
Ejemplo n.º 3
0
    def __init__(self, triplets, score_fun, instances_as_leaves=True):
        '''
        Initialize the knowledge base with the given triplet graph.

        triplets -- the triplet graph; it is passed to the example,
            subclass-of, predicate-member and annotation-name builders.
        score_fun -- stored as ``self.score_fun``.
        instances_as_leaves -- stored as ``self.instances_as_leaves``.

        The target type (ranked or class-labeled) is read from the first
        built example, so at least one example is expected.
        '''
        self.instances_as_leaves = instances_as_leaves
        self.score_fun = score_fun
        self.sub_class_of = defaultdict(list)
        self.super_class_of = defaultdict(list)
        self.predicates = set()
        self.binary_predicates = set()
        self.class_values = set()
        self.annotation_name = defaultdict(list)

        self.examples, all_annotations = self._build_examples(triplets)

        # Ranked or class-labeled data; decided by the first example.
        self.target_type = self.examples[0].target_type

        self._build_subclassof(triplets)
        self._calc_predicate_members(triplets)
        self._find_roots(all_annotations)
        self._calc_members_closure()
        self._calc_binary_members()
        self._propagate_annotation_names(triplets)

        # Statistics over the scores of all examples
        if self.target_type == Example.Ranked:
            # Collect the scores once and reuse them for both statistics.
            scores = [ex.score for ex in self.examples]
            self.mean = avg(scores)
            self.sd = std(scores)
        else:
            # Count how many examples carry each score (class label).
            self.distribution = defaultdict(int)
            for ex in self.examples:
                self.distribution[ex.score] += 1
            logger.debug('Class distribution: %s' % str(self.distribution))
Ejemplo n.º 4
0
    def __init__(self, triplets, score_fun, instances_as_leaves=True):
        """
        Initialize the knowledge base with the given triplet graph.

        triplets -- the triplet graph; it is passed to the example,
            subclass-of, predicate-member and annotation-name builders.
        score_fun -- stored as ``self.score_fun``.
        instances_as_leaves -- stored as ``self.instances_as_leaves``.

        The target type (ranked or class-labeled) is read from the first
        built example, so at least one example is expected.
        """
        self.instances_as_leaves = instances_as_leaves
        self.score_fun = score_fun
        self.sub_class_of = defaultdict(list)
        self.super_class_of = defaultdict(list)
        self.predicates = set()
        self.binary_predicates = set()
        self.class_values = set()
        self.annotation_name = defaultdict(list)

        self.examples, all_annotations = self._build_examples(triplets)

        # Ranked or class-labeled data; decided by the first example.
        self.target_type = self.examples[0].target_type

        self._build_subclassof(triplets)
        self._calc_predicate_members(triplets)
        self._find_roots(all_annotations)
        self._calc_members_closure()
        self._calc_binary_members()
        self._propagate_annotation_names(triplets)

        # Statistics over the scores of all examples
        if self.target_type == Example.Ranked:
            # Collect the scores once and reuse them for both statistics.
            scores = [ex.score for ex in self.examples]
            self.mean = avg(scores)
            self.sd = std(scores)
        else:
            # Count how many examples carry each score (class label).
            self.distribution = defaultdict(int)
            for ex in self.examples:
                self.distribution[ex.score] += 1
            logger.debug("Class distribution: %s" % str(self.distribution))