def build_tree(
    internal_ex_format: InternalExampleFormat,
    treebuilder_type: TreeBuilderType,
    language: TypeModeLanguage,
    possible_labels,
    training_examples_collection: ExampleCollection,
    prediction_goal=None,
    full_background_knowledge_sp: Optional[PrologFile] = None,
    debug_printing_tree_building=False,
    stop_criterion_handler: Optional = StopCriterionMinimalCoverage(),
    engine: DefaultEngine = None
) -> TreeNode:
    """Build a tree from the labeled training examples and return it.

    An example partitioner is obtained from ``PartitionerBuilder`` and handed,
    together with the language, labels and stop criterion, to the tree
    builder produced by ``TreeBuilderBuilder``.  That builder is then run on
    the labeled examples of ``training_examples_collection``.

    Note:
        The default ``stop_criterion_handler`` is instantiated once, when this
        function is defined, and is therefore shared by every call that does
        not pass its own handler.

    Args:
        internal_ex_format: forwarded to ``build_partitioner``.
        treebuilder_type: forwarded to ``build_treebuilder``.
        language: forwarded to ``build_treebuilder``.
        possible_labels: forwarded to ``build_treebuilder``.
        training_examples_collection: source of the labeled examples.
        prediction_goal: forwarded to the tree builder's ``build_tree``.
        full_background_knowledge_sp: forwarded to ``build_partitioner``.
        debug_printing_tree_building: forwarded to the tree builder's
            ``debug_printing``.
        stop_criterion_handler: forwarded to ``build_treebuilder``.
        engine: forwarded to ``build_partitioner`` as keyword ``engine``.

    Returns:
        The result of the tree builder's ``get_tree()``.
    """
    partitioner = PartitionerBuilder().build_partitioner(
        internal_ex_format, full_background_knowledge_sp, engine=engine)

    builder = TreeBuilderBuilder().build_treebuilder(
        treebuilder_type,
        language,
        possible_labels,
        partitioner,
        stop_criterion_handler,
    )
    builder.debug_printing(debug_printing_tree_building)

    # Only the labeled examples of the collection are used for training.
    builder.build_tree(
        training_examples_collection.get_labeled_examples(), prediction_goal)
    return builder.get_tree()
Example #2
0
    def _parse_ex_simpleprogram(self,
                                fname_examples: str) -> ExampleCollection:
        """Parse examples into a new ExampleCollection and remember it.

        ``fname_examples`` is passed unchanged to
        ``_parse_ex_simpleprogram_input_format``; the wrappers it returns are
        stored in a fresh ``ExampleCollection``, which is assigned to
        ``self.training_example_collection`` and returned.  Progress messages
        are printed when ``self.debug_printing`` is truthy.
        """
        if self.debug_printing:
            print(
                'start parsing kb examples into SimpleProgramExampleWrappers')

        # type: List[SimpleProgramExampleWrapper]
        sp_wrappers = self._parse_ex_simpleprogram_input_format(fname_examples)

        collection = ExampleCollection()
        collection.set_example_wrappers_sp(sp_wrappers)
        self.training_example_collection = collection

        if self.debug_printing:
            print('end parsing kb examples into SimpleProgramExampleWrappers')
        return self.training_example_collection
Example #3
0
def split_examples_into_training_and_test_sets(
        all_key_sets: List[Set[Constant]], test_key_set: Set[Constant],
        examples_collection_usable_for_training: ExampleCollection,
        examples_usable_for_testing: List[ClauseDBExampleWrapper]
) -> Tuple[ExampleCollection, List[ExampleWrapper]]:
    """Split examples into a training collection and a list of test examples.

    Every key set in ``all_key_sets`` other than ``test_key_set`` itself
    contributes its keys to the training key set.

    Args:
        all_key_sets: all key sets (folds), including the test one.
        test_key_set: the key set used for testing; it is excluded from the
            training keys by object identity, so it must be the same object
            that is stored in ``all_key_sets``.
        examples_collection_usable_for_training: collection whose
            ``filter_examples`` is called with the training keys.
        examples_usable_for_testing: examples exposing a ``key`` attribute.

    Returns:
        A tuple ``(training_example_collection, test_examples)`` where the
        first item is the result of ``filter_examples`` and the second is the
        list of test examples whose key is in ``test_key_set``.
    """
    # Identity (not equality) comparison: two distinct folds are both kept
    # even if they happen to compare equal.
    training_key_sets_list = [
        key_set for key_set in all_key_sets if key_set is not test_key_set
    ]  # type: List[Set[Constant]]

    # ``set().union(*[])`` is an empty set, whereas ``set.union(*[])`` raises
    # TypeError; this keeps the split working when no training fold is left.
    training_key_set = set().union(*training_key_sets_list)  # type: Set[Constant]

    training_example_collection = examples_collection_usable_for_training.filter_examples(
        training_key_set)  # type: ExampleCollection

    test_examples = [ex_wp for ex_wp in examples_usable_for_testing
                     if ex_wp.key in test_key_set]  # type: List[ExampleWrapper]

    return training_example_collection, test_examples
Example #4
0
    def _split_examples_into_training_and_test_sets(self,
                                                    all_keys_set: Set[Constant],
                                                    test_key_set: Set[Constant],
                                                    examples_collection_usable_for_training: ExampleCollection,
                                                    examples_usable_for_testing: List[ClauseDBExampleWrapper]
                                                    ) -> Tuple[ExampleCollection, List[ExampleWrapper]]:
        # for all keys
        # if key is not part of the current test set keys
        #   add key to training set keys
        # else

        training_key_set = [s for s in all_keys_set
                            if s not in test_key_set]  # type: Set[Constant]

        training_example_collection = examples_collection_usable_for_training.filter_examples(
            training_key_set)  # type: ExampleCollection

        test_examples = [ex_wp for ex_wp in examples_usable_for_testing
                         if ex_wp.key in test_key_set]  # type: List[ExampleWrapper]

        return training_example_collection, test_examples
Example #5
0
    def extract_labels(self, example_collection: ExampleCollection):
        """Label the examples of ``example_collection`` and print statistics.

        For each clause-db example wrapper, the label is taken from its
        ``classification_term`` when that is not ``None``; otherwise
        ``self.engine`` is queried with ``self.predicate_to_query`` on the
        example's logic program and every answer supplies a label (the last
        answer wins on the example itself).  Each label found is added to
        ``self.labels``.  The label of each clause-db wrapper is then copied
        to the simple-program wrapper at the same index.

        Examples whose query yields no answers are printed and their keys are
        collected instead of raising.

        Args:
            example_collection: collection providing
                ``get_example_wrappers_sp()`` and
                ``get_example_wrappers_clausedb()``; both of its
                ``are_*_examples_labeled`` flags are set to ``True``.

        Returns:
            The set of keys of the examples for which no label was found.
        """
        example_wrappers_sp = example_collection.get_example_wrappers_sp()
        example_wrappers_clausedb = (
            example_collection.get_example_wrappers_clausedb())

        keys_of_unlabeled_examples = set()
        label_distribution = {}

        def record_label(example, label):
            # Register the label on the extractor, on the example itself and
            # in the per-label occurrence counts.
            self.labels.add(label)
            example.label = label
            label_distribution[label] = label_distribution.get(label, 0) + 1

        for ex_index, clause_db_ex in enumerate(example_wrappers_clausedb):
            # type: Tuple[int, ClauseDBExampleWrapper]
            if clause_db_ex.classification_term is not None:
                record_label(
                    clause_db_ex,
                    clause_db_ex.classification_term.args[
                        self.index_of_label_var])
            else:
                # TODO: update this to the probabilistic way of querying
                query_results = self.engine.query(clause_db_ex.logic_program,
                                                  self.predicate_to_query)
                # Value comparison; ``is 0`` would test object identity.
                if len(query_results) == 0:
                    # No answer: dump the example and remember its key
                    # rather than aborting the whole extraction.
                    for ex_statement in clause_db_ex:
                        print(ex_statement)
                    print("--")
                    keys_of_unlabeled_examples.add(clause_db_ex.get_key())
                for answer in query_results:
                    record_label(clause_db_ex,
                                 answer[self.index_of_label_var])

            # NOTE(review): assumes the wrapper already has a ``label``
            # attribute when no label was found above -- TODO confirm.
            example_wrappers_sp[ex_index].label = clause_db_ex.label

        # set flags
        example_collection.are_sp_examples_labeled = True
        example_collection.are_clausedb_examples_labeled = True

        nb_examples = len(example_wrappers_clausedb)
        print("nb of examples: " + str(nb_examples))
        nb_unlabeled_examples = len(keys_of_unlabeled_examples)
        print("nb of unlabeled examples: " + str(nb_unlabeled_examples))
        print("nb of labeled examples: " +
              str(nb_examples - nb_unlabeled_examples))
        print("indexes of unlabeled examples:")
        print(keys_of_unlabeled_examples)
        print("label distribution:")
        print(label_distribution)

        return keys_of_unlabeled_examples
Example #6
0
 def extract_labels(self, example_collection: ExampleCollection):
     """Run ``self.extract_label`` on every simple-program example wrapper.

     Once all wrappers returned by ``get_example_wrappers_sp()`` have been
     processed, ``example_collection.are_sp_examples_labeled`` is set to
     ``True``.
     """
     sp_wrappers = example_collection.get_example_wrappers_sp()
     for wrapper in sp_wrappers:
         self.extract_label(wrapper)

     # Flag the collection only after every wrapper has been handled.
     example_collection.are_sp_examples_labeled = True