Beispiel #1
0
def main_cross_validation(fname_examples: str, fname_settings: str, fname_background: str,
                          dir_fold_files: str, fname_prefix_fold: str, fold_start_index: int, nb_folds: int,
                          fold_suffix: str, dir_output_files: str,
                          filter_out_unlabeled_examples=False,
                          debug_printing_example_parsing=False,
                          debug_printing_tree_building=False,
                          debug_printing_tree_pruning=False,
                          debug_printing_program_conversion=False,
                          debug_printing_get_classifier=False,
                          debug_printing_classification=False):
    """
    Run one pass per fold, followed by one pass over all examples.

    Every argument is forwarded unchanged, and in the same order, to
    ``FoldData.build_fold_data``; the resulting fold data object is then
    handed to ``do_one_fold`` once per entry of its ``all_key_sets`` and
    finally to ``do_all_examples``.

    :param fname_examples: forwarded to ``FoldData.build_fold_data``
    :param fname_settings: forwarded to ``FoldData.build_fold_data``
    :param fname_background: forwarded to ``FoldData.build_fold_data``
    :param dir_fold_files: forwarded to ``FoldData.build_fold_data``
    :param fname_prefix_fold: forwarded to ``FoldData.build_fold_data``
    :param fold_start_index: forwarded to ``FoldData.build_fold_data``
    :param nb_folds: forwarded to ``FoldData.build_fold_data``
    :param fold_suffix: forwarded to ``FoldData.build_fold_data``
    :param dir_output_files: forwarded to ``FoldData.build_fold_data``
    :param filter_out_unlabeled_examples: forwarded to ``FoldData.build_fold_data``
    :param debug_printing_example_parsing: debug flag, forwarded as-is
    :param debug_printing_tree_building: debug flag, forwarded as-is
    :param debug_printing_tree_pruning: debug flag, forwarded as-is
    :param debug_printing_program_conversion: debug flag, forwarded as-is
    :param debug_printing_get_classifier: debug flag, forwarded as-is
    :param debug_printing_classification: debug flag, forwarded as-is
    :return: None
    """
    problog_engine = DefaultEngine()
    # NOTE(review): presumably selects ProbLog's "unknown predicates fail"
    # mode instead of raising an error -- confirm against the ProbLog version.
    problog_engine.unknown = 1

    fold_data = FoldData.build_fold_data(
        fname_examples, fname_settings, fname_background,
        dir_fold_files, fname_prefix_fold, fold_start_index, nb_folds, fold_suffix,
        dir_output_files,
        filter_out_unlabeled_examples,
        debug_printing_example_parsing,
        debug_printing_tree_building,
        debug_printing_tree_pruning,
        debug_printing_program_conversion,
        debug_printing_get_classifier,
        debug_printing_classification,
        engine=problog_engine,
    )

    # Each key set is used once as the test set, the others as training.
    for index, held_out_keys in enumerate(fold_data.all_key_sets):
        do_one_fold(index, held_out_keys, fold_data)

    do_all_examples(fold_data)
Beispiel #2
0
def get_labels_single_example_models(example: SimpleProgram,
                                     rules: SimpleProgram,
                                     possible_labels: Iterable[str],
                                     background_knowledge=None,
                                     debug_printing=False) -> List[str]:
    """
    Classify a single example by querying every candidate label separately.

    A database is prepared from the background knowledge (when given) or
    from the rules, and the example's statements are added to it.  For each
    label, ``query(label)`` is added to an extension of that database and
    the extension is evaluated; the label is kept when its result is
    strictly greater than 0.5.  The wall-clock time of every evaluation is
    printed unconditionally.

    :param example: statements describing the example
    :param rules: statements of the classifier program
    :param possible_labels: candidate labels, each queried on its own
    :param background_knowledge: optional program prepared first; when
        given, the rules are added after the example's statements
    :param debug_printing: if true, print the database before querying
    :return: the labels whose evaluation result exceeds 0.5, in the
        iteration order of ``possible_labels``
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # The base program is the background knowledge when available,
    # otherwise the rules themselves.
    has_background = background_knowledge is not None
    database = eng.prepare(background_knowledge if has_background else rules)
    for fact in example:
        database += fact
    if has_background:
        # The rules were not used as the base program, so add them now.
        for rule in rules:
            database += rule

    if debug_printing:
        print('\nQueried database:')
        for entry in database:
            print('\t' + str(entry))

    accepted_labels = []
    for label in possible_labels:
        # Query on an extension so the base database never accumulates queries.
        scratch_db = database.extend()
        scratch_db += Term('query')(label)

        started = time.time()
        evaluation = problog.get_evaluatable().create_from(
            scratch_db, engine=eng).evaluate()
        finished = time.time()
        print("call time:", finished - started)

        if evaluation[label] > 0.5:
            accepted_labels.append(label)

    return accepted_labels
    def get_full_background_knowledge_clausedb(self, engine=None) -> ClauseDB:
        """
        Return the full background knowledge as a ClauseDB, building it on first use.

        The result is stored in ``self.full_background_knowledge_clausedb``
        and returned directly on later calls.

        :param engine: engine used to prepare the program; when None, a
            ``DefaultEngine`` with ``unknown = 1`` is created
        :return: the stored or newly prepared database
        :raises Exception: if the full background knowledge program is None
        """
        # Fast path: a database was already prepared earlier.
        if self.full_background_knowledge_clausedb is not None:
            return self.full_background_knowledge_clausedb

        if engine is None:
            engine = DefaultEngine()
            engine.unknown = 1

        background_program = self.get_full_background_knowledge_simple_program()
        if background_program is None:
            raise Exception(
                "No sense in making an empty ClauseDB for an empty background knowledge"
            )

        self.full_background_knowledge_clausedb = engine.prepare(background_program)
        return self.full_background_knowledge_clausedb
Beispiel #4
0
def get_labels_single_example_probabilistic_models(
        example: SimpleProgram,
        rules: SimpleProgram,
        possible_labels: Iterable[str],
        background_knowledge=None,
        debug_printing=False) -> List[str]:
    """
    Evaluate all candidate labels for a single example in one evaluation.

    A database is prepared from the background knowledge (when given) or
    from the rules, and the example's statements are added to it.  One
    ``query(label)`` term per candidate label is added to an extension of
    that database, which is then evaluated once.

    NOTE(review): despite the ``List[str]`` annotation, the value returned
    is whatever ``evaluate()`` produces -- presumably a mapping from query
    term to probability; confirm before relying on the annotation.

    :param example: statements describing the example
    :param rules: statements of the classifier program
    :param possible_labels: candidate labels to query
    :param background_knowledge: optional program prepared first; when
        given, the rules are added after the example's statements
    :param debug_printing: if true, print the database before querying
    :return: the unmodified result of the evaluation
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # The base program is the background knowledge when available,
    # otherwise the rules themselves.
    has_background = background_knowledge is not None
    database = eng.prepare(background_knowledge if has_background else rules)
    for fact in example:
        database += fact
    if has_background:
        for rule in rules:
            database += rule

    if debug_printing:
        print('\nQueried database:')
        for entry in database:
            print('\t' + str(entry))

    label_queries = [Term('query')(label) for label in possible_labels]

    # All queries go into a single extension so one evaluation answers them all.
    scratch_db = database.extend()
    for label_query in label_queries:
        scratch_db += label_query

    evaluatable = problog.get_evaluatable().create_from(scratch_db, engine=eng)
    return evaluatable.evaluate()
Beispiel #5
0
def get_example_databases(
        simple_program_examples: Iterable[SimpleProgramExampleWrapper],
        background_knowledge: Optional[LogicProgram] = None,
        models=False,
        engine=None) -> List[ClauseDBExampleWrapper]:
    """
    Convert simple-program example wrappers into ClauseDB example wrappers.

    With background knowledge, the background program is prepared once and
    every example gets its own extension of that database holding the
    example's statements.  Without it, each example's ``logic_program`` is
    prepared on its own.

    :param simple_program_examples: the examples to convert
    :param background_knowledge: optional program shared by all examples
    :param models: if true, each example's ``label`` is copied as well
    :param engine: engine used for preparing; when None, a
        ``DefaultEngine`` with ``unknown = 1`` is created
    :return: one ClauseDB wrapper per input example, in input order
    """
    if engine is None:
        engine = DefaultEngine()
        engine.unknown = 1

    def wrap(source, database):
        # Wrap the database and carry over whatever metadata the source has.
        wrapper = ClauseDBExampleWrapper(logic_program=database)
        if source.classification_term is not None:
            wrapper.classification_term = source.classification_term
        if source.key is not None:
            wrapper.key = source.key
        if models:
            wrapper.label = source.label
        return wrapper

    converted = []  # type: List[ClauseDBExampleWrapper]

    if background_knowledge is None:
        # No shared base: every example is prepared independently.
        for source_example in simple_program_examples:
            standalone_db = engine.prepare(
                source_example.logic_program)  # type: ClauseDB
            converted.append(wrap(source_example, standalone_db))
        return converted

    shared_db = engine.prepare(background_knowledge)  # type: ClauseDB
    for source_example in simple_program_examples:
        # Extend rather than copy, so each example layers on the shared base.
        layered_db = shared_db.extend()  # type: ClauseDB
        for statement in source_example:
            layered_db += statement
        converted.append(wrap(source_example, layered_db))
    return converted
Beispiel #6
0
    def get_default_decision_tree_builder(language,
                                          prediction_goal) -> TreeBuilder:
        """
        Assemble a TreeBuilder from the default splitter, leaf builder and stop criterion.

        The splitter uses the criterion named by ``split_criterion()``, a
        ``SimpleProgramQueryEvaluator`` backed by a fresh ``DefaultEngine``
        (``unknown = 1``), and a ``ProbLogTestGeneratorBuilder``.

        :param language: passed as ``language`` to the test generator builder
        :param prediction_goal: passed as ``query_head_if_keys_format`` to
            the test generator builder
        :return: the assembled tree builder
        """
        problog_engine = DefaultEngine()
        problog_engine.unknown = 1

        evaluator = SimpleProgramQueryEvaluator(engine=problog_engine)
        generator_builder = ProbLogTestGeneratorBuilder(
            language=language, query_head_if_keys_format=prediction_goal)

        default_splitter = Splitter(
            split_criterion_str=split_criterion(),
            test_evaluator=evaluator,
            test_generator_builder=generator_builder,
        )

        return TreeBuilder(
            splitter=default_splitter,
            leaf_builder=LeafBuilder(),
            stop_criterion=StopCriterion(),
        )
Beispiel #7
0
def get_labels_single_example_keys(example: SimpleProgram,
                                   rules: SimpleProgram,
                                   prediction_goal: Term,
                                   index_of_label_arg: int,
                                   possible_labels: Iterable[str],
                                   background_knowledge=None,
                                   debug_printing=False) -> List[str]:
    """
    Classify a single example by querying the prediction goal directly.

    A database is prepared from the background knowledge (when given) or
    from the rules, the example's statements are added, and the engine is
    asked for all results of ``prediction_goal``.

    :param example: statements describing the example
    :param rules: statements of the classifier program
    :param prediction_goal: the term queried against the database
    :param index_of_label_arg: position of the label inside each query result
    :param possible_labels: not used by this function
    :param background_knowledge: optional program prepared first; when
        given, the rules are added after the example's statements
    :param debug_printing: if true, print the database before querying
    :return: the entry at ``index_of_label_arg`` of every query result
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # The base program is the background knowledge when available,
    # otherwise the rules themselves.
    has_background = background_knowledge is not None
    database = eng.prepare(background_knowledge if has_background else rules)
    for fact in example:
        database += fact
    if has_background:
        for rule in rules:
            database += rule

    if debug_printing:
        print('\nQueried database:')
        for entry in database:
            print('\t' + str(entry))

    answers = eng.query(database, prediction_goal)
    return [answer[index_of_label_arg] for answer in answers]
Beispiel #8
0
    def preprocess_examples_and_background_knowledge(
            self, file_name_data: FileNameData,
            filter_out_unlabeled_examples: bool,
            debug_printing_options: DebugPrintingOptions):
        """
        Parse settings, background knowledge and examples, and store the results on self.

        The settings are parsed with the KEYS-format settings parser.  The
        following attributes are set: ``language``, ``prediction_goal``,
        ``training_examples_collection``, ``possible_labels`` and
        ``examples_usable_for_testing``.

        :param file_name_data: supplies ``fname_settings``,
            ``fname_background`` and ``fname_examples``
        :param filter_out_unlabeled_examples: if true and at least one
            unlabeled example was found, the training collection is
            replaced by the result of ``filter_examples_not_in_key_set``
        :param debug_printing_options: its ``example_parsing`` flag is
            passed to the example builder
        :return: None
        """
        engine = DefaultEngine()
        engine.unknown = 1

        settings_file_parser = SettingsParserMapper.get_settings_parser(
            KnowledgeBaseFormat.KEYS)
        parsed_settings = settings_file_parser.parse(
            file_name_data.fname_settings)

        self.language = parsed_settings.language  # type: TypeModeLanguage

        # NOTE(review): kb_format and treebuilder_type are assigned but not
        # read anywhere in the visible part of this method.
        kb_format = KnowledgeBaseFormat.KEYS
        internal_ex_format = InternalExampleFormat.CLAUSEDB

        treebuilder_type = TreeBuilderType.DETERMINISTIC

        prediction_goal_handler = parsed_settings.get_prediction_goal_handler(
        )  # type: KeysPredictionGoalHandler
        self.prediction_goal = prediction_goal_handler.get_prediction_goal(
        )  # type: Term

        # ------------------------------------------------
        # --- BACKGROUND KNOWLEDGE -----------------------
        # ------------------------------------------------

        background_knowledge_wrapper \
            = parse_background_knowledge_keys(file_name_data.fname_background,
                                              self.prediction_goal)  # type: BackgroundKnowledgeWrapper

        # The full program is used while parsing the examples; the stripped
        # program is used further down when building the ClauseDB examples.
        full_background_knowledge_sp \
            = background_knowledge_wrapper.get_full_background_knowledge_simple_program()  # type: Optional[SimpleProgram]
        stripped_background_knowledge = background_knowledge_wrapper.get_stripped_background_knowledge(
        )  # type: Optional[SimpleProgram]
        # ------------------------------------------------

        # EXAMPLES
        example_builder = KeysExampleBuilder(
            self.prediction_goal, debug_printing_options.example_parsing)
        self.training_examples_collection = example_builder.parse(
            internal_ex_format, file_name_data.fname_examples,
            full_background_knowledge_sp)  # type: ExampleCollection

        # ------------------------------------------------
        # --- LABELS -------------------------------------
        index_of_label_var = prediction_goal_handler.get_predicate_goal_index_of_label_var(
        )  # type: int
        label_collector = LabelCollectorMapper.get_label_collector(
            internal_ex_format,
            self.prediction_goal,
            index_of_label_var,
            engine=engine)

        # extract_labels returns the keys of the examples without a label;
        # the labels that were found are read back through get_labels().
        keys_of_unlabeled_examples = label_collector.extract_labels(
            self.training_examples_collection)
        nb_of_unlabeled_examples = len(keys_of_unlabeled_examples)

        possible_labels = label_collector.get_labels()  # type: Set[Label]
        self.possible_labels = list(possible_labels)  # type: List[Label]
        # ------------------------------------------------

        # TODO: change this back if necessary
        if filter_out_unlabeled_examples and nb_of_unlabeled_examples > 0:
            # NOTE(review): total_nb_of_examples is computed but never used.
            total_nb_of_examples = len(
                self.training_examples_collection.example_wrappers_sp)
            self.training_examples_collection = self.training_examples_collection.filter_examples_not_in_key_set(
                keys_of_unlabeled_examples)
            print("DANGEROUS: FILTERED OUT UNLABELED EXAMPLES")

        # ------------------------------------------------

        stripped_examples_simple_program = self.training_examples_collection.get_labeled_example_wrappers_sp(
        )  # type: List[SimpleProgramExampleWrapper]
        self.examples_usable_for_testing = stripped_examples_simple_program  # type: List[SimpleProgramExampleWrapper]

        # internal_ex_format is fixed to CLAUSEDB above, so the simple-program
        # examples stored just before are replaced by their ClauseDB versions.
        if internal_ex_format == InternalExampleFormat.CLAUSEDB:
            stripped_examples_clausedb = ClauseDBExampleWrapper.get_clause_db_examples(
                stripped_examples_simple_program,
                background_knowledge=stripped_background_knowledge)
            self.examples_usable_for_testing = stripped_examples_clausedb  # type: List[ClauseDBExampleWrapper]
Beispiel #9
0
# Script set-up: parse the settings file and the background knowledge for a
# KEYS-format data set.  s_file() is defined outside this snippet.
file_name_settings = s_file()

parsed_settings = KeysSettingsParser().parse(file_name_settings)

# Debug-printing switches; only program conversion is switched on here.
debug_printing_example_parsing = False
debug_printing_tree_building = False
debug_printing_tree_pruning = False
debug_printing_program_conversion = True
debug_printing_get_classifier = False
debug_printing_classification = False
# No background-knowledge file is configured; None is handed to the
# background parser below.
fname_background_knowledge = None

internal_ex_format = InternalExampleFormat.CLAUSEDB

engine = DefaultEngine()
# NOTE(review): presumably selects ProbLog's "unknown predicates fail" mode
# instead of raising an error -- confirm against the ProbLog version in use.
engine.unknown = 1

language = parsed_settings.language  # type: TypeModeLanguage

# TODO: unify this with models --> let models use a prediction goal predicate label()
prediction_goal_handler = parsed_settings.get_prediction_goal_handler()  # type: KeysPredictionGoalHandler
prediction_goal = prediction_goal_handler.get_prediction_goal()  # type: Term

print('=== START parsing background ===')
background_knowledge_wrapper \
    = parse_background_knowledge_keys(fname_background_knowledge,
                                      prediction_goal)  # type: BackgroundKnowledgeWrapper

full_background_knowledge_sp \
    = background_knowledge_wrapper.get_full_background_knowledge_simple_program()  # type: Optional[SimpleProgram]
print('=== END parsing background ===\n')
Beispiel #10
0
def run_program(settings: ProgramSettings):
    """
    Build a decision tree for the data set described by ``settings``, prune it and convert it to a program.

    The file names are formed by appending the module-level suffixes
    ``kb_suffix`` (examples), ``s_suffix`` (settings) and ``bg_suffix``
    (background knowledge) to ``settings.filename_prefix``.

    :param settings: supplies ``filename_prefix``, ``debug_parsing``,
        ``kb_format``, ``internal_examples_format`` and ``treebuilder_type``
    :return: None
    :raises NotImplementedError: if ``settings.kb_format`` is None
    :raises KnowledgeBaseFormatException: if ``settings.kb_format`` is
        neither MODELS nor KEYS
    """
    examples_path = settings.filename_prefix + kb_suffix
    settings_path = settings.filename_prefix + s_suffix
    background_path = settings.filename_prefix + bg_suffix

    verbose = settings.debug_parsing

    # Guard clause: the input format has to be given explicitly.
    if settings.kb_format is None:
        raise NotImplementedError(
            'Automatic recognition of input format is not yet supported.')

    # SETTINGS FILE
    parser = SettingsParserMapper.get_settings_parser(settings.kb_format)
    parsed_settings = parser.parse(settings_path)

    # EXAMPLES AND BACKGROUND KNOWLEDGE, depending on the input format
    if settings.kb_format is KnowledgeBaseFormat.MODELS:
        possible_labels = parsed_settings.possible_labels
        examples, background_wrapper = preprocessing_examples_models(
            examples_path, parsed_settings,
            settings.internal_examples_format, background_path)
        # The models format has no prediction goal.
        prediction_goal = None
        index_of_label_var = None
    elif settings.kb_format is KnowledgeBaseFormat.KEYS:
        (examples, prediction_goal, index_of_label_var,
         possible_labels, background_wrapper) = preprocessing_examples_keys(
            examples_path, parsed_settings,
            settings.internal_examples_format, background_path,
            filter_out_unlabeled_examples=False)
    else:
        raise KnowledgeBaseFormatException(
            'Only the input formats Models and Key are supported.')

    engine = DefaultEngine()
    engine.unknown = 1

    full_background = background_wrapper.get_full_background_knowledge_simple_program()

    unpruned_tree = build_tree(
        settings.internal_examples_format,
        settings.treebuilder_type,
        parsed_settings.language,
        possible_labels,
        examples,
        prediction_goal=prediction_goal,
        full_background_knowledge_sp=full_background,
        debug_printing_tree_building=verbose,
        engine=engine)

    tree = prune_tree(unpruned_tree)

    program = convert_tree_to_program(
        settings.kb_format,
        settings.treebuilder_type,
        tree,
        parsed_settings.language,
        debug_printing=verbose,
        prediction_goal=prediction_goal,
        index_of_label_var=index_of_label_var)
Beispiel #11
0
1. Encode as a PrologString (subclass of LogicProgram):
    a. one example
    b. the background knowledge
    c. the logic program encoding the decision tree
2.

"""
import problog
from problog.engine import ClauseDB
from problog.logic import *
from problog.engine import DefaultEngine
from problog.program import PrologString

# Engine used for the programs defined below.
eng = DefaultEngine()
# NOTE(review): presumably selects ProbLog's "unknown predicates fail" mode
# instead of raising an error -- confirm against the ProbLog version in use.
eng.unknown = 1

# A single example, given as ground facts.
example1_prolog_string = PrologString("""
worn(gear).
worn(engine).
replaceable(gear).
""")

# Rules p0/p1 and the three outcomes sendback, fix and ok.
# Raw string: `\+` (Prolog negation as failure) is not a valid Python escape
# sequence, so a plain literal triggers an invalid-escape warning; the raw
# literal yields exactly the same text without the warning.
logic_program = PrologString(r"""
p0 :- worn(X).
p1 :- worn(X), \+ replaceable(X).
sendback :- worn(X), \+ replaceable(X).
fix :- worn(X), \+ p1.
ok :- \+ p0.
""")