def main_cross_validation(fname_examples: str, fname_settings: str, fname_background: str,
                          dir_fold_files: str, fname_prefix_fold: str, fold_start_index: int,
                          nb_folds: int, fold_suffix: str, dir_output_files: str,
                          filter_out_unlabeled_examples=False,
                          debug_printing_example_parsing=False,
                          debug_printing_tree_building=False,
                          debug_printing_tree_pruning=False,
                          debug_printing_program_conversion=False,
                          debug_printing_get_classifier=False,
                          debug_printing_classification=False):
    """Run n-fold cross-validation over the given example/settings/background files.

    Each key set in the fold data is held out once as the test set (the
    remaining key sets act as training data); afterwards the complete
    example set is processed as a whole.
    """
    problog_engine = DefaultEngine()
    # NOTE(review): presumably makes unknown predicates fail instead of raising -- confirm
    problog_engine.unknown = 1

    fold_data = FoldData.build_fold_data(
        fname_examples, fname_settings, fname_background,
        dir_fold_files, fname_prefix_fold, fold_start_index,
        nb_folds, fold_suffix, dir_output_files,
        filter_out_unlabeled_examples,
        debug_printing_example_parsing,
        debug_printing_tree_building,
        debug_printing_tree_pruning,
        debug_printing_program_conversion,
        debug_printing_get_classifier,
        debug_printing_classification,
        engine=problog_engine,
    )

    # Rotate through the key sets: the current one is the test set,
    # the others form the training data.
    for fold_index, test_key_set in enumerate(fold_data.all_key_sets):
        do_one_fold(fold_index, test_key_set, fold_data)

    do_all_examples(fold_data)
def get_labels_single_example_models(example: SimpleProgram, rules: SimpleProgram,
                                     possible_labels: Iterable[str],
                                     background_knowledge=None,
                                     debug_printing=False) -> List[str]:
    """
    Classifies a single example and returns a list of its labels.

    A ProbLog database is built from the background knowledge (if any), the
    example facts and the classification rules.  Each possible label is then
    queried in its own extended database; a label is assigned when its query
    probability exceeds 0.5.

    :param example: the example (set of facts) to classify
    :param rules: the logic program encoding the classifier
    :param possible_labels: the candidate labels to query for
    :param background_knowledge: optional clauses shared by all examples
    :param debug_printing: if True, print the queried database and per-query timings
    :return: the labels whose query evaluated with probability > 0.5
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # Background knowledge (when present) forms the base database; the
    # example facts and rules are appended on top of it.
    if background_knowledge is not None:
        db = eng.prepare(background_knowledge)
        for statement in example:
            db += statement
        for rule in rules:
            db += rule
    else:
        db = eng.prepare(rules)
        for statement in example:
            db += statement

    if debug_printing:
        print('\nQueried database:')
        for statement in db:
            print('\t' + str(statement))

    result_list = []
    for label in possible_labels:
        # Each label gets its own throwaway extension of the shared database.
        db_to_query = db.extend()
        db_to_query += Term('query')(label)
        start_time = time.time()
        result = problog.get_evaluatable().create_from(db_to_query, engine=eng).evaluate()
        end_time = time.time()
        if debug_printing:
            # BUGFIX: this timing print used to run unconditionally, spamming
            # stdout on every single classification; it is now debug-only.
            print("call time:", end_time - start_time)
        if result[label] > 0.5:
            result_list.append(label)
    return result_list
def get_full_background_knowledge_clausedb(self, engine=None) -> ClauseDB:
    """Return the full background knowledge as a ClauseDB, compiling and
    caching it on first use.

    :param engine: ProbLog engine used for compilation; a fresh
        DefaultEngine is created when omitted.
    :return: the cached (or freshly built) ClauseDB
    :raises Exception: when there is no background knowledge at all.
    """
    # Fast path: serve the cached compilation when it already exists.
    cached = self.full_background_knowledge_clausedb
    if cached is not None:
        return cached

    if engine is None:
        engine = DefaultEngine()
        engine.unknown = 1

    simple_program = self.get_full_background_knowledge_simple_program()
    if simple_program is None:
        raise Exception(
            "No sense in making an empty ClauseDB for an empty background knowledge"
        )

    # Compile once and memoize for subsequent calls.
    self.full_background_knowledge_clausedb = engine.prepare(simple_program)  # ClauseDB
    return self.full_background_knowledge_clausedb
def get_labels_single_example_probabilistic_models(
        example: SimpleProgram, rules: SimpleProgram,
        possible_labels: Iterable[str], background_knowledge=None,
        debug_printing=False) -> dict:
    """
    Computes the probability of every possible label for a single example.

    A ProbLog database is built from the background knowledge (if any), the
    example facts and the classification rules; all labels are queried
    together in a single evaluation.

    BUGFIX: the return annotation used to be ``List[str]``, which was wrong —
    this function has always returned the raw ProbLog evaluation result,
    i.e. a mapping from query term to probability.

    :param example: the example (set of facts) to classify
    :param rules: the logic program encoding the classifier
    :param possible_labels: the candidate labels to query for
    :param background_knowledge: optional clauses shared by all examples
    :param debug_printing: if True, print the queried database
    :return: mapping from each queried label term to its probability
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # Background knowledge (when present) forms the base database; the
    # example facts and rules are appended on top of it.
    if background_knowledge is not None:
        db = eng.prepare(background_knowledge)
        for statement in example:
            db += statement
        for rule in rules:
            db += rule
    else:
        db = eng.prepare(rules)
        for statement in example:
            db += statement

    if debug_printing:
        print('\nQueried database:')
        for statement in db:
            print('\t' + str(statement))

    # Query all labels at once: one evaluation instead of one per label.
    query_terms = [Term('query')(label) for label in possible_labels]
    db_to_query = db.extend()
    for query_term in query_terms:
        db_to_query += query_term

    query_results = problog.get_evaluatable().create_from(
        db_to_query, engine=eng).evaluate()
    return query_results
def get_example_databases(
        simple_program_examples: Iterable[SimpleProgramExampleWrapper],
        background_knowledge: Optional[LogicProgram] = None,
        models=False,
        engine=None) -> List[ClauseDBExampleWrapper]:
    """Convert SimpleProgram example wrappers into ClauseDB example wrappers.

    When background knowledge is given it is compiled once and every example
    extends that shared database; otherwise each example's own logic program
    is compiled independently.

    :param simple_program_examples: the examples to convert
    :param background_knowledge: optional clauses shared by all examples
    :param models: when True, also copy each example's label onto the wrapper
    :param engine: ProbLog engine to use; a fresh DefaultEngine when omitted
    :return: one ClauseDBExampleWrapper per input example, in input order
    """
    if engine is None:
        engine = DefaultEngine()
        engine.unknown = 1

    def _wrap(example, db_example) -> ClauseDBExampleWrapper:
        # Copy the per-example metadata onto the new wrapper.  This was
        # previously duplicated verbatim in both branches below.
        example_wrapper = ClauseDBExampleWrapper(logic_program=db_example)
        if example.classification_term is not None:
            example_wrapper.classification_term = example.classification_term
        if example.key is not None:
            example_wrapper.key = example.key
        if models:
            example_wrapper.label = example.label
        return example_wrapper

    clausedb_examples = []  # type: List[ClauseDBExampleWrapper]

    if background_knowledge is not None:
        # Compile the shared clauses once; each example extends that db.
        db = engine.prepare(background_knowledge)  # type: ClauseDB
        for example in simple_program_examples:
            db_example = db.extend()  # type: ClauseDB
            for statement in example:
                db_example += statement
            clausedb_examples.append(_wrap(example, db_example))
    else:
        for example in simple_program_examples:
            db_example = engine.prepare(example.logic_program)  # type: ClauseDB
            clausedb_examples.append(_wrap(example, db_example))

    return clausedb_examples
def get_default_decision_tree_builder(language, prediction_goal) -> TreeBuilder:
    """Assemble a TreeBuilder wired with the default components: a
    SimpleProgram query evaluator, a ProbLog test generator for the given
    language and prediction goal, the default split criterion, and the
    default leaf builder and stop criterion.
    """
    problog_engine = DefaultEngine()
    problog_engine.unknown = 1

    evaluator = SimpleProgramQueryEvaluator(engine=problog_engine)
    generator_builder = ProbLogTestGeneratorBuilder(
        language=language,
        query_head_if_keys_format=prediction_goal)

    node_splitter = Splitter(
        split_criterion_str=split_criterion(),
        test_evaluator=evaluator,
        test_generator_builder=generator_builder)

    return TreeBuilder(
        splitter=node_splitter,
        leaf_builder=LeafBuilder(),
        stop_criterion=StopCriterion())
def get_labels_single_example_keys(example: SimpleProgram,
                                   rules: SimpleProgram,
                                   prediction_goal: Term,
                                   index_of_label_arg: int,
                                   possible_labels: Iterable[str],
                                   background_knowledge=None,
                                   debug_printing=False) -> List[str]:
    """
    Classifies a single example and returns a list of its labels.

    A ProbLog database is built from the background knowledge (if any), the
    example facts and the rules; the prediction goal is queried directly and
    the label argument is extracted from each answer.

    :param prediction_goal: the goal term to query (contains a label variable)
    :param example: the example (set of facts) to classify
    :param rules: the logic program encoding the classifier
    :param possible_labels: the candidate labels (not queried directly here)
    :param index_of_label_arg: position of the label inside a query answer
    :param background_knowledge: optional clauses shared by all examples
    :param debug_printing: if True, print the queried database
    :return: the labels found for this example
    """
    eng = DefaultEngine()
    eng.unknown = 1

    # Background knowledge (when present) forms the base database; the
    # example facts and rules are appended on top of it.
    if background_knowledge is not None:
        db = eng.prepare(background_knowledge)
        for statement in example:
            db += statement
        for rule in rules:
            db += rule
    else:
        db = eng.prepare(rules)
        for statement in example:
            db += statement

    if debug_printing:
        print('\nQueried database:')
        for statement in db:
            print('\t' + str(statement))

    answers = eng.query(db, prediction_goal)
    # Each answer is a tuple of goal arguments; pick out the label argument.
    return [answer[index_of_label_arg] for answer in answers]
def preprocess_examples_and_background_knowledge(
        self, file_name_data: FileNameData,
        filter_out_unlabeled_examples: bool,
        debug_printing_options: DebugPrintingOptions):
    """Parse the settings, background knowledge, examples and labels for a
    keys-format dataset.

    Side effects — sets on ``self``: ``language``, ``prediction_goal``,
    ``training_examples_collection``, ``possible_labels`` and
    ``examples_usable_for_testing``.

    CLEANUP: the unused locals ``kb_format``, ``treebuilder_type`` and
    ``total_nb_of_examples`` were removed; they were assigned but never read.

    :param file_name_data: bundle of input file names (settings, background, examples)
    :param filter_out_unlabeled_examples: drop examples for which no label was found
    :param debug_printing_options: per-stage debug switches
    """
    engine = DefaultEngine()
    engine.unknown = 1

    # --- settings -------------------------------------------------------
    settings_file_parser = SettingsParserMapper.get_settings_parser(
        KnowledgeBaseFormat.KEYS)
    parsed_settings = settings_file_parser.parse(file_name_data.fname_settings)
    self.language = parsed_settings.language  # type: TypeModeLanguage

    internal_ex_format = InternalExampleFormat.CLAUSEDB

    prediction_goal_handler = parsed_settings.get_prediction_goal_handler()  # type: KeysPredictionGoalHandler
    self.prediction_goal = prediction_goal_handler.get_prediction_goal()  # type: Term

    # ------------------------------------------------
    # --- BACKGROUND KNOWLEDGE -----------------------
    # ------------------------------------------------
    background_knowledge_wrapper \
        = parse_background_knowledge_keys(file_name_data.fname_background,
                                          self.prediction_goal)  # type: BackgroundKnowledgeWrapper

    full_background_knowledge_sp \
        = background_knowledge_wrapper.get_full_background_knowledge_simple_program()  # type: Optional[SimpleProgram]
    stripped_background_knowledge \
        = background_knowledge_wrapper.get_stripped_background_knowledge()  # type: Optional[SimpleProgram]

    # ------------------------------------------------
    # EXAMPLES
    example_builder = KeysExampleBuilder(self.prediction_goal,
                                         debug_printing_options.example_parsing)
    self.training_examples_collection = example_builder.parse(
        internal_ex_format, file_name_data.fname_examples,
        full_background_knowledge_sp)  # type: ExampleCollection

    # ------------------------------------------------
    # --- LABELS -------------------------------------
    index_of_label_var = prediction_goal_handler.get_predicate_goal_index_of_label_var()  # type: int
    label_collector = LabelCollectorMapper.get_label_collector(
        internal_ex_format, self.prediction_goal, index_of_label_var,
        engine=engine)
    keys_of_unlabeled_examples = label_collector.extract_labels(
        self.training_examples_collection)
    nb_of_unlabeled_examples = len(keys_of_unlabeled_examples)

    possible_labels = label_collector.get_labels()  # type: Set[Label]
    self.possible_labels = list(possible_labels)  # type: List[Label]

    # ------------------------------------------------
    # TODO: change this back if necessary
    if filter_out_unlabeled_examples and nb_of_unlabeled_examples > 0:
        self.training_examples_collection = \
            self.training_examples_collection.filter_examples_not_in_key_set(
                keys_of_unlabeled_examples)
        print("DANGEROUS: FILTERED OUT UNLABELED EXAMPLES")

    # ------------------------------------------------
    # The stripped (background-free) example programs are what the tests use.
    stripped_examples_simple_program = \
        self.training_examples_collection.get_labeled_example_wrappers_sp()  # type: List[SimpleProgramExampleWrapper]
    self.examples_usable_for_testing = stripped_examples_simple_program  # type: List[SimpleProgramExampleWrapper]

    if internal_ex_format == InternalExampleFormat.CLAUSEDB:
        stripped_examples_clausedb = ClauseDBExampleWrapper.get_clause_db_examples(
            stripped_examples_simple_program,
            background_knowledge=stripped_background_knowledge)
        self.examples_usable_for_testing = stripped_examples_clausedb  # type: List[ClauseDBExampleWrapper]
# --- Script configuration for the keys-format experiment -------------------
file_name_settings = s_file()
parsed_settings = KeysSettingsParser().parse(file_name_settings)

# Per-stage debug switches (only program conversion is verbose here).
debug_printing_example_parsing = False
debug_printing_tree_building = False
debug_printing_tree_pruning = False
debug_printing_program_conversion = True
debug_printing_get_classifier = False
debug_printing_classification = False

# No separate background-knowledge file for this run.
fname_background_knowledge = None

internal_ex_format = InternalExampleFormat.CLAUSEDB

engine = DefaultEngine()
# NOTE(review): presumably makes unknown predicates fail instead of raising -- confirm
engine.unknown = 1

language = parsed_settings.language  # type: TypeModeLanguage

# TODO: unify this with models --> let models use a prediction goal predicate label()
prediction_goal_handler = parsed_settings.get_prediction_goal_handler()  # type: KeysPredictionGoalHandler
prediction_goal = prediction_goal_handler.get_prediction_goal()  # type: Term

print('=== START parsing background ===')
background_knowledge_wrapper \
    = parse_background_knowledge_keys(fname_background_knowledge,
                                      prediction_goal)  # type: BackgroundKnowledgeWrapper
full_background_knowledge_sp \
    = background_knowledge_wrapper.get_full_background_knowledge_simple_program()  # type: Optional[SimpleProgram]
print('=== END parsing background ===\n')
def run_program(settings: ProgramSettings):
    """Run the full learning pipeline for one dataset.

    Derives the example/settings/background file names from the settings
    prefix, parses them according to the configured knowledge-base format
    (MODELS or KEYS), builds and prunes a decision tree, and converts it
    into a logic program.

    :param settings: program configuration (file prefix, formats, debug flags)
    :raises NotImplementedError: when no knowledge-base format is configured
    :raises KnowledgeBaseFormatException: for unsupported formats
    """
    # Derive the input file names from the shared prefix.
    fname_labeled_examples = settings.filename_prefix + kb_suffix
    fname_settings = settings.filename_prefix + s_suffix

    # BACKGROUND KNOWLEDGE
    fname_background_knowledge = settings.filename_prefix + bg_suffix
    # background_knowledge = parse_background_knowledge(fname_background_knowledge)
    # else:
    #     background_knowledge = None

    debug_printing = settings.debug_parsing

    if settings.kb_format is None:
        raise NotImplementedError(
            'Automatic recognition of input format is not yet supported.')
    else:
        # SETTINGS FILE
        settings_file_parser = SettingsParserMapper.get_settings_parser(
            settings.kb_format)
        parsed_settings = settings_file_parser.parse(fname_settings)

        # Parse the examples.  The KEYS format additionally yields a
        # prediction goal and the index of its label argument; the MODELS
        # format has neither.
        if settings.kb_format is KnowledgeBaseFormat.MODELS:
            possible_labels = parsed_settings.possible_labels
            training_examples_collection, background_knowledge_wrapper \
                = preprocessing_examples_models(fname_labeled_examples,
                                                parsed_settings,
                                                settings.internal_examples_format,
                                                fname_background_knowledge)
            prediction_goal = None
            index_of_label_var = None
        elif settings.kb_format is KnowledgeBaseFormat.KEYS:
            training_examples_collection, prediction_goal, index_of_label_var, possible_labels, background_knowledge_wrapper = \
                preprocessing_examples_keys(fname_labeled_examples,
                                            parsed_settings,
                                            settings.internal_examples_format,
                                            fname_background_knowledge,
                                            filter_out_unlabeled_examples=False)
        else:
            raise KnowledgeBaseFormatException(
                'Only the input formats Models and Key are supported.')

        engine = DefaultEngine()
        engine.unknown = 1

        full_background_knowledge_sp = background_knowledge_wrapper.get_full_background_knowledge_simple_program(
        )

        # Learn the tree, prune it, then convert it into a logic program.
        tree = build_tree(
            settings.internal_examples_format,
            settings.treebuilder_type,
            parsed_settings.language,
            possible_labels,
            training_examples_collection,
            prediction_goal=prediction_goal,
            full_background_knowledge_sp=full_background_knowledge_sp,
            debug_printing_tree_building=debug_printing,
            engine=engine)
        tree = prune_tree(tree)

        program = convert_tree_to_program(
            settings.kb_format,
            settings.treebuilder_type,
            tree,
            parsed_settings.language,
            debug_printing=debug_printing,
            prediction_goal=prediction_goal,
            index_of_label_var=index_of_label_var)
1. Encode as a PrologString (subclass of LogicProgram):
    a. one example
    b. the background knowledge
    c. the logic program encoding the decision tree
2.
"""
import problog
from problog.engine import ClauseDB
from problog.logic import *
from problog.engine import DefaultEngine
from problog.program import PrologString

# ProbLog engine shared by the snippets below.
eng = DefaultEngine()
# NOTE(review): presumably makes unknown predicates fail instead of raising -- confirm
eng.unknown = 1

# One example: the observed facts about a single machine.
example1_prolog_string = PrologString("""
worn(gear).
worn(engine).
replaceable(gear).
""")

# The logic program encoding the decision tree over those facts.
logic_program = PrologString("""
p0 :- worn(X).
p1 :- worn(X), \+ replaceable(X).
sendback :- worn(X), \+ replaceable(X).
fix :- worn(X), \+ p1.
ok :- \+ p0.
""")