def __init__(self, filename, export_proba=False):
    """Connect to the SQLite database stored at ``filename``.

    :param filename: Path of the SQLite file. It is passed through
        ``unquote`` before being stored and opened.
    :param export_proba: Flag stored as ``self.proba``.
        NOTE(review): presumably toggles exporting probabilities --
        confirm against the methods that read it.
    """
    db_path = unquote(filename)
    self.filename = db_path
    self.connection = sqlite3.connect(db_path)
    self.cursor = self.connection.cursor()
    self.proba = export_proba
    # Table set-up runs only once the connection and cursor exist.
    self.__init_tables()
    # Starts out empty. NOTE(review): presumably remembers what was
    # already inserted -- confirm against the insert methods.
    self.__previous_insert = set()
def __init__(
    self,
    modelname,
    scope,
    source_columns,
    target_columns,
    database=None,
    engine=None,
    parameters=None,
):
    """
    Resolve a scikit-learn class by name and forward it to the parent class.

    :param modelname: Name of the scikit-learn classifier to use, given as
        the dotted path without the leading ``sklearn`` package. A decision
        tree, for example, is ``"tree.DecisionTreeClassifier"``.
    :param scope: A scope containing ``table_cell`` predicates that
        describe the content of a table.
    :param source_columns: A list of columns, each of the form
        ``column(<table_name>, <col_number>)`` where ``<table_name>`` is a
        table present in ``table_cell``. These are the input columns of
        the predictor.
    :param target_columns: A list of columns in the same
        ``column(<table_name>, <col_number>)`` form. These are the columns
        the predictor has to predict.
    :param database: The ProbLog database.
    :param engine: The ProbLog engine.
    :param parameters: Dictionary of parameters passed to the constructor
        of the class named by ``modelname``.
    """
    self.modelname = "sklearn.{}".format(unquote(modelname))
    # The qualified name always contains a dot (from the "sklearn." prefix),
    # so everything before the last dot is the module and the rest the class.
    module_path, _, class_name = self.modelname.rpartition(".")
    sklearn_module = importlib.import_module(module_path)
    super().__init__(
        getattr(sklearn_module, class_name),
        scope=scope,
        source_columns=source_columns,
        target_columns=target_columns,
        database=database,
        engine=engine,
        parameters=parameters,
    )
def scope_to_tables(scope, kwargs):
    """Build one ``Table`` object for every ``table`` term found in ``scope``.

    :param scope: Scope searched for ``table``, ``table_cell`` and
        ``table_cell_type`` terms.
    :param kwargs: Dictionary forwarded as keyword arguments to
        ``get_terms_from_scope``.
    :return: List of ``Table`` objects, in the order of the ``table`` terms.
    """
    table_terms = get_terms_from_scope(scope, "table", **kwargs)
    cell_terms = get_terms_from_scope(scope, "table_cell", **kwargs)
    type_terms = get_terms_from_scope(scope, "table_cell_type", **kwargs)

    def build_table(table_term):
        # Cells and cell types belong to a table when their first argument
        # equals the first argument of the table term.
        table_id = table_term.args[0]
        data = table_cells_to_matrix(
            [cell for cell in cell_terms if cell.args[0] == table_id]
        )
        type_data = table_cell_types_to_matrix(
            [cell for cell in type_terms if cell.args[0] == table_id]
        )
        # NOTE(review): arguments 1 and 2 are decremented by one,
        # presumably turning 1-based offsets into 0-based ones, and the
        # argument pairs are swapped when passed to Range -- confirm
        # against the Range signature.
        t_range = Range(
            table_term.args[2].value - 1,
            table_term.args[1].value - 1,
            table_term.args[4].value,
            table_term.args[3].value,
        )
        return Table(
            data,
            type_data,
            t_range,
            name=unquote(term2str(table_id.value)),
            orientations=[Orientation.vertical],
        )

    return [build_table(table_term) for table_term in table_terms]
def cell_to_atoms(
    table_name,
    header,
    cell_row,
    column_type,
    column_unique_values,
    cell_value,
    **kwargs
):
    """Turn one table cell into a list of ``(atom, probability)`` pairs.

    The result always starts with ``row(<row_id>)`` at probability 1.0,
    where ``<row_id>`` is ``<table_name>_r<cell_row>``. For a column whose
    type is ``"string"``, one atom ``<header>_<value>(<row_id>)`` is added
    for every unique value of the column: probability 1.0 for the value
    equal to the cell's own value, 0.0 for all others.

    All textual inputs are stringified, unquoted and lower-cased first.

    :param table_name: Name of the table containing the cell.
    :param header: Header of the cell's column.
    :param cell_row: Row identifier of the cell.
    :param column_type: Type of the column; only ``"string"`` yields
        value atoms.
    :param column_unique_values: Term list of the column's unique values.
    :param cell_value: Value stored in the cell.
    :param kwargs: Accepted but not used.
    :return: List of ``(Term, float)`` tuples.
    """

    def normalise(term):
        return unquote(str(term)).lower()

    table_name = normalise(table_name)
    header = normalise(header)
    cell_row = normalise(cell_row)
    column_type = normalise(column_type)
    cell_value = normalise(cell_value)
    unique_values = term2list(column_unique_values)

    row_id = table_name + "_r" + cell_row
    atoms = [(Term("row", Constant(row_id)), 1.0)]
    if column_type != "string":
        return atoms

    for unique_value in unique_values:
        candidate = unquote(unique_value).lower()
        # Only the value actually present in the cell is certain; every
        # other unique value of the column gets probability 0.
        probability = 1.0 if candidate == cell_value else 0.0
        atoms.append(
            (Term(header + "_" + candidate, Constant(row_id)), probability)
        )
    return atoms
def probfoil_loop(scope, target_predicate, **kwargs):
    """Learn one ProbFOIL+ hypothesis per value of the target predicate.

    Every fact of ``scope`` is queried and sorted into one of three groups:

    * functors of the form ``<target>_<constant>`` are the examples for
      the target value ``<constant>``;
    * the bare ``<target>`` functor is skipped altogether;
    * everything else is background knowledge.

    For each target value a ProbFOIL input is built in which the examples
    of that value are positive and the examples of all other values are
    negative, and ``ProbFOIL2`` is run on it with ``beam_size=10, l=4``.

    :param scope: Scope term whose facts are used for learning.
    :param target_predicate: Term naming the predicate to learn.
    :param kwargs: Must contain ``engine`` and ``database``.
    :return: For every target value in turn, ``rules2scope(hypothesis)``
        followed by ``evaluate_probfoil_rules(hypothesis)``, concatenated
        into one list.
    """
    target = unquote(term2str(target_predicate))
    target_prefix = target + "_"
    engine = kwargs["engine"]
    database = kwargs["database"]
    answers = engine.query(database, Term("':'", scope, None), subcall=True)

    background_facts = []
    target_facts = {}
    base_facts = []
    mode_facts = []
    # One set replaces the two parallel lists that always held exactly the
    # same functors; membership tests become O(1).
    declared = set()

    for answer in answers:
        fact = answer[1]
        functor = fact.functor
        args = ["'" + term2str(val) + "'" for val in fact.args]

        # The bare target predicate contributes nothing: no example, no
        # background fact, no typing and no mode.
        if functor == target:
            continue

        clause = functor + "(" + ",".join(args) + ")."
        if functor.startswith(target_prefix) and len(functor) > len(target_prefix):
            target_constant = functor[len(target_prefix):]
            target_facts.setdefault(target_constant, []).append(clause)
        else:
            background_facts.append(clause)

        # Typing and declarative bias are emitted once per functor.
        if functor in declared:
            continue
        declared.add(functor)

        # NOTE(review): modes are also emitted for the propositionalized
        # target functors (<target>_<constant>), unlike in `probfoil`,
        # which excludes them. Kept as is; confirm whether intended.
        if len(args) == 1:
            base_facts.append("base(" + functor + "(row_id)).")
            mode_facts.append("mode(" + functor + "(+)).")
        elif len(args) == 2:
            base_facts.append(
                "base(" + functor + "(row_id, " + functor + "_constant))."
            )
            mode_facts.append("mode(" + functor + "(+, +)).")
            mode_facts.append("mode(" + functor + "(-, +)).")
            mode_facts.append("mode(" + functor + "(+, -)).")

    result = []
    for target_constant, pos_examples in target_facts.items():
        # Examples of every other target value act as negatives.
        neg_examples = [
            example
            for other_constant, examples in target_facts.items()
            if other_constant != target_constant
            for example in examples
        ]

        # Create ProbFOIL input
        probfoil_input = create_probfoil_inputfile(
            base_facts,
            mode_facts,
            target_prefix + target_constant,
            background_facts,
            pos_examples,
            neg_examples,
        )

        # Run ProbFOIL+
        hypothesis = ProbFOIL2(
            DataFile(PrologString(probfoil_input)), beam_size=10, l=4
        ).learn()
        result += rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)
    return result
def probfoil(scope, target_predicate, **kwargs):
    """Learn a single ProbFOIL+ hypothesis for the target predicate.

    All facts of ``scope`` are queried and written into one ProbFOIL input
    that starts with ``learn(<target>/1).``:

    * a fact whose functor is ``<target>_<suffix>`` becomes an example of
      ``<target>`` on its first argument -- a certain fact when the
      functor ends in ``yes``, a ``0::`` probability fact otherwise;
    * every other fact is copied as is;
    * the first time a functor is seen, its ``base`` typing and (for
      non-target functors) its ``mode`` declarations are appended.

    ``ProbFOIL2`` is then run on that input with ``beam_size=10, l=4``.

    :param scope: Scope term whose facts are used for learning.
    :param target_predicate: Term naming the predicate to learn.
    :param kwargs: Must contain ``engine`` and ``database``.
    :return: ``rules2scope(hypothesis)`` followed by
        ``evaluate_probfoil_rules(hypothesis)``.
    """
    target = unquote(term2str(target_predicate))
    target_prefix = target + "_"
    engine = kwargs["engine"]
    database = kwargs["database"]
    answers = engine.query(database, Term("':'", scope, None), subcall=True)

    # Pieces are collected in a list and joined once at the end, instead of
    # growing one string with += on every fact.
    pieces = ["learn(" + target + "/1).\n"]
    # One set replaces the two parallel lists that always held exactly the
    # same functors; membership tests become O(1).
    declared = set()

    for answer in answers:
        fact = answer[1]
        functor = fact.functor
        args = ["'" + term2str(val) + "'" for val in fact.args]
        has_target_prefix = functor.startswith(target_prefix)

        # Propositionalized facts of the target predicate are folded back
        # into examples of the target itself; this needs a non-empty
        # suffix after the prefix.
        if has_target_prefix and len(functor) > len(target_prefix):
            if functor.endswith("yes"):
                pieces.append(target + "(" + args[0] + ").\n")
            else:
                pieces.append("0::" + target + "(" + args[0] + ").\n")
        else:
            pieces.append(functor + "(" + ",".join(args) + ").\n")

        # Typing and declarative bias are emitted once per functor.
        if functor in declared:
            continue
        declared.add(functor)

        # Typing of predicates
        if has_target_prefix:
            pieces.append("base(" + target + "(row_id)).\n")
        elif len(args) == 1:
            pieces.append("base(" + functor + "(row_id)).\n")
        elif len(args) == 2:
            pieces.append(
                "base(" + functor + "(row_id, " + functor + "_constant)).\n"
            )

        # Declarative bias: target functors never get a mode.
        if has_target_prefix:
            continue
        if len(args) == 1:
            pieces.append("mode(" + functor + "(+)).\n")
        elif len(args) == 2:
            pieces.append("mode(" + functor + "(+, -)).\n")
            pieces.append("mode(" + functor + "(-, +)).\n")
            pieces.append("mode(" + functor + "(+, +)).\n")

    probfoil_input = "".join(pieces)

    # Run ProbFOIL+
    hypothesis = ProbFOIL2(
        DataFile(PrologString(probfoil_input)), beam_size=10, l=4
    ).learn()
    return rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)
def join(sep, terms):
    """Join the unquoted string forms of ``terms`` with the unquoted ``sep``.

    :param sep: Separator; unquoted before use.
    :param terms: Iterable of terms; each is stringified and unquoted.
    :return: The joined text passed through ``make_safe``.
    """
    separator = unquote(sep)
    joined = separator.join(unquote(str(term)) for term in terms)
    return make_safe(joined)
def concat(terms):
    """Concatenate the unquoted string forms of ``terms``.

    :param terms: Iterable of terms; each is stringified and unquoted.
    :return: The concatenated text passed through ``make_safe``.
    """
    concatenated = "".join(unquote(str(term)) for term in terms)
    return make_safe(concatenated)
def insert_str_cell(matrix, indices, value):
    """Insert the unquoted ``value`` into ``matrix`` at ``indices``.

    :param matrix: Matrix handed to ``insert_value``.
    :param indices: Term list of indices; converted with ``term2list``.
    :param value: Quoted string value; unquoted before insertion.
    :return: Whatever ``insert_value`` returns.
    """
    return insert_value(matrix, term2list(indices), unquote(value))
def get_slice(term):
    """Extract the two bounds of a slice term.

    :param term: Term whose unquoted functor must be ``:``.
    :return: Tuple holding the ``value`` of the first and of the second
        argument of ``term``.
    :raises UserError: If the functor of ``term`` is not ``:``.
    """
    if unquote(term.functor) != ":":
        raise UserError("Term is not a slice: " + str(term))
    return term.args[0].value, term.args[1].value
def load_blocks(matrix, filename, wid, sid, row_slice, col_slice):
    """Register the cell-loading parameters on the object behind ``matrix``.

    Nothing is done when ``matrix`` is exactly an ``int``. Otherwise
    ``matrix.functor`` is taken as the target object and its
    ``set_load_cells_parameters`` method is called.

    :param matrix: Either an ``int`` (ignored) or a term whose ``functor``
        exposes ``set_load_cells_parameters``.
    :param filename: Quoted file name; unquoted before being passed on.
    :param wid: Term whose ``functor`` is passed on.
    :param sid: Term whose ``functor`` is passed on.
    :param row_slice: Passed on unchanged.
    :param col_slice: Passed on unchanged.
    :return: Always the empty tuple.
    """
    # Exact type test on purpose: subclasses of int (e.g. bool) are not
    # treated as plain ints here.
    if type(matrix) is int:
        return ()
    target = matrix.functor
    target.set_load_cells_parameters(
        unquote(filename), wid.functor, sid.functor, row_slice, col_slice
    )
    return ()
def str2term(term_string):
    """Build a ``Term`` whose functor is the unquoted ``term_string``.

    :param term_string: Quoted string naming the functor.
    :return: ``Term`` constructed from the unquoted string.
    """
    functor = unquote(term_string)
    return Term(functor)
def string_to_clause(term_string, engine=None, database=None, **kwargs):
    """Parse the unquoted ``term_string`` and return its first clause.

    :param term_string: Quoted string containing Prolog source text.
    :param engine: Accepted but not used.
    :param database: Accepted but not used.
    :param kwargs: Accepted but not used.
    :return: First element of the parse result produced by a
        ``PrologParser`` built on an ``ExtendedPrologFactory``.
    """
    parser = PrologParser(ExtendedPrologFactory())
    clauses = parser.parseString(unquote(term_string))
    # Only the first parsed clause is returned; any further ones are dropped.
    return clauses[0]