Esempio n. 1
0
def main_mpe_semiring(args):
    """Run the semiring-based MPE computation and report its outcome.

    The result handler receives ``(True, (prob, facts))`` on success or
    ``(False, err)`` on failure, where ``err.trace`` holds the formatted
    traceback of the exception.

    :param args: parsed arguments; the attributes ``inputfile``, ``verbose``,
        ``web`` and ``output`` are read.
    """
    inputfile = args.inputfile

    init_logger(args.verbose)

    # ``args.web`` selects the JSON result printer, otherwise the plain one.
    result_handler = print_result_json if args.web else print_result

    # Only a file opened here may be closed afterwards; sys.stdout is left
    # open for the rest of the process.
    owns_output = args.output is not None
    outf = open(args.output, 'w') if owns_output else sys.stdout

    try:
        with Timer("Total"):
            try:
                pl = PrologFile(inputfile)

                # Build the formula from the program loaded above (the
                # previous code referenced an undefined name ``model``).
                lf = LogicFormula.create_from(pl, label_all=True)

                prob, facts = mpe_semiring(lf, args.verbose)
                result_handler((True, (prob, facts)), outf)

            except Exception as err:
                # Top-level boundary: hand the error, with its traceback
                # attached, to the result handler instead of crashing.
                err.trace = traceback.format_exc()
                result_handler((False, err), outf)
    finally:
        if owns_output:
            outf.close()
Esempio n. 2
0
def mpe_maxsat(dag, verbose=0, solver=None):
    """Compute an MPE assignment for ``dag`` by solving its CNF with MaxSAT.

    Queries found in ``dag`` are cleared (a warning is written to stderr).

    :param dag: formula to solve; iterated as ``(index, node, type)`` triples.
    :param verbose: verbosity level passed to ``init_logger``.
    :param solver: solver selection passed to ``get_solver``.
    :return: ``(prob, output_facts)``. ``output_facts`` is ``[]`` for a
        trivial CNF and ``None`` (with ``prob`` 1.0) when the solver returns
        no assignment.
    """
    if dag.queries():
        print('%% WARNING: ignoring queries in file', file=sys.stderr)
        dag.clear_queries()

    logger = init_logger(verbose)
    logger.info('Ground program size: %s' % len(dag))

    cnf = CNF.createFrom(dag)

    # Add a TrueConstraint for every evidence entry that the CNF does not
    # already report as true.
    for qn, qi in cnf.evidence():
        if not cnf.is_true(qi):
            cnf.add_constraint(TrueConstraint(qi))

    queries = list(cnf.labeled())

    logger.info('CNF size: %s' % cnf.clausecount)

    if cnf.is_trivial():
        return 1.0, []

    solver = get_solver(solver)

    with Timer('Solving'):
        assignment = solver.evaluate(cnf)
    weights = cnf.extract_weights(SemiringProbability())

    # Check for "no assignment" *before* building the frozenset:
    # frozenset(None) raises TypeError, which made this case unreachable.
    if assignment is None:
        return 1.0, None
    result = frozenset(assignment)

    prob = 1.0
    output_facts = []

    # When labeled queries exist, only they are reported as facts.
    for qn, qi, ql in queries:
        if qi in result:
            output_facts.append(qn)
        elif -qi in result:
            output_facts.append(-qn)

    for i, n, t in dag:
        if t != 'atom':
            continue
        # weights[i][0] is used for a positive literal in the result,
        # weights[i][1] for a negative one.
        if i in result:
            if not queries:
                output_facts.append(n.name)
            prob *= weights[i][0]
        elif -i in result:
            if not queries:
                output_facts.append(-n.name)
            prob *= weights[i][1]

    return prob, output_facts
Esempio n. 3
0
def mpe_maxsat(dag, verbose=0, solver=None, minpe=False):
    """Compute an MPE assignment for ``dag`` by solving its CNF with MaxSAT.

    :param dag: formula to solve; iterated as ``(index, node, type)`` triples.
    :param verbose: verbosity level passed to ``init_logger``.
    :param solver: solver selection passed to ``get_solver``.
    :param minpe: passed to the solver as ``invert_weights``.
    :return: ``(prob, output_facts)``. ``output_facts`` is ``[]`` for a
        trivial CNF and ``None`` (with ``prob`` 1.0) when the solver returns
        no assignment.
    """
    logger = init_logger(verbose)
    logger.info("Ground program size: %s" % len(dag))

    cnf = CNF.createFrom(dag, force_atoms=True)
    # Add a TrueConstraint for every evidence entry that the CNF does not
    # already report as true.
    for qn, qi in cnf.evidence():
        if not cnf.is_true(qi):
            cnf.add_constraint(TrueConstraint(qi))

    queries = list(cnf.labeled())

    logger.info("CNF size: %s" % cnf.clausecount)

    if cnf.is_trivial():
        return 1.0, []

    solver = get_solver(solver)

    with Timer("Solving"):
        assignment = solver.evaluate(cnf, invert_weights=minpe)
    weights = cnf.extract_weights(SemiringProbability())

    # Check for "no assignment" *before* building the frozenset:
    # frozenset(None) raises TypeError, which made this case unreachable.
    if assignment is None:
        return 1.0, None
    result = frozenset(assignment)

    prob = 1.0
    output_facts = []

    # When labeled queries exist, only they are reported as facts.
    for qn, qi, ql in queries:
        if qi in result:
            output_facts.append(qn)
        elif -qi in result:
            output_facts.append(-qn)

    for i, n, t in dag:
        if t != "atom":
            continue
        # weights[i][0] is used for a positive literal in the result,
        # weights[i][1] for a negative one.
        if i in result:
            if not queries:
                output_facts.append(n.name)
            prob *= weights[i][0]
        elif -i in result:
            if not queries:
                output_facts.append(-n.name)
            prob *= weights[i][1]

    return prob, output_facts
    def learn(self,
              significance=None,
              max_rule_length=None,
              beam_size=5,
              m_estimator=1,
              deterministic=False):
        """Learn a hypothesis from the stored data and keep its rules.

        :param significance: passed to the learner as ``p``.
        :param max_rule_length: passed to the learner as ``l``.
        :param beam_size: passed to the learner as ``beam_size``.
        :param m_estimator: passed to the learner as ``m``.
        :param deterministic: use ``ProbFOIL`` when true, ``ProbFOIL2``
            otherwise.
        :return: seconds elapsed while learning and extracting the rules.
        """
        log_name = 'structure_learner'
        if self.__log_file is not None:
            self.__log = init_logger(verbose=True,
                                     name=log_name,
                                     out=self.__log_file)
            self.__log.info('Random seed: %s' % self.__seed)

        learn_class = ProbFOIL if deterministic else ProbFOIL2

        # The learner is given the logger *name*, not the logger object.
        self.__learner = learn_class(self.__data,
                                     logger=log_name,
                                     p=significance,
                                     l=max_rule_length,
                                     beam_size=beam_size,
                                     m=m_estimator)

        time_start = time.time()
        self.__hypothesis = self.__learner.learn()
        self.__rules = self.__hypothesis.to_clauses(
            self.__hypothesis.target.functor)

        # First rule is failing rule: don't consider it if there are other rules.
        if len(self.__rules) > 1:
            del self.__rules[0]
        time_total = time.time() - time_start

        # NOTE(review): self.__log is only assigned above when a log file is
        # configured; presumably __init__ initialises it to None - confirm.
        if self.__log is not None:
            self.__log.info('ACCURACY: %f' % self.accuracy())
            self.__log.info('PRECISION: %f' % self.precision())
            self.__log.info('RECALL: %f' % self.recall())
            # Report the elapsed time (previously the raw start timestamp
            # was formatted here, and ACCURACY was logged a second time).
            self.__log.info('Total time:\t%.4fs' % time_total)
        return time_total
Esempio n. 5
0
from __future__ import print_function

import importlib
from abc import abstractmethod

import numpy as np
import pandas as pd

from problog.util import init_logger
from problog.logic import Term, Object, Constant, term2str, unquote

from synthlog.mercs.core.MERCS import MERCS
from synthlog.tasks.base_stored_object import StoredObject, cells_to_matrix

# Module-level logger, obtained from problog's init_logger with its default
# arguments.
logger = init_logger()


class Predictor(StoredObject):
    def __init__(
        self,
        scope=None,
        source_columns=None,
        target_columns=None,
        database=None,
        engine=None,
    ):
        """

        :param scope: A scope, containing table_cell predicates describing a table content.
        :param source_columns: A list of columns, where column is: column(<table_name>, <col_number>). <table_name> is a table name present in table_cell. These columns will be used as input columns for the predictor.
        :param target_columns: A list of columns, where column is: column(<table_name>, <col_number>). <table_name> is a table name present in table_cell. These columns will be used as columns to predict for the predictor.
Esempio n. 6
0
def main(args, result_handler=None):
    """Command-line entry point of the sampling tool.

    Parses ``args``, seeds the random generator, loads the model file and
    either estimates query probabilities (``--estimate``) or draws samples
    and hands them to the result handler.

    :param args: list of command-line arguments (without the program name).
    :param result_handler: optional callable receiving ``(success, payload)``
        plus ``output=`` (and ``oneline=`` on success). When given, or when
        ``--web`` is set, samples are requested in ``"dict"`` format;
        otherwise ``print_result`` is used with ``"str"`` format.
    """
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("filename")
    parser.add_argument(
        "-N",
        "-n",
        type=int,
        dest="n",
        default=argparse.SUPPRESS,
        help="Number of samples.",
    )
    parser.add_argument(
        "--with-facts",
        action="store_true",
        help="Also output choice facts (default: just queries).",
    )
    parser.add_argument("--with-probability",
                        action="store_true",
                        help="Show probability.")
    parser.add_argument("--as-evidence",
                        action="store_true",
                        help="Output as evidence.")
    parser.add_argument(
        "--propagate-evidence",
        dest="propagate_evidence",
        default=False,
        action="store_true",
        help="Enable evidence propagation",
    )
    parser.add_argument(
        "--dont-propagate-evidence",
        action="store_false",
        dest="propagate_evidence",
        default=False,
        help="Disable evidence propagation",
    )
    parser.add_argument("--oneline",
                        action="store_true",
                        help="Format samples on one line.")
    parser.add_argument(
        "--estimate",
        action="store_true",
        help="Estimate probability of queries from samples.",
    )
    parser.add_argument(
        "--timeout",
        "-t",
        type=int,
        default=0,
        help="Set timeout (in seconds, default=off).",
    )
    parser.add_argument("--output",
                        "-o",
                        type=str,
                        default=None,
                        help="Filename of output file.")
    parser.add_argument("--web", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--verbose",
                        "-v",
                        action="count",
                        help="Verbose output")
    parser.add_argument("--seed",
                        "-s",
                        type=float,
                        help="Random seed",
                        default=None)
    parser.add_argument("--full-trace", action="store_true")
    parser.add_argument("--strip-tag",
                        action="store_true",
                        help="Strip outermost tag from output.")
    parser.add_argument(
        "-a",
        "--arg",
        dest="args",
        action="append",
        help="Pass additional arguments to the cmd_args builtin.",
    )
    parser.add_argument("--progress",
                        help="show progress",
                        action="store_true")

    args = parser.parse_args(args)

    init_logger(args.verbose, "problog_sample")

    if args.seed is not None:
        random.seed(args.seed)
    else:
        # Draw a seed explicitly so that it can be logged and reused.
        seed = random.random()
        logging.getLogger("problog_sample").debug("Seed: %s", seed)
        random.seed(seed)

    pl = PrologFile(args.filename)

    outf = sys.stdout
    if args.output is not None:
        outf = open(args.output, "w")

    timer_started = False
    try:
        if args.timeout:
            start_timer(args.timeout)
            timer_started = True

        # noinspection PyUnusedLocal
        def signal_term_handler(*sigargs):
            sys.exit(143)

        signal.signal(signal.SIGTERM, signal_term_handler)

        if result_handler is not None or args.web:
            outformat = "dict"
            if result_handler is None:
                result_handler = print_result_json
        else:
            outformat = "str"
            result_handler = print_result
        try:
            if args.estimate:
                results = estimate(pl, **vars(args))
                # Honour --output: the estimate used to go to stdout even
                # when an output file had been opened for it.
                print(format_dictionary(results), file=outf)
            else:
                result_handler(
                    (True, sample(pl, format=outformat, **vars(args))),
                    output=outf,
                    oneline=args.oneline,
                )
        except Exception as err:
            # Top-level boundary: report the error through the handler.
            trace = traceback.format_exc()
            err.trace = trace
            result_handler((False, err), output=outf)
    finally:
        # Release the timer and the output file even when SystemExit (from
        # the SIGTERM handler) or a handler error propagates.
        if timer_started:
            stop_timer()

        if args.output is not None:
            outf.close()
Esempio n. 7
0
def main(argv=sys.argv[1:]):
    """Run two learning stages and print a report.

    Stage one learns a hypothesis from the input files. ``construct_ab_pred``
    is then called with that hypothesis, the learner and the file names; the
    files are reloaded and a second hypothesis is learned. Settings, both
    theories, the scores of the second hypothesis, learner statistics and
    timings are printed to stdout.

    :param argv: command-line arguments (without the program name).
    """
    args = argparser().parse_args(argv)

    if args.seed:
        seed = args.seed
    else:
        seed = str(random.random())
    random.seed(seed)

    logger = 'probfoil'

    logfile = None if args.log is None else open(args.log, 'w')

    def print_theory(hypothesis):
        """Print the clauses of ``hypothesis`` for its target functor."""
        clauses = hypothesis.to_clauses(hypothesis.target.functor)
        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])

    # try/finally guarantees that the log file is closed when learning or
    # reporting raises.
    try:
        log = init_logger(verbose=args.verbose, name=logger, out=logfile)

        log.info('Random seed: %s' % seed)

        # Load input files
        data = DataFile(*(PrologFile(source) for source in args.files))

        learn_class = ProbFOIL if args.probfoil1 else ProbFOIL2

        time_start = time.time()
        learn_one = learn_class(data, logger=logger, **vars(args))
        hypothesis_one = learn_one.learn()
        time_one = time.time() - time_start  # time for first stage

        # call function from defaults.py to construct abnormality predicate
        construct_ab_pred(hypothesis_one, learn_one, args.files)

        # reload data files and re-learn rules with new data
        data = DataFile(*(PrologFile(source) for source in args.files))
        learn_two = learn_class(data, logger=logger, **vars(args))
        hypothesis_two = learn_two.learn()

        time_total = time.time() - time_start
        time_two = time_total - time_one  # time for second stage

        print('================ SETTINGS ================')
        for kv in vars(args).items():
            print('%20s:\t%s' % kv)

        if learn_one.interrupted:
            print('================ PARTIAL THEORY ================')
        else:
            print('================= INTERMEDIATE THEORY =================')
        print_theory(hypothesis_one)

        print('================= FINAL THEORY =================')
        print_theory(hypothesis_two)

        print('==================== SCORES ====================')
        print('            Accuracy:\t',
              accuracy(hypothesis_two))
        print('           Precision:\t', precision(hypothesis_two))
        print('              Recall:\t', recall(hypothesis_two))
        print('================== STATISTICS ==================')
        for name, value in learn_one.statistics():
            print('%20s Stage One:\t%s' % (name, value))
        for name, value in learn_two.statistics():
            print('%20s Stage Two:\t%s' % (name, value))
        print('      Stage one time:\t%.4fs' % time_one)
        print('      Stage two time:\t%.4fs' % time_two)
        print('      Total time:\t%.4fs' % time_total)
    finally:
        if logfile:
            logfile.close()
Esempio n. 8
0
def main(args, result_handler=None):
    """Command-line entry point of the sampling tool.

    Parses ``args``, seeds the random generator, loads the model file and
    either estimates query probabilities (``--estimate``) or draws samples
    and hands them to the result handler.

    :param args: list of command-line arguments (without the program name).
    :param result_handler: optional callable receiving ``(success, payload)``
        plus ``output=`` (and ``oneline=`` on success). When given, or when
        ``--web`` is set, samples are requested in ``'dict'`` format;
        otherwise ``print_result`` is used with ``'str'`` format.
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('filename')
    parser.add_argument('-N',
                        '-n',
                        type=int,
                        dest='n',
                        default=argparse.SUPPRESS,
                        help="Number of samples.")
    parser.add_argument(
        '--with-facts',
        action='store_true',
        help="Also output choice facts (default: just queries).")
    parser.add_argument('--with-probability',
                        action='store_true',
                        help="Show probability.")
    parser.add_argument('--as-evidence',
                        action='store_true',
                        help="Output as evidence.")
    parser.add_argument('--propagate-evidence',
                        dest='propagate_evidence',
                        default=False,
                        action='store_true',
                        help="Enable evidence propagation")
    parser.add_argument('--dont-propagate-evidence',
                        action='store_false',
                        dest='propagate_evidence',
                        default=False,
                        help="Disable evidence propagation")
    parser.add_argument('--oneline',
                        action='store_true',
                        help="Format samples on one line.")
    parser.add_argument('--estimate',
                        action='store_true',
                        help='Estimate probability of queries from samples.')
    parser.add_argument('--timeout',
                        '-t',
                        type=int,
                        default=0,
                        help="Set timeout (in seconds, default=off).")
    parser.add_argument('--output',
                        '-o',
                        type=str,
                        default=None,
                        help="Filename of output file.")
    parser.add_argument('--web', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('--verbose',
                        '-v',
                        action='count',
                        help='Verbose output')
    parser.add_argument('--seed',
                        '-s',
                        type=float,
                        help='Random seed',
                        default=None)
    parser.add_argument('--full-trace', action='store_true')
    parser.add_argument('--strip-tag',
                        action='store_true',
                        help='Strip outermost tag from output.')
    parser.add_argument(
        '-a',
        '--arg',
        dest='args',
        action='append',
        help='Pass additional arguments to the cmd_args builtin.')
    parser.add_argument('--progress',
                        help='show progress',
                        action='store_true')

    args = parser.parse_args(args)

    init_logger(args.verbose, 'problog_sample')

    if args.seed is not None:
        random.seed(args.seed)
    else:
        # Draw a seed explicitly so that it can be logged and reused.
        seed = random.random()
        logging.getLogger('problog_sample').debug('Seed: %s', seed)
        random.seed(seed)

    pl = PrologFile(args.filename)

    outf = sys.stdout
    if args.output is not None:
        outf = open(args.output, 'w')

    timer_started = False
    try:
        if args.timeout:
            start_timer(args.timeout)
            timer_started = True

        # noinspection PyUnusedLocal
        def signal_term_handler(*sigargs):
            sys.exit(143)

        signal.signal(signal.SIGTERM, signal_term_handler)

        if result_handler is not None or args.web:
            outformat = 'dict'
            if result_handler is None:
                result_handler = print_result_json
        else:
            outformat = 'str'
            result_handler = print_result
        try:
            if args.estimate:
                results = estimate(pl, **vars(args))
                # Honour --output: the estimate used to go to stdout even
                # when an output file had been opened for it.
                print(format_dictionary(results), file=outf)
            else:
                result_handler(
                    (True, sample(pl, format=outformat, **vars(args))),
                    output=outf,
                    oneline=args.oneline)
        except Exception as err:
            # Top-level boundary: report the error through the handler.
            trace = traceback.format_exc()
            err.trace = trace
            result_handler((False, err), output=outf)
    finally:
        # Release the timer and the output file even when SystemExit (from
        # the SIGTERM handler) or a handler error propagates.
        if timer_started:
            stop_timer()

        if args.output is not None:
            outf.close()
Esempio n. 9
0
def main(argv=sys.argv[1:]):
    """Learn a theory from the input files and print a report.

    The settings, the learned theory, its accuracy/precision/recall, the
    learner statistics and the elapsed time are printed to stdout.

    :param argv: command-line arguments (without the program name).
    """
    args = argparser().parse_args(argv)

    if args.seed:
        seed = args.seed
    else:
        seed = str(random.random())
    random.seed(seed)

    logger = "probfoil"

    logfile = None if args.log is None else open(args.log, "w")

    # try/finally guarantees that the log file is closed when learning or
    # reporting raises.
    try:
        log = init_logger(verbose=args.verbose, name=logger, out=logfile)

        log.info("Random seed: %s" % seed)

        # Load input files
        data = DataFile(*(PrologFile(source) for source in args.files))

        learn_class = ProbFOIL if args.probfoil1 else ProbFOIL2

        time_start = time.time()
        learn = learn_class(data, logger=logger, **vars(args))

        hypothesis = learn.learn()
        time_total = time.time() - time_start

        print("================ SETTINGS ================")
        for kv in vars(args).items():
            print("%20s:\t%s" % kv)

        if learn.interrupted:
            print("================ PARTIAL THEORY ================")
        else:
            print("================= FINAL THEORY =================")
        clauses = hypothesis.to_clauses(hypothesis.target.functor)

        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])
        print("==================== SCORES ====================")
        print("            Accuracy:\t", accuracy(hypothesis))
        print("           Precision:\t", precision(hypothesis))
        print("              Recall:\t", recall(hypothesis))
        print("================== STATISTICS ==================")
        for name, value in learn.statistics():
            print("%20s:\t%s" % (name, value))
        print("          Total time:\t%.4fs" % time_total)
    finally:
        if logfile:
            logfile.close()
Esempio n. 10
0
def probfoil(**kwargs):
    """Learn rules from in-memory sources and print a report to stdout.

    Recognised keyword arguments (all optional):

    :param settings: source passed to ``PrologString``; combined with
        ``train`` (and with ``test``) to build the data files.
    :param train: source passed to ``PrologString`` for the training data.
    :param test: source passed to ``PrologString`` for a test set; when it
        is not ``None`` the learned hypothesis is also scored on it.
    :param seed: random seed; a missing or ``None`` seed is replaced by a
        generated one so that the run can be reproduced from the report.
    :param log: path of a log file to write, or ``None`` for no log file.
    :param verbose: verbosity for ``init_logger`` and the learner
        (default 0).
    :param m: learner option (default 1).
    :param beam_size: learner option (default 5).
    :param p: learner option (default ``None``).
    :param l: learner option (default ``None``).
    :param target: default ``None``; only shown in the settings report, not
        forwarded to the learner.
    :param symmetry_breaking: default ``True``; only shown in the settings
        report, not forwarded to the learner.
    :param probfoil1: a true value selects ``ProbFOIL`` instead of
        ``ProbFOIL2``.
    """
    args = kwargs

    # Treat an explicit seed=None like a missing seed: random.seed(None)
    # would silently use a non-reproducible system seed.
    seed = args.get('seed')
    if seed is None:
        seed = str(random.random())
        args['seed'] = seed
    random.seed(seed)

    logger = 'probfoil'

    # Fill in defaults; the insertion order determines the order of the
    # SETTINGS section printed below.
    defaults = (
        ('log', None),
        ('verbose', 0),
        ('m', 1),
        ('beam_size', 5),
        ('p', None),
        ('l', None),
        ('target', None),
        ('symmetry_breaking', True),
    )
    for key, value in defaults:
        args.setdefault(key, value)

    # These entries are inputs, not settings: remove them from the report.
    settings = args.pop('settings', None)
    train = args.pop('train', None)
    test = args.pop('test', None)

    # An explicit log=None means "no log file" (open(None, 'w') would fail).
    logfile = open(args['log'], 'w') if args['log'] is not None else None

    # try/finally guarantees that the log file is closed when learning or
    # reporting raises.
    try:
        log = init_logger(verbose=args['verbose'], name=logger, out=logfile)

        log.info('Random seed: %s' % seed)

        # Load input files
        data = DataFile(*(PrologString(source)
                          for source in [settings, train]))

        # Test the flag's value, not its mere presence, so that
        # probfoil1=False keeps the default learner.
        learn_class = ProbFOIL if args.get('probfoil1') else ProbFOIL2

        # Options shared by the training and the test learner.
        learner_options = dict(logger=logger,
                               seed=seed,
                               log=args['log'],
                               verbose=args['verbose'],
                               m=args['m'],
                               beam_size=args['beam_size'],
                               p=args['p'],
                               l=args['l'])

        time_start = time.time()
        learn = learn_class(data, **learner_options)

        hypothesis = learn.learn()
        time_total = time.time() - time_start

        # Store scores
        train_accuracy = accuracy(hypothesis)
        train_precision = precision(hypothesis)
        train_recall = recall(hypothesis)

        # Load test data
        has_test = test is not None
        if has_test:
            test_data = DataFile(*(PrologString(source)
                                   for source in [settings, test]))
            tester = learn_class(test_data, **learner_options)
            test_hypothesis = tester.test_rule(hypothesis)

            # Store scores
            test_accuracy = accuracy(test_hypothesis)
            test_precision = precision(test_hypothesis)
            test_recall = recall(test_hypothesis)

        print('================ SETTINGS ================')
        for kv in args.items():
            print('%20s:\t%s' % kv)

        if learn.interrupted:
            print('================ PARTIAL THEORY ================')
        else:
            print('================= FINAL THEORY =================')
        clauses = hypothesis.to_clauses(hypothesis.target.functor)

        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])

        print('==================== SCORES ====================')
        print('            Train Set')
        print('             Accuracy:\t', train_accuracy)
        print('            Precision:\t', train_precision)
        print('               Recall:\t', train_recall)
        if has_test:
            print('             Test Set')
            print('             Accuracy:\t', test_accuracy)
            print('            Precision:\t', test_precision)
            print('               Recall:\t', test_recall)
        print('================== STATISTICS ==================')
        for name, value in learn.statistics():
            print('%20s:\t%s' % (name, value))
        print('          Total time:\t%.4fs' % time_total)
    finally:
        if logfile:
            logfile.close()


#def main(argv=sys.argv[1:]):
#    args = argparser().parse_args(argv)
#
#    if args.seed:
#        seed = args.seed
#    else:
#        seed = str(random.random())
#    random.seed(seed)
#
#    logger = 'probfoil'
#
#    if args.log is None:
#        logfile = None
#    else:
#        logfile = open(args.log, 'w')
#
#    log = init_logger(verbose=args.verbose, name=logger, out=logfile)
#
#    log.info('Random seed: %s' % seed)
#
#    # Load input files
#    data = DataFile(*(PrologFile(source) for source in args.files))
#
#    if args.probfoil1:
#        learn_class = ProbFOIL
#    else:
#        learn_class = ProbFOIL2
#
#    time_start = time.time()
#    learn = learn_class(data, logger=logger, **vars(args))
#
#    hypothesis = learn.learn()
#    time_total = time.time() - time_start
#
#    print ('================ SETTINGS ================')
#    for kv in vars(args).items():
#        print('%20s:\t%s' % kv)
#
#    if learn.interrupted:
#        print('================ PARTIAL THEORY ================')
#    else:
#        print('================= FINAL THEORY =================')
#    rule = hypothesis
#    rules = rule.to_clauses(rule.target.functor)
#
#    # First rule is failing rule: don't print it if there are other rules.
#    if len(rules) > 1:
#        for rule in rules[1:]:
#            print (rule)
#    else:
#        print (rules[0])
#    print ('==================== SCORES ====================')
#    print ('            Accuracy:\t', accuracy(hypothesis))
#    print ('           Precision:\t', precision(hypothesis))
#    print ('              Recall:\t', recall(hypothesis))
#    print ('================== STATISTICS ==================')
#    for name, value in learn.statistics():
#        print ('%20s:\t%s' % (name, value))
#    print ('          Total time:\t%.4fs' % time_total)
#
#    if logfile:
#        logfile.close()
#
#def argparser():
#    parser = argparse.ArgumentParser()
#    parser.add_argument('files', nargs='+')
#    parser.add_argument('-1', '--det-rules', action='store_true', dest='probfoil1',
#                        help='learn deterministic rules')
#    parser.add_argument('-m', help='parameter m for m-estimate', type=float,
#                        default=argparse.SUPPRESS)
#    parser.add_argument('-b', '--beam-size', type=int, default=5,
#                        help='size of beam for beam search')
#    parser.add_argument('-p', '--significance', type=float, default=None,
#                        help='rule significance threshold', dest='p')
#    parser.add_argument('-l', '--length', dest='l', type=int, default=None,
#                        help='maximum rule length')
#    parser.add_argument('-v', action='count', dest='verbose', default=None,
#                        help='increase verbosity (repeat for more)')
#    parser.add_argument('--symmetry-breaking', action='store_true',
#                        help='avoid symmetries in refinement operator')
#    parser.add_argument('--target', '-t', type=str,
#                        help='specify predicate/arity to learn (overrides settings file)')
#    parser.add_argument('-s', '--seed', help='random seed', default=None)
#    parser.add_argument('--log', help='write log to file', default=None)
#
#    return parser
#
#
#if __name__ == '__main__':
#    main()