Ejemplo n.º 1
0
    def test_compile(self):
        """Check algorithm selection and validation in CDMVLP.compile().

        For every random dataset and every entry of _SUPPORTED_ALGORITHMS, a
        fresh model is compiled without argument (expected to select
        "synchronizer"), then with the explicit algorithm. The names "lol"
        and "gula" must raise ValueError; once "gula" is the only entry of
        CDMVLP._ALGORITHMS, compiling with it must raise NotImplementedError.
        """
        print(">> CDMVLP.compile(algorithm)")

        for _ in range(self._nb_tests):
            for algorithm in self._SUPPORTED_ALGORITHMS:
                dataset = random_StateTransitionsDataset(
                    nb_transitions=random.randint(1, self._nb_transitions),
                    nb_features=random.randint(1, self._nb_features),
                    nb_targets=random.randint(1, self._nb_targets),
                    max_feature_values=self._nb_feature_values,
                    max_target_values=self._nb_target_values)

                model = CDMVLP(features=dataset.features,
                               targets=dataset.targets)

                model.compile()
                self.assertEqual(model.algorithm,
                                 "synchronizer")  # default algorithm

                model.compile(algorithm=algorithm)
                self.assertEqual(model.algorithm, algorithm)

                self.assertRaises(ValueError, model.compile, "lol")
                self.assertRaises(ValueError, model.compile, "gula")

                # Temporarily replace the class-level algorithm list. The
                # finally clause guarantees the original list is put back even
                # when the assertion fails, so the patched class attribute
                # cannot leak into the other tests.
                original = CDMVLP._ALGORITHMS.copy()
                CDMVLP._ALGORITHMS = ["gula"]
                try:
                    self.assertRaises(NotImplementedError, model.compile,
                                      "gula")  # dataset not supported yet
                finally:
                    CDMVLP._ALGORITHMS = original
Ejemplo n.º 2
0
def random_CDMVLP(nb_features, nb_targets, max_feature_values,
                  max_target_values, algorithm):
    """Return a CDMVLP compiled with `algorithm` and fitted on random data.

    The training data comes from random_StateTransitionsDataset, called with
    100 as its first argument followed by the four size arguments received
    here. The model is declared over that dataset's features and targets.
    """
    training_set = random_StateTransitionsDataset(
        100, nb_features, nb_targets, max_feature_values, max_target_values)

    learner = CDMVLP(features=training_set.features,
                     targets=training_set.targets)
    learner.compile(algorithm=algorithm)
    learner.fit(dataset=training_set)
    return learner
    def test_next(self):
        """Check SynchronousConstrained.next() on a fixed and on random models.

        The unit part fits a model on a hand-written dataset and compares the
        successors of three feature states with hard-coded expectations.
        The random part checks that, for a random model and feature state,
        SynchronousConstrained.next() yields exactly the states produced by
        Synchronous.next() that are matched by none of the model constraints.
        """
        print(">> pylfit.semantics.SynchronousConstrained.next(feature_state, targets, rules)")

        # Unit test
        data = [
            ([0, 0, 0], [0, 0, 1]),
            ([0, 0, 0], [1, 0, 0]),
            ([1, 0, 0], [0, 0, 0]),
            ([0, 1, 0], [1, 0, 1]),
            ([0, 0, 1], [0, 0, 1]),
            ([1, 1, 0], [1, 0, 0]),
            ([1, 0, 1], [0, 1, 0]),
            ([0, 1, 1], [1, 0, 1]),
            ([1, 1, 1], [1, 1, 0]),
        ]
        feature_names = ["p_t-1", "q_t-1", "r_t-1"]
        target_names = ["p_t", "q_t", "r_t"]

        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data, feature_names=feature_names, target_names=target_names)

        model = CDMVLP(features=dataset.features, targets=dataset.targets)
        model.compile(algorithm="synchronizer")
        model.fit(dataset=dataset)

        # (raw feature state, expected set of successor states)
        unit_cases = [
            ([0, 0, 0], {(1, 0, 0), (0, 0, 1)}),
            ([1, 1, 1], {(1, 1, 0)}),
            ([0, 1, 0], {(1, 0, 1)}),
        ]
        for raw_state, expected_next in unit_cases:
            feature_state = Algorithm.encode_state(raw_state, model.features)
            reached = SynchronousConstrained.next(
                feature_state, model.targets, model.rules, model.constraints)
            self.assertEqual({tuple(s) for s in reached}, expected_next)

        # Random tests
        for _ in range(self._nb_tests):

            # Apply CDMVLP correctly
            model = random_CDMVLP(
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values,
                algorithm="synchronizer")

            feature_state = random.choice(model.feature_states())
            feature_state = Algorithm.encode_state(feature_state, model.features)

            target_states = SynchronousConstrained.next(
                feature_state, model.targets, model.rules, model.constraints)

            # Apply synchronous semantics
            candidates = Synchronous.next(feature_state, model.targets, model.rules)

            # Apply constraints: a candidate is kept only when no constraint
            # matches the concatenation of the feature state and the candidate
            expected = [
                s for s in candidates
                if not any(
                    c.matches(list(feature_state) + list(s))
                    for c in model.constraints)
            ]

            # Both inclusions are checked so the two collections hold the
            # same states
            for s2 in target_states:
                self.assertIn(s2, expected)

            for s2 in expected:
                self.assertIn(s2, target_states)
    def _check_rules_and_predictions(self, dataset, expected_string_rules,
                                     expected_string_constraints):
        """Check Synchronizer.fit() output against expected rules/constraints.

        Args:
            dataset: dataset given to Synchronizer.fit(); its `features`,
                `targets` and `data` attributes are read here.
            expected_string_rules: newline-separated textual rules, parsed
                with Rule.from_string(); blank lines are ignored.
            expected_string_constraints: newline-separated textual
                constraints, parsed the same way.

        The learned rules and constraints must be exactly the expected ones
        (both inclusions are checked), and the transitions predicted by a
        CDMVLP built from them over model.feature_states() must be exactly
        the transitions of `dataset.data`.
        """

        def parse(block):
            # Strip before filtering so whitespace-only lines are skipped
            # instead of being handed to Rule.from_string as empty strings.
            stripped = (line.strip() for line in block.strip().split("\n"))
            return [
                Rule.from_string(line, dataset.features, dataset.targets)
                for line in stripped if line
            ]

        def assert_all_in(items, reference, label):
            # Report every offending element on stderr before failing
            for item in items:
                if item not in reference:
                    eprint(label, item)
                self.assertTrue(item in reference)

        expected_rules = parse(expected_string_rules)
        expected_constraints = parse(expected_string_constraints)

        rules, constraints = Synchronizer.fit(dataset)

        assert_all_in(expected_rules, rules, "Missing rule: ")
        assert_all_in(rules, expected_rules, "Additional rule: ")
        assert_all_in(expected_constraints, constraints, "Missing constraint: ")
        # Learned constraints are compared with the expected ones; checking
        # them against themselves could never fail.
        assert_all_in(constraints, expected_constraints,
                      "Additional constraint: ")

        model = CDMVLP(dataset.features, dataset.targets, rules, constraints)

        expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)

        predicted = model.predict(model.feature_states())
        predicted = set(
            (tuple(s1), tuple(s2)) for (s1, S2) in predicted for s2 in S2)

        eprint()
        for done, transition in enumerate(expected, start=1):
            eprint("\rChecking transitions ", done, "/", len(expected), end='')
            self.assertTrue(transition in predicted)

        for done, transition in enumerate(predicted, start=1):
            eprint("\rChecking transitions ",
                   done,
                   "/",
                   len(predicted),
                   end='')
            self.assertTrue(transition in expected)
Ejemplo n.º 5
0
    def test_constructor(self):
        """Check the CDMVLP constructor on valid and malformed variables.

        A model built from a random dataset must expose the dataset features
        and targets, empty rules and constraints, and no algorithm. Each
        malformed variables description must then be rejected with the
        listed exception, first as `features` and then as `targets`.
        """
        print(">> CDMVLP(features, targets, rules)")

        def malformed_variables():
            # (expected exception, invalid description), in checking order;
            # fresh objects are built on every call
            return [
                (TypeError,
                 '[("x0", ["0","1"]), ("x1", ["0","1"]), ("x2", ["0","1"])]'
                 ),  # not list
                (TypeError, [["x0", ["0", "1"]], ("x1", ["0", "1"]),
                             ("x2", ["0", "1"])]),  # not tuple
                (TypeError, [("x0", "0", "1"), ("x1", "0", "1"),
                             ("x2", ["0", "1"])]),  # not tuple of size 2
                (TypeError, [("x0", ["0", "1"]), ("x1", '0","1"'),
                             ("x2", ["0", "1"])]),  # domain is not list
                (ValueError, [("x0", ["0", "1"]), ("x1", [0, "1"]),
                              ("x2", ["0", "1"])
                              ]),  # domain values are not string
            ]

        for _ in range(self._nb_tests):
            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            model = CDMVLP(features=dataset.features, targets=dataset.targets)
            reference_features = dataset.features
            reference_targets = dataset.targets

            self.assertEqual(model.features, reference_features)
            self.assertEqual(model.targets, reference_targets)
            self.assertEqual(model.rules, [])
            self.assertEqual(model.constraints, [])
            self.assertEqual(model.algorithm, None)

            # Exceptions:
            #-------------

            # Features format (targets stay the valid dataset targets)
            for error, bad_features in malformed_variables():
                self.assertRaises(error, CDMVLP, bad_features,
                                  reference_targets)

            # Targets format (features are a fixed well-formed description)
            well_formed_features = [("x0", ["0", "1"]), ("x1", ["0", "1"]),
                                    ("x2", ["0", "1"])]
            for error, bad_targets in malformed_variables():
                self.assertRaises(error, CDMVLP, well_formed_features,
                                  bad_targets)
Ejemplo n.º 6
0
    def test_summary(self):
        """Check the text printed by CDMVLP.summary().

        For each supported algorithm the printed summary is compared with a
        locally rebuilt expected text, first for a model whose rules and
        constraints were emptied, then for a fitted model. Finally a model
        on which compile() was never called must raise ValueError.
        """
        print(">> CDMVLP.summary()")

        def captured_summary(model):
            # Capture what model.summary() prints. sys.stdout is restored in
            # a finally clause so an exception cannot leave it redirected.
            previous_stdout = sys.stdout
            sys.stdout = buffer = StringIO()
            try:
                model.summary()
            finally:
                sys.stdout = previous_stdout
            return buffer.getvalue()

        def expected_summary(model, algorithm):
            # Rebuild the expected text from the model's current state
            parts = [
                "CDMVLP summary:\n", " Algorithm: " + algorithm + "\n",
                " Features: \n"
            ]
            parts.extend("  " + var + ": " + str(vals) + "\n"
                         for var, vals in model.features)
            parts.append(" Targets: \n")
            parts.extend("  " + var + ": " + str(vals) + "\n"
                         for var, vals in model.targets)
            for title, rules in ((" Rules:", model.rules),
                                 (" Constraints:", model.constraints)):
                if len(rules) == 0:
                    parts.append(title + " []\n")
                else:
                    parts.append(title + "\n")
                    parts.extend(
                        "  " + r.logic_form(model.features, model.targets) +
                        "\n" for r in rules)
            return "".join(parts)

        for _ in range(self._nb_tests):
            for algorithm in self._SUPPORTED_ALGORITHMS:
                # Empty CDMVLP
                model = random_CDMVLP(
                    nb_features=random.randint(1, self._nb_features),
                    nb_targets=random.randint(1, self._nb_targets),
                    max_feature_values=self._nb_feature_values,
                    max_target_values=self._nb_target_values,
                    algorithm=algorithm)

                model.rules = []
                model.constraints = []

                expected_print = expected_summary(model, algorithm)
                self.assertEqual(captured_summary(model), expected_print)

                # Usual CDMVLP
                model = random_CDMVLP(
                    nb_features=random.randint(2, self._nb_features),
                    nb_targets=random.randint(2, self._nb_targets),
                    max_feature_values=self._nb_feature_values,
                    max_target_values=self._nb_target_values,
                    algorithm=algorithm)

                expected_print = expected_summary(model, algorithm)
                self.assertEqual(captured_summary(model), expected_print)

            # Exceptions
            #------------

            # Reuses the features/targets of the last model built above
            model = CDMVLP(features=model.features, targets=model.targets)
            self.assertRaises(ValueError, model.summary)  # compile not called
Ejemplo n.º 7
0
    def test_predict(self):
        """Check CDMVLP.predict() against SynchronousConstrained.next().

        For each random dataset and supported algorithm, the prediction of a
        fitted model on the dataset's distinct feature states is compared
        with a local re-computation, before and after removing every rule of
        one randomly chosen target variable. Malformed inputs must raise
        TypeError (bad structure or state size) or ValueError (value not in
        the domain).
        """
        print(">> CDMVLP.predict()")

        def expected_output(model, state):
            # Encode each value as its index in the feature domain
            encoded = [
                model.features[var_id][1].index(str(val))
                for var_id, val in enumerate(state)
            ]
            target_states = SynchronousConstrained.next(
                encoded, model.targets, model.rules, model.constraints)
            # Decode value ids back to domain values; the id -1 is rendered
            # as "?"
            return [[
                "?" if val_id == -1 else model.targets[var_id][1][val_id]
                for var_id, val_id in enumerate(s)
            ] for s in target_states]

        for _ in range(self._nb_tests):

            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            for algorithm in self._SUPPORTED_ALGORITHMS:
                model = CDMVLP(features=dataset.features,
                               targets=dataset.targets)
                model.compile(algorithm=algorithm)
                model.fit(dataset=dataset)

                feature_states = list(set(
                    tuple(s1) for s1, s2 in dataset.data))

                prediction = model.predict(feature_states)

                for state_id, s1 in enumerate(feature_states):
                    output = expected_output(model, s1)
                    self.assertEqual(prediction[state_id][0], list(s1))
                    self.assertEqual(prediction[state_id][1], output)

                # Force missing value: drop every rule whose head is one
                # randomly chosen target. The index is drawn once, and the
                # upper bound is len - 1 because random.randint includes
                # both ends.
                removed_variable = random.randint(0, len(model.targets) - 1)
                model.rules = [
                    r for r in model.rules
                    if r.head_variable != removed_variable
                ]

                prediction = model.predict(feature_states)
                for state_id, s1 in enumerate(feature_states):
                    output = expected_output(model, s1)
                    self.assertEqual(prediction[state_id][1], output)

                # Exceptions:
                self.assertRaises(
                    TypeError, model.predict,
                    "")  # Feature_states bad format: is not a list
                self.assertRaises(
                    TypeError, model.predict,
                    [["0", "1"], 0, 10
                     ])  # Feature_states bad format: is not a list of list
                self.assertRaises(
                    TypeError, model.predict, [["0", "1"], [0, 10]]
                )  # Feature_states bad format: is not a list of list of string

                feature_states = [
                    list(s) for s in set(tuple(s1) for s1, s2 in dataset.data)
                ]
                state_id = random.randint(0, len(feature_states) - 1)
                original = feature_states[state_id].copy()

                # State shorter than the number of model features
                feature_states[state_id] = feature_states[
                    state_id][:-random.randint(1, len(dataset.features))]
                self.assertRaises(
                    TypeError, model.predict, feature_states
                )  # Feature_states bad format: size of state not correspond to model features <
                feature_states[state_id] = original.copy()

                # State longer than the number of model features
                feature_states[state_id].extend(
                    ["0"] * random.randint(1, 10))
                self.assertRaises(
                    TypeError, model.predict, feature_states
                )  # Feature_states bad format: size of state not correspond to model features >
                feature_states[state_id] = original.copy()

                var_id = random.randint(0, len(dataset.features) - 1)
                feature_states[state_id][var_id] = "bad_value"
                self.assertRaises(
                    ValueError, model.predict, feature_states
                )  # Feature_states bad format: value out of domain
                feature_states[state_id] = original.copy()
Ejemplo n.º 8
0
    def test_fit(self):
        """Check CDMVLP.fit() results and its error handling.

        For each random dataset, supported algorithm and verbosity level, the
        rules and constraints of the fitted model must equal the output of
        Synchronizer.fit() on the same dataset. fit() must then raise
        ValueError for a dataset of invalid type, an unknown algorithm or an
        unsupported dataset class, and NotImplementedError once "gula" is
        the model algorithm and the only entry of CDMVLP._ALGORITHMS.
        """
        print(">> CDMVLP.fit(dataset)")

        class newdataset(Dataset):
            # Dataset subclass used as an unsupported dataset; its
            # constructor stores nothing and does not call the parent one
            def __init__(self, data, features, targets):
                pass

        for _ in range(self._nb_tests):
            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            for algorithm in self._SUPPORTED_ALGORITHMS:
                for verbose in [0, 1]:

                    model = CDMVLP(features=dataset.features,
                                   targets=dataset.targets)
                    model.compile(algorithm=algorithm)
                    # Whatever fit() writes on stderr is captured and dropped
                    f = io.StringIO()
                    with contextlib.redirect_stderr(f):
                        model.fit(dataset=dataset, verbose=verbose)

                    expected_rules, expected_constraints = Synchronizer.fit(
                        dataset, complete=(algorithm == "synchronizer"))
                    self.assertEqual(expected_rules, model.rules)
                    self.assertEqual(expected_constraints, model.constraints)

                    # Exceptions
                    #------------

                    model = CDMVLP(features=dataset.features,
                                   targets=dataset.targets)
                    model.compile(algorithm=algorithm)
                    self.assertRaises(ValueError, model.fit, [],
                                      verbose)  # dataset is not of valid type

                    model.algorithm = "bad_value"
                    self.assertRaises(ValueError, model.fit, dataset,
                                      verbose)  # algorithm not supported

                    # Class attributes are patched below; each patch is
                    # undone in a finally clause so a failing assertion
                    # cannot leak it into the other tests.
                    model.algorithm = algorithm
                    original = CDMVLP._COMPATIBLE_DATASETS.copy()
                    CDMVLP._COMPATIBLE_DATASETS = [newdataset]
                    try:
                        self.assertRaises(
                            ValueError, model.fit, newdataset([], [], []),
                            verbose)  # dataset not supported by the algo
                    finally:
                        CDMVLP._COMPATIBLE_DATASETS = original

                    model.algorithm = "gula"
                    original = CDMVLP._ALGORITHMS.copy()
                    CDMVLP._ALGORITHMS = ["gula"]
                    try:
                        self.assertRaises(
                            NotImplementedError, model.fit, dataset,
                            verbose)  # dataset not supported yet
                    finally:
                        CDMVLP._ALGORITHMS = original
Ejemplo n.º 9
0
def evaluate_scalability_on_bn_benchmark(algorithm,
                                         benchmark,
                                         benchmark_name,
                                         semantics,
                                         run_tests,
                                         train_size=None,
                                         full_transitions=None):
    """
        Measure the learning run time of an algorithm over a given
        benchmark with a given number/proportion of training samples.
        One csv line is printed on stdout per run; progress is reported
        through eprint.

        Args:
            algorithm: String
                Name of the algorithm to be tested: "gula", "pride" and
                "brute-force" are run through a WDMVLP, "synchronizer"
                through a CDMVLP. Any other name prints an error and exits.
            benchmark: model object
                Benchmark program; its feature_states(), predict(),
                features and targets members are used.
            benchmark_name: String
                Label of the benchmark, written in the csv output.
            semantics: String
                Semantics used to generate the benchmark transitions.
            run_tests: int
                Number of runs to perform, at least 1.
            train_size: float in [0,1] or int
                Size of the training set in proportion (float in [0,1])
                or explicit (int). None means all transitions are used.
            full_transitions: list of (feature_state, target_state)
                Pre-computed transitions of the benchmark; generated from
                the benchmark when None. The list is shuffled in place.

        Returns:
            (int, float)
                The size of the last training set and the average run time
                in seconds, or -1 instead of the average as soon as one
                run times out.

        Raises:
            ValueError: if run_tests is lower than 1.
    """

    # Without this check the average below would be a division by zero
    if run_tests < 1:
        raise ValueError("run_tests must be at least 1")

    # 1) Generate transitions
    #-------------------------------------

    # Boolean network benchmarks only have rules for value 1, if none match next value is 0
    if full_transitions is None:
        eprint("Generating benchmark transitions ...")
        full_transitions = [
            (np.array(feature_state),
             np.array(["0" if x == "?" else "1" for x in target_state]))
            for feature_state in benchmark.feature_states()
            for target_state in benchmark.predict([feature_state], semantics)[
                tuple(feature_state)]
        ]

    # 2) Prepare scores containers
    #---------------------------
    results_time = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Select the training set
        #-----------------------
        random.shuffle(full_transitions)
        train = full_transitions

        # Complete, Proportion or explicit?
        if train_size is not None:
            if isinstance(train_size, float):  # percentage
                last_obs = max(int(train_size * len(full_transitions)), 1)
            else:  # exact number of transitions
                last_obs = train_size
            train = full_transitions[:last_obs]

        # DBG
        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" +
                   str(len(full_transitions)) + " transitions (" +
                   str(round(100 * len(train) / len(full_transitions), 2)) +
                   "%)")

        eprint(">>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        dataset = StateTransitionsDataset(train, benchmark.features,
                                          benchmark.targets)

        # csv format of results
        expected_train_size = train_size if train_size is not None else 1.0
        real_train_size = round(len(train) / (len(full_transitions)), 2)

        common_settings = \
        algorithm + "," +\
        semantics + "," +\
        benchmark_name + "," +\
        str(len(benchmark.features)) + "," +\
        str(len(full_transitions)) + "," +\
        "random_transitions" + "," +\
        str(expected_train_size) + "," +\
        str(real_train_size) + "," +\
        str(len(train))

        # 3.2) Learn from training set
        #-------------------------

        # Define a timeout
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(TIME_OUT)
        try:
            start = time.time()

            if algorithm in ["gula", "pride", "brute-force"]:
                model = WDMVLP(features=benchmark.features,
                               targets=benchmark.targets)
            elif algorithm in ["synchronizer"]:
                model = CDMVLP(features=benchmark.features,
                               targets=benchmark.targets)
            else:
                eprint("Error, algorithm not accepted: " + algorithm)
                exit()

            model.compile(algorithm=algorithm)
            model.fit(dataset)

            # Cancel the alarm right away so it cannot fire after learning
            signal.alarm(0)
            run_time = time.time() - start
            results_time.append(run_time)
        except TimeoutException:
            eprint(" TIME OUT")
            print(common_settings + "," + "-1")
            return len(train), -1
        finally:
            # Safety net: never leave a pending alarm behind, whatever
            # exception interrupted the learning
            signal.alarm(0)

        print(common_settings + "," + str(run_time))
        eprint(" " + str(round(run_time, 3)) + "s")

    # 4) Average scores
    #-------------------
    avg_run_time = sum(results_time) / run_tests

    eprint(">> AVG Run time: " + str(round(avg_run_time, 3)) + "s")

    return len(train), avg_run_time