Example #1
    def randomize_hypotheses(self):
        # Start by just randomizing grammars and neighbors and checking that
        # their scores make sense.
        # TODO move on to a search algorithm such as simulated annealing
        good_hypotheses = set()
        # Segment inventory: the lowercase letters 'a'..'z'.
        a = ord('a')
        possible_segments = [chr(i) for i in range(a, a + 26)]
        for _ in range(100000):
            hypothesis = Hypothesis.randomize_grammar(
                self.nodes_by_type, self.target_grammar.vocabulary,
                possible_segments)
            # Discard hypotheses whose MDL score is infinite.
            score = Hypothesis.get_mdl_score(hypothesis, self.data)
            if score < float('inf'):
                good_hypotheses.add(hypothesis)

        # Report the surviving hypotheses from best (lowest) MDL score to worst.
        for hypothesis in sorted(
                good_hypotheses,
                key=lambda x: Hypothesis.get_mdl_score(x, self.data)):
            print("\n** Good hypothesis (score {}):".format(
                Hypothesis.get_mdl_score(hypothesis, self.data)))
            print(hypothesis)
            print()
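
The TODO above points toward replacing blind random sampling with a proper search such as simulated annealing. Below is a minimal sketch of that idea, not the project's implementation: it assumes a hypothetical Hypothesis.get_random_neighbor(hypothesis) that returns a slightly mutated copy of a grammar, reuses Hypothesis.get_mdl_score and self.data from the example above, and would need import math and import random at module level. The best hypothesis found by randomize_hypotheses could serve as the starting point.

    def simulated_annealing_search(self, initial_hypothesis,
                                   initial_temperature=100.0,
                                   cooling_rate=0.999,
                                   min_temperature=0.01):
        # Sketch only: walk from an initial grammar toward a low-MDL grammar,
        # accepting worse neighbors with a probability that shrinks as the
        # temperature cools. get_random_neighbor is an assumption, not part
        # of the original example.
        current = initial_hypothesis
        current_score = Hypothesis.get_mdl_score(current, self.data)
        temperature = initial_temperature
        while temperature > min_temperature:
            neighbor = Hypothesis.get_random_neighbor(current)
            neighbor_score = Hypothesis.get_mdl_score(neighbor, self.data)
            delta = neighbor_score - current_score
            # Always accept improvements; accept regressions with
            # probability exp(-delta / temperature).
            if delta < 0 or random.random() < math.exp(-delta / temperature):
                current, current_score = neighbor, neighbor_score
            temperature *= cooling_rate
        return current, current_score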
Example #2
    def run(self):
        # Print the target grammar and its MDL score, then run the
        # random-hypothesis sweep for comparison.
        print("** Target grammar (score {}):".format(
            Hypothesis.get_mdl_score(self.target_grammar, self.data)))
        print(self.target_grammar)
        self.randomize_hypotheses()