def randomize_hypotheses(self):
    """Randomly sample grammars and print those with a finite MDL score.

    Draws 100,000 random hypotheses over the lowercase ASCII alphabet
    (segments 'a'..'z'), keeps those whose MDL score against self.data
    is finite, and prints them in ascending score order.

    Start by just randomizing grammars and neighbors and seeing that
    their scores make sense.
    TODO: move on to a search algorithm such as simulated annealing.
    """
    possible_segments = [chr(c) for c in range(ord('a'), ord('a') + 26)]
    # Cache each hypothesis's score the first time it is computed, so the
    # (presumably expensive) MDL evaluation runs once per hypothesis
    # instead of again for the sort key and again for printing.
    # Dict keys dedupe hypotheses exactly like the original set did.
    scored_hypotheses = {}
    for _ in range(100000):
        hypothesis = Hypothesis.randomize_grammar(
            self.nodes_by_type, self.target_grammar.vocabulary,
            possible_segments)
        score = Hypothesis.get_mdl_score(hypothesis, self.data)
        if score < float('inf'):
            scored_hypotheses[hypothesis] = score
    for hypothesis, score in sorted(
            scored_hypotheses.items(), key=lambda item: item[1]):
        print("\n** Good hypothesis (score {}):".format(score))
        print(hypothesis)
        print()
def run(self):
    """Print the target grammar and its MDL score, then explore random hypotheses."""
    target_score = Hypothesis.get_mdl_score(self.target_grammar, self.data)
    print("** Target grammar (score {}):".format(target_score))
    print(self.target_grammar)
    self.randomize_hypotheses()