# NOTE(review): the line below has lost its line breaks and indentation, so the
# nesting of the loops cannot be determined from this text alone; restore the
# original layout before running.
#
# First part (up to the `return`): the end of a function whose header is not
# visible here. It stores each token at resulting_chart[index, index], assigns
# ' ' to cells of resulting_chart in two row/column loops (the second only where
# the cell is falsy), writes str(resulting_chart) plus a newline to `f`, closes
# `f`, and returns the tuple (chart, best_probability[0]).
# NOTE(review): cells are addressed both as resulting_chart[row][col] and as
# resulting_chart[row, col] -- confirm the container supports both forms.
#
# Second part (from `if __name__ == "__main__":`): script entry point. It
# removes 'workfile.txt' (ignoring OSError), loads the grammar with
# PCFG.load(PCFG_SOURCE), then for the first `sentences_to_parse` (10) lines of
# TOKEN_SOURCE splits the line on whitespace, calls CKY_chart(tokens, pcfg) and
# prints best_probability.bw with four decimals followed by the tokens.
for index, token in enumerate(tokens): resulting_chart[index, index] = token for row in range(1, L): for col in range(0, row): resulting_chart[row][col] = ' ' for row in range(0, L): for col in range(row+2, L+1): if not resulting_chart[row, col]: resulting_chart[row, col] = ' ' f.write(str(resulting_chart)) f.write('\n') f.close() return (chart, best_probability[0]) if __name__ == "__main__": try: os.remove('workfile.txt') except OSError: pass sentences_to_parse = 10 pcfg = PCFG.load(PCFG_SOURCE) with open(TOKEN_SOURCE, "r") as source: for index, line in enumerate(source): if index == sentences_to_parse: break tokens = line.split() (_, best_probability) = CKY_chart(tokens, pcfg) print("{:.4f}: {}".format(best_probability.bw, " ".join(tokens)))
class Timeout(object):
    """Context manager that interrupts the enclosed block via SIGALRM.

    On entry a SIGALRM handler is installed and an alarm is armed for
    ``seconds`` seconds; if the alarm fires, ``TimeoutError`` is raised with
    ``error_message``. On exit the alarm is cancelled and the handler that
    was active before entry is put back, so using this class does not leave
    its own handler installed process-wide.

    Relies on ``signal.SIGALRM`` / ``signal.alarm``; per the ``signal``
    module documentation these are Unix-only and handlers can only be set
    from the main thread.

    Args:
        seconds: Delay passed to ``signal.alarm`` before the timeout fires.
        error_message: Message carried by the raised ``TimeoutError``.
    """

    def __init__(self, seconds=1, error_message="Timeout"):
        self.seconds = seconds
        self.error_message = error_message
        # Handler displaced by __enter__; restored by __exit__.
        self._previous_handler = None

    def handle_timeout(self, signum, frame):
        """Signal handler: raise ``TimeoutError`` with the configured message."""
        # NOTE(review): TimeoutError is a builtin only on Python 3; under
        # Python 2 it must be defined or imported elsewhere in this file.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        """Install the handler, arm the alarm and return this instance."""
        # signal.signal returns the handler it replaces; keep it for __exit__.
        self._previous_handler = signal.signal(
            signal.SIGALRM, self.handle_timeout
        )
        signal.alarm(self.seconds)
        return self

    def __exit__(self, type, value, traceback):
        """Cancel the pending alarm and restore the previous handler.

        Returns None, so any exception raised in the block propagates.
        """
        signal.alarm(0)
        # None means the earlier handler was not installed from Python and
        # therefore cannot be re-installed through signal.signal.
        if self._previous_handler is not None:
            signal.signal(signal.SIGALRM, self._previous_handler)
            self._previous_handler = None


if __name__ == "__main__":
    # Parse every tree's leaves under a time limit and print the resulting
    # probability; sentences that exceed TIMEOUT are skipped.
    pcfg = PCFG.load(PCFG_SOURCE)
    print("PCFG loaded.")
    with open(TREE_SOURCE, "r") as source:
        for tree in Tree.from_stream(source):
            tokens = [leaf.decode("ASCII") for leaf in tree.leaves()]
            try:
                with Timeout(TIMEOUT):
                    (_, bw_prob) = CYK_chart(pcfg, tokens)
                print(bw_prob)
            except TimeoutError:
                # Deliberate best-effort: a timed-out sentence yields no output.
                pass