def load_special_combinators(filename):
    """Read special combinator rules from a text file.

    Everything from a "#" to the end of a line is discarded, and lines that
    are empty after stripping are skipped.  Each remaining line is split on
    single spaces; the first field is a head marker ("l" means the head is
    on the left) and the next three fields are the left, right and result
    categories, each passed through ``cat.parse``.

    Args:
        filename: path of the rule file to read.

    Returns:
        A list of ``SpecialCombinator`` objects, in file order.  The list is
        empty when the file contains no rule lines.

    Raises:
        IndexError: if a rule line has fewer than four fields.
    """
    # The accumulator is local, so every call starts from an empty list.
    res = []
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as rule_file:
        for line in rule_file:
            comment = line.find("#")
            if comment > -1:
                line = line[:comment]
            line = line.strip()
            if not line:
                continue
            items = line.split(" ")
            head_is_left = items[0] == "l"
            left = cat.parse(items[1])
            right = cat.parse(items[2])
            result = cat.parse(items[3])
            res.append(SpecialCombinator(left, right, result, head_is_left))
    return res
def load_unary(filename):
    """Read unary rules from a text file.

    Everything from a "#" to the end of a line is discarded, and lines that
    are empty after stripping are skipped.  Each remaining line must hold
    exactly two whitespace-separated fields: an input category followed by
    an output category, both passed through ``cat.parse``.

    Args:
        filename: path of the rule file to read.

    Returns:
        A dict mapping each parsed input category to the list of parsed
        output categories seen for it, in file order.  The dict is empty
        when the file contains no rule lines.

    Raises:
        AssertionError: if a rule line does not have exactly two fields.
    """
    # The accumulator is local, so every call starts from an empty mapping.
    res = {}
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as rule_file:
        for line in rule_file:
            comment = line.find("#")
            if comment > -1:
                line = line[:comment]
            line = line.strip()
            if not line:
                continue
            items = line.split()
            assert len(items) == 2, "expected 2 fields, got %d" % len(items)
            inp = cat.parse(items[0])
            out = cat.parse(items[1])
            # setdefault() works on both Python 2 and 3, unlike
            # dict.has_key(), which Python 3 removed.
            res.setdefault(inp, []).append(out)
    return res
def parse_leaf(self):
    """Parse one leaf node and return it as a ``Leaf``.

    Increments ``self.word_id``, calls ``self.check("{")``, then obtains a
    category token via ``self.next(" ")`` and a word token via
    ``self.next("}")``.

    Returns:
        ``Leaf(word, cate, self.word_id)``, where ``cate`` is the result of
        ``cat.parse`` on the category tag and ``word`` is the text before
        the first "/" of the word token.
    """
    self.word_id += 1
    self.check("{")
    # The first character of the token is dropped before encoding
    # (presumably a leading delimiter -- confirm against self.next).
    cate = self.next(" ")[1:].encode("utf-8")
    # Keep only the part before the first "_".  partition() leaves a tag
    # without any "_" intact; slicing with find()'s -1 would instead cut
    # off the tag's final character.
    cate = cate.partition("_")[0]
    cate = cat.parse(cate)
    # [:-1] drops the token's last character (presumably the closing "}"
    # returned by self.next); only the text before the first "/" is used.
    word = self.next("}")[:-1].split("/")[0]
    return Leaf(word, cate, self.word_id)
def parse_tree(self):
    """Parse one internal node and return it as a ``Tree``.

    Calls ``self.check("{")``, looks up the combinator in ``combinators``
    using the first token with its leading character removed, and parses
    the second token with ``cat.parse``.  After a second
    ``self.check("{")``, child nodes are collected with ``self.next_node()``
    until ``self.peek()`` returns "}".

    Returns:
        ``Tree(category, left_is_head, children, combinator)``.  With exactly
        one child the head is taken to be on the left; otherwise the
        combinator's ``head_is_left`` decides from the first two children's
        categories.
    """
    self.check("{")
    op_token = self.next(" ")
    rule = combinators[op_token[1:]]
    cate_token = self.next(" ")
    category = cat.parse(cate_token.encode("utf-8"))
    self.check("{")

    kids = []
    while self.peek() != "}":
        kids.append(self.next_node())
        # Skip the single space separating sibling nodes, if present.
        if self.peek() == " ":
            self.next(" ")
    self.next("}")

    if len(kids) == 1:
        head_on_left = True
    else:
        head_on_left = rule.head_is_left(kids[0].cat, kids[1].cat)
    return Tree(category, head_on_left, kids, rule)
def can_apply(self, left, right):
    """Return whether ``left`` and ``right`` may be combined by this rule.

    ``left`` must equal ``cat.CONJ``, ``cat.COMMA`` or ``cat.SEMICOLON``.
    ``right`` is rejected when it is matched by the parsed NP-modifier
    category checked first below, when it is punctuation, when it is
    type-raised, or when it is a non-functor category of type "N".
    """
    # Rationale carried over from easyCCG:
    #  * Don't start making weird ,\, categories...
    #  * Improves coverage of the C&C evaluation script.
    #    Categories can just conjoin first, then type-raise
    #    (hence the is_type_raised check).
    #  * Blocks noun conjunctions, which should normally be NP conjunctions.
    #    In a better world, conjunctions would have categories like
    #    (NP\NP)/NP.  Doesn't affect F-scores, but makes the output
    #    semantically nicer.
    #
    # Possibly related to the C&C evaluation script: it doesn't let you
    # do this, for some reason.
    if cat.parse("NP\\NP").matches(right):
        return False

    left_is_conjoiner = (left == cat.CONJ
                         or left == cat.COMMA
                         or left == cat.SEMICOLON)
    if not left_is_conjoiner:
        return False

    if right.is_punct:
        return False
    if right.is_type_raised:
        return False

    is_bare_noun = not right.is_functor and right.type == "N"
    return not is_bare_noun