if len(tokens)==1: return self.static_dics.get(tokens[0],[]) else: return [] def __setitem__(self,tok,cats): self.static_dics[tok] = cats def has_key(self,tok): return (tok in self.static_dics) def get(self,toklist,defval): ret = self.__getitem__(toklist) return ret parser = CCGParser() parser.combinators = [LApp,RApp,LB,RB,Conj,RT("NP[sbj]"),LBx] parser.terminators = ["ROOT","S","S[wq]","S[q]","S[imp]"] parser.lexicon = Lexicon() parser.concatenator = "" def tokenize(s): if len(s)==0: return s elif s[-1]==".": tokens = s[:-1].split() tokens.append( s[-1] ) return tokens else: return s.split() if __name__=="__main__":
return False else: assert(len(term)>=2),lt return (check(term[1]) and check(term[2])) if type(rt)==list or rt.value()!="COMMA": return None elif not check(lt): return None return lt
# NOTE(review): the line above is the tail of a definition that starts before
# this chunk (it uses ``lt``, ``rt`` and a local ``check``); kept verbatim.

# Module-level parser instance used by run() below.
# NOTE(review): the roles of these attributes are defined by CCGParser, which
# is not visible here -- presumably ``combinators`` are the combination rules,
# ``terminators`` the accepted root categories and ``concatenator`` the string
# placed between tokens (empty here).
parser = CCGParser()
parser.combinators = [LApp,RApp,LB,RB,Conj,FwdRel,SkipCommaJP,RT("NP[sbj]"),RBx]
parser.terminators = ["ROOT","S","S[exc]","S[imp]","S[null]","S[q]","S[wq]","S[null-q]","S[nom]"]
parser.lexicon = default_lexicon()
parser.concatenator = ""


# Parse every sentence produced by sentencize(text) and print the result.
# NOTE(review): this definition appears to be cut off at the end of the chunk
# (only the ``type==0`` branch is visible), and the indentation below was
# reconstructed from whitespace-collapsed text -- confirm the level of
# ``break`` against the original file.
def run(text,type=0):
    for sentence in sentencize(text):
        print(u"test run : sentence={0}".format(sentence))
        # Let the lexicon register guesses for this sentence before parsing;
        # guessed tokens are looked up in ``guess_dics`` below.
        parser.lexicon.guess(sentence)
        for t in parser.parse(sentence):
            if type==0:
                # One "token<TAB>category" line per leaf; tokens found in
                # ``guess_dics`` get an extra "(guess)" column.
                for r in t.leaves():
                    if r.token in parser.lexicon.guess_dics:
                        print(u"{0}\t{1}\t(guess)".format(r.token , r.catname))
                    else:
                        print(u"{0}\t{1}".format(r.token , r.catname))
                break
def Rel(lt, rt):
    """Return ``lt`` when an ``NP`` is followed by ``[BwdApp, S[pss], NP]``.

    ``lt`` must equal ``Symbol("NP")`` and ``rt`` must equal the list
    ``[BwdApp, Symbol("S[pss]"), Symbol("NP")]``; in every other case the
    result is ``None``.
    NOTE(review): presumably this lets a passive predicate modify a preceding
    noun phrase -- confirm against the grammar.
    """
    if lt != Symbol("NP"):
        return None
    wanted_right = [BwdApp, Symbol("S[pss]"), Symbol("NP")]
    return lt if rt == wanted_right else None


# Module-level parser instance used by run() below.
# NOTE(review): the roles of these attributes are defined by CCGParser, which
# is not visible here -- presumably ``combinators`` are the combination rules,
# ``terminators`` the accepted root categories and ``concatenator`` the string
# placed between tokens.
parser = CCGParser()
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LT("NP"),
    LT("S\\NP"),
    RT("NP"),
    Conj,
    SkipComma,
    Rel,
]
parser.terminators = [
    "ROOT",
    "S",
    "S[q]",
    "S[wq]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
parser.concatenator = " "


def run(text, type=0):
    """Parse each token list from ``tokenize(text)`` and print its first parse.

    With ``type == 0`` every leaf of the parse is printed as
    ``token<TAB>category``; with any other value ``t.show()`` is printed
    instead.  Only the first parse yielded for each token list is reported.
    (``type`` shadows the builtin, but it is part of the public signature.)
    """
    # NOTE(review): nesting reconstructed from whitespace-collapsed text; the
    # ``else`` is taken to pair with ``if type == 0``.
    for tokens in tokenize(text):
        print(u"test run : tokens={0}".format(str(tokens)))
        for tree in parser.parse(tokens):
            if type == 0:
                for leaf in tree.leaves():
                    print(u"{0}\t{1}".format(leaf.token, leaf.catname))
            else:
                print(tree.show())
            # Both output modes stop after the first parse.
            break
return (check(term[1]) and check(term[2])) if type(rt) == list or rt.value() != "COMMA": return None elif not check(lt): return None return lt
# NOTE(review): the line above is the tail of a definition that starts before
# this chunk (it uses ``lt``, ``rt`` and a local ``check``); kept verbatim.

# Module-level parser instance used by run() below.
# NOTE(review): the roles of these attributes are defined by CCGParser, which
# is not visible here -- presumably ``combinators`` are the combination rules,
# ``terminators`` the accepted root categories and ``concatenator`` the string
# placed between tokens (empty here).
parser = CCGParser()
parser.combinators = [
    LApp, RApp, LB, RB, Conj, FwdRel, SkipCommaJP, RT("NP[sbj]"), RBx
]
parser.terminators = [
    "ROOT", "S", "S[exc]", "S[imp]", "S[null]", "S[q]", "S[wq]", "S[null-q]", "S[nom]"
]
parser.lexicon = default_lexicon()
parser.concatenator = ""


# Parse every sentence produced by sentencize(text) and print the result.
# NOTE(review): this definition is cut off at the end of the chunk (it stops
# on a bare ``else:``), and the indentation below was reconstructed from
# whitespace-collapsed text -- confirm against the original file.
def run(text, type=0):
    for sentence in sentencize(text):
        print(u"test run : sentence={0}".format(sentence))
        # Let the lexicon register guesses for this sentence before parsing;
        # guessed tokens are looked up in ``guess_dics`` below.
        parser.lexicon.guess(sentence)
        for t in parser.parse(sentence):
            if type == 0:
                for r in t.leaves():
                    # Tokens found in ``guess_dics`` get a "(guess)" column.
                    if r.token in parser.lexicon.guess_dics:
                        print(u"{0}\t{1}\t(guess)".format(r.token, r.catname))
                    else: