Ejemplo n.º 1
0
           self.static_dics.setdefault(ls[0].decode('utf-8'),[]).extend( ls[1].split(",") )
    def __getitem__(self,tokens):
        """Return the categories stored for a one-token sequence.

        Only sequences of exactly one token are looked up in
        ``self.static_dics``; the stored value is returned as-is (not
        copied). For any other length, or when the token is not a key,
        a fresh empty list is returned.
        """
        if len(tokens) != 1:
            return []
        only_token = tokens[0]
        return self.static_dics.get(only_token, [])
    def __setitem__(self,tok,cats):
        """Store ``cats`` under ``tok`` in ``self.static_dics``.

        Any value previously stored for ``tok`` is replaced.
        """
        entries = self.static_dics
        entries[tok] = cats
    def has_key(self,tok):
        """Return True when ``tok`` is a key of ``self.static_dics``."""
        known_tokens = self.static_dics
        return tok in known_tokens
    def get(self,toklist,defval):
        """Return ``self.__getitem__(toklist)``.

        ``defval`` is accepted so the signature resembles ``dict.get``,
        but it is never used: whatever ``__getitem__`` returns is passed
        straight back to the caller.
        """
        return self.__getitem__(toklist)


# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    Conj,
    RT("NP[sbj]"),
    LBx,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[wq]",
    "S[q]",
    "S[imp]",
]
parser.lexicon = Lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""

def tokenize(s):
    """Split a sentence into whitespace-separated tokens.

    A trailing ``"."`` is split off as its own final token, even when it
    is attached to the last word.

    Args:
        s: the sentence text.

    Returns:
        list: the tokens in order. Empty input yields an empty list, so
        the return type is always a list (the input string itself used
        to be returned in that case).
    """
    if not s:
        return []
    if s[-1] == ".":
        # Tokenize everything before the final period, then re-attach
        # the period as a separate token.
        tokens = s[:-1].split()
        tokens.append(s[-1])
        return tokens
    return s.split()
Ejemplo n.º 2
0
            return self.static_dics.get(tokens[0], [])
        else:
            return []

    def __setitem__(self, tok, cats):
        """Store ``cats`` under ``tok`` in ``self.static_dics``.

        Any value previously stored for ``tok`` is replaced.
        """
        entries = self.static_dics
        entries[tok] = cats

    def has_key(self, tok):
        """Return True when ``tok`` is a key of ``self.static_dics``."""
        known_tokens = self.static_dics
        return tok in known_tokens

    def get(self, toklist, defval):
        """Return ``self.__getitem__(toklist)``.

        ``defval`` is accepted so the signature resembles ``dict.get``,
        but it is never used: whatever ``__getitem__`` returns is passed
        straight back to the caller.
        """
        return self.__getitem__(toklist)


# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    Conj,
    RT("NP[sbj]"),
    LBx,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[wq]",
    "S[q]",
    "S[imp]",
]
parser.lexicon = Lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""


def tokenize(s):
    """Split a sentence into whitespace-separated tokens.

    A trailing ``"."`` is split off as its own final token, even when it
    is attached to the last word.

    Args:
        s: the sentence text.

    Returns:
        list: the tokens in order. Empty input yields an empty list, so
        the return type is always a list (the input string itself used
        to be returned in that case).
    """
    if not s:
        return []
    if s[-1] == ".":
        # Tokenize everything before the final period, then re-attach
        # the period as a separate token.
        tokens = s[:-1].split()
        tokens.append(s[-1])
        return tokens
    return s.split()
Ejemplo n.º 3
0
        "(NP/NP)\\NP"
    ]
    lexicon["don't"] = ["(S\\NP)/(S\\NP)"]
    return lexicon


#-- special rule for English
def Rel(lt, rt):
    """Combinator: return ``lt`` when the pair matches a fixed pattern.

    The pair matches when ``lt`` equals ``Symbol("NP")`` and ``rt``
    equals the list ``[BwdApp, Symbol("S[pss]"), Symbol("NP")]``.
    In every other case ``None`` is returned.

    NOTE(review): presumably this lets a noun phrase absorb a following
    passive phrase (a reduced relative clause) -- confirm.
    """
    if lt != Symbol("NP"):
        return None
    passive_pattern = [BwdApp, Symbol("S[pss]"), Symbol("NP")]
    return lt if rt == passive_pattern else None


# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LT("NP"),
    LT("S\\NP"),
    RT("NP"),
    Conj,
    SkipComma,
    Rel,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[q]",
    "S[wq]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
# Single-space concatenator (presumably tokens are joined with a space
# -- confirm against CCGParser).
parser.concatenator = " "


def run(text, type=0):
    for tokens in tokenize(text):
        print(u"test run : tokens={0}".format(str(tokens)))
        for t in parser.parse(tokens):
Ejemplo n.º 4
0
def default_lexicon():
    """Build the default lexicon from built-in entries plus ``ccglex.ma``.

    Two punctuation entries are seeded first. Then every line of
    ``ccglex.ma`` (located in the same directory as this module) that is
    neither blank nor starting with ``#`` is split on tabs: field 0 is
    the token (decoded from UTF-8) and field 2 is a comma-separated list
    of categories, which is appended to that token's list.

    Returns:
        dict: token -> list of category strings.

    Raises:
        IOError: if ``ccglex.ma`` cannot be opened.
        IndexError: if a data line has fewer than three tab-separated
            fields.
    """
    ret = {}
    ret[u"。"] = ["ROOT\\S", "ROOT\\S[imp]", "ROOT\\S[q]", "ROOT\\S[wq]"]
    ret[u"?"] = ["ROOT\\S[q]", "ROOT\\S[wq]"]
    lexicon_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "ccglex.ma")
    # The context manager closes the file even if a line fails to parse;
    # previously the handle was left for the garbage collector.
    with open(lexicon_path) as lexicon_file:
        for line in lexicon_file:
            line = line.strip()
            if len(line) == 0 or line[0] == "#":
                continue
            ls = line.split('\t')
            ret.setdefault(ls[0].decode('utf-8'), []).extend(ls[2].split(","))
    return ret


# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LBx,
    Conj,
    SkipComma,
    RT("NP"),
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[wq]",
    "S[q]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""

if __name__ == "__main__":

    def __repr__(s):
        if sys.stdout.encoding == 'UTF-8':
            return s
        else:
            return repr(s)

    for line in sys.stdin:
        line = line.strip()
Ejemplo n.º 5
0
   return lexicon




#-- special rule for English
def Rel(lt,rt):
    """Combinator: return ``lt`` when the pair matches a fixed pattern.

    The pair matches when ``lt`` equals ``Symbol("NP")`` and ``rt``
    equals the list ``[BwdApp, Symbol("S[pss]"), Symbol("NP")]``.
    In every other case ``None`` is returned.

    NOTE(review): presumably this lets a noun phrase absorb a following
    passive phrase (a reduced relative clause) -- confirm.
    """
    if lt != Symbol("NP"):
        return None
    passive_pattern = [BwdApp, Symbol("S[pss]"), Symbol("NP")]
    return lt if rt == passive_pattern else None



# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LT("NP"),
    LT("S\\NP"),
    RT("NP"),
    Conj,
    SkipComma,
    Rel,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[q]",
    "S[wq]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
# Single-space concatenator (presumably tokens are joined with a space
# -- confirm against CCGParser).
parser.concatenator = " "



def run(text,type=0):
   for tokens in tokenize(text):
       print(u"test run : tokens={0}".format(str(tokens)))
       for t in parser.parse(tokens):
          if type==0:
              for r in t.leaves():
                 print(u"{0}\t{1}".format(r.token , r.catname))
              break
Ejemplo n.º 6
0
def default_lexicon():
    """Build the default lexicon from built-in entries plus ``ccglex.ma``.

    Two punctuation entries are seeded first. Then every line of
    ``ccglex.ma`` (located in the same directory as this module) that is
    neither blank nor starting with ``#`` is split on tabs: field 0 is
    the token (decoded from UTF-8) and field 2 is a comma-separated list
    of categories, which is appended to that token's list.

    Returns:
        dict: token -> list of category strings.

    Raises:
        IOError: if ``ccglex.ma`` cannot be opened.
        IndexError: if a data line has fewer than three tab-separated
            fields.
    """
    ret = {}
    ret[u"。"] = ["ROOT\\S" , "ROOT\\S[imp]" , "ROOT\\S[q]" , "ROOT\\S[wq]"]
    ret[u"?"] = ["ROOT\\S[q]" , "ROOT\\S[wq]"]
    lexicon_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "ccglex.ma")
    # The context manager closes the file even if a line fails to parse;
    # previously the handle was left for the garbage collector.
    with open(lexicon_path) as lexicon_file:
        for line in lexicon_file:
            line = line.strip()
            if len(line) == 0 or line[0] == "#":
                continue
            ls = line.split('\t')
            ret.setdefault(ls[0].decode('utf-8'), []).extend(ls[2].split(","))
    return ret



# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LBx,
    Conj,
    SkipComma,
    RT("NP"),
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[wq]",
    "S[q]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""


if __name__=="__main__":
   def __repr__(s):
       if sys.stdout.encoding=='UTF-8':
            return s
       else:
            return repr(s)
   for line in sys.stdin:
       line = line.strip()
       line = line.decode('utf-8')
Ejemplo n.º 7
0
                 return True
       elif term[0].value()=="forall":
            return False
       else:
            assert(len(term)>=2),lt
            return (check(term[1]) and check(term[2]))
    if type(rt)==list or rt.value()!="COMMA":
         return None
    elif not check(lt):
         return None
    return lt




# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    Conj,
    FwdRel,
    SkipCommaJP,
    RT("NP[sbj]"),
    RBx,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[exc]",
    "S[imp]",
    "S[null]",
    "S[q]",
    "S[wq]",
    "S[null-q]",
    "S[nom]",
]
parser.lexicon = default_lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""

def run(text,type=0):
   for sentence in sentencize(text):
       print(u"test run : sentence={0}".format(sentence))
       parser.lexicon.guess(sentence)
       for t in parser.parse(sentence):
          if type==0:
              for r in t.leaves():
                 if r.token in parser.lexicon.guess_dics:
                     print(u"{0}\t{1}\t(guess)".format(r.token , r.catname))
                 else:
Ejemplo n.º 8
0
            else:
                return True
        elif term[0].value() == "forall":
            return False
        else:
            assert (len(term) >= 2), lt
            return (check(term[1]) and check(term[2]))

    if type(rt) == list or rt.value() != "COMMA":
        return None
    elif not check(lt):
        return None
    return lt


# Module-level parser instance and its configuration.
parser = CCGParser()
# Combinators, in the order they are handed to the parser.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    Conj,
    FwdRel,
    SkipCommaJP,
    RT("NP[sbj]"),
    RBx,
]
# Category names listed as terminators for this parser.
parser.terminators = [
    "ROOT",
    "S",
    "S[exc]",
    "S[imp]",
    "S[null]",
    "S[q]",
    "S[wq]",
    "S[null-q]",
    "S[nom]",
]
parser.lexicon = default_lexicon()
# Empty concatenator string (presumably tokens are joined without a
# separator -- confirm against CCGParser).
parser.concatenator = ""


def run(text, type=0):
    for sentence in sentencize(text):
        print(u"test run : sentence={0}".format(sentence))