# NOTE(review): the statements down to the separator line are the tail of a
# method whose header lies outside this chunk (they reference `self`).  The
# nesting below is reconstructed from syntax alone -- confirm it against the
# full file before relying on the indentation.
        # Normalise to a list so that a single node and a list of nodes are
        # handled by the same loop.
        # NOTE(review): isinstance(head, list) would also accept list subclasses.
        if type(head) == list:
            lHead = head
        else:
            lHead = [head]
        # output the dep node
        for head in lHead:
            # lTokens starts empty and is iterated right after the call, so it
            # is presumably filled in by find_head -- confirm in find_head.
            lTokens = []
            head_child = self.find_head(head, lTokens)
            if self.m_bLabeled:
                # In labeled mode the node returned by find_head gets the label 'ROOT'.
                head_child.label = 'ROOT'
            # One tab-separated line per token: token text (passed through
            # gb2utf), pos, link, and additionally the label in labeled mode.
            for tokn in lTokens:
                if self.m_bLabeled:
                    print "\t".join([gb2utf(tokn.token), tokn.pos, str(tokn.link), tokn.label])
                else:
                    print "\t".join([gb2utf(tokn.token), tokn.pos, str(tokn.link)])
            print # empty line

#================================================================

# Command-line entry point:
#    ctb2dep.py rule_file config_file [log_file] > output
if __name__ == '__main__':
    from tools import config
    # NOTE(review): `sys` and `fiditer` are used below but are not imported in
    # the visible part of the file; presumably they are imported at the top --
    # confirm.
    if len(sys.argv) < 3:
        print "\nUsage: ctb2dep.py rule_file config_file [log_file] > output\n"
        sys.exit(1)
    sLogs = None
    # The log file is only picked up when exactly three arguments are given;
    # with more than three, sLogs stays None.
    if len(sys.argv) == 4:
        sLogs = sys.argv[3]
    cf = config.CConfig(sys.argv[2])
    rule = CHeadRules(sys.argv[1], sLogs, cf.labeled)
    # rule.process is handed to the iterator together with the directory and
    # range read from the config object.
    fiditer.sentence_iterator(rule.process, cf.directory, cf.range)
import getopt import binarize if __name__ == '__main__': import sys import config try: opts, args = getopt.getopt(sys.argv[1:], "nul:") except getopt.GetoptError: print "\nUsage: binarize.py [-nu] [-llogfile] rule_file config_file > output\n" print "-n: not binarize\n" sys.exit(1) if len(args) < 2: print "\nUsage: binarize.py [-nu] [-llogfile] rule_file config_file > output\n" print "-n: not binarize\n" print "-u: remove unary nodes\n" sys.exit(1) sLogs = None bBinarize = True bRemoveUnary = False for opt in opts: if opt[0] == '-n': bBinarize = False elif opt[0] == '-l': sLogs = opt[1] elif opt[0] == '-u': bRemoveUnary = True rule = binarize.CBinarizer(args[0], sLogs, bBinarize, bRemoveUnary) cf = config.CConfig(args[1]) fiditer.sentence_iterator(rule.process, cf.directory, cf.range)
# NOTE(review): the statements down to the separator line are the tail of a
# function whose header lies outside this chunk (presumably __find_node, which
# fromString calls below).  The nesting is reconstructed from syntax alone --
# confirm it against the full file.
        return node
    else:
        # Token branch: the node is tagged "token" and its text is read by
        # __find_token, which also returns the updated index.
        node.type = "token"
        sToken, nIndex = __find_token(sSentence, nIndex)
        node.token = sToken
        # The character at the returned index must be the closing bracket;
        # that position is recorded as the node's end.
        assert sSentence[nIndex] == ")"
        node.end_index = nIndex
        return node

#================================================================

def fromString(sSentence):
    """Parse sSentence starting at index 0.

    Returns the string "Empty sentence" when sSentence is empty or only
    whitespace; otherwise returns the result of __find_node(sSentence, 0).
    """
    if sSentence.strip() == "":
        return "Empty sentence"
    return __find_node(sSentence, 0)

#================================================================

# Script mode: print the parse result of every sentence that
# fiditer.sentence_iterator passes to the callback, using the directory and
# range read from the hard-coded config file "fid.config".
if __name__ == '__main__':
    import fiditer
    import config
    cf = config.CConfig("fid.config")
    def printx(x):
        # Callback for the iterator: parse one sentence string and print it.
        print fromString(x)
    fiditer.sentence_iterator(printx, cf.directory, cf.range)
# NOTE(review): the statements down to the separator line are the tail of a
# function whose header lies outside this chunk (presumably __find_node, which
# parse_object calls below).  The nesting -- in particular whether the first
# two statements belong to a loop over children -- is reconstructed from
# syntax alone; confirm it against the full file.
            # Continue after the child just handled: jump to its end index,
            # then let __next_index move on from the following position.
            nIndex = child.end_index
            nIndex = __next_index(sSentence, nIndex+1)
        node.children = lChildren
        # The character at nIndex must be the closing bracket; that position
        # is recorded as the node's end.
        assert sSentence[nIndex] == ")"
        node.end_index = nIndex
        return node
    else:
        # Token branch: the node is tagged "token" and its text is read by
        # __find_token, which also returns the updated index.
        node.type = "token"
        sToken, nIndex = __find_token(sSentence, nIndex)
        node.token = sToken
        assert sSentence[nIndex]==")"
        node.end_index = nIndex
        return node

#================================================================

def parse_object(sSentence):
    """Parse sSentence starting at index 0.

    Returns the string "Empty sentence" when sSentence is empty or only
    whitespace; otherwise returns the result of __find_node(sSentence, 0).
    """
    if sSentence.strip() == "":
        return "Empty sentence"
    return __find_node(sSentence, 0)

#================================================================

# Script mode: print the parse result of every sentence that
# fiditer.sentence_iterator passes to the callback, using the directory and
# range read from the hard-coded config file "fid.config".
if __name__ == '__main__':
    import fiditer
    import config
    cf = config.CConfig("fid.config")
    def printx(x):
        # Callback for the iterator: parse one sentence string and print it.
        print parse_object(x)
    fiditer.sentence_iterator( printx, cf.directory, cf.range )