def prettyprint(self):
    """Print the pretty-printed form of every line of ``self.path``.

    Each line of the file is loaded into a fresh
    ``binarize.CBinarizedTreeNode`` and the result of its ``prettyprint()``
    method is printed to standard output.

    The file is opened in a ``with`` block so that it is closed even when
    loading or printing a line raises.
    """
    with open(self.path) as infile:
        for line in infile:
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            print(srcnode.prettyprint())
def process(self):
    """Print the tokens of every line of ``self.path``.

    Each line of the file is loaded into a fresh
    ``binarize.CBinarizedTreeNode`` and the result of
    ``tokens(self.sep)`` is printed to standard output.

    The file is opened in a ``with`` block so that it is closed even when
    loading or printing a line raises.
    """
    with open(self.path) as infile:
        for line in infile:
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            print(srcnode.tokens(self.sep))
def prettyprint(self):
    """Print the pretty-printed form of every line of ``self.path``.

    A line that is empty or whitespace-only produces an empty output line.
    Every other line is loaded into a fresh ``binarize.CBinarizedTreeNode``
    and the result of ``prettyprint(self.escape)`` is printed.

    The file is opened in a ``with`` block so that it is closed even when
    loading or printing a line raises.
    """
    with open(self.path) as infile:
        for line in infile:
            if not line.strip():
                # print("") emits a bare newline under both Python 2 and 3,
                # matching the original argument-less print statement.
                print("")
                continue
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            print(srcnode.prettyprint(self.escape))
def process(self, retval):
    """Run ``self.extract_rules`` on every line of ``self.path``.

    Each line of the file is loaded into a fresh
    ``binarize.CBinarizedTreeNode``, which is passed to
    ``self.extract_rules`` together with ``retval``.

    Args:
        retval: Object handed unchanged to ``self.extract_rules`` on every
            call (presumably an accumulator it fills in; verify against
            ``extract_rules``).

    The file is opened in a ``with`` block so that it is closed even when
    loading a line or extracting rules raises.
    """
    with open(self.path) as infile:
        for line in infile:
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            self.extract_rules(srcnode, retval)
def topipe(self):
    """Print every line of ``self.path`` via ``pipe.PrintBinarizedTree``.

    The ``pipe`` module is imported from ``../../../ccg`` relative to this
    file's directory, which is added to ``sys.path`` first. A line that is
    empty or whitespace-only produces an empty output line. Every other line
    is loaded into a fresh ``binarize.CBinarizedTreeNode`` and the result of
    ``pipe.PrintBinarizedTree`` on it is printed.

    The file is opened in a ``with`` block so that it is closed even when
    processing a line raises.
    """
    ccg_dir = os.path.join(os.path.dirname(__file__), '../../../ccg')
    # Avoid growing sys.path by one duplicate entry on every call.
    if ccg_dir not in sys.path:
        sys.path.append(ccg_dir)
    import pipe

    with open(self.path) as infile:
        for line in infile:
            if not line.strip():
                # print("") emits a bare newline under both Python 2 and 3.
                print("")
                continue
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            print(pipe.PrintBinarizedTree(srcnode))
def process(self):
    """Yield one built node for every non-blank line of ``self.path``.

    This is a generator. A line that is empty or whitespace-only prints an
    empty line to standard output and yields nothing. Every other line is
    loaded into a fresh ``binarize.CBinarizedTreeNode`` and passed to
    ``self.build_node``, which must return exactly one node; that node is
    yielded.

    Yields:
        The single element of the sequence returned by ``self.build_node``.

    Raises:
        AssertionError: If ``self.build_node`` does not return exactly one
            node for a line.

    The file is opened in a ``with`` block so that it is closed when the
    generator finishes, raises, or is closed before being exhausted. The
    original closed the file only after the loop ran to completion.
    """
    with open(self.path) as infile:
        for line in infile:
            if not line.strip():
                # print("") emits a bare newline under both Python 2 and 3.
                print("")
                continue
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            nodes = self.build_node(srcnode)
            assert len(nodes) == 1
            yield nodes[0]
def topipe(self):
    """Print every tree on every line of ``self.path`` via ``pipe``.

    The ``pipe`` module is imported from ``../ext/zpar/scripts/ccg`` relative
    to this file's directory, which is added to ``sys.path`` first. A line
    that is empty or whitespace-only produces an empty output line. Every
    other line is split on whitespace, and nodes are loaded from the token
    list one after another with ``load_list`` until all tokens are consumed.
    Each loaded node is printed through ``pipe.PrintBinarizedTree``.

    Raises:
        AssertionError: If ``load_list`` returns an index past the end of
            the token list.

    The file is opened in a ``with`` block so that it is closed even when
    processing a line raises.
    """
    ccg_dir = os.path.join(os.path.dirname(__file__), '../ext/zpar/scripts/ccg')
    # Avoid growing sys.path by one duplicate entry on every call.
    if ccg_dir not in sys.path:
        sys.path.append(ccg_dir)
    import pipe

    with open(self.path) as infile:
        for line in infile:
            if not line.strip():
                # print("") emits a bare newline under both Python 2 and 3.
                print("")
                continue
            tokens = line.split()
            nIndex = 0
            # load_list returns the index at which the next node starts.
            while True:
                srcnode = binarize.CBinarizedTreeNode()
                nIndex = srcnode.load_list(tokens, nIndex)
                print(pipe.PrintBinarizedTree(srcnode))
                assert nIndex <= len(tokens)
                if nIndex == len(tokens):
                    break
def updatepos(path, pospath, sSep, output):
    """Rename the token nodes of each tree in *path* from *pospath*.

    Lines of *path* (decoded as UTF-8) are paired in order with the items
    produced by ``posio.posread(pospath, sSep)``. Each line is loaded into a
    ``binarize.CBinarizedTreeNode``, its token nodes are renamed, and the
    result of ``utf8print()`` plus a newline is written to *output*.

    Args:
        path: Path of the tree file, one tree per line.
        pospath: Path handed to ``posio.posread``.
        sSep: Separator handed to ``posio.posread``.
        output: Object with a ``write`` method that receives the result.

    Raises:
        StopIteration: If ``posio.posread`` yields fewer items than *path*
            has lines.

    The tree file is opened in a ``with`` block so that it is always closed.
    The original never closed it.
    """
    def updatenode(node, sent):
        # Left-to-right traversal: each 'token' node consumes the first
        # remaining entry of sent and takes that entry's element [1] as its
        # name (presumably the POS tag of a (word, tag) pair; verify against
        # posio.posread).
        if node.type == 'token':
            node.name = sent[0][1]
            sent.pop(0)
        else:
            updatenode(node.left_child, sent)
            if node.right_child:
                updatenode(node.right_child, sent)

    with codecs.open(path, encoding='utf-8') as infile:
        pos_it = posio.posread(pospath, sSep)
        for line in infile:
            # next() works on Python 2.6+ and 3, unlike the .next() method.
            pos = next(pos_it)
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            updatenode(srcnode, pos)
            output.write(srcnode.utf8print() + "\n")
def updatepos(path, pospath, sSep):
    """Rename the token nodes of each tree in *path* and print the trees.

    Lines of *path* are paired in order with the items produced by
    ``posio.posread(pospath, sSep)``. Each line is loaded into a
    ``binarize.CBinarizedTreeNode``, its token nodes are renamed, and the
    node is printed to standard output.

    Args:
        path: Path of the tree file, one tree per line.
        pospath: Path handed to ``posio.posread``.
        sSep: Separator handed to ``posio.posread``.

    Raises:
        StopIteration: If ``posio.posread`` yields fewer items than *path*
            has lines.

    The tree file is opened in a ``with`` block so that it is always closed.
    The original never closed it.
    """
    def updatenode(node, sent):
        # Left-to-right traversal: each 'token' node consumes the first
        # remaining entry of sent and takes that entry's element [1] as its
        # name (presumably the POS tag of a (word, tag) pair; verify against
        # posio.posread).
        if node.type == 'token':
            node.name = sent[0][1]
            sent.pop(0)
        else:
            updatenode(node.left_child, sent)
            if node.right_child:
                updatenode(node.right_child, sent)

    with open(path) as infile:
        pos_it = posio.posread(pospath, sSep)
        for line in infile:
            # next() works on Python 2.6+ and 3, unlike the .next() method.
            pos = next(pos_it)
            srcnode = binarize.CBinarizedTreeNode()
            srcnode.load(line)
            updatenode(srcnode, pos)
            print(srcnode)
# Command-line driver: read trees from the -i file, run each through
# CWordStrucutre.process, and write the results to the -o file.
sUsage = "\nUsage: append_wordstructure.py [-iinfile] [-ddictionary_file] [-ooutfile] [-rdictionaryout_file]\n"

# NOTE(review): the fragment as received began at the getopt call with no
# opening "try:" for the "except" below; it is restored here.
try:
    opts, args = getopt.getopt(sys.argv[1:], "d:i:o:r:")
except getopt.GetoptError:
    print(sUsage)
    sys.exit(1)

sInfile = None
sDictionary = None
sOutfile = None
sDictOutFile = None
for opt in opts:
    if opt[0] == '-d':
        sDictionary = opt[1]
    elif opt[0] == '-i':
        sInfile = opt[1]
    elif opt[0] == '-o':
        sOutfile = opt[1]
    elif opt[0] == '-r':
        sDictOutFile = opt[1]

# Validate before building CWordStrucutre so that a missing option yields
# the usage message rather than whatever the constructor does with None.
# As in the original check, -d is not required.
if sInfile is None or sOutfile is None or sDictOutFile is None:
    print(sUsage)
    sys.exit(1)

wordstut = CWordStrucutre(sDictionary, sDictOutFile)

# Nested "with" blocks close both files even if processing a line raises.
# The original never closed the input file. The module-level names "file"
# and "wfile" are kept unchanged.
with codecs.open(sInfile, encoding='utf-8') as file:
    with codecs.open(sOutfile, 'w', encoding='utf-8') as wfile:
        for line in file:
            znode = binarize.CBinarizedTreeNode()
            znode.load(line)
            newtree = wordstut.process(znode)
            wfile.write(newtree.utf8print() + "\n")