def readChunk(self, lines):
    """Consume the leading lines of ``lines`` that belong to one chunk.

    The first line may be a chunk header of the form
    ``* <digits> <dst>D ...``; the (possibly negative) integer before
    the ``D`` is stored in ``self.dst``.  Every following line is handed
    to ``Morph.readMecabLine`` and the result appended to ``self.morphs``.

    Reading stops at the first line that is empty, equals ``'EOS'``, is a
    ``*`` header at any position other than the first, or that
    ``Morph.readMecabLine`` returns ``None`` for (in which case
    ``'Error on: ' + line`` is printed).

    Args:
        lines: Sequence of already-split text lines (no trailing newline
            is expected on ``'EOS'``).

    Returns:
        The unconsumed tail ``lines[i:]`` beginning with the line that
        stopped the scan, or ``None`` when every line was consumed.

    Raises:
        ValueError: If the first line starts with ``*`` but does not match
            the expected header layout.
    """
    for i, line in enumerate(lines):
        # A blank line or the sentence terminator ends the chunk; the
        # terminating line itself is handed back to the caller.
        if not line or line == 'EOS':
            return lines[i:]

        if line.startswith('*'):
            if i != 0:
                # A header anywhere but the first position belongs to the
                # next chunk, so stop here and leave it unconsumed.
                return lines[i:]
            matchobj = re.fullmatch(r'\* \d+ ([-\d]\d*)D.*', line)
            if not matchobj:
                # Keep the original diagnostic output, then fail with a
                # clear error instead of an AttributeError on None.
                print(line)
                raise ValueError('Malformed chunk header: ' + line)
            self.dst = int(matchobj.group(1))
        else:
            morph = Morph.readMecabLine(line)
            if morph is None:
                print('Error on: ' + line)
                return lines[i:]
            self.morphs.append(morph)

    # Every line was consumed without hitting a terminator.
    return None
from morph import Morph

# Each entry of ``states`` is one sentence: the list of truthy results of
# Morph.readMecabLine for the lines preceding an 'EOS' line.
states = []

# ``with`` guarantees the file is closed even if parsing raises.
with open('ai.ja.txt.parsed') as file_cabocha:
    state = []
    for line in file_cabocha:
        if line == 'EOS\n':
            # End of sentence: keep it only if it collected any morphs.
            if state:
                states.append(state)
            state = []
            continue
        morph = Morph.readMecabLine(line)
        if morph:
            state.append(morph)
    # NOTE: morphs collected after the last 'EOS' line are intentionally
    # not appended, matching the original behaviour at end of file.

# Print every morph of the second collected sentence (index 1); this raises
# IndexError if fewer than two sentences were read.
for morph in states[1]:
    print(morph.tostr())