예제 #1
0
	def readChunk(self, lines):
		"""Consume one chunk from the front of ``lines`` and return the rest.

		The first line may be a header of the form ``* <digits> <dst>D ...``;
		its ``<dst>`` field (an integer, possibly negative) is stored in
		``self.dst``. Every following non-header line is parsed with
		``Morph.readMecabLine`` and appended to ``self.morphs``.

		Parsing stops, without consuming the offending line, at:
		  * an empty line,
		  * the literal line ``'EOS'``,
		  * a ``*`` header that is not the first line (start of the next chunk),
		  * a line ``Morph.readMecabLine`` cannot parse (a message is printed).

		Args:
			lines: Sequence of already newline-stripped lines (must support
				slicing).

		Returns:
			``lines[i:]`` starting at the line that ended the chunk, or
			``None`` when every line was consumed.

		Raises:
			ValueError: If the first line starts with ``*`` but does not match
				the expected header format.
		"""
		for i, line in enumerate(lines):
			# Blank line or sentence terminator: leave it for the caller.
			if not line or line == 'EOS':
				return lines[i:]
			if line.startswith('*'):
				if i != 0:
					# This header belongs to the next chunk; hand it back.
					return lines[i:]
				# Raw string so that ``\*`` and ``\d`` reach the regex engine
				# instead of being treated as (invalid) string escapes.
				matchobj = re.fullmatch(r'\* \d+ ([-\d]\d*)D.*', line)
				if matchobj is None:
					# Fail with a clear message instead of an incidental
					# AttributeError on ``None.group``.
					raise ValueError('Malformed chunk header: ' + repr(line))
				self.dst = int(matchobj.group(1))
				continue
			morph = Morph.readMecabLine(line)
			if morph is None:
				print('Error on: ' + line)
				return lines[i:]
			self.morphs.append(morph)
		# All lines were consumed without hitting a terminator.
		return None
예제 #2
0
파일: 40.py 프로젝트: bekasa001/NLP
from morph import Morph

# Group the morphemes of the parsed file sentence by sentence.
# Each sentence ends with an 'EOS' line; sentences with no morphemes are skipped.
states = []
# The context manager guarantees the file is closed, even if parsing raises.
with open('ai.ja.txt.parsed') as file_cabocha:
    state = []
    for line in file_cabocha:
        if line == 'EOS\n':
            if state:
                states.append(state)
            # Start collecting the next sentence.
            state = []
            continue
        morph = Morph.readMecabLine(line)
        # Lines that do not yield a morph are ignored.
        if morph:
            state.append(morph)
    # NOTE: morphs collected after the last 'EOS' line (file ending without a
    # terminator) are intentionally not appended, matching prior behavior.

# Print every morpheme of the second collected sentence.
for morph in states[1]:
    print(morph.tostr())