コード例 #1
0
def splitTrainSet():
	"""Split the tagged header file into train / testa / testb files.

	Reads './resource/tagged_headers_everyline.txt' and groups its lines
	into records: a new record starts at every line whose stripped text ends
	with '::line_number::0'. Each record is terminated with a bare newline
	line. The records are then written out as:

	* records [0, 600)   -> ./py_scikit/tmp/train.in
	* records [600, 750) -> ./py_scikit/tmp/testa.in
	* records [750, ...) -> ./py_scikit/tmp/testb.in

	The number of records found is printed to stdout. Returns None.
	"""
	path = './resource/tagged_headers_everyline.txt'
	# Close the input deterministically instead of leaking the handle.
	with open(path) as src:
		lines = src.readlines()

	nl = []   # list of records, each record is a list of lines
	tmp = []  # lines of the record currently being collected
	for line in lines:
		if line.strip().endswith('::line_number::0'):
			# A header line closes the previous record. Skip the flush when
			# nothing has been collected yet (i.e. the very first line is a
			# header); otherwise an empty record holding only '\n' would be
			# emitted and shift the split boundaries below by one.
			if tmp:
				tmp.append('\n')
				nl.append(tmp)
			tmp = [line]
		else:
			tmp.append(line)
	# Flush the trailing record, if any.
	if tmp:
		tmp.append('\n')
		nl.append(tmp)

	# Single pre-formatted argument: prints the same text as the former
	# print statement and also parses under Python 3.
	print('len(nl) =  %d' % len(nl))

	# NOTE(review): Tools.flatList is defined elsewhere; presumably it
	# flattens the list of records into one flat list of lines - confirm.
	outputs = (
		('./py_scikit/tmp/train.in', nl[:600]),
		('./py_scikit/tmp/testa.in', nl[600:750]),
		('./py_scikit/tmp/testb.in', nl[750:]),
	)
	for outPath, records in outputs:
		# 'with' guarantees the data is flushed and the handle closed.
		with open(outPath, 'w') as out:
			out.writelines(Tools.flatList(records))