def splitTrainSet():
    """Split the tagged header file into train / testa / testb files.

    Reads './resource/tagged_headers_everyline.txt' and groups its lines
    into records: a line whose stripped text ends with '::line_number::0'
    starts a new record.  Every record is terminated with an extra '\\n'
    element so records are separated by a blank line in the output.

    The records are then written, in input order, to three files under
    './py_scikit/tmp/':

    * train.in  -- records [0, 600)
    * testa.in  -- records [600, 750)
    * testb.in  -- records [750, end)

    The number of records is printed to stdout.  Returns None.

    Tools.flatList is defined outside this function; it is presumably a
    helper that flattens the list of records into one flat list of lines
    (TODO confirm against its definition).
    """
    path = './resource/tagged_headers_everyline.txt'

    records = []
    current = []
    # Use a context manager so the input handle is closed deterministically
    # instead of relying on garbage collection.
    with open(path) as source:
        for line in source:
            if line.strip().endswith('::line_number::0'):
                # A new record starts here: close the one collected so far.
                # If this is the very first line, `current` is still empty,
                # so the first record emitted is just ['\n'].  That matches
                # the previous behaviour and is kept so the 600/750
                # boundaries do not shift.
                # NOTE(review): confirm that leading blank record is intended.
                current.append('\n')
                records.append(current)
                current = [line]
            else:
                current.append(line)

    # Flush the trailing record, if any lines were read at all.
    if current:
        current.append('\n')
        records.append(current)

    # A single parenthesised string prints identically under the Python 2
    # print statement and the Python 3 print function (note the two spaces,
    # which reproduce the separator the original `print a, b` inserted).
    print('len(nl) =  %d' % len(records))

    splits = (
        ('./py_scikit/tmp/train.in', records[:600]),
        ('./py_scikit/tmp/testa.in', records[600:750]),
        ('./py_scikit/tmp/testb.in', records[750:]),
    )
    for out_path, chunk in splits:
        # `with` guarantees each output file is flushed and closed before
        # the next one is opened.
        with open(out_path, 'w') as sink:
            sink.writelines(Tools.flatList(chunk))