예제 #1
0
#!/usr/bin/env python
#coding:gbk
'''
Created on 2013-3-15

@author: felicia
'''

import Global

def count_keywords(lines, tokenize):
	'''Count how often each token occurs and rank tokens by frequency.

	lines    -- iterable of text lines.
	tokenize -- callable applied to each line; it must return an iterable
	            of (segword, pos) pairs.

	Returns a list of (segword, [pos, count]) items ordered by descending
	count.  pos is the value reported the first time the token was seen;
	later occurrences only increase the count.
	'''
	stats = {}
	for line in lines:
		for segword, pos in tokenize(line):
			# `in` behaves the same on Python 2 and 3 (has_key is 2-only).
			if segword not in stats:
				stats[segword] = [pos, 0]
			stats[segword][-1] += 1
	# sorted() accepts the items view on Python 3 as well as the list
	# returned on Python 2, and is stable for tokens with equal counts.
	return sorted(stats.items(), key=lambda item: item[-1][-1], reverse=True)


def main():
	'''Tokenize training_set.txt and write ranked token counts to keywd.txt.

	Each output line is "segword<TAB>pos<TAB>count".
	'''
	# Context managers close the files even if tokenizing or writing fails.
	with open('training_set.txt', 'r') as fi:
		ranked = count_keywords(fi, Global.GetTokenPos)
	# The output is opened only once the ranking is complete, so a failure
	# while tokenizing does not truncate an existing keywd.txt.
	with open('keywd.txt', 'w') as fo:
		for segword, (pos, cnt) in ranked:
			fo.write('%s\t%d\t%d\n' %(segword, pos, cnt))


if __name__ == '__main__':
	main()
	# Manual tokenizer check (Python 2 print syntax), kept for reference:
	#
	# segResult = Global.GetTokenPos("ÄãºÃÄãºÃ")
	# for i, (segword, pos) in enumerate(segResult):
	# 	print segword,pos