def main(): u = Updater(0.5) with open(sys.argv[1],'r') as f: last = update_pool(get_a_line(f.readline())) lines = f.readlines() u.start() for l in lines: r = get_a_line(l) if equal(r,last): last['data']=last['data']+r['data'] else: result.append(last) last = r last = update_pool(last) u.output = '%s.%s%s%s'%(len(result),last['city'],last['area'],last['road']) pool_dup = sorted(pool,reverse=True,key=lambda x:len(x)) for r in result: r['feat']=find_feature(pool_dup,r['city']+r['area']+r['road']) u.output = 'changing:%s%s%s,%s'%(r['city'],r['area'],r['road'],r['feat']) u.stop() print 'writing files' po = codecs.open('word_pool.js','w','utf-8') po.write(u'var pool=') po.write(json.dumps(pool_dup,encoding='utf8',ensure_ascii=False)) po.write(u';') po.close() ro = codecs.open('data.js','w','utf-8') ro.write(u'var database=') ro.write(json.dumps(result,encoding='utf8',ensure_ascii=False)) ro.write(u';') ro.close()
def main():
    """Collect every distinct character from the file named in sys.argv[1],
    skipping LF, CR and the ideographic space (U+3000), and write the
    characters, sorted, to extracted.txt.

    Relies on the module-level Updater class for the live progress display.
    """
    u = Updater(0.5)
    extracted = {}  # used as an ordered-on-output set of seen characters
    with codecs.open(sys.argv[1], 'r', 'utf8') as f:
        lines = f.readlines()
    u.start()
    skip = (u'\n', u'\u3000', u'\r')
    for line in lines:
        for ch in line:
            if ch not in skip:
                extracted[ch] = 1
                u.output = ch  # progress display for the Updater thread
    u.stop()
    # FIX: use a context manager so extracted.txt is closed even if a write
    # raises; sorted() replaces the keys()/sort() pair with identical order.
    with codecs.open('extracted.txt', 'w', 'utf-8') as op:
        for c in sorted(extracted):
            op.write(c)