Example 1
def render(args: argparse.Namespace) -> int:
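    """Generate the case SVGs and bearing data for each triad piece (left/center/right); optionally open the results in Inkscape."""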
    kbutil_dll = get_kbutil_dll_path()
    output_path = "case"

    for piece in ["left", "center", "right"]:
        input_path = "layout/triad_{}.xml".format(piece)
        ret = generate_svg(kbutil_dll, input_path, output_path)
        if ret != 0:
            print("Failed to generate SVG")
            return ret

        bearings_path = "./temp/bearings_{}.json".format(piece)
        bearings_debug_path = "./temp/bearings_{}.svg".format(piece)

        mkdir(os.path.dirname(bearings_path))
        ret = generate_bearings(
            kbutil_dll, input_path, bearings_path, bearings_debug_path
        )
        if ret != 0:
            print("Failed to switch bearings")
            return ret

    if args.open:
        sh(
            [
                "inkscape",
                os.path.join(output_path, "triad_left.svg"),
                os.path.join(output_path, "triad_right.svg"),
                os.path.join(output_path, "triad_center.svg"),
            ]
        )

    return 0
Example 2
def gen_lang_site(master, site, config):
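    """Generate every page for one language variant of the site and write it under <config.out_dir>/<lang>."""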
    lang = site.lang
    out_dir = "%s/%s" % (config.out_dir, lang)
    util.mkdir(out_dir)
    util.mkdir("%s/weeks" % out_dir)
    util.mkdir("%s/days" % out_dir)

    pages = standard_pages(site, config)
    if not config.index_only:
        pages += week_pages(site, config)
        pages += day_pages(site, config)

        if site.topics:
            util.mkdir("%s/topics" % out_dir)
            pages += topic_pages(site, config)
        if site.facebook:
            pages += facebook_pages(site, config)
        if site.interviews:
            pages += interviews_pages(site, config)
        if site.custom:
            pages += custom_pages(site, config)
        if site.blogs:
            util.mkdir("%s/blogs" % out_dir)
            pages += blogs(site, config)
        if site.gen_sitemap:
            pages += [sitemap_page(master, site, config)]
        if site.gen_search:
            pages += [search_page(master, site, config)]

    for page in pages:
        with open_out_file(out_dir, page[0]) as outfile:
            outfile.write(page[1])
            print("Generated %s" % outfile.name)
Example 3
def check_dirs(self):
    mkdir(self.conf['dirs']['output'])
    for key in self.conf['dirs']:
        val = self.conf['dirs'][key]
        if not os.path.isdir(val):
            print('\nFolder does not exist ' + val + '\nExit.\n')
            sys.exit(1)
Example 4
    def get_model_from_hdfs(self, model_path, feature_path, model_name):
        # Copy the feature and model files from HDFS into the local data directory.
        dst_path = os.path.join(self.data_path, model_name)
        util.mkdir(dst_path, self.logger)
        rc = util.hdfs_file_copy(feature_path, dst_path, False, "hadoop", self.logger)
        if rc is False:
            self.logger.error("Failed to get %s from HDFS", feature_path)
            return False
        rc = util.hdfs_file_copy(model_path, dst_path, False, "hadoop", self.logger)
        if rc is False:
            self.logger.error("Failed to get %s from HDFS", model_path)
            return False
        return True
Example 5
    def __init__(self, type):
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.logger = mylogging.getLogger('deploy_client.log')
        self.version = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
        self.online_path = os.path.join(self.path, './online')
        self.backup_path = os.path.join(self.path, './backup')
        self.data_path = os.path.join(self.path, './data')
        self.service_conf_path = os.path.join(self.path, './config')
        self.release_lib_path = os.path.join(self.path, './release')
        self.zklist = []  # zookeeper node registrations collected below
        # make sure the data directories exist and are empty
        if type != 'test':
            if os.access(self.data_path, os.F_OK):
                util.del_file(self.data_path, self.logger)
            else:
                util.mkdir(self.data_path, self.logger)

            if os.access(self.service_conf_path, os.F_OK):
                util.del_file(self.service_conf_path, self.logger)
            else:
                util.mkdir(self.service_conf_path, self.logger)

            if os.access(self.release_lib_path, os.F_OK):
                util.del_file(self.release_lib_path, self.logger)
            else:
                util.mkdir(self.release_lib_path, self.logger)
        util.mkdir(self.backup_path, self.logger)
        util.mkdir(self.online_path, self.logger)
        # read config file
        config = ConfigParser.ConfigParser()
        config.read('env.conf')
        self.monitor = config.get('config', 'monitor')
        self.model_name = config.get('config', 'model_name')
        self.service_name = config.get('config', 'service_name')
        self.model_path = config.get('config', 'model_path')
        self.feature_path = config.get('config', 'feature_path')
        self.model_port = config.get('config', 'model_port')
        zk_regist = config.get('config', 'zk_regist')
        zknodes = config.get('config', 'zklist').split(',')
        for item in zknodes:
            zkinfo = {}
            zkinfo['service_name'] = item
            zkinfo['cluster_id'] = config.get(item, 'cluster_id')
            zkinfo['group_id'] = config.get(item, 'group_id')
            zkinfo['service_id'] = config.get(item, 'service_id')
            zkinfo['type_id'] = config.get(item, 'type')
            self.zklist.append(zkinfo)
        if zk_regist == '1':
            self.registe_zk_service()
            self.logger.info("registered zk %s", zkinfo)
Example 6
def pool_gen_global_site_channels(pool, master):
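    """Queue channel-page generation for the global site on the worker pool, 10 groups per task, and return the async results."""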
    config = master.config
    site = master.global_site

    pages = []
    out_dir = "%s/global" % (config.out_dir)
    results = []
    if site.gen_channel_html and config.channel:
        util.mkdir("%s/channels" % out_dir)
        chunks = util.chunks(site.groups, 10)
        for chunk in chunks:
            results.append(
                pool.apply_async(single_channel_pages, (site, config, chunk)))
    return results
Example 7
def _create_fs(query, router, edgenode):
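    """Unmount any stale mount point, ensure the mount and config directories exist, then mount the VDFS via FUSE in the foreground."""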
    from fuse import FUSE
    from fs.vdfs import VDFS

    if edgenode:
        from lib.util import set_edgenode
        set_edgenode()

    mnt = get_mnt_path()
    _unmount(mnt)
    mkdir(mnt)

    conf = get_conf_path()
    mkdir(conf)

    FUSE(VDFS(query, router), mnt, foreground=True)
Example 8
    def __init__(self, router=None, core=None):
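        """Ensure the var directory exists, validate the entry name against FIELDS, and pick a local or remote filesystem backend."""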
        path = get_var_path()
        mkdir(path)

        name = self._get_name()
        if name in FIELDS:
            self._field = name
        else:
            log_err(self, 'invalid entry')
            raise Exception('Error: invalid entry')

        if not router or router.local:
            from interface.localfs import LocalFS
            self._fs = LocalFS()
        else:
            from interface.remotefs import RemoteFS
            self._fs = RemoteFS(router)
        self._core = core
Example 9
    def get_model_from_hdfs(self, model_path, feature_path, model_name):
        dst_path = os.path.join(self.data_path, model_name)
        util.mkdir(dst_path, self.logger)
        rc = util.hdfs_file_copy(feature_path, dst_path, False, "hadoop", self.logger)
        if rc is False:
            self.logger.error("Failed to get %s from HDFS", feature_path)
            return False
        local_models_path = os.path.join(self.path, "models")
        if not os.path.isdir(local_models_path):
            util.mkdir(local_models_path, self.logger)
        rc = util.hdfs_file_copy(model_path, os.path.join(local_models_path, model_name), False, "hadoop", self.logger)
        if rc is False:
            self.logger.error("Failed to get %s from HDFS", model_path)
            return False
        # assemble the downloaded model parts into a single predict.model file
        predict_model_path = self.assemble_model_file(os.path.join(local_models_path, model_name))
        print_log("assemble predict model path: %s\n" % predict_model_path)
        rc = util.copy_file(predict_model_path, os.path.join(dst_path, "predict.model"), self.logger)
        if rc is False:
            print_log("Failed to copy predict model to data dir\n")
            return False
        return True
Example 10
    def packet_dnn(self):
        dst_path = os.path.join(self.data_path, self.model_name, "version")
        f_path = os.path.join(self.data_path, self.model_name)
        util.mkdir(dst_path, self.logger)
        print_log("client begins packaging the model\n")
        # get the model and feature files from HDFS
        rc = util.hdfs_file_copy(self.model_path, dst_path, False, "hadoop", self.logger)
        if rc is False:
            print_log("Failed to get %s from HDFS" % self.model_path)
            return False, 'get model from hdfs error'
        rc = util.hdfs_file_copy(self.feature_path, f_path, False, "hadoop", self.logger)
        if rc is False:
            print_log("Failed to get %s from HDFS" % self.feature_path)
            return False, 'get feature from hdfs error'
        # modify config
        rc = self.modify_config(self.model_name)
        if rc is False:
            return False, 'modify config error'
        # get the release .so lib
        self.get_release_so()
        online_path = os.path.join(self.online_path, self.service_name, self.version)
        tar_path = os.path.join(online_path, self.version)
        shutil.move(self.service_conf_path, os.path.join(tar_path, "./config"))
        shutil.move(self.data_path, os.path.join(tar_path, "./data"))
        shutil.move(self.release_lib_path, os.path.join(tar_path, "./lib"))
        tar_file = util.tar(tar_path, self.logger)
        util.del_dir(tar_path, self.logger)
        # res = util.get_file_md5(tar_file)
        res = util.gen_file_md5(online_path, tar_file, self.version, self.logger)
        # upload to the monitor host
        cmd = "rsync -r %s %s" % (os.path.join(self.online_path, self.service_name), self.monitor)
        print_log("packet upload cmd: %s\n" % cmd)
        rc, stdout, stderr = util.run_shell(cmd, self.logger)
        if rc is False:
            return False, 'upload model error'
        self.logger.info("successfully uploaded model %s to monitor %s, model version %s",
                         self.model_name, self.monitor, self.version)
        print_log("client finished packaging: %s\n" % tar_file)
        return True, self.version
Example 11
def ponoko(args: argparse.Namespace) -> int:
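    """Generate the case SVGs for each triad piece into the ponoko output directory; optionally open them in Inkscape."""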
    kbutil_dll = get_kbutil_dll_path()
    output_path = os.path.join(get_triad_repo_dir(), "case", "ponoko")
    mkdir(output_path)

    for piece in ["left", "center", "right"]:
        print(piece)

        input_path = "layout/triad_{}.xml".format(piece)
        ret = generate_svg(kbutil_dll, input_path, output_path)
        if ret != 0:
            print("Failed to generate SVG")
            return ret

    if args.open:
        sh([
            "inkscape",
            os.path.join(output_path, "triad_left.svg"),
            os.path.join(output_path, "triad_right.svg"),
            os.path.join(output_path, "triad_center.svg"),
        ])

    return 0
Example 12
def packageKSimhash(requir_user_file, thsold):
    '''Reduce the dimensionality of the prov_sub data, using each user as the anchor point.
    @param requir_user_file  raw data file (user_id, province, question)
    @param thsold            threshold; values below it are considered useful, int
    '''
    prov_set = getProvinceSet()
    with open(USER_PATH + requir_user_file, 'r') as user_file:
        readers = csv.DictReader(user_file)
        for reader in readers:
            prov = reader['province'][:2]
            if prov not in prov_set:
                prov = '全国'

            user_id = reader['user_id']
            question_id = eval(reader['question'])

            question_list = tableToJson(table='question_simhash_20171111',
                                        question_id=question_id)

            if len(question_list) > 0:
                subj_dic = {}
                recom_set = set([])

                for sub_kpoint in question_list:
                    if str(sub_kpoint[1]) not in subj_dic.keys():
                        subj_dic[str(sub_kpoint[1])] = [sub_kpoint[0]]
                    else:
                        subj_dic[str(sub_kpoint[1])].append(sub_kpoint[0])

                for ks, vs in subj_dic.items():
                    ps_file = PROV_SUB_PATH + datetime + '/' + prov + '/' + prov_subj_file.format(
                        prov, ks, datetime)

                    if os.path.exists(ps_file):
                        PATH = K_SIMH_PATH + datetime + '/' + prov
                        mkdir(PATH)

                        if os.path.exists(
                                PATH + '/' +
                                output_file.format(prov, ks, datetime)):
                            os.remove(PATH + '/' +
                                      output_file.format(prov, ks, datetime))
                        k_simh_list = []
                        with open(ps_file, 'r') as ksimhash_file:
                            while True:
                                ksimhash = ksimhash_file.readline()
                                if ksimhash:
                                    sim_dis = K_Simhash(
                                        ksimhash=eval(ksimhash), vs=vs)

                                    if sim_dis < thsold:
                                        with open(
                                                PATH + '/' +
                                                output_file.format(
                                                    prov, ks, datetime),
                                                'a') as txt_file:
                                            txt_file.writelines(
                                                json.dumps(eval(ksimhash)))
                                            txt_file.write('\n')

                                else:
                                    break
                            ksimhash_file.close()

                    else:
                        print(u"没有在Prov_Sub_input中查询到生成的(省份-学科)文件!")
Example 13
_DIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))).replace(
        '\\', '/')
DATABASE = _DIR + '/new_database/'
RAW_PATH = DATABASE + 'Raw_input/'
SUB_KPOINT_PATH = DATABASE + 'Sub_kpoint_input/'
datetime = '09-11'
init_file = 'user_if_{}.csv'
skp_file = 'question_sub_kpoint_{}_{}.txt'

if __name__ == '__main__':
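    # raw province values that cannot be mapped to a real province are bucketed under '全国' (nationwide)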
    province_set = {
        'nan', '0', '1', '2', '3', 'Rize', 'Juzny', 'hobbit', '全国', '台湾',
        'NULL'
    }
    mkdir(SUB_KPOINT_PATH + datetime)
    PATH = SUB_KPOINT_PATH + datetime + '/'
    with open(RAW_PATH + init_file.format(datetime), 'r',
              encoding='utf-8') as raw_file:
        readers = csv.DictReader(raw_file)
        for reader in readers:
            prov = reader['province']
            if prov in province_set:
                prov = '全国'
            else:
                prov = prov[:2]

            question_id = []
            try:
                if isinstance(reader['question'], str):
                    if len(eval(reader['question'])) > 0:
Example 14
            else:
                break

        logging.info(u"已经解析完{0}省份下{1}学科的数据,并存入到Prov_Sub_input文件!".format(prov, subj))
        new_file.close()
        sub_kpoint_file.close()


if __name__ == '__main__':
    prov_set = getProvinceSet()
    # prov_set = {'青海'}
    subj_set = {str(j) for j in range(1, 11)} | {str(j) for j in range(21, 31)} | {str(j) for j in range(41, 51)}
    pool = Pool(3)

    for prov in prov_set:
        P_S_PATH = PROV_SUB_PATH + datetime + '/' + prov
        mkdir(P_S_PATH)
        logging.info("the classify the subject ")

        for subj in subj_set:
            logging.info(u"正在读取{0}省份下{1}学科的Sub_kpoint_input文件".format(prov, subj))

            pool.apply_async(packClassSub, kwds={
                "prov":prov,
                "subj":subj,
                "P_S_PATH":P_S_PATH
            })

    pool.close()
    pool.join()
Example 15
    subj_set = {str(j)
                for j in range(1, 11)} | {str(j)
                                          for j in range(21, 31)
                                          } | {str(j)
                                               for j in range(41, 51)}

    for datetime in datetimes:
        logging.basicConfig(
            format=LOGGING_FORMAT,
            level=logging.INFO,
            filename='working/Recom_subqid_{}.log'.format(datetime),
            filemode='a')

        for prov in prov_set:
            PATH = SUBJ_KPO_PATH + datetime + '/' + prov
            mkdir(PATH)
            logging.info("running the {0} at the time between {1}".format(
                prov, datetime))

            for subj in subj_set:
                if os.path.exists(PATH + '/' +
                                  output_file.format(prov, subj, datetime)):
                    os.remove(PATH + '/' +
                              output_file.format(prov, subj, datetime))

            pool.apply_async(packSubjKpo,
                             kwds={
                                 "prov": prov,
                                 "PATH": PATH,
                                 "datetime": datetime
                             })
Example 16
    total_num = len(X)//slice_num  # map to nodes 
    if slidx != slice_num-1:
        X = X[slidx*total_num: (slidx+1)*total_num]
    else:
        X = X[slidx*total_num: ]
    total_num = len(X)
    total_idx = np.arange(total_num)
    chunk_len = total_num//(mpisize-1) if total_num >= mpisize - 1 else total_num  # map to process
    data = [X[total_idx[i*chunk_len: (i+1)*chunk_len]] for i in tqdm(range(mpisize), desc='Scatter data')]  # scatter the data to the other processes
else:
    data = None
data = comm.scatter(data, root=0)

# TMF
data_path = '../feature-{3}/{0}/{1}/{2}/'.format(*[mode, freq, slidx, model_type])
mkdir(data_path)

all_feature = []

win = 3000  # length of the time series
D = 3  # order of the motif, 3 means triad
shape = np.array([len(range(1, (win-1)//(D-1) + 1)), len(range(0, win-(D-1)*1)), D])  # TMF image shape
overlap = win-(D-1)*shape[0]  # overlap caused by the rotation in the TMF image
TMF = np.zeros(shape)  # placeholder of TMF image

for ts in tqdm(data):
    filt_ts = filt_ECG(ts, freq) 
    TMF = gen_TMF(filt_ts, overlap, TMF, D)  # the gen_TMF function is optimized with numba package
    img = np.expand_dims(TMF, axis=0)  # [1, W, H, 3]
    feature = extractor.predict(img)  # [1, 512]
    all_feature.append(feature)
Example 17
    def __init__(self, action, model_name, service_name, algorithm):
        print_log("model name: " + model_name + ", service name: " + service_name)
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.restore_env_conf()
        self.algorithm = algorithm
        self.logger = mylogging.getLogger('deploy_client.log')
        cmd = "rm -rf " + os.path.join(self.path, "models/*")
        rc, stdout, stderr = util.run_shell(cmd, self.logger)
        self.version = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
        self.online_path = os.path.join(self.path, './online')
        self.backup_path = os.path.join(self.path, './backup')
        self.data_path = os.path.join(self.path, './data')
        self.service_conf_path = os.path.join(self.path, './config')
        self.release_lib_path = os.path.join(self.path, './release')
        self.docker_images = {}
        self.docker_images['LR'] = "registry.intra.weibo.com/weibo_rd_algorithmplatform/modelservice_lr:v1.1"
        self.docker_images['DNN'] = "registry.intra.weibo.com/weibo_rd_algorithmplatform/dnn_prod:v1.0"
        self.zklist = []  # zookeeper node registrations collected below
        self.modify_zk_info(model_name, service_name)
        # make sure the data directories exist and are empty
        if os.access(self.data_path, os.F_OK):
            util.del_file(self.data_path, self.logger)
        else:
            util.mkdir(self.data_path, self.logger)

        if os.access(self.service_conf_path, os.F_OK):
            util.del_file(self.service_conf_path, self.logger)
        else:
            util.mkdir(self.service_conf_path, self.logger)

        if os.access(self.release_lib_path, os.F_OK):
            util.del_file(self.release_lib_path, self.logger)
        else:
            util.mkdir(self.release_lib_path, self.logger)
        util.mkdir(self.backup_path, self.logger)
        util.mkdir(self.online_path, self.logger)
        # read config file
        config = ConfigParser.ConfigParser()
        config.read('env.conf')
        self.monitor = config.get('config', 'monitor')
        self.docker_image_tag = self.docker_images[algorithm.upper()]
        print_log("docker tag: %s" % self.docker_image_tag)
        self.model_name = model_name
        self.service_name = service_name
        self.get_model_feature_path(action)
        self.model_port = config.get('config', 'model_port')
        zk_regist = config.get('config', 'zk_regist')
        zknodes = config.get('config', 'zklist').split(',')
        for item in zknodes:
            zkinfo = {}
            zkinfo['service_name'] = item
            zkinfo['cluster_id'] = config.get(item, 'cluster_id')
            zkinfo['group_id'] = config.get(item, 'group_id')
            zkinfo['service_id'] = config.get(item, 'service_id')
            zkinfo['type_id'] = config.get(item, 'type')
            self.zklist.append(zkinfo)
        if zk_regist == "1" and action == "deploy":
            self.registe_zk_service()
            self.logger.info("registered zk %s", zkinfo)
    prov_set = getProvinceSet()
    subj_set = {str(j)
                for j in range(1, 11)} | {str(j)
                                          for j in range(21, 31)
                                          } | {str(j)
                                               for j in range(41, 51)}
    # subj_set = {'2'}
    # prov_set = {'福建'}

    for datetime in datetimes:
        logging.basicConfig(
            format=LOGGING_FORMAT,
            level=logging.INFO,
            filename='working/itemCF_Eval_{}.log'.format(datetime),
            filemode='a')

        for prov in prov_set:
            ICF_PATH = ITEMCF_PATH + datetime + '/' + prov
            mkdir(ICF_PATH)

            for subj in subj_set:
                pool.apply_async(packItemCFEval,
                                 kwds={
                                     'prov': prov,
                                     'subj': subj,
                                     'datetime': datetime,
                                     'ICF_PATH': ICF_PATH
                                 })

    pool.close()
    pool.join()
Example 19
            ps_file.close()
        logging.info(u"已经解析完{0}省份下{1}学科的数据,并存入到文件!".format(prov, subj))


if __name__ == '__main__':
    prov_set = getProvinceSet()
    subj_set = {str(j)
                for j in range(1, 11)} | {str(j)
                                          for j in range(21, 31)
                                          } | {str(j)
                                               for j in range(41, 51)}
    # prov_set = {'全国'}
    # subj_set = {'3'}

    pool = Pool(2)
    for prov in prov_set:
        FO_PATH = FPGROWTH_PATH + datetime + '/' + prov
        mkdir(FO_PATH)

        for subj in subj_set:
            pool.apply_async(packageFPGrowthRun,
                             kwds={
                                 "prov": prov,
                                 "subj": subj,
                                 "datetime": datetime,
                                 "FO_PATH": FO_PATH
                             })

    pool.close()
    pool.join()
Example 20
                support = support * 0.9
                confidence = confidence * 0.9
                flag += 1


if __name__ == '__main__':
    #prov_set = getProvinceSet()
    # , '甘肃', '宁夏', '四川', '全国', '重庆', '陕西', '吉林', '北京', '海南'
    prov_set = {'江苏'}
    support = 0.03
    confidence = 0.10
    # subj_set = {str(j) for j in range(1, 11)} | {str(j) for j in range(21, 31)} | {str(j) for j in range(41, 51)}
    subj_set = {'5'}
    for prov in prov_set:
        AO_PATH = APRIORI_PATH + datetime + '/' + prov
        mkdir(AO_PATH)

        for subj in subj_set:
            packageWhileAprioriRun(prov=prov,
                                   subj=subj,
                                   support=support,
                                   confidence=confidence,
                                   datetime=datetime,
                                   AO_PATH=AO_PATH)

    # pool = Pool()
    # for prov in prov_set:
    #     AO_PATH = APRIORI_PATH + datetime + '/' + prov
    #     mkdir(AO_PATH)
    #     for subj in subj_set:
    #         pool.apply_async(packageAprioriRun, kwds={
Example 21
"""
Map parallel_flex.py to different nodes and cores to run them in parallel.
Author: Yadong Zhang
E-mail: [email protected]

Demo:
$ python3 map_length_effect.py 10
"""
import sys
sys.path.append('../')
import subprocess
from lib.util import mkdir

nodes = int(sys.argv[1])
for idx in range(nodes):
    mkdir('slice/{0}/'.format(idx))
    qsub_command = """qsub -v slice={0},nodes={1} -q adf length_effect.sh""".format(
        *[idx, nodes])
    exit_status = subprocess.call(qsub_command, shell=True)  # submit the job
    if exit_status == 1:  # check that the job was submitted successfully
        print("Job {0} failed to submit".format(qsub_command))
Example 22
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))).replace(
        '\\', '/')
DATABASE = _DIR + '/new_database/Input/'
RAW_PATH = DATABASE + 'Raw_input/'
PRE_DATA_PATH = DATABASE + 'Pre_data_input/'
init_file = 'user_if_{}.csv'
pre_province_file = 'user_json_{}_{}.csv'
pre_province_txt = 'user_json_{}_{}.txt'
datetime = '09-11'

if __name__ == '__main__':
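    # unmappable province values fall back to '全国' (nationwide); '闽' is the abbreviation for 福建 (Fujian)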
    province_set = {
        'nan', '0', '1', '2', '3', 'Rize', 'Juzny', 'hobbit', '全国', '台湾',
        'NULL'
    }
    mkdir(PRE_DATA_PATH + datetime)
    PATH = PRE_DATA_PATH + datetime + '/'
    with open(RAW_PATH + init_file.format(datetime), 'r',
              encoding='utf-8') as raw_file:
        # data = csv.reader(f)
        # for d in data:
        #     print(d)
        readers = csv.DictReader(raw_file)
        for reader in readers:
            prov = reader['province']
            if prov in province_set:
                prov = '全国'
            elif prov in {'闽'}:
                prov = '福建'
            else:
                prov = prov[:2]