def output_error(mydb, df0, target):
    """Write the rows of *df0* that failed any check to ``target + '_modify'``.

    A row counts as failed when at least one of ``check_town_geo``,
    ``reCntycode``, ``reTowncode`` or ``reNumber`` equals 0.

    Each failing row is written exactly once and in its original order.
    (Concatenating one filtered frame per check would repeat a row once for
    every check it fails.)

    Args:
        mydb: database handle exposing ``get_alias(name).write(frame)``.
        df0: checked address frame; must contain the four check columns and
            every column listed in ``output_columns`` below.
        target: base table name; the output table is ``target + '_modify'``.
    """
    cputime = tickwatch()

    check_columns = ['check_town_geo', 'reCntycode', 'reTowncode', 'reNumber']
    output_columns = [
        'geom', 'origin_address', 'fid', 'cnty_code', 'town_code',
        'lie', 'lin', 'road', 'zone', 'lane', 'alley',
        'number', 'floor', 'checklist',
    ]

    # One boolean per row: True when any check column holds 0.
    failed = (df0[check_columns] == 0).any(axis=1)
    result = df0.loc[failed, output_columns]

    name = target + '_modify'
    mydb.get_alias(name).write(result)
    cputime.tick('Write down dataframe' + name)
def output_error(target='A10008_TRAN',
                 db_path='G:/NCREE_GIS/2020_address/TGOS_NLSC_TWN22.sqlite'):
    """Export the rows of a checked table that failed any check.

    Reads table *target* from the SQLite database at *db_path*, keeps the
    rows where at least one of ``check_town_geo``, ``reCntycode``,
    ``reTowncode`` or ``reNumber`` equals 0, and writes them (all columns)
    to ``target + '_out'``.

    Each failing row is written exactly once and in its original order.
    (Concatenating one filtered frame per check would repeat a row once for
    every check it fails.)

    Args:
        target: name of the checked table to read. Defaults to the table the
            function was originally hard-wired to.
        db_path: path of the SQLite database. Defaults to the originally
            hard-wired database file.
    """
    cputime = tickwatch()

    mydb = manidb(db_path)
    df0 = mydb.get_alias(target).read()
    cputime.tick('Calculation accomplished')

    # export the outliers: True for a row when any check column holds 0
    check_columns = ['check_town_geo', 'reCntycode', 'reTowncode', 'reNumber']
    failed = (df0[check_columns] == 0).any(axis=1)
    result = df0[failed]

    mydb.get_alias(target + '_out').write(result)
    cputime.tick('Write down dataframe' + target)
def function_x():
    """Run the address checks for county ``'10008'`` and store the result.

    Reads the ``metadata_nsg_cnty`` and ``metadata_nsg_town`` tables from the
    hard-coded SQLite database. For every county row whose ``ncity`` equals
    ``'10008'`` it reads table ``'A' + ncity``, builds a WKT point string per
    row from ``TWD97_X`` / ``TWD97_Y``, runs the county-boundary,
    town-boundary, county-code, town-code and number checks, passes the frame
    through ``trans_column`` and writes it to ``'A' + ncity + '_TRAN'``.

    Source tables with no rows are skipped before any per-row work is done.
    """
    cputime = tickwatch()
    # Not referenced in this function; kept from the original.
    # NOTE(review): presumably here so a missing GDAL install fails early --
    # confirm before removing.
    from osgeo import ogr  # noqa: F401

    mydb = manidb('G:/NCREE_GIS/2020_address/TGOS_NLSC_TWN22.sqlite')
    cnty = mydb.get_alias('metadata_nsg_cnty').read()
    town = mydb.get_alias('metadata_nsg_town').read()

    for _, row in cnty[cnty['ncity'] == '10008'].iterrows():
        cntynum = row['ncity']
        cnty_wkt = row['cnty_wkt']
        target = 'A' + cntynum

        # resource
        src_table = mydb.get_alias(target)
        # outcome
        dst_table = mydb.get_alias(target + '_TRAN')

        df0 = src_table.read()
        # Skip empty tables before deriving columns: there is nothing to
        # check, and the steps below assume at least one row.
        if df0.empty:
            continue

        # 'POINT(x y)' built column-wise instead of one Python call per row.
        df0['point_wkt'] = (
            'POINT('
            + df0['TWD97_X'].astype(str)
            + ' '
            + df0['TWD97_Y'].astype(str)
            + ')'
        )
        cputime.tick()

        # check county boundary
        df0 = check_addr_column(cnty_wkt, df0, 'point_wkt', fun4checkgeo)

        # check town boundary: one (town polygon WKT, point WKT) pair per row
        town_wkt = lookup_value(df0, 'town_code', town)
        ziparg = zip(town_wkt['town_wkt'].tolist(),
                     df0['point_wkt'].tolist())
        with Pool(8) as mpool:
            check_town = mpool.map(fun4checkgeo, ziparg)
        # Serial alternative for debugging:
        #   check_town = [fun4checkgeo(arg) for arg in ziparg]

        df_check_town = pd.DataFrame.from_dict(check_town, orient='columns')
        # Assignment aligns on index labels.
        # NOTE(review): assumes df0 carries a default 0..n-1 index -- confirm.
        df0['check_town_geo'] = df_check_town['checkgeo']
        cputime.tick('Geometry checked')

        # check county code
        df0 = check_addr_column(cntynum, df0, 'cnty_code', fun4cntycode)
        # check town code
        df0 = check_addr_column(cntynum, df0, 'town_code', fun4towncode)
        # check number
        df0 = check_addr_column(cntynum, df0, 'num', fun4number)

        df0 = trans_column(df0)
        dst_table.write(df0)
import csv
import json
import math
import re
from itertools import zip_longest
from multiprocessing import Pool

import numpy as np
import osgeo
import pandas
import pandas as pd
# Bind ``ogr`` through the ``osgeo`` package; the bare top-level
# ``import ogr`` is the legacy form of the GDAL Python bindings.
from osgeo import ogr

import Lily.ctao2.ctao2_database_alias
from Lily.ctao2.ctao2_database_alias import manidb, alias, tickwatch

# Runs when executed as a script or from a console that sets
# ``__name__`` to '__console__'.
if __name__ == '__console__' or __name__ == '__main__':
    cputime = tickwatch()

    #%%----------read-------------
    origindb = manidb('G:/NCREE_GIS/htax/nsg_bldg_taipei.sqlite')
    country = 'taipei'

    df_tax = origindb.get_alias(f'htax_{country}').read()
    # Only the join key and the geometry are needed from the TGOS table.
    df_tgo = origindb.get_alias(f'tgos_{country}_group').read()[
        ['nsg_addr_key', 'geom']
    ]  # .head(200)
    cputime.tick('Dataframe read')

    #%%---------search-------------
    # Default (inner) merge: tax rows without a matching TGOS key are dropped.
    df_tax = df_tax.merge(df_tgo, on='nsg_addr_key')

    #df_tgo = df_tgo.set_index(['nsg_addr_key'])
    #tax_addr_list = df_tax['nsg_addr_key'].tolist()
    #i = 0
    #for addr in tax_addr_list:
    #    i += 1; print(i)
def function_x(mydb, cnty, town, cntynum, cnty_wkt):
    """Check one county's address table and write the results.

    Reads table ``'A' + cntynum``, records the original address fields as a
    string, builds a WKT point per row, runs the county-boundary,
    town-boundary, county-code, town-code and number checks, then writes the
    selected columns to ``'A' + cntynum + '_TRAN'`` and hands the full frame
    to ``output_error`` so failing rows are exported.

    If the source table has no rows the function returns immediately and
    writes nothing.

    Args:
        mydb: database handle exposing ``get_alias(name).read()/.write()``.
        cnty: county metadata frame (not used in this function; kept for
            interface compatibility).
        town: town metadata frame passed to ``lookup_value``.
        cntynum: county code string; also used to build the table names.
        cnty_wkt: county boundary WKT passed to the county-boundary check.
    """
    cputime = tickwatch()
    target = 'A' + cntynum

    # resource
    df0 = mydb.get_alias(target).read()
    cputime.tick('Data read')

    # Nothing to check in an empty table; the row-wise steps below assume
    # at least one row.
    if df0.empty:
        return

    # Keep the untouched address fields as one string per row.
    address_fields = df0[[
        'fid', 'cnty_code', 'town_code', 'lie', 'lin',
        'road', 'zone', 'lane', 'alley', 'num',
    ]]
    df0.insert(0, 'origin_address',
               address_fields.apply(lambda a: str(a.to_list()), axis=1))

    # 'POINT(x y)' built column-wise instead of one Python call per row.
    df0['point_wkt'] = (
        'POINT('
        + df0['TWD97_X'].astype(str)
        + ' '
        + df0['TWD97_Y'].astype(str)
        + ')'
    )
    cputime.tick('Point_wkt added')

    # check county boundary
    df0 = check_addr_column(cnty_wkt, df0, 'point_wkt', fun4checkgeo)

    # check town boundary: one (town polygon WKT, point WKT) pair per row
    town_wkt = lookup_value(df0, 'town_code', town)
    ziparg = zip(town_wkt['town_wkt'].tolist(), df0['point_wkt'].tolist())
    with Pool(8) as mpool:
        check_town = mpool.map(fun4checkgeo, ziparg)
    # Serial alternative for debugging:
    #   check_town = [fun4checkgeo(arg) for arg in ziparg]

    df_check_town = pd.DataFrame.from_dict(check_town, orient='columns')
    # Assignment aligns on index labels.
    # NOTE(review): assumes df0 carries a default 0..n-1 index -- confirm.
    df0['check_town_geo'] = df_check_town['check_town_geo']
    cputime.tick('Geometry checked')

    # check county code
    df0 = check_addr_column(cntynum, df0, 'cnty_code', fun4cntycode)
    # check town code
    df0 = check_addr_column(cntynum, df0, 'town_code', fun4towncode)
    # check number
    df0 = check_addr_column(cntynum, df0, 'num', fun4number)
    cputime.tick('Code and number checked')

    # Serialise the per-row check flags into one text column.
    check_flags = df0[[
        'check_town_geo', 'reCntycode', 'reTowncode', 'reNumber', 'reFloor',
    ]]
    df0['checklist'] = check_flags.apply(lambda a: a.to_csv(), axis=1)

    df_tran = df0[[
        'geom', 'origin_address', 'fid', 'cnty_code', 'town_code',
        'lie', 'lin', 'road', 'zone', 'lane', 'alley',
        'number', 'floor', 'checklist',
    ]]
    mydb.get_alias(target + '_TRAN').write(df_tran)
    cputime.tick('Write down dataframe' + target)

    # take out error data
    output_error(mydb, df0, target)