def output_error():
    """Export validation outliers of the 'A10008_TRAN' table.

    Reads the table from the TGOS sqlite database, collects every row
    whose check column equals 0 (geometry check, county code, town
    code, house number), and writes the combined result to the
    '<target>_out' alias.

    NOTE(review): a row that fails several checks is exported once per
    failed check (no de-duplication) — behavior preserved as-is.
    """
    stopwatch = tickwatch()
    target = 'A10008_TRAN'
    mydb = manidb('G:/NCREE_GIS/2020_address/TGOS_NLSC_TWN22.sqlite')
    source = mydb.get_alias(target).read()
    stopwatch.tick('Calculation accomplished')
    ## export the outlier
    # One slice per check column, in the original fixed order, then
    # stacked with pd.concat exactly as before.
    check_columns = ['check_town_geo', 'reCntycode', 'reTowncode', 'reNumber']
    outliers = pd.concat([source[source[col] == 0] for col in check_columns])
    mydb.get_alias(target + '_out').write(outliers)
    stopwatch.tick('Write down dataframe' + target)
df1 = df0[df0['check_town_geo'] == 0] df2 = df0[df0['reCntycode'] == 0] df3 = df0[df0['reTowncode'] == 0] df4 = df0[df0['reNumber'] == 0] frames = [df1, df2, df3, df4] result = pd.concat(frames) mydb.get_alias(target + '_out').write(result) cputime.tick('Write down dataframe' + target) if __name__ == '__console__' or __name__ == '__main__': target = 'A10008_TRAN' modify = 'A10008_TRAN_modify' mydb = manidb('G:/NCREE_GIS/2020_address/TGOS_NLSC_TWN22.sqlite') df0 = mydb.get_alias(target).read() df1 = mydb.get_alias(modify).read() df0 = df0.set_index(['fid']) df1 = df1.set_index(['fid']) # df_x = df0.loc[df1.index,:] df0.loc[df1.index, :] = df1[:] # df_y = df0.loc[df1.index, :] sdf = df0[[ 'origin_address', 'cnty_code', 'town_code', 'lie', 'lin', 'road', 'zone', 'lane', 'alley', 'number', 'floor'
def function_x():
    """Validate address points of county '10008' and write '<target>_TRAN'.

    For the single county whose 'ncity' code is '10008', builds a WKT
    point from the TWD97 coordinates of every address record, checks the
    point against the county and town boundaries (town check runs in a
    process pool), re-checks the county/town/number codes, and writes
    the transformed frame back to the '<target>_TRAN' alias.

    Relies on project helpers defined elsewhere in this project:
    manidb, tickwatch, check_addr_column, lookup_value, trans_column,
    fun4checkgeo, fun4cntycode, fun4towncode, fun4number.
    """
    cputime = tickwatch()
    # NOTE(review): 'ogr' is imported but never referenced in this
    # visible body — possibly needed as a side effect by fun4checkgeo;
    # confirm before removing.
    from osgeo import ogr
    mydb = manidb('G:/NCREE_GIS/2020_address/TGOS_NLSC_TWN22.sqlite')
    cnty = mydb.get_alias('metadata_nsg_cnty').read()
    town = mydb.get_alias('metadata_nsg_town').read()
    # Only the county with code '10008' is processed by this function.
    for key, row in cnty[cnty['ncity'] == '10008'].iterrows():
        cntynum = row['ncity']
        cnty_wkt = row['cnty_wkt']
        target = 'A' + cntynum
        #resource
        tab0 = mydb.get_alias(target)
        #outcome (NOTE(review): tab1 is created but never used; writes
        # below go through mydb.get_alias(...) directly)
        tab1 = mydb.get_alias(target + '_TRAN')
        df0 = tab0.read()
        # Build 'POINT(X Y)' WKT strings from the TWD97 coordinates.
        df0['point_wkt'] = df0[['TWD97_X', 'TWD97_Y']].astype(str).apply(
            lambda x: ' '.join(x), axis=1)
        df0['point_wkt'] = df0['point_wkt'].apply(lambda x: f'POINT({x})')
        if df0.empty:
            continue
        cputime.tick()
        #check county boundary
        df0 = check_addr_column(cnty_wkt, df0, 'point_wkt', fun4checkgeo)
        # check town boundary: pair each record's town polygon with its point
        town_wkt = lookup_value(df0, 'town_code', town)
        ziparg = zip(town_wkt['town_wkt'].tolist(), df0['point_wkt'].tolist())
        #-----------------------------------------
        # Point-in-town check distributed over 8 worker processes.
        with Pool(8) as mpool:
            check_town = mpool.map(fun4checkgeo, ziparg)
        #-----------------------------------------
        #debug (sequential fallback, kept for reference):
        #check_town = []
        #for arg in ziparg:
        #    check_town.append(fun4checkgeo(arg))
        #------------------------------------
        df_check_town = pd.DataFrame.from_dict(check_town, orient='columns')
        # NOTE(review): this assignment aligns by index; it assumes df0
        # carries a default RangeIndex matching df_check_town — confirm.
        df0['check_town_geo'] = df_check_town['checkgeo']
        cputime.tick('Geometry checked')
        #check county code
        df0 = check_addr_column(cntynum, df0, 'cnty_code', fun4cntycode)
        # check town code
        df0 = check_addr_column(cntynum, df0, 'town_code', fun4towncode)
        # check number
        df0 = check_addr_column(cntynum, df0, 'num', fun4number)
        df0 = trans_column(df0)
        mydb.get_alias(target + '_TRAN').write(df0)
target_result['dc_json_report'] = json.dumps(dc_report) #%%----------run------------- step1(addr_text) step2(target_result['dc_unusual_tail']) if dc_report['re'] else {} step3() if dc_report['re'] and dc_report['num_tp'] else {} return (ind, target_result) if __name__ == '__console__' or __name__ == '__main__': #machine argument #%%----------read------------- ax_dec_1 = ax_htax_decomposition() origindb = manidb('G:/NCREE_GIS/htax/nsg_bldg_TTK_bm.sqlite') target_tab = '63000' df_all = origindb.get_alias(f'rawdata_hou_A{target_tab}_addr').read() print('read') df_bin = df_all.set_index(['HOU_LOSN']) #df_bin['LOCAT_ADDR'] = df_bin['LOCAT_ADDR'].apply( lambda x : x.split() [0] ) ## decomposition ## onere = [ ax_dec_1.decomposition(item) for item in df_bin['LOCAT_ADDR'].items() ] df_onere = pandas.DataFrame.from_dict(dict(onere), orient='index')
import pandas
import csv, re, math, json
import numpy as np
import pandas as pd
from itertools import zip_longest
import Lily.ctao2.ctao2_database_alias
from Lily.ctao2.ctao2_database_alias import manidb, alias, tickwatch
from multiprocessing import Pool
# NOTE(review): standalone 'ogr' is the deprecated GDAL import path;
# modern GDAL expects 'from osgeo import ogr' — confirm installed version.
import osgeo, ogr

if __name__ == '__console__' or __name__ == '__main__':
    # Script entry: attach the grouped TGOS geometry ('geom') to the
    # Taipei house-tax table by joining on 'nsg_addr_key'.
    cputime = tickwatch()
    #%%----------read-------------
    origindb = manidb('G:/NCREE_GIS/htax/nsg_bldg_taipei.sqlite')
    country = 'taipei'
    df_tax = origindb.get_alias(f'htax_{country}').read()
    df_tgo = origindb.get_alias(f'tgos_{country}_group').read()[['nsg_addr_key', 'geom']]  #.head(200)
    cputime.tick('Dataframe read')
    #%%---------search-------------
    # Merge on the shared address key (pandas default how='inner', so
    # tax rows without a TGOS match are dropped — presumably intended).
    df_tax = df_tax.merge(df_tgo, left_on='nsg_addr_key', right_on='nsg_addr_key')
    # Earlier index-lookup approach, kept commented for reference:
    #df_tgo = df_tgo.set_index(['nsg_addr_key'])
    #tax_addr_list = df_tax['nsg_addr_key'].tolist()
    #i = 0
    #for addr in tax_addr_list:
    #    i += 1; print(i)
    #    if addr in df_tgo.index:
step2(target_result['dc_unusual_tail']) if dc_report['pattern'] else {} step3() if dc_report['pattern'] and dc_report['check_num'] else {} return (ind, target_result) if __name__ == '__console__' or __name__ == '__main__': cputime = tickwatch() #%%---------target------------- workdir = 'G:/NCREE_GIS/' target_tab = '92000' country = 'yilan' db = manidb(workdir + 'htax/nsg_bldg_3826.sqlite') #db = manidb( workdir + 'htax/nsg_bldg_3825.sqlite' ) cputime.tick('Dataframe read') #%%------step1. read pickle------------- #df = pd.read_pickle(workdir + 'data_pickle/bin_{target_tab}') #try: # df['LOCAT_ADDR'] = df['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #except: # break #db.get_alias('rawdata_hou_A{target_tab}_addr').write(df) cputime.tick('read pickle done') #%%-------step2. htax address decomposition-------------
# NOTE(review): 'return a' is the tail of a function whose 'def' lies
# before this chunk; indentation below is reconstructed.
return a

def get_df(df0, list):
    """Copy every column of a frame built from *list* into df0.

    *list* is a sequence of records accepted by
    pd.DataFrame.from_dict(..., orient='columns'); NaN values are
    replaced by '' before assignment. Returns the mutated df0.

    NOTE(review): the parameter name 'list' shadows the builtin;
    renaming it would change the keyword-argument API, so it is left
    as-is.
    """
    df = pd.DataFrame.from_dict(list, orient='columns')
    for colname in df.columns:
        # Existing df0 columns with the same name are overwritten.
        df0[colname] = df[colname].fillna(value='')
    return df0

if __name__ == '__console__' or __name__ == '__main__':
    cputime = tickwatch()
    #%%-----------resource--------------
    mydb = manidb('G:/NCREE_GIS/tgos_address/2021_TGOS_NLSC_TWN22_V1.sqlite')
    output = manidb('G:/NCREE_GIS/tgos_address/nsg_bldg_TGOS.sqlite')
    #cnty = mydb.get_alias('metadata_nsg_cnty').read()
    cnty = mydb.get_alias('metadata_nsg_cnty_3825').read()
    town = mydb.get_alias('metadata_nsg_town').read()
    # Only the county with code '09020' is processed here.
    for key, row in cnty[cnty['ncity'] == '09020'].iterrows():
        #for key, row in cnty.iterrows():
        #%%----------read-------------
        mpool = Pool(8)
        cntynum = row['ncity']
        cnty_wkt = row['cnty_wkt']
        # NOTE(review): the loop body continues beyond this chunk.
# NOTE(review): top-level script fragment — 'Dbf5', 'srcdb_file',
# 'mpool' and 'google_map_api' are defined/imported before this chunk.
# Reads a Big5-encoded DBF, geocodes each assembled address through the
# Google Maps helper in parallel, and writes the result to sqlite.
dbf = Dbf5(srcdb_file, codec='big5')
df = dbf.to_dataframe()  #.head(20)
# Concatenate the full address from its component columns,
# treating missing components as empty strings.
addr = df['CNTY_NAME'].fillna(value='') + df['TOWN_NAME'].fillna(
    value='') + df['ATRACTNAME'].fillna(value='') + df["AROAD"].fillna(
    value='') + df["AAREA"].fillna(value='') + df["ALANE"].fillna(
    value='') + df["AALLEY"].fillna(value='') + df["ANO"].fillna(
    value='')
# One geocode call per address, fanned out over the worker pool.
api = mpool.map(google_map_api, addr)
list_lat = []
list_lng = []
list_location_type = []
for i in api:
    if i != {}:
        print(i)
        list_lat.append(i['lat'])
        list_lng.append(i['lng'])
        list_location_type.append(i['location_type'])
    else:
        # Failed lookups become empty strings so the three result
        # lists stay aligned with df's rows.
        list_lat.append('')
        list_lng.append('')
        list_location_type.append('')
df['GOOGLE_LAT'] = list_lat
df['GOOGLE_LON'] = list_lng
df['location_type'] = list_location_type
output = manidb('G:/NCREE_GIS/2020_address/_Total_err3_s2.sqlite')
output.get_alias('_Total_err3_s2_V1').write(df)
# # step1(addr_text) step2(target_result['dc_unusual_tail']) if dc_report['re'] else {} step3() if dc_report['re'] and dc_report['num_tp'] else {} return (ind, target_result) if __name__ == '__console__' or __name__ == '__main__': #machine argument uAnswer = answer() ax_dec_1 = ax_htax_decomposition() workdir = uAnswer.host.home + '/Desktop/crying_freeman/data_nsg/' origindb = manidb(workdir + 'nsg_bldg_TTK_bm.sqlite') target_tab = '10017' #read data df_all = origindb.get_alias(f'rawdata_hou_A{target_tab}_addr').read() #for debug and development #df_bin = df_all.head(10000) df_bin = df_all df_bin = df_bin.set_index(['HOU_LOSN']) #df_bin['LOCAT_ADDR'] = df_bin['LOCAT_ADDR'].apply( lambda x : x.split() [0] ) ## decomposition ##
lambda x: x.split()[0]) df_10002['LOCAT_ADDR'] = df_10002['LOCAT_ADDR'].apply( lambda x: x.split()[0]) #df_10008['LOCAT_ADDR'] = df_10008['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10009['LOCAT_ADDR'] = df_10009['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10010['LOCAT_ADDR'] = df_10010['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10013['LOCAT_ADDR'] = df_10013['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10014['LOCAT_ADDR'] = df_10014['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10015['LOCAT_ADDR'] = df_10015['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10016['LOCAT_ADDR'] = df_10016['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10017['LOCAT_ADDR'] = df_10017['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10018['LOCAT_ADDR'] = df_10018['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) #df_10020['LOCAT_ADDR'] = df_10020['LOCAT_ADDR'].apply( lambda x : x.split()[0] ) with manidb(workdir + 'nsg_bldg_3826.sqlite') as db: #db.get_alias('rawdata_hou_A66000_addr').write(df_66000) #db.get_alias('rawdata_hou_A67000_addr').write(df_67000) #db.get_alias('rawdata_hou_A68000_addr').write(df_68000) db.get_alias('rawdata_hou_A91000_addr').write(df_91000) db.get_alias('rawdata_hou_A92000_addr').write(df_92000) db.get_alias('rawdata_hou_A10002_addr').write(df_10002) #db.get_alias('rawdata_hou_A10008_addr').write(df_10008) #db.get_alias('rawdata_hou_A10009_addr').write(df_10009) #db.get_alias('rawdata_hou_A10010_addr').write(df_10010) #db.get_alias('rawdata_hou_A10013_addr').write(df_10013) #db.get_alias('rawdata_hou_A10014_addr').write(df_10014) #db.get_alias('rawdata_hou_A10015_addr').write(df_10015) #db.get_alias('rawdata_hou_A10016_addr').write(df_10016)