def todofunction(root_path):
    """Dump per-station CX growth-stage windows for 2001-2013.

    Reads the station list from ``HHHstations.csv`` under *root_path*,
    looks up the CX stage start/end day-of-year for each station and
    year via ``get_grow_stage_by_lonlat``, and prints one row per
    station/year (the write to ``stations_cx.txt`` is currently
    disabled).

    NOTE(review): original comments say third-party interpolated rasters
    lacked a SpatialReference, so interpolation was redone manually in
    QGIS for the paper deadline; automating it through the QGIS
    interpolation API is still a TODO, see
    https://gis.stackexchange.com/questions/100188/how-to-compute-an-interpolation-raster-from-the-python-console-in-qgis/233322#233322

    :param root_path: directory containing ``HHHstations.csv``
    """
    # aro_stage_ETL(root_path, '乳熟')
    station_list = pandas.read_csv(os.path.join(root_path, 'HHHstations.csv'))
    result_file = os.path.join(root_path, 'stations_cx.txt')
    for index, lonlatdata in station_list.iterrows():
        try:
            lon = float(lonlatdata['Longitude'])
            lat = float(lonlatdata['Latitude'])
            StationID = lonlatdata['StationID']
            station_name = lonlatdata['StationName']
            for i in range(2001, 2014):
                year = str(i)
                stage_start_day, stage_end_day = get_grow_stage_by_lonlat(
                    root_path, lon, lat, year, stage='CX')
                # print(round(stage_start_day), round(stage_end_day))
                # day-of-year -> (flag, month, day); only needed if the
                # calendar form is ever written out
                fu, start_month, start_day = Common_func.day_num_to_yyyymmdd(
                    year, stage_start_day)
                fu, end_month, end_day = Common_func.day_num_to_yyyymmdd(
                    year, stage_end_day)
                row = (str(StationID) + ' ' + station_name + ' ' + str(lon)
                       + ' ' + str(lat) + ' ' + year + ' '
                       + str(round(stage_start_day)) + ' '
                       + str(round(stage_end_day)))
                print(row)
                # Modis_IO.write_txt(result_file, row)
        except Exception:
            # BUG FIX: was a bare ``except:`` which also swallowed
            # KeyboardInterrupt/SystemExit. Stations with missing
            # coordinates or stage rasters are reported and skipped.
            print('no data')
def funTest():
    """Compute heat-stress hours for every station over 2001-2013.

    Joins the station list (``HHHstations.csv``) with the per-year CX
    stage table (``stations_cx_back.txt``) and delegates the per-station
    computation to
    ``models.base_stations_data.get_heat_stress_hours_every_station``.
    """
    base_dir = Common_func.UsePlatform()
    stations_df = pandas.read_csv(os.path.join(base_dir, 'HHHstations.csv'))
    stages_df = pandas.read_csv(
        os.path.join(base_dir, 'stations_cx_back.txt'), ' ')
    out_path = os.path.join(base_dir, 'stations_heat_stress_hours.txt')
    for _, record in stations_df.iterrows():
        lon = float(record['Longitude'])
        lat = float(record['Latitude'])
        sid = record['StationID']
        name = record['StationName']
        for yr in range(2001, 2014):
            mask = (stages_df['year'] == yr) & (stages_df['stationID'] == sid)
            begin_doy = int(stages_df[mask]['cx_start'].values[0])
            end_doy = int(stages_df[mask]['cx_end'].values[0])
            # 340 = heat threshold in tenths of a degree (34.0)
            models.base_stations_data.get_heat_stress_hours_every_station(
                base_dir, sid, name, lon, lat, yr,
                begin_doy, end_doy, 340, out_path)
def get_station_value_by_num_day(root_path, year, daydir, StationID, T_tpye='HighestTemperature'):
    """Look up one station's temperature for a given day-of-year.

    :param root_path: directory containing ``meteodata.csv``
    :param year: year (str or int; compared as int)
    :param daydir: day-of-year number
    :param StationID: station identifier matched against the CSV
    :param T_tpye: column to read (name kept as-is for callers, sic);
        defaults to ``'HighestTemperature'``
    :return: ``(fu, value)`` where ``fu`` is the flag returned by
        ``Common_func.day_num_to_yyyymmdd`` and ``value`` is the raw CSV
        value divided by 10 (values are stored in tenths of a degree),
        or -273.15 when the station/day row is missing.
    """
    fu, month, day = Common_func.day_num_to_yyyymmdd(year, daydir)
    data = pd.read_csv(os.path.join(root_path, 'meteodata.csv'))
    station_value = data[(data['Year'] == int(year))
                         & (data['Months'] == int(month))
                         & (data['Days'] == int(day))
                         & (data['StationID'] == StationID)].head()
    try:
        # raw values are tenths of a degree
        station_high_value = (station_value[T_tpye].values[0].astype(int)) / 10
    except (IndexError, KeyError):
        # BUG FIX: was a bare ``except:``; narrowed to "row or column
        # missing" so real errors are no longer silently masked.
        station_high_value = -273.15
    # print(station_high_value)
    return fu, station_high_value
def funcTest():
    """Per-station linear fit of night grid (RS) vs. station temperature.

    Reads ``grid_station_night.txt``, keeps rows where both values are
    positive, then for each station fits stationval ~ gridval via
    ``multi_linear_fit`` and prints id, name, lon/lat and R^2.
    """
    root_path = Common_func.UsePlatform()
    data_file = root_path + 'grid_station_night.txt'
    data = pd.read_csv(data_file, ",")
    data = data[(data['gridval'] > 0) & (data['stationval'] > 0)]
    # BUG FIX (dead code): X/y were first computed from the whole frame
    # and then unconditionally overwritten inside the loop; removed.
    stationID_list = data['stationID'].unique()
    # kept for a (currently unused) plot title
    title = '黄淮海地区2003-2018年夜间遥感-气象温度散点图'
    for stationID in stationID_list:
        lon, lat = stations.Station_ETL.get_lonlat_by_stationID(stationID)
        station_data = data[data['stationID'] == stationID]
        station_name = stations.Station_ETL.get_station_name_by_stationID(stationID)
        X = station_data['gridval'].values
        y = station_data['stationval'].values
        a, b, RMSE, R2 = multi_linear_fit(X, y)
        print(stationID, station_name, lon, lat, R2)
def count_pecent():
    """Average the 16 yearly night rasters (2003-2018) into one grid.

    Sums every raster found under ``results/nights`` into a fixed
    1221x2224 accumulator, divides by 16, masks negative sums to NaN,
    rounds to 2 decimals, and writes the result to ``2003-2018.tif``
    using the geotransform/projection of the last raster read.
    """
    im_geotrans = ''
    im_proj = ''
    amount_data = np.zeros((1221, 2224))
    data_path = os.path.join(Common_func.UsePlatform(), 'results', 'nights')
    for root, dirs, files in os.walk(data_path):
        for file in files:
            # NOTE(review): joins with data_path, not root, so rasters in
            # sub-directories would not resolve; kept as in the original
            # (the directory is presumably flat -- verify).
            im_data, im_geotrans, im_proj = Modis_IO.read_img(
                os.path.join(data_path, file), 1)
            amount_data = amount_data + im_data
    amount_data = np.where(amount_data >= 0,
                           (amount_data / 16).astype(float), np.nan)
    # BUG FIX: np.around returns a new array; the original discarded it,
    # so the written raster was never rounded.
    amount_data = np.around(amount_data, decimals=2)
    Modis_IO.write_img(os.path.join(data_path, '2003-2018.tif'),
                      im_proj, im_geotrans, amount_data)


#count_pecent()
def RHF_cluster():
    """Profile the 2003-2018 daytime rasters with pandas-profiling.

    Each yearly GeoTIFF under ``results/days`` is read, non-positive
    pixels are masked to NaN, and the flattened rasters become the rows
    of a DataFrame whose profiling report is written to ``output.html``.
    """
    base_dir = Common_func.UsePlatform()
    proj = ''
    geotrans = ''
    raster_dir = os.path.join(base_dir, 'results', 'days')
    flattened = []
    raster = []
    for year in range(2003, 2019):
        name = str(year) + '.tif'
        raster, geotrans, proj = Modis_IO.read_img(
            os.path.join(raster_dir, name), 1)
        raster = np.where(raster > 0, raster, np.nan)
        flattened.append(raster.flatten())
    # NOTE: transposing / nan_to_num of the stacked data is disabled, as
    # in the original:
    # flattened = np.array(flattened).T
    # flattened = np.nan_to_num(flattened)
    frame = pd.DataFrame(flattened).add_prefix("col")
    frame.profile_report(title='Pandas Profiling Report').to_file(
        output_file="output.html")
def aro_stage_ETL(root_path, grow_stage):
    """Export, per year, the agro stations observed at a growth stage.

    For every year in 2001-2013, writes
    ``grow_peroid_data/RS/<year>.txt`` with a header line followed by
    one row per station: id, lon, lat, year, stage name, and the growth
    date converted to a day-of-year number.

    :param root_path: data root containing ``grow_peroid_data``
    :param grow_stage: growth-stage name matched against ``growthStage``
    """
    grow_stage_data = pandas.read_csv(
        os.path.join(root_path, 'grow_peroid_data', 'grow_period_points.csv'))
    station_list = pandas.read_csv(
        os.path.join(root_path, 'grow_peroid_data', 'agro_stations.csv'))
    for year in range(2001, 2014):
        result_file = os.path.join(
            root_path, 'grow_peroid_data', 'RS', str(year) + '.txt')
        Modis_IO.write_txt(
            result_file,
            'stationID Longitude Latitude year grow_stage grow_date')
        subset = grow_stage_data[
            (grow_stage_data['yearID'] == year)
            & (grow_stage_data['growthStage'] == grow_stage)]
        for stationID in subset['AgoStationID']:
            grow_stage_day = subset[
                subset['AgoStationID'] == stationID]['growthDate'].values[0]
            # station metadata columns are Chinese: 区站号 = station id,
            # 经度 = longitude, 纬度 = latitude
            lon = float(
                station_list[station_list['区站号'] == stationID]['经度'].values[0])
            lat = float(
                station_list[station_list['区站号'] == stationID]['纬度'].values[0])
            fields = [str(stationID), str(lon), str(lat), str(year),
                      grow_stage,
                      str(Common_func.yyyymmdd_to_day_num(grow_stage_day))]
            Modis_IO.write_txt(result_file, ' '.join(fields))
        print(result_file)
def FuncTest():
    """Driver / timing harness for the 2003-2018 yearly processing.

    All per-year processing calls (RHF, EveryPoint, results,
    every_station) are currently commented out, so this only measures
    the loop overhead and prints elapsed seconds.
    """
    root_path = Common_func.UsePlatform()
    started = datetime.datetime.now()
    im_proj = ''
    im_geotrans = ''
    for yr in range(2003, 2019):
        year = str(yr)
        # RHF(root_path, year)
        # EveryPoint(root_path, year)    # computation step
        # year = str('2005')
        # results(root_path, year)       # statistics / plotting
        # every_station(root_path, year) # station-vs-grid relation
    # elapsed wall-clock time
    ended = datetime.datetime.now()
    print((ended - started).seconds)
data.profile_report(title='Pandas Profiling Report').to_file( output_file="output.html") #amount_data = np.nanmean(amount_data, axis=0).reshape(1221,2224) #amount_data = feature_cluster(im_data) #Modis_IO.write_img(os.path.join(data_path, '2003-2018_mean.tif'), im_proj, im_geotrans, amount_data) #RHF_cluster() #data = os.path.join(Common_func.UsePlatform(),'stations','hour-sum-mask.tif') #im_data, im_geotrans, im_proj = Modis_IO.read_img(data, 1) #im_data = np.where(im_data>0,im_data,np.nan) #im_data = feature_cluster(im_data) #Modis_IO.write_img(os.path.join(Common_func.UsePlatform(), 'stations','hour-sum-mask_cluster.tif'),im_proj,im_geotrans,im_data) station_hours = os.path.join(Common_func.UsePlatform(), 'stations', 'hour-sum-mask_cluster_final.tif') modis_hours = os.path.join(Common_func.UsePlatform(), 'results', 'RHD', '2003-2018.tif') im_data_S, im_geotrans, im_proj = Modis_IO.read_img(station_hours, 1) im_data_M, im_geotrans, im_proj = Modis_IO.read_img(modis_hours, 1) im_data_S = im_data_S.flatten() im_data_M = im_data_M.flatten() #im_data_S = np.where(im_data_S<-2,np.nan,im_data_S) final_result = np.zeros(im_data_M.shape).flatten() row = 1221 col = 2224 lenth = row * col for i in range(0, lenth): #tem = im_data_M[i] if np.isnan(im_data_M[i]):
def get_heat_stress_hours_every_station(root_path, stationID, station_name,
                                        lon, lat, year, stage_start_day,
                                        stage_end_day, heat_temperature,
                                        result_file):
    """Accumulate heat-stress hours for one station over a growth stage.

    For each day in [stage_start_day, stage_end_day] whose daily maximum
    temperature reaches *heat_temperature*, estimates the number of hours
    above the threshold from the day's max/min temperature and the next
    day's minimum, writes one detail row to *result_file*, and finally
    writes a per-station summary row to ``<year>.txt``.

    :param root_path: directory containing ``stationdata.csv``
    :param stationID: station id
    :param station_name: station name
    :param lon: longitude
    :param lat: latitude
    :param year: year (int, matched against the CSV)
    :param stage_start_day: first day-of-year of the growth stage
    :param stage_end_day: last day-of-year of the growth stage
    :param heat_temperature: heat threshold in tenths of a degree
        (e.g. 340 = 34.0)
    :param result_file: path of the per-day detail output file
    """
    meteo_data = pandas.read_csv(os.path.join(root_path, 'stationdata.csv'))
    sum_heat_days = 0
    sum_heat_hours = 0
    for day_num in range(stage_start_day, stage_end_day + 1):
        fu, month, day = Common_func.day_num_to_yyyymmdd(year, day_num)
        heat_days = meteo_data[
            (meteo_data['StationID'] == stationID)
            & (meteo_data['Year'] == year)
            & (meteo_data['Months'] == month)
            & (meteo_data['Days'] == day)
            & (meteo_data['HighestTemperature'] >= heat_temperature)]
        if heat_days.empty:
            print(station_name + ' ' + str(year) + ' ' + str(day_num) + ' '
                  + 'no heat!')
            continue
        else:
            try:
                # Solar declination for this day of year.
                sun_chiwei = 0.39795 * math.cos(0.98563 * (day_num - 173))
                T_max = heat_days['HighestTemperature'].values[0] / 10
                T_min = heat_days['LowestTemperature'].values[0] / 10
                # Convert day_num + 1 rather than day + 1: incrementing
                # the day-of-month directly breaks at month boundaries
                # (original "fix a bug" note, translated).
                fu_next, month_next, day_next = Common_func.day_num_to_yyyymmdd(
                    year, day_num + 1)
                T_min_tomorrow = meteo_data[
                    (meteo_data['StationID'] == stationID)
                    & (meteo_data['Year'] == year)
                    & (meteo_data['Months'] == month_next)
                    & (meteo_data['Days'] == day_next)][
                        'LowestTemperature'].values[0] / 10
                # NOTE(review): math.sin/cos expect radians but *lat*
                # appears to be in degrees here -- confirm with callers.
                a = math.sin(lat) * math.sin(sun_chiwei)
                b = math.cos(lat) * math.cos(sun_chiwei)
                # Day length. NOTE(review): the usual form is
                # 12*(1 + (2/pi)*asin(a/b)); `a * sin(a/b)` is kept as in
                # the original -- verify against the source model.
                DL = 12 * (1 + (2 / math.pi) * a * (math.sin(a / b)))
                p = 2
                heat_temperature_real = heat_temperature / 10
                A1 = math.asin(
                    (heat_temperature_real - T_min) / (T_max - T_min))
                A2 = math.asin((heat_temperature_real - T_min_tomorrow)
                               / (T_max - T_min_tomorrow))
                heat_stress_hours = (DL + 2 * p) * (1 - (A1 + A2) / math.pi)
                sum_heat_hours = sum_heat_hours + heat_stress_hours
                sum_heat_days = sum_heat_days + 1
                # BUG FIX: '2f' is a width-2 spec (prints 6 decimals);
                # the intended spec is '.2f' (two decimal places).
                row = ' '.join([
                    str(stationID), station_name, str(lon), str(lat),
                    str(year), str(round(stage_start_day)),
                    str(round(stage_end_day)), str(day_num), str(T_max),
                    str(format(heat_stress_hours, '.2f'))])
                print(row)
                Modis_IO.write_txt(result_file, row)
            except (IndexError, ValueError, ZeroDivisionError):
                # Was a bare ``except:``; narrowed to: next-day record
                # missing (IndexError), asin() out of domain (ValueError),
                # or a zero temperature range (ZeroDivisionError).
                continue
    row = ' '.join([
        str(stationID), station_name, str(lon), str(lat), str(year),
        str(sum_heat_days), str(format(sum_heat_hours, '.2f'))])
    # NOTE(review): the summary goes to a relative '<year>.txt', not to
    # result_file -- looks intentional (one summary file per year) but
    # verify.
    file = str(year) + '.txt'
    Modis_IO.write_txt(file, row)
round(stage_end_day)) + ' ' + str( day_num) + ' ' + str(T_max) + ' ' + str( format(heat_stress_hours, '2f')) print(row) Modis_IO.write_txt(result_file, row) except: continue row = str(stationID) + ' ' + station_name + ' ' + str(lon) + ' ' + str( lat) + ' ' + str(year) + ' ' + str(sum_heat_days) + ' ' + str( format(sum_heat_hours, '2f')) file = str(year) + '.txt' Modis_IO.write_txt(file, row) root_path = Common_func.UsePlatform() def Cal(root_path): for i in range(2010, 2019): year = i lonlatlist = pd.read_csv(os.path.join(root_path, 'HHHstations.csv')) for index, lonlatdata in lonlatlist.iterrows(): try: lon = float(lonlatdata['Longitude']) lat = float(lonlatdata['Latitude']) stationID = lonlatdata['StationID'] station_name = stations.Station_ETL.get_station_name_by_stationID( stationID) cx_data = pd.read_csv(Common_func.cx, " ") stage_start_day = 152
def scatter_3D(cluster_feature):
    """Show a 3-D scatter of (grid value, station value, day).

    :param cluster_feature: array-like of shape (n, 3) — assumes columns
        are [gridval, stationval, date], as built by the script below;
        TODO confirm for other callers.
    """
    fig = plt.figure()
    ax = Axes3D(fig)
    # date modulo 100 is used as the z axis (presumably to strip the
    # month/year part of a numeric date code -- verify)
    ax.scatter(cluster_feature[:, 0], cluster_feature[:, 1],
               cluster_feature[:, 2] % 100, edgecolor='k')
    ax.set_xlabel('grid_value')
    ax.set_ylabel('station_value')
    ax.set_zlabel('day')
    fig.show()


# --- module-level script: per-station day counts from grid_station_day.txt ---
data_path = os.path.join(Common_func.UsePlatform(), 'grid_station_day.txt')
orig_data = pandas.read_csv(data_path, ',')
# print(len(orig_data))
# keep only rows where both the grid and the station value are positive
grid_station_data = orig_data[(orig_data['gridval'] > 0)
                              & (orig_data['stationval'] > 0)]
# print(len(grid_station_data), len(grid_station_data)/len(orig_data))
cluster_feature = grid_station_data[['gridval', 'stationval', 'date']].values
station_list = grid_station_data['stationID'].unique()
for stationid in station_list:
    lon, lat = stations.Station_ETL.get_lonlat_by_stationID(stationid)
    # total observed days vs. days with valid (positive) values
    sum_days = len(orig_data[(orig_data['stationID'] == stationid)])
    val_days = len(
        grid_station_data[(grid_station_data['stationID'] == stationid)])
    # NOTE(review): loop body appears to continue beyond this chunk