def calc_moy(ddirout, deb, fin, pays, niveau, types, sat, prod, res_temp, res, varname, shape):
    # parse the start and end dates
    datedeb = datetime.strptime(deb, "%Y-%m-%d")
    datefin = datetime.strptime(fin, "%Y-%m-%d")
    anfin = datetime.strptime(fin, "%Y-%m-%d").strftime("%Y")

    ddirin = os.path.join(ddirDB, types, sat, prod, res)
    os.chdir(ddirin)
    files = sorted(glob.glob('*' + res_temp + '.nc'))  # list of .nc files

    print "\n#########################################################################################################################"
    print "############################################## PERIODE " + deb + " " + fin + " ############################################"

    ################ import shapefile #############################################################################
    if niveau == "aire":
        if shape == "all_fs":
            try:
                #fshape = '/work/crct/se5780me/carto/fs_par_annee/'+shape+'/150409_BF_FS_2015.shp'
                fshape = os.path.join(ddirDB, 'carto/fs_par_annee', shape, '150409_BF_FS_' + anfin + '.shp')
                geodf = gpd.GeoDataFrame.from_file(fshape)
            except:
                # fall back to the 2015 shapefile when no file exists for the requested year
                fshape = os.path.join(ddirDB, 'carto/fs_par_annee/all_fs/150409_BF_FS_2015.shp')
                geodf = gpd.GeoDataFrame.from_file(fshape)
        else:
            try:
                #fshape = '/work/crct/se5780me/carto/fs_par_annee/'+shape+'/150409_BF_FS_'+shape+'2015.shp'
                fshape = os.path.join(ddirDB, 'carto/fs_par_annee', shape, '150409_BF_FS_' + shape + '_' + anfin + '.shp')
                geodf = gpd.GeoDataFrame.from_file(fshape)
            except:
                fshape = os.path.join(ddirDB, 'carto/fs_par_annee', shape, '150409_BF_FS_' + shape + '_2015.shp')
                geodf = gpd.GeoDataFrame.from_file(fshape)
    else:
        fshape = os.path.join(ddirDB, 'carto', niveau, pays + '_' + niveau + '_sante.shp')
        geodf = gpd.GeoDataFrame.from_file(fshape)

    nbdist = len(geodf[geodf.columns[1]])  # number of districts/health areas
    # list of the districts/health areas, written to the .nc as an attribute of the index dimension
    listdist = '//'.join(geodf[geodf.columns[1]].tolist())
    print "shapefile utilise: ", fshape.split("/")[-1]
    print "nombre de districts/aires de sante: ", nbdist

    ###############################################################################################################
    ############################ CREATION NETCDF ##################################################################
    ###############################################################################################################
    if varname == "Deep_Blue_Aerosol_Optical_Depth_550_Land":
        varname1 = "AOT"
    else:
        varname1 = varname
    print files[:]

    output = (varname1 + '_' + files[0][:-5] + '_' + niveau + '_' + shape + '_' + pays + '_'
              + deb.replace('-', '') + fin.replace('-', '') + res_temp + '.nc')
    ncnew = Dataset(ddirout + '/' + output, 'w')

    # dimensions
    ncnew.createDimension('time', None)
    ncnew.createDimension('index_dist', nbdist)

    # variables
    tp = ncnew.createVariable('time', 'f8', ('time',))
    index = ncnew.createVariable('index_dist', 'f4', ('index_dist',))
    index[:] = range(nbdist)
    nbpx = ncnew.createVariable('count', 'f4', ('time', 'index_dist'))
    vmin = ncnew.createVariable('min', 'f4', ('time', 'index_dist'))
    vmax = ncnew.createVariable('max', 'f4', ('time', 'index_dist'))
    vmean = ncnew.createVariable('mean', 'f4', ('time', 'index_dist'))
    vstd = ncnew.createVariable('std', 'f4', ('time', 'index_dist'))
    #vmed = ncnew.createVariable('median','f4',('time','index_dist'))

    # attributes
    ncnew.Convention = 'CF-1.5'
    ncnew.description = 'moyenne districts pour la variable : ' + varname
    ncnew.history = 'Created ' + time.ctime(time.time())
    ncnew.source = ' '
    index.standard_name = listdist
    tp.standard_name = 'time'
    tp.calendar = 'gregorian'
    #fillvalue = np.nan

    ################################################################################################################
    ################################################################################################################
    # initialise the per-variable lists, extended after each iteration
    nbpx_tmp = []
    vmin_tmp = []
    vmax_tmp = []
    vmean_tmp = []
    vstd_tmp = []
    vmed_tmp = []

    t0 = time.time()  # start of the processing timer
    print "\nfichier en traitement: ", files[0]
    nc = Dataset(files[0], 'r')
    var_in = nc.variables[varname]
    dates = nc.variables['time']

    # convert the start and end dates to numeric values, using the time units of the .nc file
    ndatedeb = date2num(datedeb, dates.units)
    ndatefin = date2num(datefin, dates.units)
    # check the hour of the first record (0h, 3h, 6h, ...) and shift the bounds forward to midnight if needed
    if datetime.strftime(num2date(dates[0], dates.units), "%H") != "0":
        ndatedeb += 24 - int(datetime.strftime(num2date(dates[0], dates.units), "%H"))
        ndatefin += 24 - int(datetime.strftime(num2date(dates[0], dates.units), "%H"))

    # indices of the start and end dates along the time axis
    # if ndatedeb >= dates[0] and ndatedeb <= dates[-1]:
    iddeb = np.abs(dates[:] - ndatedeb).argmin()
    # else:
    #     iddeb = 0
    # if ndatefin >= dates[0] and ndatefin <= dates[-1]:
    idfin = np.abs(dates[:] - ndatefin).argmin() - 1
    # else:
    #     idfin = len(dates[:])-1

    # extract the block of dates, later appended to the time variable (tp) of the new .nc
    serie_dates = dates[iddeb:idfin + 1]
    print "date de debut: ", num2date(serie_dates[0], dates.units)
    print "date de fin: ", num2date(serie_dates[-1], dates.units)

    var = np.array(var_in[iddeb:idfin + 1, ...])
    # apply fill value, scale factor and add offset
    if sat == 'toms':
        var[var == var_in._FillValue] = -999
    else:
        var[var == var_in._FillValue] = np.nan
    if "scale_factor" in var_in.ncattrs():
        var = (var[:] - var_in.add_offset) * var_in.scale_factor

    # geographic characteristics: transform, spatial resolution, max latitude and min longitude
    lat = nc.variables['latitude'][:]
    lon = nc.variables['longitude'][:]
    xo = min(lon)
    yo = max(lat)
    resx = np.abs(np.mean(np.diff(lon)))
    resy = np.abs(np.mean(np.diff(lat)))
    transform = [xo, 0.01, 0.0, yo, 0.0, -0.01]

    #############################################################################################################
    #############################################################################################################
    # split the matrix into (at most) 8 blocks of idt time steps each
    idt = len(serie_dates) // 8
    if idt == 0:
        idt = 1
    ndt = range(0, len(serie_dates), idt)
    nb_mat_in = [var[ix:ix + idt, ...] for ix in ndt]
    # run calc_stats on each block in parallel
    res = Parallel(n_jobs=-1)(delayed(calc_stats)(resx, resy, geodf, nbdist, transform, temps_x)
                              for temps_x in nb_mat_in)

    # collect the per-block results into the temporary lists
    nbpx_tmp.append(np.concatenate([res[n][0] for n in range(0, len(ndt))], axis=0))
    vmax_tmp.append(np.concatenate([res[n][1] for n in range(0, len(ndt))], axis=0))
    vmean_tmp.append(np.concatenate([res[n][2] for n in range(0, len(ndt))], axis=0))
    vmed_tmp.append(np.concatenate([res[n][3] for n in range(0, len(ndt))], axis=0))
    vmin_tmp.append(np.concatenate([res[n][4] for n in range(0, len(ndt))], axis=0))
    vstd_tmp.append(np.concatenate([res[n][5] for n in range(0, len(ndt))], axis=0))

    t1 = time.time() - t0
    print "\nelapsed time: ", t1, "sec"
    print "fichier en sortie: ", output
    print "rep de sortie: ", ddirout
    print "#########################################################################################################################"
    print "#########################################################################################################################"
    print "#########################################################################################################################\n\n"

    # write the results into the new .nc
    tp[:] = np.append(tp[:], serie_dates)
    tp.units = dates.units
    nbpx[:] = np.concatenate([nbpx_tmp[d_t] for d_t in range(0, len(nbpx_tmp))], axis=0)
    vmax[:] = np.concatenate([vmax_tmp[d_t] for d_t in range(0, len(vmax_tmp))], axis=0)
    vmean[:] = np.concatenate([vmean_tmp[d_t] for d_t in range(0, len(vmean_tmp))], axis=0)
    vmin[:] = np.concatenate([vmin_tmp[d_t] for d_t in range(0, len(vmin_tmp))], axis=0)
    vstd[:] = np.concatenate([vstd_tmp[d_t] for d_t in range(0, len(vstd_tmp))], axis=0)

    # build one DataFrame per statistic (rows: dates, columns: district/area names)
    index = [num2date(d, dates.units).date() for d in serie_dates]
    columns_name = geodf.name.values.tolist()
    tmpvar_dict = {"nbpx": nbpx_tmp, "vmax": vmax_tmp, "vmean": vmean_tmp, "vmin": vmin_tmp, "vstd": vstd_tmp}
    list_df = {}
    for n in tmpvar_dict:
        list_df[n] = pd.DataFrame(
            np.concatenate([tmpvar_dict[n][d_t] for d_t in range(0, len(tmpvar_dict[n]))], axis=0),
            index=index, columns=columns_name).round(4)
        #df.to_csv(ddirout+'/'+output[:-3]+'_'+n+'.csv', header=True)

    nc.close()
    ncnew.close()
    return list_df
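# ---------------------------------------------------------------------------------------------------------------
# Usage sketch (illustrative only, not taken from the original module): the directory, product names, country
# code and column values below are assumptions. calc_moy() relies on module-level definitions of ddirDB,
# calc_stats() and the imports (os, glob, time, datetime, numpy as np, pandas as pd, geopandas as gpd,
# netCDF4.Dataset, date2num, num2date, joblib Parallel/delayed).
#
# dfs = calc_moy(ddirout='/tmp/out', deb='2015-01-01', fin='2015-12-31', pays='BF',
#                niveau='aire', types='satellite', sat='modis', prod='aod', res_temp='daily',
#                res='10km', varname='Deep_Blue_Aerosol_Optical_Depth_550_Land', shape='all_fs')
# dfs['vmean'].to_csv('/tmp/out/mean_per_area.csv', header=True)
# ---------------------------------------------------------------------------------------------------------------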
def calc_moy(ddirout, ncfile, fshape, deb, fin, pays, niveau, types, sat, prod, res_temp, res, varname, level):
    # parse the start and end dates
    datedeb = datetime.strptime(deb, "%Y-%m-%d")
    datefin = datetime.strptime(fin, "%Y-%m-%d")

    ddirin = os.path.join(ddirDB, types, sat, prod, res)
    os.chdir(ddirin)

    geodf = gpd.GeoDataFrame.from_file(fshape)
    nbdist = len(geodf[geodf.columns[1]])  # number of districts/health areas
    # list of the districts/health areas, written to the .nc as an attribute of the index dimension
    listdist = '//'.join(geodf[geodf.columns[1]].tolist())

    ############################ CREATION NETCDF ##################################################################
    ###############################################################################################################
    output = os.path.join(ddirout, varname + '_' + os.path.basename(ncfile)[:-5] + '_' + niveau + '_' + pays + '_'
                          + deb.replace('-', '') + fin.replace('-', '') + '_' + res_temp + '.nc')
    ncnew = Dataset(output, 'w')

    # dimensions
    ncnew.createDimension('time', None)
    ncnew.createDimension('index_dist', nbdist)

    # variables
    tp = ncnew.createVariable('time', 'f8', ('time',))
    index = ncnew.createVariable('index_dist', 'f4', ('index_dist',))
    index[:] = range(nbdist)
    nbpx = ncnew.createVariable('count', 'f4', ('time', 'index_dist'))
    vmin = ncnew.createVariable('min', 'f4', ('time', 'index_dist'))
    vmax = ncnew.createVariable('max', 'f4', ('time', 'index_dist'))
    vmean = ncnew.createVariable('mean', 'f4', ('time', 'index_dist'))
    vstd = ncnew.createVariable('std', 'f4', ('time', 'index_dist'))
    #vmed = ncnew.createVariable('median','f4',('time','index_dist'))

    # attributes
    ncnew.Convention = 'CF-1.5'
    ncnew.description = 'moyenne districts pour la variable : ' + varname
    ncnew.history = 'Created ' + time.ctime(time.time())
    ncnew.source = ' '
    index.standard_name = listdist
    tp.standard_name = 'time'
    tp.calendar = 'gregorian'
    #fillvalue = np.nan

    ################################################################################################################
    ################################################################################################################
    # initialise the per-variable lists, extended after each iteration
    nbpx_tmp = []
    vmin_tmp = []
    vmax_tmp = []
    vmean_tmp = []
    vstd_tmp = []
    vmed_tmp = []

    nc = Dataset(ncfile, 'r')
    var_in = nc.variables[varname]
    dates = nc.variables['time']

    # convert the start and end dates to numeric values, using the time units of the .nc file
    ndatedeb = date2num(datedeb, dates.units)
    ndatefin = date2num(datefin, dates.units)
    # check the hour of the first record (0h, 3h, 6h, ...) and shift the bounds forward to midnight if needed
    if datetime.strftime(num2date(dates[0], dates.units), "%H") != "0":
        ndatedeb += 24 - int(datetime.strftime(num2date(dates[0], dates.units), "%H"))
        ndatefin += 24 - int(datetime.strftime(num2date(dates[0], dates.units), "%H"))

    # indices of the start and end dates along the time axis
    iddeb = np.abs(dates[:] - ndatedeb).argmin()
    idfin = np.abs(dates[:] - ndatefin).argmin() - 1

    # extract the block of dates, later appended to the time variable (tp) of the new .nc
    serie_dates = dates[iddeb:idfin + 1]
    if level == -1:
        # variable without a vertical dimension
        var = np.array(var_in[iddeb:idfin + 1, ...])
    else:
        # keep only the requested vertical level
        var = np.array(var_in[iddeb:idfin + 1, level, ...])

    # apply fill value, scale factor and add offset
    if sat == 'toms':
        var[var == var_in._FillValue] = -999
    else:
        var[var == var_in._FillValue] = np.nan
    if "scale_factor" in var_in.ncattrs():
        var = (var[:] - var_in.add_offset) * var_in.scale_factor

    # geographic characteristics: transform, spatial resolution, max latitude and min longitude
    lat = nc.variables['latitude'][:]
    lon = nc.variables['longitude'][:]
    xo = min(lon)
    yo = max(lat)
    resx = np.abs(np.mean(np.diff(lon)))
    resy = np.abs(np.mean(np.diff(lat)))
    transform = [xo, 0.01, 0.0, yo, 0.0, -0.01]

    #############################################################################################################
    #############################################################################################################
    # split the matrix into (at most) 8 blocks of idt time steps each
    idt = len(serie_dates) // 8
    if idt == 0:
        idt = 1
    ndt = range(0, len(serie_dates), idt)
    nb_mat_in = [var[ix:ix + idt, ...] for ix in ndt]
    # run calc_stats on each block in parallel
    res = Parallel(n_jobs=-1)(delayed(calc_stats)(resx, resy, geodf, nbdist, transform, temps_x)
                              for temps_x in nb_mat_in)

    # collect the per-block results into the temporary lists
    nbpx_tmp.append(np.concatenate([res[n][0] for n in range(0, len(ndt))], axis=0))
    vmax_tmp.append(np.concatenate([res[n][1] for n in range(0, len(ndt))], axis=0))
    vmean_tmp.append(np.concatenate([res[n][2] for n in range(0, len(ndt))], axis=0))
    vmed_tmp.append(np.concatenate([res[n][3] for n in range(0, len(ndt))], axis=0))
    vmin_tmp.append(np.concatenate([res[n][4] for n in range(0, len(ndt))], axis=0))
    vstd_tmp.append(np.concatenate([res[n][5] for n in range(0, len(ndt))], axis=0))

    # write the results into the new .nc
    tp[:] = np.append(tp[:], serie_dates)
    tp.units = dates.units
    nbpx[:] = np.concatenate([nbpx_tmp[d_t] for d_t in range(0, len(nbpx_tmp))], axis=0)
    vmax[:] = np.concatenate([vmax_tmp[d_t] for d_t in range(0, len(vmax_tmp))], axis=0)
    vmean[:] = np.concatenate([vmean_tmp[d_t] for d_t in range(0, len(vmean_tmp))], axis=0)
    vmin[:] = np.concatenate([vmin_tmp[d_t] for d_t in range(0, len(vmin_tmp))], axis=0)
    vstd[:] = np.concatenate([vstd_tmp[d_t] for d_t in range(0, len(vstd_tmp))], axis=0)

    # build one DataFrame per statistic (rows: dates, columns: district/area names)
    index = [num2date(d, dates.units).date() for d in serie_dates]
    columns_name = geodf.name.values.tolist()
    tmpvar_dict = {"nbpx": nbpx_tmp, "vmax": vmax_tmp, "vmean": vmean_tmp, "vmin": vmin_tmp, "vstd": vstd_tmp}
    list_df = {}
    for n in tmpvar_dict:
        list_df[n] = pd.DataFrame(
            np.concatenate([tmpvar_dict[n][d_t] for d_t in range(0, len(tmpvar_dict[n]))], axis=0),
            index=index, columns=columns_name).round(4)
        #df.to_csv(ddirout+'/'+output[:-3]+'_'+n+'.csv', header=True)

    nc.close()
    ncnew.close()
    return list_df, output
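# ---------------------------------------------------------------------------------------------------------------
# Usage sketch for this variant (illustrative assumptions only): here the input .nc file and the shapefile are
# passed explicitly, and `level` selects a vertical level of the variable (-1 means the variable has no level
# dimension). All file paths, product names and the variable name below are hypothetical.
#
# dfs, outfile = calc_moy(ddirout='/tmp/out', ncfile='era5_t2m_daily.nc', fshape='BF_district_sante.shp',
#                         deb='2015-01-01', fin='2015-12-31', pays='BF', niveau='district',
#                         types='reanalysis', sat='era5', prod='t2m', res_temp='daily', res='0.25deg',
#                         varname='t2m', level=-1)
# print "fichier produit: ", outfile
# ---------------------------------------------------------------------------------------------------------------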