def execute(**kwargs):
    """Run CLASS4GL experiments while iterating the initial soil moisture.

    For every selected morning sounding the model is run with the soil
    moisture (``pars.wg`` and ``pars.w2``) set to ``pars.wwilt`` and to
    ``pars.wfc``.  The residual is the modelled evaporative fraction
    ``LE.sum() / (H.sum() + LE.sum())`` minus ``pars.EF``.  When the two
    residuals have opposite signs, a secant step with a bisection fallback
    searches for the soil moisture with zero residual; otherwise the boundary
    run with the smallest absolute residual is kept.  The input that was used
    is dumped to ``<station>_<chunk>_ini.yaml`` and the model output to
    ``<station>_<chunk>_end.yaml`` below ``args.path_experiments/<name>/``.

    Args:
        **kwargs: values that overwrite the attributes of the module-level
            ``args`` namespace (same names as the command-line arguments)
            when the procedure is called as a module function.

    Raises:
        ValueError: on inconsistent chunk arguments, on a global chunk number
            outside of the available range, or when ``experiments_names`` and
            ``experiments`` differ in length.
        IOError: when no morning (or, for ``runtime == 'from_profile_pair'``,
            no afternoon) records are found.
    """
    import contextlib

    # note that with args, we actually mean the same as those specified with
    # the argparse module above

    # overwrite the args according to the kwargs when the procedure is called
    # as module function
    for key, value in kwargs.items():
        args.__dict__[key] = value

    print("-- begin arguments --")
    for key, value in args.__dict__.items():
        print(key, ': ', value)
    print("-- end arguments ----")

    # load specified class4gl library
    if args.c4gl_path_lib is not None:
        sys.path.insert(0, args.c4gl_path_lib)

    from class4gl import class4gl_input, data_global, class4gl
    from interface_multi import stations, stations_iterator, records_iterator, get_record_yaml, get_records
    from class4gl import blh, class4gl_input

    def run_with_soil_moisture(c4gli, soil_moisture):
        """Run the model with wg = w2 = soil_moisture; return (model, EF)."""
        c4gli.pars.wg = soil_moisture
        c4gli.pars.w2 = soil_moisture
        model = class4gl(c4gli)
        model.run()
        ef_mod = model.out.LE.sum() / (model.out.H.sum() + model.out.LE.sum())
        return model, ef_mod

    # this is a variant of global run in which the output of runs are still
    # written out even when the run crashes.

    # NOTE(review): 'BASE_ITER_ADV_B05' sets beta to 0.2 although its name
    # suggests 0.5 -- confirm which one is intended.
    EXP_DEFS = {
        'BASE_ITER': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'BASE_ITER_ADV': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'BASE_ITER_W_ADV': {'sw_ac': ['adv', "w"], 'sw_ap': True, 'sw_lit': False},
        'BASE_ITER_W': {'sw_ac': ["w"], 'sw_ap': True, 'sw_lit': False},
        'BASE_ITER_ADV_B05': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False, 'beta': 0.2},
        'ERA_NOAC_ITER': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'NOAC_ITER': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'ADV_ITER': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'W_ITER': {'sw_ac': ['w', ], 'sw_ap': True, 'sw_lit': False},
        'AC_ITER': {'sw_ac': ['adv', 'w'], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_NOAC_ITER': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_ADV_ITER': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
    }

    # ========================
    print("getting a list of stations")
    # ========================

    # these are all the stations that are found in the input dataset
    all_stations = stations(args.path_forcing,
                            suffix=args.subset_forcing,
                            refetch_stations=False)

    # ====================================
    print('defining all_stations_select')
    # ====================================

    # these are all the stations that are supposed to run by the whole batch
    # (all chunks). We narrow it down according to the station(s) specified.
    if args.station_id is not None:
        print("Selecting station by ID")
        stations_iter = stations_iterator(all_stations)
        STNID, run_station = stations_iter.set_STNID(
            STNID=int(args.station_id))
        all_stations_select = pd.DataFrame([run_station])
    else:
        print("Selecting stations from a row range in the table")
        all_stations_select = pd.DataFrame(all_stations.table)
        # the row bounds are inclusive for the last row
        if args.last_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                :(int(args.last_station_row) + 1)]
        if args.first_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                int(args.first_station_row):]

    print("station numbers included in the whole batch (all chunks):",
          list(all_stations_select.index))
    print(all_stations_select)

    print("getting all records of the whole batch")
    all_records_morning_select = get_records(all_stations_select,
                                             args.path_forcing,
                                             subset=args.subset_forcing,
                                             refetch_records=False,
                                             )

    # only run a specific chunck from the selection
    if args.global_chunk_number is not None:
        if args.station_chunk_number is not None:
            raise ValueError(
                'You need to specify either global-chunk-number or station-chunk-number, not both.'
            )

        if not (int(args.split_by) > 0):
            raise ValueError(
                "global_chunk_number is specified, but --split-by is not a strict positive number, so I don't know how to split the batch into chunks."
            )

        run_station_chunk = None
        # str(): the value is an int when it is passed through kwargs
        print(
            'determining the station and its chunk number according global_chunk_number ('
            + str(args.global_chunk_number) + ')')
        totalchunks = 0
        stations_iter = all_stations_select.iterrows()
        in_current_chunk = False
        try:
            # walk through the stations, accumulating their number of chunks,
            # until the station holding the requested global chunk is reached
            while not in_current_chunk:
                istation, current_station = next(stations_iter)
                all_records_morning_station_select = all_records_morning_select.query(
                    'STNID == ' + str(current_station.name))
                chunks_current_station = math.ceil(
                    float(len(all_records_morning_station_select)) /
                    float(args.split_by))
                print('chunks_current_station', chunks_current_station)
                in_current_chunk = (int(args.global_chunk_number) <
                                    (totalchunks + chunks_current_station))

                if in_current_chunk:
                    run_stations = pd.DataFrame([current_station])
                    run_station_chunk = int(
                        args.global_chunk_number) - totalchunks

                totalchunks += chunks_current_station
        except StopIteration:
            raise ValueError(
                "Could not determine station chunk number.  --global_chunk_number ("
                + str(args.global_chunk_number) + ") outside of range [0," +
                str(totalchunks) + '[') from None
        print("station = ", list(run_stations.index))
        print("station chunk number:", run_station_chunk)

    # if no global chunk is specified, then run the whole station selection in
    # one run, or a specific chunk for each selected station according to
    # args.station_chunk_number
    else:
        run_stations = pd.DataFrame(all_stations_select)
        if args.station_chunk_number is not None:
            run_station_chunk = int(args.station_chunk_number)
            print("station(s) that is processed.", list(run_stations.index))
            print("chunk number: ", run_station_chunk)
        else:
            # int(): --split-by may arrive as a string from the command line
            if (args.split_by is None) or (int(args.split_by) != -1):
                raise ValueError(
                    "Chunks are defined by --split_by, but I don't know which chunk to run. Please provide --global_chunk_number or --station_chunk_number, or leave out --split-by."
                )
            run_station_chunk = 0
            print("stations that are processed.", list(run_stations.index))

    print('Fetching initial/forcing records')
    records_morning = get_records(run_stations,
                                  args.path_forcing,
                                  subset=args.subset_forcing,
                                  refetch_records=False,
                                  )
    if len(records_morning) == 0:
        raise IOError("No initialization records records found in " +
                      args.path_forcing + ' (subset: ' +
                      str(args.subset_forcing) + ')')

    # note that if runtime is an integer number, we don't need to get the
    # afternoon profiles.
    if args.runtime == 'from_profile_pair':
        print(
            'Fetching afternoon records for determining the simulation runtimes'
        )
        records_afternoon = get_records(run_stations,
                                        args.path_forcing,
                                        subset='end',
                                        refetch_records=False,
                                        )
        if len(records_afternoon) == 0:
            raise IOError("No final state records found in " +
                          args.path_forcing + ' (subset: end)')

        # align afternoon records with the morning records, and set same index
        print(len(records_afternoon))
        print(len(records_morning))

        print("aligning morning and afternoon records")
        records_morning['dates'] = records_morning['ldatetime'].dt.date
        records_afternoon['dates'] = records_afternoon['ldatetime'].dt.date
        records_afternoon.set_index(['STNID', 'dates'], inplace=True)
        ini_index_dates = records_morning.set_index(['STNID', 'dates']).index
        records_afternoon = records_afternoon.loc[ini_index_dates]
        records_afternoon.index = records_morning.index

    experiments = args.experiments.strip(' ').split(' ')
    if args.experiments_names is not None:
        experiments_names = args.experiments_names.strip(' ').split(' ')
        if len(experiments_names) != len(experiments):
            raise ValueError(
                'Lenght of --experiments_names is different from --experiments'
            )
    else:
        experiments_names = experiments

    split_by = int(args.split_by)

    for expid, expname in zip(experiments, experiments_names):
        exp = EXP_DEFS[expid]
        path_exp = args.path_experiments + '/' + expname + '/'

        os.makedirs(path_exp, exist_ok=True)

        for istation, current_station in run_stations.iterrows():
            # select the records of the station that is being processed (this
            # has to happen inside the station loop, otherwise the station of
            # a previous step -- or no station at all -- is used)
            records_morning_station = records_morning.query(
                'STNID == ' + str(current_station.name))
            nrecords_station = len(records_morning_station)

            if split_by > 0:
                start_record = run_station_chunk * split_by
                end_record = start_record + split_by
            elif run_station_chunk == 0:
                # no splitting: the single chunk holds all records (a plain
                # [0:split_by] slice with split_by == -1 drops the last one)
                start_record, end_record = 0, None
            else:
                # no splitting: only chunk 0 exists
                start_record = end_record = nrecords_station

            if start_record >= nrecords_station:
                print("warning: outside of profile number range for station " +
                      str(current_station) +
                      ". Skipping chunk number for this station.")
                continue

            fn_morning = args.path_forcing + '/' + format(
                current_station.name,
                '05d') + '_' + args.subset_forcing + '.yaml'
            print('fn_morning', fn_morning)
            if not os.path.isfile(fn_morning):
                # fall back on the chunked forcing file
                fn_morning = \
                    args.path_forcing + '/' + format(current_station.name, '05d') + \
                    '_' + str(run_station_chunk) + '_' + args.subset_forcing + '.yaml'

            fn_afternoon = args.path_forcing + '/' + format(
                current_station.name, '05d') + '_end.yaml'
            print(fn_afternoon)

            fn_ini = path_exp + '/' + format(current_station.name, '05d') + '_' + \
                str(int(run_station_chunk)) + '_ini.yaml'
            print('fn_ini', fn_ini)
            fn_mod = path_exp + '/' + format(current_station.name, '05d') + '_' + \
                str(int(run_station_chunk)) + '_end.yaml'

            onerun = False
            # the exit stack closes all files, also when a record fails
            # outside of the guarded model runs
            with contextlib.ExitStack() as open_files:
                file_morning = open_files.enter_context(open(fn_morning))
                file_afternoon = None
                if args.runtime == 'from_profile_pair':
                    file_afternoon = open_files.enter_context(
                        open(fn_afternoon))
                file_ini = open_files.enter_context(open(fn_ini, 'w'))
                file_mod = open_files.enter_context(open(fn_mod, 'w'))

                print('starting station chunk number: '
                      + str(run_station_chunk) + '(size: ' + str(args.split_by) + ' soundings)')

                records_morning_station_chunk = \
                    records_morning_station.iloc[start_record:end_record]

                for (STNID, chunk, index), record_morning in \
                        records_morning_station_chunk.iterrows():

                    c4gli_morning = get_record_yaml(file_morning,
                                                    record_morning.index_start,
                                                    record_morning.index_end,
                                                    mode='model_input')
                    if args.diag_tropo is not None:
                        # levels with a pressure above the top-level pressure
                        # minus 3000. * 1.2 * 9.81
                        seltropo = (c4gli_morning.air_ac.p >
                                    c4gli_morning.air_ac.p.iloc[-1] + 3000. *
                                    (-1.2 * 9.81))
                        profile_tropo = c4gli_morning.air_ac[seltropo]
                        for var in args.diag_tropo:
                            if var[:3] == 'adv':
                                mean_adv_tropo = np.mean(
                                    profile_tropo[var + '_x'] +
                                    profile_tropo[var + '_y'])
                                c4gli_morning.update(
                                    source='era-interim',
                                    pars={var + '_tropo': mean_adv_tropo})
                            else:
                                print("warning: tropospheric variable " +
                                      var + " not recognized")

                    if args.runtime == 'from_profile_pair':
                        record_afternoon = records_afternoon.loc[(STNID, chunk,
                                                                  index)]
                        c4gli_afternoon = get_record_yaml(
                            file_afternoon,
                            int(record_afternoon.index_start),
                            int(record_afternoon.index_end),
                            mode='model_input')
                        runtime = int((c4gli_afternoon.pars.datetime_daylight -
                                       c4gli_morning.pars.datetime_daylight
                                       ).total_seconds())
                    elif args.runtime == 'from_input':
                        runtime = c4gli_morning.pars.runtime
                    else:
                        runtime = int(args.runtime)

                    c4gli_morning.update(source='pairs',
                                         pars={'runtime': runtime})
                    c4gli_morning.update(source=expname, pars=exp)

                    # NOTE(review): this instance is replaced by the runs
                    # below before it is used; kept in case the constructor
                    # is relied upon -- confirm and remove.
                    c4gl = class4gl(c4gli_morning)

                    EFobs = c4gli_morning.pars.EF

                    # search interval for the soil moisture
                    b = c4gli_morning.pars.wwilt
                    c = c4gli_morning.pars.wfc

                    try:
                        # residuals at both boundaries of the interval
                        c4glb, EFmodb = run_with_soil_moisture(c4gli_morning, b)
                        fb = EFmodb - EFobs

                        c4glc, EFmodc = run_with_soil_moisture(c4gli_morning, c)
                        fc = EFmodc - EFobs

                        print(EFmodb, EFobs, fb)
                        print(EFmodc, EFobs, fc)

                        i = 0

                        if fc * fb > 0.:
                            # no sign change: keep the best boundary run
                            if abs(fb) < abs(fc):
                                c4gl = c4glb
                                # the input object was last set to c; restore
                                # b so that the dumped input matches the run
                                c4gli_morning.pars.wg = b
                                c4gli_morning.pars.w2 = b
                            else:
                                c4gl = c4glc
                            print(
                                "Warning!!! function value of the boundaries have the same sign, so I will not able to find a root"
                            )
                        else:
                            print('starting ITERATION!!!')
                            # first secant step
                            cn = c - fc / (fc - fb) * (c - b)

                            # .item() gives a plain Python scalar (what the
                            # removed np.asscalar did)
                            c4gl, EFmod = run_with_soil_moisture(
                                c4gli_morning, np.asarray(cn).item())
                            fcn = EFmod - EFobs

                            tol = 0.02
                            ftol = 10.
                            maxiter = 10

                            is1 = 0
                            is1max = 1
                            while ((abs(cn - c) > tol) or
                                   (abs(fcn) > ftol)) and (fcn != 0) and (
                                       i < maxiter):
                                # NOTE(review): b and c are swapped here but
                                # fb and fc are not -- confirm this is meant.
                                if fc * fcn > 0:
                                    b, c = c, b

                                a = b
                                fa = fb
                                b = c
                                fb = fc
                                c = cn
                                fc = fcn

                                print(i, a, b, c, fcn)

                                s1 = c - fc / (fc - fb) * (c - b)
                                s2 = c - fc / (fc - fa) * (c - a)

                                # take the one that is closest to the border
                                # (opposite to the previous border), making
                                # the chance that the border is eliminated
                                # bigger
                                if abs(s1 - b) < abs(s2 - b):
                                    is1 = 0
                                else:
                                    is1 += 1

                                # we prefer s1, but only allow it a few times
                                # to not provide the opposite boundary
                                if is1 < is1max:
                                    s = s1
                                    print('s1')
                                else:
                                    is1 = 0
                                    s = s2
                                    print('s2')

                                if c > b:
                                    l = b
                                    r = c
                                else:
                                    l = c
                                    r = b

                                m = (b + c) / 2.

                                # accept the secant estimate only inside the
                                # current interval, bisect otherwise
                                if (s > l) and (s < r):
                                    cn = s
                                    print('midpoint')
                                else:
                                    cn = m
                                    print('bissection')

                                c4gl, EFmod = run_with_soil_moisture(
                                    c4gli_morning, np.asarray(cn).item())
                                fcn = EFmod - EFobs

                                i += 1

                            if i == maxiter:
                                raise RuntimeError('did not converge')

                        c4gli_morning.pars.itersteps = i
                        c4gli_morning.dump(file_ini)

                        c4gl.dump(file_mod,
                                  include_input=False,
                                  )
                        onerun = True
                    except Exception as err:
                        # best effort: a failing sounding must not stop the
                        # batch, but Ctrl-C/SystemExit still get through
                        print('run not succesfull')
                        print(repr(err))

            if onerun:
                # return values are not used; presumably these calls refresh
                # the stored record tables (refetch_records=True) -- confirm
                get_records(pd.DataFrame([current_station]),
                            path_exp,
                            getchunk=int(run_station_chunk),
                            subset='ini',
                            refetch_records=True,
                            )
                get_records(pd.DataFrame([current_station]),
                            path_exp,
                            getchunk=int(run_station_chunk),
                            subset='end',
                            refetch_records=True,
                            )
            else:
                # remove empty files
                os.remove(fn_ini)
                os.remove(fn_mod)
# timeseries_only = \ # ['Cm', 'Cs', 'G', 'H', 'L', 'LE', 'LEpot', 'LEref', 'LEsoil', 'LEveg', 'Lwin', # 'Lwout', 'Q', 'RH_h', 'Rib', 'Swin', 'Swout', 'T2m', 'dq', 'dtheta', # 'dthetav', 'du', 'dv', 'esat', 'gammaq', 'gammatheta', 'h', 'q', 'qsat', # 'qsurf', 'ra', 'rs', 'theta', 'thetav', 'time', 'u', 'u2m', 'ustar', 'uw', # 'v', 'v2m', 'vw', 'wq', 'wtheta', 'wthetae', 'wthetav', 'wthetae', 'zlcl'] # #SET = 'GLOBAL' # SET = batch_args.dataset # path_forcingSET = batch_args.path_forcing+'/'+SET+'/' print("getting all stations from " + batch_args.path_forcing) # these are all the stations that are found in the input dataset all_stations = stations(batch_args.path_forcing, suffix=batch_args.subset_forcing, refetch_stations=False) print('defining all_stations_select') # these are all the stations that are supposed to run by the whole batch (all # chunks). We narrow it down according to the station(s) specified. if batch_args.station_id is not None: print("Selecting stations by --station_id") stations_iter = stations_iterator(all_stations) STNID, run_station = stations_iter.set_STNID( STNID=int(batch_args.station_id)) all_stations_select = pd.DataFrame([run_station]) else: print( "Selecting stations from a row range in the table [--first_station_row,--last_station_row]" )
# df_stations = pd.read_fwf(fn_stations,names=['Country code',\ # 'ID',\ # 'Name',\ # 'latitude',\ # 'longitude',\ # 'height',\ # 'unknown',\ # 'startyear',\ # 'endyear']) # # =============================== print("getting a list of stations") # =============================== all_stations = stations(args.path_input, refetch_stations=False) df_stations = all_stations.table df_stations.columns if args.station_id is not None: df_stations = df_stations.query('STNID == ' + args.station_id) else: if args.last_station_row is not None: df_stations = df_stations[:(int(args.last_station_row) + 1)] if args.first_station_row is not None: df_stations = df_stations[int(args.first_station_row):] STNlist = list(df_stations.iterrows()) os.system('mkdir -p ' + args.path_output) for iSTN, STN in STNlist:
parser.add_argument('--experiments') parser.add_argument('--split-by',default=-1)# station soundings are split # up in chunks parser.add_argument('--station-chunk') args = parser.parse_args() #SET = 'GLOBAL' SET = args.dataset if 'path-soundings' in args.__dict__.keys(): path_soundingsSET = args.__dict__['path-soundings']+'/'+SET+'/' else: path_soundingsSET = '/kyukon/data/gent/gvo000/gvo00090/D2D/data/SOUNDINGS/'+SET+'/' all_stations = stations(path_soundingsSET,suffix='morning',refetch_stations=True).table all_records_morning = get_records(all_stations,\ path_soundingsSET,\ subset='morning', refetch_records=False, ) if args.global_chunk is not None: totalchunks = 0 stations_iterator = all_stations.iterrows() in_current_chunk = False while not in_current_chunk: istation,current_station = stations_iterator.__next__() all_records_morning_station = all_records_morning.query('STNID == '+str(current_station.name)) chunks_current_station = math.ceil(float(len(all_records_morning_station))/float(args.split_by))
'GLOBAL_ADV_SHR':{'sw_ac' : ['adv',],'sw_ap': True,'sw_lit': False,'sw_shr':True}, 'GLOBAL_W': {'sw_ac' : ['w',],'sw_ap': True,'sw_lit': False}, 'GLOBAL_AC': {'sw_ac' : ['adv','w'],'sw_ap': True,'sw_lit': False}, 'IOPS_NOAC': {'sw_ac' : [],'sw_ap': True,'sw_lit': False}, 'IOPS_ADV':{'sw_ac' : ['adv',],'sw_ap': True,'sw_lit': False}, 'IOPS_W': {'sw_ac' : ['w',],'sw_ap': True,'sw_lit': False}, 'IOPS_AC': {'sw_ac' : ['adv','w'],'sw_ap': True,'sw_lit': False}, } # ======================== print("getting a list of stations") # ======================== # these are all the stations that are found in the input dataset all_stations = stations(args.path_input, suffix=args.subset_input, refetch_stations=False) # ==================================== print('defining all_stations_select') # ==================================== # these are all the stations that are supposed to run by the whole batch (all # chunks). We narrow it down according to the station(s) specified. if args.station_id is not None: print("Selecting station by ID") stations_iter = stations_iterator(all_stations) STNID, run_station = stations_iter.set_STNID(STNID=int(args.station_id)) all_stations_select = pd.DataFrame([run_station]) else:
def execute(**kwargs):
    """Dump the model input of the selected stations/chunk to ``*_ini.yaml``.

    The morning records of every selected station are read from the forcing
    files below ``args.path_forcing``, optionally completed with global
    input data (``args.global_vars``, a ':'-separated list of keys), and
    written to ``<station>_<chunk>_ini.yaml`` below ``args.path_output``.

    Args:
        **kwargs: values that overwrite the attributes of the module-level
            ``args`` namespace (same names as the command-line arguments)
            when the procedure is called as a module function.

    Raises:
        ValueError: on inconsistent chunk arguments or on a global chunk
            number outside of the available range.
        IOError: when no morning records are found.
    """
    # note that with args, we actually mean the same as those specified with
    # the argparse module above

    # overwrite the args according to the kwargs when the procedure is called
    # as module function
    for key, value in kwargs.items():
        args.__dict__[key] = value

    print("-- begin arguments --")
    for key, value in args.__dict__.items():
        print(key, ': ', value)
    print("-- end arguments ----")

    # load specified class4gl library
    if args.c4gl_path_lib is not None:
        sys.path.insert(0, args.c4gl_path_lib)

    from class4gl import class4gl_input, data_global, class4gl
    from interface_multi import stations, stations_iterator, records_iterator, get_record_yaml, get_records
    from class4gl import blh, class4gl_input

    if args.global_vars is not None:
        globaldata = data_global()
        globaldata.load_datasets(recalc=0)

    # --split_by may arrive as a string from the command line; None means
    # that the records are not split into chunks
    split_by = None if args.split_by is None else int(args.split_by)

    # ========================
    print("getting a list of stations")
    # ========================

    # these are all the stations that are found in the input dataset
    all_stations = stations(args.path_forcing,
                            suffix=args.subset_forcing,
                            refetch_stations=False)

    # ====================================
    print('defining all_stations_select')
    # ====================================

    # these are all the stations that are supposed to run by the whole batch
    # (all chunks). We narrow it down according to the station(s) specified.
    if args.station_id is not None:
        print("Selecting station by ID")
        stations_iter = stations_iterator(all_stations)
        STNID, run_station = stations_iter.set_STNID(
            STNID=int(args.station_id))
        all_stations_select = pd.DataFrame([run_station])
    else:
        print("Selecting stations from a row range in the table")
        all_stations_select = pd.DataFrame(all_stations.table)
        # the row bounds are inclusive for the last row
        if args.last_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                :(int(args.last_station_row) + 1)]
        if args.first_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                int(args.first_station_row):]

    print("station numbers included in the whole batch (all chunks):",
          list(all_stations_select.index))
    print(all_stations_select)

    print("getting all records of the whole batch")
    all_records_morning_select = get_records(all_stations_select,
                                             args.path_forcing,
                                             subset=args.subset_forcing,
                                             refetch_records=False,
                                             )

    # only run a specific chunck from the selection
    if args.global_chunk_number is not None:
        if args.station_chunk_number is not None:
            raise ValueError(
                'You need to specify either global-chunk-number or station-chunk-number, not both.'
            )

        if (split_by is None) or (split_by <= 0):
            raise ValueError(
                "global_chunk_number is specified, but --split_by is not a strict positive number, so I don't know how to split the batch into chunks."
            )

        run_station_chunk = None
        # str(): the value is an int when it is passed through kwargs
        print(
            'determining the station and its chunk number according global_chunk_number ('
            + str(args.global_chunk_number) + ')')
        totalchunks = 0
        stations_iter = all_stations_select.iterrows()
        in_current_chunk = False
        try:
            # walk through the stations, accumulating their number of chunks,
            # until the station holding the requested global chunk is reached
            while not in_current_chunk:
                istation, current_station = next(stations_iter)
                all_records_morning_station_select = all_records_morning_select.query(
                    'STNID == ' + str(current_station.name))
                chunks_current_station = math.ceil(
                    float(len(all_records_morning_station_select)) /
                    float(split_by))
                print('chunks_current_station', chunks_current_station)
                in_current_chunk = (int(args.global_chunk_number) <
                                    (totalchunks + chunks_current_station))

                if in_current_chunk:
                    run_stations = pd.DataFrame([current_station])
                    run_station_chunk = int(
                        args.global_chunk_number) - totalchunks

                totalchunks += chunks_current_station
        except StopIteration:
            raise ValueError(
                "Could not determine station chunk number.  --global_chunk_number ("
                + str(args.global_chunk_number) + ") outside of range [0," +
                str(totalchunks) + '[') from None
        print("station = ", list(run_stations.index))
        print("station chunk number:", run_station_chunk)

    # if no global chunk is specified, then run the whole station selection in
    # one run, or a specific chunk for each selected station according to
    # args.station_chunk_number
    else:
        run_stations = pd.DataFrame(all_stations_select)
        if args.station_chunk_number is not None:
            run_station_chunk = int(args.station_chunk_number)
            print("station(s) that is processed.", list(run_stations.index))
            print("chunk number: ", run_station_chunk)
        else:
            if split_by is not None:
                raise ValueError(
                    "Chunks are defined by --split_by, but I don't know which chunk to run. Please provide --global_chunk_number or --station_chunk_number, or leave out --split_by."
                )
            run_station_chunk = 0
            print("stations that are processed.", list(run_stations.index))

    print('Fetching initial/forcing records')
    records_morning = get_records(run_stations,
                                  args.path_forcing,
                                  subset=args.subset_forcing,
                                  refetch_records=False,
                                  )
    if len(records_morning) == 0:
        raise IOError("No initialization records records found in " +
                      args.path_forcing + ' (subset: ' +
                      str(args.subset_forcing) + ')')

    path_output = args.path_output

    os.makedirs(path_output, exist_ok=True)
    for istation, current_station in run_stations.iterrows():
        # label slice on the station id keeps only this station's records
        records_morning_station = records_morning.loc[(current_station.name):(
            current_station.name)]

        fn_morning = args.path_forcing + '/' + format(
            current_station.name, '05d') + '_' + args.subset_forcing + '.yaml'
        if not os.path.isfile(fn_morning):
            # fall back on the chunked forcing file
            fn_morning = \
                args.path_forcing + '/' + format(current_station.name, '05d') + \
                '_' + str(run_station_chunk) + '_' + args.subset_forcing + '.yaml'

        fn_ini = path_output + '/' + format(current_station.name, '05d') + '_' + \
            str(int(run_station_chunk)) + '_ini.yaml'

        onerun = False
        # the with-statement closes both files, also when a record fails
        with open(fn_morning) as file_morning, open(fn_ini, 'w') as file_ini:
            print('starting station chunk number: '
                  + str(run_station_chunk) + ' (chunk size:', args.split_by, ')')

            skip_chunk = False
            if 'chunk' in records_morning.index.names:
                # the records carry their own chunk level: select by label
                records_morning_station_chunk = records_morning_station.loc[(
                    current_station.name,
                    run_station_chunk):(current_station.name,
                                        run_station_chunk)]
            else:
                nrecords_station = len(records_morning_station)
                if split_by is not None:
                    start_record = run_station_chunk * split_by
                    end_record = start_record + split_by
                elif run_station_chunk == 0:
                    # no splitting: the single chunk holds all records
                    start_record, end_record = 0, None
                else:
                    # no splitting: only chunk 0 exists
                    start_record = end_record = nrecords_station

                if start_record >= nrecords_station:
                    print("warning: outside of profile number range for station " +
                          str(current_station) +
                          ". Skipping chunk number for this station.")
                    skip_chunk = True
                    records_morning_station_chunk = None
                else:
                    records_morning_station_chunk = \
                        records_morning_station.iloc[start_record:end_record]

            if not skip_chunk:
                for isim, ((STNID, chunk, index), record_morning) in enumerate(
                        records_morning_station_chunk.iterrows()):
                    print('starting ' + str(isim + 1) + ' out of ' +
                          str(len(records_morning_station_chunk)) +
                          ' (station total: ', str(len(records_morning_station)), ')')

                    c4gli_morning = get_record_yaml(file_morning,
                                                    record_morning.index_start,
                                                    record_morning.index_end,
                                                    mode='model_input')

                    if args.global_vars is not None:
                        c4gli_morning.get_global_input(
                            globaldata,
                            only_keys=args.global_vars.strip().split(':'))

                    onerun = True

                    print("dumping to " + str(file_ini) + ' (' + fn_ini + ')')
                    c4gli_morning.dump(file_ini)

        if onerun:
            # return value is not used; presumably this call refreshes the
            # stored record table (refetch_records=True) -- confirm
            get_records(pd.DataFrame([current_station]),
                        path_output,
                        getchunk=int(run_station_chunk),
                        subset='ini',
                        refetch_records=True,
                        )
        else:
            # remove empty files
            os.remove(fn_ini)
def execute(**kwargs):
    """Run the selected CLASS4GL experiments for a batch (or chunk) of stations.

    The run is configured through the module-level ``args`` namespace (as
    produced by the argparse setup earlier in this file).  Any keyword
    argument given here overwrites the attribute of the same name on ``args``
    first, so the procedure can also be called as a module function.

    For every experiment in ``args.experiments`` and every selected station,
    the morning (initial/forcing) records of the requested chunk are read
    from ``args.path_forcing``, the model is run, and the input and model
    output are dumped to ``<path_experiments>/<experiment name>/`` as
    ``<STNID>_<chunk>_ini.yaml`` and ``<STNID>_<chunk>_end.yaml``.

    Args:
        **kwargs: overrides for attributes of the global ``args`` namespace.

    Raises:
        ValueError: if both a global and a station chunk number are given,
            if ``split_by`` is missing/non-positive while a global chunk
            number is given, if ``split_by`` is given without any chunk
            number, if the global chunk number lies outside the available
            range, or if the number of experiment names differs from the
            number of experiments.
        IOError: if no morning (or, for ``runtime == 'from_profile_pair'``,
            no afternoon) records are found.
        KeyError: if an experiment id is not a key of ``EXP_DEFS``.
    """
    from contextlib import ExitStack

    # note that with args, we actually mean the same as those specified with
    # the argparse module above

    # overwrite the args according to the kwargs when the procedure is called
    # as module function
    for key, value in kwargs.items():
        args.__dict__[key] = value

    print("-- begin arguments --")
    for key, value in args.__dict__.items():
        print(key, ': ', value)
    print("-- end arguments ----")

    # load specified class4gl library; the imports below must come after the
    # path manipulation so that the requested library version is picked up
    if args.c4gl_path_lib is not None:
        sys.path.insert(0, args.c4gl_path_lib)

    from class4gl import class4gl_input, data_global, class4gl
    from interface_multi import stations, stations_iterator, records_iterator, get_record_yaml, get_records
    from class4gl import blh, class4gl_input

    # this is a variant of global run in which the output of runs are still
    # written out even when the run crashes.

    # #only include the following timeseries in the model output
    # timeseries_only = \
    # ['Cm', 'Cs', 'G', 'H', 'L', 'LE', 'LEpot', 'LEref', 'LEsoil', 'LEveg', 'Lwin',
    #  'Lwout', 'Q', 'RH_h', 'Rib', 'Swin', 'Swout', 'T2m', 'dq', 'dtheta',
    #  'dthetav', 'du', 'dv', 'esat', 'gammaq', 'gammatheta', 'h', 'q', 'qsat',
    #  'qsurf', 'ra', 'rs', 'theta', 'thetav', 'time', 'u', 'u2m', 'ustar', 'uw',
    #  'v', 'v2m', 'vw', 'wq', 'wtheta', 'wthetae', 'wthetav', 'wthetae', 'zlcl']

    # for iEXP in range(4):
    #     EXPKEY = 'LCZ'+str(iEXP)
    #     EXP_DEFS[EXPKEY] = {'sw_ac' : [],'sw_ap': True,'sw_lit': False, 'urban':'LCZ'+str(iEXP)}

    # experiment id -> model parameters that are applied on top of the input
    EXP_DEFS = {
        'LCZ': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False, 'urban': 'lcw1'},
        'BASE': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'NOADV': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'ERA_NOAC': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'NOAC': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'ADV': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'W': {'sw_ac': ['w', ], 'sw_ap': True, 'sw_lit': False},
        'AC': {'sw_ac': ['adv', 'w'], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_NOAC': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_ADV': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_ADV_SM2': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'IOPS_ADV_SM2': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_ADV_ERA_NEW': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_ADV_SHR': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False, 'sw_shearwe': True},
        'GLOBAL_W': {'sw_ac': ['w', ], 'sw_ap': True, 'sw_lit': False},
        'GLOBAL_AC': {'sw_ac': ['adv', 'w'], 'sw_ap': True, 'sw_lit': False},
        'IOPS_NOAC': {'sw_ac': [], 'sw_ap': True, 'sw_lit': False},
        'IOPS_ADV': {'sw_ac': ['adv', ], 'sw_ap': True, 'sw_lit': False},
        'IOPS_W': {'sw_ac': ['w', ], 'sw_ap': True, 'sw_lit': False},
        'IOPS_AC': {'sw_ac': ['adv', 'w'], 'sw_ap': True, 'sw_lit': False},
    }

    # ========================
    print("getting a list of stations")
    # ========================

    # these are all the stations that are found in the input dataset
    all_stations = stations(args.path_forcing,
                            suffix=args.subset_forcing,
                            refetch_stations=False)

    # ====================================
    print('defining all_stations_select')
    # ====================================

    # these are all the stations that are supposed to run by the whole batch
    # (all chunks). We narrow it down according to the station(s) specified.
    if args.station_id is not None:
        print("Selecting station by ID")
        stations_iter = stations_iterator(all_stations)
        STNID, run_station = stations_iter.set_STNID(
            STNID=int(args.station_id))
        all_stations_select = pd.DataFrame([run_station])
    else:
        print("Selecting stations from a row range in the table")
        all_stations_select = pd.DataFrame(all_stations.table)
        # the upper bound is applied first, so that the lower bound below
        # still refers to row positions of the full table
        if args.last_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                :(int(args.last_station_row) + 1)]
        if args.first_station_row is not None:
            all_stations_select = all_stations_select.iloc[
                int(args.first_station_row):]

    print("station numbers included in the whole batch " +
          "(all chunks):", list(all_stations_select.index))
    print(all_stations_select)

    print("getting all records of the whole batch")
    all_records_morning_select = get_records(all_stations_select,
                                             args.path_forcing,
                                             subset=args.subset_forcing,
                                             refetch_records=False,
                                             )

    # only run a specific chunck from the selection
    if args.global_chunk_number is not None:
        if args.station_chunk_number is not None:
            raise ValueError(
                'You need to specify either global-chunk-number or station-chunk-number, not both.'
            )

        if (args.split_by is None) or (args.split_by <= 0):
            raise ValueError(
                "global_chunk_number is specified, but --split_by is not a strict positive number, so I don't know how to split the batch into chunks."
            )

        run_station_chunk = None
        print(
            'determining the station and its chunk number according global_chunk_number ('
            + str(args.global_chunk_number) + ')')

        # walk through the stations, counting the chunks of each, until the
        # station that holds the requested global chunk is reached
        global_chunk = int(args.global_chunk_number)
        totalchunks = 0
        for istation, current_station in all_stations_select.iterrows():
            all_records_morning_station_select = all_records_morning_select.query(
                'STNID == ' + str(current_station.name))
            chunks_current_station = math.ceil(
                float(len(all_records_morning_station_select)) /
                float(args.split_by))
            print('chunks_current_station', chunks_current_station)

            # a negative chunk number never matches and ends in the error below
            if 0 <= global_chunk < (totalchunks + chunks_current_station):
                run_stations = pd.DataFrame([current_station])
                run_station_chunk = global_chunk - totalchunks
                break

            totalchunks += chunks_current_station
        else:
            raise ValueError(
                "Could not determine station chunk number. --global_chunk_number ("
                + str(args.global_chunk_number) + ") outside of range [0," +
                str(totalchunks) + '[')

        print("station = ", list(run_stations.index))
        print("station chunk number:", run_station_chunk)

    # if no global chunk is specified, then run the whole station selection in
    # one run, or a specific chunk for each selected station according to
    # args.station_chunk_number
    else:
        run_stations = pd.DataFrame(all_stations_select)
        if args.station_chunk_number is not None:
            run_station_chunk = int(args.station_chunk_number)
            print("station(s) that is processed.", list(run_stations.index))
            print("chunk number: ", run_station_chunk)
        else:
            if args.split_by is not None:
                raise ValueError(
                    "Chunks are defined by --split_by, but I don't know which chunk to run. Please provide --global_chunk_number or --station_chunk_number, or leave out --split_by."
                )
            run_station_chunk = 0
            print("stations that are processed.", list(run_stations.index))

    print('Fetching initial/forcing records')
    records_morning = get_records(run_stations,
                                  args.path_forcing,
                                  subset=args.subset_forcing,
                                  refetch_records=False,
                                  )
    if len(records_morning) == 0:
        raise IOError("No initialization records records found in " +
                      args.path_forcing + ' (subset: ' +
                      args.subset_forcing + ')')

    # note that if runtime is an integer number, we don't need to get the
    # afternoon profiles.
    if args.runtime == 'from_profile_pair':
        print(
            'Fetching afternoon records for determining the simulation runtimes'
        )
        records_afternoon = get_records(run_stations,
                                        args.path_forcing,
                                        subset='end',
                                        refetch_records=False,
                                        )
        if len(records_afternoon) == 0:
            # report the subset that was actually queried above
            raise IOError("No final state records found in " +
                          args.path_forcing + ' (subset: ' + 'end' + ')')

        # align afternoon records with the noon records, and set same index
        print(len(records_afternoon))
        print(len(records_morning))
        print("aligning morning and afternoon records")
        records_morning['dates'] = records_morning['ldatetime'].dt.date
        records_afternoon['dates'] = records_afternoon['ldatetime'].dt.date
        records_afternoon.set_index(['STNID', 'dates'], inplace=True)
        ini_index_dates = records_morning.set_index(['STNID', 'dates']).index
        records_afternoon = records_afternoon.loc[ini_index_dates]
        records_afternoon.index = records_morning.index

    experiments = args.experiments.strip(' ').split(' ')
    if args.experiments_names is not None:
        experiments_names = args.experiments_names.strip(' ').split(' ')
        if len(experiments_names) != len(experiments):
            raise ValueError(
                'Lenght of --experiments_names is different from --experiments'
            )
    else:
        experiments_names = experiments

    for expname, expid in zip(experiments_names, experiments):
        exp = EXP_DEFS[expid]
        path_exp = args.path_experiments + '/' + expname + '/'

        # create the directory without going through a shell, so that paths
        # with spaces or shell metacharacters are handled correctly
        os.makedirs(path_exp, exist_ok=True)

        for istation, current_station in run_stations.iterrows():
            print(istation, current_station)
            records_morning_station = records_morning.query(
                'STNID == ' + str(current_station.name))

            # positional range of the records that belong to this chunk
            start_record = (run_station_chunk * args.split_by
                            if run_station_chunk != 0 else 0)
            end_record = ((run_station_chunk + 1) * args.split_by
                          if args.split_by is not None else None)

            if start_record >= len(records_morning_station):
                print("warning: outside of profile number range for station " +
                      str(current_station) +
                      ". Skipping chunk number for this station.")
                continue

            station_tag = format(current_station.name, '05d')

            # the forcing is stored either in one file per station or, if
            # that file does not exist, in one file per station chunk
            fn_morning = args.path_forcing + '/' + station_tag + '_' + \
                args.subset_forcing + '.yaml'
            if not os.path.isfile(fn_morning):
                fn_morning = args.path_forcing + '/' + station_tag + '_' + \
                    str(run_station_chunk) + '_' + args.subset_forcing + '.yaml'

            fn_ini = path_exp + '/' + station_tag + '_' + \
                str(int(run_station_chunk)) + '_ini.yaml'
            fn_end_mod = path_exp + '/' + station_tag + '_' + \
                str(int(run_station_chunk)) + '_end.yaml'

            onerun = False

            # the ExitStack closes every opened file, also when an exception
            # escapes from the simulation loop
            with ExitStack() as open_files:
                file_morning = open_files.enter_context(open(fn_morning))
                if args.runtime == 'from_profile_pair':
                    file_afternoon = open_files.enter_context(
                        open(args.path_forcing + '/' + station_tag +
                             '_end.yaml'))
                file_ini = open_files.enter_context(open(fn_ini, 'w'))
                file_end_mod = open_files.enter_context(open(fn_end_mod, 'w'))

                print('starting station chunk number: ' +
                      str(run_station_chunk) + ' (chunk size:',
                      args.split_by, ')')

                records_morning_station_chunk = records_morning_station.iloc[
                    start_record:end_record]

                for isim, ((STNID, chunk, index), record_morning) in enumerate(
                        records_morning_station_chunk.iterrows()):
                    print('starting ' + str(isim + 1) + ' out of ' +
                          str(len(records_morning_station_chunk)) +
                          ' (station total: ',
                          str(len(records_morning_station)), ')')

                    c4gli_morning = get_record_yaml(
                        file_morning,
                        record_morning.index_start,
                        record_morning.index_end,
                        mode='model_input')

                    # truthiness covers both the empty string and None
                    if args.diag_tropo:
                        print(
                            'add tropospheric parameters on advection and subsidence (for diagnosis)'
                        )
                        # levels whose pressure exceeds the pressure of the
                        # last profile level minus 3000 * 1.2 * 9.81
                        # NOTE(review): presumably a hydrostatic 3000 m layer
                        # (rho = 1.2 kg/m3, g = 9.81 m/s2) - confirm
                        seltropo = (c4gli_morning.air_ac.p >
                                    c4gli_morning.air_ac.p.iloc[-1] +
                                    3000. * (-1.2 * 9.81))
                        profile_tropo = c4gli_morning.air_ac[seltropo]
                        for var in args.diag_tropo.split(','):
                            if var[:3] == 'adv':
                                mean_adv_tropo = np.mean(
                                    profile_tropo[var + '_x'] +
                                    profile_tropo[var + '_y'])
                                c4gli_morning.update(
                                    source='era-interim',
                                    pars={var + '_tropo': mean_adv_tropo})
                            else:
                                print("warning: tropospheric variable " +
                                      var + " not recognized")

                    # determine the simulation runtime (in seconds for the
                    # profile-pair case, see total_seconds() below)
                    if args.runtime == 'from_profile_pair':
                        record_afternoon = records_afternoon.loc[(STNID, chunk,
                                                                  index)]
                        c4gli_afternoon = get_record_yaml(
                            file_afternoon,
                            int(record_afternoon.index_start),
                            int(record_afternoon.index_end),
                            mode='model_input')
                        runtime = int(
                            (c4gli_afternoon.pars.datetime_daylight -
                             c4gli_morning.pars.datetime_daylight
                             ).total_seconds())
                    elif args.runtime == 'from_input':
                        runtime = c4gli_morning.pars.runtime
                    else:
                        runtime = int(args.runtime)

                    c4gli_morning.update(source='pairs',
                                         pars={'runtime': runtime})
                    c4gli_morning.update(source=expname, pars=exp)

                    c4gl = class4gl(c4gli_morning,
                                    debug_level=args.debug_level)

                    if args.error_handling == 'dump_always':
                        # best effort: a crashing run is reported, but its
                        # input and (partial) output are dumped anyway
                        try:
                            print('checking data sources')
                            if not c4gli_morning.check_source_globaldata():
                                print(
                                    'Warning: some input sources appear invalid'
                                )
                            c4gl.run()
                            print('run succesful')
                        except Exception as inst:
                            print('run not succesful. Error message is:')
                            print(inst.args)

                        print("dumping to " + str(file_ini) + ' (' + fn_ini +
                              ')')
                        c4gli_morning.dump(file_ini)
                        c4gl.dump(file_end_mod,
                                  include_input=False,
                                  # timeseries_only=timeseries_only,
                                  )
                        onerun = True

                    # in this case, only the file will dumped if the runs were
                    # successful
                    elif args.error_handling == 'dump_on_success':
                        try:
                            print('checking data sources')
                            if not c4gli_morning.check_source_globaldata():
                                print(
                                    'Warning: some input sources appear invalid'
                                )
                            c4gl.run()
                            print('run succesful')
                            c4gli_morning.dump(file_ini)
                            print("dumping to " + str(file_ini))
                            c4gl.dump(file_end_mod,
                                      include_input=False,
                                      # timeseries_only=timeseries_only,
                                      )
                            onerun = True
                        except Exception as inst:
                            print('run not succesful. Error message is:')
                            print(inst.args)
                            print('run not succesful')

            if onerun:
                # called for its side effect only; the returned tables are not
                # used here
                # NOTE(review): presumably refetch_records=True rebuilds the
                # record index of the freshly written files - confirm in
                # interface_multi.get_records
                get_records(pd.DataFrame([current_station]),
                            path_exp,
                            getchunk=int(run_station_chunk),
                            subset='ini',
                            refetch_records=True,
                            )
                get_records(pd.DataFrame([current_station]),
                            path_exp,
                            getchunk=int(run_station_chunk),
                            subset='end',
                            refetch_records=True,
                            )
            else:
                # remove empty files
                os.remove(fn_ini)
                os.remove(fn_end_mod)