def status_output(status_msg, percent, file_name):
    # type: (AnyStr, Union[int, float], AnyStr) -> None
    """Print status and flush to file.

    Args:
        status_msg: status message
        percent: percentage rate of progress
        file_name: file name
    """
    UtilClass.writelog(file_name, "[Output] %d..., %s" % (percent, status_msg), 'a')

def log(lines, log_file=None):
    """Output log message."""
    err = False
    for line in lines:
        print(line)
        if log_file is not None:
            UtilClass.writelog(log_file, line, 'append')
        if 'BAD TERMINATION' in line.upper():
            err = True
            break
    if err:
        TauDEM.error('Error occurred when calling TauDEM function, please check!',
                     log_file)

def error(msg, log_file=None):
    """Print and log the error message, then raise RuntimeError."""
    UtilClass.print_msg(msg + os.linesep)
    if log_file is not None:
        UtilClass.writelog(log_file, msg, 'append')
    raise RuntimeError(msg)

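# A hedged usage sketch (not part of the module): how `log` and `error` above
# cooperate. If any captured output line of an external TauDEM run contains
# 'BAD TERMINATION' (a typical mpiexec abort message), `log` delegates to
# `TauDEM.error`, which writes the message to the log file and raises
# RuntimeError. The sample lines below are illustrative assumptions.
def _demo_log_failure(log_file):
    taudem_output = ['PitRemove version 5.3.7',
                     'BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES']
    log(taudem_output, log_file)  # raises RuntimeError via TauDEM.error
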
def main(cfg):
    """Main workflow of NSGA-II based Calibration."""
    random.seed()
    scoop_log('Population: %d, Generation: %d' % (cfg.opt.npop, cfg.opt.ngens))

    # Initial timespan variables
    stime = time.time()
    plot_time = 0.
    allmodels_exect = list()  # execute time of all model runs

    # Create reference point for hypervolume
    ref_pt = numpy.array(worse_objects) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    # Read observation data from MongoDB just once
    cali_obj = Calibration(cfg)
    model_cfg_dict = cali_obj.model.ConfigDict
    model_obj = MainSEIMS(args_dict=model_cfg_dict)
    obs_vars, obs_data_dict = model_obj.ReadOutletObservations(object_vars)

    # Initialize population
    param_values = cali_obj.initialize(cfg.opt.npop)
    pop = list()
    for i in range(cfg.opt.npop):
        ind = creator.Individual(param_values[i])
        ind.gen = 0
        ind.id = i
        ind.obs.vars = obs_vars[:]
        ind.obs.data = deepcopy(obs_data_dict)
        pop.append(ind)
    param_values = numpy.array(param_values)

    # Write calibrated values to MongoDB
    # TODO: extract this function, which is the same as
    #       `Sensitivity::write_param_values_to_mongodb`.
    write_param_values_to_mongodb(cfg.model.host, cfg.model.port, cfg.model.db_name,
                                  cali_obj.ParamDefs, param_values)
    # Get the lower and upper bounds of calibrated parameters
    bounds = numpy.array(cali_obj.ParamDefs['bounds'])
    low = bounds[:, 0]
    up = bounds[:, 1]
    low = low.tolist()
    up = up.tolist()
    pop_select_num = int(cfg.opt.npop * cfg.opt.rsel)
    init_time = time.time() - stime

    def check_validation(fitvalues):
        """Check the validity of the fitness values of an individual."""
        flag = True
        for condidx, condstr in enumerate(conditions):
            if condstr is None:
                continue
            if not eval('%f%s' % (fitvalues[condidx], condstr)):
                flag = False
        return flag

    def evaluate_parallel(invalid_pops):
        """Evaluate model by SCOOP or map, and set fitness of individuals
        according to calibration step."""
        popnum = len(invalid_pops)
        labels = list()
        try:  # parallel on multi-processors or clusters using SCOOP
            from scoop import futures
            invalid_pops = list(futures.map(toolbox.evaluate, [cali_obj] * popnum,
                                            invalid_pops))
        except (ImportError, ImportWarning):  # Python built-in map (serial)
            invalid_pops = list(toolbox.map(toolbox.evaluate, [cali_obj] * popnum,
                                            invalid_pops))
        for tmpind in invalid_pops:
            tmpfitnessv = list()
            for k, v in list(multiobj.items()):
                tmpvalues, tmplabel = tmpind.cali.efficiency_values(k, object_names[k])
                tmpfitnessv += tmpvalues[:]
                labels += tmplabel[:]
            tmpind.fitness.values = tuple(tmpfitnessv)
        # Filter for valid solutions
        if filter_ind:
            invalid_pops = [tmpind for tmpind in invalid_pops
                            if check_validation(tmpind.fitness.values)]
            if len(invalid_pops) < 2:
                print('The initial population should be greater or equal than 2. '
                      'Please check the parameters ranges or change the sampling strategy!')
                exit(2)
        return invalid_pops, labels  # Now, `invalid_pops` contains evaluated individuals

    # Record the count and execution timespan of model runs during the optimization
    modelruns_count = {0: len(pop)}
    modelruns_time = {0: 0.}  # Total time counted according to evaluate_parallel()
    modelruns_time_sum = {0: 0.}  # Summarized time of every model run according to pop

    # Generation 0 before optimization
    stime = time.time()
    pop, plotlables = evaluate_parallel(pop)
    modelruns_time[0] = time.time() - stime
    for ind in pop:
        allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
        modelruns_time_sum[0] += ind.runtime

    # Currently, len(pop) may be less than pop_select_num
    pop = toolbox.select(pop, pop_select_num)
    # Output simulated data to json or pickle files for future use.
    output_population_details(pop, cfg.opt.simdata_dir, 0)

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)
    scoop_log(logbook.stream)

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (cfg.opt.ngens,
                                                                           cfg.opt.npop)
    scoop_log(output_str)
    UtilClass.writelog(cfg.opt.logfile, output_str, mode='replace')

    modelsel_count = {0: len(pop)}  # type: Dict[int, int]  # newly added Pareto fronts

    for gen in range(1, cfg.opt.ngens + 1):
        output_str = '###### Generation: %d ######\n' % gen
        scoop_log(output_str)

        offspring = [toolbox.clone(ind) for ind in pop]
        # method1: use crowding distance (normalized as 0~1) as eta
        # tools.emo.assignCrowdingDist(offspring)
        # method2: use the index of individual at the sorted offspring list as eta
        if len(offspring) >= 2:  # when offspring size is greater than 2, mate can be done
            for i, ind1, ind2 in zip(range(len(offspring) // 2),
                                     offspring[::2], offspring[1::2]):
                if random.random() > cfg.opt.rcross:
                    continue
                eta = i
                toolbox.mate(ind1, ind2, eta, low, up)
                toolbox.mutate(ind1, eta, low, up, cfg.opt.rmut)
                toolbox.mutate(ind2, eta, low, up, cfg.opt.rmut)
                del ind1.fitness.values, ind2.fitness.values
        else:
            toolbox.mutate(offspring[0], 1., low, up, cfg.opt.rmut)
            del offspring[0].fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_inds = [ind for ind in offspring if not ind.fitness.valid]
        valid_inds = [ind for ind in offspring if ind.fitness.valid]
        if len(invalid_inds) == 0:  # No need to continue
            scoop_log('Note: No invalid individuals available, the NSGA2 will be terminated!')
            break

        # Write new calibrated parameters to MongoDB
        param_values = list()
        for idx, ind in enumerate(invalid_inds):
            ind.gen = gen
            ind.id = idx
            param_values.append(ind[:])
        param_values = numpy.array(param_values)
        write_param_values_to_mongodb(cfg.model.host, cfg.model.port, cfg.model.db_name,
                                      cali_obj.ParamDefs, param_values)

        # Count the model runs, and execute models
        invalid_ind_size = len(invalid_inds)
        modelruns_count.setdefault(gen, invalid_ind_size)
        stime = time.time()
        invalid_inds, plotlables = evaluate_parallel(invalid_inds)
        curtimespan = time.time() - stime
        modelruns_time.setdefault(gen, curtimespan)
        modelruns_time_sum.setdefault(gen, 0.)
        for ind in invalid_inds:
            allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
            modelruns_time_sum[gen] += ind.runtime

        # Select the next generation population.
        # The previous version may result in duplications of the same scenario
        # in one Pareto front, thus check and remove the duplications first.
        # pop = toolbox.select(pop + valid_inds + invalid_inds, pop_select_num)
        tmppop = pop + valid_inds + invalid_inds
        pop = list()
        unique_sces = dict()
        for tmpind in tmppop:
            if tmpind.gen in unique_sces and tmpind.id in unique_sces[tmpind.gen]:
                continue
            if tmpind.gen not in unique_sces:
                unique_sces.setdefault(tmpind.gen, [tmpind.id])
            elif tmpind.id not in unique_sces[tmpind.gen]:
                unique_sces[tmpind.gen].append(tmpind.id)
            pop.append(tmpind)
        pop = toolbox.select(pop, pop_select_num)
        output_population_details(pop, cfg.opt.simdata_dir, gen)

        hyper_str = 'Gen: %d, New model runs: %d, ' \
                    'Execute timespan: %.4f, Sum of model run timespan: %.4f, ' \
                    'Hypervolume: %.4f\n' % (gen, invalid_ind_size, curtimespan,
                                             modelruns_time_sum[gen],
                                             hypervolume(pop, ref_pt))
        scoop_log(hyper_str)
        UtilClass.writelog(cfg.opt.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_inds), **record)
        scoop_log(logbook.stream)

        # Count the newly generated near Pareto fronts
        new_count = 0
        for ind in pop:
            if ind.gen == gen:
                new_count += 1
        modelsel_count.setdefault(gen, new_count)

        # Plot 2D near optimal Pareto front graphs,
        #   i.e., (NSE, RSR), (NSE, PBIAS), and (RSR, PBIAS),
        # and 3D near optimal Pareto front graphs, i.e., (NSE, RSR, PBIAS)
        stime = time.time()
        front = numpy.array([ind.fitness.values for ind in pop])
        plot_pareto_front_single(front, plotlables, cfg.opt.out_dir, gen,
                                 'Near Pareto optimal solutions')
        plot_time += time.time() - stime

        # Save in file
        # Header information
        output_str += 'generation\tcalibrationID\t'
        for kk, vv in list(object_names.items()):
            output_str += pop[0].cali.output_header(kk, vv, 'Cali')
        if cali_obj.cfg.calc_validation:
            for kkk, vvv in list(object_names.items()):
                output_str += pop[0].vali.output_header(kkk, vvv, 'Vali')
        output_str += 'gene_values\n'
        for ind in pop:
            output_str += '%d\t%d\t' % (ind.gen, ind.id)
            for kk, vv in list(object_names.items()):
                output_str += ind.cali.output_efficiency(kk, vv)
            if cali_obj.cfg.calc_validation:
                for kkk, vvv in list(object_names.items()):
                    output_str += ind.vali.output_efficiency(kkk, vvv)
            output_str += str(ind)
            output_str += '\n'
        UtilClass.writelog(cfg.opt.logfile, output_str, mode='append')

        # TODO: Figure out if we should terminate the evolution

    # Plot hypervolume and newly executed model count
    plot_hypervolume_single(cfg.opt.hypervlog, cfg.opt.out_dir)

    # Save newly added Pareto fronts of each generation
    new_fronts_count = numpy.array(list(modelsel_count.items()))
    numpy.savetxt('%s/new_pareto_fronts_count.txt' % cfg.opt.out_dir,
                  new_fronts_count, delimiter=str(','), fmt=str('%d'))

    # Save and print timespan information
    allmodels_exect = numpy.array(allmodels_exect)
    numpy.savetxt('%s/exec_time_allmodelruns.txt' % cfg.opt.out_dir, allmodels_exect,
                  delimiter=str(' '), fmt=str('%.4f'))
    scoop_log('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' % ('\t'.join('%.3f' % t for t in allmodels_exect.max(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.min(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.mean(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.sum(0))))

    exec_time = 0.
    for genid, tmptime in list(modelruns_time.items()):
        exec_time += tmptime
    exec_time_sum = 0.
    for genid, tmptime in list(modelruns_time_sum.items()):
        exec_time_sum += tmptime
    allcount = 0
    for genid, tmpcount in list(modelruns_count.items()):
        allcount += tmpcount
    scoop_log('Initialization timespan: %.4f\n'
              'Model execution timespan: %.4f\n'
              'Sum of model runs timespan: %.4f\n'
              'Plot Pareto graphs timespan: %.4f' % (init_time, exec_time,
                                                     exec_time_sum, plot_time))

    return pop, logbook

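# The SCOOP-or-serial fallback used by `evaluate_parallel` above, shown in
# isolation as a minimal sketch (a hypothetical helper, not SEIMS code): when
# the script is launched via `python -m scoop`, `scoop.futures.map` distributes
# the calls across workers; otherwise the built-in serial map is used, so the
# same script runs unchanged in both modes.
def _map_maybe_parallel(func, arg_list):
    try:
        from scoop import futures  # available when started with `python -m scoop`
        return list(futures.map(func, arg_list))
    except ImportError:  # SCOOP not installed: fall back to serial evaluation
        return list(map(func, arg_list))
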
def watershed_delineation(np, dem, outlet_file=None, thresh=0, singlebasin=False,
                          workingdir=None, mpi_bin=None, bin_dir=None,
                          logfile=None, runtime_file=None, hostfile=None):
    """Watershed Delineation."""
    # 1. Check directories
    if not os.path.exists(dem):
        TauDEM.error('DEM: %s does not exist!' % dem)
    dem = os.path.abspath(dem)
    if workingdir is None:
        workingdir = os.path.dirname(dem)
    namecfg = TauDEMFilesUtils(workingdir)
    workingdir = namecfg.workspace
    UtilClass.mkdir(workingdir)
    # 2. Check log file
    if logfile is not None and FileClass.is_file_exists(logfile):
        os.remove(logfile)
    # 3. Get predefined intermediate file names
    filled_dem = namecfg.filldem
    flow_dir = namecfg.d8flow
    slope = namecfg.slp
    flow_dir_dinf = namecfg.dinf
    slope_dinf = namecfg.dinf_slp
    dir_code_dinf = namecfg.dinf_d8dir
    weight_dinf = namecfg.dinf_weight
    acc = namecfg.d8acc
    stream_raster = namecfg.stream_raster
    default_outlet = namecfg.outlet_pre
    modified_outlet = namecfg.outlet_m
    stream_skeleton = namecfg.stream_pd
    acc_with_weight = namecfg.d8acc_weight
    stream_order = namecfg.stream_order
    ch_network = namecfg.channel_net
    ch_coord = namecfg.channel_coord
    stream_net = namecfg.streamnet_shp
    subbasin = namecfg.subbsn
    dist2_stream_d8 = namecfg.dist2stream_d8
    # 4. Perform calculation
    UtilClass.writelog(logfile, '[Output] %d..., %s' % (10, 'pitremove DEM...'), 'a')
    TauDEM.pitremove(np, dem, filled_dem, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (20, 'Calculating D8 and Dinf flow direction...'), 'a')
    TauDEM.d8flowdir(np, filled_dem, flow_dir, slope, workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.dinfflowdir(np, filled_dem, flow_dir_dinf, slope_dinf, workingdir,
                       mpi_bin, bin_dir, log_file=logfile,
                       runtime_file=runtime_file, hostfile=hostfile)
    DinfUtil.output_compressed_dinf(flow_dir_dinf, dir_code_dinf, weight_dinf)
    UtilClass.writelog(logfile, '[Output] %d..., %s' % (30, 'D8 flow accumulation...'), 'a')
    TauDEM.aread8(np, flow_dir, acc, None, None, False, workingdir, mpi_bin, bin_dir,
                  log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (40, 'Generating stream raster initially...'), 'a')
    min_accum, max_accum, mean_accum, std_accum = RasterUtilClass.raster_statistics(acc)
    TauDEM.threshold(np, acc, stream_raster, mean_accum, workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' % (50, 'Moving outlet to stream...'), 'a')
    if outlet_file is None:
        outlet_file = default_outlet
        TauDEM.connectdown(np, flow_dir, acc, outlet_file, wtsd=None,
                           workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                           log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.moveoutletstostrm(np, flow_dir, stream_raster, outlet_file,
                             modified_outlet, workingdir, mpi_bin, bin_dir,
                             log_file=logfile, runtime_file=runtime_file,
                             hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (60, 'Generating stream skeleton...'), 'a')
    TauDEM.peukerdouglas(np, filled_dem, stream_skeleton, workingdir,
                         mpi_bin, bin_dir, log_file=logfile,
                         runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (70, 'Flow accumulation with outlet...'), 'a')
    tmp_outlet = None
    if singlebasin:
        tmp_outlet = modified_outlet
    TauDEM.aread8(np, flow_dir, acc_with_weight, tmp_outlet, stream_skeleton, False,
                  workingdir, mpi_bin, bin_dir,
                  log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)

    if thresh <= 0:  # find the optimal threshold using dropanalysis function
        UtilClass.writelog(logfile, '[Output] %d..., %s' %
                           (75, 'Drop analysis to select optimal threshold...'), 'a')
        min_accum, max_accum, mean_accum, std_accum = \
            RasterUtilClass.raster_statistics(acc_with_weight)
        if mean_accum - std_accum < 0:
            minthresh = mean_accum
        else:
            minthresh = mean_accum - std_accum
        maxthresh = mean_accum + std_accum
        numthresh = 20
        logspace = 'true'
        drp_file = namecfg.drptxt
        TauDEM.dropanalysis(np, filled_dem, flow_dir, acc_with_weight,
                            acc_with_weight, modified_outlet, minthresh, maxthresh,
                            numthresh, logspace, drp_file, workingdir, mpi_bin, bin_dir,
                            log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
        if not FileClass.is_file_exists(drp_file):
            raise RuntimeError('Dropanalysis failed and drp.txt was not created!')
        with open(drp_file, 'r', encoding='utf-8') as drpf:
            temp_contents = drpf.read()
            (beg, thresh) = temp_contents.rsplit(' ', 1)
        print(thresh)
    UtilClass.writelog(logfile, '[Output] %d..., %s' % (80, 'Generating stream raster...'), 'a')
    TauDEM.threshold(np, acc_with_weight, stream_raster, float(thresh),
                     workingdir, mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' % (90, 'Generating stream net...'), 'a')
    TauDEM.streamnet(np, filled_dem, flow_dir, acc_with_weight, stream_raster,
                     modified_outlet, stream_order, ch_network, ch_coord, stream_net,
                     subbasin, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (95, 'Calculating distance to stream (D8)...'), 'a')
    TauDEM.d8hdisttostrm(np, flow_dir, stream_raster, dist2_stream_d8, 1,
                         workingdir, mpi_bin, bin_dir,
                         log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %d..., %s' %
                       (100, 'Original subbasin delineation is finished!'), 'a')

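# A minimal invocation sketch for `watershed_delineation` above (hypothetical
# paths; the TauDEM/MPI directories and the DEM location are assumptions for
# illustration, not part of this module).
def _demo_watershed_delineation():
    watershed_delineation(4,                                  # 4 MPI processes
                          '/data/demo/dem_30m.tif',           # assumed input DEM
                          outlet_file=None,                   # auto-detect outlet
                          thresh=0,                           # trigger drop analysis
                          workingdir='/data/demo/taudem_out',
                          mpi_bin='/usr/local/bin',           # assumed mpiexec directory
                          bin_dir='/usr/local/taudem',        # assumed TauDEM binaries
                          logfile='/data/demo/taudem_out/delineation.log')
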
def main(sceobj):
    # type: (SUScenario) -> ()
    """Main workflow of NSGA-II based Scenario analysis."""
    if sceobj.cfg.eval_info['BASE_ENV'] < 0:
        run_base_scenario(sceobj)
    print('The environment effectiveness value of the '
          'base scenario is %.2f' % sceobj.cfg.eval_info['BASE_ENV'])

    random.seed()

    # Initial timespan variables
    stime = time.time()
    plot_time = 0.
    allmodels_exect = list()  # execute time of all model runs

    pop_size = sceobj.cfg.opt.npop
    gen_num = sceobj.cfg.opt.ngens
    cx_rate = sceobj.cfg.opt.rcross
    mut_perc = sceobj.cfg.opt.pmut
    mut_rate = sceobj.cfg.opt.rmut
    sel_rate = sceobj.cfg.opt.rsel
    pop_select_num = int(pop_size * sel_rate)
    ws = sceobj.cfg.opt.out_dir
    cfg_unit = sceobj.cfg.bmps_cfg_unit
    cfg_method = sceobj.cfg.bmps_cfg_method
    worst_econ = sceobj.worst_econ
    worst_env = sceobj.worst_env
    # available gene value list
    possible_gene_values = list(sceobj.bmps_params.keys())
    if 0 not in possible_gene_values:
        possible_gene_values.append(0)
    units_info = sceobj.cfg.units_infos
    suit_bmps = sceobj.suit_bmps
    gene_to_unit = sceobj.cfg.gene_to_unit
    unit_to_gene = sceobj.cfg.unit_to_gene
    updown_units = sceobj.cfg.updown_units

    scoop_log('Population: %d, Generation: %d' % (pop_size, gen_num))
    scoop_log('BMPs configure unit: %s, configuration method: %s' % (cfg_unit, cfg_method))

    # Create reference point for hypervolume
    ref_pt = numpy.array([worst_econ, worst_env]) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    # Initialize population
    initialize_byinputs = False
    if sceobj.cfg.initial_byinput and sceobj.cfg.input_pareto_file is not None and \
            sceobj.cfg.input_pareto_gen > 0:  # Initialize by input Pareto solutions
        inpareto_file = sceobj.modelcfg.model_dir + os.sep + sceobj.cfg.input_pareto_file
        if os.path.isfile(inpareto_file):
            inpareto_solutions = read_pareto_solutions_from_txt(inpareto_file,
                                                                sce_name='scenario',
                                                                field_name='gene_values')
            if sceobj.cfg.input_pareto_gen in inpareto_solutions:
                pareto_solutions = inpareto_solutions[sceobj.cfg.input_pareto_gen]
                pop = toolbox.population_byinputs(sceobj.cfg, pareto_solutions)  # type: List
                initialize_byinputs = True
    if not initialize_byinputs:
        pop = toolbox.population(sceobj.cfg, n=pop_size)  # type: List

    init_time = time.time() - stime

    def delete_fitness(new_ind):
        """Delete the fitness and other information of a new individual."""
        del new_ind.fitness.values
        new_ind.gen = -1
        new_ind.id = -1
        new_ind.io_time = 0.
        new_ind.comp_time = 0.
        new_ind.simu_time = 0.
        new_ind.runtime = 0.

    def check_validation(fitvalues):
        """Check the validity of the fitness values of an individual."""
        flag = True
        for condidx, condstr in enumerate(conditions):
            if condstr is None:
                continue
            if not eval('%f%s' % (fitvalues[condidx], condstr)):
                flag = False
        return flag

    def evaluate_parallel(invalid_pops):
        """Evaluate model by SCOOP or map, and get fitness of individuals."""
        popnum = len(invalid_pops)
        try:  # parallel on multi-processors or clusters using SCOOP
            from scoop import futures
            invalid_pops = list(futures.map(toolbox.evaluate, [sceobj.cfg] * popnum,
                                            invalid_pops))
        except (ImportError, ImportWarning):  # serial
            invalid_pops = list(map(toolbox.evaluate, [sceobj.cfg] * popnum, invalid_pops))
        # Filter for valid solutions
        if filter_ind:
            invalid_pops = [tmpind for tmpind in invalid_pops
                            if check_validation(tmpind.fitness.values)]
            if len(invalid_pops) < 2:
                print('The initial population should be greater or equal than 2. '
                      'Please check the parameters ranges or change the sampling strategy!')
                exit(2)
        return invalid_pops  # Now, `invalid_pops` contains evaluated individuals

    # Record the count and execution timespan of model runs during the optimization
    modelruns_count = {0: len(pop)}
    modelruns_time = {0: 0.}  # Total time counted according to evaluate_parallel()
    modelruns_time_sum = {0: 0.}  # Summarized time of every model run according to pop

    # Generation 0 before optimization
    stime = time.time()
    pop = evaluate_parallel(pop)
    modelruns_time[0] = time.time() - stime
    for ind in pop:
        ind.gen = 0
        allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
        modelruns_time_sum[0] += ind.runtime

    # Currently, len(pop) may be less than pop_select_num
    pop = toolbox.select(pop, pop_select_num)
    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)
    scoop_log(logbook.stream)
    front = numpy.array([ind.fitness.values for ind in pop])
    # Save the front for further possible use
    numpy.savetxt(sceobj.scenario_dir + os.sep + 'pareto_front_gen0.txt',
                  front, delimiter=str(' '), fmt=str('%.4f'))

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (gen_num, pop_size)
    scoop_log(output_str)
    UtilClass.writelog(sceobj.cfg.opt.logfile, output_str, mode='replace')

    modelsel_count = {0: len(pop)}  # type: Dict[int, int]  # newly added Pareto fronts

    for gen in range(1, gen_num + 1):
        output_str = '###### Generation: %d ######\n' % gen
        scoop_log(output_str)

        offspring = [toolbox.clone(ind) for ind in pop]
        if len(offspring) >= 2:  # when offspring size is greater than 2, mate can be done
            for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
                old_ind1 = toolbox.clone(ind1)
                old_ind2 = toolbox.clone(ind2)
                if random.random() <= cx_rate:
                    if cfg_method == BMPS_CFG_METHODS[3]:  # SLPPOS method
                        toolbox.mate_slppos(ind1, ind2, sceobj.cfg.hillslp_genes_num)
                    elif cfg_method == BMPS_CFG_METHODS[2]:  # UPDOWN method
                        toolbox.mate_updown(updown_units, gene_to_unit, unit_to_gene,
                                            ind1, ind2)
                    else:
                        toolbox.mate_rdm(ind1, ind2)
                if cfg_method == BMPS_CFG_METHODS[0]:
                    toolbox.mutate_rdm(possible_gene_values, ind1,
                                       perc=mut_perc, indpb=mut_rate)
                    toolbox.mutate_rdm(possible_gene_values, ind2,
                                       perc=mut_perc, indpb=mut_rate)
                else:
                    tagnames = None
                    if sceobj.cfg.bmps_cfg_unit == BMPS_CFG_UNITS[3]:
                        tagnames = sceobj.cfg.slppos_tagnames
                    toolbox.mutate_rule(units_info, gene_to_unit, unit_to_gene,
                                        suit_bmps, ind1,
                                        perc=mut_perc, indpb=mut_rate,
                                        unit=cfg_unit, method=cfg_method,
                                        tagnames=tagnames,
                                        thresholds=sceobj.cfg.boundary_adaptive_threshs)
                    toolbox.mutate_rule(units_info, gene_to_unit, unit_to_gene,
                                        suit_bmps, ind2,
                                        perc=mut_perc, indpb=mut_rate,
                                        unit=cfg_unit, method=cfg_method,
                                        tagnames=tagnames,
                                        thresholds=sceobj.cfg.boundary_adaptive_threshs)
                if check_individual_diff(old_ind1, ind1):
                    delete_fitness(ind1)
                if check_individual_diff(old_ind2, ind2):
                    delete_fitness(ind2)

        # Evaluate the individuals with an invalid fitness
        invalid_inds = [ind for ind in offspring if not ind.fitness.valid]
        valid_inds = [ind for ind in offspring if ind.fitness.valid]
        invalid_ind_size = len(invalid_inds)
        if invalid_ind_size == 0:  # No need to continue
            scoop_log('Note: No invalid individuals available, the NSGA2 will be terminated!')
            break

        modelruns_count.setdefault(gen, invalid_ind_size)
        stime = time.time()
        invalid_inds = evaluate_parallel(invalid_inds)
        curtimespan = time.time() - stime
        modelruns_time.setdefault(gen, curtimespan)
        modelruns_time_sum.setdefault(gen, 0.)
        for ind in invalid_inds:
            ind.gen = gen
            allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
            modelruns_time_sum[gen] += ind.runtime

        # Select the next generation population.
        # The previous version may result in duplications of the same scenario
        # in one Pareto front, thus check and remove the duplications first.
        # pop = toolbox.select(pop + valid_inds + invalid_inds, pop_select_num)
        tmppop = pop + valid_inds + invalid_inds
        pop = list()
        unique_sces = dict()
        for tmpind in tmppop:
            if tmpind.gen in unique_sces and tmpind.id in unique_sces[tmpind.gen]:
                continue
            if tmpind.gen not in unique_sces:
                unique_sces.setdefault(tmpind.gen, [tmpind.id])
            elif tmpind.id not in unique_sces[tmpind.gen]:
                unique_sces[tmpind.gen].append(tmpind.id)
            pop.append(tmpind)
        pop = toolbox.select(pop, pop_select_num)

        hyper_str = 'Gen: %d, New model runs: %d, ' \
                    'Execute timespan: %.4f, Sum of model run timespan: %.4f, ' \
                    'Hypervolume: %.4f\n' % (gen, invalid_ind_size, curtimespan,
                                             modelruns_time_sum[gen],
                                             hypervolume(pop, ref_pt))
        scoop_log(hyper_str)
        UtilClass.writelog(sceobj.cfg.opt.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_inds), **record)
        scoop_log(logbook.stream)

        # Count the newly generated near Pareto fronts
        new_count = 0
        for ind in pop:
            if ind.gen == gen:
                new_count += 1
        modelsel_count.setdefault(gen, new_count)

        # Plot 2D near optimal Pareto front graphs
        stime = time.time()
        front = numpy.array([ind.fitness.values for ind in pop])
        # Save the front for further possible use
        numpy.savetxt(sceobj.scenario_dir + os.sep + 'pareto_front_gen%d.txt' % gen,
                      front, delimiter=str(' '), fmt=str('%.4f'))
        # Comment out the following plot code if matplotlib does not work.
        try:
            from scenario_analysis.visualization import plot_pareto_front_single
            pareto_title = 'Near Pareto optimal solutions'
            xlabel = 'Economy'
            ylabel = 'Environment'
            if sceobj.cfg.plot_cfg.plot_cn:
                xlabel = r'经济净投入'
                ylabel = r'环境效益'
                pareto_title = r'近似最优Pareto解集'
            plot_pareto_front_single(front, [xlabel, ylabel], ws, gen, pareto_title,
                                     plot_cfg=sceobj.cfg.plot_cfg)
        except Exception as e:
            scoop_log('Exception caught: %s' % str(e))
        plot_time += time.time() - stime

        # Save in file
        output_str += 'generation\tscenario\teconomy\tenvironment\tgene_values\n'
        for indi in pop:
            output_str += '%d\t%d\t%f\t%f\t%s\n' % (indi.gen, indi.id,
                                                    indi.fitness.values[0],
                                                    indi.fitness.values[1], str(indi))
        UtilClass.writelog(sceobj.cfg.opt.logfile, output_str, mode='append')

    # Plot hypervolume and newly executed model count.
    # Comment out the following plot code if matplotlib does not work.
    try:
        from scenario_analysis.visualization import plot_hypervolume_single
        plot_hypervolume_single(sceobj.cfg.opt.hypervlog, ws, plot_cfg=sceobj.cfg.plot_cfg)
    except Exception as e:
        scoop_log('Exception caught: %s' % str(e))

    # Save newly added Pareto fronts of each generation
    new_fronts_count = numpy.array(list(modelsel_count.items()))
    numpy.savetxt('%s/new_pareto_fronts_count.txt' % ws,
                  new_fronts_count, delimiter=str(','), fmt=str('%d'))

    # Save and print timespan information
    allmodels_exect = numpy.array(allmodels_exect)
    numpy.savetxt('%s/exec_time_allmodelruns.txt' % ws, allmodels_exect,
                  delimiter=str(' '), fmt=str('%.4f'))
    scoop_log('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' % ('\t'.join('%.3f' % v for v in allmodels_exect.max(0)),
                             '\t'.join('%.3f' % v for v in allmodels_exect.min(0)),
                             '\t'.join('%.3f' % v for v in allmodels_exect.mean(0)),
                             '\t'.join('%.3f' % v for v in allmodels_exect.sum(0))))

    exec_time = 0.
    for genid, tmptime in list(modelruns_time.items()):
        exec_time += tmptime
    exec_time_sum = 0.
    for genid, tmptime in list(modelruns_time_sum.items()):
        exec_time_sum += tmptime
    allcount = 0
    for genid, tmpcount in list(modelruns_count.items()):
        allcount += tmpcount
    scoop_log('Initialization timespan: %.4f\n'
              'Model execution timespan: %.4f\n'
              'Sum of model runs timespan: %.4f\n'
              'Plot Pareto graphs timespan: %.4f' % (init_time, exec_time,
                                                     exec_time_sum, plot_time))

    return pop, logbook

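# Sketch of the hypervolume reference-point construction used above. DEAP works
# on the weighted (maximization) fitness internally, so the worst-case corner
# is mapped through the objective weights before negation. The weights and
# worst-case values below are assumptions for illustration only.
def _demo_ref_point():
    demo_weights = numpy.array([-1., 1.])    # e.g., minimize net cost, maximize environment benefit
    demo_worst = numpy.array([600000., 0.])  # assumed worst economy/environment values
    return demo_worst * demo_weights * -1    # reference point passed to hypervolume()
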
def main(cfg):
    """Main workflow of NSGA-II based Scenario analysis."""
    random.seed()
    pop_size = cfg.nsga2_npop
    gen_num = cfg.nsga2_ngens
    rule_cfg = cfg.bmps_rule
    rule_mth = cfg.rule_method
    cx_rate = cfg.nsga2_rcross
    mut_perc = cfg.nsga2_pmut
    mut_rate = cfg.nsga2_rmut
    sel_rate = cfg.nsga2_rsel
    ws = cfg.nsga2_dir
    worst_econ = cfg.worst_econ
    worst_env = cfg.worst_env
    # available gene value list
    possible_gene_values = list(cfg.bmps_params.keys())
    possible_gene_values.append(0)
    units_info = cfg.units_infos
    slppos_tagnames = cfg.slppos_tagnames
    suit_bmps = cfg.slppos_suit_bmps
    gene_to_unit = cfg.gene_to_slppos
    unit_to_gene = cfg.slppos_to_gene

    print_message('Population: %d, Generation: %d' % (pop_size, gen_num))
    print_message('BMPs configure method: %s' %
                  ('rule-based' if rule_cfg else 'random-based'))

    # Create reference point for hypervolume
    ref_pt = numpy.array([worst_econ, worst_env]) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    pop = toolbox.population(cfg, n=pop_size)
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
    try:  # parallel on multi-processors or clusters using SCOOP
        from scoop import futures
        fitnesses = futures.map(toolbox.evaluate, [cfg] * len(invalid_ind), invalid_ind)
        # print('parallel-fitnesses: ', fitnesses)
    except (ImportError, ImportWarning):  # serial
        fitnesses = toolbox.map(toolbox.evaluate, [cfg] * len(invalid_ind), invalid_ind)
        # print('serial-fitnesses: ', fitnesses)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit[:2]
        ind.id = fit[2]

    # This is just to assign the crowding distance to the individuals,
    # no actual selection is done.
    pop = toolbox.select(pop, pop_size)
    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(invalid_ind), **record)
    print_message(logbook.stream)

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (gen_num, pop_size)
    print_message(output_str)
    UtilClass.writelog(cfg.logfile, output_str, mode='replace')

    for gen in range(1, gen_num + 1):
        output_str = '###### Generation: %d ######\n' % gen
        print_message(output_str)

        # Vary the population
        offspring = tools.selTournamentDCD(pop, int(pop_size * sel_rate))
        offspring = [toolbox.clone(ind) for ind in offspring]
        # print_message('Offspring size: %d' % len(offspring))
        if len(offspring) >= 2:  # when offspring size is greater than 2, mate can be done
            for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
                if random.random() <= cx_rate:
                    if rule_cfg:
                        toolbox.mate_rule(slppos_tagnames, ind1, ind2)
                    else:
                        toolbox.mate_rdn(ind1, ind2)
                if rule_cfg:
                    toolbox.mutate_rule(units_info, gene_to_unit, unit_to_gene,
                                        slppos_tagnames, suit_bmps, ind1,
                                        perc=mut_perc, indpb=mut_rate, method=rule_mth)
                    toolbox.mutate_rule(units_info, gene_to_unit, unit_to_gene,
                                        slppos_tagnames, suit_bmps, ind2,
                                        perc=mut_perc, indpb=mut_rate, method=rule_mth)
                else:
                    toolbox.mutate_rdm(possible_gene_values, ind1,
                                       perc=mut_perc, indpb=mut_rate)
                    toolbox.mutate_rdm(possible_gene_values, ind2,
                                       perc=mut_perc, indpb=mut_rate)
                del ind1.fitness.values, ind2.fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        invalid_ind_size = len(invalid_ind)
        # print_message('Evaluate pop size: %d' % invalid_ind_size)
        try:
            from scoop import futures
            fitnesses = futures.map(toolbox.evaluate, [cfg] * invalid_ind_size, invalid_ind)
        except (ImportError, ImportWarning):
            fitnesses = toolbox.map(toolbox.evaluate, [cfg] * invalid_ind_size, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[:2]
            ind.id = fit[2]

        # Select the next generation population
        pop = toolbox.select(pop + offspring, pop_size)
        hyper_str = 'Gen: %d, hypervolume: %f\n' % (gen, hypervolume(pop, ref_pt))
        print_message(hyper_str)
        UtilClass.writelog(cfg.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_ind), **record)
        print_message(logbook.stream)

        # Create plot
        front = numpy.array([ind.fitness.values for ind in pop])
        plot_pareto_front(front, ['Economic effectiveness', 'Environmental effectiveness'],
                          ws, gen, 'Pareto frontier of Scenarios Optimization')

        # Save in file
        output_str += 'scenario\teconomy\tenvironment\tgene_values\n'
        for indi in pop:
            output_str += '%d\t%f\t%f\t%s\n' % (indi.id, indi.fitness.values[0],
                                                indi.fitness.values[1], str(indi))
        UtilClass.writelog(cfg.logfile, output_str, mode='append')

        # Delete SEIMS output files and the BMP Scenario database of the current generation
        delete_model_outputs(cfg.model_dir, cfg.hostname, cfg.port, cfg.bmp_scenario_db)

    return pop, logbook

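# Note on the generation-0 `toolbox.select(pop, pop_size)` call above: DEAP's
# tools.selTournamentDCD requires every individual to carry a crowding
# distance, which tools.selNSGA2 assigns as a side effect of non-dominated
# sorting. A minimal sketch of the assumed registration (standard DEAP usage,
# not SEIMS-specific code):
def _demo_register_nsga2_select(toolbox):
    from deap import tools as deap_tools
    toolbox.register('select', deap_tools.selNSGA2)
    # Usage inside the workflow:
    #   pop = toolbox.select(pop, len(pop))        # assigns crowding distance only
    #   offspring = tools.selTournamentDCD(pop, k) # now safe to use DCD tournament
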
def watershed_delineation(np, dem, outlet_file=None, thresh=0, singlebasin=False,
                          workingdir=None, mpi_bin=None, bin_dir=None,
                          logfile=None, runtime_file=None, hostfile=None,
                          avoid_redo=False):
    """Watershed Delineation based on D8 flow direction.

    Args:
        np: process number for MPI
        dem: DEM path
        outlet_file: predefined outlet shapefile path
        thresh: predefined threshold for extracting stream from accumulated flow direction
        singlebasin: when set True, only extract subbasins that drain into predefined outlets
        workingdir: directory that stores outputs
        mpi_bin: directory of MPI executable binary, e.g., mpiexec, mpirun
        bin_dir: directory of TauDEM and other executable binaries
        logfile: log file path
        runtime_file: runtime file path
        hostfile: host list file path for MPI
        avoid_redo: avoid executing some functions that do not depend on input
            arguments when this function is invoked repeatedly
    """
    # 1. Check directories
    if not os.path.exists(dem):
        TauDEM.error('DEM: %s does not exist!' % dem)
    dem = os.path.abspath(dem)
    if workingdir is None or workingdir == '':
        workingdir = os.path.dirname(dem)
    nc = TauDEMFilesUtils(workingdir)  # predefined names
    workingdir = nc.workspace
    UtilClass.mkdir(workingdir)
    # 2. Check log file
    if logfile is not None and FileClass.is_file_exists(logfile):
        os.remove(logfile)
    # 3. Perform calculation
    # Filling DEM
    if not (avoid_redo and FileClass.is_file_exists(nc.filldem)):
        UtilClass.writelog(logfile, '[Output] %s' % 'remove pit...', 'a')
        TauDEM.pitremove(np, dem, nc.filldem, workingdir, mpi_bin, bin_dir,
                         log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Flow direction based on D8 algorithm
    if not (avoid_redo and FileClass.is_file_exists(nc.d8flow)):
        UtilClass.writelog(logfile, '[Output] %s' % 'D8 flow direction...', 'a')
        TauDEM.d8flowdir(np, nc.filldem, nc.d8flow, nc.slp, workingdir,
                         mpi_bin, bin_dir, log_file=logfile,
                         runtime_file=runtime_file, hostfile=hostfile)
    # Flow accumulation without stream skeleton as weight
    if not (avoid_redo and FileClass.is_file_exists(nc.d8acc)):
        UtilClass.writelog(logfile, '[Output] %s' % 'D8 flow accumulation...', 'a')
        TauDEM.aread8(np, nc.d8flow, nc.d8acc, None, None, False, workingdir,
                      mpi_bin, bin_dir, log_file=logfile,
                      runtime_file=runtime_file, hostfile=hostfile)
    # Initial stream network using mean accumulation as threshold
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream raster initially...', 'a')
    min_accum, max_accum, mean_accum, std_accum = RasterUtilClass.raster_statistics(nc.d8acc)
    TauDEM.threshold(np, nc.d8acc, nc.stream_raster, mean_accum, workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    # Outlet position initialization and adjustment
    UtilClass.writelog(logfile, '[Output] %s' % 'Moving outlet to stream...', 'a')
    if outlet_file is None:  # if not given, take the cell with maximum accumulation as outlet
        outlet_file = nc.outlet_pre
        TauDEM.connectdown(np, nc.d8flow, nc.d8acc, outlet_file, nc.outlet_m, wtsd=None,
                           workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                           log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.moveoutletstostrm(np, nc.d8flow, nc.stream_raster, outlet_file,
                             nc.outlet_m, workingdir=workingdir,
                             mpiexedir=mpi_bin, exedir=bin_dir,
                             log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Stream skeleton by Peuker-Douglas algorithm
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream skeleton...', 'a')
    TauDEM.peukerdouglas(np, nc.filldem, nc.stream_pd, workingdir,
                         mpi_bin, bin_dir, log_file=logfile,
                         runtime_file=runtime_file, hostfile=hostfile)
    # Weighted flow accumulation with outlet
    UtilClass.writelog(logfile, '[Output] %s' % 'Flow accumulation with outlet...', 'a')
    tmp_outlet = None
    if singlebasin:
        tmp_outlet = nc.outlet_m
    TauDEM.aread8(np, nc.d8flow, nc.d8acc_weight, tmp_outlet, nc.stream_pd, False,
                  workingdir, mpi_bin, bin_dir,
                  log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Determine threshold by input argument or dropanalysis function
    if thresh <= 0:  # find the optimal threshold using dropanalysis function
        UtilClass.writelog(logfile, '[Output] %s' %
                           'Drop analysis to select optimal threshold...', 'a')
        min_accum, max_accum, mean_accum, std_accum = \
            RasterUtilClass.raster_statistics(nc.d8acc_weight)
        if mean_accum - std_accum < 0:
            minthresh = mean_accum
        else:
            minthresh = mean_accum - std_accum
        maxthresh = mean_accum + std_accum
        TauDEM.dropanalysis(np, nc.filldem, nc.d8flow, nc.d8acc_weight,
                            nc.d8acc_weight, nc.outlet_m, minthresh, maxthresh,
                            20, 'true', nc.drptxt, workingdir, mpi_bin, bin_dir,
                            log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
        if not FileClass.is_file_exists(nc.drptxt):
            # raise RuntimeError('Dropanalysis failed and drp.txt was not created!')
            UtilClass.writelog(logfile, '[Output] %s' % 'dropanalysis failed!', 'a')
            thresh = 0.5 * (maxthresh - minthresh) + minthresh
        else:
            with open(nc.drptxt, 'r', encoding='utf-8') as drpf:
                temp_contents = drpf.read()
                (beg, thresh) = temp_contents.rsplit(' ', 1)
            thresh = float(thresh)
        UtilClass.writelog(logfile, '[Output] %s: %f' %
                           ('Selected optimal threshold', thresh), 'a')
    # Final stream network
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream raster...', 'a')
    TauDEM.threshold(np, nc.d8acc_weight, nc.stream_raster, thresh,
                     workingdir, mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream net...', 'a')
    TauDEM.streamnet(np, nc.filldem, nc.d8flow, nc.d8acc_weight, nc.stream_raster,
                     nc.outlet_m, nc.stream_order, nc.channel_net, nc.channel_coord,
                     nc.streamnet_shp, nc.subbsn, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Serialize IDs of subbasins and the corresponding streams
    UtilClass.writelog(logfile, '[Output] %s' % 'Serialize subbasin&stream IDs...', 'a')
    id_map = StreamnetUtil.serialize_streamnet(nc.streamnet_shp, nc.streamnet_m)
    RasterUtilClass.raster_reclassify(nc.subbsn, id_map, nc.subbsn_m, GDT_Int32)
    StreamnetUtil.assign_stream_id_raster(nc.stream_raster, nc.subbsn_m, nc.stream_m)
    # Convert raster to shapefile (for subbasin and basin)
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating subbasin vector...', 'a')
    VectorUtilClass.raster2shp(nc.subbsn_m, nc.subbsn_shp, 'subbasin', 'SUBBASINID')
    # Finish the workflow
    UtilClass.writelog(logfile, '[Output] %s' % 'Original subbasin delineation is finished!', 'a')

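# A repeated-invocation sketch for the `avoid_redo` flag above (hypothetical
# paths): on the second call with the same workspace, pit filling, D8 flow
# direction, and the unweighted flow accumulation are skipped because their
# outputs already exist, so only the threshold-dependent steps are redone.
def _demo_avoid_redo():
    dem = '/data/demo/dem_30m.tif'  # assumed input DEM
    workingdir = '/data/demo/taudem_out'
    watershed_delineation(4, dem, thresh=0, workingdir=workingdir)
    watershed_delineation(4, dem, thresh=500, workingdir=workingdir, avoid_redo=True)
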
def main(cfg):
    """Main workflow of NSGA-II based Calibration."""
    random.seed()
    print_message('Population: %d, Generation: %d' % (cfg.opt.npop, cfg.opt.ngens))

    # Initial timespan variables
    stime = time.time()
    plot_time = 0.
    allmodels_exect = list()  # execute time of all model runs

    # Create reference point for hypervolume
    ref_pt = numpy.array(worse_objects) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    # Read observation data from MongoDB just once
    cali_obj = Calibration(cfg)
    model_cfg_dict = cali_obj.model.ConfigDict
    model_obj = MainSEIMS(args_dict=model_cfg_dict)
    obs_vars, obs_data_dict = model_obj.ReadOutletObservations(object_vars)

    # Initialize population
    param_values = cali_obj.initialize(cfg.opt.npop)
    pop = list()
    for i in range(cfg.opt.npop):
        ind = creator.Individual(param_values[i])
        ind.gen = 0
        ind.id = i
        ind.obs.vars = obs_vars[:]
        ind.obs.data = deepcopy(obs_data_dict)
        pop.append(ind)
    param_values = numpy.array(param_values)

    # Write calibrated values to MongoDB
    # TODO: extract this function, which is the same as
    #       `Sensitivity::write_param_values_to_mongodb`.
    write_param_values_to_mongodb(cfg.model.host, cfg.model.port, cfg.model.db_name,
                                  cali_obj.ParamDefs, param_values)
    # Get the lower and upper bounds of calibrated parameters
    bounds = numpy.array(cali_obj.ParamDefs['bounds'])
    low = bounds[:, 0]
    up = bounds[:, 1]
    low = low.tolist()
    up = up.tolist()
    pop_select_num = int(cfg.opt.npop * cfg.opt.rsel)
    init_time = time.time() - stime

    def evaluate_parallel(invalid_pops):
        """Evaluate model by SCOOP or map, and set fitness of individuals
        according to calibration step."""
        popnum = len(invalid_pops)
        labels = list()
        try:  # parallel on multi-processors or clusters using SCOOP
            from scoop import futures
            invalid_pops = list(futures.map(toolbox.evaluate, [cali_obj] * popnum,
                                            invalid_pops))
        except (ImportError, ImportWarning):  # Python built-in map (serial)
            invalid_pops = list(toolbox.map(toolbox.evaluate, [cali_obj] * popnum,
                                            invalid_pops))
        for tmpind in invalid_pops:
            if step == 'Q':  # Step 1 Calibrating discharge
                tmpind.fitness.values, labels = tmpind.cali.efficiency_values('Q',
                                                                              object_names)
            elif step == 'SED':  # Step 2 Calibrating sediment
                sedobjvs, labels = tmpind.cali.efficiency_values('SED', object_names)
                qobjvs, qobjlabels = tmpind.cali.efficiency_values('Q', object_names)
                labels += [qobjlabels[0]]
                sedobjvs += [qobjvs[0]]
                tmpind.fitness.values = sedobjvs[:]
            elif step == 'NUTRIENT':  # Step 3 Calibrating NUTRIENT, i.e., TN and TP
                tnobjvs, tnobjlabels = tmpind.cali.efficiency_values('CH_TN', object_names)
                tpobjvs, tpobjlabels = tmpind.cali.efficiency_values('CH_TP', object_names)
                qobjvs, qobjlabels = tmpind.cali.efficiency_values('Q', object_names)
                sedobjvs, sedobjlabels = tmpind.cali.efficiency_values('SED', object_names)
                objvs = [tnobjvs[0], tpobjvs[0], qobjvs[0], sedobjvs[0]]
                labels = [tnobjlabels[0], tpobjlabels[0], qobjlabels[0], sedobjlabels[0]]
                tmpind.fitness.values = objvs[:]
        # NSE > 0 is the preliminary condition to be a valid solution!
        if filter_NSE:
            invalid_pops = [tmpind for tmpind in invalid_pops
                            if tmpind.fitness.values[0] > 0]
            if len(invalid_pops) < 2:
                print('The initial population should be greater or equal than 2. '
                      'Please check the parameters ranges or change the sampling strategy!')
                exit(0)
        return invalid_pops, labels  # Now, `invalid_pops` contains evaluated individuals

    # Record the count and execution timespan of model runs during the optimization
    modelruns_count = {0: len(pop)}
    modelruns_time = {0: 0.}  # Total time counted according to evaluate_parallel()
    modelruns_time_sum = {0: 0.}  # Summarized time of every model run according to pop

    # Generation 0 before optimization
    stime = time.time()
    pop, plotlables = evaluate_parallel(pop)
    modelruns_time[0] = time.time() - stime
    for ind in pop:
        allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
        modelruns_time_sum[0] += ind.runtime

    # Currently, len(pop) may be less than pop_select_num
    pop = toolbox.select(pop, pop_select_num)
    # Output simulated data to json or pickle files for future use.
    output_population_details(pop, cfg.opt.simdata_dir, 0)

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)
    print_message(logbook.stream)

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (cfg.opt.ngens,
                                                                           cfg.opt.npop)
    print_message(output_str)
    UtilClass.writelog(cfg.opt.logfile, output_str, mode='replace')

    for gen in range(1, cfg.opt.ngens + 1):
        output_str = '###### Generation: %d ######\n' % gen
        print_message(output_str)

        offspring = [toolbox.clone(ind) for ind in pop]
        # method1: use crowding distance (normalized as 0~1) as eta
        # tools.emo.assignCrowdingDist(offspring)
        # method2: use the index of individual at the sorted offspring list as eta
        if len(offspring) >= 2:  # when offspring size is greater than 2, mate can be done
            for i, ind1, ind2 in zip(range(len(offspring) // 2),
                                     offspring[::2], offspring[1::2]):
                if random.random() > cfg.opt.rcross:
                    continue
                eta = i
                toolbox.mate(ind1, ind2, eta, low, up)
                toolbox.mutate(ind1, eta, low, up, cfg.opt.rmut)
                toolbox.mutate(ind2, eta, low, up, cfg.opt.rmut)
                del ind1.fitness.values, ind2.fitness.values
        else:
            toolbox.mutate(offspring[0], 1., low, up, cfg.opt.rmut)
            del offspring[0].fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        valid_ind = [ind for ind in offspring if ind.fitness.valid]
        if len(invalid_ind) == 0:  # No need to continue
            print_message('Note: No invalid individuals available, '
                          'the NSGA2 will be terminated!')
            break

        # Write new calibrated parameters to MongoDB
        param_values = list()
        for idx, ind in enumerate(invalid_ind):
            ind.gen = gen
            ind.id = idx
            param_values.append(ind[:])
        param_values = numpy.array(param_values)
        write_param_values_to_mongodb(cfg.model.host, cfg.model.port, cfg.model.db_name,
                                      cali_obj.ParamDefs, param_values)

        # Count the model runs, and execute models
        invalid_ind_size = len(invalid_ind)
        modelruns_count.setdefault(gen, invalid_ind_size)
        stime = time.time()
        invalid_ind, plotlables = evaluate_parallel(invalid_ind)
        curtimespan = time.time() - stime
        modelruns_time.setdefault(gen, curtimespan)
        modelruns_time_sum.setdefault(gen, 0.)
        for ind in invalid_ind:
            allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
            modelruns_time_sum[gen] += ind.runtime

        # Select the next generation population
        tmp_pop = list()
        gen_idx = list()
        for ind in pop + valid_ind + invalid_ind:  # these individuals are all evaluated!
            # remove individuals that have a NSE < 0
            if [ind.gen, ind.id] not in gen_idx:
                if filter_NSE and ind.fitness.values[0] < 0:
                    continue
                tmp_pop.append(ind)
                gen_idx.append([ind.gen, ind.id])
        pop = toolbox.select(tmp_pop, pop_select_num)
        output_population_details(pop, cfg.opt.simdata_dir, gen)

        hyper_str = 'Gen: %d, New model runs: %d, ' \
                    'Execute timespan: %.4f, Sum of model run timespan: %.4f, ' \
                    'Hypervolume: %.4f\n' % (gen, invalid_ind_size, curtimespan,
                                             modelruns_time_sum[gen],
                                             hypervolume(pop, ref_pt))
        print_message(hyper_str)
        UtilClass.writelog(cfg.opt.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_ind), **record)
        print_message(logbook.stream)

        # Plot 2D near optimal Pareto front graphs,
        #   i.e., (NSE, RSR), (NSE, PBIAS), and (RSR, PBIAS),
        # and 3D near optimal Pareto front graphs, i.e., (NSE, RSR, PBIAS)
        stime = time.time()
        front = numpy.array([ind.fitness.values for ind in pop])
        plot_pareto_front(front, plotlables, cfg.opt.out_dir, gen,
                          'Near Pareto optimal solutions')
        plot_time += time.time() - stime

        # Save in file
        if step == 'Q':  # Step 1 Calibrate discharge
            output_str += 'generation-calibrationID\t%s' % \
                          pop[0].cali.output_header('Q', object_names, 'Cali')
            if cali_obj.cfg.calc_validation:
                output_str += pop[0].vali.output_header('Q', object_names, 'Vali')
        elif step == 'SED':  # Step 2 Calibrate sediment
            output_str += 'generation-calibrationID\t%s%s' % \
                          (pop[0].cali.output_header('SED', object_names, 'Cali'),
                           pop[0].cali.output_header('Q', object_names, 'Cali'))
            if cali_obj.cfg.calc_validation:
                output_str += '%s%s' % (pop[0].vali.output_header('SED', object_names, 'Vali'),
                                        pop[0].vali.output_header('Q', object_names, 'Vali'))
        elif step == 'NUTRIENT':  # Step 3 Calibrate NUTRIENT, i.e., TN and TP
            output_str += 'generation-calibrationID\t%s%s%s%s' % \
                          (pop[0].cali.output_header('CH_TN', object_names, 'Cali'),
                           pop[0].cali.output_header('CH_TP', object_names, 'Cali'),
                           pop[0].cali.output_header('Q', object_names, 'Cali'),
                           pop[0].cali.output_header('SED', object_names, 'Cali'))
            if cali_obj.cfg.calc_validation:
                output_str += '%s%s%s%s' % (
                    pop[0].vali.output_header('CH_TN', object_names, 'Vali'),
                    pop[0].vali.output_header('CH_TP', object_names, 'Vali'),
                    pop[0].vali.output_header('Q', object_names, 'Vali'),
                    pop[0].vali.output_header('SED', object_names, 'Vali'))
        output_str += 'gene_values\n'
        for ind in pop:
            if step == 'Q':  # Step 1 Calibrate discharge
                output_str += '%d-%d\t%s' % (ind.gen, ind.id,
                                             ind.cali.output_efficiency('Q', object_names))
                if cali_obj.cfg.calc_validation:
                    output_str += ind.vali.output_efficiency('Q', object_names)
            elif step == 'SED':  # Step 2 Calibrate sediment
                output_str += '%d-%d\t%s%s' % (ind.gen, ind.id,
                                               ind.cali.output_efficiency('SED', object_names),
                                               ind.cali.output_efficiency('Q', object_names))
                if cali_obj.cfg.calc_validation:
                    output_str += '%s%s' % (ind.vali.output_efficiency('SED', object_names),
                                            ind.vali.output_efficiency('Q', object_names))
            elif step == 'NUTRIENT':  # Step 3 Calibrate NUTRIENT, i.e., TN and TP
                output_str += '%d-%d\t%s%s%s%s' % (
                    ind.gen, ind.id,
                    ind.cali.output_efficiency('CH_TN', object_names),
                    ind.cali.output_efficiency('CH_TP', object_names),
                    ind.cali.output_efficiency('Q', object_names),
                    ind.cali.output_efficiency('SED', object_names))
                if cali_obj.cfg.calc_validation:
                    output_str += '%s%s%s%s' % (
                        ind.vali.output_efficiency('CH_TN', object_names),
                        ind.vali.output_efficiency('CH_TP', object_names),
                        ind.vali.output_efficiency('Q', object_names),
                        ind.vali.output_efficiency('SED', object_names))
            output_str += str(ind)
            output_str += '\n'
        UtilClass.writelog(cfg.opt.logfile, output_str, mode='append')

        # TODO: Figure out if we should terminate the evolution

    # Plot hypervolume and newly executed model count
    plot_hypervolume_single(cfg.opt.hypervlog, cfg.opt.out_dir)

    # Save and print timespan information
    allmodels_exect = numpy.array(allmodels_exect)
    numpy.savetxt('%s/exec_time_allmodelruns.txt' % cfg.opt.out_dir, allmodels_exect,
                  delimiter=' ', fmt='%.4f')
    print_message('Running time of all SEIMS models:\n'
                  '\tIO\tCOMP\tSIMU\tRUNTIME\n'
                  'MAX\t%s\n'
                  'MIN\t%s\n'
                  'AVG\t%s\n'
                  'SUM\t%s\n' % ('\t'.join('%.3f' % v for v in allmodels_exect.max(0)),
                                 '\t'.join('%.3f' % v for v in allmodels_exect.min(0)),
                                 '\t'.join('%.3f' % v for v in allmodels_exect.mean(0)),
                                 '\t'.join('%.3f' % v for v in allmodels_exect.sum(0))))

    exec_time = 0.
    for genid, tmptime in list(modelruns_time.items()):
        exec_time += tmptime
    exec_time_sum = 0.
    for genid, tmptime in list(modelruns_time_sum.items()):
        exec_time_sum += tmptime
    allcount = 0
    for genid, tmpcount in list(modelruns_count.items()):
        allcount += tmpcount
    print_message('Initialization timespan: %.4f\n'
                  'Model execution timespan: %.4f\n'
                  'Sum of model runs timespan: %.4f\n'
                  'Plot Pareto graphs timespan: %.4f' % (init_time, exec_time,
                                                         exec_time_sum, plot_time))

    return pop, logbook

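# How such a SCOOP-parallelized workflow is typically launched (illustrative
# command lines; the script name, config file, and worker count are
# assumptions, not documented SEIMS options):
#   serial:   python main_nsga2.py -ini calibration.ini
#   parallel: python -m scoop -n 8 main_nsga2.py -ini calibration.ini
# When started with `-m scoop`, the `from scoop import futures` import inside
# evaluate_parallel() takes the parallel path and model evaluations are
# distributed over the workers; otherwise evaluation falls back to the serial map.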