def _fill_period_statistics(model_obj, period, stime, etime):
    """Extract simulation data of one period and store its statistics on `period`.

    Args:
        model_obj: Executed model object providing `ExtractSimData`,
            `ExtractSimObsData`, and `CalcTimeseriesStatistics`.
        period: Container updated in place (`ind.cali` or `ind.vali`); its `vars`,
            `data`, `sim_obs_data`, `objnames`, and `objvalues` are set, and
            `valid` is set to True when both statistics results are non-empty.
        stime: Start time of the period.
        etime: End time of the period.
    """
    period.vars, period.data = model_obj.ExtractSimData(stime, etime)
    period.sim_obs_data = model_obj.ExtractSimObsData(stime, etime)
    period.objnames, period.objvalues = model_obj.CalcTimeseriesStatistics(
        period.sim_obs_data, stime, etime)
    if period.objnames and period.objvalues:
        period.valid = True


def calibration_objectives(cali_obj, ind):
    """Evaluate the objectives of given individual.

    Runs one model with the calibration ID of `ind`, reads the simulated time series,
    and fills the calibration (and optionally validation) statistics of `ind`.

    Args:
        cali_obj: Calibration object; its `ID` is set to `ind.id`, and its
            `model.ConfigDict` and `cfg` (period settings) are read.
        ind: Individual to be evaluated; `ind.id` and `ind.obs` are read, while
            `ind.sim`, `ind.cali`, `ind.vali`, and the timespan attributes are updated.

    Returns:
        The same `ind` object. If the simulated time series cannot be read, it is
        returned without statistics and without timespan attributes.
    """
    cali_obj.ID = ind.id
    model_args = cali_obj.model.ConfigDict
    model_args['calibration_id'] = ind.id
    model_obj = MainSEIMS(args_dict=model_args)

    # Set observation data to model_obj, no need to query database
    model_obj.SetOutletObservations(ind.obs.vars, ind.obs.data)

    model_obj.SetMongoClient()
    try:
        # Execute model
        model_obj.run()
        time.sleep(0.1)  # Wait a moment in case of unpredictable file system error

        # read simulation data of the entire simulation period
        # (include calibration and validation)
        if not model_obj.ReadTimeseriesSimulations():
            return ind
        ind.sim.vars = model_obj.sim_vars[:]
        ind.sim.data = deepcopy(model_obj.sim_value)

        # Calculate NSE, R2, RMSE, PBIAS, and RSR, etc. of calibration period
        _fill_period_statistics(model_obj, ind.cali,
                                cali_obj.cfg.cali_stime, cali_obj.cfg.cali_etime)
        # Calculate NSE, R2, RMSE, PBIAS, and RSR, etc. of validation period
        if cali_obj.cfg.calc_validation:
            _fill_period_statistics(model_obj, ind.vali,
                                    cali_obj.cfg.vali_stime, cali_obj.cfg.vali_etime)

        # Get timespan
        (ind.io_time, ind.comp_time,
         ind.simu_time, ind.runtime) = model_obj.GetTimespan()
    finally:
        # Always delete the model output directory (for saving storage) and release
        # the MongoDB client, even if the model run or the data extraction raised.
        model_obj.clean(calibration_id=ind.id)
        model_obj.UnsetMongoClient()
    return ind
def main(cfg):
    """Main workflow of NSGA-II based parameter calibration.

    The workflow is: read observation data once, create and evaluate the initial
    population (generation 0), then for each generation mate/mutate the population,
    evaluate the individuals with invalid fitness, select the next population from
    the de-duplicated candidates, and output logs, plots, and timespan information.

    Besides `cfg`, this function relies on module-level names defined outside of this
    block, e.g., `toolbox`, `creator`, `multiobj`, `object_names`, `object_vars`,
    `conditions`, `filter_ind`, `worse_objects`, and `multi_weight`.

    Args:
        cfg: Configuration object; `cfg.opt` (npop, ngens, rsel, rcross, rmut,
            output paths) and `cfg.model.db_name` are read here.

    Returns:
        A tuple of (final population, DEAP logbook).

    Raises:
        SystemExit: With exit code 2 if less than two individuals pass the filter
            of valid solutions.
    """
    random.seed()
    scoop_log('Population: %d, Generation: %d' % (cfg.opt.npop, cfg.opt.ngens))

    # Initial timespan variables
    stime = time.time()
    plot_time = 0.
    allmodels_exect = list()  # execute time of all model runs

    # create reference point for hypervolume
    ref_pt = numpy.array(worse_objects) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    # read observation data from MongoDB
    cali_obj = Calibration(cfg)

    # Read observation data just once
    model_cfg_dict = cali_obj.model.ConfigDict
    model_obj = MainSEIMS(args_dict=model_cfg_dict)
    model_obj.SetMongoClient()
    obs_vars, obs_data_dict = model_obj.ReadOutletObservations(object_vars)
    model_obj.UnsetMongoClient()

    # Initialize population
    param_values = cali_obj.initialize(cfg.opt.npop)
    pop = list()
    for i in range(cfg.opt.npop):
        ind = creator.Individual(param_values[i])
        ind.gen = 0
        ind.id = i
        ind.obs.vars = obs_vars[:]
        ind.obs.data = deepcopy(obs_data_dict)
        pop.append(ind)
    param_values = numpy.array(param_values)

    # Write calibrated values to MongoDB
    # TODO, extract this function, which is same with
    #   `Sensitivity::write_param_values_to_mongodb`.
    write_param_values_to_mongodb(cfg.model.db_name, cali_obj.ParamDefs, param_values)
    # get the low and up bound of calibrated parameters
    bounds = numpy.array(cali_obj.ParamDefs['bounds'])
    low = bounds[:, 0].tolist()
    up = bounds[:, 1].tolist()
    pop_select_num = int(cfg.opt.npop * cfg.opt.rsel)
    init_time = time.time() - stime

    def check_validation(fitvalues):
        """Check the validation of the fitness values of an individual.

        Each non-None item of `conditions` is a comparison suffix that is appended
        to the formatted fitness value at the same index and then evaluated.
        """
        flag = True
        for condidx, condstr in enumerate(conditions):
            if condstr is None:
                continue
            # NOTE(review): `eval` executes `condstr` as Python code. This is only
            #   acceptable as long as `conditions` comes from a trusted configuration.
            if not eval('%f%s' % (fitvalues[condidx], condstr)):
                flag = False
        return flag

    def evaluate_parallel(invalid_pops):
        """Evaluate model by SCOOP or map, and set fitness of individuals
        according to calibration step.

        Returns:
            A tuple of (evaluated and optionally filtered individuals, labels of
            the objectives taken from the last evaluated individual).
        """
        popnum = len(invalid_pops)
        labels = list()
        try:  # parallel on multi-processors or clusters using SCOOP
            from scoop import futures
            invalid_pops = list(futures.map(toolbox.evaluate,
                                            [cali_obj] * popnum, invalid_pops))
        except (ImportError, ImportWarning):  # Python build-in map (serial)
            # A tuple is required here: `ImportError or ImportWarning` is an
            # expression that evaluates to `ImportError` only.
            invalid_pops = list(map(toolbox.evaluate,
                                    [cali_obj] * popnum, invalid_pops))
        for tmpind in invalid_pops:
            labels = list()  # TODO, find an elegant way to get labels.
            tmpfitnessv = list()
            for k, _ in multiobj.items():
                tmpvalues, tmplabel = tmpind.cali.efficiency_values(k, object_names[k])
                tmpfitnessv += tmpvalues[:]
                labels += tmplabel[:]
            tmpind.fitness.values = tuple(tmpfitnessv)

        # Filter for a valid solution
        if filter_ind:
            invalid_pops = [tmpind for tmpind in invalid_pops
                            if check_validation(tmpind.fitness.values)]
            if len(invalid_pops) < 2:
                print('The initial population should be greater or equal than 2. '
                      'Please check the parameters ranges or change the sampling strategy!')
                # `exit()` is injected by the `site` module and may be unavailable,
                # raising SystemExit directly is equivalent and always works.
                raise SystemExit(2)
        # Currently, `invalid_pops` contains evaluated individuals
        return invalid_pops, labels

    # Record the count and execute timespan of model runs during the optimization
    modelruns_count = {0: len(pop)}
    modelruns_time = {0: 0.}  # Total time counted according to evaluate_parallel()
    modelruns_time_sum = {0: 0.}  # Summarize time of every model runs according to pop

    # Generation 0 before optimization
    stime = time.time()
    pop, plotlables = evaluate_parallel(pop)
    modelruns_time[0] = time.time() - stime
    for ind in pop:
        allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
        modelruns_time_sum[0] += ind.runtime

    # currently, len(pop) may less than pop_select_num
    pop = toolbox.select(pop, pop_select_num)
    # Output simulated data to json or pickle files for future use.
    output_population_details(pop, cfg.opt.simdata_dir, 0,
                              plot_cfg=cali_obj.cfg.plot_cfg)

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)
    scoop_log(logbook.stream)

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (
        cfg.opt.ngens, cfg.opt.npop)
    scoop_log(output_str)
    UtilClass.writelog(cfg.opt.logfile, output_str, mode='replace')

    modelsel_count = {0: len(pop)}  # type: Dict[int, int] # newly added Pareto fronts

    for gen in range(1, cfg.opt.ngens + 1):
        output_str = '###### Generation: %d ######\n' % gen
        scoop_log(output_str)

        offspring = [toolbox.clone(ind) for ind in pop]
        # method1: use crowding distance (normalized as 0~1) as eta
        # tools.emo.assignCrowdingDist(offspring)
        # method2: use the index of individual at the sorted offspring list as eta
        if len(offspring) >= 2:  # when offspring size greater than 2, mate can be done
            for i, ind1, ind2 in zip(range(len(offspring) // 2),
                                     offspring[::2], offspring[1::2]):
                if random.random() > cfg.opt.rcross:
                    continue
                eta = i
                toolbox.mate(ind1, ind2, eta, low, up)
                toolbox.mutate(ind1, eta, low, up, cfg.opt.rmut)
                toolbox.mutate(ind2, eta, low, up, cfg.opt.rmut)
                del ind1.fitness.values, ind2.fitness.values
        else:
            toolbox.mutate(offspring[0], 1., low, up, cfg.opt.rmut)
            del offspring[0].fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_inds = [ind for ind in offspring if not ind.fitness.valid]
        valid_inds = [ind for ind in offspring if ind.fitness.valid]
        if len(invalid_inds) == 0:  # No need to continue
            scoop_log('Note: No invalid individuals available, the NSGA2 will be terminated!')
            break

        # Write new calibrated parameters to MongoDB
        param_values = list()
        for idx, ind in enumerate(invalid_inds):
            ind.gen = gen
            ind.id = idx
            param_values.append(ind[:])
        param_values = numpy.array(param_values)
        write_param_values_to_mongodb(cfg.model.db_name, cali_obj.ParamDefs, param_values)
        # Count the model runs, and execute models
        invalid_ind_size = len(invalid_inds)
        modelruns_count.setdefault(gen, invalid_ind_size)
        stime = time.time()
        invalid_inds, plotlables = evaluate_parallel(invalid_inds)
        curtimespan = time.time() - stime
        modelruns_time.setdefault(gen, curtimespan)
        modelruns_time_sum.setdefault(gen, 0.)
        for ind in invalid_inds:
            allmodels_exect.append([ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
            modelruns_time_sum[gen] += ind.runtime

        # Select the next generation population
        # Previous version may result in duplications of the same scenario in one
        #   Pareto front, thus, the duplications are checked and removed first.
        #   An individual is identified by its (generation, id) pair; the first
        #   occurrence is kept.
        seen_keys = set()
        unique_pop = list()
        for tmpind in pop + valid_inds + invalid_inds:
            key = (tmpind.gen, tmpind.id)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            unique_pop.append(tmpind)
        pop = toolbox.select(unique_pop, pop_select_num)

        output_population_details(pop, cfg.opt.simdata_dir, gen,
                                  plot_cfg=cali_obj.cfg.plot_cfg)
        hyper_str = 'Gen: %d, New model runs: %d, ' \
                    'Execute timespan: %.4f, Sum of model run timespan: %.4f, ' \
                    'Hypervolume: %.4f\n' % (gen, invalid_ind_size,
                                             curtimespan, modelruns_time_sum[gen],
                                             hypervolume(pop, ref_pt))
        scoop_log(hyper_str)
        UtilClass.writelog(cfg.opt.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_inds), **record)
        scoop_log(logbook.stream)

        # Count the newly generated near Pareto fronts
        new_count = sum(1 for sel_ind in pop if sel_ind.gen == gen)
        modelsel_count.setdefault(gen, new_count)

        # Plot 2D near optimal pareto front graphs,
        #   i.e., (NSE, RSR), (NSE, PBIAS), and (RSR,PBIAS)
        # And 3D near optimal pareto front graphs, i.e., (NSE, RSR, PBIAS)
        stime = time.time()
        front = numpy.array([ind.fitness.values for ind in pop])
        title = (u'近似最优Pareto解集' if cali_obj.cfg.plot_cfg.plot_cn
                 else 'Near Pareto optimal solutions')
        plot_pareto_front_single(front, plotlables, cfg.opt.out_dir,
                                 gen, title, plot_cfg=cali_obj.cfg.plot_cfg)
        plot_time += time.time() - stime

        # save in file
        # Header information
        log_parts = [output_str, 'generation\tcalibrationID\t']
        for kk, vv in object_names.items():
            log_parts.append(pop[0].cali.output_header(kk, vv, 'Cali'))
        if cali_obj.cfg.calc_validation:
            for kkk, vvv in object_names.items():
                log_parts.append(pop[0].vali.output_header(kkk, vvv, 'Vali'))
        log_parts.append('gene_values\n')
        # One line for each selected individual
        for ind in pop:
            log_parts.append('%d\t%d\t' % (ind.gen, ind.id))
            for kk, vv in object_names.items():
                log_parts.append(ind.cali.output_efficiency(kk, vv))
            if cali_obj.cfg.calc_validation:
                for kkk, vvv in object_names.items():
                    log_parts.append(ind.vali.output_efficiency(kkk, vvv))
            log_parts.append(str(ind))
            log_parts.append('\n')
        output_str = ''.join(log_parts)
        UtilClass.writelog(cfg.opt.logfile, output_str, mode='append')

        # TODO: Figure out if we should terminate the evolution

    # Plot hypervolume and newly executed model count
    plot_hypervolume_single(cfg.opt.hypervlog, cfg.opt.out_dir,
                            plot_cfg=cali_obj.cfg.plot_cfg)

    # Save newly added Pareto fronts of each generations
    new_fronts_count = numpy.array(list(modelsel_count.items()))
    numpy.savetxt('%s/new_pareto_fronts_count.txt' % cfg.opt.out_dir,
                  new_fronts_count, delimiter=str(','), fmt=str('%d'))

    # Save and print timespan information
    allmodels_exect = numpy.array(allmodels_exect)
    numpy.savetxt('%s/exec_time_allmodelruns.txt' % cfg.opt.out_dir, allmodels_exect,
                  delimiter=str(' '), fmt=str('%.4f'))
    scoop_log('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' % ('\t'.join('%.3f' % t for t in allmodels_exect.max(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.min(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.mean(0)),
                             '\t'.join('%.3f' % t for t in allmodels_exect.sum(0))))

    exec_time = sum(modelruns_time.values())
    exec_time_sum = sum(modelruns_time_sum.values())
    scoop_log('Initialization timespan: %.4f\n'
              'Model execution timespan: %.4f\n'
              'Sum of model runs timespan: %.4f\n'
              'Plot Pareto graphs timespan: %.4f' % (init_time, exec_time,
                                                     exec_time_sum, plot_time))

    return pop, logbook
class Scenario(object):
    """Base class of Scenario Analysis.

    Attributes:
        ID(integer): Unique ID in BMPScenario database -> BMP_SCENARIOS collection
        eval_timerange(float): Evaluation time range calculated from the evaluation
                               period of the configuration, the unit is year.
        economy(float): Economical effectiveness, e.g., income minus expenses
        environment(float): Environmental effectiveness, e.g., reduction rate of soil erosion
        worst_econ: Worst economical effectiveness, copied from the configuration.
        worst_env: Worst environmental effectiveness, copied from the configuration.
        gene_num(integer): The number of genes of one chromosome, i.e., an individual
        gene_values(list): BMP identifiers on each location of gene. The length is gene_num.
        bmp_items(dict): BMP configuration items that can be imported to MongoDB directly.
                         The key is `bson.objectid.ObjectId`, the value is scenario item dict.
        rule_mtd: Method of configuring BMPs (`cfg.bmps_cfg_method`).
        scenario_dir: Predefined directory to store scenarios related files.
        model: SEIMS model object (`MainSEIMS`) used to run and clean the model.
        modelout_dir: Model output directory, determined in `execute_seims_model`.
        modelrun(boolean): Has SEIMS model been executed?
    """

    def __init__(self, cfg):
        # type: (SAConfig) -> None
        """Initialize scenario settings and the SEIMS model from configuration."""
        self.ID = -1
        self.eval_timerange = 1.  # unit: year
        self.economy = 0.
        self.environment = 0.
        self.worst_econ = cfg.worst_econ
        self.worst_env = cfg.worst_env

        self.gene_num = 0
        self.gene_values = list()  # type: List[int]
        self.bmp_items = dict()

        self.rule_mtd = cfg.bmps_cfg_method
        self.bmps_info = cfg.bmps_info
        self.bmps_retain = cfg.bmps_retain
        self.eval_info = cfg.eval_info
        self.export_sce_txt = cfg.export_sce_txt
        self.export_sce_tif = cfg.export_sce_tif
        self.scenario_dir = cfg.scenario_dir  # predefined directories to store scenarios related
        # SEIMS-based model related
        self.modelcfg = cfg.model
        self.modelcfg_dict = self.modelcfg.ConfigDict
        self.model = MainSEIMS(args_dict=self.modelcfg_dict)

        self.model.SetMongoClient()
        try:
            self.model.ReadMongoDBData()
            self.scenario_db = self.model.ScenarioDBName
            self.model.ResetSimulationPeriod()  # Reset the simulation period
            # Reset the starttime and endtime of the desired outputs according to
            #   evaluation period
            if ModelCfgFields.output_id in self.eval_info:
                self.model.ResetOutputsPeriod(self.eval_info[ModelCfgFields.output_id],
                                              cfg.eval_stime, cfg.eval_etime)
            else:
                print('Warning: No OUTPUTID is defined in BMPs_info. Please make sure the '
                      'STARTTIME and ENDTIME of ENVEVAL are consistent with Evaluation period!')
        finally:
            # Unset in time, even if reading or resetting failed!
            self.model.UnsetMongoClient()

        # (Re)Calculate timerange in the unit of year
        dlt = cfg.eval_etime - cfg.eval_stime + timedelta(seconds=1)
        self.eval_timerange = (dlt.days * 86400. + dlt.seconds) / 86400. / 365.
        self.modelout_dir = None  # determined in `execute_seims_model` based on unique scenario ID
        self.modelrun = False  # indicate whether the model has been executed

    def set_unique_id(self, given_id=None):
        # type: (Optional[int]) -> int
        """Set unique ID.

        Args:
            given_id: ID to be used. If None, a new one is taken from
                `generate_uniqueid()`.

        Returns:
            The scenario ID that has been set.
        """
        if given_id is None:
            self.ID = next(generate_uniqueid())
        else:
            self.ID = given_id
        # Update scenario ID for self.modelcfg and self.model
        self.model.scenario_id = self.ID
        self.modelcfg.scenario_id = self.ID
        # An empty configuration dict gets 0 rather than the scenario ID.
        self.modelcfg_dict['scenario_id'] = self.ID if self.modelcfg_dict else 0
        return self.ID

    def rule_based_config(self, method, conf_rate):
        # type: (str, float) -> None
        """Config available BMPs to each gene of the chromosome by rule-based method.

        Virtual function that should be overridden in inherited Scenario class.
        """
        pass

    def random_based_config(self, conf_rate):
        # type: (float) -> None
        """Config available BMPs to each gene of the chromosome by random-based method.

        Virtual function that should be overridden in inherited Scenario class.
        """
        pass

    def decoding(self):
        """Decoding gene_values to bmp_items

        This function should be overridden.
        """
        pass

    def export_to_mongodb(self):
        """Export current scenario to MongoDB.

        Delete the same ScenarioID if existed. The deletion is best-effort: a failure
        is reported as warning and the scenario items are inserted anyway.
        """
        conn = MongoDBObj.client
        db = conn[self.scenario_db]
        collection = db[DBTableNames.scenarios]
        try:
            # Remove all existing items of the same ScenarioID in one request.
            # `delete_many` replaces the deprecated `find().count()` + `remove()`.
            collection.delete_many({'ID': self.ID})
        except (NetworkTimeout, Exception) as err:  # In case of unexpected raise
            # A tuple is required here: `NetworkTimeout or Exception` is an expression
            # that evaluates to `NetworkTimeout` only.
            print('Warning: Failed to remove existing items of Scenario %d '
                  'from MongoDB: %s' % (self.ID, err))
        for bmp_item in self.bmp_items.values():
            bmp_item['_id'] = ObjectId()  # always insert as a new document
            collection.insert_one(bmp_item)

    def export_scenario_to_txt(self):
        """Export current scenario information to text file.

        This function is better be called after `calculate_economy` and
        `calculate_environment` or in static method, e.g., `scenario_effectiveness`.

        Nothing is written if `self.export_sce_txt` is False. Otherwise, the file
        `Scenario_<ID>.txt` is written to `self.scenario_dir`.
        """
        if not self.export_sce_txt:
            return
        ofile = os.path.join(self.scenario_dir, 'Scenario_%d.txt' % self.ID)
        with open(ofile, 'w', encoding='utf-8') as outfile:
            outfile.write('Scenario ID: %d\n' % self.ID)
            outfile.write('Gene number: %d\n' % self.gene_num)
            outfile.write('Gene values: %s\n' % ', '.join(repr(v) for v in self.gene_values))
            outfile.write('Scenario items:\n')
            if self.bmp_items:
                # The keys of the first item are used as header of all items
                first_item = next(iter(self.bmp_items.values()))
                outfile.write('\t'.join(first_item.keys()))
                outfile.write('\n')
                for item in self.bmp_items.values():
                    outfile.write('\t'.join(str(v) for v in item.values()))
                    outfile.write('\n')
            outfile.write('Effectiveness:\n\teconomy: %f\n\tenvironment: %f\n' %
                          (self.economy, self.environment))

    def export_scenario_to_gtiff(self):
        """Export the areal BMPs to gtiff for further analysis.

        This function should be overridden in inherited class.
        """
        pass

    def import_from_mongodb(self, sid):
        """Import a specified Scenario (`sid`) from MongoDB.

        This function should be overridden in inherited class.

        Returns:
            True if succeed, otherwise False.
        """
        pass

    def import_from_txt(self, sid):
        """Import a specified Scenario (`sid`) from text file.

        This function should be overridden in inherited class.

        Returns:
            True if succeed, otherwise False.
        """
        pass

    def calculate_economy(self):
        """Calculate economical effectiveness, which is application specified."""
        pass

    def calculate_environment(self):
        """Calculate environment effectiveness, which is application specified."""
        pass

    def clean(self, scenario_id=None, calibration_id=None, delete_scenario=False,
              delete_spatial_gfs=False):
        """Clean the intermediate data.

        All arguments are passed to `self.model.clean()` unchanged. The MongoDB
        client of the model is set before and always unset afterwards.
        """
        # model clean
        self.model.SetMongoClient()
        try:
            self.model.clean(scenario_id=scenario_id, calibration_id=calibration_id,
                             delete_scenario=delete_scenario,
                             delete_spatial_gfs=delete_spatial_gfs)
        finally:
            self.model.UnsetMongoClient()

    def execute_seims_model(self):
        """Run SEIMS for evaluating environmental effectiveness.

        This function does not modify `self.economy` or `self.environment`; the caller
        is expected to handle a failed execution according to the returned flag.

        Returns:
            `self.model.run_success` after the model run.
        """
        scoop_log('Scenario ID: %d, running SEIMS model...' % self.ID)
        self.model.scenario_id = self.ID
        self.modelout_dir = self.model.OutputDirectory
        self.model.SetMongoClient()
        try:
            self.model.run()
        finally:
            # Release the MongoDB client even if the model run raised.
            self.model.UnsetMongoClient()
        self.modelrun = True
        return self.model.run_success

    def initialize(self, input_genes=None):
        # type: (Optional[List]) -> List
        """Initialize a scenario.

        This function does nothing in the base class.

        Returns:
            A list contains BMPs identifier of each gene location.
        """
        pass