Beispiel #1
0
def _calc_period_statistics(model_obj, period, stime, etime):
    """Fill `period` with simulated data and statistics between `stime` and `etime`.

    `period` is the `cali` or `vali` member of an individual. Its `valid` flag is only
    switched on when both objective names and objective values are non-empty.
    """
    period.vars, period.data = model_obj.ExtractSimData(stime, etime)
    period.sim_obs_data = model_obj.ExtractSimObsData(stime, etime)
    period.objnames, period.objvalues = model_obj.CalcTimeseriesStatistics(
        period.sim_obs_data, stime, etime)
    if period.objnames and period.objvalues:
        period.valid = True


def calibration_objectives(cali_obj, ind):
    """Run one SEIMS model for the given individual and evaluate its objectives.

    Args:
        cali_obj: Calibration object. Its `ID` is set to `ind.id`, its `model.ConfigDict`
            provides the model arguments, and its `cfg` provides the calibration
            (and, if `cfg.calc_validation` is set, validation) periods.
        ind: Individual to be evaluated. The observations are read from `ind.obs`,
            and the results are written to `ind.sim`, `ind.cali`, `ind.vali`,
            and the timespan attributes (`io_time`, `comp_time`, `simu_time`, `runtime`).

    Returns:
        The same individual `ind`. If the simulated time series cannot be read,
        `ind` is returned without simulation data, statistics, and timespans.
    """
    cali_obj.ID = ind.id
    model_args = cali_obj.model.ConfigDict
    model_args['calibration_id'] = ind.id
    model_obj = MainSEIMS(args_dict=model_args)

    # Set observation data to model_obj, no need to query database
    model_obj.SetOutletObservations(ind.obs.vars, ind.obs.data)

    model_obj.SetMongoClient()
    try:
        # Execute model
        model_obj.run()
        time.sleep(0.1)  # Wait a moment in case of unpredictable file system error

        # Read simulation data of the entire simulation period
        #   (include calibration and validation)
        if model_obj.ReadTimeseriesSimulations():
            ind.sim.vars = model_obj.sim_vars[:]
            ind.sim.data = deepcopy(model_obj.sim_value)

            # Calculate NSE, R2, RMSE, PBIAS, and RSR, etc. of calibration period
            _calc_period_statistics(model_obj, ind.cali,
                                    cali_obj.cfg.cali_stime, cali_obj.cfg.cali_etime)
            # Calculate NSE, R2, RMSE, PBIAS, and RSR, etc. of validation period
            if cali_obj.cfg.calc_validation:
                _calc_period_statistics(model_obj, ind.vali,
                                        cali_obj.cfg.vali_stime, cali_obj.cfg.vali_etime)

            # Get timespan
            (ind.io_time, ind.comp_time,
             ind.simu_time, ind.runtime) = model_obj.GetTimespan()

        # Delete model output directory for saving storage (on success and on read failure)
        model_obj.clean(calibration_id=ind.id)
    finally:
        # Always release the MongoDB client, even if the model run or evaluation raised
        model_obj.UnsetMongoClient()
    return ind
Beispiel #2
0
def main(cfg):
    """Main workflow of NSGA-II based calibration.

    The workflow reads the outlet observations once, evaluates the initial population,
    then repeats mating, mutation, model evaluation, and selection for `cfg.opt.ngens`
    generations. Logs, hypervolume records, Pareto front plots, and timespan statistics
    are written along the way.

    Args:
        cfg: Configuration object. `cfg.opt` (npop, ngens, rsel, rcross, rmut,
            simdata_dir, out_dir, logfile, hypervlog) and `cfg.model.db_name`
            are read here; `cfg` is also passed to `Calibration`.

    Returns:
        Tuple of (pop, logbook): the population selected in the last executed generation
        and the logbook holding the statistics of each generation.
    """
    random.seed()
    scoop_log('Population: %d, Generation: %d' % (cfg.opt.npop, cfg.opt.ngens))

    # Initial timespan variables
    stime = time.time()
    plot_time = 0.
    allmodels_exect = list()  # execute time of all model runs

    # create reference point for hypervolume
    ref_pt = numpy.array(worse_objects) * multi_weight * -1

    stats = tools.Statistics(lambda sind: sind.fitness.values)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    logbook = tools.Logbook()
    logbook.header = 'gen', 'evals', 'min', 'max', 'avg', 'std'

    cali_obj = Calibration(cfg)

    # Read observation data just once, and copy it to every individual later
    model_cfg_dict = cali_obj.model.ConfigDict
    model_obj = MainSEIMS(args_dict=model_cfg_dict)

    model_obj.SetMongoClient()
    try:
        obs_vars, obs_data_dict = model_obj.ReadOutletObservations(object_vars)
    finally:
        # Release the client even if reading observations failed
        model_obj.UnsetMongoClient()

    # Initialize population
    param_values = cali_obj.initialize(cfg.opt.npop)
    pop = list()
    for i in range(cfg.opt.npop):
        ind = creator.Individual(param_values[i])
        ind.gen = 0
        ind.id = i
        ind.obs.vars = obs_vars[:]
        ind.obs.data = deepcopy(obs_data_dict)
        pop.append(ind)
    param_values = numpy.array(param_values)

    # Write calibrated values to MongoDB
    # TODO, extract this function, which is same with `Sensitivity::write_param_values_to_mongodb`.
    write_param_values_to_mongodb(cfg.model.db_name, cali_obj.ParamDefs,
                                  param_values)
    # get the low and up bound of calibrated parameters
    bounds = numpy.array(cali_obj.ParamDefs['bounds'])
    low = bounds[:, 0].tolist()
    up = bounds[:, 1].tolist()
    pop_select_num = int(cfg.opt.npop * cfg.opt.rsel)
    init_time = time.time() - stime

    def check_validation(fitvalues):
        """Check the validation of the fitness values of an individual.

        Every non-None entry of `conditions` is a comparison suffix that is appended
        to the fitness value at the same index and evaluated.
        """
        flag = True
        for condidx, condstr in enumerate(conditions):
            if condstr is None:
                continue
            # NOTE(review): `eval` executes the condition string as Python code.
            #   This is only safe as long as `conditions` comes from a trusted configuration.
            if not eval('%f%s' % (fitvalues[condidx], condstr)):
                flag = False
        return flag

    def evaluate_parallel(invalid_pops):
        """Evaluate model by SCOOP or map, and set fitness of individuals
         according to calibration step.

        Returns:
            Tuple of (evaluated individuals, labels of the fitness values). If `filter_ind`
            is set, only the individuals that pass `check_validation` are returned.
        """
        popnum = len(invalid_pops)
        labels = list()
        try:  # parallel on multi-processors or clusters using SCOOP
            from scoop import futures
            invalid_pops = list(
                futures.map(toolbox.evaluate, [cali_obj] * popnum,
                            invalid_pops))
        except (ImportError, ImportWarning):  # Python build-in map (serial)
            # A tuple is required here: `ImportError or ImportWarning` is just `ImportError`
            invalid_pops = list(
                map(toolbox.evaluate, [cali_obj] * popnum, invalid_pops))
        for tmpind in invalid_pops:
            labels = list()  # TODO, find an elegant way to get labels.
            tmpfitnessv = list()
            for k in multiobj:
                tmpvalues, tmplabel = tmpind.cali.efficiency_values(
                    k, object_names[k])
                tmpfitnessv.extend(tmpvalues)
                labels.extend(tmplabel)
            tmpind.fitness.values = tuple(tmpfitnessv)

        # Filter for a valid solution
        if filter_ind:
            invalid_pops = [
                tmpind for tmpind in invalid_pops
                if check_validation(tmpind.fitness.values)
            ]
            if len(invalid_pops) < 2:
                print(
                    'The initial population should be greater or equal than 2. '
                    'Please check the parameters ranges or change the sampling strategy!'
                )
                # Same exit status as before, without relying on the `site`-provided `exit`
                raise SystemExit(2)
        return invalid_pops, labels  # Currently, `invalid_pops` contains evaluated individuals

    def format_times(values):
        """Format a sequence of timespans as tab-separated text."""
        return '\t'.join('%.3f' % t for t in values)

    # Record the count and execute timespan of model runs during the optimization
    modelruns_count = {0: len(pop)}
    # Total time counted according to evaluate_parallel()
    modelruns_time = {0: 0.}
    # Summarize time of every model runs according to pop
    modelruns_time_sum = {0: 0.}

    # Generation 0 before optimization
    stime = time.time()
    pop, plotlables = evaluate_parallel(pop)
    modelruns_time[0] = time.time() - stime
    for ind in pop:
        allmodels_exect.append(
            [ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
        modelruns_time_sum[0] += ind.runtime

    # currently, len(pop) may less than pop_select_num
    pop = toolbox.select(pop, pop_select_num)
    # Output simulated data to json or pickle files for future use.
    output_population_details(pop,
                              cfg.opt.simdata_dir,
                              0,
                              plot_cfg=cali_obj.cfg.plot_cfg)

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)
    scoop_log(logbook.stream)

    # Begin the generational process
    output_str = '### Generation number: %d, Population size: %d ###\n' % (
        cfg.opt.ngens, cfg.opt.npop)
    scoop_log(output_str)
    UtilClass.writelog(cfg.opt.logfile, output_str, mode='replace')

    modelsel_count = {
        0: len(pop)
    }  # type: Dict[int, int] # newly added Pareto fronts

    for gen in range(1, cfg.opt.ngens + 1):
        output_str = '###### Generation: %d ######\n' % gen
        scoop_log(output_str)

        offspring = [toolbox.clone(ind) for ind in pop]
        # method1: use crowding distance (normalized as 0~1) as eta
        # tools.emo.assignCrowdingDist(offspring)
        # method2: use the index of individual at the sorted offspring list as eta
        if len(offspring) >= 2:  # at least two individuals are required for mating
            for i, (ind1, ind2) in enumerate(zip(offspring[::2],
                                                 offspring[1::2])):
                if random.random() > cfg.opt.rcross:
                    continue
                eta = i
                toolbox.mate(ind1, ind2, eta, low, up)
                toolbox.mutate(ind1, eta, low, up, cfg.opt.rmut)
                toolbox.mutate(ind2, eta, low, up, cfg.opt.rmut)
                del ind1.fitness.values, ind2.fitness.values
        elif offspring:  # only one individual, mutate it directly
            toolbox.mutate(offspring[0], 1., low, up, cfg.opt.rmut)
            del offspring[0].fitness.values
        # An empty offspring list falls through and terminates below.

        # Evaluate the individuals with an invalid fitness
        invalid_inds = [ind for ind in offspring if not ind.fitness.valid]
        valid_inds = [ind for ind in offspring if ind.fitness.valid]
        if len(invalid_inds) == 0:  # No need to continue
            scoop_log(
                'Note: No invalid individuals available, the NSGA2 will be terminated!'
            )
            break

        # Write new calibrated parameters to MongoDB
        param_values = list()
        for idx, ind in enumerate(invalid_inds):
            ind.gen = gen
            ind.id = idx
            param_values.append(ind[:])
        param_values = numpy.array(param_values)
        write_param_values_to_mongodb(cfg.model.db_name, cali_obj.ParamDefs,
                                      param_values)
        # Count the model runs, and execute models
        invalid_ind_size = len(invalid_inds)
        modelruns_count.setdefault(gen, invalid_ind_size)
        stime = time.time()
        invalid_inds, plotlables = evaluate_parallel(invalid_inds)
        curtimespan = time.time() - stime
        modelruns_time.setdefault(gen, curtimespan)
        modelruns_time_sum.setdefault(gen, 0.)
        for ind in invalid_inds:
            allmodels_exect.append(
                [ind.io_time, ind.comp_time, ind.simu_time, ind.runtime])
            modelruns_time_sum[gen] += ind.runtime

        # Select the next generation population
        # Previous version may result in duplications of the same scenario in one Pareto front,
        #   thus, I decided to check and remove the duplications first.
        # An individual is identified by the pair of its generation and ID,
        #   and only the first occurrence is kept.
        seen_inds = set()
        unique_pop = list()
        for tmpind in pop + valid_inds + invalid_inds:
            ind_key = (tmpind.gen, tmpind.id)
            if ind_key in seen_inds:
                continue
            seen_inds.add(ind_key)
            unique_pop.append(tmpind)
        pop = toolbox.select(unique_pop, pop_select_num)

        output_population_details(pop,
                                  cfg.opt.simdata_dir,
                                  gen,
                                  plot_cfg=cali_obj.cfg.plot_cfg)
        hyper_str = 'Gen: %d, New model runs: %d, ' \
                    'Execute timespan: %.4f, Sum of model run timespan: %.4f, ' \
                    'Hypervolume: %.4f\n' % (gen, invalid_ind_size,
                                             curtimespan, modelruns_time_sum[gen],
                                             hypervolume(pop, ref_pt))
        scoop_log(hyper_str)
        UtilClass.writelog(cfg.opt.hypervlog, hyper_str, mode='append')

        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(invalid_inds), **record)
        scoop_log(logbook.stream)

        # Count the newly generated near Pareto fronts
        new_count = sum(1 for ind in pop if ind.gen == gen)
        modelsel_count.setdefault(gen, new_count)

        # Plot 2D near optimal pareto front graphs,
        #   i.e., (NSE, RSR), (NSE, PBIAS), and (RSR,PBIAS)
        # And 3D near optimal pareto front graphs, i.e., (NSE, RSR, PBIAS)
        stime = time.time()
        front = numpy.array([ind.fitness.values for ind in pop])
        title = (u'近似最优Pareto解集' if cali_obj.cfg.plot_cfg.plot_cn else
                 'Near Pareto optimal solutions')

        plot_pareto_front_single(front,
                                 plotlables,
                                 cfg.opt.out_dir,
                                 gen,
                                 title,
                                 plot_cfg=cali_obj.cfg.plot_cfg)
        plot_time += time.time() - stime

        # save in file
        # The text starts with the generation title, followed by the header line
        out_parts = [output_str, 'generation\tcalibrationID\t']
        for kk, vv in object_names.items():
            out_parts.append(pop[0].cali.output_header(kk, vv, 'Cali'))
        if cali_obj.cfg.calc_validation:
            for kk, vv in object_names.items():
                out_parts.append(pop[0].vali.output_header(kk, vv, 'Vali'))
        out_parts.append('gene_values\n')
        # One line for each selected individual
        for ind in pop:
            out_parts.append('%d\t%d\t' % (ind.gen, ind.id))
            for kk, vv in object_names.items():
                out_parts.append(ind.cali.output_efficiency(kk, vv))
            if cali_obj.cfg.calc_validation:
                for kk, vv in object_names.items():
                    out_parts.append(ind.vali.output_efficiency(kk, vv))
            out_parts.append(str(ind))
            out_parts.append('\n')
        UtilClass.writelog(cfg.opt.logfile, ''.join(out_parts), mode='append')

        # TODO: Figure out if we should terminate the evolution

    # Plot hypervolume and newly executed model count
    plot_hypervolume_single(cfg.opt.hypervlog,
                            cfg.opt.out_dir,
                            plot_cfg=cali_obj.cfg.plot_cfg)

    # Save newly added Pareto fronts of each generations
    new_fronts_count = numpy.array(list(modelsel_count.items()))
    numpy.savetxt('%s/new_pareto_fronts_count.txt' % cfg.opt.out_dir,
                  new_fronts_count,
                  delimiter=str(','),
                  fmt=str('%d'))

    # Save and print timespan information
    allmodels_exect = numpy.array(allmodels_exect)
    numpy.savetxt('%s/exec_time_allmodelruns.txt' % cfg.opt.out_dir,
                  allmodels_exect,
                  delimiter=str(' '),
                  fmt=str('%.4f'))
    scoop_log('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' %
              (format_times(allmodels_exect.max(0)),
               format_times(allmodels_exect.min(0)),
               format_times(allmodels_exect.mean(0)),
               format_times(allmodels_exect.sum(0))))

    exec_time = sum(modelruns_time.values())
    exec_time_sum = sum(modelruns_time_sum.values())

    scoop_log('Initialization timespan: %.4f\n'
              'Model execution timespan: %.4f\n'
              'Sum of model runs timespan: %.4f\n'
              'Plot Pareto graphs timespan: %.4f' %
              (init_time, exec_time, exec_time_sum, plot_time))

    return pop, logbook
Beispiel #3
0
class Scenario(object):
    """Base class of Scenario Analysis.

    Attributes:
        ID(integer): Unique ID in BMPScenario database -> BMP_SCENARIOS collection
        eval_timerange(float): Evaluation time range in the unit of year, calculated from
                               `cfg.eval_stime` and `cfg.eval_etime`.
        economy(float): Economical effectiveness, e.g., income minus expenses
        environment(float): Environmental effectiveness, e.g., reduction rate of soil erosion
        worst_econ: Worst economical effectiveness, taken from `cfg.worst_econ`.
        worst_env: Worst environmental effectiveness, taken from `cfg.worst_env`.
        gene_num(integer): The number of genes of one chromosome, i.e., an individual
        gene_values(list): BMP identifiers on each location of gene. The length is gene_num.
        bmp_items(dict): BMP configuration items that can be imported to MongoDB directly.
                         The key is `bson.objectid.ObjectId`, the value is scenario item dict.
        rule_mtd: Method for configuring BMPs, taken from `cfg.bmps_cfg_method`.
        model(MainSEIMS): SEIMS-based model object built from `cfg.model.ConfigDict`.
        modelout_dir: Output directory of the model, set in `execute_seims_model`.
        modelrun(boolean): Has SEIMS model been executed by `execute_seims_model`?
    """
    def __init__(self, cfg):
        # type: (SAConfig) -> None
        """Initialize."""
        self.ID = -1
        self.eval_timerange = 1.  # unit: year
        self.economy = 0.
        self.environment = 0.
        self.worst_econ = cfg.worst_econ
        self.worst_env = cfg.worst_env

        self.gene_num = 0
        self.gene_values = list()  # type: List[int]
        self.bmp_items = dict()

        self.rule_mtd = cfg.bmps_cfg_method
        self.bmps_info = cfg.bmps_info
        self.bmps_retain = cfg.bmps_retain
        self.eval_info = cfg.eval_info
        self.export_sce_txt = cfg.export_sce_txt
        self.export_sce_tif = cfg.export_sce_tif
        self.scenario_dir = cfg.scenario_dir  # predefined directories to store scenarios related

        # SEIMS-based model related
        self.modelcfg = cfg.model
        self.modelcfg_dict = self.modelcfg.ConfigDict
        self.model = MainSEIMS(args_dict=self.modelcfg_dict)

        self.model.SetMongoClient()
        try:
            self.model.ReadMongoDBData()

            self.scenario_db = self.model.ScenarioDBName
            self.model.ResetSimulationPeriod()  # Reset the simulation period
            # Reset the starttime and endtime of the desired outputs
            #   according to evaluation period
            if ModelCfgFields.output_id in self.eval_info:
                self.model.ResetOutputsPeriod(
                    self.eval_info[ModelCfgFields.output_id], cfg.eval_stime,
                    cfg.eval_etime)
            else:
                print(
                    'Warning: No OUTPUTID is defined in BMPs_info. Please make sure the '
                    'STARTTIME and ENDTIME of ENVEVAL are consistent with Evaluation period!'
                )
        finally:
            # Unset in time, even if reading or resetting failed!
            self.model.UnsetMongoClient()

        # (Re)Calculate timerange in the unit of year
        # One second is added so that the end time itself is included.
        dlt = cfg.eval_etime - cfg.eval_stime + timedelta(seconds=1)
        self.eval_timerange = (dlt.days * 86400. + dlt.seconds) / 86400. / 365.
        self.modelout_dir = None  # determined in `execute_seims_model` based on unique scenario ID
        self.modelrun = False  # indicate whether the model has been executed

    def set_unique_id(self, given_id=None):
        # type: (Optional[int]) -> int
        """Set unique ID.

        Args:
            given_id: ID to be used. If None, the next value of `generate_uniqueid()` is used.

        Returns:
            The scenario ID that has been set.
        """
        if given_id is None:
            self.ID = next(generate_uniqueid())
        else:
            self.ID = given_id
        # Update scenario ID for self.modelcfg and self.model
        self.model.scenario_id = self.ID
        self.modelcfg.scenario_id = self.ID
        # An empty configuration dict gets 0 rather than the scenario ID
        self.modelcfg_dict[
            'scenario_id'] = self.ID if self.modelcfg_dict else 0
        return self.ID

    def rule_based_config(self, method, conf_rate):
        # type: (str, float) -> None
        """Config available BMPs to each gene of the chromosome by rule-based method.

        Virtual function that should be overridden in inherited Scenario class.
        """
        pass

    def random_based_config(self, conf_rate):
        # type: (float) -> None
        """Config available BMPs to each gene of the chromosome by random-based method.

        Virtual function that should be overridden in inherited Scenario class.
        """
        pass

    def decoding(self):
        """Decoding gene_values to bmp_items

        This function should be overridden.
        """
        pass

    def export_to_mongodb(self):
        """Export current scenario to MongoDB.

        The existing items with the same ScenarioID are deleted first (best effort),
        then every item of `self.bmp_items` is inserted with a newly generated `_id`.
        """
        conn = MongoDBObj.client
        db = conn[self.scenario_db]
        collection = db[DBTableNames.scenarios]
        try:
            # Remove items of the same ScenarioID if existed.
            collection.delete_many({'ID': self.ID})
        except (NetworkTimeout, Exception) as err:
            # In case of unexpected raise, the export continues.
            # Note that `except NetworkTimeout or Exception` only catches `NetworkTimeout`.
            scoop_log('Warning: Failed to delete existing items of Scenario ID: %d, %s' %
                      (self.ID, str(err)))
        for bmp_item in self.bmp_items.values():
            bmp_item['_id'] = ObjectId()
            collection.insert_one(bmp_item)

    def export_scenario_to_txt(self):
        """Export current scenario information to text file.

        Nothing is written if `self.export_sce_txt` is not set. Otherwise, the file
        `Scenario_<ID>.txt` is (over)written in `self.scenario_dir`.

        This function is better be called after `calculate_economy` and `calculate_environment`
            or in static method, e.g., `scenario_effectiveness`.
        """
        if not self.export_sce_txt:
            return
        ofile = os.path.join(self.scenario_dir, 'Scenario_%d.txt' % self.ID)
        lines = ['Scenario ID: %d\n' % self.ID,
                 'Gene number: %d\n' % self.gene_num,
                 'Gene values: %s\n' % ', '.join(repr(v) for v in self.gene_values),
                 'Scenario items:\n']
        if len(self.bmp_items) > 0:
            # The keys of the first item are used as the header of all items
            header = list(next(iter(self.bmp_items.values())).keys())
            lines.append('\t'.join(header))
            lines.append('\n')
            for item in self.bmp_items.values():
                lines.append('\t'.join(str(v) for v in item.values()))
                lines.append('\n')
        lines.append('Effectiveness:\n\teconomy: %f\n\tenvironment: %f\n' %
                     (self.economy, self.environment))
        with open(ofile, 'w', encoding='utf-8') as outfile:
            outfile.write(''.join(lines))

    def export_scenario_to_gtiff(self):
        """Export the areal BMPs to gtiff for further analysis.

        This function should be overridden in inherited class.
        """
        pass

    def import_from_mongodb(self, sid):
        """Import a specified Scenario (`sid`) from MongoDB.

        This function should be overridden in inherited class.
        Returns:
            True if succeed, otherwise False.
        """
        pass

    def import_from_txt(self, sid):
        """Import a specified Scenario (`sid`) from text file.

        This function should be overridden in inherited class.
        Returns:
            True if succeed, otherwise False.
        """
        pass

    def calculate_economy(self):
        """Calculate economical effectiveness, which is application specified."""
        pass

    def calculate_environment(self):
        """Calculate environment effectiveness, which is application specified."""
        pass

    def clean(self,
              scenario_id=None,
              calibration_id=None,
              delete_scenario=False,
              delete_spatial_gfs=False):
        """Clean the intermediate data.

        All arguments are passed to `self.model.clean` unchanged.
        """
        # model clean
        self.model.SetMongoClient()
        try:
            self.model.clean(scenario_id=scenario_id,
                             calibration_id=calibration_id,
                             delete_scenario=delete_scenario,
                             delete_spatial_gfs=delete_spatial_gfs)
        finally:
            self.model.UnsetMongoClient()

    def execute_seims_model(self):
        """Run SEIMS for evaluating environmental effectiveness.

        The model output directory is recorded in `self.modelout_dir` before the run,
        and `self.modelrun` is set to True after the run.

        Returns:
            `self.model.run_success`. Handling a failed run, e.g., setting the worst values
            to `self.economy` and `self.environment`, is left to the caller.
        """
        scoop_log('Scenario ID: %d, running SEIMS model...' % self.ID)
        self.model.scenario_id = self.ID
        self.modelout_dir = self.model.OutputDirectory

        self.model.SetMongoClient()
        try:
            self.model.run()
        finally:
            # Release the MongoDB client even if the model run raised
            self.model.UnsetMongoClient()

        self.modelrun = True
        return self.model.run_success

    def initialize(self, input_genes=None):
        # type: (Optional[List]) -> List
        """Initialize a scenario.

        Returns:
            A list contains BMPs identifier of each gene location.
        """
        pass