Esempio n. 1
0
    def addGurobiStatistic(self, *, optimizer, point, **kwargs):
        """
        Build a statistics record for a Gurobi run, append it to
        self.gurobi_stats, and emit it as JSON via logging.info.
        @param optimizer - the optimizer we were called from. Optimizer objects run on the mapper.
        @param point     - the point in the code where we were called
        @param kwargs    - extra key/value pairs merged into the record.
                           A 'model' entry, if present, is consumed (removed
                           from kwargs) and expanded via model_info().
        Side effects: appends to self.gurobi_stats and logs; the appended
        record is mutated afterwards (see notes below).
        """
        # Immediately log which optimizer type called us and from where.
        logging.info(
            json.dumps({
                C.OPTIMIZER: str(type(optimizer)),
                'point': point
            }))

        # Get the reduced call stack: only the 3 frames directly above this
        # one, rendered as "file:line(function)" and joined with the
        # statistics path delimiter.
        call_stack = C.STATISTICS_PATHNAME_DELIMITER.join([
            f"{frame.filename}:{frame.lineno}({frame.function})"
            for frame in inspect.stack()[1:4]
        ])

        # Core record: optimizer identity, the timing checkpoints recorded on
        # the optimizer object, and the geography this node is working on.
        s2 = {
            C.OPTIMIZER: type(optimizer).__name__.split(".")[-1],
            'instanceId': INSTANCE_ID,
            'uuid': optimizer.t_uuid,
            't': time.time(),
            't_env_start': optimizer.t_env_start,
            't_env_end': optimizer.t_env_end,
            't_modbuild_start': optimizer.t_modbuild_start,
            't_modbuild_end': optimizer.t_modbuild_end,
            't_presolve_start': optimizer.t_presolve_start,
            't_presolve_end': optimizer.t_presolve_end,
            't_optimize_start': optimizer.t_optimize_start,
            't_optimize_end': optimizer.t_optimize_end,
            'point': point,
            'geocode': self.geocode,
            'parentGeocode': self.parentGeocode,
            'childGeolevel': self.geolevel,
            'stack': call_stack,
            'failsafe_invoked': optimizer.failsafe_invoked
        }

        # Optionally add CPU/process resource usage for this process and its
        # children. NOTE(review): this reads optimizer.record_CPU_stats
        # (capitalized), while the engine config sets record_cpu_stats —
        # confirm the attribute name matches what the optimizer exposes.
        if optimizer.record_CPU_stats:
            rusage_self = resource.getrusage(resource.RUSAGE_SELF)
            rusage_children = resource.getrusage(resource.RUSAGE_CHILDREN)
            s3 = {
                'pid': os.getpid(),
                'ppid': os.getppid(),
                'loadavg': os.getloadavg()[0],  # 1-minute load average
                'utime': rusage_self.ru_utime,
                'stime': rusage_self.ru_stime,
                # *1024 assumes ru_maxrss is in KiB (Linux); on macOS it is
                # already bytes — TODO confirm deployment target.
                'maxrss_bytes': rusage_self.ru_maxrss * 1024,
                'utime_children': rusage_children.ru_utime,
                'stime_children': rusage_children.ru_stime,
                'maxrss_children': rusage_children.ru_maxrss
            }
            s2 = {**s2, **s3}

        # Only some optimizer types carry childGeoLen; record it when present.
        if hasattr(optimizer, 'childGeoLen'):
            s2['childGeoLen'] = getattr(optimizer, 'childGeoLen')

        # If a model was provided, capture information about the model.
        # The 'model' key is consumed here so it is not passed through below.
        if 'model' in kwargs:
            model = kwargs['model']
            del kwargs['model']
            s2 = {**s2, **model_info(model)}

        # Create a dictionary with the stats including the current dict and the additional kwargs
        obj = {**s2, **kwargs}
        self.gurobi_stats.append(obj)

        # Syslog logging. NOTE: obj is mutated AFTER being appended above, so
        # the applicationId (and the 'stack' removal below, when triggered)
        # also appear in / disappear from the stored gurobi_stats entry.
        obj['applicationId'] = clogging.applicationId()

        json_data = json.dumps(obj)
        if len(json_data) > MAX_SYSLOG_LENGTH:
            # remove the call stack for logging, because it's really big!
            del obj['stack']
            json_data = json.dumps(obj)
        logging.info(json_data)
    def __init__(self, **kwargs):
        """
        Initialize the engine: read the histogram shape, geolevels and schema
        from the setup object, and the run-mode flags from the config file.
        """
        super().__init__(**kwargs)

        # Person histogram shape, taken from the setup object once the reader
        # is initiated and the table parameters are known.
        self.hist_shape = self.setup.hist_shape

        # pylint: disable=bad-whitespace
        # Geolevels in both orderings, plus the schema name.
        self.levels: Tuple[str, ...] = self.setup.levels
        self.levels_reversed: Tuple[str, ...] = tuple(reversed(self.levels))
        self.schema: str = self.setup.schema
        logging.info(f"levels: {self.levels}")

        # Map each reader variable name to its index among the histogram vars.
        self.vars_schema = dict(
            (var_name, axis)
            for axis, var_name in enumerate(self.setup.hist_vars))
        self.log_and_print(
            f"the variables in the schema are: {self.vars_schema} (reader names)"
        )
        try:
            self.log_and_print(
                f"Their names in the {self.schema} schema are {self.setup.schema_obj.dimnames}"
            )
        except AssertionError:
            self.log_warning_and_print(
                f"Schema {self.schema} is not supported")

        # Minimal schema, default False. Required for the minimal schema
        # engine, but regular topdown uses it too, for the fail-safe.
        self.minimal_schema = self.gettuple(
            C.MINIMALSCHEMA, section=C.CONSTRAINTS, sep=" ", default=False)

        # Run under spark, or in the local/serial mode used for testing
        # and debugging.
        self.use_spark = self.setup.use_spark

        # Saving noisy answers is only possible when running under spark.
        if self.use_spark:
            self.save_noisy: bool = self.getboolean(C.SAVENOISY, default=True)
        else:
            self.save_noisy: bool = False

        # Discard noisy answers and reload them, so the noise is drawn only
        # once and not re-drawn during RDD reconstruction; requires that the
        # noisy answers were saved in the first place.
        if self.save_noisy:
            self.reload_noisy: bool = self.getboolean(C.RELOADNOISY,
                                                      default=True)
        else:
            self.reload_noisy: bool = False

        # Restart the DAS from the point after the noise was generated and
        # saved, so that no additional budget is spent.
        self.postprocess_only: bool = self.setup.postprocess_only
        if self.postprocess_only:
            # Unique run ID of the run where the noisy answers were saved,
            # to load for postprocess-only.
            self.saved_noisy_app_id: str = self.getconfig(C.SAVED_NOISY_APP_ID)

        # Application id: fixed placeholder for BOM-only runs, otherwise the
        # id assigned by the cluster.
        if self.das.make_bom_only():
            self.app_id = C.APPID_NO_SPARK
        else:
            self.app_id: str = clogging.applicationId()  # if self.use_spark else C.APPID_NO_SPARK

        # Statistics system: which statistics to record or save.
        self.record_gurobi_stats: bool = self.getboolean(
            C.RECORD_GUROBI_STATS_OPTION, section=C.GUROBI_SECTION,
            default=False)
        self.record_cpu_stats: bool = self.getboolean(
            C.RECORD_CPU_STATS_OPTION, section=C.GUROBI_SECTION, default=False)
        self.record_vm_stats: bool = self.getboolean(
            C.RECORD_VM_STATS_OPTION, section=C.GUROBI_SECTION, default=False)
        self.save_gurobi_stats: bool = self.getboolean(
            C.SAVE_STATS_OPTION, section=C.GUROBI_SECTION, default=False)

        # Privacy protection mechanism (see primitives.py). Geometric is the
        # default; subclasses set it explicitly.
        self.mechanism = primitives.basic_dp_answer

        # Shares of budget designated to each geolevel. Should be None when
        # not doing by-level budget (e.g. the bottomup engine).
        self.geolevel_prop_budgets: tuple = None
    ###
    ### Validate Configuration File
    ###
    # NOTE(review): fragment of a larger setup routine — 'config', C and CC
    # come from the enclosing scope not shown in this chunk.
    # Look for deprecated variables: any (variable, section) pair listed here
    # must not appear in the config file; presence aborts the run so stale
    # configs fail fast.
    DEPRECATED_CONFIG_VARIABLES = [(C.OUTPUT_FNAME, C.WRITER)]
    for (var, section) in DEPRECATED_CONFIG_VARIABLES:
        if var in config[section]:
            raise RuntimeError(
                f"config file contains deprecated variable {var} in section [{section}]"
            )

    ###
    ### Set up the environment as necessary
    ###

    # Record the application id in the log and export it (plus the running
    # python version, e.g. "python3.6") to the environment so subprocesses
    # can see them.
    applicationId = clogging.applicationId()
    logging.info("applicationId: %s", applicationId)
    os.environ[CC.APPLICATIONID_ENV] = applicationId
    os.environ[
        CC.
        PYTHON_VERSION] = f'python{sys.version_info.major}.{sys.version_info.minor}'

    # Default the cluster id when the environment does not provide one.
    if CC.CLUSTERID_ENV not in os.environ:
        logging.warning(
            "{} environment variable not set; setting to {}".format(
                CC.CLUSTERID_ENV, CC.CLUSTERID_UNKNOWN))
        os.environ[CC.CLUSTERID_ENV] = CC.CLUSTERID_UNKNOWN

    ###
    ### Set up Gurobi
    ###
Esempio n. 4
0
    def __init__(self, **kwargs):
        """
        Initialize the engine.

        Reads the person/unit histogram shapes, geolevels, spine type and
        schema from the setup object, and the run-mode options from the
        config file, then runs initializeAndCheckParameters().
        """
        super().__init__(**kwargs)

        # Person histogram shape to get later from setup object, once reader is initiated and know table parameters
        self.hist_shape = self.setup.hist_shape
        self.unit_hist_shape = self.setup.unit_hist_shape

        # pylint: disable=bad-whitespace
        self.all_levels: Tuple[str, ...] = self.setup.levels
        self.all_levels_reversed: Tuple[str,
                                        ...] = tuple(reversed(self.all_levels))

        # Truncate the bottom-up ordering at the configured bottom geolevel
        # (inclusive): geolevels below setup.geo_bottomlevel are dropped.
        self.levels_reversed = []
        for level in self.all_levels_reversed:
            self.levels_reversed.append(level)
            if level == self.setup.geo_bottomlevel:
                break

        self.levels = tuple(reversed(self.levels_reversed))

        # Was print(); use logging so the output is captured on cluster runs
        # where plain stdout is not collected.
        logging.info(f'self.all_levels: {self.all_levels}')
        logging.info(f'self.all_levels_reversed: {self.all_levels_reversed}')
        logging.info(f'self.levels: {self.levels}')
        logging.info(f'self.levels_reversed: {self.levels_reversed}')

        self.spine_type = self.setup.spine_type
        self.schema: str = self.setup.schema
        logging.info(f"levels: {self.levels}")

        # Map each reader variable name to its index among the histogram vars.
        self.vars_schema = {
            var: i
            for i, var in enumerate(self.setup.hist_vars)
        }
        self.log_and_print(
            f"the variables in the schema are: {self.vars_schema} (reader names)"
        )
        try:
            self.log_and_print(
                f"Their names in the {self.schema} schema are {self.setup.schema_obj.dimnames}"
            )
        except AssertionError:
            self.log_warning_and_print(
                f"Schema {self.schema} is not supported")

        # Config flag [writer] WRITE_ALL_GEOLEVELS, default False.
        self.write_all_geolevels = self.getboolean(CC.WRITE_ALL_GEOLEVELS,
                                                   section=CC.WRITER,
                                                   default=False)

        # Default is False. It is used for fail safe in the optimizer
        self.minimal_schema = self.gettuple(CC.MINIMALSCHEMA,
                                            section=CC.CONSTRAINTS,
                                            sep=" ",
                                            default=False)

        # Whether to run in spark (there is a local/serial mode, for testing and debugging)
        self.use_spark = self.setup.use_spark

        # Whether to save noisy answers (only possible under spark)
        self.save_noisy: bool = self.getboolean(
            CC.SAVENOISY, default=True) if self.use_spark else False

        # Whether to discard noisy answers and reload them (makes sure
        # the noise is only drawn once and not again during RDD
        # reconstruction); requires the noisy answers to have been saved.
        self.reload_noisy: bool = self.getboolean(
            CC.RELOADNOISY, default=True) if self.save_noisy else False

        # For restarting the DAS from the point after the noise was
        # generated and saved, so that no additional budget is spent
        self.postprocess_only: bool = self.setup.postprocess_only

        if self.postprocess_only:
            # Unique run ID of the run where the noisy answers were saved, to load for postprocess-only
            self.saved_noisy_app_id: str = self.getconfig(
                CC.SAVED_NOISY_APP_ID)

        # Optional config value; None when not present in the config file.
        # NOTE(review): presumably the geolevel from which optimization
        # starts — confirm against its consumer.
        try:
            self.optimization_start_from_level = self.getconfig(
                CC.OPTIMIZATION_START_FROM_LEVEL)
        except (NoSectionError, NoOptionError):
            self.optimization_start_from_level = None

        # Application id: fixed placeholder for BOM-only runs, otherwise the
        # id assigned by the cluster.
        if self.das.make_bom_only():
            self.app_id = CC.APPID_NO_SPARK
        else:
            self.app_id: str = clogging.applicationId(
            )  # if self.use_spark else CC.APPID_NO_SPARK

        # Config value [validator] VALIDATE_AT_LEVEL; False when unset.
        self.validate_levels: list = self.getiter(key=CC.VALIDATE_AT_LEVEL,
                                                  section=CC.VALIDATOR_SECTION,
                                                  default=False)

        # Privacy protection mechanism (see primitives.py). Geometric is default, set explicitly in subclasses
        self.mechanism_name = self.setup.dp_mechanism_name
        self.mechanism = primitives.basic_dp_answer
        self.no_noise_mechanism = primitives.NoNoiseMechanism

        # Shares of budget designated to each geolevel. Should be None if not doing by-level bugdet (e.g.bottomup engine)
        self.geolevel_prop_budgets: tuple = None
        self.geolevel_prop_budgets_dict: dict = None

        # Check config parameters before reading.
        self.initializeAndCheckParameters()