예제 #1
0
    def _fetch_rapl_files(self):
        """
        Discover the readable RAPL energy counters under ``self._lin_rapl_dir``.

        Every directory entry whose name contains ``:`` (files like
        `intel-rapl:$i`) is inspected: its ``name`` file provides the domain
        name and its ``energy_uj`` file is the counter to register. Counters
        that can be read are appended to ``self._rapl_files``; a
        ``PermissionError`` while reading a counter is logged and that
        counter is skipped. Any other error propagates to the caller.
        """
        # consider files like `intel-rapl:$i`
        domains = [
            entry for entry in os.listdir(self._lin_rapl_dir) if ":" in entry
        ]

        package_index = 0
        for domain in domains:
            name_path = os.path.join(self._lin_rapl_dir, domain, "name")
            # Release the `name` file handle as soon as its content is read
            # instead of keeping it open while the counter file is probed.
            with open(name_path) as name_file:
                name = name_file.read().strip()
            # Fake the name used by Power Gadget
            if "package" in name:
                name = f"Processor Energy Delta_{package_index}(kWh)"
                package_index += 1
            rapl_file = os.path.join(self._lin_rapl_dir, domain, "energy_uj")
            try:
                # Try to read the file to be sure we can
                with open(rapl_file, "r") as energy_file:
                    float(energy_file.read())
                self._rapl_files.append(RAPLFile(name=name,
                                                 path=rapl_file))
                logger.debug(
                    f"We will read Intel RAPL files at {rapl_file}")
            except PermissionError as e:
                logger.error(
                    "Unable to read Intel RAPL files for CPU power, we will use a constant for your CPU power."
                    +
                    " Please view https://github.com/mlco2/codecarbon/issues/244"
                    + f" for workarounds : {e}")
예제 #2
0
    def _main(self) -> Tuple[str, int]:
        """
        Resolve the CPU model and its power for the constant mode.

        :return: model name (str) and power in Watt (int); the power is
            ``None`` when no CPU was detected or when no power could be
            obtained for the detected model
        """
        cpu_model_detected = detect_cpu_model()

        # Guard: nothing could be detected at all.
        if not cpu_model_detected:
            logger.warning(
                "We were unable to detect your CPU using the `cpuinfo` package."
                + " Resorting to a default power consumption of 85W.")
            return "Unknown", None

        power = self._get_cpu_power_from_registry(cpu_model_detected)

        # Guard: the model is known by name but has no usable power value.
        if not power:
            logger.warning(
                f"We saw that you have a {cpu_model_detected} but we don't know it."
                + " Please contact us.")
            return cpu_model_detected, None

        logger.debug(
            f"CPU : We detect a {cpu_model_detected} with a TDP of {power} W"
        )
        return cpu_model_detected, power
예제 #3
0
    def get_static_cpu_details(self) -> Dict:
        """
        Give back the CPU details already stored on the instance.

        Nothing is measured here: the value of ``self.cpu_details`` is
        logged at debug level and returned as is.
        """
        logger.debug(f"get_static_cpu_details {self.cpu_details}")
        stored_details: Dict = self.cpu_details
        return stored_details
예제 #4
0
    def get_cpu_details(self, duration: Time, **kwargs) -> Dict:
        """
        Fetch the CPU energy deltas from the registered RAPL files.

        :param duration: forwarded to ``delta`` of every RAPL file
        :param kwargs: accepted for interface compatibility, not used here
        :return: dict mapping each RAPL file name to its energy delta in kWh;
            for names containing "Energy" an additional entry, with "Energy"
            replaced by "Power", holds the power in W. If reading fails the
            error is logged and the entries gathered so far are returned.
        """
        cpu_details = {}
        try:
            # First refresh every counter, then read the results: the two
            # passes are kept separate so that a failing refresh leaves the
            # result empty rather than partially filled.
            for rapl_file in self._rapl_files:
                rapl_file.delta(duration)

            for rapl_file in self._rapl_files:
                logger.debug(rapl_file)
                cpu_details[rapl_file.name] = rapl_file.energy_delta.kWh
                # We fake the name used by Power Gadget when using RAPL
                if "Energy" in rapl_file.name:
                    cpu_details[rapl_file.name.replace(
                        "Energy", "Power")] = rapl_file.power.W
        except Exception as e:
            # Two adjacent literals instead of a backslash continuation, so
            # the source indentation does not end up inside the message.
            logger.info(
                f"Unable to read Intel RAPL files at {self._rapl_files}\n"
                f"Exception occurred {e}",
                exc_info=True,
            )
        # Keep the latest result available to get_static_cpu_details-style
        # readers of ``self.cpu_details``.
        self.cpu_details = cpu_details
        logger.debug(f"get_cpu_details {self.cpu_details}")
        return cpu_details
예제 #5
0
    def _get_power_from_cpus(self) -> Power:
        """
        Compute the current CPU power.

        In "constant" mode the power is ``self._tdp`` scaled by
        ``CONSUMPTION_PERCENTAGE_CONSTANT``. In every other mode it is the
        sum of all metrics whose name starts with "Processor Power".
        :return: a Power built from a value expressed in watts
        """
        if self._mode == "constant":
            return Power.from_watts(self._tdp * CONSUMPTION_PERCENTAGE_CONSTANT)

        if self._mode == "intel_rapl":
            # Read the stored details rather than calling get_cpu_details,
            # which would compute the energy a second time and lose data.
            all_cpu_details: Dict = (
                self._intel_interface.get_static_cpu_details())
        else:
            all_cpu_details: Dict = self._intel_interface.get_cpu_details()

        power = 0
        for metric, value in all_cpu_details.items():
            # Intel Power Gadget names look like "Processor Power_<n>(Watt)"
            is_power_metric = re.match(r"^Processor Power", metric)
            if not is_power_metric:
                logger.debug(
                    f"_get_power_from_cpus - DONT MATCH {metric} : {value}")
                continue
            power += value
            logger.debug(
                f"_get_power_from_cpus - MATCH {metric} : {value}")
        return Power.from_watts(power)
예제 #6
0
    def get_country_emissions(self, energy: Energy, geo: GeoMetadata) -> float:
        """
        Compute the emissions of a run on private infrastructure from the
        energy mix of the country it ran in.

        When the country has no entry in the global energy mix data, the
        "world_average" carbon intensity is used instead.
        :param energy: Energy consumed (its kWh value is used)
        :param geo: Country and region metadata
        :return: CO2 emissions in kg
        """
        energy_mix = self._data_source.get_global_energy_mix_data()

        if geo.country_iso_code in energy_mix:
            country_energy_mix: Dict = energy_mix[geo.country_iso_code]
            emissions_per_kWh = self._global_energy_mix_to_emissions_rate(
                country_energy_mix)
            logger.debug(
                f"We apply an energy mix of {emissions_per_kWh.kgs_per_kWh*1000:.0f}"
                + f" g.CO2eq/kWh for {geo.country_name}")
            return emissions_per_kWh.kgs_per_kWh * energy.kWh  # kgs

        # Unknown country: fall back to the world average intensity.
        logger.warning(
            f"We do not have data for {geo.country_iso_code}, using world average."
        )
        carbon_intensity_per_source = (
            DataSource().get_carbon_intensity_per_source_data())
        world_rate = EmissionsPerKWh.from_g_per_kWh(
            carbon_intensity_per_source.get("world_average"))
        return world_rate.kgs_per_kWh * energy.kWh  # kgs
예제 #7
0
def is_rapl_available():
    """
    Tell whether the RAPL interface can be used.

    The check consists in instantiating ``IntelRAPL``: any exception raised
    while doing so is logged at debug level and reported as "not available".
    :return: True if the instantiation succeeded, False otherwise
    """
    try:
        IntelRAPL()
    except Exception as e:
        logger.debug(
            "Not using the RAPL interface, an exception occurred while instantiating "
            + f"IntelRAPL : {e}", )
        return False
    return True
예제 #8
0
def is_powergadget_available():
    """
    Tell whether Intel Power Gadget can be used.

    The check consists in instantiating ``IntelPowerGadget``: any exception
    raised while doing so is logged at debug level and reported as
    "not available".
    :return: True if the instantiation succeeded, False otherwise
    """
    try:
        IntelPowerGadget()
    except Exception as e:
        logger.debug(
            "Not using PowerGadget, an exception occurred while instantiating"
            + f" IntelPowerGadget : {e}", )
        return False
    return True
예제 #9
0
def get_gpu_details():
    """Collect the instantaneous metrics of every GPU reported by NVML.

    An empty list is returned (and the failure logged at debug level) when
    NVML raises an error.
    >>> get_gpu_details()
    [
        {
            "name": "Tesla V100-SXM2-16GB",
            "uuid": "GPU-4e817856-1fb8-192a-7ab7-0e0e4476c184",
            "free_memory": 16945381376,
            "total_memory": 16945512448,
            "used_memory": 131072,
            "temperature": 28,
            "power_usage": 42159,
            "power_limit": 300000,
            "gpu_utilization": 0,
            "compute_mode": 0,
            "compute_processes": [],
            "graphics_processes": [],
        }
    ]
    """
    try:
        pynvml.nvmlInit()
        gpus = []
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)

            # Memory is queried once and split into its three figures below
            memory = get_memory_info(handle)

            gpus.append({
                "name": get_gpu_name(handle),
                "uuid": get_uuid(handle),
                "free_memory": memory.free,
                "total_memory": memory.total,
                "used_memory": memory.used,
                "temperature": get_temperature(handle),
                "power_usage": get_power_usage(handle),
                "power_limit": get_power_limit(handle),
                "gpu_utilization": get_gpu_utilization(handle),
                "compute_mode": get_compute_mode(handle),
                "compute_processes": get_compute_processes(handle),
                "graphics_processes": get_graphics_processes(handle),
            })
        return gpus

    except pynvml.NVMLError:
        logger.debug("Failed to retrieve gpu information", exc_info=True)
        return []
예제 #10
0
 def add_emission(self, carbon_emission: dict):
     """
     Upload one emissions measurement to the API.

     If no run has been registered yet (``self.run_id is None``) the run
     creation is retried once; the upload is abandoned only when there is
     still no ``run_id`` afterwards. Measurements whose duration is smaller
     than 1 are not sent.

     :param carbon_emission: dict providing the keys "duration",
         "emissions", "emissions_rate", "cpu_power", "gpu_power",
         "ram_power", "cpu_energy", "gpu_energy", "ram_energy" and
         "energy_consumed"
     :return: True when the API answered with status 201, False when the
         measurement was not sent or the upload failed
     """
     assert self.experiment_id is not None
     self._previous_call = time.time()
     if self.run_id is None:
         # TODO : raise an Exception ?
         logger.debug(
             "ApiClient.add_emission need a run_id : the initial call may "
             + "have failed. Retrying..."
         )
         # NOTE(review): assumes _create_run sets self.run_id on success
         self._create_run(self.experiment_id)
         if self.run_id is None:
             logger.error(
                 "ApiClient.add_emission still no run_id, aborting for this time !"
             )
             # Give up only when the retry failed; previously the method
             # returned False here even after a successful retry, silently
             # dropping the measurement.
             return False
     if carbon_emission["duration"] < 1:
         logger.warning(
             "ApiClient : emissions not sent because of a duration smaller than 1."
         )
         return False
     emission = EmissionCreate(
         timestamp=get_datetime_with_timezone(),
         run_id=self.run_id,
         duration=int(carbon_emission["duration"]),
         emissions_sum=carbon_emission["emissions"],
         emissions_rate=carbon_emission["emissions_rate"],
         cpu_power=carbon_emission["cpu_power"],
         gpu_power=carbon_emission["gpu_power"],
         ram_power=carbon_emission["ram_power"],
         cpu_energy=carbon_emission["cpu_energy"],
         gpu_energy=carbon_emission["gpu_energy"],
         ram_energy=carbon_emission["ram_energy"],
         energy_consumed=carbon_emission["energy_consumed"],
     )
     try:
         payload = dataclasses.asdict(emission)
         url = self.url + "/emission"
         # Short timeout so a slow API cannot stall the caller for long
         r = requests.post(url=url, json=payload, timeout=2)
         if r.status_code != 201:
             self._log_error(url, payload, r)
             return False
         logger.debug(f"ApiClient - Successful upload emission {payload} to {url}")
     except Exception as e:
         # Best effort: any failure is logged and reported as False
         logger.error(e, exc_info=True)
         return False
     return True
예제 #11
0
파일: cloud.py 프로젝트: mlco2/codecarbon
def get_env_cloud_details(timeout=1):
    # type: (int) -> Optional[Any]
    """
    Query the metadata endpoint of each provider listed in
    ``CLOUD_METADATA_MAPPING`` and return the first one that answers.

    A provider whose request or post-processing raises is logged at debug
    level and skipped. ``None`` is returned when no provider answered.

    >>> get_env_cloud_details()
    {'provider': 'AWS',
     'metadata': {'accountId': '26550917306',
        'architecture': 'x86_64',
        'availabilityZone': 'us-east-1b',
        'billingProducts': None,
        'devpayProductCodes': None,
        'marketplaceProductCodes': None,
        'imageId': 'ami-025ed45832b817a35',
        'instanceId': 'i-7c3e81fed58d8f7f7',
        'instanceType': 'g4dn.2xlarge',
        'kernelId': None,
        'pendingTime': '2020-01-23T20:44:53Z',
        'privateIp': '172.156.72.143',
        'ramdiskId': None,
        'region': 'us-east-1',
        'version': '2017-09-30'}}
    """
    for provider, params in CLOUD_METADATA_MAPPING.items():
        try:
            response = requests.get(params["url"],
                                    headers=params["headers"],
                                    timeout=timeout)
            response.raise_for_status()
            metadata = response.json()

            # Optional provider-specific clean-up of the raw answer
            postprocess_function = params.get("postprocess_function")
            if postprocess_function is not None:
                metadata = postprocess_function(metadata)

            return {"provider": provider, "metadata": metadata}
        except Exception as e:
            logger.debug("Not running on %s, couldn't retrieve metadata: %r",
                         provider, e)

    return None
예제 #12
0
def get_gpu_static_info():
    """Collect the static information of every GPU reported by NVML.

    An empty list is returned (and the failure logged at debug level) when
    NVML raises an error.
    >>> get_gpu_static_info()
    [
        {
            "name": "Tesla V100-SXM2-16GB",
            "uuid": "GPU-4e817856-1fb8-192a-7ab7-0e0e4476c184",
            "total_memory": 16945512448,
            "power_limit": 300000,
            "gpu_index": 0,
        }
    ]
    """
    try:
        pynvml.nvmlInit()
        gpus = []
        for gpu_index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_index)

            # Only the total amount of memory is kept from this query
            memory = get_memory_info(handle)

            gpus.append({
                "name": get_gpu_name(handle),
                "uuid": get_uuid(handle),
                "total_memory": memory.total,
                "power_limit": get_power_limit(handle),
                "gpu_index": gpu_index,
            })
        return gpus

    except pynvml.NVMLError:
        logger.debug("Failed to retrieve gpu static info", exc_info=True)
        return []
예제 #13
0
    def _measure_power_and_energy(self) -> None:
        """
        Measure power and energy for every tracked hardware component.

        A function that is periodically run by the `BackgroundScheduler`
        every `self._measure_power_secs` seconds.

        Each element of `self._hardware` is measured; its energy is added to
        the global total and to the CPU, GPU or RAM total matching its type,
        and its power is stored as the latest power for that type. Every
        `self._api_call_interval` calls (unless that interval is -1 or no API
        output is configured) the delta emissions are sent to the API output.
        :return: None
        """
        last_duration = time.time() - self._last_measured_time

        # Warn when more than three measurement periods elapsed since the
        # previous measurement.
        warning_duration = self._measure_power_secs * 3
        if last_duration > warning_duration:
            warn_msg = ("Background scheduler didn't run for a long period" +
                        " (%ds), results might be inaccurate")
            logger.warning(warn_msg, last_duration)

        for hardware in self._hardware:
            # Start of the time spent measuring this hardware
            h_time = time.time()
            # Compute last_duration again for more accuracy
            last_duration = time.time() - self._last_measured_time
            power, energy = hardware.measure_power_and_energy(
                last_duration=last_duration)
            self._total_energy += energy
            # Dispatch the measurement to the per-type totals
            if isinstance(hardware, CPU):
                self._total_cpu_energy += energy
                self._cpu_power = power
                logger.info(
                    f"Energy consumed for all CPUs : {self._total_cpu_energy.kWh:.6f} kWh"
                    + f". All CPUs Power : {self._cpu_power.W} W")
            elif isinstance(hardware, GPU):
                self._total_gpu_energy += energy
                self._gpu_power = power
                logger.info(
                    f"Energy consumed for all GPUs : {self._total_gpu_energy.kWh:.6f} kWh"
                    + f". All GPUs Power : {self._gpu_power.W} W")
            elif isinstance(hardware, RAM):
                self._total_ram_energy += energy
                self._ram_power = power
                logger.info(
                    f"Energy consumed for RAM : {self._total_ram_energy.kWh:.6f} kWh"
                    + f". RAM Power : {self._ram_power.W} W")
            else:
                # The energy was still added to the global total above
                logger.error(
                    f"Unknown hardware type: {hardware} ({type(hardware)})")
            # h_time now holds the elapsed measurement time, in seconds
            h_time = time.time() - h_time
            logger.debug(
                f"{hardware.__class__.__name__} : {hardware.total_power().W:,.2f} "
                +
                f"W during {last_duration:,.2f} s [measurement time: {h_time:,.4f}]"
            )
        logger.info(
            f"{self._total_energy.kWh:.6f} kWh of electricity used since the begining."
        )
        # Reference point for the duration of the next measurement
        self._last_measured_time = time.time()
        self._measure_occurrence += 1
        # Send to the API only when an output exists and the interval is not
        # the -1 sentinel.
        if self._cc_api__out is not None and self._api_call_interval != -1:
            if self._measure_occurrence >= self._api_call_interval:
                emissions = self._prepare_emissions_data(delta=True)
                # The per-second rate is extrapolated to a year and divided
                # by 1000 to be logged in kg.
                logger.info(
                    f"{emissions.emissions_rate:.6f} g.CO2eq/s mean an estimation of "
                    +
                    f"{emissions.emissions_rate*3600*24*365/1000:,} kg.CO2eq/year"
                )
                self._cc_api__out.out(emissions)
                # Restart the count towards the next API call
                self._measure_occurrence = 0
        logger.debug(
            f"last_duration={last_duration}\n------------------------")
예제 #14
0
    def _prepare_emissions_data(self, delta=False) -> EmissionsData:
        """
        Build the EmissionsData describing the run so far.

        :delta: True to return only the delta consumption since last call.
            The first delta call has nothing to compare with and returns the
            totals; every delta call stores the totals for the next one.
        """
        cloud: CloudMetadata = self._get_cloud_metadata()
        duration: Time = Time.from_seconds(time.time() - self._start_time)

        if cloud.is_on_private_infra:
            emissions = self._emissions.get_private_infra_emissions(
                self._total_energy, self._geo)  # float: kg co2_eq
            geo = self._geo
            country_name, country_iso_code, region = (
                geo.country_name, geo.country_iso_code, geo.region)
            on_cloud, cloud_provider, cloud_region = "N", "", ""
        else:
            emissions = self._emissions.get_cloud_emissions(
                self._total_energy, cloud)
            country_name = self._emissions.get_cloud_country_name(cloud)
            country_iso_code = self._emissions.get_cloud_country_iso_code(
                cloud)
            region = self._emissions.get_cloud_geo_region(cloud)
            on_cloud, cloud_provider, cloud_region = (
                "Y", cloud.provider, cloud.region)

        elapsed_seconds = duration.seconds
        conf = self._conf
        snapshot = EmissionsData(
            timestamp=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
            project_name=self._project_name,
            run_id=str(self.run_id),
            duration=elapsed_seconds,
            emissions=emissions,
            emissions_rate=emissions * 1000 / elapsed_seconds,  # g/s
            cpu_power=self._cpu_power.W,
            gpu_power=self._gpu_power.W,
            ram_power=self._ram_power.W,
            cpu_energy=self._total_cpu_energy.kWh,
            gpu_energy=self._total_gpu_energy.kWh,
            ram_energy=self._total_ram_energy.kWh,
            energy_consumed=self._total_energy.kWh,
            country_name=country_name,
            country_iso_code=country_iso_code,
            region=region,
            on_cloud=on_cloud,
            cloud_provider=cloud_provider,
            cloud_region=cloud_region,
            os=conf.get("os"),
            python_version=conf.get("python_version"),
            gpu_count=conf.get("gpu_count"),
            gpu_model=conf.get("gpu_model"),
            cpu_count=conf.get("cpu_count"),
            cpu_model=conf.get("cpu_model"),
            longitude=conf.get("longitude"),
            latitude=conf.get("latitude"),
            ram_total_size=conf.get("ram_total_size"),
            tracking_mode=conf.get("tracking_mode"),
        )

        result = snapshot
        if delta:
            previous = self._previous_emissions
            if previous is not None:
                # Work on a copy so the stored totals stay untouched
                result = dataclasses.replace(snapshot)
                # Compute emissions rate from delta
                result.compute_delta_emission(previous)
            # TODO : find a way to store _previous_emissions only when
            # TODO : the API call succeeded
            self._previous_emissions = snapshot
        logger.debug(result)
        return result
예제 #15
0
    api_call_interval=4,
    api_key="12aaaaaa-0b23-1234-1234-abcdef123456",
    save_to_api=True,
)
def train_model():
    """
    Idle for (occurrence * delay) seconds, printing the remaining time
    before each pause.
    The Code Carbon API will be called every (measure_power_secs * api_call_interval) seconds.
    """
    delay = 60  # Seconds
    occurrence = 60 * 24 * 365 * 100  # Run for 100 years !
    i = 0
    while i < occurrence:
        print(
            f"{occurrence * delay - i * delay} seconds before ending script..."
        )
        time.sleep(delay)
        i += 1


if __name__ == "__main__":
    # Let every message through, debug included
    logger.setLevel(logging.DEBUG)
    # File handler that also records debug messages
    fh = logging.FileHandler("codecarbon.log")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)-12s: %(levelname)-8s %(message)s"))
    logger.addHandler(fh)
    logger.debug("GO!")
    model = train_model()
    logger.debug("THE END!")