def __init__(
    self,
    *args,
    country_iso_code: Optional[str] = None,
    region: Optional[str] = None,
    cloud_provider: Optional[str] = None,
    cloud_region: Optional[str] = None,
    country_2letter_iso_code: Optional[str] = None,
    **kwargs,
):
    """
    Resolve the geographic / cloud context used to pick an emissions
    intensity, then defer to the base tracker.

    :param country_iso_code: 3-letter ISO code of the country where the
        experiment is being run
    :param region: Provincial region, e.g. California in the US; currently
        only affects calculations for the United States and Canada
    :param cloud_provider: Cloud provider used for estimating emissions
        intensity, defaults to None. See
        https://github.com/mlco2/codecarbon/blob/master/codecarbon/data/cloud/impact.csv
        for a list of cloud providers
    :param cloud_region: Region of the cloud data center, defaults to None.
        See the same impact.csv for a list of cloud regions
    :param country_2letter_iso_code: For use with the CO2Signal emissions
        API. See http://api.electricitymap.org/v3/zones for valid codes.
    """
    self._cloud_provider: Optional[str] = cloud_provider
    self._cloud_region: Optional[str] = cloud_region
    self._country_iso_code: Optional[str] = country_iso_code
    # Regions are matched case-insensitively downstream.
    self._region: Optional[str] = None if region is None else region.lower()

    if self._cloud_provider:
        if self._cloud_region is None:
            logger.error(
                "CODECARBON : Cloud Region must not be None if cloud provider is set"
            )
        # Validate the provider/region pair against the bundled table.
        emissions_df = DataSource().get_cloud_emissions_data()
        provider_match = emissions_df["provider"] == self._cloud_provider
        region_match = emissions_df["region"] == self._cloud_region
        if len(emissions_df.loc[provider_match & region_match]) == 0:
            logger.error("CODECARBON : Cloud Provider/Region "
                         f"{self._cloud_provider} {self._cloud_region} "
                         "not found in cloud emissions data.")

    if self._country_iso_code:
        try:
            energy_mix = DataSource().get_global_energy_mix_data()
            self._country_name: str = energy_mix[
                self._country_iso_code]["countryName"]
        except KeyError as e:
            logger.error(
                f"CODECARBON : Does not support country with ISO code {self._country_iso_code} "
                f"Exception occured {e}")

    # Normalized to upper case for the CO2Signal API.
    if country_2letter_iso_code:
        self.country_2letter_iso_code: Optional[str] = (
            country_2letter_iso_code.upper())
    else:
        self.country_2letter_iso_code = None

    super().__init__(*args, **kwargs)
def __init__(
    self,
    country_iso_code: str,
    *args,
    region: Optional[str] = None,
    **kwargs,
):
    """
    :param country_iso_code: 3-letter ISO code of the country where the
        experiment is being run
    :param region: Provincial region, e.g. California in the US; currently
        only affects calculations for the United States
    """
    # TODO: Currently we silently use a default value of Canada.
    # Decide if we should fail with missing args.
    if country_iso_code is None:
        self._country_iso_code: str = "CAN"
    else:
        self._country_iso_code = country_iso_code

    try:
        mix = DataSource().get_global_energy_mix_data()
        self._country_name: str = mix.get(
            self._country_iso_code).get("countryName")
    except Exception as e:
        logger.error(
            f"CODECARBON : Does not support country with ISO code {self._country_iso_code} "
            f"Exception occured {e}")

    # Regions are matched case-insensitively downstream.
    self._region: Optional[str] = None if region is None else region.lower()

    super().__init__(*args, **kwargs)
def get_country_emissions(self, energy: Energy, geo: GeoMetadata) -> float:
    """
    Computes emissions for a country on private infra, given a quantity of
    power consumed, by using data for the mix of energy sources of that
    country.

    :param energy: Mean power consumption of the process (kWh)
    :param geo: Country and region metadata
    :return: CO2 emissions in kg
    """
    energy_mix = self._data_source.get_global_energy_mix_data()

    if geo.country_iso_code not in energy_mix:
        # Unknown country: fall back on the world-average carbon intensity
        # instead of failing.
        logger.warning(
            f"We do not have data for {geo.country_iso_code}, using world average."
        )
        # Reuse the injected data source (same as the lookup above) rather
        # than constructing a fresh DataSource and re-reading the file.
        carbon_intensity_per_source = (
            self._data_source.get_carbon_intensity_per_source_data())
        return (EmissionsPerKWh.from_g_per_kWh(
            carbon_intensity_per_source.get("world_average")).kgs_per_kWh *
                energy.kWh)  # kgs

    country_energy_mix: Dict = energy_mix[geo.country_iso_code]
    emissions_per_kWh = self._global_energy_mix_to_emissions_rate(
        country_energy_mix)
    logger.debug(
        f"We apply an energy mix of {emissions_per_kWh.kgs_per_kWh*1000:.0f}"
        + f" g.CO2eq/kWh for {geo.country_name}")

    return emissions_per_kWh.kgs_per_kWh * energy.kWh  # kgs
def _get_cpu_power_from_registry(self, cpu_model_raw: str) -> "Optional[int]":
    """
    Look up the constant power draw (TDP, Watts) of a CPU model in the
    bundled power registry.

    :param cpu_model_raw: Raw CPU model string as reported by the system
    :return: power in Watts, or None when the model is not present in the
        registry. (The previous annotation claimed ``int`` although the
        miss path returns None; string form avoids needing a new import.)
    """
    cpu_power_df = DataSource().get_cpu_power_data()
    cpu_matching = self._get_matching_cpu(cpu_model_raw, cpu_power_df)
    if cpu_matching:
        # Found a registry entry: resolve its constant power value.
        return self._get_cpu_constant_power(cpu_matching, cpu_power_df)
    return None
def __init__(
    self,
    project_name: str = "codecarbon",
    measure_power_secs: int = 15,
    output_dir: str = ".",
    save_to_file: bool = True,
    gpu_ids: Optional[List] = None,
):
    """
    :param project_name: Project name for current experiment run, default
        name as "codecarbon"
    :param measure_power_secs: Interval (in seconds) to measure hardware
        power usage, defaults to 15
    :param output_dir: Directory path to which the experiment details are
        logged in a CSV file called `emissions.csv`, defaults to current
        directory
    :param save_to_file: Indicates if the emission artifacts should be
        logged to a file, defaults to True
    :param gpu_ids: User-specified known gpu ids to track, defaults to None
    """
    # Plain run configuration.
    self._project_name: str = project_name
    self._measure_power_secs: int = measure_power_secs
    self._output_dir: str = output_dir

    # Timing / accounting state.
    self._start_time: Optional[float] = None
    self._last_measured_time: float = time.time()
    self._total_energy: Energy = Energy.from_energy(kwh=0)

    self._scheduler = BackgroundScheduler()

    # Hardware detection: NVIDIA GPUs first, then one CPU backend
    # (Power Gadget preferred over RAPL).
    self._hardware = []
    if gpu.is_gpu_details_available():
        logger.info("CODECARBON : Tracking Nvidia GPU via pynvml")
        self._hardware.append(GPU.from_utils(gpu_ids))
    if cpu.is_powergadget_available():
        logger.info("CODECARBON : Tracking Intel CPU via Power Gadget")
        self._hardware.append(
            CPU.from_utils(self._output_dir, "intel_power_gadget"))
    elif cpu.is_rapl_available():
        logger.info("CODECARBON : Tracking Intel CPU via RAPL interface")
        self._hardware.append(CPU.from_utils(self._output_dir, "intel_rapl"))

    # Run `self._measure_power` every `measure_power_secs` seconds in a
    # background thread.
    self._scheduler.add_job(self._measure_power,
                            "interval",
                            seconds=measure_power_secs)

    self._data_source = DataSource()
    self._emissions: Emissions = Emissions(self._data_source)

    # Output sinks.
    self.persistence_objs: List[BaseOutput] = []
    if save_to_file:
        self.persistence_objs.append(
            FileOutput(os.path.join(self._output_dir, "emissions.csv")))
def _get_power_from_constant(self) -> "Optional[int]":
    """
    Get CPU power from constant mode.

    :return: power in Watt, or None when cpuinfo is unavailable or the CPU
        model cannot be matched in the power registry. (The previous
        annotation claimed ``int`` although the miss path returns None;
        string form avoids needing a new import.)
    """
    cpu_info = cpuinfo.get_cpu_info()
    if cpu_info:
        model_raw = cpu_info.get("brand_raw", "")
        model = parse_cpu_model(model_raw)
        cpu_power_df = DataSource().get_cpu_power_data()
        cpu_power_df_model = cpu_power_df[cpu_power_df["Name"] == model]
        if len(cpu_power_df_model) > 0:
            # The TDP column holds the constant power draw for this model.
            power = cpu_power_df_model["TDP"].tolist()[0]
            return power
    # cpuinfo unavailable or model not found in the registry.
    return None
def _global_energy_mix_to_emissions_rate(
        energy_mix: Dict) -> EmissionsPerKWh:
    """
    Convert a mix of electricity sources into emissions per kWh.

    :param energy_mix: A dictionary that breaks down the electricity
        produced into energy sources, with a total value. Format will vary,
        but must have keys for "total_TWh"
    :return: an EmissionsPerKWh object representing the average emissions
        rate in Kgs.CO2 / kWh
    """
    # If we have the chance to have the carbon intensity for this country,
    # use it directly.
    if energy_mix.get("carbon_intensity"):
        return EmissionsPerKWh.from_g_per_kWh(
            energy_mix.get("carbon_intensity"))

    # Else we compute it from the energy mix.
    # Read carbon_intensity from the json data file.
    carbon_intensity_per_source = (
        DataSource().get_carbon_intensity_per_source_data())
    carbon_intensity = 0
    energy_sum = energy_mix["total_TWh"]
    energy_sum_computed = 0

    # Iterate through each source of energy in the country.
    for energy_type, energy_per_year in energy_mix.items():
        if "_TWh" in energy_type:
            # Carbon intensity of this source for this country.
            carbon_intensity_for_type = carbon_intensity_per_source.get(
                energy_type[:-len("_TWh")])
            # `is not None` (not truthiness) so a legitimate zero-intensity
            # source still counts toward the computed sum; only keys absent
            # from the source table (e.g. "total_TWh") are skipped.
            if carbon_intensity_for_type is not None:
                carbon_intensity += (energy_per_year /
                                     energy_sum) * carbon_intensity_for_type
                energy_sum_computed += energy_per_year

    # Sanity check: compare with a small relative tolerance instead of
    # exact float equality, which spuriously fails on rounding error.
    if abs(energy_sum_computed - energy_sum) > 1e-6 * abs(energy_sum):
        logger.error(
            f"We find {energy_sum_computed} TWh instead of {energy_sum} TWh for {energy_mix.get('official_name_en')}, using world average."
        )
        return EmissionsPerKWh.from_g_per_kWh(
            carbon_intensity_per_source.get("world_average"))

    return EmissionsPerKWh.from_g_per_kWh(carbon_intensity)
def __init__(
    self,
    *args,
    country_iso_code: Optional[str] = _sentinel,
    region: Optional[str] = _sentinel,
    cloud_provider: Optional[str] = _sentinel,
    cloud_region: Optional[str] = _sentinel,
    country_2letter_iso_code: Optional[str] = _sentinel,
    **kwargs,
):
    """
    Resolve the geographic / cloud context from explicit arguments and the
    hierarchical configuration, then defer to the base tracker.

    :param country_iso_code: 3-letter ISO code of the country where the
        experiment is being run
    :param region: Provincial region, e.g. California in the US; currently
        only affects calculations for the United States and Canada
    :param cloud_provider: Cloud provider used for estimating emissions
        intensity, defaults to None. See
        https://github.com/mlco2/codecarbon/blob/master/codecarbon/data/cloud/impact.csv
        for a list of cloud providers
    :param cloud_region: Region of the cloud data center, defaults to None.
        See the same impact.csv for a list of cloud regions.
    :param country_2letter_iso_code: For use with the CO2Signal emissions
        API. See http://api.electricitymap.org/v3/zones for valid codes.
    """
    # Resolve each option against the config hierarchy; `_sentinel` marks
    # "not passed explicitly".
    self._external_conf = get_hierarchical_config()
    self._set_from_conf(cloud_provider, "cloud_provider")
    self._set_from_conf(cloud_region, "cloud_region")
    self._set_from_conf(country_2letter_iso_code, "country_2letter_iso_code")
    self._set_from_conf(country_iso_code, "country_iso_code")
    self._set_from_conf(region, "region")
    logger.info("offline tracker init")

    # Regions are matched case-insensitively downstream.
    if self._region is not None:
        assert isinstance(self._region, str)
        self._region: str = self._region.lower()

    if self._cloud_provider:
        if self._cloud_region is None:
            # Fix: single space between the concatenated halves (the
            # previous message rendered "provided  if" with two spaces).
            logger.error("Cloud Region must be provided " +
                         "if cloud provider is set")
        # Validate the provider/region pair against the bundled table.
        df = DataSource().get_cloud_emissions_data()
        if (len(df.loc[(df["provider"] == self._cloud_provider)
                       & (df["region"] == self._cloud_region)]) == 0):
            logger.error("Cloud Provider/Region "
                         f"{self._cloud_provider} {self._cloud_region} "
                         "not found in cloud emissions data.")

    if self._country_iso_code:
        try:
            self._country_name: str = DataSource(
            ).get_global_energy_mix_data()[
                self._country_iso_code]["country_name"]
        except KeyError as e:
            logger.error("Does not support country" +
                         f" with ISO code {self._country_iso_code} "
                         f"Exception occurred {e}")

    # Normalized to upper case for the CO2Signal API.
    if self._country_2letter_iso_code:
        assert isinstance(self._country_2letter_iso_code, str)
        self._country_2letter_iso_code: str = (
            self._country_2letter_iso_code.upper())

    super().__init__(*args, **kwargs)
def __init__(
    self,
    project_name: Optional[str] = _sentinel,
    measure_power_secs: Optional[int] = _sentinel,
    api_call_interval: Optional[int] = _sentinel,
    api_endpoint: Optional[str] = _sentinel,
    api_key: Optional[str] = _sentinel,
    output_dir: Optional[str] = _sentinel,
    output_file: Optional[str] = _sentinel,
    save_to_file: Optional[bool] = _sentinel,
    save_to_api: Optional[bool] = _sentinel,
    save_to_logger: Optional[bool] = _sentinel,
    logging_logger: Optional[LoggerOutput] = _sentinel,
    gpu_ids: Optional[List] = _sentinel,
    emissions_endpoint: Optional[str] = _sentinel,
    experiment_id: Optional[str] = _sentinel,
    co2_signal_api_token: Optional[str] = _sentinel,
    tracking_mode: Optional[str] = _sentinel,
    log_level: Optional[Union[int, str]] = _sentinel,
    on_csv_write: Optional[str] = _sentinel,
    logger_preamble: Optional[str] = _sentinel,
):
    """
    :param project_name: Project name for current experiment run, default
        name as "codecarbon"
    :param measure_power_secs: Interval (in seconds) to measure hardware
        power usage, defaults to 15
    :param api_call_interval: Occurrence to wait before calling API :
        -1 : only call api on flush() and at the end.
        1 : at every measure
        2 : every 2 measure, etc...
    :param api_endpoint: Optional URL of Code Carbon API endpoint for
        sending emissions data
    :param api_key: API key for Code Carbon API, mandatory to use it !
    :param output_dir: Directory path to which the experiment details are
        logged, defaults to current directory
    :param output_file: Name of output CSV file, defaults to `emissions.csv`
    :param save_to_file: Indicates if the emission artifacts should be
        logged to a file, defaults to True
    :param save_to_api: Indicates if the emission artifacts should be sent
        to the CodeCarbon API, defaults to False
    :param save_to_logger: Indicates if the emission artifacts should be
        written to a dedicated logger, defaults to False
    :param logging_logger: LoggerOutput object encapsulating a
        logging.logger or a Google Cloud logger
    :param gpu_ids: User-specified known gpu ids to track, defaults to None
    :param emissions_endpoint: Optional URL of http endpoint for sending
        emissions data
    :param experiment_id: Id of the experiment
    :param co2_signal_api_token: API token for co2signal.com (requires
        sign-up for free beta)
    :param tracking_mode: One of "process" or "machine" in order to measure
        the power consumption due to the entire machine or try and isolate
        the tracked process's consumption. Defaults to "machine"
    :param log_level: Global codecarbon log level. Accepts one of:
        {"debug", "info", "warning", "error", "critical"}.
        Defaults to "info".
    :param on_csv_write: "append" or "update". Whether to always append a
        new line to the csv when writing or to update the existing `run_id`
        row (useful when calling `tracker.flush()` manually). Accepts one
        of "append" or "update".
    :param logger_preamble: String to systematically include in the
        logger's messages. Defaults to "".
    """
    # Resolve every option against the hierarchical configuration
    # (explicit argument wins over env/config file over default);
    # `_sentinel` marks "not passed explicitly".
    self._external_conf = get_hierarchical_config()
    self._set_from_conf(api_call_interval, "api_call_interval", 8, int)
    self._set_from_conf(api_endpoint, "api_endpoint",
                        "https://api.codecarbon.io")
    self._set_from_conf(co2_signal_api_token, "co2_signal_api_token")
    self._set_from_conf(emissions_endpoint, "emissions_endpoint")
    self._set_from_conf(gpu_ids, "gpu_ids")
    self._set_from_conf(log_level, "log_level", "info")
    self._set_from_conf(measure_power_secs, "measure_power_secs", 15, int)
    self._set_from_conf(output_dir, "output_dir", ".")
    self._set_from_conf(output_file, "output_file", "emissions.csv")
    self._set_from_conf(project_name, "project_name", "codecarbon")
    self._set_from_conf(save_to_api, "save_to_api", False, bool)
    self._set_from_conf(save_to_file, "save_to_file", True, bool)
    self._set_from_conf(save_to_logger, "save_to_logger", False, bool)
    self._set_from_conf(logging_logger, "logging_logger")
    self._set_from_conf(tracking_mode, "tracking_mode", "machine")
    self._set_from_conf(on_csv_write, "on_csv_write", "append")
    self._set_from_conf(logger_preamble, "logger_preamble", "")

    assert self._tracking_mode in ["machine", "process"]
    set_logger_level(self._log_level)
    set_logger_format(self._logger_preamble)

    # Accounting state: accumulated energy per component and last
    # measured power draw.
    self._start_time: Optional[float] = None
    self._last_measured_time: float = time.time()
    self._total_energy: Energy = Energy.from_energy(kWh=0)
    self._total_cpu_energy: Energy = Energy.from_energy(kWh=0)
    self._total_gpu_energy: Energy = Energy.from_energy(kWh=0)
    self._total_ram_energy: Energy = Energy.from_energy(kWh=0)
    self._cpu_power: Power = Power.from_watts(watts=0)
    self._gpu_power: Power = Power.from_watts(watts=0)
    self._ram_power: Power = Power.from_watts(watts=0)
    self._cc_api__out = None
    self._measure_occurrence: int = 0
    self._cloud = None
    self._previous_emissions = None

    # Environment metadata recorded alongside emissions.
    self._conf["os"] = platform.platform()
    self._conf["python_version"] = platform.python_version()
    self._conf["cpu_count"] = count_cpus()
    self._geo = None
    if isinstance(self._gpu_ids, str):
        # Config files deliver gpu_ids as a comma-separated string;
        # normalize to a list of ints.
        self._gpu_ids: List[int] = parse_gpu_ids(self._gpu_ids)
        self._conf["gpu_ids"] = self._gpu_ids
        self._conf["gpu_count"] = len(self._gpu_ids)

    logger.info("[setup] RAM Tracking...")
    ram = RAM(tracking_mode=self._tracking_mode)
    self._conf["ram_total_size"] = ram.machine_memory_GB
    self._hardware: List[Union[RAM, CPU, GPU]] = [ram]

    # Hardware detection
    logger.info("[setup] GPU Tracking...")
    if gpu.is_gpu_details_available():
        logger.info("Tracking Nvidia GPU via pynvml")
        self._hardware.append(GPU.from_utils(self._gpu_ids))
        gpu_names = [n["name"] for n in gpu.get_gpu_static_info()]
        gpu_names_dict = Counter(gpu_names)
        self._conf["gpu_model"] = "".join(
            [f"{i} x {name}" for name, i in gpu_names_dict.items()])
        self._conf["gpu_count"] = len(gpu.get_gpu_static_info())
    else:
        logger.info("No GPU found.")

    # CPU backend: Power Gadget preferred, then RAPL, else a constant
    # TDP-based estimate.
    logger.info("[setup] CPU Tracking...")
    if cpu.is_powergadget_available():
        logger.info("Tracking Intel CPU via Power Gadget")
        hardware = CPU.from_utils(self._output_dir, "intel_power_gadget")
        self._hardware.append(hardware)
        self._conf["cpu_model"] = hardware.get_model()
    elif cpu.is_rapl_available():
        logger.info("Tracking Intel CPU via RAPL interface")
        hardware = CPU.from_utils(self._output_dir, "intel_rapl")
        self._hardware.append(hardware)
        self._conf["cpu_model"] = hardware.get_model()
    else:
        logger.warning(
            "No CPU tracking mode found. Falling back on CPU constant mode."
        )
        tdp = cpu.TDP()
        power = tdp.tdp
        model = tdp.model
        logger.info(f"CPU Model on constant consumption mode: {model}")
        self._conf["cpu_model"] = model
        # NOTE(review): `tdp` is a cpu.TDP instance and is always truthy,
        # so the else branch below looks unreachable; this may have been
        # meant to test `power` — confirm.
        if tdp:
            hardware = CPU.from_utils(self._output_dir, "constant", model,
                                      power)
            self._hardware.append(hardware)
        else:
            logger.warning("Failed to match CPU TDP constant. " +
                           "Falling back on a global constant.")
            hardware = CPU.from_utils(self._output_dir, "constant")
            self._hardware.append(hardware)

    self._conf["hardware"] = list(
        map(lambda x: x.description(), self._hardware))

    logger.info(">>> Tracker's metadata:")
    logger.info(f" Platform system: {self._conf.get('os')}")
    logger.info(f" Python version: {self._conf.get('python_version')}")
    logger.info(
        f" Available RAM : {self._conf.get('ram_total_size'):.3f} GB")
    logger.info(f" CPU count: {self._conf.get('cpu_count')}")
    logger.info(f" CPU model: {self._conf.get('cpu_model')}")
    logger.info(f" GPU count: {self._conf.get('gpu_count')}")
    logger.info(f" GPU model: {self._conf.get('gpu_model')}")

    # Run `self._measure_power` every `measure_power_secs` seconds in a
    # background thread
    self._scheduler = PeriodicScheduler(
        function=self._measure_power_and_energy,
        interval=self._measure_power_secs,
    )

    self._data_source = DataSource()

    # Location metadata: on private infra a geo lookup provides
    # coordinates as well.
    cloud: CloudMetadata = self._get_cloud_metadata()
    if cloud.is_on_private_infra:
        self._geo = self._get_geo_metadata()
        self._conf["longitude"] = self._geo.longitude
        self._conf["latitude"] = self._geo.latitude
        self._conf["region"] = cloud.region
        self._conf["provider"] = cloud.provider
    else:
        self._conf["region"] = cloud.region
        self._conf["provider"] = cloud.provider

    self._emissions: Emissions = Emissions(self._data_source,
                                           self._co2_signal_api_token)

    # Output sinks: CSV file, dedicated logger, HTTP endpoint, and/or the
    # CodeCarbon API.
    self.persistence_objs: List[BaseOutput] = list()
    if self._save_to_file:
        self.persistence_objs.append(
            FileOutput(
                os.path.join(self._output_dir, self._output_file),
                self._on_csv_write,
            ))
    if self._save_to_logger:
        self.persistence_objs.append(self._logging_logger)
    if self._emissions_endpoint:
        self.persistence_objs.append(HTTPOutput(emissions_endpoint))
    if self._save_to_api:
        experiment_id = self._set_from_conf(
            experiment_id, "experiment_id",
            "5b0fa12a-3dd7-45bb-9766-cc326314d9f1")
        self._cc_api__out = CodeCarbonAPIOutput(
            endpoint_url=self._api_endpoint,
            experiment_id=experiment_id,
            api_key=api_key,
            conf=self._conf,
        )
        self.run_id = self._cc_api__out.run_id
        self.persistence_objs.append(self._cc_api__out)
    else:
        # No API output: mint a local run id.
        self.run_id = uuid.uuid4()
def get_test_data_source() -> DataSource:
    """Build a fresh DataSource instance for use in tests."""
    data_source = DataSource()
    return data_source
class Data:
    """Aggregates emissions data for the dashboard: per-project tables and
    summaries, human-friendly carbon equivalents, and choropleth/barchart
    datasets."""

    def __init__(self):
        # Shared handles used by the choropleth/region/cloud helpers below.
        self._data_source = DataSource()
        self._emissions = Emissions(self._data_source)

    @staticmethod
    def get_project_data(df: pd.DataFrame, project_name) -> dt.DataTable:
        """Build a Dash DataTable of all runs for one project, sorted by
        timestamp."""
        project_df = df[df.project_name == project_name]
        project_df = project_df.sort_values(by="timestamp")
        project_data = project_df.to_dict("rows")
        columns = [{
            "name": column,
            "id": column
        } for column in project_df.columns]
        return dt.DataTable(data=project_data, columns=columns)

    @staticmethod
    def get_project_summary(project_data: List[Dict]):
        """Summarize a project's runs: last-run stats plus totals across
        all runs, carrying over the last run's location/cloud metadata.

        :param project_data: list of run records (dicts); the last element
            is treated as the most recent run
        :return: nested dict with "last_run", "total" and location keys
        """
        last_run = project_data[-1]
        project_summary = {
            "last_run": {
                "timestamp": last_run["timestamp"],
                "duration": last_run["duration"],
                "emissions": round(last_run["emissions"], 1),
                "energy_consumed": round((last_run["energy_consumed"]), 1),
            },
            "total": {
                "duration":
                sum(
                    map(lambda experiment: experiment["duration"],
                        project_data)),
                "emissions":
                round(
                    sum(
                        map(lambda experiment: experiment["emissions"],
                            project_data)),
                    1,
                ),
                "energy_consumed":
                round(
                    sum(
                        map(
                            lambda experiment: experiment["energy_consumed"],
                            project_data,
                        )),
                    1,
                ),
            },
            "country_name": last_run["country_name"],
            "country_iso_code": last_run["country_iso_code"],
            "region": last_run["region"],
            "on_cloud": last_run["on_cloud"],
            "cloud_provider": last_run["cloud_provider"],
            "cloud_region": last_run["cloud_region"],
        }
        return project_summary

    def get_car_miles(self, project_carbon_equivalent: float):
        """
        8.89 × 10-3 metric tons CO2/gallon gasoline ×
        1/22.0 miles per gallon car/truck average ×
        1 CO2, CH4, and N2O/0.988 CO2 = 4.09 x 10-4 metric tons CO2E/mile
        = 0.409 kg CO2E/mile
        Source: EPA

        :param project_carbon_equivalent: total project emissions in kg CO2E
        :return: number of miles driven by avg car
        """
        return "{:.0f}".format(project_carbon_equivalent / 0.409)

    def get_tv_time(self, project_carbon_equivalent: float):
        """
        Gives the amount of time a 32-inch LCD flat screen TV will emit an
        equivalent amount of carbon.
        Ratio is 0.097 kg CO2 / 1 hour tv

        :param project_carbon_equivalent: total project emissions in kg CO2E
        :return: equivalent TV time
        """
        time_in_minutes = project_carbon_equivalent * (1 / 0.097) * 60
        formated_value = "{:.0f} minutes".format(time_in_minutes)
        # Promote the unit to hours, then days, when large enough.
        if time_in_minutes >= 60:
            time_in_hours = time_in_minutes / 60
            formated_value = "{:.0f} hours".format(time_in_hours)
            if time_in_hours >= 24:
                time_in_days = time_in_hours / 24
                formated_value = "{:.0f} days".format(time_in_days)
        return formated_value

    def get_household_fraction(self, project_carbon_equivalent: float):
        """
        Total CO2 emissions for energy use per home: 5.734 metric tons CO2
        for electricity + 2.06 metric tons CO2 for natural gas + 0.26 metric
        tons CO2 for liquid petroleum gas + 0.30 metric tons CO2 for fuel
        oil = 8.35 metric tons CO2 per home per year / 52 weeks =
        160.58 kg CO2/week on average
        Source: EPA

        :param project_carbon_equivalent: total project emissions in kg CO2E
        :return: % of weekly emissions re: an average American household
        """
        return "{:.2f}".format((project_carbon_equivalent / 160.58) * 100)

    def get_global_emissions_choropleth_data(
            self, net_energy_consumed: float) -> List[Dict]:
        """Per-country emissions for `net_energy_consumed` kWh plus the
        energy-mix percentage breakdown, for the world choropleth map."""

        def formatted_energy_percentage(energy_type: float,
                                        total: float) -> float:
            # Share of one source in the country total, one decimal place.
            return float("{:.1f}".format((energy_type / total) * 100))

        global_energy_mix = self._data_source.get_global_energy_mix_data()
        choropleth_data = []
        for country_iso_code in global_energy_mix.keys():
            country_name = global_energy_mix[country_iso_code]["countryName"]
            # "_define" looks like a metadata entry and ATA is Antarctica:
            # both are skipped.
            if country_iso_code not in ["_define", "ATA"]:
                # Local imports — presumably to avoid a circular import;
                # confirm before hoisting to module level.
                from codecarbon.core.units import Energy
                energy_consumed = Energy.from_energy(kwh=net_energy_consumed)
                from codecarbon.external.geography import GeoMetadata
                country_emissions = self._emissions.get_country_emissions(
                    energy_consumed,
                    GeoMetadata(country_name=country_name,
                                country_iso_code=country_iso_code),
                )
                total = global_energy_mix[country_iso_code]["total"]
                choropleth_data.append({
                    "iso_code":
                    country_iso_code,
                    "emissions":
                    country_emissions,
                    "country":
                    country_name,
                    "coal":
                    formatted_energy_percentage(
                        global_energy_mix[country_iso_code]["coal"], total),
                    "petroleum":
                    formatted_energy_percentage(
                        global_energy_mix[country_iso_code]["petroleum"],
                        total),
                    "natural_gas":
                    formatted_energy_percentage(
                        global_energy_mix[country_iso_code]["naturalGas"],
                        total),
                    "low_carbon":
                    formatted_energy_percentage(
                        global_energy_mix[country_iso_code]["lowCarbon"],
                        total),
                })
        return choropleth_data

    def get_regional_emissions_choropleth_data(
            self, net_energy_consumed: float,
            country_iso_code: str) -> List[Dict]:
        """Per-region emissions for countries with regional data (currently
        only the USA), for the regional choropleth map."""
        # add country codes here to render for different countries
        if country_iso_code.upper() not in ["USA"]:
            # No regional data: return a single empty placeholder row.
            return [{"region_code": "", "region_name": "", "emissions": ""}]

        region_emissions = self._data_source.get_country_emissions_data(
            country_iso_code.lower())
        choropleth_data = []
        for region_name in region_emissions.keys():
            region_code = region_emissions[region_name]["regionCode"]
            # "_unit" looks like a metadata entry, not a region.
            if region_name not in ["_unit"]:
                from codecarbon.core.units import Energy
                energy_consumed = Energy.from_energy(kwh=net_energy_consumed)
                from codecarbon.external.geography import GeoMetadata
                emissions = self._emissions.get_region_emissions(
                    energy_consumed,
                    GeoMetadata(country_iso_code=country_iso_code,
                                region=region_name),
                )
                choropleth_data.append({
                    "region_code": region_code,
                    "region_name": region_name.upper(),
                    "emissions": emissions,
                })
        return choropleth_data

    def get_cloud_emissions_barchart_data(
        self,
        net_energy_consumed: float,
        on_cloud: str,
        cloud_provider: str,
        cloud_region: str,
    ) -> Tuple[str, pd.DataFrame]:
        """Emissions of the run's cloud region vs. the provider's other
        regions, for the cloud barchart.

        :return: (provider display name, DataFrame with the project's
            region first followed by the provider's other regions sorted
            by emissions); empty name/frame when not a cloud run
        """
        if on_cloud == "N":
            return (
                "",
                pd.DataFrame(data={
                    "region": [],
                    "emissions": [],
                    "countryName": []
                }),
            )
        cloud_emissions = self._data_source.get_cloud_emissions_data()
        cloud_emissions = cloud_emissions[[
            "provider", "providerName", "region", "impact", "countryName"
        ]]
        from codecarbon.core.units import EmissionsPerKwh
        # kg CO2 for this run's energy in each candidate region.
        cloud_emissions["emissions"] = cloud_emissions.apply(
            lambda row: EmissionsPerKwh.from_g_per_kwh(row.impact).kgs_per_kwh
            * net_energy_consumed,
            axis=1,
        )
        cloud_emissions_project_region = cloud_emissions[
            cloud_emissions.region == cloud_region]
        # Other regions of the same provider, cleanest first.
        cloud_emissions = cloud_emissions[
            (cloud_emissions.provider == cloud_provider)
            & (cloud_emissions.region != cloud_region)].sort_values(
                by="emissions")
        return (
            cloud_emissions_project_region.iloc[0, :].providerName,
            pd.concat([cloud_emissions_project_region, cloud_emissions]),
        )
def __init__(self):
    """Wire up the shared data source and the emissions calculator."""
    data_source = DataSource()
    self._data_source = data_source
    self._emissions = Emissions(data_source)
def __init__(
    self,
    project_name: str = "codecarbon",
    measure_power_secs: int = 15,
    output_dir: str = ".",
    save_to_file: bool = True,
    gpu_ids: Optional[List] = None,
    emissions_endpoint: Optional[str] = None,
    co2_signal_api_token: Optional[str] = None,
):
    """
    :param project_name: Project name for current experiment run, default
        name as "codecarbon"
    :param measure_power_secs: Interval (in seconds) to measure hardware
        power usage, defaults to 15
    :param output_dir: Directory path to which the experiment details are
        logged in a CSV file called `emissions.csv`, defaults to current
        directory
    :param save_to_file: Indicates if the emission artifacts should be
        logged to a file, defaults to True
    :param gpu_ids: User-specified known gpu ids to track, defaults to None
    :param emissions_endpoint: Optional URL of http endpoint for sending
        emissions data
    :param co2_signal_api_token: API token for co2signal.com (requires
        sign-up for free beta)
    """
    # Plain run configuration and accounting state.
    self._project_name: str = project_name
    self._measure_power_secs: int = measure_power_secs
    self._start_time: Optional[float] = None
    self._last_measured_time: float = time.time()
    self._output_dir: str = output_dir
    self._total_energy: Energy = Energy.from_energy(kwh=0)
    self._scheduler = BackgroundScheduler()
    self._hardware = list()

    # Hardware detection: NVIDIA GPUs via pynvml, then one CPU backend
    # (Power Gadget preferred over RAPL).
    if gpu.is_gpu_details_available():
        logger.info("CODECARBON : Tracking Nvidia GPU via pynvml")
        self._hardware.append(GPU.from_utils(gpu_ids))
    if cpu.is_powergadget_available():
        logger.info("CODECARBON : Tracking Intel CPU via Power Gadget")
        self._hardware.append(
            CPU.from_utils(self._output_dir, "intel_power_gadget"))
    elif cpu.is_rapl_available():
        logger.info("CODECARBON : Tracking Intel CPU via RAPL interface")
        self._hardware.append(
            CPU.from_utils(self._output_dir, "intel_rapl"))

    # Print warning if no supported hardware is found
    if not self._hardware:
        logger.warning(
            "CODECARBON : No CPU/GPU tracking mode found. This "
            "may be due to your code running on Windows WSL, or due to "
            "unsupported hardware (see "
            "https://github.com/mlco2/codecarbon#infrastructure-support)")

    # Run `self._measure_power` every `measure_power_secs` seconds in a
    # background thread
    self._scheduler.add_job(self._measure_power,
                            "interval",
                            seconds=measure_power_secs)

    self._data_source = DataSource()
    self._emissions: Emissions = Emissions(self._data_source)

    # Output sinks: CSV file and/or HTTP endpoint.
    self.persistence_objs: List[BaseOutput] = list()
    if save_to_file:
        self.persistence_objs.append(
            FileOutput(os.path.join(self._output_dir, "emissions.csv")))
    if emissions_endpoint:
        self.persistence_objs.append(HTTPOutput(emissions_endpoint))

    # Module-level token used by the CO2Signal API client.
    if co2_signal_api_token:
        co2_signal.CO2_SIGNAL_API_TOKEN = co2_signal_api_token
def test_get_carbon_intensity_per_source_data(self):
    """Sanity-check the bundled per-source carbon intensity table."""
    # pytest tests/test_emissions.py::TestEmissions::test_get_carbon_intensity_per_source_data
    intensity_by_source = DataSource().get_carbon_intensity_per_source_data()
    # 21 known energy sources; coal is dirty, wind is clean.
    self.assertEqual(len(intensity_by_source.keys()), 21)
    self.assertGreater(intensity_by_source["coal"], 800)
    self.assertLess(intensity_by_source["wind"], 80)
class Data: def __init__(self): self._data_source = DataSource() self._emissions = Emissions(self._data_source) @staticmethod def get_project_data(df: pd.DataFrame, project_name) -> dt.DataTable: project_df = df[df.project_name == project_name] project_df = project_df.sort_values(by="timestamp") project_data = project_df.to_dict("rows") columns = [{ "name": column, "id": column } for column in project_df.columns] return dt.DataTable(data=project_data, columns=columns) @staticmethod def get_project_summary(project_data: List[Dict]): last_run = project_data[-1] project_summary = { "last_run": { "timestamp": last_run["timestamp"], "duration": last_run["duration"], "emissions": round(last_run["emissions"], 1), "energy_consumed": round((last_run["energy_consumed"]), 1), }, "total": { "duration": sum( map(lambda experiment: experiment["duration"], project_data)), "emissions": sum( map(lambda experiment: experiment["emissions"], project_data)), "energy_consumed": sum( map(lambda experiment: experiment["energy_consumed"], project_data)), }, "country_name": last_run["country_name"], "country_iso_code": last_run["country_iso_code"], "region": last_run["region"], "on_cloud": last_run["on_cloud"], "cloud_provider": last_run["cloud_provider"], "cloud_region": last_run["cloud_region"], } return project_summary def get_car_miles(self, project_carbon_equivalent: float): """ 8.89 × 10-3 metric tons CO2/gallon gasoline × 1/22.0 miles per gallon car/truck average × 1 CO2, CH4, and N2O/0.988 CO2 = 4.09 x 10-4 metric tons CO2E/mile = 0.409 kg CO2E/mile Source: EPA :param project_carbon_equivalent: total project emissions in kg CO2E :return: number of miles driven by avg car """ return "{:.0f}".format(project_carbon_equivalent / 0.409) def get_tv_time(self, project_carbon_equivalent: float): """ Gives the amount of time a 32-inch LCD flat screen TV will emit an equivalent amount of carbon Ratio is 0.097 kg CO2 / 1 hour tv :param project_carbon_equivalent: total project emissions in kg 
CO2E :return: equivalent TV time """ time_in_minutes = project_carbon_equivalent * (1 / 0.097) * 60 formated_value = "{:.0f} minutes".format(time_in_minutes) if time_in_minutes >= 60: time_in_hours = time_in_minutes / 60 formated_value = "{:.0f} hours".format(time_in_hours) if time_in_hours >= 24: time_in_days = time_in_hours / 24 formated_value = "{:.0f} days".format(time_in_days) return formated_value def get_household_fraction(self, project_carbon_equivalent: float): """ Total CO2 emissions for energy use per home: 5.734 metric tons CO2 for electricity + 2.06 metric tons CO2 for natural gas + 0.26 metric tons CO2 for liquid petroleum gas + 0.30 metric tons CO2 for fuel oil = 8.35 metric tons CO2 per home per year / 52 weeks = 160.58 kg CO2/week on average Source: EPA :param project_carbon_equivalent: total project emissions in kg CO2E :return: % of weekly emissions re: an average American household """ return "{:.2f}".format((project_carbon_equivalent / 160.58) * 100) def get_global_emissions_choropleth_data( self, net_energy_consumed: float) -> List[Dict]: def formatted_energy_percentage(energy_type: float, total: float) -> float: return float("{:.1f}".format((energy_type / total) * 100)) global_energy_mix = self._data_source.get_global_energy_mix_data() choropleth_data = [] for country_iso_code in global_energy_mix.keys(): country_name = global_energy_mix[country_iso_code]["country_name"] if country_iso_code not in ["_define", "ATA"]: from codecarbon.core.units import Energy energy_consumed = Energy.from_energy(kWh=net_energy_consumed) from codecarbon.external.geography import GeoMetadata country_emissions = self._emissions.get_country_emissions( energy_consumed, GeoMetadata(country_name=country_name, country_iso_code=country_iso_code), ) total = global_energy_mix[country_iso_code]["total_TWh"] choropleth_data.append({ "iso_code": country_iso_code, "emissions": country_emissions, "country": country_name, "fossil": formatted_energy_percentage( 
global_energy_mix[country_iso_code]["fossil_TWh"], total), "geothermal": formatted_energy_percentage( global_energy_mix[country_iso_code]["geothermal_TWh"], total), "hydroelectricity": formatted_energy_percentage( global_energy_mix[country_iso_code] ["hydroelectricity_TWh"], total, ), "nuclear": formatted_energy_percentage( global_energy_mix[country_iso_code]["nuclear_TWh"], total), "solar": formatted_energy_percentage( global_energy_mix[country_iso_code]["solar_TWh"], total), "wind": formatted_energy_percentage( global_energy_mix[country_iso_code]["wind_TWh"], total), }) return choropleth_data def get_regional_emissions_choropleth_data( self, net_energy_consumed: float, country_iso_code: str) -> List[Dict]: # add country codes here to render for different countries if country_iso_code.upper() not in ["USA", "CAN"]: return [{"region_code": "", "region_name": "", "emissions": ""}] try: region_emissions = self._data_source.get_country_emissions_data( country_iso_code.lower()) except DataSourceException: # This country has regional data at the energy mix level, not the emissions level country_energy_mix = self._data_source.get_country_energy_mix_data( country_iso_code.lower()) region_emissions = { region: { "regionCode": region } for region, energy_mix in country_energy_mix.items() } choropleth_data = [] for region_name in region_emissions.keys(): region_code = region_emissions[region_name]["regionCode"] if region_name not in ["_unit"]: from codecarbon.core.units import Energy energy_consumed = Energy.from_energy(kWh=net_energy_consumed) from codecarbon.external.geography import GeoMetadata emissions = self._emissions.get_region_emissions( energy_consumed, GeoMetadata(country_iso_code=country_iso_code, region=region_name), ) choropleth_data.append({ "region_code": region_code, "region_name": region_name.upper(), "emissions": emissions, }) return choropleth_data def get_cloud_emissions_barchart_data( self, net_energy_consumed: float, on_cloud: str, cloud_provider: str, 
cloud_region: str, ) -> Tuple[str, pd.DataFrame]: if on_cloud == "N": return ( "", pd.DataFrame(data={ "region": [], "emissions": [], "country_name": [] }), ) cloud_emissions = self._data_source.get_cloud_emissions_data() cloud_emissions = cloud_emissions[[ "provider", "providerName", "region", "impact", "country_name" ]] from codecarbon.core.units import EmissionsPerKWh cloud_emissions["emissions"] = cloud_emissions.apply( lambda row: EmissionsPerKWh.from_g_per_kWh(row.impact).kgs_per_kWh * net_energy_consumed, axis=1, ) cloud_emissions_project_region = cloud_emissions[cloud_emissions.region == cloud_region] cloud_emissions = cloud_emissions[ (cloud_emissions.provider == cloud_provider) & (cloud_emissions.region != cloud_region)].sort_values( by="emissions") return ( cloud_emissions_project_region.iloc[0, :].providerName, pd.concat([cloud_emissions_project_region, cloud_emissions]), ) @staticmethod def get_data_from_api(host): transformed_projects = [] project_list = Data.list_projects(host) for project in project_list: project_sum_by_experiments_url = ( host + "/experiments/{project_id}/detailed_sums".format( project_id=project["id"])) project_name = project["name"] sums = requests.get(project_sum_by_experiments_url).json() for experiment in sums: experiment["project_name"] = project_name # experiment["emission_rate"] = 0 # if experiment["emissions_count"] > 0: # experiment["emission_rate"] = ( # experiment["emissions_rate"] / experiment["emissions_count"] # ) transformed_projects.append(experiment) df_projects = pd.DataFrame(transformed_projects) return df_projects @staticmethod def list_projects(host): projects = [] teams_url = host + "/teams" teams = requests.get(teams_url).json() for team in teams: projets_url = host + "/projects/team/{team_id}".format( team_id=team["id"]) team_projects = requests.get(projets_url).json() if team_projects: projects.append( list( map( lambda x: { "id": x["id"], "name": x["name"] }, iter(team_projects), ))) project_list = 
sum(projects, []) return project_list