price_perf_row["EntryName"] = r1["EntryName"].values[0] fom_row["PerfTtbarTotal"] = r1["PerfTtbarTotal"].values[0] fom_row["EntryName"] = r1["EntryName"].values[0] if r2.empty: running_vms = 0 else: running_vms = r2["RunningVms"].values[0] if r3.empty: price_perf_row["MaxLimit"] = DEFAULT_MAX_LIMIT fom_row["MaxLimit"] = DEFAULT_MAX_LIMIT else: price_perf_row["MaxLimit"] = r3["MaxLimit"].values[0] fom_row["MaxLimit"] = r3["MaxLimit"].values[0] price_perf = price_performance(row["SpotPrice"], price_perf_row["PerfTtbarTotal"]) price_perf_row["AWS_Price_Performance"] = price_perf fom_row["AWS_Figure_Of_Merit"] = figure_of_merit(running_vms, price_perf_row["MaxLimit"], price_perf) price_perf_rows.append(price_perf_row) fom_rows.append(fom_row) price_perf_df = pd.DataFrame(price_perf_rows) price_perf_df.reindex(sorted(price_perf_df.columns), axis=1) fom_df = pd.DataFrame(fom_rows) fom_df.reindex(sorted(fom_df.columns), axis=1) return {"AWS_Price_Performance": price_perf_df, "AWS_Figure_Of_Merit": fom_df} Transform.describe(FigureOfMerit)
raise RuntimeError('Mismatch in manifest keys: %s, %s' % (m_keys, g_keys)) for key in m_keys: merged_manifests[key] = manifests[key].append( group_manifests[key], ignore_index=True) else: merged_manifests = group_manifests return merged_manifests def read_fe_config(self): if not os.path.isfile(self.de_frontend_configfile): raise RuntimeError( 'Error reading Frontend config for DE %s. ' 'Run configure_gwms_frontend.py to generate one and after every change to the frontend configuration.' % self.de_frontend_configfile) fe_cfg = eval(open(self.de_frontend_configfile, 'r').read()) if not isinstance(fe_cfg, dict): raise ValueError('Frontend config for DE in %s is invalid' % self.de_frontend_configfile) return fe_cfg def sanitize_glidein_cpus(row): if str(row['GLIDEIN_CPUS']).lower() == 'auto': row['GLIDEIN_CPUS'] = row['GLIDEIN_ESTIMATED_CPUS'] row['GLIDEIN_CPUS'] = int(row['GLIDEIN_CPUS']) return row Transform.describe(GlideinRequestManifests)
self.logger.info( f"total number of slots on Nersc = {total_slots_nersc}") # pull slot info from User pool ######## total_slots_userpool = 0 for _index, row in userpool_slots_df.iterrows(): total_slots_userpool += int(row["TotalCpus"]) self.logger.info( f"total number of slots on userpool = {total_slots_userpool}") # compute the relative difference between the two metrics ### rel_diff = 0.0 diff = abs(total_slots_userpool - total_slots_nersc) if diff != 0: more = max(total_slots_nersc, total_slots_userpool) rel_diff = diff / more self.logger.info(f"diff = {diff:f}, rel diff = {rel_diff:f}") # construct the result namespace ############################ results["nersc.count"] = total_slots_nersc results["userpool.count"] = total_slots_userpool results["relative_diff"] = rel_diff return {"nersc_userpool_slots_comparison": results} Transform.describe(CompareNerscUserpoolSlots)
def __init__(self, config):
    super().__init__(config)

def transform(self, data_block):
    """Collect EC2 spot-instance occupancy for every configured account/region.

    :type data_block: :obj:`DataBlock`
    :arg data_block: must contain the ``spot_occupancy_config`` product; its
        ``to_dict()`` maps account profile names to a dict keyed by region
        (same layout the AWSSpotPrice transform consumes).

    Returns ``{"AWS_Occupancy": DataFrame}`` with one row per capacity record
    (empty DataFrame when no instances are found).
    """
    self.logger.debug("in AWSOccupancy transform")
    account_dict = data_block.get("spot_occupancy_config").to_dict()
    self.logger.debug(f"account_dict {account_dict}")
    occupancy_data = []
    # Iterate (account, regions) pairs instead of re-indexing the dict.
    for account, regions in account_dict.items():
        for region in regions:
            # Fixed misspelled local ("occcupancy") from the original.
            occupancy = OccupancyForRegion(region, profile_name=account)
            instances = occupancy.get_ec2_instances()
            if instances:
                data = occupancy.capacity(instances)
                if data:
                    occupancy_data += data

    # Flatten the OccupancyData objects to plain dicts so the resulting
    # DataFrame has stable, well-defined columns.
    oc_list = [i.data for i in occupancy_data]
    return {"AWS_Occupancy": pd.DataFrame(oc_list)}


Transform.describe(AWSOccupancy)
"GlideinConfigPerEntryMaxGlideins" ], ] df["GLIDEIN_Supported_VOs"] = vo if limits_df is None: limits_df = df else: limits_df = limits_df.append(df, ignore_index=True) az_it = sub_df.loc[ sub_df["GLIDEIN_Supported_VOs"].str.contains(vo), ["INSTANCE_TYPE", "AVAILABILITY_ZONE"]] regions = { az[:-1] for az in az_it.AVAILABILITY_ZONE.unique() } so_config_dict[vo] = {} for region in regions: it = az_it.loc[az_it["AVAILABILITY_ZONE"].str.contains( region)].INSTANCE_TYPE.unique().tolist() so_config_dict[vo][region] = it limits_df = limits_df.rename(columns=_ATTR_TRANSLATION_MAP) return { "aws_instance_limits": limits_df, "spot_occupancy_config": pd.DataFrame.from_dict(so_config_dict) } Transform.describe(AWSFactoryEntryData)
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

from decisionengine.framework.modules import Transform
from decisionengine.framework.modules.Transform import Parameter


@Transform.supports_config(Parameter("data_product_name", type=str), Parameter("consumes", type=list))
class DynamicTransform(Transform.Transform):
    """Sum the configured integer data products into one named output product.

    Configuration:
        data_product_name -- key under which the summed result is published.
        consumes          -- list of data-block keys whose int values are summed.
    """

    def __init__(self, config):
        # Fix: initialize the Transform base class, as every other transform
        # in this package does; the original skipped this call.
        super().__init__(config)
        self.data_product_name = config["data_product_name"]
        # Declare inputs/outputs dynamically from the configuration.
        self._consumes = dict.fromkeys(config["consumes"], int)
        self._produces = {self.data_product_name: int}

    def transform(self, data_block):
        """Return {data_product_name: sum of all consumed values}."""
        all_values = [data_block[key] for key in self._consumes.keys()]
        return {self.data_product_name: sum(all_values, 0)}


Transform.describe(DynamicTransform)
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

import pandas as pd

from decisionengine.framework.modules import Transform


@Transform.consumes(foo=pd.DataFrame)
@Transform.produces(bar=pd.DataFrame)
class TransformNOP(Transform.Transform):
    """Trivial transform: publishes column ``key2`` of the consumed ``foo``
    frame as the produced ``bar`` frame."""

    def __init__(self, config):
        super().__init__(config)

    def transform(self, data_block):
        """Build the ``bar`` product from the ``key2`` column of ``foo``."""
        source = self.foo(data_block)
        result = pd.DataFrame(source["key2"])
        return {"bar": result}


Transform.describe(TransformNOP)
from decisionengine.framework.modules import Transform


@Transform.consumes(GCE_Instance_Performance=pd.DataFrame, GCE_Occupancy=pd.DataFrame)
@Transform.produces(GCE_Burn_Rate=pd.DataFrame)
class GceBurnRate(Transform.Transform):
    """Compute the aggregate GCE burn rate (occupancy x preemptible price)."""

    def __init__(self, config):
        super().__init__(config)

    def transform(self, data_block):
        """Join occupancy with instance performance and sum per-row burn rates.

        Returns ``{"GCE_Burn_Rate": one-row DataFrame}`` whose ``BurnRate``
        is 0.0 when there is no occupancy or the inner join is empty.
        """
        self.logger.debug("in GceBurnRate transform")
        perf_df = self.GCE_Instance_Performance(data_block).fillna(0)
        occ_df = self.GCE_Occupancy(data_block).fillna(0)

        total = 0.0
        if not occ_df.empty:
            joined = pd.merge(occ_df, perf_df, how="inner",
                              on=["AvailabilityZone", "InstanceType"])
            if not joined.empty:
                # Per-row burn rate; coerce both columns to numeric first.
                joined["BurnRate"] = pd.to_numeric(
                    joined["Occupancy"]) * pd.to_numeric(joined["PreemptiblePrice"])
                total = joined["BurnRate"].sum()

        return {"GCE_Burn_Rate": pd.DataFrame([{"BurnRate": total}])}


Transform.describe(GceBurnRate)
self.logger.debug("in AWSSpotPrice transform") account_dict = data_block.get("spot_occupancy_config").to_dict() self.logger.debug(f"account_dict {account_dict}") sp_data = [] for account in account_dict: for region, instances in account_dict[account].items(): spot_price_info = AWSSpotPriceForRegion(region, profile_name=account) spot_price_info.init_query(instance_types=instances) spot_price_history = spot_price_info.get_price(self.logger) if spot_price_history: sp_data += spot_price_info.spot_price_summary( spot_price_history) sp_list = [i.data for i in sp_data] column_names = [ "AccountName", "AvailabilityZone", "InstanceType", "ProductDescription", "SpotPrice", "Timestamp", ] return { "provisioner_resource_spot_prices": pd.DataFrame(sp_list, columns=column_names) } Transform.describe(AWSSpotPrice)
@Transform.consumes(provisioner_resource_spot_prices=pd.DataFrame, AWS_Occupancy=pd.DataFrame)
@Transform.produces(AWS_Burn_Rate=pd.DataFrame)
class AwsBurnRate(Transform.Transform):
    """Compute the aggregate AWS burn rate (running VMs x spot price)."""

    def __init__(self, config):
        super().__init__(config)

    def transform(self, data_block):
        """Join occupancy with spot prices and sum per-row burn rates.

        Returns ``{"AWS_Burn_Rate": one-row DataFrame}`` whose ``BurnRate``
        is 0.0 when there is no occupancy or the inner join is empty.
        """
        prices = self.provisioner_resource_spot_prices(data_block).fillna(0)
        occ = self.AWS_Occupancy(data_block).fillna(0)

        total = 0.0
        if not occ.empty:
            joined = pd.merge(occ, prices, how="inner",
                              on=["AccountName", "AvailabilityZone", "InstanceType"])
            if not joined.empty:
                # Per-row burn rate; coerce both columns to numeric first.
                joined["BurnRate"] = pd.to_numeric(
                    joined["RunningVms"]) * pd.to_numeric(joined["SpotPrice"])
                total = joined["BurnRate"].sum()

        return {"AWS_Burn_Rate": pd.DataFrame([{"BurnRate": total}])}


Transform.describe(AwsBurnRate)
factory_entries_lcf = self.Factory_Entries_LCF(data_block) figures_of_merit = [] for _i, row in factory_entries_lcf.iterrows(): entry_name = row["EntryName"] perf_df = performance[performance.EntryName == entry_name] for _j, perf_row in perf_df.iterrows(): running = float(row["GlideinMonitorTotalStatusRunning"]) max_allowed = float(row["GlideinConfigPerEntryMaxGlideins"]) max_idle = float(row["GlideinConfigPerEntryMaxIdle"]) idle = float(row["GlideinMonitorTotalStatusIdle"]) figures_of_merit.append({ "EntryName": entry_name, "FigureOfMerit": fom.figure_of_merit(perf_row["PricePerformance"], running, max_allowed, idle, max_idle, self.logger), }) return { "Nersc_Price_Performance": performance.filter(["EntryName", "PricePerformance"]), "Nersc_Figure_Of_Merit": pd.DataFrame(figures_of_merit), } Transform.describe(NerscFigureOfMerit)
if num_with_id == 0: # both and factory_only stay '0' results['factory_only.count'] = len(nersc_df) else: # compare the exact job IDs nersc_id_list = nersc_df.jobid.tolist() factory_id_set = set(factory_id_list) nersc_id_set = set(nersc_id_list) in_both_set = factory_id_set & nersc_id_set factory_only_set = factory_id_set - nersc_id_set nersc_only_set = nersc_id_set - factory_id_set num_in_both = len(in_both_set) num_in_factory = len(factory_only_set) num_in_nersc = len(nersc_only_set) results['both.count'] = num_in_both results['nersc_only.count'] = num_in_nersc results['factory_only.count'] = num_in_factory for index, row in nersc_df.iterrows(): if row['status'] == 'R': results['nersc.running.count'] += 1 return {'nersc_factory_jobs_comparison': results} Transform.describe(CompareNerscFactoryJobs)
self.logger = logging.getLogger() self.price_performance = config.get('price_performance', 1) def transform(self, datablock): """ Grid sites FOM are straight up assumed as 0 for now """ entries = self.Factory_Entries_Grid(datablock) if entries is None: entries = pandas.DataFrame({ATTR_ENTRYNAME: []}) foms = [] if not entries.empty: for index, entry in entries.iterrows(): running = float(entry['GlideinMonitorTotalStatusRunning']) max_allowed = float(entry['GlideinConfigPerEntryMaxGlideins']) max_idle = float(entry['GlideinConfigPerEntryMaxIdle']) idle = float(entry['GlideinMonitorTotalStatusIdle']) f = { ATTR_ENTRYNAME: entry[ATTR_ENTRYNAME], ATTR_FOM: figure_of_merit(self.price_performance, running, max_allowed, idle, max_idle) } foms.append(f) return {'Grid_Figure_Of_Merit': pandas.DataFrame(foms)} Transform.describe(GridFigureOfMerit)
df_job_clusters = pd.DataFrame(totals, columns=[ "Job_Bucket_Criteria_Expr", "Site_Bucket_Criteria_Expr", "Totals", "Frontend_Group" ]) self.logger.debug(f"Job category totals: {df_job_clusters}") except KeyError: self.logger.exception( "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names" ) return {"job_clusters": self.EMPTY_JOB_CLUSTER} except ValueError: self.logger.exception( "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names" ) return {"job_clusters": self.EMPTY_JOB_CLUSTER} except pd.core.computation.ops.UndefinedVariableError: self.logger.exception( "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names" ) return {"job_clusters": self.EMPTY_JOB_CLUSTER} self.logger.info("*** Ending job clustering ***") return {"job_clusters": df_job_clusters} Transform.describe(JobClustering)