Python Transformの例、decisionengine.framework.modules.Transform Pythonの例

コード例 #1

0

ファイルを表示

                price_perf_row["EntryName"] = r1["EntryName"].values[0]
                fom_row["PerfTtbarTotal"] = r1["PerfTtbarTotal"].values[0]
                fom_row["EntryName"] = r1["EntryName"].values[0]

            if r2.empty:
                running_vms = 0
            else:
                running_vms = r2["RunningVms"].values[0]

            if r3.empty:
                price_perf_row["MaxLimit"] = DEFAULT_MAX_LIMIT
                fom_row["MaxLimit"] = DEFAULT_MAX_LIMIT
            else:
                price_perf_row["MaxLimit"] = r3["MaxLimit"].values[0]
                fom_row["MaxLimit"] = r3["MaxLimit"].values[0]

            price_perf = price_performance(row["SpotPrice"], price_perf_row["PerfTtbarTotal"])
            price_perf_row["AWS_Price_Performance"] = price_perf
            fom_row["AWS_Figure_Of_Merit"] = figure_of_merit(running_vms, price_perf_row["MaxLimit"], price_perf)
            price_perf_rows.append(price_perf_row)
            fom_rows.append(fom_row)

        price_perf_df = pd.DataFrame(price_perf_rows)
        price_perf_df.reindex(sorted(price_perf_df.columns), axis=1)
        fom_df = pd.DataFrame(fom_rows)
        fom_df.reindex(sorted(fom_df.columns), axis=1)
        return {"AWS_Price_Performance": price_perf_df, "AWS_Figure_Of_Merit": fom_df}


Transform.describe(FigureOfMerit)

コード例 #2

0

ファイルを表示

ファイル: glidein_requests.py プロジェクト: shreyb/decisionengine_modules

                raise RuntimeError('Mismatch in manifest keys: %s, %s' %
                                   (m_keys, g_keys))
            for key in m_keys:
                merged_manifests[key] = manifests[key].append(
                    group_manifests[key], ignore_index=True)
        else:
            merged_manifests = group_manifests
        return merged_manifests

    def read_fe_config(self):
        """
        Load the frontend configuration for the decision engine from disk.

        The file named by ``self.de_frontend_configfile`` is read in full and
        evaluated as a Python expression, which must produce a ``dict``.

        :rtype: :obj:`dict`
        :returns: the evaluated frontend configuration
        :raises RuntimeError: if ``self.de_frontend_configfile`` is not an
            existing regular file
        :raises ValueError: if the evaluated content is not a ``dict``
        """
        if not os.path.isfile(self.de_frontend_configfile):
            raise RuntimeError(
                'Error reading Frontend config for DE %s. '
                'Run configure_gwms_frontend.py to generate one and after every change to the frontend configuration.'
                % self.de_frontend_configfile)

        # Read through a context manager so the handle is closed as soon as
        # the content is in memory instead of whenever the object is collected.
        with open(self.de_frontend_configfile, 'r') as cfg_file:
            cfg_text = cfg_file.read()

        # NOTE(review): eval() executes whatever expression the file contains.
        # This is only acceptable if the file is fully trusted; if the config
        # is always a plain literal, ast.literal_eval would be a safer choice
        # -- confirm against the generator script before switching.
        fe_cfg = eval(cfg_text)
        if not isinstance(fe_cfg, dict):
            raise ValueError('Frontend config for DE in %s is invalid' %
                             self.de_frontend_configfile)
        return fe_cfg


def sanitize_glidein_cpus(row):
    """
    Normalize the ``GLIDEIN_CPUS`` entry of *row* to an integer.

    When the current value reads ``auto`` (compared case-insensitively after
    conversion to ``str``), it is first replaced by the row's
    ``GLIDEIN_ESTIMATED_CPUS`` value. The result is then converted with
    ``int``. *row* is modified in place and also returned.
    """
    cpus = row['GLIDEIN_CPUS']
    wants_auto = str(cpus).lower() == 'auto'
    if wants_auto:
        cpus = row['GLIDEIN_ESTIMATED_CPUS']
        # Store the substituted value before converting, so the row already
        # carries it even if the int() conversion below fails.
        row['GLIDEIN_CPUS'] = cpus
    row['GLIDEIN_CPUS'] = int(cpus)
    return row


Transform.describe(GlideinRequestManifests)

コード例 #3

0

ファイルを表示

        self.logger.info(
            f"total number of slots on Nersc = {total_slots_nersc}")

        # pull slot info from User pool ########

        total_slots_userpool = 0
        for _index, row in userpool_slots_df.iterrows():
            total_slots_userpool += int(row["TotalCpus"])

        self.logger.info(
            f"total number of slots on userpool = {total_slots_userpool}")

        # compute the relative difference between the two metrics ###
        rel_diff = 0.0
        diff = abs(total_slots_userpool - total_slots_nersc)
        if diff != 0:
            more = max(total_slots_nersc, total_slots_userpool)
            rel_diff = diff / more

        self.logger.info(f"diff = {diff:f}, rel diff = {rel_diff:f}")

        # construct the result namespace ############################
        results["nersc.count"] = total_slots_nersc
        results["userpool.count"] = total_slots_userpool
        results["relative_diff"] = rel_diff

        return {"nersc_userpool_slots_comparison": results}


Transform.describe(CompareNerscUserpoolSlots)

コード例 #4

0

ファイルを表示

ファイル: AWSOccupancy.py プロジェクト: knoepfel/decisionengine_modules

    def __init__(self, config):
        """Pass *config* straight to the parent initializer; no extra state is set here."""
        super().__init__(config)

    def transform(self, data_block):
        """
        Collect occupancy records for every account/region pair in the config.

        :arg data_block: object whose ``get("spot_occupancy_config")`` returns
            something with a ``to_dict()`` method; the resulting mapping goes
            from account name to an iterable of regions
        :rtype: :obj:`dict`
        :returns: ``{"AWS_Occupancy": DataFrame}`` where the frame is built
            from the ``data`` attribute of each collected record
        """
        self.logger.debug("in AWSOccupancy transform")
        account_dict = data_block.get("spot_occupancy_config").to_dict()
        self.logger.debug(f"account_dict {account_dict}")

        records = []
        for account, regions in account_dict.items():
            for region in regions:
                region_occupancy = OccupancyForRegion(region, profile_name=account)
                instances = region_occupancy.get_ec2_instances()
                if not instances:
                    # Nothing reported for this region; move on to the next one.
                    continue
                capacity = region_occupancy.capacity(instances)
                if capacity:
                    records += capacity

        # NOTE(review): the original carried a "to fix the test failure"
        # remark on building the frame from a plain list -- rationale not
        # visible here; confirm before changing how the frame is constructed.
        return {"AWS_Occupancy": pd.DataFrame([record.data for record in records])}


Transform.describe(AWSOccupancy)

コード例 #5

0

ファイルを表示

                            "GlideinConfigPerEntryMaxGlideins"
                        ], ]
                    df["GLIDEIN_Supported_VOs"] = vo
                    if limits_df is None:
                        limits_df = df
                    else:
                        limits_df = limits_df.append(df, ignore_index=True)

                    az_it = sub_df.loc[
                        sub_df["GLIDEIN_Supported_VOs"].str.contains(vo),
                        ["INSTANCE_TYPE", "AVAILABILITY_ZONE"]]
                    regions = {
                        az[:-1]
                        for az in az_it.AVAILABILITY_ZONE.unique()
                    }
                    so_config_dict[vo] = {}
                    for region in regions:
                        it = az_it.loc[az_it["AVAILABILITY_ZONE"].str.contains(
                            region)].INSTANCE_TYPE.unique().tolist()
                        so_config_dict[vo][region] = it

            limits_df = limits_df.rename(columns=_ATTR_TRANSLATION_MAP)

        return {
            "aws_instance_limits": limits_df,
            "spot_occupancy_config": pd.DataFrame.from_dict(so_config_dict)
        }


Transform.describe(AWSFactoryEntryData)

コード例 #6

0

ファイルを表示

# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

from decisionengine.framework.modules import Transform
from decisionengine.framework.modules.Transform import Parameter


@Transform.supports_config(Parameter("data_product_name", type=str),
                           Parameter("consumes", type=list))
class DynamicTransform(Transform.Transform):
    """Transform whose consumed and produced product names are taken from its config."""

    def __init__(self, config):
        """
        Record the produced product name and the consumed product names.

        :arg config: mapping with a ``data_product_name`` entry and a
            ``consumes`` entry (an iterable of names)
        """
        # NOTE(review): the parent initializer is not called here -- confirm
        # that this is intentional before relying on base-class attributes.
        product_name = config["data_product_name"]
        self.data_product_name = product_name
        self._consumes = {name: int for name in config["consumes"]}
        self._produces = {product_name: int}

    def transform(self, data_block):
        """Return the sum of all consumed values, keyed by the produced product name."""
        # Look up every value first, then add them up starting from 0.
        consumed_values = [data_block[name] for name in self._consumes]
        return {self.data_product_name: sum(consumed_values)}


Transform.describe(DynamicTransform)

コード例 #7

0

ファイルを表示

# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

import pandas as pd

from decisionengine.framework.modules import Transform


@Transform.consumes(foo=pd.DataFrame)
@Transform.produces(bar=pd.DataFrame)
class TransformNOP(Transform.Transform):
    """Transform that republishes the ``key2`` selection of product ``foo`` as ``bar``."""

    def __init__(self, config):
        """Forward *config* unchanged to the parent initializer."""
        super().__init__(config)

    def transform(self, data_block):
        """Wrap ``foo["key2"]`` in a new DataFrame and return it under ``bar``."""
        source = self.foo(data_block)
        key2_frame = pd.DataFrame(source["key2"])
        return {"bar": key2_frame}


Transform.describe(TransformNOP)

コード例 #8

0

ファイルを表示

ファイル: GceBurnRate.py プロジェクト: knoepfel/decisionengine_modules

from decisionengine.framework.modules import Transform


@Transform.consumes(GCE_Instance_Performance=pd.DataFrame,
                    GCE_Occupancy=pd.DataFrame)
@Transform.produces(GCE_Burn_Rate=pd.DataFrame)
class GceBurnRate(Transform.Transform):
    """Produce a one-row ``BurnRate`` frame from GCE occupancy and performance data."""

    def __init__(self, config):
        """Forward *config* unchanged to the parent initializer."""
        super().__init__(config)

    def transform(self, data_block):
        """
        Sum ``Occupancy * PreemptiblePrice`` over matching occupancy/performance rows.

        Rows are matched with an inner join on ``AvailabilityZone`` and
        ``InstanceType``. Missing values in both inputs are replaced by 0
        beforehand. When occupancy is empty or nothing matches, the reported
        burn rate is 0.0.

        :returns: ``{"GCE_Burn_Rate": DataFrame}`` with a single ``BurnRate`` column
        """
        self.logger.debug("in GceBurnRate transform")
        performance = self.GCE_Instance_Performance(data_block).fillna(0)
        occupancy = self.GCE_Occupancy(data_block).fillna(0)

        total_burn = 0.0
        if not occupancy.empty:
            matched = occupancy.merge(performance,
                                      how="inner",
                                      on=["AvailabilityZone", "InstanceType"])
            if not matched.empty:
                occupied = pd.to_numeric(matched["Occupancy"])
                price = pd.to_numeric(matched["PreemptiblePrice"])
                matched["BurnRate"] = occupied * price
                total_burn = matched["BurnRate"].sum()

        return {"GCE_Burn_Rate": pd.DataFrame([{"BurnRate": total_burn}])}


Transform.describe(GceBurnRate)

コード例 #9

0

ファイルを表示

ファイル: AWSSpotPrice.py プロジェクト: knoepfel/decisionengine_modules

        self.logger.debug("in AWSSpotPrice transform")
        account_dict = data_block.get("spot_occupancy_config").to_dict()
        self.logger.debug(f"account_dict {account_dict}")
        sp_data = []
        for account in account_dict:
            for region, instances in account_dict[account].items():
                spot_price_info = AWSSpotPriceForRegion(region,
                                                        profile_name=account)
                spot_price_info.init_query(instance_types=instances)
                spot_price_history = spot_price_info.get_price(self.logger)
                if spot_price_history:
                    sp_data += spot_price_info.spot_price_summary(
                        spot_price_history)

        sp_list = [i.data for i in sp_data]
        column_names = [
            "AccountName",
            "AvailabilityZone",
            "InstanceType",
            "ProductDescription",
            "SpotPrice",
            "Timestamp",
        ]
        return {
            "provisioner_resource_spot_prices":
            pd.DataFrame(sp_list, columns=column_names)
        }


Transform.describe(AWSSpotPrice)

コード例 #10

0

ファイルを表示

@Transform.consumes(provisioner_resource_spot_prices=pd.DataFrame,
                    AWS_Occupancy=pd.DataFrame)
@Transform.produces(AWS_Burn_Rate=pd.DataFrame)
class AwsBurnRate(Transform.Transform):
    """Produce a one-row ``BurnRate`` frame from AWS occupancy and spot-price data."""

    def __init__(self, config):
        """Forward *config* unchanged to the parent initializer."""
        super().__init__(config)

    def transform(self, data_block):
        """
        Sum ``RunningVms * SpotPrice`` over matching occupancy/spot-price rows.

        Rows are matched with an inner join on ``AccountName``,
        ``AvailabilityZone`` and ``InstanceType``. Missing values in both
        inputs are replaced by 0 beforehand. When occupancy is empty or
        nothing matches, the reported burn rate is 0.

        :returns: ``{'AWS_Burn_Rate': DataFrame}`` with a single ``BurnRate`` column
        """
        spot_prices = self.provisioner_resource_spot_prices(data_block).fillna(0)
        occupancy = self.AWS_Occupancy(data_block).fillna(0)

        join_keys = ["AccountName", "AvailabilityZone", "InstanceType"]
        burn_df = pd.DataFrame([{"BurnRate": 0.}])

        # Guard clauses: keep the zero default unless there is something to price.
        if occupancy.empty:
            return {'AWS_Burn_Rate': burn_df}

        priced = occupancy.merge(spot_prices, how="inner", on=join_keys)
        if priced.empty:
            return {'AWS_Burn_Rate': burn_df}

        running = pd.to_numeric(priced["RunningVms"])
        price = pd.to_numeric(priced["SpotPrice"])
        priced["BurnRate"] = running * price
        burn_df = pd.DataFrame([{"BurnRate": priced["BurnRate"].sum()}])
        return {'AWS_Burn_Rate': burn_df}


Transform.describe(AwsBurnRate)

コード例 #11

0

ファイルを表示

        factory_entries_lcf = self.Factory_Entries_LCF(data_block)

        figures_of_merit = []
        for _i, row in factory_entries_lcf.iterrows():
            entry_name = row["EntryName"]
            perf_df = performance[performance.EntryName == entry_name]

            for _j, perf_row in perf_df.iterrows():
                running = float(row["GlideinMonitorTotalStatusRunning"])
                max_allowed = float(row["GlideinConfigPerEntryMaxGlideins"])
                max_idle = float(row["GlideinConfigPerEntryMaxIdle"])
                idle = float(row["GlideinMonitorTotalStatusIdle"])
                figures_of_merit.append({
                    "EntryName":
                    entry_name,
                    "FigureOfMerit":
                    fom.figure_of_merit(perf_row["PricePerformance"], running,
                                        max_allowed, idle, max_idle,
                                        self.logger),
                })

        return {
            "Nersc_Price_Performance":
            performance.filter(["EntryName", "PricePerformance"]),
            "Nersc_Figure_Of_Merit":
            pd.DataFrame(figures_of_merit),
        }


Transform.describe(NerscFigureOfMerit)

コード例 #12

0

ファイルを表示

                if num_with_id == 0:
                    # both and factory_only stay '0'
                    results['factory_only.count'] = len(nersc_df)
                else:
                    # compare the exact job IDs
                    nersc_id_list = nersc_df.jobid.tolist()

                    factory_id_set = set(factory_id_list)
                    nersc_id_set = set(nersc_id_list)

                    in_both_set = factory_id_set & nersc_id_set
                    factory_only_set = factory_id_set - nersc_id_set
                    nersc_only_set = nersc_id_set - factory_id_set

                    num_in_both = len(in_both_set)
                    num_in_factory = len(factory_only_set)
                    num_in_nersc = len(nersc_only_set)

                    results['both.count'] = num_in_both
                    results['nersc_only.count'] = num_in_nersc
                    results['factory_only.count'] = num_in_factory

        for index, row in nersc_df.iterrows():
            if row['status'] == 'R':
                results['nersc.running.count'] += 1

        return {'nersc_factory_jobs_comparison': results}


Transform.describe(CompareNerscFactoryJobs)

コード例 #13

0

ファイルを表示

ファイル: grid_figure_of_merit.py プロジェクト: shreyb/decisionengine_modules

        self.logger = logging.getLogger()
        self.price_performance = config.get('price_performance', 1)

    def transform(self, datablock):
        """
        Build the figure-of-merit table for the grid factory entries.

        For every row of the ``Factory_Entries_Grid`` product, the running,
        max-allowed, max-idle and idle glidein counts are converted to floats
        and passed, together with ``self.price_performance``, to
        ``figure_of_merit``. A missing product (``None``) is treated as an
        empty table.

        :returns: ``{'Grid_Figure_Of_Merit': DataFrame}`` with one row per entry
        """
        entries = self.Factory_Entries_Grid(datablock)
        if entries is None:
            entries = pandas.DataFrame({ATTR_ENTRYNAME: []})

        fom_records = []
        # iterrows() yields nothing for an empty frame, so the loop alone
        # covers the "no entries" case.
        for _, entry in entries.iterrows():
            running = float(entry['GlideinMonitorTotalStatusRunning'])
            max_allowed = float(entry['GlideinConfigPerEntryMaxGlideins'])
            max_idle = float(entry['GlideinConfigPerEntryMaxIdle'])
            idle = float(entry['GlideinMonitorTotalStatusIdle'])

            entry_name = entry[ATTR_ENTRYNAME]
            score = figure_of_merit(self.price_performance, running,
                                    max_allowed, idle, max_idle)
            fom_records.append({ATTR_ENTRYNAME: entry_name, ATTR_FOM: score})

        return {'Grid_Figure_Of_Merit': pandas.DataFrame(fom_records)}


Transform.describe(GridFigureOfMerit)

コード例 #14

0

ファイルを表示

ファイル: job_clustering.py プロジェクト: knoepfel/decisionengine_modules

            df_job_clusters = pd.DataFrame(totals,
                                           columns=[
                                               "Job_Bucket_Criteria_Expr",
                                               "Site_Bucket_Criteria_Expr",
                                               "Totals", "Frontend_Group"
                                           ])
            self.logger.debug(f"Job category totals: {df_job_clusters}")

        except KeyError:
            self.logger.exception(
                "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names"
            )
            return {"job_clusters": self.EMPTY_JOB_CLUSTER}
        except ValueError:
            self.logger.exception(
                "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names"
            )
            return {"job_clusters": self.EMPTY_JOB_CLUSTER}
        except pd.core.computation.ops.UndefinedVariableError:
            self.logger.exception(
                "Unable to calculate totals from job manifests, may have missing classads or incorrect classad names"
            )
            return {"job_clusters": self.EMPTY_JOB_CLUSTER}

        self.logger.info("*** Ending job clustering ***")

        return {"job_clusters": df_job_clusters}


Transform.describe(JobClustering)