# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine_modules.htcondor.sources import source


@Source.produces(factoryclient_manifests=pd.DataFrame)
class FactoryClientManifests(source.ResourceManifests):
    def __init__(self, config):
        super().__init__(config)
        self.constraint = f'({self.constraint})&&(glideinmytype=="glidefactoryclient")'
        self.subsystem_name = "any"

    def acquire(self):
        self.logger.debug("in FactoryClientManifests acquire")
        return {"factoryclient_manifests": self.load()}


Source.describe(FactoryClientManifests)
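In a running decision engine a source like this is not constructed by hand; it is declared in a channel configuration and instantiated by the framework. The fragment below is only a hedged sketch of such a declaration: the module path, parameter names, and schedule value are illustrative assumptions, not taken from the example above.

# Hypothetical channel-config fragment (Python dict form).  All values below
# are illustrative; consult real channel configs for actual paths/parameters.
channel_config = {
    "sources": {
        "factoryclient_manifests_source": {
            "module": "decisionengine_modules.glideinwms.sources.factory_client",  # assumed path
            "parameters": {
                "condor_config": "/etc/condor/condor_config",  # illustrative
                "collector_host": "factory.example.com",       # illustrative
            },
            "schedule": 320,  # seconds between acquire() calls (illustrative)
        },
    },
    # transforms, logicengines, and publishers omitted for brevity
}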
Example #2
from decisionengine.framework.modules import Source
from decisionengine.framework.tests import SourceWithSampleConfigNOP

SourceAlias = SourceWithSampleConfigNOP.SourceWithSampleConfigNOP
Source.describe(SourceAlias)
Example #3
                            '%s,%s' % (col_host, sec_cols)
                        ] * len(df)
                    else:
                        df['CollectorHosts'] = [col_host] * len(df)

                    dataframe = pandas.concat([dataframe, df],
                                              ignore_index=True,
                                              sort=True)
            except htcondor_query.QueryError as e:
                self.logger.error('Failed to fetch glidefactory classads '
                                  'from collector host(s) "{}": {}'.format(
                                      collector_host, e))
            except Exception:
                self.logger.exception('Unexpected error fetching glidefactory '
                                      'classads from collector host(s) '
                                      '"{}"'.format(collector_host))

        if dataframe.empty:
            # There were no entry classads in the factory collector or
            # querying the collector failed
            return dict.fromkeys(self._entry_gridtype_map, pandas.DataFrame())

        results = {}
        for key, value in self._entry_gridtype_map.items():
            results[key] = dataframe.loc[(dataframe.GLIDEIN_GridType.isin(
                list(value)))]
        return results


Source.describe(FactoryEntries)
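The final loop above partitions a single classad DataFrame into one frame per grid-type group with isin(). A small standalone sketch of that split on invented data (the dictionary keys and grid-type names here are illustrative only):

import pandas as pd

# Invented entry classads and grid-type grouping, to illustrate the isin() split.
dataframe = pd.DataFrame(
    [
        {"EntryName": "entry_a", "GLIDEIN_GridType": "condor"},
        {"EntryName": "entry_b", "GLIDEIN_GridType": "gce"},
        {"EntryName": "entry_c", "GLIDEIN_GridType": "ec2"},
    ]
)
entry_gridtype_map = {
    "entries_grid": ("condor",),  # illustrative key/value pairs
    "entries_gce": ("gce",),
    "entries_aws": ("ec2",),
}

results = {}
for key, value in entry_gridtype_map.items():
    results[key] = dataframe.loc[dataframe.GLIDEIN_GridType.isin(list(value))]

print({k: len(v) for k, v in results.items()})  # -> {'entries_grid': 1, 'entries_gce': 1, 'entries_aws': 1}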
Example #4
import pandas

from decisionengine_modules.htcondor.sources import source
from decisionengine.framework.modules import Source


@Source.produces(startd_manifests=pandas.DataFrame)
class StartdManifests(source.ResourceManifests):
    def acquire(self):
        return {'startd_manifests': self.load()}


Source.describe(StartdManifests)
Example #5
import typing

from decisionengine.framework.modules import Source, SourceProxy
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(Parameter('entry_limit_attrs', type=list))
@Source.produces(GCE_Resource_Limits=typing.Any)
class GCEResourceLimits(SourceProxy.SourceProxy):
    """
    Consumes factory data to find GCE entry limits
    """

    def __init__(self, config):
        super().__init__(config)
        self.entry_limit_attrs = config.get('entry_limit_attrs')

    def acquire(self):
        """
        Acquire google factory entry limits from source proxy
        and return as pandas frame
        :rtype: :obj:`~pd.DataFrame`
        """

        factory_data = super().acquire()
        df_factory_data = factory_data.get(self.data_keys[0])
        df_entry_limits = df_factory_data[self.entry_limit_attrs]
        return {'GCE_Resource_Limits': df_entry_limits}


Source.describe(GCEResourceLimits)
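GCEResourceLimits is a SourceProxy subclass, so besides its own entry_limit_attrs parameter it needs the usual source-proxy settings (the upstream channel name and the data products to copy). Below is a hedged sketch of what its parameter block might look like, modeled on the AWSSpotPrice sample_config shown further down this page; every value is invented for illustration.

# Illustrative parameters for a SourceProxy-based source.  Key names follow
# the sample_config pattern used elsewhere on this page; values are made up.
gce_resource_limits_parameters = {
    "channel_name": "channel_gce_factory_data",   # upstream channel (illustrative)
    "Dataproducts": ["Factory_Entries_GCE"],      # products to copy (illustrative)
    "retries": 3,
    "retry_timeout": 20,
    "entry_limit_attrs": ["EntryName", "GLIDEIN_Max_Num"],  # attributes to keep (illustrative)
}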
            account_dict = account_conf[k].to_dict()
        sp_data = []
        for account in account_dict:
            for region, instances in account_dict[account].items():
                spot_price_info = AWSSpotPriceForRegion(region,
                                                        profile_name=account)
                spot_price_info.init_query(instance_types=instances)
                spot_price_history = spot_price_info.get_price()
                if spot_price_history:
                    sp_data += spot_price_info.spot_price_summary(
                        spot_price_history)

        sp_list = [i.data for i in sp_data]
        column_names = [
            'AccountName', 'AvailabilityZone', 'InstanceType',
            'ProductDescription', 'SpotPrice', 'Timestamp'
        ]
        return {
            'provisioner_resource_spot_prices':
            pd.DataFrame(sp_list, columns=column_names)
        }


Source.describe(AWSSpotPrice,
                sample_config={
                    "channel_name": "channel_aws_config_data",
                    "Dataproducts": ["spot_occupancy_config"],
                    "retries": 3,
                    "retry_timeout": 20
                })
"""
This source takes input from instance_performance_gce.csv
and adds it to data block
"""
import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(
    Parameter('csv_file', type=str, comment="path to CSV file"))
@Source.produces(GCE_Instance_Performance=pd.DataFrame)
class GCEInstancePerformance(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self.csv_file = config.get('csv_file')
        if not self.csv_file:
            raise RuntimeError("No csv file found in configuration")

    def acquire(self):
        return {'GCE_Instance_Performance': pd.read_csv(self.csv_file)}


Source.describe(GCEInstancePerformance)
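As a quick illustration of what this source returns, the sketch below builds a toy CSV and drives the class directly, assuming the base Source constructor accepts a plain parameter dict; the column names are invented and the real instance_performance_gce.csv may differ.

import pandas as pd

# Hypothetical CSV content; real column names may differ.
toy_csv = "/tmp/instance_performance_gce.csv"
pd.DataFrame(
    [{"InstanceType": "n1-standard-1", "AvailabilityZone": "us-central1-a", "PerfTtbarTotal": 0.02}]
).to_csv(toy_csv, index=False)

perf_source = GCEInstancePerformance({"csv_file": toy_csv})
print(perf_source.acquire()["GCE_Instance_Performance"])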
        """
        raw_results = []
        # By default, query edison and cori
        self.constraints['machines'] = self.constraints.get('machines',
                                                            ['edison', 'cori'])
        # get all systems that are up
        up_machines = [x for x in self.newt.get_status()
                       if x['status'] == 'up']
        if not up_machines:
            self.logger.info("All machines at NERSC are down")
        # filter machines that are up
        machines = [x for x in self.constraints.get('machines') if x in [
            y["system"] for y in up_machines]]
        if not machines:
            self.logger.info("All requested machines at NERSC are down")
        # filter results based on constraints specified in newt_keys dictionary
        newt_keys = self.constraints.get("newt_keys", {})
        for m in machines:
            values = self.newt.get_queue(m)
            for k, v in newt_keys.items():
                if v:
                    values = [x for x in values if x[k] in v]
            if values:
                raw_results.extend(values)

        pandas_frame = pd.DataFrame(raw_results)
        return {'Nersc_Job_Info': pandas_frame}


Source.describe(NerscJobInfo)
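The filtering above is driven by the newt_keys constraint dictionary: for every key with a non-empty value list, only queue entries whose field is in that list survive. A standalone sketch of that behavior on invented records (field names are illustrative):

# Minimal sketch of the newt_keys filtering on invented queue records.
values = [
    {"user": "alice", "status": "R"},
    {"user": "bob", "status": "Q"},
]
newt_keys = {"user": ["alice"], "status": []}  # an empty list means "no constraint"

for k, v in newt_keys.items():
    if v:
        values = [x for x in values if x[k] in v]

print(values)  # -> [{'user': 'alice', 'status': 'R'}]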
    def acquire(self):
        """
        Gets data from AWS

        :rtype: pandas frame (:class:`pd.DataFrame`)
        """

        # Load known accounts configuration
        # account configuration is dynamic
        account_dict = load_config.load(self.config_file, 5, 20)
        sp_data = []
        for account in account_dict:
            for region, instances in account_dict[account].items():
                spot_price_info = AWSSpotPriceForRegion(region,
                                                        profile_name=account)
                spot_price_info.init_query(instance_types=instances)
                spot_price_history = spot_price_info.get_price()
                if spot_price_history:
                    sp_data += spot_price_info.spot_price_summary(
                        spot_price_history)

        sp_list = [i.data for i in sp_data]

        return {'provisioner_resource_spot_prices': pd.DataFrame(sp_list)}


Source.describe(
    AWSSpotPrice,
    sample_config={'spot_price_configuration': 'spot_price_config_sample.py'})
import os
import time

import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter

_RETRIES = 5
_TO = 20


@Source.supports_config(
    Parameter('data_file', type=str, comment="CSV job limits data file"))
@Source.produces(Job_Limits=pd.DataFrame)
class AWSJobLimits(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self.data_file = config['data_file']

    def acquire(self):
        rc = None
        for i in range(_RETRIES):
            if os.path.exists(self.data_file):
                rc = {
                    'Job_Limits':
                    pd.read_csv(self.data_file).drop_duplicates(
                        subset=['AvailabilityZone', 'InstanceType'],
                        keep='last').reset_index(drop=True)
                }
                break
            else:
                time.sleep(_TO)
        return rc


Source.describe(AWSJobLimits)
from decisionengine.framework.modules import Source, SourceProxy

NerscFigureOfMeritSourceProxy = SourceProxy.SourceProxy
Source.describe(NerscFigureOfMeritSourceProxy)
Example #12
from decisionengine.framework.modules import Source


@Source.produces(_placeholder=None)
class ErrorOnAcquire(Source.Source):
    def __init__(self, config):
        super().__init__(config)

    def acquire(self):
        raise RuntimeError("Test error-handling")


Source.describe(ErrorOnAcquire)
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

"""
Fill in data from Instance Performance CSV file
"""
import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(Parameter("data_file", type=str, comment="CSV cost data file"))
@Source.produces(Performance_Data=pd.DataFrame)
class AWSInstancePerformance(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self.data_file = config.get("data_file")

    def acquire(self):
        self.logger.debug("in AWSInstancePerformance acquire")
        dataframe = (
            pd.read_csv(self.data_file)
            .drop_duplicates(subset=["AvailabilityZone", "InstanceType"], keep="last")
            .reset_index(drop=True)
        )
        return {"Performance_Data": dataframe}


Source.describe(AWSInstancePerformance)
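The deduplication step above keeps only the most recent row for each (AvailabilityZone, InstanceType) pair. A small self-contained illustration of that pandas idiom on invented data:

import pandas as pd

# Two rows share the same AvailabilityZone/InstanceType; keep="last" retains the later one.
raw = pd.DataFrame(
    [
        {"AvailabilityZone": "us-east-1a", "InstanceType": "m5.large", "PerfTtbarTotal": 0.10},
        {"AvailabilityZone": "us-east-1a", "InstanceType": "m5.large", "PerfTtbarTotal": 0.12},
        {"AvailabilityZone": "us-west-2b", "InstanceType": "c5.xlarge", "PerfTtbarTotal": 0.20},
    ]
)
deduped = raw.drop_duplicates(subset=["AvailabilityZone", "InstanceType"], keep="last").reset_index(drop=True)
print(deduped)  # the 0.10 row is dropped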
            if values:
                with contextlib.suppress(KeyError):
                    # Empty return from get_usage, so just move on
                    results.extend(values["items"])
        # filter results based on constraints specified in newt_keys dictionary
        newt_keys = self.constraints.get("newt_keys", {})
        for key, values in newt_keys.items():
            k = key
            # The below remapping is needed for backward compatibility with
            # existing config files
            if key == "rname":
                k = "repoName"
            if key == "repo_type":
                k = "repoType"
            if values:
                results = [x for x in results if x[k] in values]
        return results

    def acquire(self):
        """
        Method to be called from Task Manager.
        redefines acquire from Source.py.
        Acquire NERSC allocation info and return as pandas frame
        :rtype: :obj:`~pd.DataFrame`
        """
        self.logger.debug("in NerscAllocationInfo acquire")
        return {"Nersc_Allocation_Info": pd.DataFrame(self.send_query())}


Source.describe(NerscAllocationInfo)
Example #15
@Source.supports_config(Parameter("multiplier", type=int))
@Source.supports_config(Parameter("channel_name", type=str))
@Source.produces(foo=pd.DataFrame)
class SourceWithSampleConfigNOP(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self.multiplier = config.get("multiplier")

    def acquire(self):
        return {
            "foo":
            pd.DataFrame([
                {
                    "col1": "value1",
                    "col2": 0.5 * self.multiplier
                },
                {
                    "col1": "value2",
                    "col2": 2.0 * self.multiplier
                },
            ])
        }


Source.describe(SourceWithSampleConfigNOP,
                sample_config={
                    "multiplier": 1,
                    "channel_name": "test1"
                })
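A minimal sketch of driving this test source directly, outside a running decision engine, assuming the base Source constructor accepts a plain parameter dict:

# Construct the source with its sample configuration and call acquire() by hand.
nop_source = SourceWithSampleConfigNOP({"multiplier": 2, "channel_name": "test1"})
frames = nop_source.acquire()
print(frames["foo"])  # DataFrame whose col2 values are 1.0 and 4.0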
from decisionengine.framework.modules import Source
from decisionengine.framework.modules import SourceProxy

FactoryEntriesSourceProxy = SourceProxy.SourceProxy
Source.describe(FactoryEntriesSourceProxy)
    def acquire(self):
        """
        Fills ``self.data`` with spot price data.

        :type spot_price_history: :obj:`list`
        :arg spot_price_history: list of spotprice data (:class:`SpotPriceData`)
        """

        # Load known accounts configuration
        # account configuration is dynamic
        account_dict = load_config.load(self.config_file, 5, 20)
        occupancy_data = []
        self.logger.debug('account_dict %s' % (account_dict, ))
        for account in account_dict:
            for region in account_dict[account]:
                occupancy = OccupancyForRegion(region, profile_name=account)
                instances = occupancy.get_ec2_instances()
                if instances:
                    data = occupancy.capacity(instances)
                    if data:
                        occupancy_data += data

        oc_list = [i.data for i in occupancy_data]
        return {'AWS_Occupancy': pd.DataFrame(oc_list)}


Source.describe(
    AWSOccupancy,
    sample_config={'occupancy_configuration': 'occupancy_config_sample.py'})
                        f"Cost Rate Per Hour In the Last Six Hours: ${costRatePerHourInLastSixHours} / h"
                    )
                    self.logger.debug("")
                    self.logger.debug(
                        f"Cost In the Last Day: ${costInLastDay}")
                    self.logger.debug(
                        f"Cost Rate Per Hour In the Last Day: ${costRatePerHourInLastDay} / h"
                    )
                    self.logger.debug("---")
                    self.logger.debug("")

            except Exception:
                self.logger.exception(
                    "Exception in AWS BillingInfo call to acquire")
                raise

        return {
            "AWS_Billing_Info": pd.DataFrame(data),
            "AWS_Billing_Rate": pd.DataFrame(datarate)
        }


Source.describe(
    BillingInfo,
    sample_config={
        "billing_configuration":
        "/etc/decisionengine/modules.conf/AccountConstants_my.py",
        "dst_dir_for_s3_files": "/var/lib/decisionengine/awsfiles",
    },
)
Example #19
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0
"""
This source takes input from instance_performance_nersc.csv
and adds it to data block
"""
import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(
    Parameter("csv_file", type=str, comment="path to CSV file"))
@Source.produces(Nersc_Instance_Performance=pd.DataFrame)
class NerscInstancePerformance(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self.csv_file = config.get("csv_file")
        if not self.csv_file:
            raise RuntimeError("No csv file found in configuration")

    def acquire(self):
        self.logger.debug("in NerscInstancePerformance acquire")
        return {"Nersc_Instance_Performance": pd.read_csv(self.csv_file)}


Source.describe(NerscInstancePerformance)
Example #20
from decisionengine.framework.modules import Source
from decisionengine.framework.modules import SourceProxy


JobClusteringSourceProxy = SourceProxy.SourceProxy
Source.describe(JobClusteringSourceProxy)
Example #21
from decisionengine.framework.modules import Source, SourceProxy

NerscAllocationInfoSourceProxy = SourceProxy.SourceProxy
Source.describe(NerscAllocationInfoSourceProxy)
Example #22
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(Parameter("int_value", type=int))
@Source.produces(int_value=int)
class IntSource(Source.Source):
    def __init__(self, config):
        super().__init__(config)
        self._value = config["int_value"]

    def acquire(self):
        return {"int_value": self._value}


Source.describe(IntSource)
Example #23
        Acquire GCE billing info and return as pandas frame

        :rtype: :obj:`~pd.DataFrame`
        """
        self.logger.debug("in GCEBillingInfo acquire")
        constantsDict = {
            "projectId": self.projectId,
            "credentialsProfileName": self.credentialsProfileName,
            "accountNumber": self.accountNumber,
            "bucketBillingName": "billing-" + str(self.projectId),
            "lastKnownBillDate": self.lastKnownBillDate,
            "balanceAtDate": self.balanceAtDate,
            "applyDiscount": self.applyDiscount,
        }
        globalConf = {"graphite_host": "dummy", "graphite_context_billing": "dummy", "outputPath": self.localFileDir}
        CorrectedBillSummaryDict = {}
        try:
            calculator = GCEBillCalculator(None, globalConf, constantsDict, self.logger)

            lastStartDateBilledConsideredDatetime, CorrectedBillSummaryDict = calculator.CalculateBill()

            self.logger.info("Calculated corrected bill summary for google")
            self.logger.info(CorrectedBillSummaryDict)

        except Exception:
            self.logger.exception("Exception in GCEBillingInfo call to acquire")

        return {"GCE_Billing_Info": pd.DataFrame([CorrectedBillSummaryDict])}


Source.describe(GCEBillingInfo)
Example #24
                df = pandas.DataFrame(condor_status.stored_data)
                if not df.empty:
                    (col_host, sec_cols
                     ) = htcondor_query.split_collector_host(collector_host)
                    df['CollectorHost'] = [col_host] * len(df)
                    if sec_cols != '':
                        df['CollectorHosts'] = [
                            '%s,%s' % (col_host, sec_cols)
                        ] * len(df)
                    else:
                        df['CollectorHosts'] = [col_host] * len(df)

                    dataframe = pandas.concat([dataframe, df],
                                              ignore_index=True,
                                              sort=True)
            except htcondor_query.QueryError as e:
                self.logger.error('Failed to get glidefactoryglobal classads '
                                  'from collector host(s) "{}": {}'.format(
                                      collector_host, e))
            except Exception:
                self.logger.exception('Unexpected error fetching '
                                      'glidefactoryglobal classads from '
                                      'collector host(s) '
                                      '"{}"'.format(collector_host))

        return {'factoryglobal_manifests': dataframe}


Source.describe(FactoryGlobalManifests)
from decisionengine.framework.modules import Source, SourceProxy

GCEBillingInfoSourceProxy = SourceProxy.SourceProxy
Source.describe(GCEBillingInfoSourceProxy)
                    self.logger.debug('Cost In the Last Six Hours: ${}'.format(
                        costInLastSixHours))
                    self.logger.debug(
                        'Cost Rate Per Hour In the Last Six Hours: ${} / h'.
                        format(costRatePerHourInLastSixHours))
                    self.logger.debug('')
                    self.logger.debug(
                        'Cost In the Last Day: ${}'.format(costInLastDay))
                    self.logger.debug(
                        'Cost Rate Per Hour In the Last Day: ${} / h'.format(
                            costRatePerHourInLastDay))
                    self.logger.debug('---')
                    self.logger.debug('')

            except Exception as detail:
                self.logger.error('In acquire: %s' % detail)
                raise

        return {
            'AWS_Billing_Info': pd.DataFrame(data),
            'AWS_Billing_Rate': pd.DataFrame(datarate)
        }


Source.describe(BillingInfo,
                sample_config={
                    'billing_configuration':
                    '/etc/decisionengine/modules.conf/AccountConstants_my.py',
                    'dst_dir_for_s3_files': '/var/lib/decisionengine/awsfiles'
                })
    def acquire(self):
        """
        Gets data from AWS

        :rtype: pandas frame (:class:`pd.DataFrame`)
        """

        # Load known accounts configuration
        # account configuration is dynamic
        self.logger.debug("in AWSSpotPrice acquire")
        account_dict = load_config.load(self.config_file, 5, 20, self.logger)
        sp_data = []
        for account in account_dict:  # pylint: disable=not-an-iterable
            for region, instances in account_dict[account].items():  # pylint: disable=unsubscriptable-object
                spot_price_info = AWSSpotPriceForRegion(region,
                                                        profile_name=account)
                spot_price_info.init_query(instance_types=instances)
                spot_price_history = spot_price_info.get_price(self.logger)
                if spot_price_history:
                    sp_data += spot_price_info.spot_price_summary(
                        spot_price_history)

        sp_list = [i.data for i in sp_data]

        return {"provisioner_resource_spot_prices": pd.DataFrame(sp_list)}


Source.describe(
    AWSSpotPrice,
    sample_config={"spot_price_configuration": "spot_price_config_sample.py"})
"""
Calculates price / preformance and figure of merit and
saves it into the output file acording to design document.

"""

from decisionengine.framework.modules import Source, SourceProxy

FigureOfMeritSourceProxy = SourceProxy.SourceProxy
Source.describe(FigureOfMeritSourceProxy)
Example #29
                              format_list=self.classad_attrs,
                              condor_config=self.condor_config)

                for eachDict in condor_q.stored_data:
                    for key, value in self.correction_map.items():
                        if eachDict.get(key) is None:
                            eachDict[key] = value

                df = pandas.DataFrame(condor_q.stored_data)
                if not df.empty:
                    # Add schedd name and collector host to job records
                    df['ScheddName'] = pandas.Series([schedd] *
                                                     len(condor_q.stored_data))
                    df['CollectorHost'] = pandas.Series(
                        [collector_host] * len(condor_q.stored_data))
                    dataframe = pandas.concat([dataframe, df], ignore_index=True)
            except htcondor_query.QueryError:
                self.logger.warning(
                    'Query error fetching job classads from schedd "%s" in collector host(s) "%s".'
                    % (schedd, collector_host))
            except Exception:
                msg = 'Unexpected error fetching job classads from schedd "{}" in collector host(s) "{}".'
                self.logger.warning(msg.format(schedd, collector_host))
                self.logger.error(
                    msg.format(schedd, collector_host) +
                    " Traceback: {}".format(traceback.format_exc()))
        return {'job_manifests': dataframe}


Source.describe(JobQ)
from decisionengine.framework.modules import SourceProxy
from decisionengine.framework.modules import Source

JobQSourceProxy = SourceProxy.SourceProxy
Source.describe(JobQSourceProxy)