# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine_modules.htcondor.sources import source


@Source.produces(factoryclient_manifests=pd.DataFrame)
class FactoryClientManifests(source.ResourceManifests):
    """Publish glidefactoryclient classads as the 'factoryclient_manifests' product."""

    def __init__(self, config):
        super().__init__(config)
        # Narrow the inherited constraint to glidefactoryclient classads only.
        self.constraint = f'({self.constraint})&&(glideinmytype=="glidefactoryclient")'
        self.subsystem_name = "any"

    def acquire(self):
        """Return {'factoryclient_manifests': DataFrame} built by the base loader."""
        self.logger.debug("in FactoryClientManifests acquire")
        manifests = self.load()
        return {"factoryclient_manifests": manifests}


Source.describe(FactoryClientManifests)
from decisionengine.framework.modules import Source
from decisionengine.framework.tests import SourceWithSampleConfigNOP

# The test module and the class it contains share a name; re-export the class
# under a distinct alias so the framework can load it by module path.
SourceAlias = SourceWithSampleConfigNOP.SourceWithSampleConfigNOP

# Generate the standard module description/CLI support for this source.
Source.describe(SourceAlias)
'%s,%s' % (col_host, sec_cols) ] * len(df) else: df['CollectorHosts'] = [col_host] * len(df) dataframe = pandas.concat([dataframe, df], ignore_index=True, sort=True) except htcondor_query.QueryError as e: self.logger.error('Failed to fetch glidefactory classads ' 'from collector host(s) "{}": {}'.format( collector_host, e)) except Exception: self.logger.exception('Unexpected error fetching glidefactory ' 'classads from collector host(s) ' '"{}"'.format(collector_host)) if dataframe.empty: # There were no entry classads in the factory collector or # quering the collector failed return dict.fromkeys(self._entry_gridtype_map, pandas.DataFrame()) results = {} for key, value in self._entry_gridtype_map.items(): results[key] = dataframe.loc[(dataframe.GLIDEIN_GridType.isin( list(value)))] return results Source.describe(FactoryEntries)
import pandas

from decisionengine_modules.htcondor.sources import source
from decisionengine.framework.modules import Source


@Source.produces(startd_manifests=pandas.DataFrame)
class StartdManifests(source.ResourceManifests):
    """Publish HTCondor startd classads as the 'startd_manifests' data product."""

    def acquire(self):
        """Return {'startd_manifests': DataFrame} loaded via the base class."""
        manifests = self.load()
        return {'startd_manifests': manifests}


Source.describe(StartdManifests)
from decisionengine.framework.modules import Source, SourceProxy
from decisionengine.framework.modules.Source import Parameter


# NOTE(review): this decorator relies on `typing` being imported at module
# top — confirm the import exists in the full file.
@Source.supports_config(Parameter('entry_limit_attrs', type=list))
@Source.produces(GCE_Resource_Limits=typing.Any)
class GCEResourceLimits(SourceProxy.SourceProxy):
    """
    Consumes factory data to find GCE entry limits
    """

    def __init__(self, config):
        super().__init__(config)
        # Column names to project out of the upstream factory frame.
        self.entry_limit_attrs = config.get('entry_limit_attrs')

    def acquire(self):
        """
        Acquire google factory entry limits from source proxy
        and return as pandas frame
        :rtype: :obj:`~pd.DataFrame`
        """
        # Fetch the upstream data product, then keep only the configured columns.
        factory_frame = super().acquire().get(self.data_keys[0])
        return {'GCE_Resource_Limits': factory_frame[self.entry_limit_attrs]}


Source.describe(GCEResourceLimits)
account_dict = account_conf[k].to_dict() sp_data = [] for account in account_dict: for region, instances in account_dict[account].items(): spot_price_info = AWSSpotPriceForRegion(region, profile_name=account) spot_price_info.init_query(instance_types=instances) spot_price_history = spot_price_info.get_price() if spot_price_history: sp_data += spot_price_info.spot_price_summary( spot_price_history) sp_list = [i.data for i in sp_data] column_names = [ 'AccountName', 'AvailabilityZone', 'InstanceType', 'ProductDescription', 'SpotPrice', 'Timestamp' ] return { 'provisioner_resource_spot_prices': pd.DataFrame(sp_list, columns=column_names) } Source.describe(AWSSpotPrice, sample_config={ "channel_name": "channel_aws_config_data", "Dataproducts": ["spot_occupancy_config"], "retries": 3, "retry_timeout": 20 })
""" This source takes input from instance_performance_gce.csv and adds it to data block """ import pandas as pd from decisionengine.framework.modules import Source from decisionengine.framework.modules.Source import Parameter @Source.supports_config( Parameter('csv_file', type=str, comment="path to CSV file")) @Source.produces(GCE_Instance_Performance=pd.DataFrame) class GCEInstancePerformance(Source.Source): def __init__(self, config): super().__init__(config) self.csv_file = config.get('csv_file') if not self.csv_file: raise RuntimeError("No csv file found in configuration") def acquire(self): return {'GCE_Instance_Performance': pd.read_csv(self.csv_file)} Source.describe(GCEInstancePerformance)
""" raw_results = [] # By default, query edison and cori self.constraints['machines'] = self.constraints.get('machines', ['edison', 'cori']) # get all systems that are up up_machines = [x for x in self.newt.get_status() if x['status'] == 'up'] if not up_machines: self.logger.info("All machines at NERSC are down") # filter machines that are up machines = [x for x in self.constraints.get('machines') if x in [ y["system"] for y in up_machines]] if not machines: self.logger.info("All requested machines at NERSC are down") # filter results based on constraints specified in newt_keys dictionary newt_keys = self.constraints.get("newt_keys", {}) for m in machines: values = self.newt.get_queue(m) for k, v in newt_keys.items(): if v: values = [x for x in values if x[k] in v] if values: raw_results.extend(values) pandas_frame = pd.DataFrame(raw_results) return {'Nersc_Job_Info': pandas_frame} Source.describe(NerscJobInfo)
def acquire(self): """ Gets data from AWS :rtype: pandas frame (:class:`pd.DataFramelist`) """ # Load kown accounts configuration # account configuration is dynamic account_dict = load_config.load(self.config_file, 5, 20) sp_data = [] for account in account_dict: for region, instances in account_dict[account].items(): spot_price_info = AWSSpotPriceForRegion(region, profile_name=account) spot_price_info.init_query(instance_types=instances) spot_price_history = spot_price_info.get_price() if spot_price_history: sp_data += spot_price_info.spot_price_summary( spot_price_history) sp_list = [i.data for i in sp_data] return {'provisioner_resource_spot_prices': pd.DataFrame(sp_list)} Source.describe( AWSSpotPrice, sample_config={'spot_price_configuration': 'spot_price_config_sample.py'})
from decisionengine.framework.modules.Source import Parameter

# Retry parameters: poll up to _RETRIES times, sleeping _TO seconds between
# checks, for the job-limits file to appear.
_RETRIES = 5
_TO = 20


@Source.supports_config(
    Parameter('data_file', type=str, comment="CSV job limits data file"))
@Source.produces(Job_Limits=pd.DataFrame)
class AWSJobLimits(Source.Source):
    """Publish AWS job limits read from a CSV file as the 'Job_Limits' product."""

    def __init__(self, config):
        # Fix: initialize the Source base class (sets up self.logger, etc.);
        # every other source in this package calls super().__init__.
        super().__init__(config)
        self.data_file = config['data_file']

    def acquire(self):
        """
        Read the job-limits CSV, keeping the last record per
        (AvailabilityZone, InstanceType) pair.

        Returns None if the file does not appear within the retry window
        (original behavior, preserved).
        """
        rc = None
        for _ in range(_RETRIES):
            if os.path.exists(self.data_file):
                rc = {
                    'Job_Limits': pd.read_csv(self.data_file).drop_duplicates(
                        subset=['AvailabilityZone', 'InstanceType'],
                        keep='last').reset_index(drop=True)
                }
                break
            time.sleep(_TO)
        return rc


Source.describe(AWSJobLimits)
from decisionengine.framework.modules import Source, SourceProxy

# Pure pass-through source: republishes data products produced by another
# channel, so the stock SourceProxy implementation suffices unchanged.
NerscFigureOfMeritSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(NerscFigureOfMeritSourceProxy)
from decisionengine.framework.modules import Source


@Source.produces(_placeholder=None)
class ErrorOnAcquire(Source.Source):
    """Test source whose acquire always fails, for exercising error handling."""

    # No __init__ override needed: the base-class constructor is sufficient.

    def acquire(self):
        """Always raise; no data product is ever produced."""
        raise RuntimeError("Test error-handling")


Source.describe(ErrorOnAcquire)
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

"""
Fill in data from Instance Performance CSV file
"""
import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(Parameter("data_file", type=str, comment="CSV cost data file"))
@Source.produces(Performance_Data=pd.DataFrame)
class AWSInstancePerformance(Source.Source):
    """Publish AWS instance-performance records read from a CSV file."""

    def __init__(self, config):
        super().__init__(config)
        self.data_file = config.get("data_file")

    def acquire(self):
        """Read the CSV, keep the last record per (AZ, instance type), reindex."""
        self.logger.debug("in AWSInstancePerformance acquire")
        frame = pd.read_csv(self.data_file)
        deduped = frame.drop_duplicates(
            subset=["AvailabilityZone", "InstanceType"], keep="last"
        )
        return {"Performance_Data": deduped.reset_index(drop=True)}


Source.describe(AWSInstancePerformance)
if values: with contextlib.suppress(KeyError): # Empty return from get_usage, so just move on results.extend(values["items"]) # filter results based on constraints specified in newt_keys dictionary newt_keys = self.constraints.get("newt_keys", {}) for key, values in newt_keys.items(): k = key # The below remapping is needed for backward compatibility with # existing config files if key == "rname": k = "repoName" if key == "repo_type": k = "repoType" if values: results = [x for x in results if x[k] in values] return results def acquire(self): """ Method to be called from Task Manager. redefines acquire from Source.py. Acquire NERSC allocation info and return as pandas frame :rtype: :obj:`~pd.DataFrame` """ self.logger.debug("in NerscAllocationInfo acquire") return {"Nersc_Allocation_Info": pd.DataFrame(self.send_query())} Source.describe(NerscAllocationInfo)
@Source.supports_config(Parameter("multiplier", type=int))
@Source.supports_config(Parameter("channel_name", type=str))
@Source.produces(foo=pd.DataFrame)
class SourceWithSampleConfigNOP(Source.Source):
    """Trivial test source: emits a two-row frame scaled by 'multiplier'."""

    def __init__(self, config):
        super().__init__(config)
        self.multiplier = config.get("multiplier")

    def acquire(self):
        """Return {'foo': DataFrame} with col2 scaled by the configured multiplier."""
        rows = [
            {"col1": "value1", "col2": 0.5 * self.multiplier},
            {"col1": "value2", "col2": 2.0 * self.multiplier},
        ]
        return {"foo": pd.DataFrame(rows)}


Source.describe(
    SourceWithSampleConfigNOP,
    sample_config={"multiplier": 1, "channel_name": "test1"},
)
from decisionengine.framework.modules import Source
from decisionengine.framework.modules import SourceProxy

# Pure pass-through source: republishes factory-entry data products produced
# by another channel, so the stock SourceProxy implementation suffices.
FactoryEntriesSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(FactoryEntriesSourceProxy)
def acquire(self):
    """
    Collect EC2 occupancy (running-instance capacity) for every configured
    account/region and return it as a single data product.

    :rtype: dict mapping 'AWS_Occupancy' to :obj:`~pd.DataFrame`
    """
    # Load known accounts configuration on every acquire — the account
    # configuration is dynamic.
    account_dict = load_config.load(self.config_file, 5, 20)
    occupancy_data = []
    # Fix: log the freshly loaded local mapping; the previous code referenced
    # self.account_dict, which this method never sets.
    self.logger.debug('account_dict %s' % (account_dict, ))
    for account in account_dict:
        for region in account_dict[account]:
            # (renamed from misspelled 'occcupancy')
            occupancy = OccupancyForRegion(region, profile_name=account)
            instances = occupancy.get_ec2_instances()
            if instances:
                data = occupancy.capacity(instances)
                if data:
                    occupancy_data += data
    # Each occupancy object carries its record in a .data attribute.
    oc_list = [i.data for i in occupancy_data]
    return {'AWS_Occupancy': pd.DataFrame(oc_list)}


Source.describe(
    AWSOccupancy,
    sample_config={'occupancy_configuration': 'occupancy_config_sample.py'})
f"Cost Rate Per Hour In the Last Six Hours: ${costRatePerHourInLastSixHours} / h" ) self.logger.debug("") self.logger.debug( f"Cost In the Last Day: ${costInLastDay}") self.logger.debug( f"Cost Rate Per Hour In the Last Day: ${costRatePerHourInLastDay} / h" ) self.logger.debug("---") self.logger.debug("") except Exception as detail: self.logger.exception( "Exception in AWS BillingInfo call to acquire") raise Exception(detail) return { "AWS_Billing_Info": pd.DataFrame(data), "AWS_Billing_Rate": pd.DataFrame(datarate) } Source.describe( BillingInfo, sample_config={ "billing_configuration": "/etc/decisionengine/modules.conf/AccountConstants_my.py", "dst_dir_for_s3_files": "/var/lib/decisionengine/awsfiles", }, )
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

"""
This source takes input from instance_performance_nersc.csv
and adds it to data block
"""
import pandas as pd

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(
    Parameter("csv_file", type=str, comment="path to CSV file"))
@Source.produces(Nersc_Instance_Performance=pd.DataFrame)
class NerscInstancePerformance(Source.Source):
    """Publish NERSC instance-performance data read from a CSV file."""

    def __init__(self, config):
        super().__init__(config)
        # A missing or empty path is a configuration error; fail fast.
        self.csv_file = config.get("csv_file")
        if not self.csv_file:
            raise RuntimeError("No csv file found in configuration")

    def acquire(self):
        """Read the configured CSV file and return it keyed by product name."""
        self.logger.debug("in NerscInstancePerformance acquire")
        frame = pd.read_csv(self.csv_file)
        return {"Nersc_Instance_Performance": frame}


Source.describe(NerscInstancePerformance)
from decisionengine.framework.modules import Source
from decisionengine.framework.modules import SourceProxy

# Pure pass-through source: republishes job-clustering data products produced
# by another channel, so the stock SourceProxy implementation suffices.
JobClusteringSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(JobClusteringSourceProxy)
from decisionengine.framework.modules import Source, SourceProxy

# Pure pass-through source: republishes NERSC allocation data products
# produced by another channel via the stock SourceProxy implementation.
NerscAllocationInfoSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(NerscAllocationInfoSourceProxy)
# SPDX-FileCopyrightText: 2017 Fermi Research Alliance, LLC
# SPDX-License-Identifier: Apache-2.0

from decisionengine.framework.modules import Source
from decisionengine.framework.modules.Source import Parameter


@Source.supports_config(Parameter("int_value", type=int))
@Source.produces(int_value=int)
class IntSource(Source.Source):
    """Test source that publishes a single configured integer."""

    def __init__(self, config):
        # Fix: initialize the Source base class; the original skipped
        # super().__init__, unlike the other sources in this package.
        super().__init__(config)
        self._value = config["int_value"]

    def acquire(self):
        """Return the configured integer keyed by 'int_value'."""
        return {"int_value": self._value}


Source.describe(IntSource)
Acquire GCE billing info and return as pandas frame :rtype: :obj:`~pd.DataFrame` """ self.logger.debug("in GCEBillingInfo acquire") constantsDict = { "projectId": self.projectId, "credentialsProfileName": self.credentialsProfileName, "accountNumber": self.accountNumber, "bucketBillingName": "billing-" + str(self.projectId), "lastKnownBillDate": self.lastKnownBillDate, "balanceAtDate": self.balanceAtDate, "applyDiscount": self.applyDiscount, } globalConf = {"graphite_host": "dummy", "graphite_context_billing": "dummy", "outputPath": self.localFileDir} try: calculator = GCEBillCalculator(None, globalConf, constantsDict, self.logger) lastStartDateBilledConsideredDatetime, CorrectedBillSummaryDict = calculator.CalculateBill() self.logger.info("Calculated corrected bill summary for google") self.logger.info(CorrectedBillSummaryDict) except Exception: self.logger.exception("Exception in GCEBillingInfo call to acquire") return {"GCE_Billing_Info": pd.DataFrame([CorrectedBillSummaryDict])} Source.describe(GCEBillingInfo)
df = pandas.DataFrame(condor_status.stored_data) if not df.empty: (col_host, sec_cols ) = htcondor_query.split_collector_host(collector_host) df['CollectorHost'] = [col_host] * len(df) if sec_cols != '': df['CollectorHosts'] = [ '%s,%s' % (col_host, sec_cols) ] * len(df) else: df['CollectorHosts'] = [col_host] * len(df) dataframe = pandas.concat([dataframe, df], ignore_index=True, sort=True) except htcondor_query.QueryError as e: self.logger.error('Failed to get glidefactoryglobal classads ' 'from collector host(s) "{}": {}'.format( collector_host, e)) except Exception: self.logger.exception('Unexpected error fetching ' 'glidefactoryglobal classads from ' 'collector host(s) ' "{}" ''.format(collector_host)) return {'factoryglobal_manifests': dataframe} Source.describe(FactoryGlobalManifests)
from decisionengine.framework.modules import Source, SourceProxy

# Pure pass-through source: republishes GCE billing data products produced
# by another channel via the stock SourceProxy implementation.
GCEBillingInfoSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(GCEBillingInfoSourceProxy)
self.logger.debug('Cost In the Last Six Hours: ${}'.format( costInLastSixHours)) self.logger.debug( 'Cost Rate Per Hour In the Last Six Hours: ${} / h'. format(costRatePerHourInLastSixHours)) self.logger.debug('') self.logger.debug( 'Cost In the Last Day: ${}'.format(costInLastDay)) self.logger.debug( 'Cost Rate Per Hour In the Last Day: ${} / h'.format( costRatePerHourInLastDay)) self.logger.debug('---') self.logger.debug('') except Exception as detail: self.logger.error("In acquire: %s" % detail) raise Exception(detail) return { 'AWS_Billing_Info': pd.DataFrame(data), 'AWS_Billing_Rate': pd.DataFrame(datarate) } Source.describe(BillingInfo, sample_config={ 'billing_configuration': '/etc/decisionengine/modules.conf/AccountConstants_my.py', 'dst_dir_for_s3_files': '/var/lib/decisionengine/awsfiles' })
def acquire(self): """ Gets data from AWS :rtype: pandas frame (:class:`pd.DataFramelist`) """ # Load kown accounts configuration # account configuration is dynamic self.logger.debug("in AWSSpotPrice acquire") account_dict = load_config.load(self.config_file, 5, 20, self.logger) sp_data = [] for account in account_dict: # pylint: disable=not-an-iterable for region, instances in account_dict[account].items(): # pylint: disable=unsubscriptable-object spot_price_info = AWSSpotPriceForRegion(region, profile_name=account) spot_price_info.init_query(instance_types=instances) spot_price_history = spot_price_info.get_price(self.logger) if spot_price_history: sp_data += spot_price_info.spot_price_summary( spot_price_history) sp_list = [i.data for i in sp_data] return {"provisioner_resource_spot_prices": pd.DataFrame(sp_list)} Source.describe( AWSSpotPrice, sample_config={"spot_price_configuration": "spot_price_config_sample.py"})
""" Calculates price / preformance and figure of merit and saves it into the output file acording to design document. """ from decisionengine.framework.modules import Source, SourceProxy FigureOfMeritSourceProxy = SourceProxy.SourceProxy Source.describe(FigureOfMeritSourceProxy)
format_list=self.classad_attrs, condor_config=self.condor_config) for eachDict in condor_q.stored_data: for key, value in self.correction_map.items(): if eachDict.get(key) is None: eachDict[key] = value df = pandas.DataFrame(condor_q.stored_data) if not df.empty: # Add schedd name and collector host to job records df['ScheddName'] = pandas.Series([schedd] * len(condor_q.stored_data)) df['CollectorHost'] = pandas.Series( [collector_host] * len(condor_q.stored_data)) dataframe = dataframe.append(df, ignore_index=True) except htcondor_query.QueryError: self.logger.warning( 'Query error fetching job classads from schedd "%s" in collector host(s) "%s".' % (schedd, collector_host)) except Exception: msg = 'Unexpected error fetching job classads from schedd "{}" in collector host(s) "{}".' self.logger.warning(msg.format(schedd, collector_host)) self.logger.error( msg.format(schedd, collector_host) + " Traceback: {}".format(traceback.format_exc())) return {'job_manifests': dataframe} Source.describe(JobQ)
from decisionengine.framework.modules import SourceProxy
from decisionengine.framework.modules import Source

# Pure pass-through source: republishes job-queue data products produced
# by another channel via the stock SourceProxy implementation.
JobQSourceProxy = SourceProxy.SourceProxy

# Generate the standard module description/CLI support for this source.
Source.describe(JobQSourceProxy)