logger.debug('Now parsing configuration file esk302_histogram_filler_plotter.')

#########################################################################################
# --- minimal analysis information

settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'esk302_histogram_filler_plotter'
settings['version'] = 0

#########################################################################################

msg = r""" The plots and latex files produced by link hist_summary can be found in dir: {path} """
# The version directory is derived from settings['version'] (set just above)
# rather than repeating a literal 'v0', so the logged location stays correct
# if the version number in this macro is ever bumped.
logger.info(
    msg,
    path='{root}/{name}/data/v{version}/report/'.format(
        root=settings['resultsDir'],
        name=settings['analysisName'],
        version=settings['version']
    )
)

# --- Analysis configuration flags.
#     E.g. use these flags to turn on or off certain chains with links.
#     By default all are set to false, unless already configured in
#     configobject or vars().

settings['do_loop'] = True
# Module-level value; its consumer is not visible in this excerpt
# (presumably the chunk size used when reading the input files -- confirm).
chunk_size = 400

#########################################################################################
# --- create dummy example dataset, which is read in below

# The same mock-accounts fixture is deliberately listed twice, so the list
# holds two (identical) input paths.
input_files = [resources.fixture('mock_accounts.csv.gz'),
               resources.fixture('mock_accounts.csv.gz')]
msg = r""" The setup consists of three simple chains that add progressively more information to the datastore. In the examples the datastore gets persisted after the execution of each chain, and can be picked up again as input for the next chain. - The pickled datastore(s) can be found in the data directory: {data_path} - The pickled configuration object(s) and backed-up configuration file can be found in: {conf_path} """
# Both locations live under <resultsDir>/<analysisName>/; the pieces are
# joined with '/' to give exactly the same strings as plain concatenation.
logger.info(
    msg,
    data_path='/'.join((settings['resultsDir'], settings['analysisName'], 'data/v0/')),
    conf_path='/'.join((settings['resultsDir'], settings['analysisName'], 'config/v0/'))
)

# Dummy payloads used in this macro; they are added to the chains below.
f = dict(hello='world', v=[3, 1, 4, 1, 5], n_favorite=7)
g = dict(a=1, b=2, c=3)
h = [2, 7]

#########################################################################################
# --- now set up the chains and links based on configuration flags

#########
# chain 1

ch = Chain('chain1')
######################################################################################### # --- minimal analysis information settings = process_manager.service(ConfigObject) settings['analysisName'] = 'esk411_weibull_predictive_maintenance' settings['version'] = 0 ######################################################################################### # --- Analysis values, settings, helper functions, configuration flags. msg = r""" The plots and latex report produced by link WsUtils can be found in dir: {path} """ logger.info(msg, path=persistence.io_path('results_data', 'report')) settings['generate'] = True # settings['read_data'] = not settings['generate'] settings['model'] = True settings['process'] = True settings['fit_plot'] = True settings['summary'] = True fitpdf = 'sum3pdf' n_percentile_bins = 300 ######################################################################################### # --- now set up the chains and links based on configuration flags if settings['model']:
###############################################################################
# --- minimal analysis information

settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'tutorial_5'
settings['version'] = 0

###############################################################################
# - First create, compile and load your pdf model. We can either create it
#   on the fly or load if it has already been created.

pdf_name = 'MyPdf'
pdf_lib_base = pdf_name + '_cxx'
pdf_lib_ext = '.so'
pdf_lib_name = pdf_lib_base + pdf_lib_ext

# TSystem::Load returns 0 on success, 1 when the library was already loaded,
# and a negative code on failure (missing library, version mismatch).  Only
# the negative codes mean the pdf still has to be generated and compiled;
# testing "!= 0" would needlessly rebuild a library that is already loaded.
if ROOT.gSystem.Load(pdf_lib_name) < 0:
    logger.info('Building and compiling RooFit pdf {name}.', name=pdf_name)
    # Generate the source of a RooFit pdf class called MyPdf.  The arguments
    # are: class name, real-valued arguments, (empty) category arguments, the
    # pdf expression, two boolean switches, and the analytical integral over x.
    # NOTE(review): the two booleans presumably map to hasAnaInt / hasIntGen
    # of RooClassFactory::makePdf -- confirm against the ROOT reference.
    ROOT.RooClassFactory.makePdf(
        pdf_name, "x,A,B", "", "A*fabs(x)+pow(x-B,2)", True, False,
        "x:(A/2)*(pow(x.max(rangeName),2)+pow(x.min(rangeName),2))"
        "+(1./3)*(pow(x.max(rangeName)-B,3)-pow(x.min(rangeName)-B,3))")
    # Compile the generated class and load it into ROOT on the fly.
    ROOT.gROOT.ProcessLineSync(".x {}.cxx+".format(pdf_name))

# --- check existence of class MyPdf in ROOT
logger.info('Now checking existence of ROOT class {name}.', name=pdf_name)
cl = ROOT.TClass.GetClass(pdf_name)
if not cl:
    logger.fatal(
        'Could not find ROOT class {name}. Did you build and compile it correctly?',
        name=pdf_name)
# --- minimal analysis information

settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'esk601_spark_configuration'
settings['version'] = 0

##########################################################################
# --- get Spark Manager to start/stop Spark

sm = process_manager.service(SparkManager)

##########################################################################
# --- METHOD 1: configuration file

# The session is created from the Eskapade settings object; the resulting
# Spark configuration is logged as a string for inspection.
spark = sm.create_session(eskapade_settings=settings)
sc = spark.sparkContext
logger.info('---> METHOD 1: configuration file')
logger.info(str(sc.getConf().getAll()))

##########################################################################
# --- METHOD 2: link

# Same idea, but the Spark settings are handed to a SparkConfigurator link
# as a list of (key, value) pairs and the link is placed in its own chain.
conf_link = SparkConfigurator(name='SparkConfigurator', log_level='WARN')
conf_link.spark_settings = list(zip(
    ('spark.app.name', 'spark.master', 'spark.driver.host'),
    ('{}_link'.format(settings['analysisName']), 'local[42]', '127.0.0.1')
))

config = Chain('Config')
config.add(conf_link)
logger.info('---> METHOD 2: link')
import pandas as pd

from eskapade import analysis, process_manager, visualization, ConfigObject, Chain
from eskapade.logger import Logger

# Logger used for the messages emitted by this macro.
logger = Logger()

#########################################################################################

# Reminder, logged at start-up, that the input data has to be downloaded first.
msg = r""" Be sure to download the input dataset: $ wget https://s3-eu-west-1.amazonaws.com/kpmg-eskapade-share/data/LAozone.data """
logger.info(msg)

#########################################################################################
# --- minimal analysis information

# Fetch the shared configuration object and register this macro's name.
settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'Tutorial_1'

#########################################################################################
# --- analysis values, settings, helper functions, configuration flags.

# Labels and units keyed by variable name.  Their consumers are not visible
# in this excerpt (presumably used to annotate plots -- confirm).
VAR_LABELS = dict(doy='Day of year', date='Date', vis='Visibility', vis_km='Visibility')
VAR_UNITS = dict(vis='mi', vis_km='km')

# NOTE(review): the excerpt ends on this function header; its body lies
# outside the visible source and is intentionally left untouched.
def comp_date(day):