def createProductionStep(name, type, inputQuery=None, outputQuery=None):
    # create a production step
    prodStep = ProductionStep()
    prodStep.Name = name
    prodStep.Type = type
    prodStep.Inputquery = inputQuery
    prodStep.Outputquery = outputQuery
    return prodStep
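# A minimal usage sketch for the helper above, assuming a DIRAC ProductionClient
# is available; the step name and metadata query values are illustrative
# assumptions, not taken from a real production.
def example_create_step():
    from DIRAC.ProductionSystem.Client.ProductionClient import ProductionClient

    prodClient = ProductionClient()
    step = createProductionStep(
        'Sim_prog',
        'MCSimulation',
        outputQuery={'particle': 'gamma', 'outputType': {'in': ['Data', 'Log']}})
    # addProductionStep returns a DIRAC S_OK/S_ERROR dict
    return prodClient.addProductionStep(step)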
def build_simulation_step(DL0_data_set, name_tag=''):
    '''Setup the Corsika + sim_telarray step

    Note that there is no InputQuery, since jobs created by this step
    don't require any InputData

    @return ProductionStep object
    '''
    DIRAC.gLogger.notice('MC Production step')
    prod_step_1 = ProductionStep()
    prod_step_1.Name = 'Simulation_%s' % DL0_data_set.replace('AdvancedBaseline_NSB1x_', '')
    prod_step_1.Name += '%s' % name_tag
    prod_step_1.Type = 'MCSimulation'
    prod_step_1.Outputquery = get_dataset_MQ(DL0_data_set)
    prod_step_1.Outputquery['nsb'] = {'in': [1, 5]}

    # Get the metadata to be passed to the simulation job
    site = prod_step_1.Outputquery['site']
    particle = prod_step_1.Outputquery['particle']
    if prod_step_1.Outputquery['phiP']['='] == 180:
        pointing_dir = 'North'
    elif prod_step_1.Outputquery['phiP']['='] == 0:
        pointing_dir = 'South'
    else:
        # Guard against unexpected phiP values, which would otherwise
        # leave pointing_dir undefined
        raise ValueError('Unexpected phiP value in dataset query: %s'
                         % prod_step_1.Outputquery['phiP']['='])
    zenith_angle = prod_step_1.Outputquery['thetaP']['=']

    # Here define the job description (i.e. Name, Executable, etc.)
    # to be associated to the first ProductionStep, as done when using the TS
    job1 = Prod5bMCPipeNSBJob()
    job1.version = '2020-06-29b'
    job1.compiler = 'gcc83_matchcpu'
    # Initialize JOB_ID
    job1.workflow.addParameter(Parameter("JOB_ID", "000000", "string",
                                         "", "", True, False, "Temporary fix"))
    # Configuration
    job1.setName('Prod5b_MC_Pipeline_NSB')
    job1.set_site(site)
    job1.set_particle(particle)
    job1.set_pointing_dir(pointing_dir)
    job1.zenith_angle = zenith_angle
    job1.n_shower = 50000
    if particle == 'gamma':
        job1.n_shower = 20000
    job1.setOutputSandbox(['*Log.txt'])
    job1.start_run_number = '0'
    job1.run_number = '@{JOB_ID}'  # dynamic
    job1.setupWorkflow(debug=False)
    # Add the job description to the first ProductionStep
    prod_step_1.Body = job1.workflow.toXML()
    # Return the ProductionStep object
    return prod_step_1
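# A minimal sketch of how the simulation step could be registered; the dataset
# name is an illustrative assumption following the prod5b naming scheme.
def example_add_simulation_step():
    from DIRAC.ProductionSystem.Client.ProductionClient import ProductionClient

    prodClient = ProductionClient()
    sim_step = build_simulation_step('Prod5b_LaPalma_AdvancedBaseline_NSB1x_gamma_North_20deg_DL0')
    return prodClient.addProductionStep(sim_step)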
def build_evndisp_step(DL0_data_set, nsb=1):
    '''Define a new EventDisplay analysis production step

    @return ProductionStep object
    '''
    if nsb == 1:
        DIRAC.gLogger.notice('NSB1x Analysis')
        DL0_data_set_NSB = DL0_data_set
    elif nsb == 5:
        DIRAC.gLogger.notice('NSB5x Analysis')
        DL0_data_set_NSB = DL0_data_set.replace('NSB1x', 'NSB5x')
    else:
        # Guard against unsupported nsb values, which would otherwise
        # leave DL0_data_set_NSB undefined
        raise ValueError('Unsupported nsb value: %s (expected 1 or 5)' % nsb)

    prod_step_2 = ProductionStep()
    prod_step_2.Name = 'Analysis_' + DL0_data_set_NSB.replace('AdvancedBaseline_', '').replace('DL0', 'DL1')
    prod_step_2.Type = 'DataReprocessing'  # This corresponds to the Transformation Type
    prod_step_2.Inputquery = get_dataset_MQ(DL0_data_set_NSB)
    prod_step_2.Outputquery = get_dataset_MQ(DL0_data_set_NSB.replace('DL0', 'DL1'))

    # Here define the job description to be associated to the second ProductionStep
    job2 = EvnDispProd5Job()
    job2.setName('Prod5_EvnDisp')
    # Output
    job2.setOutputSandbox(['*Log.txt'])
    # Refine the output metadata if needed
    output_meta_data = copy(prod_step_2.Outputquery)
    job2.set_meta_data(output_meta_data)
    job2.set_file_meta_data(nsb=output_meta_data['nsb']['='])
    # Use the La Palma layouts if needed, otherwise keep the job default (Paranal)
    if output_meta_data['site'] == 'LaPalma':
        job2.prefix = "CTA.prod5N"
        job2.layout_list = (
            'BL-0LSTs05MSTs-MSTF BL-0LSTs05MSTs-MSTN '
            'BL-4LSTs00MSTs-MSTN BL-4LSTs05MSTs-MSTF '
            'BL-4LSTs05MSTs-MSTN BL-4LSTs09MSTs-MSTF '
            'BL-4LSTs09MSTs-MSTN BL-4LSTs15MSTs-MSTF '
            'BL-4LSTs15MSTs-MSTN'
        )
        DIRAC.gLogger.notice('LaPalma layouts:\n', job2.layout_list.split())
    elif output_meta_data['site'] == 'Paranal':
        DIRAC.gLogger.notice('Paranal layouts:\n', job2.layout_list.split())

    job2.ts_task_id = '@{JOB_ID}'  # dynamic
    job2.setupWorkflow(debug=False)
    job2.setType('EvnDisp3')  # mandatory *here*
    prod_step_2.Body = job2.workflow.toXML()
    prod_step_2.GroupSize = 5
    return prod_step_2
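# Usage sketch for the NSB5x variant of the analysis step above; the dataset
# name is an illustrative assumption. Passing nsb=5 rewrites 'NSB1x' to 'NSB5x'
# in the dataset name before building the input/output queries:
#
#     evndisp_step = build_evndisp_step('Prod5_Paranal_AdvancedBaseline_NSB1x_gamma_North_20deg_DL0', nsb=5)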
def build_evndisp_step(DL0_data_set, nsb=1, name_tag=''):
    '''Define a new EventDisplay analysis production step

    @return ProductionStep object
    '''
    if nsb == 1:
        DIRAC.gLogger.notice('NSB1x Analysis')
        DL0_data_set_NSB = DL0_data_set
    elif nsb == 5:
        DIRAC.gLogger.notice('NSB5x Analysis')
        DL0_data_set_NSB = DL0_data_set.replace('NSB1x', 'NSB5x')
    else:
        # Guard against unsupported nsb values, which would otherwise
        # leave DL0_data_set_NSB undefined
        raise ValueError('Unsupported nsb value: %s (expected 1 or 5)' % nsb)

    prod_step_2 = ProductionStep()
    prod_step_2.Name = 'Analysis_' + DL0_data_set_NSB.replace('AdvancedBaseline_', '').replace('DL0', 'DL1')
    prod_step_2.Name += '%s' % name_tag
    prod_step_2.Type = 'DataReprocessing'  # This corresponds to the Transformation Type
    prod_step_2.Inputquery = get_dataset_MQ(DL0_data_set_NSB)
    prod_step_2.Outputquery = get_dataset_MQ(DL0_data_set_NSB.replace('DL0', 'DL1'))

    # Here define the job description to be associated to the second ProductionStep
    job2 = EvnDispProd5SingJob(cpuTime=259200.)
    job2.version = 'eventdisplay-cta-dl1-prod5.v03'
    job2.setName('Prod5b_EvnDisp_Singularity')
    # Output
    job2.setOutputSandbox(['*Log.txt'])
    # Refine the output metadata if needed
    output_meta_data = copy(prod_step_2.Outputquery)
    job2.set_meta_data(output_meta_data)
    job2.set_file_meta_data(nsb=output_meta_data['nsb']['='])
    job2.ts_task_id = '@{JOB_ID}'  # dynamic
    job2.group_size = 5  # for the input files verification
    job2.setupWorkflow(debug=False)
    job2.setType('EvnDisp3')  # mandatory *here*
    prod_step_2.Body = job2.workflow.toXML()
    prod_step_2.GroupSize = 5  # should match job2.group_size set above
    # Return the ProductionStep object
    return prod_step_2
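# A sketch of how the two builders above could be chained into a two-step
# production; the client calls mirror the test below, while the dataset and
# production names are illustrative assumptions.
def example_build_production():
    import json
    from DIRAC.ProductionSystem.Client.ProductionClient import ProductionClient

    prodClient = ProductionClient()
    DL0_data_set = 'Prod5b_LaPalma_AdvancedBaseline_NSB1x_gamma_North_20deg_DL0'

    # MC simulation step (no InputQuery)
    sim_step = build_simulation_step(DL0_data_set)
    prodClient.addProductionStep(sim_step)

    # EventDisplay analysis step, fed by the simulation step's output
    evndisp_step = build_evndisp_step(DL0_data_set, nsb=1)
    evndisp_step.ParentStep = sim_step
    prodClient.addProductionStep(evndisp_step)

    # Register and start the production
    prod_desc = prodClient.prodDescription
    prodClient.addProduction('ProdExample', json.dumps(prod_desc))
    return prodClient.startProduction('ProdExample')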
def test_SeqProduction(self):
    # Define the first step of the production
    prodStep1 = ProductionStep()
    prodStep1.Name = 'Sim_prog'
    prodStep1.Type = 'MCSimulation'
    outputquery = {
        'zenith': {'in': [20, 40]},
        'particle': 'gamma',
        'tel_sim_prog': 'simtel',
        'outputType': {'in': ['Data', 'Log']}
    }
    prodStep1.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep1)
    self.assertTrue(res['OK'])

    # Define the second step of the production
    prodStep2 = ProductionStep()
    prodStep2.Name = 'Reco_prog'
    prodStep2.Type = 'DataProcessing'
    prodStep2.ParentStep = prodStep1
    inputquery = {
        'zenith': 20,
        'particle': 'gamma',
        'tel_sim_prog': 'simtel',
        'outputType': 'Data'
    }
    outputquery = {
        'zenith': 20,
        'particle': 'gamma',
        'analysis_prog': 'evndisp',
        'data_level': 1,
        'outputType': {'in': ['Data', 'Log']}
    }
    prodStep2.Inputquery = inputquery
    prodStep2.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep2)
    self.assertTrue(res['OK'])

    # Define the third step of the production
    prodStep3 = ProductionStep()
    prodStep3.Name = 'Analysis_prog'
    prodStep3.Type = 'DataProcessing'
    prodStep3.ParentStep = prodStep2
    inputquery = {
        'zenith': 20,
        'particle': 'gamma',
        'analysis_prog': 'evndisp',
        'data_level': 1,
        'outputType': 'Data'
    }
    outputquery = {
        'zenith': 20,
        'particle': 'gamma',
        'analysis_prog': 'evndisp',
        'data_level': 2,
        'outputType': {'in': ['Data', 'Log']}
    }
    prodStep3.Inputquery = inputquery
    prodStep3.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep3)
    self.assertTrue(res['OK'])

    # Get the production description
    prodDesc = self.prodClient.prodDescription

    # Create the production
    prodName = 'SeqProd'
    res = self.prodClient.addProduction(prodName, json.dumps(prodDesc))
    self.assertTrue(res['OK'])

    # Start the production, i.e. instantiate the transformation steps
    res = self.prodClient.startProduction(prodName)
    self.assertTrue(res['OK'])

    # Get the transformations of the production
    res = self.prodClient.getProduction(prodName)
    self.assertTrue(res['OK'])
    prodID = res['Value']['ProductionID']
    res = self.prodClient.getProductionTransformations(prodID)
    self.assertTrue(res['OK'])
    self.assertEqual(len(res['Value']), 3)

    # Delete the production
    res = self.prodClient.deleteProduction(prodName)
    self.assertTrue(res['OK'])
def test_SeqProduction(self):
    # Define the first step of the production
    prodStep1 = ProductionStep()
    prodStep1.Name = "Sim_prog"
    prodStep1.Type = "MCSimulation"
    outputquery = {
        "zenith": {"in": [20, 40]},
        "particle": "gamma",
        "tel_sim_prog": "simtel",
        "outputType": {"in": ["Data", "Log"]},
    }
    prodStep1.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep1)
    self.assertTrue(res["OK"])

    # Define the second step of the production
    prodStep2 = ProductionStep()
    prodStep2.Name = "Reco_prog"
    prodStep2.Type = "DataProcessing"
    prodStep2.ParentStep = prodStep1
    inputquery = {
        "zenith": 20,
        "particle": "gamma",
        "tel_sim_prog": "simtel",
        "outputType": "Data",
    }
    outputquery = {
        "zenith": 20,
        "particle": "gamma",
        "analysis_prog": "evndisp",
        "data_level": 1,
        "outputType": {"in": ["Data", "Log"]},
    }
    prodStep2.Inputquery = inputquery
    prodStep2.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep2)
    self.assertTrue(res["OK"])

    # Define the third step of the production
    prodStep3 = ProductionStep()
    prodStep3.Name = "Analysis_prog"
    prodStep3.Type = "DataProcessing"
    prodStep3.ParentStep = prodStep2
    inputquery = {
        "zenith": 20,
        "particle": "gamma",
        "analysis_prog": "evndisp",
        "data_level": 1,
        "outputType": "Data",
    }
    outputquery = {
        "zenith": 20,
        "particle": "gamma",
        "analysis_prog": "evndisp",
        "data_level": 2,
        "outputType": {"in": ["Data", "Log"]},
    }
    prodStep3.Inputquery = inputquery
    prodStep3.Outputquery = outputquery

    # Add the step to the production
    res = self.prodClient.addProductionStep(prodStep3)
    self.assertTrue(res["OK"])

    # Get the production description
    prodDesc = self.prodClient.prodDescription

    # Create the production with a randomized name to avoid collisions
    prodName = f"SeqProd{random.randint(0, 100_000)}"
    res = self.prodClient.addProduction(prodName, json.dumps(prodDesc))
    self.assertTrue(res["OK"])

    # Start the production, i.e. instantiate the transformation steps
    res = self.prodClient.startProduction(prodName)
    self.assertTrue(res["OK"])

    # Get the transformations of the production
    res = self.prodClient.getProduction(prodName)
    self.assertTrue(res["OK"])
    prodID = res["Value"]["ProductionID"]
    res = self.prodClient.getProductionTransformations(prodID)
    self.assertTrue(res["OK"])
    self.assertEqual(len(res["Value"]), 3)

    # Delete the production
    res = self.prodClient.deleteProduction(prodName)
    self.assertTrue(res["OK"])