def test(self):
    if get_active_stack(self) is not None:
        print("An active stack should only exist when added explicitly")
        return False

    with duration(self, "a name") as context_object:
        active_stack = get_active_stack(self)
        # We should have an active stack in the context
        if active_stack is None:
            print("In duration context the active stack should be added.")
            return False

        if not get_child(active_stack, "active_stack").hasChildNodes():
            print("In the context the active_stack should contain "
                  "at least one entry")
            return False

    # Now leave the context
    if get_child(active_stack, "active_stack").hasChildNodes():
        print("After the context the active stack should be left")
        # There is still an entry in the active stack
        return False

    return True
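
# Usage sketch (not part of the test suite): the contract exercised by the
# test above. `duration` is expected to push a timing entry onto the target
# object's "active_stack" XML node on entry and pop it again on exit.
# `example_duration_usage` and `node` are hypothetical names.
def example_duration_usage(node):
    with duration(node, "my_step"):
        stack = get_active_stack(node)
        # inside the context the stack exists and holds one entry
        assert stack is not None
        assert get_child(stack, "active_stack").hasChildNodes()
    # after leaving the context the entry has been popped again
    assert not get_child(stack, "active_stack").hasChildNodes()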
def pipeline_logic(self):
    try:
        parset_file = os.path.abspath(self.inputs['args'][0])
    except IndexError:
        return self.usage()
    try:
        if self.parset.keys == []:
            self.parset.adoptFile(parset_file)
            self.parset_feedback_file = parset_file + "_feedback"
    except RuntimeError:
        print("Error: Parset file not found!", file=sys.stderr)
        return self.usage()
    self._replace_values()
    # just a reminder that this has to be implemented
    validator = GenericPipelineParsetValidation(self.parset)
    if not validator.validate_pipeline():
        self.usage()
        exit(1)
    if not validator.validate_steps():
        self.usage()
        exit(1)

    # Set up directories
    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")
    # Create directories for temporary parset- and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # *********************************************************************
    # maybe we don't need a subset but just a steplist;
    # at the moment only a list with step names is given for the
    # pipeline.steps parameter:
    # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
    # the names will be the prefix for parset subsets
    pipeline_args = self.parset.makeSubset(
        self.parset.fullModuleName('pipeline') + '.')
    pipeline_steps = self.parset.makeSubset(
        self.parset.fullModuleName('steps') + '.')

    # *********************************************************************
    # forward declarations, just for a better overview of what lives here.
    # some of this might be removed in upcoming iterations, or stuff gets added.
    step_name_list = pipeline_args.getStringVector('steps')
    # construct the step name list if there were pipeline.steps.<subset>
    for item in pipeline_steps.keys():
        if item in step_name_list:
            loc = step_name_list.index(item)
            step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
            step_name_list.remove(item)

    step_control_dict = {}
    step_parset_files = {}
    step_parset_obj = {}
    activeloop = ['']

    # construct the list of step names and controls
    self._construct_steps(step_name_list, step_control_dict,
                          step_parset_files, step_parset_obj, parset_dir)
    # initial parameters to be saved in resultsdict so that recipes
    # have access to this step 0.
    # duplicated init values: 'input' should be considered deprecated;
    # self.name would be consistent to use in subpipelines
    input_dictionary = {
        'parset': parset_file,
        'parsetobj': self.parset,
        'parset_dir': parset_dir,
        'mapfile_dir': mapfile_dir}

    resultdicts = {}
    for section in self.config.sections():
        tmp_dict = {}
        for entry in self.config.items(section):
            input_dictionary[entry[0]] = entry[1]
            tmp_dict[entry[0]] = entry[1]
        resultdicts.update({section: copy.deepcopy(tmp_dict)})

    resultdicts.update({'input': input_dictionary})
    resultdicts.update({self.name: input_dictionary})

    if 'pipeline.mapfile' in self.parset.keywords():
        resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
        resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

    # *********************************************************************
    # main loop
    # there is a distinction between recipes and plugins for user scripts.
    # plugins are not used at the moment and might better be replaced
    # with master recipes
    while step_name_list:
        stepname = step_name_list.pop(0)
        self.logger.info("Beginning step %s" % (stepname,))
        step = step_control_dict[stepname]
        #step_parset = step_parset_obj[stepname]
        inputdict = {}
        inputargs = []
        resultdict = {}
        # default kind_of_step to recipe.
        try:
            kind_of_step = step.getString('kind')
        except:
            kind_of_step = 'recipe'
        try:
            typeval = step.getString('type')
        except:
            typeval = ''
        adds = None
        if stepname in step_parset_obj:
            adds = self._construct_step_parset(inputdict,
                                               step_parset_obj[stepname],
                                               resultdicts,
                                               step_parset_files[stepname],
                                               stepname)
        # stepname is not a valid input for old recipes
        if kind_of_step == 'recipe':
            if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                inputdict['stepname'] = stepname
                if adds:
                    inputdict.update(adds)

        self._construct_cmdline(inputargs, step, resultdicts)

        if stepname in step_parset_files:
            inputdict['parset'] = step_parset_files[stepname]

        self._construct_input(inputdict, step, resultdicts)
        # hack: popping 'type' is necessary. why? because 'kind' was
        # already deleted in the parsets
        try:
            inputdict.pop('type')
        except:
            pass
        try:
            inputdict.pop('kind')
        except:
            pass
        # \hack
        # more hacks. The framework's DictField is not properly implemented.
        # Construct your own dict from the input; Python's builtin functions
        # can't handle the string returned from the parset class.
        if 'environment' in inputdict.keys():
            val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
            splitval = str(val).split(',')
            valdict = {}
            for item in splitval:
                valdict[item.split(':')[0]] = item.split(':')[1]
            inputdict['environment'] = valdict

        # subpipeline. goal is to specify a pipeline within a pipeline.
        # load another existing pipeline parset and add it to your own.
        if kind_of_step == 'pipeline':
            subpipeline_parset = Parset()
            subpipeline_parset.adoptFile(typeval)
            submapfile = ''
            subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

            if 'pipeline.mapfile' in subpipeline_parset.keywords():
                submapfile = subpipeline_parset['pipeline.mapfile']
                subpipeline_parset.remove('pipeline.mapfile')
            if 'mapfile_in' in inputdict.keys():
                submapfile = inputdict.pop('mapfile_in')
            resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                'parset': typeval,
                'mapfile': submapfile,
            }})
            #todo: take care of plugin paths and everything other than
            # individual steps. make a pipeline parse method that returns
            # everything needed, maybe as dicts, to combine them into one
            subpipeline_parset.remove('pipeline.steps')
            if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                subpipeline_parset.remove('pipeline.pluginpath')
            checklist = copy.deepcopy(subpipeline_steplist)
            for k in self._keys(subpipeline_parset):
                if 'loopsteps' in k:
                    for item in subpipeline_parset.getStringVector(k):
                        checklist.append(item)

            # *****************************************************************
            # the master parset did not handle formatting and comments in the
            # parset. the format is proper only after use of parset.makeSubset;
            # then it is a different object from a different super class :(.
            # this also explains the use of parset.keys and parset.keys().
            # take the parset from the subpipeline and add it to the master
            # parset.
            # UPDATE: do not use .keys on the master parset. use .keywords(),
            # then comments are filtered.
            # *****************************************************************
            # replace names of steps with the subpipeline step name to create
            # a unique identifier.
            # replacement values starting with ! will be taken from the master
            # parset and overwrite the ones in the subpipeline. this only
            # works if the ! value is already in the subpipeline
            for k in self._keys(subpipeline_parset):
                val = subpipeline_parset[k]
                if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                    for item in checklist:
                        if item + ".output" in str(val):
                            val = str(val).replace(item, stepname + '-' + item)
                    self.parset.add(stepname + '-' + k, str(val))
                else:
                    # remove replacement strings to prevent loading the same key twice
                    if k in self._keys(self.parset):
                        self.parset.remove(k)
                    self.parset.add(k, str(val))
            for i, item in enumerate(subpipeline_steplist):
                subpipeline_steplist[i] = stepname + '-' + item
            for item in step_parset_obj[stepname].keys():
                for k in self._keys(self.parset):
                    if (str(k).startswith('!') and item == str(k).strip("! ") or
                            str(k).startswith('pipeline.replace.') and
                            item == str(k)[17:].strip()):
                        self.parset.remove(k)
                        self.parset.add('! ' + item,
                                        str(step_parset_obj[stepname][item]))
            self._replace_values()

            self._construct_steps(subpipeline_steplist, step_control_dict,
                                  step_parset_files, step_parset_obj, parset_dir)
            for j in reversed(subpipeline_steplist):
                name = j
                step_control_dict[name] = step_control_dict[j]
                step_name_list.insert(0, name)

        # loop
        if kind_of_step == 'loop':
            # remember which loop is running, to stop it from a conditional step
            if activeloop[0] != stepname:
                activeloop.insert(0, stepname)
            # prepare
            counter = 0
            breakloop = False
            if stepname in resultdicts:
                counter = int(resultdicts[stepname]['counter']) + 1
                breakloop = resultdicts[stepname]['break']
            loopsteps = step.getStringVector('loopsteps')

            # break at max iteration or when another step sets the break variable
            if counter == step.getInt('loopcount'):
                breakloop = True
            if not breakloop:
                # add loop steps to the pipeline, including the loop itself
                step_name_list.insert(0, stepname)
                self._construct_steps(loopsteps, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(loopsteps):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)
                # results for other steps to check, and write states
                resultdict = {'counter': counter, 'break': breakloop}
            else:
                # reset values for a second use of the loop
                # (but why would you do that?)
                resultdict = {'counter': -1, 'break': False}
                activeloop.pop(0)

        # recipes
        if kind_of_step == 'recipe':
            with duration(self, stepname):
                resultdict = self.run_task(
                    typeval,
                    inputargs,
                    **inputdict
                )

        # plugins
        if kind_of_step == 'plugin':
            bla = str(self.config.get('DEFAULT', 'recipe_directories'))
            pluginpath = bla.rstrip(']').lstrip('[').split(',')
            for i, item in enumerate(pluginpath):
                pluginpath[i] = os.path.join(item, 'plugins')
            if 'pluginpath' in pipeline_args.keys():
                pluginpath.append(pipeline_args.getString('pluginpath'))
            with duration(self, stepname):
                resultdict = loader.call_plugin(typeval, pluginpath,
                                                inputargs, **inputdict)
        resultdicts[stepname] = resultdict

        # breaking the loop step:
        # if the step has the keyword for loop breaks, assign the value
        if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
            resultdicts[activeloop[0]]['break'] = resultdict['break']
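
# Illustration (hypothetical recipe, not framework code): how a step can end
# an enclosing 'loop' step. The main loop above copies resultdict['break']
# into the result dict of the innermost active loop; the loop then stops on
# its next iteration, or after 'loopcount' iterations, whichever comes first.
def hypothetical_convergence_check(previous_residual, residual):
    # signal the enclosing loop to stop once the fit no longer improves
    return {'residual': residual, 'break': residual >= previous_residual}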
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( 'ObsSW.Observation.ObservationControl.PythonControl.') # Get input/output-data products specifications. self._get_io_product_specs() # Create some needed directories job_dir = self.config.get("layout", "job_directory") mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(mapfile_dir) parset_dir = os.path.join(job_dir, "parsets") create_directory(parset_dir) # ********************************************************************* # 2. Copy the instrument files to the correct node # The instrument files are currently located on the wrong nodes # Copy to correct nodes and assign the instrument table the now # correct data # Copy the instrument files to the corrent nodes: failures might happen # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug("Wrote input data mapfile: %s" % data_mapfile) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") self.output_data['data'].save(corrected_mapfile) self.logger.debug("Wrote output corrected data mapfile: %s" % corrected_mapfile) # Validate number of copied files, abort on zero files copied if len(self.input_data['data']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['data'])) # ********************************************************************* # 3. Create database needed for performing work: # - GVDS, describing data on the compute nodes # - SourceDB, for skymodel (A-team) # - ParmDB for outputtting solutions with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds'] # Read metadata (e.g., start- and end-time) from the GVDS file. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. 
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel') if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob')['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always=py_parset.getStringVector( 'PreProcessing.demix_always'), demix_if_needed=py_parset.getStringVector( 'PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile'))['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile)['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile)['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework metadata_file = "%s_feedback_Correlated" % (self.parset_file, ) with duration(self, "get_metadata"): self.run_task( "get_metadata", corrected_mapfile, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
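
# Helper sketch (hypothetical; mirrors the inline sky-model logic repeated in
# the pipelines above): a bare name 'X' resolves to
# <lofarroot>/share/pipeline/skymodels/X.skymodel, while an absolute path is
# taken verbatim; a missing file is an error.
def resolve_skymodel(skymodel, lofarroot):
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(lofarroot, 'share', 'pipeline', 'skymodels',
                                skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise PipelineException("Skymodel %s does not exist" % skymodel)
    return skymodel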
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Get input from parset, validate and cast to pipeline 'data types' # Only perform work on existing files # Created needed directories # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join( mapfile_dir, "input_correlated.mapfile" ) output_correlated_mapfile = os.path.join( mapfile_dir, "output_correlated.mapfile" ) output_instrument_mapfile = os.path.join( mapfile_dir, "output_instrument.mapfile" ) self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create database needed for performing work: # Vds, descibing data on the nodes # sourcedb, For skymodel (A-team) # parmdb for outputtting solutions # Produce a GVDS file describing the data on the compute nodes. with duration(self, "vdsmaker"): gvds_file = self.run_task( "vdsmaker", input_correlated_mapfile )['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Run NDPPP to demix the A-Team sources # TODOW: Do flagging? # Create a parameter-subset for DPPP and write it to file. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 4. Run BBS with a model of the calibrator # Create a parmdb for calibration solutions # Create sourcedb with known calibration solutions # Run bbs with both # Create an empty parmdb for BBS with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", dppp_mapfile, mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'), suffix='.bbs.parmdb' )['mapfile'] # Create a sourcedb based on sourcedb's input argument "skymodel" with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, skymodel=os.path.join( self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', py_parset.getString('Calibration.SkyModel') + '.skymodel'), mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'), suffix='.bbs.sourcedb')['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the calibrator source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 5. Perform gain outlier correction on the found calibration solutions # Swapping outliers in the gains with the median # Export the calibration solutions using gainoutliercorrection and store # the results in the files specified in the instrument mapfile. export_instrument_model = py_parset.getBool( 'Calibration.exportCalibrationParameters', False) with duration(self, "gainoutliercorrection"): self.run_task("gainoutliercorrection", (parmdb_mapfile, output_instrument_mapfile), sigma=1.0, export_instrument_model=export_instrument_model) # TODO: Parset parameter # ********************************************************************* # 6. Copy corrected MS's to their final output destination. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile ) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two files and write the final feedback file correlated_metadata = os.path.join(parset_dir, "correlated.metadata") instrument_metadata = os.path.join(parset_dir, "instrument.metadata") with duration(self, "get_metadata"): self.run_task("get_metadata", output_correlated_mapfile, parset_file=correlated_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") with duration(self, "get_metadata"): self.run_task("get_metadata", output_instrument_mapfile, parset_file=instrument_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel") parset = parameterset(correlated_metadata) parset.adoptFile(instrument_metadata) parset.writeFile(self.parset_feedback_file) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile") self.input_data.save(input_data_mapfile) output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data.save(output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): output_data_mapfile = self.run_task("ndppp", (input_data_mapfile, output_data_mapfile), data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 6. 
Create feedback file for further processing by the LOFAR framework # Create a parset containing the metadata metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_data_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile") self.input_data.save(input_data_mapfile) output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data.save(output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): self.run_task("ndppp", (input_data_mapfile, output_data_mapfile), data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile ) # ********************************************************************* # 6. 
Create feedback file for further processing by the LOFAR framework # (MAC) # Create a parset-file containing the metadata for MAC/SAS with duration(self, "get_metadata"): self.run_task("get_metadata", output_data_mapfile, parset_file=self.parset_feedback_file, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Get input from parset, validate and cast to pipeline 'data types' # Only perform work on existing files # Created needed directories # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join(mapfile_dir, "input_correlated.mapfile") output_correlated_mapfile = os.path.join(mapfile_dir, "output_correlated.mapfile") output_instrument_mapfile = os.path.join(mapfile_dir, "output_instrument.mapfile") self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug( "Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create database needed for performing work: # Vds, descibing data on the nodes # sourcedb, For skymodel (A-team) # parmdb for outputtting solutions # Produce a GVDS file describing the data on the compute nodes. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_correlated_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", input_correlated_mapfile, mapfile=os.path.join( mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb')['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel') if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", input_correlated_mapfile, mapfile=os.path.join( mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob')['mapfile'] # ********************************************************************* # 3. Run NDPPP to demix the A-Team sources # TODOW: Do flagging? # Create a parameter-subset for DPPP and write it to file. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always=py_parset.getStringVector( 'PreProcessing.demix_always'), demix_if_needed=py_parset.getStringVector( 'PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile)['mapfile'] # ********************************************************************* # 4. Run BBS with a model of the calibrator # Create a parmdb for calibration solutions # Create sourcedb with known calibration solutions # Run bbs with both # Create an empty parmdb for BBS with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", dppp_mapfile, mapfile=os.path.join( mapfile_dir, 'bbs.parmdb.mapfile'), suffix='.bbs.parmdb')['mapfile'] # Create a sourcedb based on sourcedb's input argument "skymodel" with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, skymodel=os.path.join( self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', py_parset.getString('Calibration.SkyModel') + '.skymodel'), mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'), suffix='.bbs.sourcedb')['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the calibrator source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile)['data_mapfile'] # ********************************************************************* # 5. Perform gain outlier correction on the found calibration solutions # Swapping outliers in the gains with the median # Export the calibration solutions using gainoutliercorrection and store # the results in the files specified in the instrument mapfile. export_instrument_model = py_parset.getBool( 'Calibration.exportCalibrationParameters', False) with duration(self, "gainoutliercorrection"): self.run_task("gainoutliercorrection", (parmdb_mapfile, output_instrument_mapfile), sigma=1.0, export_instrument_model=export_instrument_model ) # TODO: Parset parameter # ********************************************************************* # 6. Copy corrected MS's to their final output destination. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two files and write the final feedback file correlated_metadata = os.path.join(parset_dir, "correlated.metadata") instrument_metadata = os.path.join(parset_dir, "instrument.metadata") with duration(self, "get_metadata"): self.run_task( "get_metadata", output_correlated_mapfile, parset_file=correlated_metadata, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") with duration(self, "get_metadata"): self.run_task( "get_metadata", output_instrument_mapfile, parset_file=instrument_metadata, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel") parset = parameterset(correlated_metadata) parset.adoptFile(instrument_metadata) parset.writeFile(self.parset_feedback_file) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( 'ObsSW.Observation.ObservationControl.PythonControl.') # Get input/output-data products specifications. self._get_io_product_specs() # Create some needed directories job_dir = self.config.get("layout", "job_directory") mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(mapfile_dir) parset_dir = os.path.join(job_dir, "parsets") create_directory(parset_dir) # ********************************************************************* # 2. Copy the instrument files to the correct node # The instrument files are currently located on the wrong nodes # Copy to correct nodes and assign the instrument table the now # correct data # Copy the instrument files to the corrent nodes: failures might happen # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug( "Wrote input data mapfile: %s" % data_mapfile ) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") self.output_data['data'].save(corrected_mapfile) self.logger.debug( "Wrote output corrected data mapfile: %s" % corrected_mapfile ) # Validate number of copied files, abort on zero files copied if len(self.input_data['data']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['data']) ) # ********************************************************************* # 3. Create database needed for performing work: # - GVDS, describing data on the compute nodes # - SourceDB, for skymodel (A-team) # - ParmDB for outputtting solutions with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds'] # Read metadata (e.g., start- and end-time) from the GVDS file. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. 
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task("ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile') )['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile )['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task("bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile ) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # (MAC) # Create a parset-file containing the metadata for MAC/SAS with duration(self, "get_metadata"): self.run_task("get_metadata", corrected_mapfile, parset_file=self.parset_feedback_file, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts') ), product_type="Correlated") return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join( mapfile_dir, "input_correlated.mapfile" ) output_correlated_mapfile = os.path.join( mapfile_dir, "output_correlated.mapfile" ) output_instrument_mapfile = os.path.join( mapfile_dir, "output_instrument.mapfile" ) self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task( "vdsmaker", input_correlated_mapfile )['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 4. Create a sourcedb from the user-supplied sky model, # and an empty parmdb. skymodel = py_parset.getString('Calibration.SkyModel') # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", dppp_mapfile, skymodel=skymodel, suffix='.bbs.sourcedb' )['mapfile'] with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", dppp_mapfile, suffix='.bbs.parmdb' )['mapfile'] # ********************************************************************* # 5. Run BBS to calibrate the data. # Create a parameter subset for BBS bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 6. Copy output products to their final destination. # a. copy the measurement sets # b. copy the calculated instrument models # When the copier recipe has run, the map-files named in # output_correlated_mapfile and output_instrument_mapfile will # contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile ) with duration(self, "copier"): self.run_task("copier", mapfile_source=parmdb_mapfile, mapfile_target=output_instrument_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_instrument_mapfile ) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two and write the final feedback correlated_metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_correlated_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=correlated_metadata_file) instrument_metadata_file = "%s_feedback_InstrumentModel" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_instrument_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel", metadata_file=instrument_metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(correlated_metadata_file)) self.send_feedback_dataproducts(parameterset(instrument_metadata_file)) return 0
def pipeline_logic(self): try: parset_file = os.path.abspath(self.inputs['args'][0]) except IndexError: return self.usage() try: if self.parset.keys == []: self.parset.adoptFile(parset_file) self.parset_feedback_file = parset_file + "_feedback" except RuntimeError: print >> sys.stderr, "Error: Parset file not found!" return self.usage() self._replace_values() # just a reminder that this has to be implemented validator = GenericPipelineParsetValidation(self.parset) if not validator.validate_pipeline(): self.usage() exit(1) if not validator.validate_steps(): self.usage() exit(1) #set up directories job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # ********************************************************************* # maybe we dont need a subset but just a steplist # at the moment only a list with stepnames is given for the pipeline.steps parameter # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....] # the names will be the prefix for parset subsets pipeline_args = self.parset.makeSubset( self.parset.fullModuleName('pipeline') + '.') # ********************************************************************* # forward declaration of things. just for better overview and understanding whats in here. # some of this might be removed in upcoming iterations, or stuff gets added. step_name_list = pipeline_args.getStringVector('steps') step_control_dict = {} step_parset_files = {} step_parset_obj = {} activeloop = [''] # construct the list of step names and controls self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir) # initial parameters to be saved in resultsdict so that recipes have access to this step0 # double init values. 'input' should be considered deprecated # self.name would be consistent to use in subpipelines resultdicts = { 'input': { 'parset': parset_file, 'parsetobj': self.parset, 'job_dir': job_dir, 'parset_dir': parset_dir, 'mapfile_dir': mapfile_dir } } resultdicts.update({ self.name: { 'parset': parset_file, 'parsetobj': self.parset, 'job_dir': job_dir, 'parset_dir': parset_dir, 'mapfile_dir': mapfile_dir } }) if 'pipeline.mapfile' in self.parset.keys: resultdicts['input']['mapfile'] = str( self.parset['pipeline.mapfile']) resultdicts[self.name]['mapfile'] = str( self.parset['pipeline.mapfile']) # ********************************************************************* # main loop # there is a distinction between recipes and plugins for user scripts. # plugins are not used at the moment and might better be replaced with master recipes while step_name_list: stepname = step_name_list.pop(0) step = step_control_dict[stepname] #step_parset = step_parset_obj[stepname] inputdict = {} inputargs = [] resultdict = {} # default kind_of_step to recipe. 
try: kind_of_step = step.getString('kind') except: kind_of_step = 'recipe' try: typeval = step.getString('type') except: typeval = '' #self._construct_cmdline(inputargs, step, resultdicts) additional_input = {} if stepname in step_parset_obj: additional_input = self._construct_step_parset( step_parset_obj[stepname], resultdicts, step_parset_files[stepname], stepname) # stepname not a valid input for old recipes if kind_of_step == 'recipe': if self.task_definitions.get(typeval, 'recipe') == 'executable_args': inputdict = {'stepname': stepname} inputdict.update(additional_input) self._construct_cmdline(inputargs, step, resultdicts) if stepname in step_parset_files: inputdict['parset'] = step_parset_files[stepname] self._construct_input(inputdict, step, resultdicts) # hack, popping 'type' is necessary, why? because you deleted kind already in parsets try: inputdict.pop('type') except: pass try: inputdict.pop('kind') except: pass # \hack # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input. # python buildin functions cant handle the string returned from parset class. if 'environment' in inputdict.keys(): val = inputdict['environment'].rstrip('}').lstrip('{').replace( ' ', '') splitval = str(val).split(',') valdict = {} for item in splitval: valdict[item.split(':')[0]] = item.split(':')[1] inputdict['environment'] = valdict # subpipeline. goal is to specify a pipeline within a pipeline. # load other existing pipeline parset and add them to your own. if kind_of_step == 'pipeline': subpipeline_parset = Parset() subpipeline_parset.adoptFile(typeval) submapfile = '' subpipeline_steplist = subpipeline_parset.getStringVector( 'pipeline.steps') if 'pipeline.mapfile' in subpipeline_parset.keys: submapfile = subpipeline_parset['pipeline.mapfile'] subpipeline_parset.remove('pipeline.mapfile') if 'mapfile_in' in inputdict.keys(): submapfile = inputdict.pop('mapfile_in') resultdicts.update({ os.path.splitext(os.path.basename(typeval))[0]: { 'parset': typeval, 'mapfile': submapfile, } }) #todo: take care of pluginpathes and everything other then individual steps # make a pipeline parse methods that returns everything needed. # maybe as dicts to combine them to one subpipeline_parset.remove('pipeline.steps') if 'pipeline.pluginpath' in subpipeline_parset.keys: subpipeline_parset.remove('pipeline.pluginpath') checklist = copy.deepcopy(subpipeline_steplist) for k in subpipeline_parset.keys: if 'loopsteps' in k: for item in subpipeline_parset.getStringVector(k): checklist.append(item) # ********************************************************************* # master parset did not handle formatting and comments in the parset. # proper format only after use of parset.makesubset. then it is a different object # from a different super class :(. this also explains use of parset.keys and parset.keys() # take the parset from subpipeline and add it to the master parset. # ********************************************************************* # replace names of steps with the subpipeline stepname to create a unique identifier. # replacement values starting with ! will be taken from the master parset and overwrite # the ones in the subpipeline. only works if the ! 
            for k in subpipeline_parset.keys:
                if not str(k).startswith('#'):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!'):
                        for item in checklist:
                            if item in str(val):
                                val = str(val).replace(
                                    item, stepname + '-' + item)
                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        self.parset.add(k, str(val))
            for i, item in enumerate(subpipeline_steplist):
                subpipeline_steplist[i] = stepname + '-' + item
            for item in step_parset_obj[stepname].keys():
                for k in self.parset.keys:
                    if str(k).startswith('!') and item in k:
                        self.parset.remove(k)
                        self.parset.add(
                            '! ' + item,
                            str(step_parset_obj[stepname][item]))
            self._replace_values()
            self._construct_steps(subpipeline_steplist, step_control_dict,
                                  step_parset_files, step_parset_obj,
                                  parset_dir)
            for j in reversed(subpipeline_steplist):
                name = j
                step_control_dict[name] = step_control_dict[j]
                step_name_list.insert(0, name)
            # Remove the replacement strings to prevent loading the same
            # key twice.
            for k in copy.deepcopy(self.parset.keys):
                if str(k).startswith('!'):
                    self.parset.remove(k)

        # Loop
        if kind_of_step == 'loop':
            # Remember which loop is running, so that a conditional step
            # can stop it.
            if activeloop[0] != stepname:
                activeloop.insert(0, stepname)
            # Prepare.
            counter = 0
            breakloop = False
            if stepname in resultdicts:
                counter = int(resultdicts[stepname]['counter']) + 1
                breakloop = resultdicts[stepname]['break']
            loopsteps = step.getStringVector('loopsteps')
            # Break at the maximum iteration count, or when another step
            # has set the break variable.
            if counter == step.getInt('loopcount'):
                breakloop = True
            if not breakloop:
                # Add the loop steps to the pipeline, including the loop
                # step itself.
                step_name_list.insert(0, stepname)
                self._construct_steps(loopsteps, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(loopsteps):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)
                # Results for other steps to check, and to write states.
                resultdict = {'counter': counter, 'break': breakloop}
            else:
                # Reset the values for a second use of the loop (but why
                # would you do that?).
                resultdict = {'counter': -1, 'break': False}
                activeloop.pop(0)

        # Recipes
        if kind_of_step == 'recipe':
            with duration(self, stepname):
                resultdict = self.run_task(typeval, inputargs, **inputdict)

        # Plugins
        if kind_of_step == 'plugin':
            with duration(self, stepname):
                resultdict = loader.call_plugin(
                    typeval, pipeline_args.getString('pluginpath'),
                    inputargs, **inputdict)
        resultdicts[stepname] = resultdict

        # Breaking the loop step: if the step has the keyword for loop
        # breaks, assign the value.
        if resultdict is not None and 'break' in resultdict:
            if resultdict['break']:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
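# A minimal standalone sketch of the 'environment' string parsing hack in
# the main loop above, assuming the parset hands back a brace-wrapped,
# comma-separated string such as '{PATH: /usr/bin, NTHREADS: 4}'. The
# function name and example values are hypothetical, not part of the
# framework; it uses partition() so that values containing ':' survive.
def _parse_environment_string(val):
    """Turn a parset string '{key: value, ...}' into a dict of strings."""
    stripped = val.strip().lstrip('{').rstrip('}').replace(' ', '')
    result = {}
    for item in stripped.split(','):
        if not item:
            continue
        key, _, value = item.partition(':')
        result[key] = value
    return result

# Usage (hypothetical values):
#   _parse_environment_string('{OMP_NUM_THREADS: 4, LOFARROOT: /opt/lofar}')
#   -> {'OMP_NUM_THREADS': '4', 'LOFARROOT': '/opt/lofar'}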
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    # *********************************************************************
    # 1. Prepare phase: collect data from the parset and input mapfiles.
    py_parset = self.parset.makeSubset(
        self.parset.fullModuleName('PythonControl') + '.')

    # Get input/output-data product specifications.
    self._get_io_product_specs()

    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")

    # Create directories for temporary parset- and map files.
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # Write input- and output-data map-files.
    input_correlated_mapfile = os.path.join(
        mapfile_dir, "input_correlated.mapfile")
    output_correlated_mapfile = os.path.join(
        mapfile_dir, "output_correlated.mapfile")
    output_instrument_mapfile = os.path.join(
        mapfile_dir, "output_instrument.mapfile")
    self.input_data['correlated'].save(input_correlated_mapfile)
    self.output_data['correlated'].save(output_correlated_mapfile)
    self.output_data['instrument'].save(output_instrument_mapfile)

    if len(self.input_data['correlated']) == 0:
        self.logger.warn("No input data files to process. Bailing out!")
        return 0

    self.logger.debug(
        "Processing: %s" %
        ', '.join(str(f) for f in self.input_data['correlated']))

    # *********************************************************************
    # 2. Create the VDS-file and databases. The latter are needed when
    #    doing demixing within DPPP.
    with duration(self, "vdsmaker"):
        gvds_file = self.run_task(
            "vdsmaker", input_correlated_mapfile)['gvds']

    # Read metadata (start and end times, pointing direction) from the GVDS.
    with duration(self, "vdsreader"):
        vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

    # Create a parameter database that will be used by the NDPPP demixing.
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task(
            "setupparmdb", input_correlated_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
            suffix='.dppp.parmdb'
        )['mapfile']

    # Create a source database from a user-supplied sky model.
    # The user-supplied sky model can either be a name, in which case the
    # pipeline will search for a file <name>.skymodel in the default search
    # path $LOFARROOT/share/pipeline/skymodels, or a full path.
    # It is an error if the file does not exist.
    skymodel = py_parset.getString('PreProcessing.SkyModel')
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(
            # This should really become os.environ['LOFARROOT']
            self.config.get('DEFAULT', 'lofarroot'),
            'share', 'pipeline', 'skymodels', skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise PipelineException("Skymodel %s does not exist" % skymodel)
    with duration(self, "setupsourcedb"):
        sourcedb_mapfile = self.run_task(
            "setupsourcedb", input_correlated_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
            skymodel=skymodel,
            suffix='.dppp.sourcedb',
            type='blob'
        )['mapfile']

    # *********************************************************************
    # 3. Average and flag data, using NDPPP.
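    # A note on the two lines below, assuming standard LOFAR parameterset
    # behaviour: makeSubset('DPPP.') strips the 'DPPP.' prefix from every
    # matching key, so a (hypothetical) entry 'DPPP.msin.datacolumn=DATA'
    # is written to NDPPP.parset as 'msin.datacolumn=DATA'.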
    ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
    py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

    # Run the Default Pre-Processing Pipeline (DPPP).
    with duration(self, "ndppp"):
        dppp_mapfile = self.run_task(
            "ndppp", input_correlated_mapfile,
            data_start_time=vdsinfo['start_time'],
            data_end_time=vdsinfo['end_time'],
            demix_always=py_parset.getStringVector(
                'PreProcessing.demix_always'),
            demix_if_needed=py_parset.getStringVector(
                'PreProcessing.demix_if_needed'),
            parset=ndppp_parset,
            parmdb_mapfile=parmdb_mapfile,
            sourcedb_mapfile=sourcedb_mapfile
        )['mapfile']

    # *********************************************************************
    # 4. Create a sourcedb from the user-supplied sky model,
    #    and an empty parmdb.
    skymodel = py_parset.getString('Calibration.SkyModel')
    # The user-supplied sky model can either be a name, in which case the
    # pipeline will search for a file <name>.skymodel in the default search
    # path $LOFARROOT/share/pipeline/skymodels, or a full path.
    # It is an error if the file does not exist.
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(
            # This should really become os.environ['LOFARROOT']
            self.config.get('DEFAULT', 'lofarroot'),
            'share', 'pipeline', 'skymodels', skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise PipelineException("Skymodel %s does not exist" % skymodel)
    with duration(self, "setupsourcedb"):
        sourcedb_mapfile = self.run_task(
            "setupsourcedb", dppp_mapfile,
            skymodel=skymodel,
            suffix='.bbs.sourcedb'
        )['mapfile']
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task(
            "setupparmdb", dppp_mapfile,
            suffix='.bbs.parmdb'
        )['mapfile']

    # *********************************************************************
    # 5. Run BBS to calibrate the data.

    # Create a parameter subset for BBS.
    bbs_parset = os.path.join(parset_dir, "BBS.parset")
    py_parset.makeSubset('BBS.').writeFile(bbs_parset)
    with duration(self, "bbs_reducer"):
        bbs_mapfile = self.run_task(
            "bbs_reducer", dppp_mapfile,
            parset=bbs_parset,
            instrument_mapfile=parmdb_mapfile,
            sky_mapfile=sourcedb_mapfile
        )['data_mapfile']

    # *********************************************************************
    # 6. Copy output products to their final destination:
    #    a. copy the measurement sets
    #    b. copy the calculated instrument models
    # When the copier recipe has run, the map-files named in
    # output_correlated_mapfile and output_instrument_mapfile will contain
    # an updated map of output files.
    with duration(self, "copier"):
        self.run_task("copier",
                      mapfile_source=bbs_mapfile,
                      mapfile_target=output_correlated_mapfile,
                      mapfiles_dir=mapfile_dir,
                      mapfile=output_correlated_mapfile)
    with duration(self, "copier"):
        self.run_task("copier",
                      mapfile_source=parmdb_mapfile,
                      mapfile_target=output_instrument_mapfile,
                      mapfiles_dir=mapfile_dir,
                      mapfile=output_instrument_mapfile)

    # *********************************************************************
    # 7. Create a feedback file for further processing by the LOFAR
    #    framework:
    #    a. get the metadata of the measurement sets
    #    b. get the metadata of the instrument models
    #    c. join the two files and write the final feedback file
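    # Assumed parameterset semantics for step 7c below: adoptFile() merges
    # the keys from the given file into the current set, so the feedback
    # file ends up holding the union of the Correlated and InstrumentModel
    # metadata.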
    correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
    instrument_metadata = os.path.join(parset_dir, "instrument.metadata")
    with duration(self, "get_metadata"):
        self.run_task("get_metadata", output_correlated_mapfile,
                      parset_file=correlated_metadata,
                      parset_prefix=(
                          self.parset.getString('prefix') +
                          self.parset.fullModuleName('DataProducts')),
                      product_type="Correlated")
    with duration(self, "get_metadata"):
        self.run_task("get_metadata", output_instrument_mapfile,
                      parset_file=instrument_metadata,
                      parset_prefix=(
                          self.parset.getString('prefix') +
                          self.parset.fullModuleName('DataProducts')),
                      product_type="InstrumentModel")

    parset = parameterset(correlated_metadata)
    parset.adoptFile(instrument_metadata)
    parset.writeFile(self.parset_feedback_file)

    return 0
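# A minimal, self-contained sketch of the `with duration(self, name)`
# pattern used throughout pipeline_logic above. This is NOT the framework's
# implementation (the real one appears to record state in an XML tree); it
# only illustrates the shape of a timing context manager for pipeline
# steps. `pipeline.logger` is assumed to exist, as in the code above.
import time
from contextlib import contextmanager

@contextmanager
def duration_sketch(pipeline, name):
    start = time.time()
    try:
        yield
    finally:
        elapsed = time.time() - start
        pipeline.logger.info("step %s took %.2f s" % (name, elapsed))

# Usage (hypothetical):
#   with duration_sketch(self, "ndppp"):
#       self.run_task("ndppp", input_correlated_mapfile)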