def _run_create_dbs_node(self, input_map, slice_paths_map,
                         assoc_theta, source_list_map):
    """
    Decompose the input mapfiles into tasks for specific nodes and
    distribute these to the node recipes. Wait for the jobs to finish and
    return the list of created jobs.
    """
    # Compile the command to be executed on the remote machine
    node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    # create jobs
    jobs = []
    output_map = copy.deepcopy(input_map)

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    align_data_maps(input_map, output_map, slice_paths_map,
                    source_list_map)

    source_list_map.iterator = slice_paths_map.iterator = \
        input_map.iterator = DataMap.SkipIterator
    for idx, (input_item, slice_item, source_list_item) in enumerate(zip(
            input_map, slice_paths_map, source_list_map)):
        host_ms, concat_ms = input_item.host, input_item.file
        host_slice, slice_paths = slice_item.host, slice_item.file

        # Create the parameters depending on the input_map
        sourcedb_target_path = concat_ms + self.inputs["sourcedb_suffix"]

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(self.inputs['working_directory'],
                                   "imager_create_dbs_{0}".format(idx))

        # The actual call for the node script
        arguments = [concat_ms, sourcedb_target_path,
                     self.inputs["monetdb_hostname"],
                     self.inputs["monetdb_port"],
                     self.inputs["monetdb_name"],
                     self.inputs["monetdb_user"],
                     self.inputs["monetdb_password"],
                     assoc_theta,
                     self.inputs["parmdb_executable"],
                     slice_paths,
                     self.inputs["parmdb_suffix"],
                     self.environment,
                     working_dir,
                     self.inputs["makesourcedb_path"],
                     source_list_item.file,
                     self.inputs["major_cycle"]]
        jobs.append(ComputeJob(host_ms, node_command, arguments))

    # Wait for the nodes to finish
    if len(jobs) > 0:
        self._schedule_jobs(jobs)

    return jobs, output_map
def _get_io_product_specs(self):
    """
    Get input- and output-data product specifications from the
    parset-file, and do some sanity checks.
    """
    dps = self.parset.makeSubset(
        self.parset.fullModuleName('DataProducts') + '.'
    )
    # convert input dataproducts from parset value to DataMap
    self.input_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector('Input_Correlated.locations'),
            dps.getStringVector('Input_Correlated.filenames'),
            dps.getBoolVector('Input_Correlated.skip'))
    ])
    self.logger.debug("%d Input_Correlated data products specified" %
                      len(self.input_data))

    self.output_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector('Output_SkyImage.locations'),
            dps.getStringVector('Output_SkyImage.filenames'),
            dps.getBoolVector('Output_SkyImage.skip'))
    ])
    self.logger.debug("%d Output_SkyImage data products specified" %
                      len(self.output_data))

    self.output_correlated_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector('Output_Correlated.locations'),
            dps.getStringVector('Output_Correlated.filenames'),
            dps.getBoolVector('Output_Correlated.skip'))
    ])

    # ensure that the two output maps contain the same skip fields
    align_data_maps(self.output_data, self.output_correlated_data)

    self.logger.debug("%d Output_Correlated data products specified" %
                      len(self.output_correlated_data))

    # # Sanity checks on input- and output data product specifications
    # if not validate_data_maps(self.input_data, self.output_data):
    #     raise PipelineException(
    #         "Validation of input/output data product specification failed!"
    #     )  # Turned off until DataMap is extended..

    # Target data is basically scratch data, consisting of one concatenated
    # MS per image. It must be stored on the same host as the final image.
    self.target_data = copy.deepcopy(self.output_data)

    for idx, item in enumerate(self.target_data):
        item.file = os.path.join(self.scratch_directory,
                                 'ms_per_image_%d' % idx, 'concat.ms')
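# Worked illustration of the list comprehensions above, assuming locations of
# the form "host:directory" as used in LOFAR mapfiles (the hostname and paths
# below are made up for illustration):
#
#   location = "locus001:/data/scratch"
#   filename = "L123_SB000.MS"
#   skip = False
#   tuple(os.path.join(location, filename).split(':')) + (skip,)
#   # -> ('locus001', '/data/scratch/L123_SB000.MS', False)
#
# i.e. every DataMap entry ends up as a (host, file, skip) triple.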
def test_align_data_maps_equal_maps_no_change(self):
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    data_map3 = MultiDataMap(self.new_style_multi_map)

    # Perform an align
    align_data_maps(data_map, data_map2)

    # There should be no changes and the three maps should be the same;
    # data_map3 serves as the untouched reference copy
    for entry1, entry2, entry3 in zip(data_map, data_map2, data_map3):
        self.assertEqual(entry1, entry3)
        self.assertEqual(entry2, entry3)
def test_align_data_maps_equal_maps_skip_set(self):
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    # Change a single entry in the datamap to True
    data_map.data[1].skip = True

    # Perform an align
    align_data_maps(data_map, data_map2)

    # The skip field of the second entry should be set to True
    self.assertTrue(data_map2.data[1].skip,
                    "The skip field was not aligned correctly"
                    " in the second entry")
def test_align_data_maps_equal_maps_skip_set_5_multi_maps(self):
    # test for MultiDataMap align
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    data_map3 = MultiDataMap(self.new_style_multi_map)
    data_map4 = MultiDataMap(self.new_style_multi_map)
    data_map5 = MultiDataMap(self.new_style_multi_map)
    # change single skip value
    data_map.data[1].skip = True

    # Perform an align
    align_data_maps(data_map, data_map2, data_map3, data_map4, data_map5)

    # The skip field of the second entry should be set in all maps
    for aligned_map in (data_map2, data_map3, data_map4, data_map5):
        self.assertTrue(aligned_map.data[1].skip,
                        "The skip field was not aligned correctly"
                        " in the second entry")
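# For reference, a minimal sketch of the behaviour the tests above exercise,
# based on the recipe comments ("if 'skip' is True in any of these maps, then
# 'skip' must be set to True in all maps"). This is an illustrative
# reimplementation, not the actual lofarpipe align_data_maps:
def align_data_maps_sketch(*maps):
    if len(maps) < 2:
        raise ValueError("At least two maps are needed for an align")
    # walk the maps entry by entry; entries at the same index belong together
    for entries in zip(*(a_map.data for a_map in maps)):
        # if any map skips this entry, every map must skip it
        skip = any(entry.skip for entry in entries)
        for entry in entries:
            entry.skip = skip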
def go(self):
    """
    Steps:

    1. Load and validate the input datamaps
    2. Run the node parts of the recipe
    3. Validate node output and format the recipe output
    """
    super(selfcal_finalize, self).go()
    # *********************************************************************
    # 1. Load the datamaps
    awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
    ms_per_image_map = DataMap.load(self.inputs["ms_per_image_map"])
    sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
    sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
    target_mapfile = DataMap.load(self.inputs["target_mapfile"])
    output_image_mapfile = DataMap.load(
        self.inputs["output_image_mapfile"])
    concat_ms_mapfile = DataMap.load(self.inputs["concat_ms_map_path"])
    output_correlated_map = DataMap.load(
        self.inputs["output_correlated_mapfile"])
    processed_ms_dir = self.inputs["processed_ms_dir"]
    fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

    # Align the skip fields
    align_data_maps(awimager_output_map, ms_per_image_map,
                    sourcelist_map, target_mapfile, output_image_mapfile,
                    sourcedb_map, concat_ms_mapfile, output_correlated_map)

    # Set the correct iterator
    sourcelist_map.iterator = awimager_output_map.iterator = \
        ms_per_image_map.iterator = target_mapfile.iterator = \
        output_image_mapfile.iterator = sourcedb_map.iterator = \
        concat_ms_mapfile.iterator = output_correlated_map.iterator = \
        DataMap.SkipIterator

    # *********************************************************************
    # 2. Run the node side of the recipe
    command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    for (awimager_output_item, ms_per_image_item, sourcelist_item,
         target_item, output_image_item, sourcedb_item,
         concat_ms_item, correlated_item) in zip(
            awimager_output_map, ms_per_image_map, sourcelist_map,
            target_mapfile, output_image_mapfile, sourcedb_map,
            concat_ms_mapfile, output_correlated_map):
        # collect the files as arguments
        arguments = [awimager_output_item.file,
                     ms_per_image_item.file,
                     sourcelist_item.file,
                     target_item.file,
                     output_image_item.file,
                     self.inputs["minbaseline"],
                     self.inputs["maxbaseline"],
                     processed_ms_dir,
                     fillrootimagegroup_exec,
                     self.environment,
                     sourcedb_item.file,
                     concat_ms_item.file,
                     correlated_item.file,
                     self.inputs["msselect_executable"]]
        self.logger.info(
            "Starting finalize with the following args: {0}".format(
                arguments))
        jobs.append(ComputeJob(target_item.host, command, arguments))

    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Validate the performance of the node script and assign output
    successful_run = False
    for (job, output_image_item, output_correlated_item) in zip(
            jobs, output_image_mapfile, output_correlated_map):
        if "hdf5" not in job.results:
            # If the output failed, set skip to True on both output items
            # (the original assigned the bare loop variable instead of
            # its skip field, which had no effect)
            output_image_item.skip = True
            output_correlated_item.skip = True
        else:
            # signal that we have at least a single run finished ok;
            # no need to set skip in this case
            successful_run = True

    if not successful_run:
        self.logger.warn("Not a single finalizer succeeded")
        return 1

    # Save the location of the output images
    output_image_mapfile.save(self.inputs['placed_image_mapfile'])
    self.logger.debug(
        "Wrote mapfile containing placed hdf5 images: {0}".format(
            self.inputs['placed_image_mapfile']))

    # save the location of the measurement sets
    output_correlated_map.save(self.inputs['placed_correlated_mapfile'])
    self.logger.debug("Wrote mapfile containing placed mss: {0}".format(
        self.inputs['placed_correlated_mapfile']))

    self.outputs["placed_image_mapfile"] = self.inputs[
        'placed_image_mapfile']
    self.outputs["placed_correlated_mapfile"] = self.inputs[
        'placed_correlated_mapfile']

    return 0
def go(self):
    """
    This member contains all the functionality of the imager_awimager.
    Functionality is all located at the node side of the script.
    """
    super(selfcal_awimager, self).go()
    self.logger.info("Starting imager_awimager run")

    # *********************************************************************
    # 1. collect the inputs and validate
    input_map = DataMap.load(self.inputs['args'][0])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

    if not validate_data_maps(input_map, sourcedb_map):
        self.logger.error(
            "the supplied input_ms mapfile and sourcedb mapfile"
            " are incorrect. Aborting")
        self.logger.error(repr(input_map))
        self.logger.error(repr(sourcedb_map))
        return 1

    # *********************************************************************
    # 2. Start the node side of the awimager recipe
    # Compile the command to be executed on the remote machine
    node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))

    jobs = []
    output_map = copy.deepcopy(input_map)
    align_data_maps(input_map, output_map, sourcedb_map)

    sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
        DataMap.SkipIterator
    for measurement_item, source_item in zip(input_map, sourcedb_map):
        if measurement_item.skip or source_item.skip:
            jobs.append(None)
            continue
        # both the sourcedb and the measurement are in a map; unpack both
        host, measurement_path = \
            measurement_item.host, measurement_item.file
        host2, sourcedb_path = source_item.host, source_item.file

        # construct and save the output name
        arguments = [self.inputs['executable'],
                     self.environment,
                     self.inputs['parset'],
                     self.inputs['working_directory'],
                     self.inputs['output_image'],
                     measurement_path,
                     sourcedb_path,
                     self.inputs['mask_patch_size'],
                     self.inputs['autogenerate_parameters'],
                     self.inputs['specify_fov'],
                     self.inputs['fov'],
                     self.inputs['major_cycle'],
                     self.inputs['nr_cycles'],
                     self.inputs['perform_self_cal']]
        jobs.append(ComputeJob(host, node_command, arguments))
    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Check output of the node scripts
    for job, output_item in zip(jobs, output_map):
        # job == None on skipped job
        if "image" not in job.results:
            output_item.file = "failed"
            output_item.skip = True
        else:
            output_item.file = job.results["image"]
            output_item.skip = False

    # Check if there are finished runs
    successful_runs = False
    for item in output_map:
        if not item.skip:
            successful_runs = True
            break

    if not successful_runs:
        self.logger.error(
            "None of the started awimager runs finished correctly")
        self.logger.error(
            "No work left to be done: exiting with error status")
        return 1

    # In case of partial success
    if self.error.isSet():
        self.logger.error("Failed awimager node run detected. Continuing"
                          " with successful tasks.")

    self._store_data_map(self.inputs['mapfile'], output_map,
                         "mapfile containing produced awimages")

    self.outputs["mapfile"] = self.inputs['mapfile']
    return 0
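# The recipes above all follow the same pattern: align the skip fields first,
# then set DataMap.SkipIterator so that iteration only visits entries whose
# skip field is False. A small sketch of that contract (illustrative, and
# assuming SkipIterator semantics consistent with the loops above):
#
#   input_map.iterator = DataMap.SkipIterator
#   for item in input_map:
#       # only non-skipped entries are visited here, so jobs built in this
#       # loop line up one-to-one with the non-skipped entries of any other
#       # map that has been aligned with input_map
#       assert not item.skip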
def go(self):
    """
    imager_bbs functionality. Called by framework performing all the work
    """
    super(selfcal_bbs, self).go()
    self.logger.info("Starting imager_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
    concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python %s" % (self.__file__.replace(
        "master", "nodes"))
    map_dir = os.path.join(self.config.get("layout", "job_directory"),
                           "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        concat_ms_map.iterator = DataMap.SkipIterator

    # *********************************************************************
    for (ms, parmdb, sourcedb, concat_ms) in zip(
            ms_map, parmdb_map, sourcedb_map, concat_ms_map):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, host + "_ms_" + run_id + ".map")
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, host + "_parmdb_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, host + "_sky_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        # The concat ms does not have to be written: it already is a
        # singular item (it is the output of the reduce step)
        # redmine issue #6021
        arguments = [self.inputs['bbs_executable'],
                     self.inputs['parset'],
                     ms_list_path,
                     parmdb_list_path,
                     sourcedb_list_path,
                     concat_ms.file,
                     self.inputs['major_cycle']]
        jobs.append(ComputeJob(host, node_command, arguments))

    # start and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. validate the node output and construct the output mapfile.
    if self.error.isSet():  # if one of the nodes failed
        self.logger.warn("Failed bbs node run detected, skipping work"
                         " on this work item for further computations")

        # find failed jobs and set the skip field
        for (ms_item, concat_item, job) in zip(
                ms_map, concat_ms_map, jobs):
            if job.results["returncode"] == 0:
                continue
            else:
                ms_item.skip = True
                concat_item.skip = True
                self.logger.warn("bbs failed on item: {0}".format(
                    ms_item.file))

    # return the output: the measurement sets that are calibrated;
    # calibrated data is placed in the ms sets
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    # also save the concat_ms map with possible skips
    DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {'args_format': self.inputs['args_format'],
                   'args_format_argument': self.inputs['args_format_argument'],
                   'args_format_option': self.inputs['args_format_option'],
                   'args_formatlongoption': self.inputs['args_format_longoption'],
                   'args_format_option_argument': self.inputs['args_format_option_argument']}
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'],
                            self.inputs['job_name'])
    # *********************************************************************
    # try loading input/output data file, validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output will be directed to the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error(
            "Validation of data mapfiles failed!"
        )
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                mapfile_dir,
                self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match." %
            (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'],
                                         inputmapfiles, inlist):
            if mapname not in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH').split(':')
    command = None
    for pl in pylist:
        if os.path.isfile(os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')
    inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={
                    "cores": self.inputs['nthreads']
                }
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error(
                    "A job has failed with returncode %d and "
                    "error_tolerance is not set. Bailing out!" %
                    job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution. write all output dict entries to a mapfile
    #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    # check directory for stand-alone mode
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir)
        except OSError as exc:  # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise
    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)
    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name

    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
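# Worked illustration of the filedict substitution loop above (with made-up
# values): given inputkeys = ['infile'], an input map whose i-th file is
# '/data/L1_SB0.MS', and arguments = ['infile', '-v'], the i-th job is built
# with arglist_copy == ['/data/L1_SB0.MS', '-v']. A parset value equal to
# 'infile' would be substituted the same way; if 'infile' matches neither an
# argument nor a parset value, it is added as a new parset key instead.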
def go(self):
    """
    Steps:

    1. Load and validate the input datamaps
    2. Run the node parts of the recipe
    3. Validate node output and format the recipe output
    """
    super(imager_finalize, self).go()
    # *********************************************************************
    # 1. Load the datamaps
    awimager_output_map = DataMap.load(
        self.inputs["awimager_output_map"])
    ms_per_image_map = DataMap.load(
        self.inputs["ms_per_image_map"])
    sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
    sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
    target_mapfile = DataMap.load(self.inputs["target_mapfile"])
    output_image_mapfile = DataMap.load(
        self.inputs["output_image_mapfile"])
    processed_ms_dir = self.inputs["processed_ms_dir"]
    fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

    # Align the skip fields
    align_data_maps(awimager_output_map, ms_per_image_map,
                    sourcelist_map, target_mapfile, output_image_mapfile,
                    sourcedb_map)

    # Set the correct iterator
    sourcelist_map.iterator = awimager_output_map.iterator = \
        ms_per_image_map.iterator = target_mapfile.iterator = \
        output_image_mapfile.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator

    # *********************************************************************
    # 2. Run the node side of the recipe
    command = " python %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    for (awimager_output_item, ms_per_image_item, sourcelist_item,
         target_item, output_image_item, sourcedb_item) in zip(
            awimager_output_map, ms_per_image_map, sourcelist_map,
            target_mapfile, output_image_mapfile, sourcedb_map):
        # collect the files as arguments
        arguments = [awimager_output_item.file,
                     ms_per_image_item.file,
                     sourcelist_item.file,
                     target_item.file,
                     output_image_item.file,
                     self.inputs["minbaseline"],
                     self.inputs["maxbaseline"],
                     processed_ms_dir,
                     fillrootimagegroup_exec,
                     self.environment,
                     sourcedb_item.file]
        self.logger.info(
            "Starting finalize with the following args: {0}".format(
                arguments))
        jobs.append(ComputeJob(target_item.host, command, arguments))

    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Validate the performance of the node script and assign output
    successful_run = False
    for (job, output_image_item) in zip(jobs, output_image_mapfile):
        if "hdf5" not in job.results:
            # If the output failed, set skip to True
            output_image_item.skip = True
        else:
            # signal that we have at least a single run finished ok;
            # no need to set skip in this case
            successful_run = True

    if not successful_run:
        self.logger.warn("Failed finalizer node run detected")
        return 1

    output_image_mapfile.save(self.inputs['placed_image_mapfile'])
    self.logger.debug(
        "Wrote mapfile containing placed hdf5 images: {0}".format(
            self.inputs['placed_image_mapfile']))

    self.outputs["placed_image_mapfile"] = self.inputs[
        'placed_image_mapfile']

    return 0
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {'args_format': self.inputs['args_format'],
                   'args_format_argument': self.inputs['args_format_argument'],
                   'args_format_option': self.inputs['args_format_option'],
                   'args_formatlongoption': self.inputs['args_format_longoption'],
                   'args_format_option_argument': self.inputs['args_format_option_argument']}

    # *********************************************************************
    # try loading input/output data file, validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    prefix = os.path.join(self.inputs['working_directory'],
                          self.inputs['job_name'])
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output will be directed to the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                prefix, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error(
            "Validation of data mapfiles failed!"
        )
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                prefix, self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    prefix,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if not self.inputs['inputkeys'] and self.inputs['inputkey']:
        self.inputs['inputkeys'].append(self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match." %
            (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
            filedict[key] = []
            for inp in filemap:
                filedict[key].append(inp.file)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
    inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        #if keylist:
        #    for name, value in zip(keylist, inputlist):
        if filedict:
            for name, value in filedict.iteritems():
                if arglist_copy and name in arglist_copy:
                    ind = arglist_copy.index(name)
                    arglist_copy[ind] = value[i]
                elif name in parsetdict_copy.values():
                    for k, v in parsetdict_copy.iteritems():
                        if v == name:
                            parsetdict_copy[k] = value[i]
                else:
                    parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    prefix,
                    self.inputs['parsetasfile'],
                    args_format,
                    #self.inputs['working_directory'],
                    self.environment
                ]
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
        for k, v in job.results.items():
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution. write all output dict entries to a mapfile
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
    for k, v in jobresultdict.items():
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, k + '.mapfile')
    self.outputs.update(resultmap)
    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name

    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0