def _validate_input_data(self, slice_paths_map, input_map):
    """
    Performs a validation of the supplied slice_paths_map and input_map.
    Displays an error message if this fails.
    """
    validation_failed = None
    error_received = None
    try:
        validation_failed = not validate_data_maps(slice_paths_map,
                                                   input_map)
    except AssertionError as exception:
        validation_failed = True
        error_received = str(exception)

    if validation_failed:
        self.logger.error(error_received)
        self.logger.error("Incorrect mapfiles: {0} and {1}".format(
            self.inputs["slice_paths_mapfile"], self.inputs['args'][0]))
        self.logger.error("content input_map: \n{0}".format(input_map))
        self.logger.error("content slice_paths_map: \n{0}".format(
            slice_paths_map))
        # return with failure
        return 1

    # return with zero (all is ok state)
    return 0
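# NOTE: judging from the error handling above and from messages elsewhere in
# this collection ("matching host names for each entry"), validate_data_maps
# appears to check that all supplied maps are equally long and that entries
# line up host-by-host. A minimal illustrative sketch of such a check -- an
# assumption, not the actual lofarpipe implementation, which may also raise
# AssertionError on malformed input:
def _validate_data_maps_sketch(*data_maps):
    """Return True when all maps have equal length and matching hosts."""
    if len(set(len(data_map) for data_map in data_maps)) != 1:
        return False    # maps differ in length
    for entries in zip(*data_maps):
        if len(set(entry.host for entry in entries)) != 1:
            return False    # hosts do not line up for this entry
    return True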
def _load_mapfiles(self):
    """
    Load data map file, instrument map file, and sky map file.
    Update the 'skip' fields in these map files: if 'skip' is True in
    any of the maps, then 'skip' must be set to True in all maps.
    """
    self.logger.debug("Loading map files:"
        "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
            self.inputs['args'][0],
            self.inputs['instrument_mapfile'],
            self.inputs['sky_mapfile']
        )
    )
    self.data_map = DataMap.load(self.inputs['args'][0])
    self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
    self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

    if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
        self.logger.error("Validation of input data mapfiles failed")
        return False

    # Update the skip fields of the three maps. If 'skip' is True in any
    # of these maps, then 'skip' must be set to True in all maps.
    for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
        x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

    return True
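# NOTE: the skip-synchronisation loop above recurs throughout these recipes;
# later snippets call align_data_maps() for the same purpose. A minimal
# sketch of the idiom as a standalone helper (illustrative only; the real
# lofarpipe helper may differ, e.g. by validating the maps first):
def _sync_skip_fields(*data_maps):
    """Set skip=True in every map wherever any map has skip=True."""
    for entries in zip(*data_maps):
        combined_skip = any(entry.skip for entry in entries)
        for entry in entries:
            entry.skip = combined_skip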
def _get_io_product_specs(self):
    """
    Get input- and output-data product specifications from the parset-file,
    and do some sanity checks.
    """
    dps = self.parset.makeSubset(
        self.parset.fullModuleName('DataProducts') + '.'
    )
    self.input_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector('Input_Correlated.locations'),
            dps.getStringVector('Input_Correlated.filenames'),
            dps.getBoolVector('Input_Correlated.skip'))
    ])
    self.logger.debug("%d Input_Correlated data products specified" %
                      len(self.input_data))
    self.output_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector('Output_Correlated.locations'),
            dps.getStringVector('Output_Correlated.filenames'),
            dps.getBoolVector('Output_Correlated.skip'))
    ])
    self.logger.debug("%d Output_Correlated data products specified" %
                      len(self.output_data))
    # Sanity checks on input- and output data product specifications
    if not validate_data_maps(self.input_data, self.output_data):
        raise PipelineException(
            "Validation of input/output data product specification failed!"
        )
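# NOTE: worked example of the tuple construction above. The parset is
# assumed to carry host-qualified locations of the form "host:path", so
# joining the location with the filename and splitting on ':' yields the
# (host, file) pair that, together with the skip flag, forms a DataMap
# entry. The host name and paths below are made up for illustration.
import os

location = "locus001:/data/scratch/pipeline"    # hypothetical
filename = "L12345_SB000_uv.MS"                 # hypothetical
skip = False

entry = tuple(os.path.join(location, filename).split(':')) + (skip,)
assert entry == ('locus001', '/data/scratch/pipeline/L12345_SB000_uv.MS',
                 False)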
def _validate_io_product_specs(self):
    """
    Sanity checks on input- and output data product specifications
    """
    if not validate_data_maps(self.input_data['data'],
                              self.input_data['instrument'],
                              self.output_data['data']):
        raise PipelineException(
            "Validation of input/output data product specification failed!"
        )
def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
         skip=False):
    """
    Perform a calibration step. First with a set of sources from the gsm
    and in later iterations also on the found sources
    """
    # create parset for bbs run
    parset = self.parset.makeSubset("BBS.")
    parset_path = self._write_parset_to_file(
        parset, "bbs",
        "Parset for calibration with a local sky model")

    # create the output file path
    output_mapfile = self._write_datamap_to_file(
        None, "bbs_output",
        "Mapfile with calibrated measurement sets.")

    converted_sourcedb_map_path = self._write_datamap_to_file(
        None, "source_db",
        "correctly shaped mapfile for input sourcedbs")

    if skip:
        return output_mapfile

    # The create db step produces a mapfile with a single sourcelist for
    # the different timeslices. Generate a mapfile with copies of the
    # sourcelist location: this allows validation of maps in combination.

    # get the original map data
    sourcedb_map = DataMap.load(sourcedb_map_path)
    parmdbs_map = MultiDataMap.load(parmdbs_map_path)
    converted_sourcedb_map = []

    # sanity check for correct output from previous recipes
    if not validate_data_maps(sourcedb_map, parmdbs_map):
        self.logger.error("The input files for bbs do not contain "
                          "matching host names for each entry content:")
        self.logger.error(repr(sourcedb_map))
        self.logger.error(repr(parmdbs_map))
        raise PipelineException("Invalid input data for imager_bbs recipe")

    self.run_task("imager_bbs",
                  timeslice_map_path,
                  parset=parset_path,
                  instrument_mapfile=parmdbs_map_path,
                  sourcedb_mapfile=sourcedb_map_path,
                  mapfile=output_mapfile,
                  working_directory=self.scratch_directory)

    return output_mapfile
def go(self):
    self.logger.info("Starting setupsourcedb run")
    super(setupsourcedb, self).go()

    # *********************************************************************
    # 1. Load input and output mapfiles. Validate
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = DataMap.load(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = DataMap.load(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error(
                "Validation of input/output data mapfiles failed"
            )
            return 1
    else:
        outdata = copy.deepcopy(indata)
        for item in outdata:
            item.file = os.path.join(
                self.inputs['working_directory'],
                self.inputs['job_name'],
                os.path.basename(item.file) + self.inputs['suffix']
            )

    # *********************************************************************
    # 2. Check if input skymodel file exists. If not, make filename empty.
    try:
        skymodel = self.inputs['skymodel']
    except KeyError:
        skymodel = ""
        self.logger.info("No skymodel specified. Using an empty one")

    # ********************************************************************
    # 3. Call node side of script
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    outdata.iterator = DataMap.SkipIterator
    jobs = []
    for outp in outdata:
        jobs.append(
            ComputeJob(
                outp.host,
                command,
                arguments=[
                    self.inputs['executable'],
                    skymodel,
                    outp.file,
                    self.inputs['type']
                ]
            )
        )
    self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True

    # *********************************************************************
    # 4. Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    self.logger.debug("Writing sky map file: %s" % self.inputs['mapfile'])
    outdata.save(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
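# NOTE: the recipes here set DataMap.SkipIterator before iterating so that
# entries flagged skip=True are passed over, which keeps the jobs list and
# the map entries aligned. A self-contained toy mimicking just that
# filtering behaviour (the real DataMap lives in lofarpipe.support.data_map;
# everything below is a hypothetical stand-in):
class _ToyMapEntry(object):
    def __init__(self, host, file, skip):
        self.host, self.file, self.skip = host, file, skip

def _skip_iterator(entries):
    """Yield only the entries that are not flagged as skipped."""
    for entry in entries:
        if not entry.skip:
            yield entry

_entries = [_ToyMapEntry("node1", "a.MS", False),
            _ToyMapEntry("node2", "b.MS", True),
            _ToyMapEntry("node3", "c.MS", False)]
assert [e.file for e in _skip_iterator(_entries)] == ["a.MS", "c.MS"]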
def go(self):
    super(gainoutliercorrection, self).go()
    self.logger.info("Starting gainoutliercorrection run")
    # ********************************************************************
    # 1. Validate input
    # If sigma is None, use the default behaviour and the executable:
    # test if it exists
    executable = self.inputs['executable']
    if executable == "":
        pass
    elif not os.access(executable, os.X_OK):
        self.logger.warn(
            "No parmexportcal executable found at the supplied "
            "path: {0}".format(self.inputs['executable']))
        self.logger.warn("Defaulting to edit_parmdb behaviour")

    # ********************************************************************
    # 2. load mapfiles, validate if a target output location is provided
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = DataMap.load(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = DataMap.load(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error(
                "Validation of input/output data mapfiles failed")
            return 1
    else:
        outdata = copy.deepcopy(indata)
        for item in outdata:
            item.file = os.path.join(
                self.inputs['working_directory'],
                self.inputs['job_name'],
                (os.path.splitext(os.path.basename(item.file))[0] +
                 self.inputs['suffix']))

    # Update the skip fields of the two maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for x, y in zip(indata, outdata):
        x.skip = y.skip = (x.skip or y.skip)

    # ********************************************************************
    # 3. Call node side of the recipe
    command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
    indata.iterator = outdata.iterator = DataMap.SkipIterator
    jobs = []
    for inp, outp in zip(indata, outdata):
        jobs.append(
            ComputeJob(
                outp.host, command,
                arguments=[
                    inp.file,
                    outp.file,
                    self.inputs['executable'],
                    self.environment,
                    self.inputs['sigma'],
                    self.inputs['export_instrument_model']
                ]))
    self._schedule_jobs(jobs)
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True

    # ********************************************************************
    # 4. validate performance, return corrected files
    if self.error.isSet():
        self.logger.warn("Detected failed gainoutliercorrection job")
        return 1
    else:
        self.logger.debug("Writing instrument map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def go(self):
    self.logger.info("Starting DPPP run")
    super(dppp, self).go()

    # # Keep track of "Total flagged" messages in the DPPP logs
    # # --------------------------------------------------------------------
    # self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines"

    # *********************************************************************
    # 1. load input data file, validate output vs the input location if
    #    output locations are provided
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = DataMap.load(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = DataMap.load(args[1])
    else:
        outdata = copy.deepcopy(indata)
        for item in outdata:
            item.file = os.path.join(
                self.inputs['working_directory'],
                self.inputs['job_name'],
                os.path.basename(item.file) + self.inputs['suffix'])

    # ********************************************************************
    # 2. Load parmdb and sourcedb
    # Load parmdb-mapfile, if one was given.
    if self.inputs.has_key('parmdb_mapfile'):
        self.logger.debug("Loading parmdb mapfile: %s" %
                          self.inputs['parmdb_mapfile'])
        parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
    else:
        parmdbdata = copy.deepcopy(indata)
        for item in parmdbdata:
            item.file = ''

    # Load sourcedb-mapfile, if one was given.
    if self.inputs.has_key('sourcedb_mapfile'):
        self.logger.debug("Loading sourcedb mapfile: %s" %
                          self.inputs['sourcedb_mapfile'])
        sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
    else:
        sourcedbdata = copy.deepcopy(indata)
        for item in sourcedbdata:
            item.file = ''

    # Validate all the data maps.
    if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
        self.logger.error("Validation of data mapfiles failed!")
        return 1

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
        w.skip = x.skip = y.skip = z.skip = (
            w.skip or x.skip or y.skip or z.skip
        )

    # ********************************************************************
    # 3. Call the node side of the recipe
    # Create and schedule the compute jobs
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    indata.iterator = outdata.iterator = DataMap.SkipIterator
    parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
    jobs = []
    for inp, outp, pdb, sdb in zip(
        indata, outdata, parmdbdata, sourcedbdata
    ):
        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    outp.file,
                    pdb.file,
                    sdb.file,
                    self.inputs['parset'],
                    self.inputs['executable'],
                    self.environment,
                    self.inputs['demix_always'],
                    self.inputs['demix_if_needed'],
                    self.inputs['data_start_time'],
                    self.inputs['data_end_time'],
                    self.inputs['nthreads'],
                    self.inputs['clobber']
                ],
                resources={"cores": self.inputs['nthreads']}
            )
        )
    self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True

    # # *******************************************************************
    # # 4. parse logfile for fully flagged baselines
    # matches = self.logger.searchpatterns["fullyflagged"].results
    # self.logger.searchpatterns.clear()  # finished searching
    # stripchars = "".join(set("Fully flagged baselines: "))
    # baselinecounter = defaultdict(lambda: 0)
    # for match in matches:
    #     for pair in (
    #         pair.strip(stripchars) for pair in match.getMessage().split(";")
    #     ):
    #         baselinecounter[pair] += 1
    # self.outputs['fullyflagged'] = baselinecounter.keys()

    # *********************************************************************
    # 4. Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")
    self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
    outdata.save(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
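# NOTE: the job-result bookkeeping above (abort only when *all* jobs failed,
# otherwise warn and continue with the succeeded runs) recurs in nearly
# every recipe in this collection, guarded by self.error.isSet(). A hedged
# sketch of the pattern as a reusable helper; hypothetical, since the
# recipes themselves inline this logic:
def _check_job_results(jobs, logger):
    """Return 1 when every job failed, else 0 (warning on partial failure)."""
    returncodes = [job.results['returncode'] for job in jobs]
    if all(code != 0 for code in returncodes):
        logger.error("All jobs failed. Bailing out!")
        return 1
    if any(code != 0 for code in returncodes):
        logger.warn("Some jobs failed, continuing with succeeded runs")
    return 0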
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {
        'args_format': self.inputs['args_format'],
        'args_format_argument': self.inputs['args_format_argument'],
        'args_format_option': self.inputs['args_format_option'],
        'args_formatlongoption': self.inputs['args_format_longoption'],
        'args_format_option_argument': self.inputs['args_format_option_argument']
    }

    # *********************************************************************
    # try loading input/output data file, validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    prefix = os.path.join(self.inputs['working_directory'],
                          self.inputs['job_name'])
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output will be placed in the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] +
                    '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                prefix, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error(
            "Validation of data mapfiles failed!"
        )
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple outputfiles
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                prefix, self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    prefix,
                    os.path.splitext(os.path.basename(item.file))[0] +
                    '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if not self.inputs['inputkeys'] and self.inputs['inputkey']:
        self.inputs['inputkeys'].append(self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and \
            len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match." %
            (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
            filedict[key] = []
            for inp in filemap:
                filedict[key].append(inp.file)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    command = "python %s" % (
        self.__file__.replace('master', 'nodes')
    ).replace('executable_args', self.inputs['nodescript'])
    inputmapfiles[0].iterator = outputmapfiles[0].iterator = \
        DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(
        outputmapfiles[0], inputmapfiles[0])
    ):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        #if keylist:
        #for name, value in zip(keylist, inputlist):
        if filedict:
            for name, value in filedict.iteritems():
                if arglist_copy and name in arglist_copy:
                    ind = arglist_copy.index(name)
                    arglist_copy[ind] = value[i]
                elif name in parsetdict_copy.values():
                    for k, v in parsetdict_copy.iteritems():
                        if v == name:
                            parsetdict_copy[k] = value[i]
                else:
                    parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    prefix,
                    self.inputs['parsetasfile'],
                    args_format,
                    #self.inputs['working_directory'],
                    self.environment
                ]
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
        for k, v in job.results.items():
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution. write all output dict entries to a mapfile
    mapfile_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    for k, v in jobresultdict.items():
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(mapfile_dir,
                                                 k + '.mapfile')
    self.outputs.update(resultmap)

    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name

    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
def go(self):
    """
    This member contains all the functionality of the imager_awimager.
    Functionality is all located at the node side of the script.
    """
    super(selfcal_awimager, self).go()
    self.logger.info("Starting imager_awimager run")

    # *********************************************************************
    # 1. collect the inputs and validate
    input_map = DataMap.load(self.inputs['args'][0])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

    if not validate_data_maps(input_map, sourcedb_map):
        self.logger.error(
            "the supplied input_ms mapfile and sourcedb mapfile "
            "are incorrect. Aborting")
        self.logger.error(repr(input_map))
        self.logger.error(repr(sourcedb_map))
        return 1

    # *********************************************************************
    # 2. Start the node side of the awimager recipe
    # Compile the command to be executed on the remote machine
    node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
    jobs = []

    output_map = copy.deepcopy(input_map)
    align_data_maps(input_map, output_map, sourcedb_map)

    sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
        DataMap.SkipIterator
    for measurement_item, source_item in zip(input_map, sourcedb_map):
        if measurement_item.skip or source_item.skip:
            jobs.append(None)
            continue
        # both the sourcedb and the measurement are in a map
        # unpack both
        host, measurement_path = \
            measurement_item.host, measurement_item.file
        host2, sourcedb_path = source_item.host, source_item.file

        # construct and save the output name
        arguments = [self.inputs['executable'],
                     self.environment,
                     self.inputs['parset'],
                     self.inputs['working_directory'],
                     self.inputs['output_image'],
                     measurement_path,
                     sourcedb_path,
                     self.inputs['mask_patch_size'],
                     self.inputs['autogenerate_parameters'],
                     self.inputs['specify_fov'],
                     self.inputs['fov'],
                     self.inputs['major_cycle'],
                     self.inputs['nr_cycles'],
                     self.inputs['perform_self_cal']
                     ]

        jobs.append(ComputeJob(host, node_command, arguments))
    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Check output of the node scripts
    for job, output_item in zip(jobs, output_map):
        # job == None on skipped job
        if "image" not in job.results:
            output_item.file = "failed"
            output_item.skip = True
        else:
            output_item.file = job.results["image"]
            output_item.skip = False

    # Check if there are finished runs
    successful_runs = None
    for item in output_map:
        if not item.skip:
            successful_runs = True
            break

    if not successful_runs:
        self.logger.error(
            "None of the started awimager runs finished correctly")
        self.logger.error(
            "No work left to be done: exiting with error status")
        return 1

    # If partial success
    if self.error.isSet():
        self.logger.error("Failed awimager node run detected. Continuing "
                          "with successful tasks.")

    self._store_data_map(self.inputs['mapfile'], output_map,
                         "mapfile containing produced awimages")

    self.outputs["mapfile"] = self.inputs['mapfile']
    return 0
def go(self):
    super(gainoutliercorrection, self).go()
    self.logger.info("Starting gainoutliercorrection run")
    # ********************************************************************
    # 1. Validate input
    # If sigma is None, use the default behaviour and the executable:
    # test if it exists
    executable = self.inputs['executable']
    if executable == "":
        pass
    elif not os.access(executable, os.X_OK):
        self.logger.warn(
            "No parmexportcal executable found at the supplied "
            "path: {0}".format(self.inputs['executable']))
        self.logger.warn("Defaulting to edit_parmdb behaviour")

    # ********************************************************************
    # 2. load mapfiles, validate if a target output location is provided
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = DataMap.load(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = DataMap.load(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error(
                "Validation of input/output data mapfiles failed"
            )
            return 1
    else:
        outdata = copy.deepcopy(indata)
        for item in outdata:
            item.file = os.path.join(
                self.inputs['working_directory'],
                self.inputs['job_name'],
                (os.path.splitext(os.path.basename(item.file))[0] +
                 self.inputs['suffix'])
            )

    # Update the skip fields of the two maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for x, y in zip(indata, outdata):
        x.skip = y.skip = (x.skip or y.skip)

    # ********************************************************************
    # 3. Call node side of the recipe
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    indata.iterator = outdata.iterator = DataMap.SkipIterator
    jobs = []
    for inp, outp in zip(indata, outdata):
        jobs.append(
            ComputeJob(
                outp.host,
                command,
                arguments=[
                    inp.file,
                    outp.file,
                    self.inputs['executable'],
                    self.environment,
                    self.inputs['sigma'],
                    self.inputs['export_instrument_model']
                ]
            )
        )
    self._schedule_jobs(jobs)
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True

    # ********************************************************************
    # 4. validate performance, return corrected files
    if self.error.isSet():
        self.logger.warn("Detected failed gainoutliercorrection job")
        return 1
    else:
        self.logger.debug("Writing instrument map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def go(self):
    """
    This member contains all the functionality of the imager_awimager.
    Functionality is all located at the node side of the script.
    """
    super(imager_awimager, self).go()
    self.logger.info("Starting imager_awimager run")

    # *********************************************************************
    # 1. collect the inputs and validate
    input_map = DataMap.load(self.inputs['args'][0])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

    if not validate_data_maps(input_map, sourcedb_map):
        self.logger.error(
            "the supplied input_ms mapfile and sourcedb mapfile "
            "are incorrect. Aborting")
        self.logger.error(repr(input_map))
        self.logger.error(repr(sourcedb_map))
        return 1

    # *********************************************************************
    # 2. Start the node side of the awimager recipe
    # Compile the command to be executed on the remote machine
    node_command = "python %s" % (self.__file__.replace("master", "nodes"))
    jobs = []

    output_map = copy.deepcopy(input_map)
    for w, x, y in zip(input_map, output_map, sourcedb_map):
        w.skip = x.skip = y.skip = (
            w.skip or x.skip or y.skip
        )

    sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
        DataMap.SkipIterator
    for idx, (measurement_item, source_item) in enumerate(zip(
            input_map, sourcedb_map)):
        if measurement_item.skip or source_item.skip:
            jobs.append(None)
            continue
        # both the sourcedb and the measurement are in a map
        # unpack both
        host, measurement_path = \
            measurement_item.host, measurement_item.file
        host2, sourcedb_path = source_item.host, source_item.file

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(self.inputs['working_directory'],
                                   "imager_awimager_{0}".format(idx))

        # construct and save the output name
        arguments = [self.inputs['executable'],
                     self.environment,
                     self.inputs['parset'],
                     # put in unique dir, as node script wants to put
                     # private .par files next to it
                     working_dir,
                     "%s_%s/image" % (self.inputs['output_image'], idx),
                     measurement_path,
                     sourcedb_path,
                     self.inputs['mask_patch_size'],
                     self.inputs['autogenerate_parameters'],
                     self.inputs['specify_fov'],
                     self.inputs['fov'],
                     ]

        jobs.append(ComputeJob(host, node_command, arguments,
                               resources={
                                   "cores": self.inputs['nthreads']
                               }))
    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Check output of the node scripts
    for job, output_item in zip(jobs, output_map):
        # job == None on skipped job
        if "image" not in job.results:
            output_item.file = "failed"
            output_item.skip = True
        else:
            output_item.file = job.results["image"]
            output_item.skip = False

    # Check if there are finished runs
    successful_runs = None
    for item in output_map:
        if not item.skip:
            successful_runs = True
            break

    if not successful_runs:
        self.logger.error(
            "None of the started awimager runs finished correctly")
        self.logger.error(
            "No work left to be done: exiting with error status")
        return 1

    # If partial success
    if self.error.isSet():
        self.logger.warn("Failed awimager node run detected. Continuing "
                         "with successful tasks.")

    self._store_data_map(self.inputs['mapfile'], output_map,
                         "mapfile containing produced awimages")

    self.outputs["mapfile"] = self.inputs['mapfile']
    return 0
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {
        'args_format': self.inputs['args_format'],
        'args_format_argument': self.inputs['args_format_argument'],
        'args_format_option': self.inputs['args_format_option'],
        'args_formatlongoption': self.inputs['args_format_longoption'],
        'args_format_option_argument': self.inputs['args_format_option_argument']
    }
    mapfile_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'],
                            self.inputs['job_name'])

    # *********************************************************************
    # try loading input/output data file, validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output will be placed in the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] +
                    '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error(
            "Validation of data mapfiles failed!"
        )
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple outputfiles
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                mapfile_dir,
                self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0] +
                    '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and \
            self.inputs['inputkey'] not in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and \
            len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match." %
            (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'],
                                         inputmapfiles, inlist):
            if mapname not in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH').split(':')
    command = None
    for pl in pylist:
        if os.path.isfile(os.path.join(
                pl, 'lofarpipe/recipes/nodes/' +
                self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'lofarpipe/recipes/nodes/' +
                self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')

    inputmapfiles[0].iterator = outputmapfiles[0].iterator = \
        DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(
        outputmapfiles[0], inputmapfiles[0])
    ):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = \
                                arglist_copy[ind].replace(name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={
                    "cores": self.inputs['nthreads']
                }
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error(
                    "A job has failed with returncode %d and "
                    "error_tolerance is not set. Bailing out!" %
                    job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution. write all output dict entries to a mapfile
    #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    # check directory for stand alone mode
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir)
        except OSError as exc:  # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise

    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)

    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name

    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed parameter database (ParmDB) for a distributed
    Measurement Set (MS).

    1. Create a parmdb template at the master side of the recipe
    2. Call node side of recipe with template and possible targets
    3. Validate performance, cleanup of temp files, construct output

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with output location (if provided, input and output are
       validated)
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help="Full path to parmdbm executable",
        ),
        'nproc': ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8
        ),
        'suffix': ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the empty parameter database",
            default=".parmdb"
        ),
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help="Working directory used on output nodes. "
                 "Results will be written here."
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
                 "a list of the generated empty parameter database files"
        )
    }

    outputs = {
        'mapfile': ingredient.FileField()
    }

    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the
        #    recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template, use tempdir for location
        try:
            parmdbm_process = subprocess.Popen(
                [self.inputs['executable']],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets.
        #    If output locations are provided as input these are validated.
        try:
            # Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If output location provided, validate the input and
                # output map
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
            # else output location is inputlocation+suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])

            # Call the node side
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # If the returncode is 123456, failing ssh
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(
                            outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")

        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def go(self):
    self.logger.info("Starting setupparmdb run")
    super(setupparmdb, self).go()

    # *********************************************************************
    # 1. Create a temporary template parmdb at the master side of the recipe
    self.logger.info("Generating template parmdb")

    # generate a temp dir
    pdbdir = tempfile.mkdtemp(
        dir=self.config.get("layout", "job_directory"),
        suffix=".%s" % (os.path.basename(__file__), ))
    pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

    # Create a template, use tempdir for location
    try:
        parmdbm_process = subprocess.Popen(
            [self.inputs['executable']],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        sout, serr = communicate_returning_strings(
            parmdbm_process, input=(template % pdbfile).encode())
        log_process_output("parmdbm", sout, serr, self.logger)
    except OSError as err:
        self.logger.error("Failed to spawn parmdbm: %s" % str(err))
        return 1

    # *********************************************************************
    # 2. Call node side of recipe with template and possible targets.
    #    If output locations are provided as input these are validated.
    try:
        # Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            # If output location provided, validate the input and output map
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        # else output location is inputlocation+suffix
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix'])

        # Call the node side
        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        outdata.iterator = DataMap.SkipIterator
        jobs = []
        for outp in outdata:
            jobs.append(
                ComputeJob(outp.host,
                           command,
                           arguments=[pdbfile, outp.file]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            # If the returncode is 123456, failing ssh
            if job.results['returncode'] == 123456:
                self.logger.warning(
                    "ssh connection with {0} failed. "
                    "Skipping further work on this task".format(outp.host))
                self.logger.warning("Error code 123456.")
                outp.skip = True
            elif job.results['returncode'] != 0:
                outp.skip = True

    # *********************************************************************
    # 3. validate performance, cleanup of temp files, construct output
    finally:
        self.logger.debug("Removing template parmdb")
        shutil.rmtree(pdbdir, ignore_errors=True)

    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")

    self.logger.debug("Writing parmdb map file: %s" %
                      self.inputs['mapfile'])
    outdata.save(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def go(self): if 'executable' in self.inputs: executable = self.inputs['executable'] if self.inputs['nthreads']: self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads']) if 'environment' in self.inputs: self.environment.update(self.inputs['environment']) self.logger.info("Starting %s run" % executable) super(executable_args, self).go() # args format stuff args_format = {'args_format': self.inputs['args_format'], 'args_format_argument': self.inputs['args_format_argument'], 'args_format_option': self.inputs['args_format_option'], 'args_formatlongoption': self.inputs['args_format_longoption'], 'args_format_option_argument': self.inputs['args_format_option_argument']} mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles") work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # ********************************************************************* # try loading input/output data file, validate output vs the input location if # output locations are provided try: inputmapfiles = [] inlist = [] if self.inputs['mapfile_in']: inlist.append(self.inputs['mapfile_in']) if self.inputs['mapfiles_in']: for item in self.inputs['mapfiles_in']: inlist.append(item) self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0] for item in inlist: inputmapfiles.append(DataMap.load(item)) except Exception: self.logger.error('Could not load input Mapfile %s' % inlist) return 1 outputmapfiles = [] if self.inputs['mapfile_out']: try: outdata = DataMap.load(self.inputs['mapfile_out']) outputmapfiles.append(outdata) except Exception: self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out']) return 1 # sync skip fields in the mapfiles align_data_maps(inputmapfiles[0], outputmapfiles[0]) elif self.inputs['mapfiles_out']: for item in self.inputs['mapfiles_out']: outputmapfiles.append(DataMap.load(item)) self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0] else: # ouput will be directed in the working directory if no output mapfile is specified outdata = copy.deepcopy(inputmapfiles[0]) if not self.inputs['inplace']: for item in outdata: item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1] os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] ) self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile') self.inputs['mapfiles_out'].append(self.inputs['mapfile_out']) else: self.inputs['mapfile_out'] = self.inputs['mapfile_in'] self.inputs['mapfiles_out'].append(self.inputs['mapfile_out']) outputmapfiles.append(outdata) if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]): self.logger.error( "Validation of data mapfiles failed!" ) return 1 if self.inputs['outputsuffixes']: # Handle multiple outputfiles for name in self.inputs['outputsuffixes']: outputmapfiles.append(copy.deepcopy(inputmapfiles[0])) self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile')) for item in outputmapfiles[-1]: item.file = os.path.join( work_dir, os.path.splitext(os.path.basename(item.file))[0] + '.' 
+ self.inputs['stepname'] + name ) self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0] # prepare arguments arglist = self.inputs['arguments'] parsetdict = {} if 'parset' in self.inputs: parset = Parset() parset.adoptFile(self.inputs['parset']) for k in parset.keys: parsetdict[k] = str(parset[k]) # construct multiple input data if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']: self.inputs['inputkeys'].insert(0, self.inputs['inputkey']) if not self.inputs['outputkeys'] and self.inputs['outputkey']: self.inputs['outputkeys'].append(self.inputs['outputkey']) if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) is not len(inputmapfiles): self.logger.error("Number of input mapfiles %d and input keys %d have to match." % (len(inputmapfiles), len(self.inputs['inputkeys']))) return 1 filedict = {} if self.inputs['inputkeys'] and not self.inputs['skip_infile']: for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist): if not mapname in self.inputs['mapfiles_as_string']: filedict[key] = [] for inp in filemap: filedict[key].append(inp.file) else: if key != mapname: filedict[key] = [] for inp in filemap: filedict[key].append(mapname) if self.inputs['outputkey']: filedict[self.inputs['outputkey']] = [] for item in outputmapfiles[0]: filedict[self.inputs['outputkey']].append(item.file) # ******************************************************************** # Call the node side of the recipe # Create and schedule the compute jobs #command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript']) recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories')) recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',') pylist = os.getenv('PYTHONPATH').split(':') command = None for pl in pylist: if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')): command = "python %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py') for pl in recipe_directories: if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')): command = "python %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py') inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator jobs = [] for i, (outp, inp,) in enumerate(zip( outputmapfiles[0], inputmapfiles[0]) ): arglist_copy = copy.deepcopy(arglist) parsetdict_copy = copy.deepcopy(parsetdict) if filedict: for name, value in filedict.iteritems(): replaced = False if arglist_copy: for arg in arglist: if name == arg: ind = arglist_copy.index(arg) arglist_copy[ind] = arglist_copy[ind].replace(name, value[i]) replaced = True if parsetdict_copy: if name in parsetdict_copy.values(): for k, v in parsetdict_copy.iteritems(): if v == name: parsetdict_copy[k] = value[i] else: if not replaced: parsetdict_copy[name] = value[i] jobs.append( ComputeJob( inp.host, command, arguments=[ inp.file, executable, arglist_copy, parsetdict_copy, work_dir, self.inputs['parsetasfile'], args_format, self.environment ], resources={ "cores": self.inputs['nthreads'] } ) ) max_per_node = self.inputs['max_per_node'] self._schedule_jobs(jobs, max_per_node) jobresultdict = {} resultmap = {} for job, outp in zip(jobs, outputmapfiles[0]): if job.results['returncode'] != 0: outp.skip = True if not self.inputs['error_tolerance']: self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" 
            for k, v in job.results.items():
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(
                    DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # Temporary solution: write all output dict entries to a mapfile.
        # Check for the mapfile directory, needed for stand-alone mode.
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:    # Python > 2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            mapfile_name = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
            dmap.save(mapfile_name)
            resultmap[k + '.mapfile'] = mapfile_name
        self.outputs.update(resultmap)

        # *********************************************************************
        # Check the job results and create the output data map files.
        if self.error.isSet():
            # Abort if all jobs failed.
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")

        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
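# Illustrative sketch, not part of the recipe: the per-job placeholder
# substitution that executable_args.go() applies to the argument list and the
# parset dictionary. The helper name _substitute_files is hypothetical; it
# assumes, as above, that filedict maps placeholder keys to one file per job.
def _substitute_files(arglist, parsetdict, filedict, i):
    """
    Return copies of arglist and parsetdict in which every placeholder key
    from filedict has been replaced by the i-th file bound to that key.
    """
    import copy

    arglist_copy = copy.deepcopy(arglist)
    parsetdict_copy = copy.deepcopy(parsetdict)
    for name, value in filedict.items():
        replaced = False
        # Replace matching placeholders in the positional argument list.
        for ind, arg in enumerate(arglist_copy):
            if arg == name:
                arglist_copy[ind] = value[i]
                replaced = True
        # Replace placeholder values inside the parset; if the placeholder
        # occurs nowhere, add it as a new parset key instead.
        if name in parsetdict_copy.values():
            for k, v in parsetdict_copy.items():
                if v == name:
                    parsetdict_copy[k] = value[i]
        elif not replaced:
            parsetdict_copy[name] = value[i]
    return arglist_copy, parsetdict_copy

# Example: filedict = {'<infile>': ['a.MS', 'b.MS']} with
# arglist = ['--in', '<infile>'] yields ['--in', 'a.MS'] for job 0.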
def go(self): """ This member contains all the functionality of the imager_awimager. Functionality is all located at the node side of the script. """ super(imager_awimager, self).go() self.logger.info("Starting imager_awimager run") # ********************************************************************* # 1. collect the inputs and validate input_map = DataMap.load(self.inputs['args'][0]) sourcedb_map = DataMap.load(self.inputs['sourcedb_path']) if not validate_data_maps(input_map, sourcedb_map): self.logger.error( "the supplied input_ms mapfile and sourcedb mapfile" "are incorrect. Aborting") self.logger.error(repr(input_map)) self.logger.error(repr(sourcedb_map)) return 1 # ********************************************************************* # 2. Start the node side of the awimager recipe # Compile the command to be executed on the remote machine node_command = "python %s" % (self.__file__.replace("master", "nodes")) jobs = [] output_map = copy.deepcopy(input_map) for w, x, y in zip(input_map, output_map, sourcedb_map): w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip) sourcedb_map.iterator = input_map.iterator = output_map.iterator = \ DataMap.SkipIterator for idx, (measurement_item, source_item) in enumerate(zip(input_map, sourcedb_map)): if measurement_item.skip or source_item.skip: jobs.append(None) continue # both the sourcedb and the measurement are in a map # unpack both host, measurement_path = measurement_item.host, measurement_item.file host2, sourcedb_path = source_item.host, source_item.file # use unique working directories per job, to prevent interference between jobs on a global fs working_dir = os.path.join(self.inputs['working_directory'], "imager_awimager_{0}".format(idx)) # construct and save the output name arguments = [ self.inputs['executable'], self.environment, self.inputs['parset'], working_dir, # put in unique dir, as node script wants to put private .par files next to it "%s_%s/image" % (self.inputs['output_image'], idx), measurement_path, sourcedb_path, self.inputs['mask_patch_size'], self.inputs['autogenerate_parameters'], self.inputs['specify_fov'], self.inputs['fov'], ] jobs.append( ComputeJob(host, node_command, arguments, resources={"cores": self.inputs['nthreads']})) self._schedule_jobs(jobs) # ********************************************************************* # 3. Check output of the node scripts for job, output_item in zip(jobs, output_map): # job == None on skipped job if not "image" in job.results: output_item.file = "failed" output_item.skip = True else: output_item.file = job.results["image"] output_item.skip = False # Check if there are finished runs succesfull_runs = None for item in output_map: if item.skip == False: succesfull_runs = True break if not succesfull_runs: self.logger.error( "None of the starter awimager run finished correct") self.logger.error( "No work left to be done: exiting with error status") return 1 # If partial succes if self.error.isSet(): self.logger.warn("Failed awimager node run detected. continue with" "successful tasks.") self._store_data_map(self.inputs['mapfile'], output_map, "mapfile containing produces awimages") self.outputs["mapfile"] = self.inputs['mapfile'] return 0