def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image, copied_ms_map, subbands_per_image, collected_ms_dir_name, parset, ndppp): """ Run NDPPP: Create dir for grouped measurements, assure clean workspace Call with log for cplus and catch segfaults. Pparameters are supplied in parset """ time_slice_path_list = [] for idx_time_slice in range(slices_per_image): start_slice_range = idx_time_slice * subbands_per_image end_slice_range = (idx_time_slice + 1) * subbands_per_image # Get the subset of ms that are part of the current timeslice, # cast to datamap input_map_subgroup = DataMap( copied_ms_map[start_slice_range:end_slice_range]) output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice) # construct time slice name time_slice_path = os.path.join(time_slice_dir_path, output_ms_name) # convert the datamap to a file list: Do not remove skipped files: # ndppp needs the incorrect files there to allow filling with zeros ndppp_input_ms = [item.file for item in input_map_subgroup] # Join into a single list of paths. msin = "['{0}']".format("', '".join(ndppp_input_ms)) # Update the parset with computed parameters patch_dictionary = { 'uselogger': 'True', # enables log4cplus 'msin': msin, 'msout': time_slice_path } nddd_parset_path = time_slice_path + ".ndppp.par" try: temp_parset_filename = patch_parset(parset, patch_dictionary) shutil.copyfile(temp_parset_filename, nddd_parset_path) # Remove the temp file finally: os.remove(temp_parset_filename) try: nddd_parset_path = time_slice_path + ".ndppp.par" temp_parset_filename = patch_parset(parset, patch_dictionary) shutil.copy(temp_parset_filename, nddd_parset_path) self.logger.debug( "Wrote a ndppp parset with runtime variables:" " {0}".format(nddd_parset_path)) except Exception, exception: self.logger.error("failed loading and updating the " + "parset: {0}".format(parset)) raise exception # remove the temp file finally:
def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image, copied_ms_map, subbands_per_image, collected_ms_dir_name, parset, ndppp): """ Run NDPPP: Create dir for grouped measurements, assure clean workspace Call with log for cplus and catch segfaults. Pparameters are supplied in parset """ time_slice_path_list = [] for idx_time_slice in range(slices_per_image): start_slice_range = idx_time_slice * subbands_per_image end_slice_range = (idx_time_slice + 1) * subbands_per_image # Get the subset of ms that are part of the current timeslice, # cast to datamap input_map_subgroup = DataMap( copied_ms_map[start_slice_range:end_slice_range]) output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice) # construct time slice name time_slice_path = os.path.join(time_slice_dir_path, output_ms_name) # convert the datamap to a file list: Do not remove skipped files: # ndppp needs the incorrect files there to allow filling with zeros ndppp_input_ms = [item.file for item in input_map_subgroup] # Join into a single list of paths. msin = "['{0}']".format("', '".join(ndppp_input_ms)) # Update the parset with computed parameters patch_dictionary = {'uselogger': 'True', # enables log4cplus 'msin': msin, 'msout': time_slice_path} nddd_parset_path = time_slice_path + ".ndppp.par" try: temp_parset_filename = patch_parset(parset, patch_dictionary) shutil.copyfile(temp_parset_filename, nddd_parset_path) # Remove the temp file finally: os.remove(temp_parset_filename) try: nddd_parset_path = time_slice_path + ".ndppp.par" temp_parset_filename = patch_parset(parset, patch_dictionary) shutil.copy(temp_parset_filename, nddd_parset_path) self.logger.debug( "Wrote a ndppp parset with runtime variables:" " {0}".format(nddd_parset_path)) except Exception, exception: self.logger.error("failed loading and updating the " + "parset: {0}".format(parset)) raise exception # remove the temp file finally:
def _aw_imager(self, prepare_phase_output, major_cycle, sky_path,
               skip=False):
    """
    Create an image based on the calibrated, filtered and combined data.

    :param prepare_phase_output: positional input handed to the
        ``imager_awimager`` task
    :param major_cycle: cycle number; used to name the written parset and
        the intermediate image directory
    :param sky_path: handed to the task as ``sourcedb_path``
    :param skip: when True the ``imager_awimager`` task is not run; the
        parset and the output mapfile are still written
    :returns: tuple ``(output_mapfile, max_baseline)``
    """
    # Create parset for the awimage recipe
    parset = self.parset.makeSubset("AWimager.")
    # Get maxbaseline from 'full' parset
    max_baseline = self.parset.getInt("Imaging.maxbaseline")
    patch_dictionary = {"maxbaseline": str(max_baseline)}

    # Patch *before* entering the try block: if patching itself fails
    # there is no temp file yet, and a cleanup referring to the unbound
    # name would mask the real error.
    temp_parset_filename = patch_parset(parset, patch_dictionary)
    try:
        aw_image_parset = get_parset(temp_parset_filename)
        aw_image_parset_path = self._write_parset_to_file(
            aw_image_parset,
            "awimager_cycle_{0}".format(major_cycle),
            "Awimager recipe parset")
    finally:
        # remove tempfile
        os.remove(temp_parset_filename)

    # Create path to write the awimage files
    intermediate_image_path = os.path.join(
        self.scratch_directory,
        "awimage_cycle_{0}".format(major_cycle),
        "image")

    output_mapfile = self._write_datamap_to_file(
        None, "awimager", "output map for awimager recipe")

    mask_patch_size = self.parset.getInt("Imaging.mask_patch_size")
    auto_imaging_specs = self.parset.getBool("Imaging.auto_imaging_specs")
    fov = self.parset.getFloat("Imaging.fov")
    specify_fov = self.parset.getBool("Imaging.specify_fov")

    if not skip:
        # run the awimager recipe
        self.run_task(
            "imager_awimager", prepare_phase_output,
            parset=aw_image_parset_path,
            mapfile=output_mapfile,
            output_image=intermediate_image_path,
            mask_patch_size=mask_patch_size,
            sourcedb_path=sky_path,
            working_directory=self.scratch_directory,
            autogenerate_parameters=auto_imaging_specs,
            specify_fov=specify_fov,
            fov=fov)

    return output_mapfile, max_baseline
def _aw_imager(self, prepare_phase_output, major_cycle, sky_path,
               skip=False):
    """
    Create an image based on the calibrated, filtered and combined data.

    :param prepare_phase_output: positional input handed to the
        ``imager_awimager`` task
    :param major_cycle: cycle number; used to name the written parset and
        the intermediate image directory
    :param sky_path: handed to the task as ``sourcedb_path``
    :param skip: when True the ``imager_awimager`` task is not run; the
        parset and the output mapfile are still written
    :returns: tuple ``(output_mapfile, max_baseline)``
    """
    # Create parset for the awimage recipe
    parset = self.parset.makeSubset("AWimager.")
    # Get maxbaseline from 'full' parset
    max_baseline = self.parset.getInt("Imaging.maxbaseline")
    patch_dictionary = {"maxbaseline": str(max_baseline)}

    # Patch *before* entering the try block: if patching itself fails
    # there is no temp file yet, and a cleanup referring to the unbound
    # name would mask the real error.
    temp_parset_filename = patch_parset(parset, patch_dictionary)
    try:
        aw_image_parset = get_parset(temp_parset_filename)
        aw_image_parset_path = self._write_parset_to_file(
            aw_image_parset,
            "awimager_cycle_{0}".format(major_cycle),
            "Awimager recipe parset")
    finally:
        # remove tempfile
        os.remove(temp_parset_filename)

    # Create path to write the awimage files
    intermediate_image_path = os.path.join(
        self.scratch_directory,
        "awimage_cycle_{0}".format(major_cycle),
        "image")

    output_mapfile = self._write_datamap_to_file(
        None, "awimager", "output map for awimager recipe")

    mask_patch_size = self.parset.getInt("Imaging.mask_patch_size")
    autogenerate_parameters = self.parset.getBool(
        "Imaging.auto_imaging_specs")
    specify_fov = self.parset.getBool("Imaging.specify_fov")

    if not skip:
        # run the awimager recipe
        self.run_task(
            "imager_awimager", prepare_phase_output,
            parset=aw_image_parset_path,
            mapfile=output_mapfile,
            output_image=intermediate_image_path,
            mask_patch_size=mask_patch_size,
            sourcedb_path=sky_path,
            working_directory=self.scratch_directory,
            autogenerate_parameters=autogenerate_parameters,
            specify_fov=specify_fov)

    return output_mapfile, max_baseline
def go(self):
    """
    Run BBS over all input data.

    Stages, in order:

    1. cook the ``parmdb`` and ``sourcedb`` recipes,
    2. cook the ``vdsmaker`` recipe to build a GVDS file for all data,
    3. for every group yielded by ``gvds_iterator``: clear the BBS
       session database for our key, combine the group's VDS files,
       patch a control parset, then run the control thread and one
       kernel thread per ``(host, file, vds)`` entry to completion.

    :returns: 0 on success; 1 when a sub-recipe reports failure or the
        killswitch was set while processing a group
    :raises subprocess.CalledProcessError: when ``combinevds`` exits with
        a non-zero return code
    """
    self.logger.info("Starting BBS run")
    super(bbs, self).go()

    # Generate source and parameter databases for all input data
    # ----------------------------------------------------------------------
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['parmdbm']
    inputs['working_directory'] = self.config.get(
        "DEFAULT", "default_working_directory")
    inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
    inputs['suffix'] = ".instrument"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('parmdb', inputs, outputs):
        self.logger.warning("parmdb reports failure")
        return 1

    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['makesourcedb']
    inputs['skymodel'] = self.inputs['skymodel']
    inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
    inputs['suffix'] = ".sky"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('sourcedb', inputs, outputs):
        self.logger.warning("sourcedb reports failure")
        return 1

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for BBS")
    vds_file = os.path.join(
        self.config.get("layout", "job_directory"), "vds", "bbs.gvds")
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = vds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(vds_file)
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warning("vdsmaker reports failure")
        return 1
    self.logger.debug("BBS GVDS is %s" % (vds_file, ))

    # Iterate over groups of subbands divided up for convenient cluster
    # processing -- ie, no more than nproc subbands per compute node
    # ----------------------------------------------------------------------
    for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
        # to_process is a list of (host, filename, vds) tuples; only the
        # vds entries are needed to build the combined GVDS file
        # ------------------------------------------------------------------
        _hosts, _ms_names, vds_files = map(list, zip(*to_process))

        # The BBS session database should be cleared for our key
        # ------------------------------------------------------------------
        self.logger.debug(
            "Cleaning BBS database for key %s" % (self.inputs["key"]))
        with closing(
                psycopg2.connect(
                    host=self.inputs["db_host"],
                    user=self.inputs["db_user"],
                    database=self.inputs["db_name"])) as db_connection:
            db_connection.set_isolation_level(
                psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
            with closing(db_connection.cursor()) as db_cursor:
                db_cursor.execute(
                    "DELETE FROM blackboard.session WHERE key=%s",
                    (self.inputs["key"], ))

        # BBS GlobalControl requires a GVDS file describing all the data
        # to be processed. We assemble that from the separate parts
        # already available on disk.
        # ------------------------------------------------------------------
        self.logger.debug("Building VDS file describing data for BBS run")
        vds_dir = tempfile.mkdtemp(
            suffix=".%s" % (os.path.basename(__file__), ))
        vds_file = os.path.join(vds_dir, "bbs.gvds")
        combine_command = [self.inputs['combinevds'], vds_file] + vds_files
        combineproc = utilities.spawn_process(combine_command, self.logger)
        sout, serr = combineproc.communicate()
        log_process_output(
            self.inputs['combinevds'], sout, serr, self.logger)
        if combineproc.returncode != 0:
            # Report the command that actually failed; the name used
            # here previously was not assigned until further down.
            raise subprocess.CalledProcessError(
                combineproc.returncode, combine_command)

        # Construct a parset for BBS GlobalControl by patching the GVDS
        # file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        bbs_parset = utilities.patch_parset(
            self.inputs['parset'],
            {
                'Observation': vds_file,
                'BBDB.Key': self.inputs['key'],
                'BBDB.Name': self.inputs['db_name'],
                'BBDB.User': self.inputs['db_user'],
                'BBDB.Host': self.inputs['db_host'],
            })
        self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

        try:
            # When one of our processes fails, we set the killswitch.
            # Everything else will then come crashing down, rather than
            # hanging about forever.
            # --------------------------------------------------------------
            self.killswitch = threading.Event()
            self.killswitch.clear()
            # A signal handler is invoked with (signum, frame), which
            # Event.set() does not accept, so wrap it.
            signal.signal(
                signal.SIGTERM,
                lambda signum, frame: self.killswitch.set())

            # GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            run_flag.clear()
            bbs_control = threading.Thread(
                target=self._run_bbs_control,
                args=(bbs_parset, run_flag))
            bbs_control.start()
            run_flag.wait()  # Wait for control to start before proceeding

            # We run BBS KernelControl on each compute node by directly
            # invoking the node script using SSH
            # Note that we use a job_server to send out job details and
            # collect logging information, so we define a bunch of
            # ComputeJobs. However, we need more control than the generic
            # ComputeJob.dispatch method supplies, so we'll control them
            # with our own threads.
            # --------------------------------------------------------------
            command = "python %s" % (
                self.__file__.replace('master', 'nodes'))
            env = {
                "LOFARROOT": utilities.read_initscript(
                    self.logger, self.inputs['initscript'])["LOFARROOT"],
                "PYTHONPATH": self.config.get('deploy', 'engine_ppath'),
                "LD_LIBRARY_PATH": self.config.get(
                    'deploy', 'engine_lpath')
            }
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool,
                            self.error) as (jobhost, jobport):
                self.logger.debug(
                    "Job server at %s:%d" % (jobhost, jobport))
                for job_id, details in enumerate(to_process):
                    host, ms_file, _vds = details
                    jobpool[job_id] = ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['kernel_exec'],
                            self.inputs['initscript'],
                            ms_file,
                            self.inputs['key'],
                            self.inputs['db_name'],
                            self.inputs['db_user'],
                            self.inputs['db_host']
                        ])
                    bbs_kernels.append(
                        threading.Thread(
                            target=self._run_bbs_kernel,
                            args=(host, command, env, job_id,
                                  jobhost, str(jobport))))
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                for thread in bbs_kernels:
                    thread.start()
                self.logger.debug("Waiting for all kernels to complete")
                for thread in bbs_kernels:
                    thread.join()

            # When GlobalControl finishes, our work here is done
            # ----------------------------------------------------------
            self.logger.info("Waiting for GlobalControl thread")
            bbs_control.join()
        finally:
            os.unlink(bbs_parset)
            shutil.rmtree(vds_dir)

        # Checked after the cleanup clause, not inside it, so that this
        # return can never swallow an exception that is still in flight.
        if self.killswitch.is_set():
            # If killswitch is set, then one of our processes failed so
            # the whole run is invalid
            # ----------------------------------------------------------
            return 1

    return 0
def go(self):
    """
    Run BBS over the data described by ``self.inputs['data_mapfile']``.

    Stages, in order:

    1. read the parset and derive the database inputs from it,
    2. clear the blackboard session table for our key using ``psql``
       (a failure here is only logged),
    3. build the bbs map and run the ``vdsmaker`` task,
    4. write the patched control parset to
       ``<job_directory>/parsets/bbs_control.parset``,
    5. run the control thread and one kernel thread per entry of
       ``self.bbs_map`` to completion.

    :returns: 0 on success (``self.outputs['mapfile']`` is then set to
        the input data mapfile); 1 when the bbs map could not be made or
        the killswitch was set
    """
    self.logger.info("Starting BBS run")
    super(new_bbs, self).go()

    # Check for relevant input parameters in the parset-file
    # ---------------------------------------------------------------------
    self.logger.debug("Reading parset from %s" % self.inputs['parset'])
    self.parset = parameterset(self.inputs['parset'])

    self._set_input('db_host', 'BBDB.Host')
    self._set_input('db_user', 'BBDB.User')
    self._set_input('db_name', 'BBDB.Name')
    self._set_input('db_key', 'BBDB.Key')

    # Clean the blackboard database
    # ---------------------------------------------------------------------
    self.logger.info(
        "Cleaning BBS database for key '%s'" % (self.inputs['db_key']))
    # NOTE(review): the key is interpolated into the SQL text; confirm it
    # can never contain a quote character.
    clean_command = [
        "psql",
        "-h", self.inputs['db_host'],
        "-U", self.inputs['db_user'],
        "-d", self.inputs['db_name'],
        "-c", "DELETE FROM blackboard.session WHERE key='%s';" %
              self.inputs['db_key']
    ]
    self.logger.debug(clean_command)
    if subprocess.call(clean_command) != 0:
        self.logger.warning(
            "Failed to clean BBS database for key '%s'" %
            self.inputs['db_key'])

    # Create a bbs_map describing the file mapping on disk
    # ---------------------------------------------------------------------
    if not self._make_bbs_map():
        return 1

    # Produce a GVDS file, describing the data that must be processed.
    gvds_file = self.run_task(
        "vdsmaker",
        self.inputs['data_mapfile'],
        gvds=self.inputs['gvds'])['gvds']

    # Construct a parset for BBS GlobalControl by patching the GVDS
    # file and database information into the supplied template
    # ------------------------------------------------------------------
    self.logger.debug("Building parset for BBS control")
    # Create a location for parsets
    job_directory = self.config.get("layout", "job_directory")
    parset_directory = os.path.join(job_directory, "parsets")
    create_directory(parset_directory)

    # Patch the parset, copy the result to its target location and remove
    # the temp file. Patching happens outside the try block so that a
    # patch failure is not masked by cleanup of a file that was never
    # created.
    temp_bbs_parset = utilities.patch_parset(
        self.parset,
        {
            'Observation': gvds_file,
            'BBDB.Key': self.inputs['db_key'],
            'BBDB.Name': self.inputs['db_name'],
            'BBDB.User': self.inputs['db_user'],
            'BBDB.Host': self.inputs['db_host'],
        })
    bbs_parset_path = os.path.join(parset_directory, "bbs_control.parset")
    try:
        shutil.copyfile(temp_bbs_parset, bbs_parset_path)
        self.logger.debug("BBS control parset is %s" % (bbs_parset_path,))
    finally:
        # Always remove the file in the tempdir
        os.remove(temp_bbs_parset)

    # When one of our processes fails, we set the killswitch.
    # Everything else will then come crashing down, rather than
    # hanging about forever.
    # --------------------------------------------------------------
    self.killswitch = threading.Event()
    self.killswitch.clear()
    # A signal handler is invoked with (signum, frame), which Event.set()
    # does not accept, so wrap it.
    signal.signal(
        signal.SIGTERM, lambda signum, frame: self.killswitch.set())

    # GlobalControl runs in its own thread. It is given the persistent
    # copy of the parset: the temp file has already been removed above.
    # --------------------------------------------------------------
    run_flag = threading.Event()
    run_flag.clear()
    bbs_control = threading.Thread(
        target=self._run_bbs_control,
        args=(bbs_parset_path, run_flag))
    bbs_control.start()
    run_flag.wait()  # Wait for control to start before proceeding

    # We run BBS KernelControl on each compute node by directly
    # invoking the node script using SSH
    # Note that we use a job_server to send out job details and
    # collect logging information, so we define a bunch of
    # ComputeJobs. However, we need more control than the generic
    # ComputeJob.dispatch method supplies, so we'll control them
    # with our own threads.
    # --------------------------------------------------------------
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    jobpool = {}
    bbs_kernels = []
    with job_server(self.logger, jobpool,
                    self.error) as (jobhost, jobport):
        self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
        for job_id, details in enumerate(self.bbs_map):
            host, files = details
            jobpool[job_id] = ComputeJob(
                host, command,
                arguments=[
                    self.inputs['kernel_exec'],
                    files,
                    self.inputs['db_key'],
                    self.inputs['db_name'],
                    self.inputs['db_user'],
                    self.inputs['db_host']
                ])
            bbs_kernels.append(
                threading.Thread(
                    target=self._run_bbs_kernel,
                    args=(host, command, job_id, jobhost, str(jobport))))
        self.logger.info("Starting %d threads" % len(bbs_kernels))
        for thread in bbs_kernels:
            thread.start()
        self.logger.debug("Waiting for all kernels to complete")
        for thread in bbs_kernels:
            thread.join()

    # When GlobalControl finishes, our work here is done
    # ----------------------------------------------------------
    self.logger.info("Waiting for GlobalControl thread")
    bbs_control.join()

    # No second unlink here: the temp parset was already removed above
    # and removing it again would raise OSError; the copy under the job
    # directory is kept.
    if self.killswitch.is_set():
        # If killswitch is set, then one of our processes failed so
        # the whole run is invalid
        # ----------------------------------------------------------
        return 1

    self.outputs['mapfile'] = self.inputs['data_mapfile']
    return 0
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov):
    """
    Create a mask, write the awimager parset and run the awimager.

    :param executable: Path to awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory to place temporary files
    :param output_image: location and filename to store the output
        images; the multiple images are appended with type extensions
    :param concatenated_measurement_set: Input measurement set
    :param sourcedb_path: Path to the sourcedb used to create the image
        mask
    :param mask_patch_size: Scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: Turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param specify_fov: forwarded to ``_get_imaging_parameters``
    :param fov: if autogenerate_parameters is false calculate
        image parameters (cellsize, npix, wprojplanes, wmax) relative to
        this fov
    :returns: 0 on success, 1 when the awimager run fails
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start imager_awimager node run:")
    log4_cplus_name = "imager_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset.
        # NOTE(review): the returned object is not used below; the call
        # is kept in case callers rely on it failing for a bad parset.
        get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        # and the parset
        cell_size, npix, w_max, w_proj_planes = \
            self._get_imaging_parameters(
                concatenated_measurement_set,
                parset,
                autogenerate_parameters,
                specify_fov,
                fov)

        self.logger.info("Using autogenerated parameters; ")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        # Create target dir if it not exists
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask (the resulting path is currently not put in
        # the parset, see the TODO below)
        mask_file_path = self._create_mask(
            npix, cell_size, output_image, concatenated_measurement_set,
            executable, working_directory, log4_cplus_name, sourcedb_path,
            mask_patch_size, image_path_head)

        # *****************************************************************
        # 4. Update the parset with calculated parameters, and output image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cpluscd log
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
            # TODO: reintroduce 'mask': str(mask_file_path); excluded to
            # speed up in this debug stage
        }

        # save the parset at the target dir for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        # Patch outside the try block: when patching fails no temp file
        # exists, and cleanup of the unbound name would mask the error.
        temp_parset_filename = patch_parset(parset, patch_dictionary)
        try:
            # Copy tmp file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # remove temp file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the updated parameterset
        cmd = [executable, calculated_parset_path]
        try:
            with CatchLog4CPlus(
                    working_directory,
                    self.logger.name + "." +
                    os.path.basename(log4_cplus_name),
                    os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger)
        # Includes the CalledProcessError thrown by catch_segfaults
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # *********************************************************************
    # 6. Return output
    # The output image is published with the static ".restored" suffix,
    # as documented in the :rtype: above.
    self.outputs["image"] = output_image + ".restored"
    return 0
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov,
        major_cycle, nr_cycles, perform_self_cal):
    """
    Write the awimager parset for this cycle and run the awimager.

    :param executable: Path to awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory to place temporary files
    :param output_image: location and filename to store the output
        images; the multiple images are appended with type extensions
    :param concatenated_measurement_set: Input measurement set
    :param sourcedb_path: Path to the sourcedb used to create the image
        mask
    :param mask_patch_size: Scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: Turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param specify_fov: forwarded to ``_get_imaging_parameters``
    :param fov: if autogenerate_parameters is false calculate
        image parameters (cellsize, npix, wprojplanes, wmax) relative to
        this fov
    :param major_cycle: number of the self calibration cycle to determine
        the imaging parameters: cellsize, npix, wprojplanes, wmax, fov
    :param nr_cycles: The requested number of self cal cycles
    :param perform_self_cal: Bool used to control the selfcal
        functionality or the old semi-automatic functionality
    :returns: 0 on success, 1 when the awimager run fails
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start selfcal_awimager node run:")
    log4_cplus_name = "selfcal_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset.
        # NOTE(review): the returned object is not used below; the call
        # is kept in case callers rely on it failing for a bad parset.
        get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        # and the parset
        if perform_self_cal:
            self.logger.info(
                "Calculating selfcalibration parameters  ")
            cell_size, npix, w_max, w_proj_planes, \
                UVmin, UVmax, robust, threshold = \
                self._get_selfcal_parameters(
                    concatenated_measurement_set,
                    parset, major_cycle, nr_cycles)

            self._save_selfcal_info(concatenated_measurement_set,
                                    major_cycle, npix, UVmin, UVmax)
        else:
            self.logger.info(
                "Calculating parameters.. ( NOT selfcalibration)")
            cell_size, npix, w_max, w_proj_planes = \
                self._get_imaging_parameters(
                    concatenated_measurement_set,
                    parset,
                    autogenerate_parameters,
                    specify_fov,
                    fov)

        self.logger.info("Using autogenerated parameters; ")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        # Create target dir if it not exists
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask
        # TODO: mask creation through self._create_mask is disabled, and
        # the 'mask' key is therefore not part of the parset below.

        # *****************************************************************
        # 4. Update the parset with calculated parameters, and output image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cpluscd log
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
        }

        # Add some additional keys from the self calibration method
        if perform_self_cal:
            self_cal_patch_dict = {
                'weight': 'briggs',
                'padding': str(1.18),
                'niter': str(1000000),
                'operation': 'mfclark',
                'timewindow': '300',
                'fits': '',
                'threshold': str(threshold),
                'robust': str(robust),
                'UVmin': str(UVmin),
                'UVmax': str(UVmax),
                'maxbaseline': str(10000000),
                'select': str("sumsqr(UVW[:2])<1e12"),
            }
            patch_dictionary.update(self_cal_patch_dict)

        # save the parset at the target dir for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        # Patch outside the try block: when patching fails no temp file
        # exists, and cleanup of the unbound name would mask the error.
        temp_parset_filename = patch_parset(parset, patch_dictionary)
        try:
            # Copy tmp file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # remove temp file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the parameterset
        cmd = [executable, calculated_parset_path]
        self.logger.debug("Parset used for awimager run:")
        self.logger.debug(cmd)
        try:
            with CatchLog4CPlus(
                    working_directory,
                    self.logger.name + "." +
                    os.path.basename(log4_cplus_name),
                    os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)
        # Includes the CalledProcessError thrown by catch_segfaults
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # *********************************************************************
    # 6. Return output
    # Append static .restored: This might change but prob. not
    # The actual output image has this extension always, default of
    # awimager
    self.outputs["image"] = output_image + ".restored"
    return 0
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov,
        major_cycle, nr_cycles, perform_self_cal):
    """
    Write the awimager parset for this cycle and run the awimager.

    :param executable: Path to awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory to place temporary files
    :param output_image: location and filename to store the output
        images; the multiple images are appended with type extensions
    :param concatenated_measurement_set: Input measurement set
    :param sourcedb_path: Path to the sourcedb used to create the image
        mask
    :param mask_patch_size: Scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: Turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param specify_fov: forwarded to ``_get_imaging_parameters``
    :param fov: if autogenerate_parameters is false calculate
        image parameters (cellsize, npix, wprojplanes, wmax) relative to
        this fov
    :param major_cycle: number of the self calibration cycle to determine
        the imaging parameters: cellsize, npix, wprojplanes, wmax, fov
    :param nr_cycles: The requested number of self cal cycles
    :param perform_self_cal: Bool used to control the selfcal
        functionality or the old semi-automatic functionality
    :returns: 0 on success, 1 when the awimager run fails
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start selfcal_awimager node run:")
    log4_cplus_name = "selfcal_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset.
        # NOTE(review): the returned object is not used below; the call
        # is kept in case callers rely on it failing for a bad parset.
        get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        # and the parset
        if perform_self_cal:
            self.logger.info(
                "Calculating selfcalibration parameters  ")
            cell_size, npix, w_max, w_proj_planes, \
                UVmin, UVmax, robust, threshold = \
                self._get_selfcal_parameters(
                    concatenated_measurement_set,
                    parset, major_cycle, nr_cycles)

            self._save_selfcal_info(concatenated_measurement_set,
                                    major_cycle, npix, UVmin, UVmax)
        else:
            self.logger.info(
                "Calculating parameters.. ( NOT selfcalibration)")
            cell_size, npix, w_max, w_proj_planes = \
                self._get_imaging_parameters(
                    concatenated_measurement_set,
                    parset,
                    autogenerate_parameters,
                    specify_fov,
                    fov)

        self.logger.info("Using autogenerated parameters; ")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        # Create target dir if it not exists
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask
        # TODO: mask creation through self._create_mask is disabled, and
        # the 'mask' key is therefore not part of the parset below.

        # *****************************************************************
        # 4. Update the parset with calculated parameters, and output image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cpluscd log
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
        }

        # Add some additional keys from the self calibration method
        if perform_self_cal:
            self_cal_patch_dict = {
                'weight': 'briggs',
                'padding': str(1.18),
                'niter': str(1000000),
                'operation': 'mfclark',
                'timewindow': '300',
                'fits': '',
                'threshold': str(threshold),
                'robust': str(robust),
                'UVmin': str(UVmin),
                'UVmax': str(UVmax),
                'maxbaseline': str(10000000),
                'select': str("sumsqr(UVW[:2])<1e12"),
            }
            patch_dictionary.update(self_cal_patch_dict)

        # save the parset at the target dir for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        # Patch outside the try block: when patching fails no temp file
        # exists, and cleanup of the unbound name would mask the error.
        temp_parset_filename = patch_parset(parset, patch_dictionary)
        try:
            # Copy tmp file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # remove temp file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the parameterset
        cmd = [executable, calculated_parset_path]
        self.logger.debug("Parset used for awimager run:")
        self.logger.debug(cmd)
        try:
            with CatchLog4CPlus(
                    working_directory,
                    self.logger.name + "." +
                    os.path.basename(log4_cplus_name),
                    os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)
        # Includes the CalledProcessError thrown by catch_segfaults
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # *********************************************************************
    # 6. Return output
    # The output image is published with the static ".restored" suffix,
    # as documented in the :rtype: above.
    self.outputs["image"] = output_image + ".restored"
    return 0
def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image,
              processed_ms_map, subbands_per_image, collected_ms_dir_name,
              parset, ndppp):
    """
    Run NDPPP once per time slice, combining the subbands of that slice
    into a single output measurement set.

    For every slice a parset is derived from ``parset`` (patched with the
    input list, the output path and the input channel count) and written
    next to the output as ``<time_slice_path>.ndppp.par``; NDPPP is then
    started through ``self._dppp_call``.

    Side effect: entries of ``processed_ms_map`` get ``skip = True`` when
    their channel count cannot be read, or when the NDPPP run of their
    slice fails.

    :param working_dir: working directory handed to ``self._dppp_call``
    :param time_slice_dir_path: directory receiving the time slice
        measurement sets and their parsets
    :param slices_per_image: number of time slices to process
    :param processed_ms_map: sliceable sequence of entries exposing
        ``file`` and ``skip`` attributes, ordered slice after slice
    :param subbands_per_image: number of consecutive entries that form
        one time slice
    :param collected_ms_dir_name: not used by this method
    :param parset: template parset handed to ``patch_parset``
    :param ndppp: NDPPP executable
    :return: list with the paths of the time slices that were created
    """
    time_slice_path_list = []
    for idx_time_slice in range(slices_per_image):
        start_slice_range = idx_time_slice * subbands_per_image
        end_slice_range = (idx_time_slice + 1) * subbands_per_image
        slice_items = processed_ms_map[start_slice_range:end_slice_range]

        # construct time slice name
        output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice)
        time_slice_path = os.path.join(time_slice_dir_path, output_ms_name)

        # Convert the datamap to a file list. Skipped files get a
        # placeholder entry: ndppp needs an entry at that position to
        # allow filling with zeros.
        ndppp_input_ms = []
        nchan_known = False
        for item in slice_items:
            if item.skip:
                ndppp_input_ms.append("SKIPPEDSUBBAND")
                continue
            # The first readable measurement set provides the number of
            # channels; it is used below as the frequency averaging step.
            if not nchan_known:
                try:
                    nchan_input = self._get_nchan_from_ms(item.file)
                    nchan_known = True
                except Exception as e:
                    # corrupt input measurement set
                    self.logger.warning(str(e))
                    item.skip = True
                    ndppp_input_ms.append("SKIPPEDSUBBAND")
                    continue
            ndppp_input_ms.append(item.file)

        # If none of the input files was valid, skip the creation of the
        # timeslice all together, it will not show up in the timeslice
        # mapfile
        if not nchan_known:
            continue

        # TODO/FIXME: dependency on the step name!!!!
        ndppp_nchan_key = "avg1.freqstep"

        # Join into a single string list of paths.
        msin = "['{0}']".format("', '".join(ndppp_input_ms))

        # Update the parset with computed parameters
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus
            'msin': msin,
            'msout': time_slice_path,
            ndppp_nchan_key: nchan_input,
        }
        nddd_parset_path = time_slice_path + ".ndppp.par"

        # Patch the parset once and copy it to its final location. The
        # temp name is pre-set to None so that the cleanup cannot hide a
        # failure of patch_parset behind a NameError or a stale path.
        temp_parset_filename = None
        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            shutil.copy(temp_parset_filename, nddd_parset_path)
            self.logger.debug(
                "Wrote a ndppp parset with runtime variables:"
                " {0}".format(nddd_parset_path))
        except Exception:
            self.logger.error(
                "failed loading and updating the parset: {0}".format(parset))
            # bare raise keeps the original traceback
            raise
        finally:
            # remove the temp file
            if temp_parset_filename is not None:
                os.unlink(temp_parset_filename)

        # run ndppp
        cmd = [ndppp, nddd_parset_path]
        try:
            # Actual dppp call to externals (allows mucking)
            self._dppp_call(working_dir, ndppp, cmd, self.environment)
        except Exception as exception:
            # On error the current timeslice is skipped and all input ms
            # of the slice get the skip flag set
            for item in slice_items:
                item.skip = True
            self.logger.warning(str(exception))
        else:
            # append the created timeslice on successful run
            time_slice_path_list.append(time_slice_path)

    return time_slice_path_list
def go(self):
    """
    Run BBS on all input data, one group of subbands at a time.

    Steps performed:

    1. Cook the 'parmdb' and 'sourcedb' recipes to generate the parameter
       and source databases.
    2. Cook the 'vdsmaker' recipe to build a GVDS file describing all data.
    3. For every group produced by ``gvds_iterator``: remove the session
       row for our key from the blackboard database, combine the VDS files
       of the group, patch the BBS control parset, start GlobalControl in
       a thread (``self._run_bbs_control``) and one thread per job running
       ``self._run_bbs_kernel``, then wait for all of them.

    :return: 1 when a sub-recipe reports failure or when the killswitch was
        set during a group; 0 otherwise
    :raises subprocess.CalledProcessError: when combinevds exits non-zero
    """
    self.logger.info("Starting BBS run")
    super(bbs, self).go()

    #     Generate source and parameter databases for all input data
    # ----------------------------------------------------------------------
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['parmdbm']
    inputs['working_directory'] = self.config.get(
        "DEFAULT", "default_working_directory"
    )
    inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
    inputs['suffix'] = ".instrument"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('parmdb', inputs, outputs):
        self.logger.warn("parmdb reports failure")
        return 1

    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['makesourcedb']
    inputs['skymodel'] = self.inputs['skymodel']
    inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
    inputs['suffix'] = ".sky"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('sourcedb', inputs, outputs):
        self.logger.warn("sourcedb reports failure")
        return 1

    #        Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for BBS")
    vds_file = os.path.join(
        self.config.get("layout", "job_directory"),
        "vds",
        "bbs.gvds"
    )
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = vds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(vds_file)
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("BBS GVDS is %s" % (vds_file,))

    #      Iterate over groups of subbands divided up for convenient cluster
    #       processing -- ie, no more than nproc subbands per compute node
    # ----------------------------------------------------------------------
    for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
        #          to_process is a list of (host, filename, vds) tuples
        # ------------------------------------------------------------------
        vds_files = [vds for _host, _ms_name, vds in to_process]

        #    The BBS session database should be cleared for our key
        # ------------------------------------------------------------------
        self.logger.debug(
            "Cleaning BBS database for key %s" % (self.inputs["key"])
        )
        with closing(
            psycopg2.connect(
                host=self.inputs["db_host"],
                user=self.inputs["db_user"],
                database=self.inputs["db_name"]
            )
        ) as db_connection:
            db_connection.set_isolation_level(
                psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
            )
            with closing(db_connection.cursor()) as db_cursor:
                db_cursor.execute(
                    "DELETE FROM blackboard.session WHERE key=%s",
                    (self.inputs["key"],)
                )

        # When one of our processes fails, we set the killswitch.
        # Everything else will then come crashing down, rather than
        # hanging about forever. A new Event starts out cleared. It is
        # created before the try block so the cleanup below can always
        # query it.
        # ------------------------------------------------------------------
        self.killswitch = threading.Event()

        #   BBS GlobalControl requires a GVDS file describing all the data
        #      to be processed. We assemble that from the separate parts
        #                      already available on disk.
        # ------------------------------------------------------------------
        self.logger.debug("Building VDS file describing data for BBS run")
        vds_dir = tempfile.mkdtemp(
            suffix=".%s" % (os.path.basename(__file__),)
        )
        bbs_parset = None
        try:
            # Kept under a separate name: the outer vds_file is the GVDS
            # describing all data, this one only covers the current group.
            group_vds_file = os.path.join(vds_dir, "bbs.gvds")
            combine_cmd = [
                self.inputs['combinevds'],
                group_vds_file,
            ] + vds_files
            combineproc = utilities.spawn_process(combine_cmd, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(
                self.inputs['combinevds'], sout, serr, self.logger
            )
            if combineproc.returncode != 0:
                # Report the command that actually failed
                raise subprocess.CalledProcessError(
                    combineproc.returncode, combine_cmd
                )

            #  Construct a parset for BBS GlobalControl by patching the GVDS
            #   file and database information into the supplied template
            # --------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': group_vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    # 'BBDB.Port': self.inputs['db_name'],
                }
            )
            self.logger.debug("BBS control parset is %s" % (bbs_parset,))

            # Signal handlers are called with (signum, frame); Event.set
            # takes no arguments, so it has to be wrapped.
            signal.signal(
                signal.SIGTERM,
                lambda signum, frame: self.killswitch.set()
            )

            #                GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            bbs_control = threading.Thread(
                target=self._run_bbs_control,
                args=(bbs_parset, run_flag)
            )
            bbs_control.start()
            run_flag.wait()  # Wait for control to start before proceeding

            # We run BBS KernelControl on each compute node by directly
            # invoking the node script using SSH
            # Note that we use a job_server to send out job details and
            # collect logging information, so we define a bunch of
            # ComputeJobs. However, we need more control than the generic
            # ComputeJob.dispatch method supplies, so we'll control them
            # with our own threads.
            # --------------------------------------------------------------
            command = "python %s" % (
                self.__file__.replace('master', 'nodes')
            )
            env = {
                "LOFARROOT": utilities.read_initscript(
                    self.logger, self.inputs['initscript']
                )["LOFARROOT"],
                "PYTHONPATH": self.config.get('deploy', 'engine_ppath'),
                "LD_LIBRARY_PATH": self.config.get('deploy', 'engine_lpath')
            }
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool, self.error) as (jobhost,
                                                                  jobport):
                self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                for job_id, (host, ms_name, _vds) in enumerate(to_process):
                    jobpool[job_id] = ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['kernel_exec'],
                            self.inputs['initscript'],
                            ms_name,
                            self.inputs['key'],
                            self.inputs['db_name'],
                            self.inputs['db_user'],
                            self.inputs['db_host']
                        ]
                    )
                    bbs_kernels.append(
                        threading.Thread(
                            target=self._run_bbs_kernel,
                            args=(host, command, env, job_id,
                                  jobhost, str(jobport))
                        )
                    )
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                for thread in bbs_kernels:
                    thread.start()
                self.logger.debug("Waiting for all kernels to complete")
                for thread in bbs_kernels:
                    thread.join()

                #     When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
        finally:
            # The parset only exists when patching succeeded
            if bbs_parset is not None:
                os.unlink(bbs_parset)
            shutil.rmtree(vds_dir)
            if self.killswitch.is_set():
                # If killswitch is set, then one of our processes failed so
                # the whole run is invalid.
                # NOTE(review): returning from the finally clause also
                # replaces an exception that is in flight with this return
                # value -- confirm that is intended.
                # ----------------------------------------------------------
                return 1

    return 0
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov):
    """
    Create an image from a measurement set by running awimager.

    The imaging parameters are calculated, a mask is created, the parset
    is patched with the calculated values and stored as ``parset.par`` in
    the directory of ``output_image``, and finally the awimager executable
    is started on that parset.

    :param executable: Path to awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager,
    :param working_directory: directory to place temporary files
    :param output_image: location and filename to store the output images;
        the multiple images are appended with type extensions
    :param concatenated_measurement_set: Input measurement set
    :param sourcedb_path: Path to the sourcedb used to create the image
        mask
    :param mask_patch_size: Scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: Turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param specify_fov: passed on to ``self._get_imaging_parameters``
    :param fov: if autogenerate_parameters is false calculate
        imageparameter (cellsize, npix, wprojplanes, wmax) relative to this
        fov
    :return: 1 when the awimager run raises an exception
    """
    # NOTE(review): no return statement for the successful path is visible
    # in this definition, so it returns None on success -- confirm against
    # the caller.
    self.logger.info("Start imager_awimager node run:")
    log4_cplus_name = "imager_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset.
        # NOTE(review): the returned object is not used below; the call
        # is kept in case it is relied upon to validate the parset.
        get_parset(parset)

        # *************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        # and the parset
        cell_size, npix, w_max, w_proj_planes = \
            self._get_imaging_parameters(
                concatenated_measurement_set,
                parset,
                autogenerate_parameters,
                specify_fov,
                fov)

        self.logger.info("Using autogenerated parameters; ")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))

        self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
            w_max, w_proj_planes))

        # *************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        # Create target dir if it not exists
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # *************************************************************
        # 3. Create the mask
        # The path is currently not patched into the parset, see the TODO
        # in step 4.
        mask_file_path = self._create_mask(
            npix, cell_size, output_image, concatenated_measurement_set,
            executable, working_directory, log4_cplus_name, sourcedb_path,
            mask_patch_size, image_path_head)

        # *************************************************************
        # 4. Update the parset with calculated parameters, and output image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus logging
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
            # 'mask':str(mask_file_path),  #TODO REINTRODUCE
            # MASK, excluded to speed up in this debug stage
        }

        # save the parset at the target dir for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        # patch_parset is called outside the try block: when it fails
        # there is no temp file to remove, and the cleanup must not hide
        # the real error behind an unbound name.
        temp_parset_filename = patch_parset(parset, patch_dictionary)
        try:
            # Copy tmp file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # remove temp file
            os.remove(temp_parset_filename)

        # *************************************************************
        # 5. Run the awimager with the updated parameterset
        cmd = [executable, calculated_parset_path]
        try:
            with CatchLog4CPlus(
                    working_directory,
                    self.logger.name + "." +
                    os.path.basename(log4_cplus_name),
                    os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)

        # Thrown by catch_segfault
        except CalledProcessError as exception:
            self.logger.error(str(exception))
            return 1

        except Exception as exception:
            self.logger.error(str(exception))
            return 1
def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image,
              processed_ms_map, subbands_per_image, collected_ms_dir_name,
              parset, ndppp):
    """
    Prepare the NDPPP parset for every time slice.

    For every slice the entries of ``processed_ms_map`` belonging to it
    are converted to an NDPPP input list, and a parset derived from
    ``parset`` (patched with the input list, the output path and the input
    channel count) is written as ``<time_slice_path>.ndppp.par``.

    Side effect: an entry of ``processed_ms_map`` gets ``skip = True``
    when its channel count cannot be read.

    :param working_dir: not used in the part of the method visible here
    :param time_slice_dir_path: directory receiving the parsets; also the
        directory of the ``msout`` path written into them
    :param slices_per_image: number of time slices to process
    :param processed_ms_map: sliceable sequence of entries exposing
        ``file`` and ``skip`` attributes, ordered slice after slice
    :param subbands_per_image: number of consecutive entries that form
        one time slice
    :param collected_ms_dir_name: not used by this method
    :param parset: template parset handed to ``patch_parset``
    :param ndppp: not used in the part of the method visible here
    """
    # NOTE(review): this definition stops after writing the parset: no
    # NDPPP invocation and no return statement are visible, so it returns
    # None and time_slice_path_list is never filled -- confirm whether the
    # remainder of the method was lost.
    time_slice_path_list = []
    for idx_time_slice in range(slices_per_image):
        start_slice_range = idx_time_slice * subbands_per_image
        end_slice_range = (idx_time_slice + 1) * subbands_per_image

        # construct time slice name
        output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice)
        time_slice_path = os.path.join(time_slice_dir_path, output_ms_name)

        # Convert the datamap to a file list. Skipped files get a
        # placeholder entry: ndppp needs an entry at that position to
        # allow filling with zeros.
        ndppp_input_ms = []
        nchan_known = False
        for item in processed_ms_map[start_slice_range:end_slice_range]:
            if item.skip:
                ndppp_input_ms.append("SKIPPEDSUBBAND")
                continue
            # The first readable measurement set provides the number of
            # channels; it is used below as the frequency averaging step.
            if not nchan_known:
                try:
                    nchan_input = self._get_nchan_from_ms(item.file)
                    nchan_known = True
                except Exception as e:
                    # corrupt input measurement set
                    self.logger.warn(str(e))
                    item.skip = True
                    ndppp_input_ms.append("SKIPPEDSUBBAND")
                    continue
            ndppp_input_ms.append(item.file)

        # If none of the input files was valid, skip the creation of the
        # timeslice all together, it will not show up in the timeslice
        # mapfile
        if not nchan_known:
            continue

        # TODO/FIXME: dependency on the step name!!!!
        ndppp_nchan_key = "avg1.freqstep"

        # Join into a single string list of paths.
        msin = "['{0}']".format("', '".join(ndppp_input_ms))

        # Update the parset with computed parameters
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus
            'msin': msin,
            'msout': time_slice_path,
            ndppp_nchan_key: nchan_input,
        }
        nddd_parset_path = time_slice_path + ".ndppp.par"

        # Patch the parset once and copy it to its final location. The
        # temp file is always removed; its name is pre-set to None so the
        # cleanup cannot hide a failure of patch_parset.
        temp_parset_filename = None
        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            shutil.copy(temp_parset_filename, nddd_parset_path)
            self.logger.debug(
                "Wrote a ndppp parset with runtime variables:"
                " {0}".format(nddd_parset_path))
        except Exception:
            self.logger.error(
                "failed loading and updating the parset: {0}".format(parset))
            # bare raise keeps the original traceback
            raise
        finally:
            # remove the temp file
            if temp_parset_filename is not None:
                os.unlink(temp_parset_filename)
def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image,
              processed_ms_map, subbands_per_image, collected_ms_dir_name,
              parset, ndppp):
    """
    Run NDPPP once per time slice, combining the subbands of that slice
    into a single output measurement set.

    For every slice a parset is derived from ``parset`` (patched with the
    input list and the output path) and written next to the output as
    ``<time_slice_path>.ndppp.par``; NDPPP is then started through
    ``self._dppp_call``. A slice without a single usable input is not
    processed at all.

    Side effect: all entries of ``processed_ms_map`` belonging to a slice
    get ``skip = True`` when the NDPPP run of that slice fails.

    :param working_dir: working directory handed to ``self._dppp_call``
    :param time_slice_dir_path: directory receiving the time slice
        measurement sets and their parsets
    :param slices_per_image: number of time slices to process
    :param processed_ms_map: sliceable sequence of entries exposing
        ``file`` and ``skip`` attributes, ordered slice after slice
    :param subbands_per_image: number of consecutive entries that form
        one time slice
    :param collected_ms_dir_name: not used by this method
    :param parset: template parset handed to ``patch_parset``
    :param ndppp: NDPPP executable
    :return: list with the paths of the time slices that were created
    """
    time_slice_path_list = []
    for idx_time_slice in range(slices_per_image):
        start_slice_range = idx_time_slice * subbands_per_image
        end_slice_range = (idx_time_slice + 1) * subbands_per_image
        # The subset of ms that are part of the current timeslice
        slice_items = processed_ms_map[start_slice_range:end_slice_range]

        # construct time slice name
        output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice)
        time_slice_path = os.path.join(time_slice_dir_path, output_ms_name)

        # If none of the input files was valid, skip the creation of the
        # timeslice all together, it will not show up in the timeslice
        # mapfile. Checking the skip flags (instead of the length of the
        # input list) is required: skipped entries still get a placeholder
        # in that list, so it is only empty for an empty slice.
        if all(item.skip for item in slice_items):
            continue

        # Convert the datamap to a file list. Do not remove skipped
        # files: we need an entry in the list, ndppp will add zeros to
        # pad missing subbands.
        ndppp_input_ms = [
            "SKIPPEDSUBBAND" if item.skip else item.file
            for item in slice_items
        ]

        # Join into a single list of paths.
        msin = "['{0}']".format("', '".join(ndppp_input_ms))

        # Update the parset with computed parameters
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus
            'msin': msin,
            'msout': time_slice_path,
        }
        nddd_parset_path = time_slice_path + ".ndppp.par"

        # Patch the parset once and copy it to its final location. The
        # temp name is pre-set to None so that the cleanup cannot hide a
        # failure of patch_parset behind a NameError or a stale path.
        temp_parset_filename = None
        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            shutil.copy(temp_parset_filename, nddd_parset_path)
            self.logger.debug(
                "Wrote a ndppp parset with runtime variables:"
                " {0}".format(nddd_parset_path))
        except Exception:
            self.logger.error(
                "failed loading and updating the parset: {0}".format(parset))
            # bare raise keeps the original traceback
            raise
        finally:
            # remove the temp file
            if temp_parset_filename is not None:
                os.unlink(temp_parset_filename)

        # run ndppp
        cmd = [ndppp, nddd_parset_path]
        try:
            # Actual dppp call to externals (allows mucking)
            self._dppp_call(working_dir, ndppp, cmd, self.environment)
        except Exception as exception:
            # On error the current timeslice is skipped and all input ms
            # of the slice get the skip flag set
            for item in slice_items:
                item.skip = True
            self.logger.warning(str(exception))
        else:
            # append the created timeslice on successful run
            time_slice_path_list.append(time_slice_path)

    return time_slice_path_list