def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish. Return its return
    code.
    """
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(working_dir,
        self.logger.name + ".GlobalControl",
        os.path.basename(self.inputs['control_exec'])
    ):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [self.inputs['control_exec'], bbs_parset, "0"],
                    self.logger, cwd=working_dir, env=self.environment
                )
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error("Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

            returncode = self._monitor_process(
                bbs_control_process, "BBS Control")
            sout, serr = communicate_returning_strings(bbs_control_process)
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode

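# _monitor_process() is called above but not defined in this file. Below is a
# minimal sketch of what such a helper could look like, assuming a
# poll-and-killswitch design; the names and behaviour here are assumptions,
# not the actual implementation. It waits for the child process to exit,
# killing it via the kill() method patched on above if the shared killswitch
# is set by another component.

import time

def _monitor_process(self, process, name="Monitored process"):
    while process.poll() is None:        # child still running
        if self.killswitch.is_set():     # some other component failed
            self.logger.warn("Killing %s" % name)
            process.kill()               # the kill() patched on above
            return 1
        time.sleep(1)                    # avoid busy-waiting
    return process.returncode
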
def run(self, infile, product_type):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

#        # Get the product metadata. If data product type was not specified,
#        # derive it from the input filename's extension.
#        if not product_type:
#            ext = os.path.splitext(infile)[1]
#            if ext == ".MS": product_type = "Correlated"
#            elif ext == ".INST": product_type = "InstrumentModel"
#            elif ext == ".IM": product_type = "SkyImage"
#        if not product_type:
#            self.logger.error("File %s has unknown product type" % infile)
#            return 1

        self.logger.debug("Product type: %s" % product_type)
        if product_type == "Correlated":
            self.outputs = metadata.Correlated(infile).data()
        elif product_type == "InstrumentModel":
            self.outputs = metadata.InstrumentModel(infile).data()
        elif product_type == "SkyImage":
            self.outputs = metadata.SkyImage(infile).data()
        else:
            self.logger.error("Unknown product type: %s" % product_type)
            return 1

    return 0

def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish. Return its return
    code.
    """
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(
        working_dir,
        self.logger.name + ".GlobalControl",
        os.path.basename(self.inputs['control_exec'])
    ):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [
                        self.inputs['control_exec'],
                        bbs_parset,
                        "0"
                    ],
                    self.logger,
                    cwd=working_dir,
                    env=self.environment
                )
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error(
                    "Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

            returncode = self._monitor_process(
                bbs_control_process, "BBS Control")
            sout, serr = communicate_returning_strings(bbs_control_process)
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode

def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish. Return its return
    code.
    """
    env = utilities.read_initscript(self.logger, self.inputs['initscript'])
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(working_dir,
        self.logger.name + ".GlobalControl",
        os.path.basename(self.inputs['control_exec'])
    ):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [self.inputs['control_exec'], bbs_parset, "0"],
                    self.logger, cwd=working_dir, env=env
                )
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error("Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

            returncode = self._monitor_process(
                bbs_control_process, "BBS Control")
            sout, serr = bbs_control_process.communicate()
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode

def run(self, pdb_in, pdb_out):
    with log_time(self.logger):
        self.logger.debug("Copying parmdb: %s --> %s" % (pdb_in, pdb_out))

        # Remove any old parmdb database
        shutil.rmtree(pdb_out, ignore_errors=True)

        # And copy the new one into place
        shutil.copytree(pdb_in, pdb_out)

    return 0

def run(self, infile, baseline_filename):
    """
    baseline_filename points to a file containing a pickled array of
    antenna pairs.
    """
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        if not os.path.exists(baseline_filename):
            self.logger.error(
                "baseline file %s not found" % (baseline_filename))
            return 1

        # The file contains a pickled list of "ANTENNA1&ANTENNA2" strings;
        # pickle data must be read in binary mode.
        with open(baseline_filename, "rb") as f:
            baselines = load(f)

        antenna1, antenna2 = [], []
        for baseline in baselines:
            ant1, ant2 = baseline.split("&")
            antenna1.append(int(ant1))
            antenna2.append(int(ant2))

        if antenna1 and antenna2:
            cmd = "UPDATE %s SET FLAG=True WHERE any(ANTENNA1=%s and ANTENNA2=%s)" % \
                (infile, str(antenna1), str(antenna2))
            self.logger.info("Running TaQL: " + cmd)
            try:
                taql(cmd)
            except Exception as e:
                self.logger.warn(str(e))
                return 1
        else:
            self.logger.warn("No baselines specified to flag")

        # QUICK HACK: Also flag the last timestep
        t = table(infile)
        maxtime = t.getcol('TIME').max()
        t.close()
        cmd = "UPDATE %s SET FLAG=True WHERE TIME=%f" % (infile, maxtime)
        self.logger.info("Running TaQL: " + cmd)
        try:
            taql(cmd)
        except Exception as e:
            self.logger.warn(str(e))
            return 1

    return 0

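# For reference, a minimal sketch of how the baseline file read above could
# be produced: a pickled list of "ANTENNA1&ANTENNA2" strings. The file name
# and antenna pairs here are purely illustrative.

import pickle

baselines_to_flag = ["0&1", "0&2", "5&7"]   # antenna pairs to flag
with open("baselines.pickle", "wb") as f:   # binary mode, as the reader expects
    pickle.dump(baselines_to_flag, f)
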
def run(self, infile):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        try:
            self.outputs['start_time'] = taql(
                "CALC MIN([SELECT TIME from %s])" % infile)[0]
            self.outputs['end_time'] = taql(
                "CALC MAX([SELECT TIME from %s])" % infile)[0]
        except Exception as e:
            self.logger.error(str(e))
            return 1

    return 0

def run(self, files, executable, parset, environment):
    """
    Run the bbs-reducer executable.

    *Arguments*
    - `files`: argument is a tuple of (MS-file, parmdb-file, sourcedb-file)
    - `executable`: full path to the bbs-reducer executable
    - `parset`: full path to the parset-file
    - `environment`: environment variables to use
    """
    self.logger.debug("files = %s" % str(files))
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("parset = %s" % parset)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    ms, parmdb, sourcedb = files

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(ms):
            self.logger.info("Processing %s" % ms)
        else:
            self.logger.error("Measurement Set %s does not exist" % ms)
            return 1

        # Run bbs-reducer. Catch log output from bbs-reducer and stdout.
        scratch_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        try:
            cmd = [executable,
                   "--parmdb=%s" % parmdb,
                   "--sourcedb=%s" % sourcedb,
                   ms, parset]
            with CatchLog4CPlus(
                scratch_dir,
                self.logger.name + "." + os.path.basename(ms),
                os.path.basename(executable),
            ) as logger:
                catch_segfaults(cmd, scratch_dir, self.environment, logger)
        except CalledProcessError as err:
            self.logger.error(str(err))
            return 1
        finally:
            shutil.rmtree(scratch_dir)

    return 0

def run(self, executable, catalogue, skydb, dbtype):
    """
    Contains all functionality
    """
    with log_time(self.logger):
        # ****************************************************************
        # 1. Create the output directory if it does not yet exist.
        skydb_dir = os.path.dirname(skydb)
        try:
            os.makedirs(skydb_dir)
            self.logger.debug("Created output directory %s" % skydb_dir)
        except FileExistsError:
            pass

        # ****************************************************************
        # 2. Remove any old sky database, then create the sourcedb
        shutil.rmtree(skydb, ignore_errors=True)

        self.logger.info("Creating skymodel: %s" % (skydb))
        scratch_dir = tempfile.mkdtemp(suffix=".%s" %
            (os.path.basename(__file__),))
        try:
            cmd = [executable,
                   "in=%s" % catalogue,
                   "out=%s" % skydb,
                   "outtype=%s" % dbtype,
                   "format=<",
                   "append=false"]
            with CatchLog4CPlus(
                scratch_dir,
                self.logger.name + "." + os.path.basename(skydb),
                os.path.basename(executable)) as logger:
                catch_segfaults(cmd, scratch_dir, None, logger)

        # *****************************************************************
        # 3. Validate performance and clean up temp files
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            # Temporary workaround...
            self.logger.error(str(err))
            return 1
        finally:
            shutil.rmtree(scratch_dir)

    return 0

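# The "format=<" argument above tells the sourcedb executable to read the
# column layout from the catalogue file itself. Purely as an illustration
# (the exact columns depend on the sky model in use), such a catalogue might
# start like this:
#
#   # (Name, Type, Ra, Dec, I) = format
#   3C196, POINT, 08:13:36.0, +48.13.03, 153.0
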
def run(self, executable, catalogue, skydb, dbtype):
    """
    Contains all functionality
    """
    with log_time(self.logger):
        # ****************************************************************
        # 1. Create the output directory if it does not yet exist.
        skydb_dir = os.path.dirname(skydb)
        try:
            os.makedirs(skydb_dir)
            self.logger.debug("Created output directory %s" % skydb_dir)
        except OSError as err:
            # Ignore the error if the directory already exists,
            # otherwise re-raise
            if err.errno != errno.EEXIST:
                raise

        # ****************************************************************
        # 2. Remove any old sky database, then create the sourcedb
        shutil.rmtree(skydb, ignore_errors=True)

        self.logger.info("Creating skymodel: %s" % (skydb))
        scratch_dir = tempfile.mkdtemp()
        try:
            cmd = [executable,
                   "in=%s" % catalogue,
                   "out=%s" % skydb,
                   "outtype=%s" % dbtype,
                   "format=<",
                   "append=false"]
            with CatchLog4CPlus(
                scratch_dir,
                self.logger.name + "." + os.path.basename(skydb),
                os.path.basename(executable)
            ) as logger:
                catch_segfaults(cmd, scratch_dir, None, logger)

        # *****************************************************************
        # 3. Validate performance and clean up temp files
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            # Temporary workaround...
            self.logger.error(str(err))
            return 1
        finally:
            shutil.rmtree(scratch_dir)

    return 0

def run(self, infile, makeFLAGwritable):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        if not os.path.exists(makeFLAGwritable):
            self.logger.error("file %s not found" % (makeFLAGwritable))
            return 1

        try:
            mFw_module = imp.load_source('mFw_module', makeFLAGwritable)
            mFw_module.makeFlagWritable(infile, '')
        except Exception as e:
            self.logger.warn(str(e))
            return 1

    return 0

def run(self, infile):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        try:
            self.outputs['start_time'] = taql(
                "CALC MIN([SELECT TIME from %s])" % infile
            )[0]
            self.outputs['end_time'] = taql(
                "CALC MAX([SELECT TIME from %s])" % infile
            )[0]
        except Exception as e:
            self.logger.error(str(e))
            return 1

    return 0

def run(self, files, executable, parset, environment):
    """
    Run the bbs-reducer executable.

    *Arguments*
    - `files`: argument is a tuple of (MS-file, parmdb-file, sourcedb-file)
    - `executable`: full path to the bbs-reducer executable
    - `parset`: full path to the parset-file
    - `environment`: environment variables to use
    """
    self.logger.debug("files = %s" % str(files))
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("parset = %s" % parset)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    ms, parmdb, sourcedb = files

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(ms):
            self.logger.info("Processing %s" % ms)
        else:
            self.logger.error("Measurement Set %s does not exist" % ms)
            return 1

        # Run bbs-reducer. Catch log output from bbs-reducer and stdout.
        scratch_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        try:
            cmd = [executable,
                   "--parmdb=%s" % parmdb,
                   "--sourcedb=%s" % sourcedb,
                   ms, parset]
            with CatchLog4CPlus(
                scratch_dir,
                self.logger.name + "." + os.path.basename(ms),
                os.path.basename(executable),
            ) as logger:
                catch_segfaults(cmd, scratch_dir, self.environment, logger)
        except CalledProcessError as err:
            self.logger.error(str(err))
            return 1
        finally:
            shutil.rmtree(scratch_dir)

    return 0

def run(self, infile, makeFLAGwritable):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        if not os.path.exists(makeFLAGwritable):
            self.logger.error(
                "file %s not found" % (makeFLAGwritable)
            )
            return 1

        try:
            mFw_module = imp.load_source('mFw_module', makeFLAGwritable)
            mFw_module.makeFlagWritable(infile, '')
        except Exception as e:
            self.logger.warn(str(e))
            return 1

    return 0

def run(self, infile, product_type):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        self.logger.debug("Product type: %s" % product_type)
        if product_type == "Correlated":
            self.outputs = metadata.Correlated(self.logger, infile).data()
        elif product_type == "InstrumentModel":
            self.outputs = metadata.InstrumentModel(self.logger, infile).data()
        elif product_type == "SkyImage":
            self.outputs = metadata.SkyImage(self.logger, infile).data()
        else:
            self.logger.error("Unknown product type: %s" % product_type)
            return 1

    return 0

def run(self, infile, baseline_filename):
    """
    baseline_filename points to a file containing a pickled array of
    antenna pairs.
    """
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        if not os.path.exists(baseline_filename):
            self.logger.error(
                "baseline file %s not found" % (baseline_filename)
            )
            return 1

        # The file contains a pickled list of "ANTENNA1&ANTENNA2" strings;
        # pickle data must be read in binary mode.
        with open(baseline_filename, "rb") as f:
            baselines = load(f)

        antenna1, antenna2 = [], []
        for baseline in baselines:
            ant1, ant2 = baseline.split("&")
            antenna1.append(int(ant1))
            antenna2.append(int(ant2))

        if antenna1 and antenna2:
            cmd = "UPDATE %s SET FLAG=True WHERE any(ANTENNA1=%s and ANTENNA2=%s)" % \
                (infile, str(antenna1), str(antenna2))
            self.logger.info("Running TaQL: " + cmd)
            try:
                taql(cmd)
            except Exception as e:
                self.logger.warn(str(e))
                return 1
        else:
            self.logger.warn("No baselines specified to flag")

        # QUICK HACK: Also flag the last timestep
        t = table(infile)
        maxtime = t.getcol('TIME').max()
        t.close()
        cmd = "UPDATE %s SET FLAG=True WHERE TIME=%f" % (infile, maxtime)
        self.logger.info("Running TaQL: " + cmd)
        try:
            taql(cmd)
        except Exception as e:
            self.logger.warn(str(e))
            return 1

    return 0

def run(self, infile, clusterdesc, outfile, executable):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        try:
            if not os.access(executable, os.X_OK):
                raise ExecutableMissing(executable)
            cmd = [executable, clusterdesc, infile, outfile]
            return catch_segfaults(cmd, None, None, self.logger).returncode
        except ExecutableMissing as e:
            self.logger.error("%s not found" % (e.args[0]))
            return 1
        except CalledProcessError as e:
            # CalledProcessError isn't properly propagated by IPython
            # Temporary workaround...
            self.logger.error(str(e))
            self.logger.info("A common cause for this failure is the usage "
                "of an incorrect cluster.desc file in the pipeline.cfg")
            return 1

def run(self, infile, clusterdesc, outfile, executable):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        try:
            if not os.access(executable, os.X_OK):
                raise ExecutableMissing(executable)
            cmd = [executable, clusterdesc, infile, outfile]
            return catch_segfaults(cmd, None, None, self.logger).returncode
        except ExecutableMissing as e:
            self.logger.error("%s not found" % (e.args[0]))
            return 1
        except CalledProcessError as e:
            # CalledProcessError isn't properly propagated by IPython
            # Temporary workaround...
            self.logger.error(str(e))
            self.logger.info(
                "A common cause for this failure is the usage "
                "of an incorrect cluster.desc file in the pipeline.cfg")
            return 1

def run(self, executable, infiles, db_key, db_name, db_user, db_host):
    """
    Deprecated functionality
    """
    # executable : path to KernelControl executable
    # infiles    : tuple of MS, instrument- and sky-model files
    # db_*       : database connection parameters
    # ----------------------------------------------------------------------
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("infiles = %s" % str(infiles))
    self.logger.debug("db_key = %s" % db_key)
    self.logger.debug("db_name = %s" % db_name)
    self.logger.debug("db_user = %s" % db_user)
    self.logger.debug("db_host = %s" % db_host)

    (ms, parmdb_instrument, parmdb_sky) = infiles

    with log_time(self.logger):
        if os.path.exists(ms):
            self.logger.info("Processing %s" % (ms))
        else:
            self.logger.error("Dataset %s does not exist" % (ms))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up BBSKernel parset")
        # Getting the filesystem must be done differently, using the
        # DataProduct keys in the parset provided by the scheduler.
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
        fd, parset_file = mkstemp()
        kernel_parset = parameterset()
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": ms,
            "BBDB.Key": db_key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmDB.Sky": parmdb_sky,
            "ParmDB.Instrument": parmdb_instrument
        }.items():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_file)
        os.close(fd)
        self.logger.debug("BBSKernel parset written to %s" % parset_file)

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        try:
            self.logger.info("******** {0}".format(open(parset_file).read()))
            cmd = [executable, parset_file, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(ms),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir)
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable)
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            os.unlink(parset_file)
            shutil.rmtree(working_dir)

    return 0

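# For illustration, the kernel parset written above contains simple
# key=value pairs along these lines; all values here are made up:
#
#   ObservationPart.Filesystem = lce001:/data
#   ObservationPart.Path       = /data/L12345_SB000.MS
#   BBDB.Key                   = default
#   BBDB.Name                  = bbs
#   BBDB.User                  = postgres
#   BBDB.Host                  = ldb001
#   ParmDB.Sky                 = /data/L12345_SB000.MS.sky
#   ParmDB.Instrument          = /data/L12345_SB000.MS.instrument
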
def pipeline_logic(self):
    sys.path.insert(0, "")
    from datafiles_to_process import datafiles  # datafiles is a list of MS paths.

    with log_time(self.logger):
        # Build a map of compute node <-> data location on storage nodes.
        storage_mapfile = self.run_task(
            "datamapper_storage", datafiles)['mapfile']
        self.logger.info('storage mapfile = %s' % storage_mapfile)

        # Produce a GVDS file describing the data on the storage nodes.
        self.run_task('vdsmaker', storage_mapfile)

        # Read metadata (start, end times, pointing direction) from GVDS.
        vdsinfo = self.run_task("vdsreader")

        # NDPPP reads the data from the storage nodes, according to the
        # map. It returns a new map, describing the location of data on
        # the compute nodes.
        ndppp_results = self.run_task("ndppp", storage_mapfile)

        # Remove baselines which have been fully-flagged in any individual
        # subband.
        compute_mapfile = self.run_task(
            "flag_baseline",
            ndppp_results['mapfile'],
            baselines=ndppp_results['fullyflagged']
        )['mapfile']
#        compute_mapfile = ndppp_results['mapfile']
#        self.logger.info("compute map file = %s", compute_mapfile)

        parmdb_mapfile = self.run_task("parmdb", compute_mapfile)['mapfile']
        sourcedb_mapfile = self.run_task("sourcedb", compute_mapfile)['mapfile']

        with patched_parset(
            self.task_definitions.get("bbs", "parset"), {}
        ) as bbs_parset:
            # BBS modifies data in place, so the map produced by NDPPP
            # remains valid.
            self.run_task("bbs", compute_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile)['mapfile']

#        return 0

        # Now run DPPP three times on the output of BBS, clipping anything
        # at more than 5 times the flux of the central source.
        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {
#                "clip1.amplmax": str(5 * central["source_flux"])
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as corrected_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=corrected_ndppp_parset,
                    suffix=""
                )

        # Image CORRECTED_DATA with casapy
#        print dir(compute_mapfile)
#        print compute_mapfile
#        return 0
        self.run_task("force_mount", compute_mapfile, mount_type="ALT-AZ")
        self.run_task("casapy_clean", compute_mapfile, arguments={
            "niter": 500,
            "threshold": '0.0mJy',
            "imsize": [1024, 1024],
            "cell": ['40.0arcsec'],
            "weighting": 'briggs',
            "robust": 0.0,
            "psfmode": 'clark',
            "gridmode": 'widefield',
            "wprojplanes": 128,
            "calready": False,
            "restoringbeam": []
        })

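# The mapfiles passed between tasks above describe which data lives on which
# node. The exact on-disk format depends on the framework version, but as an
# illustration, a map can be thought of as a list of (host, path) pairs:
#
#   [('lce001', '/data/L12345_SB000.MS'),
#    ('lce002', '/data/L12345_SB001.MS')]
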
def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
    # imager_exec: path to cimager executable
    # vds:         VDS file describing the data to be imaged
    # parset:      imager configuration
    # resultsdir:  place resulting images here
    # start_time:  ) time range to be imaged
    # end_time:    ) in seconds (may be None)
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        self.logger.info("Processing %s" % (vds,))

        # Bail out if the destination exists (can thus resume a partial run).
        # Should be configurable?
        # ------------------------------------------------------------------
        parset_data = Parset(parset)
        image_names = parset_data.getStringVector("Cimager.Images.Names")
        for image_name in image_names:
            outputfile = os.path.join(resultsdir, image_name + ".restored")
            self.logger.info(outputfile)
            if os.path.exists(outputfile):
                self.logger.info("Image already exists: aborting.")
                return 0
        try:
            working_dir = mkdtemp(suffix=".%s" %
                (os.path.basename(__file__),))

            # If a time range has been specified, copy that section of the
            # input MS and only image that.
            # --------------------------------------------------------------
            query = []
            if start_time:
                self.logger.debug("Start time is %s" % start_time)
                start_time = quantity(float(start_time), 's')
                query.append("TIME > %f" % start_time.get('s').get_value())
            if end_time:
                self.logger.debug("End time is %s" % end_time)
                end_time = quantity(float(end_time), 's')
                query.append("TIME < %f" % end_time.get('s').get_value())
            query = " AND ".join(query)
            if query:
                # Select the relevant section of the MS.
                # ----------------------------------------------------------
                self.logger.debug("Query is %s" % query)
                output = os.path.join(working_dir, "timeslice.MS")
                vds_parset = get_parset(vds)
                t = table(vds_parset.getString("FileName"))
                t.query(query, name=output)
                # Patch the updated information into the imager
                # configuration.
                # ----------------------------------------------------------
                parset = patch_parset(parset, {'Cimager.dataset': output})
            else:
                self.logger.debug("No time range selected")

            self.logger.debug("Running cimager")
            with CatchLog4CXX(
                working_dir,
                self.logger.name + "." + os.path.basename(vds)):
                cimager_process = Popen(
                    [imager_exec, "-inputs", parset],
                    stdout=PIPE, stderr=PIPE, cwd=working_dir)
                sout, serr = cimager_process.communicate()
            log_process_output("cimager", sout, serr, self.logger)
            if cimager_process.returncode != 0:
                raise CalledProcessError(
                    cimager_process.returncode, imager_exec)

            # Dump the resulting images in the pipeline results area.
            # I'm not aware of a foolproof way to predict the image names
            # that will be produced, so we read them from the parset and
            # add standard cimager prefixes.
            # --------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector(
                "Cimager.Images.Names")
            prefixes = ["image", "psf", "residual", "weights", "sensitivity"]
            self.logger.debug("Copying images to %s" % resultsdir)
            for image_name in image_names:
                for prefix in prefixes:
                    filename = image_name.replace("image", prefix, 1)
                    shutil.move(os.path.join(working_dir, filename),
                                os.path.join(resultsdir, filename))
                if parset_data.getBool('Cimager.restore'):
                    shutil.move(
                        os.path.join(working_dir, image_name + ".restored"),
                        os.path.join(resultsdir, image_name + ".restored"))
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            shutil.rmtree(working_dir)
        return 0

def run(self, awimager_output, ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb, concat_ms,
        correlated_output_location, msselect_executable):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param ms_per_image: The measurement sets scheduled to create the image
    :param sourcelist: list of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: largest/maximum baseline used for the image
    :param processed_ms_dir: The measurement sets actually used to create
        the image
    :param fillrootimagegroup_exec: Executable used to add image data to
        the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        ms_per_image_map = DataMap.load(ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)

        processed_ms_paths = []
        ms_per_image_map.iterator = DataMap.SkipIterator
        for item in ms_per_image_map:
            ms_path = item.file
            processed_ms_paths.append(ms_path)

        # add the information to the image
        try:
            self.logger.debug("Start addImage Info")
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.warn("addImagingInfo threw an exception:")
            self.logger.warn(error)
            # Catch the "already done" error: allows for rerunning
            # of the recipe
            if "addImagingInfo already done" in str(error):
                self.logger.warn("addImagingInfo already done, continue")
                pass
            else:
                raise Exception(error)
            # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info("Saving image in HDF5 format to: {0}".format(
                output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error(
                "Exception raised inside pyrap.images: {0}".format(
                    str(error)))
            raise error

def run(self, executable, infiles, db_key, db_name, db_user, db_host):
    """
    Deprecated functionality
    """
    # executable : path to KernelControl executable
    # infiles    : tuple of MS, instrument- and sky-model files
    # db_*       : database connection parameters
    # ----------------------------------------------------------------------
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("infiles = %s" % str(infiles))
    self.logger.debug("db_key = %s" % db_key)
    self.logger.debug("db_name = %s" % db_name)
    self.logger.debug("db_user = %s" % db_user)
    self.logger.debug("db_host = %s" % db_host)

    (ms, parmdb_instrument, parmdb_sky) = infiles

    with log_time(self.logger):
        if os.path.exists(ms):
            self.logger.info("Processing %s" % (ms))
        else:
            self.logger.error("Dataset %s does not exist" % (ms))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up BBSKernel parset")
        # Getting the filesystem must be done differently, using the
        # DataProduct keys in the parset provided by the scheduler.
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
        fd, parset_file = mkstemp()
        kernel_parset = parameterset()
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": ms,
            "BBDB.Key": db_key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmDB.Sky": parmdb_sky,
            "ParmDB.Instrument": parmdb_instrument
        }.items():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_file)
        os.close(fd)
        self.logger.debug("BBSKernel parset written to %s" % parset_file)

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        try:
            self.logger.info("******** {0}".format(open(parset_file).read()))
            cmd = [executable, parset_file, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(ms),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                )
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable
                )
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            os.unlink(parset_file)
            shutil.rmtree(working_dir)

    return 0

def pipeline_logic(self):
    from to_process import datafiles  # datafiles is a list of MS paths.

    with log_time(self.logger):
        # Build a map of compute node <-> data location on storage nodes.
        storage_mapfile = self.run_task("datamapper", datafiles)['mapfile']

        # Produce a GVDS file describing the data on the storage nodes.
        self.run_task('vdsmaker', storage_mapfile)

        # Read metadata (start, end times, pointing direction) from GVDS.
        vdsinfo = self.run_task("vdsreader")

        # NDPPP reads the data from the storage nodes, according to the
        # map. It returns a new map, describing the location of data on
        # the compute nodes.
        ndppp_results = self.run_task(
            "ndppp",
            storage_mapfile,
            parset=os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.initial.parset"
            ),
            data_start_time=vdsinfo['start_time'],
            data_end_time=vdsinfo['end_time']
        )

        # Remove baselines which have been fully-flagged in any individual
        # subband.
        compute_mapfile = self.run_task(
            "flag_baseline",
            ndppp_results['mapfile'],
            baselines=ndppp_results['fullyflagged']
        )['mapfile']

        # Build a sky model ready for BBS & return the name & flux of the
        # central source.
        ra = quantity(vdsinfo['pointing']['ra']).get_value('deg')
        dec = quantity(vdsinfo['pointing']['dec']).get_value('deg')
        central = self.run_task(
            "skymodel", ra=ra, dec=dec, search_size=2.5
        )

        # Patch the name of the central source into the BBS parset for
        # subtraction.
        with patched_parset(
            self.task_definitions.get("bbs", "parset"),
            {
                'Step.correct.Model.Sources':
                    "[ \"%s\" ]" % (central["source_name"]),
                'Step.subtract.Model.Sources':
                    "[ \"%s\" ]" % (central["source_name"])
            }
        ) as bbs_parset:
            # BBS modifies data in place, so the map produced by NDPPP
            # remains valid.
            self.run_task("bbs", compute_mapfile, parset=bbs_parset)

        # Now run DPPP three times on the output of BBS, for each of two
        # columns: once on CORRECTED_DATA, and once on SUBTRACTED_DATA.
        # Clip anything at more than 5 times the flux of the central source.
        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {
                "clip1.amplmax": str(5 * central["source_flux"])
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as corrected_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=corrected_ndppp_parset,
                    suffix=""
                )

        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {
                "msin.datacolumn": "SUBTRACTED_DATA",
                "msout.datacolumn": "SUBTRACTED_DATA",
                "clip1.amplmax": str(5 * central["source_flux"])
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as subtracted_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=subtracted_ndppp_parset,
                    suffix=""
                )

        # Image CORRECTED_DATA.
        self.logger.info("Imaging CORRECTED_DATA")

        # Patch the pointing direction recorded in the VDS file into
        # the parset for the cimager.
        with patched_parset(
            self.task_definitions.get("cimager", "parset"),
            {
                'Images.ra': quantity(
                    vdsinfo['pointing']['ra']).formatted("time"),
                'Images.dec': quantity(
                    vdsinfo['pointing']['dec']).formatted("angle")
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as imager_parset:
            # And run cimager.
            self.outputs['images'] = self.run_task(
                "cimager", compute_mapfile,
                parset=imager_parset,
                results_dir=os.path.join(
                    self.config.get("layout", "results_directory"),
                    "corrected"
                )
            )['images']

        # Image SUBTRACTED_DATA.
self.logger.info("Imaging SUBTRACTED_DATA") # Patch the pointing direction recorded in the VDS file into # the parset for the cimager, and change the column to be # imaged. with patched_parset( self.task_definitions.get("cimager", "parset"), { 'Images.ra': quantity(vdsinfo['pointing']['ra']).formatted("time"), 'Images.dec': quantity(vdsinfo['pointing']['dec']).formatted("angle"), 'datacolumn': "SUBTRACTED_DATA" }, output_dir=self.config.get("layout", "parset_directory") ) as subtracted_imager_parset: # And run cimager. self.outputs['images'] = self.run_task( "cimager", compute_mapfile, parset=subtracted_imager_parset, results_dir=os.path.join( self.config.get("layout", "results_directory"), "subtracted" ) )['images']
def run(self, awimager_output, ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param ms_per_image: The measurement sets scheduled to create the image
    :param sourcelist: list of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: largest/maximum baseline used for the image
    :param processed_ms_dir: The measurement sets actually used to create
        the image
    :param fillrootimagegroup_exec: Executable used to add image data to
        the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        ms_per_image_map = DataMap.load(ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)
        # TODO: BUG!! the meta data might contain files that were copied
        # but failed in imager_bbs
        processed_ms_paths = []
        for item in ms_per_image_map:
            path = item.file
            ms_file_name = os.path.split(path)[1]
            # if the ms is in the processed dir (additional check)
            if (ms_file_name in file_list):
                # save the path
                processed_ms_paths.append(
                    os.path.join(processed_ms_dir, ms_file_name))

        # add the information to the image
        try:
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.warn("addImagingInfo threw an exception:")
            self.logger.warn(error)
            # Catch the "already done" error: allows for rerunning
            # of the recipe
            if "addImagingInfo already done" in str(error):
                pass
            else:
                raise Exception(error)
            # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info(
                "Saving image in HDF5 format to: {0}".format(output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error(
                "Exception raised inside pyrap.images: {0}".format(
                    str(error)))
            raise error

        # Convert to fits
        # create the target location
        fits_output = output_image + ".fits"
        # To allow reruns, a possible earlier version needs to be removed!
        # image2fits fails if this is not done!!
        if os.path.exists(fits_output):
            os.unlink(fits_output)

        try:
            temp_dir = tempfile.mkdtemp(suffix=".%s" %
                (os.path.basename(__file__),))
            with CatchLog4CPlus(
                temp_dir,
                self.logger.name + '.' + os.path.basename(awimager_output),
                "image2fits") as logger:
                catch_segfaults(
                    ["image2fits", '-in', awimager_output,
                     '-out', fits_output],
                    temp_dir, self.environment, logger)
        except Exception as excp:
            self.logger.error(str(excp))
            return 1
        finally:
            shutil.rmtree(temp_dir)

        # ****************************************************************
        # 3. Filling of the HDF5 root group
        command = [fillrootimagegroup_exec, output_image]
        self.logger.info(" ".join(command))
        # Spawn a subprocess and connect the pipes
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = communicate_returning_strings(proc)
        exit_status = proc.returncode
        self.logger.info(stdoutdata)
        self.logger.info(stderrdata)

        # if the executable failed, log the exit status
        if exit_status != 0:
            self.logger.error(
                "Error using the fillRootImageGroup command; "
                "see above lines. Exit status: {0}".format(exit_status))
            return 1

        # *****************************************************************
        # 4. Export the fits image to the msss server
        url = "http://tanelorn.astron.nl:8000/upload"
        try:
            self.logger.info("Starting upload of fits image data to server!")
            opener = urllib.request.build_opener(mph.MultipartPostHandler)
            filedata = {"file": open(fits_output, "rb")}
            opener.open(url, filedata, timeout=2)
        # HTTPError needs to be caught first.
        except urllib.error.HTTPError as httpe:
            self.logger.warn("HTTP status is: {0}".format(httpe.code))
            self.logger.warn("failed exporting fits image to server")
        except urllib.error.URLError as urle:
            self.logger.warn(str(urle.reason))
            self.logger.warn("failed exporting fits image to server")
        except Exception as exc:
            self.logger.warn(str(exc))
            self.logger.warn("failed exporting fits image to server")

        # *****************************************************************
        # 5. export the sourcelist to the msss server
        url = "http://tanelorn.astron.nl:8000/upload_srcs"
        try:
            # Copy file to the output location
            new_sourcelist_path = output_image + ".sourcelist"
            if os.path.exists(new_sourcelist_path):
                os.unlink(new_sourcelist_path)
            shutil.copy(sourcelist, new_sourcelist_path)
            self.logger.info("Starting upload of sourcelist data to server!")
            opener = urllib.request.build_opener(mph.MultipartPostHandler)
            filedata = {"file": open(new_sourcelist_path, "rb")}
            opener.open(url, filedata, timeout=2)
        # HTTPError needs to be caught first.
        except urllib.error.HTTPError as httpe:
            self.logger.warn("HTTP status is: {0}".format(httpe.code))
            self.logger.warn("failed exporting sourcelist to server")
        except urllib.error.URLError as urle:
            self.logger.warn(str(urle.reason))
            self.logger.warn("failed exporting sourcelist to server")
        except Exception as exc:
            self.logger.warn(str(exc))
            self.logger.warn("failed exporting sourcelist to server")

    self.outputs["hdf5"] = "succes"
    self.outputs["image"] = output_image
    return 0

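# The uploads above rely on urllib plus a MultipartPostHandler. Purely as an
# illustration, the same multipart POST could be written with the third-party
# `requests` library (a swapped-in technique, not what this code uses):

import requests

def upload_file_sketch(url, path, timeout=2):
    with open(path, "rb") as f:
        response = requests.post(url, files={"file": f}, timeout=timeout)
    response.raise_for_status()   # treat HTTP errors as exceptions
    return response
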
def run(self, executable, initscript, infile, key, db_name, db_user, db_host):
    # executable: path to KernelControl executable
    # initscript: path to lofarinit.sh
    # infile:     MeasurementSet for processing
    # key, db_name, db_user, db_host: database connection parameters
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up kernel parset")
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
        fd, parset_filename = mkstemp()
        kernel_parset = Parset()
        # Use distinct loop variables so the `key` argument is not clobbered
        # while building the parset.
        for parset_key, parset_value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": infile,
            "BBDB.Key": key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmLog": "",
            "ParmLoglevel": "",
            "ParmDB.Sky": infile + ".sky",
            "ParmDB.Instrument": infile + ".instrument"
        }.items():
            kernel_parset.add(parset_key, parset_value)
        kernel_parset.writeFile(parset_filename)
        os.close(fd)
        self.logger.debug("Parset written to %s" % (parset_filename,))

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        env = read_initscript(self.logger, initscript)
        try:
            cmd = [executable, parset_filename, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ):
                # Pass the environment from the initscript to the kernel.
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir, env=env
                )
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable
                )
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            os.unlink(parset_filename)
            shutil.rmtree(working_dir)

    return 0

def run(self, awimager_output, ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param ms_per_image: The measurement sets scheduled to create the image
    :param sourcelist: list of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: largest/maximum baseline used for the image
    :param processed_ms_dir: The measurement sets actually used to create
        the image
    :param fillrootimagegroup_exec: Executable used to add image data to
        the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        ms_per_image_map = DataMap.load(ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)
        # TODO: BUG!! the meta data might contain files that were copied
        # but failed in imager_bbs
        processed_ms_paths = []
        for item in ms_per_image_map:
            path = item.file
            ms_file_name = os.path.split(path)[1]
            # if the ms is in the processed dir (additional check)
            if (ms_file_name in file_list):
                # save the path
                processed_ms_paths.append(
                    os.path.join(processed_ms_dir, ms_file_name))

        # add the information to the image
        try:
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.warn("addImagingInfo threw an exception:")
            self.logger.warn(error)
            # Catch the "already done" error: allows for rerunning
            # of the recipe
            if "addImagingInfo already done" in str(error):
                pass
            else:
                raise Exception(error)
            # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info(
                "Saving image in HDF5 format to: {0}".format(output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error(
                "Exception raised inside pyrap.images: {0}".format(
                    str(error)))
            raise error

def run(self, awimager_output, raw_ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param raw_ms_per_image: The raw measurement sets scheduled to create
        the image
    :param sourcelist: list of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: largest/maximum baseline used for the image
    :param processed_ms_dir: The measurement sets actually used to create
        the image
    :param fillrootimagegroup_exec: Executable used to add image data to
        the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        raw_ms_per_image_map = DataMap.load(raw_ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)
        # TODO: BUG!! the meta data might contain files that were copied
        # but failed in imager_bbs
        processed_ms_paths = []
        for item in raw_ms_per_image_map:
            path = item.file
            raw_ms_file_name = os.path.split(path)[1]
            # if the raw ms is in the processed dir (additional check)
            if (raw_ms_file_name in file_list):
                # save the path
                processed_ms_paths.append(
                    os.path.join(processed_ms_dir, raw_ms_file_name))

        # add the information to the image
        try:
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.error("addImagingInfo threw an exception:")
            self.logger.error(error)
            # Catch the "already done" error: allows for rerunning
            # of the recipe
            if "addImagingInfo already done" in str(error):
                pass
            else:
                raise Exception(error)
            # Exception: Key Name unknown
            # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info("Saving image in HDF5 format to: {0}".format(
                output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error(
                "Exception raised inside pyrap.images: {0}".format(
                    str(error)))
            raise error

def run(self, executable, initscript, infile, key, db_name, db_user, db_host):
    # executable: path to KernelControl executable
    # initscript: path to lofarinit.sh
    # infile:     MeasurementSet for processing
    # key, db_name, db_user, db_host: database connection parameters
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up kernel parset")
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
        fd, parset_filename = mkstemp()
        kernel_parset = Parset()
        # Use distinct loop variables so the `key` argument is not clobbered
        # while building the parset.
        for parset_key, parset_value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": infile,
            "BBDB.Key": key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmLog": "",
            "ParmLoglevel": "",
            "ParmDB.Sky": infile + ".sky",
            "ParmDB.Instrument": infile + ".instrument"
        }.items():
            kernel_parset.add(parset_key, parset_value)
        kernel_parset.writeFile(parset_filename)
        os.close(fd)
        self.logger.debug("Parset written to %s" % (parset_filename,))

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp()
        env = read_initscript(self.logger, initscript)
        try:
            cmd = [executable, parset_filename, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ):
                # Pass the environment from the initscript to the kernel.
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir, env=env)
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable)
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            os.unlink(parset_filename)
            shutil.rmtree(working_dir)

    return 0

def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
    # imager_exec: path to cimager executable
    # vds:         VDS file describing the data to be imaged
    # parset:      imager configuration
    # resultsdir:  place resulting images here
    # start_time:  ) time range to be imaged
    # end_time:    ) in seconds (may be None)
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        self.logger.info("Processing %s" % (vds,))

        # Bail out if the destination exists (can thus resume a partial run).
        # Should be configurable?
        # ------------------------------------------------------------------
        parset_data = Parset(parset)
        image_names = parset_data.getStringVector("Cimager.Images.Names")
        for image_name in image_names:
            outputfile = os.path.join(resultsdir, image_name + ".restored")
            self.logger.info(outputfile)
            if os.path.exists(outputfile):
                self.logger.info("Image already exists: aborting.")
                return 0
        try:
            working_dir = mkdtemp(suffix=".%s" %
                (os.path.basename(__file__),))

            # If a time range has been specified, copy that section of the
            # input MS and only image that.
            # --------------------------------------------------------------
            query = []
            if start_time:
                self.logger.debug("Start time is %s" % start_time)
                start_time = quantity(float(start_time), 's')
                query.append("TIME > %f" % start_time.get('s').get_value())
            if end_time:
                self.logger.debug("End time is %s" % end_time)
                end_time = quantity(float(end_time), 's')
                query.append("TIME < %f" % end_time.get('s').get_value())
            query = " AND ".join(query)
            if query:
                # Select the relevant section of the MS.
                # ----------------------------------------------------------
                self.logger.debug("Query is %s" % query)
                output = os.path.join(working_dir, "timeslice.MS")
                vds_parset = get_parset(vds)
                t = table(vds_parset.getString("FileName"))
                t.query(query, name=output)
                # Patch the updated information into the imager
                # configuration.
                # ----------------------------------------------------------
                parset = patch_parset(parset, {
                        'Cimager.dataset': output
                    }
                )
            else:
                self.logger.debug("No time range selected")

            self.logger.debug("Running cimager")
            with CatchLog4CXX(
                working_dir,
                self.logger.name + "." + os.path.basename(vds)
            ):
                cimager_process = Popen(
                    [imager_exec, "-inputs", parset],
                    stdout=PIPE, stderr=PIPE, cwd=working_dir
                )
                sout, serr = cimager_process.communicate()
            log_process_output("cimager", sout, serr, self.logger)
            if cimager_process.returncode != 0:
                raise CalledProcessError(
                    cimager_process.returncode, imager_exec
                )

            # Dump the resulting images in the pipeline results area.
            # I'm not aware of a foolproof way to predict the image names
            # that will be produced, so we read them from the parset and
            # add standard cimager prefixes.
            # --------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            prefixes = ["image", "psf", "residual", "weights", "sensitivity"]
            self.logger.debug("Copying images to %s" % resultsdir)
            for image_name in image_names:
                for prefix in prefixes:
                    filename = image_name.replace("image", prefix, 1)
                    shutil.move(
                        os.path.join(working_dir, filename),
                        os.path.join(resultsdir, filename)
                    )
                if parset_data.getBool('Cimager.restore'):
                    shutil.move(
                        os.path.join(working_dir, image_name + ".restored"),
                        os.path.join(resultsdir, image_name + ".restored")
                    )
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            shutil.rmtree(working_dir)
        return 0
