def convert_mwimager_parset(parset):
    # NB: nested helper -- relies on self, dataset, frequency, ms_dir_type,
    # ms_dir_ra, ms_dir_dec and restore from the enclosing scope.
    try:
        with patched_parset(
            parset,
            {
                'dataset': dataset,
                'Images.frequency': frequency,
                'msDirType': ms_dir_type,
                'msDirRa': ms_dir_ra,
                'msDirDec': ms_dir_dec,
                'restore': restore # cimager bug: non-restored image unusable
            }
        ) as cimager_parset:
            fd, converted_parset = tempfile.mkstemp(
                dir=self.config.get("layout", "job_directory")
            )
            convert_process = spawn_process(
                [
                    self.inputs['convert_exec'],
                    cimager_parset,
                    converted_parset
                ],
                self.logger
            )
            os.close(fd)
            sout, serr = convert_process.communicate()
            log_process_output(
                self.inputs['convert_exec'], sout, serr, self.logger
            )
            if convert_process.returncode != 0:
                raise subprocess.CalledProcessError(
                    convert_process.returncode, self.inputs['convert_exec']
                )
            return converted_parset
    except OSError as e:
        self.logger.error("Failed to spawn convertimagerparset (%s)" % str(e))
        raise
    except subprocess.CalledProcessError as e:
        self.logger.error(str(e))
        raise
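
# The helper above, like everything below, leans on patched_parset(). The
# real implementation lives in the lofarpipe support modules; what follows is
# only a minimal sketch of the semantics assumed here (the name
# patched_parset_sketch and the append-overrides strategy are illustrative,
# not the library's API).
import os
import tempfile
from contextlib import contextmanager

@contextmanager
def patched_parset_sketch(parset, data, output_dir=None, unlink=True):
    """Copy `parset`, append the key/value overrides in `data`, and yield the
    temporary filename; delete it on exit unless unlink is False."""
    fd, temp_parset = tempfile.mkstemp(dir=output_dir)
    try:
        with open(parset) as src, os.fdopen(fd, 'w') as out:
            out.write(src.read())
            out.write("\n")
            for key, value in data.items():
                # Simplification: assume a later assignment of the same key
                # wins when the parset is parsed.
                out.write("%s=%s\n" % (key, value))
        yield temp_parset
    finally:
        if unlink:
            os.unlink(temp_parset)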
def run(self, infile, outfile, parmdb, sourcedb, parsetfile, executable,
        working_directory, environment, demix_always, demix_if_needed,
        start_time, end_time, nthreads, clobber):
    """
    This function contains all the needed functionality.
    """
    # Debugging info
    self.logger.debug("infile          = %s" % infile)
    self.logger.debug("outfile         = %s" % outfile)
    self.logger.debug("parmdb          = %s" % parmdb)
    self.logger.debug("sourcedb        = %s" % sourcedb)
    self.logger.debug("parsetfile      = %s" % parsetfile)
    self.logger.debug("executable      = %s" % executable)
    self.logger.debug("working_dir     = %s" % working_directory)
    self.logger.debug("environment     = %s" % environment)
    self.logger.debug("demix_always    = %s" % demix_always)
    self.logger.debug("demix_if_needed = %s" % demix_if_needed)
    self.logger.debug("start_time      = %s" % start_time)
    self.logger.debug("end_time        = %s" % end_time)
    self.logger.debug("nthreads        = %s" % nthreads)
    self.logger.debug("clobber         = %s" % clobber)

    self.environment.update(environment)

    # ********************************************************************
    # 1. Preparations: set nthreads, validate input, clean workspace.
    if not nthreads:
        nthreads = 1
    if not outfile:
        outfile = infile
    tmpfile = outfile + '.tmp'

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        # Check if the DPPP executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Make sure that we start with a clean slate
        shutil.rmtree(tmpfile, ignore_errors=True)
        if clobber:
            if outfile == infile:
                self.logger.warn(
                    "Input and output are identical, not clobbering %s" %
                    outfile
                )
            else:
                self.logger.info("Removing previous output %s" % outfile)
                shutil.rmtree(outfile, ignore_errors=True)

        # *****************************************************************
        # 2. Perform housekeeping; test if the work is already done.
        # If input and output files are different, and the output file
        # already exists, then we're done.
        if outfile != infile and os.path.exists(outfile):
            self.logger.info(
                "Output file %s already exists. We're done." % outfile
            )
            self.outputs['ok'] = True
            return 0

        # Create a working copy if input and output are identical, to
        # avoid corrupting the original file if things go awry.
        if outfile == infile:
            self.logger.info(
                "Creating working copy: %s --> %s" % (infile, tmpfile)
            )
            shutil.copytree(infile, tmpfile)

        # *****************************************************************
        # 3. Update the parset with locally calculated information.
        # Put the arguments we need to pass to some private methods in a
        # dict.
        kwargs = {
            'infile': infile,
            'tmpfile': tmpfile,
            'parmdb': parmdb,
            'sourcedb': sourcedb,
            'parsetfile': parsetfile,
            'demix_always': demix_always,
            'demix_if_needed': demix_if_needed,
            'start_time': start_time,
            'end_time': end_time
        }

        # Prepare for the actual DPPP run.
        with patched_parset(
            # *************************************************************
            # 4. Add MS names to the parset, and start/end times if
            #    available.
            # 5. Add demixing parameters to the parset.
            parsetfile, self._prepare_steps(**kwargs),
            output_dir=working_directory, unlink=False
        ) as temp_parset_filename:
            self.logger.debug(
                "Created temporary parset file: %s" % temp_parset_filename
            )
            try:
                working_dir = tempfile.mkdtemp(
                    dir=working_directory,
                    suffix=".%s" % (os.path.basename(__file__),)
                )

                # *************************************************************
                # 6. Run NDPPP.
                cmd = [executable, temp_parset_filename, '1']
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch NDPPP segfaults (a regular occurrence), and
                    # retry.
                    catch_segfaults(
                        cmd, working_dir, self.environment, logger,
                        cleanup=lambda: shutil.rmtree(
                            tmpfile, ignore_errors=True
                        )
                    )
                    # Replace outfile with the updated working copy
                    shutil.rmtree(outfile, ignore_errors=True)
                    os.rename(tmpfile, outfile)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
            finally:
                shutil.rmtree(working_dir)

    # We need some signal to the master script that the script ran ok.
    self.outputs['ok'] = True
    return 0
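
# Step 6 above relies on catch_segfaults() from the lofarpipe support
# modules. This is a sketch of the retry-on-segfault pattern it is assumed
# to implement (catch_segfaults_sketch and max_tries are illustrative, not
# the library's API): rerun the command while the child keeps dying on
# SIGSEGV, invoking the cleanup callable before each retry.
import signal
import subprocess

def catch_segfaults_sketch(cmd, cwd, env, logger, cleanup=None, max_tries=2):
    for attempt in range(max_tries):
        process = subprocess.Popen(cmd, cwd=cwd, env=env)
        process.communicate()
        # On POSIX, a child killed by a signal reports -signum.
        if process.returncode == -signal.SIGSEGV:
            logger.warn("%s segfaulted; retrying" % cmd[0])
            if cleanup is not None:
                cleanup()
            continue
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])
        return process
    raise RuntimeError("%s segfaulted on every attempt" % cmd[0])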
def pipeline_logic(self):
    from to_process import datafiles # datafiles is a list of MS paths.
    with log_time(self.logger):
        # Build a map of compute node <-> data location on storage nodes.
        storage_mapfile = self.run_task(
            "datamapper", datafiles
        )['mapfile']

        # Produce a GVDS file describing the data on the storage nodes.
        self.run_task('vdsmaker', storage_mapfile)

        # Read metadata (start, end times, pointing direction) from GVDS.
        vdsinfo = self.run_task("vdsreader")

        # NDPPP reads the data from the storage nodes, according to the
        # map. It returns a new map, describing the location of data on
        # the compute nodes.
        ndppp_results = self.run_task(
            "ndppp",
            storage_mapfile,
            parset=os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.initial.parset"
            ),
            data_start_time=vdsinfo['start_time'],
            data_end_time=vdsinfo['end_time']
        )

        # Remove baselines which have been fully flagged in any individual
        # subband.
        compute_mapfile = self.run_task(
            "flag_baseline",
            ndppp_results['mapfile'],
            baselines=ndppp_results['fullyflagged']
        )['mapfile']

        # Build a sky model ready for BBS and return the name and flux of
        # the central source.
        ra = quantity(vdsinfo['pointing']['ra']).get_value('deg')
        dec = quantity(vdsinfo['pointing']['dec']).get_value('deg')
        central = self.run_task(
            "skymodel", ra=ra, dec=dec, search_size=2.5
        )

        # Patch the name of the central source into the BBS parset for
        # subtraction.
        with patched_parset(
            self.task_definitions.get("bbs", "parset"),
            {
                'Step.correct.Model.Sources':
                    "[ \"%s\" ]" % (central["source_name"]),
                'Step.subtract.Model.Sources':
                    "[ \"%s\" ]" % (central["source_name"])
            }
        ) as bbs_parset:
            # BBS modifies data in place, so the map produced by NDPPP
            # remains valid.
            self.run_task("bbs", compute_mapfile, parset=bbs_parset)

        # Now run three iterations of DPPP on the output of BBS, twice
        # over: once on CORRECTED_DATA and once on SUBTRACTED_DATA. Clip
        # anything at more than 5 times the flux of the central source.
        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {
                "clip1.amplmax": str(5 * central["source_flux"])
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as corrected_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=corrected_ndppp_parset,
                    suffix=""
                )

        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {
                "msin.datacolumn": "SUBTRACTED_DATA",
                "msout.datacolumn": "SUBTRACTED_DATA",
                "clip1.amplmax": str(5 * central["source_flux"])
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as subtracted_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=subtracted_ndppp_parset,
                    suffix=""
                )

        # Image CORRECTED_DATA.
        self.logger.info("Imaging CORRECTED_DATA")

        # Patch the pointing direction recorded in the VDS file into the
        # parset for the cimager.
        with patched_parset(
            self.task_definitions.get("cimager", "parset"),
            {
                'Images.ra': quantity(
                    vdsinfo['pointing']['ra']).formatted("time"),
                'Images.dec': quantity(
                    vdsinfo['pointing']['dec']).formatted("angle")
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as imager_parset:
            # And run cimager.
            self.outputs['images'] = self.run_task(
                "cimager", compute_mapfile,
                parset=imager_parset,
                results_dir=os.path.join(
                    self.config.get("layout", "results_directory"),
                    "corrected"
                )
            )['images']

        # Image SUBTRACTED_DATA.
        self.logger.info("Imaging SUBTRACTED_DATA")

        # Patch the pointing direction recorded in the VDS file into the
        # parset for the cimager, and change the column to be imaged.
        with patched_parset(
            self.task_definitions.get("cimager", "parset"),
            {
                'Images.ra': quantity(
                    vdsinfo['pointing']['ra']).formatted("time"),
                'Images.dec': quantity(
                    vdsinfo['pointing']['dec']).formatted("angle"),
                'datacolumn': "SUBTRACTED_DATA"
            },
            output_dir=self.config.get("layout", "parset_directory")
        ) as subtracted_imager_parset:
            # And run cimager.
            self.outputs['images'] = self.run_task(
                "cimager", compute_mapfile,
                parset=subtracted_imager_parset,
                results_dir=os.path.join(
                    self.config.get("layout", "results_directory"),
                    "subtracted"
                )
            )['images']
def pipeline_logic(self):
    sys.path.insert(0, "")
    from datafiles_to_process import datafiles # datafiles is a list of MS paths.
    with log_time(self.logger):
        # Build a map of compute node <-> data location on storage nodes.
        storage_mapfile = self.run_task(
            "datamapper_storage", datafiles
        )['mapfile']
        self.logger.info('storage mapfile = %s' % storage_mapfile)

        # Produce a GVDS file describing the data on the storage nodes.
        self.run_task('vdsmaker', storage_mapfile)

        # Read metadata (start, end times, pointing direction) from GVDS.
        vdsinfo = self.run_task("vdsreader")

        # NDPPP reads the data from the storage nodes, according to the
        # map. It returns a new map, describing the location of data on
        # the compute nodes.
        ndppp_results = self.run_task("ndppp", storage_mapfile)

        # Remove baselines which have been fully flagged in any individual
        # subband.
        compute_mapfile = self.run_task(
            "flag_baseline",
            ndppp_results['mapfile'],
            baselines=ndppp_results['fullyflagged']
        )['mapfile']

        parmdb_mapfile = self.run_task(
            "parmdb", compute_mapfile
        )['mapfile']
        sourcedb_mapfile = self.run_task(
            "sourcedb", compute_mapfile
        )['mapfile']

        with patched_parset(
            self.task_definitions.get("bbs", "parset"), {}
        ) as bbs_parset:
            # BBS modifies data in place, so the map produced by NDPPP
            # remains valid.
            self.run_task(
                "bbs", compute_mapfile, parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile
            )

        # Run three iterations of DPPP on the output of BBS.
        with patched_parset(
            os.path.join(
                self.config.get("layout", "parset_directory"),
                "ndppp.1.postbbs.parset"
            ),
            {},  # amplitude clipping disabled in this version
            output_dir=self.config.get("layout", "parset_directory")
        ) as corrected_ndppp_parset:
            for i in repeat(None, 3):
                self.run_task(
                    "ndppp",
                    compute_mapfile,
                    parset=corrected_ndppp_parset,
                    suffix=""
                )

        # Image CORRECTED_DATA with casapy.
        self.run_task("force_mount", compute_mapfile, mount_type="ALT-AZ")
        self.run_task(
            "casapy_clean", compute_mapfile,
            arguments={
                "niter": 500,
                "threshold": '0.0mJy',
                "imsize": [1024, 1024],
                "cell": ['40.0arcsec'],
                "weighting": 'briggs',
                "robust": 0.0,
                "psfmode": 'clark',
                "gridmode": 'widefield',
                "wprojplanes": 128,
                "calready": False,
                "restoringbeam": []
            }
        )
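
# For reference: the arguments dict above is presumably forwarded to casapy's
# clean() task, whose parameters these names match (niter, threshold, imsize,
# cell, weighting, robust, psfmode, gridmode, wprojplanes, restoringbeam).
# A hypothetical equivalent, typed directly at a casapy prompt:
#
#   clean(vis='L2010_06864_SB0.MS', imagename='SB0',
#         niter=500, threshold='0.0mJy', imsize=[1024, 1024],
#         cell=['40.0arcsec'], weighting='briggs', robust=0.0,
#         psfmode='clark', gridmode='widefield', wprojplanes=128,
#         restoringbeam=[])
#
# The vis/imagename values here are made up for illustration; the recipe
# fills them in per entry in compute_mapfile.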