def go(self):
    """
    Run cimager over a distributed dataset.

    Builds a GVDS file describing all input data (via the vdsmaker
    recipe), optionally divides the observation into fixed-length
    timesteps, then dispatches one cimager compute job per VDS part per
    timestep.  The names of the images produced are accumulated in
    self.outputs['images'].

    Returns 0 on success, 1 on failure.
    """
    self.logger.info("Starting cimager run")
    super(cimager, self).go()
    self.outputs['images'] = []

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for cimager")
    gvds_file = os.path.join(
        self.config.get("layout", "job_directory"),
        "vds",
        "cimager.gvds"
    )
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = gvds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(gvds_file)
    outputs = LOFARoutput(self.inputs)
    # cook_recipe returns non-zero on failure, mirroring this method's
    # own convention.
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("cimager GVDS is %s" % (gvds_file,))

    # Read data for processing from the GVDS file
    # ----------------------------------------------------------------------
    gvds_parset = Parset(gvds_file)
    data = []
    for part in range(gvds_parset.getInt('NParts')):
        # FileSys has the form "host:filesystem"; we only need the host.
        host = gvds_parset.getString("Part%d.FileSys" % part).split(":")[0]
        vds = gvds_parset.getString("Part%d.Name" % part)
        data.append((host, vds))

    # Divide data into timesteps for imaging
    # timesteps is a list of (start, end, results directory) tuples
    # ----------------------------------------------------------------------
    timesteps = []
    results_dir = self.inputs['results_dir']
    if self.inputs['timestep'] == 0:
        self.logger.info("No timestep specified; imaging all data")
        timesteps = [(None, None, results_dir)]
    else:
        self.logger.info(
            "Using timestep of %s s" % self.inputs['timestep']
        )
        gvds = get_parset(gvds_file)
        start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
        end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
        step = float(self.inputs['timestep'])
        while start_time < end_time:
            # Each timestep writes into its own results subdirectory,
            # named after the timestep's start time in seconds.
            timesteps.append(
                (
                    start_time,
                    start_time + step,
                    os.path.join(results_dir, str(start_time))
                )
            )
            start_time += step

    # Run each cimager process in a separate thread
    # ----------------------------------------------------------------------
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    for label, timestep in enumerate(timesteps):
        self.logger.info("Processing timestep %d" % label)
        jobs = []
        parsets = []
        start_time, end_time, resultsdir = timestep
        for host, vds in data:
            vds_data = Parset(vds)
            frequency_range = [
                vds_data.getDoubleVector("StartFreqs")[0],
                vds_data.getDoubleVector("EndFreqs")[-1]
            ]
            parsets.append(
                self.__get_parset(
                    os.path.basename(
                        vds_data.getString('FileName')
                    ).split('.')[0],
                    vds_data.getString("FileName"),
                    str(frequency_range),
                    vds_data.getStringVector("Extra.FieldDirectionType")[0],
                    vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                    vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                    'True',  # cimager bug: non-restored image unusable
                )
            )
            jobs.append(
                ComputeJob(
                    host, command,
                    arguments=[
                        self.inputs['imager_exec'],
                        vds,
                        parsets[-1],
                        resultsdir,
                        start_time,
                        end_time
                    ]
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        # Collect the image names produced in this timestep, then remove
        # the temporary per-job parset files.
        for parset_file in parsets:
            parset_data = Parset(parset_file)
            self.outputs['images'].extend(
                parset_data.getStringVector("Cimager.Images.Names")
            )
        for parset_file in parsets:
            os.unlink(parset_file)

    # Check if we recorded a failing process before returning
    # ----------------------------------------------------------------------
    if self.error.isSet():
        self.logger.warn("Failed imager process detected")
        return 1
    else:
        return 0
def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
    """
    Image a single VDS part with cimager, optionally restricted to a
    time range, and move the resulting images into resultsdir.

    Returns 0 on success (or when the restored image already exists,
    allowing a partial run to be resumed); 1 when cimager exits
    non-zero.
    """
    # imager_exec: path to cimager executable
    # vds: VDS file describing the data to be imaged
    # parset: imager configuration
    # resultsdir: place resulting images here
    # start_time: ) time range to be imaged
    # end_time: ) in seconds (may be None)
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        self.logger.info("Processing %s" % (vds, ))

        # Bail out if destination exists (can thus resume a partial run).
        # Should be configurable?
        # ------------------------------------------------------------------
        parset_data = Parset(parset)
        image_names = parset_data.getStringVector("Cimager.Images.Names")
        for image_name in image_names:
            outputfile = os.path.join(resultsdir, image_name + ".restored")
            self.logger.info(outputfile)
            if os.path.exists(outputfile):
                self.logger.info("Image already exists: aborting.")
                return 0
        try:
            # Scratch area for cimager output before it is moved to
            # resultsdir.
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__), ))

            # If a time range has been specified, copy that section of the
            # input MS and only image that.
            # --------------------------------------------------------------
            query = []
            if start_time:
                self.logger.debug("Start time is %s" % start_time)
                start_time = quantity(float(start_time), 's')
                query.append("TIME > %f" % start_time.get('s').get_value())
            if end_time:
                self.logger.debug("End time is %s" % end_time)
                end_time = quantity(float(end_time), 's')
                query.append("TIME < %f" % end_time.get('s').get_value())
            query = " AND ".join(query)
            if query:
                # Select relevant section of MS.
                # ----------------------------------------------------------
                self.logger.debug("Query is %s" % query)
                output = os.path.join(working_dir, "timeslice.MS")
                vds_parset = get_parset(vds)
                # NOTE(review): t.query() writes the selected rows to a new
                # table on disk at `output` — presumably a casacore TaQL
                # selection; confirm against the table API in use.
                t = table(vds_parset.getString("FileName"))
                t.query(query, name=output)
                # Patch updated information into imager configuration.
                # ----------------------------------------------------------
                parset = patch_parset(parset, {'Cimager.dataset': output})
            else:
                self.logger.debug("No time range selected")
            self.logger.debug("Running cimager")
            # Capture Log4CXX output emitted while cimager runs under this
            # recipe's logger.
            with CatchLog4CXX(
                working_dir,
                self.logger.name + "." + os.path.basename(vds)):
                cimager_process = Popen([imager_exec, "-inputs", parset],
                    stdout=PIPE, stderr=PIPE, cwd=working_dir)
                sout, serr = cimager_process.communicate()
            log_process_output("cimager", sout, serr, self.logger)
            if cimager_process.returncode != 0:
                raise CalledProcessError(cimager_process.returncode, imager_exec)

            # Dump the resulting images in the pipeline results area.
            # I'm not aware of a foolproof way to predict the image names
            # that will be produced, so we read them from the
            # parset and add standard cimager prefixes.
            # --------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            prefixes = ["image", "psf", "residual", "weights", "sensitivity"]
            self.logger.debug("Copying images to %s" % resultsdir)
            for image_name in image_names:
                for prefix in prefixes:
                    # Image names in the parset start with "image"; derive
                    # the sibling products by swapping that first prefix.
                    filename = image_name.replace("image", prefix, 1)
                    shutil.move(os.path.join(working_dir, filename),
                                os.path.join(resultsdir, filename))
                if parset_data.getBool('Cimager.restore'):
                    shutil.move(
                        os.path.join(working_dir, image_name + ".restored"),
                        os.path.join(resultsdir, image_name + ".restored"))
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the finally suite is missing in this copy of
            # the source — presumably it removed working_dir (and any
            # patched parset); confirm against the original file.
def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
    """
    Image a single VDS part with cimager, optionally restricted to a
    time range, and move the resulting images into resultsdir.

    Returns 0 on success (or when the restored image already exists,
    allowing a partial run to be resumed); 1 when cimager exits
    non-zero.
    """
    # NOTE(review): this appears to be a second, identical copy of run()
    # in the same file — consider deduplicating.
    # imager_exec: path to cimager executable
    # vds: VDS file describing the data to be imaged
    # parset: imager configuration
    # resultsdir: place resulting images here
    # start_time: ) time range to be imaged
    # end_time: ) in seconds (may be None)
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        self.logger.info("Processing %s" % (vds,))

        # Bail out if destination exists (can thus resume a partial run).
        # Should be configurable?
        # ------------------------------------------------------------------
        parset_data = Parset(parset)
        image_names = parset_data.getStringVector("Cimager.Images.Names")
        for image_name in image_names:
            outputfile = os.path.join(resultsdir, image_name + ".restored")
            self.logger.info(outputfile)
            if os.path.exists(outputfile):
                self.logger.info("Image already exists: aborting.")
                return 0
        try:
            # Scratch area for cimager output before it is moved to
            # resultsdir.
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))

            # If a time range has been specified, copy that section of the
            # input MS and only image that.
            # --------------------------------------------------------------
            query = []
            if start_time:
                self.logger.debug("Start time is %s" % start_time)
                start_time = quantity(float(start_time), 's')
                query.append("TIME > %f" % start_time.get('s').get_value())
            if end_time:
                self.logger.debug("End time is %s" % end_time)
                end_time = quantity(float(end_time), 's')
                query.append("TIME < %f" % end_time.get('s').get_value())
            query = " AND ".join(query)
            if query:
                # Select relevant section of MS.
                # ----------------------------------------------------------
                self.logger.debug("Query is %s" % query)
                output = os.path.join(working_dir, "timeslice.MS")
                vds_parset = get_parset(vds)
                # NOTE(review): t.query() writes the selected rows to a new
                # table on disk at `output` — presumably a casacore TaQL
                # selection; confirm against the table API in use.
                t = table(vds_parset.getString("FileName"))
                t.query(query, name=output)
                # Patch updated information into imager configuration.
                # ----------------------------------------------------------
                parset = patch_parset(parset, {
                    'Cimager.dataset': output
                })
            else:
                self.logger.debug("No time range selected")
            self.logger.debug("Running cimager")
            # Capture Log4CXX output emitted while cimager runs under this
            # recipe's logger.
            with CatchLog4CXX(
                working_dir,
                self.logger.name + "." + os.path.basename(vds)
            ):
                cimager_process = Popen(
                    [imager_exec, "-inputs", parset],
                    stdout=PIPE, stderr=PIPE, cwd=working_dir
                )
                sout, serr = cimager_process.communicate()
            log_process_output("cimager", sout, serr, self.logger)
            if cimager_process.returncode != 0:
                raise CalledProcessError(
                    cimager_process.returncode, imager_exec
                )

            # Dump the resulting images in the pipeline results area.
            # I'm not aware of a foolproof way to predict the image names
            # that will be produced, so we read them from the
            # parset and add standard cimager prefixes.
            # --------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            prefixes = ["image", "psf", "residual", "weights", "sensitivity"]
            self.logger.debug("Copying images to %s" % resultsdir)
            for image_name in image_names:
                for prefix in prefixes:
                    # Image names in the parset start with "image"; derive
                    # the sibling products by swapping that first prefix.
                    filename = image_name.replace("image", prefix, 1)
                    shutil.move(
                        os.path.join(working_dir, filename),
                        os.path.join(resultsdir, filename)
                    )
                if parset_data.getBool('Cimager.restore'):
                    shutil.move(
                        os.path.join(working_dir, image_name + ".restored"),
                        os.path.join(resultsdir, image_name + ".restored")
                    )
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the finally suite is missing in this copy of
            # the source — presumably it removed working_dir (and any
            # patched parset); confirm against the original file.
def go(self):
    """
    Run cimager over a distributed dataset.

    First builds a GVDS file describing the input data (via the
    vdsmaker recipe).  The observation is then imaged either as a whole
    (timestep == 0) or in fixed-length timesteps, one cimager compute
    job per VDS part per timestep.  Produced image names are collected
    in self.outputs['images'].

    Returns 0 on success, 1 on failure.
    """
    self.logger.info("Starting cimager run")
    super(cimager, self).go()
    self.outputs['images'] = []

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for cimager")
    gvds_file = os.path.join(
        self.config.get("layout", "job_directory"),
        "vds",
        "cimager.gvds"
    )
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = gvds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(gvds_file)
    outputs = LOFARoutput(self.inputs)
    # cook_recipe returns non-zero on failure, mirroring this method's
    # own convention.
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("cimager GVDS is %s" % (gvds_file,))

    # Read data for processing from the GVDS file
    # ----------------------------------------------------------------------
    gvds_parset = Parset(gvds_file)
    data = []
    for part in range(gvds_parset.getInt('NParts')):
        # FileSys has the form "host:filesystem"; only the host is needed.
        host = gvds_parset.getString("Part%d.FileSys" % part).split(":")[0]
        vds = gvds_parset.getString("Part%d.Name" % part)
        data.append((host, vds))

    # Divide data into timesteps for imaging
    # timesteps is a list of (start, end, results directory) tuples
    # ----------------------------------------------------------------------
    timesteps = []
    results_dir = self.inputs['results_dir']
    if self.inputs['timestep'] == 0:
        self.logger.info("No timestep specified; imaging all data")
        timesteps = [(None, None, results_dir)]
    else:
        self.logger.info(
            "Using timestep of %s s" % self.inputs['timestep']
        )
        gvds = get_parset(gvds_file)
        start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
        end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
        step = float(self.inputs['timestep'])
        while start_time < end_time:
            # Each timestep writes into its own results subdirectory,
            # named after the timestep's start time in seconds.
            timesteps.append(
                (
                    start_time,
                    start_time + step,
                    os.path.join(results_dir, str(start_time))
                )
            )
            start_time += step

    # Run each cimager process in a separate thread
    # ----------------------------------------------------------------------
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    for label, timestep in enumerate(timesteps):
        self.logger.info("Processing timestep %d" % label)
        jobs = []
        parsets = []
        start_time, end_time, resultsdir = timestep
        for host, vds in data:
            vds_data = Parset(vds)
            frequency_range = [
                vds_data.getDoubleVector("StartFreqs")[0],
                vds_data.getDoubleVector("EndFreqs")[-1]
            ]
            parsets.append(
                self.__get_parset(
                    os.path.basename(
                        vds_data.getString('FileName')
                    ).split('.')[0],
                    vds_data.getString("FileName"),
                    str(frequency_range),
                    vds_data.getStringVector("Extra.FieldDirectionType")[0],
                    vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                    vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                    'True',  # cimager bug: non-restored image unusable
                )
            )
            jobs.append(
                ComputeJob(
                    host, command,
                    arguments=[
                        self.inputs['imager_exec'],
                        vds,
                        parsets[-1],
                        resultsdir,
                        start_time,
                        end_time
                    ]
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        # Collect the image names produced in this timestep, then remove
        # the temporary per-job parset files.
        for parset_file in parsets:
            parset_data = Parset(parset_file)
            self.outputs['images'].extend(
                parset_data.getStringVector("Cimager.Images.Names")
            )
        for parset_file in parsets:
            os.unlink(parset_file)

    # Check if we recorded a failing process before returning
    # ----------------------------------------------------------------------
    if self.error.isSet():
        self.logger.warn("Failed imager process detected")
        return 1
    else:
        return 0