def _run_rficonsole(self, rficonsole_executable, time_slice_dir,
                    time_slices):
    """
    _run_rficonsole runs the rficonsole application on the supplied
    timeslices in time_slices.
    """
    # loop all measurement sets
    rfi_temp_dir = os.path.join(time_slice_dir, "rfi_temp_dir")
    create_directory(rfi_temp_dir)

    try:
        rfi_console_proc_group = SubProcessGroup(self.logger)
        for time_slice in time_slices:
            # Each rficonsole needs its own working space for temp files
            temp_slice_path = os.path.join(rfi_temp_dir,
                os.path.basename(time_slice))
            create_directory(temp_slice_path)

            # construct the rficonsole command
            self.logger.info(time_slice)
            command = [rficonsole_executable, "-indirect-read",
                       time_slice]
            self.logger.info("executing rficonsole command: {0}".format(
                " ".join(command)))

            # Add the command to the process group
            rfi_console_proc_group.run(command, cwd=temp_slice_path)

        # wait for all to finish
        if rfi_console_proc_group.wait_for_finish() != None:
            raise Exception("an rfi_console_proc_group run failed!")
    finally:
        shutil.rmtree(rfi_temp_dir)
def _write_datamap_to_file(self, datamap, mapfile_name, message=""):
    """
    Write the supplied map to a mapfile in the mapfile directory of the
    jobs dir, using the filename supplied in mapfile_name.
    Return the full path to the created file.
    If the supplied datamap is None the file is touched if it does not
    exist, but existing files are kept as is.
    """
    mapfile_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    # create the mapfile_dir if it does not exist
    create_directory(mapfile_dir)

    # write the content to a new map file
    mapfile_path = os.path.join(mapfile_dir,
                                "{0}.map".format(mapfile_name))

    # display a debug log entry with path and message
    if datamap != None:
        datamap.save(mapfile_path)
        self.logger.debug(
            "Wrote mapfile <{0}>: {1}".format(mapfile_path, message))
    else:
        if not os.path.exists(mapfile_path):
            DataMap().save(mapfile_path)
            self.logger.debug(
                "Touched mapfile <{0}>: {1}".format(mapfile_path, message))

    return mapfile_path
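# Usage sketch (illustrative, not from the original recipe): the helper above
# can both persist an existing DataMap and touch an empty placeholder mapfile.
# The mapfile names and messages below are hypothetical.
#
#   corrected_mapfile = self._write_datamap_to_file(
#       datamap, "corrected_data", "datamap after correction")
#   placeholder_mapfile = self._write_datamap_to_file(
#       None, "pending_data", "placeholder, touched only if missing")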
def _prepare_steps(self, **kwargs):
    """
    Prepare for running the NDPPP program. This means, for one thing,
    patching the parsetfile with the correct input/output MS names,
    start/end times if available, etc. If a demixing step must be
    performed, some extra work needs to be done.

    Returns: patch dictionary that must be applied to the parset.
    """
    self.logger.debug(
        "Time interval: %s %s" % (kwargs['start_time'], kwargs['end_time']))
    # Create output directory for output MS.
    create_directory(os.path.dirname(kwargs['tmpfile']))

    patch_dictionary = {
        'msin': kwargs['infile'],
        'msout': kwargs['tmpfile'],
        'uselogger': 'True'
    }
    if kwargs['start_time']:
        patch_dictionary['msin.starttime'] = kwargs['start_time']
    if kwargs['end_time']:
        patch_dictionary['msin.endtime'] = kwargs['end_time']

    # If we need to do a demixing step, we have to do some extra work.
    # We have to read the parsetfile to check this.
    parset = parameterset(kwargs['parsetfile'])
    for step in parset.getStringVector('steps'):
        if parset.getString(step + '.type', '').startswith('demix'):
            patch_dictionary.update(
                self._prepare_demix_step(step, **kwargs))

    # Return the patch dictionary that must be applied to the parset.
    return patch_dictionary
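# Illustrative sketch (assumptions marked): the patch dictionary returned by
# _prepare_steps() is typically applied to the NDPPP parset with patch_parset,
# which is also used elsewhere in this framework (see the new_bbs recipe
# below). The import location, the temporary-parset handling, and run_ndppp()
# are assumptions/hypothetical.
#
#   from lofarpipe.support.utilities import patch_parset
#
#   patch = self._prepare_steps(infile=infile, tmpfile=tmpfile,
#                               parsetfile=parsetfile,
#                               start_time=start_time, end_time=end_time)
#   temp_parset = patch_parset(parameterset(parsetfile), patch)
#   try:
#       run_ndppp(temp_parset)   # hypothetical helper that invokes NDPPP
#   finally:
#       os.unlink(temp_parset)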
def run(self, infile, outfile, executable, environment, sigma,
        use_parmexportcal):

    self.environment.update(environment)
    if os.path.exists(infile):
        self.logger.info("Processing {0}".format(infile))
    else:
        self.logger.error(
            "Instrument model file %s does not exist" % infile)
        return 1

    # Create output directory (if it doesn't already exist)
    create_directory(os.path.dirname(outfile))

    # Remove the target outfile if there: parmexportcal fails otherwise
    if os.path.exists(outfile):
        shutil.rmtree(outfile)

    # *********************************************************************
    # 1. Select correction method
    if not use_parmexportcal:
        # *****************************************************************
        # 3. use gainoutliercorrect from Swinbank
        self.logger.info(
            "Using the gainoutlier correction based on editparmdb")
        self._filter_stations_parmdb(infile, outfile, sigma)
        return 0

    # else:
    if not os.access(executable, os.X_OK):
        self.logger.error(
            "Could not find parmexport call executable at: {0}".format(
                executable))
        self.logger.error("bailing out!")
        return 1

    # *********************************************************************
    # 2. Call parmexportcal for gain correction
    self.logger.info(
        "Using the gainoutlier correction based on parmexportcal")
    try:
        temp_dir = tempfile.mkdtemp(
            suffix=".%s" % (os.path.basename(__file__),))
        with CatchLog4CPlus(
            temp_dir,
            self.logger.name + '.' + os.path.basename(infile),
            os.path.basename(executable)
        ) as logger:
            cmd = [executable, '-in', infile, '-out', outfile]
            self.logger.debug(
                "Parmexportcal call: {0} ".format(" ".join(cmd)))
            catch_segfaults(cmd, temp_dir, self.environment, logger)
    except Exception, excp:
        self.logger.error(str(excp))
        return 1
def test_filter_stations_parmdb(self):
    file_path_in = os.path.join(self.tempDir, "input")
    create_directory(file_path_in)
    file_path_out = os.path.join(self.tempDir, "fullName")
    GainOutlierDetection = GainOutlierCorrectionWrapper()

    # Call the major function
    # No errors should be thrown...
    parmdb = GainOutlierDetection._filter_stations_parmdb(
        file_path_in, file_path_out, 2)
def _copy_single_file_using_rsync(self, source_node, source_path,
                                  target_path):
    # assure that the target dir exists (rsync would create it, but an
    # error raised from the python code gives a nicer error message)
    message = "No write access to target path: {0}".format(
        os.path.dirname(target_path))
    # If not existing, try to create the dir; catch no permission
    try:
        create_directory(os.path.dirname(target_path))
    except OSError, e:
        if e.errno == 13:  # No permission
            self.logger.error(message)
            raise IOError(message)
        else:
            raise e
def run(self, concatenated_measurement_set, sourcedb_target_path,
        monet_db_hostname, monet_db_port, monet_db_name, monet_db_user,
        monet_db_password, assoc_theta, parmdb_executable, slice_paths,
        parmdb_suffix, environment, working_directory, makesourcedb_path,
        source_list_path_extern, major_cycle):

    self.logger.info("Starting imager_create_dbs Node")
    self.environment.update(environment)

    # *******************************************************************
    # 0. Create the directories used in this recipe
    create_directory(working_directory)

    # *******************************************************************
    # 1. get a sourcelist: from gsm or from file
    source_list, append = self._create_source_list(
        source_list_path_extern, sourcedb_target_path,
        concatenated_measurement_set, monet_db_hostname,
        monet_db_port, monet_db_name, monet_db_user, monet_db_password,
        assoc_theta)

    # *******************************************************************
    # 2. convert it to a sourcedb (casa table)
    if self._create_source_db(source_list, sourcedb_target_path,
                              working_directory, makesourcedb_path,
                              append) == None:
        self.logger.error("failed creating sourcedb")
        return 1

    # *******************************************************************
    # 3. Create an empty parmdb for each timeslice
    parmdbs = self._create_parmdb_for_timeslices(parmdb_executable,
                                                 slice_paths, parmdb_suffix)
    if parmdbs == None:
        self.logger.error("failed creating parmdb for slices")
        return 1

    # *******************************************************************
    # 4. Add the created databases to the measurement set
    self._add_dbs_to_ms(concatenated_measurement_set, sourcedb_target_path,
                        parmdbs, major_cycle)

    # *******************************************************************
    # 5. Assign the outputs
    self.outputs["sourcedb"] = sourcedb_target_path
    self.outputs["parmdbs"] = parmdbs
    return 0
def copy(path_from, dir_to, clobber, use_symlinks=False):
    """
    Copy a file or directory

    Parameters
    ----------
    path_from : str
        Input file or directory
    dir_to : str
        Output directory
    clobber : bool
        Clobber existing file or directory?
    use_symlinks : bool, optional
        Use symlinks instead of copying files?

    """
    if not os.path.exists(path_from):
        log.warning('{} not found. Please check the '
                    'working directory'.format(path_from))
        return

    path_to = os.path.join(dir_to, os.path.basename(path_from))
    if os.path.exists(path_to):
        if not clobber:
            log.warning(' Destination "{}" exists and clobber = False. '
                        'Skipping it...'.format(path_to))
            return
    else:
        create_directory(dir_to)

    if use_symlinks:
        if os.path.exists(path_to):
            p = subprocess.Popen('rm -rf {0}'.format(path_to), shell=True,
                                 stdout=subprocess.PIPE)
            r = p.communicate()
        os.symlink(path_from, path_to)
    else:
        p = subprocess.Popen('rsync -a {0} {1}'.format(path_from, dir_to),
                             shell=True, stdout=subprocess.PIPE)
        r = p.communicate()
        if p.returncode != 0:
            log.critical('rsync exited abnormally when attempting to '
                         'archive {}'.format(path_from))
            sys.exit(1)
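# Example driver for the copy() helper above (hypothetical paths; assumes the
# module-level `log` has been configured):
if __name__ == '__main__':
    # Symlink a large directory of measurement sets instead of duplicating it,
    # keeping any existing archive copy (clobber=False)...
    copy('/data/scratch/factor_run/chunks', '/archive/factor_run',
         clobber=False, use_symlinks=True)
    # ...but make a real copy of a small image, overwriting a stale one.
    copy('/data/scratch/factor_run/field.fits', '/archive/factor_run/images',
         clobber=True)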
def test__create_parmdb_missing_exec(self):
    """
    Test the create parmdb function with a missing executable:
    it should return 1 and not create the target directory.
    """
    path_to_create = os.path.join(self.test_path, "testParmdb")
    create_directory(path_to_create)
    parmdb_output = os.path.join(path_to_create, "parmdbs")
    parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/incorrectExecutable"

    self.assertTrue(
        1 == self.imager_create_dbs._create_parmdb(parmdb_executable,
                                                   parmdb_output),
        self.imager_create_dbs.logger.last())
    self.assertFalse(
        os.path.exists(parmdb_output),
        "target dir to be created by parmdb does exist, while it should not")

    shutil.rmtree(path_to_create)
def _save_active_mapfiles(self, cycle_idx, mapfile_dir, mapfiles={}):
    """
    Receives a dict with the active mapfiles (variable name to path).
    Each mapfile is copied to a separate directory and saved.
    This allows us to return to the last successful run.
    """
    # create a directory for storing the saved mapfiles, use cycle idx
    mapfile_for_cycle_dir = os.path.join(mapfile_dir,
                                         "cycle_" + str(cycle_idx))
    create_directory(mapfile_for_cycle_dir)

    saved_mapfiles = {}
    for (var_name, mapfile_path) in list(mapfiles.items()):
        shutil.copy(mapfile_path, mapfile_for_cycle_dir)
        # save the newly created file: get the filename and append it
        # to the directory name
        saved_mapfiles[var_name] = os.path.join(
            mapfile_for_cycle_dir, os.path.basename(mapfile_path))

    return saved_mapfiles
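# Usage sketch (illustrative): at the end of each major cycle the currently
# active mapfiles could be snapshotted so that a later run can pick up from
# the last successful cycle. The variable names below are hypothetical.
#
#   saved = self._save_active_mapfiles(idx_cycle, self.mapfile_dir, {
#       "concat_ms": concat_ms_map_path,
#       "sourcedb": sourcedb_map_path,
#   })
#   # saved["concat_ms"] now points at the copy under cycle_<idx_cycle>/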
def _write_parset_to_file(self, parset, parset_name, message):
    """
    Write the supplied parameterset to the parameter set directory in
    the jobs dir, using the filename supplied in parset_name.
    Return the full path to the created file.
    """
    parset_dir = os.path.join(
        self.config.get("layout", "job_directory"), "parsets")
    # create the parset dir if it does not exist
    create_directory(parset_dir)

    # write the content to a new parset file
    parset_path = os.path.join(parset_dir,
                               "{0}.parset".format(parset_name))
    parset.writeFile(parset_path)

    # display a debug log entry with path and message
    self.logger.debug("Wrote parset to path <{0}> : {1}".format(
        parset_path, message))

    return parset_path
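# Usage sketch (illustrative; the subset prefix, name and message are
# hypothetical):
#
#   ndppp_parset = self.parset.makeSubset('DPPP.')
#   ndppp_parset_path = self._write_parset_to_file(
#       ndppp_parset, "ndppp", "DPPP parameters for this run")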
def go(self):
    self.logger.info("Starting CEP-II datamapper run")
    super(cep2_datamapper, self).go()

    if self.inputs['parset']:
        datamap = self._read_files()
    elif self.inputs['observation_dir']:
        datamap = self._search_files()
    else:
        self.logger.error("Either observation_dir or parset must be given")
        return 1

    self.logger.info("Found %i datasets to process." % len(datamap))
    self.logger.debug("datamap = %s" % datamap)

    # Write datamap-file
    create_directory(os.path.dirname(self.inputs['mapfile']))
    store_data_map(self.inputs['mapfile'], datamap)
    self.logger.debug("Wrote mapfile: %s" % self.inputs['mapfile'])

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def test__create_parmdb_for_timeslices_except(self):
    """
    Test the erroneous behaviour of _create_parmdb_for_timeslices with a
    missing executable: it should return None and create no directories.
    """
    path_to_create = os.path.join(self.test_path, "testParmdb")
    parmdb_ms_output = os.path.join(path_to_create, "parmdbs")
    create_directory(parmdb_ms_output)
    parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/missingExcecutable"

    # Create a number of paths to supply to the create function
    ms_paths = []
    for idx in range(5):
        ms_paths.append(os.path.join(parmdb_ms_output, str(idx)))

    self.assertTrue(
        self.imager_create_dbs._create_parmdb_for_timeslices(
            parmdb_executable, ms_paths, ".parmdb") == None,
        self.imager_create_dbs.logger.last())

    final_ms_path = os.path.join(parmdb_ms_output,
                                 "time_slice_8.dppp.ms.parmdb")
    self.assertFalse(os.path.exists(final_ms_path))
def run_rficonsole(rficonsole_executable, temp_dir,
                   input_ms_list, logger, resourceMonitor):
    """
    Run the rficonsole application on the supplied timeslices in
    input_ms_list.
    This functionality has also been implemented in BBS.
    """
    # loop all measurement sets
    rfi_temp_dir = os.path.join(temp_dir, "rfi_temp_dir")
    create_directory(rfi_temp_dir)

    try:
        rfi_console_proc_group = SubProcessGroup(logger=logger,
                                                 usageStats=resourceMonitor)
        for time_slice in input_ms_list:
            # Each rficonsole needs its own working space for temp files
            temp_slice_path = os.path.join(rfi_temp_dir,
                os.path.basename(time_slice))
            create_directory(temp_slice_path)

            # construct the rficonsole command
            logger.info(time_slice)
            command = [rficonsole_executable, "-indirect-read",
                       time_slice]
            logger.info("executing rficonsole command: {0}".format(
                " ".join(command)))

            # Add the command to the process group
            rfi_console_proc_group.run(command, cwd=temp_slice_path)

        # wait for all to finish
        if rfi_console_proc_group.wait_for_finish() != None:
            raise Exception("an rfi_console_proc_group run failed!")
    finally:
        shutil.rmtree(rfi_temp_dir)
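# Usage sketch (illustrative): running rficonsole over two time slices with a
# plain Python logger and no resource monitor. The executable path and MS
# names are hypothetical, and passing None as the resource monitor is an
# assumption about SubProcessGroup.
#
#   import logging
#   logger = logging.getLogger("rficonsole")
#   run_rficonsole("/opt/lofar/bin/rficonsole",
#                  "/data/scratch/slices",
#                  ["/data/scratch/slices/time_slice_0.dppp.ms",
#                   "/data/scratch/slices/time_slice_1.dppp.ms"],
#                  logger, None)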
def go(self):
    self.logger.info("Starting storagemapper run")
    super(storagemapper, self).go()

    # We read the storage node name out of the path
    # and append the local filename (ie, on the storage node) to the map
    # -------------------------------------------------------------------
    data = defaultdict(list)
    for filename in self.inputs['args']:
        host = filename.split(os.path.sep)[3]
        data[host].append(filename.split(host)[-1])

    # Dump the generated mapping to a parset
    # -------------------------------------------------------------------
    parset = Parset()
    for host, filenames in data.iteritems():
        parset.addStringVector(host, filenames)

    create_directory(os.path.dirname(self.inputs['mapfile']))
    parset.writeFile(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']

    return 0
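# Standalone sketch of the path convention assumed by storagemapper above: the
# storage node name is taken to be the fourth component of the absolute path,
# and everything after it is the node-local path. The example path below is
# hypothetical.
import os
from collections import defaultdict

def group_by_storage_node(filenames):
    """Group absolute dataset paths by the storage node encoded in them."""
    data = defaultdict(list)
    for filename in filenames:
        host = filename.split(os.path.sep)[3]
        data[host].append(filename.split(host)[-1])
    return data

# group_by_storage_node(['/net/sub5/lse013/data4/L12345/SB000.MS'])
# -> {'lse013': ['/data4/L12345/SB000.MS']}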
def _copy_instrument_files(self, mapfile_dir):
    # For the copy recipe a target mapfile is needed:
    # create a target map based on the node and the dir in the input data
    # map, with the filename based on the input.
    copier_map_path = os.path.join(mapfile_dir, "copier")
    create_directory(copier_map_path)
    target_map = self._create_target_map_for_instruments()

    # Write the two needed maps to file
    source_path = os.path.join(copier_map_path, "source_instruments.map")
    self.input_data['instrument'].save(source_path)

    target_path = os.path.join(copier_map_path, "target_instruments.map")
    target_map.save(target_path)

    copied_files_path = os.path.join(copier_map_path,
                                     "copied_instruments.map")

    # The output of the copier is a mapfile containing the host, path
    # pairs of all successfully copied files.
    copied_instruments_mapfile = self.run_task("copier",
        mapfile_source=source_path,
        mapfile_target=target_path,
        mapfiles_dir=copier_map_path,
        mapfile=copied_files_path,
        allow_move=False)['mapfile_target_copied']

    # Some copy actions might fail; the skip fields in the other map-files
    # need to be updated to reflect this.
    self.input_data['instrument'] = DataMap.load(copied_instruments_mapfile)
    for data, inst, outp in zip(
        self.input_data['data'],
        self.input_data['instrument'],
        self.output_data['data']
    ):
        data.skip = inst.skip = outp.skip = (
            data.skip or inst.skip or outp.skip
        )
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    # *********************************************************************
    # 1. Prepare phase, collect data from parset and input mapfiles.
    py_parset = self.parset.makeSubset(
        self.parset.fullModuleName('PythonControl') + '.')

    # Get input/output-data products specifications.
    self._get_io_product_specs()

    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")

    # Create directories for temporary parset- and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # Write input- and output data map-files
    input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile")
    self.input_data.save(input_data_mapfile)
    output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
    self.output_data.save(output_data_mapfile)

    if len(self.input_data) == 0:
        self.logger.warn("No input data files to process. Bailing out!")
        return 0

    self.logger.debug("Processing: %s" %
        ', '.join(str(f) for f in self.input_data))

    # *********************************************************************
    # 2. Create VDS-file and databases. The latter are needed when doing
    #    demixing within DPPP.
    with duration(self, "vdsmaker"):
        gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds']

    # Read metadata (start, end times, pointing direction) from GVDS.
    with duration(self, "vdsreader"):
        vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

    # Create a parameter database that will be used by the NDPPP demixing
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task(
            "setupparmdb", input_data_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
            suffix='.dppp.parmdb'
        )['mapfile']

    # Create a source database from a user-supplied sky model
    # The user-supplied sky model can either be a name, in which case the
    # pipeline will search for a file <name>.skymodel in the default search
    # path $LOFARROOT/share/pipeline/skymodels; or a full path.
    # It is an error if the file does not exist.
    skymodel = py_parset.getString('PreProcessing.SkyModel')
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(
            # This should really become os.environ['LOFARROOT']
            self.config.get('DEFAULT', 'lofarroot'),
            'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
        )
    if not os.path.isfile(skymodel):
        raise PipelineException("Skymodel %s does not exist" % skymodel)
    with duration(self, "setupsourcedb"):
        sourcedb_mapfile = self.run_task(
            "setupsourcedb", input_data_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
            skymodel=skymodel,
            suffix='.dppp.sourcedb',
            type='blob'
        )['mapfile']

    # *********************************************************************
    # 3. Average and flag data, using NDPPP.
    ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
    py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

    # Run the Default Pre-Processing Pipeline (DPPP);
    with duration(self, "ndppp"):
        self.run_task("ndppp",
            (input_data_mapfile, output_data_mapfile),
            data_start_time=vdsinfo['start_time'],
            data_end_time=vdsinfo['end_time'],
            demix_always=
                py_parset.getStringVector('PreProcessing.demix_always'),
            demix_if_needed=
                py_parset.getStringVector('PreProcessing.demix_if_needed'),
            parset=ndppp_parset,
            parmdb_mapfile=parmdb_mapfile,
            sourcedb_mapfile=sourcedb_mapfile
        )

    # *********************************************************************
    # 6. Create feedback file for further processing by the LOFAR framework
    #    (MAC)
    # Create a parset-file containing the metadata for MAC/SAS
    with duration(self, "get_metadata"):
        self.run_task("get_metadata", output_data_mapfile,
            parset_file=self.parset_feedback_file,
            parset_prefix=(
                self.parset.getString('prefix') +
                self.parset.fullModuleName('DataProducts')),
            product_type="Correlated")

    return 0
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    # *********************************************************************
    # 1. Get input from parset, validate and cast to pipeline 'data types'
    #    Only perform work on existing files
    #    Create needed directories
    # Create a parameter-subset containing only python-control stuff.
    py_parset = self.parset.makeSubset(
        self.parset.fullModuleName('PythonControl') + '.')

    # Get input/output-data products specifications.
    self._get_io_product_specs()

    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")

    # Create directories for temporary parset- and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # Write input- and output data map-files
    input_correlated_mapfile = os.path.join(
        mapfile_dir, "input_correlated.mapfile")
    output_correlated_mapfile = os.path.join(
        mapfile_dir, "output_correlated.mapfile")
    output_instrument_mapfile = os.path.join(
        mapfile_dir, "output_instrument.mapfile")
    self.input_data['correlated'].save(input_correlated_mapfile)
    self.output_data['correlated'].save(output_correlated_mapfile)
    self.output_data['instrument'].save(output_instrument_mapfile)

    if len(self.input_data['correlated']) == 0:
        self.logger.warn("No input data files to process. Bailing out!")
        return 0

    self.logger.debug("Processing: %s" %
        ', '.join(str(f) for f in self.input_data['correlated']))

    # *********************************************************************
    # 2. Create databases needed for performing the work:
    #    - VDS, describing the data on the nodes
    #    - sourcedb, for the skymodel (A-team)
    #    - parmdb, for outputting solutions

    # Produce a GVDS file describing the data on the compute nodes.
    with duration(self, "vdsmaker"):
        gvds_file = self.run_task("vdsmaker",
                                  input_correlated_mapfile)['gvds']

    # Read metadata (start, end times, pointing direction) from GVDS.
    with duration(self, "vdsreader"):
        vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

    # Create an empty parmdb for DPPP
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task("setupparmdb",
            input_correlated_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
            suffix='.dppp.parmdb')['mapfile']

    # Create a sourcedb to be used by the demixing phase of DPPP
    # The user-supplied sky model can either be a name, in which case the
    # pipeline will search for a file <name>.skymodel in the default search
    # path $LOFARROOT/share/pipeline/skymodels; or a full path.
    # It is an error if the file does not exist.
    skymodel = py_parset.getString('PreProcessing.SkyModel')
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(
            # This should really become os.environ['LOFARROOT']
            self.config.get('DEFAULT', 'lofarroot'),
            'share', 'pipeline', 'skymodels', skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise PipelineException("Skymodel %s does not exist" % skymodel)
    with duration(self, "setupsourcedb"):
        sourcedb_mapfile = self.run_task("setupsourcedb",
            input_correlated_mapfile,
            mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
            skymodel=skymodel,
            suffix='.dppp.sourcedb',
            type='blob')['mapfile']

    # *********************************************************************
    # 3. Run NDPPP to demix the A-Team sources
    #    TODOW: Do flagging?

    # Create a parameter-subset for DPPP and write it to file.
    ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
    py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

    # Run the Default Pre-Processing Pipeline (DPPP);
    with duration(self, "ndppp"):
        dppp_mapfile = self.run_task("ndppp",
            input_correlated_mapfile,
            data_start_time=vdsinfo['start_time'],
            data_end_time=vdsinfo['end_time'],
            demix_always=py_parset.getStringVector(
                'PreProcessing.demix_always'),
            demix_if_needed=py_parset.getStringVector(
                'PreProcessing.demix_if_needed'),
            parset=ndppp_parset,
            parmdb_mapfile=parmdb_mapfile,
            sourcedb_mapfile=sourcedb_mapfile)['mapfile']

    # *********************************************************************
    # 4. Run BBS with a model of the calibrator
    #    - Create a parmdb for calibration solutions
    #    - Create a sourcedb with known calibration solutions
    #    - Run BBS with both

    # Create an empty parmdb for BBS
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task("setupparmdb",
            dppp_mapfile,
            mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'),
            suffix='.bbs.parmdb')['mapfile']

    # Create a sourcedb based on sourcedb's input argument "skymodel"
    with duration(self, "setupsourcedb"):
        sourcedb_mapfile = self.run_task("setupsourcedb",
            input_correlated_mapfile,
            skymodel=os.path.join(
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels',
                py_parset.getString('Calibration.SkyModel') + '.skymodel'),
            mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'),
            suffix='.bbs.sourcedb')['mapfile']

    # Create a parameter-subset for BBS and write it to file.
    bbs_parset = os.path.join(parset_dir, "BBS.parset")
    py_parset.makeSubset('BBS.').writeFile(bbs_parset)

    # Run BBS to calibrate the calibrator source(s).
    with duration(self, "bbs_reducer"):
        bbs_mapfile = self.run_task("bbs_reducer",
            dppp_mapfile,
            parset=bbs_parset,
            instrument_mapfile=parmdb_mapfile,
            sky_mapfile=sourcedb_mapfile)['data_mapfile']

    # *********************************************************************
    # 5. Perform gain outlier correction on the found calibration solutions
    #    Swapping outliers in the gains with the median

    # Export the calibration solutions using gainoutliercorrection and store
    # the results in the files specified in the instrument mapfile.
    export_instrument_model = py_parset.getBool(
        'Calibration.exportCalibrationParameters', False)

    with duration(self, "gainoutliercorrection"):
        self.run_task("gainoutliercorrection",
                      (parmdb_mapfile, output_instrument_mapfile),
                      sigma=1.0,
                      export_instrument_model=export_instrument_model
                      )  # TODO: Parset parameter

    # *********************************************************************
    # 6. Copy corrected MS's to their final output destination.
    with duration(self, "copier"):
        self.run_task("copier",
                      mapfile_source=bbs_mapfile,
                      mapfile_target=output_correlated_mapfile,
                      mapfiles_dir=mapfile_dir,
                      mapfile=output_correlated_mapfile)

    # *********************************************************************
    # 7. Create feedback file for further processing by the LOFAR framework
    #    a. get metadata of the measurement sets
    #    b. get metadata of the instrument models
    #    c. join the two files and write the final feedback file
    correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
    instrument_metadata = os.path.join(parset_dir, "instrument.metadata")

    with duration(self, "get_metadata"):
        self.run_task("get_metadata", output_correlated_mapfile,
            parset_file=correlated_metadata,
            parset_prefix=(self.parset.getString('prefix') +
                           self.parset.fullModuleName('DataProducts')),
            product_type="Correlated")

    with duration(self, "get_metadata"):
        self.run_task("get_metadata", output_instrument_mapfile,
            parset_file=instrument_metadata,
            parset_prefix=(self.parset.getString('prefix') +
                           self.parset.fullModuleName('DataProducts')),
            product_type="InstrumentModel")

    parset = parameterset(correlated_metadata)
    parset.adoptFile(instrument_metadata)
    parset.writeFile(self.parset_feedback_file)

    return 0
# Extract runtime, working, results directories from input parset
runtime_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory")
working_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.workingDirectory")
results_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.resultDirectory")

# Set up configuration for later processing stages
config = ConfigParser({
    "job_name": tree_id,
    "cwd": os.getcwd(),
    "start_time": start_time,
})
config.read(config_file)
config.set('DEFAULT', 'runtime_directory', runtime_directory)
config.set('DEFAULT', 'default_working_directory', working_directory)

# Extract input file list from parset
to_process = input_parset.getStringVector(
    'ObsSW.Observation.DataProducts.measurementSets')

# Read config file to establish location of parset directory to use
parset_directory = config.get("layout", "parset_directory")
create_directory(parset_directory)

# For each task (currently only ndppp), extract and write parset
tasks = ConfigParser(config.defaults())
tasks.read(string_to_list(config.get("DEFAULT", "task_files")))
ndppp_parset_location = tasks.get("ndppp", "parset")
input_parset.makeSubset(
    "ObsSW.Observation.ObservationControl.PythonControl.DPPP.").writeFile(
        ndppp_parset_location)

# Run pipeline & wait for result
subprocess.check_call([
    'python', pipeline_definition,
    '-j', tree_id,
    '-d',
    '--config', config_file,
    '--runtime-directory', runtime_directory,
    '--default-working-directory', working_directory,
    '--start-time', start_time
])
def run(self, input_image, bdsm_parameter_run1_path,
        bdsm_parameter_run2x_path, catalog_output_path, image_output_path,
        sourcedb_target_path, environment, working_directory,
        create_sourcdb_exec):
    """
    :param input_image: image to look for sources in
    :param bdsm_parameter_run1_path: parset with bdsm parameters for the
        first run
    :param bdsm_parameter_run2x_path: bdsm parameters for the second and
        later runs
    :param catalog_output_path: Path to full list of sources found
    :param image_output_path: Path to fits image with all sources
        subtracted
    :param sourcedb_target_path: Path to store the sourcedb created from
        all the found sources
    :param environment: environment for runwithlog4cplus
    :param working_directory: Working dir
    :param create_sourcdb_exec: Path to create sourcedb executable
    :rtype: self.outputs['source_db'] sourcedb_target_path
    """
    # *********************************************************************
    # 0. Create the directories used in this recipe
    create_directory(working_directory)

    import lofar.bdsm as bdsm  # @UnresolvedImport
    self.logger.info("Starting imager_source_finding")
    self.environment.update(environment)
    # default frequency is None (read from image), save for later cycles.
    # output of pybdsm forgets freq of source image
    frequency = None
    # Output of the for loop: n iterations and any source found
    n_itter_sourcefind = None
    sources_found = False
    max_sourcefind_itter = 5  # TODO: maximum itter is a magic value
    for idx in range(max_sourcefind_itter):
        # *****************************************************************
        # 1. Select correct input image
        # The first iteration uses the input image, second and later use
        # the output of the previous iteration. The 1+ iterations have a
        # separate parameter set.
        if idx == 0:
            input_image_local = input_image  # input_image_cropped
            image_output_path_local = image_output_path + "_0"
            bdsm_parameter_local = parameterset(bdsm_parameter_run1_path)
        else:
            input_image_local = image_output_path + "_{0}".format(
                str(idx - 1))
            image_output_path_local = image_output_path + "_{0}".format(
                str(idx))
            bdsm_parameter_local = parameterset(bdsm_parameter_run2x_path)

        # *****************************************************************
        # 2. parse the parameters and convert to python if possible
        # this is needed for pybdsm
        bdsm_parameters = {}
        for key in bdsm_parameter_local.keys():
            parameter_value = bdsm_parameter_local.getStringVector(key)[0]
            try:
                parameter_value = eval(parameter_value)
            except:
                pass  # do nothing
            bdsm_parameters[key] = parameter_value

        # pybdsm needs its filename here, to derive the log location
        bdsm_parameters["filename"] = input_image_local

        # *****************************************************************
        # 3. Start pybdsm
        self.logger.debug(
            "Starting sourcefinder bdsm on {0} using parameters:".format(
                input_image_local))
        self.logger.debug(repr(bdsm_parameters))
        img = bdsm.process_image(bdsm_parameters, frequency=frequency)

        # Always export the catalog
        img.write_catalog(
            outfile=catalog_output_path + "_{0}".format(str(idx)),
            catalog_type='gaul', clobber=True,
            format="bbs", force_output=True)

        # If no more matching of sources with gaussians is possible
        # (nsrc == 0), break the loop
        if img.nsrc == 0:
            n_itter_sourcefind = idx
            break

        # We have at least found a single source!
        self.logger.debug("Number of sources found: {0}".format(img.nsrc))

        # *****************************************************************
        # 4. export the image
        self.logger.debug("Wrote list of sources to file at: {0})".format(
            catalog_output_path))
        img.export_image(outfile=image_output_path_local,
                         img_type='gaus_resid', clobber=True,
                         img_format="fits")
        self.logger.debug("Wrote fits image with subtracted sources"
                          " at: {0})".format(image_output_path_local))

        # Save the frequency from the image header of the original input
        # file; this information is not written by pybdsm to the exported
        # image.
        frequency = img.frequency

    # if not set, the maximum number of iterations was performed
    if n_itter_sourcefind == None:
        n_itter_sourcefind = max_sourcefind_itter

    # *********************************************************************
    # 5. The produced catalogs now need to be combined into a single list
    # Call with the number of loops and the path to the files, only combine
    # if we found sources
    self.logger.debug(
        "Writing source list to file: {0}".format(catalog_output_path))
    self._combine_source_lists(n_itter_sourcefind, catalog_output_path)

    # *********************************************************************
    # 6. Convert sourcelist to sourcedb
    self._create_source_db(catalog_output_path, sourcedb_target_path,
                           working_directory, create_sourcdb_exec, False)
    # Assign the outputs
    self.outputs["catalog_output_path"] = catalog_output_path
    self.outputs["source_db"] = sourcedb_target_path
    return 0
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    self.logger.info("Starting imager pipeline")

    # Define scratch directory to be used by the compute nodes.
    self.scratch_directory = os.path.join(
        self.inputs['working_directory'], self.inputs['job_name'])
    # Get input/output-data products specifications.
    self._get_io_product_specs()

    # remove prepending parset identifiers, leave only pipelinecontrol
    full_parset = self.parset
    self.parset = self.parset.makeSubset(
        self.parset.fullModuleName('PythonControl') + '.')  # remove this

    # Create directories to store communication and data files
    job_dir = self.config.get("layout", "job_directory")

    self.parset_dir = os.path.join(job_dir, "parsets")
    create_directory(self.parset_dir)
    self.mapfile_dir = os.path.join(job_dir, "mapfiles")
    create_directory(self.mapfile_dir)

    # *********************************************************************
    # (INPUT) Get the input from external sources and create pipeline types
    # Input measurement sets
    input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
    self.input_data.save(input_mapfile)
    # storedata_map(input_mapfile, self.input_data)
    self.logger.debug(
        "Wrote input UV-data mapfile: {0}".format(input_mapfile))

    # Provides location for the scratch directory and concat.ms location
    target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
    self.target_data.save(target_mapfile)
    self.logger.debug(
        "Wrote target mapfile: {0}".format(target_mapfile))

    # images datafiles
    output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
    self.output_data.save(output_image_mapfile)
    self.logger.debug(
        "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

    # TODO: This is a backdoor option to manually add beamtables when these
    # are missing on the provided ms. There is NO use case for users of the
    # pipeline
    add_beam_tables = self.parset.getBool(
        "Imaging.addBeamTables", False)

    # *********************************************************************
    # (1) prepare phase: copy and collect the ms
    concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
        processed_ms_dir = self._prepare_phase(input_mapfile,
            target_mapfile, add_beam_tables)

    number_of_major_cycles = self.parset.getInt(
        "Imaging.number_of_major_cycles")

    # We start with an empty source_list map. It should contain n_output
    # entries all set to empty strings
    source_list_map_path = os.path.join(self.mapfile_dir,
                                        "initial_sourcelist.mapfile")
    source_list_map = DataMap.load(target_mapfile)  # copy the output map
    for item in source_list_map:
        item.file = ""  # set all to empty string
    source_list_map.save(source_list_map_path)

    for idx_loop in range(number_of_major_cycles):
        # *****************************************************************
        # (2) Create dbs and sky model
        parmdbs_path, sourcedb_map_path = self._create_dbs(
            concat_ms_map_path, timeslice_map_path,
            source_list_map_path=source_list_map_path,
            skip_create_dbs=False)

        # *****************************************************************
        # (3) bbs_imager recipe.
        bbs_output = self._bbs(timeslice_map_path, parmdbs_path,
                               sourcedb_map_path, skip=False)

        # TODO: Extra recipe: concat timeslices using pyrap.concatms
        # (see prepare)

        # *****************************************************************
        # (4) Get parameters awimager from the prepare_parset and inputs
        aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
            idx_loop, sourcedb_map_path, skip=False)

        # *****************************************************************
        # (5) Source finding
        sourcelist_map, found_sourcedb_path = self._source_finding(
            aw_image_mapfile, idx_loop, skip=False)
        # should the output be a sourcedb? instead of a sourcelist

    # TODO: minbaseline should be a parset value as is maxbaseline..
    minbaseline = 0

    # *********************************************************************
    # (6) Finalize:
    placed_data_image_map = self._finalize(aw_image_mapfile,
        processed_ms_dir, ms_per_image_map_path, sourcelist_map,
        minbaseline, maxbaseline, target_mapfile, output_image_mapfile,
        found_sourcedb_path)

    # *********************************************************************
    # (7) Get metadata
    # create a parset with information that is available on the toplevel
    toplevel_meta_data = parameterset()
    toplevel_meta_data.replace("numberOfMajorCycles",
                               str(number_of_major_cycles))

    # Create a parset containing the metadata for MAC/SAS at nodes
    metadata_file = "%s_feedback_SkyImage" % (self.parset_file,)
    self.run_task("get_metadata", placed_data_image_map,
        parset_prefix=(
            full_parset.getString('prefix') +
            full_parset.fullModuleName('DataProducts')),
        product_type="SkyImage",
        metadata_file=metadata_file)

    self.send_feedback_processing(toplevel_meta_data)
    self.send_feedback_dataproducts(parameterset(metadata_file))

    return 0
def archive(parset_file, directions, dir_output, full=False,
            archive_subdata=False, archive_state=False, archive_misc=True,
            archive_images=True, archive_inst=False, archive_pipestate=False,
            archive_models=False, archive_plots=True, clobber=False):
    """
    Archives data from a Factor run

    Parameters
    ----------
    parset_file : str
        Filename of Factor parset for run of interest
    directions : list of str
        List of direction names for which to archive the calibrated data
    dir_output : str
        Name of output directory where archived data will be stored
    full : bool, optional
        Make a full archive suitable for resuming?
    archive_subdata : bool, optional
        Archive the subtracted data MS files?
    archive_state : bool, optional
        Archive the state files?
    archive_misc : bool, optional
        Archive miscellaneous files?
    archive_images : bool, optional
        Archive the facet and field images?
    archive_inst : bool, optional
        Archive the instrument tables?
    archive_pipestate : bool, optional
        Archive the pipeline state files?
    archive_models : bool, optional
        Archive the sky models?
    archive_plots : bool, optional
        Archive the selfcal plots?
    clobber : bool, optional
        Clobber existing files in output directory?

    """
    # Read in parset and get directions
    all_directions, parset = load_directions(parset_file)
    if len(all_directions) == 0:
        log.error('No directions found in Factor working directory. '
                  'Please check the parset')
        sys.exit(1)
    all_names = [d.name for d in all_directions]

    if len(directions) != 0:
        if directions[0].lower() == 'all':
            directions = all_names
        for dname in directions:
            if dname not in all_names:
                log.warning('Direction {} not found. '
                            'Skipping it...'.format(dname))

    if full:
        # Archive everything
        archive_subdata = True
        archive_state = True
        archive_misc = True
        archive_images = True
        archive_inst = True
        archive_pipestate = True
        archive_models = True
        archive_plots = True

    working_dir = all_directions[0].working_dir

    if archive_subdata:
        log.info('Archiving subtracted data files...')
        chunks_dir = os.path.join(working_dir, 'chunks')
        copy(chunks_dir, dir_output, clobber)

    if archive_state:
        log.info('Archiving state files...')
        state_dir = os.path.join(working_dir, 'state')
        copy(state_dir, dir_output, clobber)

    if archive_misc:
        log.info('Archiving miscellaneous files...')
        misc_dir = os.path.join(dir_output, 'misc')
        if 'directions_file' in parset['direction_specific']:
            directions_file = parset['direction_specific']['directions_file']
        else:
            directions_file = os.path.join(working_dir,
                                           'factor_directions.txt')
        file_list = [directions_file,
                     parset_file,
                     '{}/factor.log'.format(working_dir),
                     '{}/regions/facets_ds9.reg'.format(working_dir),
                     '{}/regions/calimages_ds9.reg'.format(working_dir)]
        for f in file_list:
            copy(f, misc_dir, clobber)

    if archive_images:
        log.info('Archiving field images...')
        file_list = glob.glob(os.path.join(working_dir, 'results',
                                           'field*', 'field', '*.fits'))
        if len(file_list) == 0:
            log.warning('No field images found.')
        else:
            for i, f in enumerate(file_list):
                log.info(' Archiving image {0} of {1}...'.format(
                    i+1, len(file_list)))
                subdir = f.split('/')[-3]
                image_dir = os.path.join(dir_output, 'images', 'field',
                                         subdir)
                copy(f, image_dir, clobber)

    if archive_models:
        log.info('Archiving direction-independent sky models...')
        band_state_files = glob.glob(os.path.join(working_dir, 'state',
                                                  'Band_*'))
        file_list = []
        band_list = []
        for bf in band_state_files:
            try:
                with open(bf, 'r') as f:
                    b = pickle.load(f)
                    file_list.append(b['skymodel_dirindep'])
                    band_list.append(b['name'])
            except:
                pass
        for i, f in enumerate(file_list):
            skymodel_dir = os.path.join(dir_output, 'chunks', band_list[i])
            log.info(' Copying sky model file {0} of {1}...'.format(
                i+1, len(file_list)))
            copy(f, skymodel_dir, clobber)

    for d in all_directions:
        if archive_images:
            log.info('Archiving facet images for direction {}...'.format(
                d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetimage*', d.name, '*full2*image.fits'))
            if len(file_list) == 0:
                log.warning('No facet images found for direction '
                            '{}.'.format(d.name))
            else:
                for i, f in enumerate(file_list):
                    subdir = f.split('/')[-3]
                    image_dir = os.path.join(dir_output, 'images', d.name,
                                             subdir)
                    copy(f, image_dir, clobber)

        if archive_models:
            log.info('Archiving sky models for direction {}...'.format(
                d.name))
            if hasattr(d, 'sourcedb_new_facet_sources'):
                file_list = check_existing_files(
                    d.sourcedb_new_facet_sources)
            else:
                file_list = []
            if len(file_list) == 0:
                log.warning('No sky models found for direction '
                            '{}.'.format(d.name))
            else:
                sourcedb_dir = os.path.join(dir_output, 'sky_models', d.name)
                for i, f in enumerate(file_list):
                    log.info(' Copying sky model file {0} of {1}...'.format(
                        i+1, len(file_list)))
                    copy(f, sourcedb_dir, clobber)

        if archive_inst:
            log.info('Archiving instrument tables for direction '
                     '{}...'.format(d.name))
            if hasattr(d, 'preapply_h5parm_mapfile'):
                file_list.append(check_existing_files(
                    d.preapply_parmdb_mapfile))
            if len(file_list) == 0:
                log.warning('No h5parms found for direction {}.'.format(
                    d.name))
            else:
                inst_table_dir = os.path.join(dir_output, 'h5parms', d.name)
                for i, f in enumerate(file_list):
                    log.info(' Copying h5parm file {0} of {1}...'.format(
                        i+1, len(file_list)))
                    copy(f, inst_table_dir, clobber)

        if archive_plots:
            log.info('Archiving plots for direction {}...'.format(d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetselfcal', d.name, '*png'))
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results',
                    'facetpeel', d.name, '*png'))
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results',
                    'outlierpeel', d.name, '*png'))
            if len(file_list) == 0:
                log.warning('No plots found for direction {}.'.format(d.name))
            else:
                plot_dir = os.path.join(dir_output, 'plots', d.name)
                for i, f in enumerate(file_list):
                    copy(f, plot_dir, clobber)

        if archive_pipestate:
            log.info('Archiving pipeline state files for direction '
                     '{}...'.format(d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetselfcal', d.name, 'mapfiles', '*'))
            op_name = 'facetselfcal'
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results',
                    'facetpeel', d.name, 'mapfiles', '*'))
                op_name = 'facetpeel'
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results',
                    'outlierpeel', d.name, 'mapfiles', '*'))
                op_name = 'outlierpeel'
            if len(file_list) == 0:
                log.warning('No pipeline state files found for direction '
                            '{}.'.format(d.name))
            else:
                mapfile_dir = os.path.join(dir_output, 'pipeline_state',
                                           d.name, op_name)
                for f in file_list:
                    copy(f, mapfile_dir, clobber)

            # Also archive "final_image" mapfile for facetimage (needed for
            # mosaicking)
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetimage*', d.name, 'mapfiles', 'final_image.mapfile'))
            if len(file_list) > 0:
                for i, f in enumerate(file_list):
                    subdir = f.split('/')[-4]
                    mapfile_dir = os.path.join(dir_output, 'pipeline_state',
                                               d.name, subdir)
                    copy(f, mapfile_dir, clobber)

        if d.name in directions:
            log.info('Archiving calibrated data for direction {}...'.format(
                d.name))
            if hasattr(d, 'image_data_mapfile'):
                file_list = check_existing_files(d.image_data_mapfile)
            else:
                file_list = []
            if len(file_list) == 0:
                log.warning('No data found for direction {}. '
                            'Skipping it...'.format(d.name))
                continue

            # Make the output directory
            cal_data_dir = os.path.join(dir_output, 'calibrated_data',
                                        d.name)
            create_directory(cal_data_dir)

            # Sort the files into time chunks
            data_mapfile = d.name + '_calibrated_data.mapfile'
            sort_times_into_freqGroups.main(file_list,
                                            filename=data_mapfile,
                                            mapfile_dir=cal_data_dir)

            # Read the new, grouped file lists
            datamap = DataMap.load(os.path.join(cal_data_dir, data_mapfile))

            # Run DPPP to concatenate each time chunk in frequency
            nchunks = len(datamap)
            for i, item in enumerate(datamap):
                log.info(' Concatenating files for time chunk '
                         '{0} of {1}...'.format(i+1, nchunks))
                outfile = os.path.join(cal_data_dir,
                    '{0}_calibrated_data_chunk{1}.ms'.format(d.name, i))
                if os.path.exists(outfile):
                    if not clobber:
                        log.warning(' Output file for this chunk exists '
                                    'and clobber = False. Skipping it...')
                        continue
                    else:
                        os.system('rm -rf {0}'.format(outfile))
                dppp_concat(item.file, outfile)

            # Clean up
            os.system('rm -f {0}'.format(os.path.join(cal_data_dir,
                                                      data_mapfile)))
            os.system('rm -f {0}_groups'.format(os.path.join(cal_data_dir,
                                                             data_mapfile)))

    log.info('Archiving complete.')
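# Usage sketch for archive() above (illustrative; the parset path, direction
# names, and output directory are hypothetical):
#
#   archive('factor.parset', ['facet_patch_123', 'facet_patch_456'],
#           '/archive/factor_run1', archive_images=True, archive_plots=True,
#           clobber=False)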
def go(self): self.logger.info("Starting BBS run") super(new_bbs, self).go() # Check for relevant input parameters in the parset-file # --------------------------------------------------------------------- self.logger.debug("Reading parset from %s" % self.inputs['parset']) self.parset = parameterset(self.inputs['parset']) self._set_input('db_host', 'BBDB.Host') self._set_input('db_user', 'BBDB.User') self._set_input('db_name', 'BBDB.Name') self._set_input('db_key', 'BBDB.Key') #self.logger.debug("self.inputs = %s" % self.inputs) # Clean the blackboard database # --------------------------------------------------------------------- self.logger.info( "Cleaning BBS database for key '%s'" % (self.inputs['db_key']) ) command = ["psql", "-h", self.inputs['db_host'], "-U", self.inputs['db_user'], "-d", self.inputs['db_name'], "-c", "DELETE FROM blackboard.session WHERE key='%s';" % self.inputs['db_key'] ] self.logger.debug(command) if subprocess.call(command) != 0: self.logger.warning( "Failed to clean BBS database for key '%s'" % self.inputs['db_key'] ) # Create a bbs_map describing the file mapping on disk # --------------------------------------------------------------------- if not self._make_bbs_map(): return 1 # Produce a GVDS file, describing the data that must be processed. gvds_file = self.run_task( "vdsmaker", self.inputs['data_mapfile'], gvds=self.inputs['gvds'] )['gvds'] # Construct a parset for BBS GlobalControl by patching the GVDS # file and database information into the supplied template # ------------------------------------------------------------------ self.logger.debug("Building parset for BBS control") # Create a location for parsets job_directory = self.config.get( "layout", "job_directory") parset_directory = os.path.join(job_directory, "parsets") create_directory(parset_directory) # patch the parset and copy result to target location remove tempfile try: bbs_parset = utilities.patch_parset( self.parset, { 'Observation': gvds_file, 'BBDB.Key': self.inputs['db_key'], 'BBDB.Name': self.inputs['db_name'], 'BBDB.User': self.inputs['db_user'], 'BBDB.Host': self.inputs['db_host'], #'BBDB.Port': self.inputs['db_name'], } ) bbs_parset_path = os.path.join(parset_directory, "bbs_control.parset") shutil.copyfile(bbs_parset, bbs_parset_path) self.logger.debug("BBS control parset is %s" % (bbs_parset_path,)) finally: # Always remove the file in the tempdir os.remove(bbs_parset) try: # When one of our processes fails, we set the killswitch. # Everything else will then come crashing down, rather than # hanging about forever. # -------------------------------------------------------------- self.killswitch = threading.Event() self.killswitch.clear() signal.signal(signal.SIGTERM, self.killswitch.set) # GlobalControl runs in its own thread # -------------------------------------------------------------- run_flag = threading.Event() run_flag.clear() bbs_control = threading.Thread( target=self._run_bbs_control, args=(bbs_parset, run_flag) ) bbs_control.start() run_flag.wait() # Wait for control to start before proceeding # We run BBS KernelControl on each compute node by directly # invoking the node script using SSH # Note that we use a job_server to send out job details and # collect logging information, so we define a bunch of # ComputeJobs. However, we need more control than the generic # ComputeJob.dispatch method supplies, so we'll control them # with our own threads. 
# -------------------------------------------------------------- command = "python %s" % (self.__file__.replace('master', 'nodes')) jobpool = {} bbs_kernels = [] with job_server(self.logger, jobpool, self.error) as(jobhost, jobport): self.logger.debug("Job server at %s:%d" % (jobhost, jobport)) for job_id, details in enumerate(self.bbs_map): host, files = details jobpool[job_id] = ComputeJob( host, command, arguments=[ self.inputs['kernel_exec'], files, self.inputs['db_key'], self.inputs['db_name'], self.inputs['db_user'], self.inputs['db_host'] ] ) bbs_kernels.append( threading.Thread( target=self._run_bbs_kernel, args=(host, command, job_id, jobhost, str(jobport)) ) ) self.logger.info("Starting %d threads" % len(bbs_kernels)) for thread in bbs_kernels: thread.start() self.logger.debug("Waiting for all kernels to complete") for thread in bbs_kernels: thread.join() # When GlobalControl finishes, our work here is done # ---------------------------------------------------------- self.logger.info("Waiting for GlobalControl thread") bbs_control.join() finally: os.unlink(bbs_parset) if self.killswitch.isSet(): # If killswitch is set, then one of our processes failed so # the whole run is invalid # ---------------------------------------------------------- return 1 self.outputs['mapfile'] = self.inputs['data_mapfile'] return 0
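# The kill-switch pattern used above (a single shared Event that a SIGTERM
# or any failing worker sets, so every thread stops instead of hanging
# forever) can be illustrated in isolation. This is a generic sketch, not
# the recipe's own API; run_job is a placeholder for the real work.
import signal
import threading

killswitch = threading.Event()
# A signal handler is called with (signum, frame), so wrap Event.set().
signal.signal(signal.SIGTERM, lambda signum, frame: killswitch.set())

def run_job(job_id):
    # Placeholder for the real work (e.g. dispatching a ComputeJob).
    return job_id

def worker(job_id):
    try:
        run_job(job_id)
    except Exception:
        killswitch.set()  # one failure invalidates the whole run

threads = [threading.Thread(target=worker, args=(job_id,))
           for job_id in range(4)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

if killswitch.is_set():
    raise SystemExit(1)  # mirror the non-zero return used by the recipe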
config = ConfigParser({ "job_name": tree_id, "cwd": os.getcwd(), "start_time": start_time, }) config.read(config_file) config.set('DEFAULT', 'runtime_directory', runtime_directory) config.set('DEFAULT', 'default_working_directory', working_directory) # Extract input file list from parset to_process = input_parset.getStringVector( 'ObsSW.Observation.DataProducts.measurementSets') # Read config file to establish location of parset directory to use parset_directory = config.get("layout", "parset_directory") create_directory(parset_directory) # For each task (currently only ndppp), extract and write parset tasks = ConfigParser(config.defaults()) tasks.read(string_to_list(config.get("DEFAULT", "task_files"))) ndppp_parset_location = tasks.get("ndppp", "parset") input_parset.makeSubset( "ObsSW.Observation.ObservationControl.PythonControl.DPPP.").writeFile( ndppp_parset_location) # Run pipeline & wait for result subprocess.check_call([ 'python', pipeline_definition, '-j', tree_id, '-d', '--config', config_file, '--runtime-directory', runtime_directory, '--default-working-directory', working_directory, '--start-time', start_time
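# The ConfigParser above is seeded with defaults (job_name, cwd, start_time)
# that become available in every section and can be interpolated into other
# options. A self-contained illustration, using the Python 3 spelling of the
# import and a hypothetical [layout] entry; the values are made up.
from configparser import ConfigParser

defaults = {'job_name': 'L123456',
            'cwd': '/data/scratch',
            'start_time': '2016-01-01T00:00:00'}
config = ConfigParser(defaults)
config.read_string(u'[layout]\n'
                   u'parset_directory = %(cwd)s/%(job_name)s/parsets\n')
# %(cwd)s and %(job_name)s are filled in from the defaults:
assert config.get('layout', 'parset_directory') == '/data/scratch/L123456/parsets'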
def _filter_bad_stations(self, time_slice_path_list, asciistat_executable, statplot_executable, msselect_executable): """ A Collection of scripts for finding and filtering of bad stations: 1. First a number of statistics with regards to the spread of the data is collected using the asciistat_executable. 2. Secondly these statistics are consumed by the statplot_executable which produces a set of bad stations. 3. In the final step the bad stations are removed from the dataset using ms select REF: http://www.lofar.org/wiki/lib/exe/fetch.php?media=msss:pandeymartinez-week9-v1p2.pdf """ # run asciistat to collect statistics about the ms self.logger.info("Filtering bad stations") self.logger.debug("Collecting statistical properties of input data") asciistat_output = [] asciistat_proc_group = SubProcessGroup(self.logger) for ms in time_slice_path_list: output_dir = ms + ".filter_temp" create_directory(output_dir) asciistat_output.append((ms, output_dir)) cmd_string = "{0} -i {1} -r {2}".format(asciistat_executable, ms, output_dir) asciistat_proc_group.run(cmd_string) if asciistat_proc_group.wait_for_finish() != None: raise Exception("an ASCIIStats run failed!") # Determine the station to remove self.logger.debug("Select bad stations depending on collected stats") asciiplot_output = [] asciiplot_proc_group = SubProcessGroup(self.logger) for (ms, output_dir) in asciistat_output: ms_stats = os.path.join(output_dir, os.path.split(ms)[1] + ".stats") cmd_string = "{0} -i {1} -o {2}".format(statplot_executable, ms_stats, ms_stats) asciiplot_output.append((ms, ms_stats)) asciiplot_proc_group.run(cmd_string) if asciiplot_proc_group.wait_for_finish() != None: raise Exception("an ASCIIplot run failed!") # remove the bad stations self.logger.debug("Use ms select to remove bad stations") msselect_output = {} msselect_proc_group = SubProcessGroup(self.logger) for ms, ms_stats in asciiplot_output: # parse the .tab file containing the bad stations station_to_filter = [] file_pointer = open(ms_stats + ".tab") for line in file_pointer.readlines(): # skip headed line if line[0] == "#": continue entries = line.split() # if the current station is bad (the last entry on the line) if entries[-1] == "True": # add the name of station station_to_filter.append(entries[1]) # if this measurement does not contain baselines to skip do not # filter and provide the original ms as output if len(station_to_filter) == 0: msselect_output[ms] = ms continue ms_output_path = ms + ".filtered" msselect_output[ms] = ms_output_path # use msselect to remove the stations from the ms msselect_baseline = "!{0}".format(",".join(station_to_filter)) cmd_string = "{0} in={1} out={2} baseline={3} deep={4}".format( msselect_executable, ms, ms_output_path, msselect_baseline, "False") msselect_proc_group.run(cmd_string) if msselect_proc_group.wait_for_finish() != None: raise Exception("an MSselect run failed!") filtered_list_of_ms = [] # The order of the inputs needs to be preserved when producing the # filtered output! for input_ms in time_slice_path_list: filtered_list_of_ms.append(msselect_output[input_ms]) return filtered_list_of_ms
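# The .tab parsing above can be captured in one small helper. This sketch
# assumes the same file format the loop relies on: comment lines start with
# '#', the station name is the second column and the last column reads
# 'True' for a bad station. read_bad_stations is a hypothetical name, not
# part of the recipe.
def read_bad_stations(tab_file):
    # Return the names of the stations flagged as bad in a statplot .tab file.
    bad_stations = []
    with open(tab_file) as handle:
        for line in handle:
            if line.startswith('#'):  # skip header lines
                continue
            entries = line.split()
            if entries and entries[-1] == 'True':
                bad_stations.append(entries[1])
    return bad_stations

# Usage mirrors the msselect step above:
# msselect_baseline = '!{0}'.format(','.join(read_bad_stations(ms_stats + '.tab')))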
def run(self, environment, parset, working_dir, processed_ms_dir, ndppp_executable, output_measurement_set, time_slices_per_image, subbands_per_group, raw_ms_mapfile, asciistat_executable, statplot_executable, msselect_executable, rficonsole_executable, add_beam_tables): """ Entry point for the node recipe """ self.environment.update(environment) with log_time(self.logger): input_map = DataMap.load(raw_ms_mapfile) #****************************************************************** # I. Create the directories used in this recipe create_directory(processed_ms_dir) # time slice dir: assure an empty directory; stale data # is problematic for dppp time_slice_dir = os.path.join(working_dir, _time_slice_dir_name) create_directory(time_slice_dir) for root, dirs, files in os.walk(time_slice_dir): for file_to_remove in files: os.unlink(os.path.join(root, file_to_remove)) for dir_to_remove in dirs: shutil.rmtree(os.path.join(root, dir_to_remove)) self.logger.debug("Created directory: {0}".format(time_slice_dir)) self.logger.debug("and assured it is empty") #****************************************************************** # 1. Copy the input files copied_ms_map = self._copy_input_files(processed_ms_dir, input_map) #****************************************************************** # 2. run dppp: collect frequencies into larger groups time_slices_path_list = \ self._run_dppp(working_dir, time_slice_dir, time_slices_per_image, copied_ms_map, subbands_per_group, processed_ms_dir, parset, ndppp_executable) # If no timeslices were created, bail out with exit status 1 if len(time_slices_path_list) == 0: self.logger.error("No timeslices were created.") self.logger.error("Exiting with error state 1") return 1 self.logger.debug( "Produced time slices: {0}".format(time_slices_path_list)) #*********************************************************** # 3. run rficonsole: flag data points which are corrupted self._run_rficonsole(rficonsole_executable, time_slice_dir, time_slices_path_list) #****************************************************************** # 4. Add imaging columns to each timeslice # ndppp_executable fails if not present for time_slice_path in time_slices_path_list: pt.addImagingColumns(time_slice_path) self.logger.debug( "Added imaging columns to time_slice: {0}".format( time_slice_path)) #***************************************************************** # 5. Filter bad stations time_slice_filtered_path_list = self._filter_bad_stations( time_slices_path_list, asciistat_executable, statplot_executable, msselect_executable) #***************************************************************** # Add beam tables if requested if add_beam_tables: self.add_beam_tables(time_slice_filtered_path_list) #****************************************************************** # 6. Perform the (virtual) concatenation of the timeslices self._concat_timeslices(time_slice_filtered_path_list, output_measurement_set) #****************************************************************** # return self.outputs["time_slices"] = \ time_slices_path_list return 0
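# The walk-and-remove loop above empties the time-slice directory so that
# stale data cannot confuse DPPP. When nothing needs the directory entry
# itself to survive, the same effect can be had by removing and recreating
# it; a minimal sketch (assure_empty_directory is a hypothetical helper,
# not part of the recipe).
import os
import shutil

def assure_empty_directory(path):
    # Make sure `path` exists and contains no stale files or subdirectories.
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path)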
def run(self, executable, environment, parset, working_directory, output_image, concatenated_measurement_set, sourcedb_path, mask_patch_size, autogenerate_parameters, specify_fov, fov): """ :param executable: Path to awimager executable :param environment: environment for catch_segfaults (executable runner) :param parset: parameters for the awimager, :param working_directory: directory the place temporary files :param output_image: location and filesname to story the output images the multiple images are appended with type extentions :param concatenated_measurement_set: Input measurement set :param sourcedb_path: Path the the sourcedb used to create the image mask :param mask_patch_size: Scaling of the patch around the source in the mask :param autogenerate_parameters: Turns on the autogeneration of: cellsize, npix, wprojplanes, wmax, fov :param fov: if autogenerate_parameters is false calculate imageparameter (cellsize, npix, wprojplanes, wmax) relative to this fov :rtype: self.outputs["image"] The path to the output image """ self.logger.info("Start imager_awimager node run:") log4_cplus_name = "imager_awimager" self.environment.update(environment) with log_time(self.logger): # Read the parameters as specified in the parset parset_object = get_parset(parset) # ************************************************************* # 1. Calculate awimager parameters that depend on measurement set # and the parset cell_size, npix, w_max, w_proj_planes = \ self._get_imaging_parameters( concatenated_measurement_set, parset, autogenerate_parameters, specify_fov, fov) self.logger.info("Using autogenerated parameters; ") self.logger.info( "Calculated parameters: cell_size: {0}, npix: {1}".format( cell_size, npix)) self.logger.info("w_max: {0}, w_proj_planes: {1} ".format( w_max, w_proj_planes)) # **************************************************************** # 2. Get the target image location from the mapfile for the parset. # Create target dir if it not exists image_path_head = os.path.dirname(output_image) create_directory(image_path_head) self.logger.debug("Created directory to place awimager output" " files: {0}".format(image_path_head)) # **************************************************************** # 3. Create the mask mask_file_path = self._create_mask(npix, cell_size, output_image, concatenated_measurement_set, executable, working_directory, log4_cplus_name, sourcedb_path, mask_patch_size, image_path_head) # ***************************************************************** # 4. Update the parset with calculated parameters, and output image patch_dictionary = {'uselogger': 'True', # enables log4cpluscd log 'ms': str(concatenated_measurement_set), 'cellsize': str(cell_size), 'npix': str(npix), 'wmax': str(w_max), 'wprojplanes': str(w_proj_planes), 'image': str(output_image), 'maxsupport': str(npix), # 'mask':str(mask_file_path), #TODO REINTRODUCE # MASK, excluded to speed up in this debug stage } # save the parset at the target dir for the image calculated_parset_path = os.path.join(image_path_head, "parset.par") try: temp_parset_filename = patch_parset(parset, patch_dictionary) # Copy tmp file to the final location shutil.copyfile(temp_parset_filename, calculated_parset_path) self.logger.debug("Wrote parset for awimager run: {0}".format( calculated_parset_path)) finally: # remove temp file os.remove(temp_parset_filename) # ***************************************************************** # 5. 
Run the awimager with the updated parameterset cmd = [executable, calculated_parset_path] try: with CatchLog4CPlus(working_directory, self.logger.name + "." + os.path.basename(log4_cplus_name), os.path.basename(executable) ) as logger: catch_segfaults(cmd, working_directory, self.environment, logger) # Thrown by catch_segfault except CalledProcessError, exception: self.logger.error(str(exception)) return 1 except Exception, exception: self.logger.error(str(exception)) return 1
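# The patch-then-copy pattern above (patch_parset writes a temporary parset,
# which is copied to its final location and always removed afterwards)
# recurs in several recipes and can be wrapped once. A sketch under the
# assumption that patch_parset lives in lofarpipe.support.utilities, as in
# these recipes; write_patched_parset is a hypothetical name.
import os
import shutil

from lofarpipe.support.utilities import patch_parset

def write_patched_parset(parset, patch_dictionary, target_path):
    # Apply the patch, copy the result to its final location and always
    # clean up the temporary file, even when the copy fails.
    temp_parset = patch_parset(parset, patch_dictionary)
    try:
        shutil.copyfile(temp_parset, target_path)
    finally:
        os.remove(temp_parset)
    return target_path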
def run(self, awimager_output, ms_per_image, sourcelist, target, output_image, minbaseline, maxbaseline, processed_ms_dir, fillrootimagegroup_exec, environment, sourcedb, concat_ms, correlated_output_location, msselect_executable): self.environment.update(environment) """ :param awimager_output: Path to the casa image produced by awimager :param ms_per_image: The X (90) measurements set scheduled to create the image :param sourcelist: list of sources found in the image :param target: <unused> :param minbaseline: Minimum baseline used for the image :param maxbaseline: largest/maximum baseline used for the image :param processed_ms_dir: The X (90) measurements set actually used to create the image :param fillrootimagegroup_exec: Executable used to add image data to the hdf5 image :rtype: self.outputs['hdf5'] set to "succes" to signal node succes :rtype: self.outputs['image'] path to the produced hdf5 image """ with log_time(self.logger): ms_per_image_map = DataMap.load(ms_per_image) # ***************************************************************** # 1. add image info # Get all the files in the processed measurement dir file_list = os.listdir(processed_ms_dir) processed_ms_paths = [] ms_per_image_map.iterator = DataMap.SkipIterator for item in ms_per_image_map: ms_path = item.file processed_ms_paths.append(ms_path) #add the information the image try: self.logger.debug("Start addImage Info") addimg.addImagingInfo(awimager_output, processed_ms_paths, sourcedb, minbaseline, maxbaseline) except Exception, error: self.logger.warn("addImagingInfo Threw Exception:") self.logger.warn(error) # Catch raising of already done error: allows for rerunning # of the recipe if "addImagingInfo already done" in str(error): self.logger.warn("addImagingInfo already done, continue") pass else: raise Exception(error) #The majority of the tables is updated correctly # *************************************************************** # 2. convert to hdf5 image format output_directory = None pim_image = pim.image(awimager_output) try: self.logger.info("Saving image in HDF5 Format to: {0}" .format( output_image)) # Create the output directory output_directory = os.path.dirname(output_image) create_directory(output_directory) # save the image pim_image.saveas(output_image, hdf5=True) except Exception, error: self.logger.error( "Exception raised inside pyrap.images: {0}".format( str(error))) raise error
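# The HDF5 conversion above boils down to a single pyrap.images call. A
# stripped-down sketch; casa_to_hdf5 is a hypothetical wrapper name.
import os

import pyrap.images as pim

def casa_to_hdf5(casa_image_path, hdf5_path):
    # Load the CASA image and write it back out in HDF5 format, creating
    # the output directory first (the same saveas call as used above).
    output_directory = os.path.dirname(hdf5_path)
    if output_directory and not os.path.isdir(output_directory):
        os.makedirs(output_directory)
    pim.image(casa_image_path).saveas(hdf5_path, hdf5=True)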
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile") self.input_data.save(input_data_mapfile) output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data.save(output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): output_data_mapfile = self.run_task("ndppp", (input_data_mapfile, output_data_mapfile), data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 6. 
Create feedback file for further processing by the LOFAR framework # Create a parset containing the metadata metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_data_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( 'ObsSW.Observation.ObservationControl.PythonControl.') # Get input/output-data products specifications. self._get_io_product_specs() # Create some needed directories job_dir = self.config.get("layout", "job_directory") mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(mapfile_dir) parset_dir = os.path.join(job_dir, "parsets") create_directory(parset_dir) # ********************************************************************* # 2. Copy the instrument files to the correct node # The instrument files are currently located on the wrong nodes # Copy to correct nodes and assign the instrument table the now # correct data # Copy the instrument files to the corrent nodes: failures might happen # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug("Wrote input data mapfile: %s" % data_mapfile) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") self.output_data['data'].save(corrected_mapfile) self.logger.debug("Wrote output corrected data mapfile: %s" % corrected_mapfile) # Validate number of copied files, abort on zero files copied if len(self.input_data['data']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['data'])) # ********************************************************************* # 3. Create database needed for performing work: # - GVDS, describing data on the compute nodes # - SourceDB, for skymodel (A-team) # - ParmDB for outputtting solutions with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds'] # Read metadata (e.g., start- and end-time) from the GVDS file. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. 
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel') if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob')['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always=py_parset.getStringVector( 'PreProcessing.demix_always'), demix_if_needed=py_parset.getStringVector( 'PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile'))['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile)['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile)['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework metadata_file = "%s_feedback_Correlated" % (self.parset_file, ) with duration(self, "get_metadata"): self.run_task( "get_metadata", corrected_mapfile, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
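# The sky-model lookup both pipelines above perform (a bare name resolved
# against <lofarroot>/share/pipeline/skymodels, an absolute path used as-is,
# a missing file treated as an error) can be factored into a single helper.
# resolve_skymodel is a hypothetical name; the real code raises
# PipelineException rather than ValueError.
import os

def resolve_skymodel(skymodel, lofarroot):
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(lofarroot, 'share', 'pipeline', 'skymodels',
                                skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise ValueError("Skymodel %s does not exist" % skymodel)
    return skymodel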
def __init__(self, parset, bands, direction, name=None): self.parset = parset.copy() self.bands = bands self.name = name.lower() self.parset['op_name'] = name self.direction = direction _logging.set_level(self.parset['logging_level']) self.log = logging.getLogger('factor:{0}'.format(self.name)) self.hostname = socket.gethostname() self.node_list = parset['cluster_specific']['node_list'] # Working directory self.factor_working_dir = parset['dir_working'] # Pipeline runtime and working dirs (pipeline makes subdir here with # name of direction) self.pipeline_runtime_dir = os.path.join(self.factor_working_dir, 'results', self.name) self.pipeline_working_dir = self.pipeline_runtime_dir create_directory(self.pipeline_runtime_dir) # Directory that holds the mapfiles self.pipeline_mapfile_dir = os.path.join(self.pipeline_runtime_dir, self.direction.name, 'mapfiles') create_directory(self.pipeline_mapfile_dir) # Directory in the runtime dir that holds parset and config files (also # the results of the pipeline) self.pipeline_parset_dir = os.path.join(self.pipeline_runtime_dir, self.direction.name) create_directory(self.pipeline_parset_dir) # Local scratch directory and corresponding node recipes if self.parset['cluster_specific']['dir_local'] is None: # Not specified: put the scratch directory in the normal work directory self.local_scratch_dir = os.path.join(self.pipeline_working_dir, self.direction.name) self.dppp_nodescript = 'executable_args' elif self.parset['cluster_specific']['clusterdesc_file'].lower() == 'pbs': # PBS = "system in Hamburg" -> use special NDPPP nodescript self.local_scratch_dir = self.parset['cluster_specific']['dir_local'] self.dppp_nodescript = 'dppp_scratch' else: # other: use the given scratch directory and the standard node script self.local_scratch_dir = self.parset['cluster_specific']['dir_local'] self.dppp_nodescript = 'executable_args' # Directory that holds logs in a convenient place self.log_dir = os.path.join(self.factor_working_dir, 'logs', self.name) create_directory(self.log_dir) # Log name used for logs in log_dir self.logbasename = os.path.join(self.log_dir, self.direction.name) # Below are paths for scripts, etc. in the Factor install directory self.factor_root_dir = os.path.split(DIR)[0] self.factor_pipeline_dir = os.path.join(self.factor_root_dir, 'pipeline') self.factor_script_dir = os.path.join(self.factor_root_dir, 'scripts') self.factor_parset_dir = os.path.join(self.factor_root_dir, 'parsets') self.factor_skymodel_dir = os.path.join(self.factor_root_dir, 'skymodels') # Below are the templates and output paths for the pipeline parset and # config files. These may need to be re-defined in the subclasses # if the operation has non-standard template names self.pipeline_parset_template = '{0}_pipeline.parset'.format(self.name) self.pipeline_parset_file = os.path.join(self.pipeline_parset_dir, 'pipeline.parset') self.pipeline_config_template = 'pipeline.cfg' self.pipeline_config_file = os.path.join(self.pipeline_parset_dir, 'pipeline.cfg') # Define parameters needed for the pipeline config.
self.cfg_dict = {'lofarroot': parset['lofarroot'], 'pythonpath': parset['lofarpythonpath'], 'factorroot': self.factor_root_dir, 'pipeline_working_dir': self.pipeline_working_dir, 'pipeline_runtime_dir': self.pipeline_runtime_dir, 'casa_executable': parset['casa_executable'], 'wsclean_executable': parset['wsclean_executable'], 'image2fits_executable': parset['image2fits_executable'], 'dppp_nodescript': self.dppp_nodescript} # Define global parameters needed by all pipeline parsets. Other, # pipeline-specific, parameters should be defined in the subclasses by # updating this dictionary self.parms_dict = {'parset_dir': self.factor_parset_dir, 'skymodel_dir': self.factor_skymodel_dir, 'mapfile_dir': self.pipeline_mapfile_dir, 'pipeline_dir': self.factor_pipeline_dir, 'script_dir': self.factor_script_dir, 'local_dir': self.local_scratch_dir, 'hosts': self.node_list} # Update the dictionaries with the attributes of the operation's # direction object. Any attributes set in the direction object that are # also in the parms_dict will be set to those of the direction object # (e.g., 'max_cpus_per_node', which is set in the direction object by # factor.cluster.divide_nodes(), will override the value set above) self.cfg_dict.update(self.direction.__dict__) self.parms_dict.update(self.direction.__dict__) # Add cluster-related info if self.parset['cluster_specific']['clustertype'] == 'local': self.cfg_dict['remote'] = '[remote]\n'\ + 'method = local\n'\ + 'max_per_node = {0}\n'.format(self.cfg_dict['max_cpus_per_node']) elif self.parset['cluster_specific']['clustertype'] == 'juropa_slurm': self.cfg_dict['remote'] = '[remote]\n'\ + 'method = slurm_srun\n'\ + 'max_per_node = {0}\n'.format(self.cfg_dict['max_cpus_per_node']) elif self.parset['cluster_specific']['clustertype'] == 'pbs': self.cfg_dict['remote'] = '' else: self.log.error('Could not determine the nature of your cluster!') sys.exit(1) # an absolute path in ...['clusterdesc'] will overrule the "working_dir" self.cfg_dict['clusterdesc'] = os.path.join(self.factor_working_dir, self.parset['cluster_specific']['clusterdesc'])
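# cfg_dict feeds the pipeline.cfg template named above. The rendering code
# itself is not part of this excerpt and may use a different templating
# engine; the sketch below only illustrates the idea with string.Template,
# and render_config is a hypothetical helper name.
from string import Template

def render_config(template_path, output_path, cfg_dict):
    # Substitute the ${...} placeholders in the template with the values
    # collected in cfg_dict and write the result to the parset directory.
    with open(template_path) as handle:
        template = Template(handle.read())
    with open(output_path, 'w') as handle:
        handle.write(template.safe_substitute(cfg_dict))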
def go(self): """ Contains functionality of the vdsmaker """ super(vdsmaker, self).go() # ********************************************************************** # 1. Load data from disk create output files args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) data = DataMap.load(args[0]) # Skip items in `data` that have 'skip' set to True data.iterator = DataMap.SkipIterator # Create output vds names vdsnames = [ os.path.join(self.inputs['directory'], os.path.basename(item.file) + '.vds') for item in data ] # ********************************************************************* # 2. Call vdsmaker command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for inp, vdsfile in zip(data, vdsnames): jobs.append( ComputeJob(inp.host, command, arguments=[ inp.file, self.config.get('cluster', 'clusterdesc'), vdsfile, self.inputs['makevds'] ])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) vdsnames = [ vds for vds, job in zip(vdsnames, jobs) if job.results['returncode'] == 0 ] if not vdsnames: self.logger.error("All makevds processes failed. Bailing out!") return 1 # ********************************************************************* # 3. Combine VDS files to produce GDS failure = False self.logger.info("Combining VDS files") executable = self.inputs['combinevds'] gvds_out = self.inputs['gvds'] # Create the gvds directory for output files, needed for combine create_directory(os.path.dirname(gvds_out)) try: command = [executable, gvds_out] + vdsnames combineproc = subprocess.Popen(command, close_fds=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = combineproc.communicate() log_process_output(executable, sout, serr, self.logger) if combineproc.returncode != 0: raise subprocess.CalledProcessError(combineproc.returncode, command) self.outputs['gvds'] = gvds_out self.logger.info("Wrote combined VDS file: %s" % gvds_out) except subprocess.CalledProcessError, cpe: self.logger.exception("combinevds failed with status %d: %s" % (cpe.returncode, serr)) failure = True
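# Several recipes above iterate over a mapfile while honouring its 'skip'
# flags. A compact sketch of that pattern, assuming the DataMap class from
# lofarpipe.support.data_map that these recipes already use;
# list_unskipped_files is a hypothetical helper name.
from lofarpipe.support.data_map import DataMap

def list_unskipped_files(mapfile_path):
    # Load the mapfile and return the file of every entry whose skip flag
    # is not set; the SkipIterator silently passes over skipped items.
    data = DataMap.load(mapfile_path)
    data.iterator = DataMap.SkipIterator
    return [item.file for item in data]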
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting imager pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join( self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug( "Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") self.output_data.save(output_image_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) # Location of the output measurement set output_correlated_mapfile = os.path.join(self.mapfile_dir, "correlated.mapfile") self.output_correlated_data.save(output_correlated_mapfile) self.logger.debug( "Wrote output correlated mapfile: {0}".format(output_correlated_mapfile)) # Get pipeline parameters from the toplevel recipe # TODO: This is a backdoor option to manually add beamtables when these # are missing on the provided ms. There is NO use case for users of the # pipeline add_beam_tables = self.parset.getBool( "Imaging.addBeamTables", False) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") # Almost always a users wants a partial succes above a failed pipeline output_result_of_last_succesfull_cycle = self.parset.getBool( "Imaging.output_on_error", True) if number_of_major_cycles < 3: self.logger.error( "The number of major cycles must be 3 or higher, correct" " the key: Imaging.number_of_major_cycles") raise PipelineException( "Incorrect number_of_major_cycles in the parset") # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile, add_beam_tables) # We start with an empty source_list map. 
It should contain n_output # entries all set to empty strings source_list_map_path = os.path.join(self.mapfile_dir, "initial_sourcelist.mapfile") source_list_map = DataMap.load(target_mapfile) # copy the output map for item in source_list_map: item.file = "" # set all to empty string source_list_map.save(source_list_map_path) succesfull_cycle_mapfiles_dict = None for idx_cycle in range(number_of_major_cycles): try: # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, idx_cycle, source_list_map_path = source_list_map_path, skip_create_dbs = False) # ***************************************************************** # (3) bbs_imager recipe. bbs_output = self._bbs(concat_ms_map_path, timeslice_map_path, parmdbs_path, sourcedb_map_path, idx_cycle, skip = False) # TODO: Extra recipe: concat timeslices using pyrap.concatms # (see prepare) redmine issue #6021 # Done in imager_bbs.p at the node level after calibration # ***************************************************************** # (4) Get parameters awimager from the prepare_parset and inputs aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, idx_cycle, sourcedb_map_path, number_of_major_cycles, skip = False) # ***************************************************************** # (5) Source finding source_list_map_path, found_sourcedb_path = self._source_finding( aw_image_mapfile, idx_cycle, skip = False) # should the output be a sourcedb? instead of a sourcelist # save the active mapfiles: locations and content # Used to output last succesfull cycle on error mapfiles_to_save = {'aw_image_mapfile':aw_image_mapfile, 'source_list_map_path':source_list_map_path, 'found_sourcedb_path':found_sourcedb_path, 'concat_ms_map_path':concat_ms_map_path} succesfull_cycle_mapfiles_dict = self._save_active_mapfiles(idx_cycle, self.mapfile_dir, mapfiles_to_save) # On exception there is the option to output the results of the # last cycle without errors except KeyboardInterrupt as ex: raise ex except Exception as ex: self.logger.error("Encountered an fatal exception during self" "calibration. Aborting processing and return" " the last succesfull cycle results") self.logger.error(str(ex)) # if we are in the first cycle always exit with exception if idx_cycle == 0: raise ex if not output_result_of_last_succesfull_cycle: raise ex # restore the mapfile variables aw_image_mapfile = succesfull_cycle_mapfiles_dict['aw_image_mapfile'] source_list_map_path = succesfull_cycle_mapfiles_dict['source_list_map_path'] found_sourcedb_path = succesfull_cycle_mapfiles_dict['found_sourcedb_path'] concat_ms_map_path = succesfull_cycle_mapfiles_dict['concat_ms_map_path'] # set the number_of_major_cycles to the correct number number_of_major_cycles = idx_cycle - 1 max_cycles_reached = False break else: max_cycles_reached = True # TODO: minbaseline should be a parset value as is maxbaseline.. 
minbaseline = 0 # ********************************************************************* # (6) Finalize: placed_data_image_map, placed_correlated_map = \ self._finalize(aw_image_mapfile, processed_ms_dir, ms_per_image_map_path, source_list_map_path, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path, concat_ms_map_path, output_correlated_mapfile) # ********************************************************************* # (7) Get metadata # create a parset with information that is available on the toplevel self._get_meta_data(number_of_major_cycles, placed_data_image_map, placed_correlated_map, full_parset, max_cycles_reached) return 0
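# _save_active_mapfiles is called in the selfcal loop above but its body is
# not part of this excerpt. A sketch of what saving the per-cycle state
# could look like: copy each named mapfile into a cycle-specific directory
# and return the new locations under the same keys. This is an assumption
# about its behaviour, not the actual implementation.
import os
import shutil

def save_active_mapfiles(idx_cycle, mapfile_dir, mapfiles_to_save):
    cycle_dir = os.path.join(mapfile_dir, 'cycle_{0}'.format(idx_cycle))
    if not os.path.isdir(cycle_dir):
        os.makedirs(cycle_dir)
    saved = {}
    for name, path in mapfiles_to_save.items():
        target = os.path.join(cycle_dir, os.path.basename(path))
        shutil.copy(path, target)
        saved[name] = target
    return saved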
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting imager pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") self.output_data.save(output_image_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") # We start with an empty source_list map. It should contain n_output # entries all set to empty strings source_list_map_path = os.path.join(self.mapfile_dir, "initial_sourcelist.mapfile") source_list_map = DataMap.load(target_mapfile) # copy the output map for item in source_list_map: item.file = "" # set all to empty string source_list_map.save(source_list_map_path) for idx_loop in range(number_of_major_cycles): # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, source_list_map_path=source_list_map_path, skip_create_dbs=False) # ***************************************************************** # (3) bbs_imager recipe. bbs_output = self._bbs(timeslice_map_path, parmdbs_path, sourcedb_map_path, skip=False) # TODO: Extra recipe: concat timeslices using pyrap.concatms # (see prepare) # ***************************************************************** # (4) Get parameters awimager from the prepare_parset and inputs aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, idx_loop, sourcedb_map_path, skip=False) # ***************************************************************** # (5) Source finding sourcelist_map, found_sourcedb_path = self._source_finding( aw_image_mapfile, idx_loop, skip=False) # should the output be a sourcedb? 
instead of a sourcelist # TODO: minbaseline should be a parset value as is maxbaseline.. minbaseline = 0 # ********************************************************************* # (6) Finalize: placed_data_image_map = self._finalize( aw_image_mapfile, processed_ms_dir, ms_per_image_map_path, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path) # ********************************************************************* # (7) Get metadata # Create a parset containing the metadata for MAC/SAS metadata_file = "%s_feedback_SkyImage" % (self.parset_file, ) self.run_task( "get_metadata", placed_data_image_map, parset_prefix=(full_parset.getString('prefix') + full_parset.fullModuleName('DataProducts')), product_type="SkyImage", metadata_file=metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. Note: return 0 on success, 1 on failure. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. # # Note that PULP will read many of these fields directly. That makes # the following fields, and possibly others, part of the API towards # PULP: # # self.config # self.logger # self.input_data # self.output_data # self.parset_feedback_file # self.job_dir # Get input/output-data products specifications. self._get_io_product_specs() self.job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(self.job_dir, "parsets") mapfile_dir = os.path.join(self.job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files # Coherent Stokes self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile") self.input_data['coherent'].save(self.input_CS_mapfile) # Incoherent Stokes self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile") self.input_data['incoherent'].save(self.input_IS_mapfile) # Output data self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data['data'].save(self.output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.') pulsar_parset = os.path.join(parset_dir, "Pulsar.parset") self.pulsar_parms.writeFile(pulsar_parset) self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # Rebuilding sys.argv without the options given automatically by framework # --auto = automatic run from framework # -q = quiet mode, no user interaction sys.argv = ['pulp.py', '--auto', '-q'] if (not self.coherentStokesEnabled): sys.argv.extend(["--noCS", "--noCV", "--noFE"]) if (not self.incoherentStokesEnabled): sys.argv.append("--noIS") # Tell PULP where to write the feedback to self.parset_feedback_file = "%s_feedback" % (self.parset_file,) # Run the pulsar pipeline self.logger.debug("Starting pulp with: " + join(sys.argv)) p = pulp.pulp(self) # TODO: MUCK self to capture the API # NOTE: PULP returns 0 on SUCCESS!! if p.go(): self.logger.error("PULP did not succeed. Bailing out!") return 1 # Read and forward the feedback try: metadata = parameterset(self.parset_feedback_file) except IOError, e: self.logger.error("Could not read feedback from %s: %s" % (metadata_file,e)) return 1
def run(self, executable, environment, parset, working_directory, output_image, concatenated_measurement_set, sourcedb_path, mask_patch_size, autogenerate_parameters, specify_fov, fov, major_cycle, nr_cycles, perform_self_cal): """ :param executable: Path to awimager executable :param environment: environment for catch_segfaults (executable runner) :param parset: parameters for the awimager, :param working_directory: directory the place temporary files :param output_image: location and filesname to story the output images the multiple images are appended with type extentions :param concatenated_measurement_set: Input measurement set :param sourcedb_path: Path the the sourcedb used to create the image mask :param mask_patch_size: Scaling of the patch around the source in the mask :param autogenerate_parameters: Turns on the autogeneration of: cellsize, npix, wprojplanes, wmax, fov :param fov: if autogenerate_parameters is false calculate imageparameter (cellsize, npix, wprojplanes, wmax) relative to this fov :param major_cycle: number of the self calibration cycle to determine the imaging parameters: cellsize, npix, wprojplanes, wmax, fov :param nr_cycles: The requested number of self cal cycles :param perform_self_cal: Bool used to control the selfcal functionality or the old semi-automatic functionality :rtype: self.outputs["image"] The path to the output image """ self.logger.info("Start selfcal_awimager node run:") log4_cplus_name = "selfcal_awimager" self.environment.update(environment) with log_time(self.logger): # Read the parameters as specified in the parset parset_object = get_parset(parset) # ************************************************************* # 1. Calculate awimager parameters that depend on measurement set # and the parset if perform_self_cal: # Calculate awimager parameters that depend on measurement set # and the parset self.logger.info( "Calculating selfcalibration parameters ") cell_size, npix, w_max, w_proj_planes, \ UVmin, UVmax, robust, threshold =\ self._get_selfcal_parameters( concatenated_measurement_set, parset, major_cycle, nr_cycles) self._save_selfcal_info(concatenated_measurement_set, major_cycle, npix, UVmin, UVmax) else: self.logger.info( "Calculating parameters.. ( NOT selfcalibration)") cell_size, npix, w_max, w_proj_planes = \ self._get_imaging_parameters( concatenated_measurement_set, parset, autogenerate_parameters, specify_fov, fov) self.logger.info("Using autogenerated parameters; ") self.logger.info( "Calculated parameters: cell_size: {0}, npix: {1}".format( cell_size, npix)) self.logger.info("w_max: {0}, w_proj_planes: {1} ".format( w_max, w_proj_planes)) # **************************************************************** # 2. Get the target image location from the mapfile for the parset. # Create target dir if it not exists image_path_head = os.path.dirname(output_image) create_directory(image_path_head) self.logger.debug("Created directory to place awimager output" " files: {0}".format(image_path_head)) # **************************************************************** # 3. Create the mask #mask_file_path = self._create_mask(npix, cell_size, output_image, # concatenated_measurement_set, executable, # working_directory, log4_cplus_name, sourcedb_path, # mask_patch_size, image_path_head) # ***************************************************************** # 4. 
Update the parset with calculated parameters, and output image patch_dictionary = {'uselogger': 'True', # enables log4cpluscd log 'ms': str(concatenated_measurement_set), 'cellsize': str(cell_size), 'npix': str(npix), 'wmax': str(w_max), 'wprojplanes': str(w_proj_planes), 'image': str(output_image), 'maxsupport': str(npix) # 'mask':str(mask_file_path), #TODO REINTRODUCE # MASK, excluded to speed up in this debug stage } # Add some aditional keys from the self calibration method if perform_self_cal: self_cal_patch_dict = { 'weight': 'briggs', 'padding': str(1.18), 'niter' : str(1000000), 'operation' : 'mfclark', 'timewindow' : '300', 'fits' : '', 'threshold' : str(threshold), 'robust' : str(robust), 'UVmin' : str(UVmin), 'UVmax' : str(UVmax), 'maxbaseline' : str(10000000), 'select' : str("sumsqr(UVW[:2])<1e12"), } patch_dictionary.update(self_cal_patch_dict) # save the parset at the target dir for the image calculated_parset_path = os.path.join(image_path_head, "parset.par") try: temp_parset_filename = patch_parset(parset, patch_dictionary) # Copy tmp file to the final location shutil.copyfile(temp_parset_filename, calculated_parset_path) self.logger.debug("Wrote parset for awimager run: {0}".format( calculated_parset_path)) finally: # remove temp file os.remove(temp_parset_filename) # ***************************************************************** # 5. Run the awimager with the parameterset cmd = [executable, calculated_parset_path] self.logger.debug("Parset used for awimager run:") self.logger.debug(cmd) try: with CatchLog4CPlus(working_directory, self.logger.name + "." + os.path.basename(log4_cplus_name), os.path.basename(executable) ) as logger: catch_segfaults(cmd, working_directory, self.environment, logger, usageStats=self.resourceMonitor) # Thrown by catch_segfault except CalledProcessError as exception: self.logger.error(str(exception)) return 1 except Exception as exception: self.logger.error(str(exception)) return 1 # ********************************************************************* # 6. Return output # Append static .restored: This might change but prob. not # The actual output image has this extention always, default of # awimager self.outputs["image"] = output_image + ".restored" return 0
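# The parset handling in the run() method above patches a dictionary of
# calculated values into the parset, writes the result to a temporary file,
# copies it to its final location next to the image, and always removes the
# temporary file in the finally clause. The helper below is a framework-free
# sketch of that pattern, assuming a plain key=value parset format; it is an
# illustration only, not the pipeline's patch_parset implementation.
import os
import shutil
import tempfile


def write_patched_parset(base_parset_path, patch, target_path):
    """Write base_parset_path with the entries of `patch` applied to target_path."""
    # Read the base parset as simple key=value lines (comments are skipped).
    values = {}
    with open(base_parset_path) as base:
        for line in base:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, _, value = line.partition('=')
            values[key.strip()] = value.strip()
    values.update(patch)

    # Write to a temporary file first and copy it to the final location, so a
    # failed write never leaves a half-written parset at the target path.
    fd, temp_path = tempfile.mkstemp(suffix='.parset')
    try:
        with os.fdopen(fd, 'w') as out:
            for key in sorted(values):
                out.write('{0}={1}\n'.format(key, values[key]))
        shutil.copyfile(temp_path, target_path)
    finally:
        os.remove(temp_path)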
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( 'ObsSW.Observation.ObservationControl.PythonControl.') # Get input/output-data products specifications. self._get_io_product_specs() # Create some needed directories job_dir = self.config.get("layout", "job_directory") mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(mapfile_dir) parset_dir = os.path.join(job_dir, "parsets") create_directory(parset_dir) # ********************************************************************* # 2. Copy the instrument files to the correct node # The instrument files are currently located on the wrong nodes # Copy to correct nodes and assign the instrument table the now # correct data # Copy the instrument files to the corrent nodes: failures might happen # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug( "Wrote input data mapfile: %s" % data_mapfile ) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") self.output_data['data'].save(corrected_mapfile) self.logger.debug( "Wrote output corrected data mapfile: %s" % corrected_mapfile ) # Validate number of copied files, abort on zero files copied if len(self.input_data['data']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['data']) ) # ********************************************************************* # 3. Create database needed for performing work: # - GVDS, describing data on the compute nodes # - SourceDB, for skymodel (A-team) # - ParmDB for outputtting solutions with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds'] # Read metadata (e.g., start- and end-time) from the GVDS file. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. 
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task("ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile') )['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile )['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task("bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile ) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # (MAC) # Create a parset-file containing the metadata for MAC/SAS with duration(self, "get_metadata"): self.run_task("get_metadata", corrected_mapfile, parset_file=self.parset_feedback_file, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts') ), product_type="Correlated") return 0
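# The sky-model lookup above is easy to miss inside the pipeline_logic body:
# a bare name is expanded to <name>.skymodel under the default skymodel
# directory, an absolute path is used as-is, and a missing file is a hard
# error. A standalone sketch of that rule, with `lofarroot` standing in for
# self.config.get('DEFAULT', 'lofarroot') and IOError standing in for the
# pipeline's PipelineException:
import os


def resolve_skymodel(skymodel, lofarroot):
    """Return the absolute path of a user-supplied sky model."""
    if not os.path.isabs(skymodel):
        # Bare names are searched for in the default skymodel directory.
        skymodel = os.path.join(lofarroot, 'share', 'pipeline', 'skymodels',
                                skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise IOError("Skymodel %s does not exist" % skymodel)
    return skymodel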
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. Note: return 0 on success, 1 on failure. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. # # Note that PULP will read many of these fields directly. That makes # the following fields, and possibly others, part of the API towards # PULP: # # self.config # self.logger # self.input_data # self.output_data # self.parset_feedback_file # self.job_dir # Get input/output-data products specifications. self._get_io_product_specs() self.job_dir = self.config.get("layout", "job_directory") self.globalfs = self.config.has_option("remote", "globalfs") and self.config.getboolean("remote", "globalfs") parset_dir = os.path.join(self.job_dir, "parsets") mapfile_dir = os.path.join(self.job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files # Coherent Stokes self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile") self.input_data['coherent'].save(self.input_CS_mapfile) # Incoherent Stokes self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile") self.input_data['incoherent'].save(self.input_IS_mapfile) # Output data self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data['data'].save(self.output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.') pulsar_parset = os.path.join(parset_dir, "Pulsar.parset") if self.globalfs: # patch for Pulp in case of DOCKER for k in [x for x in self.pulsar_parms.keys() if x.endswith("_extra_opts")]: self.pulsar_parms.replace(k, self.pulsar_parms[k].getString().replace(" ","\\\\ ")) self.pulsar_parms.writeFile(pulsar_parset) self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # Rebuilding sys.argv without the options given automatically by framework # --auto = automatic run from framework # -q = quiet mode, no user interaction sys.argv = ['pulp.py', '--auto', '-q'] if self.globalfs: project = self.parset.getString(self.parset.fullModuleName('Campaign') + '.name') sys.argv.extend(['--slurm', '--globalfs', '--docker', '--docker-container=lofar-pulp:%s' % os.environ.get("LOFAR_TAG"), '--raw=/data/projects/%s' % project]) else: sys.argv.append("--auto") if (not self.coherentStokesEnabled): sys.argv.extend(["--noCS", "--noCV", "--noFE"]) if (not self.incoherentStokesEnabled): sys.argv.append("--noIS") # Tell PULP where to write the feedback to self.parset_feedback_file = "%s_feedback" % (self.parset_file,) # Run the pulsar pipeline self.logger.debug("Starting pulp with: " + " ".join(sys.argv)) self.logger.debug("Calling pulp.pulp(self) with self = %s", pprint.pformat(vars(self))) p = pulp.pulp(self) # TODO: MUCK self to capture the API # NOTE: PULP returns 0 on SUCCESS!! if p.go(): self.logger.error("PULP did not succeed. Bailing out!") return 1 # Read and forward the feedback try: metadata = parameterset(self.parset_feedback_file) except IOError as e: self.logger.error("Could not read feedback from %s: %s" % (self.parset_feedback_file, e)) return 1
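# The argument vector handed to PULP is rebuilt by hand in the method above.
# The sketch below collects that construction in one place; the flag names are
# copied from the code, while `project` and `lofar_tag` are stand-ins for the
# Campaign.name parset value and the LOFAR_TAG environment variable. It is an
# illustration of the logic, not part of the PULP API.
def build_pulp_argv(globalfs, coherent_enabled, incoherent_enabled,
                    project=None, lofar_tag=None):
    """Return the sys.argv list used to drive pulp.pulp()."""
    # --auto: automatic run from the framework, -q: no user interaction.
    argv = ['pulp.py', '--auto', '-q']
    if globalfs:
        argv.extend(['--slurm', '--globalfs', '--docker',
                     '--docker-container=lofar-pulp:%s' % lofar_tag,
                     '--raw=/data/projects/%s' % project])
    else:
        # Mirrors the extra '--auto' appended in the code above.
        argv.append('--auto')
    if not coherent_enabled:
        argv.extend(['--noCS', '--noCV', '--noFE'])
    if not incoherent_enabled:
        argv.append('--noIS')
    return argv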
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting longbaseline pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) ## *************************************************************** #output_mapfile_path = os.path.join(self.mapfile_dir, "output.mapfile") #self.output_mapfile.save(output_mapfile_path) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_ms_mapfile = os.path.join(self.mapfile_dir, "output.mapfile") self.output_data.save(output_ms_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_ms_mapfile)) # TODO: This is a backdoor option to manually add beamtables when these # are missing on the provided ms. There is NO use case for users of the # pipeline add_beam_tables = self.parset.getBool("Imaging.addBeamTables", False) # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._long_baseline(input_mapfile, target_mapfile, add_beam_tables, output_ms_mapfile) # ********************************************************************* # (7) Get metadata # create a parset with information that is available on the toplevel toplevel_meta_data = parameterset( {'feedback_version': feedback_version}) # get some parameters from the imaging pipeline parset: subbandgroups_per_ms = self.parset.getInt( "LongBaseline.subbandgroups_per_ms") subbands_per_subbandgroup = self.parset.getInt( "LongBaseline.subbands_per_subbandgroup") toplevel_meta_data.replace("subbandsPerSubbandGroup", str(subbands_per_subbandgroup)) toplevel_meta_data.replace("subbandGroupsPerMS", str(subbandgroups_per_ms)) # Create a parset-file containing the metadata for MAC/SAS at nodes metadata_file = "%s_feedback_Correlated" % (self.parset_file, ) self.run_task( "get_metadata", output_ms_mapfile, parset_prefix=(full_parset.getString('prefix') + full_parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(toplevel_meta_data) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
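# The long-baseline pipeline forwards a small top-level feedback parset with
# the subband bookkeeping keys shown above. The helper below writes those keys
# as a plain key=value file; it is a stdlib stand-in for the parameterset
# object used in the code, with the key names taken from the method above and
# the file format assumed.
def write_toplevel_feedback(path, feedback_version, subbands_per_subbandgroup,
                            subbandgroups_per_ms):
    """Write the top-level feedback keys to `path` as key=value lines."""
    with open(path, 'w') as feedback:
        feedback.write('feedback_version=%s\n' % feedback_version)
        feedback.write('subbandsPerSubbandGroup=%s\n' % subbands_per_subbandgroup)
        feedback.write('subbandGroupsPerMS=%s\n' % subbandgroups_per_ms)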
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Get input from parset, validate and cast to pipeline 'data types' # Only perform work on existing files # Created needed directories # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join( mapfile_dir, "input_correlated.mapfile" ) output_correlated_mapfile = os.path.join( mapfile_dir, "output_correlated.mapfile" ) output_instrument_mapfile = os.path.join( mapfile_dir, "output_instrument.mapfile" ) self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create database needed for performing work: # Vds, descibing data on the nodes # sourcedb, For skymodel (A-team) # parmdb for outputtting solutions # Produce a GVDS file describing the data on the compute nodes. with duration(self, "vdsmaker"): gvds_file = self.run_task( "vdsmaker", input_correlated_mapfile )['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Run NDPPP to demix the A-Team sources # TODOW: Do flagging? # Create a parameter-subset for DPPP and write it to file. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 4. Run BBS with a model of the calibrator # Create a parmdb for calibration solutions # Create sourcedb with known calibration solutions # Run bbs with both # Create an empty parmdb for BBS with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", dppp_mapfile, mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'), suffix='.bbs.parmdb' )['mapfile'] # Create a sourcedb based on sourcedb's input argument "skymodel" with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, skymodel=os.path.join( self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', py_parset.getString('Calibration.SkyModel') + '.skymodel'), mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'), suffix='.bbs.sourcedb')['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the calibrator source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 5. Perform gain outlier correction on the found calibration solutions # Swapping outliers in the gains with the median # Export the calibration solutions using gainoutliercorrection and store # the results in the files specified in the instrument mapfile. export_instrument_model = py_parset.getBool( 'Calibration.exportCalibrationParameters', False) with duration(self, "gainoutliercorrection"): self.run_task("gainoutliercorrection", (parmdb_mapfile, output_instrument_mapfile), sigma=1.0, export_instrument_model=export_instrument_model) # TODO: Parset parameter # ********************************************************************* # 6. Copy corrected MS's to their final output destination. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile ) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two files and write the final feedback file correlated_metadata = os.path.join(parset_dir, "correlated.metadata") instrument_metadata = os.path.join(parset_dir, "instrument.metadata") with duration(self, "get_metadata"): self.run_task("get_metadata", output_correlated_mapfile, parset_file=correlated_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") with duration(self, "get_metadata"): self.run_task("get_metadata", output_instrument_mapfile, parset_file=instrument_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel") parset = parameterset(correlated_metadata) parset.adoptFile(instrument_metadata) parset.writeFile(self.parset_feedback_file) return 0
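# Step 7 above collects two metadata parsets (correlated and instrument) and
# joins them with adoptFile() before writing the feedback file. The function
# below sketches that merge for plain key=value files, with keys from the
# second file overriding the first; this mirrors the override behaviour
# assumed here and is not the parameterset implementation itself.
def merge_metadata_files(correlated_path, instrument_path, feedback_path):
    """Merge two key=value metadata files into one feedback file."""
    merged = {}
    for path in (correlated_path, instrument_path):
        with open(path) as source:
            for line in source:
                line = line.strip()
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, _, value = line.partition('=')
                merged[key.strip()] = value.strip()
    with open(feedback_path, 'w') as feedback:
        for key in sorted(merged):
            feedback.write('{0}={1}\n'.format(key, merged[key]))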
def pipeline_logic(self): try: parset_file = os.path.abspath(self.inputs['args'][0]) except IndexError: return self.usage() try: if self.parset.keys == []: self.parset.adoptFile(parset_file) self.parset_feedback_file = parset_file + "_feedback" except RuntimeError: print >> sys.stderr, "Error: Parset file not found!" return self.usage() self._replace_values() # just a reminder that this has to be implemented validator = GenericPipelineParsetValidation(self.parset) if not validator.validate_pipeline(): self.usage() exit(1) if not validator.validate_steps(): self.usage() exit(1) #set up directories job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # ********************************************************************* # maybe we dont need a subset but just a steplist # at the moment only a list with stepnames is given for the pipeline.steps parameter # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....] # the names will be the prefix for parset subsets pipeline_args = self.parset.makeSubset( self.parset.fullModuleName('pipeline') + '.') pipeline_steps = self.parset.makeSubset( self.parset.fullModuleName('steps') + '.') # ********************************************************************* # forward declaration of things. just for better overview and understanding whats in here. # some of this might be removed in upcoming iterations, or stuff gets added. step_name_list = pipeline_args.getStringVector('steps') # construct the step name list if there were pipeline.steps.<subset> for item in pipeline_steps.keys(): if item in step_name_list: loc = step_name_list.index(item) step_name_list[loc:loc] = pipeline_steps.getStringVector(item) step_name_list.remove(item) step_control_dict = {} step_parset_files = {} step_parset_obj = {} activeloop = [''] # construct the list of step names and controls self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir) # initial parameters to be saved in resultsdict so that recipes have access to this step0 # double init values. 'input' should be considered deprecated # self.name would be consistent to use in subpipelines input_dictionary = { 'parset': parset_file, 'parsetobj': self.parset, 'parset_dir': parset_dir, 'mapfile_dir': mapfile_dir} resultdicts = {} for section in self.config.sections(): tmp_dict = {} for entry in self.config.items(section): input_dictionary[entry[0]] = entry[1] tmp_dict[entry[0]] = entry[1] resultdicts.update({section: copy.deepcopy(tmp_dict)}) resultdicts.update({'input': input_dictionary}) resultdicts.update({self.name: input_dictionary}) if 'pipeline.mapfile' in self.parset.keywords(): resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile']) resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile']) # ********************************************************************* # main loop # there is a distinction between recipes and plugins for user scripts. # plugins are not used at the moment and might better be replaced with master recipes while step_name_list: stepname = step_name_list.pop(0) self.logger.info("Beginning step %s" % (stepname,)) step = step_control_dict[stepname] #step_parset = step_parset_obj[stepname] inputdict = {} inputargs = [] resultdict = {} # default kind_of_step to recipe. 
try: kind_of_step = step.getString('kind') except: kind_of_step = 'recipe' try: typeval = step.getString('type') except: typeval = '' adds = None if stepname in step_parset_obj: adds = self._construct_step_parset(inputdict, step_parset_obj[stepname], resultdicts, step_parset_files[stepname], stepname) # stepname not a valid input for old recipes if kind_of_step == 'recipe': if self.task_definitions.get(typeval, 'recipe') == 'executable_args': inputdict['stepname'] = stepname if adds: inputdict.update(adds) self._construct_cmdline(inputargs, step, resultdicts) if stepname in step_parset_files: inputdict['parset'] = step_parset_files[stepname] self._construct_input(inputdict, step, resultdicts) # hack, popping 'type' is necessary, why? because you deleted kind already in parsets try: inputdict.pop('type') except: pass try: inputdict.pop('kind') except: pass # \hack # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input. # python buildin functions cant handle the string returned from parset class. if 'environment' in inputdict.keys(): val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '') splitval = str(val).split(',') valdict = {} for item in splitval: valdict[item.split(':')[0]] = item.split(':')[1] inputdict['environment'] = valdict # subpipeline. goal is to specify a pipeline within a pipeline. # load other existing pipeline parset and add them to your own. if kind_of_step == 'pipeline': subpipeline_parset = Parset() subpipeline_parset.adoptFile(typeval) submapfile = '' subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps') if 'pipeline.mapfile' in subpipeline_parset.keywords(): submapfile = subpipeline_parset['pipeline.mapfile'] subpipeline_parset.remove('pipeline.mapfile') if 'mapfile_in' in inputdict.keys(): submapfile = inputdict.pop('mapfile_in') resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: { 'parset': typeval, 'mapfile': submapfile, }}) #todo: take care of pluginpathes and everything other then individual steps # make a pipeline parse methods that returns everything needed. # maybe as dicts to combine them to one subpipeline_parset.remove('pipeline.steps') if 'pipeline.pluginpath' in subpipeline_parset.keywords(): subpipeline_parset.remove('pipeline.pluginpath') checklist = copy.deepcopy(subpipeline_steplist) for k in self._keys(subpipeline_parset): if 'loopsteps' in k: for item in subpipeline_parset.getStringVector(k): checklist.append(item) # ********************************************************************* # master parset did not handle formatting and comments in the parset. # proper format only after use of parset.makesubset. then it is a different object # from a different super class :(. this also explains use of parset.keys and parset.keys() # take the parset from subpipeline and add it to the master parset. # UPDATE: do not use .keys on master parset. use .keywords(), then comments are filtered. # ********************************************************************* # replace names of steps with the subpipeline stepname to create a unique identifier. # replacement values starting with ! will be taken from the master parset and overwrite # the ones in the subpipeline. only works if the ! 
value is already in the subpipeline for k in self._keys(subpipeline_parset): val = subpipeline_parset[k] if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'): for item in checklist: if item+".output" in str(val): val = str(val).replace(item, stepname + '-' + item) self.parset.add(stepname + '-' + k, str(val)) else: # remove replacements strings to prevent loading the same key twice if k in self._keys(self.parset): self.parset.remove(k) self.parset.add(k, str(val)) for i, item in enumerate(subpipeline_steplist): subpipeline_steplist[i] = stepname + '-' + item for item in step_parset_obj[stepname].keys(): for k in self._keys(self.parset): if str(k).startswith('!') and item == str(k).strip("! ") or str(k).startswith('pipeline.replace.') and item == str(k)[17:].strip(): self.parset.remove(k) self.parset.add('! ' + item, str(step_parset_obj[stepname][item])) self._replace_values() self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir) for j in reversed(subpipeline_steplist): name = j step_control_dict[name] = step_control_dict[j] step_name_list.insert(0, name) # loop if kind_of_step == 'loop': # remember what loop is running to stop it from a conditional step if activeloop[0] is not stepname: activeloop.insert(0, stepname) # prepare counter = 0 breakloop = False if stepname in resultdicts: counter = int(resultdicts[stepname]['counter']) + 1 breakloop = resultdicts[stepname]['break'] loopsteps = step.getStringVector('loopsteps') # break at max iteration or when other step sets break variable if counter is step.getInt('loopcount'): breakloop = True if not breakloop: # add loop steps to the pipeline including the loop itself step_name_list.insert(0, stepname) self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir) for j in reversed(loopsteps): name = j step_control_dict[name] = step_control_dict[j] step_name_list.insert(0, name) # results for other steps to check and write states resultdict = {'counter': counter, 'break': breakloop} else: # reset values for second use of the loop (but why would you do that?) resultdict = {'counter': -1, 'break': False} activeloop.pop(0) # recipes if kind_of_step == 'recipe': with duration(self, stepname): resultdict = self.run_task( typeval, inputargs, **inputdict ) # plugins if kind_of_step == 'plugin': bla = str(self.config.get('DEFAULT', 'recipe_directories')) pluginpath = bla.rstrip(']').lstrip('[').split(',') for i, item in enumerate(pluginpath): pluginpath[i] = os.path.join(item, 'plugins') if 'pluginpath' in pipeline_args.keys(): pluginpath.append(pipeline_args.getString('pluginpath')) with duration(self, stepname): resultdict = loader.call_plugin(typeval, pluginpath, inputargs, **inputdict) resultdicts[stepname] = resultdict # breaking the loopstep # if the step has the keyword for loopbreaks assign the value if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict: resultdicts[activeloop[0]]['break'] = resultdict['break']
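# The main loop above works around the framework's string representation of
# the 'environment' field by stripping the braces and splitting on ',' and
# ':' by hand. The sketch below isolates that conversion; it assumes, like the
# code above, that keys and values contain neither ',' nor ':'.
def parse_environment_string(raw):
    """Convert a '{KEY1:VAL1, KEY2:VAL2}' string into a dict."""
    stripped = raw.rstrip('}').lstrip('{').replace(' ', '')
    result = {}
    for item in stripped.split(','):
        if not item:
            continue
        key, _, value = item.partition(':')
        result[key] = value
    return result


# Example: parse_environment_string('{PATH:/usr/bin, LC_ALL:C}')
# returns {'PATH': '/usr/bin', 'LC_ALL': 'C'}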
def run(self, environment, parset, working_dir, processed_ms_dir, ndppp_executable, output_measurement_set, subbandgroups_per_ms, subbands_per_subbandgroup, ms_mapfile, asciistat_executable, statplot_executable, msselect_executable, rficonsole_executable, add_beam_tables, globalfs, final_output_path): """ Entry point for the node recipe """ self.environment.update(environment) self.globalfs = globalfs with log_time(self.logger): input_map = DataMap.load(ms_mapfile) #****************************************************************** # I. Create the directories used in this recipe create_directory(processed_ms_dir) create_directory(working_dir) # time slice dir_to_remove: assure empty directory: Stale data # is problematic for dppp time_slice_dir = os.path.join(working_dir, _time_slice_dir_name) create_directory(time_slice_dir) for root, dirs, files in os.walk(time_slice_dir): for file_to_remove in files: os.unlink(os.path.join(root, file_to_remove)) for dir_to_remove in dirs: shutil.rmtree(os.path.join(root, dir_to_remove)) self.logger.debug("Created directory: {0}".format(time_slice_dir)) self.logger.debug("and assured it is empty") #****************************************************************** # 1. Copy the input files processed_ms_map = self._copy_input_files(processed_ms_dir, input_map) #****************************************************************** # 2. run dppp: collect frequencies into larger group time_slices_path_list = \ self._run_dppp(working_dir, time_slice_dir, subbandgroups_per_ms, processed_ms_map, subbands_per_subbandgroup, processed_ms_dir, parset, ndppp_executable) # If no timeslices were created, bail out with exit status 1 if len(time_slices_path_list) == 0: self.logger.error("No timeslices were created.") self.logger.error("Exiting with error state 1") return 1 self.logger.debug( "Produced time slices: {0}".format(time_slices_path_list)) #*********************************************************** # 3. run rfi_concole: flag datapoints which are corrupted if False: self._run_rficonsole(rficonsole_executable, time_slice_dir, time_slices_path_list) #****************************************************************** # 4. Add imaging columns to each timeslice # ndppp_executable fails if not present for time_slice_path in time_slices_path_list: pt.addImagingColumns(time_slice_path) self.logger.debug( "Added imaging columns to time_slice: {0}".format( time_slice_path)) #***************************************************************** # 5. Filter bad stations #if not(asciistat_executable == "" or # statplot_executable == "" or # msselect_executable == "" or True): if False: time_slice_filtered_path_list = self._filter_bad_stations( time_slices_path_list, asciistat_executable, statplot_executable, msselect_executable) else: # use the unfiltered list time_slice_filtered_path_list = time_slices_path_list #***************************************************************** # 6. Add measurmenttables if add_beam_tables: self.add_beam_tables(time_slice_filtered_path_list) #****************************************************************** # 7. Perform Convert polarization: self._convert_polarization(time_slice_filtered_path_list) #****************************************************************** # 8. Perform the (virtual) concatenation of the timeslices self._concat_timeslices(time_slice_filtered_path_list, output_measurement_set) #***************************************************************** # 9. 
Use table.copy(deep=True) to copy the ms to the correct # output location: create a new measurement set. self._deep_copy_to_output_location(output_measurement_set, final_output_path) # Write the actually used ms for the created dataset to the input # mapfile processed_ms_map.save(ms_mapfile) #****************************************************************** # return self.outputs["time_slices"] = \ time_slices_path_list return 0
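# Step I of the node recipe above guarantees an empty time-slice directory
# because stale data confuses DPPP: the directory is created if needed, then
# every file and subdirectory inside it is removed. A compact sketch of that
# idiom:
import os
import shutil


def ensure_empty_directory(path):
    """Create `path` if needed and remove any stale contents it may have."""
    if not os.path.isdir(path):
        os.makedirs(path)
        return
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if os.path.isdir(full):
            shutil.rmtree(full)
        else:
            os.unlink(full)


# An equivalent, slightly blunter form is shutil.rmtree(path,
# ignore_errors=True) followed by os.makedirs(path).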