def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the script is present
        if not os.path.isfile(executable):
            self.logger.error("Script %s not found" % executable)
            return 1

        # Create the working directory if necessary, guarding against the
        # race condition that occurs when more than one process creates it
        # on the same filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run

            # Change to the working directory for the script
            pipedir = os.getcwd()
            os.chdir(work_dir)

            outdict = {}
            plugin = imp.load_source('main', executable)
            outdict = plugin.main(*args, **kwargs)

            os.chdir(pipedir)

        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

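# The plugin call above relies on imp.load_source() to import a Python file
# by path and then invoke its main() entry point. A minimal standalone
# sketch of that pattern (the plugin path and main() signature here are
# hypothetical illustrations, not pipeline API):
import imp


def call_plugin(plugin_path, *args, **kwargs):
    # Import the file at plugin_path as a throwaway module named 'main'
    plugin = imp.load_source('main', plugin_path)
    # The plugin is expected to expose a main() entry point
    return plugin.main(*args, **kwargs)

# Usage (hypothetical): call_plugin('/path/to/plugin.py', 'obs.MS', niter=10)
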
def run(self, executable, nthreads, strategy, indirect, skip_flagged, wd,
        *infiles):
    with log_time(self.logger):
        self.logger.info("Processing %s" % " ".join(infiles))

        try:
            if not os.access(executable, os.X_OK):
                raise ExecutableMissing(executable)

            working_dir = tempfile.mkdtemp(
                dir=wd, suffix=".%s" % (os.path.basename(__file__),))

            cmd = [executable, "-j", str(nthreads)]
            if strategy:
                if os.path.exists(strategy):
                    cmd.extend(["-strategy", strategy])
                else:
                    raise Exception("Strategy definition not available")
            if indirect:
                cmd.extend(["-indirect-read"])
            if skip_flagged:
                cmd.extend(["-skip-flagged"])
            cmd.extend(infiles)

            with CatchLog4CPlus(
                working_dir, self.logger.name, os.path.basename(executable)
            ) as logger:
                catch_segfaults(cmd, working_dir, None, logger)
        except ExecutableMissing as e:
            self.logger.error("%s not found" % (e.args[0]))
            return 1
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        except Exception as e:
            self.logger.exception(e)
            return 1
        finally:
            # Try to clean up the working directory, but don't worry if
            # it fails -- it might not have been created before throwing
            # the exception
            try:
                shutil.rmtree(working_dir)
            except Exception:
                pass

    return 0

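# run() above pairs tempfile.mkdtemp() with a finally-clause rmtree so the
# scratch directory is removed even on failure. A minimal standalone sketch
# of that pattern (standard library only; run_tool() is a hypothetical
# placeholder for the external call):
import shutil
import tempfile


def with_scratch_dir(parent, suffix, run_tool):
    try:
        scratch = tempfile.mkdtemp(dir=parent, suffix=suffix)
        return run_tool(scratch)
    finally:
        # scratch is unbound if mkdtemp() itself failed, hence the broad
        # guard, mirroring the bare try/except in run() above
        try:
            shutil.rmtree(scratch)
        except Exception:
            pass
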
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov):
    """
    :param executable: Path to the awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory in which to place temporary files
    :param output_image: location and filename to store the output images;
        the individual images are appended with type extensions
    :param concatenated_measurement_set: input measurement set
    :param sourcedb_path: path to the sourcedb used to create the image
        mask
    :param mask_patch_size: scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param fov: if autogenerate_parameters is False, calculate the imaging
        parameters (cellsize, npix, wprojplanes, wmax) relative to this fov
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start imager_awimager node run:")
    log4_cplus_name = "imager_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset
        parset_object = get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        #    and the parset
        cell_size, npix, w_max, w_proj_planes = \
            self._get_imaging_parameters(
                concatenated_measurement_set,
                parset,
                autogenerate_parameters,
                specify_fov,
                fov)

        self.logger.info("Using autogenerated parameters:")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1}".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        #    Create the target directory if it does not exist.
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask
        mask_file_path = self._create_mask(npix, cell_size, output_image,
                                           concatenated_measurement_set,
                                           executable, working_directory,
                                           log4_cplus_name, sourcedb_path,
                                           mask_patch_size, image_path_head)

        # *****************************************************************
        # 4. Update the parset with calculated parameters and the output
        #    image
        patch_dictionary = {'uselogger': 'True',  # enables log4cplus logging
                            'ms': str(concatenated_measurement_set),
                            'cellsize': str(cell_size),
                            'npix': str(npix),
                            'wmax': str(w_max),
                            'wprojplanes': str(w_proj_planes),
                            'image': str(output_image),
                            'maxsupport': str(npix),
                            # 'mask': str(mask_file_path),  # TODO: reintroduce
                            # the mask; excluded to speed up this debug stage
                            }

        # Save the parset at the target directory for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            # Copy the temporary file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # Remove the temporary file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the updated parameterset
        cmd = [executable, calculated_parset_path]
        try:
            with CatchLog4CPlus(working_directory,
                                self.logger.name + "." +
                                os.path.basename(log4_cplus_name),
                                os.path.basename(executable)
                                ) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger)
        # Thrown by catch_segfaults
        except CalledProcessError as exception:
            self.logger.error(str(exception))
            return 1
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # The actual output image always carries the static .restored
    # extension, awimager's default
    self.outputs["image"] = output_image + ".restored"
    return 0

def run(self, infile, outfile, parmdb, sourcedb, parsetfile, executable,
        working_directory, environment, demix_always, demix_if_needed,
        start_time, end_time, nthreads, clobber):
    """
    This function contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("outfile = %s" % outfile)
    self.logger.debug("parmdb = %s" % parmdb)
    self.logger.debug("sourcedb = %s" % sourcedb)
    self.logger.debug("parsetfile = %s" % parsetfile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working_dir = %s" % working_directory)
    self.logger.debug("environment = %s" % environment)
    self.logger.debug("demix_always = %s" % demix_always)
    self.logger.debug("demix_if_needed = %s" % demix_if_needed)
    self.logger.debug("start_time = %s" % start_time)
    self.logger.debug("end_time = %s" % end_time)
    self.logger.debug("nthreads = %s" % nthreads)
    self.logger.debug("clobber = %s" % clobber)

    self.environment.update(environment)

    # ********************************************************************
    # 1. Preparations: set nthreads, validate input, clean the workspace
    if not nthreads:
        nthreads = 1
    if not outfile:
        outfile = infile
    tmpfile = outfile + '.tmp'

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        # Check if the DPPP executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Make sure that we start with a clean slate
        shutil.rmtree(tmpfile, ignore_errors=True)
        if clobber:
            if outfile == infile:
                self.logger.warn(
                    "Input and output are identical, not clobbering %s" %
                    outfile)
            else:
                self.logger.info("Removing previous output %s" % outfile)
                shutil.rmtree(outfile, ignore_errors=True)

        # *****************************************************************
        # 2. Perform housekeeping; test if the work is already done
        # If input and output files are different, and if the output file
        # already exists, then we're done.
        if outfile != infile and os.path.exists(outfile):
            self.logger.info("Output file %s already exists. We're done." %
                             outfile)
            self.outputs['ok'] = True
            return 0

        # Create a working copy if input and output are identical, to
        # avoid corrupting the original file if things go awry.
        if outfile == infile:
            self.logger.info("Creating working copy: %s --> %s" %
                             (infile, tmpfile))
            shutil.copytree(infile, tmpfile)

        # *****************************************************************
        # 3. Update the parset with locally calculated information

        # Put the arguments we need to pass to some private methods in a
        # dict
        kwargs = {
            'infile': infile,
            'tmpfile': tmpfile,
            'parmdb': parmdb,
            'sourcedb': sourcedb,
            'parsetfile': parsetfile,
            'demix_always': demix_always,
            'demix_if_needed': demix_if_needed,
            'start_time': start_time,
            'end_time': end_time
        }

        # Prepare for the actual DPPP run.
        with patched_parset(
            # *************************************************************
            # 4. Add ms names to the parset, start/end times if available,
            #    etc.
            # 5. Add demixing parameters to the parset
            parsetfile, self._prepare_steps(**kwargs),
            output_dir=working_directory, unlink=False
        ) as temp_parset_filename:

            self.logger.debug("Created temporary parset file: %s" %
                              temp_parset_filename)
            try:
                working_dir = tempfile.mkdtemp(
                    dir=working_directory,
                    suffix=".%s" % (os.path.basename(__file__),))

                # *********************************************************
                # 6. Run ndppp
                cmd = [executable, temp_parset_filename, '1']

                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch NDPPP segfaults (a regular occurrence), and
                    # retry
                    catch_segfaults(cmd, working_dir, self.environment,
                                    logger,
                                    cleanup=lambda: shutil.rmtree(
                                        tmpfile, ignore_errors=True))
                    # Replace outfile with the updated working copy
                    shutil.rmtree(outfile, ignore_errors=True)
                    os.rename(tmpfile, outfile)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
            finally:
                shutil.rmtree(working_dir)

    # We need some signal to the master script that the script ran ok.
    self.outputs['ok'] = True
    return 0

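# catch_segfaults() is supplied by the pipeline framework; the stand-in
# below is only a sketch of the retry-on-segfault idea, assuming the call
# shape used above (cmd, working_dir, env, logger, cleanup) and plain
# subprocess semantics. It is not the framework implementation.
import signal
import subprocess


def retry_on_segfault(cmd, cwd, env, logger, cleanup=None, max_tries=2):
    for attempt in range(1, max_tries + 1):
        process = subprocess.Popen(cmd, cwd=cwd, env=env)
        process.communicate()
        # A negative return code is the signal that killed the child;
        # retry only on SIGSEGV
        if process.returncode != -signal.SIGSEGV:
            return process.returncode
        logger.warn("%s segfaulted on attempt %d; retrying" %
                    (cmd[0], attempt))
        if cleanup is not None:
            cleanup()
    raise RuntimeError("%s kept segfaulting" % cmd[0])
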
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov,
        major_cycle, nr_cycles, perform_self_cal):
    """
    :param executable: Path to the awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory in which to place temporary files
    :param output_image: location and filename to store the output images;
        the individual images are appended with type extensions
    :param concatenated_measurement_set: input measurement set
    :param sourcedb_path: path to the sourcedb used to create the image
        mask
    :param mask_patch_size: scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param fov: if autogenerate_parameters is False, calculate the imaging
        parameters (cellsize, npix, wprojplanes, wmax) relative to this fov
    :param major_cycle: number of the self-calibration cycle, used to
        determine the imaging parameters: cellsize, npix, wprojplanes,
        wmax, fov
    :param nr_cycles: the requested number of self-cal cycles
    :param perform_self_cal: bool selecting either the self-cal
        functionality or the old semi-automatic functionality
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start selfcal_awimager node run:")
    log4_cplus_name = "selfcal_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset
        parset_object = get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        #    and the parset
        if perform_self_cal:
            # Calculate awimager parameters that depend on measurement set
            # and the parset
            self.logger.info("Calculating selfcalibration parameters")
            cell_size, npix, w_max, w_proj_planes, \
                UVmin, UVmax, robust, threshold = \
                    self._get_selfcal_parameters(
                        concatenated_measurement_set,
                        parset,
                        major_cycle,
                        nr_cycles)

            self._save_selfcal_info(concatenated_measurement_set,
                                    major_cycle, npix, UVmin, UVmax)
        else:
            self.logger.info("Calculating parameters (NOT selfcalibration)")
            cell_size, npix, w_max, w_proj_planes = \
                self._get_imaging_parameters(
                    concatenated_measurement_set,
                    parset,
                    autogenerate_parameters,
                    specify_fov,
                    fov)

        self.logger.info("Using autogenerated parameters:")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1}".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        #    Create the target directory if it does not exist.
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask
        # mask_file_path = self._create_mask(npix, cell_size, output_image,
        #     concatenated_measurement_set, executable, working_directory,
        #     log4_cplus_name, sourcedb_path, mask_patch_size,
        #     image_path_head)

        # *****************************************************************
        # 4. Update the parset with calculated parameters and the output
        #    image
        patch_dictionary = {'uselogger': 'True',  # enables log4cplus logging
                            'ms': str(concatenated_measurement_set),
                            'cellsize': str(cell_size),
                            'npix': str(npix),
                            'wmax': str(w_max),
                            'wprojplanes': str(w_proj_planes),
                            'image': str(output_image),
                            'maxsupport': str(npix)
                            # 'mask': str(mask_file_path),  # TODO: reintroduce
                            # the mask; excluded to speed up this debug stage
                            }

        # Add some additional keys from the self-calibration method
        if perform_self_cal:
            self_cal_patch_dict = {
                'weight': 'briggs',
                'padding': str(1.18),
                'niter': str(1000000),
                'operation': 'mfclark',
                'timewindow': '300',
                'fits': '',
                'threshold': str(threshold),
                'robust': str(robust),
                'UVmin': str(UVmin),
                'UVmax': str(UVmax),
                'maxbaseline': str(10000000),
                'select': str("sumsqr(UVW[:2])<1e12"),
            }
            patch_dictionary.update(self_cal_patch_dict)

        # Save the parset at the target directory for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            # Copy the temporary file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # Remove the temporary file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the parameterset
        cmd = [executable, calculated_parset_path]
        self.logger.debug("Parset used for awimager run:")
        self.logger.debug(cmd)

        try:
            with CatchLog4CPlus(working_directory,
                                self.logger.name + "." +
                                os.path.basename(log4_cplus_name),
                                os.path.basename(executable)
                                ) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)
        # Thrown by catch_segfaults
        except CalledProcessError as exception:
            self.logger.error(str(exception))
            return 1
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # *********************************************************************
    # 6. Return output
    # Append the static .restored extension: this might change, but
    # probably not. The actual output image always carries this extension,
    # awimager's default.
    self.outputs["image"] = output_image + ".restored"
    return 0

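# patch_parset() is framework-provided; a sketch of the same idea under the
# assumption of a plain key=value parset format -- read the file, apply the
# overrides, and return the path of a patched temporary copy that the
# caller copies into place and removes, as run() does above:
import os
import tempfile


def patch_parset_sketch(parset_path, patch_dictionary):
    values = {}
    with open(parset_path) as stream:
        for line in stream:
            if '=' in line:
                key, value = line.split('=', 1)
                values[key.strip()] = value.strip()
    values.update(patch_dictionary)
    fd, temp_path = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as stream:
        for key, value in values.items():
            stream.write('%s=%s\n' % (key, value))
    return temp_path
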
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This function contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        # Check if the executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Create the working directory if necessary, guarding against the
        # race condition that occurs when more than one process creates it
        # on the same filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            for k, v in kwargs.items():
                args.append('--' + k + '=' + v)
        else:
            nodeparset = Parset()
            sublist = []
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                if '.' in str(k):
                    if not str(k).split('.')[0] in sublist:
                        sublist.append(str(k).split('.')[0])

            # Quick hack, for proof of concept: translate each parset
            # subset into a single casapy function call.
            casastring = ''
            for sub in sublist:
                subpar = nodeparset.makeSubset(
                    nodeparset.fullModuleName(sub) + '.')
                casastring = sub + '('
                for k in subpar.keys():
                    if str(subpar[k]).find('/casastr/') == 0:
                        casastring += str(k) + '=' + "'" + str(
                            subpar[k]).strip('/casastr/') + "'" + ','
                    elif str(subpar[k]).find('/') == 0:
                        casastring += str(k) + '=' + "'" + str(
                            subpar[k]) + "'" + ','
                    else:
                        casastring += str(k) + '=' + str(subpar[k]) + ','
                casastring = casastring.rstrip(',')
                casastring += ')\n'

            # Known caveats:
            # 1) The return code of casapy is not properly recognized by
            #    the pipeline; wrapping it in a shell script works for
            #    successful runs, but failed runs seem to hang the
            #    pipeline...
            # 2) casapy cannot have two instances running from the same
            #    directory, so create temporary directories.
            casapydir = tempfile.mkdtemp(dir=work_dir)
            if casastring != '':
                casafilename = os.path.join(
                    work_dir, os.path.basename(infile) + '.casacommand.py')
                casacommandfile = open(casafilename, 'w')
                casacommandfile.write('try:\n')
                casacommandfile.write('    ' + casastring)
                casacommandfile.write('except SystemExit:\n')
                casacommandfile.write('    pass\n')
                casacommandfile.write('except:\n')
                casacommandfile.write('    import os\n')
                casacommandfile.write('    os._exit(1)\n')
                casacommandfile.close()
                args.append(casafilename)

            # Wrap the casapy invocation in an executable shell script
            somename = os.path.join(
                work_dir, os.path.basename(infile) + '.casashell.sh')
            commandstring = ''
            commandstring += executable
            for item in args:
                commandstring += ' ' + item

            shellscript = open(somename, 'w')
            shellscript.write('#!/bin/bash \n')
            shellscript.write('echo "Trying CASAPY command" \n')
            shellscript.write(commandstring)
            shellscript.close()

            import stat
            st = os.stat(somename)
            os.chmod(somename,
                     st.st_mode | stat.S_IXUSR | stat.S_IXGRP |
                     stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            cmd = [somename]
            with CatchLog4CPlus(
                casapydir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

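# For reference, the generated <infile>.casacommand.py wrapper written
# above has this shape (the clean() call is a hypothetical example of a
# casastring):
#
#   try:
#       clean(vis='obs.MS', niter=1000)
#   except SystemExit:
#       pass
#   except:
#       import os
#       os._exit(1)
#
# casapy's SystemExit is swallowed, while any other failure escalates to a
# hard exit code 1 that the wrapping shell script can propagate back to
# catch_segfaults().
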
def run(self, infile, working_dir, initscript, remove, target, clusterdesc,
        timestep, freqstep, half_window, threshold, demixdir, skymodel,
        db_host):
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Started processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        self.logger.debug("infile = %s", infile)
        self.logger.debug("working_dir = %s", working_dir)
        self.logger.debug("initscript = %s", initscript)
        self.logger.debug("remove = %s", remove)
        self.logger.debug("target = %s", target)
        self.logger.debug("clusterdesc = %s", clusterdesc)
        self.logger.debug("timestep = %d", timestep)
        self.logger.debug("freqstep = %d", freqstep)
        self.logger.debug("half_window = %d", half_window)
        self.logger.debug("threshold = %f", threshold)
        self.logger.debug("demixdir = %s", demixdir)
        self.logger.debug("skymodel = %s", skymodel)
        self.logger.debug("db_host = %s", db_host)

        # Initialise environment
        self.environment = read_initscript(self.logger, initscript)

        # Create the working directory, if it does not yet exist.
        if not os.path.exists(working_dir):
            os.makedirs(working_dir)

        # The output file names are based on the input filename; however,
        # they must be created in ``working_dir``.
        filename = os.path.split(infile)[1]
        outfile = os.path.join(working_dir, filename)
        key = os.path.join(working_dir, 'key_' + filename)
        mixingtable = os.path.join(working_dir, 'mixing_' + filename)
        basename = outfile.replace('_uv.MS', '') + '_'

        # If needed, run NDPPP to preflag the input file out to demix.MS
        t = pt.table(infile)
        shp = t.getcell("DATA", 0).shape
        t = 0
        mstarget = outfile.replace('uv', target)
        if os.system('rm -f -r ' + mstarget) != 0:
            return 1
        if (shp[0] == 64 or shp[0] == 128 or shp[0] == 256):
            f = open(basename + 'NDPPP_dmx.parset', 'w')
            f.write('msin = %s\n' % infile)
            f.write('msin.autoweight = True\n')
            f.write('msin.startchan = nchan/32\n')
            f.write('msin.nchan = 30*nchan/32\n')
            f.write('msout = %s\n' % mstarget)
            f.write('steps=[preflag]\n')
            f.write('preflag.type=preflagger\n')
            f.write('preflag.corrtype=auto\n')
            f.close()

            self.logger.info("Starting NDPPP demix ...")
            if not self._execute(['NDPPP', basename + 'NDPPP_dmx.parset']):
                return 1
        else:
            if infile == mstarget:
                self.logger.error("MS-file %s already exists" % mstarget)
                return 1
            else:
                self.logger.info("Copying MS-file: %s --> %s" %
                                 (infile, mstarget))
                if os.system('cp -r ' + infile + ' ' + mstarget) != 0:
                    return 1

        # Use heuristics to get a list of A-team sources that may need
        # to be removed. If the user specified a list of candidate A-team
        # sources to remove, then determine the intersection of both lists.
        # Otherwise just use the list obtained from heuristics.
        ateam_list = getAteamList(
            infile,
            outerDistance=2.e4,
            elLimit=5.,
            verbose=self.logger.isEnabledFor(logging.DEBUG))
        self.logger.debug("getAteamList returned: %s" % ateam_list)
        if remove:
            remove = list(set(remove).intersection(ateam_list))
        else:
            remove = ateam_list
        self.logger.info("Removing %d target(s) from %s: %s" %
                         (len(remove), mstarget, ', '.join(remove)))

        spc.shiftphasecenter(mstarget, remove, freqstep, timestep)

        # For each source to remove, and the target, do a freq/time squash
        # with NDPPP
        removeplustarget = numpy.append(remove, target)
        avgoutnames = []

        for rem in removeplustarget:
            if os.system('rm -f ' + basename + 'dmx_avg.parset') != 0:
                return 1
            f = open(basename + 'dmx_avg.parset', 'w')
            msin = outfile.replace('uv', rem)
            f.write('msin = %s\n' % msin)
            msout = msin.replace('.MS', '_avg.MS')
            f.write('msout = %s\n' % msout)
            f.write('steps=[avg]\n')
            f.write('avg.type = averager\n')
            f.write('avg.timestep = %d\n' % timestep)
            f.write('avg.freqstep = %d\n' % freqstep)
            f.close()

            self.logger.debug("Squashing %s to %s" % (msin, msout))
            if os.system('rm -f -r ' + msout) != 0:
                return 1
            if not self._execute(['NDPPP', basename + 'dmx_avg.parset']):
                return 1

            # Form avg output names.
            msin = outfile.replace('uv', rem)
            msout = msin.replace('.MS', '_avg.MS')
            avgoutnames.append(msout)
            msdem = msin.replace('.MS', '_avg_dem.MS')
            if os.system('rm -f -r ' + msdem) != 0:
                return 1

        self.logger.info("Starting the demixing algorithm")
        dmx.demixing(mstarget, mixingtable, avgoutnames, freqstep,
                     timestep, 4)
        self.logger.info("Finished the demixing algorithm")

        #
        # Run BBS on the demixed measurement sets
        #
        self.logger.info("Starting BBS run on demixed measurement sets")
        for i in remove:
            self.logger.info("Processing %s ..." % i)
            msin = outfile.replace('uv', i)
            msout = msin.replace('.MS', '_avg_dem.MS')
            vds_file = basename + i + '.vds'
            gds_file = basename + i + '.gds'

            self.logger.info("Creating vds & gds files...")
            if os.system('rm -f ' + vds_file + ' ' + gds_file) != 0:
                return 1
            if not self._execute(['makevds', clusterdesc, msout, vds_file]):
                return 1
            if not self._execute(['combinevds', gds_file, vds_file]):
                return 1

            self.logger.info("Starting first calibration run")
            command = ['calibrate', '-f', '--key', key, '--cluster-desc',
                       clusterdesc, '--db', db_host, '--db-user',
                       'postgres', gds_file,
                       os.path.join(demixdir, 'bbs_' + i + '.parset'),
                       skymodel, working_dir]
            if not self._execute(command):
                return 1

            self.logger.info("Generating smoothed instrument model")
            input_parmdb = os.path.join(msout, 'instrument')
            output_parmdb = os.path.join(msout, 'instrument_smoothed')
            # smoothparmdb indirectly creates a subprocess, so we must
            # make sure that the correct environment is set up here.
            env = os.environ
            os.environ = self.environment
            smdx.smoothparmdb(input_parmdb, output_parmdb, half_window,
                              threshold)
            os.environ = env

            self.logger.info("Starting second calibration run, "
                             "using smoothed instrument model")
            command = ['calibrate', '--clean', '--skip-sky-db',
                       '--skip-instrument-db', '--instrument-name',
                       'instrument_smoothed', '--key', key,
                       '--cluster-desc', clusterdesc, '--db', db_host,
                       '--db-user', 'postgres', gds_file,
                       os.path.join(demixdir,
                                    'bbs_' + i + '_smoothcal.parset'),
                       skymodel, working_dir]
            if not self._execute(command):
                return 1

        # Form the list of input files and subtract.
        self.logger.info("Subtracting removed sources from the data ...")
        demfiles = [outfile.replace('uv', rem + '_avg_dem')
                    for rem in remove]
        sfa.subtract_from_averaged(mstarget.replace('.MS', '_avg.MS'),
                                   mixingtable, demfiles,
                                   mstarget.replace('.MS', '_sub.MS'))

    # We're done.
    return 0

def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Create the working directory if necessary, guarding against the
        # race condition that occurs when more than one process creates it
        # on the same filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']

        # Deal with multiple input files for wsclean
        if argsformat == 'wsclean':
            for i in reversed(xrange(len(args))):
                if str(args[i]).startswith('[') and \
                   str(args[i]).endswith(']'):
                    tmplist = args.pop(i).lstrip('[').rstrip(']').split(',')
                    for val in reversed(tmplist):
                        args.insert(i, val.strip(' \'\"'))

        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    if str(v).startswith('[') and str(v).endswith(']'):
                        v = v.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = v.split(',')
                    else:
                        multargs = v.split(' ')
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            if argsformat == 'losoto':
                args.append(parsetname)
            else:
                args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

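# The wsclean branch above unpacks bracketed list values such as
# '[a.ms, b.ms]' into separate argv entries. The same transformation as a
# standalone sketch:
def flatten_bracketed(args):
    flattened = []
    for arg in args:
        text = str(arg)
        if text.startswith('[') and text.endswith(']'):
            flattened.extend(v.strip(' \'\"') for v in text[1:-1].split(','))
        else:
            flattened.append(arg)
    return flattened

# flatten_bracketed(['-name', 'img', '[a.ms, b.ms]'])
# -> ['-name', 'img', 'a.ms', 'b.ms']
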
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Pop the recipe-control options out of kwargs before they are turned
    # into command-line arguments
    if 'replace-sourcedb' in kwargs:
        self.replace_sourcedb = kwargs['replace-sourcedb']
        kwargs.pop('replace-sourcedb')
    if 'replace-parmdb' in kwargs:
        self.replace_parmdb = kwargs['replace-parmdb']
        kwargs.pop('replace-parmdb')
    if 'dry-run' in kwargs:
        self.dry_run = kwargs['dry-run']
        kwargs.pop('dry-run')
    if 'sourcedb' in kwargs:
        self.sourcedb = kwargs['sourcedb']
        kwargs.pop('sourcedb')
    if 'parmdb' in kwargs:
        self.parmdb = kwargs['parmdb']
        kwargs.pop('parmdb')
    if 'sourcedb-name' in kwargs:
        self.sourcedb_basename = kwargs['sourcedb-name']
        self.replace_sourcedb = True
        kwargs.pop('sourcedb-name')
    if 'parmdb-name' in kwargs:
        self.parmdb_basename = kwargs['parmdb-name']
        self.replace_parmdb = True
        kwargs.pop('parmdb-name')
    if 'force' in kwargs:
        self.replace_parmdb = True
        self.replace_sourcedb = True
        kwargs.pop('force')

    numthreads = 1
    if 'numthreads' in kwargs:
        numthreads = kwargs['numthreads']
        kwargs.pop('numthreads')
    args.append('--numthreads=' + str(numthreads))

    if 'observation' in kwargs:
        self.observation = kwargs.pop('observation')
    if 'catalog' in kwargs:
        self.catalog = kwargs.pop('catalog')

    self.createsourcedb()
    self.createparmdb()
    if 'no-columns' not in kwargs:
        self.addcolumns()
    else:
        kwargs.pop('no-columns')

    args.append('--sourcedb=' + self.sourcedb_path)
    args.append('--parmdb=' + self.parmdb_path)
    args.append(self.observation)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the executable is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Create the working directory if necessary, guarding against the
        # race condition that occurs when more than one process creates it
        # on the same filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in list(kwargs.items()):
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.append(parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

    # We need some signal to the master script that the script ran ok.
    self.outputs['ok'] = True
    return 0

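# Several recipes above turn the kwargs dict into a parset file next to the
# input data. A helper capturing that pattern, assuming only the Parset API
# already used above (add() and writeFile()); the import path is an
# assumption and may differ per installation:
import os


def write_node_parset(work_dir, infile, kwargs):
    from lofarpipe.support.parset import Parset  # assumed import path
    nodeparset = Parset()
    for key, value in kwargs.items():
        nodeparset.add(key, value)
    parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
    nodeparset.writeFile(parsetname)
    return parsetname
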
def run(self, source_node, source_path, target_path, globalfs):
    self.globalfs = globalfs
    # Time execution of this job
    with log_time(self.logger):
        return self._copy_single_file_using_rsync(
            source_node, source_path, target_path)

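# _copy_single_file_using_rsync() is defined elsewhere in this node; a
# minimal sketch of what such a helper could look like (an assumption, not
# the actual implementation), using rsync's standard node:path syntax:
import subprocess


def copy_single_file_using_rsync(source_node, source_path, target_path):
    command = ['rsync', '-a',
               '%s:%s' % (source_node, source_path), target_path]
    # Return 0 on success, non-zero on failure, matching the run() contract
    return subprocess.call(command)
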
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov):
    """
    :param executable: Path to the awimager executable
    :param environment: environment for catch_segfaults (executable runner)
    :param parset: parameters for the awimager
    :param working_directory: directory in which to place temporary files
    :param output_image: location and filename to store the output images;
        the individual images are appended with type extensions
    :param concatenated_measurement_set: input measurement set
    :param sourcedb_path: path to the sourcedb used to create the image
        mask
    :param mask_patch_size: scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param fov: if autogenerate_parameters is False, calculate the imaging
        parameters (cellsize, npix, wprojplanes, wmax) relative to this fov
    :rtype: self.outputs["image"] The path to the output image
    """
    self.logger.info("Start imager_awimager node run:")
    log4_cplus_name = "imager_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset
        parset_object = get_parset(parset)

        #******************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on measurement set
        #    and the parset
        cell_size, npix, w_max, w_proj_planes = \
            self._get_imaging_parameters(
                concatenated_measurement_set,
                parset,
                autogenerate_parameters,
                specify_fov,
                fov)

        self.logger.info("Using autogenerated parameters:")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1}".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the parset.
        #    Create the target directory if it does not exist.
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask
        mask_file_path = self._create_mask(npix, cell_size, output_image,
                                           concatenated_measurement_set,
                                           executable, working_directory,
                                           log4_cplus_name, sourcedb_path,
                                           mask_patch_size, image_path_head)

        # *****************************************************************
        # 4. Update the parset with calculated parameters and the output
        #    image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus logging
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
            # 'mask': str(mask_file_path),  # TODO: reintroduce the mask,
            # excluded to speed up this debug stage
        }

        # Save the parset at the target directory for the image
        calculated_parset_path = os.path.join(image_path_head, "parset.par")

        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            # Copy the temporary file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # Remove the temporary file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the updated parameterset
        cmd = [executable, calculated_parset_path]
        try:
            with CatchLog4CPlus(
                working_directory,
                self.logger.name + "." + os.path.basename(log4_cplus_name),
                os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)
        # Thrown by catch_segfaults
        except CalledProcessError as exception:
            self.logger.error(str(exception))
            return 1
        except Exception as exception:
            self.logger.error(str(exception))
            return 1

    # The actual output image always carries the static .restored
    # extension, awimager's default
    self.outputs["image"] = output_image + ".restored"
    return 0

def run(self, source_node, source_path, target_path, globalfs, allow_move):
    self.globalfs = globalfs
    # Time execution of this job
    with log_time(self.logger):
        return self._copy_single_file_using_rsync(
            source_node, source_path, target_path, allow_move)

def run(self, environment, parset, working_dir, processed_ms_dir,
        ndppp_executable, output_measurement_set, time_slices_per_image,
        subbands_per_group, raw_ms_mapfile, asciistat_executable,
        statplot_executable, msselect_executable, rficonsole_executable,
        add_beam_tables):
    """
    Entry point for the node recipe
    """
    self.environment.update(environment)

    with log_time(self.logger):
        input_map = DataMap.load(raw_ms_mapfile)

        #******************************************************************
        # I. Create the directories used in this recipe
        create_directory(processed_ms_dir)

        # Time slice directory: assure an empty directory, since stale
        # data is problematic for dppp
        time_slice_dir = os.path.join(working_dir, _time_slice_dir_name)
        create_directory(time_slice_dir)
        for root, dirs, files in os.walk(time_slice_dir):
            for file_to_remove in files:
                os.unlink(os.path.join(root, file_to_remove))
            for dir_to_remove in dirs:
                shutil.rmtree(os.path.join(root, dir_to_remove))
        self.logger.debug("Created directory: {0}".format(time_slice_dir))
        self.logger.debug("and assured it is empty")

        #******************************************************************
        # 1. Copy the input files
        copied_ms_map = self._copy_input_files(processed_ms_dir, input_map)

        #******************************************************************
        # 2. Run dppp: collect frequencies into larger groups
        time_slices_path_list = \
            self._run_dppp(working_dir, time_slice_dir,
                           time_slices_per_image, copied_ms_map,
                           subbands_per_group, processed_ms_dir, parset,
                           ndppp_executable)

        # If no timeslices were created, bail out with exit status 1
        if len(time_slices_path_list) == 0:
            self.logger.error("No timeslices were created.")
            self.logger.error("Exiting with error state 1")
            return 1

        self.logger.debug(
            "Produced time slices: {0}".format(time_slices_path_list))

        #***********************************************************
        # 3. Run rficonsole: flag data points which are corrupted
        self._run_rficonsole(rficonsole_executable, time_slice_dir,
                             time_slices_path_list)

        #******************************************************************
        # 4. Add imaging columns to each timeslice;
        #    ndppp_executable fails if they are not present
        for time_slice_path in time_slices_path_list:
            pt.addImagingColumns(time_slice_path)
            self.logger.debug(
                "Added imaging columns to time_slice: {0}".format(
                    time_slice_path))

        #*****************************************************************
        # 5. Filter bad stations
        time_slice_filtered_path_list = self._filter_bad_stations(
            time_slices_path_list, asciistat_executable,
            statplot_executable, msselect_executable)

        #*****************************************************************
        # Add measurement tables
        if add_beam_tables:
            self.add_beam_tables(time_slice_filtered_path_list)

        #******************************************************************
        # 6. Perform the (virtual) concatenation of the timeslices
        self._concat_timeslices(time_slice_filtered_path_list,
                                output_measurement_set)

        #******************************************************************
        # Return
        self.outputs["time_slices"] = time_slices_path_list

    return 0

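# _concat_timeslices() performs the virtual concatenation of step 6;
# python-casacore ships msconcat() for exactly this, so a sketch of the
# step (an assumption about the helper, not its actual body) is:
import casacore.tables as pt


def concat_timeslices_sketch(time_slice_paths, output_ms):
    # msconcat() creates an MS that references the input tables rather
    # than copying their data, i.e. a virtual concatenation
    pt.msconcat(time_slice_paths, output_ms)
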
def run(self, environment, parset, working_dir, processed_ms_dir,
        ndppp_executable, output_measurement_set, subbandgroups_per_ms,
        subbands_per_subbandgroup, ms_mapfile, asciistat_executable,
        statplot_executable, msselect_executable, rficonsole_executable,
        add_beam_tables, globalfs, final_output_path):
    """
    Entry point for the node recipe
    """
    self.environment.update(environment)
    self.globalfs = globalfs

    with log_time(self.logger):
        input_map = DataMap.load(ms_mapfile)

        #******************************************************************
        # I. Create the directories used in this recipe
        create_directory(processed_ms_dir)
        create_directory(working_dir)

        # Time slice directory: assure an empty directory, since stale
        # data is problematic for dppp
        time_slice_dir = os.path.join(working_dir, _time_slice_dir_name)
        create_directory(time_slice_dir)
        for root, dirs, files in os.walk(time_slice_dir):
            for file_to_remove in files:
                os.unlink(os.path.join(root, file_to_remove))
            for dir_to_remove in dirs:
                shutil.rmtree(os.path.join(root, dir_to_remove))
        self.logger.debug("Created directory: {0}".format(time_slice_dir))
        self.logger.debug("and assured it is empty")

        #******************************************************************
        # 1. Copy the input files
        processed_ms_map = self._copy_input_files(processed_ms_dir,
                                                  input_map)

        #******************************************************************
        # 2. Run dppp: collect frequencies into larger groups
        time_slices_path_list = \
            self._run_dppp(working_dir, time_slice_dir,
                           subbandgroups_per_ms, processed_ms_map,
                           subbands_per_subbandgroup, processed_ms_dir,
                           parset, ndppp_executable)

        # If no timeslices were created, bail out with exit status 1
        if len(time_slices_path_list) == 0:
            self.logger.error("No timeslices were created.")
            self.logger.error("Exiting with error state 1")
            return 1

        self.logger.debug(
            "Produced time slices: {0}".format(time_slices_path_list))

        #***********************************************************
        # 3. Run rficonsole: flag data points which are corrupted
        #    (currently disabled)
        if False:
            self._run_rficonsole(rficonsole_executable, time_slice_dir,
                                 time_slices_path_list)

        #******************************************************************
        # 4. Add imaging columns to each timeslice;
        #    ndppp_executable fails if they are not present
        for time_slice_path in time_slices_path_list:
            pt.addImagingColumns(time_slice_path)
            self.logger.debug(
                "Added imaging columns to time_slice: {0}".format(
                    time_slice_path))

        #*****************************************************************
        # 5. Filter bad stations (currently disabled)
        if False:
            time_slice_filtered_path_list = self._filter_bad_stations(
                time_slices_path_list, asciistat_executable,
                statplot_executable, msselect_executable)
        else:
            # Use the unfiltered list
            time_slice_filtered_path_list = time_slices_path_list

        #*****************************************************************
        # 6. Add measurement tables
        if add_beam_tables:
            self.add_beam_tables(time_slice_filtered_path_list)

        #******************************************************************
        # 7. Convert the polarization
        self._convert_polarization(time_slice_filtered_path_list)

        #******************************************************************
        # 8. Perform the (virtual) concatenation of the timeslices
        self._concat_timeslices(time_slice_filtered_path_list,
                                output_measurement_set)

        #*****************************************************************
        # 9. Use table.copy(deep=True) to copy the ms to the correct
        #    output location: create a new measurement set.
        self._deep_copy_to_output_location(output_measurement_set,
                                           final_output_path)

        # Write the actually used ms for the created dataset to the input
        # mapfile
        processed_ms_map.save(ms_mapfile)

        #******************************************************************
        # Return
        self.outputs["time_slices"] = time_slices_path_list

    return 0

def run(self, environment, parset, working_dir, processed_ms_dir,
        ndppp_executable, output_measurement_set, subbandgroups_per_ms,
        subbands_per_subbandgroup, ms_mapfile, asciistat_executable,
        statplot_executable, msselect_executable, rficonsole_executable,
        add_beam_tables, globalfs, final_output_path):
    """
    Entry point for the node recipe
    """
    self.environment.update(environment)
    self.globalfs = globalfs

    with log_time(self.logger):
        input_map = DataMap.load(ms_mapfile)

        #******************************************************************
        # I. Create the directories used in this recipe
        create_directory(processed_ms_dir)
        create_directory(working_dir)
        create_directory(os.path.dirname(output_measurement_set))
        create_directory(os.path.dirname(final_output_path))

        # Time slice directory: ensure it exists and is empty, since stale
        # data is problematic for dppp
        time_slice_dir = os.path.join(working_dir, _time_slice_dir_name)
        create_directory(time_slice_dir)
        for root, dirs, files in os.walk(time_slice_dir):
            for file_to_remove in files:
                os.unlink(os.path.join(root, file_to_remove))
            for dir_to_remove in dirs:
                shutil.rmtree(os.path.join(root, dir_to_remove))
        self.logger.debug("Created directory: {0}".format(time_slice_dir))
        self.logger.debug("and assured it is empty")

        #******************************************************************
        # 1. Copy the input files
        processed_ms_map = self._copy_input_files(processed_ms_dir,
                                                  input_map)

        #******************************************************************
        # 2. Run dppp: collect frequencies into larger groups
        time_slices_path_list = \
            self._run_dppp(working_dir, time_slice_dir,
                subbandgroups_per_ms, processed_ms_map,
                subbands_per_subbandgroup, processed_ms_dir, parset,
                ndppp_executable)

        # If no timeslices were created, bail out with exit status 1
        if len(time_slices_path_list) == 0:
            self.logger.error("No timeslices were created.")
            self.logger.error("Exiting with error state 1")
            return 1

        self.logger.debug(
            "Produced time slices: {0}".format(time_slices_path_list))

        #***********************************************************
        # 3. Run rficonsole: flag data points which are corrupted
        #    (currently disabled)
        if False:
            self._run_rficonsole(rficonsole_executable, time_slice_dir,
                                 time_slices_path_list)

        #******************************************************************
        # 4. Add imaging columns to each timeslice
        #    ndppp_executable fails if they are not present
        for time_slice_path in time_slices_path_list:
            pt.addImagingColumns(time_slice_path)
            self.logger.debug(
                "Added imaging columns to time_slice: {0}".format(
                    time_slice_path))

        #*****************************************************************
        # 5. Filter bad stations (currently disabled)
        #if not(asciistat_executable == "" or
        #       statplot_executable == "" or
        #       msselect_executable == "" or True):
        if False:
            time_slice_filtered_path_list = self._filter_bad_stations(
                time_slices_path_list, asciistat_executable,
                statplot_executable, msselect_executable)
        else:
            # Use the unfiltered list
            time_slice_filtered_path_list = time_slices_path_list

        #*****************************************************************
        # 6. Add beam (measurement) tables
        if add_beam_tables:
            self.add_beam_tables(time_slice_filtered_path_list)

        #******************************************************************
        # 7. Convert the polarization
        self._convert_polarization(time_slice_filtered_path_list)

        #******************************************************************
        # 8. Perform the (virtual) concatenation of the timeslices
        self._concat_timeslices(time_slice_filtered_path_list,
                                output_measurement_set)

        #*****************************************************************
        # 9. Use table.copy(deep=True) to copy the MS to the correct
        #    output location, creating a new measurement set
        self._deep_copy_to_output_location(output_measurement_set,
                                           final_output_path)

        # Write the MSs actually used for the created dataset back to the
        # input mapfile
        processed_ms_map.save(ms_mapfile)

        #******************************************************************
        # Return the produced time slices
        self.outputs["time_slices"] = time_slices_path_list
        return 0
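# Step 9 above relies on self._deep_copy_to_output_location, whose body is
# not shown in this document. A minimal sketch of what it might look like
# with the casacore/pyrap tables API (an assumption, not the pipeline's
# actual implementation):
import pyrap.tables as pt

def _deep_copy_to_output_location(self, output_measurement_set,
                                  final_output_path):
    # table.copy(newtablename, deep=True) resolves table references, so
    # the virtually concatenated MS is written out as one self-contained
    # measurement set at the final location.
    ms = pt.table(output_measurement_set, ack=False)
    try:
        ms.copy(final_output_path, deep=True)
    finally:
        ms.close()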
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This function contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        if infile[0] == '[':
            infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
            reffile = infiles[0]
        else:
            reffile = infile

        if os.path.exists(reffile):
            self.logger.info("Processing %s" % reffile)
        else:
            self.logger.error("Dataset %s does not exist" % reffile)
            return 1

        # Check if the executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Guard against the race condition that occurs when more than one
        # process runs on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            self.logger.error(
                "Nodescript \"executable_casa.py\" requires "
                "\"parsetasfile\" to be True!")
            return 1

        nodeparset = Parset()
        sublist = []
        for k, v in kwargs.items():
            nodeparset.add(k, v)
            # was: if str(k).find('.'):  -- also truthy when no dot is
            # present, since find() returns -1
            if '.' in str(k):
                if not str(k).split('.')[0] in sublist:
                    sublist.append(str(k).split('.')[0])

        # Build the CASA command string.
        # Quick hacks below, for proof of concept.
        casastring = ''
        for sub in sublist:
            subpar = nodeparset.makeSubset(
                nodeparset.fullModuleName(sub) + '.')
            # += rather than =, so earlier tasks are not discarded
            casastring += sub + '('
            for k in subpar.keys():
                if str(subpar[k]).find('/') == 0:
                    casastring += str(k) + '=' + "'" + \
                        str(subpar[k]) + "'" + ','
                elif str(subpar[k]).find('casastr/') == 0:
                    # Slice off the 'casastr/' prefix; str.strip() would
                    # instead remove any of those characters from both ends
                    casastring += str(k) + '=' + "'" + \
                        str(subpar[k])[len('casastr/'):] + "'" + ','
                elif str(subpar[k]).lower() in ('false', 'true'):
                    casastring += str(k) + '=' + str(subpar[k]) + ','
                else:
                    # Test if int/float or list of int/float
                    try:
                        self.logger.info('value: {}'.format(subpar[k]))
                        float(str(subpar[k]))
                        is_int_float = True
                    except ValueError:
                        is_int_float = False
                    if is_int_float:
                        casastring += str(k) + '=' + str(subpar[k]) + ','
                    else:
                        if '[' in str(subpar[k]) or '(' in str(subpar[k]):
                            # Check if list of int/float or of strings
                            list_vals = [f.strip() for f in
                                str(subpar[k]).strip('[]()').split(',')]
                            is_int_float = True
                            for list_val in list_vals:
                                try:
                                    float(list_val)
                                except ValueError:
                                    is_int_float = False
                                    break
                            if is_int_float:
                                casastring += str(k) + '=' + \
                                    str(subpar[k]) + ','
                            else:
                                casastring += str(k) + '=' + \
                                    '[{}]'.format(','.join(
                                        "'" + list_val + "'"
                                        for list_val in list_vals)) + ','
                        else:
                            # Simple string
                            casastring += str(k) + '=' + "'" + \
                                str(subpar[k]) + "'" + ','
            casastring = casastring.rstrip(',')
            casastring += ')\n'

        # 1) The return code of casapy is not properly recognized by the
        #    pipeline; wrapping it in a shell script works for successful
        #    runs, but failed runs seem to hang the pipeline...
        # 2) casapy cannot have two instances running from the same
        #    directory, so create a temporary directory per run
        casapydir = tempfile.mkdtemp(dir=work_dir)
        if casastring != '':
            casafilename = os.path.join(
                work_dir, os.path.basename(reffile) + '.casacommand.py')
            casacommandfile = open(casafilename, 'w')
            casacommandfile.write(casastring)
            casacommandfile.close()
            args.append(casafilename)

        somename = os.path.join(
            work_dir, os.path.basename(reffile) + '.casashell.sh')
        commandstring = executable
        for item in args:
            if str(item).find(' ') > -1 or str(item).find('[') > -1:
                commandstring += ' "' + item + '"'
            else:
                commandstring += ' ' + item

        shellfile = open(somename, 'w')
        shellfile.write('#!/bin/bash \n')
        shellfile.write('echo "Trying CASAPY command" \n')
        shellfile.write(commandstring + ' >& casa.log\n')
        shellfile.close()

        # Make the wrapper script executable
        st = os.stat(somename)
        os.chmod(somename,
                 st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            cmd = [somename]
            with CatchLog4CPlus(
                casapydir,
                self.logger.name + "." + os.path.basename(reffile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

        return 0
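# A toy illustration (assumed inputs, not pipeline code) of the quoting
# rules used to build `casastring` above, for parset keys of the form
# task.parameter:
example_kwargs = {
    'clean.vis': 'casastr//data/run1.MS',  # 'casastr/' prefix: quoted
    'clean.niter': '1000',                 # parses as a number: unquoted
    'clean.interactive': 'False',          # boolean literal: unquoted
}
# Feeding these through the loop above yields a command file containing
# roughly:
#   clean(vis='/data/run1.MS',niter=1000,interactive=False)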
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    self.msout_original = kwargs['msout'].rstrip('/')
    kwargs.pop('msout')
    self.msout_destination_dir = os.path.dirname(self.msout_original)

    self.scratch_dir = tempfile.mkdtemp(dir=kwargs['local_scratch_dir'])
    kwargs.pop('local_scratch_dir')
    self.logger.info('Using {} as scratch directory'.format(
        self.scratch_dir))

    # Set up scratch paths
    self.msout_scratch = os.path.join(
        self.scratch_dir, os.path.basename(self.msout_original))
    args.append('msout=' + self.msout_scratch)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the executable is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Guard against the race condition that occurs when more than one
        # process runs on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']
        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    multargs = v.split(' ')
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            self.cleanup()
            return 1
        except Exception as err:
            self.logger.error(str(err))
            self.cleanup()
            return 1

        # Explicit success status, matching the other node scripts
        return 0
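# The error paths above call self.cleanup(), whose body is not shown in
# this document. A plausible sketch (an assumption, not the actual
# helper) that simply discards the scratch area:
import shutil

def cleanup(self):
    # Hypothetical: drop the temporary scratch directory. On success, the
    # finished MS in self.msout_scratch would presumably first be moved
    # to self.msout_destination_dir.
    shutil.rmtree(self.scratch_dir, ignore_errors=True)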
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Guard against the race condition that occurs when more than one
        # process runs on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']

        # Deal with multiple input files for wsclean: expand any
        # '[a,b,...]' list argument into separate positional arguments
        if argsformat == 'wsclean':
            for i in reversed(xrange(len(args))):
                if str(args[i]).startswith('[') and \
                        str(args[i]).endswith(']'):
                    tmplist = args.pop(i).lstrip('[').rstrip(']').split(',')
                    for val in reversed(tmplist):
                        args.insert(i, val.strip(' \'\"'))

        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    if str(v).startswith('[') and str(v).endswith(']'):
                        v = v.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = v.split(',')
                    else:
                        multargs = v.split(' ')
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            if argsformat == 'losoto':
                args.append(parsetname)
            else:
                args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

        # Explicit success status, matching the other node scripts
        return 0
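# A toy illustration (assumed inputs, not pipeline code) of the 'wsclean'
# argument expansion above:
example_args = ['[obs1.MS, obs2.MS]']    # list-valued positional argument
example_kwargs = {'size': '1024 1024'}   # multi-valued option
# The list expansion rewrites example_args to ['obs1.MS', 'obs2.MS'];
# the kwargs loop then prepends ['-size', '1024', '1024'], so the final
# command line is roughly:
#   <executable> -size 1024 1024 obs1.MS obs2.MS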
def run(self, source_node, source_path, target_path):
    # Time execution of this job
    with log_time(self.logger):
        return self._copy_single_file_using_rsync(
            source_node, source_path, target_path)
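# _copy_single_file_using_rsync is defined elsewhere; a minimal sketch of
# such a helper (hypothetical, assuming passwordless ssh between nodes):
import subprocess

def _copy_single_file_using_rsync(self, source_node, source_path,
                                  target_path):
    # Pull the file from the remote node; rsync's exit status doubles as
    # the node recipe's return code (0 on success).
    cmd = ['rsync', '-a', '-e', 'ssh',
           '{0}:{1}'.format(source_node, source_path), target_path]
    self.logger.debug("Running: %s" % " ".join(cmd))
    return subprocess.call(cmd)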
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    self.msout_original = kwargs['msout'].rstrip('/')
    kwargs.pop('msout')
    self.msout_destination_dir = os.path.dirname(self.msout_original)

    # Set up scratch paths
    scratch_dir = kwargs['local_scratch_dir']
    kwargs.pop('local_scratch_dir')
    try:
        os.mkdir(scratch_dir)
    except OSError:
        pass
    self.scratch_dir = tempfile.mkdtemp(dir=scratch_dir)
    self.logger.info('Using {} as scratch directory'.format(
        self.scratch_dir))
    self.msout_scratch = os.path.join(
        self.scratch_dir, os.path.basename(self.msout_original))
    args.append('msout=' + self.msout_scratch)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if the executable is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # Guard against the race condition that occurs when more than one
        # process runs on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']
        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    multargs = v.split(' ')
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            self.cleanup()
            return 1
        except Exception as err:
            self.logger.error(str(err))
            self.cleanup()
            return 1

        # Explicit success status, matching the other node scripts
        return 0
def run(self, executable, environment, parset, working_directory,
        output_image, concatenated_measurement_set, sourcedb_path,
        mask_patch_size, autogenerate_parameters, specify_fov, fov,
        major_cycle, nr_cycles, perform_self_cal):
    """
    :param executable: Path to the awimager executable
    :param environment: Environment for catch_segfaults (executable runner)
    :param parset: Parameters for the awimager
    :param working_directory: Directory in which to place temporary files
    :param output_image: Location and filename for storing the output
        images; the multiple images are appended with type extensions
    :param concatenated_measurement_set: Input measurement set
    :param sourcedb_path: Path to the sourcedb used to create the image mask
    :param mask_patch_size: Scaling of the patch around the source in the
        mask
    :param autogenerate_parameters: Turns on the autogeneration of:
        cellsize, npix, wprojplanes, wmax, fov
    :param fov: If autogenerate_parameters is False, calculate the image
        parameters (cellsize, npix, wprojplanes, wmax) relative to this fov
    :param major_cycle: Number of the self-calibration cycle; used to
        determine the imaging parameters: cellsize, npix, wprojplanes,
        wmax, fov
    :param nr_cycles: The requested number of self-cal cycles
    :param perform_self_cal: Bool selecting the self-cal functionality
        instead of the old semi-automatic functionality
    :rtype: self.outputs["image"], the path to the output image
    """
    self.logger.info("Start selfcal_awimager node run:")
    log4_cplus_name = "selfcal_awimager"
    self.environment.update(environment)

    with log_time(self.logger):
        # Read the parameters as specified in the parset
        parset_object = get_parset(parset)

        # *************************************************************
        # 1. Calculate awimager parameters that depend on the measurement
        #    set and the parset
        if perform_self_cal:
            self.logger.info("Calculating selfcalibration parameters")
            cell_size, npix, w_max, w_proj_planes, \
                UVmin, UVmax, robust, threshold = \
                self._get_selfcal_parameters(
                    concatenated_measurement_set, parset, major_cycle,
                    nr_cycles)

            self._save_selfcal_info(concatenated_measurement_set,
                                    major_cycle, npix, UVmin, UVmax)
        else:
            self.logger.info(
                "Calculating parameters (NOT selfcalibration)")
            cell_size, npix, w_max, w_proj_planes = \
                self._get_imaging_parameters(
                    concatenated_measurement_set, parset,
                    autogenerate_parameters, specify_fov, fov)

        self.logger.info("Using autogenerated parameters;")
        self.logger.info(
            "Calculated parameters: cell_size: {0}, npix: {1}".format(
                cell_size, npix))
        self.logger.info("w_max: {0}, w_proj_planes: {1}".format(
            w_max, w_proj_planes))

        # ****************************************************************
        # 2. Get the target image location from the mapfile for the
        #    parset. Create the target directory if it does not exist
        image_path_head = os.path.dirname(output_image)
        create_directory(image_path_head)
        self.logger.debug("Created directory to place awimager output"
                          " files: {0}".format(image_path_head))

        # ****************************************************************
        # 3. Create the mask (currently disabled)
        #mask_file_path = self._create_mask(npix, cell_size, output_image,
        #    concatenated_measurement_set, executable,
        #    working_directory, log4_cplus_name, sourcedb_path,
        #    mask_patch_size, image_path_head)

        # *****************************************************************
        # 4. Update the parset with the calculated parameters and the
        #    output image
        patch_dictionary = {
            'uselogger': 'True',  # enables log4cplus logging
            'ms': str(concatenated_measurement_set),
            'cellsize': str(cell_size),
            'npix': str(npix),
            'wmax': str(w_max),
            'wprojplanes': str(w_proj_planes),
            'image': str(output_image),
            'maxsupport': str(npix),
            # 'mask': str(mask_file_path),  # TODO: reintroduce the mask,
            # excluded to speed up in this debug stage
        }

        # Add some additional keys for the self-calibration method
        if perform_self_cal:
            self_cal_patch_dict = {
                'weight': 'briggs',
                'padding': str(1.18),
                'niter': str(1000000),
                'operation': 'mfclark',
                'timewindow': '300',
                'fits': '',
                'threshold': str(threshold),
                'robust': str(robust),
                'UVmin': str(UVmin),
                'UVmax': str(UVmax),
                'maxbaseline': str(10000000),
                'select': str("sumsqr(UVW[:2])<1e12"),
            }
            patch_dictionary.update(self_cal_patch_dict)

        # Save the parset at the target directory for the image
        calculated_parset_path = os.path.join(image_path_head,
                                              "parset.par")

        # Patch outside the try block, so that the finally clause never
        # sees an unassigned temp_parset_filename
        temp_parset_filename = patch_parset(parset, patch_dictionary)
        try:
            # Copy the tmp file to the final location
            shutil.copyfile(temp_parset_filename, calculated_parset_path)
            self.logger.debug("Wrote parset for awimager run: {0}".format(
                calculated_parset_path))
        finally:
            # Remove the temp file
            os.remove(temp_parset_filename)

        # *****************************************************************
        # 5. Run the awimager with the parameterset
        cmd = [executable, calculated_parset_path]
        self.logger.debug("Parset used for awimager run:")
        self.logger.debug(cmd)
        try:
            with CatchLog4CPlus(
                working_directory,
                self.logger.name + "." + os.path.basename(log4_cplus_name),
                os.path.basename(executable)
            ) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger, usageStats=self.resourceMonitor)
        # Thrown by catch_segfaults
        except CalledProcessError as exception:
            self.logger.error(str(exception))
            return 1
        except Exception as exception:
            self.logger.error(str(exception))
            return 1