def _prepare_demix_step(self, stepname, **kwargs):
    """
    Prepare for a demixing step. This requires the setting of some
    extra keys in the parset, as well as testing which A-team sources
    must actually be demixed.

    Parameters:
        `stepname`: name of the demixing step in the parset.
        `kwargs`  : dict of extra arguments. The keys read here are
                    'parmdb', 'sourcedb', 'demix_always',
                    'demix_if_needed' and 'infile'; a missing key raises
                    KeyError.
    Returns:
        patch_dictionary that must be applied to the parset. It always
        contains `<stepname>.subtractsources` (a sorted list of source
        names) and, when the corresponding argument is non-empty,
        `<stepname>.instrumentmodel` and `<stepname>.skymodel`.
    """
    patch_dictionary = {}
    if kwargs['parmdb']:
        patch_dictionary[stepname + '.instrumentmodel'] = kwargs['parmdb']
    if kwargs['sourcedb']:
        patch_dictionary[stepname + '.skymodel'] = kwargs['sourcedb']

    demix_always = set(kwargs['demix_always'])
    demix_if_needed = set(kwargs['demix_if_needed'])

    # If the user specified a list of candidate A-team sources to remove,
    # then determine the intersection of that list and the list of sources
    # that need demixing according to the heuristics of getAteamList().
    if demix_if_needed:
        # The find_a_team module lives in the "demix" directory next to
        # the running script. Only touch sys.path (and pay for the import)
        # when the heuristics are really needed, and never add the same
        # directory twice on repeated calls.
        demix_dir = os.path.join(os.path.dirname(sys.argv[0]), "demix")
        if demix_dir not in sys.path:
            sys.path.insert(0, demix_dir)
        from find_a_team import getAteamList

        ateam_list = getAteamList(
            kwargs['infile'],
            outerDistance=2.e4,
            elLimit=5.,
            verbose=self.logger.isEnabledFor(logging.DEBUG)
        )
        # Lazy logging arguments: "%s" % value would raise TypeError if
        # the helper ever returned a tuple.
        self.logger.debug("getAteamList returned: %s", ateam_list)
        demix_if_needed.intersection_update(ateam_list)

    # Determine the complete set of sources to be demixed. Sorting makes
    # the parset entry and the log line reproducible between runs.
    demix_sources = sorted(demix_always.union(demix_if_needed))
    self.logger.info(
        "Removing %d target(s) from %s: %s",
        len(demix_sources), kwargs['infile'], ', '.join(demix_sources)
    )
    patch_dictionary[stepname + '.subtractsources'] = demix_sources

    # Return the patch dictionary.
    return patch_dictionary
def run(self, infile, working_dir, initscript, remove, target,
        clusterdesc, timestep, freqstep, half_window, threshold,
        demixdir, skymodel, db_host):
    """
    Demix the dataset `infile`, writing all products into `working_dir`.

    The steps are: optionally preflag (or else copy) the input, shift the
    phase centre towards every source to remove, average each shifted
    dataset with NDPPP, run the demixing algorithm, calibrate every
    removed source twice with BBS (the second time with a smoothed
    instrument model) and finally subtract the removed sources.

    Parameters:
        `infile`      : path of the input dataset; it must exist.
        `working_dir` : directory for all output; created if missing.
        `initscript`  : handed to read_initscript() to obtain the
                        environment stored in self.environment.
        `remove`      : candidate sources to remove. When non-empty it is
                        intersected with the result of getAteamList();
                        when empty the getAteamList() result is used.
        `target`      : string that replaces 'uv' in the output names.
        `clusterdesc` : passed to makevds and calibrate.
        `timestep`    : averager time step, also passed to the shift and
                        demixing helpers.
        `freqstep`    : averager frequency step, also passed to the shift
                        and demixing helpers.
        `half_window` : passed to smoothparmdb().
        `threshold`   : passed to smoothparmdb().
        `demixdir`    : directory holding the bbs_<source>.parset and
                        bbs_<source>_smoothcal.parset files.
        `skymodel`    : passed to calibrate.
        `db_host`     : passed to calibrate as the --db argument.
    Returns:
        0 on success, 1 as soon as any step fails.
    """
    # Imported here so this method does not depend on the module's
    # top-level import list.
    import subprocess

    def run_ok(argv):
        """Run `argv` without a shell; True when it exits with status 0."""
        return subprocess.call(argv) == 0

    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Started processing %s", infile)
        else:
            self.logger.error("Dataset %s does not exist", infile)
            return 1

        self.logger.debug("infile = %s", infile)
        self.logger.debug("working_dir = %s", working_dir)
        self.logger.debug("initscript = %s", initscript)
        self.logger.debug("remove = %s", remove)
        self.logger.debug("target = %s", target)
        self.logger.debug("clusterdesc = %s", clusterdesc)
        self.logger.debug("timestep = %d", timestep)
        self.logger.debug("freqstep = %d", freqstep)
        self.logger.debug("half_window = %d", half_window)
        self.logger.debug("threshold = %f", threshold)
        self.logger.debug("demixdir = %s", demixdir)
        self.logger.debug("skymodel = %s", skymodel)
        self.logger.debug("db_host= %s", db_host)

        # Initialise environment
        self.environment = read_initscript(self.logger, initscript)

        # Create working directory, if it does not yet exist. Another
        # job may create it between a check and the call, so attempt the
        # creation and only fail when the directory is really missing.
        try:
            os.makedirs(working_dir)
        except OSError:
            if not os.path.isdir(working_dir):
                raise

        # The output file names are based on the input filename, however
        # they must be created in ``working_dir``.
        filename = os.path.split(infile)[1]
        outfile = os.path.join(working_dir, filename)
        key = os.path.join(working_dir, 'key_' + filename)
        mixingtable = os.path.join(working_dir, 'mixing_' + filename)
        basename = outfile.replace('_uv.MS', '') + '_'
        # NOTE(review): replace() substitutes every 'uv' in the full
        # path, directory components included - confirm this is intended.
        mstarget = outfile.replace('uv', target)

        # Refuse to continue when the output would be the input itself.
        # This must be checked *before* the stale output is deleted,
        # otherwise the input dataset is destroyed.
        if os.path.abspath(infile) == os.path.abspath(mstarget):
            self.logger.error("MS-file %s already exists", mstarget)
            return 1

        # If needed, run NDPPP to preflag input file out to demix.MS
        table = pt.table(infile)
        # Presumably the number of channels of the first row - TODO confirm.
        first_axis = table.getcell("DATA", 0).shape[0]
        table = None  # drop the reference to the table object

        if not run_ok(['rm', '-f', '-r', mstarget]):
            return 1

        if first_axis in (64, 128, 256):
            preflag_parset = basename + 'NDPPP_dmx.parset'
            with open(preflag_parset, 'w') as parset:
                parset.write('msin = %s\n' % infile)
                parset.write('msin.autoweight = True\n')
                parset.write('msin.startchan = nchan/32\n')
                parset.write('msin.nchan = 30*nchan/32\n')
                parset.write('msout = %s\n' % mstarget)
                parset.write('steps=[preflag]\n')
                parset.write('preflag.type=preflagger\n')
                parset.write('preflag.corrtype=auto\n')
            self.logger.info("Starting NDPPP demix ...")
            if not self._execute(['NDPPP', preflag_parset]):
                return 1
        else:
            self.logger.info("Copying MS-file: %s --> %s", infile, mstarget)
            if not run_ok(['cp', '-r', infile, mstarget]):
                return 1

        # Use heuristics to get a list of A-team sources that may need
        # to be removed. If the user specified a list of candidate A-team
        # sources to remove, then determine the intersection of both lists.
        # Otherwise just use the list obtained from heuristics.
        ateam_list = getAteamList(
            infile,
            outerDistance=2.e4,
            elLimit=5.,
            verbose=self.logger.isEnabledFor(logging.DEBUG)
        )
        # Lazy logging arguments: "%s" % value would raise TypeError if
        # the helper ever returned a tuple.
        self.logger.debug("getAteamList returned: %s", ateam_list)
        if remove:
            remove = list(set(remove).intersection(ateam_list))
        else:
            remove = ateam_list
        self.logger.info("Removing %d target(s) from %s: %s",
                         len(remove), mstarget, ', '.join(remove))
        spc.shiftphasecenter(mstarget, remove, freqstep, timestep)

        # for each source to remove, and the target, do a freq/timesquash
        # NDPPP
        avg_parset = basename + 'dmx_avg.parset'
        avgoutnames = []
        for rem in list(remove) + [target]:
            msin = outfile.replace('uv', rem)
            msout = msin.replace('.MS', '_avg.MS')
            msdem = msin.replace('.MS', '_avg_dem.MS')

            if not run_ok(['rm', '-f', avg_parset]):
                return 1
            with open(avg_parset, 'w') as parset:
                parset.write('msin = %s\n' % msin)
                parset.write('msout = %s\n' % msout)
                parset.write('steps=[avg]\n')
                parset.write('avg.type = averager\n')
                parset.write('avg.timestep = %d\n' % timestep)
                parset.write('avg.freqstep = %d\n' % freqstep)

            self.logger.debug("Squashing %s to %s", msin, msout)
            if not run_ok(['rm', '-f', '-r', msout]):
                return 1
            if not self._execute(['NDPPP', avg_parset]):
                return 1

            # Remember the averaged output and clear any stale demixed
            # dataset for this source.
            avgoutnames.append(msout)
            if not run_ok(['rm', '-f', '-r', msdem]):
                return 1

        self.logger.info("Starting the demixing algorithm")
        dmx.demixing(mstarget, mixingtable, avgoutnames,
                     freqstep, timestep, 4)
        self.logger.info("Finished the demixing algorithm")

        #
        #  run BBS on the demixed measurement sets
        #
        self.logger.info("Starting BBS run on demixed measurement sets")
        for i in remove:
            self.logger.info("Processing %s ...", i)
            msin = outfile.replace('uv', i)
            msout = msin.replace('.MS', '_avg_dem.MS')

            vds_file = basename + i + '.vds'
            gds_file = basename + i + '.gds'
            self.logger.info("Creating vds & gds files...")
            # Both names are passed as separate arguments; gluing them
            # together without a space removed neither file.
            if not run_ok(['rm', '-f', vds_file, gds_file]):
                return 1
            if not self._execute(['makevds', clusterdesc, msout, vds_file]):
                return 1
            if not self._execute(['combinevds', gds_file, vds_file]):
                return 1

            self.logger.info("Starting first calibration run")
            command = [
                'calibrate',
                '-f',
                '--key', key,
                '--cluster-desc', clusterdesc,
                '--db', db_host,
                '--db-user', 'postgres',
                gds_file,
                os.path.join(demixdir, 'bbs_' + i + '.parset'),
                skymodel,
                working_dir
            ]
            if not self._execute(command):
                return 1

            self.logger.info("Generating smoothed instrument model")
            input_parmdb = os.path.join(msout, 'instrument')
            output_parmdb = os.path.join(msout, 'instrument_smoothed')
            # smoothparmdb indirectly creates a subprocess, so we must
            # make sure that the correct environment is set-up here.
            # The original environment is restored even when the call
            # raises, so a failure cannot leak the swapped environment.
            saved_environ = os.environ
            os.environ = self.environment
            try:
                smdx.smoothparmdb(input_parmdb, output_parmdb,
                                  half_window, threshold)
            finally:
                os.environ = saved_environ

            self.logger.info("Starting second calibration run, "
                             "using smoothed instrument model")
            command = [
                'calibrate',
                '--clean',
                '--skip-sky-db',
                '--skip-instrument-db',
                '--instrument-name', 'instrument_smoothed',
                '--key', key,
                '--cluster-desc', clusterdesc,
                '--db', db_host,
                '--db-user', 'postgres',
                gds_file,
                os.path.join(demixdir, 'bbs_' + i + '_smoothcal.parset'),
                skymodel,
                working_dir
            ]
            if not self._execute(command):
                return 1

        # Form the list of input files and subtract.
        self.logger.info("Subtracting removed sources from the data ...")
        demfiles = [
            outfile.replace('uv', rem + '_avg_dem') for rem in remove
        ]
        sfa.subtract_from_averaged(
            mstarget.replace('.MS', '_avg.MS'),
            mixingtable,
            demfiles,
            mstarget.replace('.MS', '_sub.MS')
        )

    # We're done.
    return 0
def run(self, infile, working_dir, initscript, remove, target,
        clusterdesc, timestep, freqstep, half_window, threshold,
        demixdir, skymodel, db_host):
    """
    Demix the dataset `infile`, writing all products into `working_dir`.

    The steps are: optionally preflag (or else copy) the input, shift the
    phase centre towards every source to remove, average each shifted
    dataset with NDPPP, run the demixing algorithm, calibrate every
    removed source twice with BBS (the second time with a smoothed
    instrument model) and finally subtract the removed sources.

    Parameters:
        `infile`      : path of the input dataset; it must exist.
        `working_dir` : directory for all output; created if missing.
        `initscript`  : handed to read_initscript() to obtain the
                        environment stored in self.environment.
        `remove`      : candidate sources to remove. When non-empty it is
                        intersected with the result of getAteamList();
                        when empty the getAteamList() result is used.
        `target`      : string that replaces 'uv' in the output names.
        `clusterdesc` : passed to makevds and calibrate.
        `timestep`    : averager time step, also passed to the shift and
                        demixing helpers.
        `freqstep`    : averager frequency step, also passed to the shift
                        and demixing helpers.
        `half_window` : passed to smoothparmdb().
        `threshold`   : passed to smoothparmdb().
        `demixdir`    : directory holding the bbs_<source>.parset and
                        bbs_<source>_smoothcal.parset files.
        `skymodel`    : passed to calibrate.
        `db_host`     : passed to calibrate as the --db argument.
    Returns:
        0 on success, 1 as soon as any step fails.
    """
    # Imported here so this method does not depend on the module's
    # top-level import list.
    import subprocess

    def run_ok(argv):
        """Run `argv` without a shell; True when it exits with status 0."""
        return subprocess.call(argv) == 0

    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Started processing %s", infile)
        else:
            self.logger.error("Dataset %s does not exist", infile)
            return 1

        self.logger.debug("infile = %s", infile)
        self.logger.debug("working_dir = %s", working_dir)
        self.logger.debug("initscript = %s", initscript)
        self.logger.debug("remove = %s", remove)
        self.logger.debug("target = %s", target)
        self.logger.debug("clusterdesc = %s", clusterdesc)
        self.logger.debug("timestep = %d", timestep)
        self.logger.debug("freqstep = %d", freqstep)
        self.logger.debug("half_window = %d", half_window)
        self.logger.debug("threshold = %f", threshold)
        self.logger.debug("demixdir = %s", demixdir)
        self.logger.debug("skymodel = %s", skymodel)
        self.logger.debug("db_host= %s", db_host)

        # Initialise environment
        self.environment = read_initscript(self.logger, initscript)

        # Create working directory, if it does not yet exist. Another
        # job may create it between a check and the call, so attempt the
        # creation and only fail when the directory is really missing.
        try:
            os.makedirs(working_dir)
        except OSError:
            if not os.path.isdir(working_dir):
                raise

        # The output file names are based on the input filename, however
        # they must be created in ``working_dir``.
        filename = os.path.split(infile)[1]
        outfile = os.path.join(working_dir, filename)
        key = os.path.join(working_dir, 'key_' + filename)
        mixingtable = os.path.join(working_dir, 'mixing_' + filename)
        basename = outfile.replace('_uv.MS', '') + '_'
        # NOTE(review): replace() substitutes every 'uv' in the full
        # path, directory components included - confirm this is intended.
        mstarget = outfile.replace('uv', target)

        # Refuse to continue when the output would be the input itself.
        # This must be checked *before* the stale output is deleted,
        # otherwise the input dataset is destroyed.
        if os.path.abspath(infile) == os.path.abspath(mstarget):
            self.logger.error("MS-file %s already exists", mstarget)
            return 1

        # If needed, run NDPPP to preflag input file out to demix.MS
        table = pt.table(infile)
        # Presumably the number of channels of the first row - TODO confirm.
        first_axis = table.getcell("DATA", 0).shape[0]
        table = None  # drop the reference to the table object

        if not run_ok(['rm', '-f', '-r', mstarget]):
            return 1

        if first_axis in (64, 128, 256):
            preflag_parset = basename + 'NDPPP_dmx.parset'
            with open(preflag_parset, 'w') as parset:
                parset.write('msin = %s\n' % infile)
                parset.write('msin.autoweight = True\n')
                parset.write('msin.startchan = nchan/32\n')
                parset.write('msin.nchan = 30*nchan/32\n')
                parset.write('msout = %s\n' % mstarget)
                parset.write('steps=[preflag]\n')
                parset.write('preflag.type=preflagger\n')
                parset.write('preflag.corrtype=auto\n')
            self.logger.info("Starting NDPPP demix ...")
            if not self._execute(['NDPPP', preflag_parset]):
                return 1
        else:
            self.logger.info("Copying MS-file: %s --> %s", infile, mstarget)
            if not run_ok(['cp', '-r', infile, mstarget]):
                return 1

        # Use heuristics to get a list of A-team sources that may need
        # to be removed. If the user specified a list of candidate A-team
        # sources to remove, then determine the intersection of both lists.
        # Otherwise just use the list obtained from heuristics.
        ateam_list = getAteamList(
            infile,
            outerDistance=2.e4,
            elLimit=5.,
            verbose=self.logger.isEnabledFor(logging.DEBUG)
        )
        # Lazy logging arguments: "%s" % value would raise TypeError if
        # the helper ever returned a tuple.
        self.logger.debug("getAteamList returned: %s", ateam_list)
        if remove:
            remove = list(set(remove).intersection(ateam_list))
        else:
            remove = ateam_list
        self.logger.info("Removing %d target(s) from %s: %s",
                         len(remove), mstarget, ', '.join(remove))
        spc.shiftphasecenter(mstarget, remove, freqstep, timestep)

        # for each source to remove, and the target, do a freq/timesquash
        # NDPPP
        avg_parset = basename + 'dmx_avg.parset'
        avgoutnames = []
        for rem in list(remove) + [target]:
            msin = outfile.replace('uv', rem)
            msout = msin.replace('.MS', '_avg.MS')
            msdem = msin.replace('.MS', '_avg_dem.MS')

            if not run_ok(['rm', '-f', avg_parset]):
                return 1
            with open(avg_parset, 'w') as parset:
                parset.write('msin = %s\n' % msin)
                parset.write('msout = %s\n' % msout)
                parset.write('steps=[avg]\n')
                parset.write('avg.type = averager\n')
                parset.write('avg.timestep = %d\n' % timestep)
                parset.write('avg.freqstep = %d\n' % freqstep)

            self.logger.debug("Squashing %s to %s", msin, msout)
            if not run_ok(['rm', '-f', '-r', msout]):
                return 1
            if not self._execute(['NDPPP', avg_parset]):
                return 1

            # Remember the averaged output and clear any stale demixed
            # dataset for this source.
            avgoutnames.append(msout)
            if not run_ok(['rm', '-f', '-r', msdem]):
                return 1

        self.logger.info("Starting the demixing algorithm")
        dmx.demixing(mstarget, mixingtable, avgoutnames,
                     freqstep, timestep, 4)
        self.logger.info("Finished the demixing algorithm")

        #
        #  run BBS on the demixed measurement sets
        #
        self.logger.info("Starting BBS run on demixed measurement sets")
        for i in remove:
            self.logger.info("Processing %s ...", i)
            msin = outfile.replace('uv', i)
            msout = msin.replace('.MS', '_avg_dem.MS')

            vds_file = basename + i + '.vds'
            gds_file = basename + i + '.gds'
            self.logger.info("Creating vds & gds files...")
            # Both names are passed as separate arguments; gluing them
            # together without a space removed neither file.
            if not run_ok(['rm', '-f', vds_file, gds_file]):
                return 1
            if not self._execute(['makevds', clusterdesc, msout, vds_file]):
                return 1
            if not self._execute(['combinevds', gds_file, vds_file]):
                return 1

            self.logger.info("Starting first calibration run")
            command = [
                'calibrate',
                '-f',
                '--key', key,
                '--cluster-desc', clusterdesc,
                '--db', db_host,
                '--db-user', 'postgres',
                gds_file,
                os.path.join(demixdir, 'bbs_' + i + '.parset'),
                skymodel,
                working_dir
            ]
            if not self._execute(command):
                return 1

            self.logger.info("Generating smoothed instrument model")
            input_parmdb = os.path.join(msout, 'instrument')
            output_parmdb = os.path.join(msout, 'instrument_smoothed')
            # smoothparmdb indirectly creates a subprocess, so we must
            # make sure that the correct environment is set-up here.
            # The original environment is restored even when the call
            # raises, so a failure cannot leak the swapped environment.
            saved_environ = os.environ
            os.environ = self.environment
            try:
                smdx.smoothparmdb(input_parmdb, output_parmdb,
                                  half_window, threshold)
            finally:
                os.environ = saved_environ

            self.logger.info("Starting second calibration run, "
                             "using smoothed instrument model")
            command = [
                'calibrate',
                '--clean',
                '--skip-sky-db',
                '--skip-instrument-db',
                '--instrument-name', 'instrument_smoothed',
                '--key', key,
                '--cluster-desc', clusterdesc,
                '--db', db_host,
                '--db-user', 'postgres',
                gds_file,
                os.path.join(demixdir, 'bbs_' + i + '_smoothcal.parset'),
                skymodel,
                working_dir
            ]
            if not self._execute(command):
                return 1

        # Form the list of input files and subtract.
        self.logger.info("Subtracting removed sources from the data ...")
        demfiles = [
            outfile.replace('uv', rem + '_avg_dem') for rem in remove
        ]
        sfa.subtract_from_averaged(
            mstarget.replace('.MS', '_avg.MS'),
            mixingtable,
            demfiles,
            mstarget.replace('.MS', '_sub.MS')
        )

    # We're done.
    return 0