def go(self): self.logger.info("Starting demixing run") super(demixing, self).go() job_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = load_data_map(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = load_data_map(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed") return 1 else: # This is a bit of a kludge. The input MS-filenames are supposed to # contain the string "_uv". The demixing node script will produce # output MS-files, whose names have the string "_uv" replaced by # "_" + self.inputs['ms_target'] + "_sub". outdata = [(host, os.path.join( job_dir, os.path.basename(infile).replace( '_uv', '_' + self.inputs['ms_target'] + '_sub'))) for host, infile in indata] command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, infile in indata: jobs.append( ComputeJob( host, command, arguments=[ infile, job_dir, self.inputs['initscript'], self.inputs['demix_sources'], self.inputs['ms_target'], self.config.get('cluster', 'clusterdesc'), self.inputs['timestep'], self.inputs['freqstep'], self.inputs['half_window'], self.inputs['threshold'], self.inputs['demix_parset_dir'], self.inputs['skymodel'], self.inputs['db_host'] ])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): return 1 else: self.logger.debug("Writing mapfile %s" % self.inputs['mapfile']) store_data_map(self.inputs['mapfile'], outdata) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def go(self): self.logger.info("Starting make_flaggable run") super(make_flaggable, self).go() # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob( host, command, arguments=[ ms, self.inputs['makeflagwritable'] ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): return 1 else: self.outputs['mapfile'] = self.inputs['args'][0] return 0
def _make_bbs_map(self):
    """
    This method bundles the contents of three different map-files.
    All three map-files contain a list of tuples of hostname and filename.
    The contents of these files are related by index in the list. They
    form triplets of MS-file, its associated instrument model and its
    associated sky model.

    The data structure `self.bbs_map` is a list of tuples, where each
    tuple is a pair of hostname and the aforementioned triplet.

    For example:
    bbs_map[0] = ('locus001',
        ('/data/L29697/L29697_SAP000_SB000_uv.MS',
         '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
         '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')
    )

    Returns `False` if validation of the three map-files fails, otherwise
    returns `True`.
    """
    self.logger.debug("Creating BBS map-file using: %s, %s, %s" %
                      (self.inputs['args'][0],
                       self.inputs['instrument_mapfile'],
                       self.inputs['sky_mapfile']))
    data_map = load_data_map(self.inputs['args'][0])
    instrument_map = load_data_map(self.inputs['instrument_mapfile'])
    sky_map = load_data_map(self.inputs['sky_mapfile'])

    if not validate_data_maps(data_map, instrument_map, sky_map):
        self.logger.error("Validation of input data mapfiles failed")
        return False

    # Store data mapfile containing list of files to be processed by BBS.
    store_data_map(self.inputs['data_mapfile'], data_map)

    self.bbs_map = [
        (dat[0], (dat[1], ins[1], sky[1]))
        for dat, ins, sky in zip(data_map, instrument_map, sky_map)
    ]

    return True

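# A minimal, self-contained sketch of the zip/comprehension used in
# _make_bbs_map above. The function name and the map entries below are
# made up for illustration; the real maps come from the three mapfiles.
def _bbs_map_demo():
    data_map = [('locus001', 'SB000_uv.MS')]
    instrument_map = [('locus001', 'SB000_uv.MS.instrument')]
    sky_map = [('locus001', 'SB000_uv.MS.sky')]
    return [(dat[0], (dat[1], ins[1], sky[1]))
            for dat, ins, sky in zip(data_map, instrument_map, sky_map)]
    # -> [('locus001', ('SB000_uv.MS', 'SB000_uv.MS.instrument',
    #                   'SB000_uv.MS.sky'))]
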
def go(self): self.logger.info("Starting rficonsole run") super(rficonsole, self).go() # Load file <-> compute node mapping from disk # ---------------------------------------------------------------------- self.logger.debug("Loading map from %s" % self.inputs['args']) data = load_data_map(self.inputs['args'][0]) # Jobs being dispatched to each host are arranged in a dict. Each # entry in the dict is a list of list of filnames to process. # ---------------------------------------------------------------------- hostlist = defaultdict(lambda: list([[]])) for host, filename in data: if ( self.inputs.has_key('nmeasurementsets') and len(hostlist[host][-1]) >= self.inputs['nmeasurementsets'] ): hostlist[host].append([filename]) else: hostlist[host][-1].append(filename) if self.inputs.has_key('strategy'): strategy = self.inputs['strategy'] else: strategy = None command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, file_lists in hostlist.iteritems(): for file_list in file_lists: jobs.append( ComputeJob( host, command, arguments=[ self.inputs['executable'], self.inputs['nthreads'], strategy, self.inputs['indirect_read'], self.inputs['skip_flagged'], self.inputs['working_dir'] ] + file_list, resources={ "cores": self.inputs['nthreads'] } ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): self.logger.warn("Failed rficonsole process detected") return 1 else: return 0
def go(self): self.logger.info("Starting rficonsole run") super(rficonsole, self).go() # Load file <-> compute node mapping from disk # ---------------------------------------------------------------------- self.logger.debug("Loading map from %s" % self.inputs['args']) data = load_data_map(self.inputs['args'][0]) # Jobs being dispatched to each host are arranged in a dict. Each # entry in the dict is a list of list of filnames to process. # ---------------------------------------------------------------------- hostlist = defaultdict(lambda: list([[]])) for host, filename in data: if ( 'nmeasurementsets' in self.inputs and len(hostlist[host][-1]) >= self.inputs['nmeasurementsets'] ): hostlist[host].append([filename]) else: hostlist[host][-1].append(filename) if 'strategy' in self.inputs: strategy = self.inputs['strategy'] else: strategy = None command = "python3 %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, file_lists in hostlist.items(): for file_list in file_lists: jobs.append( ComputeJob( host, command, arguments=[ self.inputs['executable'], self.inputs['nthreads'], strategy, self.inputs['indirect_read'], self.inputs['skip_flagged'], self.inputs['working_dir'] ] + file_list, resources={ "cores": self.inputs['nthreads'] } ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): self.logger.warn("Failed rficonsole process detected") return 1 else: return 0
def go(self): self.logger.info("Starting flag_baseline run") super(flag_baseline, self).go() # Serialise list of baselines to disk for compute nodes to pick up # ---------------------------------------------------------------------- fd, baseline_filename = mkstemp( dir=self.config.get("layout", "job_directory") ) baseline_file = os.fdopen(fd, "w") dump(self.inputs["baselines"], baseline_file) baseline_file.close() # try block ensures baseline_filename is always unlinked # ---------------------------------------------------------------------- try: # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob( host, command, arguments=[ ms, baseline_filename ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) finally: os.unlink(baseline_filename) if self.error.isSet(): return 1 else: self.outputs['mapfile'] = self.inputs['args'][0] return 0
def go(self): self.logger.info("Starting count_timesteps run") super(count_timesteps, self).go() self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append(ComputeJob(host, command, arguments=[ms])) jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) self.outputs['start_time'] = min(job.results['start_time'] for job in jobs.values()) self.outputs['end_time'] = max(job.results['end_time'] for job in jobs.values()) if self.error.isSet(): return 1 else: return 0
def go(self): self.logger.info("Starting make_flaggable run") super(make_flaggable, self).go() # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob(host, command, arguments=[ms, self.inputs['makeflagwritable']])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): return 1 else: self.outputs['mapfile'] = self.inputs['args'][0] return 0
def go(self): self.logger.info("Starting count_timesteps run") super(count_timesteps, self).go() self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob( host, command, arguments=[ms] ) ) jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) self.outputs['start_time'] = min(job.results['start_time'] for job in jobs.itervalues()) self.outputs['end_time'] = max(job.results['end_time'] for job in jobs.itervalues()) if self.error.isSet(): return 1 else: return 0
def go(self): self.logger.info("Starting demixing run") super(demixing, self).go() job_dir = os.path.join(self.inputs["working_directory"], self.inputs["job_name"]) # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ args = self.inputs["args"] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = load_data_map(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = load_data_map(args[1]) if not validate_data_maps(indata, outdata): self.logger.error("Validation of input/output data mapfiles failed") return 1 else: # This is a bit of a kludge. The input MS-filenames are supposed to # contain the string "_uv". The demixing node script will produce # output MS-files, whose names have the string "_uv" replaced by # "_" + self.inputs['ms_target'] + "_sub". outdata = [ ( host, os.path.join( job_dir, os.path.basename(infile).replace("_uv", "_" + self.inputs["ms_target"] + "_sub") ), ) for host, infile in indata ] command = "python %s" % (self.__file__.replace("master", "nodes")) jobs = [] for host, infile in indata: jobs.append( ComputeJob( host, command, arguments=[ infile, job_dir, self.inputs["initscript"], self.inputs["demix_sources"], self.inputs["ms_target"], self.config.get("cluster", "clusterdesc"), self.inputs["timestep"], self.inputs["freqstep"], self.inputs["half_window"], self.inputs["threshold"], self.inputs["demix_parset_dir"], self.inputs["skymodel"], self.inputs["db_host"], ], ) ) self._schedule_jobs(jobs, max_per_node=self.inputs["nproc"]) if self.error.isSet(): return 1 else: self.logger.debug("Writing mapfile %s" % self.inputs["mapfile"]) store_data_map(self.inputs["mapfile"], outdata) self.outputs["mapfile"] = self.inputs["mapfile"] return 0