def go(self):
    """
    Run the demixing recipe.

    Loads the input-data mapfile given as the first positional argument.
    If a second mapfile argument is present it is used (after validation
    against the input map) as the output map; otherwise output MS names
    are derived from the input names. One compute job per input MS is
    scheduled on its mapped host. Returns 0 on success, 1 on failure.
    """
    self.logger.info("Starting demixing run")
    super(demixing, self).go()

    job_dir = os.path.join(self.inputs['working_directory'],
                           self.inputs['job_name'])

    # Load file <-> compute node mapping from disk
    # ------------------------------------------------------------------
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = load_data_map(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = load_data_map(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error(
                "Validation of input/output data mapfiles failed")
            return 1
    else:
        # Kludge: input MS-filenames are supposed to contain "_uv"; the
        # node script writes its output with "_uv" replaced by
        # "_<ms_target>_sub", so predict the output names the same way.
        suffix = '_' + self.inputs['ms_target'] + '_sub'
        outdata = []
        for host, infile in indata:
            outname = os.path.basename(infile).replace('_uv', suffix)
            outdata.append((host, os.path.join(job_dir, outname)))

    # The node script lives next to this master script, under 'nodes'.
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    jobs = [
        ComputeJob(
            host, command,
            arguments=[
                infile,
                job_dir,
                self.inputs['initscript'],
                self.inputs['demix_sources'],
                self.inputs['ms_target'],
                self.config.get('cluster', 'clusterdesc'),
                self.inputs['timestep'],
                self.inputs['freqstep'],
                self.inputs['half_window'],
                self.inputs['threshold'],
                self.inputs['demix_parset_dir'],
                self.inputs['skymodel'],
                self.inputs['db_host'],
            ])
        for host, infile in indata
    ]
    self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

    if self.error.isSet():
        return 1
    else:
        # All jobs succeeded: persist the output data map for downstream
        # recipes and expose its path in our outputs.
        self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
        store_data_map(self.inputs['mapfile'], outdata)
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def _make_bbs_map(self):
    """
    Bundle the contents of three different map-files.

    All three map-files contain a list of tuples of hostname and
    filename; entries are related by list index, forming triplets of
    MS-file, its associated instrument model and its associated sky
    model. The result, stored in `self.bbs_map`, is a list of tuples
    pairing a hostname with such a triplet. For example:

        bbs_map[0] = ('locus001',
            ('/data/L29697/L29697_SAP000_SB000_uv.MS',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')
        )

    Returns `False` if validation of the three map-files fails,
    otherwise returns `True`.
    """
    self.logger.debug("Creating BBS map-file using: %s, %s, %s" %
                      (self.inputs['args'][0],
                       self.inputs['instrument_mapfile'],
                       self.inputs['sky_mapfile']))
    ms_map = load_data_map(self.inputs['args'][0])
    parmdb_map = load_data_map(self.inputs['instrument_mapfile'])
    sourcedb_map = load_data_map(self.inputs['sky_mapfile'])

    # All three maps must agree (same length, matching hosts).
    if not validate_data_maps(ms_map, parmdb_map, sourcedb_map):
        self.logger.error("Validation of input data mapfiles failed")
        return False

    # Store data mapfile containing list of files to be processed by BBS.
    store_data_map(self.inputs['data_mapfile'], ms_map)

    self.bbs_map = []
    for ms, parmdb, sourcedb in zip(ms_map, parmdb_map, sourcedb_map):
        host = ms[0]
        self.bbs_map.append((host, (ms[1], parmdb[1], sourcedb[1])))
    return True
def go(self):
    """
    Run the demixing recipe.

    Reads the input-data mapfile (first positional argument) and either
    an explicit output-data mapfile (optional second argument, validated
    against the input map) or output names derived from the inputs.
    Schedules one demixing node job per input MS on its mapped host.

    Returns:
        0 on success, 1 when mapfile validation fails or any job errors.
    """
    self.logger.info("Starting demixing run")
    super(demixing, self).go()

    inp = self.inputs
    job_dir = os.path.join(inp["working_directory"], inp["job_name"])

    # Load file <-> compute node mapping from disk
    # ------------------------------------------------------------------
    args = inp["args"]
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = load_data_map(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = load_data_map(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error("Validation of input/output data mapfiles failed")
            return 1
    else:
        # Kludge: the node script names its output by replacing "_uv" in
        # the input MS basename with "_<ms_target>_sub"; predict the
        # resulting paths here so we can write an output map.
        outdata = [
            (host,
             os.path.join(
                 job_dir,
                 os.path.basename(path).replace(
                     "_uv", "_" + inp["ms_target"] + "_sub")))
            for host, path in indata
        ]

    # The node-side counterpart of this script lives under 'nodes'.
    command = "python %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    for host, infile in indata:
        node_args = [
            infile,
            job_dir,
            inp["initscript"],
            inp["demix_sources"],
            inp["ms_target"],
            self.config.get("cluster", "clusterdesc"),
            inp["timestep"],
            inp["freqstep"],
            inp["half_window"],
            inp["threshold"],
            inp["demix_parset_dir"],
            inp["skymodel"],
            inp["db_host"],
        ]
        jobs.append(ComputeJob(host, command, arguments=node_args))

    self._schedule_jobs(jobs, max_per_node=inp["nproc"])

    if self.error.isSet():
        return 1

    # Success: persist the output map and publish its location.
    self.logger.debug("Writing mapfile %s" % inp["mapfile"])
    store_data_map(inp["mapfile"], outdata)
    self.outputs["mapfile"] = inp["mapfile"]
    return 0