Example 1
    def go(self):
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs['working_directory'],
                               self.inputs['job_name'])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # This is a bit of a kludge. The input MS-filenames are supposed to
            # contain the string "_uv". The demixing node script will produce
            # output MS-files, whose names have the string "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub".
            outdata = [(host,
                        os.path.join(
                            job_dir,
                            os.path.basename(infile).replace(
                                '_uv',
                                '_' + self.inputs['ms_target'] + '_sub')))
                       for host, infile in indata]

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, infile in indata:
            jobs.append(
                ComputeJob(
                    host,
                    command,
                    arguments=[
                        infile, job_dir, self.inputs['initscript'],
                        self.inputs['demix_sources'], self.inputs['ms_target'],
                        self.config.get('cluster', 'clusterdesc'),
                        self.inputs['timestep'], self.inputs['freqstep'],
                        self.inputs['half_window'], self.inputs['threshold'],
                        self.inputs['demix_parset_dir'],
                        self.inputs['skymodel'], self.inputs['db_host']
                    ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
            store_data_map(self.inputs['mapfile'], outdata)
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
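The "kludge" comment in the else-branch above describes how output MS names are derived from input names. A minimal sketch of that transformation, using the hypothetical job directory '/data/scratch/pipeline/L29697' and assuming ms_target is 'field' (neither value comes from the recipe itself):

    import os

    job_dir = '/data/scratch/pipeline/L29697'   # assumed job directory
    ms_target = 'field'                          # assumed self.inputs['ms_target']
    host, infile = 'locus001', '/data/L29697/L29697_SAP000_SB000_uv.MS'

    # Mirror the list comprehension in the recipe: swap '_uv' in the basename
    # for '_<ms_target>_sub' and place the result inside the job directory.
    outfile = os.path.join(
        job_dir,
        os.path.basename(infile).replace('_uv', '_' + ms_target + '_sub'))

    print(outfile)
    # /data/scratch/pipeline/L29697/L29697_SAP000_SB000_field_sub.MS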
Example 2
    def go(self):
        self.logger.info("Starting make_flaggable run")
        super(make_flaggable, self).go()

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(
                ComputeJob(
                    host, command,
                    arguments=[
                        ms,
                        self.inputs['makeflagwritable']
                    ]
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
Example 3
    def _make_bbs_map(self):
        """
        This method bundles the contents of three different map-files.
        All three map-files contain a list of tuples of hostname and filename.
        The contents of these files are related by index in the list. They
        form triplets of MS-file, its associated instrument model and its
        associated sky model.

        The data structure `self.bbs_map` is a list of tuples, where each
        tuple is a pair of hostname and the aforementioned triplet.

        For example:
        bbs_map[0] = ('locus001',
            ('/data/L29697/L29697_SAP000_SB000_uv.MS',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')
        )

        Returns `False` if validation of the three map-files fails, otherwise
        returns `True`.
        """
        self.logger.debug("Creating BBS map-file using: %s, %s, %s" %
                          (self.inputs['args'][0],
                           self.inputs['instrument_mapfile'],
                           self.inputs['sky_mapfile']))
        data_map = load_data_map(self.inputs['args'][0])
        instrument_map = load_data_map(self.inputs['instrument_mapfile'])
        sky_map = load_data_map(self.inputs['sky_mapfile'])

        if not validate_data_maps(data_map, instrument_map, sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Store data mapfile containing list of files to be processed by BBS.
        store_data_map(self.inputs['data_mapfile'], data_map)

        self.bbs_map = [
            (dat[0], (dat[1], ins[1], sky[1]))
            for dat, ins, sky in zip(data_map, instrument_map, sky_map)
        ]

        return True
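The list comprehension at the end of _make_bbs_map is a plain element-wise zip of the three validated maps. A small sketch with single-entry maps, reusing the paths from the docstring above:

    data_map = [('locus001',
                 '/data/L29697/L29697_SAP000_SB000_uv.MS')]
    instrument_map = [('locus001',
                       '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument')]
    sky_map = [('locus001',
                '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')]

    # Pair each MS with its instrument and sky model, keyed by hostname.
    bbs_map = [
        (dat[0], (dat[1], ins[1], sky[1]))
        for dat, ins, sky in zip(data_map, instrument_map, sky_map)
    ]

    assert bbs_map[0] == (
        'locus001',
        ('/data/L29697/L29697_SAP000_SB000_uv.MS',
         '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
         '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky'))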
Example 4
    def go(self):
        self.logger.info("Starting rficonsole run")
        super(rficonsole, self).go()

        #                           Load file <-> compute node mapping from disk
        # ----------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'])
        data = load_data_map(self.inputs['args'][0])

        #        Jobs being dispatched to each host are arranged in a dict. Each
        #          entry in the dict is a list of lists of filenames to process.
        # ----------------------------------------------------------------------
        hostlist = defaultdict(lambda: list([[]]))
        for host, filename in data:
            if (
                self.inputs.has_key('nmeasurementsets') and
                len(hostlist[host][-1]) >= self.inputs['nmeasurementsets']
            ):
                hostlist[host].append([filename])
            else:
                hostlist[host][-1].append(filename)

        if self.inputs.has_key('strategy'):
            strategy = self.inputs['strategy']
        else:
            strategy = None

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, file_lists in hostlist.iteritems():
            for file_list in file_lists:
                jobs.append(
                    ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['executable'],
                            self.inputs['nthreads'],
                            strategy,
                            self.inputs['indirect_read'],
                            self.inputs['skip_flagged'],
                            self.inputs['working_dir']
                        ] + file_list,
                        resources={
                            "cores": self.inputs['nthreads']
                        }
                    )
                )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            self.logger.warn("Failed rficonsole process detected")
            return 1
        else:
            return 0
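The grouping comment above amounts to per-host chunking: each host keeps filling its current batch until it holds nmeasurementsets entries, after which a new batch is started. Because every batch becomes its own ComputeJob, no single rficonsole invocation processes more than nmeasurementsets MeasurementSets. A self-contained sketch of that logic with made-up map entries and a batch limit of 2:

    from collections import defaultdict

    data = [('locus001', 'SB000.MS'), ('locus001', 'SB001.MS'),
            ('locus001', 'SB002.MS'), ('locus002', 'SB003.MS')]
    nmeasurementsets = 2    # assumed value of self.inputs['nmeasurementsets']

    hostlist = defaultdict(lambda: [[]])
    for host, filename in data:
        if len(hostlist[host][-1]) >= nmeasurementsets:
            hostlist[host].append([filename])      # start a new batch
        else:
            hostlist[host][-1].append(filename)    # extend the current batch

    print(dict(hostlist))
    # {'locus001': [['SB000.MS', 'SB001.MS'], ['SB002.MS']],
    #  'locus002': [['SB003.MS']]}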
Example 5
    def go(self):
        self.logger.info("Starting rficonsole run")
        super(rficonsole, self).go()

        #                           Load file <-> compute node mapping from disk
        # ----------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'])
        data = load_data_map(self.inputs['args'][0])

        #        Jobs being dispatched to each host are arranged in a dict. Each
        #          entry in the dict is a list of lists of filenames to process.
        # ----------------------------------------------------------------------
        hostlist = defaultdict(lambda: list([[]]))
        for host, filename in data:
            if (
                'nmeasurementsets' in self.inputs and
                len(hostlist[host][-1]) >= self.inputs['nmeasurementsets']
            ):
                hostlist[host].append([filename])
            else:
                hostlist[host][-1].append(filename)

        if 'strategy' in self.inputs:
            strategy = self.inputs['strategy']
        else:
            strategy = None

        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, file_lists in hostlist.items():
            for file_list in file_lists:
                jobs.append(
                    ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['executable'],
                            self.inputs['nthreads'],
                            strategy,
                            self.inputs['indirect_read'],
                            self.inputs['skip_flagged'],
                            self.inputs['working_dir']
                        ] + file_list,
                        resources={
                            "cores": self.inputs['nthreads']
                        }
                    )
                )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            self.logger.warn("Failed rficonsole process detected")
            return 1
        else:
            return 0
Example 6
    def go(self):
        self.logger.info("Starting flag_baseline run")
        super(flag_baseline, self).go()

        #       Serialise list of baselines to disk for compute nodes to pick up
        # ----------------------------------------------------------------------
        fd, baseline_filename = mkstemp(
            dir=self.config.get("layout", "job_directory")
        )
        baseline_file = os.fdopen(fd, "w")
        dump(self.inputs["baselines"], baseline_file)
        baseline_file.close()

        #                 try block ensures baseline_filename is always unlinked
        # ----------------------------------------------------------------------
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            self.logger.debug("Loading map from %s" % self.inputs['args'][0])
            data = load_data_map(self.inputs['args'][0])

            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobs = []
            for host, ms in data:
                jobs.append(
                    ComputeJob(
                        host, command,
                        arguments=[
                            ms,
                            baseline_filename
                        ]
                    )
                )
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        finally:
            os.unlink(baseline_filename)

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
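The try/finally in flag_baseline wraps a short serialisation round-trip: the baseline list is pickled into a temporary file in the job directory, the node jobs are handed its path, and the file is unlinked afterwards. A minimal sketch of the same pattern, assuming dump in the recipe comes from the pickle module and using /tmp in place of the job directory:

    import os
    from pickle import dump, load
    from tempfile import mkstemp

    baselines = [(0, 1), (0, 2)]                 # assumed baseline list
    fd, baseline_filename = mkstemp(dir='/tmp')  # stands in for the job directory

    try:
        with os.fdopen(fd, 'wb') as baseline_file:
            dump(baselines, baseline_file)       # master side: serialise to disk

        with open(baseline_filename, 'rb') as baseline_file:
            print(load(baseline_file))           # node side (assumed): read it back
    finally:
        os.unlink(baseline_filename)             # temporary file is always removed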
Example 7
    def go(self):
        self.logger.info("Starting count_timesteps run")
        super(count_timesteps, self).go()

        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(ComputeJob(host, command, arguments=[ms]))
        jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        self.outputs['start_time'] = min(job.results['start_time']
                                         for job in jobs.values())
        self.outputs['end_time'] = max(job.results['end_time']
                                       for job in jobs.values())

        if self.error.isSet():
            return 1
        else:
            return 0
Example 8
    def go(self):
        self.logger.info("Starting make_flaggable run")
        super(make_flaggable, self).go()

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(
                ComputeJob(host,
                           command,
                           arguments=[ms, self.inputs['makeflagwritable']]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
Example 9
    def go(self):
        self.logger.info("Starting count_timesteps run")
        super(count_timesteps, self).go()

        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(
                ComputeJob(
                    host, command, arguments=[ms]
                )
            )
        jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        self.outputs['start_time'] = min(job.results['start_time'] for job in jobs.itervalues())
        self.outputs['end_time'] = max(job.results['end_time'] for job in jobs.itervalues())

        if self.error.isSet():
            return 1
        else:
            return 0
Example 10
    def go(self):
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs["working_directory"], self.inputs["job_name"])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs["args"]
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error("Validation of input/output data mapfiles failed")
                return 1
        else:
            # This is a bit of a kludge. The input MS-filenames are supposed to
            # contain the string "_uv". The demixing node script will produce
            # output MS-files, whose names have the string "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub".
            outdata = [
                (
                    host,
                    os.path.join(
                        job_dir, os.path.basename(infile).replace("_uv", "_" + self.inputs["ms_target"] + "_sub")
                    ),
                )
                for host, infile in indata
            ]

        command = "python %s" % (self.__file__.replace("master", "nodes"))
        jobs = []
        for host, infile in indata:
            jobs.append(
                ComputeJob(
                    host,
                    command,
                    arguments=[
                        infile,
                        job_dir,
                        self.inputs["initscript"],
                        self.inputs["demix_sources"],
                        self.inputs["ms_target"],
                        self.config.get("cluster", "clusterdesc"),
                        self.inputs["timestep"],
                        self.inputs["freqstep"],
                        self.inputs["half_window"],
                        self.inputs["threshold"],
                        self.inputs["demix_parset_dir"],
                        self.inputs["skymodel"],
                        self.inputs["db_host"],
                    ],
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs["nproc"])

        if self.error.isSet():
            return 1
        else:
            self.logger.debug("Writing mapfile %s" % self.inputs["mapfile"])
            store_data_map(self.inputs["mapfile"], outdata)
            self.outputs["mapfile"] = self.inputs["mapfile"]
            return 0