Example #1
0
    def go(self):
        """
        Schedule one demixing node job per entry of the input data map.

        ``self.inputs['args'][0]`` names the input-data mapfile. When a
        second argument is present it names the output-data mapfile, which
        is loaded and validated against the input map; otherwise the output
        map is derived from the input file names.

        Returns 1 when validation of the input/output mapfiles fails or when
        ``self.error`` is set after the jobs were scheduled. Otherwise the
        output map is stored in ``self.inputs['mapfile']``, that path is
        published as ``self.outputs['mapfile']`` and 0 is returned.
        """
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        inputs = self.inputs
        job_dir = os.path.join(inputs['working_directory'], inputs['job_name'])

        # Load the file <-> compute node mapping from disk.
        mapfiles = inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % mapfiles[0])
        indata = load_data_map(mapfiles[0])

        if len(mapfiles) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % mapfiles[1])
            outdata = load_data_map(mapfiles[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # Kludge: input MS-filenames are supposed to contain "_uv". The
            # demixing node script will produce output MS-files whose names
            # have "_uv" replaced by "_" + self.inputs['ms_target'] + "_sub".
            outdata = []
            for node, ms_path in indata:
                out_name = os.path.basename(ms_path).replace(
                    '_uv', '_' + inputs['ms_target'] + '_sub')
                outdata.append((node, os.path.join(job_dir, out_name)))

        # The node script path is this recipe's file path with 'master'
        # replaced by 'nodes'.
        node_script = self.__file__.replace('master', 'nodes')
        command = "python %s" % (node_script)

        # The inputs are looked up per entry, exactly as often as there are
        # entries in the input map.
        jobs = [
            ComputeJob(
                node,
                command,
                arguments=[
                    ms_path,
                    job_dir,
                    inputs['initscript'],
                    inputs['demix_sources'],
                    inputs['ms_target'],
                    self.config.get('cluster', 'clusterdesc'),
                    inputs['timestep'],
                    inputs['freqstep'],
                    inputs['half_window'],
                    inputs['threshold'],
                    inputs['demix_parset_dir'],
                    inputs['skymodel'],
                    inputs['db_host']
                ])
            for node, ms_path in indata
        ]
        self._schedule_jobs(jobs, max_per_node=inputs['nproc'])

        if self.error.isSet():
            return 1

        mapfile = inputs['mapfile']
        self.logger.debug("Writing mapfile %s" % mapfile)
        store_data_map(mapfile, outdata)
        self.outputs['mapfile'] = mapfile
        return 0
Example #2
0
    def _make_bbs_map(self):
        """
        Bundle the contents of three map-files into `self.bbs_map`.

        The three map-files (the MS map given as first argument, the
        instrument map and the sky map) each hold a list of tuples of
        hostname and filename. Entries at the same index belong together:
        an MS-file, its instrument model and its sky model.

        On success `self.bbs_map` is a list of tuples, each pairing the
        hostname taken from the MS map entry with the triplet of filenames.

        For example:
        bbs_map[0] = ('locus001',
            ('/data/L29697/L29697_SAP000_SB000_uv.MS',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')
        )

        The MS map is also written to `self.inputs['data_mapfile']`.

        Returns `False` if validation of the three map-files fails (nothing
        is stored in that case), otherwise returns `True`.
        """
        inputs = self.inputs
        ms_mapfile = inputs['args'][0]
        instrument_mapfile = inputs['instrument_mapfile']
        sky_mapfile = inputs['sky_mapfile']

        self.logger.debug("Creating BBS map-file using: %s, %s, %s" %
                          (ms_mapfile, instrument_mapfile, sky_mapfile))

        ms_entries = load_data_map(ms_mapfile)
        instrument_entries = load_data_map(instrument_mapfile)
        sky_entries = load_data_map(sky_mapfile)

        maps_agree = validate_data_maps(
            ms_entries, instrument_entries, sky_entries)
        if not maps_agree:
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Store data mapfile containing list of files to be processed by BBS.
        store_data_map(inputs['data_mapfile'], ms_entries)

        # Combine the three maps by index; the hostname comes from the MS map.
        bundled = []
        for ms, instrument, sky in zip(ms_entries, instrument_entries,
                                       sky_entries):
            triplet = (ms[1], instrument[1], sky[1])
            bundled.append((ms[0], triplet))
        self.bbs_map = bundled

        return True
Example #3
0
    def go(self):
        """
        Build a datamap and write it to the mapfile given in the inputs.

        The datamap comes from `self._read_files()` when the 'parset' input
        is set, else from `self._search_files()` when 'observation_dir' is
        set. If neither is set an error is logged and 1 is returned.

        On success the directory of `self.inputs['mapfile']` is created, the
        datamap is stored there, the path is published as
        `self.outputs['mapfile']` and 0 is returned.
        """
        self.logger.info("Starting CEP-II datamapper run")
        super(cep2_datamapper, self).go()

        inputs = self.inputs

        # Pick the datamap source; 'parset' takes precedence.
        if inputs['parset']:
            build_datamap = self._read_files
        elif inputs['observation_dir']:
            build_datamap = self._search_files
        else:
            self.logger.error("Either observation_dir or parset must be given")
            return 1
        datamap = build_datamap()

        self.logger.info("Found %i datasets to process." % len(datamap))
        self.logger.debug("datamap = %s" % datamap)

        # Write datamap-file
        mapfile = inputs['mapfile']
        create_directory(os.path.dirname(mapfile))
        store_data_map(mapfile, datamap)
        self.logger.debug("Wrote mapfile: %s" % mapfile)

        self.outputs['mapfile'] = mapfile
        return 0
Example #4
0
    def go(self):
        """
        Produce the datamap for this run and store it as a mapfile.

        Source of the datamap, in order of precedence:
          * `self._read_files()` if the 'parset' input is set;
          * `self._search_files()` if the 'observation_dir' input is set.
        When neither input is set, an error is logged and 1 is returned.

        Otherwise the directory holding `self.inputs['mapfile']` is created,
        the datamap is stored in that file, `self.outputs['mapfile']` is set
        to the same path and 0 is returned.
        """
        self.logger.info("Starting CEP-II datamapper run")
        super(cep2_datamapper, self).go()

        datamap = None
        have_source = True
        if self.inputs['parset']:
            datamap = self._read_files()
        elif self.inputs['observation_dir']:
            datamap = self._search_files()
        else:
            have_source = False

        if not have_source:
            self.logger.error("Either observation_dir or parset must be given")
            return 1

        self.logger.info("Found %i datasets to process." % len(datamap))
        self.logger.debug("datamap = %s" % datamap)

        # Write datamap-file
        target = self.inputs['mapfile']
        target_dir = os.path.dirname(target)
        create_directory(target_dir)
        store_data_map(target, datamap)
        self.logger.debug("Wrote mapfile: %s" % target)

        self.outputs['mapfile'] = target
        return 0
Example #5
0
    def go(self):
        """
        Run the demixing node script for every entry of the input data map.

        The input-data mapfile is ``self.inputs["args"][0]``. An optional
        second argument names an output-data mapfile, which is loaded and
        validated against the input map. Without it, the output map is
        derived from the input file names.

        Returns 1 if the mapfile validation fails or if ``self.error`` is set
        once the jobs have been scheduled. Otherwise stores the output map in
        ``self.inputs["mapfile"]``, sets ``self.outputs["mapfile"]`` to that
        path and returns 0.
        """
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(
            self.inputs["working_directory"], self.inputs["job_name"]
        )

        # Load the file <-> compute node mapping from disk.
        args = self.inputs["args"]
        input_mapfile = args[0]
        self.logger.debug("Loading input-data mapfile: %s" % input_mapfile)
        indata = load_data_map(input_mapfile)

        if len(args) < 2:
            # This is a bit of a kludge. The input MS-filenames are supposed
            # to contain the string "_uv". The demixing node script will
            # produce output MS-files, whose names have the string "_uv"
            # replaced by "_" + self.inputs['ms_target'] + "_sub".
            outdata = []
            for host, infile in indata:
                suffix = "_" + self.inputs["ms_target"] + "_sub"
                renamed = os.path.basename(infile).replace("_uv", suffix)
                outdata.append((host, os.path.join(job_dir, renamed)))
        else:
            output_mapfile = args[1]
            self.logger.debug("Loading output-data mapfile: %s" % output_mapfile)
            outdata = load_data_map(output_mapfile)
            if not validate_data_maps(indata, outdata):
                self.logger.error("Validation of input/output data mapfiles failed")
                return 1

        # The node-side script is this recipe's file path with "master"
        # replaced by "nodes".
        command = "python %s" % (self.__file__.replace("master", "nodes"))

        jobs = []
        for host, infile in indata:
            job_arguments = [
                infile,
                job_dir,
                self.inputs["initscript"],
                self.inputs["demix_sources"],
                self.inputs["ms_target"],
                self.config.get("cluster", "clusterdesc"),
                self.inputs["timestep"],
                self.inputs["freqstep"],
                self.inputs["half_window"],
                self.inputs["threshold"],
                self.inputs["demix_parset_dir"],
                self.inputs["skymodel"],
                self.inputs["db_host"],
            ]
            jobs.append(ComputeJob(host, command, arguments=job_arguments))
        self._schedule_jobs(jobs, max_per_node=self.inputs["nproc"])

        if self.error.isSet():
            return 1

        self.logger.debug("Writing mapfile %s" % self.inputs["mapfile"])
        store_data_map(self.inputs["mapfile"], outdata)
        self.outputs["mapfile"] = self.inputs["mapfile"]
        return 0