Example #1
    def add_chunk(self, name, array, grid=None):
        """
        Adds a slice of values for a parameter.

        Args:
            name (str):
                The parameter name e.g. "G".
            array (:obj:`~numpy.ma.core.MaskedArray`):
                The values which are to be added.
            grid (dict, optional):
                Grid coordinates for each sliced parameter axis. 

        """

        grid = grid or {}
        assert self.mode == "create"
        parm = self._parameters.get(name)
        assert parm is not None
        # dump parm to DB the first time a slice shows up
        if name not in self._parm_written:
            pickle.dump(parm, self._fobj, 2)
            self._parm_written.add(name)
        # update axis shapes and grids based on slice
        parm._update_shape(array.shape, grid)
        # dump slice to DB
        item = _ParmSegment(name=name, array=np.ma.asarray(array), grid=grid)
        pickle.dump(item, self._fobj, 2)
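
Each slice thus becomes one more pickle record appended to the open database file. A self-contained sketch of that record-appending pattern, using a plain dict as a stand-in for the actual _ParmSegment class:

import pickle
import numpy as np

# stand-in record: one named, gridded slice of masked values (a sketch, not CubiCal's exact format)
record = dict(name="G",
              array=np.ma.masked_invalid([[1.0, np.nan], [0.5, 2.0]]),
              grid={"time": [0.0], "freq": [1.4e9, 1.5e9]})

with open("parmdb.tmp", "wb") as fobj:
    pickle.dump(record, fobj, 2)   # protocol 2, as in the method above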
Example #2
    def _create(self, filename, metadata=None, backup=True, **kw):
        """
        Creates a parameter database given by the filename and opens it in "create" mode.

        Args:
            filename (str): 
                Name of database.
            metadata (dict, optional): 
                Optional metadata to be stored in DB.
            backup (bool, optional):
                If True, and an old database with the same filename exists, make a backup.
            kw (dict):
                Extra keyword arguments (unused here).
        """

        self.mode = "create"
        self.filename = filename
        self.do_backup = backup
        self.metadata = OrderedDict(mode=self.MODE_FRAGMENTED,
                                    time=time.time(),
                                    **(metadata or {}))
        # we'll write to a temp file, and do a backup on successful closure
        self._fobj = open(filename + ".tmp", 'wb')
        pickle.dump(self.metadata, self._fobj)
        self._fobj.flush()
        self._parameters = {}
        self._parm_written = set()
        print("creating {} in {} mode".format(self.filename,
                                              self.metadata['mode']),
              file=log(0))
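
Because _create writes the database as a plain sequence of pickles (the metadata dict first, then parameter descriptions and slice segments as they arrive), a fragmented database can be read back by calling pickle.load until EOF. A minimal reader sketch under that assumption:

import pickle

def read_records(filename):
    """Yield every pickle record in a fragmented database file."""
    with open(filename, "rb") as fobj:
        while True:
            try:
                yield pickle.load(fobj)
            except EOFError:
                return

# records = list(read_records("mydb"))  # records[0] is the metadata dict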
Example #3
    def _save_desc(self):
        """ Helper function. Writes accumulated parameter descriptions to filename.desc. """

        for desc in self._parameters.values():
            desc._finalize_shape()
        for key in list(self._parameters.keys()):
            if not self._parameters[key]._populated:
                del self._parameters[key]
        with open(self.filename + ".skel", 'wb') as fobj:
            pickle.dump(self._parameters, fobj, 2)
        print("saved updated parameter skeletons to {}".format(self.filename + ".skel"), file=log(0))
Example #4
    def save(self, filename):
        """
        Pickles contents to a file. Better than pickling the whole object, as the pickle
        then only contains standard classes (i.e. CubiCal is not needed to read it).

        Args:
            filename (str):
                Name for pickled file.
        """

        with open(filename, 'wb') as fobj:
            pickle.dump((self.chanant, self.timeant, self.timechan, self.chunk), fobj, 2)
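
The inverse operation is a single pickle.load; since only standard classes were pickled, CubiCal itself is not needed to read the file. A sketch (hypothetical filename):

import pickle

with open("stats.pickle", "rb") as fobj:
    chanant, timeant, timechan, chunk = pickle.load(fobj)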
Example #5
    def save(self, filename=None, backup=True):
        """
        Save the database.

        Args:
            filename (str, optional):
                Name of output file.
            backup (bool, optional):
                If True, create a backup.
        """
        assert self.mode == "load"
        self.metadata['mode'] = self.MODE_CONSOLIDATED
        filename = filename or self.filename
        with open(filename + ".tmp", 'wb') as fobj:
            pickle.dump(self.metadata, fobj, 2)
            for parm in self._parameters.values():
                parm.release_cache()
            pickle.dump(self._parameters, fobj, 2)
        # successfully written? Backup and rename
        self.filename = filename
        self._backup_and_rename(backup)
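
Writing to filename + ".tmp" and only promoting it on successful closure is the usual atomic-save pattern. The promotion step is delegated to _backup_and_rename, which is not shown here; a hypothetical sketch of what such a helper might do:

import os

def backup_and_rename(filename, backup=True):
    """Promote filename.tmp to filename, optionally keeping a numbered backup (hypothetical helper)."""
    if backup and os.path.exists(filename):
        n = 0
        while os.path.exists("{}.{}".format(filename, n)):
            n += 1
        os.rename(filename, "{}.{}".format(filename, n))   # back up the old file
    os.rename(filename + ".tmp", filename)                 # promote the new one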
Example #6
def main(debugging=False):
    """
    Main cubical driver function. Reads options, sets up MS and solvers, calls the solver, etc.

    Args:
        debugging (bool, optional):
            If True, run in debugging mode.

    Raises:
        UserInputError:
            If neither --model-lsm nor --model-column were specified.
        UserInputError:
            If no Jones terms are enabled.
        UserInputError:
            If --out-mode is invalid.
        ValueError:
            If unknown Jones type is specified.
        RuntimeError:
            If I/O job on a tile failed.
    """

    # this will be set below if a custom parset is specified on the command line
    custom_parset_file = None
    # "GD" is a global defaults dict, containing options set up from parset + command line
    global GD, enable_pdb

    # keep a list of messages here, until we have a logfile open
    prelog_messages = []

    def prelog_print(level, message):
        prelog_messages.append((level, message))

    try:
        if debugging:
            print("initializing from cubical.last", file=log)
            GD = pickle.load(open("cubical.last", "rb"))
            basename = GD["out"]["name"]
            parser = None
        else:
            default_parset = parsets.Parset("%s/DefaultParset.cfg" % os.path.dirname(__file__))

            # if first argument is a filename, treat it as a parset

            if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
                custom_parset_file = sys.argv[1]
                print("reading defaults from {}".format(custom_parset_file), file=log)
                try:
                    parset = parsets.Parset(custom_parset_file)
                except Exception:
                    import traceback
                    traceback.print_exc()
                    raise UserInputError("'{}' must be a valid parset file. Use -h for help.".format(custom_parset_file))
                if not parset.success:
                    raise UserInputError("'{}' must be a valid parset file. Use -h for help.".format(custom_parset_file))
                # update default parameters with values from parset
                default_parset.update_values(parset, other_filename=' in {}'.format(custom_parset_file))

            import cubical
            parser = dynoptparse.DynamicOptionParser(usage='Usage: %prog [parset file] <options>',
                    description="""Questions, bug reports, suggestions: https://github.com/ratt-ru/CubiCal""",
                    version='%prog version {}'.format(cubical.VERSION),
                    defaults=default_parset.value_dict,
                    attributes=default_parset.attr_dict)

            # now read the full input from command line
            # "GD" is a global defaults dict, containing options set up from parset + command line
            GD = parser.read_input()

            # if a single argument is given, it should have been the parset
            if len(parser.get_arguments()) != (1 if custom_parset_file else 0):
                raise UserInputError("Unexpected number of arguments. Use -h for help.")

            # get dirname and basename for all output files
            outdir = expand_templated_name(GD["out"]["dir"]).strip()
            basename = expand_templated_name(GD["out"]["name"]).strip()
            can_overwrite = GD["out"]["overwrite"]
            can_backup = GD["out"]["backup"]

            explicit_basename_path = "/" in basename
            folder_is_ccout  = False

            if explicit_basename_path:
                prelog_print(0, "output basename explicitly set to {}, --out-dir setting ignored".format(basename))
                outdir = os.path.dirname(basename)
            elif outdir == "." or not outdir:
                outdir = None
                prelog_print(0, "using output basename {} in current directory".format(basename))
            else:
                # append implicit .cc-out suffix, unless the name already ends with "/" or ".cc-out"
                if not outdir.endswith("/"):
                    if outdir.endswith(".cc-out"):
                        outdir += "/"
                    else:
                        outdir += ".cc-out/"
                folder_is_ccout = outdir.endswith(".cc-out/")
                basename = outdir + basename
                if outdir != "/":
                    outdir = outdir.rstrip("/")
                prelog_print(0, "using output basename {}".format(basename))

            # create directory for output files, if specified, and it doesn't exist
            if outdir and not os.path.exists(outdir):
                prelog_print(0, "creating new output directory {}".format(outdir))
                os.mkdir(outdir)

            # are we going to be overwriting a previous run?
            out_parset = "{}.parset".format(basename)
            if os.path.exists(out_parset):
                prelog_print(0, "{} already exists, possibly from a previous run".format(out_parset))

                if can_backup:
                    if folder_is_ccout:
                        # find non-existing directory name for backup
                        backup_dir = outdir + ".0"
                        N = 0
                        while os.path.exists(backup_dir):
                            N += 1
                            backup_dir = "{}.{}".format(outdir, N)
                        # rename the old directory out of the way, then recreate it empty
                        os.rename(outdir, backup_dir)
                        os.mkdir(outdir)
                        prelog_print(0, ModColor.Str("backed up existing {} to {}".format(outdir, backup_dir), "blue"))
                    else:
                        prelog_print(0, "refusing to auto-backup output directory, since it is not a .cc-out dir")

                if os.path.exists(out_parset):
                    if can_overwrite:
                        prelog_print(0, "proceeding anyway since --out-overwrite is set")
                    else:
                        if folder_is_ccout:
                            prelog_print(0, "won't proceed without --out-overwrite and/or --out-backup")
                        else:
                            prelog_print(0, "won't proceed without --out-overwrite")
                        raise UserInputError("{} already exists: won't overwrite previous run".format(out_parset))

            GD["out"]["name"] = basename

            # "GD" is a global defaults dict, containing options set up from parset + command line
            with open("cubical.last", "wb") as fobj:
                pickle.dump(GD, fobj)

            # save parset with all settings
            parser.write_to_parset(out_parset)

        enable_pdb = GD["debug"]["pdb"]

        # now setup logging
        logger.logToFile(basename + ".log", append=GD["log"]["append"])
        logger.enableMemoryLogging(GD["log"]["memory"])
        logger.setBoring(GD["log"]["boring"])
        logger.setGlobalVerbosity(GD["log"]["verbose"])
        logger.setGlobalLogVerbosity(GD["log"]["file-verbose"])

        if not debugging:
            print("started " + " ".join(sys.argv), file=log)

        # dump accumulated messages from before log was open
        for level, message in prelog_messages:
            print(message, file=log(level))
        prelog_messages = []

        # clean up shared memory from any previous runs
        shm_utils.cleanupStaleShm()

        # disable matplotlib's tk backend if we're not going to be showing plots
        if GD['out']['plots'] == 'show' or GD['madmax']['plot'] == 'show':
            import pylab
            try:
                pylab.figure()
                pylab.close()
            except Exception as exc:
                import traceback
                print(ModColor.Str("Error initializing matplotlib: {}({})\n {}".format(type(exc).__name__,
                                                                                       exc, traceback.format_exc())), file=log)
                raise UserInputError("matplotlib can't connect to X11. Can't use --out-plots show or --madmax-plot show.")
        else:
            matplotlib.use("Agg")

        # print current options
        if parser is not None:
            parser.print_config(dest=log)

        double_precision = GD["sol"]["precision"] == 64

        # set up RIME

        solver_opts = GD["sol"]
        debug_opts  = GD["debug"]
        out_opts = GD["out"]
        sol_jones = solver_opts["jones"]
        if isinstance(sol_jones, string_types):
            sol_jones = set(sol_jones.split(','))
        # collect the list of options for each enabled Jones term
        jones_opts = [GD[j.lower()] for j in sol_jones]
        if not jones_opts:
            raise UserInputError("No Jones terms are enabled")
        print(ModColor.Str("Enabling {}-Jones".format(",".join(sol_jones)), col="green"), file=log)

        have_dd_jones = any(jo['dd-term'] for jo in jones_opts)

        solver.GD = GD

        # set up data handler

        solver_type = GD['out']['mode']
        if solver_type not in solver.SOLVERS:
            raise UserInputError("invalid setting --out-mode {}".format(solver_type))
        solver_mode_name = solver.SOLVERS[solver_type].__name__.replace("_", " ")
        print(ModColor.Str("mode: {}".format(solver_mode_name), col='green'), file=log)
        # these flags are used below to tweak the behaviour of gain machines and model loaders
        apply_only = solver.SOLVERS[solver_type].is_apply_only
        print("solver is apply-only type: {}".format(apply_only), file=log(0))
        load_model = solver.SOLVERS[solver_type].is_model_required
        print("solver requires model: {}".format(load_model), file=log(0))

        if load_model and not GD["model"]["list"]:
            raise UserInputError("--model-list must be specified")

        ms = MSDataHandler(GD["data"]["ms"],
                           GD["data"]["column"],
                           output_column=GD["out"]["column"],
                           output_model_column=GD["out"]["model-column"],
                           output_weight_column=GD["out"]["weight-column"],
                           reinit_output_column=GD["out"]["reinit-column"],
                           taql=GD["sel"]["taql"],
                           fid=GD["sel"]["field"],
                           ddid=GD["sel"]["ddid"],
                           channels=GD["sel"]["chan"],
                           diag=GD["sel"]["diag"],
                           beam_pattern=GD["model"]["beam-pattern"],
                           beam_l_axis=GD["model"]["beam-l-axis"],
                           beam_m_axis=GD["model"]["beam-m-axis"],
                           active_subset=GD["sol"]["subset"],
                           min_baseline=GD["sol"]["min-bl"],
                           max_baseline=GD["sol"]["max-bl"],
                           chunk_freq=GD["data"]["freq-chunk"],
                           rebin_freq=GD["data"]["rebin-freq"],
                           do_load_CASA_kwtables = GD["out"]["casa-gaintables"],
                           feed_rotate_model=GD["model"]["feed-rotate"],
                           pa_rotate_model=GD["model"]["pa-rotate"],
                           pa_rotate_montblanc=GD["montblanc"]["pa-rotate"],
                           derotate_output=GD["out"]["derotate"],
                           )

        solver.metadata = ms.metadata
        # if using dual-corr mode, propagate this into Jones options
        if ms.ncorr == 2:
            for jo in jones_opts:
                jo['diag-only'] = True
                jo['diag-data'] = True
            solver_opts['diag-only'] = True
            solver_opts['diag-data'] = True

        # With a single Jones term, create a gain machine factory based on its type.
        # With multiple Jones, create a ChainMachine factory
        term_iters = solver_opts["term-iters"]
        if isinstance(term_iters, int):
            term_iters = [term_iters] * len(jones_opts)
            solver_opts["term-iters"] = term_iters
            if len(jones_opts) > 1:
                log.warn("Multiple gain terms specified, but no sol-term-iters recipe given. "
                         "This may indicate user error. Assuming the same number of iterations per term, "
                         "and stopping on the last term in the chain.")
        elif isinstance(term_iters, list) and len(term_iters) == 1:
            term_iters = term_iters * len(jones_opts)
            solver_opts["term-iters"] = term_iters
            if len(jones_opts) > 1:
                log.warn("Multiple gain terms specified, but no sol-term-iters recipe given. "
                         "This may indicate user error. Assuming the same number of iterations per term, "
                         "and stopping on the last term in the chain.")
        elif isinstance(term_iters, list) and len(term_iters) < len(jones_opts):
            raise ValueError("sol-term-iters is a list, but does not match or exceed the number of gain terms "
                             "being solved. Either set a single value to be used for all terms, or provide a "
                             "full list to construct an iteration recipe.")
        elif isinstance(term_iters, list) and len(term_iters) >= len(jones_opts):
            pass  # user is executing a recipe
        else:
            raise TypeError("sol-term-iters is neither a list nor an int. Check your parset.")

        if len(jones_opts) == 1:
            jones_opts = jones_opts[0]
            # for just one term, propagate --sol-term-iters, if set, into its max-iter setting
            term_iters = solver_opts["term-iters"]
            if term_iters:
                jones_opts["max-iter"] = term_iters[0] if hasattr(term_iters,'__getitem__') else term_iters
            # create a gain machine factory
            jones_class = machine_types.get_machine_class(jones_opts['type'])
            if jones_class is None:
                raise UserInputError("unknown Jones type '{}'".format(jones_opts['type']))
        elif jones_opts[0]['type'] == "robust-2x2":
            jones_class = jones_chain_robust_machine.JonesChain
        else:
            jones_class = jones_chain_machine.JonesChain

        # init models
        dde_mode = GD["model"]["ddes"]

        if dde_mode == 'always' and not have_dd_jones:
            raise UserInputError("we have '--model-ddes always', but no direction dependent Jones terms enabled")

        # force floats in Montblanc calculations
        mb_opts = GD["montblanc"]
        # mb_opts['dtype'] = 'float'

        ms.init_models(str(GD["model"]["list"]).split(","),
                       GD["weight"]["column"].split(",") if GD["weight"]["column"] else None,
                       fill_offdiag_weights=GD["weight"]["fill-offdiag"],
                       mb_opts=GD["montblanc"],
                       use_ddes=have_dd_jones and dde_mode != 'never',
                       degrid_opts=GD["degridding"])

        if len(ms.model_directions) < 2 and have_dd_jones and dde_mode == 'auto':
            raise UserInputError("--model-list does not specify directions. "
                    "Have you forgotten a @dE tag perhaps? Rerun with '--model-ddes never' to proceed anyway.")

        if load_model:
            # set up subtraction options
            solver_opts["subtract-model"] = smod = GD["out"]["subtract-model"]
            if smod < 0 or smod >= len(ms.models):
                raise UserInputError("--out-subtract-model {} out of range for {} model(s)".format(smod, len(ms.models)))

            # parse subtraction directions as a slice or list
            subdirs = GD["out"]["subtract-dirs"]
            if type(subdirs) is int:
                subdirs = [subdirs]
            if subdirs:
                if isinstance(subdirs, string_types):
                    try:
                        if ',' in subdirs:
                            subdirs = list(map(int, subdirs.split(",")))
                        else:
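                            # np.s_[...] turns an index expression string like "0:5" into a slice object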
                            subdirs = eval("np.s_[{}]".format(subdirs))
                    except Exception:
                        raise UserInputError("invalid --out-subtract-dirs option '{}'".format(subdirs))
                elif type(subdirs) is not list:
                    raise UserInputError("invalid --out-subtract-dirs option '{}'".format(subdirs))
                # check ranges
                if type(subdirs) is list:
                    out_of_range = [ d for d in subdirs if d < 0 or d >= len(ms.model_directions) ]
                    if out_of_range:
                        raise UserInputError("--out-subtract-dirs {} out of range for {} model direction(s)".format(
                                ",".join(map(str, out_of_range)), len(ms.model_directions)))
                print("subtraction directions set to {}".format(subdirs), file=log(0))
            else:
                subdirs = slice(None)
            solver_opts["subtract-dirs"] = subdirs

        # create gain machine factory
        # TODO: pass in proper antenna and correlation names, rather than number

        grid = dict(ant=ms.antnames, corr=ms.feeds, time=ms.uniq_times, freq=ms.all_freqs)
        solver.gm_factory = jones_class.create_factory(grid=grid,
                                                       apply_only=apply_only,
                                                       double_precision=double_precision,
                                                       global_options=GD, jones_options=jones_opts)
                                                       
        # create IFR-based gain machine. Only compute gains if we're loading a model
        # (i.e. not in load-apply mode)
        solver.ifrgain_machine = ifr_gain_machine.IfrGainMachine(solver.gm_factory, GD["bbc"], compute=load_model)

        solver.legacy_version12_weights = GD["weight"]["legacy-v1-2"]

        single_chunk = GD["data"]["single-chunk"]
        single_tile = GD["data"]["single-tile"]

        # setup worker process properties

        workers.setup_parallelism(GD["dist"]["ncpu"], GD["dist"]["nworker"], GD["dist"]["nthread"],
                                  debugging or single_chunk,
                                  GD["dist"]["pin"], GD["dist"]["pin-io"], GD["dist"]["pin-main"],
                                  ms.use_montblanc, GD["montblanc"]["threads"])

        # set up chunking

        chunk_by = GD["data"]["chunk-by"]
        if isinstance(chunk_by, string_types):
            chunk_by = chunk_by.split(",")
        jump = float(GD["data"]["chunk-by-jump"])

        chunks_per_tile = max(GD["dist"]["min-chunks"], workers.num_workers, 1)
        if GD["dist"]["max-chunks"]:
            chunks_per_tile = max(GD["dist"]["max-chunks"], chunks_per_tile)

        print("defining chunks (time {}, freq {}{})".format(GD["data"]["time-chunk"], GD["data"]["freq-chunk"],
            ", also when {} jumps > {}".format(", ".join(chunk_by), jump) if chunk_by else ""), file=log)

        chunks_per_tile, tile_list = ms.define_chunk(GD["data"]["time-chunk"], GD["data"]["rebin-time"],
                                            GD["data"]["freq-chunk"],
                                            chunk_by=chunk_by, chunk_by_jump=jump,
                                            chunks_per_tile=chunks_per_tile, max_chunks_per_tile=GD["dist"]["max-chunks"])

        # now that we have tiles, define the flagging situation (since this may involve a one-off iteration through the
        # MS to populate the column)
        ms.define_flags(tile_list, flagopts=GD["flags"])

        # single-chunk implies single-tile
        if single_tile >= 0:
            tile_list = tile_list[single_tile:single_tile+1]
            print("--data-single-tile {} set, will process only the one tile".format(single_tile), file=log(0, "blue"))
        elif single_chunk:
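            # single-chunk IDs have the form "D<ddid>T<tchunk>", e.g. "D0T3" for DDID 0, time chunk 3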
            match = re.match("D([0-9]+)T([0-9]+)", single_chunk)
            if not match:
                raise ValueError("invalid setting: --data-single-chunk {}".format(single_chunk))
            ddid_tchunk = int(match.group(1)), int(match.group(2))

            tilemap = { (rc.ddid, rc.tchunk): (tile, rc) for tile in tile_list for rc in tile.rowchunks }
            single_tile_rc = tilemap.get(ddid_tchunk)
            if single_tile_rc:
                tile, rc = single_tile_rc
                tile_list = [tile]
                print("--data-single-chunk {} in {}, rows {}:{}".format(
                    single_chunk, tile.label, min(rc.rows0), max(rc.rows0)+1), file=log(0, "blue"))
            else:
                raise ValueError("--data-single-chunk {}: chunk with this ID not found".format(single_chunk))

        # run the main loop

        t0 = time()

        stats_dict = workers.run_process_loop(ms, tile_list, load_model, single_chunk, solver_type, solver_opts, debug_opts, out_opts)


        print(ModColor.Str("Time taken for {}: {} seconds".format(solver_mode_name, time() - t0), col="green"), file=log)

        # print flagging stats
        print(ModColor.Str("Flagging stats: ",col="green") + " ".join(ms.get_flag_counts()), file=log)

        if not apply_only:
            # now summarize the stats
            print("computing summary statistics", file=log)
            st = SolverStats(stats_dict)
            filename = basename + ".stats.pickle"
            st.save(filename)
            print("saved summary statistics to %s" % filename, file=log)
            print_stats = GD["log"]["stats"]
            if print_stats:
                print("printing some summary statistics below", file=log(0))
                thresholds = []
                for thr in GD["log"]["stats-warn"].split(","):
                    field, value = thr.split(":")
                    thresholds.append((field, float(value)))
                    print("  highlighting {}>{}".format(field, float(value)), file=log(0))
                if print_stats == "all":
                    print_stats = st.get_notrivial_chunk_statfields()
                else:
                    print_stats = print_stats.split("//")
                for stats in print_stats:
                    if stats[0] != "{":
                        stats = "{{{}}}".format(stats)
                    lines = st.format_chunk_stats(stats, threshold=thresholds)
                    print("  summary stats for {}:\n  {}".format(stats, "\n  ".join(lines)), file=log(0))

            if GD["postmortem"]["enable"]:
                # flag based on summary stats
                flag3 = flagging.flag_chisq(st, GD, basename, ms.nddid_actual)

                if flag3 is not None:
                    st.apply_flagcube(flag3)
                    if GD["flags"]["save"] and flag3.any() and not GD["data"]["single-chunk"]:
                        print("regenerating output flags based on post-solution flagging", file=log)
                        flagcol = ms.flag3_to_col(flag3)
                        ms.save_flags(flagcol)

            # make plots
            if GD["out"]["plots"]:
                import cubical.plots
                try:
                    cubical.plots.make_summary_plots(st, ms, GD, basename)
                except Exception as exc:
                    if GD["debug"]["escalate-warnings"]:
                        raise
                    import traceback
                    print(ModColor.Str("An error has occurred while making summary plots: {}({})\n {}".format(
                        type(exc).__name__, exc, traceback.format_exc())), file=log)
                    print(ModColor.Str("This is not fatal, but should be reported (and your plots have gone missing!)"), file=log)

        # make BBC plots
        if solver.ifrgain_machine and solver.ifrgain_machine.is_computing() and GD["bbc"]["plot"] and GD["out"]["plots"]:
            import cubical.plots.ifrgains
            if GD["debug"]["escalate-warnings"]:
                with warnings.catch_warnings():
                    warnings.simplefilter("error", np.ComplexWarning)
                    cubical.plots.ifrgains.make_ifrgain_plots(solver.ifrgain_machine.reload(), ms, GD, basename)
            else:
                try:
                    cubical.plots.ifrgains.make_ifrgain_plots(solver.ifrgain_machine.reload(), ms, GD, basename)
                except Exception as exc:
                    import traceback
                    print(ModColor.Str("An error has occurred while making BBC plots: {}({})\n {}".format(
                        type(exc).__name__, exc, traceback.format_exc())), file=log)
                    print(ModColor.Str("This is not fatal, but should be reported (and your plots have gone missing!)"), file=log)

        ms.close()

        print(ModColor.Str("completed successfully", col="green"), file=log)

    except Exception as exc:
        for level, message in prelog_messages:
            print(message, file=log(level))

        if isinstance(exc, UserInputError):
            print(ModColor.Str(exc), file=log)
        else:
            import traceback
            print(ModColor.Str("Exiting with exception: {}({})\n {}".format(type(exc).__name__,
                                                                    exc, traceback.format_exc())), file=log)
            if enable_pdb:
                from cubical.tools import pdb
                etype, value, tb = sys.exc_info()
                pdb.post_mortem(tb)
        sys.exit(2 if isinstance(exc, UserInputError) else 1)
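
Since main() takes all of its options from sys.argv (with an optional leading parset file), a wrapper script only needs to call it. A hypothetical entry point, not CubiCal's actual console script:

if __name__ == "__main__":
    # e.g.: python driver.py myrun.parset --data-ms my.ms
    main(debugging=False)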
Example #7
    def beyond_thunderdome(self, resid_arr, data_arr, model_arr, flags_arr, threshold, med_threshold, max_label):
        """This function implements MAD-based flagging on residuals"""
        if not threshold and not med_threshold:
            return False
        # residuals can be per-model (when invoked from the solver) or not (when invoked on
        # final residuals); cope with both cases
        if resid_arr.ndim == 7:
            n_mod, _, _, n_ant, n_ant, n_cor, n_cor = resid_arr.shape
        else:
            _, _, n_ant, n_ant, n_cor, n_cor = resid_arr.shape
            n_mod = 1
            resid_arr = resid_arr.reshape([1]+list(resid_arr.shape))

        import cubical.kernels
        madmax = cubical.kernels.import_kernel("madmax")
        # estimate MAD of off-diagonal elements
        absres = np.empty_like(resid_arr, dtype=np.float32)
        np.abs(resid_arr, out=absres)
        if self.mad_per_corr:
            mad, goodies = madmax.compute_mad_per_corr(absres, flags_arr, diag=self.mad_estimate_diag, offdiag=self.mad_estimate_offdiag)
        else:
            mad, goodies = madmax.compute_mad(absres, flags_arr, diag=self.mad_estimate_diag, offdiag=self.mad_estimate_offdiag)
        # if everything is masked, there is nothing to flag
        if mad.mask.all():
            return False
        # estimate median MAD
        ### want to do this:
        ## medmad = np.ma.median(mad, axis=(1,2))
        ### but this seems to throw an error on earlier numpys (1.11?), so let's be defensive and reshape into one axis:
        shape1 = [mad.shape[0], mad.shape[1]*mad.shape[2]] + list(mad.shape[3:])
        medmad = np.ma.median(mad.reshape(shape1), axis=1)
        # all this was worth it, just so I could type "mad.max()" as legit code
        print("{} per-baseline MAD min {:.3g}, max {:.3g}, median {:.3g} to {:.3g}".format(max_label, mad.min(), mad.max(), medmad.min(), medmad.max()), file=log(2))
        if log.verbosity() > 4:
            for imod in range(n_mod):
                if self.mad_per_corr:
                    for ic1,c1 in enumerate(self.metadata.feeds):
                        for ic2,c2 in enumerate(self.metadata.feeds):
                            per_bl = [(mad[imod,p,q,ic1,ic2], p, q) for p in range(n_ant)
                                      for q in range(p+1, n_ant) if not mad.mask[imod,p,q,ic1,ic2]]
                            per_bl = ["{} ({}m): {:.3g}".format(self.metadata.baseline_name[p,q], int(self.metadata.baseline_length[p,q]), x)
                                      for x, p, q in sorted(per_bl)[::-1]]
                            print("{} model {} {}{} MADs are {}".format(max_label, imod,
                                                                                c1.upper(), c2.upper(), ", ".join(per_bl)), file=log(4))
                else:
                    per_bl = [(mad[imod,p,q], p, q) for p in range(n_ant)
                              for q in range(p+1, n_ant) if not mad.mask[imod,p,q]]
                    per_bl = ["{} ({}m) {:.3g}".format(self.metadata.baseline_name[p,q], int(self.metadata.baseline_length[p,q]), x)
                              for x, p, q in sorted(per_bl)[::-1]]
                    print("{} model {} MADs are {}".format(max_label, imod, ", ".join(per_bl)), file=log(4))


        made_plots = flagged_something = False

        thr = np.zeros((n_mod, n_ant, n_ant, n_cor, n_cor), dtype=np.float32)
        # apply per-baseline MAD threshold
        if threshold:
            if self.mad_per_corr:
                thr[:] = threshold * mad / SIGMA_MAD
            else:
                thr[:] = threshold * mad[...,np.newaxis,np.newaxis] / SIGMA_MAD
            baddies = madmax.threshold_mad(absres, thr, flags_arr, self.flagbit, goodies,
                                             diag=self.mad_diag, offdiag=self.mad_offdiag)
            made_plots, flagged_something  = self.report_carnage(absres, mad, baddies, flags_arr,
                                                "baseline-based Mad Max ({} sigma)".format(threshold), max_label)
            if not self._pretend:
                baddies = baddies.astype(bool)
                if model_arr is not None:
                    model_arr[:,:,baddies,:,:] = 0
                if data_arr is not None:
                    data_arr[:,baddies,:,:] = 0

        # apply global median MAD threshold
        if med_threshold:
            med_thr = med_threshold * medmad / SIGMA_MAD
            if self.mad_per_corr:
                thr[:] = med_thr[:,np.newaxis,np.newaxis,:,:]
            else:
                thr[:] = med_thr[:,np.newaxis,np.newaxis,np.newaxis,np.newaxis]
            baddies = madmax.threshold_mad(absres, thr, flags_arr, self.flagbit, goodies,
                                             diag=self.mad_diag, offdiag=self.mad_offdiag)

            made, flagged = \
                self.report_carnage(absres, mad, baddies, flags_arr,
                                       "global Mad Max ({} sigma)".format(med_threshold), max_label)

            made_plots = made_plots or made
            flagged_something = flagged_something or flagged

            if not self._pretend:
                baddies = baddies.astype(bool)
                if model_arr is not None:
                    model_arr[:, :, baddies, :, :] = 0
                if data_arr is not None:
                    data_arr[:, baddies, :, :] = 0
        else:
            med_thr = None

        # generate overview plot
        if made_plots:
            import pylab
            outflags, figure = plots.make_baseline_mad_plot(mad, medmad, med_thr, metadata=self.metadata,
                                max_label=max_label, chunk_label=self.chunk_label,
                                antenna_mad_threshold=self.GD['madmax']['flag-ant-thr'])
            if outflags.any():
                flagged_something = True
                if self.mad_per_corr:
                    outflags = outflags.any(axis=(-1,-2))
                if self.GD['madmax']['flag-ant'] and not self._pretend:
                    print("{} baselines {}flagged on mad residuals (--madmax-flag-ant 1)".format(
                                            outflags.sum()/2, "trial-" if self._trial else ""), file=log(0, "red"))
                    flags_arr[:,:,outflags] |= self.flagbit
                    if model_arr is not None:
                        model_arr[:,:,:,:,outflags,:,:] = 0
                    if data_arr is not None:
                        data_arr[:,:,:,outflags,:,:] = 0
                else:
                    print("{} baselines would have been flagged due to mad residuals (use --madmax-flag-ant to enable this)".format(outflags.sum()/2), file=log(0, "red"))

            try:
                if self.GD['madmax']['plot'] == 'show':
                    pylab.show()
                else:
                    filename = self.get_plot_filename('mads')
                    print("{}: saving MAD distribution plot to {}".format(self.chunk_label,filename), file=log(1))
                    figure.savefig(filename, dpi=300)
                    from future.moves import pickle
                    pickle_file = filename+".cp"
                    with open(pickle_file, "wb") as pf:
                        pickle.dump((mad, medmad, med_thr, self.metadata, max_label), pf, 2)
                    print("{}: pickling MAD distribution to {}".format(self.chunk_label, pickle_file), file=log(1))
                pylab.close(figure)
                del figure
            except Exception as exc:
                traceback.print_exc()
                print("WARNING: {}: exception {} raised while rendering Mad Max summary plot".format(
                                        self.chunk_label, exc), file=log(1,"red"))
                print("Although harmless, this may indicate a problem with the data, or a bug in CubiCal.", file=log(1))
                print("Please see stack trace above, and report if you think this is a bug.", file=log(1))

        return flagged_something and not self._pretend
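
Stripped of the per-baseline and per-correlation bookkeeping, the core idea is to flag points whose absolute residual exceeds a multiple of a robust, MAD-derived noise estimate. A self-contained 1-D sketch of that statistic, independent of the CubiCal kernels:

import numpy as np

def mad_flag(resid, threshold=5.0):
    """Return a boolean mask of points beyond threshold sigma, with sigma
    estimated robustly via the median absolute deviation (MAD)."""
    mad = np.median(np.abs(resid - np.median(resid)))
    sigma = mad / 0.6745   # Gaussian MAD-to-sigma conversion factor
    return np.abs(resid) > threshold * sigma

resid = np.concatenate([np.random.randn(1000), [50.0, -40.0]])
print(mad_flag(resid).sum(), "points flagged")   # expect roughly the 2 outliers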