Example #1
def from_cfg(fname):
    """Load a PISA config file"""
    from pisa.utils.config_parser import PISAConfigParser
    config = PISAConfigParser()
    try:
        config.read(fname)
    except:
        log.logging.error('Failed to read PISA config file, `fname`="%s"',
                          fname)
        raise
    return config
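# A minimal usage sketch of `from_cfg` (the config path below is hypothetical;
# any file readable by PISAConfigParser would do):
#
#     config = from_cfg('settings/pipeline/example.cfg')
#     print(config.sections())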
def main():
    """Do the conversion."""
    args = parse_args()
    in_fpath = os.path.expanduser(os.path.expandvars(args.config[0]))
    out_fpath = in_fpath + '.new'

    with open(in_fpath, 'r') as infile:
        orig_contents = infile.readlines()

    osc_stage_header_line = None
    section_names_with_colons = {}
    new_contents = []
    for lineno, orig_line in enumerate(orig_contents, start=1):
        # Remove trailing whitespace, including newline character(s)
        new_line = orig_line.rstrip()

        # Empty (or whitespace-only) line is trivial
        if new_line == '':
            new_contents.append(new_line)
            continue

        # Replace text substitution ("config file variables") syntax
        new_line = OLD_SUB_RE.sub(repl=replace_substitution, string=new_line)

        # Replace stage headers. E.g.
        #     ``  [ stage :stage_name ]``
        # is replaced by
        #     ``  [stage.stage_name]``
        # I.e., retain any whitespace before the brackets (trailing whitespace
        # after them has already been stripped), but swap the colon for a
        # period and remove whitespace within the brackets.
        new_line = OLD_STAGE_SECTION_RE.sub(
            repl=lambda m: '[stage.%s]' % m.groups(), string=new_line)

        # Replace stage:key variables. E.g. what should now look like
        #     ``  ${ stage : key }  ``
        # should look like
        #     ``  ${stage.key}  ``
        new_line = OLD_STAGE_VARIABLE_RE.sub(
            repl=lambda m: '${stage.%s}' % m.groups(), string=new_line)

        stripped = new_line.strip()

        # Replace order string
        if stripped.startswith('order'):
            new_line = OLD_ORDER_RE.sub(repl=replace_order, string=new_line)
        # Record line on which the [stage.osc] section occurs (if any)
        elif stripped == '[stage.osc]':
            osc_stage_header_line = lineno - 1

        # Convert ``#include x`` to ``#include x as y``, where appropriate
        new_line = PISAConfigParser.INCLUDE_RE.sub(repl=append_include_as,
                                                   string=new_line)

        # Convert filenames of JSON files that are now bzipped to .json.bz2
        if '.json' in new_line:
            for json_re in JSON_NO_BZ2_RE_LIST:
                new_line = json_re.sub(repl='.json.bz2', string=new_line)

        # Replace changed names
        for orig_name, new_name in NAMES_CHANGED_MAP.items():
            new_line = new_line.replace(orig_name, new_name)

        # Search for any colons used in section names. This is illegal, as a
        # section name can be used as a variable where the syntax is
        #   ``${section_name:key}``
        # so any colons in section_name will make the parser choke.
        for match in SECTION_NAME_WITH_COLON_RE.finditer(new_line):
            section_name_with_colons = match.groups()[0]
            if NEW_VARIABLE_RE.match(section_name_with_colons):
                if section_name_with_colons.count(':') > 1:
                    raise ValueError(
                        'Multiple colons in new-style variable, line %d:\n'
                        '>> Original line:\n%s\n>> New line:\n%s\n' %
                        (lineno, orig_line, new_line))
                else:
                    continue
            section_name_without_colons = section_name_with_colons.replace(
                ':', OTHER_SECTION_NAME_SEPARATOR)
            section_names_with_colons[section_name_with_colons] = (
                section_name_without_colons)
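            # For example (illustrative only; the actual separator is whatever
            # OTHER_SECTION_NAME_SEPARATOR is defined to be elsewhere), a
            # section named ``flux:honda`` would be recorded here so that the
            # second pass below can rename it to something like ``flux__honda``.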

        new_contents.append(new_line)

    #for item in  section_names_with_colons.items():
    #    print '%s --> %s' % item

    # Go back through and replace colon-separated section names with
    # ``OTHER_SECTION_NAME_SEPARATOR``-separated section names
    all_names_to_replace = section_names_with_colons.keys()

    def replace_var(match):
        """Closure to replace variable names"""
        whole_string, var_name = match.groups()
        if var_name in all_names_to_replace:
            return '${%s}' % section_names_with_colons[var_name]
        return whole_string

    def replace_section_name(match):
        """Closure to replace section names"""
        whole_string, section_name = match.groups()
        if section_name in all_names_to_replace:
            return whole_string.replace(
                section_name, section_names_with_colons[section_name])
        return whole_string

    for lineno, new_line in enumerate(new_contents, start=1):
        if not new_line:
            continue

        new_line = NEW_VARIABLE_RE.sub(repl=replace_var, string=new_line)
        new_line = NEW_SECTION_RE.sub(repl=replace_section_name,
                                      string=new_line)

        #new_line = NEW_SECTION_RE.sub(repl=replace_colon_names,
        #                              string=new_line)

        #for with_colons, without_colons in section_names_with_colons:
        #    new_line = new_line.replace(with_colons, without_colons)

        # Check for multiple colons in a variable (which is illegal)
        if MULTI_COLON_VAR_RE.findall(new_line):
            raise ValueError(
                'Multiple colons in variable, line %d:\n>> Original'
                ' line:\n%s\n>> New line:\n%s\n' %
                (lineno, orig_contents[lineno - 1], new_line))

        new_contents[lineno - 1] = new_line

    # Parse the new config file with the PISAConfigParser to see if NSI
    # parameters are defined in the `stage.osc` section (if the latter is
    # present). If needed, insert appropriate #include in the section
    pcp = PISAConfigParser()
    missing_section_header = False
    try:
        pcp.read_string(('\n'.join(new_contents) + '\n').decode('utf-8'))
    except MissingSectionHeaderError:
        missing_section_header = True
        pcp.read_string(('\n'.join(['[dummy section header]'] + new_contents) +
                         '\n').decode('utf-8'))

    if 'stage.osc' in pcp:
        keys_containing_eps = [
            k for k in pcp['stage.osc'].keys() if '.eps_'.encode('utf-8') in k
        ]

        nsi_params_present = []
        nsi_params_missing = []
        for nsi_param, nsi_param_re in NSI_PARAM_RE_MAP.items():
            found = None
            for key_idx, key in enumerate(keys_containing_eps):
                if nsi_param_re.match(key):
                    found = key_idx
                    nsi_params_present.append(nsi_param)

            if found is None:
                nsi_params_missing.append(nsi_param)
            else:
                # No need to search this key again
                keys_containing_eps.pop(found)

        if set(nsi_params_present) == set(NSI_PARAM_RE_MAP.keys()):
            all_nsi_params_defined = True
        elif set(nsi_params_missing) == set(NSI_PARAM_RE_MAP.keys()):
            all_nsi_params_defined = False
        else:
            raise ValueError(
                'Found a subset of NSI params defined; missing %s' %
                str(nsi_params_missing))

        # NOTE: since for now the contents of nsi_null.cfg are commented out
        # (until the NSI branch is merged), the above check will say NSI params
        # are missing even if the #include statement was made. So check to see
        # whether settings/osc/nsi_null.cfg _has_ been included (we can't tell
        # what section it is in, but we'll have to just accept that).
        #
        # We will probably want to remove this stanza as soon as the NSI branch
        # is merged, since this is imprecise and can introduce other weird
        # corner cases.
        rsrc_loc = find_resource('settings/osc/nsi_null.cfg')
        for file_iter in pcp.file_iterators:
            if rsrc_loc in file_iter.fpaths_processed:
                all_nsi_params_defined = True

        if not all_nsi_params_defined and osc_stage_header_line is None:
            raise ValueError(
                "Found a stage.osc section without NSI params defined (using"
                " PISAConfigParser) but could not find the line of the"
                " `[stage.osc]` header. This could occur if `[stage.osc]` came"
                " from an `#include`'d file. You can manually define the NSI"
                " parameters in this file or in the included file e.g. as"
                " found in `settings/osc/nsi_null.cfg` or simply add the"
                " statement ``#include settings/osc/nsi_null.cfg`` to either"
                " file (so long as that statement it falls within the"
                " stage.osc section).")

        # Add ``#include settings/osc/nsi_null.cfg`` at top of stage.osc
        # section if a stage.osc section is present and no NSI params were
        # specified in that section
        if not all_nsi_params_defined:
            # Add an #include to set all NSI parameters to 0
            new_contents.insert(osc_stage_header_line + 1,
                                '#include settings/osc/nsi_null.cfg')
            # Add an extra blank line after the #include line
            new_contents.insert(osc_stage_header_line + 2, '')
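            # After these inserts the section begins, illustratively, with:
            #     [stage.osc]
            #     #include settings/osc/nsi_null.cfg
            # followed by a blank line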

    if not new_contents:
        raise ValueError('Empty file after conversion; quitting.')

    # Note that newlines are added but no join is performed for comparison
    # against `orig_contents`
    new_contents = [line + '\n' for line in new_contents]

    # Now for validation, try to parse the new config file with the
    # PISAConfigParser
    pcp = PISAConfigParser()
    if missing_section_header:
        pcp.read_string((''.join(['[dummy section header]\n'] + new_contents) +
                         '\n').decode('utf-8'))
    else:
        pcp.read_string((''.join(new_contents)).decode('utf-8'))

    if new_contents == orig_contents:
        sys.stdout.write('Nothing modified in the original file (ok!).\n')
        return

    if args.validate_only:
        raise ValueError(
            'Original config file "%s" would be modified (and so may be'
            ' invalid). Re-run this script without the --validate-only flag to'
            ' produce an appropriately-converted config file.' %
            args.config[0])

    sys.stdout.write('Writing modified config file to "%s"\n' % out_fpath)
    with open(out_fpath, 'w') as outfile:
        outfile.writelines(new_contents)
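The regular expressions referenced above (OLD_STAGE_SECTION_RE, OLD_STAGE_VARIABLE_RE, etc.) are defined elsewhere in the script and are not shown here. The following self-contained sketch uses hypothetical stand-in patterns only to illustrate the kind of line-by-line old-to-new syntax rewrite being performed:

import re

# Hypothetical stand-ins for the patterns referenced above (not the real ones)
DEMO_STAGE_SECTION_RE = re.compile(r'\[\s*stage\s*:\s*(\S+)\s*\]')
DEMO_STAGE_VARIABLE_RE = re.compile(r'\$\{\s*stage\s*:\s*(\S+?)\s*\}')

def convert_line_demo(line):
    """Rewrite old-style stage headers/variables into the new dotted syntax."""
    line = DEMO_STAGE_SECTION_RE.sub(lambda m: '[stage.%s]' % m.group(1), line)
    line = DEMO_STAGE_VARIABLE_RE.sub(lambda m: '${stage.%s}' % m.group(1), line)
    return line

assert convert_line_demo('[ stage :osc ]') == '[stage.osc]'
assert convert_line_demo('param = ${ stage : key }') == 'param = ${stage.key}'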
Example #3
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise
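            # For example, a command-line override given as
            #     stage.osc param.theta23=49.0
            # (hypothetical section and parameter names) is parsed here as
            #     section='stage.osc', newarg='param.theta23', value='49.0'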

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # Likewise, only plot if intermediate outputs were requested or this
        # is the last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
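# A minimal invocation sketch. The flag names below are inferred from the
# `args` attributes used above and may not match the actual CLI exposed by
# parse_args(); the config path is hypothetical:
#
#     python pipeline.py --pipeline settings/pipeline/example.cfg \
#         --outdir /tmp/pipeline_output --png -v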