def from_cfg(fname):
    """Load and parse a PISA config file.

    Parameters
    ----------
    fname : str
        Path to the config file to read.

    Returns
    -------
    config : PISAConfigParser
        Parser populated from `fname`.

    Raises
    ------
    Exception
        Whatever `PISAConfigParser.read` raises; the failure is logged
        (including the offending filename) before being re-raised.
    """
    from pisa.utils.config_parser import PISAConfigParser
    config = PISAConfigParser()
    try:
        config.read(fname)
    except Exception:
        # Narrowed from a bare ``except:`` so that SystemExit and
        # KeyboardInterrupt are not intercepted here; the original error is
        # still propagated to the caller after logging which file failed.
        log.logging.error('Failed to read PISA config file, `fname`="%s"', fname)
        raise
    return config
def main(): """Do the conversion.""" args = parse_args() in_fpath = os.path.expanduser(os.path.expandvars(args.config[0])) out_fpath = in_fpath + '.new' with open(in_fpath, 'r') as infile: orig_contents = infile.readlines() osc_stage_header_line = None section_names_with_colons = {} new_contents = [] for lineno, orig_line in enumerate(orig_contents, start=1): # Remove trailing whitespace, including newline character(s) new_line = orig_line.rstrip() # Empty (or whitespace-only) line is trivial if new_line == '': new_contents.append(new_line) continue # Replace text substitution ("config file variables") syntax new_line = OLD_SUB_RE.sub(repl=replace_substitution, string=new_line) # Replace stage headers. E.g. # `` [ stage :stage_name ]`` # is replaced by # `` [stage.stage_name]`` # I.e. retain any whitespace before (and after... though this is # already removed) the brackets but swap colon for period and remove # whitespace within the brackets. new_line = OLD_STAGE_SECTION_RE.sub( repl=lambda m: '[stage.%s]' % m.groups(), string=new_line) # Replace stage:key variables. E.g. 
what should now look like # `` ${ stage : key } `` # should look like # `` ${stage.key} `` new_line = OLD_STAGE_VARIABLE_RE.sub( repl=lambda m: '${stage.%s}' % m.groups(), string=new_line) stripped = new_line.strip() # Replace order string if stripped.startswith('order'): new_line = OLD_ORDER_RE.sub(repl=replace_order, string=new_line) # Record line on which the [stage.osc] section occurs (if any) elif stripped == '[stage.osc]': osc_stage_header_line = lineno - 1 # Convert ``#include x`` to ``#include x as y``, where appropriate new_line = PISAConfigParser.INCLUDE_RE.sub(repl=append_include_as, string=new_line) # Convert JSON filenames to .json.bz2 that are now bzipped if '.json' in new_line: for json_re in JSON_NO_BZ2_RE_LIST: new_line = json_re.sub(repl='.json.bz2', string=new_line) # Replace changed names for orig_name, new_name in NAMES_CHANGED_MAP.items(): new_line = new_line.replace(orig_name, new_name) # Search for any colons used in section names. This is illegal, as a # section name can be used as a variable where the syntax is # ``${section_name:key}`` # so any colons in section_name will make the parser choke. 
for match in SECTION_NAME_WITH_COLON_RE.finditer(new_line): section_name_with_colons = match.groups()[0] if NEW_VARIABLE_RE.match(section_name_with_colons): if section_name_with_colons.count(':') > 1: raise ValueError( 'Multiple colons in new-style variable, line %d:\n' '>> Original line:\n%s\n>> New line:\n%s\n' % (lineno, orig_line, new_line)) else: continue section_name_without_colons = section_name_with_colons.replace( ':', OTHER_SECTION_NAME_SEPARATOR) section_names_with_colons[section_name_with_colons] = ( section_name_without_colons) new_contents.append(new_line) #for item in section_names_with_colons.items(): # print '%s --> %s' % item # Go back through and replace colon-sparated section names with # ``OTHER_SECTION_NAME_SEPARATOR``-separated section names all_names_to_replace = section_names_with_colons.keys() def replace_var(match): """Closure to replace variable names""" whole_string, var_name = match.groups() if var_name in all_names_to_replace: return '${%s}' % section_names_with_colons[var_name] return whole_string def replace_section_name(match): """Closure to replace section names""" whole_string, section_name = match.groups() if section_name in all_names_to_replace: return whole_string.replace( section_name, section_names_with_colons[section_name]) return whole_string for lineno, new_line in enumerate(new_contents, start=1): if not new_line: continue new_line = NEW_VARIABLE_RE.sub(repl=replace_var, string=new_line) new_line = NEW_SECTION_RE.sub(repl=replace_section_name, string=new_line) #new_line = NEW_SECTION_RE.sub(repl=replace_colon_names, # string=new_line) #for with_colons, without_colons in section_names_with_colons: # new_line = new_line.replace(with_colons, without_colons) # Check for multiple colons in a variable (which is illegal) if MULTI_COLON_VAR_RE.findall(new_line): raise ValueError( 'Multiple colons in variable, line %d:\n>> Original' ' line:\n%s\n>> New line:\n%s\n' % (lineno, orig_contents[lineno - 1], new_line)) 
new_contents[lineno - 1] = new_line # Parse the new config file with the PISAConfigParser to see if NSI # parameters are defined in the `stage.osc` section (if the latter is # present). If needed, insert appropriate #include in the section pcp = PISAConfigParser() missing_section_header = False try: pcp.read_string(('\n'.join(new_contents) + '\n').decode('utf-8')) except MissingSectionHeaderError: missing_section_header = True pcp.read_string(('\n'.join(['[dummy section header]'] + new_contents) + '\n').decode('utf-8')) if 'stage.osc' in pcp: keys_containing_eps = [ k for k in pcp['stage.osc'].keys() if '.eps_'.encode('utf-8') in k ] nsi_params_present = [] nsi_params_missing = [] for nsi_param, nsi_param_re in NSI_PARAM_RE_MAP.items(): found = None for key_idx, key in enumerate(keys_containing_eps): if nsi_param_re.match(key): found = key_idx nsi_params_present.append(nsi_param) if found is None: nsi_params_missing.append(nsi_param) else: # No need to search this key again keys_containing_eps.pop(found) if set(nsi_params_present) == set(NSI_PARAM_RE_MAP.keys()): all_nsi_params_defined = True elif set(nsi_params_missing) == set(NSI_PARAM_RE_MAP.keys()): all_nsi_params_defined = False else: raise ValueError( 'Found a subset of NSI params defined; missing %s' % str(nsi_params_missing)) # NOTE: since for now the contents of nsi_null.cfg are commented out # (until merging NSI branch), the above check will say NSI params are # missing if the #include statement was made. So check to see if # settings/osc/nsi_null.cfg _has_ been included (we can't tell what # section it is in, but we'll have to just accept that). # # We will probably want to remove this stanza as soon as NSI brnach is # merged, since this is imprecise and can introduce other weird corner # cases. 
rsrc_loc = find_resource('settings/osc/nsi_null.cfg') for file_iter in pcp.file_iterators: if rsrc_loc in file_iter.fpaths_processed: all_nsi_params_defined = True if not all_nsi_params_defined and osc_stage_header_line is None: raise ValueError( "Found a stage.osc section without NSI params defined (using" " PISAConfigParser) but could not find the line of the" " `[stage.osc]` header. This could occur if `[stage.osc]` came" " from an `#include`'d file. You can manually define the NSI" " parameters in this file or in the included file e.g. as" " found in `settings/osc/nsi_null.cfg` or simply add the" " statement ``#include settings/osc/nsi_null.cfg`` to either" " file (so long as that statement it falls within the" " stage.osc section).") # Add ``#include settings/osc/nsi_null.cfg`` at top of stage.osc # section if a stage.osc section is present and no NSI params were # specified in that section if not all_nsi_params_defined: # Add an #include to set all NSI parameters to 0 new_contents.insert(osc_stage_header_line + 1, '#include settings/osc/nsi_null.cfg') # Add an extra blank line after the #include line new_contents.insert(osc_stage_header_line + 2, '') if not new_contents: raise ValueError('Empty file after conversion; quitting.') # Note that newlines are added but no join is performed for comparison # against `orig_contents` new_contents = [line + '\n' for line in new_contents] # Now for validation, try to parse the new config file with the # PISAConfigParser pcp = PISAConfigParser() if missing_section_header: pcp.read_string((''.join(['[dummy section header]\n'] + new_contents) + '\n').decode('utf-8')) else: pcp.read_string((''.join(new_contents)).decode('utf-8')) if new_contents == orig_contents: sys.stdout.write('Nothing modified in the original file (ok!).\n') return if args.validate_only: raise ValueError( 'Original config file "%s" would be modfied (and so may be' ' invalid). 
Re-run this script without the --validate-only flag to' ' produce an appropriately-converted config file.' % args.config[0]) sys.stdout.write('Writing modified config file to "%s"\n' % out_fpath) with open(out_fpath, 'w') as outfile: outfile.writelines(new_contents)
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script.

    Parses command-line args, builds a `Pipeline` from the config (applying
    any ``--arg section key=val`` overrides), runs either the full pipeline
    (optionally stopping after a given stage) or a single stage with dummy
    fluctuated inputs, and — when ``--outdir`` is given — saves stage outputs
    as JSON and optionally plots them as png/pdf.

    Parameters
    ----------
    return_outputs : bool
        If True, return ``(pipeline, outputs)`` instead of None.
    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list)
                )
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            # Split on the first "=" only; everything after it (including any
            # further "=") is the value
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug(
                'Setting config section "%s" arg "%s" = "%s"',
                section, newarg, value
            )
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        # Run the full pipeline, optionally stopping after a named or
        # integer-indexed stage
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            # slice stop is exclusive, so include the stop stage itself
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        # Run only a single stage
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names."
            )
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape, fill_value=3.0)
                input_map = Map(
                    name=input_name, binning=alphabetical_binning, hist=hist
                )
                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)
                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)
        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical
                # plots (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug(
                        "Skipping plot of intermediate stage %s", stage
                    )
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(
                        outputs, fname=stg_svc + "__output", cmap="RdBu"
                    )
            except ValueError as exc:
                # NOTE(review): `fmt` is unbound here if the exception fires
                # before the loop's first iteration binds it — confirm
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                # NOTE(review): format_exc() return value is discarded; the
                # traceback is actually emitted by logging.exception below
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs