def getParameters(filenames=["pipeline.ini", ],
                  defaults=None,
                  site_ini=True,
                  user_ini=True,
                  default_ini=True,
                  only_import=None):
    '''read config files and return the values as a dictionary.

    Sections and keys are combined with an underscore. If a key
    without section does not exist, it will be added plain. For
    example, ``input=input1.file`` in section ``[general]`` is entered
    as both ``general_input`` and ``input``; the section ``[DEFAULT]``
    is equivalent to ``[general]``. This function also updates the
    module-wide parameter map ``PARAMS``.

    The order of initialization is: 1. hard-coded defaults,
    2. pipeline-specific default file in the CGAT code installation,
    3. :file:`/etc/cgat/pipeline.ini`, 4. :file:`.cgat` in the user's
    home directory, 5. files supplied by the user in the order given.
    Later files overwrite settings from earlier files.

    Parameters ending in the suffix ``dir`` whose value starts with
    ``.`` (such as "." or "../data") are expanded to absolute paths.

    Arguments
    ---------
    filenames : list
        Filenames of the configuration files to read. A plain string
        is accepted for legacy callers and treated as a one-element
        list.
    defaults : dict
        Default values; these overwrite hard-coded parameters but are
        overwritten by values from the configuration files.
    site_ini : bool
        If set, also read :file:`/etc/cgat/pipeline.ini`.
    user_ini : bool
        If set, also read :file:`.cgat` in the user's home directory.
    default_ini : bool
        If set, also read the default initialization file from
        'CGATPipelines/configuration/pipeline.ini'.
    only_import : bool
        If set, the parameter dictionary acts as a default collection.
        Useful for pipelines that are imported (e.g. for documentation
        generation) but not executed. If None, defaults to True when
        the calling script is imported or ``--is-test`` was passed.

    Returns
    -------
    config : dict
        Dictionary with configuration values (the module-wide PARAMS).
    '''
    global CONFIG
    global PARAMS
    old_id = id(PARAMS)

    caller_locals = getCallerLocals()

    # check if this is only for import
    if only_import is None:
        only_import = isTest() or \
            "__name__" not in caller_locals or \
            caller_locals["__name__"] != "__main__"

    # important: only update the PARAMS variable as it is referenced
    # in other modules. Thus the type needs to be fixed at import.
    if only_import:
        # turn on default dictionary behaviour
        TriggeredDefaultFactory.with_default = True

    # IMS: several legacy scripts call this with a string rather than
    # a list. Normalize *before* any list operations below - in the
    # original code this check came after list-only methods such as
    # remove()/insert() had already been applied, so a string argument
    # raised AttributeError before the check was reached.
    if isinstance(filenames, str):
        filenames = [filenames]

    # work on a copy so that neither the caller's list nor - crucially
    # - the mutable default argument is modified in place (otherwise
    # inserted/removed entries would persist across calls).
    filenames = list(filenames)

    # drop user-supplied ini files that do not exist
    filenames = [fn for fn in filenames if os.path.exists(fn)]

    if site_ini:
        # read configuration from /etc/cgat/pipeline.ini
        fn = "/etc/cgat/pipeline.ini"
        if os.path.exists(fn):
            filenames.insert(0, fn)

    if default_ini:
        # The link between CGATPipelines and Pipeline.py needs to be
        # severed at one point:
        # 1. config files into CGAT module directory?
        # 2. Pipeline.py into CGATPipelines module directory?
        filenames.insert(
            0,
            os.path.join(CGATPIPELINES_PIPELINE_DIR,
                         'configuration', 'pipeline.ini'))

    if user_ini:
        # read configuration from a user's home directory; insert it
        # just before 'pipeline.ini' so the project file still wins
        fn = os.path.join(os.path.expanduser("~"), ".cgat")
        if os.path.exists(fn):
            if 'pipeline.ini' in filenames:
                index = filenames.index('pipeline.ini')
                filenames.insert(index, fn)
            else:
                filenames.append(fn)

    PARAMS['pipeline_ini'] = filenames

    try:
        CONFIG.read(filenames)
        p = configToDictionary(CONFIG)
    except configparser.InterpolationSyntaxError:
        # InterpolationSyntaxError is likely due to an unquoted '%'
        # in a value; retry with a parser that does no interpolation.
        # Do not log, as this is called before the logging module is
        # initialized - logging here would mess up the logging
        # configuration in Control.py and Experiment.py.
        CONFIG = configparser.RawConfigParser()
        CONFIG.read(filenames)
        p = configToDictionary(CONFIG)

    # update with hard-coded PARAMS
    PARAMS.update(HARDCODED_PARAMS)

    if defaults:
        PARAMS.update(defaults)
    PARAMS.update(p)

    # interpolate some params with other parameters
    for param in INTERPOLATE_PARAMS:
        try:
            PARAMS[param] = PARAMS[param] % PARAMS
        except TypeError as msg:
            raise TypeError('could not interpolate %s: %s' %
                            (PARAMS[param], msg))

    # expand relative "...dir" parameters to absolute pathnames to
    # avoid ambiguity with relative path names
    for param, value in list(PARAMS.items()):
        if param.endswith("dir"):
            if value.startswith("."):
                PARAMS[param] = os.path.abspath(value)

    # make sure that the dictionary reference has not changed, as it
    # is shared with other modules
    assert id(PARAMS) == old_id

    return PARAMS
def peekParameters(workingdir,
                   pipeline,
                   on_error_raise=None,
                   prefix=None,
                   update_interface=False,
                   restrict_interface=False):
    '''peek configuration parameters from an external pipeline.

    As the parameter dictionary is built at runtime, this method
    executes the pipeline in `workingdir`, dumping its configuration
    values and reading them into a dictionary.

    If either `pipeline` or `workingdir` are not found, an error is
    raised. This behaviour can be changed by setting `on_error_raise`
    to False, in which case an empty dictionary is returned.

    Arguments
    ---------
    workingdir : string
        Working directory the external pipeline was executed in.
    pipeline : string
        Name of the pipeline script. The pipeline is assumed to live
        in the same directory as the current pipeline.
    on_error_raise : bool
        Whether to raise on errors during peeking (e.g. `workingdir`
        not found). If None, it defaults to raising unless the
        calling script is imported or ``--is-test`` was passed on the
        command line.
    prefix : string
        Optional prefix added to all parameter names. Useful when the
        parameters are added to the configuration dictionary of the
        calling pipeline.
    update_interface : bool
        If True, prefix any options in the ``[interface]`` section
        with `workingdir`, giving transparent access to files in the
        external pipeline.
    restrict_interface : bool
        If True, only interface parameters are imported.

    Returns
    -------
    config : dict
        Dictionary of configuration values.
    '''
    caller_locals = getCallerLocals()

    # check if we should raise errors
    if on_error_raise is None:
        on_error_raise = not isTest() and \
            "__name__" in caller_locals and \
            caller_locals["__name__"] == "__main__"

    # patch - if --help or -h in command line arguments,
    # do not peek as there might be no config file.
    if "--help" in sys.argv or "-h" in sys.argv:
        return {}

    # Attempt to locate directory with pipeline source code. This is a
    # patch as pipelines might be called within the repository
    # directory or from an installed location.
    dirname = PARAMS["pipelinedir"]

    # called without a directory, use current directory
    if dirname == "":
        dirname = os.path.abspath(".")
    else:
        # if the stored directory does not exist, assume we want the
        # version located in the directory of the calling script
        if not os.path.exists(dirname):
            dirname = os.path.dirname(caller_locals['__file__'])

    pipeline = os.path.join(dirname, pipeline)
    if not os.path.exists(pipeline):
        if on_error_raise:
            raise ValueError(
                "can't find pipeline at %s" % (pipeline))
        else:
            return {}

    if workingdir == "":
        workingdir = os.path.abspath(".")

    # patch for the "config" target - use default pipeline directory
    # if directory is not specified; working dir is set to "?!".
    # NOTE: the parentheses are required - "and" binds tighter than
    # "or", so without them the "?!" test guarded only the "clone"
    # case instead of all three targets.
    if ("config" in sys.argv or "check" in sys.argv or
            "clone" in sys.argv) and workingdir == "?!":
        workingdir = os.path.join(PARAMS.get("pipelinedir"),
                                  IOTools.snip(pipeline, ".py"))

    if not os.path.exists(workingdir):
        if on_error_raise:
            raise ValueError(
                "can't find working dir %s" % workingdir)
        else:
            return {}

    statement = "python %s -f -v 0 dump" % pipeline

    # have non-interactive shells source the user's .bashrc so the
    # child pipeline sees the expected environment
    os.environ.update(
        {'BASH_ENV': os.path.join(os.environ['HOME'], '.bashrc')})
    process = subprocess.Popen(statement,
                               cwd=workingdir,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env=os.environ.copy())

    # process.stdin.close()
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        raise OSError(
            ("Child was terminated by signal %i: \n"
             "Statement: %s\n"
             "The stderr was: \n%s\n"
             "Stdout: %s") %
            (-process.returncode, statement, stderr, stdout))

    # subprocess only accepts an encoding argument in py >= 3.6, so
    # decode here
    stdout = stdout.decode("utf-8").splitlines()
    # remove any log messages - the dump is a single JSON object line
    stdout = [x for x in stdout if x.startswith("{")]
    if len(stdout) > 1:
        raise ValueError("received multiple configurations")
    dump = json.loads(stdout[0])

    # update interface
    if update_interface:
        for key, value in list(dump.items()):
            if key.startswith("interface"):
                dump[key] = os.path.join(workingdir, value)

    # keep only interface if so required
    if restrict_interface:
        dump = dict([(k, v) for k, v in dump.items()
                     if k.startswith("interface")])

    # prefix all parameters
    if prefix is not None:
        dump = dict([("%s%s" % (prefix, x), y)
                     for x, y in list(dump.items())])

    return dump
def __call__(self):
        # NOTE(review): this is a method of a class whose header lies
        # outside this chunk (presumably TriggeredDefaultFactory) -
        # confirm against the full file. When defaults are enabled
        # (e.g. while a pipeline is merely imported), a missing key
        # yields an empty string instead of raising.
        if TriggeredDefaultFactory.with_default:
            return str()
        else:
            raise KeyError("missing parameter accessed")

# Global variable for parameter interpolation in commands
# This is a dictionary that can be switched between defaultdict
# and normal dict behaviour.
PARAMS = collections.defaultdict(TriggeredDefaultFactory())

# patch - if --help or -h in command line arguments,
# switch to a default dict to avoid missing paramater
# failures
if isTest() or "--help" in sys.argv or "-h" in sys.argv:
    TriggeredDefaultFactory.with_default = True

# A list of hard-coded parameters within the CGAT environment
# These can be overwritten by command line options and
# configuration files
# NOTE(review): this literal is truncated at the chunk boundary - the
# 'cmd-farm' triple-quoted value continues beyond the visible text.
HARDCODED_PARAMS = {
    'scriptsdir': CGATSCRIPTS_SCRIPTS_DIR,
    'toolsdir': CGATSCRIPTS_SCRIPTS_DIR,
    'pipeline_scriptsdir': CGATPIPELINES_SCRIPTS_DIR,
    'pipelinedir': CGATPIPELINES_PIPELINE_DIR,
    'pipeline_rdir': CGATPIPELINES_R_DIR,
    'pipelines_conda_dir': CGATPIPELINES_CONDA_DIR,
    # script to perform map/reduce like computation.
    'cmd-farm': """python %(pipeline_scriptsdir)s/farm.py --method=drmaa
def getParameters(filenames=["pipeline.ini", ],
                  defaults=None,
                  site_ini=True,
                  user_ini=True,
                  default_ini=True,
                  only_import=None):
    '''read config files and return the values as a dictionary.

    Sections and keys are combined with an underscore; a key without
    section is also added plain (``input`` in ``[general]`` becomes
    ``general_input`` and ``input``). ``[DEFAULT]`` is equivalent to
    ``[general]``. The module-wide parameter map ``PARAMS`` is
    updated as well.

    Initialization order: 1. hard-coded defaults, 2. pipeline default
    file in the CGAT installation, 3. :file:`/etc/cgat/pipeline.ini`,
    4. :file:`.cgat` in the user's home directory, 5. user-supplied
    files in the order given; later files overwrite earlier ones.
    Parameters ending in ``dir`` whose value starts with ``.`` are
    expanded to absolute paths.

    Arguments
    ---------
    filenames : list
        Filenames of the configuration files to read. A plain string
        is accepted for legacy callers.
    defaults : dict
        Default values; overwrite hard-coded parameters but are
        overwritten by values from the configuration files.
    site_ini : bool
        If set, also read :file:`/etc/cgat/pipeline.ini`.
    user_ini : bool
        If set, also read :file:`.cgat` in the user's home directory.
    default_ini : bool
        If set, also read
        'CGATPipelines/configuration/pipeline.ini'.
    only_import : bool
        If set, the parameter dictionary acts as a default
        collection (for pipelines imported but not executed). If
        None, defaults to True when the calling script is imported or
        ``--is-test`` was passed.

    Returns
    -------
    config : dict
        Dictionary with configuration values (the module-wide PARAMS).
    '''
    global CONFIG
    global PARAMS
    old_id = id(PARAMS)

    caller_locals = getCallerLocals()

    # check if this is only for import
    if only_import is None:
        only_import = isTest() or \
            "__name__" not in caller_locals or \
            caller_locals["__name__"] != "__main__"

    # important: only update the PARAMS variable as it is referenced
    # in other modules; its type needs to be fixed at import
    if only_import:
        # turn on default dictionary behaviour
        TriggeredDefaultFactory.with_default = True

    # IMS: several legacy scripts call this with a string rather than
    # a list - convert first. In the original code this check came
    # after list-only operations (remove/insert), so a string argument
    # raised AttributeError before ever being normalized.
    if isinstance(filenames, str):
        filenames = [filenames]

    # copy so that the caller's list - and in particular the mutable
    # default argument - is never mutated in place
    filenames = list(filenames)

    # drop user-supplied ini files that do not exist
    filenames = [fn for fn in filenames if os.path.exists(fn)]

    if site_ini:
        # read configuration from /etc/cgat/pipeline.ini
        fn = "/etc/cgat/pipeline.ini"
        if os.path.exists(fn):
            filenames.insert(0, fn)

    if default_ini:
        # The link between CGATPipelines and Pipeline.py needs to be
        # severed at one point:
        # 1. config files into CGAT module directory?
        # 2. Pipeline.py into CGATPipelines module directory?
        filenames.insert(0, os.path.join(CGATPIPELINES_PIPELINE_DIR,
                                         'configuration',
                                         'pipeline.ini'))

    if user_ini:
        # read configuration from a user's home directory; place it
        # just before 'pipeline.ini' so the project file still wins
        fn = os.path.join(os.path.expanduser("~"), ".cgat")
        if os.path.exists(fn):
            if 'pipeline.ini' in filenames:
                index = filenames.index('pipeline.ini')
                filenames.insert(index, fn)
            else:
                filenames.append(fn)

    PARAMS['pipeline_ini'] = filenames

    try:
        CONFIG.read(filenames)
        p = configToDictionary(CONFIG)
    except configparser.InterpolationSyntaxError:
        # likely due to an unquoted '%'; retry without interpolation.
        # Do not log - this runs before the logging module is
        # initialized and would mess up the logging configuration in
        # Control.py and Experiment.py.
        CONFIG = configparser.RawConfigParser()
        CONFIG.read(filenames)
        p = configToDictionary(CONFIG)

    # update with hard-coded PARAMS
    PARAMS.update(HARDCODED_PARAMS)

    if defaults:
        PARAMS.update(defaults)
    PARAMS.update(p)

    # interpolate some params with other parameters
    for param in INTERPOLATE_PARAMS:
        try:
            PARAMS[param] = PARAMS[param] % PARAMS
        except TypeError as msg:
            raise TypeError('could not interpolate %s: %s' %
                            (PARAMS[param], msg))

    # expand relative "...dir" parameters to absolute pathnames
    for param, value in list(PARAMS.items()):
        if param.endswith("dir"):
            if value.startswith("."):
                PARAMS[param] = os.path.abspath(value)

    # make sure that the dictionary reference has not changed, as it
    # is shared with other modules
    assert id(PARAMS) == old_id

    return PARAMS
def getParameters(filenames=["pipeline.ini", ],
                  defaults=None,
                  user_ini=True,
                  default_ini=True,
                  only_import=None):
    '''read config files and return the values as a dictionary.

    NOTE(review): this appears to be a legacy Python 2 variant of
    getParameters (``basestring``, ``except TypeError, msg``) and may
    be truncated - no return statement is visible; confirm against
    the full file.

    Sections and keys are combined with an underscore; a key without
    section is also added plain (``input`` in ``[general]`` becomes
    ``general_input`` and ``input``). ``[DEFAULT]`` is equivalent to
    ``[general]``. The module-wide parameter map PARAMS is updated
    as well. Later configuration files overwrite earlier ones.

    Arguments
    ---------
    filenames : list
        Filenames of the configuration files to read.
    defaults : dict
        Default values; overwrite hard-coded parameters but are
        overwritten by values from the configuration files.
    user_ini : bool
        If set, also read :file:`.cgat` in the user's home directory.
    default_ini : bool
        If set, also read
        'CGATPipelines/configuration/pipeline.ini'.
    only_import : bool
        If set, the parameter dictionary acts as a default
        collection. If None, defaults to True when the calling script
        is imported or ``--is-test`` was passed.
    '''
    global CONFIG
    global PARAMS
    # remember the identity of PARAMS so we can assert later (in the
    # full version) that the shared dictionary was updated in place
    old_id = id(PARAMS)

    caller_locals = getCallerLocals()

    # check if this is only for import
    if only_import is None:
        only_import = isTest() or \
            "__name__" not in caller_locals or \
            caller_locals["__name__"] != "__main__"

    # important: only update the PARAMS variable as it is referenced
    # in other modules; its type needs to be fixed at import
    if only_import:
        # turn on default dictionary behaviour
        TriggeredDefaultFactory.with_default = True

    if user_ini:
        # read configuration from a users home directory
        fn = os.path.join(os.path.expanduser("~"), ".cgat")
        if os.path.exists(fn):
            filenames.insert(0, fn)

    # IMS: Several legacy scripts call this with a string as input
    # rather than a list. Check for this and correct
    if isinstance(filenames, basestring):
        filenames = [filenames]

    if default_ini:
        # The link between CGATPipelines and Pipeline.py
        # needs to severed at one point.
        # 1. config files into CGAT module directory?
        # 2. Pipeline.py into CGATPipelines module directory?
        filenames.insert(0, os.path.join(CGATPIPELINES_PIPELINE_DIR,
                                         'configuration',
                                         'pipeline.ini'))

    CONFIG.read(filenames)

    p = configToDictionary(CONFIG)

    # update with hard-coded PARAMS
    PARAMS.update(HARDCODED_PARAMS)

    if defaults:
        PARAMS.update(defaults)
    PARAMS.update(p)

    # interpolate some params with other parameters
    for param in INTERPOLATE_PARAMS:
        try:
            PARAMS[param] = PARAMS[param] % PARAMS
        except TypeError, msg:
            raise TypeError('could not interpolate %s: %s' %
                            (PARAMS[param], msg))
def peekParameters(workingdir,
                   pipeline,
                   on_error_raise=None,
                   prefix=None,
                   update_interface=False,
                   restrict_interface=False):
    '''peek configuration parameters from an external pipeline.

    As the parameter dictionary is built at runtime, this method
    executes the pipeline in `workingdir`, dumping its configuration
    values and reading them into a dictionary.

    If either `pipeline` or `workingdir` are not found, an error is
    raised. This behaviour can be changed by setting `on_error_raise`
    to False, in which case an empty dictionary is returned.

    Arguments
    ---------
    workingdir : string
        Working directory the external pipeline was executed in.
    pipeline : string
        Name of the pipeline script. The pipeline is assumed to live
        in the same directory as the current pipeline.
    on_error_raise : bool
        Whether to raise on errors during peeking (e.g. `workingdir`
        not found). If None, it defaults to raising unless the
        calling script is imported or ``--is-test`` was passed.
    prefix : string
        Optional prefix added to all parameter names.
    update_interface : bool
        If True, prefix options of the ``[interface]`` section with
        `workingdir`, giving transparent access to files in the
        external pipeline.
    restrict_interface : bool
        If True, only interface parameters are imported.

    Returns
    -------
    config : dict
        Dictionary of configuration values.
    '''
    caller_locals = getCallerLocals()

    # check if we should raise errors
    if on_error_raise is None:
        on_error_raise = not isTest() and \
            "__name__" in caller_locals and \
            caller_locals["__name__"] == "__main__"

    # patch - if --help or -h in command line arguments,
    # do not peek as there might be no config file.
    if "--help" in sys.argv or "-h" in sys.argv:
        return {}

    # Attempt to locate directory with pipeline source code. This is a
    # patch as pipelines might be called within the repository
    # directory or from an installed location.
    dirname = PARAMS["pipelinedir"]

    # called without a directory, use current directory
    if dirname == "":
        dirname = os.path.abspath(".")
    else:
        # if the stored directory does not exist, assume we want the
        # version located in the directory of the calling script
        if not os.path.exists(dirname):
            dirname = os.path.dirname(caller_locals['__file__'])

    pipeline = os.path.join(dirname, pipeline)
    if not os.path.exists(pipeline):
        if on_error_raise:
            raise ValueError("can't find pipeline at %s" % (pipeline))
        else:
            return {}

    if workingdir == "":
        workingdir = os.path.abspath(".")

    # patch for the "config" target - use default pipeline directory
    # if directory is not specified; working dir is set to "?!".
    # NOTE: the parentheses are required - "and" binds tighter than
    # "or", so without them the "?!" test guarded only the "clone"
    # case instead of all three targets.
    if ("config" in sys.argv or "check" in sys.argv or
            "clone" in sys.argv) and workingdir == "?!":
        workingdir = os.path.join(PARAMS.get("pipelinedir"),
                                  IOTools.snip(pipeline, ".py"))

    if not os.path.exists(workingdir):
        if on_error_raise:
            raise ValueError("can't find working dir %s" % workingdir)
        else:
            return {}

    statement = "python %s -f -v 0 dump" % pipeline
    process = subprocess.Popen(statement,
                               cwd=workingdir,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    # process.stdin.close()
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        raise OSError(
            ("Child was terminated by signal %i: \n"
             "Statement: %s\n"
             "The stderr was: \n%s\n"
             "Stdout: %s") %
            (-process.returncode, statement, stderr, stdout))

    # subprocess only accepts an encoding argument in py >= 3.6, so
    # decode here
    stdout = stdout.decode("utf-8").splitlines()
    # remove any log messages - the dump is a single JSON object line
    stdout = [x for x in stdout if x.startswith("{")]
    if len(stdout) > 1:
        raise ValueError("received multiple configurations")
    dump = json.loads(stdout[0])

    # update interface
    if update_interface:
        for key, value in list(dump.items()):
            if key.startswith("interface"):
                dump[key] = os.path.join(workingdir, value)

    # keep only interface if so required
    if restrict_interface:
        dump = dict([(k, v) for k, v in dump.items()
                     if k.startswith("interface")])

    # prefix all parameters
    if prefix is not None:
        dump = dict([("%s%s" % (prefix, x), y)
                     for x, y in list(dump.items())])

    return dump