def __init__(self):
    """ Constructor: Initialize data objects and variables, then
        call the setup function. Steps get the configuration from
        the datain that is handed to them.
    """
    # Input and output data objects
    self.datain = DataParent()
    self.dataout = DataParent()
    # Step identification (filled in by setup)
    self.name = None
    self.procname = None
    # Argument handling
    self.arglist = {}    # dictionary with current argument values
    self.paramlist = []  # list with possible parameters
    # Configuration and logger (logger is set by setup)
    self.config = None
    self.log = None
    # I/O mode: 'SISO' = single input PipeData object, single output;
    # 'MIMO' = multiple inputs, multiple outputs;
    # 'MISO' = multiple inputs, single output.
    self.iomode = 'SISO'
    # Step-specific setup (sets name, procname, log and paramlist)
    self.setup()
    self.log.debug('Init: done')
def reset(self):
    """ Return the step to the same condition it was in when it was
        created: internal variables are reset and any stored data
        is erased.
    """
    # Replace input and output with fresh, empty data objects
    self.datain = DataParent()
    self.dataout = DataParent()
    self.log.debug('Reset: done')
def loadauxfile(self, auxpar='', data=None, multi=False):
    """ Load the auxiliary file(s) that best match the data.
        Uses loadauxname to search for matching filenames (see
        loadauxname for the parameter descriptions), then loads
        each match into a pipedata object. The loaded object (or
        list of objects if multi is set) is returned.
    """
    # Find the best-matching filename(s)
    auxname = self.loadauxname(auxpar, data, multi)
    # Load one object per filename (multi) or a single object
    if multi:
        return [DataParent(config=self.config).load(nam) for nam in auxname]
    return DataParent(config=self.config).load(auxname)
def __init__(self):
    """ Constructor: Initialize data objects and variables; the
        setup function is called through the parent constructor.
    """
    # Parent constructor runs setup()
    super(StepMOParent, self).__init__()
    # A multi-output step keeps its output as a list of data objects
    self.dataout = [DataParent()]
    # Multiple inputs, multiple outputs
    self.iomode = 'MIMO'
def execfiles(self, inputfiles):
    """ Load all input files into self.datain as a list, run the
        step once on the whole list, then save the single output.
    """
    if len(inputfiles) > 0:
        # Load every input file into the datain list
        self.datain = []
        for filename in inputfiles:
            data = DataParent(config=self.config)
            self.datain.append(data.load(filename))
        # Start - run - end
        self.runstart(self.datain, self.arglist)
        self.run()
        self.runend(self.dataout)
        # Save the result
        self.dataout.save()
        self.log.info('Execute: Saved result %s' % self.dataout.filename)
    else:
        # No input files were given
        self.log.warn('Execute: Missing input File(s)')
def test(self):
    """ Test Pipe Step Parent Object:
        Runs a set of basic tests on the object
    """
    self.log.info('Testing pipe step parent')
    # test function call
    #testout=self(1) # should raise TypeError
    # Build a test input object (fall back to testconf if no config set)
    if self.config != None:
        testin = DataParent(config=self.config)
    else:
        testin = DataParent(config=self.testconf)
    testin.filename = 'this.file.type.fts'
    # Run the step through the object call and show the result
    testout = self(testin, sampar=5.0)
    print(testout.header)
    print(testout.filename)
    # Check parameter access
    print("sampar=%.3f" % self.getarg('sampar'))
    self.log.info('Testing pipe step parent - Done')
def __init__(self):
    """ Constructor: Initialize data objects and variables; the
        setup function is called through the parent constructor.
    """
    # Parent constructor runs setup()
    super(StepMIParent, self).__init__()
    # A multi-input step keeps its input as a list of data objects
    self.datain = [DataParent()]
    # Multiple inputs, single output
    self.iomode = 'MISO'
    # File number list, used to construct output filenames
    self.filenum = []
def test(self):
    """ Test Pipe Step Parent Object:
        Runs a set of basic tests on the object
    """
    self.log.info('Testing pipe step %s' % self.name)
    # Build a test input object (fall back to testconf if no config set)
    if self.config != None:
        datain = DataParent(config=self.config)
    else:
        datain = DataParent(config=self.testconf)
    datain.filename = 'this.file.type.fts'
    # Duplicate it so the step receives a two-element input list
    datain = [datain, datain]
    # Run the step through the object call and inspect the output
    dataout = self(datain)
    print(type(dataout))
    print(dataout.header)
    self.log.info('Testing pipe step %s - Done' % self.name)
def execfiles(self, inputfiles):
    """ Run the step once per file in inputfiles, saving each result.
        This function is overwritten in MISO and MIMO steps.
    """
    # BUGFIX: guard on the inputfiles argument itself, not on
    # self.arglist['inputfiles'] - the old check raised KeyError when
    # execfiles was called directly without going through execute().
    if len(inputfiles) > 0:
        for filename in inputfiles:
            # Read input file: make dataparent, get child from load()
            datain = DataParent(config=self.config)
            self.datain = datain.load(filename)
            # Call start - run and call end
            self.runstart(self.datain, self.arglist)
            self.run()
            self.runend(self.dataout)
            # Write output file
            self.dataout.save()
            self.log.info('Execute: Saved result %s' % self.dataout.filename)
    else:
        # Warning - no input file
        self.log.warn('Execute: Missing input File')
def test(self):
    """ Test Pipe Step Parent Object:
        Runs a set of basic tests on the object
    """
    self.log.info('Testing pipe step %s' % self.name)
    # Configure the step with two aux-file parameters
    # (name is set here because loadaux is normally used as a parent)
    self.name = 'loadaux'
    self.loadauxsetup('test1')
    self.loadauxsetup('test2')
    for par in self.paramlist:
        print(par)
    # Load input data
    self.datain = DataParent(config=self.config).load('IN_a0_1.fits')
    # Look up matches for both aux parameters
    auxf = self.loadauxname('test1', multi=True)
    print('********** ' + repr(auxf))
    auxf = self.loadauxname('test2')
    print('********** ' + repr(auxf))
    self.log.info('Testing pipe step %s - Done' % self.name)
def loadauxname(self, auxpar='', data=None, multi=False):
    """ Searches for files matching auxfile. If only one match is
        found, that file is returned. Else the header keywords listed
        in auxfitkeys are matched between the data and the auxfiles
        which were found. The first auxfile for which these keyword
        values best match the ones from data is selected. The filename
        of the best match is returned.

        auxpar: A name for the aux file parameter to use. This allows
                loadauxfiles to be used multiple times in a given pipe
                step (for example for darks and flats). Default value
                is self.auxpar which is set by loadauxsetup().
        data:   A pipedata object to match the auxiliary file to. If
                no data is specified self.datain is used (for Multi
                Input steps self.datain[0]).
        multi:  If set, a list of all matching filenames is returned
                instead of a single best-match filename.
    """
    ### Setup
    # Use the default aux parameter name if none was given
    if len(auxpar) == 0:
        auxpar = self.auxpar
    # Get file search pattern and header fit keys from the parameters
    auxfile = os.path.expandvars(self.getarg(auxpar + 'file'))
    fitkeys = self.getarg(auxpar + 'fitkeys')
    # A single empty string means "no fit keys"
    if len(fitkeys) == 1 and len(fitkeys[0]) == 0:
        fitkeys = []
    ### Look for files - return in special cases
    # Glob the list of files
    auxlist = glob.glob(auxfile)
    # No match: fall back to the backup location, then give up
    if len(auxlist) < 1:
        self.log.warn('No files found under %s - looking in backup' % auxfile)
        auxfile = os.path.expandvars(self.getarg('bkup' + auxpar))
        auxlist = glob.glob(auxfile)
        if len(auxlist) < 1:
            msg = 'No %s files found under %s' % (auxpar, auxfile)
            self.log.error(msg)
            raise ValueError(msg)
    # Get datain object (depends on step being SingleInput or MultiInput)
    if data is None:
        if issubclass(self.__class__, StepMIParent):
            data = self.datain[0]
        else:
            data = self.datain
    # Return unique file, or all files if fitkeys is empty
    if len(auxlist) == 1 or len(fitkeys) == 0:
        if len(auxlist) == 1:
            self.log.info('LoadAuxName: Found unique file = %s' % auxlist[0])
        else:
            self.log.info(
                'LoadAuxName: No fitkeys: Return first %sfile match = %s' %
                (self.auxpar, auxlist[0]))
        data.setheadval(
            'HISTORY', '%s: Best %sfile = %s' % (
                self.name, self.auxpar, os.path.split(auxlist[0])[1], ))
        if multi:
            return auxlist
        else:
            return auxlist[0]
    ### Select files with Fitkeys
    # check format (make first element uppercase)
    try:
        _ = fitkeys[0].upper()
    except AttributeError:
        # AttributeError if it's not a string
        self.log.error('LoadAuxFile: fitkeys config parameter is ' +
                       'incorrect format - need list of strings')
        raise TypeError('fitkeys config parameter is incorrect format' +
                        ' - need list of strings')
    # Load all headers from auxlist into a auxheadlist (pipedata objects)
    auxheadlist = []
    for auxnam in auxlist:
        auxheadlist.append(DataParent(config=self.config).loadhead(auxnam))
    # Look through keywords, only keep auxfiles which fit keys
    for key in fitkeys:
        newheadlist = []
        # NOTE(review): substring test kept from original - any key that is
        # a substring of 'DATE-OBS' triggers this branch; confirm intended.
        if key in 'DATE-OBS':
            # SPECIAL CASE DATE-OBS: keep auxfiles within daterange
            # (days, converted to seconds) of the closest aux file.
            datime = time.mktime(
                time.strptime(data.getheadval('DATE-OBS'),
                              '%Y-%m-%dT%H:%M:%S'))
            # Absolute time offset from data for each auxfile
            auxtimes = []
            for auxhead in auxheadlist:
                auxtime = time.mktime(
                    time.strptime(auxhead.getheadval('DATE-OBS'),
                                  '%Y-%m-%dT%H:%M:%S'))
                auxtimes.append(abs(auxtime - datime))
            mindiff = min(auxtimes)
            timerange = self.getarg('daterange') * 86400
            for auxi in range(len(auxheadlist)):
                if auxtimes[auxi] - mindiff < timerange:
                    newheadlist.append(auxheadlist[auxi])
        else:
            # Normal Keyword compare: keep auxfiles whose value matches
            for auxhead in auxheadlist:
                if auxhead.getheadval(key) == data.getheadval(key):
                    newheadlist.append(auxhead)
        # break key loop if no files left
        if len(newheadlist) == 0:
            break
        else:
            auxheadlist = newheadlist
    ### Select file to return
    if multi:
        # Return all filenames
        auxname = [aux.filename for aux in auxheadlist]
        # Build message listing the files
        if len(auxname) > 3:
            listnames = "%d files: %s to %s" % (len(auxname), auxname[0],
                                                auxname[-1])
        else:
            # BUGFIX: string.join() was removed in Python 3;
            # ' '.join gives the same space-separated result.
            listnames = ' '.join(auxname)
        if len(newheadlist) > 0:
            self.log.info('LoadAuxName: Matching %s found are <%s>' %
                          (auxpar, listnames))
        else:
            self.log.warn('LoadAuxName: NO MATCH finding aux files')
            self.log.warn('Returning files <%s>' % listnames)
    else:
        # Return first filename
        auxname = auxheadlist[0].filename
        if len(newheadlist) > 0:
            self.log.info('LoadAuxName: Matching %s found is <%s>' %
                          (auxpar, auxname))
        else:
            self.log.warn('LoadAuxName: NO MATCH finding aux file')
            self.log.warn('Returning first file <%s>' % auxname)
        listnames = auxname  # just so we can use it below
    data.setheadval('HISTORY', '%s: Best %s = %s' %
                    (self.name, auxpar, listnames))
    # Return selected file
    return auxname
def execute(self):
    """ Runs the pipe step as called from the command line:
        The first arguments are used as input file names. Other
        special arguments are:
        - config = name of the configuration file object
        - test = runs the test function using the input file
        - loglevel = name of logging level (INFO is default)
        Other arguments are used as parameters to the pipe step.
    """
    ### Read Arguments
    # Build the parser with the generic pipeline options
    self.parser = argparse.ArgumentParser(
        description="Pipeline Step %s" % self.name,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    self.parser.add_argument('inputfiles', type=str, default='', nargs='*',
                             help='input files pathname',)
    self.parser.add_argument('-t', '--test', action='store_true',
                             help='runs the selftest of this pipe step')
    self.parser.add_argument(
        '--loglevel', default='INFO', type=str,
        choices=['DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'],
        help='log level')
    self.parser.add_argument('--logfile', default=None, type=str,
                             help='logging file')
    self.parser.add_argument('--config', default=None, type=str,
                             help='pipeline configuration file')
    # Add one option per entry in the step parameter list
    # (getarg falls back to the paramlist default when unset)
    for param in self.paramlist:
        if isinstance(param[1], (list, tuple)):
            try:
                partype = type(param[1][0])
                self.parser.add_argument('--%s' % param[0], type=partype,
                                         nargs=len(param[1]),
                                         default=param[1], help=param[2])
            except IndexError:
                # empty list, so no type checking
                self.parser.add_argument('--%s' % param[0], nargs='*',
                                         default=None, help=param[2])
        else:
            self.parser.add_argument('--' + param[0], type=type(param[1]),
                                     default=param[1], help=param[2])
    # Parse and keep the arguments as a dictionary
    args = self.parser.parse_args()
    self.arglist = vars(args)
    ### Process generic arguments
    # Configure logging; add a file handler if a logfile was given
    level = getattr(logging, args.loglevel.upper(), None)
    logging.basicConfig(level=level)
    if args.logfile is not None:
        fhand = logging.FileHandler(args.logfile)
        fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        fhand.setFormatter(logging.Formatter(fmt))
        logging.getLogger().addHandler(fhand)
    # Load configuration if given, else make an empty one (unless testing)
    if args.config is not None:
        datain = DataParent(config=args.config)
        self.config = datain.config
    elif not args.test:
        self.config = ConfigObj()
        self.config[self.name] = {}
    # Run the self test and stop if requested
    if args.test:
        self.test()
        return
    ### Reduce data
    self.execfiles(args.inputfiles)
    self.log.info('Execute: done')
class StepParent(object):
    """ HAWC Pipeline Step Parent Object
        The object is callable. It requires a valid configuration
        input (file or object) when it runs.
    """
    # Pipe step version
    stepver = '0.1.1'
    #testconf = 'config/pipeconf_master.txt' # Test configuration
    testconf = 'config/pipeconf_mgb.txt'  # Default test configuration

    def __init__(self):
        """ Constructor: Initialize data objects and variables, then
            call the setup function. Steps get the configuration from
            the datain that is handed to them.
        """
        # initialize input and output
        self.datain = DataParent()
        self.dataout = DataParent()
        # set names
        self.name = None
        self.procname = None
        # set parameters
        self.arglist = {}    # Dictionary with current arguments
        self.paramlist = []  # List with possible parameters
        # set configuration / logger
        self.config = None
        self.log = None
        # specify whether this step runs on a single PipeData object with a
        # single output PipeData object (SISO), multiple input PipeData
        # objects with multiple output PipeData objects (MIMO), or multiple
        # input Pipefile objects with a single output PipeData object (MISO).
        self.iomode = 'SISO'
        # do local setup
        self.setup()
        self.log.debug('Init: done')

    def setup(self):
        """ ### Names and Parameters need to be Set Here ###
            Sets the internal names for the function and for saved files.
            Defines the input parameters for the current pipe step.
            Setup() is called at the end of __init__.
            The parameters are stored in a list containing the following
            information:
            - name: The name for the parameter. This name is used when
              calling the pipe step from command line or python shell.
              It is also used to identify the parameter in the pipeline
              configuration file.
            - default: A default value for the parameter. If nothing, set
              '' for strings, 0 for integers and 0.0 for floats.
            - help: A short description of the parameter.
        """
        ### Set Names
        # Name of the pipeline reduction step
        self.name = 'parent'
        # Shortcut for pipeline reduction step and identifier for
        # saved file names.
        self.procname = 'unk'
        # Set Logger for this pipe step
        self.log = logging.getLogger('pipe.step.%s' % self.name)
        ### Set Parameter list
        self.paramlist = []
        self.paramlist.append([
            'sampar', 1.0,
            'Sample Parameter - parent only - no practical use'
        ])

    def run(self):
        """ Runs the data reduction algorithm. The self.datain is run
            through the code, the result is in self.dataout.
        """
        # Log the value of sample parameter
        self.log.debug("Sample Parameter = %.2f" % self.getarg('sampar'))
        # Copy datain to dataout
        self.dataout = self.datain

    def __call__(self, datain, **arglist):
        """ Object Call: returns reduced input data """
        # Get input data
        self.datain = datain
        # Start Setup
        self.runstart(self.datain, arglist)
        # Call the run function
        self.run()
        # Finish - call end
        self.runend(self.dataout)
        # return result
        return self.dataout

    def runstart(self, data, arglist):
        """ Method to call at the beginning of the pipe step call.
            - Sends initial log message
            - Checks the validity of the input data
            - Gets configuration from input data and checks type
        """
        # Start Message
        self.log.info('Start Reduction: Pipe Step %s' % self.name)
        # Set input arguments (lowercase keys for case-insensitive lookup)
        for k in arglist.keys():
            self.arglist[k.lower()] = arglist[k]
        # Check input data type and set data config
        if issubclass(data.__class__, DataParent):
            self.config = data.config
        else:
            msg = 'Invalid input data type: DataParent child object is required'
            self.log.error(msg)
            raise TypeError('Runstart: ' + msg)
        # Set Configuration: make an empty one if none was specified
        if self.config is None:
            self.config = ConfigObj()
            self.config[self.name] = {}
        # Check configuration
        if not isinstance(self.config, ConfigObj):
            msg = 'Invalid configuration information - aborting'
            self.log.error(msg)
            raise RuntimeError('Runstart: ' + msg)

    def runend(self, data):
        """ Method to call at the end of the pipe step call
            - Sends final log messages
        """
        # update header (status and history)
        self.updateheader(data)
        # clear input arguments
        self.arglist = {}
        self.log.info('Finished Reduction: Pipe Step %s' % self.name)

    def updateheader(self, data):
        """ Update the header for a single PipeData object
            - Sets the PROCSTAT and PROCLEVL keywords in the data header
            - Adds a history entry to the data header
        """
        # Update PRODuct TYPE keyword with step name, add history keyword
        data.setheadval('PRODTYPE', self.name, 'Product Type')
        histmsg = 'Reduced: ' + self.name + ' v' + self.stepver + ' '
        histmsg += time.strftime('%Y-%m-%d_%H:%M:%S')
        data.setheadval('HISTORY', histmsg)
        # Add input parameters to history
        for p in [par[0] for par in self.paramlist]:
            histmsg = ' %s: %s=%s' % (self.name, p, self.getarg(p))
            data.setheadval('HISTORY', histmsg)
        # Update file name with .PipeStepName.fits
        data.filename = data.filenamebegin + self.procname.upper(
        ) + data.filenameend
        # Add config file name if available and not already in HISTORY
        try:
            # '' + filename raises TypeError if config has no filename -
            # in that case, don't add the message.
            conffilename = '' + self.config.filename
            histmsg = 'CONFIG: %s' % conffilename
            # Check history for the full message or a truncated version
            # (eg. for long filenames in FITS headers)
            full_history = data.getheadval('HISTORY')
            if len(histmsg) > 72:
                shortmsg = histmsg[0:72]
            else:
                shortmsg = histmsg
            if histmsg not in full_history and shortmsg not in full_history:
                self.log.debug('Recording config file name %s' % conffilename)
                data.setheadval('HISTORY', histmsg)
        except TypeError:
            pass

    def getarg(self, parname):
        """ Returns the argument value of the parameter parname. The
            parameter is first searched for in self.arglist['parname'],
            then in config['stepname']['parname']. If the parameter is
            not found, the default value from the parameter list is
            returned. Should the parameter name not have an entry in the
            parameter list, an error is logged and a KeyError is raised.
            All name comparisons are made in lower case.
        """
        # list of strings that should parse to boolean true
        # we need to handle booleans separately, because bool("False")
        # evaluates to True
        booltrue = ['yes', 'true', '1', 't']
        parname = parname.lower()  # so we don't have to worry about case
        # Get paramlist index and check if parameter is valid
        try:
            ind = [par[0].lower() for par in self.paramlist].index(parname)
        except ValueError:
            msg = 'GetArg: There is no parameter named %s' % parname
            self.log.error(msg)
            raise KeyError(msg)
        parnameraw = self.paramlist[ind][0]  # ParName in original case
        default = self.paramlist[ind][1]
        # get from arguments if possible
        # (has_key() replaced by 'in': has_key was removed in Python 3)
        if parname in self.arglist:
            # assumes that: if value is not default, then set on command
            # line by the user.
            if self.arglist[parname] != self.parser.get_default(parnameraw):
                # BUGFIX: look up with the same (lowercase) key that was
                # checked above; self.arglist[parnameraw] could raise
                # KeyError for mixed-case parameter names.
                ret = self.arglist[parname]
                self.log.debug('GetArg: from command line, done (%s=%s)' %
                               (parnameraw, repr(ret)))
                return ret
        # make temporary config entry with lowercase key names
        conftmp = {}
        if self.name in self.config:  # skip if no step entry in config
            for keyname in self.config[self.name].keys():
                conftmp[keyname.lower()] = self.config[self.name][keyname]
        # get from config if possible
        if parname in conftmp:
            value = conftmp[parname]
            # If default is a sequence:
            if isinstance(default, (tuple, list)):
                # Get type for list elements
                # (if default is empty, convert to string)
                if len(default) > 0:
                    outtype = type(default[0])
                else:
                    outtype = str
                ret = []
                # Note: if the keyword only has one item in the list and
                # there is no trailing comma, configobj will read it as a
                # string instead of a 1-element list. We force to list here.
                if isinstance(value, str):
                    value = [value]
                # Convert elements in list (range replaces Py2-only xrange)
                for i in range(len(value)):
                    # Check if it's boolean
                    if outtype == bool:
                        if value[i].lower() in booltrue:
                            ret.append(True)
                        else:  # default to False
                            ret.append(False)
                    # Not boolean - just convert to type
                    else:
                        ret.append(outtype(value[i]))
                # convert to same sequence type as the default
                self.log.debug('GetArg: from config file, done (%s=%s)' %
                               (parname, repr(type(default)(ret))))
                return type(default)(ret)
            # Default is not a sequence
            else:
                # Check if it's boolean
                if isinstance(default, bool) and not isinstance(value, bool):
                    if value.lower() in booltrue:
                        self.log.debug(
                            'GetArg: from config file, done (%s=True)' %
                            parname)
                        return True
                    else:
                        self.log.debug(
                            'GetArg: from config file, done (%s=False)' %
                            parname)
                        return False
                # Not boolean - just convert to type
                else:
                    self.log.debug('GetArg: from config file, done (%s=%s)' %
                                   (parname, repr(type(default)(value))))
                    return type(default)(value)
        # get default from parameter list
        ret = self.paramlist[ind][1]
        # return parameter
        self.log.debug('GetArg: from param list, done (%s=%s)' %
                       (parname, repr(ret)))
        return ret

    def getparam(self, parname):
        """ DEPRECATED - use getarg instead
            Returns the value of the parameter parname. The parameter is
            first searched for in self.arglist['parname'], then in
            config['stepname']['parname']. If the parameter is not found,
            a warning is returned and a KeyError is raised.
        """
        self.log.warn('GetParam is Decrecated - use GetArg')
        return self.getarg(parname)

    def reset(self):
        """ Resets the step to the same condition as it was when it was
            created. Internal variables are reset, any stored data is
            erased.
        """
        # initialize input and output
        self.datain = DataParent()
        self.dataout = DataParent()
        self.log.debug('Reset: done')

    def execute(self):
        """ Runs the pipe step as called from the command line:
            The first arguments are used as input file names. Other
            special arguments are:
            - config = name of the configuration file object
            - test = runs the test function using the input file
            - loglevel = name of logging level (INFO is default)
            Other arguments are used as parameters to the pipe step.
        """
        ### Read Arguments
        # Set up argument parser - Generic parameters
        self.parser = argparse.ArgumentParser(
            description="Pipeline Step %s" % self.name,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.parser.add_argument('inputfiles', type=str, default='',
                                 nargs='*', help='input files pathname',)
        self.parser.add_argument('-t', '--test', action='store_true',
                                 help='runs the selftest of this pipe step')
        self.parser.add_argument(
            '--loglevel', default='INFO', type=str,
            choices=['DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'],
            help='log level')
        self.parser.add_argument('--logfile', default=None, type=str,
                                 help='logging file')
        self.parser.add_argument('--config', default=None, type=str,
                                 help='pipeline configuration file')
        # Add step-specific parameters from parlist
        # (getarg falls back to the paramlist default when unset)
        for param in self.paramlist:
            if isinstance(param[1], (list, tuple)):
                try:
                    partype = type(param[1][0])
                    self.parser.add_argument('--%s' % param[0], type=partype,
                                             nargs=len(param[1]),
                                             default=param[1], help=param[2])
                except IndexError:
                    # empty list, so no type checking
                    self.parser.add_argument('--%s' % param[0], nargs='*',
                                             default=None, help=param[2])
            else:
                self.parser.add_argument('--' + param[0],
                                         type=type(param[1]),
                                         default=param[1], help=param[2])
        # Get arguments - store dict in arglist
        args = self.parser.parse_args()
        self.arglist = vars(args)
        ### Process generic arguments
        # Set logging (add file handler if logfile != '')
        level = getattr(logging, args.loglevel.upper(), None)
        logging.basicConfig(level=level)
        if args.logfile is not None:
            fhand = logging.FileHandler(args.logfile)
            fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            fhand.setFormatter(logging.Formatter(fmt))
            logging.getLogger().addHandler(fhand)
        # Set configuration (load if specified)
        if args.config is not None:
            datain = DataParent(config=args.config)
            self.config = datain.config
        elif not args.test:
            # Set config unless test is requested
            self.config = ConfigObj()
            self.config[self.name] = {}
        # Check for test
        if args.test:
            self.test()
            return
        ### Reduce data
        self.execfiles(args.inputfiles)
        self.log.info('Execute: done')

    def execfiles(self, inputfiles):
        """ Runs several files from execute.
            This function is overwritten in MISO and MIMO steps.
        """
        if len(inputfiles) > 0:
            for filename in inputfiles:
                # Read input file: make dataparent, get child from load()
                datain = DataParent(config=self.config)
                self.datain = datain.load(filename)
                # Call start - run and call end
                self.runstart(self.datain, self.arglist)
                self.run()
                self.runend(self.dataout)
                # Write output file
                self.dataout.save()
                self.log.info('Execute: Saved result %s' %
                              self.dataout.filename)
        else:
            # Warning - no input file
            self.log.warn('Execute: Missing input File')

    def test(self):
        """ Test Pipe Step Parent Object:
            Runs a set of basic tests on the object
        """
        # log message
        self.log.info('Testing pipe step parent')
        # test function call
        #testout=self(1) # should raise TypeError
        if self.config is not None:
            testin = DataParent(config=self.config)
        else:
            testin = DataParent(config=self.testconf)
        testin.filename = 'this.file.type.fts'
        testout = self(testin, sampar=5.0)
        print(testout.header)
        print(testout.filename)
        # test get and set parameters
        print("sampar=%.3f" % self.getarg('sampar'))
        # log message
        self.log.info('Testing pipe step parent - Done')
fhand.setFormatter(logging.Formatter(logformat)) logging.getLogger().addHandler(fhand) print(logf) # Setup log for piperun log = logging.getLogger('piperun') log.info('Setting Up: %s' % os.path.split(args.prdfile[0])[1]) # Get pipeconfig file if 'pipeconf' in rundict: pipeconf = rundict['pipeconf'] else: log.error('Missing pipeconf in %s' % args.prdfile) raise ValueError('Missing pipeconf in %s' % args.prdfile) # Load pipeconf, merge if there are multiple pipeconfs pipeconf = pipeconf.split('\n') log.debug('Loading Pipeconf=' + repr(pipeconf)) pipeconf = DataParent(config=pipeconf).config # Get pipemode if 'pipemode' in rundict: pipemode = rundict['pipemode'] else: pipemode = None # Get singlefile singlefile = False if 'singlefile' in rundict: if rundict['singlefile'][0] in ['T','t']: singlefile = True ### Get input file list # Option 1: Filenames (treat each with glob) filelist = [] # list of input files to reduce
def run(self):
    """ Runs the data reduction algorithm. The self.datain is run
        through the code, the result is in self.dataout.
        Input files are grouped by the header keywords in the
        'groupkeys' parameter; each group is reduced with the step
        named by 'redstepname'. Unchanged groups (identified via the
        'fileidkey' header values) reuse the previous output instead
        of being re-reduced.
    """
    ### Get redstep if it's not loaded
    if self.redstep is None:
        datap = DataParent(config=self.config)
        self.redstep = datap.getobject(self.getarg('redstepname'))
    ### Group the input files
    # Setup datagroups, get keys and key formats
    datagroups = []
    groupkeys = self.getarg('groupkeys').split('|')
    groupkfmt = self.getarg('groupkfmt')
    if len(groupkfmt) == 0:
        groupkfmt = None
    else:
        groupkfmt = groupkfmt.split('|')
    # Loop over files
    for data in self.datain:
        groupind = 0
        # Loop over groups until group match found or end reached
        while groupind < len(datagroups):
            # Check if data fits group (all groupkeys values match)
            found = True
            gdata = datagroups[groupind][0]
            for keyi in range(len(groupkeys)):
                # Get key from group and new data - format if needed
                key = groupkeys[keyi]
                dkey = data.getheadval(key)
                gkey = gdata.getheadval(key)
                if groupkfmt is not None:
                    dkey = groupkfmt[keyi] % dkey
                    gkey = groupkfmt[keyi] % gkey
                # Compare
                if dkey != gkey:
                    found = False
            # Found -> add to group
            if found:
                datagroups[groupind].append(data)
                break
            # Not found -> increase group index
            groupind += 1
        # If not in any group -> make new group
        if groupind == len(datagroups):
            datagroups.append([data, ])
    # info messages
    self.log.debug(" Found %d data groups" % len(datagroups))
    for groupind in range(len(datagroups)):
        group = datagroups[groupind]
        msg = " Group %d len=%d" % (groupind, len(group))
        for key in groupkeys:
            msg += " %s = %s" % (key, group[0].getheadval(key))
        self.log.debug(msg)
    ### Reduce input files - collect output files
    self.dataout = []
    # Make new variables for groupidkeys and groupoutputs
    groupidkeys = []
    groupoutputs = []
    # Loop over groups -> save output in self.dataout
    for groupi in range(len(datagroups)):
        group = datagroups[groupi]
        # Get fileidkeys to see if unchanged groups should be re-reduced
        fileidkey = self.getarg('fileidkey')
        if len(fileidkey):
            # Get fileidkeys for the current new group
            newkeys = [dat.getheadval(fileidkey) for dat in group]
            copykeys = ['x']
            # Search for fit in existing groups: fit is index
            fit = -1
            for fit in range(len(self.groupidkeys)):
                # Make copy of new keys
                copykeys = list(newkeys)
                # For each key in group[fit]
                for val in self.groupidkeys[fit]:
                    if val in copykeys:
                        # Remove key from copykeys if found
                        del copykeys[copykeys.index(val)]
                    else:
                        # Else: group[fit] does not match, go to next group
                        copykeys = ['x']
                        break
                # Check if any values left in copykeys
                if len(copykeys) == 0:
                    # No values left in copykeys, group[fit] is valid match
                    break
            # Any values left in copykeys -> no match found
            if len(copykeys):
                fit = -1
                self.log.debug('New datagroup # %d has no previous match' %
                               groupi)
            else:
                self.log.debug(
                    'New datagroup # %d matches previous group # %d' %
                    (groupi, fit))
        else:
            fit = -1
        # Reduce the data
        if fit < 0:
            dataout = self.redstep(group)
            # Add groupoutputs and groupidkeys
            if len(fileidkey):
                groupoutputs.append(dataout)
                idkeys = [dat.getheadval(fileidkey) for dat in group]
                groupidkeys.append(idkeys)
        else:
            groupoutputs.append(self.groupoutputs[fit])
            groupidkeys.append(self.groupidkeys[fit])
            dataout = self.groupoutputs[fit]
        # add output to dataout
        if issubclass(dataout.__class__, DataParent):
            self.dataout.append(dataout)
        else:
            for data in dataout:
                # BUGFIX: append each element; the original appended the
                # whole dataout list once per element.
                self.dataout.append(data)
    # Copy groupidkeys and groupoutputs
    self.groupoutputs = groupoutputs
    self.groupidkeys = groupidkeys
    # Set procname to redstep.procname
    self.procname = self.redstep.procname