def _construct_steps(self, step_name_list, step_control_dict,
                     step_parset_files, step_parset_obj, parset_dir):
    """
    Resolve every step named in ``step_name_list``.

    For each step this extracts its ``control`` subset from the main parset,
    disambiguates duplicate step names, and (if the step has an ``argument``
    section) merges in any referenced external parsets before writing the
    per-step parset file into ``parset_dir``.

    All four container arguments are mutated in place:
    ``step_name_list`` (names rewritten with dedup suffixes),
    ``step_control_dict`` (name -> control subset),
    ``step_parset_files`` (name -> written parset path),
    ``step_parset_obj`` (name -> argument parset object).
    """
    step_list_copy = (copy.deepcopy(step_name_list))
    counter = 0
    # Walk the step list from the back; ``counter`` goes -1, -2, ... so that
    # ``step_name_list[counter]`` writes back to the matching position.
    while step_list_copy:
        counter -= 1
        stepname = step_list_copy.pop(-1)
        fullparset = self.parset.makeSubset(
            self.parset.fullModuleName(str(stepname)) + '.')
        subparset = fullparset.makeSubset(
            fullparset.fullModuleName('control') + '.')
        # Disambiguate duplicate step names: count earlier occurrences still
        # left in the list and append that count (e.g. 'ndppp' -> 'ndppp2').
        number = 0
        for item in step_list_copy:
            if item == stepname:
                number += 1
        if number != 0:
            stepname += str(number)
        step_name_list[counter] = stepname
        step_control_dict[stepname] = subparset
        if fullparset.fullModuleName('argument'):
            stepparset = fullparset.makeSubset(
                fullparset.fullModuleName('argument') + '.')
            # *****************************************************************
            # save parsets
            # either a filename is given in the main parset
            # or files will be created from subsets with stepnames.parset as
            # filenames
            # NOTE(review): the bare ``except: pass`` blocks below treat a
            # missing or unreadable 'parset' entry as "nothing to merge", but
            # they also silently hide genuine errors (including typos in
            # filenames).
            try:
                file_parset = Parset(stepparset.getString('parset'))
                for k in file_parset.keywords():
                    # Existing step arguments win over file-provided ones.
                    if not k in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
                stepparset.remove('parset')
            except:
                pass
            # parset from task.cfg
            try:
                file_parset = Parset(
                    self.task_definitions.get(str(subparset['type']),
                                              'parset'))
                for k in file_parset.keywords():
                    if not k in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
            except:
                pass
            # for parset in control section
            try:
                file_parset = Parset(subparset.getString('parset'))
                for k in file_parset.keywords():
                    if not k in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
                subparset.remove('parset')
            except:
                pass
            # Write the merged argument parset for this step to disk.
            step_parset = os.path.join(parset_dir, stepname + '.parset')
            stepparset.writeFile(step_parset)
            step_parset_files[stepname] = step_parset
            step_parset_obj[stepname] = stepparset
def populate_cimager_parset(parset):
    """
    Build a patch dictionary for a cimager parset template.

    Renames each image in ``Cimager.Images.Names`` to ``<image>_<name>``,
    patches in per-measurement-set frequency and pointing direction, then
    copies every remaining template key and writes the patched parset.

    NOTE(review): this function reads ``dataset``, ``restore``, ``name``,
    ``frequency``, ``ms_dir_ra``/``ms_dir_dec``/``ms_dir_type`` and ``self``
    from the enclosing scope — it only works as a closure defined inside the
    recipe method that binds those names; confirm against the caller.
    """
    input_parset = Parset(parset)
    patch_dictionary = {
        'Cimager.dataset': dataset,
        'Cimager.restore': restore
    }
    image_names = []
    for image_name in input_parset.getStringVector(
            'Cimager.Images.Names'):
        # Per-MS unique image name: "<image>_<name>".
        image_names.append("%s_%s" % (image_name, name))
        # Copy the template image definition under the new name.
        subset = input_parset.makeSubset(
            "Cimager.Images.%s" % image_name,
            "Cimager.Images.%s" % image_names[-1])
        patch_dictionary["Cimager.Images.%s.frequency" %
                         image_names[-1]] = frequency
        patch_dictionary["Cimager.Images.%s.direction" %
                         image_names[-1]] = "[ %s,%s,%s ]" % (
                             ms_dir_ra, ms_dir_dec, ms_dir_type)
        for key in subset:
            patch_dictionary[key] = subset[key].get()
    # Drop the template image definitions; keep all other template keys.
    input_parset.subtractSubset('Cimager.Images.image')
    for key in input_parset:
        patch_dictionary[key] = input_parset[key].get()
    patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(
        image_names)
    return patch_parset(None, patch_dictionary,
                        self.config.get("layout", "job_directory"))
def gvds_iterator(gvds_file, nproc=4):
    """
    Reads a GVDS file.

    Provides a generator, which successively returns the contents of the GVDS
    file in the form (host, filename, vds), in chunks suitable for processing
    across the cluster. Ie, no more than nproc files per host at a time.
    """
    parset = Parset(gvds_file)

    # Group (filename, vds) pairs by the host holding them.
    data = defaultdict(list)
    for part in range(parset.getInt('NParts')):
        host = parset.getString("Part%d.FileSys" % part).split(":")[0]
        filename = parset.getString("Part%d.FileName" % part)
        vds = parset.getString("Part%d.Name" % part)
        data[host].append((filename, vds))

    # Replace each host's list with an iterator over nproc-sized chunks.
    # (items() instead of Py2-only iteritems(); value reassignment during
    # iteration is safe as no keys are added or removed.)
    for host, values in data.items():
        data[host] = utilities.group_iterable(values, nproc)

    while True:
        yieldable = []
        for host, values in data.items():
            try:
                # next(values) instead of Py2-only values.next().
                for filename, vds in next(values):
                    yieldable.append((host, filename, vds))
            except StopIteration:
                pass
        if len(yieldable) == 0:
            # PEP 479: raising StopIteration inside a generator becomes a
            # RuntimeError on Python 3.7+; a plain return ends the generator.
            return
        else:
            yield yieldable
def show_task(self, task):
    """
    Print the possible arguments (``key = value`` pairs) for ``task``,
    read from the parset file registered for it in the task definitions.
    Prints nothing if the task has no 'parset' option.
    """
    task_parset = Parset()
    if self.task_definitions.has_option(task, 'parset'):
        task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
        print 'possible arguments: key = value'
        # NOTE(review): ``keys`` is accessed as an attribute, not called —
        # consistent with ``self.parset.keys`` usage elsewhere in this
        # codebase, so it is presumably a property on this Parset wrapper;
        # confirm before changing.
        for k in task_parset.keys:
            print ' ', k, ' ', '=', ' ', task_parset[k]
def __init__(self):
    """Initialise pipeline state on top of the base ``control`` class."""
    control.__init__(self)
    # Placeholders; these are populated while the pipeline parset is parsed.
    self.parset = Parset()
    self.parset_feedback_file = None
    self.input_data = {}
    self.output_data = {}
    self.name = ''
def _create_mapfile_from_parset(parset, identifier):
    """
    Build a DataMap for ``identifier`` from the DataProducts section of the
    parset file ``parset``.

    Each entry combines one element from the ``.locations``, ``.filenames``
    and ``.skip`` vectors of the identifier's subset.
    """
    pars = Parset()
    pars.adoptFile(parset)
    dps = pars.makeSubset(pars.fullModuleName('DataProducts') + '.')

    locations = dps.getStringVector(identifier + '.locations')
    filenames = dps.getStringVector(identifier + '.filenames')
    skip_flags = dps.getBoolVector(identifier + '.skip')

    entries = []
    for location, filename, skip in zip(locations, filenames, skip_flags):
        # location looks like "<host>:<directory>"; joining the filename and
        # re-splitting on ':' yields the (host, path) pair DataMap expects.
        entries.append(
            tuple(os.path.join(location, filename).split(':')) + (skip, ))
    return DataMap(entries)
def _get_step_kind(parset, stepname):
    """
    Return ``(kind, control_subset)`` for a pipeline step.

    ``kind`` defaults to 'recipe' when the step's control section has no
    'kind' entry (the lookup raising is the normal "unset" case).
    """
    fullparset = parset.makeSubset(
        parset.fullModuleName(str(stepname)) + '.')
    subparset = fullparset.makeSubset(
        fullparset.fullModuleName('control') + '.')
    try:
        kind = subparset.getString('kind')
    except Exception:
        # Narrowed from a bare except: don't swallow KeyboardInterrupt etc.
        kind = 'recipe'
    return kind, subparset


def get_current_op_step_names(direction):
    """
    Returns list of step names for current operation.

    Plugin steps are filtered out; 'loop' steps are expanded into their
    non-plugin loopsteps (the loop container itself is not returned).
    """
    current_op = get_current_op(direction)
    parset_file = os.path.join(direction.working_dir, 'results', current_op,
                               direction.name, 'pipeline.parset')
    parset = Parset()
    parset.adoptFile(parset_file)
    pipeline_args = parset.makeSubset(parset.fullModuleName('pipeline') + '.')
    step_name_list = pipeline_args.getStringVector('steps')

    # Filter out plugin steps
    filter_step_name_list = []
    for stepname in step_name_list:
        kind_of_step, subparset = _get_step_kind(parset, stepname)
        if kind_of_step == 'plugin':
            continue
        if kind_of_step == 'loop':
            # A loop step is a container: keep its non-plugin loopsteps.
            for loopstep in subparset.getStringVector('loopsteps'):
                kind_of_loop_step, _ = _get_step_kind(parset, loopstep)
                if kind_of_loop_step != 'plugin':
                    filter_step_name_list.append(loopstep)
        else:
            filter_step_name_list.append(stepname)
    return filter_step_name_list
def go(self):
    """
    Map input files (stored on storage nodes) to a mapfile parset:
    one key per storage node, holding the node-local paths of its files.
    Writes the parset to ``self.inputs['mapfile']`` and returns 0.
    """
    self.logger.info("Starting storagemapper run")
    super(storagemapper, self).go()

    # We read the storage node name out of the path
    # and append the local filename (ie, on the storage node) to the map
    # ----------------------------------------------------------------------
    data = defaultdict(list)
    for filename in self.inputs['args']:
        # Assumes a path layout of /<a>/<b>/<storage-node>/... so that the
        # fourth component is the node name — TODO confirm against cluster
        # layout.
        host = filename.split(os.path.sep)[3]
        data[host].append(filename.split(host)[-1])

    # Dump the generated mapping to a parset
    # ----------------------------------------------------------------------
    parset = Parset()
    # items() instead of Py2-only iteritems(): works on Python 2 and 3.
    for host, filenames in data.items():
        parset.addStringVector(host, filenames)

    create_directory(os.path.dirname(self.inputs['mapfile']))
    parset.writeFile(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']

    return 0
def go(self):
    """
    Distribute input files over compute nodes, one (sub)cluster at a time,
    in round-robin fashion, and write the resulting host -> files mapping
    to the mapfile parset. Returns 0 on success.
    """
    self.logger.info("Starting datamapper run")
    super(datamapper, self).go()

    # We build lists of compute-nodes per cluster and data-per-cluster,
    # then match them up to schedule jobs in a round-robin fashion.
    # ----------------------------------------------------------------------
    clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc"))
    if clusterdesc.subclusters:
        available_nodes = {}
        for subcluster in clusterdesc.subclusters:
            available_nodes[subcluster.name] = cycle(
                get_compute_nodes(subcluster))
    else:
        available_nodes = {
            clusterdesc.name: cycle(get_compute_nodes(clusterdesc))
        }

    data = defaultdict(list)
    for filename in self.inputs['args']:
        # The subcluster name is taken from the path — presumably the second
        # path component; verify against the cluster's filesystem layout.
        subcluster = filename.split(os.path.sep)[2]
        try:
            host = next(available_nodes[subcluster])
        except KeyError as key:
            self.logger.error("%s is not a known cluster" % str(key))
            raise
        data[host].append(filename)

    # Dump the generated mapping to a parset
    # ----------------------------------------------------------------------
    parset = Parset()
    for host, filenames in data.items():
        parset.addStringVector(host, filenames)

    parset.writeFile(self.inputs['mapfile'])
    self.outputs['mapfile'] = self.inputs['mapfile']

    return 0
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality.

    Runs ``executable`` on ``infile``, writing the measurement-set output to
    a temporary scratch directory first (``kwargs['local_scratch_dir']``);
    the intended final destination is recorded in ``self.msout_original``.
    Returns 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Final msout location; the executable writes to scratch first.
    self.msout_original = kwargs['msout'].rstrip('/')
    kwargs.pop('msout')
    self.msout_destination_dir = os.path.dirname(self.msout_original)

    # Set up scratch paths
    self.scratch_dir = tempfile.mkdtemp(dir=kwargs['local_scratch_dir'])
    kwargs.pop('local_scratch_dir')
    self.logger.info('Using {} as scratch directory'.format(
        self.scratch_dir))
    self.msout_scratch = os.path.join(
        self.scratch_dir, os.path.basename(self.msout_original))
    args.append('msout=' + self.msout_scratch)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']
        if not parsetasfile:
            # Translate kwargs to command-line options in the style the
            # executable expects.
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    multargs = v.split(' ')
                    # NOTE(review): str.split always returns a non-empty
                    # list, so the else branch below is unreachable.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            # Write kwargs to a parset file and pass that as first argument.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        # 'as err' instead of Py2-only 'except X, err' (syntax error on Py3).
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            self.cleanup()
            return 1
        except Exception as err:
            self.logger.error(str(err))
            self.cleanup()
            return 1
        # NOTE(review): the success path (moving results out of the scratch
        # directory, setting outputs, return 0) appears truncated from this
        # view of the source.
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality.

    Prepares sourcedb/parmdb for the measurement set, then runs
    ``executable`` with the remaining kwargs written to a parset file.
    Sets ``self.outputs['ok']`` and returns 0 on success, 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Pop recipe-control options out of kwargs so that only the executable's
    # own arguments remain for the parset written below.
    if 'replace-sourcedb' in kwargs:
        self.replace_sourcedb = kwargs['replace-sourcedb']
        kwargs.pop('replace-sourcedb')
    if 'replace-parmdb' in kwargs:
        self.replace_parmdb = kwargs['replace-parmdb']
        kwargs.pop('replace-parmdb')
    if 'dry-run' in kwargs:
        self.dry_run = kwargs['dry-run']
        kwargs.pop('dry-run')
    if 'sourcedb' in kwargs:
        self.sourcedb = kwargs['sourcedb']
        kwargs.pop('sourcedb')
    if 'parmdb' in kwargs:
        self.parmdb = kwargs['parmdb']
        kwargs.pop('parmdb')
    # Explicit db names imply the corresponding db must be (re)created.
    if 'sourcedb-name' in kwargs:
        self.sourcedb_basename = kwargs['sourcedb-name']
        self.replace_sourcedb = True
        kwargs.pop('sourcedb-name')
    if 'parmdb-name' in kwargs:
        self.parmdb_basename = kwargs['parmdb-name']
        self.replace_parmdb = True
        kwargs.pop('parmdb-name')
    if 'force' in kwargs:
        self.replace_parmdb = True
        self.replace_sourcedb = True
        kwargs.pop('force')
    numthreads = 1
    if 'numthreads' in kwargs:
        numthreads = kwargs['numthreads']
        kwargs.pop('numthreads')
    args.append('--numthreads=' + str(numthreads))
    if 'observation' in kwargs:
        self.observation = kwargs.pop('observation')
    if 'catalog' in kwargs:
        self.catalog = kwargs.pop('catalog')

    # Create the databases (and, unless suppressed, the extra MS columns)
    # before the executable runs.
    self.createsourcedb()
    self.createparmdb()
    if not 'no-columns' in kwargs:
        #if not kwargs['no-columns']:
        self.addcolumns()
    else:
        kwargs.pop('no-columns')

    args.append('--sourcedb=' + self.sourcedb_path)
    args.append('--parmdb=' + self.parmdb_path)

    args.append(self.observation)
    #catalog = None

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            # Remaining kwargs become the executable's parset file, passed
            # as the last command-line argument.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in list(kwargs.items()):
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            #args.insert(0, parsetname)
            args.append(parsetname)
        #if catalog is not None:
        #    args.append(catalog)

        try:
            # ****************************************************************
            #Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

    # We need some signal to the master script that the script ran ok.
    self.outputs['ok'] = True
    return 0
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This method contains all the needed functionality.

    Runs ``executable`` on ``infile``, translating kwargs either to
    command-line options (per ``args_format``) or to a parset file.
    Returns 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']
        # deal with multiple input files for wsclean: a "[a,b,c]" list
        # argument is expanded in place into separate arguments.
        if argsformat == 'wsclean':
            # range instead of Py2-only xrange (works on Python 2 and 3).
            for i in reversed(range(len(args))):
                if str(args[i]).startswith('[') and str(
                        args[i]).endswith(']'):
                    tmplist = args.pop(i).lstrip('[').rstrip(']').split(
                        ',')
                    for val in reversed(tmplist):
                        args.insert(i, val.strip(' \'\"'))

        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    # "[a, b]" values are split on ','; others on spaces.
                    if str(v).startswith('[') and str(v).endswith(']'):
                        v = v.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = v.split(',')
                    else:
                        multargs = v.split(' ')
                    # NOTE(review): str.split always returns a non-empty
                    # list, so the else branch below is unreachable.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            # Write kwargs to a parset file; losoto expects it as the last
            # argument, all other tools as the first.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            if argsformat == 'losoto':
                args.append(parsetname)
            else:
                args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        # 'as err' instead of Py2-only 'except X, err' (syntax error on Py3).
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This function contains all the needed functionality.

    Builds a casapy command string from dotted kwargs
    (``task.param = value``), wraps it in a generated Python command file
    plus a shell script, and executes that script. Returns 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # hack the planet
    #executable = 'casa'

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with than one process on one
        # filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            for k, v in kwargs.items():
                args.append('--' + k + '=' + v)
        else:
            # Collect the distinct "task" prefixes of dotted kwargs keys.
            nodeparset = Parset()
            sublist = []
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                # NOTE(review): str.find returns -1 (truthy) when '.' is
                # absent and 0 (falsy) when the key starts with '.', so this
                # does not literally test "contains a dot" — presumably all
                # keys here are 'task.param' style; verify.
                if str(k).find('.'):
                    if not str(k).split('.')[0] in sublist:
                        sublist.append(str(k).split('.')[0])

        #quick hacks below. for proof of concept.
        # NOTE(review): if ``parsetasfile`` is False, ``sublist`` and
        # ``nodeparset`` are undefined below — this node script is
        # presumably always invoked with parsetasfile=True; confirm.
        subparsetlist = []
        casastring = ''
        for sub in sublist:
            subpar = nodeparset.makeSubset(
                nodeparset.fullModuleName(sub) + '.')
            # Build "task(param=value, ...)\n" for the casapy command file.
            casastring = sub + '('
            for k in subpar.keys():
                # NOTE(review): the '/casastr/' branch is unreachable — any
                # value starting with '/casastr/' also starts with '/' and is
                # taken by the first branch.
                if str(subpar[k]).find('/') == 0:
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k]) + "'" + ','
                elif str(subpar[k]).find('/casastr/') == 0:
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k]).strip('/casastr/') + "'" + ','
                else:
                    casastring += str(k) + '=' + str(subpar[k]) + ','
            casastring = casastring.rstrip(',')
            casastring += ')\n'

        # 1) return code of a casapy is not properly recognized by the pipeline
        # wrapping in shellscript works for succesful runs.
        # failed runs seem to hang the pipeline...
        # 2) casapy can not have two instances running from the same directory.
        # create tmp dirs
        casapydir = tempfile.mkdtemp(dir=work_dir)
        if casastring != '':
            # Command file: run the casa command, exit(1) on any error so the
            # wrapper script's return code reflects failure.
            casafilename = os.path.join(
                work_dir,
                os.path.basename(infile) + '.casacommand.py')
            casacommandfile = open(casafilename, 'w')
            casacommandfile.write('try:\n')
            casacommandfile.write(' ' + casastring)
            casacommandfile.write('except SystemExit:\n')
            casacommandfile.write(' pass\n')
            casacommandfile.write('except:\n')
            casacommandfile.write(' import os\n')
            casacommandfile.write(' os._exit(1)\n')
            casacommandfile.close()
            args.append(casafilename)

        # Wrapper shell script that invokes the executable with all args.
        somename = os.path.join(
            work_dir,
            os.path.basename(infile) + '.casashell.sh')
        commandstring = ''
        commandstring += executable
        for item in args:
            commandstring += ' ' + item
        crap = open(somename, 'w')
        crap.write('#!/bin/bash \n')
        crap.write('echo "Trying CASAPY command" \n')
        crap.write(commandstring)
        crap.close()

        # Make the wrapper script executable for everyone.
        import stat
        st = os.stat(somename)
        os.chmod(
            somename,
            st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            #cmd = [executable] + args
            cmd = [somename]
            with CatchLog4CPlus(
                casapydir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception, err:
            self.logger.error(str(err))
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    This method contains all the needed functionality.

    Loads ``executable`` as a Python module (a pipeline plugin script) and
    calls its ``main(*args, **kwargs)`` with the working directory set to
    ``work_dir``. Returns 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Script %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir,
                                      os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        pipedir = os.getcwd()
        try:
            # ****************************************************************
            # Run the plugin's main() from within the working directory.
            os.chdir(work_dir)
            outdict = {}
            plugin = imp.load_source('main', executable)
            outdict = plugin.main(*args, **kwargs)
        # 'as err' instead of Py2-only 'except X, err' (syntax error on Py3).
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1
        finally:
            # Restore the original working directory also when the plugin
            # fails (the original only restored it on success).
            os.chdir(pipedir)
        # NOTE(review): ``outdict`` is unused in this visible chunk — the
        # function presumably continues past this view to publish it.
def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
    """
    Run cimager on the data described by a VDS file and move the resulting
    images into ``resultsdir``.
    """
    #     imager_exec: path to cimager executable
    #             vds: VDS file describing the data to be imaged
    #          parset: imager configuration
    #      resultsdir: place resulting images here
    #      start_time: ) time range to be imaged
    #        end_time: ) in seconds (may be None)
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        self.logger.info("Processing %s" % (vds, ))

        # Bail out if destination exists (can thus resume a partial run).
        # Should be configurable?
        # ------------------------------------------------------------------
        parset_data = Parset(parset)
        image_names = parset_data.getStringVector("Cimager.Images.Names")
        for image_name in image_names:
            outputfile = os.path.join(resultsdir, image_name + ".restored")
            self.logger.info(outputfile)
            if os.path.exists(outputfile):
                self.logger.info("Image already exists: aborting.")
                return 0
        try:
            working_dir = mkdtemp(suffix=".%s" %
                                  (os.path.basename(__file__), ))

            # If a time range has been specified, copy that section of the
            # input MS and only image that.
            # --------------------------------------------------------------
            query = []
            if start_time:
                self.logger.debug("Start time is %s" % start_time)
                start_time = quantity(float(start_time), 's')
                query.append("TIME > %f" % start_time.get('s').get_value())
            if end_time:
                self.logger.debug("End time is %s" % end_time)
                end_time = quantity(float(end_time), 's')
                query.append("TIME < %f" % end_time.get('s').get_value())
            query = " AND ".join(query)
            if query:
                # Select relevant section of MS.
                # ----------------------------------------------------------
                self.logger.debug("Query is %s" % query)
                output = os.path.join(working_dir, "timeslice.MS")
                vds_parset = get_parset(vds)
                t = table(vds_parset.getString("FileName"))
                t.query(query, name=output)
                # Patch updated information into imager configuration.
                # ----------------------------------------------------------
                parset = patch_parset(parset, {'Cimager.dataset': output})
            else:
                self.logger.debug("No time range selected")

            self.logger.debug("Running cimager")
            with CatchLog4CXX(
                    working_dir,
                    self.logger.name + "." + os.path.basename(vds)):
                cimager_process = Popen([imager_exec, "-inputs", parset],
                                        stdout=PIPE,
                                        stderr=PIPE,
                                        cwd=working_dir)
                sout, serr = cimager_process.communicate()
            log_process_output("cimager", sout, serr, self.logger)
            if cimager_process.returncode != 0:
                raise CalledProcessError(cimager_process.returncode,
                                         imager_exec)

            # Dump the resulting images in the pipeline results area.
            # I'm not aware of a foolproof way to predict the image names
            # that will be produced, so we read them from the
            # parset and add standard cimager prefixes.
            # --------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector(
                "Cimager.Images.Names")
            prefixes = [
                "image", "psf", "residual", "weights", "sensitivity"
            ]
            self.logger.debug("Copying images to %s" % resultsdir)
            for image_name in image_names:
                for prefix in prefixes:
                    filename = image_name.replace("image", prefix, 1)
                    shutil.move(os.path.join(working_dir, filename),
                                os.path.join(resultsdir, filename))
            if parset_data.getBool('Cimager.restore'):
                shutil.move(
                    os.path.join(working_dir, image_name + ".restored"),
                    os.path.join(resultsdir, image_name + ".restored"))
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the source appears truncated here — the body of
            # this ``finally`` clause (presumably cleanup of ``working_dir``)
            # is missing from this chunk.
def plugin_main(*args, **kwargs):
    """
    Combine two parset files and write the result as a feedback file.

    Expects kwargs 'first_parset', 'second_parset' and 'result_parset';
    the output is written to ``<result_parset>_feedback_file``.
    """
    merged = Parset(kwargs['first_parset'])
    merged.adoptFile(kwargs['second_parset'])
    merged.writeFile(kwargs['result_parset'] + '_feedback_file')
def pipeline_logic(self):
    """
    Main entry point of the generic pipeline: parse the pipeline parset,
    build the step list, then execute steps one by one, handling the four
    step kinds ('recipe', 'plugin', 'pipeline', 'loop').

    Returns self.usage() on argument/parset problems; otherwise runs until
    the step queue is empty.
    """
    try:
        parset_file = os.path.abspath(self.inputs['args'][0])
    except IndexError:
        return self.usage()
    try:
        # Only adopt the file if the parset is still empty (it may have
        # been pre-populated by a caller). NOTE: '.keys' is an attribute
        # on this parset class, not the dict method.
        if self.parset.keys == []:
            self.parset.adoptFile(parset_file)
            self.parset_feedback_file = parset_file + "_feedback"
    except RuntimeError:
        print >> sys.stderr, "Error: Parset file not found!"
        return self.usage()
    self._replace_values()
    # just a reminder that this has to be implemented
    validator = GenericPipelineParsetValidation(self.parset)
    if not validator.validate_pipeline():
        self.usage()
        exit(1)
    if not validator.validate_steps():
        self.usage()
        exit(1)

    #set up directories
    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")
    # Create directories for temporary parset- and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # *********************************************************************
    # maybe we dont need a subset but just a steplist
    # at the moment only a list with stepnames is given for the pipeline.steps parameter
    # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
    # the names will be the prefix for parset subsets
    pipeline_args = self.parset.makeSubset(
        self.parset.fullModuleName('pipeline') + '.')

    # *********************************************************************
    # forward declaration of things. just for better overview and understanding whats in here.
    # some of this might be removed in upcoming iterations, or stuff gets added.
    step_name_list = pipeline_args.getStringVector('steps')
    step_control_dict = {}      # stepname -> control subparset
    step_parset_files = {}      # stepname -> path of per-step parset file
    step_parset_obj = {}        # stepname -> per-step parset object
    activeloop = ['']           # stack of currently running loop stepnames
    # construct the list of step names and controls
    self._construct_steps(step_name_list, step_control_dict,
                          step_parset_files, step_parset_obj, parset_dir)
    # initial parameters to be saved in resultsdict so that recipes have
    # access to this step0. double init values. 'input' should be
    # considered deprecated; self.name would be consistent to use in
    # subpipelines
    resultdicts = {'input': {
        'parset': parset_file,
        'parsetobj': self.parset,
        'job_dir': job_dir,
        'parset_dir': parset_dir,
        'mapfile_dir': mapfile_dir}}
    resultdicts.update({self.name: {
        'parset': parset_file,
        'parsetobj': self.parset,
        'job_dir': job_dir,
        'parset_dir': parset_dir,
        'mapfile_dir': mapfile_dir}})
    if 'pipeline.mapfile' in self.parset.keys:
        resultdicts['input']['mapfile'] = str(
            self.parset['pipeline.mapfile'])
        resultdicts[self.name]['mapfile'] = str(
            self.parset['pipeline.mapfile'])

    # *********************************************************************
    # main loop
    # there is a distinction between recipes and plugins for user scripts.
    # plugins are not used at the moment and might better be replaced with master recipes
    while step_name_list:
        stepname = step_name_list.pop(0)
        step = step_control_dict[stepname]
        #step_parset = step_parset_obj[stepname]
        inputdict = {}
        inputargs = []
        resultdict = {}
        # default kind_of_step to recipe.
        try:
            kind_of_step = step.getString('kind')
        except:
            kind_of_step = 'recipe'
        try:
            typeval = step.getString('type')
        except:
            typeval = ''
        #self._construct_cmdline(inputargs, step, resultdicts)

        additional_input = {}
        if stepname in step_parset_obj:
            additional_input = self._construct_step_parset(
                step_parset_obj[stepname],
                resultdicts,
                step_parset_files[stepname],
                stepname)
        # stepname not a valid input for old recipes
        if kind_of_step == 'recipe':
            if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                inputdict = {'stepname': stepname}
                inputdict.update(additional_input)

        self._construct_cmdline(inputargs, step, resultdicts)

        if stepname in step_parset_files:
            inputdict['parset'] = step_parset_files[stepname]

        self._construct_input(inputdict, step, resultdicts)
        # hack, popping 'type' is necessary, why? because you deleted kind
        # already in parsets
        try:
            inputdict.pop('type')
        except:
            pass
        try:
            inputdict.pop('kind')
        except:
            pass
        # \hack
        # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
        # python buildin functions cant handle the string returned from parset class.
        if 'environment' in inputdict.keys():
            val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                ' ', '')
            splitval = str(val).split(',')
            valdict = {}
            for item in splitval:
                valdict[item.split(':')[0]] = item.split(':')[1]
            inputdict['environment'] = valdict

        # subpipeline. goal is to specify a pipeline within a pipeline.
        # load other existing pipeline parset and add them to your own.
        if kind_of_step == 'pipeline':
            subpipeline_parset = Parset()
            subpipeline_parset.adoptFile(typeval)
            submapfile = ''
            subpipeline_steplist = subpipeline_parset.getStringVector(
                'pipeline.steps')
            if 'pipeline.mapfile' in subpipeline_parset.keys:
                submapfile = subpipeline_parset['pipeline.mapfile']
                subpipeline_parset.remove('pipeline.mapfile')
            if 'mapfile_in' in inputdict.keys():
                submapfile = inputdict.pop('mapfile_in')
            resultdicts.update({
                os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
            #todo: take care of pluginpathes and everything other then individual steps
            # make a pipeline parse methods that returns everything needed.
            # maybe as dicts to combine them to one
            subpipeline_parset.remove('pipeline.steps')
            if 'pipeline.pluginpath' in subpipeline_parset.keys:
                subpipeline_parset.remove('pipeline.pluginpath')
            checklist = copy.deepcopy(subpipeline_steplist)
            for k in subpipeline_parset.keys:
                if 'loopsteps' in k:
                    for item in subpipeline_parset.getStringVector(k):
                        checklist.append(item)
            # *********************************************************************
            # master parset did not handle formatting and comments in the parset.
            # proper format only after use of parset.makesubset. then it is a different object
            # from a different super class :(. this also explains use of
            # parset.keys and parset.keys()
            # take the parset from subpipeline and add it to the master parset.
            # *********************************************************************
            # replace names of steps with the subpipeline stepname to create a unique identifier.
            # replacement values starting with ! will be taken from the master parset and overwrite
            # the ones in the subpipeline. only works if the ! value is already in the subpipeline
            for k in subpipeline_parset.keys:
                if not str(k).startswith('#'):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!'):
                        for item in checklist:
                            if item in str(val):
                                val = str(val).replace(
                                    item, stepname + '-' + item)
                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        self.parset.add(k, str(val))
            for i, item in enumerate(subpipeline_steplist):
                subpipeline_steplist[i] = stepname + '-' + item
            for item in step_parset_obj[stepname].keys():
                for k in self.parset.keys:
                    if str(k).startswith('!') and item in k:
                        self.parset.remove(k)
                        self.parset.add(
                            '! ' + item,
                            str(step_parset_obj[stepname][item]))
            self._replace_values()

            self._construct_steps(subpipeline_steplist, step_control_dict,
                                  step_parset_files, step_parset_obj,
                                  parset_dir)
            # Prepend the subpipeline's (renamed) steps to the work queue.
            for j in reversed(subpipeline_steplist):
                name = j
                step_control_dict[name] = step_control_dict[j]
                step_name_list.insert(0, name)
            # remove replacements strings to prevent loading the same key twice
            for k in copy.deepcopy(self.parset.keys):
                if str(k).startswith('!'):
                    self.parset.remove(k)

        # loop
        if kind_of_step == 'loop':
            # remember what loop is running to stop it from a conditional step
            # NOTE(review): 'is not' compares string identity here; works
            # in practice because the same string object is reused, but
            # '!=' would be the robust comparison.
            if activeloop[0] is not stepname:
                activeloop.insert(0, stepname)
            # prepare
            counter = 0
            breakloop = False
            if stepname in resultdicts:
                counter = int(resultdicts[stepname]['counter']) + 1
                breakloop = resultdicts[stepname]['break']
            loopsteps = step.getStringVector('loopsteps')
            # break at max iteration or when other step sets break variable
            # NOTE(review): 'is' on ints relies on CPython small-int
            # caching; '==' would be the robust comparison.
            if counter is step.getInt('loopcount'):
                breakloop = True
            if not breakloop:
                # add loop steps to the pipeline including the loop itself
                step_name_list.insert(0, stepname)
                self._construct_steps(loopsteps, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(loopsteps):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)
                # results for other steps to check and write states
                resultdict = {'counter': counter, 'break': breakloop}
            else:
                # reset values for second use of the loop (but why would you do that?)
                resultdict = {'counter': -1, 'break': False}
                activeloop.pop(0)

        # recipes
        if kind_of_step == 'recipe':
            with duration(self, stepname):
                resultdict = self.run_task(typeval, inputargs, **inputdict)

        # plugins
        if kind_of_step == 'plugin':
            with duration(self, stepname):
                resultdict = loader.call_plugin(
                    typeval, pipeline_args.getString('pluginpath'),
                    inputargs, **inputdict)
        resultdicts[stepname] = resultdict

        # breaking the loopstep
        # if the step has the keyword for loopbreaks assign the value
        if resultdict is not None and 'break' in resultdict:
            if resultdict['break']:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
def go(self):
    """
    Master side of the generic executable runner: load input/output data
    maps, derive missing output locations, build per-file argument/parset
    dictionaries, schedule one ComputeJob per (input, output) pair on the
    node script, then collect job results into mapfiles and self.outputs.

    Returns 0 on success, 1 on any fatal error.
    """
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {'args_format': self.inputs['args_format'],
                   'args_format_argument': self.inputs['args_format_argument'],
                   'args_format_option': self.inputs['args_format_option'],
                   'args_formatlongoption': self.inputs['args_format_longoption'],
                   'args_format_option_argument': self.inputs['args_format_option_argument']}
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
    # *********************************************************************
    # try loading input/output data file, validate output vs the input location if
    # output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        # 'mapfiles_in' (plural) overrides/extends 'mapfile_in'.
        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # ouput will be directed in the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            # inplace: node writes into the input files themselves.
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error(
            "Validation of data mapfiles failed!"
        )
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple outputfiles
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                mapfile_dir,
                self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    # NOTE(review): 'is not' here compares int identity, not value; it
    # happens to work for small counts via CPython int caching, but '!='
    # would be the robust comparison.
    if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) is not len(inputmapfiles):
        self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                          (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'],
                                         inputmapfiles, inlist):
            if not mapname in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                # mapfile itself is passed as a string argument.
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH').split(':')
    command = None
    # Look for the node script on PYTHONPATH first, then in the configured
    # recipe directories (later hits override earlier ones).
    for pl in pylist:
        if os.path.isfile(os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')

    inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(
        outputmapfiles[0], inputmapfiles[0])
    ):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        # Substitute the per-file values for each placeholder key in the
        # argument list and/or the parset dictionary.
        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={
                    "cores": self.inputs['nthreads']
                }
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if not k in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution. write all output dict entries to a mapfile
    #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    #check directory for stand alone mode
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir, )
        except OSError as exc:
            # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise
    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir,
                               self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)
    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs"
            )
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name
    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
def go(self):
    """
    Master side of the cimager recipe: build a GVDS description of all
    input data, split the observation into timesteps, and dispatch one
    node-side cimager job per (host, VDS, timestep) combination.

    Populates self.outputs['images'] with the produced image names.
    Returns 0 on success, 1 when vdsmaker or any imager process failed.
    """
    self.logger.info("Starting cimager run")
    super(cimager, self).go()
    self.outputs['images'] = []

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for cimager")
    gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                             "vds", "cimager.gvds")
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = gvds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(gvds_file)
    outputs = LOFARoutput(self.inputs)
    # cook_recipe returns non-zero on failure.
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

    # Read data for processing from the GVDS file
    # ----------------------------------------------------------------------
    parset = Parset(gvds_file)
    data = []
    for part in range(parset.getInt('NParts')):
        # FileSys is "host:mountpoint"; the host half tells us where to run.
        host = parset.getString("Part%d.FileSys" % part).split(":")[0]
        vds = parset.getString("Part%d.Name" % part)
        data.append((host, vds))

    # Divide data into timesteps for imaging
    # timesteps is a list of (start, end, results directory) tuples
    # ----------------------------------------------------------------------
    timesteps = []
    results_dir = self.inputs['results_dir']
    if self.inputs['timestep'] == 0:
        self.logger.info("No timestep specified; imaging all data")
        timesteps = [(None, None, results_dir)]
    else:
        self.logger.info("Using timestep of %s s" % self.inputs['timestep'])
        gvds = get_parset(gvds_file)
        start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
        end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
        step = float(self.inputs['timestep'])
        while start_time < end_time:
            timesteps.append((start_time, start_time + step,
                              os.path.join(results_dir, str(start_time))))
            start_time += step

    # Run each cimager process in a separate thread
    # ----------------------------------------------------------------------
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    for label, timestep in enumerate(timesteps):
        self.logger.info("Processing timestep %d" % label)
        jobs = []
        parsets = []
        start_time, end_time, resultsdir = timestep
        for host, vds in data:
            vds_data = Parset(vds)
            frequency_range = [
                vds_data.getDoubleVector("StartFreqs")[0],
                vds_data.getDoubleVector("EndFreqs")[-1]
            ]
            parsets.append(
                self.__get_parset(
                    os.path.basename(
                        vds_data.getString('FileName')).split('.')[0],
                    vds_data.getString("FileName"),
                    str(frequency_range),
                    vds_data.getStringVector("Extra.FieldDirectionType")[0],
                    vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                    vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                    'True', # cimager bug: non-restored image unusable
                ))
            jobs.append(
                ComputeJob(host, command,
                           arguments=[
                               self.inputs['imager_exec'],
                               vds,
                               parsets[-1],
                               resultsdir,
                               start_time,
                               end_time
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        # Harvest image names from the generated parsets, then remove the
        # temporary parset files.
        for parset in parsets:
            parset = Parset(parset)
            image_names = parset.getStringVector("Cimager.Images.Names")
            self.outputs['images'].extend(image_names)
        [os.unlink(parset) for parset in parsets]

    # Check if we recorded a failing process before returning
    # ----------------------------------------------------------------------
    if self.error.isSet():
        self.logger.warn("Failed imager process detected")
        return 1
    else:
        return 0
def run(self, infile, executable, args, kwargs, work_dir='/tmp',
        parsetasfile=False, args_format='', environment=''):
    """
    Node-side runner for CASA tasks: translate the step's parset keys into
    a generated casapy command script, wrap it in a shell script (to work
    around casapy return-code and logging quirks), and execute it.

    Returns 0 on success, 1 on any error.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        # infile may be a stringified list; use its first entry as the
        # reference dataset for existence checks and output naming.
        if infile[0] == '[':
            infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
            reffile = infiles[0]
        else:
            reffile = infile

        if os.path.exists(reffile):
            self.logger.info("Processing %s" % reffile)
        else:
            self.logger.error("Dataset %s does not exist" % reffile)
            return 1

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # race condition when running with more than one process on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:
                # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            self.logger.error(
                "Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!"
            )
            return 1
        else:
            nodeparset = Parset()
            sublist = []
            # Collect the distinct 'task.' prefixes; each becomes one CASA
            # task invocation in the generated script.
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                if str(k).find('.'):
                    if not str(k).split('.')[0] in sublist:
                        sublist.append(str(k).split('.')[0])

            #quick hacks below. for proof of concept.
            casastring = ''
            for sub in sublist:
                subpar = nodeparset.makeSubset(
                    nodeparset.fullModuleName(sub) + '.')
                casastring = sub + '('
                for k in subpar.keys():
                    if str(subpar[k]).find('/') == 0:
                        # Absolute path: quote as a string.
                        casastring += str(k) + '=' + "'" + str(
                            subpar[k]) + "'" + ','
                    elif str(subpar[k]).find('casastr/') == 0:
                        # Explicit string marker: strip prefix, quote.
                        casastring += str(k) + '=' + "'" + str(
                            subpar[k]).strip('casastr/') + "'" + ','
                    elif str(subpar[k]).lower() == 'false' or str(
                            subpar[k]).lower() == 'true':
                        casastring += str(k) + '=' + str(subpar[k]) + ','
                    else:
                        # Test if int/float or list of int/float
                        try:
                            self.logger.info('value: {}'.format(subpar[k]))
                            test = float(str(subpar[k]))
                            is_int_float = True
                        except:
                            is_int_float = False
                        if is_int_float:
                            casastring += str(k) + '=' + str(
                                subpar[k]) + ','
                        else:
                            if '[' in str(subpar[k]) or '(' in str(
                                    subpar[k]):
                                # Check if list of int/float or strings
                                list_vals = [
                                    f.strip() for f in str(
                                        subpar[k]).strip('[]()').split(',')
                                ]
                                is_int_float = True
                                for list_val in list_vals:
                                    try:
                                        test = float(list_val)
                                    except:
                                        is_int_float = False
                                        break
                                if is_int_float:
                                    casastring += str(k) + '=' + str(
                                        subpar[k]) + ','
                                else:
                                    # List of strings: re-quote each element.
                                    casastring += str(
                                        k) + '=' + '[{}]'.format(','.join([
                                            "'" + list_val + "'"
                                            for list_val in list_vals
                                        ])) + ','
                            else:
                                # Simple string
                                casastring += str(k) + '=' + "'" + str(
                                    subpar[k]) + "'" + ','
                casastring = casastring.rstrip(',')
                casastring += ')\n'

            # 1) return code of a casapy is not properly recognized by the pipeline
            # wrapping in shellscript works for succesful runs.
            # failed runs seem to hang the pipeline...
            # 2) casapy can not have two instances running from the same directory.
            # create tmp dirs
            casapydir = tempfile.mkdtemp(dir=work_dir)
            if casastring != '':
                casafilename = os.path.join(
                    work_dir,
                    os.path.basename(reffile) + '.casacommand.py')
                casacommandfile = open(casafilename, 'w')
                casacommandfile.write(casastring)
                casacommandfile.close()
                args.append(casafilename)
            somename = os.path.join(
                work_dir, os.path.basename(reffile) + '.casashell.sh')
            commandstring = ''
            commandstring += executable
            for item in args:
                # Quote arguments containing spaces or brackets.
                if str(item).find(' ') > -1 or str(item).find('[') > -1:
                    commandstring += ' "' + item + '"'
                else:
                    commandstring += ' ' + item

            crap = open(somename, 'w')
            crap.write('#!/bin/bash \n')
            crap.write('echo "Trying CASAPY command" \n')
            crap.write(commandstring + ' >& casa.log\n')
            crap.close()

            # file permissions
            st = os.stat(somename)
            os.chmod(
                somename,
                st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            cmd = [somename]
            with CatchLog4CPlus(
                casapydir,
                self.logger.name + "." + os.path.basename(reffile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception, err:
            self.logger.error(str(err))
            return 1
def run(self, executable, initscript, infile, key, db_name, db_user, db_host):
    """
    Node-side BBS kernel runner: build a kernel parset pointing at the
    given MeasurementSet and database, then execute KernelControl.

    Returns 0 on success, 1 when the kernel exits non-zero.
    """
    # executable: path to KernelControl executable
    # initscript: path to lofarinit.sh
    # infile: MeasurementSet for processing
    # key, db_name, db_user, db_host: database connection parameters
    # ----------------------------------------------------------------------
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up kernel parset")
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
        fd, parset_filename = mkstemp()
        kernel_parset = Parset()
        # NOTE(review): the loop variable below shadows the 'key' parameter;
        # harmless because the dict literal (which reads the parameter) is
        # fully evaluated before iteration starts, but worth renaming.
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": infile,
            "BBDB.Key": key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmLog": "",
            "ParmLoglevel": "",
            "ParmDB.Sky": infile + ".sky",
            "ParmDB.Instrument": infile + ".instrument"
        }.iteritems():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_filename)
        os.close(fd)
        self.logger.debug("Parset written to %s" % (parset_filename, ))

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp()
        env = read_initscript(self.logger, initscript)
        try:
            cmd = [executable, parset_filename, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(cmd, stdout=PIPE, stderr=PIPE,
                                           cwd=working_dir)
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(bbs_kernel_process.returncode,
                                         executable)
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the finally body is truncated in this chunk —
            # presumably it removes working_dir / the temp parset; confirm
            # against the full file.
class datamapper(BaseRecipe): """ Parses a list of filenames and attempts to map them to appropriate compute nodes (ie, which can access the files) on the LOFAR CEP cluster. Mapping by filename in this way is fragile, but is the best we can do for now. **Arguments** None. """ inputs = { 'mapfile': ingredient.StringField( '--mapfile', help= "Full path (including filename) of mapfile to produce (clobbered if exists)" ) } outputs = { 'mapfile': ingredient.FileField( help="Full path (including filename) of generated mapfile") } def go(self): self.logger.info("Starting datamapper run") super(datamapper, self).go() # We build lists of compute-nodes per cluster and data-per-cluster, # then match them up to schedule jobs in a round-robin fashion. # ---------------------------------------------------------------------- clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc")) if clusterdesc.subclusters: available_nodes = dict((cl.name, cycle(get_compute_nodes(cl))) for cl in clusterdesc.subclusters) else: available_nodes = { clusterdesc.name: cycle(get_compute_nodes(clusterdesc)) } data = defaultdict(list) for filename in self.inputs['args']: subcluster = filename.split(os.path.sep)[2] try: host = available_nodes[subcluster].next() except KeyError, key: self.logger.error("%s is not a known cluster" % str(key)) raise data[host].append(filename) # Dump the generated mapping to a parset # ---------------------------------------------------------------------- parset = Parset() for host, filenames in data.iteritems(): parset.addStringVector(host, filenames) parset.writeFile(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0