def read_device(device, use_cache=True):
    """Return the parsed struct for *device*, served from a timed cache.

    The struct is read with ``config.read_struct`` from
    ``<device_dir>/<device>.dev``, where ``device_dir`` is taken from
    ``config.map['global']``.

    A cached copy is returned only when an entry exists for *device*,
    *use_cache* is true, and the entry's timestamp plus ``CACHE_TIMEOUT``
    is still later than ``time.time()``.  In every other case the file is
    read again and both the cached struct and its timestamp are refreshed,
    so ``use_cache=False`` forces a reload but still updates the cache.
    """
    cached_and_fresh = (
        device in device_struct_cache
        and use_cache
        and time.time() < device_timeouts[device] + CACHE_TIMEOUT
    )
    if not cached_and_fresh:
        dev_path = config.map['global']['device_dir'] + '/' + device + ".dev"
        device_struct_cache[device] = config.read_struct(dev_path)
        # Stamp the entry after the read so its age counts from load time.
        device_timeouts[device] = time.time()
    return device_struct_cache[device]
def read_source(source, use_cache=True):
    """Return the parsed struct for *source*, served from a timed cache.

    The struct is read with ``config.read_struct`` from
    ``<source_dir>/<source>.src``, where ``source_dir`` is taken from
    ``config.map['global']``.

    A cached copy is returned only when an entry exists for *source*,
    *use_cache* is true, and the entry's timestamp plus ``CACHE_TIMEOUT``
    is still later than ``time.time()``.  In every other case the file is
    read again and both the cached struct and its timestamp are refreshed,
    so ``use_cache=False`` forces a reload but still updates the cache.
    """
    cached_and_fresh = (
        source in source_struct_cache
        and use_cache
        and time.time() < source_timeouts[source] + CACHE_TIMEOUT
    )
    if not cached_and_fresh:
        src_path = config.map['global']['source_dir'] + '/' + source + ".src"
        source_struct_cache[source] = config.read_struct(src_path)
        # Stamp the entry after the read so its age counts from load time.
        source_timeouts[source] = time.time()
    return source_struct_cache[source]
list : List available flows to run run <flow name> [--from <from time>] [--to <to time>] [--pretend] [--nocache] [--local] [--param name=value] [<source name> <device identifier>] Run the given flow. Optional time interval, default is past day. Optional source name and device identifier, default is all known devices. --pretend is for testing, doesn't actually add the tasks to the task manager --nocache redoes all of the steps instead of looking for cached steps --local makes no connections to the postgres database and manually runs the tasks, dumping the output in the current directory you must specify sourcename and device identifier """ elif sys.argv[0] == "list": for file in os.listdir(config.map["global"]["flow_dir"]): if file.endswith(".flow"): name = file[file.find("/") + 1 : -5] print "Flow name: " + name fmap = config.read_struct(config.map["global"]["flow_dir"] + "/" + file) print "\tSource type(s): [" + ",".join(fmap["source_types"]) + "]" print "\tTasks:\n\t\t" + "\n\t\t".join(fmap["tasks"]) print "" elif sys.argv[0] == "run": fromtime = datetime.now() - timedelta(weeks=1) totime = datetime.now() ret, vals, sys.argv = utils.check_arg(sys.argv, "--from", 1) if ret: try: fromtime = utils.str_to_date(vals[0]) except ValueError, msg: print "Bad from time: " + str(msg) ret, vals, sys.argv = utils.check_arg(sys.argv, "--to", 1)
def read_flow_file(fname):
    """Load a flow definition and build its step/file dependency graph.

    *fname* is either a path ending in ``.flow`` or a bare flow name, in
    which case it is resolved to ``<flow_dir>/<fname>.flow`` using
    ``config.map['global']['flow_dir']``.  The file is parsed with
    ``config.read_struct``.

    The returned ``FlowDef`` carries:

    * ``name``         -- the flow name without directory or ``.flow`` suffix
    * ``source_types`` -- copied from the definition
    * ``use_tmp``      -- True only if ``use_tmp`` is present and not ``'0'``
    * ``steps``        -- one ``Step`` per entry of ``tasks``
    * ``files``        -- one ``File`` per ``%O<n>`` marker found in a step's
      command
    * ``outputs``      -- files registered as flow outputs

    Every ``%I<n>`` marker in a step's command is resolved through the
    definition key ``<task>.I<n>`` to the output file that feeds it.  When
    the definition has an ``outputs`` key, steps and files with no
    connection to a registered output are pruned.

    Raises:
        ValueError: if ``config.read_struct`` returns ``None``.
        Exception: if no steps remain after pruning.
        SystemExit: (status 1, after printing an ``ERROR:`` line) for an
            unknown profile tag, an input that matches no output, or an
            input marked as a directory whose producer did not mark it so.
    """
    # Bare flow names are resolved inside the configured flow directory.
    if not fname.endswith('.flow'):
        fname = config.map['global']['flow_dir'] + '/' + fname + ".flow"
    lmap = config.read_struct(fname)

    # Reduce the path to the bare flow name (no directory, no extension);
    # this is done before the None check so the error shows the short name.
    if '/' in fname:
        fname = fname[fname.rfind('/') + 1:]
    if fname.endswith('.flow'):
        fname = fname[:-5]
    if lmap is None:
        raise ValueError("Could not load flow: " + fname)

    flow = FlowDef()
    flow.name = fname
    flow.source_types = lmap['source_types']
    flow.use_tmp = 'use_tmp' in lmap and lmap['use_tmp'] != '0'
    flow.files = []
    flow.steps = []
    flow.outputs = []

    # --- one Step per declared task ------------------------------------
    for tsk in lmap['tasks']:
        step = Step()
        step.name = tsk
        step.outputs = []
        step.inputs = []
        step.profile = config.getdict(lmap, tsk + '.profile', 'cpu_bound')
        # Scheduler profile entries are matched on the text that precedes
        # their first comma.
        known_profiles = [
            entry[0:entry.find(',')]
            for entry in config.map['mod_scheduler']['profiles']
        ]
        if step.profile not in known_profiles:
            print("ERROR: profile tag %s not found in config files" % step.profile)
            sys.exit(1)
        step.cmd = lmap[tsk + '.cmd']
        flow.steps.append(step)

    # --- look for output files: every "%O<digit>[D]" in a command -------
    for step in flow.steps:
        idx = step.cmd.find('%O')
        while idx != -1:
            produced = File()
            produced.cached = False
            step.outputs.append(produced)
            produced.src = step
            produced.dests = []
            # Only the single character after "%O" is read as the index.
            produced.index = int(step.cmd[idx + 2:idx + 3])
            # A trailing 'D' marks the output as a directory.
            produced.directory = (
                len(step.cmd) > idx + 3 and step.cmd[idx + 3] == 'D'
            )
            flow.files.append(produced)
            idx = step.cmd.find('%O', idx + 1)

    # --- look for input files: every "%I<digit>[D]" in a command --------
    for step in flow.steps:
        idx = step.cmd.find('%I')
        while idx != -1:
            # The key "<task>.I<n>" names the producing output; the value is
            # presumably of the form "<task>.O<m>" -- the character right
            # after the dot is skipped and the next one is the index.
            outspec = lmap[step.name + '.' + step.cmd[idx + 1:idx + 3]]
            dot = outspec.find('.')
            producer_name = outspec[0:dot]
            wants_directory = (
                len(step.cmd) > idx + 3 and step.cmd[idx + 3] == 'D'
            )
            found = False
            for candidate in flow.files:
                if (candidate.src.name == producer_name
                        and candidate.index == int(outspec[dot + 2:dot + 3])):
                    found = True
                    step.inputs.append(candidate)
                    candidate.dests.append((step, int(step.cmd[idx + 2:idx + 3])))
                    if wants_directory and not candidate.directory:
                        # Message corrected to describe the condition that
                        # actually fires: the consumer marks the input 'D'
                        # but the producer did not mark its output 'D'.
                        print("ERROR: Output file %d from %s was not specified as a directory, but task %s expects a directory"
                              % (candidate.index, candidate.src.name, step.name))
                        sys.exit(1)
                    break
            if not found:
                print("ERROR: Couldn't find output " + outspec)
                sys.exit(1)
            idx = step.cmd.find('%I', idx + 1)

    if 'outputs' in lmap:
        index = 0
        for candidate in flow.files:
            if (candidate.src.name + '.O%d' % candidate.index) in lmap['outputs']:
                candidate.dests.append(('OUTPUT', index))
                flow.outputs.append(candidate)
                index += 1
                # NOTE(review): the loop stops at the first match, so at most
                # one file is ever registered as a flow output and `index`
                # is always 0 when used.  Behavior kept as-is; confirm
                # whether multiple outputs were intended.
                break

        # now prune any steps/files not needed
        for step in flow.steps:
            step.mark = False
        for candidate in flow.files:
            candidate.mark = False

        def mark_recursive(needed):
            # Already visited: avoids endless recursion on cyclic wiring and
            # repeated walks over shared inputs.
            if needed.mark:
                return
            needed.src.mark = True
            needed.mark = True
            for upstream in needed.src.inputs:
                mark_recursive(upstream)

        for out_file in flow.outputs:
            mark_recursive(out_file)

        # Iterate over copies because entries are removed while looping.
        for step in flow.steps[:]:
            if not step.mark:
                print("Pruning step %s: no connection to output" % (step.name))
                flow.steps.remove(step)
        for candidate in flow.files[:]:
            if not candidate.mark:
                print("Pruning file %s.O%d : no connection to output"
                      % (candidate.src.name, candidate.index))
                flow.files.remove(candidate)
    else:
        print("No outputs in flow definition, all tasks are run")

    def build_file_dep_str(target):
        # "<task>(<dependency strings of the task's inputs>)[<index>]"
        upstream = ','.join(
            [build_file_dep_str(dep) for dep in target.src.inputs]
        )
        return target.src.name + '(' + upstream + ')[%d]' % target.index

    for candidate in flow.files:
        candidate.stepchain = build_file_dep_str(candidate)

    if not flow.steps:
        raise Exception("ERROR: No steps left after pruning")

    for candidate in flow.files:
        if not candidate.dests:
            print("WARNING: Task %s.O%d is not connected to anything"
                  % (candidate.src.name, candidate.index))
    return flow