# MPI mapping: rank 0 computes the PE-to-rank assignment (or falls back to a
# partitioned graph if that fails or --simple is requested), broadcasts the
# mapping, and every rank then runs the PEs assigned to it. Relies on
# module-level `comm`, `rank` and `size` (see the usage note below).
def process(workflow, inputs, args):
    processes = {}
    inputmappings = {}
    outputmappings = {}
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if rank == 0 and not args.simple:
        try:
            processes, inputmappings, outputmappings = \
                processor.assign_and_connect(workflow, size)
        except:
            success = False
    success = comm.bcast(success, root=0)
    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        nodes = [node.getContainedObject()
                 for node in ubergraph.graph.nodes()]
        if rank == 0:
            print('Partitions: %s' % ', '.join(
                ('[%s]' % ', '.join((pe.id for pe in part))
                 for part in workflow.partitions)))
            for node in ubergraph.graph.nodes():
                wrapperPE = node.getContainedObject()
                ns = [n.getContainedObject().id
                      for n in wrapperPE.workflow.graph.nodes()]
                print('%s contains %s' % (wrapperPE.id, ns))
            try:
                processes, inputmappings, outputmappings = \
                    processor.assign_and_connect(ubergraph, size)
                inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
                success = True
            except:
                print('dispel4py.mpi_process: '
                      'Not enough processes for execution of graph')
                success = False
        success = comm.bcast(success, root=0)
    if not success:
        return
    # Re-key inputs by PE id so the mapping is still usable after being
    # broadcast and deserialised on other ranks.
    inputs = {pe.id: v for pe, v in inputs.items()}
    processes = comm.bcast(processes, root=0)
    inputmappings = comm.bcast(inputmappings, root=0)
    outputmappings = comm.bcast(outputmappings, root=0)
    inputs = comm.bcast(inputs, root=0)
    if rank == 0:
        print('Processes: %s' % processes)
        # print('Inputs: %s' % inputs)
    for pe in nodes:
        if rank in processes[pe.id]:
            provided_inputs = processor.get_inputs(pe, inputs)
            wrapper = MPIWrapper(pe, provided_inputs)
            wrapper.targets = outputmappings[rank]
            wrapper.sources = inputmappings[rank]
            wrapper.process()

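# --- Usage note (illustrative, not part of the original function) ---
# `comm`, `rank` and `size` are module-level MPI handles; in dispel4py's
# MPI mapping they come from mpi4py, along these lines:
#
#     from mpi4py import MPI
#     comm = MPI.COMM_WORLD
#     rank = comm.Get_rank()
#     size = comm.Get_size()
#
# The function is then launched under an MPI runtime, for example:
#
#     mpiexec -n 4 dispel4py mpi my_module.my_graph
#
# (`my_module.my_graph` is a placeholder for a user workflow.)
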
def process_and_return(workflow, inputs, resultmappings=None):
    '''
    Executes the simple sequential processor for dispel4py graphs and
    returns the data collected from any unconnected output streams.

    :param workflow: the dispel4py graph to be enacted
    :param inputs: inputs for root PEs of the graphs. This is a dictionary
        mapping a PE to either a non-negative integer (the number of
        iterations) or a list of input data items.
    :rtype: a dictionary mapping PE ids to the output data produced by
        that PE
    '''
    # Every PE gets exactly one process in the sequential mapping.
    numnodes = 0
    for node in workflow.graph.nodes():
        numnodes += 1
        node.getContainedObject().numprocesses = 1
    processes, inputmappings, outputmappings = \
        processor.assign_and_connect(workflow, numnodes)
    # print('Processes: %s' % processes)
    # print(inputmappings)
    # print(outputmappings)
    proc_to_pe = {}
    for node in workflow.graph.nodes():
        pe = node.getContainedObject()
        proc_to_pe[processes[pe.id][0]] = pe
    simple = SimpleProcessingPE(inputmappings, outputmappings, proc_to_pe)
    simple.id = 'SimplePE'
    simple.result_mappings = resultmappings
    wrapper = SimpleProcessingWrapper(simple, [inputs])
    wrapper.targets = {}
    wrapper.sources = {}
    wrapper.process()
    # Now collect output data into a single list for each PE.
    outputs = {}
    for (pe_id, output_name), data in wrapper.outputs.items():
        if pe_id not in outputs:
            outputs[pe_id] = {}
        try:
            outputs[pe_id][output_name] += data
        except KeyError:
            outputs[pe_id][output_name] = data
    return outputs

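# --- Usage sketch (illustrative; assumes dispel4py's bundled test PEs) ---
def _example_process_and_return():
    # Build a two-PE pipeline and run it sequentially, collecting whatever
    # arrives on the consumer's unconnected 'output' stream.
    from dispel4py.workflow_graph import WorkflowGraph
    from dispel4py.examples.graph_testing.testing_PEs import \
        TestProducer, TestOneInOneOut
    prod = TestProducer()
    cons = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons, 'input')
    # Five producer iterations; the result maps PE ids to their output data.
    return process_and_return(graph, {prod: 5})
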
# Multiprocessing mapping: map PEs to processes, wire them together with
# multiprocessing queues, and run one worker process per PE instance.
def process(workflow, inputs, args):
    size = args.num
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if not args.simple:
        try:
            result = processor.assign_and_connect(workflow, size)
            processes, inputmappings, outputmappings = result
        except:
            success = False
    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        print('Partitions: %s' % ', '.join(
            ('[%s]' % ', '.join((pe.id for pe in part))
             for part in workflow.partitions)))
        for node in ubergraph.graph.nodes():
            wrapperPE = node.getContainedObject()
            pes = [n.getContainedObject().id
                   for n in wrapperPE.workflow.graph.nodes()]
            print('%s contains %s' % (wrapperPE.id, pes))
        try:
            result = processor.assign_and_connect(ubergraph, size)
            if result is None:
                return 'dispel4py.multi_process: ' \
                       'Not enough processes for execution of graph'
            processes, inputmappings, outputmappings = result
            inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
            success = True
            nodes = [node.getContainedObject()
                     for node in ubergraph.graph.nodes()]
        except:
            print(traceback.format_exc())
            return 'dispel4py.multi_process: ' \
                   'Could not create mapping for execution of graph'
    print('Processes: %s' % processes)
    process_pes = {}
    queues = {}
    result_queue = None
    try:
        if args.results:
            result_queue = multiprocessing.Queue()
    except AttributeError:
        pass
    for pe in nodes:
        provided_inputs = processor.get_inputs(pe, inputs)
        for proc in processes[pe.id]:
            # One deep copy of the PE per assigned process.
            cp = copy.deepcopy(pe)
            cp.rank = proc
            cp.log = types.MethodType(simpleLogger, cp)
            wrapper = MultiProcessingWrapper(proc, cp, provided_inputs)
            process_pes[proc] = wrapper
            wrapper.input_queue = multiprocessing.Queue()
            wrapper.input_queue.name = 'Queue_%s_%s' % (cp.id, cp.rank)
            wrapper.result_queue = result_queue
            queues[proc] = wrapper.input_queue
            wrapper.targets = outputmappings[proc]
            wrapper.sources = inputmappings[proc]
    # Give each wrapper the input queues of all its destination processes.
    for proc in process_pes:
        wrapper = process_pes[proc]
        wrapper.output_queues = {}
        for target in wrapper.targets.values():
            for inp, comm in target:
                for i in comm.destinations:
                    wrapper.output_queues[i] = queues[i]
    jobs = []
    for wrapper in process_pes.values():
        p = multiprocessing.Process(target=_processWorker, args=(wrapper,))
        jobs.append(p)
    for j in jobs:
        j.start()
    for j in jobs:
        j.join()
    if result_queue:
        result_queue.put(STATUS_TERMINATED)
    return result_queue

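# --- Usage note (illustrative, not part of the original function) ---
# `args` mirrors the dispel4py command-line options; a minimal stand-in
# (attribute names assumed from the accesses above) would be:
#
#     from argparse import Namespace
#     args = Namespace(num=4, simple=False, results=True)
#     result_queue = process(my_graph, {prod: 5}, args)
#
# With `results` enabled, outputs can be drained from `result_queue` until
# STATUS_TERMINATED appears.
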
# Apache Spark mapping: translate the dispel4py graph into a chain of RDD
# transformations, ordered by data dependency, and force evaluation of the
# terminal RDDs.
def process(sc, workflow, inputs, args):
    from dispel4py.new.processor \
        import assign_and_connect, _order_by_dependency
    graph = workflow.graph
    result = assign_and_connect(workflow, graph.number_of_nodes())
    if result is None:
        return
    processes, inputmappings, outputmappings = result
    process_to_pes = {}
    wrappers = {}
    for node in workflow.graph.nodes():
        pe = node.getContainedObject()
        wrapper = PEWrapper(pe)
        for p in processes[pe.id]:
            process_to_pes[p] = pe
            wrappers[p] = wrapper
    # print('Processes: %s' % processes)
    # print(inputmappings)
    # print(outputmappings)
    ordered = _order_by_dependency(inputmappings, outputmappings)
    # print('Ordered processes: %s' % ordered)
    output_rdd = {}
    result_rdd = {}
    for proc in ordered:
        inps = inputmappings[proc]
        outs = outputmappings[proc]
        wrapper = wrappers[proc]
        pe = process_to_pes[proc]
        if inps:
            # Inner node: apply the PE to the union of its input RDDs.
            if len(inps) == 1:
                for input_name, sources in inps.items():
                    inp_rdd = output_rdd[(sources[0], input_name)]
                out_rdd = inp_rdd.flatMap(wrapper.process)
            else:
                prev = None
                for input_name, sources in inps.items():
                    inp_rdd = output_rdd[(sources[0], input_name)]
                    if prev:
                        inp_rdd = prev.union(inp_rdd)
                    prev = inp_rdd
                out_rdd = prev.flatMap(wrapper.process)
            if len(outs) == 1:
                for output_name in outs:
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            else:
                for output_name in outs:
                    proj = Projection([output_name])
                    proj_rdd = out_rdd.flatMap(proj.project)
                    for inp in outs[output_name]:
                        rename = Rename({output_name: inp[0]})
                        output_rdd[(proc, inp[0])] = \
                            proj_rdd.flatMap(rename.rename)
            if not outs:
                result_rdd[proc] = out_rdd
        else:
            # Root node: build the start RDD from the provided inputs.
            pe_input = inputs[pe.id]
            if type(pe_input) is list:
                # Only one slice, so there are no repetitions - not the best.
                start_rdd = sc.parallelize(pe_input, 1)
            elif isinstance(pe_input, int):
                start_rdd = sc.parallelize(xrange(pe_input), 1)
            else:
                # Fingers crossed it's a string and the file exists!
                start_rdd = sc.textFile(pe_input)
            out_rdd = start_rdd.flatMap(wrapper.process)
            if len(outs) == 1:
                for output_name in outs:
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            else:
                for output_name in outs:
                    proj = Projection([output_name])
                    out_rdd = out_rdd.flatMap(proj.project).persist()
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            if not outs:
                result_rdd[proc] = out_rdd
    # print('RESULT PROCESSES: %s' % result_rdd.keys())
    for p in result_rdd:
        # foreach with a no-op forces evaluation of each terminal RDD.
        result = result_rdd[p].foreach(lambda x: None)

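# --- Usage note (illustrative, not part of the original function) ---
# `sc` is a pyspark SparkContext; PEWrapper, Projection and Rename are the
# helper classes of this Spark mapping. A minimal driver might look like:
#
#     from pyspark import SparkContext
#     sc = SparkContext(appName='dispel4py')
#     process(sc, my_graph, {prod.id: 10}, args)
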
# MPI mapping, as above, but tolerant of `inputs` dictionaries that are
# already keyed by PE id rather than by PE instance.
def process(workflow, inputs, args):
    processes = {}
    inputmappings = {}
    outputmappings = {}
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if rank == 0 and not args.simple:
        try:
            processes, inputmappings, outputmappings = \
                processor.assign_and_connect(workflow, size)
        except:
            success = False
    success = comm.bcast(success, root=0)
    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        nodes = [node.getContainedObject()
                 for node in ubergraph.graph.nodes()]
        if rank == 0:
            print('Partitions: %s' % ', '.join(
                ('[%s]' % ', '.join((pe.id for pe in part))
                 for part in workflow.partitions)))
            for node in ubergraph.graph.nodes():
                wrapperPE = node.getContainedObject()
                print('%s contains %s' % (
                    wrapperPE.id,
                    [n.getContainedObject().id
                     for n in wrapperPE.workflow.graph.nodes()]))
            try:
                processes, inputmappings, outputmappings = \
                    processor.assign_and_connect(ubergraph, size)
                inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
                success = True
            except:
                # print(traceback.format_exc())
                print('dispel4py.mpi_process: '
                      'Not enough processes for execution of graph')
                success = False
        success = comm.bcast(success, root=0)
    if not success:
        return
    try:
        inputs = {pe.id: v for pe, v in inputs.items()}
    except AttributeError:
        pass
    processes = comm.bcast(processes, root=0)
    inputmappings = comm.bcast(inputmappings, root=0)
    outputmappings = comm.bcast(outputmappings, root=0)
    inputs = comm.bcast(inputs, root=0)
    if rank == 0:
        print('Processes: %s' % processes)
        # print('Inputs: %s' % inputs)
    for pe in nodes:
        if rank in processes[pe.id]:
            provided_inputs = processor.get_inputs(pe, inputs)
            wrapper = MPIWrapper(pe, provided_inputs)
            wrapper.targets = outputmappings[rank]
            wrapper.sources = inputmappings[rank]
            wrapper.process()

# Multiprocessing mapping with optional profiling: identical flow to the
# plain multiprocessing `process` above, plus a Monitor that records
# per-run characterization profiles.
def process(workflow, inputs, args):
    workflow_submission_id = uuid.uuid1().hex
    print(workflow_submission_id)
    # Check whether profiling mode is switched on.
    if args.profileOn:
        manager = multiprocessing.Manager()
        # A shared dict to store characterization profiles.
        profiles = manager.dict()
        multi_monitor = Monitor(profiles, args, workflow)
        t1 = time.time()
    size = args.num
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if not args.simple:
        try:
            result = processor.assign_and_connect(workflow, size)
            processes, inputmappings, outputmappings = result
        except:
            success = False
    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        print('Partitions: %s' % ', '.join(
            ('[%s]' % ', '.join((pe.id for pe in part))
             for part in workflow.partitions)))
        for node in ubergraph.graph.nodes():
            wrapperPE = node.getContainedObject()
            pes = [n.getContainedObject().id
                   for n in wrapperPE.workflow.graph.nodes()]
            print('%s contains %s' % (wrapperPE.id, pes))
        try:
            result = processor.assign_and_connect(ubergraph, size)
            if result is None:
                return 'dispel4py.multi_process: ' \
                       'Not enough processes for execution of graph'
            processes, inputmappings, outputmappings = result
            inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
            success = True
            nodes = [node.getContainedObject()
                     for node in ubergraph.graph.nodes()]
        except:
            print(traceback.format_exc())
            return 'dispel4py.multi_process: ' \
                   'Could not create mapping for execution of graph'
    print('Processes: %s' % processes)
    # print('inputmappings: %s,\noutputmappings: %s'
    #       % (inputmappings, outputmappings))
    process_pes = {}
    queues = {}
    result_queue = None
    try:
        if args.results:
            result_queue = multiprocessing.Queue()
    except AttributeError:
        pass
    for pe in nodes:
        provided_inputs = processor.get_inputs(pe, inputs)
        for proc in processes[pe.id]:
            cp = copy.deepcopy(pe)
            cp.rank = proc
            cp.log = types.MethodType(simpleLogger, cp)
            if args.profileOn:
                wrapper = MultiProcessingWrapper(
                    proc, cp, provided_inputs,
                    workflow_submission_id=workflow_submission_id,
                    profiles=profiles)
            else:
                wrapper = MultiProcessingWrapper(
                    proc, cp, provided_inputs,
                    workflow_submission_id=workflow_submission_id)
            process_pes[proc] = wrapper
            wrapper.input_queue = multiprocessing.Queue()
            wrapper.input_queue.name = 'Queue_%s_%s' % (cp.id, cp.rank)
            wrapper.result_queue = result_queue
            queues[proc] = wrapper.input_queue
            wrapper.targets = outputmappings[proc]
            wrapper.sources = inputmappings[proc]
    for proc in process_pes:
        wrapper = process_pes[proc]
        wrapper.output_queues = {}
        for target in wrapper.targets.values():
            for inp, comm in target:
                for i in comm.destinations:
                    wrapper.output_queues[i] = queues[i]
    jobs = []
    for wrapper in process_pes.values():
        p = multiprocessing.Process(target=_processWorker, args=(wrapper,))
        jobs.append(p)
    for j in jobs:
        j.start()
    for j in jobs:
        j.join()
    if result_queue:
        result_queue.put(STATUS_TERMINATED)
    if args.profileOn:
        t2 = time.time()
        t3 = t2 - t1
        profiles['exec_%s' % workflow_submission_id] = t3
        profiles['submitted_%s' % workflow_submission_id] = time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(t1))
        # print('Total execution workflow time is %s recorded by process %s'
        #       % (t3, cp.rank))
        multi_monitor.get_pe_process_map(processes, workflow_submission_id)
        multi_monitor.analyse_and_record()
    return result_queue

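# --- Usage note (illustrative, not part of the original function) ---
# The profiling variant needs the extra `profileOn` switch; a minimal
# stand-in for `args` (names assumed from the attribute accesses above):
#
#     from argparse import Namespace
#     args = Namespace(num=4, simple=False, results=False, profileOn=True)
#
# Monitor and the extended MultiProcessingWrapper signature are
# project-specific extensions assumed to be importable alongside this code.
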