def create_iterative_chain(functions, FunctionPE_class=SimpleFunctionPE,
                           name_prefix='PE_', name_suffix=''):
    '''
    Creates a composite PE wrapping a pipeline that processes obspy streams.

    :param functions: list of functions that process data iteratively; each
        entry is either a function or a (function, params) pair. The function
        accepts one input parameter, data, and returns an output data block
        (or None).
    :param FunctionPE_class: PE class used to wrap each function
    :param name_prefix: prefix for the names of the created PEs
    :param name_suffix: suffix for the names of the created PEs
    :rtype: the WorkflowGraph implementing the composite PE, with 'input'
        and 'output' mapped to the first and last PE of the chain
    '''
    prev = None
    first = None
    graph = WorkflowGraph()
    for fn_desc in functions:
        try:
            fn = fn_desc[0]
            params = fn_desc[1]
        except TypeError:
            fn = fn_desc
            params = {}
        # print('adding %s to chain' % fn.__name__)
        pe = FunctionPE_class()
        pe.compute_fn = fn
        pe.params = params
        pe.name = name_prefix + fn.__name__ + name_suffix
        if prev:
            graph.connect(prev, IterativePE.OUTPUT_NAME,
                          pe, IterativePE.INPUT_NAME)
        else:
            first = pe
        prev = pe
    # Map inputs and outputs of the wrapper to the nodes in the subgraph
    graph.inputmappings = {'input': (first, IterativePE.INPUT_NAME)}
    graph.outputmappings = {'output': (prev, IterativePE.OUTPUT_NAME)}
    return graph
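For illustration, a hedged usage sketch of create_iterative_chain; it assumes that FunctionPE_class passes params as keyword arguments to the wrapped function (as dispel4py's SimpleFunctionPE does), and double/add_offset are made-up stand-ins for real processing functions:

# Hedged usage sketch; 'double' and 'add_offset' are hypothetical
# processing functions, not part of the code above.
def double(data):
    return data * 2

def add_offset(data, offset=1):
    return data + offset

# Entries are plain functions or (function, params) pairs, matching the
# try/except unpacking in create_iterative_chain.
composite = create_iterative_chain([double, (add_offset, {'offset': 5})],
                                   name_prefix='demo_')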
def solr_workflow(source, worker, monitor=None, headers=None):
    graph = WorkflowGraph()
    solrsearch = SolrSearch(url=source.get('url'),
                            query=source.get('query'),
                            filter_query=source.get('filter_query'))
    solrsearch.set_monitor(monitor, 0, 10)
    download = Download(url=wps_url(), headers=headers)
    download.set_monitor(monitor, 10, 50)
    doit = GenericWPS(headers=headers, **worker)
    doit.set_monitor(monitor, 50, 100)

    graph.connect(solrsearch, solrsearch.OUTPUT_NAME,
                  download, download.INPUT_NAME)
    graph.connect(download, download.OUTPUT_NAME,
                  doit, doit.INPUT_NAME)

    result = simple_process.process_and_return(
        graph, inputs={solrsearch: [{}]})

    status_location = result[doit.id][doit.STATUS_LOCATION_NAME][0]
    status = result[doit.id][doit.STATUS_NAME][0]
    return dict(worker=dict(status_location=status_location, status=status))
def create_workflow_icclim():
    su_calculation_r1i2p1 = NetCDFProcessing()
    su_calculation_r1i2p1.name = 'SU_calculation_r1i2p1'
    mean_calculation_r1i2p1 = NetCDFProcessing()
    mean_calculation_r1i2p1.name = 'Average_SU_r1i2p1'

    su_calculation_r2i2p1 = NetCDFProcessing()
    su_calculation_r2i2p1.name = 'SU_calculation_r2i2p1'
    mean_calculation_r2i2p1 = NetCDFProcessing()
    mean_calculation_r2i2p1.name = 'Average_SU_r2i2p1'

    su_calculation_r3i2p1 = NetCDFProcessing()
    su_calculation_r3i2p1.name = 'SU_calculation_r3i2p1'
    mean_calculation_r3i2p1 = NetCDFProcessing()
    mean_calculation_r3i2p1.name = 'Average_SU_r3i2p1'

    streamProducer = StreamProducer()
    streamProducer.name = 'SU_workflow'

    # Fan out from the producer: one SU -> mean branch per ensemble member.
    graph = WorkflowGraph()
    graph.connect(streamProducer, 'output', su_calculation_r1i2p1, 'input')
    graph.connect(su_calculation_r1i2p1, 'output',
                  mean_calculation_r1i2p1, 'input')
    graph.connect(streamProducer, 'output', su_calculation_r2i2p1, 'input')
    graph.connect(su_calculation_r2i2p1, 'output',
                  mean_calculation_r2i2p1, 'input')
    graph.connect(streamProducer, 'output', su_calculation_r3i2p1, 'input')
    graph.connect(su_calculation_r3i2p1, 'output',
                  mean_calculation_r3i2p1, 'input')
    return graph
def testSquare():
    graph = WorkflowGraph()
    prod = TestProducer(2)
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    last = TestTwoInOneOut()
    graph.connect(prod, 'output0', cons1, 'input')
    graph.connect(prod, 'output1', cons2, 'input')
    graph.connect(cons1, 'output', last, 'input0')
    graph.connect(cons2, 'output', last, 'input1')
    # 'args' is assumed to be a Namespace defined at module level
    args.num = 4
    args.results = True
    result_queue = process(graph, inputs={prod: 10}, args=args)
    results = []
    item = result_queue.get()
    while item != STATUS_TERMINATED:
        name, output, data = item
        tools.eq_(last.id, name)
        tools.eq_('output', output)
        results.append(data)
        item = result_queue.get()
    expected = {str(i): 2 for i in range(1, 11)}
    tools.eq_(expected, Counter(results))
def createWf():
    graph = WorkflowGraph()
    mat = CompMatrix(variables_number)
    mat.prov_cluster = 'record2'
    mc = MaxClique(-0.01)
    mc.prov_cluster = 'record0'
    start = Start()
    start.prov_cluster = 'record0'
    sources = {}
    mc.numprocesses = 1
    mat.numprocesses = 1

    for i in range(0, variables_number):
        sources[i] = Source(sampling_rate, i)
        sources[i].prov_cluster = 'record0'
        sources[i].numprocesses = 1

    # One CorrCoef PE per pair of sources; its output feeds the matrix PE
    # on a dedicated grouped input.
    for h in range(0, variables_number):
        graph.connect(start, 'output', sources[h], 'iterations')
        for j in range(h + 1, variables_number):
            cc = CorrCoef(batch_size, (h, j))
            cc.prov_cluster = 'record1'
            mat._add_input('input_%s_%s' % (h, j), grouping=[3])
            graph.connect(sources[h], 'output', cc, 'input1')
            graph.connect(sources[j], 'output', cc, 'input2')
            graph.connect(cc, 'output', mat, 'input_%s_%s' % (h, j))
            cc.numprocesses = 1

    graph.connect(mat, 'output', mc, 'matrix')
    return graph
def testPipelineNotEnoughProcesses():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    cons3 = TestOneInOneOut()
    cons4 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    graph.connect(cons2, 'output', cons3, 'input')
    graph.connect(cons3, 'output', cons4, 'input')
    args = argparse.Namespace()
    args.num = 4
    args.simple = False
    args.results = True
    result_queue = process(graph, inputs={prod: 10}, args=args)
    results = []
    item = result_queue.get()
    while item != STATUS_TERMINATED:
        name, output, data = item
        tools.eq_(cons4.id, name)
        tools.eq_('output', output)
        results.extend(data)
        item = result_queue.get()
    tools.eq_(Counter(range(1, 11)), Counter(results))
def graph_stddev():
    prod = NumberProducer(1000)
    std = parallelStdDev()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', std, 'input')
    return graph
def graph_count():
    prod = NumberProducer(1000)
    c = parallelCount()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', c, 'input')
    return graph
def graph_avg():
    prod = NumberProducer(1000)
    a = parallelAvg()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', a, 'input')
    return graph
Spec'ing TestOneInOneOut5
Spec'ing TestProducer6
spouts {'TestProducer6': ... }
bolts {'TestOneInOneOut5': ... }
Created Storm submission package in \
    /var/folders/58/7bjr3s011kgdtm5lx58prc_40000gn/T/tmp5ePEq3
Running: java -client -Dstorm.options= -Dstorm.home= ...
Submitting topology 'TestTopology' to storm.example.com:6627 ...
'''

from dispel4py.examples.graph_testing import testing_PEs as t
from dispel4py.workflow_graph import WorkflowGraph


def testPipeline(graph):
    '''
    Adds a pipeline of one producer and five one-in-one-out PEs
    to the given graph.

    :rtype: the given graph with the pipeline added
    '''
    prod = t.TestProducer()
    prev = prod
    for i in range(5):
        cons = t.TestOneInOneOut()
        graph.connect(prev, 'output', cons, 'input')
        prev = cons
    return graph

''' important: this is the graph variable '''
graph = testPipeline(WorkflowGraph())
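The module-level graph variable is what the dispel4py tool picks up on submission (e.g. to Storm, as the console output above shows). The same graph can also be exercised in-process for a quick local check; a hedged sketch, mirroring the id-keyed integer input used in test_process_input_by_id below (the producer id 'TestProducer0' stands in for whatever id dispel4py auto-assigns):

# Hedged local-run sketch for the submission module above.
from dispel4py.new import simple_process

# 'TestProducer0' is an assumed auto-assigned id; the integer means
# that many empty input iterations for the producer.
simple_process.process(graph, inputs={'TestProducer0': 5})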
def test_dot_pipeline():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestOneInOneOut()
    graph.connect(prod, 'output', cons, 'input')
    draw(graph)
def testOnePE():
    graph = WorkflowGraph()
    prod = TestProducer()
    graph.add(prod)
    results = simple_process.process(graph, {prod: [{}]})
    tools.eq_({(prod.id, 'output'): [1]}, results)
def test_process_input_by_id():
    prod = TestProducer()
    cons = PrintDataConsumer()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons, 'input')
    simple_process.process(graph, inputs={prod.id: 5})
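test_process_input_by_id keys the inputs by prod.id rather than the instance, with the integer as shorthand for five empty iterations. When no PrintDataConsumer is attached, the mapping returned by simple_process.process can be checked instead; a hedged variant whose expected output shape is extrapolated from testOnePE above:

def test_collect_results_by_id():
    # Hedged variant: no printing consumer, inspect the returned mapping.
    prod = TestProducer()
    graph = WorkflowGraph()
    graph.add(prod)
    results = simple_process.process(graph, inputs={prod.id: 5})
    # Extrapolated from testOnePE: unconnected outputs are collected as
    # {(pe.id, output_name): [data, ...]}.
    tools.eq_({(prod.id, 'output'): [1, 2, 3, 4, 5]}, results)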
def simpleProcess(graph, sources, inputs):
    '''
    This method is used if there are fewer MPI processes than nodes in
    the graph (PE instances).
    '''
    uberWorkflow = WorkflowGraph()
    wrappers = {}
    externalConnections = []
    partitions = []
    try:
        partitions = graph.partitions
    except AttributeError:
        sourcePartition = []
        otherPartition = []
        for node in graph.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in sources:
                sourcePartition.append(pe)
            else:
                otherPartition.append(pe)
        partitions = [sourcePartition, otherPartition]
    if rank == 0:
        print('Partitions: ', ', '.join(
            '[%s]' % ', '.join(pe.id for pe in part) for part in partitions))
    mappedInput = copy.deepcopy(inputs)
    for component in partitions:
        inputnames = {}
        outputnames = {}
        workflow = copy.deepcopy(graph)
        componentIds = []
        for pe in component:
            componentIds.append(pe.id)
        # print('component: %s' % componentIds)
        # print('inputs: %s' % inputs)
        for node in workflow.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in componentIds:
                # Collect the edges that cross the partition boundary.
                for edge in workflow.graph.edges(node, data=True):
                    direction = edge[2]['DIRECTION']
                    source = direction[0]
                    source_output = edge[2]['FROM_CONNECTION']
                    dest = direction[1]
                    dest_input = edge[2]['TO_CONNECTION']
                    if dest == pe and source.id not in componentIds:
                        try:
                            inputnames[dest.id + '_' + dest_input]\
                                .append((dest, dest_input))
                        except KeyError:
                            inputnames[dest.id + '_' + dest_input] = \
                                [(dest, dest_input)]
                    elif source == pe and dest.id not in componentIds:
                        outputnames[(source.id, source_output)] = \
                            source.id + '_' + source_output
                        try:
                            grouping = \
                                dest.inputconnections[dest_input][GROUPING]
                        except KeyError:
                            grouping = None
                        externalConnections.append(
                            (source.id, source_output,
                             dest.id, dest_input, grouping))
                if pe.id in sources and mappedInput is not None:
                    # Map the top-level inputs to the wrapped source PE.
                    for name in pe.inputconnections:
                        inputnames[pe.id + '_' + name] = [(pe, name)]
                    for block, mappedblock in zip(inputs, mappedInput):
                        if block == TERMINATE_MSG:
                            if mappedblock != TERMINATE_MSG:
                                del mappedInput[-1]
                                mappedInput.append(TERMINATE_MSG)
                            continue
                        for input_name in block:
                            mappedblock[pe.id + '_' + input_name] = \
                                block[input_name]
        # print('Mapped input: %s' % mappedInput)
        for node in workflow.graph.nodes():
            if node.getContainedObject().id not in componentIds:
                workflow.graph.remove_node(node)
        # print('inputnames: %s' % inputnames)
        wrapperPE = GraphWrapperPE(workflow, inputnames, outputnames)
        for node in workflow.graph.nodes():
            wrappers[node.getContainedObject().id] = wrapperPE
    # print('External connections: %s' % externalConnections)
    for (source_id, source_output, dest_id,
         dest_input, grouping) in externalConnections:
        sourceWrapper = wrappers[source_id]
        destWrapper = wrappers[dest_id]
        if grouping:
            destWrapper.inputconnections[
                dest_id + '_' + dest_input][GROUPING] = grouping
        uberWorkflow.connect(sourceWrapper, source_id + '_' + source_output,
                             destWrapper, dest_id + '_' + dest_input)
        # print('%s: connected %s to %s' % (
        #     rank, sourceWrapper.id + '.' + source_id + '_' + source_output,
        #     destWrapper.id + '.' + dest_id + '_' + dest_input))
    if rank == 0:
        for node in uberWorkflow.graph.nodes():
            wrapperPE = node.getContainedObject()
            print('%s contains %s' % (
                wrapperPE.id,
                [n.getContainedObject().id
                 for n in wrapperPE.workflow.graph.nodes()]))
    success = True
    processes = {}
    if rank == 0:
        success, sources, processes = assign(uberWorkflow)
    success = comm.bcast(success, root=0)
    if success:
        if rank == 0:
            print('Processes:', processes)
        processes = comm.bcast(processes, root=0)
        buildProcess(uberWorkflow, processes, mappedInput)
    else:
        print('Simple processing: Not enough MPI processes.')
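When graph.partitions is present, simpleProcess wraps each listed sub-list into one GraphWrapperPE instead of falling back to the source/rest split. A minimal sketch of pre-assigning partitions, hedged: it reuses the test PEs from the snippets above and assumes simpleProcess is invoked by the mapping rather than called directly:

# Hedged sketch: steer the partitioning above via graph.partitions.
graph = WorkflowGraph()
prod = TestProducer()
cons1 = TestOneInOneOut()
cons2 = TestOneInOneOut()
graph.connect(prod, 'output', cons1, 'input')
graph.connect(cons1, 'output', cons2, 'input')

# One wrapper PE per sub-list; without this attribute, simpleProcess
# falls back to [sources, everything else].
graph.partitions = [[prod], [cons1, cons2]]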
            NAME: 'output',
            TYPE: ['timestamp', 'location', 'stream']
        }
    }

    def process(self, inputs):
        stream = read('/Users/akrause/VERCE/data/laquila/'
                      '20100501-20120930_fseed/TERO/20100501.fseed')
        return {'output': [{}, {}, {'data': stream}]}


from dispel4py.workflow_graph import WorkflowGraph

controlParameters = {
    'runId': '12345',
    'username': '******',
    'outputdest': './'
}

from dispel4py.seismo.obspy_stream import \
    createProcessingComposite, INPUT_NAME, OUTPUT_NAME

chain = []
chain.append(PEMeanSub)
chain.append(PEDetrend)
composite = createProcessingComposite(chain,
                                      controlParameters=controlParameters)

producer = TestProducer()
graph = WorkflowGraph()
graph.connect(producer, 'output', composite, INPUT_NAME)
def testUnconnected():
    graph = WorkflowGraph()
    testPipeline(graph)
    testPipeline(graph)
    del graph.partitions
    return graph
def create_partitioned(workflow_all):
    processes_all, inputmappings_all, outputmappings_all = \
        assign_and_connect(workflow_all, len(workflow_all.graph.nodes()))
    proc_to_pe_all = {v[0]: k for k, v in processes_all.items()}
    partitions = get_partitions(workflow_all)
    external_connections = []
    pe_to_partition = {}
    partition_pes = []
    for i in range(len(partitions)):
        for pe in partitions[i]:
            pe_to_partition[pe.id] = i
    for index in range(len(partitions)):
        result_mappings = {}
        part = partitions[index]
        partition_id = index
        component_ids = [pe.id for pe in part]
        workflow = copy.deepcopy(workflow_all)
        graph = workflow.graph
        for node in graph.nodes():
            if node.getContainedObject().id not in component_ids:
                graph.remove_node(node)
        processes, inputmappings, outputmappings = \
            assign_and_connect(workflow, len(graph.nodes()))
        proc_to_pe = {}
        for node in graph.nodes():
            pe = node.getContainedObject()
            proc_to_pe[processes[pe.id][0]] = pe
        for node in graph.nodes():
            pe = node.getContainedObject()
            pe.rank = index
            proc_all = processes_all[pe.id][0]
            for output_name in outputmappings_all[proc_all]:
                for dest_input, comm_all in \
                        outputmappings_all[proc_all][output_name]:
                    dest = proc_to_pe_all[comm_all.destinations[0]]
                    if dest not in processes:
                        # it's an external connection
                        external_connections.append(
                            (comm_all, partition_id, pe.id, output_name,
                             pe_to_partition[dest], dest, dest_input))
                        try:
                            result_mappings[pe.id].append(output_name)
                        except KeyError:
                            result_mappings[pe.id] = [output_name]
        partition_pe = SimpleProcessingPE(inputmappings, outputmappings,
                                          proc_to_pe)
        partition_pe.workflow = workflow
        partition_pe.partition_id = partition_id
        if result_mappings:
            partition_pe.result_mappings = result_mappings
        partition_pe.map_inputs = _map_inputs_to_pes
        partition_pe.map_outputs = _map_outputs_from_pes
        partition_pes.append(partition_pe)
    # print('EXTERNAL CONNECTIONS: %s' % external_connections)
    ubergraph = WorkflowGraph()
    ubergraph.pe_to_partition = pe_to_partition
    ubergraph.partition_pes = partition_pes
    # sort the external connections so that nodes are added in the same order
    # if doing this in multiple processes in parallel this is important
    for comm, source_partition, source_id, source_output, \
            dest_partition, dest_id, dest_input in \
            sorted(external_connections):
        partition_pes[source_partition]._add_output(
            (source_id, source_output))
        partition_pes[dest_partition]._add_input(
            (dest_id, dest_input), grouping=comm.name)
        ubergraph.connect(partition_pes[source_partition],
                          (source_id, source_output),
                          partition_pes[dest_partition],
                          (dest_id, dest_input))
    return ubergraph
def simpleProcess(graph, sources, inputs):
    uberWorkflow = WorkflowGraph()
    wrappers = {}
    externalConnections = []
    partitions = []
    try:
        partitions = graph.partitions
    except AttributeError:
        sourcePartition = []
        otherPartition = []
        for node in graph.graph.nodes():
            pe = node.getContainedObject()
            if pe in sources:
                sourcePartition.append(pe)
            else:
                otherPartition.append(pe)
        partitions = [sourcePartition, otherPartition]
    print('Partitions: ', ', '.join(
        '[%s]' % ', '.join(pe.id for pe in part) for part in partitions))
    sources = [pe.id for pe in sources]
    mappedInput = copy.deepcopy(inputs)
    for component in partitions:
        inputnames = {}
        outputnames = {}
        workflow = copy.deepcopy(graph)
        componentIds = []
        for pe in component:
            componentIds.append(pe.id)
        # print('component: %s' % componentIds)
        # print('inputs: %s' % inputs)
        # print('mapped input before: %s' % mappedInput)
        # print('sources: %s' % sources)
        for node in workflow.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in componentIds:
                for edge in workflow.graph.edges(node, data=True):
                    direction = edge[2]['DIRECTION']
                    source = direction[0]
                    source_output = edge[2]['FROM_CONNECTION']
                    dest = direction[1]
                    dest_input = edge[2]['TO_CONNECTION']
                    if dest == pe and source.id not in componentIds:
                        try:
                            inputnames[dest.id + '_' + dest_input]\
                                .append((dest, dest_input))
                        except KeyError:
                            inputnames[dest.id + '_' + dest_input] = \
                                [(dest, dest_input)]
                    elif source == pe and dest.id not in componentIds:
                        outputnames[(source.id, source_output)] = \
                            source.id + '_' + source_output
                        try:
                            grouping = \
                                dest.inputconnections[dest_input][GROUPING]
                        except KeyError:
                            grouping = None
                        externalConnections.append(
                            (source.id, source_output,
                             dest.id, dest_input, grouping))
                if pe.id in sources and mappedInput is not None:
                    for name in pe.inputconnections:
                        inputnames[pe.id + '_' + name] = [(pe, name)]
                    for block, mappedblock in zip(inputs, mappedInput):
                        for input_name in block:
                            mappedblock[pe.id + '_' + input_name] = \
                                block[input_name]
        for node in workflow.graph.nodes():
            if node.getContainedObject().id not in componentIds:
                workflow.graph.remove_node(node)
        # print('inputnames: %s' % inputnames)
        wrapperPE = GraphWrapperPE(workflow, inputnames, outputnames)
        for node in workflow.graph.nodes():
            wrappers[node.getContainedObject().id] = wrapperPE
    # print('External connections: %s' % externalConnections)
    for (source_id, source_output, dest_id,
         dest_input, grouping) in externalConnections:
        sourceWrapper = wrappers[source_id]
        destWrapper = wrappers[dest_id]
        if grouping:
            destWrapper.inputconnections[
                dest_id + '_' + dest_input][GROUPING] = grouping
        uberWorkflow.connect(sourceWrapper, source_id + '_' + source_output,
                             destWrapper, dest_id + '_' + dest_input)
    return uberWorkflow, mappedInput