Code example #1
def create_iterative_chain(functions, FunctionPE_class=SimpleFunctionPE, name_prefix='PE_', name_suffix=''):
    
    '''
    Creates a workflow graph wrapping a pipeline of functions that process data iteratively.

    :param functions: list of functions, or (function, params) pairs, that process data iteratively. Each function accepts one input parameter, data, and returns an output data block (or None).
    :param FunctionPE_class: PE class used to wrap each function in the chain
    :param name_prefix: prefix for the names of the generated PEs
    :param name_suffix: suffix for the names of the generated PEs
    :rtype: the WorkflowGraph that was created, with input and output mappings for the wrapper
    '''

    prev = None
    first = None
    graph = WorkflowGraph()

    for fn_desc in functions:
        try:
            fn = fn_desc[0]
            params = fn_desc[1]
        except TypeError:
            fn = fn_desc
            params = {}
        
        #print 'adding %s to chain' % fn.__name__
        pe = FunctionPE_class()
        pe.compute_fn = fn
        pe.params = params
        pe.name = name_prefix + fn.__name__ + name_suffix

        if prev:
            graph.connect(prev, IterativePE.OUTPUT_NAME, pe, IterativePE.INPUT_NAME)
        else:
            first = pe
        prev = pe
        
    # Map inputs and outputs of the wrapper to the nodes in the subgraph
    graph.inputmappings =  { 'input'  : (first, IterativePE.INPUT_NAME) }
    graph.outputmappings = { 'output' : (prev, IterativePE.OUTPUT_NAME) }

    return graph
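
A minimal usage sketch for the function above. The stage functions double and add_offset are hypothetical placeholders; each chain entry is either a bare function or a (function, params) pair, matching the unpacking in the loop above.

def double(data):
    # hypothetical stage: multiply the incoming data block
    return data * 2

def add_offset(data, offset=0):
    # hypothetical stage: shift the data block by a constant
    return data + offset

# Each entry is either a function or a (function, params) pair.
chain = [double, (add_offset, {'offset': 10})]
pipeline_graph = create_iterative_chain(chain, name_prefix='demo_')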
Code example #2
File: workflow.py  Project: Ouranosinc/malleefowl
def solr_workflow(source, worker, monitor=None, headers=None):
    graph = WorkflowGraph()

    solrsearch = SolrSearch(url=source.get('url'),
                            query=source.get('query'),
                            filter_query=source.get('filter_query'))
    solrsearch.set_monitor(monitor, 0, 10)
    download = Download(url=wps_url(), headers=headers)
    download.set_monitor(monitor, 10, 50)
    doit = GenericWPS(headers=headers, **worker)
    doit.set_monitor(monitor, 50, 100)

    graph.connect(solrsearch, solrsearch.OUTPUT_NAME, download,
                  download.INPUT_NAME)
    graph.connect(download, download.OUTPUT_NAME, doit, doit.INPUT_NAME)

    result = simple_process.process_and_return(graph,
                                               inputs={solrsearch: [{}]})

    status_location = result[doit.id][doit.STATUS_LOCATION_NAME][0]
    status = result[doit.id][doit.STATUS_NAME][0]
    return dict(worker=dict(status_location=status_location, status=status))
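
A hedged sketch of how solr_workflow above might be invoked. The URLs, the query, and the worker keys are placeholders; the worker dict is expanded directly into GenericWPS(**worker), so its exact keys depend on that class.

# Hypothetical invocation; all URLs and identifiers are placeholders.
source = {
    'url': 'http://localhost:8983/solr/birdhouse',
    'query': 'tasmax',
    'filter_query': None,
}
worker = {'identifier': 'dummy_process'}  # passed through to GenericWPS(**worker)
result = solr_workflow(source, worker)
print(result['worker']['status'], result['worker']['status_location'])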
Code example #3
def create_workflow_icclim():

    su_calculation_r1i2p1 = NetCDFProcessing()
    su_calculation_r1i2p1.name = 'SU_calculation_r1i2p1'

    mean_calculation_r1i2p1 = NetCDFProcessing()
    mean_calculation_r1i2p1.name = "Average_SU_r1i2p1"

    su_calculation_r2i2p1 = NetCDFProcessing()
    su_calculation_r2i2p1.name = 'SU_calculation_r2i2p1'

    mean_calculation_r2i2p1 = NetCDFProcessing()
    mean_calculation_r2i2p1.name = "Average_SU_r2i2p1"

    su_calculation_r3i2p1 = NetCDFProcessing()
    su_calculation_r3i2p1.name = 'SU_calculation_r3i2p1'

    mean_calculation_r3i2p1 = NetCDFProcessing()
    mean_calculation_r3i2p1.name = "Average_SU_r3i2p1"

    streamProducer = StreamProducer()
    streamProducer.name = 'SU_workflow'

    graph = WorkflowGraph()

    graph.connect(streamProducer, 'output', su_calculation_r1i2p1, 'input')
    graph.connect(su_calculation_r1i2p1, 'output', mean_calculation_r1i2p1,
                  'input')

    graph.connect(streamProducer, 'output', su_calculation_r2i2p1, 'input')
    graph.connect(su_calculation_r2i2p1, 'output', mean_calculation_r2i2p1,
                  'input')

    graph.connect(streamProducer, 'output', su_calculation_r3i2p1, 'input')
    graph.connect(su_calculation_r3i2p1, 'output', mean_calculation_r3i2p1,
                  'input')

    return graph
Code example #4
def testSquare():
    graph = WorkflowGraph()
    prod = TestProducer(2)
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    last = TestTwoInOneOut()
    graph.connect(prod, 'output0', cons1, 'input')
    graph.connect(prod, 'output1', cons2, 'input')
    graph.connect(cons1, 'output', last, 'input0')
    graph.connect(cons2, 'output', last, 'input1')
    args = argparse.Namespace()
    args.num = 4
    args.results = True
    result_queue = process(graph, inputs={prod: 10}, args=args)
    results = []
    item = result_queue.get()
    while item != STATUS_TERMINATED:
        name, output, data = item
        tools.eq_(last.id, name)
        tools.eq_('output', output)
        results.append(data)
        item = result_queue.get()
    expected = {str(i): 2 for i in range(1, 11)}
    tools.eq_(expected, Counter(results))
Code example #5
def createWf():
    graph = WorkflowGraph()
    mat = CompMatrix(variables_number)
    mat.prov_cluster = 'record2'
    mc = MaxClique(-0.01)
    mc.prov_cluster = 'record0'
    start = Start()
    start.prov_cluster = 'record0'
    sources = {}
    mc.numprocesses = 1
    mat.numprocesses = 1

    for i in range(0, variables_number):
        sources[i] = Source(sampling_rate, i)
        sources[i].prov_cluster = 'record0'
        #'+str(i%variables_number)
        #+str(i%7)
        sources[i].numprocesses = 1
        #sources[i].name="Source"+str(i)

    for h in range(0, variables_number):
        graph.connect(start, 'output', sources[h], 'iterations')
        for j in range(h + 1, variables_number):
            cc = CorrCoef(batch_size, (h, j))
            cc.prov_cluster = 'record1'
            #+str(h%variables_number)

            mat._add_input('input' + '_' + str(h) + '_' + str(j), grouping=[3])
            graph.connect(sources[h], 'output', cc, 'input1')
            graph.connect(sources[j], 'output', cc, 'input2')
            graph.connect(cc, 'output', mat,
                          'input' + '_' + str(h) + '_' + str(j))
            cc.numprocesses = 1

    graph.connect(mat, 'output', mc, 'matrix')

    return graph
Code example #6
def testPipelineNotEnoughProcesses():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    cons3 = TestOneInOneOut()
    cons4 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    graph.connect(cons2, 'output', cons3, 'input')
    graph.connect(cons3, 'output', cons4, 'input')
    args = argparse.Namespace()
    args.num = 4
    args.simple = False
    args.results = True
    result_queue = process(graph, inputs={prod: 10}, args=args)
    results = []
    item = result_queue.get()
    while item != STATUS_TERMINATED:
        name, output, data = item
        tools.eq_((cons4.id, 'output'), (name, output))
        results.extend(data)
        item = result_queue.get()
    tools.eq_(Counter(range(1, 11)), Counter(results))
Code example #7
def graph_stddev():
    prod = NumberProducer(1000)
    std = parallelStdDev()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', std, 'input')
    return graph
Code example #8
def graph_count():
    prod = NumberProducer(1000)
    c = parallelCount()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', c, 'input')
    return graph
Code example #9
def graph_avg():
    prod = NumberProducer(1000)
    a = parallelAvg()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', a, 'input')
    return graph
Code example #10
        Spec'ing TestOneInOneOut5
        Spec'ing TestProducer6
        spouts {'TestProducer6': ... }
        bolts  {'TestOneInOneOut5': ... }
        Created Storm submission package in \
/var/folders/58/7bjr3s011kgdtm5lx58prc_40000gn/T/tmp5ePEq3
        Running: java -client -Dstorm.options= -Dstorm.home= ...
        Submitting topology 'TestTopology' to storm.example.com:6627 ...

'''

from dispel4py.examples.graph_testing import testing_PEs as t
from dispel4py.workflow_graph import WorkflowGraph


def testPipeline(graph):
    '''
    Adds a pipeline to the given graph.

    :rtype: the created graph
    '''
    prod = t.TestProducer()
    prev = prod
    for i in range(5):
        cons = t.TestOneInOneOut()
        graph.connect(prev, 'output', cons, 'input')
        prev = cons
    return graph
''' important: this is the graph_variable '''
graph = testPipeline(WorkflowGraph())
Code example #11
def test_dot_pipeline():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestOneInOneOut()
    graph.connect(prod, 'output', cons, 'input')
    draw(graph)
Code example #12
def testOnePE():
    graph = WorkflowGraph()
    prod = TestProducer()
    graph.add(prod)
    results = simple_process.process(graph, {prod: [{}]})
    tools.eq_({(prod.id, 'output'): [1]}, results)
Code example #13
def test_process_input_by_id():
    prod = TestProducer()
    cons = PrintDataConsumer()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons, 'input')
    simple_process.process(graph, inputs={prod.id: 5})
Code example #14
File: worker_mpi.py  Project: krischer/dispel4py
def simpleProcess(graph, sources, inputs):
    '''
    This method is used if there are fewer MPI processes than there are nodes (PE instances) in the graph.
    '''
    uberWorkflow = WorkflowGraph()
    wrappers = {}
    externalConnections = []
    partitions = []
    try:
        partitions = graph.partitions
    except AttributeError: 
        sourcePartition = []
        otherPartition = []
        for node in graph.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in sources:
                sourcePartition.append(pe)
            else:
                otherPartition.append(pe)
        partitions = [sourcePartition, otherPartition]
    if rank == 0:
        print('Partitions: ', ', '.join(('[%s]' % ', '.join((pe.id for pe in part)) for part in partitions)))

    mappedInput = copy.deepcopy(inputs)
    for component in partitions:
        inputnames = {}
        outputnames = {}
        workflow = copy.deepcopy(graph)
        componentIds = []
        
        for pe in component:
            componentIds.append(pe.id)
            
        # print('component: %s' % componentIds)
        # print('inputs: %s' % inputs)
        
        for node in workflow.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in componentIds:
                for edge in workflow.graph.edges(node, data=True):
                    direction = edge[2]['DIRECTION']
                    source = direction[0]
                    source_output = edge[2]['FROM_CONNECTION']
                    dest = direction[1]
                    dest_input = edge[2]['TO_CONNECTION']
                    if dest == pe and source.id not in componentIds:
                        try:
                            inputnames[dest.id + '_' + dest_input].append((dest, dest_input))
                        except KeyError:
                            inputnames[dest.id + '_' + dest_input] = [(dest, dest_input)]
                    elif source == pe and dest.id not in componentIds:
                        outputnames[(source.id, source_output)] = source.id + '_' + source_output
                        try:
                            grouping = dest.inputconnections[dest_input][GROUPING]
                        except KeyError:
                            grouping = None
                        externalConnections.append((source.id, source_output, dest.id, dest_input, grouping))
                   
                if pe.id in sources and mappedInput is not None:
                    for name in pe.inputconnections:
                        inputnames[pe.id + '_' + name] = [(pe, name)]
                    for block, mappedblock in zip(inputs, mappedInput):
                        if block == TERMINATE_MSG:
                            if mappedblock != TERMINATE_MSG:
                                del mappedInput[-1]
                                mappedInput.append(TERMINATE_MSG)
                            continue
                        for input_name in block:
                            mappedblock[pe.id + '_' + input_name] = block[input_name]
                    # print('Mapped input: %s' % mappedInput)

        for node in workflow.graph.nodes():
            if node.getContainedObject().id not in componentIds:
                workflow.graph.remove_node(node)
        # print ("inputnames : %s" % inputnames)
        wrapperPE = GraphWrapperPE(workflow, inputnames, outputnames)
        for node in workflow.graph.nodes():
            wrappers[node.getContainedObject().id] = wrapperPE
    
    # print ('External connections: %s' % externalConnections)
    for (source_id, source_output, dest_id, dest_input, grouping) in externalConnections:
        sourceWrapper = wrappers[source_id]
        destWrapper = wrappers[dest_id]
        if grouping:
            destWrapper.inputconnections[dest_id + '_' + dest_input][GROUPING] = grouping
        uberWorkflow.connect(sourceWrapper, source_id + '_' + source_output, destWrapper, dest_id + '_' + dest_input)
        # print ('%s: connected %s to %s' % (rank, sourceWrapper.id + '.' + source_id + '_' + source_output,
        #          destWrapper.id + '.' + dest_id + '_' + dest_input))
        
    if rank == 0:
        for node in uberWorkflow.graph.nodes():
            wrapperPE = node.getContainedObject()
            print('%s contains %s' % (wrapperPE.id, [n.getContainedObject().id for n in wrapperPE.workflow.graph.nodes()]))

    success = True
    processes = {}
    if rank == 0:
        success, sources, processes = assign(uberWorkflow)
    success = comm.bcast(success, root=0)
    if success:
        if rank == 0:
            print('Processes:', processes)
        processes = comm.bcast(processes, root=0)
        buildProcess(uberWorkflow, processes, mappedInput)
    else:
        print('Simple processing: Not enough MPI processes.')
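
A sketch, under stated assumptions, of how a caller might prepare a graph for simpleProcess above: graph.partitions is the optional attribute the function checks before falling back to a source/non-source split, sources is a list of PE ids, and inputs is a list of input blocks in the same [{}] style as the simple_process examples on this page. TestProducer and TestOneInOneOut are assumed to come from the testing module used elsewhere here, and the call would have to run under mpiexec since the function relies on the module-level MPI comm and rank.

# Hypothetical setup; runs only in an MPI context (e.g. via mpiexec).
prod = TestProducer()
cons1 = TestOneInOneOut()
cons2 = TestOneInOneOut()
graph = WorkflowGraph()
graph.connect(prod, 'output', cons1, 'input')
graph.connect(cons1, 'output', cons2, 'input')
# Optionally group PEs into partitions; without this attribute simpleProcess
# puts the sources in one partition and everything else in another.
graph.partitions = [[prod], [cons1, cons2]]
simpleProcess(graph, sources=[prod.id], inputs=[{}])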
Code example #15
                NAME: 'output',
                TYPE: ['timestamp', 'location', 'stream']
            }
        }

    def process(self, inputs):
        stream = read(
            '/Users/akrause/VERCE/data/laquila/20100501-20120930_fseed/TERO/20100501.fseed'
        )
        return {'output': [{}, {}, {'data': stream}]}


from dispel4py.workflow_graph import WorkflowGraph

controlParameters = {
    'runId': '12345',
    'username': '******',
    'outputdest': "./"
}

from dispel4py.seismo.obspy_stream import createProcessingComposite, INPUT_NAME, OUTPUT_NAME

chain = []
chain.append(PEMeanSub)
chain.append(PEDetrend)
composite = createProcessingComposite(chain,
                                      controlParameters=controlParameters)

producer = TestProducer()
graph = WorkflowGraph()
graph.connect(producer, 'output', composite, INPUT_NAME)
Code example #16
File: worker_mpi_test.py  Project: krischer/dispel4py
def testUnconnected():
    graph = WorkflowGraph()
    testPipeline(graph)
    testPipeline(graph)
    del graph.partitions
    return graph
Code example #17
def create_partitioned(workflow_all):
    processes_all, inputmappings_all, outputmappings_all = assign_and_connect(
        workflow_all, len(workflow_all.graph.nodes()))
    proc_to_pe_all = {v[0]: k for k, v in processes_all.items()}
    partitions = get_partitions(workflow_all)
    external_connections = []
    pe_to_partition = {}
    partition_pes = []
    for i in range(len(partitions)):
        for pe in partitions[i]:
            pe_to_partition[pe.id] = i
    for index in range(len(partitions)):
        result_mappings = {}
        part = partitions[index]
        partition_id = index
        component_ids = [pe.id for pe in part]
        workflow = copy.deepcopy(workflow_all)
        graph = workflow.graph
        for node in graph.nodes():
            if node.getContainedObject().id not in component_ids:
                graph.remove_node(node)
        processes, inputmappings, outputmappings = \
            assign_and_connect(workflow, len(graph.nodes()))
        proc_to_pe = {}
        for node in graph.nodes():
            pe = node.getContainedObject()
            proc_to_pe[processes[pe.id][0]] = pe
        for node in graph.nodes():
            pe = node.getContainedObject()
            pe.rank = index
            proc_all = processes_all[pe.id][0]
            for output_name in outputmappings_all[proc_all]:
                for dest_input, comm_all in outputmappings_all[proc_all][
                        output_name]:
                    dest = proc_to_pe_all[comm_all.destinations[0]]
                    if dest not in processes:
                        # it's an external connection
                        external_connections.append(
                            (comm_all, partition_id, pe.id, output_name,
                             pe_to_partition[dest], dest, dest_input))
                        try:
                            result_mappings[pe.id].append(output_name)
                        except KeyError:
                            result_mappings[pe.id] = [output_name]
        partition_pe = SimpleProcessingPE(inputmappings, outputmappings,
                                          proc_to_pe)
        partition_pe.workflow = workflow
        partition_pe.partition_id = partition_id
        if result_mappings:
            partition_pe.result_mappings = result_mappings
        partition_pe.map_inputs = _map_inputs_to_pes
        partition_pe.map_outputs = _map_outputs_from_pes
        partition_pes.append(partition_pe)
    # print 'EXTERNAL CONNECTIONS : %s' % external_connections
    ubergraph = WorkflowGraph()
    ubergraph.pe_to_partition = pe_to_partition
    ubergraph.partition_pes = partition_pes
    # Sort the external connections so that nodes are added in the same order;
    # this is important if the graph is built by multiple processes in parallel.
    for comm, source_partition, source_id, source_output, \
        dest_partition, dest_id, dest_input in sorted(external_connections):
        partition_pes[source_partition]._add_output((source_id, source_output))
        partition_pes[dest_partition]._add_input((dest_id, dest_input),
                                                 grouping=comm.name)
        ubergraph.connect(partition_pes[source_partition],
                          (source_id, source_output),
                          partition_pes[dest_partition], (dest_id, dest_input))
    return ubergraph
Code example #18
def simpleProcess(graph, sources, inputs):

    uberWorkflow = WorkflowGraph()
    wrappers = {}
    externalConnections = []
    partitions = []
    try:
        partitions = graph.partitions
    except AttributeError: 
        sourcePartition = []
        otherPartition = []
        for node in graph.graph.nodes():
            pe = node.getContainedObject()
            if pe in sources:
                sourcePartition.append(pe)
            else:
                otherPartition.append(pe)
        partitions = [sourcePartition, otherPartition]
    print('Partitions: ', ', '.join(('[%s]' % ', '.join((pe.id for pe in part)) for part in partitions)))
    sources = [ pe.id for pe in sources ]

    mappedInput = copy.deepcopy(inputs)
    for component in partitions:
        inputnames = {}
        outputnames = {}
        workflow = copy.deepcopy(graph)
        componentIds = []
        
        for pe in component:
            componentIds.append(pe.id)
            
        # print('component: %s' % componentIds)
        # print('inputs: %s' % inputs)
        
        # print 'MAPPED INPUT BEFORE: %s' % mappedInput
        # print 'SOURCES : %s' % sources
        
        for node in workflow.graph.nodes():
            pe = node.getContainedObject()
            if pe.id in componentIds:
                for edge in workflow.graph.edges(node, data=True):
                    direction = edge[2]['DIRECTION']
                    source = direction[0]
                    source_output = edge[2]['FROM_CONNECTION']
                    dest = direction[1]
                    dest_input = edge[2]['TO_CONNECTION']
                    if dest == pe and source.id not in componentIds:
                        try:
                            inputnames[dest.id + '_' + dest_input].append((dest, dest_input))
                        except KeyError:
                            inputnames[dest.id + '_' + dest_input] = [(dest, dest_input)]
                    elif source == pe and dest.id not in componentIds:
                        outputnames[(source.id, source_output)] = source.id + '_' + source_output
                        try:
                            grouping = dest.inputconnections[dest_input][GROUPING]
                        except KeyError:
                            grouping = None
                        externalConnections.append((source.id, source_output, dest.id, dest_input, grouping))
                
                if pe.id in sources and mappedInput is not None:
                    for name in pe.inputconnections:
                        inputnames[pe.id + '_' + name] = [(pe, name)]
                    for block, mappedblock in zip(inputs, mappedInput):
                        for input_name in block:
                            mappedblock[pe.id + '_' + input_name] = block[input_name]

        for node in workflow.graph.nodes():
            if node.getContainedObject().id not in componentIds:
                workflow.graph.remove_node(node)
        # print ("inputnames : %s" % inputnames)
        wrapperPE = GraphWrapperPE(workflow, inputnames, outputnames)
        for node in workflow.graph.nodes():
            wrappers[node.getContainedObject().id] = wrapperPE
    
    # print ('External connections: %s' % externalConnections)
    for (source_id, source_output, dest_id, dest_input, grouping) in externalConnections:
        sourceWrapper = wrappers[source_id]
        destWrapper = wrappers[dest_id]
        if grouping:
            destWrapper.inputconnections[dest_id + '_' + dest_input][GROUPING] = grouping
        uberWorkflow.connect(sourceWrapper, source_id + '_' + source_output, destWrapper, dest_id + '_' + dest_input)
        
    return uberWorkflow, mappedInput