def testWordCount():
    """Run a small word-count pipeline: producer -> filter -> counter.

    Builds a three-node WorkflowGraph and drives it with 100 producer
    iterations through the simple (sequential) processor.
    """
    producer = RandomWordProducer()
    word_filter = RandomFilter()
    counter = WordCounter()

    graph = WorkflowGraph()
    graph.connect(producer, "output", word_filter, "input")
    graph.connect(word_filter, "output", counter, "input")

    # 100 = number of iterations requested from the producer node.
    simple_process.process(graph, inputs={producer: 100})
def testWordCount():
    """Exercise the word-count workflow end to end.

    Connects RandomWordProducer -> RandomFilter -> WordCounter and
    processes 100 inputs sequentially.
    """
    graph = WorkflowGraph()
    source = RandomWordProducer()
    stage_filter = RandomFilter()
    sink = WordCounter()

    for upstream, downstream in ((source, stage_filter), (stage_filter, sink)):
        graph.connect(upstream, 'output', downstream, 'input')

    simple_process.process(graph, inputs={source: 100})
def InitiateNewRun(
        graph,
        provRecorderClass,
        provImpClass=ProvenancePE,
        input=None,
        username=None,
        workflowId=None,
        description="",
        system_id=None,
        workflowName=None,
        w3c_prov=False,
        runId=None,
        clustersRecorders=None,
        feedbackPEs=None):
    """Register a new workflow run and attach provenance recording to *graph*.

    Records run metadata via a NewWorkflowRun PE on a private graph, then
    injects provenance types (``injectProv``) into *graph* and attaches the
    recorder PEs (``attachProvenanceRecorderPE``).

    :param graph: the user's WorkflowGraph to instrument.
    :param provRecorderClass: recorder PE class, instantiated per cluster.
    :param provImpClass: provenance PE implementation injected into *graph*.
    :param input: run input description stored in the run metadata.
    :param username: required; owner of the run.
    :param workflowId: required; identifier of the workflow.
    :param description: free-text run description.
    :param system_id: optional system identifier stored with the run.
    :param workflowName: required; human-readable workflow name.
    :param w3c_prov: if True, recorders emit W3C PROV.
    :param runId: run identifier; generated via getUniqueId() when None.
    :param clustersRecorders: mapping of cluster name -> recorder class.
    :param feedbackPEs: PEs that process recorder feedback.
    :returns: the run identifier actually used.
    :raises Exception: when username, workflowId or workflowName is missing.
    """
    # Mutable defaults ([], {}) are shared across calls in Python; use the
    # None-sentinel idiom instead.
    if input is None:
        input = []
    if clustersRecorders is None:
        clustersRecorders = {}
    if feedbackPEs is None:
        feedbackPEs = []

    if username is None or workflowId is None or workflowName is None:
        raise Exception("Missing values")
    if runId is None:
        runId = getUniqueId()

    newrun = NewWorkflowRun()
    newrun.parameters = {"input": input,
                         "username": username,
                         "workflowId": workflowId,
                         "description": description,
                         "system_id": system_id,
                         "workflowName": workflowName,
                         "runId": runId,
                         # NOTE(review): hidden dependency on the command line;
                         # raises IndexError when no argument was passed.
                         "mapping": sys.argv[1]
                         }

    # Run the metadata-recording PE on its own private graph.
    _graph = WorkflowGraph()
    provrec0 = provRecorderClass(toW3C=w3c_prov)
    _graph.connect(newrun, "output", provrec0, provrec0.INPUT_NAME)
    simple_process.process(_graph, {'NewWorkflowRun': [{'input': 'None'}]})

    # Instrument the user's graph with provenance PEs and recorders.
    injectProv(graph, provImpClass)
    print("PREPARING PROVENANCE RECORDERS:")
    print("Provenance Recorders Clusters: " + str(clustersRecorders))
    print("PEs processing Recorders feedback: " + str(feedbackPEs))
    attachProvenanceRecorderPE(
        graph,
        provRecorderClass,
        runId,
        username,
        w3c_prov,
        clustersRecorders,
        feedbackPEs)
    return runId
def esgf_workflow(source, worker, monitor=None, headers=None):
    """Run an ESGF search -> download -> WPS-worker pipeline.

    :param source: dict of ESGF search options (url, constraints/facets,
        query, limit, distrib, replica, latest, temporal, start, end).
    :param worker: keyword arguments for the GenericWPS worker PE.
    :param monitor: optional progress monitor shared by all stages.
    :param headers: HTTP headers forwarded to download and worker PEs.
    :returns: dict with the worker's status_location and status.
    """
    graph = WorkflowGraph()

    # TODO: configure limit
    # 'facets' is accepted as a fallback for backward compatibility.
    constraints = source.get('constraints', source.get('facets'))
    search = EsgSearch(
        url=wps_url(),
        search_url=source.get('url', 'https://esgf-data.dkrz.de/esg-search'),
        constraints=constraints,
        query=source.get('query'),
        limit=source.get('limit', 100),
        search_type='File',
        distrib=source.get('distrib'),
        replica=source.get('replica'),
        latest=source.get('latest'),
        temporal=source.get('temporal'),
        start=source.get('start'),
        end=source.get('end'))
    search.set_monitor(monitor, 0, 10)

    fetch = Download(url=wps_url(), headers=headers)
    fetch.set_monitor(monitor, 10, 50)

    worker_pe = GenericWPS(headers=headers, **worker)
    worker_pe.set_monitor(monitor, 50, 100)

    graph.connect(search, search.OUTPUT_NAME, fetch, fetch.INPUT_NAME)
    graph.connect(fetch, fetch.OUTPUT_NAME, worker_pe, worker_pe.INPUT_NAME)

    outputs = simple_process.process(graph, inputs={search: [{}]})

    status_location = outputs.get((worker_pe.id, worker_pe.STATUS_LOCATION_NAME))[0]
    status = outputs.get((worker_pe.id, worker_pe.STATUS_NAME))[0]
    return dict(worker=dict(status_location=status_location, status=status))
def thredds_workflow(source, worker, monitor=None, headers=None):
    """Run a Thredds download -> WPS-worker pipeline.

    :param source: keyword arguments for the ThreddsDownload PE.
    :param worker: keyword arguments for the GenericWPS worker PE.
    :param monitor: optional progress monitor shared by both stages.
    :param headers: HTTP headers forwarded to both PEs.
    :returns: dict with the worker's status_location and status.
    """
    fetch = ThreddsDownload(url=wps_url(), headers=headers, **source)
    fetch.set_monitor(monitor, 10, 50)

    worker_pe = GenericWPS(headers=headers, **worker)
    worker_pe.set_monitor(monitor, 50, 100)

    graph = WorkflowGraph()
    graph.connect(fetch, fetch.OUTPUT_NAME, worker_pe, worker_pe.INPUT_NAME)

    outputs = simple_process.process(graph, inputs={fetch: [{}]})

    status_location = outputs.get((worker_pe.id, worker_pe.STATUS_LOCATION_NAME))[0]
    status = outputs.get((worker_pe.id, worker_pe.STATUS_NAME))[0]
    return dict(worker=dict(status_location=status_location, status=status))
def solr_workflow(source, worker, monitor=None, headers=None):
    """Run a Solr search -> download -> WPS-worker pipeline.

    :param source: dict with Solr search options (url, query, filter_query).
    :param worker: keyword arguments for the GenericWPS worker PE.
    :param monitor: optional progress monitor shared by all stages.
    :param headers: HTTP headers forwarded to download and worker PEs.
    :returns: dict with the worker's status_location and status.
    """
    search = SolrSearch(
        url=source.get('url'),
        query=source.get('query'),
        filter_query=source.get('filter_query'))
    search.set_monitor(monitor, 0, 10)

    fetch = Download(url=wps_url(), headers=headers)
    fetch.set_monitor(monitor, 10, 50)

    worker_pe = GenericWPS(headers=headers, **worker)
    worker_pe.set_monitor(monitor, 50, 100)

    graph = WorkflowGraph()
    graph.connect(search, search.OUTPUT_NAME, fetch, fetch.INPUT_NAME)
    graph.connect(fetch, fetch.OUTPUT_NAME, worker_pe, worker_pe.INPUT_NAME)

    outputs = simple_process.process(graph, inputs={search: [{}]})

    status_location = outputs.get((worker_pe.id, worker_pe.STATUS_LOCATION_NAME))[0]
    status = outputs.get((worker_pe.id, worker_pe.STATUS_NAME))[0]
    return dict(worker=dict(status_location=status_location, status=status))
def test_process_input_by_id():
    """Verify that simple_process accepts inputs keyed by PE id.

    The inputs mapping uses ``prod.id`` (a string) rather than the PE
    object itself as the key.
    """
    producer = TestProducer()
    consumer = PrintDataConsumer()

    graph = WorkflowGraph()
    graph.connect(producer, "output", consumer, "input")

    # Key the inputs by the producer's id, not the producer instance.
    simple_process.process(graph, inputs={producer.id: 5})
def test_process_input_by_id():
    """Run a producer/consumer graph, supplying inputs by PE id string."""
    source = TestProducer()
    sink = PrintDataConsumer()

    workflow = WorkflowGraph()
    workflow.connect(source, 'output', sink, 'input')

    # The processor also accepts the PE's id as the inputs key.
    simple_process.process(workflow, inputs={source.id: 5})