def process(workflow, inputs, args):
    processes = {}
    inputmappings = {}
    outputmappings = {}
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if rank == 0 and not args.simple:
        try:
            processes, inputmappings, outputmappings =\
                processor.assign_and_connect(workflow, size)
        except Exception:
            success = False
    success = comm.bcast(success, root=0)

    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        nodes = [node.getContainedObject() for node in ubergraph.graph.nodes()]
        if rank == 0:
            print('Partitions: %s' % ', '.join(
                ('[%s]' % ', '.join((pe.id for pe in part))
                    for part in workflow.partitions)))
            for node in ubergraph.graph.nodes():
                wrapperPE = node.getContainedObject()
                ns = [n.getContainedObject().id
                      for n in wrapperPE.workflow.graph.nodes()]
                print('%s contains %s' % (wrapperPE.id, ns))

            try:
                processes, inputmappings, outputmappings = \
                    processor.assign_and_connect(ubergraph, size)
                inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
                success = True
            except Exception:
                print('dispel4py.mpi_process: '
                      'Not enough processes for execution of graph')
                success = False

    success = comm.bcast(success, root=0)

    if not success:
        return

    inputs = {pe.id: v for pe, v in inputs.items()}
    processes = comm.bcast(processes, root=0)
    inputmappings = comm.bcast(inputmappings, root=0)
    outputmappings = comm.bcast(outputmappings, root=0)
    inputs = comm.bcast(inputs, root=0)

    if rank == 0:
        print('Processes: %s' % processes)
        # print('Inputs: %s' % inputs)

    for pe in nodes:
        if rank in processes[pe.id]:
            provided_inputs = processor.get_inputs(pe, inputs)
            wrapper = MPIWrapper(pe, provided_inputs)
            wrapper.targets = outputmappings[rank]
            wrapper.sources = inputmappings[rank]
            wrapper.process()
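This MPI variant relies on module-level state (`comm`, `rank`, `size` and the `processor` module) defined outside the snippet. A minimal sketch of that setup, assuming mpi4py and dispel4py's processor module (the import paths are assumptions, not part of the snippet above):

# Assumed module-level setup for the MPI example above (sketch only).
from mpi4py import MPI

from dispel4py.new import processor  # assumed import path for assign_and_connect etc.

comm = MPI.COMM_WORLD   # communicator shared by all ranks
rank = comm.Get_rank()  # rank of this process
size = comm.Get_size()  # total number of MPI processes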
Example #3
def process_and_return(workflow, inputs, resultmappings=None):
    '''
    Executes the simple sequential processor for dispel4py graphs and returns
    the data collected from any unconnected output streams.

    :param workflow: the dispel4py graph to be enacted
    :param inputs: inputs for the root PEs of the graph.
        This is a dictionary mapping a PE to either a non-negative integer
        (the number of iterations) or a list of input data items.
    :return: a dictionary mapping PE ids to the output data produced by that PE

    '''
    numnodes = 0
    for node in workflow.graph.nodes():
        numnodes += 1
        node.getContainedObject().numprocesses = 1
    processes, inputmappings, outputmappings = \
        processor.assign_and_connect(workflow, numnodes)
    # print 'Processes: %s' % processes
    # print inputmappings
    # print outputmappings
    proc_to_pe = {}
    for node in workflow.graph.nodes():
        pe = node.getContainedObject()
        proc_to_pe[processes[pe.id][0]] = pe

    simple = SimpleProcessingPE(inputmappings, outputmappings, proc_to_pe)
    simple.id = 'SimplePE'
    simple.result_mappings = resultmappings
    wrapper = SimpleProcessingWrapper(simple, [inputs])
    wrapper.targets = {}
    wrapper.sources = {}
    wrapper.process()

    # now collect output data into a single list for each PE
    outputs = {}
    for (pe_id, output_name), data in wrapper.outputs.items():
        if pe_id not in outputs:
            outputs[pe_id] = {}
        try:
            outputs[pe_id][output_name] += data
        except KeyError:
            outputs[pe_id][output_name] = data
    return outputs
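A hedged usage sketch for `process_and_return`, assuming dispel4py's `WorkflowGraph` class and its bundled test PEs (`TestProducer`, `TestOneInOneOut` are assumptions here); the inputs dictionary maps the producer PE to an iteration count, as described in the docstring:

# Usage sketch (assumes dispel4py's WorkflowGraph and bundled test PEs).
from dispel4py.workflow_graph import WorkflowGraph
from dispel4py.examples.graph_testing.testing_PEs import TestProducer, TestOneInOneOut

prod = TestProducer()
cons = TestOneInOneOut()
graph = WorkflowGraph()
graph.connect(prod, 'output', cons, 'input')

# Run the producer for 5 iterations; data on unconnected outputs is collected.
results = process_and_return(graph, {prod: 5})
print(results)  # e.g. {'TestOneInOneOut1': {'output': [...]}} - exact ids depend on the graph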
Example #5
def process(workflow, inputs, args):
    size = args.num
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if not args.simple:
        try:
            result = processor.assign_and_connect(workflow, size)
            processes, inputmappings, outputmappings = result
        except Exception:
            success = False

    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        print('Partitions: %s' % ', '.join(('[%s]' % ', '.join(
            (pe.id for pe in part)) for part in workflow.partitions)))
        for node in ubergraph.graph.nodes():
            wrapperPE = node.getContainedObject()
            pes = [n.getContainedObject().id for
                   n in wrapperPE.workflow.graph.nodes()]
            print('%s contains %s' % (wrapperPE.id, pes))

        try:
            result = processor.assign_and_connect(ubergraph, size)
            if result is None:
                return 'dispel4py.multi_process: ' \
                       'Not enough processes for execution of graph'
            processes, inputmappings, outputmappings = result
            inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
            success = True
            nodes = [node.getContainedObject()
                     for node in ubergraph.graph.nodes()]
        except Exception:
            print(traceback.format_exc())
            return 'dispel4py.multi_process: ' \
                   'Could not create mapping for execution of graph'

    print('Processes: %s' % processes)

    process_pes = {}
    queues = {}
    result_queue = None
    try:
        if args.results:
            result_queue = multiprocessing.Queue()
    except AttributeError:
        pass
    for pe in nodes:
        provided_inputs = processor.get_inputs(pe, inputs)
        for proc in processes[pe.id]:
            cp = copy.deepcopy(pe)
            cp.rank = proc
            cp.log = types.MethodType(simpleLogger, cp)
            wrapper = MultiProcessingWrapper(proc, cp, provided_inputs)
            process_pes[proc] = wrapper
            wrapper.input_queue = multiprocessing.Queue()
            wrapper.input_queue.name = 'Queue_%s_%s' % (cp.id, cp.rank)
            wrapper.result_queue = result_queue
            queues[proc] = wrapper.input_queue
            wrapper.targets = outputmappings[proc]
            wrapper.sources = inputmappings[proc]
    for proc in process_pes:
        wrapper = process_pes[proc]
        wrapper.output_queues = {}
        for target in wrapper.targets.values():
            for inp, comm in target:
                for i in comm.destinations:
                    wrapper.output_queues[i] = queues[i]

    jobs = []
    for wrapper in process_pes.values():
        p = multiprocessing.Process(target=_processWorker, args=(wrapper,))
        jobs.append(p)

    for j in jobs:
        j.start()

    for j in jobs:
        j.join()

    if result_queue:
        result_queue.put(STATUS_TERMINATED)
    return result_queue
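A minimal invocation sketch for the multiprocessing variant, assuming an argparse-style namespace with the attributes this function reads (`num`, `simple`, `results`) and reusing the hypothetical `graph` and `prod` from the sketch above:

# Usage sketch: build an args namespace and drain the returned result queue.
import argparse

args = argparse.Namespace(num=4, simple=False, results=True)
ret = process(graph, {prod: 10}, args)

if isinstance(ret, str):       # an error message was returned
    print(ret)
elif ret is not None:          # a result queue: read until the termination sentinel
    while True:
        item = ret.get()
        if item == STATUS_TERMINATED:
            break
        print(item)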
Example #6
def process(sc, workflow, inputs, args):

    from dispel4py.new.processor import assign_and_connect, _order_by_dependency
    graph = workflow.graph
    result = assign_and_connect(workflow, graph.number_of_nodes())
    if result is None:
        return

    processes, inputmappings, outputmappings = result
    process_to_pes = {}
    wrappers = {}
    for node in workflow.graph.nodes():
        pe = node.getContainedObject()
        wrapper = PEWrapper(pe)
        for p in processes[pe.id]:
            process_to_pes[p] = pe
            wrappers[p] = wrapper
    # print('Processes: %s' % processes)
    # print(inputmappings)
    # print(outputmappings)
    ordered = _order_by_dependency(inputmappings, outputmappings)
    # print('Ordered processes: %s' % ordered)
    output_rdd = {}
    result_rdd = {}

    for proc in ordered:
        inps = inputmappings[proc]
        outs = outputmappings[proc]
        wrapper = wrappers[proc]
        pe = process_to_pes[proc]
        if inps:
            if len(inps) == 1:
                for input_name, sources in inps.items():
                    inp_rdd = output_rdd[(sources[0], input_name)]
                out_rdd = inp_rdd.flatMap(wrapper.process)
            else:
                prev = None
                for input_name, sources in inps.items():
                    inp_rdd = output_rdd[(sources[0], input_name)]
                    if prev:
                        inp_rdd = prev.union(inp_rdd)
                    prev = inp_rdd
                out_rdd = prev.flatMap(wrapper.process)
            if len(outs) == 1:
                for output_name in outs:
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            else:
                for output_name in outs:
                    proj = Projection([output_name])
                    proj_rdd = out_rdd.flatMap(proj.project)
                    for inp in outs[output_name]:
                        rename = Rename({output_name: inp[0]})
                        output_rdd[(proc, inp[0])] = \
                            proj_rdd.flatMap(rename.rename)
            if not outs:
                result_rdd[proc] = out_rdd

        else:
            pe_input = inputs[pe.id]
            if isinstance(pe_input, list):
                # a single partition, so there are no repetitions - not ideal
                start_rdd = sc.parallelize(pe_input, 1)
            elif isinstance(pe_input, int):
                start_rdd = sc.parallelize(range(pe_input), 1)
            else:
                # fingers crossed it's a string and the file exists!
                start_rdd = sc.textFile(pe_input)
            out_rdd = start_rdd.flatMap(wrapper.process)
            if len(outs) == 1:
                for output_name in outs:
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            else:
                for output_name in outs:
                    proj = Projection([output_name])
                    out_rdd = out_rdd.flatMap(proj.project).persist()
                    for inp in outs[output_name]:
                        input_name = inp[0]
                        rename = Rename({output_name: input_name})
                        output_rdd[(proc, input_name)] = \
                            out_rdd.flatMap(rename.rename)
            if not outs:
                result_rdd[proc] = out_rdd
    # print("RESULT PROCESSES: %s" % result_rdd.keys())
    for p in result_rdd:
        # foreach() forces evaluation of each terminal RDD; its return value is None
        result_rdd[p].foreach(lambda x: None)
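The Spark variant expects a live SparkContext as its first argument and an inputs dictionary keyed by PE id (an int for iterations, a list of data items, or a filename). A sketch of driving it locally, assuming PySpark is installed and reusing the hypothetical `graph` and `prod` from the earlier sketch:

# Sketch: run the Spark-based process() against a local SparkContext.
from pyspark import SparkContext

sc = SparkContext(master='local[4]', appName='dispel4py')
process(sc, graph, {prod.id: 10}, args=None)  # args is unused by this variant
sc.stop()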
Example #7
def process(workflow, inputs, args):
    processes = {}
    inputmappings = {}
    outputmappings = {}
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if rank == 0 and not args.simple:
        try:
            processes, inputmappings, outputmappings =\
                processor.assign_and_connect(workflow, size)
        except Exception:
            success = False
    success = comm.bcast(success, root=0)

    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        nodes = [node.getContainedObject() for node in ubergraph.graph.nodes()]
        if rank == 0:
            print('Partitions: %s' % ', '.join(('[%s]' % ', '.join(
                (pe.id for pe in part)) for part in workflow.partitions)))
            for node in ubergraph.graph.nodes():
                wrapperPE = node.getContainedObject()
                print('%s contains %s' % (wrapperPE.id,
                                          [n.getContainedObject().id for n in
                                           wrapperPE.workflow.graph.nodes()]))
            try:
                processes, inputmappings, outputmappings =\
                    processor.assign_and_connect(ubergraph, size)
                inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
                success = True
            except Exception:
                # print(traceback.format_exc())
                print('dispel4py.mpi_process: '
                      'Not enough processes for execution of graph')
                success = False

    success = comm.bcast(success, root=0)

    if not success:
        return

    try:
        inputs = {pe.id: v for pe, v in inputs.items()}
    except AttributeError:
        pass
    processes = comm.bcast(processes, root=0)
    inputmappings = comm.bcast(inputmappings, root=0)
    outputmappings = comm.bcast(outputmappings, root=0)
    inputs = comm.bcast(inputs, root=0)

    if rank == 0:
        print('Processes: %s' % processes)
        # print 'Inputs: %s' % inputs

    for pe in nodes:
        if rank in processes[pe.id]:
            provided_inputs = processor.get_inputs(pe, inputs)
            wrapper = MPIWrapper(pe, provided_inputs)
            wrapper.targets = outputmappings[rank]
            wrapper.sources = inputmappings[rank]
            wrapper.process()
Example #8
def process(workflow, inputs, args):
    workflow_submission_id = uuid.uuid1().hex
    print(workflow_submission_id)
    # Check whether profiling mode is switched on
    if args.profileOn:

        manager = multiprocessing.Manager()

        # A dict to store characterization
        profiles = manager.dict()

        multi_monitor = Monitor(profiles, args, workflow)

        t1 = time.time()

    size = args.num
    success = True
    nodes = [node.getContainedObject() for node in workflow.graph.nodes()]
    if not args.simple:
        try:
            result = processor.assign_and_connect(workflow, size)
            processes, inputmappings, outputmappings = result
        except Exception:
            success = False

    if args.simple or not success:
        ubergraph = processor.create_partitioned(workflow)
        print('Partitions: %s' % ', '.join(('[%s]' % ', '.join(
            (pe.id for pe in part)) for part in workflow.partitions)))
        for node in ubergraph.graph.nodes():
            wrapperPE = node.getContainedObject()
            pes = [n.getContainedObject().id for
                   n in wrapperPE.workflow.graph.nodes()]
            print('%s contains %s' % (wrapperPE.id, pes))

        try:
            result = processor.assign_and_connect(ubergraph, size)
            if result is None:
                return 'dispel4py.multi_process: ' \
                       'Not enough processes for execution of graph'
            processes, inputmappings, outputmappings = result
            inputs = processor.map_inputs_to_partitions(ubergraph, inputs)
            success = True
            nodes = [node.getContainedObject()
                     for node in ubergraph.graph.nodes()]
        except Exception:
            print(traceback.format_exc())
            return 'dispel4py.multi_process: ' \
                   'Could not create mapping for execution of graph'

    print('Processes: %s' % processes)
    # print ("inputmappings: %s, \noutputmappings: %s" % (inputmappings, outputmappings))

    process_pes = {}
    queues = {}
    result_queue = None
    try:
        if args.results:
            result_queue = multiprocessing.Queue()
    except AttributeError:
        pass
    for pe in nodes:
        provided_inputs = processor.get_inputs(pe, inputs)
        for proc in processes[pe.id]:
            cp = copy.deepcopy(pe)
            cp.rank = proc
            cp.log = types.MethodType(simpleLogger, cp)
            if args.profileOn:
                wrapper = MultiProcessingWrapper(
                    proc, cp, provided_inputs,
                    workflow_submission_id=workflow_submission_id,
                    profiles=profiles)
            else:
                wrapper = MultiProcessingWrapper(
                    proc, cp, provided_inputs,
                    workflow_submission_id=workflow_submission_id)
            process_pes[proc] = wrapper
            wrapper.input_queue = multiprocessing.Queue()
            wrapper.input_queue.name = 'Queue_%s_%s' % (cp.id, cp.rank)
            wrapper.result_queue = result_queue
            queues[proc] = wrapper.input_queue
            wrapper.targets = outputmappings[proc]
            wrapper.sources = inputmappings[proc]
    for proc in process_pes:
        wrapper = process_pes[proc]
        wrapper.output_queues = {}
        for target in wrapper.targets.values():
            for inp, comm in target:
                for i in comm.destinations:
                    wrapper.output_queues[i] = queues[i]

    jobs = []
    for wrapper in process_pes.values():
        p = multiprocessing.Process(target=_processWorker, args=(wrapper, ))
        jobs.append(p)

    for j in jobs:
        j.start()

    for j in jobs:
        j.join()

    if result_queue:
        result_queue.put(STATUS_TERMINATED)

    if args.profileOn:
        t2 = time.time()
        t3 = t2 - t1

        profiles["exec_%s" % workflow_submission_id] = t3
        profiles["submitted_%s" % workflow_submission_id] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t1))
        # print("Total execution workflow time is  %s recorded by proccess %s" % (t3, cp.rank))

        multi_monitor.get_pe_process_map(processes, workflow_submission_id)
        multi_monitor.analyse_and_record()

    return result_queue
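A sketch of invoking the profiling variant: with `profileOn` set, the Monitor collects per-PE characterisation and the total execution time is recorded under `exec_<workflow_submission_id>` in the shared profiles dictionary. The attribute names below are simply those read by the code above; `graph` and `prod` are the hypothetical objects from the earlier sketch.

# Usage sketch for the profiling variant (attribute names as read by the code above).
import argparse

args = argparse.Namespace(num=4, simple=False, results=False, profileOn=True)
process(graph, {prod: 10}, args)  # returns None here since results is False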