def test_causality(self):
    # Two tasks are registered at ticks 100 and 101; connecting the output of
    # the later tick to the input of the earlier one must raise ValueError.
    earlier = START_TICK + 100
    later = START_TICK + 101
    self.target.add_task(earlier, None, {'nicename': 'test1'})
    self.target.add_task(later, None, {'nicename': 'test2'})

    backwards_source = graph.Endpoint(later, 'out')
    backwards_dest = graph.Endpoint(earlier, 'in')
    self.assertRaises(ValueError, self.target.connect,
                      backwards_source, backwards_dest)
def visit_Assign(self, node):
    """
    Translates an assignment statement into graph operations.

    Only the first target of the assignment is considered. An assignment
    of the form `a, b = __pydron_next__(it)` is translated into a
    `NextTask`; everything else is dispatched on the kind of the target
    (name, attribute, subscript, or tuple/list of names).

    :raises ValueError: if the target kind is not one of the above.
    """
    target = node.targets[0]
    rhs = node.value

    is_next_call = (isinstance(rhs, ast.Call)
                    and isinstance(rhs.func, ast.Name)
                    and rhs.func.id == "__pydron_next__")

    if is_next_call:
        # Sanity checks on the shape of the special call and its target.
        assert len(rhs.args) == 1
        assert len(rhs.keywords) == 0
        assert rhs.starargs is None
        assert rhs.kwargs is None
        assert isinstance(target, ast.Tuple)
        assert len(target.elts) == 2
        assert isinstance(target.elts[0], ast.Name)
        assert isinstance(target.elts[1], ast.Name)
        value_name, iterator_name = target.elts

        iterator = self.visit(rhs.args[0])
        tick = self.factory_stack[-1].exec_task(
            tasks.NextTask(),
            [(iterator, "iterator")],
            quick=True,
            syncpoint=False,
            nosend_ports={"iterator"})

        # First target receives the `value` port, second the `iterator` port.
        self.factory_stack[-1].assign_variable(
            value_name.id, graph.Endpoint(tick, "value"))
        self.factory_stack[-1].assign_variable(
            iterator_name.id, graph.Endpoint(tick, "iterator"))
        return

    value = self.visit(rhs)

    if isinstance(target, ast.Name):
        self.factory_stack[-1].assign_variable(target.id, value)

    elif isinstance(target, ast.Attribute):
        obj = self.visit(target.value)
        self.factory_stack[-1].exec_task(
            tasks.AttrAssign(target.attr),
            [(obj, "object"), (value, "value")],
            quick=True,
            syncpoint=True)

    elif isinstance(target, ast.Subscript):
        obj = self.visit(target.value)
        assert isinstance(target.slice, ast.Index)
        subscript = self.visit(target.slice.value)
        self.factory_stack[-1].exec_task(
            tasks.SubscriptAssign(),
            [(obj, "object"), (subscript, "slice"), (value, "value")],
            quick=True,
            syncpoint=True)

    elif isinstance(target, (ast.Tuple, ast.List)):
        # Unpack into one output port per element; ports are named "0", "1", ...
        tick = self.factory_stack[-1].exec_task(
            tasks.UnpackTask(len(target.elts)),
            [(value, 'value')],
            quick=True)
        for position, element in enumerate(target.elts):
            assert isinstance(element, ast.Name)
            self.factory_stack[-1].assign_variable(
                element.id, graph.Endpoint(tick, str(position)))

    else:
        raise ValueError("not supported")
def test_set_data_before_connect(self):
    # Output data for TICK1 is set *before* the connection to TICK2 is made;
    # the test asserts that TICK2 is nevertheless collected for refinement.
    self.target.add_task(TICK1, "task1")
    self.target.add_task(TICK2, MockTask("in"))
    self.target.set_output_data(TICK1, {"out": "data"})
    self.target.connect(graph.Endpoint(TICK1, "out"),
                        graph.Endpoint(TICK2, "in"))
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual({TICK2}, self.target.collect_refine_tasks())
def visit_If(self, node):
    """
    Translates an `if` statement into an `IfTask`.

    The body and the `else` part are each translated into their own
    graph. The condition is fed to the `$test` port. All other input
    ports of the task are read from variables of the enclosing factory,
    and all other output ports are assigned back to variables.
    """
    test = self.visit(node.test)

    def translate_branch(statements):
        # Builds one branch in a fresh factory that is on top of the
        # factory stack only while the statements are visited.
        branch_factory = GraphFactory(
            unassinged_local_strategy=UnassignedLocalStrategy.MAKE_GRAPH_INPUT)
        self.factory_stack.append(branch_factory)
        for statement in statements:
            self.visit(statement)
        self.factory_stack.pop()
        branch_factory.make_assigned_vars_outputs()
        return branch_factory

    body_factory = translate_branch(node.body)
    orelse_factory = translate_branch(node.orelse)

    task = tasks.IfTask(body_factory.get_graph(), orelse_factory.get_graph())
    tick = self.factory_stack[-1].exec_task(task, inputs=[(test, '$test')])

    # Ports starting with "$" are internal and are not tied to variables.
    for in_port in task.input_ports():
        if not in_port.startswith("$"):
            variable_source = self.factory_stack[-1].read_variable(in_port)
            self.factory_stack[-1].get_graph().connect(
                variable_source, graph.Endpoint(tick, in_port))

    for out_port in task.output_ports():
        if not out_port.startswith("$"):
            self.factory_stack[-1].assign_variable(
                out_port, graph.Endpoint(tick, out_port))
def test_was_collected_true(self):
    # Once collect_refine_tasks() has run with data available for TICK1,
    # was_refine_collected(TICK2) must report True.
    self.target.add_task(TICK1, "task1")
    self.target.add_task(TICK2, MockTask("in"))

    producer_out = graph.Endpoint(TICK1, "out")
    consumer_in = graph.Endpoint(TICK2, "in")
    self.target.connect(producer_out, consumer_in)

    self.target.set_output_data(TICK1, {"out": "data"})
    self.target.collect_refine_tasks()

    collected = self.target.was_refine_collected(TICK2)
    self.assertTrue(collected)
def test_observer_connect(self):
    # The observer subscribes after both tasks were added, so the only
    # call it is expected to record is the "connected" notification.
    self.target.add_task(START_TICK + 100, "task1")
    self.target.add_task(START_TICK + 101, "task2")
    self.target.subscribe(self.observer)
    self.target.connect(graph.Endpoint(START_TICK + 100, 'out'),
                        graph.Endpoint(START_TICK + 101, 'in'))

    expected_calls = [("connected",
                       graph.Endpoint(START_TICK + 100, 'out'),
                       graph.Endpoint(START_TICK + 101, 'in'))]
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(expected_calls, self.observer.calls)
def test_set_data_before_connection(self):
    # Data for TICK1 is set before TICK1 -> TICK2 is connected; the test
    # asserts that TICK1, TICK2 and FINAL are all reported as ready.
    self.target.add_task(TICK1, "task1")
    self.target.add_task(TICK2, "task2")
    self.target.set_output_data(TICK1, {"out": "data"})

    upstream = graph.Endpoint(TICK1, "out")
    downstream = graph.Endpoint(TICK2, "in")
    self.target.connect(upstream, downstream)

    self.assertEqual(self.target.collect_ready_tasks(),
                     {TICK1, TICK2, FINAL})
def add_context(self):
    """
    Connects the `CONTEXT` port of `START_TICK` to a `CONTEXT` port on
    every tick stored in `self.ticks`.
    """
    context_source = graph.Endpoint(START_TICK, CONTEXT)
    # Only the values (the ticks) of the mapping are needed here.
    for node_tick in self.ticks.itervalues():
        self.graph.connect(context_source,
                           graph.Endpoint(node_tick, CONTEXT))
def test_pickle(self):
    # A graph with two connected tasks must compare equal to itself after a
    # pickle round-trip using the highest protocol.
    first_tick = START_TICK + 100
    second_tick = START_TICK + 101
    self.target.add_task(first_tick, None, {'nicename': 'test1'})
    self.target.add_task(second_tick, None, {'nicename': 'test2'})
    self.target.connect(graph.Endpoint(first_tick, 'out'),
                        graph.Endpoint(second_tick, 'in'))

    serialized = pickle.dumps(self.target, protocol=pickle.HIGHEST_PROTOCOL)
    restored = pickle.loads(serialized)
    self.assertEqual(self.target, restored)
def _out_connections(self, iteration_tick, collector_tick, iteration):
    """
    Builds the connections from the body graph's outputs to the collector.

    For every connection into the body graph's `FINAL_TICK`, the source is
    shifted by `iteration_tick` and paired with a port on `collector_tick`
    named `<source port>_<iteration + 1>`.

    :returns: list of `(source, dest)` endpoint tuples.
    """
    return [
        (graph.Endpoint(source.tick << iteration_tick, source.port),
         graph.Endpoint(collector_tick,
                        "%s_%s" % (source.port, iteration + 1)))
        for source, _ in self.body_graph.get_in_connections(graph.FINAL_TICK)
    ]
def test_add_conn_before_flush(self):
    # With TICK1 -> TICK2 connected and no data set, the test asserts that
    # only FINAL and TICK1 are reported as ready.
    self.target.add_task(TICK1, "task1")
    self.target.add_task(TICK2, "task2")

    link_source = graph.Endpoint(TICK1, "out")
    link_dest = graph.Endpoint(TICK2, "in")
    self.target.connect(link_source, link_dest)

    self.assertEqual(self.target.collect_ready_tasks(), {FINAL, TICK1})
def test_set_output_data_final(self):
    # FINAL depends on TICK1's output: first only TICK1 is ready, and after
    # its output data is set, FINAL is reported as ready.
    self.target.add_task(TICK1, "task1")
    task_output = graph.Endpoint(TICK1, "out")
    final_input = graph.Endpoint(FINAL, "in")
    self.target.connect(task_output, final_input)

    ready_before = self.target.collect_ready_tasks()
    self.assertEqual({TICK1}, ready_before)

    self.target.set_output_data(TICK1, {"out": "data"})
    ready_after = self.target.collect_ready_tasks()
    self.assertEqual({FINAL}, ready_after)
def test_add_conn_after_flush(self):
    # Ready tasks are collected once before the connection is added; the
    # test asserts that a later collect returns an empty set even after
    # output data was set for the first task.
    tick_one = graph.START_TICK + 1
    tick_two = graph.START_TICK + 2
    self.target.add_task(tick_one, "task1")
    self.target.add_task(tick_two, "task2")
    self.target.collect_ready_tasks()

    self.target.connect(graph.Endpoint(tick_one, "out"),
                        graph.Endpoint(tick_two, "in"))
    self.target.set_output_data(tick_one, {"out": "data"})

    self.assertEqual(set(), self.target.collect_ready_tasks())
def test_get_out_connections(self):
    # get_out_connections() for tick 100 must yield exactly the single
    # connection that was made from it.
    producer = START_TICK + 100
    consumer = START_TICK + 101
    self.target.add_task(producer, None, {'nicename': 'test1'})
    self.target.add_task(consumer, None, {'nicename': 'test2'})
    self.target.connect(graph.Endpoint(producer, 'out'),
                        graph.Endpoint(consumer, 'in'))

    found = list(self.target.get_out_connections(producer))
    wanted = [(graph.Endpoint(producer, 'out'),
               graph.Endpoint(consumer, 'in'))]
    self.assertEqual(found, wanted)
def test_mocking(self):
    # Builds a two-task graph by hand and compares it against the same
    # graph described with the G/T/C helpers.
    first_tick = START_TICK + 100
    second_tick = START_TICK + 101
    self.target.add_task(first_tick, "task1", {'nicename': 'test1'})
    self.target.add_task(second_tick, "task2", {'nicename': 'test2'})
    self.target.connect(graph.Endpoint(first_tick, 'out'),
                        graph.Endpoint(second_tick, 'in'))

    described = G(
        T(100, 'task1', {'nicename': 'test1'}),
        C(100, 'out', 101, 'in'),
        T(101, 'task2', {'nicename': 'test2'}),
    )
    utils.assert_graph_equal(described, self.target)
def exec_task(self, task, inputs=None, autoconnect=False, quick=False,
              syncpoint=False, nosend_ports=None):
    """
    Adds a task to the graph.

    :param task: Task to add. Its `subgraphs()` are inspected for
        side effects; if any has some, the task becomes a syncpoint.

    :param inputs: Iterable of `(source, in_port)` tuples. `None` (the
        default) means no explicit inputs.

    :param autoconnect: If `true` all inputs that don't start with a `$`
        are automatically read from variables and all outputs with the
        same criteria assigned to variables.

    :param quick: If true, the `quick` property is set on the task.

    :param syncpoint: If true, the `syncpoint` property is set on the task.

    :param nosend_ports: If truthy, stored as the `nosend_ports` property.

    :returns: tick
    """
    # A shared mutable default ([]) is avoided; None stands for "no inputs".
    if inputs is None:
        inputs = ()

    for subgraph in task.subgraphs():
        syncpoint |= dataflowutils.contains_sideeffects(subgraph)

    tick = self._next_tick
    self._next_tick += 1

    # Only properties that are actually set end up in the dict.
    properties = {}
    if quick:
        properties["quick"] = True
    if syncpoint:
        properties["syncpoint"] = True
    if nosend_ports:
        properties["nosend_ports"] = nosend_ports

    self._graph.add_task(tick, task, properties)

    for source, in_port in inputs:
        self._graph.connect(source, graph.Endpoint(tick, in_port))

    if autoconnect:
        for var in task.input_ports():
            if not var.startswith("$"):
                source = self.read_variable(var)
                self._graph.connect(source, graph.Endpoint(tick, var))
        for var in task.output_ports():
            if not var.startswith("$"):
                source = graph.Endpoint(tick, var)
                self.assign_variable(var, source)

    return tick
def task_completed(evalresult):
    # Post-processes an evaluation result. If the result is a dict of
    # port -> value, every value is ingested into the store and replaced
    # by its ValueId; sizes reported by set_value() are recorded per port.
    # Any other kind of result is passed through unchanged.
    if not isinstance(evalresult.result, dict):
        return evalresult

    value_ids = {}
    sizes = {}
    for port, value in evalresult.result.iteritems():
        valueid = ValueId(graph.Endpoint(tick, port))

        # Ports listed in nosend_ports are stored without pickle support.
        pickle_supported = not (nosend_ports and port in nosend_ports)

        try:
            size = self.set_value(
                valueid,
                value,
                pickle_supported,
                pickle_supported and fail_on_unexpected_nosend)
        except NoPickleError as e:
            # TODO: memory leak. We should remove the values we've set in
            # previous loop iterations.
            raise NoPickleError(
                "Value of output port %r cannot be pickled." % port,
                cause=e.cause)

        value_ids[port] = valueid
        if size is not None:
            sizes[port] = size

    evalresult.result = value_ids
    evalresult.datasizes = sizes
    evalresult.transfer_results = transfer_results
    return evalresult
def exec_expr(self, task, inputs=None, quick=False, syncpoint=False,
              nosend_ports=None):
    """
    Same as :meth:`exec_task` but returns the :class:`graph.Endpoint`
    of the port named `value` on the newly added task.

    :param inputs: Iterable of `(source, in_port)` tuples, forwarded to
        :meth:`exec_task`. `None` (the default) means no inputs.

    :returns: `graph.Endpoint(tick, "value")`
    """
    # The former default was a shared mutable dict; exec_task iterates
    # (source, in_port) pairs, so an empty list is the matching "nothing".
    if inputs is None:
        inputs = []
    tick = self.exec_task(task,
                          inputs=inputs,
                          quick=quick,
                          syncpoint=syncpoint,
                          nosend_ports=nosend_ports)
    return graph.Endpoint(tick, "value")
def graph_output(self, source, port):
    """
    Declares a graph output named `port`.

    Connects `source` to the port `port` on `FINAL_TICK` and then
    registers `port` as the nice name of `source`.
    """
    final_endpoint = graph.Endpoint(graph.FINAL_TICK, port)
    self._graph.connect(source, final_endpoint)
    self.set_nice_name(source, port)
def test_get_nonexistent_data(self):
    # Only "out1" and "out2" have data; asking for "out3" must raise KeyError.
    task_tick = graph.START_TICK + 1
    self.target.add_task(task_tick, "task1")
    self.target.set_output_data(task_tick,
                                {"out1": "data1", "out2": "data2"})

    missing = graph.Endpoint(task_tick, "out3")
    self.assertRaises(KeyError, self.target.get_data, missing)
def test_get_data(self):
    # Data stored for port "out1" must be returned by get_data().
    task_tick = graph.START_TICK + 1
    self.target.add_task(task_tick, "task1")
    self.target.set_output_data(task_tick,
                                {"out1": "data1", "out2": "data2"})

    stored = self.target.get_data(graph.Endpoint(task_tick, "out1"))
    self.assertEqual("data1", stored)
def add_connections(self, task, tick):
    """
    Connects every input of `task` (placed at `tick`) to its source.

    A source with a parent is looked up in `self.ticks` and its port name
    is derived with `_portname_from_source`. A source without a parent is
    taken to be an input of the dataflow and is read from `START_TICK`
    under the source's own name.
    """
    for inputname, source in task.inputs.iteritems():
        if source.parent:
            origin = graph.Endpoint(self.ticks[source.parent],
                                    _portname_from_source(source))
        else:
            # in this case it can only be an input to the dataflow
            origin = graph.Endpoint(START_TICK, source.name)
        self.graph.connect(origin, graph.Endpoint(tick, inputname))
def visit_FunctionDef(self, node):
    """
    Translates a function definition into a `FunctionDefTask`.

    The function body is translated into its own graph, whose inputs are
    the positional arguments, the vararg and the kwarg (when present).
    Default values are evaluated in the enclosing graph and fed to the
    task through ports named `default_<i>`. The resulting function object
    (port `function`) is assigned to the function's name.

    Decorators are not supported.

    :raises ValueError: if the body graph does not have exactly one
        output, or if that output is not named `retval`.
    """
    assert not node.decorator_list, "Not supported"

    # Default values become inputs to the task. exec_task() expects
    # `(source, in_port)` tuples, so the pairs are built in that shape;
    # handing it a dict would make it unpack the key strings instead.
    defaults = [(self.visit(d), "default_%s" % i)
                for i, d in enumerate(node.args.defaults)]

    factory = GraphFactory()

    # Prepare the inputs of the body-graph
    for arg in node.args.args:
        vn = factory.graph_input(arg.id)
        factory.assign_variable(arg.id, vn)
    if node.args.vararg:
        vn = factory.graph_input(node.args.vararg)
        factory.assign_variable(node.args.vararg, vn)
    if node.args.kwarg:
        vn = factory.graph_input(node.args.kwarg)
        factory.assign_variable(node.args.kwarg, vn)

    # lets build the body graph.
    self.factory_stack.append(factory)
    for stmt in node.body:
        self.visit(stmt)
    self.factory_stack.pop()
    body_graph = factory.get_graph()

    # some sanity checks
    graph_outputs = list(body_graph.get_in_connections(graph.FINAL_TICK))
    if len(graph_outputs) != 1:
        raise ValueError(
            "Function graph invalid. Expected exactly one output:%s"
            % repr(graph_outputs))
    _, graph_output_dest = graph_outputs[0]
    if graph_output_dest.port != "retval":
        raise ValueError(
            "Function graph invalid. Missing return value:%s"
            % repr(graph_outputs))

    task = tasks.FunctionDefTask(scheduler=self.scheduler,
                                 name=node.name,
                                 graph=body_graph,
                                 args=[arg.id for arg in node.args.args],
                                 vararg=node.args.vararg,
                                 kwarg=node.args.kwarg,
                                 num_defaults=len(defaults))

    tick = self.factory_stack[-1].exec_task(task,
                                            inputs=defaults,
                                            quick=True,
                                            syncpoint=False)
    self.factory_stack[-1].assign_variable(
        node.name, graph.Endpoint(tick, "function"))
def insert_subgraph(g, subgraph, supertick):
    """
    Copies the tasks of `subgraph`, and the connections between them,
    into `g`. Every tick is shifted by `supertick` (`tick << supertick`).

    Connections that start at START_TICK or end at FINAL_TICK are NOT
    copied.
    """
    def shifted(tick):
        return tick << supertick

    for tick in subgraph.get_all_ticks():
        g.add_task(shifted(tick),
                   subgraph.get_task(tick),
                   subgraph.get_task_properties(tick))

    # FINAL_TICK is visited as well; its incoming connections are then
    # filtered out by the boundary check below.
    destination_ticks = list(subgraph.get_all_ticks())
    destination_ticks.append(graph.FINAL_TICK)

    for destination_tick in destination_ticks:
        for source, dest in subgraph.get_in_connections(destination_tick):
            touches_boundary = (source.tick == graph.START_TICK
                                or dest.tick == graph.FINAL_TICK)
            if not touches_boundary:
                g.connect(graph.Endpoint(shifted(source.tick), source.port),
                          graph.Endpoint(shifted(dest.tick), dest.port))
def add_outputs(self):
    """
    Connects the outputs of the invocation to the final tick.

    An `aliases` property is first stored on FINAL_TICK. It maps the port
    name of each output to its alias, or to the port name itself when the
    output has no alias. Each output is then connected from the tick of
    its referenced source's parent to the matching port on FINAL_TICK.
    """
    aliases = {}
    for output in self.invocation.outputs:
        modelpath = _portname_from_source(output)
        aliases[modelpath] = output.alias or modelpath
    self.graph.set_task_property(graph.FINAL_TICK, 'aliases', aliases)

    for output in self.invocation.outputs:
        final_endpoint = graph.Endpoint(FINAL_TICK,
                                        _portname_from_source(output))
        referenced = output.ref
        producer_endpoint = graph.Endpoint(self.ticks[referenced.parent],
                                           _portname_from_source(referenced))
        self.graph.connect(producer_endpoint, final_endpoint)
def inside_reactor():
    # Generator that yields asynchronous results one at a time and hands
    # its final value to defer.returnValue().
    # NOTE(review): presumably wrapped by inlineCallbacks - confirm at the
    # definition site, the decorator is not visible here.
    #
    # Steps: make sure the RPC system runs, store the graph inputs on the
    # local worker, traverse the graph, then fetch and extract each output.
    logger.debug("Making sure RPC system is up and running.")
    yield runtime.ensure_rpcsystem()

    logger.debug("Getting local worker")
    me = anycall.RPCSystem.default.local_worker  #@UndefinedVariable
    meremote = anycall.RPCSystem.default.local_remoteworker  #@UndefinedVariable

    # Ingest the inputs into the local worker
    # so that we can pass valueref's to the traverser
    logger.debug("Injesting graph inputs")
    graph_inputs = {}
    for in_port, in_value in inputs.iteritems():
        in_id = worker.ValueId(graph.Endpoint(graph.START_TICK, in_port))
        me.set_value(in_id, in_value)
        can_pickle = yield me.get_pickle_supported(in_id)
        graph_inputs[in_port] = worker.ValueRef(in_id, can_pickle, meremote)

    logger.debug("Starting to traverse the graph.")
    graph_outputs = yield trav.execute(g, graph_inputs)
    logger.debug(
        "Graph traversal completed, transfering graph outputs to local worker.")

    # Get the outputs back to the local worker
    # and extract them.
    outputs = {}
    for out_port, out_ref in graph_outputs.iteritems():
        source = shed._strategy.choose_source_worker(out_ref, meremote)

        logger.debug("Transferring data for port %r from %r to %r." %
                     (out_port, source, me))
        yield me.fetch_from(source, out_ref.valueid)
        logger.debug("Transferring data for port %r completed." % out_port)

        logger.debug("Extracting data for port %r." % out_port)
        outputs[out_port] = yield me.get_value(out_ref.valueid)
        logger.debug("Extracting data for port %r completed." % out_port)

    logger.debug("Got graph outputs")
    defer.returnValue(outputs)
def refine(self, g, tick, known_inputs):
    """
    Replaces this for-loop task in `g` once its inputs are known.

    Nothing happens until `$iterator` (and `$breaked`, when the task has
    that input) is present in `known_inputs`. Then:

    * if `$breaked` is true, the task is replaced by an empty graph;
    * otherwise one item is pulled from the iterator. If there is one,
      a `ConstTask` holding it is added and the task is replaced by the
      body graph with the item fed in as `$target`. If the iterator is
      exhausted, the task is replaced by the `orelse` graph.

    :param g: Graph containing the task.
    :param tick: Tick of the task within `g`.
    :param known_inputs: Mapping from input port name to its known value.
    """
    if "$iterator" not in known_inputs:
        return

    if self.has_breaked_input:
        if "$breaked" not in known_inputs:
            return
        if known_inputs["$breaked"]:
            # `break` was called. Abort iteration. Returning here matters:
            # falling through would advance the iterator once more and
            # replace the task at `tick` a second time.
            refine.replace_task(g, tick, graph.Graph())
            return

    iterator = known_inputs["$iterator"]
    try:
        item = next(iterator)
    except StopIteration:
        # Iterator exhausted: continue with the else-part of the loop.
        refine.replace_task(g, tick, self.orelse_graph)
        return

    if self.is_tail:
        # the last tick item is the for-tail
        # the prev. to last is the subgraph_tick
        # the one before that is the iteration_tick
        # and then we have the tick of the original for-loop task.
        orig_for_tick = tick >> 3
        iteration_counter = tick._elements[-3] + 1
        iteration_tick = (graph.START_TICK + iteration_counter) << orig_for_tick
    else:
        # First iteration
        iteration_counter = 1
        iteration_tick = (graph.START_TICK + iteration_counter) << tick

    # `+` binds tighter than `<<`; the parentheses only make that explicit.
    item_tick = (graph.START_TICK + 1) << iteration_tick
    subgraph_tick = (graph.START_TICK + 2) << iteration_tick

    g.add_task(item_tick, ConstTask(item))
    item_endpoint = graph.Endpoint(item_tick, "value")

    refine.replace_task(g,
                        tick,
                        self.body_graph,
                        subgraph_tick=subgraph_tick,
                        additional_inputs={'$target': item_endpoint})
def _in_connections(self, g, tick):
    '''
    Prepares the connections that hook up the subgraph's inputs.

    Each connection leaving the body graph's START_TICK is paired with
    whatever feeds the port of the same name on `tick` in `g`; the
    destination tick is shifted by `tick`.

    :returns: list of `(source, dest)` endpoint tuples.
    '''
    # port name on the task -> endpoint that currently feeds it
    feeding = {}
    for outer_source, outer_dest in g.get_in_connections(tick):
        feeding[outer_dest.port] = outer_source

    return [
        (feeding[inner_source.port],
         graph.Endpoint(inner_dest.tick << tick, inner_dest.port))
        for inner_source, inner_dest
        in self.body_graph.get_out_connections(graph.START_TICK)
    ]
def _out_connections(self, g, tick):
    """
    Connects the outputs of the body graph (the inputs to its FINAL_TICK)
    with everything that consumes the matching output port of the task
    at `tick` in `g`.

    :returns: list of `(source, dest)` endpoint tuples. A body output
        whose port has several consumers yields one tuple per consumer.
    """
    # output port name of the task -> every destination reading from it
    consumers = {}
    for task_source, task_dest in g.get_out_connections(tick):
        consumers.setdefault(task_source.port, []).append(task_dest)

    out_connections = []
    for body_source, body_dest in self.body_graph.get_in_connections(
            graph.FINAL_TICK):
        shifted_source = graph.Endpoint(body_source.tick << tick,
                                        body_source.port)
        out_connections.extend(
            (shifted_source, consumer)
            for consumer in consumers[body_dest.port])
    return out_connections
def _in_connections(self, iteration_tick, input_map, iterator_port_source):
    '''
    Prepares the connections that hook up the subgraph's inputs for one
    iteration.

    The inputs are those of `input_map`, with the entry for
    `self.iterator_port` set to `iterator_port_source`. Destinations are
    shifted by `iteration_tick`.

    :returns: list of `(source, dest)` endpoint tuples.
    '''
    # Work on a copy so the caller's input_map is left untouched.
    iteration_input = dict(input_map.iteritems())
    iteration_input[self.iterator_port] = iterator_port_source

    in_connections = []
    for source, dest in self.body_graph.get_out_connections(graph.START_TICK):
        # direct connection are treated as output connections.
        if dest.tick != graph.FINAL_TICK:
            feeding_endpoint = iteration_input[source.port]
            shifted_dest = graph.Endpoint(dest.tick << iteration_tick,
                                          dest.port)
            in_connections.append((feeding_endpoint, shifted_dest))
    return in_connections