コード例 #1
0
ファイル: tasks.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Mark the source and sink tasks of the graph.

        A task whose inputs are all immediate is registered as a source via
        ``graph.set_source``. A task without outgoing edges is flagged as a
        sink; when the current backend is not 'LOCAL_NON_THREADED', each of
        its outputs is re-created as a local non-threaded port and wired to a
        ``Sink`` through a new edge.

        Returns:
            PassResult.CONTINUE, unconditionally.
        """
        Pass.run(self, graph, ctx)
        for task in graph.tasks.values():
            # A source is a task that does not wait on any other task, i.e.
            # every one of its inputs is an immediate value.
            if all(port.is_immediate for port in task.inputs.values()):
                graph.set_source(task)

            # Tasks with outgoing edges are not sinks; nothing more to do.
            if task.edges:
                continue

            # No out edges: the task is a sink and so are all of its outputs.
            task.is_sink = True
            current_backend = Backend.get_current_backend()
            for outport in task.outputs.values():
                # NOTE(review): the sink edge is only created when the
                # current backend is not local; confirm that skipping it for
                # 'LOCAL_NON_THREADED' is intended.
                if current_backend.name == 'LOCAL_NON_THREADED':
                    continue
                # Replace the out port with an equivalent local port
                local_backend = Backend.get_backend(
                    BackendConfig(BackendType.LOCAL_NON_THREADED,
                                  'Local Non Threaded'))
                local_port = local_backend.get_port(
                    outport.type, outport.name, outport.index,
                    outport.task_ref)
                task.edges.append(Edge(local_port, Sink(local_port)))
        return PassResult.CONTINUE
コード例 #2
0
ファイル: typed.py プロジェクト: vibhatha/kisseru
 def run(self, graph, ctx):
     """Type check every edge of the graph.

     For each edge the source port type is checked against the destination
     port type with ``is_castable``. Incompatible edges are reported in
     ``ctx.errors``. For compatible edges whose both ends are ``FileType``,
     an 'anyfile' end is specialised to the id of the other end; otherwise
     the edge is flagged with ``needs_transform``.

     Returns:
         PassResult.ERROR if at least one edge failed the check,
         PassResult.CONTINUE otherwise.
     """
     Pass.run(self, graph, ctx)
     type_errors_found = False
     for tid, task in graph.tasks.items():
         for edge in task.edges:
             intype = edge.source.type
             outtype = edge.dest.type

             if not is_castable(intype, outtype):
                 # Record the failure so the pass reports an error; the
                 # destination expects its own declared type (outtype) and
                 # receives the source's type (intype).
                 type_errors_found = True
                 ctx.errors.append(
                     "{} expected a {} got a {} from {}".format(
                         edge.dest.task_ref.name, outtype.id, intype.id,
                         edge.source.task_ref.name))
                 continue

             # Only file related types need further handling
             if not (isinstance(intype, FileType)
                     and isinstance(outtype, FileType)):
                 continue

             # If exactly one end of the edge is untyped, cast the untyped
             # end to the type of the other end. Otherwise mark the edge as
             # needing a data transformation.
             # NOTE(review): two ends with the same concrete id also land in
             # the transform branch - confirm this is intended.
             if intype.id == 'anyfile' and outtype.id != 'anyfile':
                 intype.id = outtype.id
             elif intype.id != 'anyfile' and outtype.id == 'anyfile':
                 outtype.id = intype.id
             else:
                 edge.needs_transform = True

     if type_errors_found:
         return PassResult.ERROR
     return PassResult.CONTINUE
コード例 #3
0
ファイル: fusion.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Group fusable task sequences into ``FusedTask`` containers.

        Starting from every graph source, ``self._dfs`` collects candidate
        regions. Regions with more than one task are wrapped in a
        ``FusedTask``, each member is recorded in ``graph.fusee_map`` and the
        container is added to the graph, inheriting the source/sink role of
        its head/tail task.
        """
        Pass.run(self, graph, ctx)
        regions = []
        seen = set()
        for source in graph.sources.values():
            self._dfs(source, [source], regions, seen)

        # Single task regions are redundant, only keep the larger ones
        multi_task_regions = [
            region for region in regions if len(region) > 1
        ]
        fused_tasks = [FusedTask(region) for region in multi_task_regions]

        # Map every fused member to its container before touching the graph
        for container in fused_tasks:
            for member in container.tasks:
                graph.fusee_map[member.id] = container

        for container in fused_tasks:
            graph.add_task(container)

            # A container whose head is a source takes over the source role
            head = container.head
            if head.id in graph.sources:
                graph.unset_source(head)
                graph.set_source(container)

            # A container whose tail is a sink becomes a sink as well
            if container.tail.is_sink:
                container.is_sink = True
コード例 #4
0
ファイル: dot.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Store the generated dot graph in the context properties.

        The result of ``self._generate_dot_graph(graph)`` is saved under
        ``self.tag`` in the mapping kept at ``ctx.properties['__dot_graph__']``;
        the mapping is created when missing or empty.

        Returns:
            PassResult.CONTINUE, unconditionally.
        """
        Pass.run(self, graph, ctx)
        # Reuse the existing mapping if there is a non-empty one
        dot_graphs = ctx.properties.get('__dot_graph__') or {}
        dot_graphs[self.tag] = self._generate_dot_graph(graph)
        ctx.properties['__dot_graph__'] = dot_graphs
        return PassResult.CONTINUE
コード例 #5
0
ファイル: tasks.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Count the top-level tasks of the graph into ``graph.num_tasks``.

        Every task in ``graph.tasks`` is counted once, except for tasks that
        are members of a ``FusedTask`` (listed in its ``tasks`` attribute);
        those are represented by their container only.
        """
        Pass.run(self, graph, ctx)

        standalone = set(graph.tasks.values())

        # Now drop any tasks living within fused tasks. difference_update
        # tolerates members that are not (or no longer) in the set, so a
        # member missing from graph.tasks does not abort the pass.
        for task in graph.tasks.values():
            if isinstance(task, FusedTask):
                standalone.difference_update(task.tasks)

        graph.num_tasks = len(standalone)
コード例 #6
0
ファイル: transform.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Splice data transformation tasks into the graph.

        Two kinds of transformations are generated, both wrapping
        ``transform`` in a new ``Task`` flagged with ``is_transform``:

        * Edge transformations: every edge with ``needs_transform`` set is
          removed from its task and replaced by a transform task whose output
          is connected to the original destination port.
        * Source input transformations: for a source input whose argument has
          the 'csv' file extension while the input port declares a different
          type id, a transform task is placed in front of the source. The
          input is flipped to non-immediate, the transform task becomes a
          source and the original task stops being one.

        Returns:
            PassResult.CONTINUE, unconditionally.
        """
        Pass.run(self, graph, ctx)
        new_tasks = []
        new_sources = []
        deleted_sources = []

        # Run edge transformations
        for tid, task in graph.tasks.items():
            stale_edges = []
            # Walk a snapshot of the edge list. Deleting from the list being
            # iterated would skip the edge following each removed one.
            for edge in list(task.edges):
                if not edge.needs_transform:
                    continue

                # We need to splice in the transformation in between the
                # original tasks
                intype = edge.source.type
                outtype = edge.dest.type

                # [FIXME] Code debt - Currently we have two overloaded ways
                # of indexing in to the task.outputs dictionary. One with
                # indices and one with names in the case of named outputs.
                # But there is currently no way of distinguishing between
                # these two at the moment. The indexed addressing case is
                # assumed here since named outputs are not supported yet.
                # This needs to change once named outputs are supported.
                tasklet = Tasklet(edge.source.task_ref,
                                  str(edge.source.index))
                args = [tasklet, intype, outtype]

                sig = inspect.signature(transform)
                new_task = Task(gen_runner(transform, sig), transform, sig,
                                args, {})
                new_task.is_transform = True

                # We know this generated task only has one output
                outport = new_task.outputs['0']

                # The old edge is dropped once the walk is over; the edge
                # from the original source to the new task is expected to be
                # generated by the Task constructor call above.
                stale_edges.append(edge)

                # Make the original destination port of the edge to be the
                # destination port of the outward edge of the new task
                new_task.edges.append(Edge(outport, edge.dest))
                # Collect newly generated tasks
                new_tasks.append(new_task)

            if stale_edges:
                # Filter in place and by identity so that edges appended to
                # this list in the meantime are preserved.
                stale_ids = {id(edge) for edge in stale_edges}
                task.edges[:] = [
                    edge for edge in task.edges if id(edge) not in stale_ids
                ]

        # Run source input transformations
        for tid, source in graph.sources.items():
            demoted = False
            for name, inport in source.inputs.items():
                # Get the actual argument value passed to this source
                arg = source._args[name]

                # Only arguments which look like csv files are handled
                ext = get_file_extention(arg)
                if ext != 'csv':
                    continue

                intype = inport.type.id
                if intype == ext:
                    continue

                # NOTE(review): here intype is the type id declared on the
                # port and outtype is derived from the file extension, while
                # the edge case above passes the type objects themselves -
                # confirm this is what transform expects.
                outtype = get_type(ext)
                args = [arg, intype, outtype]

                sig = inspect.signature(transform)
                # Generate a new task for transforming the input to
                # type the original source was expecting
                new_source = Task(gen_runner(transform, sig), transform, sig,
                                  args, {})
                new_source.is_transform = True

                # We know this generated task only has one output
                outport = new_source.outputs['0']

                # Make the original task's input non immediate since it now
                # accepts the output of the newly generated task at runtime
                inport.flip_is_immediate()

                # Connect the out port of the new task to the
                # in port of the old source
                new_source.edges.append(Edge(outport, inport))

                # Collect the new task as a source
                new_sources.append(new_source)
                # Collect newly generated tasks
                new_tasks.append(new_source)
                demoted = True

            # Record each demoted source only once, even when several of its
            # inputs were transformed, so that it is unset a single time.
            if demoted:
                deleted_sources.append(source)

        # Add the newly generated tasks to the graph
        for task in new_tasks:
            graph.add_task(task)

        # Mark removed sources as not sources
        for source in deleted_sources:
            graph.unset_source(source)

        # Add newly generated sources
        for source in new_sources:
            graph.set_source(source)

        return PassResult.CONTINUE
コード例 #7
0
ファイル: stage.py プロジェクト: vibhatha/kisseru
    def run(self, graph, ctx):
        """Insert staging tasks in front of sources reading FTP inputs.

        For every source input whose argument is a string starting with
        "ftp:", a new ``Task`` wrapping ``staging`` is generated and flagged
        with ``is_staging``. Its single output is connected to the input, the
        input is flipped to non-immediate, the staging task becomes a source
        and the original task stops being one.

        Returns:
            PassResult.CONTINUE, unconditionally.
        """
        Pass.run(self, graph, ctx)
        new_tasks = []
        new_sources = []
        deleted_sources = []

        # Edge staging is not handled here.
        #
        # It depends on the run time placement of the task since staging may
        # or may not be necessary depending on whether the next task is placed
        # at the same node or not.

        # Run source input staging
        for tid, source in graph.sources.items():
            staged = False
            for name, inport in source.inputs.items():
                # Get the actual argument value passed to this source
                arg = source._args[name]

                # Check if it looks like a URL (currently we only support
                # FTP). Non string arguments can never be URLs and must not
                # abort the pass.
                if not (isinstance(arg, str) and arg.startswith("ftp:")):
                    continue

                # NOTE: no check is made that the extension of the remote
                # file matches the declared type of the input port.
                args = [arg]

                sig = inspect.signature(staging)
                # Generate a new task for staging the input
                task = Task(gen_runner(staging, sig), staging, sig, args,
                            {})
                task.is_staging = True

                # We know this generated task only has one output
                outport = task.outputs['0']

                # Make the original task's input non immediate since it now
                # accepts the output of the newly generated staging task at
                # runtime
                inport.flip_is_immediate()

                # Connect the out port of the new task to the
                # in port of the old source
                task.edges.append(Edge(outport, inport))

                # Collect the new task as a source
                new_sources.append(task)
                # Collect newly generated tasks
                new_tasks.append(task)
                staged = True

            # Record each demoted source only once, even when several of its
            # inputs were staged, so that it is unset a single time.
            if staged:
                deleted_sources.append(source)

        # Add the newly generated tasks to the graph
        for task in new_tasks:
            graph.add_task(task)

        # Mark removed sources as not sources
        for source in deleted_sources:
            graph.unset_source(source)

        # Add newly generated sources
        for source in new_sources:
            graph.set_source(source)

        return PassResult.CONTINUE