예제 #1
0
파일: common.py 프로젝트: rbax/plynx
import os
import stat
import json
import zipfile
from plynx.constants import NodeResources
from plynx.base import resource
from plynx.utils.common import zipdir
from plynx.utils.config import get_web_config

WEB_CONFIG = get_web_config()


class File(resource.BaseResource):
    pass


class PDF(resource.BaseResource):
    @classmethod
    def preview(cls, preview_object):
        return '<iframe src="{}" title="preview" type="application/pdf" width="100%"/>'.format(
            '{}/resource/{}'.format(WEB_CONFIG.endpoint,
                                    preview_object.resource_id), )


class Image(resource.BaseResource):
    @classmethod
    def preview(cls, preview_object):
        return '<img src="{}" width="100%" alt="preview" />'.format(
            '{}/resource/{}'.format(WEB_CONFIG.endpoint,
                                    preview_object.resource_id), )
예제 #2
0
class GraphScheduler(object):
    """ Main graph scheduler.

    It works with a single db.graph.Graph object.
    GraphScheduler loads the Graph from DB.
    It determines Nodes to be executed.

    Args:
        graph (str or Graph)

    """
    node_cache_manager = NodeCacheManager()
    WEB_CONFIG = get_web_config()

    def __init__(self, graph, node_collection=None):
        if isinstance(graph, Graph):
            self.graph_id = graph._id
            self.graph = graph
        else:
            self.graph_id = graph
            self.graph = Graph.load(self.graph_id)

        self.node_id_to_node = {
            node._id: node for node in self.graph.nodes
        }

        # number of dependencies to ids
        self.dependency_index_to_node_ids = defaultdict(lambda: set())
        self.node_id_to_dependents = defaultdict(lambda: set())
        self.node_id_to_dependency_index = defaultdict(lambda: 0)
        self.uncompleted_nodes_count = 0
        if node_collection:
            self.node_collection = node_collection
        else:
            self.node_collection = NodeCollection()

        for node in self.graph.nodes:
            # ignore nodes in finished statuses
            if NodeRunningStatus.is_finished(node.node_running_status):
                continue
            node_id = node._id
            dependency_index = 0
            for node_input in node.inputs:
                for input_value in node_input.values:
                    parent_node_id = to_object_id(input_value.node_id)
                    self.node_id_to_dependents[parent_node_id].add(node_id)
                    if not NodeRunningStatus.is_finished(self.node_id_to_node[parent_node_id].node_running_status):
                        dependency_index += 1

            if not NodeRunningStatus.is_finished(node.node_running_status):
                self.uncompleted_nodes_count += 1
            self.dependency_index_to_node_ids[dependency_index].add(node_id)
            self.node_id_to_dependency_index[node_id] = dependency_index

    def finished(self):
        if self.graph.graph_running_status == GraphRunningStatus.FAILED_WAITING:
            # wait for the rest of the running jobs to finish
            # check running status of each of the nodes
            for node in self.graph.nodes:
                if node.node_running_status == NodeRunningStatus.RUNNING:
                    return False
            # set status to FAILED
            self.graph.graph_running_status = GraphRunningStatus.FAILED
            self.graph.save(force=True)
            return True
        return self.graph.graph_running_status in {GraphRunningStatus.SUCCESS, GraphRunningStatus.FAILED, GraphRunningStatus.CANCELED}

    def pop_jobs(self):
        """Get a set of nodes with satisfied dependencies"""
        res = []
        if GraphRunningStatus.is_failed(self.graph.graph_running_status):
            return res
        cached_nodes = []
        for node_id in self.dependency_index_to_node_ids[0]:
            node = self._get_node_with_inputs(node_id).copy()
            if GraphScheduler._cacheable(node):
                try:
                    cache = GraphScheduler.node_cache_manager.get(node, self.graph.author)
                    if cache:
                        node.node_running_status = NodeRunningStatus.RESTORED
                        node.outputs = cache.outputs
                        node.logs = cache.logs
                        node.cache_url = '{}/graphs/{}?nid={}'.format(
                            GraphScheduler.WEB_CONFIG.endpoint.rstrip('/'),
                            str(cache.graph_id),
                            str(cache.node_id),
                        )
                        cached_nodes.append(node)
                        continue
                except Exception as err:
                    logging.exception("Unable to update cache: `{}`".format(err))
            job = self.node_collection.make_job(node)
            res.append(job)
        del self.dependency_index_to_node_ids[0]

        for node in cached_nodes:
            self.update_node(node)

        return res

    def update_node(self, node):
        dest_node = self.node_id_to_node[node._id]
        if node.node_running_status == NodeRunningStatus.SUCCESS \
                and dest_node.node_running_status != node.node_running_status \
                and GraphScheduler._cacheable(node):
            GraphScheduler.node_cache_manager.post(node, self.graph_id, self.graph.author)

        if dest_node.node_running_status == node.node_running_status:
            return

        self._set_node_status(node._id, node.node_running_status)
        # TODO smarter copy
        dest_node.logs = node.logs
        dest_node.outputs = node.outputs
        dest_node.cache_url = node.cache_url

        self.graph.save(force=True)

    def _set_node_status(self, node_id, node_running_status):
        node = self.node_id_to_node[node_id]
        node.node_running_status = node_running_status

        if node_running_status == NodeRunningStatus.FAILED:
            self.graph.graph_running_status = GraphRunningStatus.FAILED_WAITING

        if node_running_status in {NodeRunningStatus.SUCCESS, NodeRunningStatus.FAILED, NodeRunningStatus.RESTORED}:
            for dependent_node_id in self.node_id_to_dependents[node_id]:
                dependent_node = self.node_id_to_node[dependent_node_id]
                prev_dependency_index = self.node_id_to_dependency_index[dependent_node_id]

                removed_dependencies = 0
                for node_input in dependent_node.inputs:
                    for input_value in node_input.values:
                        if to_object_id(input_value.node_id) == to_object_id(node_id):
                            removed_dependencies += 1
                dependency_index = prev_dependency_index - removed_dependencies

                self.dependency_index_to_node_ids[prev_dependency_index].remove(dependent_node_id)
                self.dependency_index_to_node_ids[dependency_index].add(dependent_node_id)
                self.node_id_to_dependency_index[dependent_node_id] = dependency_index
            self.uncompleted_nodes_count -= 1

        if self.uncompleted_nodes_count == 0 and not GraphRunningStatus.is_failed(self.graph.graph_running_status):
            self.graph.graph_running_status = GraphRunningStatus.SUCCESS

        # self.graph.save()

    def _get_node_with_inputs(self, node_id):
        """Get the node and init its inputs, i.e. filling its resource_ids"""
        res = self.node_id_to_node[node_id]
        for node_input in res.inputs:
            for value in node_input.values:
                value.resource_id = self.node_id_to_node[to_object_id(value.node_id)].get_output_by_name(
                    value.output_id
                ).resource_id
        return res

    @staticmethod
    def _cacheable(node):
        for parameter in node.parameters:
            if parameter.name == 'cacheable':
                return parameter.value
        return False