コード例 #1
0
 def _compute_hash_key(self):
     """
     Build the cache key used for the port, meta and conf-schema caches.

     The key is the hash of, in order: the node uid, a fingerprint of the
     sub taskgraph (md5 of the file content when the file exists, the
     resolved path when it does not, "" when no taskgraph is configured),
     the incoming edge description, the JSON dump of the conf, the
     comma-terminated configured input names and the JSON dump of the
     replacement object.

     In the very rare case of a hash collision the cached column, port
     and conf results are affected; per the original note the computation
     result itself is not changed.

     Returns
     -------
     tuple
         (hash code, loaded task graph or None, replacement conf obj)
     """
     graph_obj = None
     fingerprint = ""
     if 'taskgraph' in self.conf:
         fingerprint = get_file_path(self.conf['taskgraph'])
         if os.path.exists(fingerprint):
             with open(fingerprint) as handle:
                 content = handle.read()
             # from here on the fingerprint is the content digest,
             # no longer the path
             fingerprint = hashlib.md5(content.encode()).hexdigest()
             graph_obj = TaskGraph.load_taskgraph(
                 get_file_path(self.conf['taskgraph']))

     replacement = {}
     self.update_replace(replacement, graph_obj)

     names = []
     wiring = []
     if 'input' in self.conf:
         for name in self.conf['input']:
             names.append(name + ",")
             if not hasattr(self, 'inputs'):
                 continue
             # the edge triples are appended once per configured input
             for edge in self.inputs:
                 wiring.extend((hash(edge['from_node']),
                                edge['to_port'], edge['from_port']))

     signature = (self.uid, fingerprint, tuple(wiring),
                  json.dumps(self.conf), "".join(names),
                  json.dumps(replacement))
     return hash(signature), graph_obj, replacement
コード例 #2
0
ファイル: test_taskgraph_api.py プロジェクト: idanre1/gQuant
    def test_load(self):
        '''Write TASKGRAPH_YAML to disk, load it back as a taskgraph and
        check every loaded task spec is among the specs of self.tgraph.
        '''
        yaml_path = os.path.join(self._test_dir, 'test_load_taskgraph.yaml')
        with open(yaml_path, 'w') as out:
            out.write(TASKGRAPH_YAML)

        expected_specs = [task._task_spec for task in self.tgraph]

        loaded = TaskGraph.load_taskgraph(yaml_path)
        # stops at the first loaded task whose spec is not expected
        all_tasks_exist = all(
            task._task_spec in expected_specs for task in loaded)

        with StringIO() as buf:
            yaml.dump(expected_specs, buf,
                      default_flow_style=False, sort_keys=False)
            buf.seek(0)
            dumped_specs = buf.read()

        template = ('Load taskgraph failed. Missing expected task items.\n'
                    'EXPECTED TASKGRAPH YAML:\n\n'
                    '{wyaml}\n\n'
                    'GOT TASKS FORMATTED AS YAML:\n\n'
                    '{tlist}\n\n')
        err_msg = template.format(wyaml=TASKGRAPH_YAML, tlist=dumped_specs)

        self.assertTrue(all_tasks_exist, err_msg)
コード例 #3
0
def get_nodes_from_file(file):
    """
    Load a taskgraph from a yaml file and describe it for the UI client.

    The file is loaded with ``TaskGraph.load_taskgraph`` and the result of
    ``get_nodes`` on that graph is returned unchanged. That result is a
    dict with two keys:
        nodes:
            - list of node objects for the UI client, carrying the node
            information (node size, input ports, output ports, output
            column names/types, conf schema and conf data).
        edges:
            - list of edge objects for the UI client, enumerating all the
            edges in the graph.

    Arguments
    -------
    file: string
        file name of the yaml taskgraph

    Returns
    -------
    dict
        nodes and edges of the graph data

    """
    return get_nodes(TaskGraph.load_taskgraph(file))
コード例 #4
0
    def test_load_workflow(self):
        '''Write WORKFLOW_YAML to disk, load it back and check every loaded
        task spec is among the entries of self._task_list.'''
        from gquant.dataframe_flow import TaskGraph
        yaml_path = os.path.join(self._test_dir, 'test_save_workflow.yaml')
        with open(yaml_path, 'w') as out:
            out.write(WORKFLOW_YAML)

        loaded_tasks = TaskGraph.load_taskgraph(yaml_path)
        # stops at the first loaded task whose spec is not expected
        all_tasks_exist = all(
            task._task_spec in self._task_list for task in loaded_tasks)

        with StringIO() as buf:
            yaml.dump(self._task_list, buf,
                      default_flow_style=False, sort_keys=False)
            buf.seek(0)
            dumped_tasks = buf.read()

        template = ('Load workflow failed. Missing expected task items.\n'
                    'EXPECTED WORKFLOW YAML:\n\n'
                    '{wyaml}\n\n'
                    'GOT TASKS FORMATTED AS YAML:\n\n'
                    '{tlist}\n\n')
        err_msg = template.format(wyaml=WORKFLOW_YAML, tlist=dumped_tasks)

        self.assertTrue(all_tasks_exist, err_msg)
コード例 #5
0
ファイル: compositeNode.py プロジェクト: zfy1989lee/gQuant
    def ports_setup(self):
        """
        Compute the input/output ports of the composite node.

        The result is cached in ``cache_ports`` under the key returned by
        ``self._compute_hash_key()``. Without a 'taskgraph' entry in the
        conf the node exposes no ports at all.

        Returns
        -------
        NodePorts
            ports gathered from the sub graph, renamed by ``fix_port_name``
            with the uid of the sub node they belong to
        """
        cache_key = self._compute_hash_key()
        if cache_key in cache_ports:
            return cache_ports[cache_key]

        inports, outports = {}, {}
        if 'taskgraph' in self.conf:
            graph_path = get_file_path(self.conf['taskgraph'])
            sub_graph = TaskGraph.load_taskgraph(graph_path)
            replacement = {}
            self.update_replace(replacement)
            sub_graph.build(replace=replacement)

            def inputNode_fun(inputNode, in_ports):
                # keep only the input ports selected through in_ports
                declared = inputNode.ports_setup().inports
                selected = {name: declared[name]
                            for name in declared.keys()
                            if name in in_ports}
                inports.update(fix_port_name(selected, inputNode.uid))

            def outNode_fun(outNode, out_ports):
                # keep only the output ports selected through out_ports
                declared = outNode.ports_setup().outports
                selected = {name: declared[name]
                            for name in declared.keys()
                            if name in out_ports}
                outports.update(fix_port_name(selected, outNode.uid))

            # the callbacks above fill inports / outports in place
            self._make_sub_graph_connection(sub_graph, inputNode_fun,
                                            outNode_fun)

        node_ports = NodePorts(inports=inports, outports=outports)
        cache_ports[cache_key] = node_ports
        return node_ports
コード例 #6
0
ファイル: compositeNode.py プロジェクト: zfy1989lee/gQuant
    def columns_setup(self):
        """
        Compute the output columns of the composite node.

        The output columns are cached in ``cache_columns`` under the key
        returned by ``self._compute_hash_key()``. On a cache miss
        ``self.required`` is also replaced with the required columns
        gathered from the sub graph input nodes.

        Returns
        -------
        dict
            output columns gathered from the sub graph, renamed by
            ``fix_port_name`` with the uid of the sub node they belong to
        """
        cache_key = self._compute_hash_key()
        if cache_key in cache_columns:
            return cache_columns[cache_key]

        required, out_columns = {}, {}
        if 'taskgraph' in self.conf:
            graph_path = get_file_path(self.conf['taskgraph'])
            sub_graph = TaskGraph.load_taskgraph(graph_path)
            replacement = {}
            self.update_replace(replacement)
            sub_graph.build(replace=replacement)

            def inputNode_fun(inputNode, in_ports):
                # run columns_setup first so required columns are ready
                inputNode.columns_setup()
                needed = inputNode.required
                selected = {name: needed[name]
                            for name in needed.keys()
                            if name in in_ports}
                required.update(fix_port_name(selected, inputNode.uid))

            def outNode_fun(outNode, out_ports):
                # keep only the columns of ports selected via out_ports
                produced = outNode.columns_setup()
                selected = {name: produced[name]
                            for name in produced.keys()
                            if name in out_ports}
                out_columns.update(fix_port_name(selected, outNode.uid))

            # the callbacks above fill required / out_columns in place
            self._make_sub_graph_connection(sub_graph, inputNode_fun,
                                            outNode_fun)

        self.required = required
        cache_columns[cache_key] = out_columns
        return out_columns
コード例 #7
0
ファイル: handlers.py プロジェクト: idanre1/gQuant
 def post(self):
     # The JSON request body is a dictionary whose "path" entry is the
     # taskgraph file to load; the reply is the JSON dump of get_nodes().
     request = self.get_json_body()
     graph = TaskGraph.load_taskgraph(request['path'])
     self.finish(json.dumps(get_nodes(graph)))
コード例 #8
0
    def process(self, inputs):
        """
        Composite computation: run the configured sub taskgraph, feeding it
        the data received by this node.

        Arguments
        -------
         inputs: mapping
            input data looked up with "<sub node uid>@<port name>" keys.
        Returns
        -------
        dict
            results of the sub taskgraph run keyed by
            "<sub node uid>@<port name>"; empty dict when no 'taskgraph'
            is configured.
        """
        if 'taskgraph' in self.conf:
            task_graph = TaskGraph.load_taskgraph(
                get_file_path(self.conf['taskgraph']))
            task_graph.build()

            # filled in place by the two callbacks below
            outputLists = []
            replaceObj = {}
            input_feeders = []

            def inputNode_fun(inputNode, in_ports):
                inports = inputNode.ports_setup().inports

                # Source node without input ports that hands the data from
                # `inputs` addressed to inputNode over to the sub graph.
                class InputFeed(Node):

                    def meta_setup(self):
                        # forward the metadata of whatever is currently
                        # connected to inputNode
                        output = {}
                        for inp in inputNode.inputs:
                            output[inp['to_port']] = inp[
                                'from_node'].meta_setup().outports[
                                    inp['from_port']]
                        # it will be something like { input_port: columns }
                        return MetaData(inports={}, outports=output)

                    def ports_setup(self):
                        # it will be something like { input_port: types }
                        return NodePorts(inports={}, outports=inports)

                    def conf_schema(self):
                        return ConfSchema()

                    def process(self, empty):
                        # only ports that actually received data are emitted
                        output = {}
                        for key in inports.keys():
                            if inputNode.uid+'@'+key in inputs:
                                output[key] = inputs[inputNode.uid+'@'+key]
                        return output

                # a fresh id per feeder task spec
                uni_id = str(uuid.uuid1())
                obj = {
                    TaskSpecSchema.task_id: uni_id,
                    TaskSpecSchema.conf: {},
                    TaskSpecSchema.node_type: InputFeed,
                    TaskSpecSchema.inputs: []
                }
                input_feeders.append(obj)
                # rewire inputNode: ports that received data now read from
                # the feeder ("<feeder id>.<port>")
                newInputs = {}
                for key in inports.keys():
                    if inputNode.uid+'@'+key in inputs:
                        newInputs[key] = uni_id+'.'+key
                for inp in inputNode.inputs:
                    if inp['to_port'] not in in_ports:
                        # need to keep the old connections
                        newInputs[inp['to_port']] = (inp['from_node'].uid
                                                     + '.' + inp['from_port'])
                replaceObj.update({inputNode.uid: {
                    TaskSpecSchema.inputs: newInputs}
                })

            def outNode_fun(outNode, out_ports):
                # NOTE: the out_ports argument is rebound here, so the value
                # passed in by the caller is not used
                out_ports = outNode.ports_setup().outports
                # fixed_outports = fix_port_name(out_ports, outNode.uid)
                for key in out_ports.keys():
                    # request only outputs that are connected downstream
                    if self.outport_connected(outNode.uid+'@'+key):
                        outputLists.append(outNode.uid+'.'+key)

            self._make_sub_graph_connection(task_graph,
                                            inputNode_fun, outNode_fun)

            # the feeders must be part of the graph before it is run
            task_graph.extend(input_feeders)
            self.update_replace(replaceObj, task_graph)
            result = task_graph.run(outputLists, replace=replaceObj)
            output = {}
            for key in result.get_keys():
                # turn the '.' separated result keys into '@' separated ones
                splits = key.split('.')
                output['@'.join(splits)] = result[key]
            return output
        else:
            return {}
コード例 #9
0
            def search_fun(config, checkpoint_dir=None):
                # Load the configured sub taskgraph, feed it the data held
                # in data_store, apply `config` through
                # update_conf_for_search and run it. Nothing is returned.
                # checkpoint_dir is not used in this body.
                # NOTE(review): data_store, train_id and self come from the
                # enclosing scope, which is not visible here.
                myinputs = {}
                for key in data_store.keys():
                    v = ray.get(data_store[key])
                    # pandas dataframes are converted to cudf dataframes,
                    # everything else is passed through unchanged
                    if isinstance(v, pandas.DataFrame):
                        myinputs[key] = cudf.from_pandas(v)
                    else:
                        myinputs[key] = v
                task_graph = TaskGraph.load_taskgraph(
                    get_file_path(self.conf['taskgraph']))
                task_graph.build()

                # the only requested output is the checkpoint_dir port of
                # the train_id node
                outputLists = [train_id + '.' + 'checkpoint_dir']
                # filled in place by inputNode_fun below
                replaceObj = {}
                input_feeders = []

                def inputNode_fun(inputNode, in_ports):
                    inports = inputNode.ports_setup().inports

                    # Source node without input ports that hands the data
                    # from myinputs addressed to inputNode to the sub graph.
                    class InputFeed(Node):
                        def meta_setup(self):
                            # forward the metadata of whatever is currently
                            # connected to inputNode
                            output = {}
                            for inp in inputNode.inputs:
                                output[inp['to_port']] = inp[
                                    'from_node'].meta_setup()[inp['from_port']]
                            # it will be something like { input_port: columns }
                            return output

                        def ports_setup(self):
                            # it will be something like { input_port: types }
                            return NodePorts(inports={}, outports=inports)

                        def conf_schema(self):
                            return ConfSchema()

                        def process(self, empty):
                            # only ports that have data in myinputs are
                            # emitted
                            output = {}
                            for key in inports.keys():
                                if (inputNode.uid + '@' + key in myinputs):
                                    output[key] = myinputs[inputNode.uid +
                                                           '@' + key]
                            return output

                    # a fresh id per feeder task spec
                    uni_id = str(uuid.uuid1())
                    obj = {
                        TaskSpecSchema.task_id: uni_id,
                        TaskSpecSchema.conf: {},
                        TaskSpecSchema.node_type: InputFeed,
                        TaskSpecSchema.inputs: []
                    }
                    input_feeders.append(obj)
                    # rewire inputNode: ports with data in myinputs now read
                    # from the feeder ("<feeder id>.<port>")
                    newInputs = {}
                    for key in inports.keys():
                        if inputNode.uid + '@' + key in myinputs:
                            newInputs[key] = uni_id + '.' + key
                    for inp in inputNode.inputs:
                        if inp['to_port'] not in in_ports:
                            # need to keep the old connections
                            newInputs[inp['to_port']] = (inp['from_node'].uid +
                                                         '.' +
                                                         inp['from_port'])
                    replaceObj.update(
                        {inputNode.uid: {
                            TaskSpecSchema.inputs: newInputs
                        }})

                def outNode_fun(outNode, out_ports):
                    # output nodes need no handling: the requested output is
                    # fixed in outputLists above
                    pass

                self._make_sub_graph_connection(task_graph, inputNode_fun,
                                                outNode_fun)

                # the feeders must be part of the graph before it is run
                task_graph.extend(input_feeders)
                self.update_conf_for_search(replaceObj, task_graph, config)
                task_graph.run(outputLists, replace=replaceObj)
コード例 #10
0
ファイル: compositeNode.py プロジェクト: zfy1989lee/gQuant
    def conf_schema(self):
        """
        Build the configuration schema of the composite node.

        The result is cached in ``cache_schema`` under the key returned by
        ``self._compute_hash_key()``.

        When a 'taskgraph' is configured, the sub graph is loaded and built
        and the 'input', 'output' and 'subnode_ids' items are restricted to
        the "<node id>.<port>" names / node ids found in it. When
        'subnode_ids' is configured as well, the conf schema of each listed
        sub node that exists in the graph is exposed under 'subnodes_conf'.
        Without a 'taskgraph' there is no graph to look the sub nodes up
        in, so 'subnode_ids' is ignored.

        Returns
        -------
        ConfSchema
            the JSON schema and the matching UI schema
        """
        cache_key = self._compute_hash_key()
        if cache_key in cache_schema:
            # print('cache hit')
            return cache_schema[cache_key]
        # named schema_json so the stdlib ``json`` module is not shadowed
        schema_json = {
            "title": "Composite Node configure",
            "type": "object",
            "description": """Use a sub taskgraph as a composite node""",
            "properties": {
                "taskgraph": {
                    "type": "string",
                    "description": "the taskgraph filepath"
                },
                "input": {
                    "type": "array",
                    "description": "the input node ids",
                    "items": {
                        "type": "string"
                    }
                },
                "output": {
                    "type": "array",
                    "description": "the output node ids",
                    "items": {
                        "type": "string"
                    }
                },
                "subnode_ids": {
                    "title":
                    self.uid + " subnode ids",
                    "type":
                    "array",
                    "items": {
                        "type": "string"
                    },
                    "description":
                    """sub graph node ids that need
                    to be reconfigured"""
                },
                "subnodes_conf": {
                    "title": self.uid + " subnodes configuration",
                    "type": "object",
                    "properties": {}
                }
            },
            "required": ["taskgraph"],
        }
        ui = {
            "taskgraph": {
                "ui:widget": "TaskgraphSelector"
            },
            "subnodes_conf": {}
        }
        # stays None when no taskgraph is configured, so the sub node
        # section below can be skipped instead of raising UnboundLocalError
        task_graph = None
        if 'taskgraph' in self.conf:
            task_graph = TaskGraph.load_taskgraph(
                get_file_path(self.conf['taskgraph']))
            replacementObj = {}
            self.update_replace(replacementObj)
            task_graph.build(replace=replacementObj)

            # no per-node work is needed here; the connection step is
            # still run on the graph before the ports are read
            def inputNode_fun(inputNode, in_ports):
                pass

            def outNode_fun(outNode, out_ports):
                pass

            self._make_sub_graph_connection(task_graph, inputNode_fun,
                                            outNode_fun)

            ids_in_graph = []
            in_ports = []
            out_ports = []
            for t in task_graph:
                node_id = t.get('id')
                # tasks with an empty id are skipped
                if node_id != '':
                    node = task_graph[node_id]
                    all_ports = node.ports_setup()
                    for port in all_ports.inports.keys():
                        in_ports.append(node_id + '.' + port)
                    for port in all_ports.outports.keys():
                        out_ports.append(node_id + '.' + port)
                    ids_in_graph.append(node_id)
            properties = schema_json['properties']
            properties['input']['items']['enum'] = in_ports
            properties['output']['items']['enum'] = out_ports
            properties['subnode_ids']['items']['enum'] = ids_in_graph
        if task_graph is not None and 'subnode_ids' in self.conf:
            subnodes_props = schema_json['properties'][
                "subnodes_conf"]['properties']
            for subnodeId in self.conf['subnode_ids']:
                # ids that are not in the graph are silently ignored
                if subnodeId in task_graph:
                    nodeObj = task_graph[subnodeId]
                    schema = nodeObj.conf_schema()
                    subnodes_props[subnodeId] = {
                        "type": "object",
                        "properties": {
                            "conf": schema.json
                        }
                    }
                    ui["subnodes_conf"].update(
                        {subnodeId: {
                            'conf': schema.ui
                        }})
        out_schema = ConfSchema(json=schema_json, ui=ui)
        cache_schema[cache_key] = out_schema
        return out_schema