def _case(_, gen_params):
    """Compare formatted generated parameters with the expected targets.

    ``ignores``, ``target_params`` and ``self`` are free names resolved
    outside this function.

    :param _: unused positional argument
    :param gen_params: mapping of generated parameter names to raw values
    """
    actual = {}
    for key, value in six.iteritems(gen_params):
        # Ignored keys and falsy values take no part in the comparison.
        if key in ignores or not value:
            continue
        actual[key] = BaseNodeRunner._format_value(value)

    expected = {}
    for key, value in six.iteritems(target_params):
        if key not in ignores:
            expected[key] = value

    self.assertDictEqual(expected, actual)
Example #2
0
    def job_conf(self, conf=None):
        """Build a ``task_param_pb.JobConf`` message from layered settings.

        Later layers override earlier ones:

        1. built-in defaults taken from ``options.cupid``, ``self.odps`` and
           ``self.project``;
        2. the caller-supplied ``conf`` mapping, if truthy;
        3. entries of ``options.cupid.settings`` whose key starts with one of
           ``_CUPID_CONF_PREFIXES``.

        :param conf: optional mapping merged over the defaults
        :return: ``task_param_pb.JobConf`` with one ``JobConfItem`` per entry
            whose value is not ``None``; values are passed through ``str``
        """
        defaults = [
            ('odps.task.major.version', options.cupid.major_task_version),
            ('odps.access.id',
             getattr(self.odps.account, 'access_id', None)),
            ('odps.access.key',
             getattr(self.odps.account, 'secret_access_key', None)),
            ('odps.end.point', self.odps.endpoint),
            ('odps.project.name', self.project),
            ('odps.moye.am.cores', '400'),
            ('odps.cupid.proxy.end.point', options.cupid.proxy_endpoint),
        ]
        conf_items = dict(defaults)

        if conf:
            conf_items.update(conf)

        global_settings = options.cupid.settings or {}
        for key, value in six.iteritems(global_settings):
            # Only keys carrying a recognised prefix are forwarded.
            if any(key.startswith(prefix) for prefix in _CUPID_CONF_PREFIXES):
                conf_items[key] = value

        pb_items = []
        for key, value in six.iteritems(conf_items):
            if value is None:
                # Unset entries are left out of the message entirely.
                continue
            pb_items.append(task_param_pb.JobConfItem(key=key, value=str(value)))
        return task_param_pb.JobConf(jobconfitem=pb_items)
Example #3
0
 def _case(_, gen_params):
     """Compare formatted generated parameters with project-qualified targets.

     Expected values whose key starts with ``input`` and ends with
     ``TableName`` get ``self.odps.project`` prepended when they contain no
     ``.``. ``ignores``, ``target_params`` and ``self`` are free names
     resolved outside this function.

     :param _: unused positional argument
     :param gen_params: mapping of generated parameter names to raw values
     """
     actual = dict(
         (key, BaseNodeRunner._format_value(value))
         for key, value in six.iteritems(gen_params)
         if key not in ignores and value
     )

     expected = {}
     for key, value in six.iteritems(target_params):
         if key not in ignores:
             is_input_table = key.startswith('input') and key.endswith('TableName')
             if is_input_table and '.' not in value:
                 # Bare table names are qualified with the current project.
                 value = self.odps.project + '.' + value
             expected[key] = value

     self.assertDictEqual(expected, actual)
    def _build_canonical_str(self, url_components, req):
        """Assemble the newline-joined string that is signed for a request.

        The result consists of, in order: the request method, the values of
        the headers to sign (sorted by lower-cased name; ``x-odps-`` entries
        are rendered as ``name:value``, all others as the bare value) and the
        canonical resource (path plus sorted query string).

        Side effect: when the request has no ``Date`` header, one is generated
        with ``utils.formatdate(usegmt=True)`` and stored in ``req.headers``.

        :param url_components: parsed URL exposing ``path`` and ``query``
        :param req: request exposing ``method`` and a mutable ``headers`` mapping
        :return: the string to sign
        """
        # Build signing string
        lines = [req.method, ]
        signed = dict()

        resource = url_components.path
        query_params = dict()
        if url_components.query:
            pairs = sorted(parse_qsl(url_components.query, True),
                           key=lambda pair: pair[0])
            # Every query parameter name must occur exactly once.
            assert len(pairs) == len(set(pair[0] for pair in pairs))
            query_params = dict(pairs)

            rendered = []
            for name, value in pairs:
                # Parameters with an empty value are written as the bare name.
                if value == '':
                    rendered.append(name)
                else:
                    rendered.append('='.join((name, value)))
            resource = '%s?%s' % (resource, '&'.join(rendered))

        headers = req.headers
        LOG.debug('headers before signing: %s' % headers)
        for name, value in six.iteritems(headers):
            lowered = name.lower()
            if lowered in ('content-type', 'content-md5') or lowered.startswith('x-odps'):
                signed[lowered] = value
        # These two headers always take part in signing, empty if absent.
        signed.setdefault('content-type', '')
        signed.setdefault('content-md5', '')

        date_str = headers.get('Date')
        if not date_str:
            date_str = utils.formatdate(usegmt=True)
            headers['Date'] = date_str
        signed['date'] = date_str

        # Query parameters prefixed with ``x-odps-`` are signed like headers.
        for name, value in six.iteritems(query_params):
            if name.startswith('x-odps-'):
                signed[name] = value

        ordered = compat.OrderedDict((name, signed[name])
                                     for name in sorted(signed))
        LOG.debug('headers to sign: %s' % ordered)
        for name, value in six.iteritems(ordered):
            if name.startswith('x-odps-'):
                lines.append('%s:%s' % (name, value))
            else:
                lines.append(value)

        lines.append(resource)
        return '\n'.join(lines)
 def typed(v, type_code=None):
     """Wrap a value, or every leaf of a nested dict/list, in a typed record.

     :param v: a dict, a list, or a leaf value
     :param type_code: value used for ``dataType``; when falsy, the code is
         looked up in ``PREDICT_TYPE_CODES`` by the leaf's Python type name
     :return: the same dict/list nesting as ``v`` with each leaf replaced by
         ``{'dataType': ..., 'dataValue': leaf}``
     """
     if isinstance(v, dict):
         wrapped = {}
         for key, value in six.iteritems(v):
             wrapped[key] = typed(value, type_code)
         return wrapped
     if isinstance(v, list):
         return [typed(item, type_code) for item in v]

     if type_code:
         data_type = type_code
     else:
         data_type = PREDICT_TYPE_CODES[type(v).__name__]
     return {'dataType': data_type, 'dataValue': v}
Example #6
0
 def actual_exec(self):
     """Run the node's action, or print a summary when it has none.

     The summary contains ``self._node.message`` and, for every DATA input
     port with a truthy object in the runner context's ``_obj_container``,
     a ``name<-table`` entry.
     """
     in_tables = []
     for port_name, port in six.iteritems(self._node.inputs):
         if port.type != PortType.DATA:
             continue
         bound = RunnerContext.instance()._obj_container.get(port.obj_uuid)
         if bound:
             in_tables.append((port_name, bound.table))

     message = self._node.message
     table_desc = ', '.join('%s<-%s' % pair for pair in in_tables)
     msg = 'Message: %s Input tables: %s' % (message, table_desc)

     if self._node.action is None:
         print(msg)
     else:
         self._node.action(self._node)
Example #7
0
 def actual_exec(self):
     """Invoke ``self._node.action`` if set; otherwise print a description.

     The description lists the node message followed by ``name<-table``
     pairs for the DATA input ports that resolve to a truthy object in the
     runner context's ``_obj_container``.
     """
     def iter_input_tables():
         # Yield (port name, table) for each resolvable DATA input port.
         for name, port in six.iteritems(self._node.inputs):
             if port.type != PortType.DATA:
                 continue
             obj = RunnerContext.instance()._obj_container.get(port.obj_uuid)
             if obj:
                 yield name, obj.table

     # Materialise first so all lookups happen before the message is read.
     in_tables = list(iter_input_tables())
     msg = 'Message: %s Input tables: %s' % (
         self._node.message,
         ', '.join(['%s<-%s' % entry for entry in in_tables]),
     )

     action = self._node.action
     if action is not None:
         action(self._node)
     else:
         print(msg)
 def get_odps_type(p_type):
     """Return the first ODPS type whose builtin type covers ``p_type.type``.

     Candidates come from ``o_types._odps_primitive_to_builtin_types`` in its
     iteration order.

     :param p_type: object exposing a ``type`` attribute holding a class
     :return: the matching key of the mapping, or ``None`` when nothing matches
     """
     candidates = (
         odps_type
         for odps_type, py_type in six.iteritems(
             o_types._odps_primitive_to_builtin_types)
         if issubclass(p_type.type, py_type)
     )
     return next(candidates, None)
Example #9
0
 def _case(_, gen_params):
     """Assert that the formatted truthy parameters equal ``target_params``.

     ``target_params`` and ``self`` are free names resolved outside this
     function.

     :param _: unused positional argument
     :param gen_params: mapping of generated parameter names to raw values
     """
     formatted = {}
     for name, value in six.iteritems(gen_params):
         if value:
             formatted[name] = BaseNodeEngine._format_value(value)
     self.assertDictEqual(target_params, formatted)
Example #10
0
 def _case(_, gen_params):
     """Print the ``repr`` of the formatted truthy generated parameters.

     :param _: unused positional argument
     :param gen_params: mapping of generated parameter names to raw values
     """
     formatted = dict(
         (name, BaseNodeEngine._format_value(value))
         for name, value in six.iteritems(gen_params)
         if value
     )
     print(repr(formatted))
Example #11
0
    def test_base_dag_node(self):
        # Builds a six-node graph through ``mock_action`` and verifies node
        # parameters, port types, edge wiring and the ``show_steps`` text.
        self.maxDiff = None

        # NOTE(review): the numeric suffixes in the expected steps text
        # presumably follow creation order, so keep these statements in order.
        self.create_ionosphere(IONOSPHERE_TABLE)
        df1 = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
        node1 = _get_bind_node(df1)
        df2 = self.mock_action(df1, msg='Node2')
        node2 = _get_bind_node(df2)

        df31, df32, _ = self.mock_action([df1, df2], 3, msg='Node3')
        node3 = _get_bind_node(df31)

        df41, model42 = self.mock_action(df31, 'dm', msg='Node4')
        self.assertIn('DFAdapter', repr(_get_bind_port(df41)))
        self.assertIn('Model', repr(_get_bind_port(model42)))
        node4 = _get_bind_node(df41)

        model5 = self.mock_action([model42, df32], 'm', msg='Node5')
        node5 = _get_bind_node(model5)

        df6 = self.mock_action([df41, model5], 1, msg='Node6')
        node6 = _get_bind_node(df6)

        # test params
        expected_messages = (
            (node2, 'Node2'),
            (node3, 'Node3'),
            (node4, 'Node4'),
            (node5, 'Node5'),
            (node6, 'Node6'),
        )
        for node, message in expected_messages:
            self.assertDictEqual(node.parameters, dict(message=message))

        # test node inputs and outputs
        def port_types(eps):
            # Map each port name to its port type.
            return dict((nm, ep.type) for nm, ep in six.iteritems(eps))

        data, model = PortType.DATA, PortType.MODEL
        expected_ports = (
            (node2, dict(input1=data), dict(output1=data)),
            (node3, dict(input1=data, input2=data),
             dict(output1=data, output2=data, output3=data)),
            (node4, dict(input1=data), dict(output1=data, output2=model)),
            (node5, dict(input1=model, input2=data), dict(output1=model)),
            (node6, dict(input1=data, input2=model), dict(output1=data)),
        )
        for node, inputs, outputs in expected_ports:
            self.assertDictEqual(port_types(node.inputs), inputs)
            self.assertDictEqual(port_types(node.outputs), outputs)

        # test links
        def expect_in_edges(dest_node, dest_ep, *sources):
            # Edges arriving at ``dest_ep`` must match ``sources`` in order.
            expected = []
            for src_node, src_ep in sources:
                expected.append(RunnerEdge(src_node, src_ep, dest_node, dest_ep))
            self.assertListEqual(dest_node.input_edges[dest_ep], expected)

        def expect_out_edges(src_node, src_ep, *targets):
            # Edges leaving ``src_ep`` must match ``targets`` in order.
            expected = []
            for dest_node, dest_ep in targets:
                expected.append(RunnerEdge(src_node, src_ep, dest_node, dest_ep))
            self.assertListEqual(src_node.output_edges[src_ep], expected)

        expect_out_edges(node1, 'output', (node2, 'input1'), (node3, 'input1'))

        expect_in_edges(node2, 'input1', (node1, 'output'))
        expect_out_edges(node2, 'output1', (node3, 'input2'))

        expect_in_edges(node3, 'input1', (node1, 'output'))
        expect_in_edges(node3, 'input2', (node2, 'output1'))
        expect_out_edges(node3, 'output1', (node4, 'input1'))
        expect_out_edges(node3, 'output2', (node5, 'input2'))

        expect_in_edges(node4, 'input1', (node3, 'output1'))
        expect_out_edges(node4, 'output1', (node6, 'input1'))
        expect_out_edges(node4, 'output2', (node5, 'input1'))

        expect_in_edges(node5, 'input1', (node4, 'output2'))
        expect_in_edges(node5, 'input2', (node3, 'output2'))

        expect_in_edges(node6, 'input1', (node4, 'output1'))
        expect_in_edges(node6, 'input2', (node5, 'output1'))

        # test rendered steps
        steps_text = textwrap.dedent("""
        DataFrame_1 -> output
        MockNode_2(input1=DataFrame_1:output) -> output1
        MockNode_3(input1=DataFrame_1:output, input2=MockNode_2:output1) -> output1, output2, output3
        MockNode_4(input1=MockNode_3:output1) -> output1, output2
        MockNode_5(input1=MockNode_4:output2, input2=MockNode_3:output2) -> output1
        MockNode_6(input1=MockNode_4:output1, input2=MockNode_5:output1) -> output1(*)
        """).strip()

        rendered = df6.show_steps().text.strip()
        self.assertEqual(rendered, steps_text)
 def get_odps_type(p_type):
     """Look up the ODPS data type matching ``p_type.type``.

     Walks ``o_types._odps_primitive_to_builtin_types`` and returns the key
     of the first entry whose value is a superclass of ``p_type.type``.

     :param p_type: object exposing a ``type`` attribute holding a class
     :return: the matching ODPS data type, or ``None`` if there is none
     """
     type_map = o_types._odps_primitive_to_builtin_types
     for odps_type, py_type in six.iteritems(type_map):
         if not issubclass(p_type.type, py_type):
             continue
         return odps_type
     return None
Example #13
0
 def _case(_, gen_params):
     """Print the ``repr`` of the formatted truthy generated parameters.

     :param _: unused positional argument
     :param gen_params: mapping of generated parameter names to raw values
     """
     formatted = {}
     for name, value in six.iteritems(gen_params):
         if not value:
             # Falsy values are dropped before formatting.
             continue
         formatted[name] = BaseNodeRunner._format_value(value)
     print(repr(formatted))
    def test_base_runner_node(self):
        # Wires six nodes together with ``mock_action`` and checks their
        # parameters, port types, input/output edges and ``show_steps`` text.
        self.maxDiff = None

        # NOTE(review): the expected steps text numbers the nodes, presumably
        # by creation order; do not reorder the construction statements.
        self.create_ionosphere(IONOSPHERE_TABLE)
        df1 = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
        node1 = _get_bind_node(df1)
        df2 = self.mock_action(df1, msg='Node2')
        node2 = _get_bind_node(df2)

        df31, df32, _ = self.mock_action([df1, df2], 3, msg='Node3')
        node3 = _get_bind_node(df31)

        df41, model42 = self.mock_action(df31, 'dm', msg='Node4')
        self.assertIn('DFAdapter', repr(_get_bind_port(df41)))
        self.assertIn('Model', repr(_get_bind_port(model42)))
        node4 = _get_bind_node(df41)

        model5 = self.mock_action([model42, df32], 'm', msg='Node5')
        node5 = _get_bind_node(model5)

        df6 = self.mock_action([df41, model5], 1, msg='Node6')
        node6 = _get_bind_node(df6)

        # test params
        def check_message(node, message):
            self.assertDictEqual(node.parameters, dict(message=message))

        check_message(node2, 'Node2')
        check_message(node3, 'Node3')
        check_message(node4, 'Node4')
        check_message(node5, 'Node5')
        check_message(node6, 'Node6')

        # test node inputs and outputs
        def types_of(ports):
            # Map each port name to its port type.
            result = {}
            for port_name, port in six.iteritems(ports):
                result[port_name] = port.type
            return result

        def check_ports(node, inputs, outputs):
            self.assertDictEqual(types_of(node.inputs), inputs)
            self.assertDictEqual(types_of(node.outputs), outputs)

        check_ports(node2,
                    dict(input1=PortType.DATA),
                    dict(output1=PortType.DATA))
        check_ports(node3,
                    dict(input1=PortType.DATA, input2=PortType.DATA),
                    dict(output1=PortType.DATA, output2=PortType.DATA, output3=PortType.DATA))
        check_ports(node4,
                    dict(input1=PortType.DATA),
                    dict(output1=PortType.DATA, output2=PortType.MODEL))
        check_ports(node5,
                    dict(input1=PortType.MODEL, input2=PortType.DATA),
                    dict(output1=PortType.MODEL))
        check_ports(node6,
                    dict(input1=PortType.DATA, input2=PortType.MODEL),
                    dict(output1=PortType.DATA))

        # test links
        # Each row: direction, node, endpoint on that node, and the ordered
        # (peer node, peer endpoint) pairs expected on the other side.
        wiring = (
            ('out', node1, 'output', ((node2, 'input1'), (node3, 'input1'))),

            ('in', node2, 'input1', ((node1, 'output'),)),
            ('out', node2, 'output1', ((node3, 'input2'),)),

            ('in', node3, 'input1', ((node1, 'output'),)),
            ('in', node3, 'input2', ((node2, 'output1'),)),
            ('out', node3, 'output1', ((node4, 'input1'),)),
            ('out', node3, 'output2', ((node5, 'input2'),)),

            ('in', node4, 'input1', ((node3, 'output1'),)),
            ('out', node4, 'output1', ((node6, 'input1'),)),
            ('out', node4, 'output2', ((node5, 'input1'),)),

            ('in', node5, 'input1', ((node4, 'output2'),)),
            ('in', node5, 'input2', ((node3, 'output2'),)),

            ('in', node6, 'input1', ((node4, 'output1'),)),
            ('in', node6, 'input2', ((node5, 'output1'),)),
        )
        for direction, node, ep, peers in wiring:
            if direction == 'in':
                expected = [RunnerEdge(peer, peer_ep, node, ep)
                            for peer, peer_ep in peers]
                self.assertListEqual(node.input_edges[ep], expected)
            else:
                expected = [RunnerEdge(node, ep, peer, peer_ep)
                            for peer, peer_ep in peers]
                self.assertListEqual(node.output_edges[ep], expected)

        # test rendered steps
        expected_steps = textwrap.dedent("""
        DataFrame_1 -> output
        MockNode_2(input1=DataFrame_1:output) -> output1
        MockNode_3(input1=DataFrame_1:output, input2=MockNode_2:output1) -> output1, output2, output3
        MockNode_4(input1=MockNode_3:output1) -> output1, output2
        MockNode_5(input1=MockNode_4:output2, input2=MockNode_3:output2) -> output1
        MockNode_6(input1=MockNode_4:output1, input2=MockNode_5:output1) -> output1(*)
        """)
        steps_text = expected_steps.strip()

        steps_obj = df6.show_steps()
        self.assertEqual(steps_obj.text.strip(), steps_text)
Example #15
0
 def after_exec(self):
     """Finish the node and record its outputs in the runner context.

     Calls ``self._node.after_exec(self._odps, True)``, then stores a mapping
     of output port name to ``self.get_output_object(port)`` in the context's
     ``_node_outputs`` under ``self._node_hash``.
     """
     # Imported locally, as in the original code.
     from odps.runner import RunnerContext

     context = RunnerContext.instance()
     self._node.after_exec(self._odps, True)

     outputs = {}
     for port_name, port in six.iteritems(self._node.outputs):
         outputs[port_name] = self.get_output_object(port)
     context._node_outputs[self._node_hash] = outputs