def _case(_, gen_params):
    """Assert that the generated parameters match ``target_params``.

    ``self``, ``ignores`` and ``target_params`` are taken from the enclosing
    scope (not visible in this block). The first positional argument is
    unused.

    Generated entries are dropped when their key is in ``ignores`` or their
    value is falsy; the remaining values go through
    ``BaseNodeRunner._format_value``. Target entries are only filtered by
    ``ignores``.
    """
    formatted = dict()
    for name, value in six.iteritems(gen_params):
        if name in ignores or not value:
            continue
        formatted[name] = BaseNodeRunner._format_value(value)

    expected = dict()
    for name, value in six.iteritems(target_params):
        if name not in ignores:
            expected[name] = value

    self.assertDictEqual(expected, formatted)
def job_conf(self, conf=None):
    """Assemble a ``task_param_pb.JobConf`` message for this session.

    Settings are layered in this order, later layers overriding earlier ones:

    1. built-in defaults taken from ``options.cupid``, ``self.odps`` and
       ``self.project``;
    2. the entries of ``conf``, when it is truthy;
    3. entries of ``options.cupid.settings`` whose key starts with one of
       ``_CUPID_CONF_PREFIXES``.

    Entries whose value is ``None`` are left out; every other value is
    converted with ``str()``.

    :param conf: optional mapping of extra configuration items
    :return: a ``task_param_pb.JobConf`` holding one ``JobConfItem`` per entry
    """
    merged = dict()
    merged['odps.task.major.version'] = options.cupid.major_task_version
    # the account object may lack these attributes, hence getattr with None
    merged['odps.access.id'] = getattr(self.odps.account, 'access_id', None)
    merged['odps.access.key'] = getattr(self.odps.account, 'secret_access_key', None)
    merged['odps.end.point'] = self.odps.endpoint
    merged['odps.project.name'] = self.project
    merged['odps.moye.am.cores'] = '400'
    merged['odps.cupid.proxy.end.point'] = options.cupid.proxy_endpoint

    if conf:
        merged.update(conf)

    for key, value in six.iteritems(options.cupid.settings or {}):
        if any(key.startswith(prefix) for prefix in _CUPID_CONF_PREFIXES):
            merged[key] = value

    pb_items = []
    for key, value in six.iteritems(merged):
        if value is None:
            continue
        pb_items.append(task_param_pb.JobConfItem(key=key, value=str(value)))
    return task_param_pb.JobConf(jobconfitem=pb_items)
def _case(_, gen_params):
    """Assert that the generated parameters match the qualified targets.

    ``self``, ``ignores`` and ``target_params`` are taken from the enclosing
    scope (not visible in this block). The first positional argument is
    unused.

    Generated entries are dropped when their key is in ``ignores`` or their
    value is falsy, and the rest are passed through
    ``BaseNodeRunner._format_value``. Target values for keys of the form
    ``input...TableName`` that contain no ``'.'`` are prefixed with
    ``self.odps.project`` before the comparison.
    """
    def _qualify(name, value):
        # Only input table names without a '.' receive the project prefix.
        is_input_table = name.startswith('input') and name.endswith('TableName')
        if is_input_table and '.' not in value:
            return self.odps.project + '.' + value
        return value

    formatted = dict(
        (name, BaseNodeRunner._format_value(value))
        for name, value in six.iteritems(gen_params)
        if name not in ignores and value
    )
    expected = dict(
        (name, _qualify(name, value))
        for name, value in six.iteritems(target_params)
        if name not in ignores
    )
    self.assertDictEqual(expected, formatted)
def _build_canonical_str(self, url_components, req):
    """Build the newline-joined string that is signed for ``req``.

    The result consists of, in order:

    * ``req.method``;
    * one line per header to sign, iterated in sorted key order. Keys that
      start with ``'x-odps-'`` are rendered as ``key:value``; every other
      key contributes its value only;
    * the canonical resource: ``url_components.path``, followed by ``?`` and
      the key-sorted query string when a query is present. Parameters with
      an empty value are rendered as the bare key.

    Headers to sign are ``content-type``, ``content-md5`` (both default to
    ``''``), ``date``, any request header whose lower-cased name starts with
    ``'x-odps'``, and any query parameter whose name starts with
    ``'x-odps-'``.

    Side effect: when ``req.headers`` has no truthy ``'Date'`` entry, one is
    generated with ``utils.formatdate(usegmt=True)`` and stored back into
    ``req.headers``.

    :param url_components: object exposing ``path`` and ``query``
    :param req: request exposing ``method`` and a mutable ``headers`` mapping
    :return: the canonical string
    """
    lines = [req.method]

    resource = url_components.path
    query_params = dict()
    if url_components.query:
        pairs = sorted(parse_qsl(url_components.query, True), key=lambda pair: pair[0])
        # duplicated query keys are not expected here
        assert len(pairs) == len(set(pair[0] for pair in pairs))
        query_params = dict(pairs)
        rendered = []
        for pair in pairs:
            # an empty value is rendered as the bare key, without '='
            parts = pair if pair[1] != '' else (pair[0], )
            rendered.append('='.join(parts))
        resource = '%s?%s' % (resource, '&'.join(rendered))

    headers = req.headers
    LOG.debug('headers before signing: %s' % headers)

    signed = dict()
    for name, value in six.iteritems(headers):
        lowered = name.lower()
        if lowered in ('content-type', 'content-md5') or lowered.startswith('x-odps'):
            signed[lowered] = value
    for required in ('content-type', 'content-md5'):
        signed.setdefault(required, '')

    date_str = headers.get('Date')
    if not date_str:
        date_str = utils.formatdate(usegmt=True)
        headers['Date'] = date_str
    signed['date'] = date_str

    for name, value in six.iteritems(query_params):
        if name.startswith('x-odps-'):
            signed[name] = value

    ordered = compat.OrderedDict([(name, signed[name]) for name in sorted(signed)])
    LOG.debug('headers to sign: %s' % ordered)
    for name, value in six.iteritems(ordered):
        if not name.startswith('x-odps-'):
            lines.append(value)
        else:
            lines.append('%s:%s' % (name, value))

    lines.append(resource)
    return '\n'.join(lines)
def typed(v, type_code=None):
    """Wrap ``v`` into ``{'dataType': ..., 'dataValue': ...}`` records.

    Dicts and lists are walked recursively: a dict keeps its keys and has
    each value wrapped, a list has each element wrapped. Any other value
    becomes a single record whose ``dataType`` is ``type_code`` when that is
    truthy, otherwise ``PREDICT_TYPE_CODES`` looked up by the value's type
    name.

    :param v: value, list or dict to wrap
    :param type_code: optional type code applied to every wrapped value
    :return: structure of the same shape with leaf values wrapped
    """
    if isinstance(v, dict):
        return dict(
            (name, typed(item, type_code)) for name, item in six.iteritems(v)
        )
    if isinstance(v, list):
        return [typed(item, type_code) for item in v]

    code = type_code or PREDICT_TYPE_CODES[type(v).__name__]
    return {'dataType': code, 'dataValue': v}
def actual_exec(self):
    """Run the node's action, or print a summary when it has none.

    For every input port of type ``PortType.DATA`` the bound object is
    looked up in ``RunnerContext.instance()._obj_container`` by
    ``obj_uuid``; ports with a truthy lookup result contribute a
    ``name<-table`` entry to the summary. The summary is printed only when
    ``self._node.action`` is ``None``; otherwise the action is called with
    the node.
    """
    bound_tables = []
    for port_name, port in six.iteritems(self._node.inputs):
        if port.type != PortType.DATA:
            continue
        endpoint = RunnerContext.instance()._obj_container.get(port.obj_uuid)
        if not endpoint:
            continue
        bound_tables.append((port_name, endpoint.table))

    msg = 'Message: %s Input tables: %s' % (
        self._node.message,
        ', '.join(['%s<-%s' % entry for entry in bound_tables]),
    )

    if self._node.action is None:
        print(msg)
    else:
        self._node.action(self._node)
def actual_exec(self):
    """Invoke ``self._node.action`` if set; otherwise print a description.

    The description contains ``self._node.message`` and a comma-separated
    list of ``name<-table`` pairs, one per ``PortType.DATA`` input whose
    ``obj_uuid`` resolves to a truthy object in
    ``RunnerContext.instance()._obj_container``.
    """
    def _iter_bound_tables():
        # Yield (input name, table) for each resolvable data input.
        for name, port in six.iteritems(self._node.inputs):
            if port.type != PortType.DATA:
                continue
            bound = RunnerContext.instance()._obj_container.get(port.obj_uuid)
            if bound:
                yield name, bound.table

    pairs = list(_iter_bound_tables())
    msg = 'Message: %s Input tables: %s' % (
        self._node.message, ', '.join('%s<-%s' % pair for pair in pairs))

    if self._node.action is not None:
        self._node.action(self._node)
        return
    print(msg)
def get_odps_type(p_type):
    """Return the first ODPS data type whose builtin type covers ``p_type.type``.

    Walks ``o_types._odps_primitive_to_builtin_types`` in its iteration
    order and returns the key of the first entry for which ``p_type.type``
    is a subclass of the mapped builtin type.

    :param p_type: object exposing a ``type`` attribute that is a class
    :return: the matching key, or ``None`` when nothing matches
    """
    candidates = six.iteritems(o_types._odps_primitive_to_builtin_types)
    for odps_type, python_type in candidates:
        if not issubclass(p_type.type, python_type):
            continue
        return odps_type
    return None
def _case(_, gen_params):
    """Assert that the truthy generated parameters equal ``target_params``.

    ``self`` and ``target_params`` are taken from the enclosing scope (not
    visible in this block). The first positional argument is unused. Falsy
    generated values are dropped and the rest are passed through
    ``BaseNodeEngine._format_value`` before the comparison.
    """
    formatted = dict()
    for name, value in six.iteritems(gen_params):
        if value:
            formatted[name] = BaseNodeEngine._format_value(value)
    self.assertDictEqual(target_params, formatted)
def _case(_, gen_params):
    """Print the ``repr`` of the truthy, formatted generated parameters.

    The first positional argument is unused. Falsy values are dropped and
    the remaining ones are passed through ``BaseNodeEngine._format_value``.
    """
    formatted = dict(
        (name, BaseNodeEngine._format_value(value))
        for name, value in six.iteritems(gen_params)
        if value
    )
    print(repr(formatted))
def test_base_dag_node(self):
    """Build a six-node graph of mock actions and verify its wiring.

    Checks, in order: the repr of the ports bound to node 4's outputs, each
    node's ``parameters``, the port types of each node's inputs and outputs,
    the input/output edges between nodes, and the text returned by
    ``show_steps()`` on the final DataFrame.
    """
    self.maxDiff = None
    self.create_ionosphere(IONOSPHERE_TABLE)

    src_df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    node1 = _get_bind_node(src_df)

    n2_df = self.mock_action(src_df, msg='Node2')
    node2 = _get_bind_node(n2_df)

    n3_df_a, n3_df_b, _ = self.mock_action([src_df, n2_df], 3, msg='Node3')
    node3 = _get_bind_node(n3_df_a)

    n4_df, n4_model = self.mock_action(n3_df_a, 'dm', msg='Node4')
    self.assertIn('DFAdapter', repr(_get_bind_port(n4_df)))
    self.assertIn('Model', repr(_get_bind_port(n4_model)))
    node4 = _get_bind_node(n4_df)

    n5_model = self.mock_action([n4_model, n3_df_b], 'm', msg='Node5')
    node5 = _get_bind_node(n5_model)

    n6_df = self.mock_action([n4_df, n5_model], 1, msg='Node6')
    node6 = _get_bind_node(n6_df)

    # parameters: every mock node only carries its message
    expected_messages = (
        (node2, 'Node2'),
        (node3, 'Node3'),
        (node4, 'Node4'),
        (node5, 'Node5'),
        (node6, 'Node6'),
    )
    for node, message in expected_messages:
        self.assertDictEqual(node.parameters, dict(message=message))

    # port types of node inputs and outputs
    def port_types(ports):
        return dict((name, port.type) for name, port in six.iteritems(ports))

    data, model = PortType.DATA, PortType.MODEL
    expected_ports = (
        (node2, dict(input1=data), dict(output1=data)),
        (node3, dict(input1=data, input2=data),
         dict(output1=data, output2=data, output3=data)),
        (node4, dict(input1=data), dict(output1=data, output2=model)),
        (node5, dict(input1=model, input2=data), dict(output1=model)),
        (node6, dict(input1=data, input2=model), dict(output1=data)),
    )
    for node, inputs, outputs in expected_ports:
        self.assertDictEqual(port_types(node.inputs), inputs)
        self.assertDictEqual(port_types(node.outputs), outputs)

    # links between nodes
    def assertInEdge(dest_node, dest_ep, *sources):
        expected = []
        for src_node, src_ep in sources:
            expected.append(RunnerEdge(src_node, src_ep, dest_node, dest_ep))
        self.assertListEqual(dest_node.input_edges[dest_ep], expected)

    def assertOutEdge(src_node, src_ep, *targets):
        expected = []
        for dest_node, dest_ep in targets:
            expected.append(RunnerEdge(src_node, src_ep, dest_node, dest_ep))
        self.assertListEqual(src_node.output_edges[src_ep], expected)

    assertOutEdge(node1, 'output', (node2, 'input1'), (node3, 'input1'))
    assertInEdge(node2, 'input1', (node1, 'output'))
    assertOutEdge(node2, 'output1', (node3, 'input2'))
    assertInEdge(node3, 'input1', (node1, 'output'))
    assertInEdge(node3, 'input2', (node2, 'output1'))
    assertOutEdge(node3, 'output1', (node4, 'input1'))
    assertOutEdge(node3, 'output2', (node5, 'input2'))
    assertInEdge(node4, 'input1', (node3, 'output1'))
    assertOutEdge(node4, 'output1', (node6, 'input1'))
    assertOutEdge(node4, 'output2', (node5, 'input1'))
    assertInEdge(node5, 'input1', (node4, 'output2'))
    assertInEdge(node5, 'input2', (node3, 'output2'))
    assertInEdge(node6, 'input1', (node4, 'output1'))
    assertInEdge(node6, 'input2', (node5, 'output1'))

    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file, on a single line. If show_steps() emits one step per line,
    # confirm that line breaks inside this literal were not lost.
    steps_text = textwrap.dedent(""" DataFrame_1 -> output MockNode_2(input1=DataFrame_1:output) -> output1 MockNode_3(input1=DataFrame_1:output, input2=MockNode_2:output1) -> output1, output2, output3 MockNode_4(input1=MockNode_3:output1) -> output1, output2 MockNode_5(input1=MockNode_4:output2, input2=MockNode_3:output2) -> output1 MockNode_6(input1=MockNode_4:output1, input2=MockNode_5:output1) -> output1(*) """).strip()

    steps = n6_df.show_steps()
    self.assertEqual(steps.text.strip(), steps_text)
def get_odps_type(p_type):
    """Map ``p_type.type`` to a key of the ODPS primitive-type table.

    The entries of ``o_types._odps_primitive_to_builtin_types`` are scanned
    in iteration order; the key of the first entry whose builtin type is a
    base class of ``p_type.type`` is returned.

    :param p_type: object exposing a ``type`` attribute that is a class
    :return: the matching key, or ``None`` when no entry matches
    """
    matches = (
        data_type
        for data_type, builtin_type
        in six.iteritems(o_types._odps_primitive_to_builtin_types)
        if issubclass(p_type.type, builtin_type)
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matches, None)
def _case(_, gen_params):
    """Print the ``repr`` of the truthy, formatted generated parameters.

    The first positional argument is unused. Entries with a falsy value are
    skipped; the others are passed through ``BaseNodeRunner._format_value``.
    """
    formatted = dict()
    for name, value in six.iteritems(gen_params):
        if not value:
            continue
        formatted[name] = BaseNodeRunner._format_value(value)
    print(repr(formatted))
def test_base_runner_node(self):
    """Chain mock actions into a six-node graph and verify the runner nodes.

    Verifies the repr of the ports bound to node 4's outputs, the
    ``parameters`` of nodes 2-6, the port types on each node, the edges
    recorded in ``input_edges`` / ``output_edges``, and the text produced by
    ``show_steps()`` on the last DataFrame.
    """
    self.maxDiff = None
    self.create_ionosphere(IONOSPHERE_TABLE)

    table_df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    node1 = _get_bind_node(table_df)

    second_df = self.mock_action(table_df, msg='Node2')
    node2 = _get_bind_node(second_df)

    third_df_a, third_df_b, _ = self.mock_action([table_df, second_df], 3, msg='Node3')
    node3 = _get_bind_node(third_df_a)

    fourth_df, fourth_model = self.mock_action(third_df_a, 'dm', msg='Node4')
    self.assertIn('DFAdapter', repr(_get_bind_port(fourth_df)))
    self.assertIn('Model', repr(_get_bind_port(fourth_model)))
    node4 = _get_bind_node(fourth_df)

    fifth_model = self.mock_action([fourth_model, third_df_b], 'm', msg='Node5')
    node5 = _get_bind_node(fifth_model)

    last_df = self.mock_action([fourth_df, fifth_model], 1, msg='Node6')
    node6 = _get_bind_node(last_df)

    # each node's parameters hold only the message given to mock_action
    self.assertDictEqual(node2.parameters, {'message': 'Node2'})
    self.assertDictEqual(node3.parameters, {'message': 'Node3'})
    self.assertDictEqual(node4.parameters, {'message': 'Node4'})
    self.assertDictEqual(node5.parameters, {'message': 'Node5'})
    self.assertDictEqual(node6.parameters, {'message': 'Node6'})

    # port name -> port type for a node's inputs or outputs
    def types_of(ports):
        kinds = dict()
        for name, port in six.iteritems(ports):
            kinds[name] = port.type
        return kinds

    self.assertDictEqual(types_of(node2.inputs), {'input1': PortType.DATA})
    self.assertDictEqual(types_of(node2.outputs), {'output1': PortType.DATA})
    self.assertDictEqual(
        types_of(node3.inputs),
        {'input1': PortType.DATA, 'input2': PortType.DATA})
    self.assertDictEqual(
        types_of(node3.outputs),
        {'output1': PortType.DATA, 'output2': PortType.DATA, 'output3': PortType.DATA})
    self.assertDictEqual(types_of(node4.inputs), {'input1': PortType.DATA})
    self.assertDictEqual(
        types_of(node4.outputs),
        {'output1': PortType.DATA, 'output2': PortType.MODEL})
    self.assertDictEqual(
        types_of(node5.inputs),
        {'input1': PortType.MODEL, 'input2': PortType.DATA})
    self.assertDictEqual(types_of(node5.outputs), {'output1': PortType.MODEL})
    self.assertDictEqual(
        types_of(node6.inputs),
        {'input1': PortType.DATA, 'input2': PortType.MODEL})
    self.assertDictEqual(types_of(node6.outputs), {'output1': PortType.DATA})

    # edges, checked from both the receiving and the emitting side
    def assertInEdge(dest_node, dest_ep, *sources):
        expected = list(
            RunnerEdge(src_node, src_ep, dest_node, dest_ep)
            for src_node, src_ep in sources
        )
        self.assertListEqual(dest_node.input_edges[dest_ep], expected)

    def assertOutEdge(src_node, src_ep, *targets):
        expected = list(
            RunnerEdge(src_node, src_ep, dest_node, dest_ep)
            for dest_node, dest_ep in targets
        )
        self.assertListEqual(src_node.output_edges[src_ep], expected)

    assertOutEdge(node1, 'output', (node2, 'input1'), (node3, 'input1'))
    assertInEdge(node2, 'input1', (node1, 'output'))
    assertOutEdge(node2, 'output1', (node3, 'input2'))
    assertInEdge(node3, 'input1', (node1, 'output'))
    assertInEdge(node3, 'input2', (node2, 'output1'))
    assertOutEdge(node3, 'output1', (node4, 'input1'))
    assertOutEdge(node3, 'output2', (node5, 'input2'))
    assertInEdge(node4, 'input1', (node3, 'output1'))
    assertOutEdge(node4, 'output1', (node6, 'input1'))
    assertOutEdge(node4, 'output2', (node5, 'input1'))
    assertInEdge(node5, 'input1', (node4, 'output2'))
    assertInEdge(node5, 'input2', (node3, 'output2'))
    assertInEdge(node6, 'input1', (node4, 'output1'))
    assertInEdge(node6, 'input2', (node5, 'output1'))

    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file, on a single line. If show_steps() emits one step per line,
    # confirm that line breaks inside this literal were not lost.
    steps_text = textwrap.dedent(""" DataFrame_1 -> output MockNode_2(input1=DataFrame_1:output) -> output1 MockNode_3(input1=DataFrame_1:output, input2=MockNode_2:output1) -> output1, output2, output3 MockNode_4(input1=MockNode_3:output1) -> output1, output2 MockNode_5(input1=MockNode_4:output2, input2=MockNode_3:output2) -> output1 MockNode_6(input1=MockNode_4:output1, input2=MockNode_5:output1) -> output1(*) """).strip()

    shown = last_df.show_steps()
    self.assertEqual(shown.text.strip(), steps_text)
def after_exec(self):
    """Notify the node that execution finished and record its outputs.

    Calls ``self._node.after_exec(self._odps, True)``, then stores a mapping
    of output port name to ``self.get_output_object(port)`` in the runner
    context's ``_node_outputs`` under ``self._node_hash``.
    """
    # imported inside the function, as in the original code
    from odps.runner import RunnerContext

    context = RunnerContext.instance()
    self._node.after_exec(self._odps, True)

    produced = dict()
    for port_name, port in six.iteritems(self._node.outputs):
        produced[port_name] = self.get_output_object(port)
    context._node_outputs[self._node_hash] = produced