def setup(self):
    """Create the mocked tables and collection expressions used by the tests.

    Populates ``self.expr``, ``self.expr1`` and ``self.expr2`` (all sharing
    one schema), ``self.expr3`` (three-column schema with ``part1`` and
    ``part2`` given as the extra column lists), ``self.expr4`` (schema with
    dict and list typed columns) and sets ``self.maxDiff`` to ``None``.
    """
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    schema = Schema.from_lists(
        ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
        datatypes('string', 'int64', 'float64', 'boolean', 'decimal', 'datetime'),
        ['ds'],
        datatypes('string'),
    )
    # Three expressions over differently named tables with the same schema.
    same_schema_tables = (
        ('expr', 'pyodps_test_expr_table'),
        ('expr1', 'pyodps_test_expr_table1'),
        ('expr2', 'pyodps_test_expr_table2'),
    )
    for attr_name, table_name in same_schema_tables:
        source = MockTable(name=table_name, schema=schema)
        setattr(self, attr_name,
                CollectionExpr(_source_data=source, _schema=schema))

    schema2 = Schema.from_lists(
        ['name', 'id', 'fid'],
        datatypes('string', 'int64', 'float64'),
        ['part1', 'part2'],
        datatypes('string', 'int64'),
    )
    # NOTE(review): this table reuses the name 'pyodps_test_expr_table2'
    # already given to the table behind self.expr2 -- confirm it is intended.
    table3 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
    self.expr3 = CollectionExpr(_source_data=table3, _schema=schema2)

    schema3 = Schema.from_lists(
        ['id', 'name', 'relatives', 'hobbies'],
        datatypes('int64', 'string', 'dict<string, string>', 'list<string>'),
    )
    # NOTE(review): the mock table is built with ``schema`` while the
    # expression uses ``schema3`` -- confirm this mismatch is intended.
    table4 = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.expr4 = CollectionExpr(_source_data=table4, _schema=schema3)

    self.maxDiff = None
def setup(self):
    """Create collection expressions whose schema is rebuilt from columns.

    Each mock table receives the full schema, while the expression on top
    of it receives a fresh ``Schema(columns=...)`` built from that schema's
    ``columns`` attribute.
    """
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    def from_columns(full_schema):
        # A new Schema object per call, built only from ``columns``.
        return Schema(columns=full_schema.columns)

    schema = Schema.from_lists(
        ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
        datatypes('string', 'int64', 'float64', 'boolean', 'decimal', 'datetime'),
        ['ds'],
        datatypes('string'),
    )
    for attr_name, table_name in (
            ('expr', 'pyodps_test_expr_table'),
            ('expr1', 'pyodps_test_expr_table1'),
            ('expr2', 'pyodps_test_expr_table2')):
        source = MockTable(name=table_name, schema=schema)
        setattr(self, attr_name,
                CollectionExpr(_source_data=source, _schema=from_columns(schema)))

    schema2 = Schema.from_lists(
        ['name', 'id', 'fid'],
        datatypes('string', 'int64', 'float64'),
        ['part1', 'part2'],
        datatypes('string', 'int64'),
    )
    table3 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
    self.expr3 = CollectionExpr(_source_data=table3,
                                _schema=from_columns(schema2))
def testSetitemField(self):
    """Check field assignment through ``expr[name] = column``.

    Exercises item assignment on a copy of ``self.expr``, on a group-by
    aggregation, on a left join built with ``merge_columns=True`` and on a
    collection obtained via ``expr[expr]``, asserting the resulting
    expression type (and, for the first case, the schema names) each time.
    """
    from odps.df.expr.groupby import GroupByCollectionExpr
    from odps.df.expr.merge import JoinFieldMergedCollectionExpr

    expr = self.expr.copy()

    # Assigning under a new name adds it to the end of the schema names.
    expr['new_id'] = expr.id + 1
    self.assertIn('new_id', expr.schema.names)
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertEqual(expr.schema.names, ['name', 'id', 'fid', 'new_id'])

    expr['new_id2'] = expr.id + 2
    self.assertIn('new_id2', expr.schema.names)
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertEqual(expr.schema.names,
                     ['name', 'id', 'fid', 'new_id', 'new_id2'])
    self.assertIsNone(expr._input._proxy)

    # Assign to an already existing name, then build a field from it.
    expr['new_id2'] = expr.new_id
    expr['new_id3'] = expr.id + expr.new_id2
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertIs(expr._fields[-1].rhs.lhs.input, expr.input)
    self.assertIsInstance(expr, ProjectCollectionExpr)

    # Assignment on an aggregation result yields a projection.
    expr2 = expr.groupby('name').agg(expr.id.sum())
    expr2['new_id2'] = expr2.id_sum + 1
    self.assertIsInstance(expr2, ProjectCollectionExpr)
    self.assertNotIsInstance(expr2, GroupByCollectionExpr)
    self.assertNotIsInstance(expr2, FilterCollectionExpr)

    schema = Schema.from_lists(
        ['name', 'id', 'fid2', 'fid3'],
        [types.string, types.int64, types.float64, types.float64])
    table = MockTable(name='pyodps_test_expr_table', schema=schema)
    table._client = self.config.odps.rest
    expr3 = CollectionExpr(_source_data=table, _schema=schema)

    # Assignment on a merged-column join keeps the join expression type.
    expr4 = expr.left_join(
        expr3,
        on=[expr.name == expr3.name, expr.id == expr3.id],
        merge_columns=True)
    expr4['fid_1'] = expr4.groupby('id').sort('fid2').row_number()
    self.assertIsInstance(expr4, JoinFieldMergedCollectionExpr)
    self.assertIsNone(expr4._proxy)

    expr5 = expr[expr]
    expr5['name_2'] = expr5.apply(lambda row: row.name, axis=1, reduce=True)
    self.assertIsInstance(expr5, ProjectCollectionExpr)
    self.assertIsNone(expr5._proxy)
def setup(self):
    """Create ``self.expr``, ``self.expr1`` and ``self.expr2``.

    All three expressions share one ``name``/``id``/``fid`` schema and
    differ only in the name of the mock table they read from.
    """
    column_names = ['name', 'id', 'fid']
    column_types = [types.string, types.int64, types.float64]
    schema = Schema.from_lists(column_names, column_types)

    for attr_name, table_name in (
            ('expr', 'pyodps_test_expr_table'),
            ('expr1', 'pyodps_test_expr_table1'),
            ('expr2', 'pyodps_test_expr_table2')):
        source = MockTable(name=table_name, schema=schema)
        setattr(self, attr_name,
                CollectionExpr(_source_data=source, _schema=schema))
def setup(self):
    """Create two string/int64 collections with different column names.

    ``self.expr`` has columns ``name``/``id`` and ``self.expr2`` has
    columns ``name2``/``id2``; each is backed by its own mock table.
    """
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    def build_expr(table_name, column_names):
        # One schema, one mock table and one expression per call.
        table_schema = Schema.from_lists(column_names,
                                         datatypes('string', 'int64'))
        source = MockTable(name=table_name, schema=table_schema)
        return CollectionExpr(_source_data=source, _schema=table_schema)

    self.expr = build_expr('pyodps_test_expr_table', ['name', 'id'])
    self.expr2 = build_expr('pyodps_test_expr_table2', ['name2', 'id2'])
def setup(self):
    """Create two client-bound mock tables and their expressions.

    ``self.expr`` uses a plain three-column schema; ``self.expr2`` uses the
    same columns plus ``part1``/``part2`` passed as the extra column lists.
    Both mock tables get ``self.config.odps.rest`` as their ``_client``.
    """
    def with_client(mock_table):
        # Attach the REST client from the test configuration.
        mock_table._client = self.config.odps.rest
        return mock_table

    plain_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64])
    plain_table = with_client(
        MockTable(name='pyodps_test_expr_table', schema=plain_schema))
    self.expr = CollectionExpr(_source_data=plain_table, _schema=plain_schema)

    extended_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
        ['part1', 'part2'],
        [types.string, types.int64])
    extended_table = with_client(
        MockTable(name='pyodps_test_expr_table2', schema=extended_schema))
    self.expr2 = CollectionExpr(_source_data=extended_table,
                                _schema=extended_schema)
def testSimpleJoin(self):
    """Join three mock collections and check the joined schema names.

    The result of ``expr.join(expr1).join(expr2)`` is expected to have the
    schema names ``['name', 'id', 'value', 'num']``.
    """
    def build_expr(table_name, column_names, column_types):
        # One schema, one mock table and one expression per call.
        table_schema = Schema.from_lists(column_names, column_types)
        source = MockTable(name=table_name, schema=table_schema)
        return CollectionExpr(_source_data=source, _schema=table_schema)

    first = build_expr('pyodps_test_expr_table',
                       ['name', 'id'], [types.string, types.int64])
    second = build_expr('pyodps_test_expr_table1',
                        ['id', 'value'], [types.int64, types.string])
    third = build_expr('pyodps_test_expr_table2',
                       ['value', 'num'], [types.string, types.float64])

    joined = first.join(second).join(third)

    self.assertEqual(joined.schema.names, ['name', 'id', 'value', 'num'])
def setup(self):
    """Create ``self.expr`` over a schema with dict and list typed columns."""
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    column_names = ['id', 'name', 'relatives', 'hobbies']
    column_types = datatypes('int64', 'string',
                             'dict<string, string>', 'list<string>')
    schema = Schema.from_lists(column_names, column_types)

    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.expr = CollectionExpr(_source_data=source, _schema=schema)
def setup(self):
    """Create ``self.expr`` over a six-column mock table."""
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    column_names = ['name', 'id', 'fid', 'isMale', 'scale', 'birth']
    column_types = datatypes('string', 'int64', 'float64',
                             'boolean', 'decimal', 'datetime')
    schema = Schema.from_lists(column_names, column_types)

    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.expr = CollectionExpr(_source_data=source, _schema=schema)
def testSVGFormatter(self):
    """Compile two projections and inspect the dot output of their DAGs.

    Without ``cache()`` the DAG is expected to hold a single node whose
    expression is a ``GroupByCollectionExpr`` and whose dot text lacks
    'Projection'. With ``cache()`` on the aggregation, two nodes and a dot
    text containing 'Projection' are expected.
    """
    def compile_dag(target):
        # A fresh engine is created for every compilation.
        return MixedEngine(self.odps).compile(target)

    source = MockTable(name='pyodps_test_svg', schema=self.schema,
                       _client=self.odps.rest)
    collection = CollectionExpr(_source_data=source, _schema=self.schema)

    # Case 1: aggregation followed directly by a projection.
    aggregated = collection.groupby('name').agg(id=collection['id'].sum())
    projected = aggregated['name', aggregated.id + 3]
    dag = compile_dag(projected)
    nodes = dag.nodes()
    self.assertEqual(len(nodes), 1)
    self.assertIsInstance(nodes[0].expr, GroupByCollectionExpr)
    dot_text = ExprExecutionGraphFormatter(dag)._to_dot()
    self.assertNotIn('Projection', dot_text)

    # Case 2: the aggregation is cached before the projection.
    cached = collection.groupby('name').agg(id=collection['id'].sum()).cache()
    projected = cached['name', cached.id + 3]
    dag = compile_dag(projected)
    nodes = dag.nodes()
    self.assertEqual(len(nodes), 2)
    dot_text = ExprExecutionGraphFormatter(dag)._to_dot()
    self.assertIn('Projection', dot_text)
def setup(self):
    """Create a source-less and a mock-sourced expression over every type.

    The schema is built from the keys and values of ``types._data_types``.
    ``self.expr`` has no source data; ``self.sourced_expr`` reads from a
    ``MockTable`` created with ``self.odps.rest`` as its client.
    """
    from odps.df.expr.tests.core import MockTable

    registry = types._data_types
    schema = Schema.from_lists(registry.keys(), registry.values())

    self.expr = CollectionExpr(_source_data=None, _schema=schema)

    mocked_source = MockTable(client=self.odps.rest)
    self.sourced_expr = CollectionExpr(_source_data=mocked_source,
                                       _schema=schema)
def setup(self):
    """Create ``self.expr`` over a three-column mock table and ``self.ctx``."""
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    column_types = datatypes('string', 'int64', 'float64')
    schema = Schema.from_lists(['name', 'id', 'fid'], column_types)

    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.expr = CollectionExpr(_source_data=source, _schema=schema)

    self.ctx = ExecuteContext()
def testGetAttrs(self):
    """Check the attribute names ``get_attrs`` reports for ``expr.id + 1``."""
    schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    collection = CollectionExpr(_source_data=source, _schema=schema)

    expected = (
        '_lhs',
        '_rhs',
        '_data_type',
        '_source_data_type',
        '_name',
        '_source_name',
        '_engine',
        '_cached_args',
    )
    actual = get_attrs(collection.id + 1)

    self.assertSequenceEqual(expected, actual)
def setup(self):
    """Create two dynamic collection expressions over dynamic schemas.

    ``self.expr`` wraps a six-column schema converted with
    ``DynamicSchema.from_schema``; ``self.expr2`` wraps a three-column one
    converted with ``default_type=types.string``.
    """
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    static_schema = Schema.from_lists(
        ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
        datatypes('string', 'int64', 'float64', 'boolean', 'decimal', 'datetime'))
    schema = DynamicSchema.from_schema(static_schema)
    table = MockTable(name='pyodps_test_expr_table', schema=schema)

    static_schema2 = Schema.from_lists(
        ['name2', 'id', 'fid2'],
        datatypes('string', 'int64', 'float64'))
    schema2 = DynamicSchema.from_schema(static_schema2,
                                        default_type=types.string)
    table2 = MockTable(name='pyodps_test_expr_tabl2', schema=schema2)

    self.expr = DynamicCollectionExpr(_source_data=table, _schema=schema)
    self.expr2 = DynamicCollectionExpr(_source_data=table2, _schema=schema2)
def testSimpleJoin(self):
    """Join three mock collections and check the adapter's bind node.

    The adapter built from the joined DataFrame is expected to have a bind
    node with no inputs and exactly one output.
    """
    def build_expr(table_name, column_names, column_types):
        # One schema, one mock table and one expression per call.
        table_schema = Schema.from_lists(column_names, column_types)
        source = MockTable(name=table_name, schema=table_schema)
        return CollectionExpr(_source_data=source, _schema=table_schema)

    first = build_expr('pyodps_test_expr_table',
                       ['name', 'id'], [types.string, types.int64])
    second = build_expr('pyodps_test_expr_table1',
                        ['id', 'value'], [types.int64, types.string])
    third = build_expr('pyodps_test_expr_table2',
                       ['value', 'num'], [types.string, types.float64])

    joined = first.join(second).join(third)
    adapter = adapter_from_df(joined)

    self.assertEqual(len(adapter._bind_node.inputs), 0)
    self.assertEqual(len(adapter._bind_node.outputs), 1)
def testConcat(self):
    """Check the expression types produced by ``concat``.

    Row-wise concat of same-schema collections is expected to give a
    ``UnionCollectionExpr``. Column-wise concat (``axis=1``) of source-less
    collections is expected to give a ``ConcatCollectionExpr`` when pandas
    can be imported and an ``AlgoCollectionExpr`` otherwise; with a
    table-backed right side an ``AlgoCollectionExpr`` is expected.
    """
    from odps.ml.expr import AlgoCollectionExpr

    merged_columns = ('name', 'id', 'fid', 'fid2')

    name_id_schema = Schema.from_lists(['name', 'id'],
                                       [types.string, types.int64])
    df = CollectionExpr(_source_data=None, _schema=name_id_schema)
    df1 = CollectionExpr(_source_data=None, _schema=name_id_schema)
    df2 = CollectionExpr(_source_data=None, _schema=name_id_schema)

    fid_schema = Schema.from_lists(['fid', 'fid2'],
                                   [types.int64, types.float64])
    df3 = CollectionExpr(_source_data=None, _schema=fid_schema)

    # A separate schema object is created for the table-backed collection.
    table_schema = Schema.from_lists(['fid', 'fid2'],
                                     [types.int64, types.float64])
    table = MockTable(name='pyodps_test_expr_table2', schema=table_schema)
    table._client = self.config.odps.rest
    df4 = CollectionExpr(_source_data=table, _schema=table_schema)

    # Row-wise concat.
    unioned = df.concat([df1, df2])
    self.assertIsInstance(unioned, UnionCollectionExpr)
    self.assertIsInstance(unioned._lhs, CollectionExpr)
    self.assertIsInstance(unioned._rhs, CollectionExpr)

    # Column-wise concat of source-less collections.
    side_by_side = df.concat(df3, axis=1)
    try:
        import pandas as pd
        self.assertIsInstance(side_by_side, ConcatCollectionExpr)
        self.assertIsInstance(side_by_side._lhs, CollectionExpr)
        self.assertIsInstance(side_by_side._rhs, CollectionExpr)
    except ImportError:
        self.assertIsInstance(side_by_side, AlgoCollectionExpr)
    for column in merged_columns:
        self.assertIn(column, side_by_side.schema.names)

    # Column-wise concat with a table-backed right side.
    with_table = df.concat(df4, axis=1)
    self.assertIsInstance(with_table, AlgoCollectionExpr)
    for column in merged_columns:
        self.assertIn(column, with_table.schema.names)
def testIsChanged(self):
    """Check ``is_changed`` for filters, projections and aggregations.

    Expected results: falsy for a filter on the same collection, truthy for
    a projection of ``id + 2``, ``None`` when the column belongs to another
    collection, truthy for an aggregated ``id`` and falsy for the group key.
    """
    schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
    table = MockTable(name='pyodps_test_expr_table', schema=schema)
    expr = CollectionExpr(_source_data=table, _schema=schema)
    other = CollectionExpr(_source_data=table, _schema=schema)

    def id_sum_by_name():
        # Build a fresh aggregation for every assertion.
        return expr.groupby('name').agg(id=expr.id.sum())

    filtered = expr[expr.id < 3]
    self.assertFalse(is_changed(filtered, expr.id))

    projected = expr[expr.id + 2, ]
    self.assertTrue(is_changed(projected, expr.id))

    filtered_again = expr[expr.id < 3]
    self.assertIsNone(is_changed(filtered_again, other.id))

    self.assertTrue(is_changed(id_sum_by_name(), expr.id))
    self.assertFalse(is_changed(id_sum_by_name(), expr.name))
def setup(self):
    """Create a table DataFrame, a pandas DataFrame, their join and an engine.

    Sets ``self.tb`` (over a mock table), ``self.pd`` (over a two-row
    pandas frame), ``self.expr`` (their join on ``name``) and
    ``self.engine`` (a ``MixedEngine`` for ``self.odps``).
    """
    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    column_names = ['name', 'id', 'fid', 'isMale', 'scale', 'birth']
    column_types = datatypes('string', 'bigint', 'double',
                             'boolean', 'decimal', 'datetime')
    schema = Schema.from_lists(column_names, column_types)
    mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.tb = DataFrame(mock_table)

    import pandas as pd

    rows = [['name1', 2, 3.14], ['name2', 100, 2.7]]
    local_frame = pd.DataFrame(rows, columns=['name', 'id', 'fid'])
    self.pd = DataFrame(local_frame)

    self.expr = self.tb.join(self.pd, on='name')
    self.engine = MixedEngine(self.odps)
def testBizarreField(self):
    """Apply a row function that reads a column named '012'.

    The column name cannot be used as an attribute literal, so the row
    function reads it with ``getattr``. The generated UDTF is run on two
    rows and is expected to return ``[20, 40]``.
    """
    def my_func(row):
        return getattr(row, '012') * 2.0

    def datatypes(*type_names):
        # Turn type-name strings into validated data type objects.
        return [validate_data_type(type_name) for type_name in type_names]

    schema = Schema.from_lists(
        ['name', 'id', 'fid', '012'],
        datatypes('string', 'int64', 'float64', 'float64'))
    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    collection = CollectionExpr(_source_data=source, _schema=schema)

    applied = collection.apply(my_func, axis=1,
                               names=['out_col'], types=['float64'])
    self.engine.compile(applied)

    # Take the first UDF registered in the engine context.
    registered = list(self.engine._ctx._func_to_udfs.values())
    udtf = get_function(registered[0], UDF_CLASS_NAME)

    input_rows = [('name1', 1, None, 10), ('name2', 2, None, 20)]
    self.assertEqual([20, 40], runners.simple_run(udtf, input_rows))
def setup(self):
    """Create three mock-table expressions used by the tests.

    ``self.expr`` and ``self.expr2`` read from tables whose ``_client`` is
    ``self.config.odps.rest``; ``self.expr2`` additionally passes
    ``part1``/``part2`` as extra column lists. ``self.expr3`` uses a schema
    with dict and list typed columns and gets no client.
    """
    def with_client(mock_table):
        # Attach the REST client from the test configuration.
        mock_table._client = self.config.odps.rest
        return mock_table

    schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64])
    table = with_client(
        MockTable(name='pyodps_test_expr_table', schema=schema))
    self.expr = CollectionExpr(_source_data=table, _schema=schema)

    schema2 = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
        ['part1', 'part2'],
        [types.string, types.int64])
    table2 = with_client(
        MockTable(name='pyodps_test_expr_table2', schema=schema2))
    self.expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

    composite_types = [
        types.int64,
        types.string,
        types.Dict(types.string, types.string),
        types.List(types.string),
    ]
    schema3 = Schema.from_lists(['id', 'name', 'relatives', 'hobbies'],
                                composite_types)
    table3 = MockTable(name='pyodps_test_expr_table3', schema=schema3)
    self.expr3 = CollectionExpr(_source_data=table3, _schema=schema3)
def testGetAttrs(self):
    """Check the attribute names ``get_attrs`` reports for ``expr.id + 1``."""
    schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
    source = MockTable(name='pyodps_test_expr_table', schema=schema)
    collection = CollectionExpr(_source_data=source, _schema=schema)

    expected = (
        '_lhs',
        '_rhs',
        '_data_type',
        '_source_data_type',
        '_ml_fields_cache',
        '_ml_uplink',
        '_ml_operations',
        '_name',
        '_source_name',
        '_deps',
        '_ban_optimize',
        '_engine',
        '_need_cache',
        '_mem_cache',
        '_id',
        '_args_indexes',
    )
    actual = get_attrs(collection.id + 1)

    self.assertSequenceEqual(expected, actual)
def testFilterPushdownThroughMultipleProjection(self):
    """Compile a filter on a column derived through two projections.

    Builds a projection with a ``cut`` on ``c``, adds ``f = e / c``,
    projects again with a ``cut`` on ``f`` and filters on ``f_cut == '9'``.
    The SQL compiled with ``prettify=False`` is compared to ``expected``.
    """
    schema = Schema.from_lists(list('abcde'), ['string'] * 5)
    table = MockTable(name='pyodps_test_expr_table3', schema=schema)
    tab = CollectionExpr(_source_data=table,
                         _schema=odps_schema_to_df_schema(schema))

    # 30 bin edges in steps of 7, labelled '<edge>-<edge + 7>'.
    c_bins = [7 * i for i in range(0, 30)]
    c_labels = [str(7 * i) + '-' + str(7 * (i + 1)) for i in range(0, 30)]

    p1 = tab.select(
        tab.a,
        tab.c.astype('int').cut(c_bins, labels=c_labels,
                                include_over=True).rename('c_cut'),
        tab.e.astype('int').rename('e'),
        tab.c.astype('int').rename('c'))
    p1['f'] = p1['e'] / p1['c']

    # 20 bin edges in steps of 1, labelled with the edge itself.
    f_bins = [1 * i for i in range(0, 20)]
    f_labels = [str(edge) for edge in f_bins]

    p2 = p1.select(
        p1.a,
        p1.c_cut,
        p1.f.cut(bins=f_bins, labels=f_labels,
                 include_over=True).rename('f_cut'))

    expected = (
        "SELECT t1.`a`, CASE WHEN (0 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 7) THEN '0-7' "
        "WHEN (7 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 14) "
        "THEN '7-14' WHEN (14 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 21) THEN '14-21' "
        "WHEN (21 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 28) "
        "THEN '21-28' WHEN (28 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 35) THEN '28-35' "
        "WHEN (35 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 42) THEN '35-42' "
        "WHEN (42 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 49) THEN '42-49' "
        "WHEN (49 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 56) "
        "THEN '49-56' WHEN (56 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 63) THEN '56-63' "
        "WHEN (63 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 70) THEN '63-70' "
        "WHEN (70 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 77) "
        "THEN '70-77' WHEN (77 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 84) "
        "THEN '77-84' WHEN (84 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 91) THEN '84-91' "
        "WHEN (91 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 98) "
        "THEN '91-98' WHEN (98 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 105) THEN '98-105' "
        "WHEN (105 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 112) "
        "THEN '105-112' WHEN (112 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 119) THEN '112-119' "
        "WHEN (119 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 126) "
        "THEN '119-126' WHEN (126 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 133) THEN '126-133' "
        "WHEN (133 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 140) "
        "THEN '133-140' WHEN (140 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 147) THEN '140-147' "
        "WHEN (147 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 154) "
        "THEN '147-154' WHEN (154 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 161) THEN '154-161' "
        "WHEN (161 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 168) "
        "THEN '161-168' WHEN (168 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 175) THEN '168-175' "
        "WHEN (175 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 182) "
        "THEN '175-182' WHEN (182 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 189) THEN '182-189' "
        "WHEN (189 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 196) "
        "THEN '189-196' WHEN (196 < CAST(t1.`c` AS BIGINT)) "
        "AND (CAST(t1.`c` AS BIGINT) <= 203) THEN '196-203' "
        "WHEN 203 < CAST(t1.`c` AS BIGINT) THEN '203-210' END AS `c_cut`, "
        "CASE WHEN (0 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 1) THEN '0' "
        "WHEN (1 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 2) "
        "THEN '1' WHEN (2 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 3) THEN '2' "
        "WHEN (3 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 4) "
        "THEN '3' WHEN (4 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 5) THEN '4' "
        "WHEN (5 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 6) THEN '5' "
        "WHEN (6 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 7) "
        "THEN '6' WHEN (7 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 8) THEN '7' "
        "WHEN (8 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 9) THEN '8' "
        "WHEN (9 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 10) "
        "THEN '9' WHEN (10 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 11) THEN '10' "
        "WHEN (11 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 12) "
        "THEN '11' WHEN (12 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 13) THEN '12' "
        "WHEN (13 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 14) THEN '13' "
        "WHEN (14 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 15) THEN '14' "
        "WHEN (15 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 16) THEN '15' "
        "WHEN (16 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 17) THEN '16' "
        "WHEN (17 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 18) "
        "THEN '17' WHEN (18 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 19) THEN '18' "
        "WHEN 19 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) THEN '19' END AS `f_cut` \n"
        "FROM mocked_project.`pyodps_test_expr_table3` t1 \n"
        "WHERE (CASE WHEN (0 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 1) THEN '0' "
        "WHEN (1 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 2) "
        "THEN '1' WHEN (2 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 3) THEN '2' "
        "WHEN (3 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 4) THEN '3' "
        "WHEN (4 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 5) THEN '4' "
        "WHEN (5 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 6) THEN '5' "
        "WHEN (6 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 7) THEN '6' "
        "WHEN (7 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 8) THEN '7' "
        "WHEN (8 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 9) THEN '8' "
        "WHEN (9 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 10) THEN '9' "
        "WHEN (10 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 11) THEN '10' "
        "WHEN (11 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 12) THEN '11' "
        "WHEN (12 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 13) THEN '12' "
        "WHEN (13 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 14) THEN '13' "
        "WHEN (14 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 15) THEN '14' "
        "WHEN (15 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 16) THEN '15' "
        "WHEN (16 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 17) THEN '16' "
        "WHEN (17 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 18) THEN '17' "
        "WHEN (18 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) "
        "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 19) THEN '18' "
        "WHEN 19 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) THEN '19' END) == '9'"
    )

    self.assertEqual(
        str(expected),
        str(ODPSEngine(self.odps).compile(p2[p2.f_cut == '9'],
                                          prettify=False)))
def get_table2_df(self):
    """Return a collection expression over a mock of the second temp table.

    The mock table is named ``TEMP_TABLE_2_NAME`` and has two string
    columns, ``col21`` and ``col22``.
    """
    column_names = ['col21', 'col22']
    schema = Schema.from_lists(column_names, datatypes('string', 'string'))
    mock_table = MockTable(name=TEMP_TABLE_2_NAME, schema=schema)
    return CollectionExpr(_source_data=mock_table, _schema=schema)
def setup(self):
    """Create ``self.expr`` over a client-bound three-column mock table."""
    column_names = ['name', 'id', 'fid']
    column_types = [types.string, types.int64, types.float64]
    schema = Schema.from_lists(column_names, column_types)

    query_table = MockTable(name='pyodps_test_query_table', schema=schema)
    # Attach the REST client from the test configuration.
    query_table._client = self.config.odps.rest

    self.expr = CollectionExpr(_source_data=query_table, _schema=schema)