def test_numeric_literals(ic):
    """Ship a SmallInt -> BigInt UDF and exercise each of its branches.

    The UDF returns the literal 1729 for a NULL argument, NULL for a
    negative argument, ``a + 5`` for other values below 10 and ``a * 2``
    for everything else.
    """
    @udf(BigIntVal(FunctionContext, SmallIntVal))
    def numeric_literals(context, a):
        if a is None:
            return 1729
        elif a < 0:
            return None
        elif a < 10:
            return a + 5
        else:
            return a * 2

    ship_udf(ic, numeric_literals, overwrite=True)
    cursor = ic._cursor

    # NULL input -> integer literal branch.
    cursor.execute('SELECT %s.numeric_literals(NULL)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] == 1729

    # Negative input -> NULL result.
    cursor.execute('SELECT %s.numeric_literals(-5)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is None

    # Small input -> a + 5.
    cursor.execute('SELECT %s.numeric_literals(2)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] == 7

    # Larger input -> a * 2.
    cursor.execute('SELECT %s.numeric_literals(12)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] == 24
def test_return_string_literal(ic):
    """Check that a UDF can return a constant string.

    The UDF ignores its argument and always returns the literal "bar".
    """
    @udf(StringVal(FunctionContext, StringVal))
    def return_string_literal(context, a):
        return "bar"

    ship_udf(ic, return_string_literal, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.return_string_literal("foo")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 'bar'
def test_return_null(ic):
    """Check that a UDF returning ``None`` yields NULL in the query result."""
    @udf(IntVal(FunctionContext, IntVal))
    def return_null(context, a):
        return None

    ship_udf(ic, return_null, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.return_null(10)' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] is None
def test_string_concat(ic):
    """Check that a UDF can concatenate its two string arguments with ``+``."""
    @udf(StringVal(FunctionContext, StringVal, StringVal))
    def string_concat(context, a, b):
        return a + b

    ship_udf(ic, string_concat, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.string_concat("howdy ", "doody")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 'howdy doody'
def test_string_index_concat(ic):
    """Check indexing a string with constant indices and joining the results.

    The UDF returns the characters at positions 0 and 3 of its argument,
    concatenated.
    """
    @udf(StringVal(FunctionContext, StringVal))
    def string_index_concat(context, a):
        return a[0] + a[3]

    ship_udf(ic, string_index_concat, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.string_index_concat("money")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 'me'
def test_string_indexing(ic):
    """Check indexing a string argument with an integer argument."""
    @udf(StringVal(FunctionContext, StringVal, IntVal))
    def string_indexing(context, a, b):
        return a[b]

    ship_udf(ic, string_indexing, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.string_indexing("foo", 1)' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 'o'
def test_string_split_comma(ic):
    """Check splitting a string on a comma inside a UDF.

    The UDF calls ``string.split`` on its argument and returns the second
    piece.
    """
    @udf(StringVal(FunctionContext, StringVal))
    def string_split_comma(context, a):
        return string.split(a, ",")[1]

    ship_udf(ic, string_split_comma, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.string_split_comma("foo,bar")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 'bar'
def test_string_len(ic):
    """Check that ``len`` on a string argument works inside a UDF."""
    @udf(IntVal(FunctionContext, StringVal))
    def string_len(context, a):
        return len(a)

    ship_udf(ic, string_len, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.string_len("australopithecus")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 16
def test_return_empty_string(ic):
    """Check that a UDF can return the empty string literal."""
    @udf(StringVal(FunctionContext, StringVal))
    def return_empty_string(context, a):
        return ""

    ship_udf(ic, return_empty_string, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.return_empty_string("blah")' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == ''
def test_int_promotion(ic):
    """Check that an IntVal argument can feed a BigIntVal return value.

    Besides the computed value, the test also checks that the type code
    reported in the cursor description for the result column is 'BIGINT'.
    """
    @udf(BigIntVal(FunctionContext, IntVal))
    def int_promotion(context, x):
        return x + 1

    ship_udf(ic, int_promotion, overwrite=True)

    cursor = ic._cursor
    query = 'SELECT %s.int_promotion(2)' % ic._temp_db
    cursor.execute(query)
    rows = cursor.fetchall()
    assert rows[0][0] == 3

    # description[0] describes the first result column; item 1 is its type.
    first_column = cursor.description[0]
    assert first_column[1] == 'BIGINT'
def test_int_predicates(ic):
    """Check a UDF that maps an integer comparison to a boolean result.

    The UDF returns True when its argument is greater than 10 and False
    otherwise; the test probes both sides of that boundary (10 and 11).

    NOTE(review): another ``test_int_predicates`` is defined later in this
    file. If both live in the same module, the later definition rebinds the
    name and this copy is never collected -- consider renaming or removing
    one of them.
    """
    @udf(BooleanVal(FunctionContext, IntVal))
    def int_predicate(context, a):
        if a > 10:
            return True
        else:
            return False

    ship_udf(ic, int_predicate, overwrite=True)

    # Identity checks rather than ``== False`` / ``== True``: equality would
    # also accept 0 / 1, hiding a result that came back as an integer.
    ic._cursor.execute('SELECT %s.int_predicate(10)' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is False

    ic._cursor.execute('SELECT %s.int_predicate(11)' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is True
def test_return_two_str_literals(ic):
    """Check a UDF that returns one of two string literals.

    The UDF returns "foo" when its argument is greater than 5 and "bar"
    otherwise.
    """
    @udf(StringVal(FunctionContext, IntVal))
    def return_two_str_literals(context, a):
        if a > 5:
            return "foo"
        else:
            return "bar"

    ship_udf(ic, return_two_str_literals, overwrite=True)
    cursor = ic._cursor

    # Argument not above the threshold -> "bar".
    cursor.execute('SELECT %s.return_two_str_literals(2)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] == 'bar'

    # Argument above the threshold -> "foo".
    cursor.execute('SELECT %s.return_two_str_literals(20)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] == 'foo'
def test_int_predicates(ic):
    """Check a UDF that maps an integer comparison to a boolean result.

    The UDF returns True when its argument is greater than 10 and False
    otherwise; results are checked by identity against the bool singletons.

    NOTE(review): an earlier ``test_int_predicates`` appears in this file;
    if both are in the same module, this definition replaces that one.
    """
    @udf(BooleanVal(FunctionContext, IntVal))
    def int_predicate(context, a):
        if a > 10:
            return True
        else:
            return False

    ship_udf(ic, int_predicate, overwrite=True)
    cursor = ic._cursor

    # Exactly at the threshold -> False.
    cursor.execute('SELECT %s.int_predicate(10)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is False

    # Just above the threshold -> True.
    cursor.execute('SELECT %s.int_predicate(11)' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is True
def test_string_eq(ic):
    """Check string equality comparisons inside a UDF.

    The UDF returns True for "foo", False for "bar" and NULL for any other
    argument; the test covers all three outcomes.

    NOTE(review): another ``test_string_eq`` is defined later in this file.
    If both live in the same module, the later definition rebinds the name
    and this copy is never collected -- consider renaming or removing one
    of them.
    """
    @udf(BooleanVal(FunctionContext, StringVal))
    def string_eq(context, a):
        if a == "foo":
            return True
        elif a == "bar":
            return False
        else:
            return None

    ship_udf(ic, string_eq, overwrite=True)

    # Identity checks rather than ``== True`` / ``== False``: equality would
    # also accept 1 / 0, hiding a result that came back as an integer.
    ic._cursor.execute('SELECT %s.string_eq("foo")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is True

    ic._cursor.execute('SELECT %s.string_eq("bar")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is False

    ic._cursor.execute('SELECT %s.string_eq("baz")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is None
def test_string_eq(ic):
    """Check string equality comparisons inside a UDF.

    The UDF returns True for "foo", False for "bar" and NULL for any other
    argument; results are checked by identity.

    NOTE(review): an earlier ``test_string_eq`` appears in this file; if
    both are in the same module, this definition replaces that one.
    """
    @udf(BooleanVal(FunctionContext, StringVal))
    def string_eq(context, a):
        if a == "foo":
            return True
        elif a == "bar":
            return False
        else:
            return None

    ship_udf(ic, string_eq, overwrite=True)
    cursor = ic._cursor

    # First branch: matches "foo".
    cursor.execute('SELECT %s.string_eq("foo")' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is True

    # Second branch: matches "bar".
    cursor.execute('SELECT %s.string_eq("bar")' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is False

    # Fallthrough: anything else yields NULL.
    cursor.execute('SELECT %s.string_eq("baz")' % ic._temp_db)
    rows = cursor.fetchall()
    assert rows[0][0] is None
LOCATION '/user/laserson/bigml/census_text' """ score_obs_query = """ SELECT DISTINCT predict_income(age, workclass, final_weight, education, education_num, marital_status, occupation, relationship, race, sex, hours_per_week, native_country, income) FROM census_text """ cursor.execute(create_table_query) for size in sizes: start_compile = time() predict_income = udf(signature)(udfs[size]) end_compile = time() ship_udf(cursor, predict_income, '/user/laserson/test-udf/census_%i.ll' % size, 'bottou01-10g.pa.cloudera.com', user='******', overwrite=True) start_score = time() cursor.execute(score_obs_query) distinct = cursor.fetchall() end_score = time() print "impala,%i,%i,%.2f,%.2f" % (size, len(udfs[size].func_code.co_code), end_score - start_score, end_compile - start_compile)