Exemple #1
0
def test_numeric_literals(ic):
    """Ship ``numeric_literals`` and query it once per branch of the UDF.

    ``ic`` provides the cursor (``ic._cursor``) and the database name
    (``ic._temp_db``) used to build each SELECT statement.
    """
    @udf(BigIntVal(FunctionContext, SmallIntVal))
    def numeric_literals(context, a):
        if a is None:
            return 1729
        elif a < 0:
            return None
        elif a < 10:
            return a + 5
        else:
            return a * 2

    ship_udf(ic, numeric_literals, overwrite=True)

    def first_cell(sql):
        # Execute one statement; hand back column 0 of the first fetched row.
        ic._cursor.execute(sql)
        return ic._cursor.fetchall()[0][0]

    # NULL input hits the `a is None` branch.
    assert first_cell('SELECT %s.numeric_literals(NULL)' % ic._temp_db) == 1729
    # Negative input makes the UDF return None.
    assert first_cell('SELECT %s.numeric_literals(-5)' % ic._temp_db) is None
    # 0 <= a < 10 adds five.
    assert first_cell('SELECT %s.numeric_literals(2)' % ic._temp_db) == 7
    # Everything else is doubled.
    assert first_cell('SELECT %s.numeric_literals(12)' % ic._temp_db) == 24
def test_numeric_literals(ic):
    """Exercise all four return paths of the ``numeric_literals`` UDF."""
    @udf(BigIntVal(FunctionContext, SmallIntVal))
    def numeric_literals(context, a):
        if a is None:
            return 1729
        elif a < 0:
            return None
        elif a < 10:
            return a + 5
        else:
            return a * 2

    ship_udf(ic, numeric_literals, overwrite=True)

    cursor = ic._cursor
    db = ic._temp_db

    # NULL argument -> constant 1729.
    cursor.execute('SELECT %s.numeric_literals(NULL)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] == 1729

    # Negative argument -> None.
    cursor.execute('SELECT %s.numeric_literals(-5)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] is None

    # Small non-negative argument -> a + 5.
    cursor.execute('SELECT %s.numeric_literals(2)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] == 7

    # Argument of 10 or more -> a * 2.
    cursor.execute('SELECT %s.numeric_literals(12)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] == 24
def test_return_string_literal(ic):
    """Check that a UDF returning a constant string yields 'bar' via SQL."""
    @udf(StringVal(FunctionContext, StringVal))
    def return_string_literal(context, a):
        return "bar"

    ship_udf(ic, return_string_literal, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.return_string_literal("foo")' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 'bar'
def test_return_null(ic):
    """Check that a UDF which always returns None comes back as None."""
    @udf(IntVal(FunctionContext, IntVal))
    def return_null(context, a):
        return None

    ship_udf(ic, return_null, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.return_null(10)' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] is None
def test_string_concat(ic):
    """Check that a two-argument UDF using ``+`` joins its string inputs."""
    @udf(StringVal(FunctionContext, StringVal, StringVal))
    def string_concat(context, a, b):
        return a + b

    ship_udf(ic, string_concat, overwrite=True)

    sql = 'SELECT %s.string_concat("howdy ", "doody")' % ic._temp_db
    cursor = ic._cursor
    cursor.execute(sql)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 'howdy doody'
def test_string_index_concat(ic):
    """Check a UDF that concatenates characters 0 and 3 of its argument."""
    @udf(StringVal(FunctionContext, StringVal))
    def string_index_concat(context, a):
        return a[0] + a[3]

    ship_udf(ic, string_index_concat, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.string_index_concat("money")' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    # "money"[0] + "money"[3]
    assert first_row[0] == 'me'
def test_string_indexing(ic):
    """Check a UDF that indexes its string argument by its int argument."""
    @udf(StringVal(FunctionContext, StringVal, IntVal))
    def string_indexing(context, a, b):
        return a[b]

    ship_udf(ic, string_indexing, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.string_indexing("foo", 1)' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 'o'
def test_string_split_comma(ic):
    """Check a UDF returning the second comma-separated field of its input."""
    @udf(StringVal(FunctionContext, StringVal))
    def string_split_comma(context, a):
        return string.split(a, ",")[1]

    ship_udf(ic, string_split_comma, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.string_split_comma("foo,bar")' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 'bar'
def test_string_len(ic):
    """Check a UDF that returns ``len`` of its string argument."""
    @udf(IntVal(FunctionContext, StringVal))
    def string_len(context, a):
        return len(a)

    ship_udf(ic, string_len, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.string_len("australopithecus")' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 16
def test_return_empty_string(ic):
    """Check that a UDF returning "" comes back as an empty string."""
    @udf(StringVal(FunctionContext, StringVal))
    def return_empty_string(context, a):
        return ""

    ship_udf(ic, return_empty_string, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.return_empty_string("blah")' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == ''
def test_int_promotion(ic):
    """Check the value and reported column type of an IntVal -> BigIntVal UDF."""
    @udf(BigIntVal(FunctionContext, IntVal))
    def int_promotion(context, x):
        return x + 1

    ship_udf(ic, int_promotion, overwrite=True)

    cursor = ic._cursor
    cursor.execute('SELECT %s.int_promotion(2)' % ic._temp_db)
    first_row = cursor.fetchall()[0]
    assert first_row[0] == 3
    # The cursor description's type field for column 0 must read BIGINT.
    first_column = cursor.description[0]
    assert first_column[1] == 'BIGINT'
Exemple #12
0
def test_return_empty_string(ic):
    """Ship ``return_empty_string`` and expect '' from a SELECT on it."""
    @udf(StringVal(FunctionContext, StringVal))
    def return_empty_string(context, a):
        return ""

    ship_udf(ic, return_empty_string, overwrite=True)

    sql = 'SELECT %s.return_empty_string("blah")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == ''
Exemple #13
0
def test_string_indexing(ic):
    """Ship ``string_indexing`` and expect character 1 of "foo"."""
    @udf(StringVal(FunctionContext, StringVal, IntVal))
    def string_indexing(context, a, b):
        return a[b]

    ship_udf(ic, string_indexing, overwrite=True)

    sql = 'SELECT %s.string_indexing("foo", 1)' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 'o'
Exemple #14
0
def test_return_string_literal(ic):
    """Ship ``return_string_literal`` and expect its constant result."""
    @udf(StringVal(FunctionContext, StringVal))
    def return_string_literal(context, a):
        return "bar"

    ship_udf(ic, return_string_literal, overwrite=True)

    sql = 'SELECT %s.return_string_literal("foo")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 'bar'
Exemple #15
0
def test_return_null(ic):
    """Ship ``return_null`` and expect None from a SELECT on it."""
    @udf(IntVal(FunctionContext, IntVal))
    def return_null(context, a):
        return None

    ship_udf(ic, return_null, overwrite=True)

    sql = 'SELECT %s.return_null(10)' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value is None
Exemple #16
0
def test_string_index_concat(ic):
    """Ship ``string_index_concat`` and expect 'm' + 'e' for "money"."""
    @udf(StringVal(FunctionContext, StringVal))
    def string_index_concat(context, a):
        return a[0] + a[3]

    ship_udf(ic, string_index_concat, overwrite=True)

    sql = 'SELECT %s.string_index_concat("money")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 'me'
Exemple #17
0
def test_string_split_comma(ic):
    """Ship ``string_split_comma`` and expect the field after the comma."""
    @udf(StringVal(FunctionContext, StringVal))
    def string_split_comma(context, a):
        return string.split(a, ",")[1]

    ship_udf(ic, string_split_comma, overwrite=True)

    sql = 'SELECT %s.string_split_comma("foo,bar")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 'bar'
Exemple #18
0
def test_int_promotion(ic):
    """Ship ``int_promotion``; check both the result and the column type."""
    @udf(BigIntVal(FunctionContext, IntVal))
    def int_promotion(context, x):
        return x + 1

    ship_udf(ic, int_promotion, overwrite=True)

    sql = 'SELECT %s.int_promotion(2)' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 3
    # Index 1 of the first description entry is compared to the type name.
    reported_type = ic._cursor.description[0][1]
    assert reported_type == 'BIGINT'
Exemple #19
0
def test_string_concat(ic):
    """Ship ``string_concat`` and expect its two arguments joined."""
    @udf(StringVal(FunctionContext, StringVal, StringVal))
    def string_concat(context, a, b):
        return a + b

    ship_udf(ic, string_concat, overwrite=True)

    sql = 'SELECT %s.string_concat("howdy ", "doody")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 'howdy doody'
Exemple #20
0
def test_string_len(ic):
    """Ship ``string_len`` and expect the length of "australopithecus"."""
    @udf(IntVal(FunctionContext, StringVal))
    def string_len(context, a):
        return len(a)

    ship_udf(ic, string_len, overwrite=True)

    sql = 'SELECT %s.string_len("australopithecus")' % ic._temp_db
    ic._cursor.execute(sql)
    rows = ic._cursor.fetchall()
    value = rows[0][0]
    assert value == 16
def test_int_predicates(ic):
    """Ship ``int_predicate`` and check it on both sides of its threshold.

    The UDF returns True for inputs greater than 10 and False otherwise.
    Results are checked by identity (``is True`` / ``is False``) rather than
    equality, so an integer 0 or 1 coming back from the cursor would fail
    the test instead of silently comparing equal to a boolean.
    """
    @udf(BooleanVal(FunctionContext, IntVal))
    def int_predicate(context, a):
        if a > 10:
            return True
        else:
            return False

    ship_udf(ic, int_predicate, overwrite=True)

    # 10 is not greater than 10 -> False.
    ic._cursor.execute('SELECT %s.int_predicate(10)' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is False

    # 11 crosses the threshold -> True.
    ic._cursor.execute('SELECT %s.int_predicate(11)' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is True
def test_return_two_str_literals(ic):
    """Check a UDF that picks between two string constants by comparing to 5."""
    @udf(StringVal(FunctionContext, IntVal))
    def return_two_str_literals(context, a):
        if a > 5:
            return "foo"
        else:
            return "bar"

    ship_udf(ic, return_two_str_literals, overwrite=True)

    def first_cell(sql):
        # Execute one statement; hand back column 0 of the first fetched row.
        ic._cursor.execute(sql)
        return ic._cursor.fetchall()[0][0]

    # 2 is not greater than 5 -> else branch.
    assert first_cell(
        'SELECT %s.return_two_str_literals(2)' % ic._temp_db) == 'bar'
    # 20 is greater than 5 -> if branch.
    assert first_cell(
        'SELECT %s.return_two_str_literals(20)' % ic._temp_db) == 'foo'
Exemple #23
0
def test_int_predicates(ic):
    """Ship ``int_predicate`` and check it at 10 (False) and 11 (True)."""
    @udf(BooleanVal(FunctionContext, IntVal))
    def int_predicate(context, a):
        if a > 10:
            return True
        else:
            return False

    ship_udf(ic, int_predicate, overwrite=True)

    cursor = ic._cursor
    db = ic._temp_db

    # At the threshold the predicate does not hold.
    cursor.execute('SELECT %s.int_predicate(10)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] is False

    # One above the threshold it does.
    cursor.execute('SELECT %s.int_predicate(11)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] is True
Exemple #24
0
def test_return_two_str_literals(ic):
    """Ship ``return_two_str_literals`` and hit both of its branches."""
    @udf(StringVal(FunctionContext, IntVal))
    def return_two_str_literals(context, a):
        if a > 5:
            return "foo"
        else:
            return "bar"

    ship_udf(ic, return_two_str_literals, overwrite=True)

    cursor = ic._cursor
    db = ic._temp_db

    # Input at or below 5 -> "bar".
    cursor.execute('SELECT %s.return_two_str_literals(2)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] == 'bar'

    # Input above 5 -> "foo".
    cursor.execute('SELECT %s.return_two_str_literals(20)' % db)
    rows = cursor.fetchall()
    assert rows[0][0] == 'foo'
def test_string_eq(ic):
    """Ship ``string_eq`` and check its three outcomes: True, False, None.

    The UDF returns True for "foo", False for "bar" and None for anything
    else. Boolean results are checked by identity (``is True`` /
    ``is False``) rather than equality, so an integer 1 or 0 coming back
    from the cursor would fail the test instead of comparing equal.
    """
    @udf(BooleanVal(FunctionContext, StringVal))
    def string_eq(context, a):
        if a == "foo":
            return True
        elif a == "bar":
            return False
        else:
            return None

    ship_udf(ic, string_eq, overwrite=True)

    # First comparison matches -> True.
    ic._cursor.execute('SELECT %s.string_eq("foo")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is True

    # Second comparison matches -> False.
    ic._cursor.execute('SELECT %s.string_eq("bar")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is False

    # Neither matches -> None.
    ic._cursor.execute('SELECT %s.string_eq("baz")' % ic._temp_db)
    results = ic._cursor.fetchall()
    assert results[0][0] is None
Exemple #26
0
def test_string_eq(ic):
    """Ship ``string_eq`` and query it with "foo", "bar" and "baz"."""
    @udf(BooleanVal(FunctionContext, StringVal))
    def string_eq(context, a):
        if a == "foo":
            return True
        elif a == "bar":
            return False
        else:
            return None

    ship_udf(ic, string_eq, overwrite=True)

    def first_cell(sql):
        # Execute one statement; hand back column 0 of the first fetched row.
        ic._cursor.execute(sql)
        return ic._cursor.fetchall()[0][0]

    # "foo" -> True, "bar" -> False, anything else -> None.
    assert first_cell('SELECT %s.string_eq("foo")' % ic._temp_db) is True
    assert first_cell('SELECT %s.string_eq("bar")' % ic._temp_db) is False
    assert first_cell('SELECT %s.string_eq("baz")' % ic._temp_db) is None
Exemple #27
0
    LOCATION '/user/laserson/bigml/census_text'
"""

# Query run once per UDF size; its DISTINCT result set is fetched in full so
# that the measured interval covers both execution and retrieval.
score_obs_query = """
    SELECT DISTINCT predict_income(age, workclass, final_weight, education,
            education_num, marital_status, occupation, relationship, race, sex,
            hours_per_week, native_country, income) FROM census_text
"""

cursor.execute(create_table_query)

for size in sizes:
    # Time how long wrapping the plain Python function with `udf` takes.
    start_compile = time()
    predict_income = udf(signature)(udfs[size])
    end_compile = time()

    ship_udf(cursor, predict_income,
             '/user/laserson/test-udf/census_%i.ll' %
             size, 'bottou01-10g.pa.cloudera.com', user='******',
             overwrite=True)

    # Time the scoring query, including fetching every row.
    start_score = time()
    cursor.execute(score_obs_query)
    distinct = cursor.fetchall()
    end_score = time()

    # CSV line: engine, size, bytecode length, score seconds, compile seconds.
    # print() with one pre-formatted argument and `__code__` (instead of
    # `func_code`) behave the same on Python 2.6+ and also run on Python 3.
    bytecode_len = len(udfs[size].__code__.co_code)
    print("impala,%i,%i,%.2f,%.2f" % (size,
                                      bytecode_len,
                                      end_score - start_score,
                                      end_compile - start_compile))