Python KeyIndexed 예제들, h2o_xl.KeyIndexed Python 예제들

예제 #1

0

파일 보기

    def test_exec2_runif(self):
        print "in h2o-dev, params are column, min, max, seed"
        bucket = 'home-0xdiag-datasets'
        csvPathname = 'standard/covtype.data'
        hexKey = 'r.hex'
        parseResult = h2i.import_parse(bucket=bucket,
                                       path=csvPathname,
                                       schema='put',
                                       hex_key=hexKey)
        # work up to the failing case incrementally
        execExprList = [
            # hack to make them keys? (not really needed but interesting)
            # params for h2o-dev runif are: column, min, max, seed
            AssignObj('r0.hex', KeyIndexed('r.hex', col=0)),
            AssignObj('s0.hex', Fcn("h2o.runif", KeyIndexed('r.hex', col=0),
                                    1)),
            AssignObj('s1.hex', Fcn("h2o.runif", KeyIndexed('r.hex', col=1),
                                    -1)),
            AssignObj('s2.hex',
                      Fcn("h2o.runif", KeyIndexed('r.hex', col=54), -1)),
        ]

        results = []
        for execExpr in execExprList:
            start = time.time()
            result = execExpr.do(timeoutSecs=30)
            results.append(result)
            execResult = execExpr.execResult
            print "exec took", time.time() - start, "seconds"
            print "exec result:", result
            print "exec result (full):", h2o.dump_json(execResult)
            h2o.check_sandbox_for_errors()

        rSummary = h2o_cmd.runSummary(key='r0.hex', cols='0')
        # h2o_cmd.infoFromSummary(rSummary)

        rSummary = h2o_cmd.runSummary(key='s0.hex', cols='0')
        # h2o_cmd.infoFromSummary(rSummary)

        sSummary = h2o_cmd.runSummary(key='s1.hex', cols='0')
        # h2o_cmd.infoFromSummary(sSummary)

        sSummary = h2o_cmd.runSummary(key='s2.hex', cols='0')
        # h2o_cmd.infoFromSummary(sSummary)

        # since there are no NAs in covtype, r.hex and s.hex should be identical?
        if 1 == 0:
            print "Comparing summary of r.hex to summary of s.hex"
            df = h2o_util.JsonDiff(rSummary, sSummary, with_values=True)
            # time can be different
            print "df.difference:", h2o.dump_json(df.difference)
            self.assertLess(len(df.difference), 2)

            print "results from the individual exec expresssions (ignore last which was an apply)"
            print "results:", results
            self.assertEqual(results, [
                0.0, 0.0, 0.0, 1859.0, 581012.0, 581012.0, 2959.365300544567,
                1859.0, 1859.0
            ])

예제 #2

0

파일 보기

    def test_rapids_mean(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            (1000, 5, 'cA', 200),
        ]

        for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
            SEEDPERFILE = random.randint(0, sys.maxint)

            csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(
                rowCount) + 'x' + str(colCount) + '.csv'
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename

            print "\nCreating random", csvPathname
            write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)
            parseResult = h2i.import_parse(path=csvPathname,
                                           schema='put',
                                           hex_key=hex_key,
                                           timeoutSecs=timeoutSecs,
                                           doSummary=False)

            inspect = h2o_cmd.runInspect(key=hex_key)
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                inspect)

            print "\n" + csvPathname, \
                "    numRows:", "{:,}".format(numRows), \
                "    numCols:", "{:,}".format(numCols)

            # should match # of cols in header or ??
            self.assertEqual(
                numCols, colCount,
                "parse created result with the wrong number of cols %s %s" %
                (numCols, colCount))
            self.assertEqual(
                numRows, rowCount,
                "parse created result with the wrong number of rows %s %s" %
                (numRows, rowCount))

            data_key = hex_key
            data_key2 = hex_key + "_2"
            for trial in range(4):
                result_key = data_key + "_" + str(trial)
                # copy the key
                Assign(data_key2, data_key)
                Assign(result_key,
                       Fcn('mean', KeyIndexed(data_key2, col=0), 0, False))
                trial += 1

예제 #3

0

파일 보기

파일: test_rapids_funs_basic3.py 프로젝트: letsflykite/h2o-dev

    def test_rapids_funs_basic3(self):
        DO_FAIL = False
        if DO_FAIL:
            bucket = 'home-0xdiag-datasets'
            csvPathname = 'standard/covtype.data'
        else:
            bucket = 'smalldata'
            csvPathname = 'iris/iris_wheader.csv'

        hexKey = 'r1'
        parseResult = h2i.import_parse(bucket=bucket,
                                       path=csvPathname,
                                       schema='put',
                                       hex_key=hexKey)

        keys = []

        # works for 1 pass..why is execExpr set for 2nd pass? should be new instance?
        # if we reuse the same object in the list, it has state?
        # do we need to copy the object...hmm
        for trial in range(1):
            for execObj in funsList:
                freshObj = copy(execObj)
                result = freshObj.do()
                # rapids doesn't like complicated params right now?
                if DO_FAIL:
                    a = Assign('junk',
                               Fcn('anon', KeyIndexed('r1', col=0)),
                               do=False)
                else:
                    a = Assign('junk', Fcn('anon', 'r1'), do=False)
                result = a.do(timeoutSecs=60)

                # rows might be zero!
                if a.execResult['num_rows'] or a.execResult['num_cols']:
                    keys.append(a.execExpr)

        print "\nExpressions that created keys"
        for k in keys:
            print k

        # for execExpr in exprList:
        #     h2e.exec_expr(execExpr=execExpr, resultKey=None, timeoutSecs=10)

        h2o.check_sandbox_for_errors()

예제 #4

0

파일 보기

파일: test_rapids_funs_1000_stmnt.py 프로젝트: letsflykite/h2o-dev

    def test_rapids_funs_1000_stmnt(self):
        DO_FAIL = False
        if DO_FAIL:
            bucket = 'home-0xdiag-datasets'
            csvPathname = 'standard/covtype.data'
        else:
            bucket = 'smalldata'
            csvPathname = 'iris/iris_wheader.csv'

        hexKey = 'r1'
        parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='put', hex_key=hexKey)

        keys = []

        for trial in range(3):
            for execObj in funsList:
                freshObj = copy(execObj)
                print "ast length:", len(str(freshObj))
                result = freshObj.do()

                # rapids doesn't like complicated params right now?
                if DO_FAIL:
                    a = Assign('junk', Fcn('anon', KeyIndexed('r1',col=0)))
                else:
                    a = Assign('junk', Fcn('anon', 'r1'))
                result = a.do(timeoutSecs=60)

                # rows might be zero!
                if a.execResult['num_rows'] or a.execResult['num_cols']:
                    keys.append(a.execExpr)

        print "\nExpressions that created keys"
        for k in keys:
            print k

        # for execExpr in exprList:
        #     h2e.exec_expr(execExpr=execExpr, resultKey=None, timeoutSecs=10)

        h2o.check_sandbox_for_errors()

예제 #5

0

파일 보기

        Assign('q', Fcn('var', 'x', None, False, None), do=False),
        Assign('r', Fcn('var', 'x', None, False, None), do=False),
        Assign('s', Fcn('var', 'x', None, False, None), do=False),
        Assign('t', Fcn('var', 'x', None, False, None), do=False),
        Assign('u', Fcn('var', 'x', None, False, None), do=False),
        Assign('v', Fcn('var', 'x', None, False, None), do=False),
        Assign('w', Fcn('var', 'x', None, False, None), do=False),
        Assign('x', Fcn('var', 'x', None, False, None), do=False),
        Assign('y', Fcn('var', 'x', None, False, None), do=False),
        Assign('z', Fcn('var', 'x', None, False, None), do=False),
        Fcn('var', 'x', None, False, None),
    ),

    Def('anon', 'x', 
        [Assign(key, Fcn('var', 'x', None, False, None), do=False) for key in 'abdefghijklmnopqrstuvz'],
        [Assign(key, Fcn('sum', KeyIndexed('x',col=0), False), do=False) for key in 'abdefghijklmnopqrstuvz'],
        Fcn('var', 'x', None, False, None),
    ),
]

class Basic(unittest.TestCase):
    def tearDown(self):
        """Check the h2o sandbox for errors once a test has finished."""
        h2o.check_sandbox_for_errors()

    @classmethod
    def setUpClass(cls):
        """Set the module-level SEED and initialize h2o before the tests run."""
        global SEED
        # setup_random_seed() returns the seed, which is kept in the global SEED.
        SEED = h2o.setup_random_seed()
        # NOTE(review): first argument is presumably the node count; confirm.
        h2o.init(1, base_port=54333)

    @classmethod

예제 #6

0

파일 보기

파일: test_rapids_funs_1000_stmnt.py 프로젝트: letsflykite/h2o-dev

from h2o_xl import Def, Fcn, Assign, KeyIndexed
from copy import copy

print "Trying a different way, listing Rapids objects, rather than .ast() strings"

# NOTE(review): the earlier note here read "'c' allowed", yet 'c' (like 'w',
# 'x' and 'y') is absent from the key alphabet below. Confirm which is intended.
# should be able to take a list of statements
# Six repetitions of the 22-letter key alphabet; each character is used as a
# destination key name.
keyString = 'abdefghijklmnopqrstuvz' * 6
funsList = [
    Def('anon', 'x', 
        [Assign(key, Fcn('var', 'x', None, False, None), do=False) for key in keyString],
        
        [Assign(key, Fcn('sum', KeyIndexed('x',col=0), False), do=False) for key in keyString],
        [Assign(key, Fcn('max', KeyIndexed('x',col=0), False), do=False) for key in keyString],
        [Assign(key, Fcn('min', KeyIndexed('x',col=0), False), do=False) for key in keyString],
        [Assign(key, Fcn('xorsum', KeyIndexed('x',col=0), False), do=False) for key in keyString],

        [Assign(key, Fcn('sd', KeyIndexed('x',col=0), False), do=False) for key in keyString],
        [Assign(key, Fcn('ncol', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('is.factor', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('any.factor', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('length', KeyIndexed('x',col=0)), do=False) for key in keyString],

        [Assign(key, Fcn('sin', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('asin', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('sinh', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('cos', KeyIndexed('x',col=0)), do=False) for key in keyString],
        [Assign(key, Fcn('acos', KeyIndexed('x',col=0)), do=False) for key in keyString],

예제 #7

0

파일 보기

파일: test_rapids_row_range.py 프로젝트: letsflykite/h2o-dev

    def test_rapids_row_range(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            # (1000000, 5, 'cA', 200),
            (1000, 5, 'cA', 200),
        ]

        # h2b.browseTheCloud()
        for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
            SEEDPERFILE = random.randint(0, sys.maxint)

            csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(
                rowCount) + 'x' + str(colCount) + '.csv'
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename

            print "\nCreating random", csvPathname
            write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)
            parseResult = h2i.import_parse(path=csvPathname,
                                           schema='put',
                                           hex_key=hex_key,
                                           timeoutSecs=timeoutSecs,
                                           doSummary=False)

            inspect = h2o_cmd.runInspect(key=hex_key)
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                inspect)

            print "\n" + csvPathname, \
                "    numRows:", "{:,}".format(numRows), \
                "    numCols:", "{:,}".format(numCols)

            # should match # of cols in header or ??
            self.assertEqual(
                numCols, colCount,
                "parse created result with the wrong number of cols %s %s" %
                (numCols, colCount))
            self.assertEqual(
                numRows, rowCount,
                "parse created result with the wrong number of rows %s %s" %
                (numRows, rowCount))

            # Xbase.debugOnly = True

            REPEAT = 1
            data_key = hex_key
            for i in range(REPEAT):
                result_key = data_key + "_" + str(i)
                # Assign('s1', Seq(range(5)) ).do
                Assign('s1', Seq(range(5)))

                # take advantage of default params for row/col (None)
                # need the 'c' function, to make sure the key is created

                # first try as object, then method
                Assign('s2', Fcn('c', Seq(range(5))))
                print dump_json(Xbase.lastExecResult)
                print dump_json(Xbase.lastResult)

                # just combine
                Assign('s3', Col(Seq(range(5))))

                inspect = h2o_cmd.runInspect(key='s3')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)
                assert numRows == 5
                assert numCols == 1

                Assign('s2', Col(Seq(range(5))))

                inspect = h2o_cmd.runInspect(key='s2')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)
                assert numRows == 5
                assert numCols == 1

                # can't have sequence of sequences?
                # make sure key is created with c()
                f = Fcn(
                    'c',
                    Seq(Colon(99, 400), "#2", 1, range(1, 5), range(7, 10),
                        range(50, 52)))
                Assign('s1', f)

                f = Col(
                    Seq(Colon(99, 400), "#2", 1, range(1, 5), range(7, 10),
                        range(50, 52)))
                Assign('s2', f)

                inspect = h2o_cmd.runInspect(key='s2')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)
                assert numRows == 313
                assert numCols == 1

                print "z1"
                Assign(result_key, KeyIndexed(data_key, row=Seq(range(1, 5))))
                print "z2"
                Assign(
                    's1',
                    KeyIndexed(data_key,
                               row=Seq(Colon(99, 400), "#2", 1, range(1, 5))))

                print "z3"
                Assign(result_key, KeyIndexed(data_key, row='#1')).do
                print "z4"
                Assign(result_key, KeyIndexed(data_key,
                                              row=Colon('#1', '#100')))
                print "z5"
                Assign(result_key, KeyIndexed(data_key, row=Colon(1, 100)))
                # this should fail rapids because of reverse msb/lsb
                # illegal, detected
                # execResult, Assign(result_key, KeyIndexed(data_key, row=Colon('#100', '#1')))
                print "z6"
                Assign(result_key, KeyIndexed(data_key,
                                              row=Colon('#-2', '#-1')))
                print "z7"
                Assign(result_key, KeyIndexed(data_key, row=Colon(-2, -1)))
                # illegal, detected
                # execResult, Assign(result_key, KeyIndexed(data_key, row=Colon('#-1', '#-2')))
                # take advantage of number to string conversion
                print "z8"
                Assign(result_key,
                       KeyIndexed(data_key, row=Colon('#1', rowCount - 10)))
                print "z9"
                Assign(result_key,
                       KeyIndexed(data_key, col=Colon(
                           '#1',
                           colCount - 1,
                       )))

                # no assign
                print "z10"
                result = KeyIndexed(data_key, row=Colon('#1',
                                                        rowCount - 10)).do()
                print "z11"
                # result = KeyIndexed(data_key, col=Colon('#1', colCount-1,)).do()

                # do some function translation
                print "z12"
                # result = Fcn('==', 1, KeyIndexed(data_key, col=Colon('#1', colCount-1,))).do()

                print "\n" + csvPathname, \
                    "    numRows:", "{:,}".format(numRows), \
                    "    numCols:", "{:,}".format(numCols)

예제 #8

0

파일 보기

파일: test_rapids_cut.py 프로젝트: kordikp/AutoMLprediction

    def test_rapids_cut(self):
        """Build sequence keys and slice a synthetic frame with Rapids expressions.

        A small random CSV is written and parsed and its dimensions checked.
        Several sequence keys are then assigned and inspected, followed by
        min/max/mean of column 1 and a series of row/column range selections
        that are all assigned to the same result key. The Cut() calls this
        test is named after are commented out.
        """
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            (1000, 5, 'cA', 200),
        ]

        # h2b.browseTheCloud()
        for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
            SEEDPERFILE = random.randint(0, sys.maxint)

            csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(
                rowCount) + 'x' + str(colCount) + '.csv'
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename

            print "\nCreating random", csvPathname
            write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)
            parseResult = h2i.import_parse(path=csvPathname,
                                           schema='put',
                                           hex_key=hex_key,
                                           timeoutSecs=timeoutSecs,
                                           doSummary=False)

            inspect = h2o_cmd.runInspect(key=hex_key)
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                inspect)

            print "\n" + csvPathname, \
                "    numRows:", "{:,}".format(numRows), \
                "    numCols:", "{:,}".format(numCols)

            # should match # of cols in header or ??
            self.assertEqual(
                numCols, colCount,
                "parse created result with the wrong number of cols %s %s" %
                (numCols, colCount))
            self.assertEqual(
                numRows, rowCount,
                "parse created result with the wrong number of rows %s %s" %
                (numRows, rowCount))

            REPEAT = 1
            data_key = hex_key
            for i in range(REPEAT):
                # every selection below is assigned to this one key
                result_key = data_key + "_" + str(i)

                Assign('seq1', Seq(range(5)))
                # take advantage of default params for row/col (None)
                # need the 'c' function, to make sure the key is created

                Assign('seq2', Fcn('c', Seq(range(5))))
                # NOTE(review): inspects 'seq1' right after assigning 'seq2';
                # confirm the intended key.
                inspect = h2o_cmd.runInspect(key='seq1')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)

                Assign('seq3', Col(Seq(range(5))))
                # NOTE(review): inspects 'seq2' right after assigning 'seq3';
                # confirm the intended key.
                inspect = h2o_cmd.runInspect(key='seq2')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)

                # can't have sequence of sequences?
                # make sure key is created with c()
                Assign(
                    'seq4',
                    Fcn(
                        'c',
                        Seq(Colon(99, 400), "#2", 1, range(1, 5), range(7, 10),
                            range(50, 52))))

                # NOTE(review): inspects 'seq1' right after assigning 'seq4';
                # confirm the intended key. These values are what the final
                # print of numRows/numCols reports.
                inspect = h2o_cmd.runInspect(key='seq1')
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(
                    inspect)

                Assign(result_key, KeyIndexed(data_key, row=Seq(range(1, 5))))
                Assign(
                    'seq5',
                    KeyIndexed(data_key,
                               row=Seq(Colon(99, 400), "#2", 1, range(1, 5))))

                # they need to be same size
                # Assign('seq6', Key('seq5') + Key('seq4') + Key('seq3'))

                # doesn't like my cut? complains on FALSE
                # Assign(result_key, Cut(KeyIndexed(data_key, col=0)))
                # Assign(result_key, Cut(KeyIndexed(data_key, col=1), breaks=3))

                # reductions over column 1
                Assign(result_key, Fcn('min', KeyIndexed(data_key, col=1),
                                       True))
                Assign(result_key, Fcn('max', KeyIndexed(data_key, col=1),
                                       True))
                Assign(result_key,
                       Fcn('mean', KeyIndexed(data_key, col=1), 0, False))

                # row selections in '#'-string and plain numeric notation
                Assign(result_key, KeyIndexed(data_key, row='#1'))
                Assign(result_key, KeyIndexed(data_key,
                                              row=Colon('#1', '#100')))
                Assign(result_key, KeyIndexed(data_key, row=Colon(1, 100)))
                # this should fail rapids because of reverse msb/lsb
                # illegal, detected
                # resultExpr, result = Assign(result_key, KeyIndexed(data_key, row=Colon('#100', '#1')))
                Assign(result_key, KeyIndexed(data_key,
                                              row=Colon('#-2', '#-1')))
                Assign(result_key, KeyIndexed(data_key, row=Colon(-2, -1)))
                # illegal, detected
                # resultExpr, result = Assign(result_key, KeyIndexed(data_key, row=Colon('#-1', '#-2')))
                # take advantage of number to string conversion
                Assign(result_key,
                       KeyIndexed(data_key, row=Colon('#1', rowCount - 10)))
                Assign(result_key,
                       KeyIndexed(data_key, col=Colon(
                           '#1',
                           colCount - 1,
                       )))

                # no assign. Expr() complains when result has no key?
                # (so the two selections above are simply assigned again)
                Assign(result_key,
                       KeyIndexed(data_key, row=Colon('#1', rowCount - 10)))
                Assign(result_key,
                       KeyIndexed(data_key, col=Colon(
                           '#1',
                           colCount - 1,
                       )))

                # do some function translation
                Assign(
                    result_key,
                    Fcn('==', 1,
                        KeyIndexed(data_key, col=Colon(
                            '#1',
                            colCount - 1,
                        ))))

                print "\n" + csvPathname, \
                    "    numRows:", "{:,}".format(numRows), \
                    "    numCols:", "{:,}".format(numCols)

예제 #9

0

파일 보기

    def test_rapids_funs_1op(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            # (1000000, 5, 'cA', 200),
            (1000, 5, 'cA', 200),
            ]

        # h2b.browseTheCloud()
        for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
            SEEDPERFILE = random.randint(0, sys.maxint)

            csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename

            print "\nCreating random", csvPathname
            write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)
            parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, 
                timeoutSecs=timeoutSecs, doSummary=False)

            inspect = h2o_cmd.runInspect(key=hex_key)
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)

            print "\n" + csvPathname, \
                "    numRows:", "{:,}".format(numRows), \
                "    numCols:", "{:,}".format(numCols)

            # should match # of cols in header or ??
            self.assertEqual(numCols, colCount,
                "parse created result with the wrong number of cols %s %s" % (numCols, colCount))
            self.assertEqual(numRows, rowCount,
                "parse created result with the wrong number of rows %s %s" % (numRows, rowCount))

            # Xbase.debugOnly = True

            REPEAT = 1
            data_key = hex_key
            data_key2 = hex_key + "_2"
            trial = 0
            good = []
            bad = []
            both = h2o_xl.xFcnOp1Set.union(h2o_xl.xFcnOp3Set)
            both = h2o_xl.xFcnOp1Set
            for fun in both:

                a = None
                try:
                    result_key = data_key + "_" + str(trial)
                    # copy the key
                    Assign(data_key2, data_key)

                    # a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), True))

                    # a = Assign(result_key, Fcn('sum', KeyIndexed(data_key2, col=0), True))
                    # a = Assign(result_key, Fcn('xorsum', KeyIndexed(data_key2, col=0), True))
                    # a = Assign(result_key, Fcn('sqrt', KeyIndexed(data_key2, col=0)))
                    # a = Assign(result_key, Fcn('ncol', KeyIndexed(data_key2, col=0)))

                    # what's wrong with mean?
                    if fun in ['ncol', 'asin', 'any.factor', 'sin', 'atan', 'tan', 'sign', 'log', 'exp', 'sqrt', 'abs', 'floor', 'ceiling', 'trunc','is.factor', 'is.na', 'any.na', 'nrow', 'tanh', 'length', 'acos', 'cos', 'sinh', 'cosh']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0)))
                        good.append(fun)
                    elif fun in ['sum', 'max', 'min', 'xorsum', 'sd']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), True))
                        good.append(fun)
                    elif fun in ['scale']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), False, False))
                        good.append(fun)
                    elif fun in ['round', 'signif']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), 1))
                        good.append(fun)
                    elif fun in ['seq_len', 'rep_len']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), 4))
                        good.append(fun)
                    elif fun in ['seq']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), 1, 5, 1))
                        good.append(fun)
                    elif fun in ['mean']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), 0, False))
                        good.append(fun)
                    elif fun in ['var']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), False, False, False))
                        good.append(fun)
                    elif fun in ['match']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), KeyIndexed(data_key2, col=0), 1, None))
                        good.append(fun)
                    elif fun in ['unique']:
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), False, 10, 1))
                        good.append(fun)
                    else:
                        # bad functions kill h2o?
                        a = Assign(result_key, Fcn(fun, KeyIndexed(data_key2, col=0), None))
                        bad.append(fun)

                        # a = Fcn(fun, KeyIndexed(data_key, col=0), '%FALSE ')
                        # a = Fcn(fun, data_key, '%FALSE')
                        # a = Fcn(fun, data_key)

                    # scalars?
                    if 1==0:
                        inspect = h2o_cmd.runInspect(key=result_key)
                        missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
                        assert numRows==1000, numRows
                        assert numCols==1, numCols

                        print "\n" + csvPathname, \
                            "    numRows:", "{:,}".format(numRows), \
                            "    numCols:", "{:,}".format(numCols)

                except: 
                    if not a:
                        # print dump_json(a.execResult)
                        bad.append(fun)

                trial += 1

            print "good:", good
            print "bad:", bad

예제 #10

0

파일 보기

파일: test_rapids_funs_basic3.py 프로젝트: letsflykite/h2o-dev

        Assign('v', Fcn('var', 'x', None, False, None), do=False),
        Assign('w', Fcn('var', 'x', None, False, None), do=False),
        Assign('x', Fcn('var', 'x', None, False, None), do=False),
        Assign('y', Fcn('var', 'x', None, False, None), do=False),
        Assign('z', Fcn('var', 'x', None, False, None), do=False),
        Fcn('var', 'x', None, False, None),
    ),
    Def(
        'anon',
        'x',
        [
            Assign(key, Fcn('var', 'x', None, False, None), do=False)
            for key in 'abdefghijklmnopqrstuvz'
        ],
        [
            Assign(key, Fcn('sum', KeyIndexed('x', col=0), False), do=False)
            for key in 'abdefghijklmnopqrstuvz'
        ],
        Fcn('var', 'x', None, False, None),
    ),
]


class Basic(unittest.TestCase):
    def tearDown(self):
        """Check the h2o sandbox for errors once a test has finished."""
        h2o.check_sandbox_for_errors()

    @classmethod
    def setUpClass(cls):
        global SEED
        SEED = h2o.setup_random_seed()