Example #1
def delete_keys_at_all_nodes(node=None, pattern=None, timeoutSecs=120):
    print "Frame is too slow to look up key names when a lot of unparsed files were imported"
    print "Just using remove_all_keys and saying 0 removed"
    print "WARNING: pattern is ignored"
    if 1==1:
        h2o.n0.remove_all_keys()
        return 0
    else:
        print "Going to delete all keys one at a time (slower than 'remove all keys')"
        # TEMP: change this to remove_all_keys which ignores locking and removes keys?
        # getting problems when tests fail in multi-test-on-one-h2o-cluster runner*sh tests
        if not node: node = h2o_nodes.nodes[0]
        print "Will cancel any running jobs, because we can't unlock keys on running jobs"
        # I suppose if we used a pattern, we wouldn't have to worry about running jobs..oh well.
        h2o_jobs.cancelAllJobs()
        print "unlock all keys first to make sure broken keys get removed"
        node.unlock()
        totalDeletedCnt = 0
        deletedCnt = delete_keys(node, pattern=pattern, timeoutSecs=timeoutSecs)
        totalDeletedCnt += deletedCnt

        if pattern:
            print "Total: Deleted", totalDeletedCnt, "keys with filter=", pattern, "at", len(h2o_nodes.nodes), "nodes"
        else:
            print "Total: Deleted", totalDeletedCnt, "keys at", len(h2o_nodes.nodes), "nodes"
            # do a remove_all_keys to clean out any locked keys also (locked keys will complain above)
            # doesn't work if you remove job keys first, since it looks at the job list and gets confused
            ### node.remove_all_keys(timeoutSecs=timeoutSecs)

        return totalDeletedCnt
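
Later examples call this helper through the h2i module alias (see the GBM restart tests below). A minimal usage sketch of the cleanup pattern, assuming the h2o-2 python test harness already has a cloud built and that h2i is the module this function lives in:

# Minimal cleanup sketch (assumption: h2i is the module that defines the helper above,
# and a cloud is already running under the test harness).
deletedCnt = h2i.delete_keys_at_all_nodes(pattern=None, timeoutSecs=120)
print "cleanup removed", deletedCnt, "keys"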
Example #2
def delete_keys_at_all_nodes(node=None, pattern=None, timeoutSecs=120):
    print "Going to delete all keys one at a time (slower than 'remove all keys')"
    # TEMP: change this to remove_all_keys which ignores locking and removes keys?
    # getting problems when tests fail in multi-test-on-one-h2o-cluster runner*sh tests
    if not node: node = h2o_nodes.nodes[0]
    print "Will cancel any running jobs, because we can't unlock keys on running jobs"
    # I suppose if we used a pattern, we wouldn't have to worry about running jobs..oh well.
    h2o_jobs.cancelAllJobs()
    print "unlock all keys first to make sure broken keys get removed"
    node.unlock()
    totalDeletedCnt = 0
    deletedCnt = delete_keys(node, pattern=pattern, timeoutSecs=timeoutSecs)
    totalDeletedCnt += deletedCnt

    if pattern:
        print "Total: Deleted", totalDeletedCnt, "keys with filter=", pattern, "at", len(h2o_nodes.nodes), "nodes"
    else:
        print "Total: Deleted", totalDeletedCnt, "keys at", len(h2o_nodes.nodes), "nodes"
        # do a remove_all_keys to clean out any locked keys also (locked keys will complain above)
        # doesn't work if you remove job keys first, since it looks at the job list and gets confused
        ### node.remove_all_keys(timeoutSecs=timeoutSecs)

    return totalDeletedCnt
Example #3
    def test_1ktrees_job_cancel_many(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()

        # always match the run below!
        # just using one file for now
        for x in [1000]:
            shCmdString = "perl " + h2o.find_file("syn_scripts/parity.pl") + " 128 4 " + str(x) + " quad"
            h2o.spawn_cmd_and_wait('parity.pl', shCmdString.split(), 4)
            csvFilename = "parity_128_4_" + str(x) + "_quad.data"

        csvFilename = "parity_128_4_" + str(1000) + "_quad.data"  
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
        hex_key = csvFilename + ".hex"
        parseResult = h2o_cmd.parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=30)

        print "Kick off twenty, then cancel them all..there's a timeout on the wait after cancelling"
        for trial in range (1,20):
            h2o.verboseprint("Trial", trial)
            start = time.time()
            h2o_cmd.runRF(parseResult=parseResult, trees=trial, depth=50, rfView=False, noPoll=True,
                timeoutSecs=600, retryDelaySecs=3)
            print "RF #", trial,  "started on ", csvFilename, 'took', time.time() - start, 'seconds'


        h2o.check_sandbox_for_errors()
        h2o_jobs.cancelAllJobs(timeoutSecs=10)
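
These test_* methods only make sense inside a unittest.TestCase that builds an H2O cloud first. A hedged sketch of the scaffolding h2o-2 test files typically wrap around them; the class name, node count, and heap size are illustrative and not taken from the excerpt:

import unittest
import h2o

class Basic(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # illustrative cloud setup; real tests pick their own node count / heap size
        h2o.build_cloud(1, java_heap_GB=4)

    @classmethod
    def tearDownClass(cls):
        h2o.tear_down_cloud()

if __name__ == '__main__':
    h2o.unit_main()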
Example #4
def delete_keys_at_all_nodes(node=None, pattern=None, timeoutSecs=120):
    print "Going to delete all keys one at a time (slower than 'remove all keys')"
    # TEMP: change this to remove_all_keys which ignores locking and removes keys?
    # getting problems when tests fail in multi-test-on-one-h2o-cluster runner*sh tests
    if not node: node = h2o.nodes[0]
    print "Will cancel any running jobs, because we can't unlock keys on running jobs"
    # I suppose if we used a pattern, we wouldn't have to worry about running jobs..oh well.
    h2o_jobs.cancelAllJobs()
    print "unlock all keys first to make sure broken keys get removed"
    node.unlock()
    totalDeletedCnt = 0
    # do it in reverse order, since we always talk to 0 for other stuff
    # this will be interesting if the others don't have a complete set
    # theoretically, the deletes should be 0 after the first node 
    # since the deletes should be global
    # for node in reversed(h2o.nodes):

    # new: only use the directed node (node[0] typically)
    # h2o storeview should have a global view now.
    deletedCnt = delete_keys(node, pattern=pattern, timeoutSecs=timeoutSecs)
    totalDeletedCnt += deletedCnt

    if pattern:
        print "Total: Deleted", totalDeletedCnt, "keys with filter=", pattern, "at", len(h2o.nodes), "nodes"
    else:
        print "Total: Deleted", totalDeletedCnt, "keys at", len(h2o.nodes), "nodes"
        # do a remove_all_keys to clean out any locked keys also (locked keys will complain above)
        # doesn't work if you remove job keys first, since it looks at the job list and gets confused
        ### node.remove_all_keys(timeoutSecs=timeoutSecs)

    return totalDeletedCnt
Example #5
    def test_GBM_mnist_restart_many(self):
        importFolderPath = "mnist"
        csvFilename = "train.csv.gz"
        timeoutSecs = 1800
        trialStart = time.time()

        for trial in range(10):
            # PARSE train****************************************
            trainKey = csvFilename + "_" + str(trial) + ".hex"
            start = time.time()
            parseResult = h2i.import_parse(bucket='smalldata',
                                           path=importFolderPath + "/" + csvFilename,
                                           schema='put',
                                           hex_key=trainKey,
                                           timeoutSecs=timeoutSecs)
            elapsed = time.time() - start
            print "parse end on ", csvFilename, 'took', elapsed, 'seconds',\
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)
            print "parse result:", parseResult['destination_key']

            # GBM (train)****************************************
            params = {
                'destination_key': "GBMKEY",
                'learn_rate': .1,
                'ntrees': 10,
                'max_depth': 8,
                'min_rows': 1,
                'response': 784,  # this dataset has the response in the last col (0-9 to check)
                # 'ignored_cols_by_name': range(200,784) # only use the first 200 for speed?
            }

            kwargs = params.copy()
            h2o.beta_features = True
            timeoutSecs = 1800
            # noPoll=True: kick off the GBM build and return without waiting for it to finish
            GBMResult = h2o_cmd.runGBM(parseResult=parseResult,
                                       noPoll=True,
                                       **kwargs)
            h2o.beta_features = False
            # if it fails, should happen within 8 secs
            time.sleep(8)
            h2j.cancelAllJobs()
            h2o.check_sandbox_for_errors()
            print "Trial %s: GBM start didn't have any errors after 8 seconds. cancelled. Will delete all keys now." % trial

            if DO_DELETE_KEYS_AND_CAUSE_PROBLEM:
                h2i.delete_keys_at_all_nodes()
Example #6
    def test_GBM_mnist_restart_many(self):
        importFolderPath = "mnist"
        csvFilename = "train.csv.gz"
        timeoutSecs=1800
        trialStart = time.time()

        for trial in range(10):
            # PARSE train****************************************
            trainKey = csvFilename + "_" + str(trial) + ".hex"
            start = time.time()
            parseResult = h2i.import_parse(bucket='smalldata', path=importFolderPath + "/" + csvFilename, schema='put',
                hex_key=trainKey, timeoutSecs=timeoutSecs)
            elapsed = time.time() - start
            print "parse end on ", csvFilename, 'took', elapsed, 'seconds',\
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)
            print "parse result:", parseResult['destination_key']

            # GBM (train)****************************************
            params = { 
                'destination_key': "GBMKEY",
                'learn_rate': .1,
                'ntrees': 10,
                'max_depth': 8,
                'min_rows': 1,
                'response': 784, # this dataset has the response in the last col (0-9 to check)
                # 'ignored_cols_by_name': range(200,784) # only use the first 200 for speed?
                }

            kwargs = params.copy()
            h2o.beta_features = True
            timeoutSecs = 1800
            # noPoll=True: kick off the GBM build and return without waiting for it to finish
            GBMResult = h2o_cmd.runGBM(parseResult=parseResult, noPoll=True, **kwargs)
            h2o.beta_features = False
            # if it fails, should happen within 8 secs
            time.sleep(8)
            h2j.cancelAllJobs()
            h2o.check_sandbox_for_errors()
            print "Trial %s: GBM start didn't have any errors after 8 seconds. cancelled. Will delete all keys now." % trial

            if DO_DELETE_KEYS_AND_CAUSE_PROBLEM:
                h2i.delete_keys_at_all_nodes()
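
Both GBM restart variants gate the cleanup on a module-level switch that is defined outside the excerpt, and they rely on the h2j alias, presumably import h2o_jobs as h2j. A hedged sketch of what the test file declares near its imports; the value shown is illustrative:

import h2o_jobs as h2j  # assumption: the h2j alias used above maps to h2o_jobs

# Illustrative flag assumed by the two examples above; set to True to reproduce
# the key-deletion problem its name warns about.
DO_DELETE_KEYS_AND_CAUSE_PROBLEM = False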
Example #7
    def test_rf_covtype_fvec(self):
        h2o.beta_features = True  # fvec
        importFolderPath = "standard"

        # Parse Train ******************************************************
        csvTrainFilename = 'covtype.shuffled.90pct.data'
        csvTrainPathname = importFolderPath + "/" + csvTrainFilename
        hex_key = csvTrainFilename + ".hex"
        parseTrainResult = h2i.import_parse(bucket='home-0xdiag-datasets',
                                            path=csvTrainPathname,
                                            hex_key=hex_key,
                                            timeoutSecs=180,
                                            doSummary=False)
        inspect = h2o_cmd.runInspect(None, parseTrainResult['destination_key'])

        # Parse Test ******************************************************
        csvTestFilename = 'covtype.shuffled.10pct.data'
        csvTestPathname = importFolderPath + "/" + csvTestFilename
        hex_key = csvTestFilename + ".hex"
        parseTestResult = h2i.import_parse(bucket='home-0xdiag-datasets',
                                           path=csvTestPathname,
                                           hex_key=hex_key,
                                           timeoutSecs=180)
        inspect = h2o_cmd.runInspect(None, parseTestResult['destination_key'])

        rfViewInitial = []
        xList = []
        eList = []
        fList = []
        trial = 0

        depthList = [10, 20, 30, 40]
        ntreesList = [5, 10, 20, 30]
        # ntreesList = [2]
        nbinsList = [10, 100, 1000]

        if TRY == 'max_depth':
            tryList = depthList
        elif TRY == 'ntrees':
            tryList = ntreesList
        elif TRY == 'nbins':
            tryList = nbinsList
        else:
            raise Exception("huh? %s" % TRY)

        for d in tryList:
            if TRY == 'max_depth':
                paramDict['max_depth'] = d
            elif TRY == 'ntrees':
                paramDict['ntrees'] = d
            elif TRY == 'nbins':
                paramDict['nbins'] = d
            else:
                raise Exception("huh? %s" % TRY)

            # adjust timeoutSecs with the number of trees
            # seems ec2 can be really slow
            if DO_OOBE:
                paramDict['validation'] = None
            else:
                paramDict['validation'] = parseTestResult['destination_key']

            timeoutSecs = 30 + paramDict['ntrees'] * 200

            # do ten starts, to see the bad id problem?
            TRIES = 5
            for i in range(TRIES):
                lastOne = i == (TRIES - 1)

                # have unique model names
                trial += 1
                kwargs = paramDict.copy()
                model_key = 'RFModel_' + str(trial)
                kwargs['destination_key'] = model_key
                data_key = parseTrainResult['destination_key']

                start = time.time()
                rfResult = h2o_cmd.runRF(parseResult=parseTrainResult,
                                         timeoutSecs=timeoutSecs,
                                         noPoll=True,
                                         rfView=False,
                                         **kwargs)
                trainElapsed = time.time() - start
                print 'rf train end', i, 'on', csvTrainPathname, 'took', trainElapsed, 'seconds'

                # don't cancel the last one
                if not lastOne:
                    time.sleep(1)
                    h2o_jobs.cancelAllJobs(timeoutSecs=2)

            ### print "rfView", h2o.dump_json(rfView)
            print "We have a result from the RF above, completed but didn't do RFView yet"
            # could the RF indicate 'done' too soon?
            # if rfResult['state']=='RUNNING':
            #    raise Exception("Why is this RF still in RUNNING state? %s" % h2o.dump_json(rfResult))

            # if 'drf_model' not in rfResult:
            #    raise Exception("How come there's no drf_model in this RF result? %s" % h2o.dump_json(rfResult))
            h2o_jobs.pollWaitJobs(timeoutSecs=300)
            rfView = h2o_cmd.runRFView(None,
                                       model_key=model_key,
                                       timeoutSecs=60,
                                       retryDelaySecs=5,
                                       doSimpleCheck=False)
            print "rfView:", h2o.dump_json(rfView)

            rf_model = rfView['drf_model']
            cms = rf_model['cms']
            ### print "cm:", h2o.dump_json(cm)
            ntrees = rf_model['N']
            errs = rf_model['errs']
            N = rf_model['N']
            varimp = rf_model['varimp']
            treeStats = rf_model['treeStats']

            print "maxDepth:", treeStats['maxDepth']
            print "maxLeaves:", treeStats['maxLeaves']
            print "minDepth:", treeStats['minDepth']
            print "minLeaves:", treeStats['minLeaves']
            print "meanLeaves:", treeStats['meanLeaves']
            print "meanDepth:", treeStats['meanDepth']
            print "errs[0]:", errs[0]
            print "errs[-1]:", errs[-1]
            print "errs:", errs

            (classification_error, classErrorPctList,
             totalScores) = h2o_rf.simpleCheckRFView(rfv=rfView)
            # we iterate over params, so can't really do this check
            # self.assertAlmostEqual(classification_error, 0.03, delta=0.5, msg="Classification error %s differs too much" % classification_error)

            print "classErrorPctList:", classErrorPctList
            self.assertEqual(
                len(classErrorPctList), 7,
                "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict"
            )
            # FIX! should update this expected classification error
            predict = h2o.nodes[0].generate_predictions(model_key=model_key,
                                                        data_key=data_key)

            eList.append(classErrorPctList[4])
            fList.append(trainElapsed)
            if DO_PLOT:
                if TRY == 'max_depth':
                    xLabel = 'max_depth'
                elif TRY == 'ntrees':
                    xLabel = 'ntrees'
                elif TRY == 'nbins':
                    xLabel = 'nbins'
                else:
                    raise Exception("huh? %s" % TRY)
                xList.append(paramDict[xLabel])

        if DO_PLOT:
            eLabel = 'class 4 pctWrong'
            fLabel = 'trainElapsed'
            eListTitle = ""
            fListTitle = ""
            h2o_gbm.plotLists(xList, xLabel, eListTitle, eList, eLabel,
                              fListTitle, fList, fLabel)
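
The sweep above reads several module-level knobs (TRY, DO_OOBE, DO_PLOT, paramDict) that sit outside the excerpt; Example #9 relies on the same names. A hedged sketch with purely illustrative values:

# Illustrative settings assumed by the test above; the real test file defines these
# near the top, so the values here are placeholders, not the originals.
TRY = 'ntrees'        # which parameter to sweep: 'max_depth', 'ntrees', or 'nbins'
DO_OOBE = False       # True -> validate out-of-bag instead of on the 10pct test split
DO_PLOT = True        # True -> plot error and train time against the swept parameter
paramDict = {
    'ntrees': 10,
    'max_depth': 20,
    'nbins': 100,
}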
Example #8
    def test_GBM_cancel_model_reuse(self):
        h2o.beta_features = True
        importFolderPath = 'standard'
        timeoutSecs = 500
        csvFilenameAll = [
            # have to use col name for response?
            ("manyfiles-nflx-gz", "file_1.dat.gz", 378),
            # ("manyfiles-nflx-gz", "file_[1-9].dat.gz", 378),
            # ("standard", "covtype.data", 54),
            # ("standard", "covtype20x.data", 54),
            ]
        # csvFilenameList = random.sample(csvFilenameAll,1)
        csvFilenameList = csvFilenameAll

        # pop open a browser on the cloud
        # h2b.browseTheCloud()

        for (importFolderPath, csvFilename, response) in csvFilenameList:
            # creates csvFilename.hex from file in importFolder dir 
            csvPathname = importFolderPath + "/" + csvFilename 
            print "FIX! is this guy getting cancelled because he's reusing a key name? but it should be okay?"
            (importResult, importPattern) = h2i.import_only(bucket='home-0xdiag-datasets', path=csvPathname, schema='local', 
                timeoutSecs=50)
            parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, schema='local', hex_key='c.hex', 
                timeoutSecs=500, noPoll=False, doSummary=False) # can't do summary until parse result is correct json

            h2o.check_sandbox_for_errors()

            # wait for it to show up in jobs?
            ## time.sleep(2)
            # no pattern waits for all
            ## h2o_jobs.pollWaitJobs(pattern=None, timeoutSecs=300, pollTimeoutSecs=10, retryDelaySecs=5)
            # print "\nparseResult", h2o.dump_json(parseResult)
            print "Parse result['destination_key']:", parseResult['destination_key']
            ## What's wrong here? too big?
            ### inspect = h2o_cmd.runInspect(key=parseResult['destination_key'], timeoutSecs=30, verbose=True)

            h2o.check_sandbox_for_errors()

            # have to avoid this on nflx data. colswap with exec
            # Exception: rjson error in gbm: Argument 'response' error: 
            # Only integer or enum/factor columns can be classified

            if DO_CLASSIFICATION:
                # need to flip the right col! (R wise)
                execExpr = 'c.hex[,%s]=c.hex[,%s]>15' % (response+1,response+1)
                kwargs = { 'str': execExpr }
                resultExec = h2o_cmd.runExec(**kwargs)

            # lets look at the response column now
            s = h2o_cmd.runSummary(key="c.hex", cols=response, max_ncols=1)
            # x = range(542)
            # remove the output too! (378)
            ignoreIndex = [3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 16, 17, 18, 19, 20, 424, 425, 426, 540, 541, response]
            # have to add 1 for col start with 1, now. plus the C
            xIgnore = ",".join(["C" + str(i+1) for i in ignoreIndex])

            params = {
                'destination_key': None,
                'ignored_cols_by_name': xIgnore,
                'learn_rate': .1,
                'ntrees': 2,
                'max_depth': 8,
                'min_rows': 1,
                'response': "C" + str(response+1),
                'classification': 1 if DO_CLASSIFICATION else 0,
                'grid_parallelism': 4,
                }

            kwargs = params.copy()
            timeoutSecs = 1800

            for i in range(5):
                # now issue a couple background GBM jobs that we'll kill
                jobids = []     
                for j in range(5):
                    # FIX! apparently we can't reuse a model key after a cancel
                    kwargs['destination_key'] = 'GBMBad' + str(j)
                    # rjson error in poll_url: Job was cancelled by user!
                    GBMFirstResult = h2o_cmd.runGBM(parseResult=parseResult, noPoll=True, **kwargs)
                    jobids.append(GBMFirstResult['job_key'])
                    h2o.check_sandbox_for_errors()
                    
                # have to pass the job id
                # for j in jobids:
                #     h2o.nodes[0].jobs_cancel(key=j)

                h2o_jobs.cancelAllJobs()
                # PUB-361. going to wait after cancel before reusing keys
                time.sleep(3)
                # am I getting a subsequent parse job cancelled?
                h2o_jobs.showAllJobs()

            if DELETE_KEYS:
                h2i.delete_keys_from_import_result(pattern=csvFilename, importResult=importResult)
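
As in the other excerpts, two module-level switches are referenced but not shown; a hedged sketch with illustrative values:

# Illustrative flags assumed by the GBM cancel test above (placeholder values):
DO_CLASSIFICATION = True   # True -> threshold the response with exec and run GBM as a classifier
DELETE_KEYS = True         # True -> drop the imported keys after each file's trials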
Example #9
    def test_rf_covtype_fvec(self):
        h2o.beta_features = True  # fvec
        importFolderPath = "standard"

        # Parse Train ******************************************************
        csvTrainFilename = 'covtype.shuffled.90pct.data'
        csvTrainPathname = importFolderPath + "/" + csvTrainFilename
        hex_key = csvTrainFilename + ".hex"
        parseTrainResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvTrainPathname, hex_key=hex_key,
                                            timeoutSecs=180, doSummary=False)
        inspect = h2o_cmd.runInspect(None, parseTrainResult['destination_key'])

        # Parse Test ******************************************************
        csvTestFilename = 'covtype.shuffled.10pct.data'
        csvTestPathname = importFolderPath + "/" + csvTestFilename
        hex_key = csvTestFilename + ".hex"
        parseTestResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvTestPathname, hex_key=hex_key,
                                           timeoutSecs=180)
        inspect = h2o_cmd.runInspect(None, parseTestResult['destination_key'])

        rfViewInitial = []
        xList = []
        eList = []
        fList = []
        trial = 0

        depthList  = [10, 20, 30, 40]
        ntreesList = [5, 10, 20, 30]
        # ntreesList = [2]
        nbinsList  = [10, 100, 1000]

        if TRY == 'max_depth':
            tryList = depthList
        elif TRY == 'ntrees':
            tryList = ntreesList
        elif TRY == 'nbins':
            tryList = nbinsList
        else:
            raise Exception("huh? %s" % TRY)

        for d in tryList:
            if TRY == 'max_depth':
                paramDict['max_depth'] = d
            elif TRY == 'ntrees':
                paramDict['ntrees'] = d
            elif TRY == 'nbins':
                paramDict['nbins'] = d
            else:
                raise Exception("huh? %s" % TRY)

            # adjust timeoutSecs with the number of trees
            # seems ec2 can be really slow
            if DO_OOBE:
                paramDict['validation'] = None
            else:
                paramDict['validation'] = parseTestResult['destination_key']

            timeoutSecs = 30 + paramDict['ntrees'] * 200


            # do ten starts, to see the bad id problem?
            TRIES = 5
            for i in range(TRIES):
                lastOne = i==(TRIES-1)

                # have unique model names
                trial += 1
                kwargs = paramDict.copy()
                model_key = 'RFModel_' + str(trial)
                kwargs['destination_key'] = model_key
                data_key = parseTrainResult['destination_key']

                start = time.time()
                rfResult = h2o_cmd.runSpeeDRF(parseResult=parseTrainResult, timeoutSecs=timeoutSecs,
                                         noPoll=True, **kwargs)
                trainElapsed = time.time() - start
                print 'rf train end', i, 'on', csvTrainPathname, 'took', trainElapsed, 'seconds'

                # don't cancel the last one
                if not lastOne:
                    time.sleep(1)
                    h2o_jobs.cancelAllJobs(timeoutSecs=2)


            ### print "rfView", h2o.dump_json(rfView)
            print "We have a result from the RF above, completed but didn't do RFView yet"
            # could the RF indicate 'done' too soon?
            # if rfResult['state']=='RUNNING':
            #    raise Exception("Why is this RF still in RUNNING state? %s" % h2o.dump_json(rfResult))

            # if 'drf_model' not in rfResult:
            #    raise Exception("How come there's no drf_model in this RF result? %s" % h2o.dump_json(rfResult))
            h2o_jobs.pollWaitJobs(timeoutSecs=300)
            rfView = h2o_cmd.runSpeeDRFView(None, model_key, timeoutSecs=60)
            print "rfView:", h2o.dump_json(rfView)

            rfView["drf_model"] = rfView.pop("speedrf_model")
            rf_model = rfView['drf_model']
            cms = rf_model['cms']
            ### print "cm:", h2o.dump_json(cm)
            ntrees = rf_model['N']
            errs = rf_model['errs']
            N = rf_model['N']
            varimp = rf_model['varimp']
            treeStats = rf_model['treeStats']

            print "maxDepth:", treeStats['maxDepth']
            print "maxLeaves:", treeStats['maxLeaves']
            print "minDepth:", treeStats['minDepth']
            print "minLeaves:", treeStats['minLeaves']
            print "meanLeaves:", treeStats['meanLeaves']
            print "meanDepth:", treeStats['meanDepth']
            print "errs[0]:", errs[0]
            print "errs[-1]:", errs[-1]
            print "errs:", errs

            (classification_error, classErrorPctList, totalScores) = h2o_rf.simpleCheckRFView(rfv=rfView)
            # we iterate over params, so can't really do this check
            # self.assertAlmostEqual(classification_error, 0.03, delta=0.5, msg="Classification error %s differs too much" % classification_error)

            print "classErrorPctList:", classErrorPctList
            self.assertEqual(len(classErrorPctList), 7, "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict")
            # FIX! should update this expected classification error
            predict = h2o.nodes[0].generate_predictions(model_key=model_key, data_key=data_key)

            eList.append(classErrorPctList[4])
            fList.append(trainElapsed)
            if DO_PLOT:
                if TRY == 'max_depth':
                    xLabel = 'max_depth'
                elif TRY == 'ntrees':
                    xLabel = 'ntrees'
                elif TRY == 'nbins':
                    xLabel = 'nbins'
                else:
                    raise Exception("huh? %s" % TRY)
                xList.append(paramDict[xLabel])

        if DO_PLOT:
            eLabel = 'class 4 pctWrong'
            fLabel = 'trainElapsed'
            eListTitle = ""
            fListTitle = ""
            h2o_gbm.plotLists(xList, xLabel, eListTitle, eList, eLabel, fListTitle, fList, fLabel)