def test_rapids_ifelse(self):
    # Parse iris, then run every expression in the module-level exprList
    # twice, recording which expressions produced a non-empty result frame.
    bucket = 'smalldata'
    csvPathname = 'iris/iris_wheader.csv'
    hexKey = 'r1'
    parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='put', hex_key=hexKey)

    # handle on the parsed frame; exprList refers to key 'r1'
    r = Key('r1')
    keys = []
    for trial in range(2):
        for execExpr in exprList:
            # NOTE(review): exprList entries are exec'd as python source here;
            # the h2o_xl machinery records results in Xbase — confirm entries
            # stringify/exec as intended (module exprList holds AssignObj objects)
            exec(execExpr)
            result = Xbase.lastResult
            execResult = Xbase.lastExecResult
            print dump_json(execResult)
            # rows might be zero!
            if execResult['num_rows'] or execResult['num_cols']:
                keys.append(execExpr)

    print "\nExpressions that created keys"
    for k in keys:
        print k

    # for execExpr in exprList:
    #     h2e.exec_expr(execExpr=execExpr, resultKey=None, timeoutSecs=10)
    h2o.check_sandbox_for_errors()
def test_exec2_sum(self): print "Replicating covtype.data by 2x for results comparison to 1x" filename1x = 'covtype.data' pathname1x = h2i.find_folder_and_filename('home-0xdiag-datasets', 'standard/covtype.data', returnFullPath=True) filename2x = "covtype_2x.data" pathname2x = SYNDATASETS_DIR + '/' + filename2x h2o_util.file_cat(pathname1x, pathname1x, pathname2x) csvAll = [ (pathname1x, "cA", 5, 1), (pathname2x, "cB", 5, 2), (pathname2x, "cC", 5, 2), ] # h2b.browseTheCloud() lenNodes = len(h2o.nodes) firstDone = False for (csvPathname, hex_key, timeoutSecs, resultMult) in csvAll: parseResultA = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key) pA = h2o_cmd.ParseObj(parseResultA) print pA.numRows print pA.numCols print pA.parse_key iA = h2o_cmd.InspectObj(pA.parse_key) k = Key(hex_key) colResultList = [] for i in range(pA.numCols): result = Expr(Fcn('sum', k[:, i], True)).result colResultList.append(result) print "\ncolResultList", colResultList if not firstDone: colResultList0 = list(colResultList) good = [float(x) for x in colResultList0] firstDone = True else: print "\n", colResultList0, "\n", colResultList # create the expected answer...i.e. N * first compare = [float(x) / resultMult for x in colResultList] print "\n", good, "\n", compare self.assertEqual( good, compare, 'compare is not equal to good (first try * resultMult)')
import unittest, random, sys, time, getpass sys.path.extend(['.', '..', '../..', 'py']) import h2o2 as h2o import h2o_browse as h2b, h2o_exec as h2e, h2o_import as h2i, h2o_cmd # new ...ability to reference cols # src[ src$age<17 && src$zip=95120 && ... , ] # can specify values for enums ..values are 0 thru n-1 for n enums print "FIX!: need to test the && and || reduction operators" initList = [] from h2o_xl import Key, AssignObj, Fcn DO_SUM = False r1 = Key('r1') if DO_SUM: funstr = 'sum' exprList = [ AssignObj('a', Fcn(funstr, r1[1], r1[2])), AssignObj('b', 1), AssignObj('b', Fcn(funstr, r1[1], r1[2])), AssignObj('d', 1), AssignObj('d', Fcn(funstr, r1[1], r1[2])), AssignObj('e', 1), AssignObj('e', Fcn(funstr, r1[1], r1[2])), AssignObj('f', 1), AssignObj('f', Fcn(funstr, r1[1], r1[2])), AssignObj('g', 1), AssignObj('g', Fcn(funstr, r1[1], r1[2])),
def test_bayes_basic(self):
    # Parse the covtype 90pct training file (forcing the response column
    # to Enum), build a naivebayes model, then fetch the model, its
    # computed metrics, and predictions.
    bucket = 'home-0xdiag-datasets'
    importFolderPath = 'standard'
    trainFilename = 'covtype.shuffled.90pct.data'
    train_key = 'covtype.train.hex'
    b = Key(train_key)
    model_key = 'bayesModelKey'  # NOTE(review): reassigned to 'covtype_bayes.hex' below — confirm which is intended
    timeoutSecs = 1800
    csvPathname = importFolderPath + "/" + trainFilename

    # FIX! do I need to force enum for classification? what if I do regression after this?
    columnTypeDict = {54: 'Enum'}
    parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, columnTypeDict=columnTypeDict,
        schema='local', chunk_size=4194304, hex_key=train_key, timeoutSecs=timeoutSecs)

    # don't have to make it enum, if 0/1 (can't operate on enums like this)
    # make 1-7 go to 0-6. 0 isn't there.
    # make 1 thru 6 go to 1
    # change columnTypeDict to None above if I do this
    # Assign(b[:,54], b[:,54]-1)
    # Assign(b[:,54], b[:,54]!=0)
    # now we have just 0 and 1

    pA = h2o_cmd.ParseObj(parseResult)
    iA = h2o_cmd.InspectObj(pA.parse_key)
    parse_key = pA.parse_key
    numRows = iA.numRows
    numCols = iA.numCols
    labelList = iA.labelList

    labelListUsed = list(labelList)
    numColsUsed = numCols

    # run through a couple of parameter sets
    parameters = []
    parameters.append({
        'response_column': 'C55',  # still 1-55 on colnames
    })  # just default

    model_key = 'covtype_bayes.hex'
    for p in parameters:
        bmResult = h2o.n0.build_model(algo='naivebayes', destination_key=model_key,
            training_frame=train_key, validation_frame=train_key, parameters=p, timeoutSecs=60)
        bm = OutputObj(bmResult, 'bm')

        modelResult = h2o.n0.models(key=model_key)
        model = OutputObj(modelResult['models'][0]['output'], 'model')

        cmmResult = h2o.n0.compute_model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        cmm = OutputObj(cmmResult, 'cmm')

        mmResult = h2o.n0.model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        mmResultShort = mmResult['model_metrics'][0]
        del mmResultShort['frame']  # too much!
        mm = OutputObj(mmResultShort, 'mm')

        prResult = h2o.n0.predict(model=model_key, frame=parse_key, timeoutSecs=60)
        pr = OutputObj(prResult['model_metrics'][0]['predictions'], 'pr')
def test_GLM_error1(self):
    # Parse covtype 20k twice (plain numeric key plus a 'binomial' copy),
    # munge column 54 to 0/1, then build a fixed-parameter L_BFGS GLM
    # five times and score it each trial.
    importFolderPath = "covtype"
    csvFilename = "covtype.20k.data"
    hex_key = "covtype20k.hex"
    binomial_key = "covtype20k.b.hex"
    b = Key(hex_key)

    csvPathname = importFolderPath + "/" + csvFilename
    parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=hex_key,
        check_header=1, timeoutSecs=180, doSummary=False)

    ## columnTypeDict = {54: 'Enum'}
    columnTypeDict = None
    parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=binomial_key,
        columnTypeDict=columnTypeDict, check_header=1, timeoutSecs=180, doSummary=False)

    # don't have to make it enum, if 0/1 (can't operate on enums like this)
    # make 1-7 go to 0-6. 0 isn't there.
    # NOTE(review): b wraps hex_key, so the munging below edits the plain
    # numeric frame, not binomial_key — confirm intent
    Assign(b[:, 54], b[:, 54] - 1)
    # make 1 thru 6 go to 1
    Assign(b[:, 54], b[:, 54] != 0)
    # now we have just 0 and 1

    pA = h2o_cmd.ParseObj(parseResult)
    iA = h2o_cmd.InspectObj(pA.parse_key)
    parse_key = pA.parse_key
    numRows = iA.numRows
    numCols = iA.numCols
    labelList = iA.labelList

    expected = []
    allowedDelta = 0

    # loop, to see if we get same centers
    labelListUsed = list(labelList)
    numColsUsed = numCols

    for trial in range(5):
        # list-valued params are passed as their string form, per REST API
        parameters = {
            'response_column': 'C55',
            'max_iterations': 3,
            'solver': 'L_BFGS',
            'ignored_columns': '["C1"]',
            'alpha': '[0.1]',
            'max_after_balance_size': 1000.0,
            'class_sampling_factors': '[0.2]',
            # 'use_all_factor_levels': None,
            'lambda': '[0]',
        }
        bHack = hex_key

        # show the response-column distribution on both frames before building
        co = h2o_cmd.runSummary(key=binomial_key, column=54)
        print "binomial_key summary:", co.label, co.type, co.missing_count, co.domain, sum(co.histogram_bins)
        co = h2o_cmd.runSummary(key=hex_key, column=54)
        print "hex_key summary:", co.label, co.type, co.missing_count, co.domain, sum(co.histogram_bins)

        model_key = 'rand_glm.hex'
        bmResult = h2o.n0.build_model(algo='glm', model_id=model_key, training_frame=bHack,
            parameters=parameters, timeoutSecs=10)
        bm = OutputObj(bmResult, 'bm')

        modelResult = h2o.n0.models(key=model_key)
        model = OutputObj(modelResult['models'][0]['output'], 'model')
        h2o_glm.simpleCheckGLM(self, model, parameters, labelList, labelListUsed, allowNaN=True)

        cmmResult = h2o.n0.compute_model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        cmm = OutputObj(cmmResult, 'cmm')

        # FIX! when is this legal
        doClassification = False
        if doClassification:
            mcms = OutputObj({'data': cmm.max_criteria_and_metric_scores.data}, 'mcms')
            m1 = mcms.data[1:]
            h0 = mcms.data[0]
            print "\nmcms", tabulate(m1, headers=h0)

        if doClassification:
            thms = OutputObj(cmm.thresholds_and_metric_scores, 'thms')
            cmms = OutputObj({'cm': cmm.confusion_matrices}, 'cmms')
            if 1 == 0:
                print ""
                for i, c in enumerate(cmms.cm):
                    print "\ncmms.cm[%s]" % i, tabulate(c)
                print ""

        mmResult = h2o.n0.model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        mm = OutputObj(mmResult['model_metrics'][0], 'mm')

        prResult = h2o.n0.predict(model=model_key, frame=parse_key, timeoutSecs=60)
        pr = OutputObj(prResult['model_metrics'][0]['predictions'], 'pr')
def test_rapids_overloaded_opr(self):
    # Create a small random frame, exercise rapids Seq/Col/Fcn key creation,
    # then drive equivalent row/col slicing through the overloaded Key
    # operators (<<= triggers immediate execution at h2o).
    SYNDATASETS_DIR = h2o.make_syn_dir()
    tryList = [
        # (1000000, 5, 'cA', 200),
        (1000, 5, 'cA', 200),
    ]

    # h2b.browseTheCloud()
    for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
        SEEDPERFILE = random.randint(0, sys.maxint)  # python2 sys.maxint
        csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
        print "\nCreating random", csvPathname
        write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)

        parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=timeoutSecs, doSummary=False)
        numRows, numCols, parse_key = h2o_cmd.infoFromParse(parseResult)

        inspect = h2o_cmd.runInspect(key=hex_key)
        missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
        print "\n" + csvPathname, \
            " numRows:", "{:,}".format(numRows), \
            " numCols:", "{:,}".format(numCols)

        # should match # of cols in header or ??
        self.assertEqual(numCols, colCount, "parse created result with the wrong number of cols %s %s" % (numCols, colCount))
        self.assertEqual(numRows, rowCount, "parse created result with the wrong number of rows %s %s" % (numRows, rowCount))

        # Xbase.debugOnly = True

        REPEAT = 1
        data_key = hex_key
        for i in range(REPEAT):
            result_key = data_key + "_" + str(i)
            Assign('s1', Seq(range(5)))  # take advantage of default params for row/col (None)

            # need the 'c' function, to make sure the key is created
            # first try as object, then method
            Assign('s2', Fcn('c', Seq(range(5))))  # just combine
            Assign('s3', Col(Seq(range(5))))
            inspect = h2o_cmd.runInspect(key='s3')
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
            assert numRows==5
            assert numCols==1

            Assign('s2', Col(Seq(range(5))))
            inspect = h2o_cmd.runInspect(key='s2')
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
            assert numRows==5
            assert numCols==1

            # can't have sequence of sequences?
            # make sure key is created with c()
            f = Fcn('c', Seq(Colon(99,400), "#2", 1, range(1,5), range(7,10), range(50,52) ))
            Assign('s1', f)

            f = Col(Seq(Colon(99,400), "#2", 1, range(1,5), range(7,10), range(50,52) ))
            Assign('s2', f)
            inspect = h2o_cmd.runInspect(key='s2')
            missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
            # 313 = 302 values from Colon(99,400) + "#2" + 1 + 4 + 3 + 2 from the ranges
            assert numRows==313
            assert numCols==1

            print "Now trying to do the functions with the alternate overloaded operators"
            data_key = Key(parse_key)
            result_key = Key()
            # what triggers immediate operation at h2o
            # as opposed to an object within a function
            result_key.frame = 'a1'
            result_key <<= data_key[Seq(range(1,4)), :]
            result_key.frame = 'a2'
            result_key <<= data_key[Seq(range(1,4)), :]
            result_key.frame = 'a3'
            result_key <<= data_key[Seq(range(1,4)), :]
            result_key.frame = 'a4'
            result_key <<= data_key[Seq(range(1,4)), 0:1]
            result_key.frame = 'a5'
            result_key <<= data_key[Seq(range(1,4)), 0:1]
            result_key.frame = 'a6'
            result_key <<= data_key[[1,2,3], 1]

        print "\n" + csvPathname, \
            " numRows:", "{:,}".format(numRows), \
            " numCols:", "{:,}".format(numCols)
def test_rapids_overloaded_opr(self): SYNDATASETS_DIR = h2o.make_syn_dir() tryList = [ # (1000000, 5, 'cA', 200), (1000, 5, 'cA', 200), ] # h2b.browseTheCloud() for (rowCount, colCount, hex_key, timeoutSecs) in tryList: SEEDPERFILE = random.randint(0, sys.maxint) csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str( rowCount) + 'x' + str(colCount) + '.csv' csvPathname = SYNDATASETS_DIR + '/' + csvFilename print "\nCreating random", csvPathname write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE) parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=timeoutSecs, doSummary=False) numRows, numCols, parse_key = h2o_cmd.infoFromParse(parseResult) inspect = h2o_cmd.runInspect(key=hex_key) missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect( inspect) print "\n" + csvPathname, \ " numRows:", "{:,}".format(numRows), \ " numCols:", "{:,}".format(numCols) # should match # of cols in header or ?? self.assertEqual( numCols, colCount, "parse created result with the wrong number of cols %s %s" % (numCols, colCount)) self.assertEqual( numRows, rowCount, "parse created result with the wrong number of rows %s %s" % (numRows, rowCount)) # Xbase.debugOnly = True REPEAT = 1 data_key = hex_key for i in range(REPEAT): result_key = data_key + "_" + str(i) Assign('s1', Seq(range(5))) # take advantage of default params for row/col (None) # need the 'c' function, to make sure the key is created # first try as object, then method Assign('s2', Fcn('c', Seq(range(5)))) # just combine Assign('s3', Col(Seq(range(5)))) inspect = h2o_cmd.runInspect(key='s3') missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect( inspect) assert numRows == 5 assert numCols == 1 Assign('s2', Col(Seq(range(5)))) inspect = h2o_cmd.runInspect(key='s2') missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect( inspect) assert numRows == 5 assert numCols == 1 # can't have sequence of sequences? 
# make sure key is created with c() f = Fcn( 'c', Seq(Colon(99, 400), "#2", 1, range(1, 5), range(7, 10), range(50, 52))) Assign('s1', f) f = Col( Seq(Colon(99, 400), "#2", 1, range(1, 5), range(7, 10), range(50, 52))) Assign('s2', f) inspect = h2o_cmd.runInspect(key='s2') missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect( inspect) assert numRows == 313 assert numCols == 1 print "Now trying to do the functions with the alternate overloaded operators" data_key = Key(parse_key) result_key = Key() # what triggers immediate operation at h2o # as opposed to an object within a function result_key.frame = 'a1' result_key <<= data_key[Seq(range(1, 4)), :] result_key.frame = 'a2' result_key <<= data_key[Seq(range(1, 4)), :] result_key.frame = 'a3' result_key <<= data_key[Seq(range(1, 4)), :] result_key.frame = 'a4' result_key <<= data_key[Seq(range(1, 4)), 0:1] result_key.frame = 'a5' result_key <<= data_key[Seq(range(1, 4)), 0:1] result_key.frame = 'a6' result_key <<= data_key[[1, 2, 3], 1] print "\n" + csvPathname, \ " numRows:", "{:,}".format(numRows), \ " numCols:", "{:,}".format(numCols)
def test_GLM_params_rand2(self):
    # Parse covtype 20k twice (plain numeric key plus a 'binomial' copy),
    # munge column 54 to 0/1, then build GLMs with randomized parameter
    # sets for five trials and score each model.
    importFolderPath = "covtype"
    csvFilename = "covtype.20k.data"
    hex_key = "covtype20k.hex"
    binomial_key = "covtype20k.b.hex"
    b = Key(hex_key)

    csvPathname = importFolderPath + "/" + csvFilename
    parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=hex_key,
        check_header=1, timeoutSecs=180, doSummary=False)

    ## columnTypeDict = {54: 'Enum'}
    columnTypeDict = None
    parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=binomial_key,
        columnTypeDict=columnTypeDict, check_header=1, timeoutSecs=180, doSummary=False)

    # don't have to make it enum, if 0/1 (can't operate on enums like this)
    # make 1-7 go to 0-6. 0 isn't there.
    # NOTE(review): b wraps hex_key, so the munging below edits the plain
    # numeric frame, not binomial_key — confirm intent
    Assign(b[:, 54], b[:, 54] - 1)
    # make 1 thru 6 go to 1
    Assign(b[:, 54], b[:, 54] != 0)
    # now we have just 0 and 1

    pA = h2o_cmd.ParseObj(parseResult)
    iA = h2o_cmd.InspectObj(pA.parse_key)
    parse_key = pA.parse_key
    numRows = iA.numRows
    numCols = iA.numCols
    labelList = iA.labelList

    expected = []
    allowedDelta = 0

    # loop, to see if we get same centers
    labelListUsed = list(labelList)
    numColsUsed = numCols

    paramDict = define_params()
    for trial in range(5):
        # family [u'gaussian', u'binomial', u'poisson', u'gamma', u'tweedie']
        # link [u'family_default', u'identity', u'logit', u'log', u'inverse', u'tweedie']
        # can we do classification with probabilities?
        # are only lambda and alpha grid searchable?

        # params is mutable. This is default.
        parameters = {
            'response_column': 'C55',
            'alpha': 0.1,
            # 'lambda': 1e-4,
            'lambda': 0,
        }
        # mutates 'parameters' in place with a random selection from paramDict
        h2o_glm.pickRandGlmParams(paramDict, parameters)

        # binomial family needs the 0/1 response frame
        if 'family' not in parameters or parameters['family'] == 'binomial':
            bHack = binomial_key
        else:
            bHack = hex_key

        # show the response-column distribution on both frames before building
        co = h2o_cmd.runSummary(key=binomial_key, column=54)
        print "binomial_key summary:", co.label, co.type, co.missing_count, co.domain, sum(co.histogram_bins)
        co = h2o_cmd.runSummary(key=hex_key, column=54)
        print "hex_key summary:", co.label, co.type, co.missing_count, co.domain, sum(co.histogram_bins)

        # fix stupid params: list-valued params go to the REST API as strings
        fixList = ['alpha', 'lambda', 'ignored_columns', 'class_sampling_factors']
        for f in fixList:
            if f in parameters:
                parameters[f] = "[%s]" % parameters[f]

        model_key = 'rand_glm.hex'
        bmResult = h2o.n0.build_model(algo='glm', model_id=model_key, training_frame=bHack,
            parameters=parameters, timeoutSecs=10)
        bm = OutputObj(bmResult, 'bm')

        modelResult = h2o.n0.models(key=model_key)
        model = OutputObj(modelResult['models'][0]['output'], 'model')
        h2o_glm.simpleCheckGLM(self, model, parameters, labelList, labelListUsed, allowNaN=True)

        cmmResult = h2o.n0.compute_model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        cmm = OutputObj(cmmResult, 'cmm')

        # FIX! when is this legal
        doClassification = False
        if doClassification:
            mcms = OutputObj({'data': cmm.max_criteria_and_metric_scores.data}, 'mcms')
            m1 = mcms.data[1:]
            h0 = mcms.data[0]
            print "\nmcms", tabulate(m1, headers=h0)

        if doClassification:
            thms = OutputObj(cmm.thresholds_and_metric_scores, 'thms')
            cmms = OutputObj({'cm': cmm.confusion_matrices}, 'cmms')
            if 1 == 0:
                print ""
                for i, c in enumerate(cmms.cm):
                    print "\ncmms.cm[%s]" % i, tabulate(c)
                print ""

        mmResult = h2o.n0.model_metrics(model=model_key, frame=parse_key, timeoutSecs=60)
        mm = OutputObj(mmResult['model_metrics'][0], 'mm')

        prResult = h2o.n0.predict(model=model_key, frame=parse_key, timeoutSecs=60)
        pr = OutputObj(prResult['model_metrics'][0]['predictions'], 'pr')
def test_exec2_enums_rand_cut(self):
    # Generate an enum dataset, precompute random row-cut expressions over
    # the enum columns, then repeatedly apply a random cut via rapids
    # Assign and time each exec.
    SYNDATASETS_DIR = h2o.make_syn_dir()

    n = ROWS
    tryList = [
        (n, 10, 9, 'cE', 300),
    ]

    # create key names to use for exec
    eKeys = ['e%s' % i for i in range(10)]

    # h2b.browseTheCloud()
    trial = 0
    for (rowCount, iColCount, oColCount, hex_key, timeoutSecs) in tryList:
        colCount = iColCount + oColCount

        hex_key = 'p'
        colEnumList = create_col_enum_list(iColCount)

        # create 100 possible cut expressions here, so we don't waste time below
        rowExprList = []
        print "Creating", CUT_EXPR_CNT, 'cut expressions'
        for j in range(CUT_EXPR_CNT):
            # init cutValue. None means no compare
            cutValue = [None for i in range(iColCount)]
            # build up a random cut expression
            cols = random.sample(range(iColCount), random.randint(1, iColCount))
            for c in cols:
                # possible choices within the column
                cel = colEnumList[c]
                # for now the cutValues are numbers for the enum mappings
                # FIX! hack. don't use encoding 0, maps to NA here? h2o doesn't like
                # celChoice = str(random.choice(range(len(cel))))
                celChoice = random.choice(range(len(cel)))
                cutValue[c] = celChoice

            cutExprList = []
            pKey = Key('p')
            for i, c in enumerate(cutValue):
                if c is None:
                    continue
                else:
                    # new ...ability to reference cols
                    # src[ src$age<17 && src$zip=95120 && ... , ]
                    # cutExprList.append('p$C'+str(i+1)+'=='+c)
                    # all column indexing in h2o-dev is with number
                    e = Fcn('==', c, pKey[:, i])
                    cutExprList.append(e)

            # AND the per-column comparisons together into one cut expression
            cutExpr = None
            for ce in cutExprList:
                if cutExpr:
                    cutExpr = Fcn('&', cutExpr, ce)
                else:
                    cutExpr = ce
            print "cutExpr:", cutExpr

            # should be two different keys in the sample
            e = random.sample(eKeys, 2)
            fKey = e[0]
            eKey = e[1]

            # rowExpr = '%s[%s,];' % (hex_key, cutExpr)
            hKey = Key(hex_key)
            rowExpr = hKey[cutExpr, :]

            print "rowExpr:", rowExpr
            rowExprList.append(rowExpr)

        # CREATE DATASET*******************************************
        SEEDPERFILE = random.randint(0, sys.maxint)
        csvFilename = 'syn_enums_' + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
        print "Creating random", csvPathname
        write_syn_dataset(csvPathname, rowCount, iColCount, oColCount, SEEDPERFILE, colEnumList=colEnumList)

        # PARSE*******************************************************
        parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=30)
        numRows, numCols, parse_key = h2o_cmd.infoFromParse(parseResult)

        inspect = h2o_cmd.runInspect(key=parse_key)
        missingList, valueList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
        # print h2o.dump_json(inspect)

        # (missingValuesDict, constantValuesDict, enumSizeDict, colTypeDict, colNameDict) = \
        #     h2o_cmd.columnInfoFromInspect(parse_key, exceptionOnMissingValues=False)
        # error if any col has constant values
        # if len(constantValuesDict) != 0:
        #     raise Exception("Probably got a col NA'ed and constant values as a result %s" % constantValuesDict)

        # INIT all possible key names used***************************
        # remember. 1 indexing!

        # build up the columns
        Assign('b', [1, 2, 3])
        # could also append 1 col at a time, by assigning to the next col number?
        Assign('a', Cbind(['b' for i in range(colCount)]))

        for eKey in eKeys:
            Assign(eKey, 'a')
            ## print h2o.dump_json(e)

        xList = []
        eList = []
        fList = []
        for repeat in range(200):
            # EXEC*******************************************************
            # don't use exec_expr to avoid issues with Inspect following etc.
            randICol = random.randint(0, iColCount - 1)
            randOCol = random.randint(iColCount, iColCount + oColCount - 1)

            # should be two different keys in the sample
            e = random.sample(eKeys, 2)
            fKey = e[0]
            eKey = e[1]

            if 1 == 1:
                start = time.time()
                # apply a random precomputed cut; .do() executes at h2o now
                Assign(fKey, random.choice(rowExprList)).do()
                elapsed = time.time() - start
                execTime = elapsed
                print "exec 2 took", elapsed, "seconds."

                inspect = h2o_cmd.runInspect(key=fKey)
                missingList, valueList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)

            if numRows == 0 or numCols != colCount:
                h2p.red_print("Warning: Cut resulted in", numRows, "rows and", numCols, "cols. Quantile will abort")

            # FIX! put quantile back in?
            quantileTime = 0

            # remove all keys*******************************************************
            # what about hex_key?
            if 1 == 0:
                start = time.time()
                h2o.nodes[0].remove_all_keys()
                elapsed = time.time() - start
                print "remove all keys end on ", csvFilename, 'took', elapsed, 'seconds.'

            trial += 1
            xList.append(trial)
            eList.append(execTime)
            fList.append(quantileTime)

    # just get a plot of the last one (biggest)
    if DO_PLOT:
        xLabel = 'trial'
        eLabel = 'exec cut time'
        fLabel = 'quantile time'
        eListTitle = ""
        fListTitle = ""
        h2o_gbm.plotLists(xList, xLabel, eListTitle, eList, eLabel, fListTitle, fList, fLabel)