def test_C_prostate(self): h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download() print "\nStarting prostate.csv" # columns start at 0 y = "1" x = "" csvFilename = "prostate.csv" csvPathname = "logreg" + "/" + csvFilename parseResult = h2i.import_parse(bucket="smalldata", path=csvPathname, hex_key=csvFilename + ".hex", schema="put") for maxx in range(2, 6): x = range(maxx) x.remove(0) # 0 is member ID. not used x.remove(1) # 1 is output x = ",".join(map(str, x)) print "\nx:", x print "y:", y kwargs = {"x": x, "y": y, "n_folds": 5} glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs) # ID,CAPSULE,AGE,RACE,DPROS,DCAPS,PSA,VOL,GLEASON h2o_glm.simpleCheckGLM(self, glm, "AGE", **kwargs) sys.stdout.write(".") sys.stdout.flush() # now redo it all thru the browser # three times! for i in range(3): h2b.browseJsonHistoryAsUrl() h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download()
def test_B_benign(self): h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download() print "\nStarting benign.csv" csvFilename = "benign.csv" csvPathname = "logreg" + "/" + csvFilename parseResult = h2i.import_parse(bucket="smalldata", path=csvPathname, hex_key=csvFilename + ".hex", schema="put") # columns start at 0 y = "3" # cols 0-13. 3 is output # no member id in this one for maxx in range(11, 14): x = range(maxx) x.remove(3) # 3 is output x = ",".join(map(str, x)) print "\nx:", x print "y:", y kwargs = {"x": x, "y": y} glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs) # no longer look at STR? h2o_glm.simpleCheckGLM(self, glm, None, **kwargs) sys.stdout.write(".") sys.stdout.flush() # now redo it all thru the browser h2b.browseJsonHistoryAsUrl()
def test_B_benign(self): h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download() print "\nStarting benign.csv" csvFilename = "benign.csv" csvPathname = 'logreg' + '/' + csvFilename parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=csvFilename + ".hex", schema='put') # columns start at 0 y = "3" # cols 0-13. 3 is output # no member id in this one for maxx in range(11, 14): x = range(maxx) x.remove(3) # 3 is output x = ",".join(map(str, x)) print "\nx:", x print "y:", y kwargs = {'x': x, 'y': y} glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs) # no longer look at STR? h2o_glm.simpleCheckGLM(self, glm, None, **kwargs) sys.stdout.write('.') sys.stdout.flush() # now redo it all thru the browser h2b.browseJsonHistoryAsUrl()
def test_rf_tnc3_fvec(self):
    """Parse tnc3.csv, mutate columns via exec expressions, then run RF
    twice: once ignoring the 'boat'/'body' columns (good case) and once
    without ignores (bad case), inspecting results in the browser.

    NOTE(review): the `if 1==1:` / `if 1==0:` guards are manual toggles
    used to enable/disable sections of this test by hand.
    """
    h2o.beta_features = True
    csvPathname = 'tnc3.csv'
    print "\n" + csvPathname
    hex_key = "tnc3.hex"
    ### h2b.browseTheCloud()
    # parse with an explicit header row
    parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=hex_key,
        schema='put', timeoutSecs=10, retryDelaySecs=0.25, header=1)
    print "Parse result['Key']:", parseResult['destination_key']
    inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
    h2b.browseJsonHistoryAsUrlLastMatch("Inspect")

    if 1==1:
        # apply the numeric swap expressions across the first 10 columns
        # (numExprList is presumably defined at module level — confirm)
        lenNodes = len(h2o.nodes)
        colResultList = h2e.exec_expr_list_across_cols(lenNodes, numExprList, hex_key,
            maxCol=10, incrementingResult=False, timeoutSecs=10)
        print "\ncolResultList after num swap", colResultList

    if (1==1):
        # no CM data comes back from RFView.json to check programmatically,
        # so results are verified by eye in the browser
        print "\nWe're not CM data getting back from RFView.json that we can check!. so look at the browser"
        print 'The good case with ignore="boat,body"'
        rfv = h2o_cmd.runRF(parseResult=parseResult, trees=5,
            timeoutSecs=10, retryDelaySecs=0.25, ignored_cols_by_name="boat,body")
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
        ### time.sleep(3600)
        h2b.browseJsonHistoryAsUrl(retryDelaySecs=0.5)

    #******************
    if 1==0:
        # disabled: character swap expressions across the first 10 columns
        colResultList = h2e.exec_expr_list_across_cols(lenNodes, charExprList, hex_key,
            maxCol=10, incrementingResult=False, timeoutSecs=10, retryDelaySecs=0.25)
        print "\ncolResultList after char swap", colResultList

    if 1==1:
        # same RF run but without ignoring any columns
        print "\nNow the bad case (no ignore)"
        rfv = h2o_cmd.runRF(parseResult=parseResult, trees=5,
            timeoutSecs=10, retryDelaySecs=0.25)
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
        ### time.sleep(3600)
        h2b.browseJsonHistoryAsUrl(retryDelaySecs=0.5)
def test_C_prostate(self): h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download() print "\nStarting prostate.csv" # columns start at 0 y = "1" x = "" csvFilename = "prostate.csv" csvPathname = 'logreg' + '/' + csvFilename parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=csvFilename + ".hex", schema='put') for maxx in range(2, 6): x = range(maxx) x.remove(0) # 0 is member ID. not used x.remove(1) # 1 is output x = ",".join(map(str, x)) print "\nx:", x print "y:", y kwargs = {'x': x, 'y': y, 'n_folds': 5} glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs) # ID,CAPSULE,AGE,RACE,DPROS,DCAPS,PSA,VOL,GLEASON h2o_glm.simpleCheckGLM(self, glm, 'AGE', **kwargs) sys.stdout.write('.') sys.stdout.flush() # now redo it all thru the browser # three times! for i in range(3): h2b.browseJsonHistoryAsUrl() h2o.nodes[0].log_view() namelist = h2o.nodes[0].log_download()
def test_GLM2_basic_browser(self): h2b.browseTheCloud() importFolderPath = "logreg" csvFilename = 'prostate.csv' csvPathname = importFolderPath + "/" + csvFilename hex_key = csvFilename + ".hex" parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='local', hex_key=hex_key, timeoutSecs=180) inspect = h2o_cmd.runInspect(None, parseResult['destination_key']) print inspect print "\n" + csvPathname, \ " numRows:", "{:,}".format(inspect['numRows']), \ " numCols:", "{:,}".format(inspect['numCols']) x = 'ID' y = 'CAPSULE' family = 'binomial' alpha = '0.5' lambda_ = '1E-4' nfolds = '0' f = 'prostate' modelKey = 'GLM_' + f kwargs = { 'response' : y, 'ignored_cols' : x, 'family' : family, 'lambda' : lambda_, 'alpha' : alpha, 'n_folds' : nfolds, # passes if 0, fails otherwise 'destination_key' : modelKey, } timeoutSecs = 60 start = time.time() glmResult = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=timeoutSecs, retryDelaySecs=0.25, pollTimeoutSecs=180, **kwargs) # this stuff was left over from when we got the result after polling the jobs list # okay to do it again # GLM2: when it redirects to the model view, we no longer have the job_key! (unlike the first response and polling) if 1==0: job_key = glmResult['job_key'] # is the job finishing before polling would say it's done? 
params = {'job_key': job_key, 'destination_key': modelKey} glm = h2o.nodes[0].completion_redirect(jsonRequest="2/GLMProgressPage2.json", params=params) print "GLM result from completion_redirect:", h2o.dump_json(a) if 1==1: glm = h2o.nodes[0].glm_view(_modelKey=modelKey) ### print "GLM result from glm_view:", h2o.dump_json(a) h2o_glm.simpleCheckGLM(self, glm, None, **kwargs) glm_model = glm['glm_model'] _names = glm_model['_names'] coefficients_names = glm_model['coefficients_names'] submodels = glm_model['submodels'][0] beta = submodels['beta'] norm_beta = submodels['norm_beta'] iteration = submodels['iteration'] validation = submodels['validation'] auc = validation['auc'] aic = validation['aic'] null_deviance = validation['null_deviance'] residual_deviance = validation['residual_deviance'] print '_names', _names print 'coefficients_names', coefficients_names # did beta get shortened? the simple check confirms names/beta/norm_beta are same length print 'beta', beta print 'iteration', iteration print 'auc', auc # now redo it all thru the browser h2b.browseJsonHistoryAsUrl()
def test_GLM2_basic_browser(self): h2b.browseTheCloud() h2o.beta_features=True importFolderPath = "logreg" csvFilename = 'prostate.csv' csvPathname = importFolderPath + "/" + csvFilename hex_key = csvFilename + ".hex" parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='local', hex_key=hex_key, timeoutSecs=180) inspect = h2o_cmd.runInspect(None, parseResult['destination_key']) print inspect print "\n" + csvPathname, \ " numRows:", "{:,}".format(inspect['numRows']), \ " numCols:", "{:,}".format(inspect['numCols']) x = 'ID' y = 'CAPSULE' family = 'binomial' alpha = '0.5' lambda_ = '1E-4' nfolds = '0' f = 'prostate' modelKey = 'GLM_' + f kwargs = { 'response' : y, 'ignored_cols' : x, 'family' : family, 'lambda' : lambda_, 'alpha' : alpha, 'n_folds' : nfolds, # passes if 0, fails otherwise 'destination_key' : modelKey, } timeoutSecs = 60 start = time.time() glmResult = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=timeoutSecs, retryDelaySecs=0.25, pollTimeoutSecs=180, **kwargs) # this stuff was left over from when we got the result after polling the jobs list # okay to do it again # GLM2: when it redirects to the model view, we no longer have the job_key! (unlike the first response and polling) if 1==0: job_key = glmResult['job_key'] # is the job finishing before polling would say it's done? 
params = {'job_key': job_key, 'destination_key': modelKey} glm = h2o.nodes[0].completion_redirect(jsonRequest="2/GLMProgressPage2.json", params=params) print "GLM result from completion_redirect:", h2o.dump_json(a) if 1==1: glm = h2o.nodes[0].glm_view(_modelKey=modelKey) ### print "GLM result from glm_view:", h2o.dump_json(a) h2o_glm.simpleCheckGLM(self, glm, None, **kwargs) glm_model = glm['glm_model'] _names = glm_model['_names'] coefficients_names = glm_model['coefficients_names'] submodels = glm_model['submodels'][0] beta = submodels['beta'] norm_beta = submodels['norm_beta'] iteration = submodels['iteration'] validation = submodels['validation'] auc = validation['auc'] aic = validation['aic'] null_deviance = validation['null_deviance'] residual_deviance = validation['residual_deviance'] print '_names', _names print 'coefficients_names', coefficients_names # did beta get shortened? the simple check confirms names/beta/norm_beta are same length print 'beta', beta print 'iteration', iteration print 'auc', auc # now redo it all thru the browser h2b.browseJsonHistoryAsUrl()