Beispiel #1
0
    def test_C_prostate(self):
        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()
        print "\nStarting prostate.csv"
        # columns start at 0
        y = "1"
        x = ""
        csvFilename = "prostate.csv"
        csvPathname = "logreg" + "/" + csvFilename
        parseResult = h2i.import_parse(bucket="smalldata", path=csvPathname, hex_key=csvFilename + ".hex", schema="put")

        for maxx in range(2, 6):
            x = range(maxx)
            x.remove(0)  # 0 is member ID. not used
            x.remove(1)  # 1 is output
            x = ",".join(map(str, x))
            print "\nx:", x
            print "y:", y

            kwargs = {"x": x, "y": y, "n_folds": 5}
            glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs)
            # ID,CAPSULE,AGE,RACE,DPROS,DCAPS,PSA,VOL,GLEASON
            h2o_glm.simpleCheckGLM(self, glm, "AGE", **kwargs)
            sys.stdout.write(".")
            sys.stdout.flush()

        # now redo it all thru the browser
        # three times!
        for i in range(3):
            h2b.browseJsonHistoryAsUrl()

        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()
Beispiel #2
0
    def test_B_benign(self):
        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()

        print "\nStarting benign.csv"
        csvFilename = "benign.csv"
        csvPathname = "logreg" + "/" + csvFilename
        parseResult = h2i.import_parse(bucket="smalldata", path=csvPathname, hex_key=csvFilename + ".hex", schema="put")
        # columns start at 0
        y = "3"
        # cols 0-13. 3 is output
        # no member id in this one
        for maxx in range(11, 14):
            x = range(maxx)
            x.remove(3)  # 3 is output
            x = ",".join(map(str, x))
            print "\nx:", x
            print "y:", y

            kwargs = {"x": x, "y": y}
            glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=15, **kwargs)
            # no longer look at STR?
            h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)
            sys.stdout.write(".")
            sys.stdout.flush()

        # now redo it all thru the browser
        h2b.browseJsonHistoryAsUrl()
Beispiel #3
0
    def test_B_benign(self):
        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()

        print "\nStarting benign.csv"
        csvFilename = "benign.csv"
        csvPathname = 'logreg' + '/' + csvFilename
        parseResult = h2i.import_parse(bucket='smalldata',
                                       path=csvPathname,
                                       hex_key=csvFilename + ".hex",
                                       schema='put')
        # columns start at 0
        y = "3"
        # cols 0-13. 3 is output
        # no member id in this one
        for maxx in range(11, 14):
            x = range(maxx)
            x.remove(3)  # 3 is output
            x = ",".join(map(str, x))
            print "\nx:", x
            print "y:", y

            kwargs = {'x': x, 'y': y}
            glm = h2o_cmd.runGLM(parseResult=parseResult,
                                 timeoutSecs=15,
                                 **kwargs)
            # no longer look at STR?
            h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)
            sys.stdout.write('.')
            sys.stdout.flush()

        # now redo it all thru the browser
        h2b.browseJsonHistoryAsUrl()
    def test_rf_tnc3_fvec(self):
        h2o.beta_features = True
        csvPathname = 'tnc3.csv'
        print "\n" + csvPathname
        hex_key = "tnc3.hex"
        ### h2b.browseTheCloud()

        parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, hex_key=hex_key, schema='put', 
            timeoutSecs=10, retryDelaySecs=0.25, header=1)
        print "Parse result['Key']:", parseResult['destination_key']
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        h2b.browseJsonHistoryAsUrlLastMatch("Inspect")

        if 1==1:
            lenNodes = len(h2o.nodes)
            colResultList = h2e.exec_expr_list_across_cols(lenNodes, numExprList, hex_key, maxCol=10,
                incrementingResult=False, timeoutSecs=10)
            print "\ncolResultList after num swap", colResultList

        if (1==1):
            print "\nWe're not CM data getting back from RFView.json that we can check!. so look at the browser"
            print 'The good case with ignore="boat,body"'
            rfv = h2o_cmd.runRF(parseResult=parseResult, trees=5, timeoutSecs=10, retryDelaySecs=0.25,
                ignored_cols_by_name="boat,body")

        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
        ### time.sleep(3600)
        h2b.browseJsonHistoryAsUrl(retryDelaySecs=0.5)

        #******************
        if 1==0:
            colResultList = h2e.exec_expr_list_across_cols(lenNodes, charExprList, hex_key, maxCol=10,
                incrementingResult=False, timeoutSecs=10, retryDelaySecs=0.25)
            print "\ncolResultList after char swap", colResultList

        if 1==1:
            print "\nNow the bad case (no ignore)"
            rfv = h2o_cmd.runRF(parseResult=parseResult, trees=5, timeoutSecs=10, retryDelaySecs=0.25)

        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
        ### time.sleep(3600)
        h2b.browseJsonHistoryAsUrl(retryDelaySecs=0.5)
    def test_C_prostate(self):
        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()
        print "\nStarting prostate.csv"
        # columns start at 0
        y = "1"
        x = ""
        csvFilename = "prostate.csv"
        csvPathname = 'logreg' + '/' + csvFilename
        parseResult = h2i.import_parse(bucket='smalldata',
                                       path=csvPathname,
                                       hex_key=csvFilename + ".hex",
                                       schema='put')

        for maxx in range(2, 6):
            x = range(maxx)
            x.remove(0)  # 0 is member ID. not used
            x.remove(1)  # 1 is output
            x = ",".join(map(str, x))
            print "\nx:", x
            print "y:", y

            kwargs = {'x': x, 'y': y, 'n_folds': 5}
            glm = h2o_cmd.runGLM(parseResult=parseResult,
                                 timeoutSecs=15,
                                 **kwargs)
            # ID,CAPSULE,AGE,RACE,DPROS,DCAPS,PSA,VOL,GLEASON
            h2o_glm.simpleCheckGLM(self, glm, 'AGE', **kwargs)
            sys.stdout.write('.')
            sys.stdout.flush()

        # now redo it all thru the browser
        # three times!
        for i in range(3):
            h2b.browseJsonHistoryAsUrl()

        h2o.nodes[0].log_view()
        namelist = h2o.nodes[0].log_download()
Beispiel #6
0
    def test_GLM2_basic_browser(self):
        h2b.browseTheCloud()

        importFolderPath = "logreg"
        csvFilename = 'prostate.csv'
        csvPathname = importFolderPath + "/" + csvFilename
        hex_key = csvFilename + ".hex"

        parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='local', hex_key=hex_key, timeoutSecs=180)
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        print inspect
        print "\n" + csvPathname, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])

        x         = 'ID'
        y         = 'CAPSULE'
        family    = 'binomial'
        alpha     = '0.5'
        lambda_   = '1E-4'
        nfolds    = '0'
        f         = 'prostate'
        modelKey  = 'GLM_' + f

        kwargs = {       'response'           : y,
                         'ignored_cols'       : x,
                         'family'             : family,
                         'lambda'             : lambda_,
                         'alpha'              : alpha,
                         'n_folds'            : nfolds, # passes if 0, fails otherwise
                         'destination_key'    : modelKey,
                 }

        timeoutSecs = 60
        start = time.time()
        glmResult = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=timeoutSecs, retryDelaySecs=0.25, pollTimeoutSecs=180, **kwargs)

        # this stuff was left over from when we got the result after polling the jobs list
        # okay to do it again
        # GLM2: when it redirects to the model view, we no longer have the job_key! (unlike the first response and polling)
        if 1==0:
            job_key = glmResult['job_key']
            # is the job finishing before polling would say it's done?
            params = {'job_key': job_key, 'destination_key': modelKey}
            glm = h2o.nodes[0].completion_redirect(jsonRequest="2/GLMProgressPage2.json", params=params)
            print "GLM result from completion_redirect:", h2o.dump_json(a)
        if 1==1:
            glm = h2o.nodes[0].glm_view(_modelKey=modelKey)
            ### print "GLM result from glm_view:", h2o.dump_json(a)

        h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)

        glm_model = glm['glm_model']
        _names = glm_model['_names']
        coefficients_names = glm_model['coefficients_names']
        submodels = glm_model['submodels'][0]

        beta = submodels['beta']
        norm_beta = submodels['norm_beta']
        iteration = submodels['iteration']

        validation = submodels['validation']        
        auc = validation['auc']
        aic = validation['aic']
        null_deviance = validation['null_deviance']
        residual_deviance = validation['residual_deviance']

        print '_names', _names
        print 'coefficients_names', coefficients_names
        # did beta get shortened? the simple check confirms names/beta/norm_beta are same length
        print 'beta', beta
        print 'iteration', iteration        
        print 'auc', auc

        # now redo it all thru the browser
        h2b.browseJsonHistoryAsUrl()
    def test_GLM2_basic_browser(self):
        h2b.browseTheCloud()

        h2o.beta_features=True
        importFolderPath = "logreg"
        csvFilename = 'prostate.csv'
        csvPathname = importFolderPath + "/" + csvFilename
        hex_key = csvFilename + ".hex"

        parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='local', hex_key=hex_key, timeoutSecs=180)
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
        print inspect
        print "\n" + csvPathname, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])

        x         = 'ID'
        y         = 'CAPSULE'
        family    = 'binomial'
        alpha     = '0.5'
        lambda_   = '1E-4'
        nfolds    = '0'
        f         = 'prostate'
        modelKey  = 'GLM_' + f

        kwargs = {       'response'           : y,
                         'ignored_cols'       : x,
                         'family'             : family,
                         'lambda'             : lambda_,
                         'alpha'              : alpha,
                         'n_folds'            : nfolds, # passes if 0, fails otherwise
                         'destination_key'    : modelKey,
                 }

        timeoutSecs = 60
        start = time.time()
        glmResult = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=timeoutSecs, retryDelaySecs=0.25, pollTimeoutSecs=180, **kwargs)

        # this stuff was left over from when we got the result after polling the jobs list
        # okay to do it again
        # GLM2: when it redirects to the model view, we no longer have the job_key! (unlike the first response and polling)
        if 1==0:
            job_key = glmResult['job_key']
            # is the job finishing before polling would say it's done?
            params = {'job_key': job_key, 'destination_key': modelKey}
            glm = h2o.nodes[0].completion_redirect(jsonRequest="2/GLMProgressPage2.json", params=params)
            print "GLM result from completion_redirect:", h2o.dump_json(a)
        if 1==1:
            glm = h2o.nodes[0].glm_view(_modelKey=modelKey)
            ### print "GLM result from glm_view:", h2o.dump_json(a)

        h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)

        glm_model = glm['glm_model']
        _names = glm_model['_names']
        coefficients_names = glm_model['coefficients_names']
        submodels = glm_model['submodels'][0]

        beta = submodels['beta']
        norm_beta = submodels['norm_beta']
        iteration = submodels['iteration']

        validation = submodels['validation']        
        auc = validation['auc']
        aic = validation['aic']
        null_deviance = validation['null_deviance']
        residual_deviance = validation['residual_deviance']

        print '_names', _names
        print 'coefficients_names', coefficients_names
        # did beta get shortened? the simple check confirms names/beta/norm_beta are same length
        print 'beta', beta
        print 'iteration', iteration        
        print 'auc', auc

        # now redo it all thru the browser
        h2b.browseJsonHistoryAsUrl()