Python checkLastValidationError 예제들, h2o_nn.checkLastValidationError Python 예제들

예제 #1

0

파일 보기

파일: test_NN_covtype.py 프로젝트: cloudtrends/h2o

    def test_NN_covtype(self):
        #h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = 'covtype/covtype.20k.data'
        csvPathname_test  = 'covtype/covtype.20k.data'
        hex_key = 'covtype.hex'
        validation_key = hex_key
        timeoutSecs = 30
        parseResult  = h2i.import_parse(bucket='smalldata', path=csvPathname_train, schema='local', hex_key=hex_key, timeoutSecs=timeoutSecs)
        ###No need - use training as validation
        ###parseResultV = h2i.import_parse(bucket='smalldata', path=csvPathname_test, schema='local', hex_key=validation_key, timeoutSecs=timeoutSecs)
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])
        response = inspect['numCols'] - 1

        modes = [
            'SingleThread',
            'SingleNode',
            ]

        for mode in modes:

            #Making random id
            identifier = ''.join(random.sample(string.ascii_lowercase + string.digits, 10))
            model_key = 'nn_' + identifier + '.hex'

            kwargs = {
                'ignored_cols'                 : None,
                'response'                     : response,
                'classification'               : 1,
                'mode'                         : mode,
                'activation'                   : 'Tanh',
                #'input_dropout_ratio'          : 0.1,
                'hidden'                       : '200,200',
                'rate'                         : 0.005,
                'rate_annealing'               : 1e-5,
                'momentum_start'               : 0.1,
                'momentum_ramp'                : 100000,
                'momentum_stable'              : 0.3,
                'l1'                           : 0.0000,
                'l2'                           : 0.0000,
                'seed'                         : 28372348842,
                'loss'                         : 'CrossEntropy',
                #'max_w2'                       : 10,
                'warmup_samples'               : 0,
                'initial_weight_distribution'  : 'Normal',
                'initial_weight_scale'         : 1,
                'epochs'                       : 2.0,
                'destination_key'              : model_key,
                'validation'                   : validation_key,
            }
            expectedErr = 0.3413 if mode == 'SingleThread' else 0.3 ## expected validation error for the above model

            timeoutSecs = 600
            start = time.time()
            nn = h2o_cmd.runNNet(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            print "neural net end on ", csvPathname_train, " and ", csvPathname_test, 'took', time.time() - start, 'seconds'

            relTol = 0.03 if mode == 'SingleThread' else 0.20 ### 20% relative error is acceptable for Hogwild
            h2o_nn.checkLastValidationError(self, nn['neuralnet_model'], inspect['numRows'], expectedErr, relTol, **kwargs)

            ### Now score using the model, and check the validation error
            kwargs = {
                'source' : validation_key,
                'max_rows': 0,
                'response': response,
                'ignored_cols': None, # this is not consistent with ignored_cols_by_name
                'classification': 1,
                'destination_key': 'score_' + identifier + '.hex',
                'model': model_key,
                }
            nnScoreResult = h2o_cmd.runNNetScore(key=parseResult['destination_key'], timeoutSecs=timeoutSecs, **kwargs)
            h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)

        h2o.beta_features = False

예제 #2

0

파일 보기

파일: test_NN_mnist_multi.py 프로젝트: jayfans3/h2o

    def test_NN_mnist_multi(self):
        # h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = "mnist/train.csv.gz"
        csvPathname_test = "mnist/test.csv.gz"
        hex_key = "mnist_train.hex"
        validation_key = "mnist_test.hex"
        timeoutSecs = 60
        parseResult = h2i.import_parse(
            bucket="smalldata", path=csvPathname_train, schema="put", hex_key=hex_key, timeoutSecs=timeoutSecs
        )
        parseResultV = h2i.import_parse(
            bucket="smalldata", path=csvPathname_test, schema="put", hex_key=validation_key, timeoutSecs=timeoutSecs
        )
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, "    numRows:", "{:,}".format(
            inspect["numRows"]
        ), "    numCols:", "{:,}".format(inspect["numCols"])
        response = inspect["numCols"] - 1

        modes = [
            ###'SingleThread', ### too slow (and slightly less accurate)
            "SingleNode",  ### wastes N-1 nodes, since their weight matrices are updated but never looked at...
            ###'MapReduce' ### TODO: enable, once implemented
        ]

        for mode in modes:

            # Making random id
            identifier = "".join(random.sample(string.ascii_lowercase + string.digits, 10))
            model_key = "nn_" + identifier + ".hex"

            kwargs = {
                "ignored_cols": None,
                "response": response,
                "classification": 1,
                "mode": mode,
                "activation": "RectifierWithDropout",
                "input_dropout_ratio": 0.2,
                "hidden": "117,131,129",
                "rate": 0.005,
                "rate_annealing": 1e-6,
                "momentum_start": 0.5,
                "momentum_ramp": 100000,
                "momentum_stable": 0.9,
                "l1": 0.00001,
                "l2": 0.0000001,
                "seed": 98037452452,
                "loss": "CrossEntropy",
                "max_w2": 15,
                "warmup_samples": 0,
                "initial_weight_distribution": "UniformAdaptive",
                #'initial_weight_scale'         : 0.01,
                "epochs": 20.0,
                "destination_key": model_key,
                "validation": validation_key,
            }
            ###expectedErr = 0.0362 ## from single-threaded mode
            expectedErr = 0.03  ## observed actual value with Hogwild

            timeoutSecs = 600
            start = time.time()
            nn = h2o_cmd.runNNet(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            print "neural net end on ", csvPathname_train, " and ", csvPathname_test, "took", time.time() - start, "seconds"

            relTol = 0.02 if mode == "SingleThread" else 0.10  ### 10% relative error is acceptable for Hogwild
            h2o_nn.checkLastValidationError(
                self, nn["neuralnet_model"], inspect["numRows"], expectedErr, relTol, **kwargs
            )

            ### Now score using the model, and check the validation error
            kwargs = {
                "source": validation_key,
                "max_rows": 0,
                "response": response,
                "ignored_cols": None,  # this is not consistent with ignored_cols_by_name
                "classification": 1,
                "destination_key": "score_" + identifier + ".hex",
                "model": model_key,
            }
            nnScoreResult = h2o_cmd.runNNetScore(key=parseResult["destination_key"], timeoutSecs=timeoutSecs, **kwargs)
            h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)

            if mode != "MapReduce":
                print "WARNING: Running in non-MapReduce mode on multiple nodes! Only one node contributes to results."

        h2o.beta_features = False

예제 #3

0

파일 보기

파일: test_NN_covtype.py 프로젝트: raghavendrabhat/h2o

    def test_NN_covtype(self):
        # h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = "covtype/covtype.20k.data"
        csvPathname_test = "covtype/covtype.20k.data"
        hex_key = "covtype.hex"
        validation_key = hex_key
        timeoutSecs = 30
        parseResult = h2i.import_parse(
            bucket="smalldata", path=csvPathname_train, schema="local", hex_key=hex_key, timeoutSecs=timeoutSecs
        )
        ###No need - use training as validation
        ###parseResultV = h2i.import_parse(bucket='smalldata', path=csvPathname_test, schema='local', hex_key=validation_key, timeoutSecs=timeoutSecs)
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, "    numRows:", "{:,}".format(
            inspect["numRows"]
        ), "    numCols:", "{:,}".format(inspect["numCols"])
        response = inspect["numCols"] - 1

        modes = ["SingleThread", "SingleNode"]

        for mode in modes:

            # Making random id
            identifier = "".join(random.sample(string.ascii_lowercase + string.digits, 10))
            model_key = "nn_" + identifier + ".hex"

            kwargs = {
                "ignored_cols": None,
                "response": response,
                "classification": 1,
                "mode": mode,
                "activation": "Tanh",
                #'input_dropout_ratio'          : 0.1,
                "hidden": "200,200",
                "rate": 0.005,
                "rate_annealing": 1e-5,
                "momentum_start": 0.1,
                "momentum_ramp": 100000,
                "momentum_stable": 0.3,
                "l1": 0.0000,
                "l2": 0.0000,
                "seed": 28372348842,
                "loss": "CrossEntropy",
                #'max_w2'                       : 10,
                "warmup_samples": 0,
                "initial_weight_distribution": "Normal",
                "initial_weight_scale": 1,
                "epochs": 2.0,
                "destination_key": model_key,
                "validation": validation_key,
            }
            expectedErr = 0.3413 if mode == "SingleThread" else 0.3  ## expected validation error for the above model

            timeoutSecs = 600
            start = time.time()
            nn = h2o_cmd.runNNet(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            print "neural net end on ", csvPathname_train, " and ", csvPathname_test, "took", time.time() - start, "seconds"

            relTol = 0.03 if mode == "SingleThread" else 0.15  ### 15% relative error is acceptable for Hogwild
            h2o_nn.checkLastValidationError(
                self, nn["neuralnet_model"], inspect["numRows"], expectedErr, relTol, **kwargs
            )

            ### Now score using the model, and check the validation error
            kwargs = {
                "source": validation_key,
                "max_rows": 0,
                "response": response,
                "ignored_cols": None,  # this is not consistent with ignored_cols_by_name
                "classification": 1,
                "destination_key": "score_" + identifier + ".hex",
                "model": model_key,
            }
            nnScoreResult = h2o_cmd.runNNetScore(key=parseResult["destination_key"], timeoutSecs=timeoutSecs, **kwargs)
            h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)

        h2o.beta_features = False

예제 #4

0

파일 보기

파일: test_NN_mnist.py 프로젝트: BhaskarPros/h2o

    def test_NN_mnist(self):
        #h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = 'mnist/train.csv.gz'
        csvPathname_test  = 'mnist/test.csv.gz'
        hex_key = 'mnist_train.hex'
        validation_key = 'mnist_test.hex'
        timeoutSecs = 30
        parseResult  = h2i.import_parse(bucket='smalldata', path=csvPathname_train, schema='put', hex_key=hex_key, timeoutSecs=timeoutSecs)
        parseResultV = h2i.import_parse(bucket='smalldata', path=csvPathname_test, schema='put', hex_key=validation_key, timeoutSecs=timeoutSecs)

        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])
        response = inspect['numCols'] - 1

        modes = [
            'SingleThread', 
            'SingleNode',
            ###'MapReduce' ### TODO: enable, once implemented
            ]

        for mode in modes:

            #Making random id
            identifier = ''.join(random.sample(string.ascii_lowercase + string.digits, 10))
            model_key = 'nn_' + identifier + '.hex'

            kwargs = {
                'ignored_cols'                 : None,
                'response'                     : response,
                'classification'               : 1,
                'mode'                         : mode,
                'activation'                   : 'RectifierWithDropout',
                'input_dropout_ratio'          : 0.2,
                'hidden'                       : '117,131,129',
                'rate'                         : 0.005,
                'rate_annealing'               : 1e-6,
                'momentum_start'               : 0.5,
                'momentum_ramp'                : 100000,
                'momentum_stable'              : 0.9,
                'l1'                           : 0.00001,
                'l2'                           : 0.0000001,
                'seed'                         : 98037452452,
                'loss'                         : 'CrossEntropy',
                'max_w2'                       : 15,
                'warmup_samples'               : 0,
                'initial_weight_distribution'  : 'UniformAdaptive',
                #'initial_weight_scale'         : 0.01,
                'epochs'                       : 2.0,
                'destination_key'              : model_key,
                'validation'                   : validation_key,
            }
            expectedErr = 0.0565 ## expected validation error for the above model on 1 thread

            timeoutSecs = 600
            start = time.time()
            nn = h2o_cmd.runNNet(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            print "neural net end on ", csvPathname_train, " and ", csvPathname_test, 'took', time.time() - start, 'seconds'

            #### Look at model progress, and check the last reported validation error
            relTol = 0.3 if mode == 'SingleThread' else 0.15
            h2o_nn.checkLastValidationError(self, nn['neuralnet_model'], inspect['numRows'], expectedErr, relTol, **kwargs)

            #### Now score using the model, and check the validation error
            kwargs = {
                'source' : validation_key,
                'max_rows': 0,
                'response': response,
                'ignored_cols': None, # this is not consistent with ignored_cols_by_name
                'classification': 1,
                'destination_key': 'score_' + identifier + '.hex',
                'model': model_key,
            }
            nnScoreResult = h2o_cmd.runNNetScore(key=parseResult['destination_key'], timeoutSecs=timeoutSecs, **kwargs)
            h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)

        h2o.beta_features = False

예제 #5

0

파일 보기

파일: test_NN_mnist_multi.py 프로젝트: brennane/h2o

    def test_NN_mnist_multi(self):
        #h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = 'mnist/train.csv.gz'
        csvPathname_test  = 'mnist/test.csv.gz'
        hex_key = 'mnist_train.hex'
        validation_key = 'mnist_test.hex'
        timeoutSecs = 30
        parseResult  = h2i.import_parse(bucket='smalldata', path=csvPathname_train, schema='put', hex_key=hex_key, timeoutSecs=timeoutSecs)
        parseResultV = h2i.import_parse(bucket='smalldata', path=csvPathname_test, schema='put', hex_key=validation_key, timeoutSecs=timeoutSecs)
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])
        response = inspect['numCols'] - 1

        modes = [
            ###'SingleThread', ### too slow (and slightly less accurate)
            'SingleNode',  ### wastes N-1 nodes, since their weight matrices are updated but never looked at...
            ###'MapReduce' ### TODO: enable, once implemented
            ]

        for mode in modes:

            #Making random id
            identifier = ''.join(random.sample(string.ascii_lowercase + string.digits, 10))
            model_key = 'nn_' + identifier + '.hex'

            kwargs = {
                'ignored_cols'                 : None,
                'response'                     : response,
                'classification'               : 1,
                'mode'                         : mode,
                'activation'                   : 'RectifierWithDropout',
                'input_dropout_ratio'          : 0.2,
                'hidden'                       : '117,131,129',
                'rate'                         : 0.005,
                'rate_annealing'               : 1e-6,
                'momentum_start'               : 0.5,
                'momentum_ramp'                : 100000,
                'momentum_stable'              : 0.9,
                'l1'                           : 0.00001,
                'l2'                           : 0.0000001,
                'seed'                         : 98037452452,
                'loss'                         : 'CrossEntropy',
                'max_w2'                       : 15,
                'warmup_samples'               : 0,
                'initial_weight_distribution'  : 'UniformAdaptive',
                #'initial_weight_scale'         : 0.01,
                'epochs'                       : 20.0,
                'destination_key'              : model_key,
                'validation'                   : validation_key,
            }
            ###expectedErr = 0.0362 ## from single-threaded mode
            expectedErr = 0.0331 ## observed actual value with Hogwild

            timeoutSecs = 600
            start = time.time()
            nn = h2o_cmd.runNNet(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            print "neural net end on ", csvPathname_train, " and ", csvPathname_test, 'took', time.time() - start, 'seconds'

            relTol = 0.02 if mode == 'SingleThread' else 0.10 ### 10% relative error is acceptable for Hogwild
            h2o_nn.checkLastValidationError(self, nn['neuralnet_model'], inspect['numRows'], expectedErr, relTol, **kwargs)

            ### Now score using the model, and check the validation error
            kwargs = {
                'source' : validation_key,
                'max_rows': 0,
                'response': response,
                'ignored_cols': None, # this is not consistent with ignored_cols_by_name
                'classification': 1,
                'destination_key': 'score_' + identifier + '.hex',
                'model': model_key,
            }
            nnScoreResult = h2o_cmd.runNNetScore(key=parseResult['destination_key'], timeoutSecs=timeoutSecs, **kwargs)
            h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)

            if mode != 'MapReduce':
                print 'WARNING: Running in non-MapReduce mode on multiple nodes! Only one node contributes to results.'

        h2o.beta_features = False

예제 #6

0

파일 보기

파일: test_NN2_mnist_multi.py 프로젝트: linearregression/h2o

    def test_NN2_mnist_multi(self):
        #h2b.browseTheCloud()
        h2o.beta_features = True
        csvPathname_train = 'mnist/train.csv.gz'
        csvPathname_test = 'mnist/test.csv.gz'
        hex_key = 'mnist_train.hex'
        validation_key = 'mnist_test.hex'
        timeoutSecs = 60
        parseResult = h2i.import_parse(bucket='smalldata',
                                       path=csvPathname_train,
                                       schema='put',
                                       hex_key=hex_key,
                                       timeoutSecs=timeoutSecs)
        parseResultV = h2i.import_parse(bucket='smalldata',
                                        path=csvPathname_test,
                                        schema='put',
                                        hex_key=validation_key,
                                        timeoutSecs=timeoutSecs)
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "\n" + csvPathname_train, \
            "    numRows:", "{:,}".format(inspect['numRows']), \
            "    numCols:", "{:,}".format(inspect['numCols'])
        response = inspect['numCols'] - 1

        #Making random id
        identifier = ''.join(
            random.sample(string.ascii_lowercase + string.digits, 10))
        model_key = 'nn_' + identifier + '.hex'

        kwargs = {
            'ignored_cols': None,
            'response': response,
            'classification': 1,
            'activation': 'RectifierWithDropout',
            'input_dropout_ratio': 0.2,
            'hidden': '117,131,129',
            'rate': 0.005,
            'rate_annealing': 1e-6,
            'momentum_start': 0.5,
            'momentum_ramp': 100000,
            'momentum_stable': 0.9,
            'l1': 0.00001,
            'l2': 0.0000001,
            'seed': 98037452452,
            'loss': 'CrossEntropy',
            'max_w2': 15,
            'initial_weight_distribution': 'UniformAdaptive',
            #'initial_weight_scale'         : 0.01,
            'epochs': 20.0,
            'destination_key': model_key,
            'validation': validation_key,
        }
        ###expectedErr = 0.0362 ## from single-threaded mode
        expectedErr = 0.03  ## observed actual value with Hogwild

        timeoutSecs = 600
        start = time.time()
        nn = h2o_cmd.runDeepLearning(parseResult=parseResult,
                                     timeoutSecs=timeoutSecs,
                                     **kwargs)
        print "neural net end on ", csvPathname_train, " and ", csvPathname_test, 'took', time.time(
        ) - start, 'seconds'

        relTol = 0.10  ### 10% relative error is acceptable for Hogwild
        h2o_nn.checkLastValidationError(self, nn['neuralnet_model'],
                                        inspect['numRows'], expectedErr,
                                        relTol, **kwargs)

        ### Now score using the model, and check the validation error
        kwargs = {
            'source': validation_key,
            'max_rows': 0,
            'response': response,
            'ignored_cols':
            None,  # this is not consistent with ignored_cols_by_name
            'classification': 1,
            'destination_key': 'score_' + identifier + '.hex',
            'model': model_key,
        }
        nnScoreResult = h2o_cmd.runDeepLearningScore(
            key=parseResult['destination_key'],
            timeoutSecs=timeoutSecs,
            **kwargs)
        h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol,
                                **kwargs)

        h2o.beta_features = False