Beispiel #1
0
 def parseS3File(self, s3bucket, filename, **kwargs):
     """Parse a file from an S3 bucket and record how long the call took.

     The bucket and filename, plus any extra keyword arguments, are
     forwarded to ``h2o_cmd.parseS3File``. The elapsed wall-clock time
     (seconds, measured with ``time.time()``) is reported through
     ``h2o.verboseprint`` and stored in the returned result under the
     ``'python_call_timer'`` key.
     """
     began = time.time()
     result = h2o_cmd.parseS3File(bucket=s3bucket, filename=filename, **kwargs)
     elapsed = time.time() - began

     h2o.verboseprint("py-S3 parse took {0} sec".format(elapsed))

     # Attach the Python-side timing to the result before handing it back.
     result['python_call_timer'] = elapsed
     return result
Beispiel #2
0
 def parseS3File(self, s3bucket, filename, **kwargs):
     """Run an S3 parse through ``h2o_cmd`` and time it from the Python side.

     All keyword arguments are passed through to ``h2o_cmd.parseS3File``
     unchanged. The returned parse result additionally carries the measured
     duration in seconds under ``'python_call_timer'``.
     """
     t0 = time.time()
     outcome = h2o_cmd.parseS3File(bucket=s3bucket, filename=filename, **kwargs)
     duration = time.time() - t0

     message = "py-S3 parse took {0} sec".format(duration)
     h2o.verboseprint(message)

     outcome['python_call_timer'] = duration
     return outcome
Beispiel #3
0
    def test_RF_1000trees(self):
        """Parse a covtype dataset from S3, run a random forest on it, and
        print how long each step took together with the formatted RF result.

        NOTE(review): the method name says 1000 trees but the run below uses
        ``ntree=100`` -- confirm which one is intended.
        """
        # NAs cause CM to zero..don't run for now
        ### csvPathnamegz = h2o.find_file('smalldata/hhp_9_17_12.predict.100rows.data.gz')
        s3bucket = self.s3_default_bucket()

        # Only one dataset is ever used. Other variants that were previously
        # assigned here (and immediately overwritten), kept for reference:
        #   covtype20x.data.gz, covtype200x.data.gz, covtype50x.data,
        #   covtype100x.data, covtype.20k.data
        s3dataset = 'covtype.data'

        start = time.time()
        parseResult = h2o_cmd.parseS3File(bucket=s3bucket,
                                          filename=s3dataset,
                                          timeoutSecs=14800)
        # Single-argument parenthesized print behaves the same as the print
        # statement on Python 2 and is also valid Python 3 syntax.
        print("Parsing took {0}".format(time.time() - start))

        start = time.time()
        rf_train = h2o_cmd.runRF(parseResult=parseResult,
                                 ntree=100,
                                 timeoutSecs=14800,
                                 bin_limit=20000,
                                 out_of_bag_error_estimate=1,
                                 stat_type='ENTROPY',
                                 depth=100,
                                 exclusive_split_limit=0)
        print("Computation took {0} sec".format(time.time() - start))
        print(h2o_rf.pp_rf_result(rf_train))
    def test_RF_1000trees(self):
        """Parse a covtype dataset from S3, run a random forest on the parsed
        key, and print the duration of each step plus the formatted RF result.

        NOTE(review): the method name says 1000 trees but the run below uses
        ``ntree=100`` -- confirm which one is intended.
        """
        # NAs cause CM to zero..don't run for now
        ### csvPathnamegz = h2o.find_file('smalldata/hhp_9_17_12.predict.100rows.data.gz')
        s3bucket = self.s3_default_bucket()

        # Only one dataset is ever used. Other variants that were previously
        # assigned here (and immediately overwritten), kept for reference:
        #   covtype20x.data.gz, covtype200x.data.gz, covtype50x.data,
        #   covtype100x.data, covtype.20k.data
        s3dataset = "covtype.data"

        start = time.time()
        parseKey = h2o_cmd.parseS3File(bucket=s3bucket, filename=s3dataset, timeoutSecs=14800)
        # Single-argument parenthesized print behaves the same as the print
        # statement on Python 2 and is also valid Python 3 syntax.
        print("Parsing took {0}".format(time.time() - start))

        start = time.time()
        rf_train = h2o_cmd.runRFOnly(
            parseKey=parseKey,
            ntree=100,
            timeoutSecs=14800,
            bin_limit=20000,
            out_of_bag_error_estimate=1,
            gini=0,
            depth=100,
            exclusive_split_limit=0,
        )
        print("Computation took {0} sec".format(time.time() - start))
        print(h2o_rf.pp_rf_result(rf_train))