def setUpClass(cls):
    """Bring up the cloud for this test class, locally or on remote hosts.

    The result of h2o.decide_if_localhost() is kept in the module-level
    ``localhost``. Both paths pass java_extra_args='-XX:+PrintCompilation'.
    """
    global localhost
    jvm_flags = '-XX:+PrintCompilation'
    localhost = h2o.decide_if_localhost()
    if not localhost:
        h2o_hosts.build_cloud_with_hosts(java_extra_args=jvm_flags)
        return
    h2o.build_cloud(2, java_heap_GB=4, java_extra_args=jvm_flags)
Exemple #2
0
 def setUpClass(cls):
     """Build the cloud for this test class.

     Local runs call h2o.build_cloud(3, ...) with the cdh3 HDFS options
     below; otherwise h2o_hosts.build_cloud_with_hosts() is called bare.
     """
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     hdfs_opts = {
         'use_hdfs': True,
         'hdfs_version': 'cdh3',
         'hdfs_name_node': '192.168.1.176',
     }
     h2o.build_cloud(3, **hdfs_opts)
Exemple #3
0
 def setUpClass(cls):
     """Create the test cloud and record the chosen mode in ``localhost``."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud()
 def setUpClass(cls):
     """Build the cloud: h2o.build_cloud(2, java_heap_GB=5) locally, hosts otherwise."""
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     # maybe fails more reliably with just 2 jvms?
     h2o.build_cloud(2, java_heap_GB=5)
 def setUpClass(cls):
     """Start the cloud for the class; the mode is stored in ``localhost``.

     Local: h2o.build_cloud(1, java_heap_GB=4).
     Remote: h2o_hosts.build_cloud_with_hosts() with no arguments.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=4)
    def test_import_covtype_parse_loop(self):
        csvFilename = "covtype.data"
        importFolderPath = "/home/0xdiag/datasets/standard"
        trialMax = 2
        localhost = h2o.decide_if_localhost()
        for tryHeap in [4, 3, 2, 1]:
            print "\n", tryHeap, "GB heap, 1 jvms, import folder, then loop parsing 'covtype.data' to unique keys"
            if (localhost):
                h2o.build_cloud(node_count=1, java_heap_GB=tryHeap)
            else:
                h2o_hosts.build_cloud_with_hosts(
                    node_count=1, java_heap_GB=tryHeap)

            for trial in range(trialMax):
                # import each time, because h2o deletes source file after parse
                h2i.setupImportFolder(None, importFolderPath)
                key2 = csvFilename + "_" + str(trial) + ".hex"
                parseKey = h2i.parseImportFolderFile(
                    None,
                    csvFilename,
                    importFolderPath,
                    key2=key2,
                    timeoutSecs=20)
            # sticky ports?
            h2o.tear_down_cloud()
            time.sleep(2)
Exemple #7
0
 def setUpClass(cls):
     """Build the cloud on base_port 54323, locally or on remote hosts.

     The local build passes 3 and java_heap_GB=12 (a 4 GB variant was
     used previously).
     """
     port = 54323
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts(base_port=port)
         return
     h2o.build_cloud(3, java_heap_GB=12, base_port=port)
Exemple #8
0
 def setUpClass(cls):
     """Build the cloud for the class; the mode is stored in ``localhost``.

     SEED is declared global here but not assigned by this method.
     """
     global SEED, localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(java_heap_GB=10)
    def setUpClass(cls):
        print "Will build_cloud() with random heap size and do overlapped import folder/parse (groups)"
        global SEED, localhost
        SEED = h2o.setup_random_seed()
        if RANDOM_HEAP:
            tryHeap = random.randint(4, 28)
        else:
            tryHeap = 28

        # print "\n", tryHeap,"GB heap, 1 jvm per host, import 192.168.1.176 hdfs, then parse"
        print "\n", tryHeap, "GB heap, 1 jvm per host, import,  then parse"
        localhost = h2o.decide_if_localhost()
        h2o.beta_features = True  # for the beta tab in the browser
        if localhost:
            h2o.build_cloud(
                node_count=3,
                java_heap_GB=4,
                base_port=54323,
                # use_hdfs=True, hdfs_name_node='192.168.1.176', hdfs_version='cdh3'
            )
        else:
            h2o_hosts.build_cloud_with_hosts(
                node_count=1,
                java_heap_GB=tryHeap,
                base_port=54321,
                # use_hdfs=True, hdfs_name_node='192.168.1.176', hdfs_version='cdh3'
            )
 def setUpClass(cls):
     """Build the cloud; a local run uses node_count=1, 10 GB heap, base_port 54333."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     local_opts = dict(node_count=1, java_heap_GB=10, base_port=54333)
     h2o.build_cloud(**local_opts)
Exemple #11
0
 def setUpClass(cls):
     """Build a cloud with the MapR filesystem options, locally or on hosts.

     Both builders receive the same arguments; only the builder differs.
     """
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     # earlier settings, kept for reference:
     #   hdfs_config='/opt/mapr/conf/mapr-clusters.conf',
     #   hdfs_name_node='mr-0x1.0xdata.loc:7222'
     #   hdfs_version='mapr2.1.3',
     mapr_opts = dict(
         java_heap_GB=15,
         enable_benchmark_log=True,
         use_maprfs=True,
         hdfs_version="mapr3.0.1",
         hdfs_name_node="192.168.1.171:7222",
     )
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1, **mapr_opts)
     else:
         h2o.build_cloud(1, **mapr_opts)
 def setUpClass(cls):
     """Build the cloud; h2o_hosts is imported only when not running locally."""
     if h2o.decide_if_localhost():
         h2o.build_cloud(2, java_heap_GB=10, use_flatfile=True)
         return
     # imported here so the local path never needs the module
     import h2o_hosts
     h2o_hosts.build_cloud_with_hosts()
    def test_import_covtype_parse_3jvm_fvec(self):
        h2o.beta_features = True
        csvFilename = "covtype.data"
        importFolderPath = "standard"
        trialMax = 2
        for tryHeap in [1]:
            print "\n", tryHeap,"GB heap, 3 jvms, import folder, then loop parsing 'covtype.data' to unique keys"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(node_count=3, java_heap_GB=tryHeap)
            else:
                h2o_hosts.build_cloud_with_hosts(node_count=3, java_heap_GB=tryHeap)

            for trial in range(trialMax):
                # import each time, because h2o deletes source file after parse
                csvPathname = importFolderPath + "/" + csvFilename
                hex_key = csvFilename + "_" + str(trial) + ".hex"
                parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, hex_key=hex_key, timeoutSecs=20)
            # sticky ports?
            h2o.tear_down_cloud()
            time.sleep(5)

        # print "Waiting 60 secs for TIME_WAIT sockets to go away"
        # time.sleep(60)
        time.sleep(2)
 def setUpClass(cls):
     """Build the cloud with enable_benchmark_log=True on either path."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=True)
         return
     h2o.build_cloud(1, java_heap_GB=14, enable_benchmark_log=True)
 def setUpClass(cls):
     """Build a cloud with node_count=1 and java_heap_GB=10, locally or on hosts."""
     global localhost
     localhost = h2o.decide_if_localhost()
     cloud_opts = dict(node_count=1, java_heap_GB=10)
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(**cloud_opts)
     else:
         h2o.build_cloud(**cloud_opts)
 def setUpClass(cls):
     """Build a cloud with node_count=1; the mode is recorded in ``localhost``."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
         return
     h2o.build_cloud(node_count=1)
 def setUpClass(cls):
     """Build the cloud, then call h2b.browseTheCloud()."""
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(3)
     h2b.browseTheCloud()
 def setUpClass(cls):
     """Enable h2o.beta_features and build the cloud.

     A local build passes 3, java_heap_GB=1 and use_hdfs=True.
     """
     run_local = h2o.decide_if_localhost()
     h2o.beta_features = True
     if not run_local:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(3, java_heap_GB=1, use_hdfs=True)
 def setUpClass(cls):
     """Enable h2o.beta_features and build the cloud on base_port 54321."""
     run_local = h2o.decide_if_localhost()
     h2o.beta_features = True
     port = 54321
     if not run_local:
         h2o_hosts.build_cloud_with_hosts(base_port=port)
     else:
         h2o.build_cloud(3, java_heap_GB=1, use_hdfs=True, base_port=port)
Exemple #20
0
 def setUpClass(cls):
     """Build the cloud, then switch on h2o.beta_features."""
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(3)
     h2o.beta_features = True
Exemple #21
0
 def setUpClass(cls):
     global localhost
     localhost = h2o.decide_if_localhost()
     if (localhost):
         h2o.build_cloud(2, java_heap_GB=5)
     else:
         print "1 jvms per node, 28GB heap each"
         h2o_hosts.build_cloud_with_hosts(node_count=1, java_heap_GB=28)
Exemple #22
0
 def setUpClass(cls):
     """Set SEED via h2o.setup_random_seed(), then build with benchmark logging on."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=True)
         return
     h2o.build_cloud(1, java_heap_GB=10, enable_benchmark_log=True)
Exemple #23
0
 def setUpClass(cls):
     """Set SEED, then build the cloud (local build uses use_flatfile=True)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, use_flatfile=True)
 def setUpClass(cls):
     """Set SEED, then build a cloud with node_count=1 and java_heap_GB=10.

     Both builders receive the same keyword arguments.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     cloud_opts = {'node_count': 1, 'java_heap_GB': 10}
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(**cloud_opts)
         return
     h2o.build_cloud(**cloud_opts)
Exemple #25
0
 def setUpClass(cls):
     """Build the cloud: h2o.build_cloud(1) locally, hosts otherwise."""
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1)
Exemple #26
0
 def setUpClass(cls):
     """Set SEED, then build the cloud.

     The local build passes CLOUD_SIZE and java_heap_GB=12 / CLOUD_SIZE;
     the division is evaluated on the local path only.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     heap_per_node = 12 / CLOUD_SIZE
     h2o.build_cloud(CLOUD_SIZE, java_heap_GB=heap_per_node)
Exemple #27
0
 def setUpClass(cls):
     """Build the cloud; local builds take their keywords from collectConf(cls)."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     # collectConf is only consulted on the local path
     h2o.build_cloud(**collectConf(cls))
 def setUpClass(cls):
     """Set SEED, then build a cloud with node_count=1.

     Only the local build also passes base_port=54327.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
     else:
         h2o.build_cloud(node_count=1, base_port=54327)
Exemple #29
0
 def setUpClass(cls):
     """Build the cloud, then store h2o.make_syn_dir() in SYNDATASETS_DIR."""
     global SYNDATASETS_DIR
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(2, java_heap_GB=5)
     SYNDATASETS_DIR = h2o.make_syn_dir()
 def setUpClass(cls):
     """Set SEED, then build the cloud (local: 2 with java_heap_GB=7)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         # uses import Hdfs for s3n instead of import folder
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(2, java_heap_GB=7)
Exemple #31
0
 def setUpClass(cls):
     """Build a cloud with the cdh4 HDFS options, locally or on hosts.

     Both builders are called with identical arguments.
     """
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     hdfs_opts = dict(use_hdfs=True, hdfs_version='cdh4', hdfs_name_node='172.16.2.176')
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1, **hdfs_opts)
     else:
         h2o.build_cloud(1, **hdfs_opts)
 def setUpClass(cls):
     """Build the cloud (passing 1 to either builder), then call h2b.browseTheCloud()."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1)
     else:
         h2o.build_cloud(1)
     h2b.browseTheCloud()
 def setUpClass(cls):
     """Set SEED, then build the cloud (local: 3 with java_heap_GB=1)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     # enum processing is more interesting with multiple jvms
     h2o.build_cloud(3, java_heap_GB=1)
Exemple #34
0
 def setUpClass(cls):
     """Build a cloud with the cdh3u5 HDFS options, locally or on hosts.

     Both builders are called with identical arguments.
     """
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     hdfs_opts = {
         'use_hdfs': True,
         'hdfs_version': 'cdh3u5',
         'hdfs_name_node': '192.168.1.176',
     }
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1, **hdfs_opts)
         return
     h2o.build_cloud(1, **hdfs_opts)
 def setUpClass(cls):
     """Set SEED and build the cloud; the mode is stored in ``localhost``."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         # enum processing is more interesting with multiple jvms
         h2o.build_cloud(3, java_heap_GB=1)
 def setUpClass(cls):
     """Set SEED, then build a cloud with node_count=1 (local: base_port 54327)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
         return
     h2o.build_cloud(node_count=1, base_port=54327)
 def setUpClass(cls):
     """Build the cloud: h2o.build_cloud(1, java_heap_GB=14) locally, hosts otherwise."""
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, java_heap_GB=14)
Exemple #38
0
 def setUpClass(cls):
     """Build the cloud; the local build uses node_count=3 with use_flatfile=True."""
     # the flatfile is written by build_cloud now, so this is no longer needed:
     ### h2o.write_flatfile(node_count=3)
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(node_count=3, use_flatfile=True)
 def setUpClass(cls):
     """Set SEED, then build the cloud (local: 1 with java_heap_GB=1)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=1)
 def setUpClass(cls):
     """Set SEED, then build the cloud.

     The local build passes 2, java_heap_MB=1300 and use_flatfile=True.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(2, java_heap_MB=1300, use_flatfile=True)
 def setUpClass(cls):
     """Build the cloud with random_udp_drop=RANDOM_UDP_DROP on either path.

     The local build additionally sets a 4 GB heap and the cdh4 HDFS
     options for name node 'mr-0x6'.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(random_udp_drop=RANDOM_UDP_DROP)
         return
     h2o.build_cloud(
         java_heap_GB=4,
         random_udp_drop=RANDOM_UDP_DROP,
         use_hdfs=True,
         hdfs_version='cdh4',
         hdfs_name_node='mr-0x6',
     )
 def setUpClass(cls):
     """Build the cloud (passing 1 to either builder), then enable h2o.beta_features."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1)
     else:
         h2o.build_cloud(1)
     # fvec
     h2o.beta_features = True
Exemple #43
0
 def setUpClass(cls):
     """Set SEED, then build the cloud (local: 3 with java_heap_GB=4)."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         # uses import Hdfs for s3n instead of import folder
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(3, java_heap_GB=4)
Exemple #44
0
 def setUpClass(cls):
     """Build the cloud, then store h2o.make_syn_dir() in SYNDATASETS_DIR."""
     global localhost, SYNDATASETS_DIR
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1)
     else:
         h2o.build_cloud(1)
     SYNDATASETS_DIR = h2o.make_syn_dir()
 def setUpClass(cls):
     """Build the cloud on base_port 54323 with h2o.verbose on during the build.

     h2o.verbose is set True before the build and reset to False afterwards.
     The reset sits in a ``finally`` so the flag is also cleared when the
     build raises, instead of staying on for whatever runs next in the
     same process.
     """
     localhost = h2o.decide_if_localhost()
     # want detail on the cloud building to see what node fails
     h2o.verbose = True
     try:
         if localhost:
             h2o.build_cloud(1, java_heap_GB=2, base_port=54323)
         else:
             h2o_hosts.build_cloud_with_hosts(base_port=54323)
     finally:
         h2o.verbose = False
Exemple #46
0
 def setUpClass(cls):
     """Build the cloud: h2o.build_cloud(1, java_heap_GB=14) locally, hosts otherwise."""
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     if localhost:
         h2o.build_cloud(1, java_heap_GB=14)
         return
     # all hdfs info is done thru the hdfs_config michal's ec2 config sets up?
     h2o_hosts.build_cloud_with_hosts()
 def setUpClass(cls):
     """Set SEED via h2o.setup_random_seed(), then build the cloud."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(node_count=1)
 def setUpClass(cls):
     """Pin SEED to a fixed value and build a cloud with node_count=1."""
     global SEED, localhost
     # fixed seed instead of: SEED = h2o.setup_random_seed()
     SEED = 8977501266014959103
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
     else:
         h2o.build_cloud(node_count=1)
 def setUpClass(cls):
     """Set SEED from h2o.setup_random_seed(seed=BAD_SEED), then build the cloud."""
     global SEED, localhost
     # use the known bad seed if it's set. otherwise should be None
     SEED = h2o.setup_random_seed(seed=BAD_SEED)
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(3)
Exemple #50
0
 def setUpClass(cls):
     """Pin SEED to a fixed value, then build the cloud (local: 1 with java_heap_GB=4)."""
     global SEED, localhost
     # fixed seed instead of: SEED = h2o.setup_random_seed()
     SEED = 6019110937119320453
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, java_heap_GB=4)
    def setUpClass(cls):
        """Build the cloud, then call h2b.browseTheCloud().

        The local build passes 3, java_heap_GB=4 and use_flatfile=True.
        """
        # fails with 3
        if not h2o.decide_if_localhost():
            h2o_hosts.build_cloud_with_hosts()
        else:
            h2o.build_cloud(3, java_heap_GB=4, use_flatfile=True)

        h2b.browseTheCloud()
 def setUpClass(cls):
     """Pin SEED to a fixed value, then build the cloud, passing 1 to either builder."""
     global SEED, localhost
     # fixed seed instead of: SEED = h2o.setup_random_seed()
     SEED = 6204672511291494176
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1)
         return
     h2o.build_cloud(1)
 def setUpClass(cls):
     """Set SEED, build the cloud, then enable h2o.beta_features."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(2, java_heap_MB=1300, use_flatfile=True)
     h2o.beta_features = True
Exemple #54
0
    def test_parse_nflx_loop_hdfs_fvec(self):
        h2o.beta_features = True
        print "Using the -.gz files from hdfs"
        # hdfs://<name node>/datasets/manyfiles-nflx-gz/file_1.dat.gz

        # default
        csvFilename = "hex_10"
        csvFilePattern = '*' # all files in the folder

        for tryHeap in [24]:
            print "\n", tryHeap,"GB heap, 1 jvm per host, import mr-0x6 hdfs, then parse"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(java_heap_GB=tryHeap, random_udp_drop=RANDOM_UDP_DROP, base_port=55930,
                    use_hdfs=True, hdfs_name_node=NAME_NODE, hdfs_version=VERSION)
            else:
                h2o_hosts.build_cloud_with_hosts(java_heap_GB=tryHeap, random_udp_drop=RANDOM_UDP_DROP, base_port=55600,
                    use_hdfs=True, hdfs_name_node=NAME_NODE, hdfs_version=VERSION)

            # don't raise exception if we find something bad in h2o stdout/stderr?
            # h2o.nodes[0].sandboxIgnoreErrors = True

            timeoutSecs = 500
            importFolderPath = "datasets/airlines_multi"
            csvPathname = importFolderPath + "/" + csvFilePattern
            parseResult = h2i.import_only(path=csvPathname, schema='hdfs',
                timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=60)

            for trial in range(TRIAL_MAX):
                # each parse now just does one
                csvFilePattern = "*%s.csv" % trial
                # if we want multifile
                # csvFilePattern = "*"

                hex_key = csvFilename + "_" + str(trial) + ".hex"
                csvPathname = importFolderPath + "/" + csvFilePattern
                start = time.time()
                # print "Don't wait for completion. Just load things up!"
    
                print "Drat. the source file is locked if we noPoll. Would have to increment across the individual files?"
                
                print "Drat. We can't re-import the folder, if there's a parse using one of the source files?"
                parseResult = h2i.parse_only(pattern=csvFilePattern, hex_key=hex_key, noPoll=True, delete_on_done=0,
                    timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=60)
                elapsed = time.time() - start

                print "parse result:", parseResult['destination_key']
                print "Parse #", trial, "completed in", "%6.2f" % elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                h2o_cmd.runStoreView()
                # we don't delete the hex key. it will start spilling? slow

            h2j.pollWaitJobs(timeoutSecs=300, pollTimeoutSecs=30)
            h2o.tear_down_cloud()
            # sticky ports? wait a bit.
            time.sleep(5)
Exemple #55
0
 def setUpClass(cls):
     """Build the cloud; the local build passes 3 plus the cdh3 HDFS options."""
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     hdfs_opts = dict(
         use_hdfs=True,
         hdfs_version='cdh3',
         hdfs_name_node='192.168.1.176',
     )
     h2o.build_cloud(3, **hdfs_opts)
Exemple #56
0
 def setUpClass(cls):
     """Set SEED, build the cloud, then call h2b.browseTheCloud()."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(2, java_heap_GB=4, use_flatfile=True)
     h2b.browseTheCloud()
 def setUpClass(cls):
     """Enable h2o.beta_features, set SEED, then build the cloud."""
     global SEED, localhost
     h2o.beta_features = True
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=12)
 def setUpClass(cls):
     """Build the cloud, then enable h2o.beta_features."""
     # assume we're at 0xdata with its hdfs namenode
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, java_heap_GB=14)
     h2o.beta_features = True
Exemple #59
0
 def setUpClass(cls):
     global SEED
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     print "WARNING: won't work for remote h2o, because syn_datasets is created locally only, for import"
     if (localhost):
         h2o.build_cloud(1, java_heap_GB=14)
     else:
         h2o_hosts.build_cloud_with_hosts()
Exemple #60
0
 def setUpClass(cls):
     """Set SEED, then build the cloud (local: 3 with java_heap_GB=4)."""
     # one declaration covers both module-level names
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(3, java_heap_GB=4)