Beispiel #1
0
 def setUpClass(cls):
     """Class-level setup: start the H2O cloud the tests run against.

     When h2o.decide_if_localhost() is true the cloud is built locally with
     HDFS options for the cdh3 name node at 192.168.1.176; otherwise it is
     built through h2o_hosts with that module's defaults.
     """
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     hdfs_options = dict(
         use_hdfs=True,
         hdfs_version='cdh3',
         hdfs_name_node='192.168.1.176',
     )
     h2o.build_cloud(3, **hdfs_options)
Beispiel #2
0
 def setUpClass(cls):
     """Class-level setup: record the random seed and start the H2O cloud.

     Stores the value returned by h2o.setup_random_seed() in the module
     global SEED and the result of h2o.decide_if_localhost() in the module
     global localhost, then builds either a local cloud or one via h2o_hosts.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     # localhost was declared global but never assigned in this method, so the
     # branch below relied on some other code having set it first (NameError
     # otherwise). Decide it explicitly.
     localhost = h2o.decide_if_localhost()
     if localhost:
         h2o.build_cloud(2)
     else:
         h2o_hosts.build_cloud_with_hosts()
 def setUpClass(cls):
     """Class-level setup: enable h2o.beta_features and start the cloud.

     Both the local build and the h2o_hosts build use base port 54321.
     """
     run_local = h2o.decide_if_localhost()
     # The flag is switched on before any cloud is built.
     h2o.beta_features = True
     port = 54321
     if not run_local:
         h2o_hosts.build_cloud_with_hosts(base_port=port)
     else:
         h2o.build_cloud(3, java_heap_GB=1, use_hdfs=True, base_port=port)
 def test_C_no_mc_rcv(self):
     print "\nwith flatfile, with multicast disabled"
     allAcceptIptables()
     multicastDropReceiveIptables()
     showIptables()
     h2o_hosts.build_cloud_with_hosts(nodes_per_host, use_flatfile=True)
     h2o.tear_down_cloud()
Beispiel #5
0
    def test_import_billion_rows_parse_loop(self):
        print "Apparently we can't handle 1B rows .gzed"
        csvFilename = "billion_rows.csv.gz"
        importFolderPath = "standard"
        csvPathname = importFolderPath + "/" + csvFilename
        trialMax = 3
        for tryHeap in [4,16]:
            print "\n", tryHeap,"GB heap, 1 jvm per host, import folder,", \
                "then loop parsing 'billion_rows.csv' to unique keys"
            h2o_hosts.build_cloud_with_hosts(1, java_heap_GB=tryHeap)
            timeoutSecs=800
            for trial in range(trialMax):
                hex_key = csvFilename + "_" + str(trial) + ".hex"
                start = time.time()
                parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, hex_key=hex_key, 
                    timeoutSecs=timeoutSecs, retryDelaySecs=4, pollTimeoutSecs=60)
                elapsed = time.time() - start
                print "Trial #", trial, "completed in", elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                print "Deleting key in H2O so we get it from S3 (if ec2) or nfs again. ", \
                      "Otherwise it would just parse the cached key."
                storeView = h2o.nodes[0].store_view()
                ### print "storeView:", h2o.dump_json(storeView)

            # sticky ports?
            h2o.tear_down_cloud()
            time.sleep(5)
Beispiel #6
0
 def setUpClass(cls):
     """Class-level setup: start a local cloud or one built via h2o_hosts."""
     run_local = h2o.decide_if_localhost()
     if not run_local:
         h2o_hosts.build_cloud_with_hosts()
     else:
         # maybe fails more reliably with just 2 jvms?
         h2o.build_cloud(2, java_heap_GB=5)
 def test_D_no_mc_snd(self):
     print "\nwith flatfile, with multicast disabled"
     allAcceptIptables()
     multicastBlockSendIptables()
     showIptables()
     h2o_hosts.build_cloud_with_hosts(nodes_per_host, use_flatfile=True)
     h2o.tear_down_cloud()
    def test_parse_covtype20x_loop(self):
        csvFilename = "covtype20x.data"
        importFolderPath = "/home/0xdiag/datasets"
        trialMax = 2
        for tryJvms in [1,2,3,4]:
            for tryHeap in [1,3]:
                print "\n", tryHeap,"GB heap,", tryJvms, "jvm per host, import folder,", \
                    "then loop parsing 'covtype20x.data' to unique keys"
                h2o_hosts.build_cloud_with_hosts(node_count=tryJvms, java_heap_GB=tryHeap)
                timeoutSecs=300
                for trial in range(trialMax):
                    # since we delete the key, we have to re-import every iteration, to get it again
                    h2i.setupImportFolder(None, importFolderPath)

                    key2 = csvFilename + "_" + str(trial) + ".hex"
                    start = time.time()
                    parseKey = h2i.parseImportFolderFile(None, csvFilename, importFolderPath, key2=key2, 
                        timeoutSecs=timeoutSecs, retryDelaySecs=4, pollTimeoutSecs=60)
                    elapsed = time.time() - start
                    print "Trial #", trial, "completed in", elapsed, "seconds.", \
                        "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                    print "Deleting key in H2O so we get it from S3 (if ec2) or nfs again.", \
                          "Otherwise it would just parse the cached key."
                    storeView = h2o.nodes[0].store_view()
                    ### print "storeView:", h2o.dump_json(storeView)
                    # h2o removes key after parse now
                    ## print "Removing", parseKey['source_key']
                    ## removeKeyResult = h2o.nodes[0].remove_key(key=parseKey['source_key'])
                    ### print "removeKeyResult:", h2o.dump_json(removeKeyResult)

                # sticky ports?
                h2o.tear_down_cloud()
                time.sleep(tryJvms * 5)
 def setUpClass(cls):
     """Class-level setup: start the cloud, then call h2b.browseTheCloud()."""
     if h2o.decide_if_localhost():
         h2o.build_cloud(3)
     else:
         h2o_hosts.build_cloud_with_hosts()
     h2b.browseTheCloud()
 def setUpClass(cls):
     """Class-level setup: enable h2o.beta_features and start the cloud."""
     run_local = h2o.decide_if_localhost()
     # The flag is switched on before any cloud is built.
     h2o.beta_features = True
     if not run_local:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(3, java_heap_GB=1, use_hdfs=True)
 def setUpClass(cls):
     """Class-level setup: set the module global localhost and start the cloud."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=4)
Beispiel #12
0
 def setUpClass(cls):
     """Class-level setup: start the cloud on base port 54323.

     The local build uses a 12 GB heap (an earlier 4 GB setting was replaced).
     """
     port = 54323
     if h2o.decide_if_localhost():
         h2o.build_cloud(3, java_heap_GB=12, base_port=port)
     else:
         h2o_hosts.build_cloud_with_hosts(base_port=port)
    def setUpClass(cls):
        print "Will build_cloud() with random heap size and do overlapped import folder/parse (groups)"
        global SEED, localhost
        SEED = h2o.setup_random_seed()
        if RANDOM_HEAP:
            tryHeap = random.randint(4, 28)
        else:
            tryHeap = 28

        # print "\n", tryHeap,"GB heap, 1 jvm per host, import 192.168.1.176 hdfs, then parse"
        print "\n", tryHeap, "GB heap, 1 jvm per host, import,  then parse"
        localhost = h2o.decide_if_localhost()
        h2o.beta_features = True  # for the beta tab in the browser
        if localhost:
            h2o.build_cloud(
                node_count=3,
                java_heap_GB=4,
                base_port=54323,
                # use_hdfs=True, hdfs_name_node='192.168.1.176', hdfs_version='cdh3'
            )
        else:
            h2o_hosts.build_cloud_with_hosts(
                node_count=1,
                java_heap_GB=tryHeap,
                base_port=54321,
                # use_hdfs=True, hdfs_name_node='192.168.1.176', hdfs_version='cdh3'
            )
 def setUpClass(cls):
     """Class-level setup: set the global localhost and build with node_count=1."""
     global localhost
     localhost = h2o.decide_if_localhost()
     # Same arguments either way; only the builder differs.
     build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
     build(node_count=1)
    def test_import_covtype_parse_3jvm_fvec(self):
        h2o.beta_features = True
        csvFilename = "covtype.data"
        importFolderPath = "standard"
        trialMax = 2
        for tryHeap in [1]:
            print "\n", tryHeap,"GB heap, 3 jvms, import folder, then loop parsing 'covtype.data' to unique keys"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(node_count=3, java_heap_GB=tryHeap)
            else:
                h2o_hosts.build_cloud_with_hosts(node_count=3, java_heap_GB=tryHeap)

            for trial in range(trialMax):
                # import each time, because h2o deletes source file after parse
                csvPathname = importFolderPath + "/" + csvFilename
                hex_key = csvFilename + "_" + str(trial) + ".hex"
                parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, hex_key=hex_key, timeoutSecs=20)
            # sticky ports?
            h2o.tear_down_cloud()
            time.sleep(5)

        # print "Waiting 60 secs for TIME_WAIT sockets to go away"
        # time.sleep(60)
        time.sleep(2)
Beispiel #16
0
 def setUpClass(cls):
     """Class-level setup: start the cloud with enable_benchmark_log=True."""
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=True)
         return
     h2o.build_cloud(1, java_heap_GB=14, enable_benchmark_log=True)
Beispiel #17
0
 def setUpClass(cls):
     """Class-level setup: choose local vs. hosts from the working directory.

     The run is local unless the string 'hosts' appears in os.getcwd();
     the decision is stored in the module global local_host.
     """
     global local_host
     local_host = 'hosts' not in os.getcwd()
     if not local_host:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, java_heap_GB=4)
Beispiel #18
0
 def setUpClass(cls):
     """Class-level setup: set the global localhost and build with defaults."""
     global localhost
     localhost = h2o.decide_if_localhost()
     # Neither builder receives arguments; only the builder differs.
     build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
     build()
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     The local build uses a 10 GB heap on base port 54333.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(node_count=1, java_heap_GB=10, base_port=54333)
 def setUpClass(cls):
     """Class-level setup: start a local flatfile cloud or one via h2o_hosts.

     h2o_hosts is imported only on the non-local path.
     """
     if h2o.decide_if_localhost():
         h2o.build_cloud(2, java_heap_GB=10, use_flatfile=True)
         return
     import h2o_hosts
     h2o_hosts.build_cloud_with_hosts()
Beispiel #21
0
 def setUpClass(cls):
     """Class-level setup: start the cloud, then enable h2o.beta_features."""
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(3)
     # Switched on only after the cloud has been built.
     h2o.beta_features = True
Beispiel #22
0
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     SEED is declared global here but is not assigned by this method.
     """
     global SEED, localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(java_heap_GB=10)
Beispiel #23
0
 def setUpClass(cls):
     start = time.time()
     h2o_hosts.build_cloud_with_hosts(node_count, base_port=base_port, 
         use_flatfile=True, java_heap_GB=1)
     print "Cloud of", len(h2o.nodes), "built in", time.time()-start, "seconds"
     h2o.verify_cloud_size()
     h2o.check_sandbox_for_errors()
 def setUpClass(cls):
     """Class-level setup: build with node_count=1 and a 10 GB heap."""
     global localhost
     localhost = h2o.decide_if_localhost()
     # Same arguments either way; only the builder differs.
     cloud_args = dict(node_count=1, java_heap_GB=10)
     if localhost:
         h2o.build_cloud(**cloud_args)
     else:
         h2o_hosts.build_cloud_with_hosts(**cloud_args)
    def test_import_covtype_parse_loop(self):
        csvFilename = "covtype.data"
        importFolderPath = "/home/0xdiag/datasets/standard"
        trialMax = 2
        localhost = h2o.decide_if_localhost()
        for tryHeap in [4, 3, 2, 1]:
            print "\n", tryHeap, "GB heap, 1 jvms, import folder, then loop parsing 'covtype.data' to unique keys"
            if (localhost):
                h2o.build_cloud(node_count=1, java_heap_GB=tryHeap)
            else:
                h2o_hosts.build_cloud_with_hosts(
                    node_count=1, java_heap_GB=tryHeap)

            for trial in range(trialMax):
                # import each time, because h2o deletes source file after parse
                h2i.setupImportFolder(None, importFolderPath)
                key2 = csvFilename + "_" + str(trial) + ".hex"
                parseKey = h2i.parseImportFolderFile(
                    None,
                    csvFilename,
                    importFolderPath,
                    key2=key2,
                    timeoutSecs=20)
            # sticky ports?
            h2o.tear_down_cloud()
            time.sleep(2)
    def setUpClass(cls):
        """Class-level setup: build the cloud via h2o_hosts with use_hdfs=True.

        Per the original notes, the hosts configuration comes from the
        username-specific json (pytest_config-<username>.json), including
        hdfs_name_node, and H2O won't start if it can't talk to the hdfs, so
        use_hdfs=False is the fallback when no HDFS is reachable.
        """
        hdfs_enabled = True
        h2o_hosts.build_cloud_with_hosts(use_hdfs=hdfs_enabled)
Beispiel #27
0
 def setUpClass(cls):
     """Class-level setup: choose local vs. hosts from the working directory.

     The run is local unless the string "hosts" appears in os.getcwd();
     the decision is stored in the module global local_host.
     """
     global local_host
     local_host = "hosts" not in os.getcwd()
     if not local_host:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=1)
    def test_parse_covtype20x_loop_s3n_hdfs(self):
        bucket = 'home-0xdiag-datasets'
        importFolderPath = "standard"
        csvFilename = "covtype20x.data"
        csvPathname = importFolderPath + "/" + csvFilename
        timeoutSecs = 500
        trialMax = 3
        for tryHeap in [4,12]:
            print "\n", tryHeap,"GB heap, 1 jvm per host, import folder,", \
                "then parse 'covtype20x.data'"
            h2o_hosts.build_cloud_with_hosts(node_count=1, java_heap_GB=tryHeap)
            # don't raise exception if we find something bad in h2o stdout/stderr?
            h2o.nodes[0].sandboxIgnoreErrors = True

            for trial in range(trialMax):
                hex_key = csvFilename + ".hex"
                start = time.time()
                parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='s3n', hex_key=hex_key,
                    timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=60)
                elapsed = time.time() - start
                print "parse result:", parseResult['destination_key']
                print "Trial #", trial, "completed in", elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                removeKeyResult = h2o.nodes[0].remove_key(key=hex_key)

            h2o.tear_down_cloud()
            # sticky ports? wait a bit.
            time.sleep(5)
Beispiel #29
0
 def setUpClass(cls):
     """Class-level setup: start the cloud with -XX:+PrintCompilation.

     The run is local unless 'hosts' appears in os.getcwd(); the decision is
     stored in the module global local_host.
     """
     global local_host
     local_host = 'hosts' not in os.getcwd()
     jvm_flags = '-XX:+PrintCompilation'
     if not local_host:
         h2o_hosts.build_cloud_with_hosts(java_extra_args=jvm_flags)
     else:
         h2o.build_cloud(2, java_heap_GB=4, java_extra_args=jvm_flags)
Beispiel #30
0
 def setUpClass(cls):
     """Class-level setup: start the cloud with MapR-FS options.

     Both builders receive identical arguments (15 GB heap, benchmark log,
     use_maprfs, hdfs_version mapr3.0.1, name node 192.168.1.171:7222).
     Assumes we're at 0xdata with its hdfs namenode. An earlier variant with
     hdfs_config / mapr2.1.3 settings is no longer used.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     cloud_args = dict(
         java_heap_GB=15,
         enable_benchmark_log=True,
         use_maprfs=True,
         hdfs_version="mapr3.0.1",
         hdfs_name_node="192.168.1.171:7222",
     )
     build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
     build(1, **cloud_args)
Beispiel #31
0
 def setUpClass(cls):
     """Class-level setup: seed, start the cloud, create the syn-data dir.

     Sets the module globals SEED, localhost and SYNDATASETS_DIR (the value
     returned by h2o.make_syn_dir()).
     """
     global SEED, localhost, SYNDATASETS_DIR
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(node_count=1, base_port=54321)
     SYNDATASETS_DIR = h2o.make_syn_dir()
Beispiel #32
0
 def setUpClass(cls):
     """Class-level setup: start the cloud, create the syn-data dir, enable beta.

     Sets the module globals localhost and SYNDATASETS_DIR, and finally
     switches h2o.beta_features on.
     """
     global localhost, SYNDATASETS_DIR
     localhost = h2o.decide_if_localhost()
     # Same argument either way; only the builder differs.
     build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
     build(node_count=1)
     SYNDATASETS_DIR = h2o.make_syn_dir()
     h2o.beta_features = True
Beispiel #33
0
 def setUpClass(cls):
     """Class-level setup: seed and start a cloud that writes a json.

     Both builders get create_json=True and delete_keys_at_teardown=True.
     Per the original note, h2o.nodes[0].delete_keys_at_teardown should make
     the testdir_release tests delete keys after each test completes (not at
     cloud teardown).
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     shared = dict(create_json=True, delete_keys_at_teardown=True)
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(**shared)
     else:
         h2o.build_cloud(3, java_heap_GB=4, **shared)
Beispiel #34
0
 def setUpClass(cls):
     global SEED
     SEED = random.randint(0, sys.maxint)
     random.seed(SEED)
     print "\nUsing random seed:", SEED
     localhost = h2o.decide_if_localhost()
     if (localhost):
         h2o.build_cloud(1, use_flatfile=True)
     else:
         h2o_hosts.build_cloud_with_hosts()
Beispiel #35
0
 def setUpClass(cls):
     """Class-level setup: seed, start the cloud, then enable beta features.

     Sets the module globals SEED and localhost. The browser launch
     (h2b.browseTheCloud) is intentionally not called.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(1, java_heap_GB=4)
     h2o.beta_features = True
Beispiel #36
0
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     The local build enables HDFS without naming a version or name node
     (an explicit cdh3 / 192.168.1.176 variant is no longer used).
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(1, java_heap_GB=4, use_hdfs=True)
Beispiel #37
0
 def setUpClass(cls):
     """Class-level setup: build the cloud with beta features on, then off.

     h2o.beta_features is True only while the cloud is being built (to get
     the browser page special tab) and is reset to False afterwards.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     h2o.beta_features = True
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
     else:
         h2o.build_cloud(node_count=1, base_port=54321)
     h2o.beta_features = False
Beispiel #38
0
 def setUpClass(cls):
     """Class-level setup: start a local cloud or one built via h2o_hosts.

     Per the original notes: the run defaults to local unless 'hosts' is
     somewhere in the path to cwd, and the hosts build uses the
     username-specific json (pytest_config-<username>.json; copy
     pytest_config-simple.json and adapt it). Different config jsons can
     live in different dirs, executing out of those dirs.
     """
     if not h2o.decide_if_localhost():
         h2o_hosts.build_cloud_with_hosts()
         return
     h2o.build_cloud(3, java_heap_GB=7)
Beispiel #39
0
 def setUpClass(cls):
     start = time.time()
     h2o_hosts.build_cloud_with_hosts(node_count,
                                      base_port=base_port,
                                      use_flatfile=True,
                                      java_heap_GB=1)
     print "Cloud of", len(
         h2o.nodes), "built in", time.time() - start, "seconds"
     h2o.verify_cloud_size()
     h2o.check_sandbox_for_errors()
    def setUpClass(cls):
        """Class-level setup: seed and start the cloud.

        Sets the module globals SEED and localhost. To replay a run, pass a
        fixed value to h2o.setup_random_seed (e.g. 8968685305521902318).
        The local build uses a 1300 MB heap with a flatfile.
        """
        global SEED, localhost
        SEED = h2o.setup_random_seed()
        localhost = h2o.decide_if_localhost()
        if not localhost:
            h2o_hosts.build_cloud_with_hosts()
            return
        h2o.build_cloud(2, java_heap_MB=1300, use_flatfile=True)
 def setUpClass(cls):
     """Class-level setup: seed and start the cloud.

     Sets the module globals SEED and localhost.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         # enum processing is more interesting with multiple jvms
         h2o.build_cloud(3, java_heap_GB=1)
Beispiel #42
0
 def setUpClass(cls):
     """Class-level setup: seed and start the cloud on base port 54321.

     The heap is 14 GB for the local build and 28 GB for the h2o_hosts build.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     port = 54321
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(1, java_heap_GB=28, base_port=port)
     else:
         h2o.build_cloud(1, java_heap_GB=14, base_port=port)
Beispiel #43
0
 def setUpClass(cls):
     """Class-level setup: seed and start the cloud with -XX:+PrintGCDetails.

     The heap is 14 GB for the local build and 28 GB for the h2o_hosts build.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     gc_flags = '-XX:+PrintGCDetails'
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(java_heap_GB=28, java_extra_args=gc_flags)
     else:
         h2o.build_cloud(java_heap_GB=14, java_extra_args=gc_flags)
Beispiel #44
0
 def setUpClass(cls):
     """Class-level setup: seed, start the cloud, then call h2b.browseTheCloud()."""
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
     else:
         h2o.build_cloud(2, java_heap_GB=10, use_flatfile=True)
     h2b.browseTheCloud()
Beispiel #45
0
    def setUpClass(cls):
        """Class-level setup: seed and build the cloud with a first argument of 1.

        Sets the module globals SEED and localhost.
        """
        global SEED, localhost
        SEED = h2o.setup_random_seed()
        localhost = h2o.decide_if_localhost()
        # Same argument either way; only the builder differs.
        build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
        build(1)
    def test_parse_nflx_loop_hdfs_fvec(self):
        h2o.beta_features = True
        print "Using the -.gz files from hdfs"
        # hdfs://<name node>/datasets/manyfiles-nflx-gz/file_1.dat.gz
        csvFilename = "file_10.dat.gz"
        csvFilepattern = "file_1[0-9].dat.gz"

        trialMax = 2
        for tryHeap in [24]:
            print "\n", tryHeap, "GB heap, 1 jvm per host, import 192.168.1.176 hdfs, then parse"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(node_count=1,
                                java_heap_GB=tryHeap,
                                use_hdfs=True,
                                hdfs_name_node='192.168.1.176',
                                hdfs_version='cdh3')
            else:
                h2o_hosts.build_cloud_with_hosts(
                    node_count=1,
                    java_heap_GB=tryHeap,
                    use_hdfs=True,
                    hdfs_name_node='192.168.1.176',
                    hdfs_version='cdh3')

            # don't raise exception if we find something bad in h2o stdout/stderr?
            # h2o.nodes[0].sandboxIgnoreErrors = True

            timeoutSecs = 500
            importFolderPath = "datasets/manyfiles-nflx-gz"
            for trial in range(trialMax):
                hex_key = csvFilename + "_" + str(trial) + ".hex"
                csvFilePattern = 'file_1.dat.gz'
                # "key": "hdfs://192.168.1.176/datasets/manyfiles-nflx-gz/file_99.dat.gz",

                time.sleep(5)
                csvPathname = importFolderPath + "/" + csvFilePattern
                start = time.time()
                parseResult = h2i.import_parse(path=csvPathname,
                                               schema='hdfs',
                                               hex_key=hex_key,
                                               timeoutSecs=timeoutSecs,
                                               retryDelaySecs=10,
                                               pollTimeoutSecs=60)
                elapsed = time.time() - start

                print "parse result:", parseResult['destination_key']
                print "Parse #", trial, "completed in", "%6.2f" % elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                h2o_cmd.runStoreView()

            h2o.tear_down_cloud()
            # sticky ports? wait a bit.
            time.sleep(5)
    def test_parse_airline_multi_hdfs_many(self):

        h2o.beta_features = True
        # default
        csvFilename = "hex_10"
        csvFilePattern = '*' # all files in the folder

        for tryHeap in [24]:
            print "\n", tryHeap,"GB heap, 1 jvm per host, import mr-0x6 hdfs, then parse"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(java_heap_GB=tryHeap, random_udp_drop=RANDOM_UDP_DROP, base_port=55930,
                    use_hdfs=True, hdfs_name_node=NAME_NODE, hdfs_version=VERSION)
            else:
                h2o_hosts.build_cloud_with_hosts(java_heap_GB=tryHeap, random_udp_drop=RANDOM_UDP_DROP, base_port=55600, disable_assertions=False,
                    use_hdfs=True, hdfs_name_node=NAME_NODE, hdfs_version=VERSION)

            # don't raise exception if we find something bad in h2o stdout/stderr?
            # h2o.nodes[0].sandboxIgnoreErrors = True

            timeoutSecs = 500
            importFolderPath = "datasets/airlines_multi"
            csvPathname = importFolderPath + "/" + csvFilePattern
            parseResult = h2i.import_only(path=csvPathname, schema='hdfs',
                timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=60)

            for trial in range(TRIAL_MAX):
                # each parse now just does one
                csvFilePattern = "*%s.csv" % trial
                # if we want multifile
                # csvFilePattern = "*"

                hex_key = csvFilename + "_" + str(trial) + ".hex"
                csvPathname = importFolderPath + "/" + csvFilePattern
                start = time.time()
                # print "Don't wait for completion. Just load things up!"
    
                print "Drat. the source file is locked if we noPoll. Would have to increment across the individual files?"
                
                print "Drat. We can't re-import the folder, if there's a parse using one of the source files?"
                parseResult = h2i.parse_only(pattern=csvFilePattern, hex_key=hex_key, noPoll=True, delete_on_done=0,
                    timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=60)
                elapsed = time.time() - start

                print "parse result:", parseResult['destination_key']
                print "Parse #", trial, "completed in", "%6.2f" % elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                h2o_cmd.runStoreView()
                # we don't delete the hex key. it will start spilling? slow

            h2j.pollWaitJobs(timeoutSecs=300, pollTimeoutSecs=30)
            h2o.tear_down_cloud()
            # sticky ports? wait a bit.
            time.sleep(5)
Beispiel #48
0
    def test_parse_nflx_loop_hdfs_fvec(self):
        h2o.beta_features = True
        print "Using the -.gz files from hdfs"
        # hdfs://<name node>/datasets/manyfiles-nflx-gz/file_1.dat.gz
        csvFilename = "hex_10"
        csvFilePattern = '*'  # all files in the folder

        trialMax = 2
        for tryHeap in [24]:
            print "\n", tryHeap, "GB heap, 1 jvm per host, import mr-0x6 hdfs, then parse"
            localhost = h2o.decide_if_localhost()
            if (localhost):
                h2o.build_cloud(java_heap_GB=tryHeap,
                                random_udp_drop=RANDOM_UDP_DROP,
                                base_port=55930,
                                use_hdfs=True,
                                hdfs_name_node=NAME_NODE,
                                hdfs_version=VERSION)
            else:
                h2o_hosts.build_cloud_with_hosts(
                    java_heap_GB=tryHeap,
                    random_udp_drop=RANDOM_UDP_DROP,
                    base_port=55600,
                    use_hdfs=True,
                    hdfs_name_node=NAME_NODE,
                    hdfs_version=VERSION)

            # don't raise exception if we find something bad in h2o stdout/stderr?
            # h2o.nodes[0].sandboxIgnoreErrors = True

            timeoutSecs = 500
            importFolderPath = "datasets/airlines_multi"

            for trial in range(trialMax):
                hex_key = csvFilename + "_" + str(trial) + ".hex"
                csvPathname = importFolderPath + "/" + csvFilePattern
                start = time.time()
                parseResult = h2i.import_parse(path=csvPathname,
                                               schema='hdfs',
                                               hex_key=hex_key,
                                               timeoutSecs=timeoutSecs,
                                               retryDelaySecs=10,
                                               pollTimeoutSecs=60)
                elapsed = time.time() - start

                print "parse result:", parseResult['destination_key']
                print "Parse #", trial, "completed in", "%6.2f" % elapsed, "seconds.", \
                    "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

                h2o_cmd.runStoreView()
                # we don't delete the hex key. it will start spilling? slow

            h2o.tear_down_cloud()
            # sticky ports? wait a bit.
            time.sleep(5)
Beispiel #49
0
def invoke_hosts_action(action, hosts_config, args, ec2_reservation=None):
    ids = [inst['id'] for inst in hosts_config['ec2_instances']]
    ips = [
        inst['private_ip_address'] for inst in hosts_config['ec2_instances']
    ]
    region = hosts_config['ec2_region']

    if (action == 'terminate'):
        terminate_instances(ids, region)
    elif (action == 'stop'):
        stop_instances(ids, region)
    elif (action == 'reboot'):
        reboot_instances(ids, region)
        wait_for_ssh(ips, skipAlive=False, requiredsuccess=10)
    elif (action == 'start'):
        start_instances(ids, region)
        # FIXME after start instances receive new IPs: wait_for_ssh(ips)
    elif (action == 'distribute_h2o'):
        pass
    elif (action == 'start_h2o'):
        try:
            h2o.config_json = args.hosts
            log("Starting H2O cloud...")
            h2o_hosts.build_cloud_with_hosts(timeoutSecs=120, retryDelaySecs=5)
            h2o.touch_cloud()
            log("Cloud started. Let's roll!")
            log("You can start for example here \033[93mhttp://{0}:{1}\033[0m".
                format(hosts_config['ec2_instances'][0]['public_dns_name'],
                       hosts_config['base_port']))
            if args.timeout:
                log("Cloud will shutdown after {0} seconds or use Ctrl+C to shutdown it."
                    .format(args.timeout))
                time.sleep(args.timeout)
            else:
                log("To kill the cloud please use Ctrl+C as usual.")
                while (True):
                    time.sleep(3600)
        except:
            print traceback.format_exc()
        finally:
            log("Goodbye H2O cloud...")
            h2o.tear_down_cloud()
            log("Cloud is gone.")

    elif (action == 'stop_h2o'):
        pass
    elif (action == 'clean_tmp'):
        execute_using_ssh_commands(hosts_config,
                                   ec2_reservation,
                                   command_string='sudo rm -rf /tmp/*; df')
    elif (action == 'nexec'):
        execute_using_ssh_commands(hosts_config,
                                   ec2_reservation,
                                   command_string=args.cmd)
Beispiel #50
0
    def setUpClass(cls):
        """Class-level setup: seed, enable beta features, start the cloud.

        Sets the module globals SEED and localhost. Both builders use a 4 GB
        heap; the local build takes its node count from NODES. HDFS options
        are intentionally not passed.
        """
        global SEED, localhost
        SEED = h2o.setup_random_seed()
        localhost = h2o.decide_if_localhost()
        h2o.beta_features = True  # for the beta tab in the browser
        heap_gb = 4
        if not localhost:
            h2o_hosts.build_cloud_with_hosts(java_heap_GB=heap_gb)
        else:
            h2o.build_cloud(node_count=NODES, java_heap_GB=heap_gb)
 def setUpClass(cls):
     global SEED
     SEED = random.randint(0, sys.maxint)
     # SEED =
     random.seed(SEED)
     print "\nUsing random seed:", SEED
     localhost = h2o.decide_if_localhost()
     if (localhost):
         h2o.build_cloud(1, java_heap_GB=10)
     else:
         h2o_hosts.build_cloud_with_hosts()
Beispiel #52
0
 def setUpClass(cls):
     """Class-level setup: start the cloud with cdh3 HDFS options.

     Both builders receive the same HDFS keyword arguments (name node
     192.168.1.176); the local build additionally passes 3 first.
     """
     hdfs_options = dict(
         use_hdfs=True,
         hdfs_version='cdh3',
         hdfs_name_node='192.168.1.176',
     )
     if h2o.decide_if_localhost():
         h2o.build_cloud(3, **hdfs_options)
     else:
         h2o_hosts.build_cloud_with_hosts(**hdfs_options)
Beispiel #53
0
 def setUpClass(cls):
     """Class-level setup: build the cloud with beta features on, then off.

     h2o.beta_features is True only while the cloud is being built (just to
     get a browser with the beta tab) and is reset to False afterwards.
     Sets the module globals SEED and localhost.
     """
     global SEED, localhost
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     h2o.beta_features = True
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(node_count=1)
     else:
         h2o.build_cloud(node_count=1, base_port=54327)
     h2o.beta_features = False
Beispiel #54
0
 def setUpClass(cls):
     SEED = random.randint(0, sys.maxint)
     # if you have to force to redo a test
     # SEED = 
     random.seed(SEED)
     print "\nUsing random seed:", SEED
     localhost = h2o.decide_if_localhost()
     if (localhost):
         h2o.build_cloud(2,java_heap_GB=1)
     else:
         h2o_hosts.build_cloud_with_hosts()
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     The local build uses base port 59300, a 4 GB heap, random_udp_drop and
     cdh4 HDFS options for the mr-0x6 name node.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts()
         return
     local_args = dict(
         base_port=59300,
         java_heap_GB=4,
         random_udp_drop=True,
         use_hdfs=True,
         hdfs_version='cdh4',
         hdfs_name_node='mr-0x6',
     )
     h2o.build_cloud(**local_args)
 def setUpClass(cls):
     """Class-level setup: seed and start the cloud with the benchmark log on.

     Sets the module globals SEED, localhost and tryHeap (fixed at 4, used
     as the local build's heap size).
     """
     global SEED, localhost, tryHeap
     tryHeap = 4
     SEED = h2o.setup_random_seed()
     localhost = h2o.decide_if_localhost()
     if not localhost:
         h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=True)
         return
     h2o.build_cloud(
         1, java_heap_GB=tryHeap, enable_benchmark_log=True, base_port=54321)
Beispiel #57
0
 def setUpClass(cls):
     """Class-level setup: build the cloud with h2o.verbose temporarily on.

     Verbose output is wanted only during cloud building, to see what node
     fails; it is switched off again afterwards. Both builders use base port
     54323 and RANDOM_UDP_DROP.
     """
     run_local = h2o.decide_if_localhost()
     h2o.verbose = True
     shared = dict(base_port=54323, random_udp_drop=RANDOM_UDP_DROP)
     if run_local:
         h2o.build_cloud(java_heap_GB=2, **shared)
     else:
         h2o_hosts.build_cloud_with_hosts(**shared)
     h2o.verbose = False
Beispiel #58
0
 def setUpClass(cls):
     global SEED
     ### SEED = random.randint(0, sys.maxint)
     ### SEED = 8389506152467586392
     SEED = 2437856391921621805
     random.seed(SEED)
     print "\nUsing random seed:", SEED
     localhost = h2o.decide_if_localhost()
     if (localhost):
         h2o.build_cloud(1, use_flatfile=True)
     else:
         h2o_hosts.build_cloud_with_hosts()
Beispiel #59
0
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     Assumes we're at 0xdata with its hdfs namenode. The h2o_hosts build
     names the amazon ec2 hdfs name node (10.78.14.235:9000, version 0.20.2);
     see https://github.com/0xdata/h2o/wiki/H2O-and-s3n. Original open
     question: is all hdfs info done through the hdfs_config that the ec2
     config sets up?
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     if localhost:
         h2o.build_cloud(1)
         return
     ec2_hdfs = dict(hdfs_name_node='10.78.14.235:9000', hdfs_version='0.20.2')
     h2o_hosts.build_cloud_with_hosts(1, **ec2_hdfs)
Beispiel #60
0
 def setUpClass(cls):
     """Class-level setup: set the global localhost and start the cloud.

     Assumes we're at 0xdata with its hdfs namenode. Both builders receive
     identical arguments: 1, plus cdh3 HDFS options for 192.168.1.176.
     """
     global localhost
     localhost = h2o.decide_if_localhost()
     hdfs_options = dict(
         use_hdfs=True,
         hdfs_version='cdh3',
         hdfs_name_node='192.168.1.176',
     )
     build = h2o.build_cloud if localhost else h2o_hosts.build_cloud_with_hosts
     build(1, **hdfs_options)