def check_cloud_and_setup_next():
    # Verify the current cloud, then tear it down and pause so the next
    # build in the caller's loop doesn't collide with lingering OS ports.
    # NOTE(review): relies on module globals `h2b`, `h2o`, `time`, and
    # `node_count` being defined elsewhere in this file -- confirm.
    h2b.browseTheCloud()
    h2o.verify_cloud_size()
    h2o.check_sandbox_for_errors()
    print "Tearing down cloud of size", len(h2o.nodes)
    h2o.tear_down_cloud()
    h2o.clean_sandbox()
    # wait to make sure no sticky ports or anything os-related
    # so let's expand the delay if larger number of jvms
    # 1 second per node seems good
    h2o.verboseprint("Waiting", node_count, "seconds to avoid OS sticky port problem")
    time.sleep(node_count)
def check_cloud_and_setup_next():
    # Verify the current cloud, tear it down, wait out OS sticky-port
    # issues, then advance the global base_port for the next cloud build.
    # NOTE(review): relies on module globals `h2b`, `h2o`, `time`,
    # `node_count`, `portsPerNode`, and `base_port` -- confirm they are
    # defined at module scope in the full file.
    h2b.browseTheCloud()
    h2o.verify_cloud_size()
    h2o.check_sandbox_for_errors()
    print "Tearing down cloud of size", len(h2o.nodes)
    h2o.tear_down_cloud()
    h2o.clean_sandbox()
    # wait to make sure no sticky ports or anything os-related
    # so let's expand the delay if larger number of jvms
    # 1 second per node seems good
    h2o.verboseprint("Waiting", node_count, "seconds to avoid OS sticky port problem")
    time.sleep(node_count)
    # stick port issues (os)
    # wait a little for jvms to really clear out?
    # and increment the base_port
    global base_port
    # if we change the port we have to upload the flatfile again
    # maybe just use_flatfile=false
    base_port += portsPerNode * node_count
def test_remote_Cloud(self): trySize = 0 # FIX! we should increment this from 1 to N? for i in range(1,10): # timeout wants to be larger for large numbers of hosts * node_count # don't want to reload jar/flatfile, so use build_cloud timeoutSecs = max(60, 2*(len(h2o_hosts.hosts) * node_count)) h2o.build_cloud(node_count, hosts=h2o_hosts.hosts, use_flatfile=True, timeoutSecs=timeoutSecs, retryDelaySecs=0.5) h2o.verify_cloud_size() h2o.check_sandbox_for_errors() print "Tearing down cloud of size", len(h2o.nodes) h2o.tear_down_cloud() h2o.clean_sandbox() # wait to make sure no sticky ports or anything os-related # so let's expand the delay if larger number of jvms # 1 second per node seems good h2o.verboseprint("Waiting", node_count, "seconds to avoid OS sticky port problem") sys.stdout.write('.') sys.stdout.flush() time.sleep(node_count)
def setUpClass(cls):
    # Start each test class from a clean sandbox (log/scratch) directory.
    h2o.clean_sandbox()
import h2o, h2o_cmd import h2o_browse as h2b import time import psutil import webbrowser #import os, json, unittest, time, shutil, sys #sys.path.extend(['.','..','py']) try: for proc in psutil.process_iter(): if proc.name == 'java.exe': proc.kill() print 'Building cloud' h2o.clean_sandbox() #h2o.parse_our_args() h2o.build_cloud(4, java_heap_GB=1, capture_output=False, classpath=True) # h2o.nodes = [h2o.ExternalH2O()] print 'KMeans' file = csvPathname = h2o.find_file('smalldata/covtype/covtype.20k.data') h2o_cmd.runKMeans(csvPathname=file, key='covtype', k=7) print 'Web' webbrowser.open("http://localhost:54323/KMeansProgress.html?destination_key=covtype.kmeans") except KeyboardInterrupt: print 'Interrupted' finally: print 'EAT THE BABIES' # h2o.tear_down_cloud()
def setUpClass(cls):
    # Clean the sandbox, then build a 3-node local cloud with sigar enabled.
    h2o.clean_sandbox()
    h2o.build_cloud(3,sigar=True)
def setUpClass(cls):
    # Clean the sandbox and build a 3-node cloud (sigar enabled), saving
    # the node list in the module-level `nodes` global for the tests.
    h2o.clean_sandbox()
    global nodes
    nodes = h2o.build_cloud(3,sigar=True)
def build_cloud_with_hosts(node_count=None, **kwargs): ## if not h2o.disable_time_stamp: ## sys.stdout = h2o.OutWrapper(sys.stdout) # legacy: we allow node_count to be positional. # if it's used positionally, stick in in kwargs (overwrite if there too) if node_count is not None: # we use h2o_per_host in the config file. will translate to node_count for build_cloud kwargs['h2o_per_host'] = node_count # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used node_count = None # randomizing default base_port used offset = random.randint(0, 31) # for new params: # Just update this list with the param name and default and you're done allParamsDefault = { 'use_flatfile': None, 'use_hdfs': True, # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly 'hdfs_name_node': None, 'hdfs_config': None, 'hdfs_version': None, 'base_port': None, 'java_heap_GB': None, 'java_heap_MB': None, 'java_extra_args': None, 'timeoutSecs': 60, 'retryDelaySecs': 2, 'cleanup': True, 'slow_connection': False, 'h2o_per_host': 2, 'ip': '["127.0.0.1"]', # this is for creating the hosts list 'base_port': 54300 + offset, 'username': '******', 'password': None, 'rand_shuffle': True, 'use_home_for_ice': False, 'key_filename': None, 'aws_credentials': None, 'redirect_import_folder_to_s3_path': None, 'redirect_import_folder_to_s3n_path': None, 'disable_h2o_log': False, 'enable_benchmark_log': False, 'h2o_remote_buckets_root': None, 'conservative': False, 'create_json': False, # pass this from cloud building to the common "release" h2o_test.py classes # for deciding whether keys should be deleted when a test ends. 'delete_keys_at_teardown': False, 'clone_cloud': False, 'cloud_name': None, } # initialize the default values paramsToUse = {} for k, v in allParamsDefault.iteritems(): paramsToUse[k] = allParamsDefault.setdefault(k, v) # allow user to specify the config json at the command line. config_json is a global. 
if h2o.config_json: configFilename = find_config(h2o.config_json) else: # configs may be in the testdir_hosts configFilename = find_config(h2o.default_hosts_file()) h2o.verboseprint("Loading host config from", configFilename) with open(configFilename, 'rb') as fp: hostDict = json.load(fp) for k, v in hostDict.iteritems(): # Don't take in params that we don't have in the list above # Because michal has extra params in here for ec2! and comments! if k in paramsToUse: paramsToUse[k] = hostDict.setdefault(k, v) # Now overwrite with anything passed by the test # whatever the test passes, always overrules the config json for k, v in kwargs.iteritems(): paramsToUse[k] = kwargs.setdefault(k, v) # Let's assume we should set the h2o_remote_buckets_root (only affects # schema=local), to the home directory of whatever remote user # is being used for the hosts. Better than living with a decision # we made from scanning locally (remote might not match local) # assume the remote user has a /home/<username> (linux targets?) 
# This only affects import folder path name generation by python tests if paramsToUse['username']: paramsToUse[ 'h2o_remote_buckets_root'] = "/home/" + paramsToUse['username'] h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse) #******************** global hosts hosts = [] # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1 # hosts should be None for everyone if normal build_cloud is desired if paramsToUse['ip'] == ["127.0.0.1"]: hosts = None else: h2o.verboseprint("About to RemoteHost, likely bad ip if hangs") hosts = [] for h in paramsToUse['ip']: h2o.verboseprint("Connecting to:", h) # expand any ~ or ~user in the string key_filename = paramsToUse['key_filename'] if key_filename: # don't try to expand if None key_filename = os.path.expanduser(key_filename) hosts.append( h2o.RemoteHost(addr=h, username=paramsToUse['username'], password=paramsToUse['password'], key_filename=key_filename)) # done with these, don't pass to build_cloud paramsToUse.pop( 'ip' ) # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud # we want to save username in the node info. 
don't pop # paramsToUse.pop('username') paramsToUse.pop('password') paramsToUse.pop('key_filename') # flatfile is going into sandbox (LOG_DIR) now..so clean it first (will make sandbox dir if it doesn't exist already) h2o.clean_sandbox() # handles hosts=None correctly h2o.write_flatfile( node_count=paramsToUse['h2o_per_host'], # let the env variable H2O_PORT_OFFSET add in there base_port=paramsToUse['base_port'], hosts=hosts, rand_shuffle=paramsToUse['rand_shuffle'], port_offset=h2o.get_port_offset(), ) if hosts is not None: # this uploads the flatfile too h2o.upload_jar_to_remote_hosts( hosts, slow_connection=paramsToUse['slow_connection']) # timeout wants to be larger for large numbers of hosts * h2oPerHost # use 60 sec min, 5 sec per node. timeoutSecs = max(60, 8 * (len(hosts) * paramsToUse['h2o_per_host'])) else: # for 127.0.0.1 case timeoutSecs = 60 paramsToUse.pop('slow_connection') # sandbox gets cleaned in build_cloud # legacy param issue node_count = paramsToUse['h2o_per_host'] paramsToUse.pop('h2o_per_host') print "java_heap_GB", paramsToUse['java_heap_GB'] # don't wipe out or create the sandbox. already did here, and put flatfile there h2o.build_cloud(node_count, hosts=hosts, init_sandbox=False, **paramsToUse)
def build_cloud_with_hosts(node_count=None, **kwargs): ## if not h2o.disable_time_stamp: ## sys.stdout = h2o.OutWrapper(sys.stdout) # legacy: we allow node_count to be positional. # if it's used positionally, stick in in kwargs (overwrite if there too) if node_count is not None: # we use h2o_per_host in the config file. will translate to node_count for build_cloud kwargs['h2o_per_host'] = node_count # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used node_count = None # randomizing default base_port used offset = random.randint(0,31) # for new params: # Just update this list with the param name and default and you're done allParamsDefault = { 'use_flatfile': None, 'use_hdfs': True, # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly 'hdfs_name_node': None, 'hdfs_config': None, 'hdfs_version': None, 'base_port': None, 'java_heap_GB': None, 'java_heap_MB': None, 'java_extra_args': None, 'timeoutSecs': 60, 'retryDelaySecs': 2, 'cleanup': True, 'slow_connection': False, 'h2o_per_host': 2, 'ip':'["127.0.0.1"]', # this is for creating the hosts list 'base_port': 54300 + offset, 'username':'******', 'password': None, 'rand_shuffle': True, 'use_home_for_ice': False, 'key_filename': None, 'aws_credentials': None, 'redirect_import_folder_to_s3_path': None, 'redirect_import_folder_to_s3n_path': None, 'disable_h2o_log': False, 'enable_benchmark_log': False, 'h2o_remote_buckets_root': None, 'conservative': False, 'create_json': False, # pass this from cloud building to the common "release" h2o_test.py classes # for deciding whether keys should be deleted when a test ends. 'delete_keys_at_teardown': False, 'clone_cloud': False, 'cloud_name': None, } # initialize the default values paramsToUse = {} for k,v in allParamsDefault.iteritems(): paramsToUse[k] = allParamsDefault.setdefault(k, v) # allow user to specify the config json at the command line. config_json is a global. 
if h2o.config_json: configFilename = find_config(h2o.config_json) else: # configs may be in the testdir_hosts configFilename = find_config(h2o.default_hosts_file()) h2o.verboseprint("Loading host config from", configFilename) with open(configFilename, 'rb') as fp: hostDict = json.load(fp) for k,v in hostDict.iteritems(): # Don't take in params that we don't have in the list above # Because michal has extra params in here for ec2! and comments! if k in paramsToUse: paramsToUse[k] = hostDict.setdefault(k, v) # Now overwrite with anything passed by the test # whatever the test passes, always overrules the config json for k,v in kwargs.iteritems(): paramsToUse[k] = kwargs.setdefault(k, v) # Let's assume we should set the h2o_remote_buckets_root (only affects # schema=local), to the home directory of whatever remote user # is being used for the hosts. Better than living with a decision # we made from scanning locally (remote might not match local) # assume the remote user has a /home/<username> (linux targets?) 
# This only affects import folder path name generation by python tests if paramsToUse['username']: paramsToUse['h2o_remote_buckets_root'] = "/home/" + paramsToUse['username'] h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse) #******************** global hosts hosts = [] # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1 # hosts should be None for everyone if normal build_cloud is desired if paramsToUse['ip']== ["127.0.0.1"]: hosts = None else: h2o.verboseprint("About to RemoteHost, likely bad ip if hangs") hosts = [] for h in paramsToUse['ip']: h2o.verboseprint("Connecting to:", h) # expand any ~ or ~user in the string key_filename = paramsToUse['key_filename'] if key_filename: # don't try to expand if None key_filename=os.path.expanduser(key_filename) hosts.append(h2o.RemoteHost(addr=h, username=paramsToUse['username'], password=paramsToUse['password'], key_filename=key_filename)) # done with these, don't pass to build_cloud paramsToUse.pop('ip') # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud # we want to save username in the node info. don't pop # paramsToUse.pop('username') paramsToUse.pop('password') paramsToUse.pop('key_filename') # flatfile is going into sandbox (LOG_DIR) now..so clean it first (will make sandbox dir if it doesn't exist already) h2o.clean_sandbox() # handles hosts=None correctly h2o.write_flatfile( node_count=paramsToUse['h2o_per_host'], # let the env variable H2O_PORT_OFFSET add in there base_port=paramsToUse['base_port'], hosts=hosts, rand_shuffle=paramsToUse['rand_shuffle'], port_offset=h2o.get_port_offset(), ) if hosts is not None: # this uploads the flatfile too h2o.upload_jar_to_remote_hosts(hosts, slow_connection=paramsToUse['slow_connection']) # timeout wants to be larger for large numbers of hosts * h2oPerHost # use 60 sec min, 5 sec per node. 
timeoutSecs = max(60, 8*(len(hosts) * paramsToUse['h2o_per_host'])) else: # for 127.0.0.1 case timeoutSecs = 60 paramsToUse.pop('slow_connection') # sandbox gets cleaned in build_cloud # legacy param issue node_count = paramsToUse['h2o_per_host'] paramsToUse.pop('h2o_per_host') print "java_heap_GB", paramsToUse['java_heap_GB'] # don't wipe out or create the sandbox. already did here, and put flatfile there h2o.build_cloud(node_count, hosts=hosts, init_sandbox=False, **paramsToUse)
# Stress script: build and tear down a multi-host cloud in an endless loop,
# printing a '.' per iteration; stop with Ctrl-C.
import h2o, h2o_cmd
import h2o_browse as h2b
import time, sys, h2o_hosts

h2o.clean_sandbox()
h2o.parse_our_args()

try:
    while True:
        # progress marker per build/teardown cycle
        sys.stdout.write('.')
        sys.stdout.flush()
        #h2o.build_cloud(5)
        h2o_hosts.build_cloud_with_hosts()
        h2o.tear_down_cloud()
        h2o.check_sandbox_for_errors()
except KeyboardInterrupt:
    print 'Interrupted'
finally:
    # best-effort final cleanup; teardown runs again even after a normal
    # loop-iteration teardown
    print 'EAT THE BABIES'
    h2o.tear_down_cloud()
    h2o.check_sandbox_for_errors()