Ejemplo n.º 1
0
 def setUpClass(cls):
     """Build a remote multi-host cloud once for the whole test class.

     NOTE(review): reads module-level globals node_count and base_port
     (defined outside this chunk) -- confirm in the enclosing test module.
     """
     # do the first one to build up hosts
     # so we don't repeatedly copy the jar
     # have to make sure base_port is the same on both!
     h2o.write_flatfile(node_count=node_count, base_port=base_port)
     h2o_hosts.build_cloud_with_hosts(node_count, base_port=base_port, use_flatfile=True)
     # sanity checks: the cloud reached the expected size, and no errors
     # were left behind in the sandbox logs
     h2o.verify_cloud_size()
     h2o.check_sandbox_for_errors()
Ejemplo n.º 2
0
def build_cloud(hosts, node_count=2, base_port=54321, **kwargs):
    """Write the flatfile, push the jar to the remote hosts, then build the cloud.

    Caller-supplied kwargs win over the positional node_count/base_port/hosts
    values; use_flatfile is always forced to the shared flatfile name.
    Returns whatever h2o.build_cloud returns.
    """
    # always point the cloud at the shared flatfile
    kwargs['use_flatfile'] = h2o.flatfile_name()
    h2o.write_flatfile(node_count=node_count, base_port=base_port, hosts=hosts)
    h2o.upload_jar_to_remote_hosts(hosts, slow_connection=True)
    # fill in only the values the caller didn't already supply
    for key, value in (('node_count', node_count),
                       ('base_port', base_port),
                       ('hosts', hosts)):
        if key not in kwargs:
            kwargs[key] = value
    return h2o.build_cloud(**kwargs)
Ejemplo n.º 3
0
    def setUpClass(cls):
        """Build a remote multi-host cloud once for the whole test class.

        NOTE(review): reads module-level globals node_count and base_port
        (defined outside this chunk) -- confirm in the enclosing test module.
        """
        # do the first one to build up hosts
        # so we don't repeatedly copy the jar
        # have to make sure base_port is the same on both!
        # just do once and don't clean sandbox
        h2o.write_flatfile(node_count=node_count, base_port=base_port)
        start = time.time()
        h2o_hosts.build_cloud_with_hosts(node_count, base_port=base_port, 
            use_flatfile=True, java_heap_GB=1)
        print "Cloud of", len(h2o.nodes), "built in", time.time()-start, "seconds"

        # sanity checks: expected cloud size, no errors in the sandbox logs
        h2o.verify_cloud_size()
        h2o.check_sandbox_for_errors()
Ejemplo n.º 4
0
 def setUpClass(cls):
     """Build a remote multi-host cloud once and remember its total node count.

     NOTE(review): reads module globals node_count and base_port, and writes
     the module global totalNodes used by later port/timeout calculations --
     confirm those globals exist in the enclosing test module.
     """
     # do the first one to build up hosts
     # so we don't repeatedly copy the jar
     # have to make sure base_port is the same on both!
     print "base_port:", base_port
     # fail early if any port in the group is already in use
     h2o.check_port_group(base_port)
     h2o.write_flatfile(node_count=node_count, base_port=base_port)
     start = time.time()
     h2o_hosts.build_cloud_with_hosts(node_count, base_port=base_port, 
         use_flatfile=True, java_heap_GB=1)
     print "jar/flatfile copied and Cloud of", len(h2o.nodes), "built in", time.time()-start, "seconds"
     # have to remember total # of nodes for the next class. it will stay the same
     # when we tear down the cloud, we zero the nodes list
     global totalNodes
     totalNodes = len(h2o.nodes)
     check_cloud_and_setup_next()
Ejemplo n.º 5
0
    def test_remote_cloud(self):
        """Repeatedly rebuild the remote cloud on successive port groups.

        NOTE(review): depends on module globals base_port, portsPerNode,
        node_count and totalNodes, and on h2o_hosts.hosts being populated by
        an earlier build -- confirm in the enclosing test module.
        """
        global base_port
        # FIX! we should increment this from 1 to N? 
        for i in range(1,10):
            # timeout wants to be larger for large numbers of hosts * node_count
            # don't want to reload jar/flatfile, so use build_cloud
            # step to the next non-overlapping port group
            base_port += portsPerNode * node_count
            print "base_port:", base_port
            timeoutSecs = max(60, 8 * totalNodes)
            print "totalNodes:", totalNodes, "timeoutSecs:", timeoutSecs

            # fail early if any port in the new group is already in use
            h2o.check_port_group(base_port)
            # FIX! have to recopy the flatfile if we change base_port? hmm.. won't work
            h2o.write_flatfile(node_count=node_count, base_port=base_port)
            
            # FIX! ..just use_flatfile=False for now on these subsequent ones. rely on multicast
            start = time.time()
            h2o.build_cloud(node_count, base_port=base_port, hosts=h2o_hosts.hosts, use_flatfile=False, 
                timeoutSecs=timeoutSecs, retryDelaySecs=0.5)
            print "Cloud of", len(h2o.nodes), "built in", time.time()-start, "seconds"
            check_cloud_and_setup_next()
Ejemplo n.º 6
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    sys.stdout = h2o.OutWrapper(sys.stdout)
    # legacy: we allow node_count to be positional. 
    # if it's used positionally, stick in in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs['h2o_per_host'] = node_count
        # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used
        node_count = None

    # randomizing default base_port used
    offset = random.randint(0,31)
    # for new params:
    # Just update this list with the param name and default and you're done
    allParamsDefault = {
        'use_flatfile': None,
        'use_hdfs': True, # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly
        'hdfs_name_node': None, 
        'hdfs_config': None,
        'hdfs_version': None,
        'base_port': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,

        'timeoutSecs': 60, 
        'retryDelaySecs': 2, 
        'cleanup': True,
        'slow_connection': False,

        'h2o_per_host': 2,
        'ip':'["127.0.0.1"]', # this is for creating the hosts list
        'base_port': 54300 + offset,
        'username':'******',
        'password': None,
        'rand_shuffle': True,

        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
        'h2o_remote_buckets_root': None,
        'conservative': False,
        'create_json': False,
        # pass this from cloud building to the common "release" h2o_test.py classes
        # for deciding whether keys should be deleted when a test ends.
        'delete_keys_at_teardown': False, 
        'clone_cloud': False,
        'cloud_name': None,
    }
    # initialize the default values
    paramsToUse = {}
    for k,v in allParamsDefault.iteritems():
        paramsToUse[k] = allParamsDefault.setdefault(k, v)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(h2o.default_hosts_file())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
         hostDict = json.load(fp)

    for k,v in hostDict.iteritems():
        # Don't take in params that we don't have in the list above
        # Because michal has extra params in here for ec2! and comments!
        if k in paramsToUse:
            paramsToUse[k] = hostDict.setdefault(k, v)

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    for k,v in kwargs.iteritems():
        paramsToUse[k] = kwargs.setdefault(k, v)


    # Let's assume we should set the h2o_remote_buckets_root (only affects
    # schema=local), to the home directory of whatever remote user
    # is being used for the hosts. Better than living with a decision
    # we made from scanning locally (remote might not match local)
    # assume the remote user has a /home/<username> (linux targets?)
    # This only affects import folder path name generation by python tests
    if paramsToUse['username']:
        paramsToUse['h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']

    h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    hosts = []
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip']== ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse['ip']:
            h2o.verboseprint("Connecting to:", h)
            # expand any ~ or ~user in the string
            key_filename = paramsToUse['key_filename']
            if key_filename: # don't try to expand if None
               key_filename=os.path.expanduser(key_filename)
            hosts.append(h2o.RemoteHost(addr=h, 
                username=paramsToUse['username'], password=paramsToUse['password'], key_filename=key_filename))

    # done with these, don't pass to build_cloud
    paramsToUse.pop('ip') # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud

    # we want to save username in the node info. don't pop
    # paramsToUse.pop('username')
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')
   
    # handles hosts=None correctly
    h2o.write_flatfile(
        node_count=paramsToUse['h2o_per_host'],
        base_port=paramsToUse['base_port'],
        hosts=hosts,
        rand_shuffle=paramsToUse['rand_shuffle']
        )

    if hosts is not None:
        # this uploads the flatfile too
        h2o.upload_jar_to_remote_hosts(hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2oPerHost
        # use 60 sec min, 5 sec per node.
        timeoutSecs = max(60, 8*(len(hosts) * paramsToUse['h2o_per_host']))
    else: # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue
    node_count = paramsToUse['h2o_per_host']
    paramsToUse.pop('h2o_per_host')
    print "java_heap_GB", paramsToUse['java_heap_GB']
    h2o.build_cloud(node_count, hosts=hosts, **paramsToUse)
Ejemplo n.º 7
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    # legacy: we allow node_count to be positional.
    # if it's used positionally, stick in in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs['h2o_per_host'] = node_count
        # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used
        node_count = None

    # randomizing default base_port used
    offset = random.randint(0, 31)
    # for new params:
    # Just update this list with the param name and default and you're done
    allParamsDefault = {
        'use_flatfile': None,
        'use_hdfs':
        True,  # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly
        'hadoop': False,
        'hdfs_name_node': None,
        'hdfs_config': None,
        'hdfs_version': None,
        'base_port': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,
        'sigar': False,
        'timeoutSecs': 60,
        'retryDelaySecs': 2,
        'cleanup': True,
        'slow_connection': False,
        'h2o_per_host': 2,
        'ip': '["127.0.0.1"]',  # this is for creating the hosts list
        'base_port': 54300 + offset,
        'username': '******',
        'password': None,
        'rand_shuffle': True,
        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
    }
    # initialize the default values
    paramsToUse = {}
    for k, v in allParamsDefault.iteritems():
        paramsToUse[k] = allParamsDefault.setdefault(k, v)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(h2o.default_hosts_file())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
        hostDict = json.load(fp)

    for k, v in hostDict.iteritems():
        # Don't take in params that we don't have in the list above
        # Because michal has extra params in here for ec2! and comments!
        if k in paramsToUse:
            paramsToUse[k] = hostDict.setdefault(k, v)

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    for k, v in kwargs.iteritems():
        paramsToUse[k] = kwargs.setdefault(k, v)

    h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip'] == ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse['ip']:
            h2o.verboseprint("Connecting to:", h)
            hosts.append(
                h2o.RemoteHost(addr=h,
                               username=paramsToUse['username'],
                               password=paramsToUse['password'],
                               key_filename=paramsToUse['key_filename']))

    # done with these, don't pass to build_cloud
    paramsToUse.pop(
        'ip'
    )  # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud
    paramsToUse.pop('username')
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')

    # handles hosts=None correctly
    h2o.write_flatfile(node_count=paramsToUse['h2o_per_host'],
                       base_port=paramsToUse['base_port'],
                       hosts=hosts,
                       rand_shuffle=paramsToUse['rand_shuffle'])

    if not paramsToUse['hadoop'] and hosts is not None:
        # this uploads the flatfile too
        h2o.upload_jar_to_remote_hosts(
            hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2oPerHost
        # use 60 sec min, 5 sec per node.
        timeoutSecs = max(60, 8 * (len(hosts) * paramsToUse['h2o_per_host']))
    else:  # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue
    node_count = paramsToUse['h2o_per_host']
    paramsToUse.pop('h2o_per_host')
    print "java_heap_GB", paramsToUse['java_heap_GB']
    h2o.build_cloud(node_count, hosts=hosts, **paramsToUse)
Ejemplo n.º 8
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    """Build an h2o cloud on the hosts described by a json host-config file.

    node_count may be passed positionally (legacy); it is translated into the
    'h2o_per_host' param used by the config file and build_cloud.
    Param precedence, lowest to highest: defaults below, the host config
    json, then kwargs passed by the test.
    Side effects: sets the module-global 'hosts', writes the flatfile and
    uploads the jar to remote hosts.
    """
    # legacy: we allow node_count to be positional.
    # if it's used positionally, stick it in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs["h2o_per_host"] = node_count
        node_count = None

    # randomize the default base_port to make sticky-port collisions less likely
    offset = random.randint(0, 31)
    # for new params: just add the name and default to this dict and you're done
    allParamsDefault = {
        "use_flatfile": None,
        "use_hdfs": None,
        "hdfs_name_node": None,
        "hdfs_config": None,
        "hdfs_version": None,
        "java_heap_GB": None,
        "java_heap_MB": None,
        "java_extra_args": None,
        "sigar": False,
        "timeoutSecs": 60,
        "retryDelaySecs": 2,
        "cleanup": True,
        "slow_connection": False,
        "h2o_per_host": 2,
        # FIX: must be a list -- the old string default '["127.0.0.1"]' could
        # never compare equal to ["127.0.0.1"] below, so the localhost special
        # case never triggered when the config omitted 'ip'
        "ip": ["127.0.0.1"],  # this is for creating the hosts list
        # NOTE: a duplicate, dead "base_port": None entry was removed; the
        # value below always won in the dict literal anyway
        "base_port": 54300 + offset,
        "username": "******",
        "password": None,
        "rand_shuffle": True,
        "use_home_for_ice": False,
        "key_filename": None,
        "aws_credentials": None,
        "redirect_import_folder_to_s3_path": None,
        "disable_h2o_log": False,
        "enable_benchmark_log": False,
    }
    # start from the defaults (the old per-key setdefault loop was a no-op copy)
    paramsToUse = dict(allParamsDefault)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(h2o.default_hosts_file())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, "rb") as fp:
        hostDict = json.load(fp)

    # only accept params we know about; the config may carry extra
    # ec2-only params and comments
    for k, v in hostDict.iteritems():
        if k in paramsToUse:
            paramsToUse[k] = v

    # whatever the test passes always overrules the config json
    paramsToUse.update(kwargs)

    h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    # ********************
    global hosts
    # special case ip == ["127.0.0.1"]: hosts stays None and the normal
    # (local) build_cloud path is used
    if paramsToUse["ip"] == ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse["ip"]:
            h2o.verboseprint("Connecting to:", h)
            hosts.append(
                h2o.RemoteHost(
                    addr=h,
                    username=paramsToUse["username"],
                    password=paramsToUse["password"],
                    key_filename=paramsToUse["key_filename"],
                )
            )

    # done with these, don't pass them to build_cloud.
    # "ip" (the config's list of ips) is replaced by the 'hosts' argument.
    paramsToUse.pop("ip")
    paramsToUse.pop("username")
    paramsToUse.pop("password")
    paramsToUse.pop("key_filename")

    # handles hosts=None correctly
    h2o.write_flatfile(
        node_count=paramsToUse["h2o_per_host"],
        base_port=paramsToUse["base_port"],
        hosts=hosts,
        rand_shuffle=paramsToUse["rand_shuffle"],
    )

    if hosts is not None:
        # this uploads the flatfile too
        h2o.upload_jar_to_remote_hosts(hosts, slow_connection=paramsToUse["slow_connection"])
        # timeout wants to be larger for large numbers of hosts * h2o_per_host.
        # NOTE(review): this computed timeoutSecs is never passed on --
        # build_cloud receives paramsToUse["timeoutSecs"] via **paramsToUse.
        timeoutSecs = max(60, 8 * (len(hosts) * paramsToUse["h2o_per_host"]))
    else:  # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop("slow_connection")

    # sandbox gets cleaned in build_cloud
    # legacy param issue: h2o_per_host becomes build_cloud's node_count
    node_count = paramsToUse.pop("h2o_per_host")
    h2o.build_cloud(node_count, hosts=hosts, **paramsToUse)
Ejemplo n.º 9
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    ## if not h2o.disable_time_stamp:
    ##      sys.stdout = h2o.OutWrapper(sys.stdout)
    # legacy: we allow node_count to be positional.
    # if it's used positionally, stick in in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs['h2o_per_host'] = node_count
        # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used
        node_count = None

    # randomizing default base_port used
    offset = random.randint(0, 31)
    # for new params:
    # Just update this list with the param name and default and you're done
    allParamsDefault = {
        'use_flatfile': None,
        'use_hdfs':
        True,  # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly
        'hdfs_name_node': None,
        'hdfs_config': None,
        'hdfs_version': None,
        'base_port': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,
        'timeoutSecs': 60,
        'retryDelaySecs': 2,
        'cleanup': True,
        'slow_connection': False,
        'h2o_per_host': 2,
        'ip': '["127.0.0.1"]',  # this is for creating the hosts list
        'base_port': 54300 + offset,
        'username': '******',
        'password': None,
        'rand_shuffle': True,
        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
        'h2o_remote_buckets_root': None,
        'conservative': False,
        'create_json': False,
        # pass this from cloud building to the common "release" h2o_test.py classes
        # for deciding whether keys should be deleted when a test ends.
        'delete_keys_at_teardown': False,
        'clone_cloud': False,
        'cloud_name': None,
    }
    # initialize the default values
    paramsToUse = {}
    for k, v in allParamsDefault.iteritems():
        paramsToUse[k] = allParamsDefault.setdefault(k, v)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(h2o.default_hosts_file())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
        hostDict = json.load(fp)

    for k, v in hostDict.iteritems():
        # Don't take in params that we don't have in the list above
        # Because michal has extra params in here for ec2! and comments!
        if k in paramsToUse:
            paramsToUse[k] = hostDict.setdefault(k, v)

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    for k, v in kwargs.iteritems():
        paramsToUse[k] = kwargs.setdefault(k, v)

    # Let's assume we should set the h2o_remote_buckets_root (only affects
    # schema=local), to the home directory of whatever remote user
    # is being used for the hosts. Better than living with a decision
    # we made from scanning locally (remote might not match local)
    # assume the remote user has a /home/<username> (linux targets?)
    # This only affects import folder path name generation by python tests
    if paramsToUse['username']:
        paramsToUse[
            'h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']

    h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    hosts = []
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip'] == ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse['ip']:
            h2o.verboseprint("Connecting to:", h)
            # expand any ~ or ~user in the string
            key_filename = paramsToUse['key_filename']
            if key_filename:  # don't try to expand if None
                key_filename = os.path.expanduser(key_filename)
            hosts.append(
                h2o.RemoteHost(addr=h,
                               username=paramsToUse['username'],
                               password=paramsToUse['password'],
                               key_filename=key_filename))

    # done with these, don't pass to build_cloud
    paramsToUse.pop(
        'ip'
    )  # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud

    # we want to save username in the node info. don't pop
    # paramsToUse.pop('username')
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')

    # flatfile is going into sandbox (LOG_DIR) now..so clean it first (will make sandbox dir if it doesn't exist already)
    h2o.clean_sandbox()

    # handles hosts=None correctly
    h2o.write_flatfile(
        node_count=paramsToUse['h2o_per_host'],
        # let the env variable H2O_PORT_OFFSET add in there
        base_port=paramsToUse['base_port'],
        hosts=hosts,
        rand_shuffle=paramsToUse['rand_shuffle'],
        port_offset=h2o.get_port_offset(),
    )

    if hosts is not None:
        # this uploads the flatfile too
        h2o.upload_jar_to_remote_hosts(
            hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2oPerHost
        # use 60 sec min, 5 sec per node.
        timeoutSecs = max(60, 8 * (len(hosts) * paramsToUse['h2o_per_host']))
    else:  # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue
    node_count = paramsToUse['h2o_per_host']
    paramsToUse.pop('h2o_per_host')
    print "java_heap_GB", paramsToUse['java_heap_GB']
    # don't wipe out or create the sandbox. already did here, and put flatfile there
    h2o.build_cloud(node_count, hosts=hosts, init_sandbox=False, **paramsToUse)
Ejemplo n.º 10
0
def build_cloud_with_hosts(node_count=None, use_flatfile=None, 
    use_hdfs=None, hdfs_name_node=None, hdfs_config=None,  hdfs_version=None,
    base_port=None,
    java_heap_GB=None, java_extra_args=None,
    **kwargs):
    """Build an h2o cloud using a per-user json host config.

    Explicit keyword args override the values read from the config json.
    Extra **kwargs are passed straight through to h2o.build_cloud.
    Side effects: sets the module-global 'hosts', writes the flatfile and
    uploads the jar to remote hosts.
    """
    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config('pytest_config-%s.json' % getpass.getuser())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
        hostDict = json.load(fp)

    slow_connection = hostDict.get('slow_connection', False)
    # FIX: default must be a list -- the old string default '127.0.0.1' could
    # never compare equal to ["127.0.0.1"] below, and iterating it would have
    # treated every character as a separate hostname
    hostList = hostDict.get('ip', ['127.0.0.1'])

    h2oPerHost = hostDict.get('h2o_per_host', 2)
    # default should avoid colliding with sri's demo cloud ports: 54321.
    # we get some problems with sticky ports during back to back regression
    # runs, so randomize the port to make collisions less likely
    offset = random.randint(0, 31)
    basePort = hostDict.get('base_port', 55300 + offset)
    username = hostDict.get('username', '0xdiag')
    password = hostDict.get('password', None)
    sigar = hostDict.get('sigar', False)

    useFlatfile = hostDict.get('use_flatfile', False)

    useHdfs = hostDict.get('use_hdfs', False)
    hdfsNameNode = hostDict.get('hdfs_name_node', '192.168.1.151')
    hdfsVersion = hostDict.get('hdfs_version', 'cdh3u5')
    hdfsConfig = hostDict.get('hdfs_config', None)

    # default to None, which means the arg isn't used and java decides for us.
    # useful for small dram systems, and for testing that
    javaHeapGB = hostDict.get('java_heap_GB', None)
    javaExtraArgs = hostDict.get('java_extra_args', None)

    use_home_for_ice = hostDict.get('use_home_for_ice', False)

    # a caller's explicit argument overrides the json.
    # FIX! other **kwargs are passed through but don't override the json --
    # fix here if that's desired
    if node_count is not None:
        h2oPerHost = node_count
    if use_flatfile is not None:
        useFlatfile = use_flatfile
    if use_hdfs is not None:
        useHdfs = use_hdfs
    if hdfs_name_node is not None:
        hdfsNameNode = hdfs_name_node
    if hdfs_version is not None:
        hdfsVersion = hdfs_version
    if hdfs_config is not None:
        hdfsConfig = hdfs_config
    if java_heap_GB is not None:
        javaHeapGB = java_heap_GB
    if java_extra_args is not None:
        javaExtraArgs = java_extra_args
    if base_port is not None:
        basePort = base_port

    h2o.verboseprint("host config: ", username, password, 
        h2oPerHost, basePort, sigar, useFlatfile, 
        useHdfs, hdfsNameNode, hdfsVersion, hdfsConfig, javaHeapGB, use_home_for_ice,
        hostList, **kwargs)

    #********************
    global hosts
    # special case hostList == ["127.0.0.1"]: hosts stays None and the
    # normal (local) build_cloud path is used
    if hostList == ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in hostList:
            h2o.verboseprint("Connecting to:", h)
            hosts.append(h2o.RemoteHost(h, username, password))

    # handles hosts=None correctly
    h2o.write_flatfile(node_count=h2oPerHost, base_port=basePort, hosts=hosts)

    if hosts is not None:
        h2o.upload_jar_to_remote_hosts(hosts, slow_connection=slow_connection)
        # timeout wants to be larger for large numbers of hosts * h2oPerHost:
        # use 60 sec min, 2 sec per node
        timeoutSecs = max(60, 2 * (len(hosts) * h2oPerHost))
    else:  # for 127.0.0.1 case
        timeoutSecs = 60

    # sandbox gets cleaned in build_cloud
    h2o.build_cloud(h2oPerHost,
            base_port=basePort, hosts=hosts, timeoutSecs=timeoutSecs, sigar=sigar, 
            use_flatfile=useFlatfile,
            use_hdfs=useHdfs, hdfs_name_node=hdfsNameNode,
            hdfs_version=hdfsVersion, hdfs_config=hdfsConfig,
            java_heap_GB=javaHeapGB, java_extra_args=javaExtraArgs,
            use_home_for_ice=use_home_for_ice,
            **kwargs)