Example #1
0
def build_cloud(node_count=1, base_port=None, hosts=None,
    timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True,
    conservative=False, create_json=False, clone_cloud=None, 
    init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs):

    # expectedSize is only used if usecloud

    # usecloud can be passed thru build_cloud param, or command line 
    # not in config json though so no build_cloud_with_hosts path.

    # redirect to build_cloud_with_json if a command line arg
    # wants to force a test to ignore it's build_cloud/build_cloud_with_hosts
    # (both come thru here)
    # clone_cloud is just another way to get the effect (maybe ec2 config file thru
    # build_cloud_with_hosts?
    global stdout_wrapped
    if not h2o_args.disable_time_stamp and not stdout_wrapped:
        sys.stdout = OutWrapper(sys.stdout)
        stdout_wrapped = True

    if h2o_args.usecloud or usecloud:
        # for now, just have fixed name in local file.  (think of this as a temp or debug file)
        # eventually we'll pass the json object instead  for speed?
        nodesJsonPathname = "h2o_fc-nodes.json"

    elif h2o_args.clone_cloud_json:
        nodesJsonPathname = h2o_args.clone_cloud_json

    elif clone_cloud:
        nodesJsonPathname = clone_cloud

    else:
        # normal build_cloud() doesn't use
        nodesJsonPathname = None

    # usecloud dominates over all
    if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud):
        # then build_cloud_with_json with json object
        # we don't need to specify these defaults, but leave here to show that we can pass
        # I suppose kwargs will have it
        if h2o_args.usecloud:
            ip_port = h2o_args.usecloud
        elif usecloud:
            ip_port = usecloud
        else:
            ip_port = None

        # h2o_args dominates
        if h2o_args.usecloud_size:
            # only used for expected size
            useCloudExpectedSize = h2o_args.usecloud_size
        else: 
            useCloudExpectedSize = usecloud_size

        nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port,
            expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs)
            # potentially passed in kwargs
            # hdfs_version='cdh4', hdfs_config=None, hdfs_name_node='172.16.1.176', 

        nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname)
        return nodeList

    # else
    # moved to here from unit_main. so will run with nosetests too!
    # Normally do this.
    # Don't if build_cloud_with_hosts() did and put a flatfile in there already!
    if init_sandbox:
        clean_sandbox()

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ")
    log("#*********************************************************************")

    # start up h2o to report the java version (once). output to python stdout
    # only do this for regression testing

    # temporarily disable this, to go a little faster
    #    if getpass.getuser() == 'jenkins':
    #        check_h2o_version()

    ports_per_node = 2
    nodeList = []
    # shift the port used to run groups of tests on the same machine at the same time?
    base_port  = get_base_port(base_port)

    try:
        # if no hosts list, use psutil method on local host.
        totalNodes = 0
        # doing this list outside the loops so we can shuffle for better test variation
        # this jvm startup shuffle is independent from the flatfile shuffle
        portList = [base_port + ports_per_node * i for i in range(node_count)]
        if hosts is None:
            # if use_flatfile, we should create it
            # because tests will just call build_cloud with use_flatfile=True
            # best to just create it all the time..may or may not be used
            write_flatfile(node_count=node_count, base_port=base_port)
            hostCount = 1
            if rand_shuffle:
                random.shuffle(portList)
            for p in portList:
                verboseprint("psutil starting node", i)
                newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1
        else:
            # if hosts, the flatfile was created and uploaded to hosts already
            # I guess don't recreate it, don't overwrite the one that was copied beforehand.
            # we don't always use the flatfile (use_flatfile=False)
            # Suppose we could dispatch from the flatfile to match it's contents
            # but sometimes we want to test with a bad/different flatfile then we invoke h2o?
            hostCount = len(hosts)
            hostPortList = []
            for h in hosts:
                for port in portList:
                    hostPortList.append((h, port))
            if rand_shuffle: random.shuffle(hostPortList)
            for (h, p) in hostPortList:
                verboseprint('ssh starting node', totalNodes, 'via', h)
                newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1

        verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts")
        start = time.time()
        # UPDATE: best to stabilize on the last node!
        stabilize_cloud(nodeList[0], nodeList,
            timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noSandboxErrorCheck=True)
        verboseprint(len(nodeList), "Last added node stabilized in ", time.time() - start, " secs")
        verboseprint("Built cloud: %d nodes on %d hosts, in %d s" % \
            (len(nodeList), hostCount, (time.time() - start)))
        h2p.red_print("Built cloud:", nodeList[0].java_heap_GB, "GB java heap(s) with",
            len(nodeList), "total nodes")

        # FIX! using "consensus" in node[-1] should mean this is unnecessary?
        # maybe there's a bug. For now do this. long term: don't want?
        # UPDATE: do it for all cases now 2/14/13
        if conservative: # still needed?
            for n in nodeList:
                stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noSandboxErrorCheck=True)

        # this does some extra checking now
        # verifies cloud name too if param is not None
        verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name)

        # best to check for any errors due to cloud building right away?
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though
        if cleanup and nodeList:
            for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    print len(nodeList), "total jvms in H2O cloud"
    # put the test start message in the h2o log, to create a marker
    nodeList[0].h2o_log_msg()

    if h2o_args.config_json:
        LOG_DIR = get_sandbox_name()
        # like cp -p. Save the config file, to sandbox
        print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR
        shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json))

    # Figure out some stuff about how this test was run
    cs_time = str(datetime.datetime.now())
    cs_cwd = os.getcwd()
    cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args)
    cs_python_test_name = h2o_args.python_test_name
    if h2o_args.config_json:
        cs_config_json = os.path.abspath(h2o_args.config_json)
    else:
        cs_config_json = None
    cs_username = h2o_args.python_username
    cs_ip = h2o_args.python_cmd_ip

    # dump the nodes state to a json file # include enough extra info to have someone
    # rebuild the cloud if a test fails that was using that cloud.
    if create_json:
        q = {
            'cloud_start':
                {
                    'time': cs_time,
                    'cwd': cs_cwd,
                    'python_test_name': cs_python_test_name,
                    'python_cmd_line': cs_python_cmd_line,
                    'config_json': cs_config_json,
                    'username': cs_username,
                    'ip': cs_ip,
                },
            'h2o_nodes': h2o_util.json_repr(nodeList),
        }

        with open('h2o-nodes.json', 'w+') as f:
            f.write(json.dumps(q, indent=4))

    # save it to a local global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
Example #2
0
def build_cloud(node_count=1, base_port=None, hosts=None,
    timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True,
    conservative=False, create_json=False, clone_cloud=None,
    init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs):

    # expectedSize is only used if usecloud

    # usecloud can be passed thru build_cloud param, or command line
    # not in config json though so no build_cloud_with_hosts path.

    # redirect to build_cloud_with_json if a command line arg
    # wants to force a test to ignore it's build_cloud/build_cloud_with_hosts
    # (both come thru here)
    # clone_cloud is just another way to get the effect (maybe ec2 config file thru
    # build_cloud_with_hosts?
    global stdout_wrapped
    if not h2o_args.disable_time_stamp and not stdout_wrapped:
        sys.stdout = OutWrapper(sys.stdout)
        stdout_wrapped = True

    if h2o_args.usecloud or usecloud:
        # for now, just have fixed name in local file.  (think of this as a temp or debug file)
        # eventually we'll pass the json object instead  for speed?
        nodesJsonPathname = "h2o_fc-nodes.json"

    elif h2o_args.clone_cloud_json:
        nodesJsonPathname = h2o_args.clone_cloud_json

    elif clone_cloud:
        nodesJsonPathname = clone_cloud

    else:
        # normal build_cloud() doesn't use
        nodesJsonPathname = None

    # usecloud dominates over all
    if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud):
        # then build_cloud_with_json with json object
        # we don't need to specify these defaults, but leave here to show that we can pass
        # I suppose kwargs will have it
        if h2o_args.usecloud:
            ip_port = h2o_args.usecloud
        elif usecloud:
            ip_port = usecloud
        else:
            ip_port = None

        # h2o_args dominates
        if h2o_args.usecloud_size:
            # only used for expected size
            useCloudExpectedSize = h2o_args.usecloud_size
        else:
            useCloudExpectedSize = usecloud_size

        if (h2o_args.usecloud or usecloud):
            nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port,
                expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs)
                # potentially passed in kwargs
                # hdfs_version='cdh4', hdfs_config=None, hdfs_name_node='172.16.1.176',
        else:
            if h2o_args.clone_cloud_json:
                nodesJsonPathname = h2o_args.clone_cloud_json
            else:
                nodesJsonPathname = clone_cloud

        nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname)
        return nodeList

    # else
    # moved to here from unit_main. so will run with nosetests too!
    # Normally do this.
    # Don't if build_cloud_with_hosts() did and put a flatfile in there already!
    if init_sandbox:
        clean_sandbox()

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ")
    log("#*********************************************************************")

    # start up h2o to report the java version (once). output to python stdout
    # only do this for regression testing

    # temporarily disable this, to go a little faster
    #    if getpass.getuser() == 'jenkins':
    #        check_h2o_version()

    ports_per_node = 2
    nodeList = []
    # shift the port used to run groups of tests on the same machine at the same time?
    base_port  = get_base_port(base_port)

    try:
        # if no hosts list, use psutil method on local host.
        totalNodes = 0
        # doing this list outside the loops so we can shuffle for better test variation
        # this jvm startup shuffle is independent from the flatfile shuffle
        portList = [base_port + ports_per_node * i for i in range(node_count)]
        if hosts is None:
            # if use_flatfile, we should create it
            # because tests will just call build_cloud with use_flatfile=True
            # best to just create it all the time..may or may not be used
            write_flatfile(node_count=node_count, base_port=base_port)
            hostCount = 1
            if rand_shuffle:
                random.shuffle(portList)
            for p in portList:
                verboseprint("psutil starting node", i)
                newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1
        else:
            # if hosts, the flatfile was created and uploaded to hosts already
            # I guess don't recreate it, don't overwrite the one that was copied beforehand.
            # we don't always use the flatfile (use_flatfile=False)
            # Suppose we could dispatch from the flatfile to match it's contents
            # but sometimes we want to test with a bad/different flatfile then we invoke h2o?
            hostCount = len(hosts)
            hostPortList = []
            for h in hosts:
                for port in portList:
                    hostPortList.append((h, port))
            if rand_shuffle: random.shuffle(hostPortList)
            for (h, p) in hostPortList:
                verboseprint('ssh starting node', totalNodes, 'via', h)
                newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1

        verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts")
        start = time.time()
        # UPDATE: best to stabilize on the last node!
        # FIX! for now, always check sandbox, because h2oddev has TIME_WAIT port problems
        stabilize_cloud(nodeList[0], nodeList,
            timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noExtraErrorCheck=False)
        stabilizeTime = time.time() - start
        verboseprint(len(nodeList), "Last added node stabilized in ", stabilizeTime, " secs")

        # assume all the heap sizes are the same as zero
        if nodeList[0].java_heap_GB:
            heapSize = str(nodeList[0].java_heap_GB) + " GB"
        elif nodeList[0].java_heap_GB:
            heapSize = str(nodeList[0].java_heap_MB) + " MB"
        else:
            heapSize = "(unknown)"

        h2p.red_print("Built cloud: %s java heap(s) with %d nodes on %d hosts, stabilizing in %d secs" % \
            (heapSize, len(nodeList), hostCount, stabilizeTime))

        # FIX! using "consensus" in node[-1] should mean this is unnecessary?
        # maybe there's a bug. For now do this. long term: don't want?
        # UPDATE: do it for all cases now 2/14/13
        if conservative: # still needed?
            for n in nodeList:
                # FIX! for now, always check sandbox, because h2oddev has TIME_WAIT port problems
                stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noExtraErrorCheck=False)

        # this does some extra checking now
        # verifies cloud name too if param is not None
        verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name, expectedLocked=0)

        # FIX! should probably check that the cloud's lock=0. It will go to 1 later.
        # but if it's an existing cloud, it may already be locked.
        # That will be in build_cloud_with_json, though

        # best to check for any errors due to cloud building right away?
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

        # put the test start message in the h2o log, to create a marker
        nodeList[0].h2o_log_msg()

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though
        if cleanup and nodeList:
            for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    print len(nodeList), "total jvms in H2O cloud"

    if h2o_args.config_json:
        # like cp -p. Save the config file, to sandbox
        print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR
        shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json))

    if create_json:
        # Figure out some stuff about how this test was run
        cs_time = str(datetime.datetime.now())
        cs_cwd = os.getcwd()
        cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args)
        cs_python_test_name = h2o_args.python_test_name
        if h2o_args.config_json:
            cs_config_json = os.path.abspath(h2o_args.config_json)
        else:
            cs_config_json = None
        cs_username = h2o_args.python_username
        cs_ip = h2o_args.python_cmd_ip

        # dump the nodes state to a json file # include enough extra info to have someone
        # rebuild the cloud if a test fails that was using that cloud.
        q = {
            'cloud_start':
                {
                    'time': cs_time,
                    'cwd': cs_cwd,
                    'python_test_name': cs_python_test_name,
                    'python_cmd_line': cs_python_cmd_line,
                    'config_json': cs_config_json,
                    'username': cs_username,
                    'ip': cs_ip,
                },
            'h2o_nodes': h2o_util.json_repr(nodeList),
        }

        with open('h2o-nodes.json', 'w+') as f:
            f.write(json.dumps(q, indent=4))

    # save it to a local global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
Example #3
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    """Build an H2O cloud across the hosts described by a hosts config json.

    Merges three layers of params -- allParamsDefault, the hosts config file,
    then caller kwargs (later layers win) -- creates RemoteHost connections,
    writes (and uploads) the flatfile, then delegates to build_cloud().
    Returns build_cloud()'s node list.

    node_count -- legacy positional; if given it is moved into
        kwargs['h2o_per_host'] (overwriting any value already there).
    kwargs -- overrides for any key present in allParamsDefault.
    """
    # legacy: we allow node_count to be positional.
    # if it's used positionally, stick in in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs['h2o_per_host'] = node_count
        # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used
        node_count = None

    # for new params:
    # Just update this list with the param name and default and you're done
    allParamsDefault = {
        # any combination of force_ip/network could be interesting
        # network may mean you don't need force_ip
        'force_ip': False,
        'network': None,
        'use_flatfile': False,
        'use_hdfs':
        True,  # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly
        'hdfs_name_node': None,
        'hdfs_config': None,
        'hdfs_version': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,
        'timeoutSecs': 60,
        'retryDelaySecs': 2,
        'cleanup': True,
        'slow_connection': False,
        'h2o_per_host': 2,
        # NOTE(review): this default is a *string* that looks like a list; the
        # special-case comparison below tests against a real list, so the
        # default never matches it -- presumably the config json always
        # supplies an actual list. Confirm.
        'ip': '["127.0.0.1"]',  # this is for creating the hosts list
        'base_port': None,
        'username': '******',
        'password': None,
        'rand_shuffle': True,
        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
        'h2o_remote_buckets_root': None,
        'conservative': False,
        'create_json': False,
        # pass this from cloud building to the common "release" h2o_test.py classes
        # for deciding whether keys should be deleted when a test ends.
        'delete_keys_at_teardown': False,
        'clone_cloud': False,
        'cloud_name': None,
        'force_tcp': None,
        'random_udp_drop': None,
        'sandbox_ignore_errors': None,
    }
    # initialize the default values
    # (setdefault on an existing key just returns the current value, so this
    # is effectively a copy of allParamsDefault into paramsToUse)
    paramsToUse = {}
    for k, v in allParamsDefault.iteritems():
        paramsToUse[k] = allParamsDefault.setdefault(k, v)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o_args.config_json:
        configFilename = find_config(h2o_args.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(default_hosts_file())

    verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
        hostDict = json.load(fp)

    # second layer: values from the hosts config file override the defaults
    for k, v in hostDict.iteritems():
        # Don't take in params that we don't have in the list above
        # Because michal has extra params in here for ec2! and comments!
        if k in paramsToUse:
            paramsToUse[k] = hostDict.setdefault(k, v)

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    for k, v in kwargs.iteritems():
        paramsToUse[k] = kwargs.setdefault(k, v)

    # Let's assume we should set the h2o_remote_buckets_root (only affects
    # schema=local), to the home directory of whatever remote user
    # is being used for the hosts. Better than living with a decision
    # we made from scanning locally (remote might not match local)
    # assume the remote user has a /home/<username> (linux targets?)
    # This only affects import folder path name generation by python tests
    if paramsToUse['username']:
        paramsToUse[
            'h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']

    verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    hosts = []
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip'] == ["127.0.0.1"]:
        hosts = None
    else:
        verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse['ip']:
            verboseprint("Connecting to:", h)
            # expand any ~ or ~user in the string
            key_filename = paramsToUse['key_filename']
            if key_filename:  # don't try to expand if None
                key_filename = os.path.expanduser(key_filename)
            hosts.append(
                RemoteHost(addr=h,
                           username=paramsToUse['username'],
                           password=paramsToUse['password'],
                           key_filename=key_filename))

    # done with these, don't pass to build_cloud
    paramsToUse.pop(
        'ip'
    )  # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud

    # we want to save username in the node info. don't pop
    # paramsToUse.pop('username')
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')

    # flatfile is going into sandbox (LOG_DIR) now..so clean it first (will make sandbox dir if it doesn't exist already)
    clean_sandbox()

    # handles hosts=None correctly
    base_port = get_base_port(base_port=paramsToUse['base_port'])

    write_flatfile(
        node_count=paramsToUse['h2o_per_host'],
        # let the env variable H2O_PORT_OFFSET add in there
        base_port=base_port,
        hosts=hosts,
        rand_shuffle=paramsToUse['rand_shuffle'],
    )

    if hosts is not None:
        # this uploads the flatfile too
        upload_jar_to_remote_hosts(
            hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2oPerHost
        # use 60 sec min, 5 sec per node.
        timeoutSecs = max(60, 8 * (len(hosts) * paramsToUse['h2o_per_host']))
    else:  # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue
    node_count = paramsToUse['h2o_per_host']
    paramsToUse.pop('h2o_per_host')
    print "java_heap_GB", paramsToUse['java_heap_GB']
    # don't wipe out or create the sandbox. already did here, and put flatfile there
    nodes = build_cloud(node_count,
                        hosts=hosts,
                        init_sandbox=False,
                        **paramsToUse)

    # we weren't doing this before, but since build_cloud returns nodes
    # people might expect this works similarly
    return nodes
Example #4
0
def build_cloud_with_hosts(node_count=None, **kwargs):
    """Build an H2O cloud across the hosts described by a hosts config json.

    Merges three layers of params -- allParamsDefault, the hosts config file,
    then caller kwargs (later layers win) -- creates RemoteHost connections,
    writes (and uploads) the flatfile, then delegates to build_cloud().

    NOTE(review): this variant discards build_cloud()'s return value;
    callers presumably rely on the h2o_nodes.nodes global that build_cloud()
    updates -- confirm before adding a return here.

    node_count -- legacy positional; if given it is moved into
        kwargs['h2o_per_host'] (overwriting any value already there).
    kwargs -- overrides for any key present in allParamsDefault.
    """
    # legacy: we allow node_count to be positional. 
    # if it's used positionally, stick in in kwargs (overwrite if there too)
    if node_count is not None:
        # we use h2o_per_host in the config file. will translate to node_count for build_cloud
        kwargs['h2o_per_host'] = node_count
        # set node_count to None to make sure we don't use it below. 'h2o_per_host' should be used
        node_count = None

    # for new params:
    # Just update this list with the param name and default and you're done
    allParamsDefault = {
        # any combination of force_ip/network could be interesting
        # network may mean you don't need force_ip
        'force_ip': False,
        'network': None,
        'use_flatfile': False,
        'use_hdfs': True, # default to true, so when we flip import folder to hdfs+s3n import on ec2, the cloud is built correctly
        'hdfs_name_node': None, 
        'hdfs_config': None,
        'hdfs_version': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,

        'timeoutSecs': 60, 
        'retryDelaySecs': 2, 
        'cleanup': True,
        'slow_connection': False,

        'h2o_per_host': 2,
        # NOTE(review): this default is a *string* that looks like a list; the
        # special-case comparison below tests against a real list, so the
        # default never matches it -- presumably the config json always
        # supplies an actual list. Confirm.
        'ip':'["127.0.0.1"]', # this is for creating the hosts list
        'base_port': None,
        'username':'******',
        'password': None,
        'rand_shuffle': True,

        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
        'h2o_remote_buckets_root': None,
        'conservative': False,
        'create_json': False,
        # pass this from cloud building to the common "release" h2o_test.py classes
        # for deciding whether keys should be deleted when a test ends.
        'delete_keys_at_teardown': False, 
        'clone_cloud': False,
        'cloud_name': None,
        'force_tcp': None,
        'random_udp_drop': None,
        'sandbox_ignore_errors': None,
    }
    # initialize the default values
    # (setdefault on an existing key just returns the current value, so this
    # is effectively a copy of allParamsDefault into paramsToUse)
    paramsToUse = {}
    for k,v in allParamsDefault.iteritems():
        paramsToUse[k] = allParamsDefault.setdefault(k, v)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o_args.config_json:
        configFilename = find_config(h2o_args.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(default_hosts_file())

    verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
         hostDict = json.load(fp)

    # second layer: values from the hosts config file override the defaults
    for k,v in hostDict.iteritems():
        # Don't take in params that we don't have in the list above
        # Because michal has extra params in here for ec2! and comments!
        if k in paramsToUse:
            paramsToUse[k] = hostDict.setdefault(k, v)

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    for k,v in kwargs.iteritems():
        paramsToUse[k] = kwargs.setdefault(k, v)


    # Let's assume we should set the h2o_remote_buckets_root (only affects
    # schema=local), to the home directory of whatever remote user
    # is being used for the hosts. Better than living with a decision
    # we made from scanning locally (remote might not match local)
    # assume the remote user has a /home/<username> (linux targets?)
    # This only affects import folder path name generation by python tests
    if paramsToUse['username']:
        paramsToUse['h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']

    verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    hosts = []
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip']== ["127.0.0.1"]:
        hosts = None
    else:
        verboseprint("About to RemoteHost, likely bad ip if hangs")
        hosts = []
        for h in paramsToUse['ip']:
            verboseprint("Connecting to:", h)
            # expand any ~ or ~user in the string
            key_filename = paramsToUse['key_filename']
            if key_filename: # don't try to expand if None
               key_filename=os.path.expanduser(key_filename)
            hosts.append(RemoteHost(addr=h, 
                username=paramsToUse['username'], password=paramsToUse['password'], key_filename=key_filename))

    # done with these, don't pass to build_cloud
    paramsToUse.pop('ip') # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud

    # we want to save username in the node info. don't pop
    # paramsToUse.pop('username')
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')

    # flatfile is going into sandbox (LOG_DIR) now..so clean it first (will make sandbox dir if it doesn't exist already)    
    clean_sandbox()

    # handles hosts=None correctly
    base_port = get_base_port(base_port=paramsToUse['base_port'])

    write_flatfile(
        node_count=paramsToUse['h2o_per_host'],
        # let the env variable H2O_PORT_OFFSET add in there
        base_port=base_port,
        hosts=hosts,
        rand_shuffle=paramsToUse['rand_shuffle'],
        )

    if hosts is not None:
        # this uploads the flatfile too
        upload_jar_to_remote_hosts(hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2oPerHost
        # use 60 sec min, 5 sec per node.
        timeoutSecs = max(60, 8*(len(hosts) * paramsToUse['h2o_per_host']))
    else: # for 127.0.0.1 case
        timeoutSecs = 60
    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue
    node_count = paramsToUse['h2o_per_host']
    paramsToUse.pop('h2o_per_host')
    print "java_heap_GB", paramsToUse['java_heap_GB']
    # don't wipe out or create the sandbox. already did here, and put flatfile there
    build_cloud(node_count, hosts=hosts, init_sandbox=False, **paramsToUse)