Example 1
    def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None, returnFast=False,
        cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs):
        # if url param is used, use it as full url. otherwise create it from the jsonRequest
        if fullUrl:
            url = fullUrl
        else:
            url = self.url(jsonRequest)

        # remove any params that are 'None'
        # need to copy dictionary, since can't delete while iterating
        if params is not None:
            params2 = params.copy()
            for k in params2:
                if params2[k] is None:
                    del params[k]
            paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
        else:
            paramsStr = ''

        extraComment2 = " " + str(postData)+";" if cmd=='post' else ""
        extraComment2 += extraComment if extraComment else ""

        if len(extraComment2) > 0:
            log('Start ' + url + paramsStr, comment=extraComment2)
        else:
            log('Start ' + url + paramsStr)

        # files get passed thru kwargs here
        if h2o_args.no_timeout:
            timeout = None # infinite
        try:
            if 'post' == cmd:
                # NOTE: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST.
                # This is temporary.
                # 
                # This following does application/json (aka, posting JSON in the body):
                # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(postData), **kwargs)
                # 
                # This does form-encoded, which doesn't allow POST of nested structures
                r = requests.post(url, timeout=timeout, params=params, data=postData, **kwargs)
            elif 'delete' == cmd:
                r = requests.delete(url, timeout=timeout, params=params, **kwargs)
            elif 'get' == cmd:
                r = requests.get(url, timeout=timeout, params=params, **kwargs)
            else:
                raise ValueError("Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd)

        except Exception, e:
            # rethrow the exception after we've checked for stack trace from h2o
            # out-of-memory errors may not show up right away, so we should wait for h2o
            # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
            # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
            exc_info = sys.exc_info()
            # use this to ignore the initial connection errors during build cloud when h2o is coming up
            if not noExtraErrorCheck: 
                h2p.red_print(
                    "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr))
                time.sleep(2)
                check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
            raise exc_info[1], None, exc_info[2]
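
Note: "raise exc_info[1], None, exc_info[2]" is the Python 2 idiom for re-raising an exception with its original traceback after doing extra work in the handler. A minimal sketch of the same pattern under Python 3 (hypothetical, not part of the harness):

import sys

def risky_call():
    raise ValueError("boom")

try:
    risky_call()
except Exception:
    exc_info = sys.exc_info()
    # ... check the sandbox / logs here, then rethrow with the original traceback ...
    raise exc_info[1].with_traceback(exc_info[2])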
Example 2
    def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, returnFast=False,
        cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs):
        # if url param is used, use it as full url. otherwise create it from the jsonRequest
        if fullUrl:
            url = fullUrl
        else:
            url = self.url(jsonRequest)

        # remove any params that are 'None'
        # need to copy dictionary, since can't delete while iterating
        if params is not None:
            params2 = params.copy()
            for k in params2:
                if params2[k] is None:
                    del params[k]
            paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
        else:
            paramsStr = ''

        if extraComment:
            log('Start ' + url + paramsStr, comment=extraComment)
        else:
            log('Start ' + url + paramsStr)

        log_rest("")
        log_rest("----------------------------------------------------------------------\n")
        if extraComment:
            log_rest("# Extra comment info about this request: " + extraComment)
        if cmd == 'get':
            log_rest("GET")
        else:
            log_rest("POST")
        log_rest(url + paramsStr)

        # files get passed thru kwargs here
        try:
            if cmd == 'post':
                r = requests.post(url, timeout=timeout, params=params, **kwargs)
            else:
                r = requests.get(url, timeout=timeout, params=params, **kwargs)

        except Exception, e:
            # rethrow the exception after we've checked for stack trace from h2o
            # out-of-memory errors may not show up right away, so we should wait for h2o
            # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
            # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
            # (this is new/experimental)
            exc_info = sys.exc_info()
            # use this to ignore the initial connection errors during build cloud when h2o is coming up
            if not noExtraErrorCheck: 
                h2p.red_print(
                    "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr))
                time.sleep(2)
                check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
            log_rest("")
            log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message))
            raise exc_info[1], None, exc_info[2]
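
The copy-then-delete loop above exists because a dict can't have entries deleted while it's being iterated. A shorter sketch with the same effect for these flat param dicts (note the loop above mutates the caller's dict in place, while this rebinds a fresh one):

if params is not None:
    params = {k: v for (k, v) in params.items() if v is not None}
    paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
else:
    paramsStr = ''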
Example 3
    def run_cmd(self, cmd):
        log('Running `%s` on %s' % (cmd, self))
        (stdin, stdout, stderr) = self.ssh.exec_command(cmd)
        stdin.close()

        sys.stdout.write(stdout.read())
        sys.stdout.flush()
        stdout.close()

        sys.stderr.write(stderr.read())
        sys.stderr.flush()
        stderr.close()
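
run_cmd assumes self.ssh is an already-connected paramiko SSHClient. A minimal usage sketch of the same call sequence (host, user, and command are placeholders):

import sys
import paramiko

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect('192.168.1.10', username='0xdiag')  # placeholder host/user
(stdin, stdout, stderr) = ssh.exec_command('ls -ltr /tmp')
stdin.close()
sys.stdout.write(stdout.read())
sys.stderr.write(stderr.read())
ssh.close()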
Example 4
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs):
    params = {'key': key}
    paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
    url = self.url('2/DownloadDataset.json')
    log('Start ' + url + paramsStr, comment=csvPathname)

    # do it (absorb in 1024 byte chunks)
    r = requests.get(url, params=params, timeout=timeoutSecs)
    print "csv_download r.headers:", r.headers
    if r.status_code == 200:
        with open(csvPathname, 'wb') as f:
            for chunk in r.iter_content(1024):
                f.write(chunk)
    print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname)
Example 5
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs):
    params = {'key': key}
    paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
    url = self.url('3/DownloadDataset.json')
    log('Start ' + url + paramsStr, comment=csvPathname)

    # do it (absorb in 1024 byte chunks)
    r = requests.get(url, params=params, timeout=timeoutSecs)
    print "csv_download r.headers:", r.headers
    if r.status_code == 200:
        with open(csvPathname, 'wb') as f:
            for chunk in r.iter_content(1024):
                f.write(chunk)
    print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname)
Example 6
def log_download(self, logDir=None, timeoutSecs=30, **kwargs):
    if logDir is None:
        logDir = get_sandbox_name()

    url = self.url('Logs/download')
    log('Start ' + url)
    print "\nDownloading h2o log(s) using:", url
    r = requests.get(url, timeout=timeoutSecs, **kwargs)
    if not r or not r.ok:
        raise Exception("Maybe bad url? no r in log_download, in %s:" %
                        inspect.stack()[1][3])

    z = zipfile.ZipFile(StringIO.StringIO(r.content))
    print "z.namelist:", z.namelist()
    print "z.printdir:", z.printdir()

    nameList = z.namelist()
    # the first is the h2ologs dir name.
    h2oLogDir = logDir + "/" + nameList.pop(0)
    print "h2oLogDir:", h2oLogDir
    print "logDir:", logDir

    # it's a zip of zipped files
    # first unzip it
    z = zipfile.ZipFile(StringIO.StringIO(r.content))
    z.extractall(logDir)
    # unzipped file should be in LOG_DIR now
    # now unzip the files in that directory
    # accumulate across all the per-node zips (assigning inside the loop would keep only the last result)
    resultList = []
    for zname in nameList:
        resultList.extend(h2o_util.flat_unzip(logDir + "/" + zname, logDir))

    print "\nlogDir:", logDir
    for logfile in resultList:
        numLines = sum(1 for line in open(logfile))
        print logfile, "Lines:", numLines
    print
    return resultList
Example 7
def log_download(self, logDir=None, timeoutSecs=30, **kwargs):
    if logDir is None:
        logDir = get_sandbox_name()

    url = self.url('LogDownload.json')
    log('Start ' + url)
    print "\nDownloading h2o log(s) using:", url
    r = requests.get(url, timeout=timeoutSecs, **kwargs)
    if not r or not r.ok:
        raise Exception("Maybe bad url? no r in log_download %s in %s:" % inspect.stack()[1][3])

    z = zipfile.ZipFile(StringIO.StringIO(r.content))
    print "z.namelist:", z.namelist()
    print "z.printdir:", z.printdir()

    nameList = z.namelist()
    # the first is the h2ologs dir name.
    h2oLogDir = logDir + "/" + nameList.pop(0)
    print "h2oLogDir:", h2oLogDir
    print "logDir:", logDir

    # it's a zip of zipped files
    # first unzip it
    z = zipfile.ZipFile(StringIO.StringIO(r.content))
    z.extractall(logDir)
    # unzipped file should be in LOG_DIR now
    # now unzip the files in that directory
    # accumulate across all the per-node zips (assigning inside the loop would keep only the last result)
    resultList = []
    for zname in nameList:
        resultList.extend(h2o_util.flat_unzip(logDir + "/" + zname, logDir))

    print "\nlogDir:", logDir
    for logfile in resultList:
        numLines = sum(1 for line in open(logfile))
        print logfile, "Lines:", numLines
    print
    return resultList
Example 8
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs):
    params = {
        'key': key
    }

    paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
    url = self.url('DownloadDataset.json')
    log('Start ' + url + paramsStr, comment=csvPathname)

    # do it (absorb in 1024 byte chunks)
    r = requests.get(url, params=params, timeout=timeoutSecs)
    print "csv_download r.headers:", r.headers
    if r.status_code == 200:
        with open(csvPathname, 'wb') as f:
            for chunk in r.iter_content(1024):
                f.write(chunk)
    else:
        raise Exception("unexpected status for DownloadDataset: %s" % r.status_code)

    print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname)
    h2o_sandbox.check_sandbox_for_errors()

    # FIX! we're skipping all the checks in do_json_request. And no json return?
    return 
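
One caveat with the csv_download variants above: without stream=True, requests buffers the whole response body in memory before iter_content runs, so the 1024-byte chunking only helps on the write side. A sketch of a streaming variant (hypothetical name, same idea):

import requests

def csv_download_streaming(url, csvPathname, params=None, timeoutSecs=60):
    # stream=True defers the body; iter_content then reads from the socket in chunks
    r = requests.get(url, params=params, stream=True, timeout=timeoutSecs)
    if r.status_code != 200:
        raise Exception("unexpected status for DownloadDataset: %s" % r.status_code)
    with open(csvPathname, 'wb') as f:
        for chunk in r.iter_content(1024):
            if chunk:
                f.write(chunk)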
Example 9
def browseJsonHistoryAsUrl(retryDelaySecs=0.25):
    if not h2o_args.browse_disable:
        # stop when you get to 50; don't want more than 50 tabs in the browser
        tabCount = 0
        for url in h2o_nodes.json_url_history:
            # ignore the Cloud "alive" views
            # FIX! we probably want to expand ignoring to more than Cloud?
            if not re.search('Cloud', url):
                # url = re.sub("GLMGridProgress","GLMGridProgress.html",url)
                # url = re.sub("Progress","Progress.html",url)
                url = re.sub("ProgressPage","ProgressPage.html",url)
                url = re.sub("Progress?!Page","Progress.html",url)
                url = re.sub("Progress\?","Progress.html?",url)
                url = re.sub(".json",".html",url)
                print "browseJsonHistoryAsUrl:", url
                print "same, decoded:", urllib.unquote(url)
                # does this open in same window?
                log(url, comment="From browseJsonHistoryAsUrl")
                webbrowser.open(url, new=0)
                time.sleep(retryDelaySecs)
                tabCount += 1

            if tabCount == 50:
                break
Example 10
def browseJsonHistoryAsUrl(retryDelaySecs=0.25):
    if not h2o_args.browse_disable:
        # stop when you get to 50; don't want more than 50 tabs in the browser
        tabCount = 0
        for url in h2o_nodes.json_url_history:
            # ignore the Cloud "alive" views
            # FIX! we probably want to expand ignoring to more than Cloud?
            if not re.search('Cloud', url):
                # url = re.sub("GLMGridProgress","GLMGridProgress.html",url)
                # url = re.sub("Progress","Progress.html",url)
                url = re.sub("ProgressPage", "ProgressPage.html", url)
                url = re.sub("Progress?!Page", "Progress.html", url)
                url = re.sub("Progress\?", "Progress.html?", url)
                url = re.sub(".json", ".html", url)
                print "browseJsonHistoryAsUrl:", url
                print "same, decoded:", urllib.unquote(url)
                # does this open in same window?
                log(url, comment="From browseJsonHistoryAsUrl")
                webbrowser.open(url, new=0)
                time.sleep(retryDelaySecs)
                tabCount += 1

            if tabCount == 50:
                break
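
For reference, here is what the effective substitution chain does to a couple of hypothetical history URLs:

import re

def as_html(url):
    url = re.sub(r"ProgressPage", "ProgressPage.html", url)
    url = re.sub(r"Progress\?", "Progress.html?", url)
    url = re.sub(r"\.json", ".html", url)
    return url

print as_html("http://127.0.0.1:54321/GLMProgress.json?job=j1")
# -> http://127.0.0.1:54321/GLMProgress.html?job=j1
print as_html("http://127.0.0.1:54321/Progress?key=k")
# -> http://127.0.0.1:54321/Progress.html?key=k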
Example 11
def build_cloud_with_json(h2o_nodes_json='h2o-nodes.json'):

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()")
    log("#*********************************************************************")

    print "This only makes sense if h2o is running as defined by", h2o_nodes_json
    print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here"
    print "No output means no h2o here! Some other info about stuff on the system is printed first though."
    import h2o_os_util

    if not os.path.exists(h2o_nodes_json):
        raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file")

    # h2o_os_util.show_h2o_processes()

    with open(h2o_nodes_json, 'rb') as f:
        cloneJson = json.load(f)

    # These are supposed to be in the file.
    # Just check the first one. if not there, the file must be wrong
    if 'cloud_start' not in cloneJson:
        raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json)
    else:
        cs = cloneJson['cloud_start']
        print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json
        # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff
        valList = ['time', 'cwd', 'python_test_name', 'python_cmd_line', 'config_json', 'username', 'ip']
        for v in valList:
            if v not in cs:
                raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json))
            print "cloud_start['%s']: %s" % (v, cs[v])

        # this is the internal node state for python..nodes rebuild
        nodeStateList = cloneJson['h2o_nodes']

    nodeList = []
    if not nodeStateList:
        raise Exception("nodeStateList is empty. %s file must be empty/corrupt" % h2o_nodes_json)
    for nodeState in nodeStateList:
        print "Cloning state for node", nodeState['node_id'], 'from', h2o_nodes_json

        newNode = ExternalH2O(nodeState)
        nodeList.append(newNode)

    print ""
    h2p.red_print("Ingested from json:", nodeList[0].java_heap_GB, "GB java heap(s) with",
        len(nodeList), "total nodes")
    print ""
    # put the test start message in the h2o log, to create a marker
    nodeList[0].h2o_log_msg()

    # save it to a global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
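
A typical use of the clone path: an earlier run wrote h2o-nodes.json (see the create_json path in build_cloud below), and a later test reuses that running cloud. A sketch:

nodes = build_cloud_with_json('h2o-nodes.json')
print len(nodes), "nodes cloned from json; first node:", nodes[0]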
Example 12
    def __init__(self, host, *args, **kwargs):
        super(RemoteH2O, self).__init__(*args, **kwargs)

        # force_ip gets set True if an address is specified for LocalH2O init. Override it here.
        if 'force_ip' in kwargs:
            self.force_ip = kwargs['force_ip']

        self.remoteH2O = True # so we can tell if we're remote or local
        self.jar = host.upload_file('build/h2o.jar')
        # need to copy the flatfile. We don't always use it (depends on h2o args)
        self.flatfile = host.upload_file(flatfile_pathname())
        # distribute AWS credentials
        if self.aws_credentials:
            self.aws_credentials = host.upload_file(self.aws_credentials)

        if self.hdfs_config:
            self.hdfs_config = host.upload_file(self.hdfs_config)

        if self.use_home_for_ice:
            # this will be the username used to ssh to the host
            self.ice = "/home/" + host.username + '/ice.%d.%s' % (self.port, time.time())
        else:
            self.ice = '/tmp/ice.%d.%s' % (self.port, time.time())

        self.channel = host.open_channel()
        ### FIX! TODO...we don't check on remote hosts yet

        # this fires up h2o over there
        cmd = ' '.join(self.get_args())
        # UPDATE: somehow java -jar on cygwin target (xp) can't handle /tmp/h2o*jar
        # because it's a windows executable and expects windows style path names.
        # but if we cd into /tmp, it can do java -jar h2o*jar.
        # So just split out the /tmp (pretend we don't know) and the h2o jar file name
        # Newer windows may not have this problem? Do the ls (this goes into the local stdout
        # files) so we can see the file is really where we expect.
        # This hack only works when the dest is /tmp/h2o*jar. It's okay to execute
        # with pwd = /tmp. If /tmp/ isn't in the jar path, I guess things will be the same as
        # normal.
        if 1 == 0: # enable if you want windows remote machines
            cmdList = ["cd /tmp"] # separate by ;<space> when we join
            cmdList += ["ls -ltr " + self.jar]
            cmdList += [re.sub("/tmp/", "", cmd)]
            self.channel.exec_command("; ".join(cmdList))
        else:
            self.channel.exec_command(cmd)

        if self.capture_output:
            if self.node_id is not None:
                logPrefix = 'remote-h2o-' + str(self.node_id)
            else:
                logPrefix = 'remote-h2o'

            logPrefix += '-' + host.h2o_addr

            outfd, outpath = tmp_file(logPrefix + '.stdout.', '.log')
            errfd, errpath = tmp_file(logPrefix + '.stderr.', '.log')

            drain(self.channel.makefile(), outfd)
            drain(self.channel.makefile_stderr(), errfd)
            comment = 'Remote on %s, stdout %s, stderr %s' % (
                self.h2o_addr, os.path.basename(outpath), os.path.basename(errpath))
        else:
            drain(self.channel.makefile(), sys.stdout)
            drain(self.channel.makefile_stderr(), sys.stderr)
            comment = 'Remote on %s' % self.h2o_addr

        log(cmd, comment=comment)
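
drain() and tmp_file() are helpers from the surrounding module. A plausible sketch of what a drain might look like, assuming it copies a channel's file object to a sink on a daemon thread (an assumption, not the module's actual implementation):

import os
import threading

def drain_sketch(src, dst):
    # src: file-like object (e.g. channel.makefile()); dst: a file object or a file descriptor
    def pump():
        for line in iter(src.readline, ''):
            if hasattr(dst, 'write'):
                dst.write(line)
            else:
                os.write(dst, line)
    t = threading.Thread(target=pump)
    t.daemon = True
    t.start()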
Example 13
class H2O(object):
    def __init__(
        self,
        use_this_ip_addr=None,
        port=54321,
        capture_output=True,
        force_ip=False,
        network=None,
        use_debugger=None,
        classpath=None,
        use_hdfs=False,
        use_maprfs=False,
        hdfs_version=None,
        hdfs_name_node=None,
        hdfs_config=None,
        aws_credentials=None,
        use_flatfile=False,
        java_heap_GB=None,
        java_heap_MB=None,
        java_extra_args=None,
        use_home_for_ice=False,
        node_id=None,
        username=None,
        random_udp_drop=False,
        force_tcp=False,
        redirect_import_folder_to_s3_path=None,
        redirect_import_folder_to_s3n_path=None,
        disable_h2o_log=False,
        enable_benchmark_log=False,
        h2o_remote_buckets_root=None,
        delete_keys_at_teardown=False,
        cloud_name=None,
        disable_assertions=None,
        sandbox_ignore_errors=False,
    ):

        if use_hdfs:
            # see if we can touch a 0xdata machine
            try:
                # long timeout in ec2...bad
                requests.get('http://172.16.2.176:80', timeout=1)
                hdfs_0xdata_visible = True
            except Exception:
                hdfs_0xdata_visible = False

            # different defaults, depending on where we're running
            if hdfs_name_node is None:
                if hdfs_0xdata_visible:
                    hdfs_name_node = "172.16.2.176"
                else:  # ec2
                    hdfs_name_node = "10.78.14.235:9000"

            if hdfs_version is None:
                if hdfs_0xdata_visible:
                    hdfs_version = "cdh4"
                else:  # ec2
                    hdfs_version = "0.20.2"

        self.redirect_import_folder_to_s3_path = redirect_import_folder_to_s3_path
        self.redirect_import_folder_to_s3n_path = redirect_import_folder_to_s3n_path

        self.aws_credentials = aws_credentials
        self.port = port
        # None is legal for self.h2o_addr.
        # means we won't give an ip to the jar when we start.
        # Or we can use use_this_ip_addr=127.0.0.1, or the known address.
        # if use_this_ip_addr is None, use 127.0.0.1 for urls and json
        # Command line arg 'ip_from_cmd_line' dominates:

        # ip_from_cmd_line and use_this_ip_addr shouldn't be used for multi-node
        if h2o_args.ip_from_cmd_line:
            self.h2o_addr = h2o_args.ip_from_cmd_line
        else:
            self.h2o_addr = use_this_ip_addr

        self.force_ip = force_ip or (self.h2o_addr is not None)

        if self.h2o_addr:
            self.http_addr = self.h2o_addr
        else:
            self.http_addr = h2o_args.python_cmd_ip

        if h2o_args.network_from_cmd_line:
            self.network = h2o_args.network_from_cmd_line
        else:
            self.network = network

        # command line should always dominate for enabling
        if h2o_args.debugger: use_debugger = True
        self.use_debugger = use_debugger

        self.classpath = classpath
        self.capture_output = capture_output

        self.use_hdfs = use_hdfs
        self.use_maprfs = use_maprfs
        self.hdfs_name_node = hdfs_name_node
        self.hdfs_version = hdfs_version
        self.hdfs_config = hdfs_config

        self.use_flatfile = use_flatfile
        self.java_heap_GB = java_heap_GB
        self.java_heap_MB = java_heap_MB
        self.java_extra_args = java_extra_args

        self.use_home_for_ice = use_home_for_ice
        self.node_id = node_id

        if username:
            self.username = username
        else:
            self.username = getpass.getuser()

        # don't want multiple reports from tearDown and tearDownClass
        # have nodes[0] remember (0 always exists)
        self.sandbox_error_was_reported = False
        self.sandbox_ignore_errors = sandbox_ignore_errors

        self.random_udp_drop = random_udp_drop
        self.force_tcp = force_tcp
        self.disable_h2o_log = disable_h2o_log

        # this dumps stats from tests, and perf stats while polling to benchmark.log
        self.enable_benchmark_log = enable_benchmark_log
        self.h2o_remote_buckets_root = h2o_remote_buckets_root
        self.delete_keys_at_teardown = delete_keys_at_teardown
        self.disable_assertions = disable_assertions

        if cloud_name:
            self.cloud_name = cloud_name
        else:
            self.cloud_name = 'pytest-%s-%s' % (getpass.getuser(), os.getpid())

    def __str__(self):
        return '%s - http://%s:%d/' % (type(self), self.http_addr, self.port)

    def url(self, loc, port=None):
        # always use the new api port
        if port is None: port = self.port
        if loc.startswith('/'):
            delim = ''
        else:
            delim = '/'
        u = 'http://%s:%d%s%s' % (self.http_addr, port, delim, loc)
        return u

    def do_json_request(self,
                        jsonRequest=None,
                        fullUrl=None,
                        timeout=10,
                        params=None,
                        returnFast=False,
                        cmd='get',
                        extraComment=None,
                        ignoreH2oError=False,
                        noExtraErrorCheck=False,
                        **kwargs):
        # if url param is used, use it as full url. otherwise create it from the jsonRequest
        if fullUrl:
            url = fullUrl
        else:
            url = self.url(jsonRequest)

        # remove any params that are 'None'
        # need to copy dictionary, since can't delete while iterating
        if params is not None:
            params2 = params.copy()
            for k in params2:
                if params2[k] is None:
                    del params[k]
            paramsStr = '?' + '&'.join(
                ['%s=%s' % (k, v) for (k, v) in params.items()])
        else:
            paramsStr = ''

        if extraComment:
            log('Start ' + url + paramsStr, comment=extraComment)
        else:
            log('Start ' + url + paramsStr)

        log_rest("")
        log_rest(
            "----------------------------------------------------------------------\n"
        )
        if extraComment:
            log_rest("# Extra comment info about this request: " +
                     extraComment)
        if cmd == 'get':
            log_rest("GET")
        else:
            log_rest("POST")
        log_rest(url + paramsStr)

        # files get passed thru kwargs here
        try:
            if cmd == 'post':
                r = requests.post(url,
                                  timeout=timeout,
                                  params=params,
                                  **kwargs)
            else:
                r = requests.get(url, timeout=timeout, params=params, **kwargs)

        except Exception, e:
            # rethrow the exception after we've checked for stack trace from h2o
            # out-of-memory errors may not show up right away, so we should wait for h2o
            # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
            # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
            exc_info = sys.exc_info()
            # use this to ignore the initial connection errors during build cloud when h2o is coming up
            if not noExtraErrorCheck:
                h2p.red_print(
                    "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.."
                    % (url + paramsStr))
                time.sleep(2)
                check_sandbox_for_errors(
                    python_test_name=h2o_args.python_test_name)
            log_rest("")
            log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message))
            raise exc_info[1], None, exc_info[2]

        log_rest("")
        try:
            if r is None:
                log_rest("r is None")
            else:
                log_rest("HTTP status code: " + str(r.status_code))
                if hasattr(r, 'text'):
                    if r.text is None:
                        log_rest("r.text is None")
                    else:
                        log_rest(r.text)
                else:
                    log_rest("r does not have attr text")
        except Exception:
            # Paranoid exception catch.
            # Ignore logging exceptions, in case the error checking above isn't sufficient.
            log('WARNING: ignoring unexpected exception on ' + url + paramsStr)
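
The url() helper above just joins the address, port, and location, inserting a '/' only when the location doesn't already start with one. A standalone sketch of that logic:

def make_url(http_addr, port, loc):
    delim = '' if loc.startswith('/') else '/'
    return 'http://%s:%d%s%s' % (http_addr, port, delim, loc)

print make_url('127.0.0.1', 54321, '2/Parse2.json')  # http://127.0.0.1:54321/2/Parse2.json
print make_url('127.0.0.1', 54321, '/2/Jobs.json')   # http://127.0.0.1:54321/2/Jobs.json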
Example 14
def build_cloud_with_json(h2o_nodes_json="h2o-nodes.json"):

    # local sandbox may not exist. Don't clean if it does, just append
    if not os.path.exists(LOG_DIR):
        os.mkdir(LOG_DIR)

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()")
    log("#*********************************************************************")

    print "This only makes sense if h2o is running as defined by", h2o_nodes_json
    print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here"
    print "No output means no h2o here! Some other info about stuff on the system is printed first though."
    import h2o_os_util

    if not os.path.exists(h2o_nodes_json):
        raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file")

    ## h2o_os_util.show_h2o_processes()

    with open(h2o_nodes_json, "rb") as f:
        cloneJson = json.load(f)

    # These are supposed to be in the file.
    # Just check the first one. if not there, the file must be wrong
    if not "cloud_start" in cloneJson:
        raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json)
    else:
        cs = cloneJson["cloud_start"]
        print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json
        # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff
        valList = ["time", "cwd", "python_test_name", "python_cmd_line", "config_json", "username", "ip"]
        for v in valList:
            if v not in cs:
                raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json))
            print "cloud_start['%s']: %s" % (v, cs[v])

        # this is the internal node state for python..nodes rebuild
        nodeStateList = cloneJson["h2o_nodes"]

    nodeList = []
    if not nodeStateList:
        raise Exception("nodeStateList is empty. %s file must be empty/corrupt" % h2o_nodes_json)

    try:
        for nodeState in nodeStateList:
            print "Cloning state for node", nodeState["node_id"], "from", h2o_nodes_json

            newNode = ExternalH2O(nodeState)
            nodeList.append(newNode)

        # If it's an existing cloud, it may already be locked. so never check.
        # we don't have the cloud name in the -ccj since it may change (and the file be static?)
        # so don't check expectedCloudName
        verify_cloud_size(nodeList, expectedCloudName=None, expectedLocked=None)

        # best to check for any errors right away?
        # (we won't report errors from prior tests due to marker stuff?
        ## check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

        # put the test start message in the h2o log, to create a marker
        nodeList[0].h2o_log_msg()

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though

        ## if cleanup and nodeList:
        ##     for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    # like cp -p. Save the config file, to sandbox
    print "Saving the ", h2o_nodes_json, "we used to", LOG_DIR
    shutil.copy(h2o_nodes_json, LOG_DIR + "/" + os.path.basename(h2o_nodes_json))

    print ""
    h2p.red_print("Ingested from json:", nodeList[0].java_heap_GB, "GB java heap(s) with", len(nodeList), "total nodes")
    print ""

    # save it to a global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
Example 15
def build_cloud(node_count=1, base_port=None, hosts=None,
    timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True,
    conservative=False, create_json=False, clone_cloud=None,
    init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs):

    # expectedSize is only used if usecloud

    # usecloud can be passed thru build_cloud param, or command line
    # not in config json though so no build_cloud_with_hosts path.

    # redirect to build_cloud_with_json if a command line arg
    # wants to force a test to ignore its build_cloud/build_cloud_with_hosts
    # (both come thru here)
    # clone_cloud is just another way to get the effect (maybe an ec2 config file thru
    # build_cloud_with_hosts?)
    global stdout_wrapped
    if not h2o_args.disable_time_stamp and not stdout_wrapped:
        sys.stdout = OutWrapper(sys.stdout)
        stdout_wrapped = True

    if h2o_args.usecloud or usecloud:
        # for now, just have fixed name in local file.  (think of this as a temp or debug file)
        # eventually we'll pass the json object instead  for speed?
        nodesJsonPathname = "h2o_fc-nodes.json"

    elif h2o_args.clone_cloud_json:
        nodesJsonPathname = h2o_args.clone_cloud_json

    elif clone_cloud:
        nodesJsonPathname = clone_cloud

    else:
        # normal build_cloud() doesn't use a nodes json
        nodesJsonPathname = None

    # usecloud dominates over all
    if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud):
        # then build_cloud_with_json with json object
        # we don't need to specify these defaults, but leave here to show that we can pass
        # I suppose kwargs will have it
        if h2o_args.usecloud:
            ip_port = h2o_args.usecloud
        elif usecloud:
            ip_port = usecloud
        else:
            ip_port = None

        # h2o_args dominates
        if h2o_args.usecloud_size:
            # only used for expected size
            useCloudExpectedSize = h2o_args.usecloud_size
        else:
            useCloudExpectedSize = usecloud_size

        if (h2o_args.usecloud or usecloud):
            nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port,
                expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs)
                # potentially passed in kwargs
                # hdfs_version='cdh4', hdfs_config=None, hdfs_name_node='172.16.1.176',
        else:
            if h2o_args.clone_cloud_json:
                nodesJsonPathname = h2o_args.clone_cloud_json
            else:
                nodesJsonPathname = clone_cloud

        nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname)
        return nodeList

    # else
    # moved to here from unit_main. so will run with nosetests too!
    # Normally do this.
    # Don't if build_cloud_with_hosts() did and put a flatfile in there already!
    if init_sandbox:
        clean_sandbox()

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ")
    log("#*********************************************************************")

    # start up h2o to report the java version (once). output to python stdout
    # only do this for regression testing

    # temporarily disable this, to go a little faster
    #    if getpass.getuser() == 'jenkins':
    #        check_h2o_version()

    ports_per_node = 2
    nodeList = []
    # shift the port used to run groups of tests on the same machine at the same time?
    base_port = get_base_port(base_port)

    try:
        # if no hosts list, use psutil method on local host.
        totalNodes = 0
        # doing this list outside the loops so we can shuffle for better test variation
        # this jvm startup shuffle is independent from the flatfile shuffle
        portList = [base_port + ports_per_node * i for i in range(node_count)]
        if hosts is None:
            # if use_flatfile, we should create it
            # because tests will just call build_cloud with use_flatfile=True
            # best to just create it all the time..may or may not be used
            write_flatfile(node_count=node_count, base_port=base_port)
            hostCount = 1
            if rand_shuffle:
                random.shuffle(portList)
            for p in portList:
                verboseprint("psutil starting node", i)
                newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1
        else:
            # if hosts, the flatfile was created and uploaded to hosts already
            # I guess don't recreate it, don't overwrite the one that was copied beforehand.
            # we don't always use the flatfile (use_flatfile=False)
            # Suppose we could dispatch from the flatfile to match its contents,
            # but sometimes we want to test with a bad/different flatfile than the one h2o is invoked with?
            hostCount = len(hosts)
            hostPortList = []
            for h in hosts:
                for port in portList:
                    hostPortList.append((h, port))
            if rand_shuffle: random.shuffle(hostPortList)
            for (h, p) in hostPortList:
                verboseprint('ssh starting node', totalNodes, 'via', h)
                newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1

        verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts")
        start = time.time()
        # UPDATE: best to stabilize on the last node!
        # FIX! for now, always check sandbox, because h2o-dev has TIME_WAIT port problems
        stabilize_cloud(nodeList[0], nodeList,
            timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noExtraErrorCheck=False)
        stabilizeTime = time.time() - start
        verboseprint(len(nodeList), "Last added node stabilized in ", stabilizeTime, " secs")

        # assume all the heap sizes are the same as node 0's
        if nodeList[0].java_heap_GB:
            heapSize = str(nodeList[0].java_heap_GB) + " GB"
        elif nodeList[0].java_heap_MB:
            heapSize = str(nodeList[0].java_heap_MB) + " MB"
        else:
            heapSize = "(unknown)"

        h2p.red_print("Built cloud: %s java heap(s) with %d nodes on %d hosts, stabilizing in %d secs" % \
            (heapSize, len(nodeList), hostCount, stabilizeTime))

        # FIX! using "consensus" in node[-1] should mean this is unnecessary?
        # maybe there's a bug. For now do this. long term: don't want?
        # UPDATE: do it for all cases now 2/14/13
        if conservative: # still needed?
            for n in nodeList:
                # FIX! for now, always check sandbox, because h2o-dev has TIME_WAIT port problems
                stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noExtraErrorCheck=False)

        # this does some extra checking now
        # verifies cloud name too if param is not None
        verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name, expectedLocked=0)

        # FIX! should probably check that the cloud's lock=0. It will go to 1 later.
        # but if it's an existing cloud, it may already be locked.
        # That will be in build_cloud_with_json, though

        # best to check for any errors due to cloud building right away?
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

        # put the test start message in the h2o log, to create a marker
        nodeList[0].h2o_log_msg()

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though
        if cleanup and nodeList:
            for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    print len(nodeList), "total jvms in H2O cloud"

    if h2o_args.config_json:
        # like cp -p. Save the config file, to sandbox
        print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR
        shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json))

    if create_json:
        # Figure out some stuff about how this test was run
        cs_time = str(datetime.datetime.now())
        cs_cwd = os.getcwd()
        cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args)
        cs_python_test_name = h2o_args.python_test_name
        if h2o_args.config_json:
            cs_config_json = os.path.abspath(h2o_args.config_json)
        else:
            cs_config_json = None
        cs_username = h2o_args.python_username
        cs_ip = h2o_args.python_cmd_ip

        # dump the nodes state to a json file # include enough extra info to have someone
        # rebuild the cloud if a test fails that was using that cloud.
        q = {
            'cloud_start':
                {
                    'time': cs_time,
                    'cwd': cs_cwd,
                    'python_test_name': cs_python_test_name,
                    'python_cmd_line': cs_python_cmd_line,
                    'config_json': cs_config_json,
                    'username': cs_username,
                    'ip': cs_ip,
                },
            'h2o_nodes': h2o_util.json_repr(nodeList),
        }

        with open('h2o-nodes.json', 'w+') as f:
            f.write(json.dumps(q, indent=4))

    # save it to a local global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
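
In this harness, tests typically build the cloud once per test class and tear it down afterwards. A sketch of that pattern, assuming build_cloud lives in an h2o module and tear_down_cloud is the matching teardown helper:

import unittest
import h2o
import h2o_nodes

class TestExample(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        h2o.build_cloud(node_count=2, java_heap_GB=1)

    @classmethod
    def tearDownClass(cls):
        h2o.tear_down_cloud()

    def test_cloud_came_up(self):
        # build_cloud saved the node list to the global copy
        self.assertEqual(len(h2o_nodes.nodes), 2)

if __name__ == '__main__':
    unittest.main()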
Example 16
def build_cloud_with_json(h2o_nodes_json='h2o-nodes.json'):

    # local sandbox may not exist. Don't clean if it does, just append
    if not os.path.exists(LOG_DIR):
        os.mkdir(LOG_DIR)

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()")
    log("#*********************************************************************")

    print "This only makes sense if h2o is running as defined by", h2o_nodes_json
    print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here"
    print "No output means no h2o here! Some other info about stuff on the system is printed first though."
    import h2o_os_util

    if not os.path.exists(h2o_nodes_json):
        raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file")

    ## h2o_os_util.show_h2o_processes()

    with open(h2o_nodes_json, 'rb') as f:
        cloneJson = json.load(f)

    # These are supposed to be in the file.
    # Just check the first one. if not there, the file must be wrong
    if 'cloud_start' not in cloneJson:
        raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json)
    else:
        cs = cloneJson['cloud_start']
        print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json
        # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff
        valList = ['time', 'cwd', 'python_test_name', 'python_cmd_line', 'config_json', 'username', 'ip']
        for v in valList:
            if v not in cs:
                raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json))
            print "cloud_start['%s']: %s" % (v, cs[v])

        # this is the internal node state for python..nodes rebuild
        nodeStateList = cloneJson['h2o_nodes']

    nodeList = []
    if not nodeStateList:
        raise Exception("nodeStateList is empty. %s file must be empty/corrupt" % h2o_nodes_json)

    try:
        for nodeState in nodeStateList:
            print "Cloning state for node", nodeState['node_id'], 'from', h2o_nodes_json

            newNode = ExternalH2O(nodeState)
            nodeList.append(newNode)

        # If it's an existing cloud, it may already be locked. so never check.
        # we don't have the cloud name in the -ccj since it may change (and the file be static?)
        # so don't check expectedCloudName
        verify_cloud_size(nodeList, expectedCloudName=None, expectedLocked=None)

        # best to check for any errors right away?
        # (we won't report errors from prior tests due to marker stuff?
        ## check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

        # put the test start message in the h2o log, to create a marker
        nodeList[0].h2o_log_msg()

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though

        ## if cleanup and nodeList:
        ##     for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    # like cp -p. Save the config file, to sandbox
    print "Saving the ", h2o_nodes_json, "we used to", LOG_DIR
    shutil.copy(h2o_nodes_json, LOG_DIR + "/" + os.path.basename(h2o_nodes_json))

    print ""
    h2p.red_print("Ingested from json:",
        nodeList[0].java_heap_GB, "GB java heap(s) with",
        len(nodeList), "total nodes")
    print ""

    # save it to a global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
Example 17
def build_cloud(node_count=1, base_port=None, hosts=None,
    timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True,
    conservative=False, create_json=False, clone_cloud=None, 
    init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs):

    # expectedSize is only used if usecloud

    # usecloud can be passed thru build_cloud param, or command line 
    # not in config json though so no build_cloud_with_hosts path.

    # redirect to build_cloud_with_json if a command line arg
    # wants to force a test to ignore its build_cloud/build_cloud_with_hosts
    # (both come thru here)
    # clone_cloud is just another way to get the effect (maybe an ec2 config file thru
    # build_cloud_with_hosts?)
    global stdout_wrapped
    if not h2o_args.disable_time_stamp and not stdout_wrapped:
        sys.stdout = OutWrapper(sys.stdout)
        stdout_wrapped = True

    if h2o_args.usecloud or usecloud:
        # for now, just have fixed name in local file.  (think of this as a temp or debug file)
        # eventually we'll pass the json object instead  for speed?
        nodesJsonPathname = "h2o_fc-nodes.json"

    elif h2o_args.clone_cloud_json:
        nodesJsonPathname = h2o_args.clone_cloud_json

    elif clone_cloud:
        nodesJsonPathname = clone_cloud

    else:
        # normal build_cloud() doesn't use a nodes json
        nodesJsonPathname = None

    # usecloud dominates over all
    if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud):
        # then build_cloud_with_json with json object
        # we don't need to specify these defaults, but leave here to show that we can pass
        # I suppose kwargs will have it
        if h2o_args.usecloud:
            ip_port = h2o_args.usecloud
        elif usecloud:
            ip_port = usecloud
        else:
            ip_port = None

        # h2o_args dominates
        if h2o_args.usecloud_size:
            # only used for expected size
            useCloudExpectedSize = h2o_args.usecloud_size
        else: 
            useCloudExpectedSize = usecloud_size

        nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port,
            expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs)
            # potentially passed in kwargs
            # hdfs_version='cdh4', hdfs_config=None, hdfs_name_node='172.16.1.176', 

        nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname)
        return nodeList

    # else
    # moved to here from unit_main. so will run with nosetests too!
    # Normally do this.
    # Don't if build_cloud_with_hosts() did and put a flatfile in there already!
    if init_sandbox:
        clean_sandbox()

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ")
    log("#*********************************************************************")

    # start up h2o to report the java version (once). output to python stdout
    # only do this for regression testing

    # temporarily disable this, to go a little faster
    #    if getpass.getuser() == 'jenkins':
    #        check_h2o_version()

    ports_per_node = 2
    nodeList = []
    # shift the port used to run groups of tests on the same machine at the same time?
    base_port = get_base_port(base_port)

    try:
        # if no hosts list, use psutil method on local host.
        totalNodes = 0
        # doing this list outside the loops so we can shuffle for better test variation
        # this jvm startup shuffle is independent from the flatfile shuffle
        portList = [base_port + ports_per_node * i for i in range(node_count)]
        if hosts is None:
            # if use_flatfile, we should create it
            # because tests will just call build_cloud with use_flatfile=True
            # best to just create it all the time..may or may not be used
            write_flatfile(node_count=node_count, base_port=base_port)
            hostCount = 1
            if rand_shuffle:
                random.shuffle(portList)
            for p in portList:
                verboseprint("psutil starting node", i)
                newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1
        else:
            # if hosts, the flatfile was created and uploaded to hosts already
            # I guess don't recreate it, don't overwrite the one that was copied beforehand.
            # we don't always use the flatfile (use_flatfile=False)
            # Suppose we could dispatch from the flatfile to match its contents,
            # but sometimes we want to test with a bad/different flatfile than the one h2o is invoked with?
            hostCount = len(hosts)
            hostPortList = []
            for h in hosts:
                for port in portList:
                    hostPortList.append((h, port))
            if rand_shuffle: random.shuffle(hostPortList)
            for (h, p) in hostPortList:
                verboseprint('ssh starting node', totalNodes, 'via', h)
                newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs)
                nodeList.append(newNode)
                totalNodes += 1

        verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts")
        start = time.time()
        # UPDATE: best to stabilize on the last node!
        stabilize_cloud(nodeList[0], nodeList,
            timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noSandboxErrorCheck=True)
        verboseprint(len(nodeList), "Last added node stabilized in ", time.time() - start, " secs")
        verboseprint("Built cloud: %d nodes on %d hosts, in %d s" % \
            (len(nodeList), hostCount, (time.time() - start)))
        h2p.red_print("Built cloud:", nodeList[0].java_heap_GB, "GB java heap(s) with",
            len(nodeList), "total nodes")

        # FIX! using "consensus" in node[-1] should mean this is unnecessary?
        # maybe there's a bug. For now do this. long term: don't want?
        # UPDATE: do it for all cases now 2/14/13
        if conservative: # still needed?
            for n in nodeList:
                stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noSandboxErrorCheck=True)

        # this does some extra checking now
        # verifies cloud name too if param is not None
        verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name)

        # best to check for any errors due to cloud building right away?
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though
        if cleanup and nodeList:
            for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    print len(nodeList), "total jvms in H2O cloud"
    # put the test start message in the h2o log, to create a marker
    nodeList[0].h2o_log_msg()

    if h2o_args.config_json:
        LOG_DIR = get_sandbox_name()
        # like cp -p. Save the config file, to sandbox
        print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR
        shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json))

    # Figure out some stuff about how this test was run
    cs_time = str(datetime.datetime.now())
    cs_cwd = os.getcwd()
    cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args)
    cs_python_test_name = h2o_args.python_test_name
    if h2o_args.config_json:
        cs_config_json = os.path.abspath(h2o_args.config_json)
    else:
        cs_config_json = None
    cs_username = h2o_args.python_username
    cs_ip = h2o_args.python_cmd_ip

    # dump the nodes state to a json file # include enough extra info to have someone
    # rebuild the cloud if a test fails that was using that cloud.
    if create_json:
        q = {
            'cloud_start':
                {
                    'time': cs_time,
                    'cwd': cs_cwd,
                    'python_test_name': cs_python_test_name,
                    'python_cmd_line': cs_python_cmd_line,
                    'config_json': cs_config_json,
                    'username': cs_username,
                    'ip': cs_ip,
                },
            'h2o_nodes': h2o_util.json_repr(nodeList),
        }

        with open('h2o-nodes.json', 'w+') as f:
            f.write(json.dumps(q, indent=4))

    # save it to a local global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
Example 18
    def do_json_request(self,
                        jsonRequest=None,
                        fullUrl=None,
                        timeout=10,
                        params=None,
                        returnFast=False,
                        cmd='get',
                        extraComment=None,
                        ignoreH2oError=False,
                        noExtraErrorCheck=False,
                        **kwargs):
        # if url param is used, use it as full url. otherwise create it from the jsonRequest
        if fullUrl:
            url = fullUrl
        else:
            url = self.url(jsonRequest)

        # remove any params that are 'None'
        # need to copy dictionary, since can't delete while iterating
        if params is not None:
            params2 = params.copy()
            for k in params2:
                if params2[k] is None:
                    del params[k]
            paramsStr = '?' + '&'.join(
                ['%s=%s' % (k, v) for (k, v) in params.items()])
        else:
            paramsStr = ''

        if extraComment:
            log('Start ' + url + paramsStr, comment=extraComment)
        else:
            log('Start ' + url + paramsStr)

        log_rest("")
        log_rest(
            "----------------------------------------------------------------------\n"
        )
        if extraComment:
            log_rest("# Extra comment info about this request: " +
                     extraComment)
        if cmd == 'get':
            log_rest("GET")
        else:
            log_rest("POST")
        log_rest(url + paramsStr)

        # files get passed thru kwargs here
        try:
            if cmd == 'post':
                r = requests.post(url,
                                  timeout=timeout,
                                  params=params,
                                  **kwargs)
            else:
                r = requests.get(url, timeout=timeout, params=params, **kwargs)

        except Exception, e:
            # rethrow the exception after we've checked for stack trace from h2o
            # out-of-memory errors may not show up right away, so we should wait for h2o
            # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
            # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
            exc_info = sys.exc_info()
            # use this to ignore the initial connection errors during build cloud when h2o is coming up
            if not noExtraErrorCheck:
                h2p.red_print(
                    "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.."
                    % (url + paramsStr))
                time.sleep(2)
                check_sandbox_for_errors(
                    python_test_name=h2o_args.python_test_name)
            log_rest("")
            log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message))
            raise exc_info[1], None, exc_info[2]