def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None,
    returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs):
    """Issue an HTTP request ('get', 'post' or 'delete') to this h2o node.

    jsonRequest: path portion of the url (used when fullUrl is None).
    fullUrl: complete url; overrides jsonRequest when given.
    timeout: per-request timeout in secs; forced to None (infinite) by --no_timeout.
    params: dict of query params; keys whose value is None are dropped (params is
        mutated in place).
    postData: form-encoded body for 'post' requests.
    noExtraErrorCheck: when True, skip the sandbox scan on request exceptions
        (used while the cloud is still coming up).
    Raises ValueError for an unknown cmd; re-raises any requests exception with
    its original traceback after an optional sandbox check.
    """
    # if url param is used, use it as full url. otherwise create from the jsonRequest
    if fullUrl:
        url = fullUrl
    else:
        url = self.url(jsonRequest)

    # remove any params that are 'None'
    # need to copy dictionary, since can't delete while iterating
    if params is not None:
        params2 = params.copy()
        for k in params2:
            if params2[k] is None:
                del params[k]
        # paramsStr is only for logging; requests gets the params dict itself
        paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
    else:
        paramsStr = ''

    # include the POST body (and any caller comment) in the log line
    extraComment2 = " " + str(postData)+";" if cmd=='post' else ""
    extraComment2 += extraComment if extraComment else ""

    if len(extraComment2) > 0:
        log('Start ' + url + paramsStr, comment=extraComment2)
    else:
        log('Start ' + url + paramsStr)

    # file get passed thru kwargs here
    if h2o_args.no_timeout:
        timeout = None # infinite

    try:
        if 'post' == cmd:
            # NOTE == cmd: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST.
            # This is temporary.
            #
            # This following does application/json (aka, posting JSON in the body):
            # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(postData), **kwargs)
            #
            # This does form-encoded, which doesn't allow POST of nested structures
            r = requests.post(url, timeout=timeout, params=params, data=postData, **kwargs)
        elif 'delete' == cmd:
            r = requests.delete(url, timeout=timeout, params=params, **kwargs)
        elif 'get' == cmd:
            r = requests.get(url, timeout=timeout, params=params, **kwargs)
        else:
            raise ValueError("Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd)

    except Exception, e:
        # rethrow the exception after we've checked for stack trace from h2o
        # out of memory errors maybe don't show up right away? so we should wait for h2o
        # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
        # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
        exc_info = sys.exc_info()
        # use this to ignore the initial connection errors during build cloud when h2o is coming up
        if not noExtraErrorCheck:
            h2p.red_print(
                "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr))
            time.sleep(2)
            check_sandbox_for_errors(python_test_name=h2o_args.python_test_name);
        # Py2 three-expression raise: re-raise with the original traceback
        raise exc_info[1], None, exc_info[2]
def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, returnFast=False,
    cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs):
    """Issue an HTTP 'get' or 'post' to this h2o node, mirroring the request to
    the REST log via log_rest().

    jsonRequest: path portion of the url (used when fullUrl is None).
    fullUrl: complete url; overrides jsonRequest when given.
    params: dict of query params; keys whose value is None are dropped (params is
        mutated in place).
    noExtraErrorCheck: when True, skip the sandbox scan on request exceptions
        (used while the cloud is still coming up).
    Re-raises any requests exception with its original traceback after logging it.
    """
    # if url param is used, use it as full url. otherwise create from the jsonRequest
    if fullUrl:
        url = fullUrl
    else:
        url = self.url(jsonRequest)

    # remove any params that are 'None'
    # need to copy dictionary, since can't delete while iterating
    if params is not None:
        params2 = params.copy()
        for k in params2:
            if params2[k] is None:
                del params[k]
        # paramsStr is only for logging; requests gets the params dict itself
        paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()])
    else:
        paramsStr = ''

    if extraComment:
        log('Start ' + url + paramsStr, comment=extraComment)
    else:
        log('Start ' + url + paramsStr)

    # record a human-readable transcript of the request in the REST log
    log_rest("")
    log_rest("----------------------------------------------------------------------\n")
    if extraComment:
        log_rest("# Extra comment info about this request: " + extraComment)
    if cmd == 'get':
        log_rest("GET")
    else:
        log_rest("POST")
    log_rest(url + paramsStr)

    # file get passed thru kwargs here
    try:
        if cmd == 'post':
            r = requests.post(url, timeout=timeout, params=params, **kwargs)
        else:
            r = requests.get(url, timeout=timeout, params=params, **kwargs)
    except Exception, e:
        # rethrow the exception after we've checked for stack trace from h2o
        # out of memory errors maybe don't show up right away? so we should wait for h2o
        # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
        # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
        # (this is new/experimental)
        exc_info = sys.exc_info()
        # use this to ignore the initial connection errors during build cloud when h2o is coming up
        if not noExtraErrorCheck:
            h2p.red_print(
                "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr))
            time.sleep(2)
            check_sandbox_for_errors(python_test_name=h2o_args.python_test_name);
        log_rest("")
        log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message))
        # Py2 three-expression raise: re-raise with the original traceback
        raise exc_info[1], None, exc_info[2]
def run_cmd(self, cmd):
    """Run `cmd` on the remote host over the ssh connection, copying the
    command's stdout and stderr to the local stdout/stderr."""
    log('Running `%s` on %s' % (cmd, self))
    remote_in, remote_out, remote_err = self.ssh.exec_command(cmd)
    # nothing to feed the command; close its stdin immediately
    remote_in.close()
    # drain each remote stream into the matching local one, stdout first
    for remote_stream, local_stream in ((remote_out, sys.stdout), (remote_err, sys.stderr)):
        local_stream.write(remote_stream.read())
        local_stream.flush()
        remote_stream.close()
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs): params = {'key': key} paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()]) url = self.url('2/DownloadDataset.json') log('Start ' + url + paramsStr, comment=csvPathname) # do it (absorb in 1024 byte chunks) r = requests.get(url, params=params, timeout=timeoutSecs) print "csv_download r.headers:", r.headers if r.status_code == 200: f = open(csvPathname, 'wb') for chunk in r.iter_content(1024): f.write(chunk) print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname)
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs): params = {'key': key} paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()]) url = self.url('3/DownloadDataset.json') log('Start ' + url + paramsStr, comment=csvPathname) # do it (absorb in 1024 byte chunks) r = requests.get(url, params=params, timeout=timeoutSecs) print "csv_download r.headers:", r.headers if r.status_code == 200: f = open(csvPathname, 'wb') for chunk in r.iter_content(1024): f.write(chunk) print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname)
def log_download(self, logDir=None, timeoutSecs=30, **kwargs): if logDir == None: logDir = get_sandbox_name() url = self.url('Logs/download') log('Start ' + url) print "\nDownloading h2o log(s) using:", url r = requests.get(url, timeout=timeoutSecs, **kwargs) if not r or not r.ok: raise Exception("Maybe bad url? no r in log_download %s in %s:" % inspect.stack()[1][3]) z = zipfile.ZipFile(StringIO.StringIO(r.content)) print "z.namelist:", z.namelist() print "z.printdir:", z.printdir() nameList = z.namelist() # the first is the h2ologs dir name. h2oLogDir = logDir + "/" + nameList.pop(0) print "h2oLogDir:", h2oLogDir print "logDir:", logDir # it's a zip of zipped files # first unzip it z = zipfile.ZipFile(StringIO.StringIO(r.content)) z.extractall(logDir) # unzipped file should be in LOG_DIR now # now unzip the files in that directory for zname in nameList: resultList = h2o_util.flat_unzip(logDir + "/" + zname, logDir) print "\nlogDir:", logDir for logfile in resultList: numLines = sum(1 for line in open(logfile)) print logfile, "Lines:", numLines print return resultList
def log_download(self, logDir=None, timeoutSecs=30, **kwargs): if logDir == None: logDir = get_sandbox_name() url = self.url('LogDownload.json') log('Start ' + url); print "\nDownloading h2o log(s) using:", url r = requests.get(url, timeout=timeoutSecs, **kwargs) if not r or not r.ok: raise Exception("Maybe bad url? no r in log_download %s in %s:" % inspect.stack()[1][3]) z = zipfile.ZipFile(StringIO.StringIO(r.content)) print "z.namelist:", z.namelist() print "z.printdir:", z.printdir() nameList = z.namelist() # the first is the h2ologs dir name. h2oLogDir = logDir + "/" + nameList.pop(0) print "h2oLogDir:", h2oLogDir print "logDir:", logDir # it's a zip of zipped files # first unzip it z = zipfile.ZipFile(StringIO.StringIO(r.content)) z.extractall(logDir) # unzipped file should be in LOG_DIR now # now unzip the files in that directory for zname in nameList: resultList = h2o_util.flat_unzip(logDir + "/" + zname, logDir) print "\nlogDir:", logDir for logfile in resultList: numLines = sum(1 for line in open(logfile)) print logfile, "Lines:", numLines print return resultList
def csv_download(self, key, csvPathname, timeoutSecs=60, **kwargs): params = { 'key': key } paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()]) url = self.url('DownloadDataset.json') log('Start ' + url + paramsStr, comment=csvPathname) # do it (absorb in 1024 byte chunks) r = requests.get(url, params=params, timeout=timeoutSecs) print "csv_download r.headers:", r.headers if r.status_code == 200: f = open(csvPathname, 'wb') for chunk in r.iter_content(1024): f.write(chunk) else: raise Exception("unexpected status for DownloadDataset: %s" % r.status_code) print csvPathname, "size:", h2o_util.file_size_formatted(csvPathname) h2o_sandbox.check_sandbox_for_errors() # FIX! we're skipping all the checks in do_json_request. And no json return? return
def browseJsonHistoryAsUrl(retryDelaySecs=0.25): if not h2o_args.browse_disable: # stop if you get to -50, don't want more than 50 tabs on browser tabCount = 0 for url in h2o_nodes.json_url_history: # ignore the Cloud "alive" views # FIX! we probably want to expand ignoring to more than Cloud? if not re.search('Cloud', url): # url = re.sub("GLMGridProgress","GLMGridProgress.html",url) # url = re.sub("Progress","Progress.html",url) url = re.sub("ProgressPage","ProgressPage.html",url) url = re.sub("Progress?!Page","Progress.html",url) url = re.sub("Progress\?","Progress.html?",url) url = re.sub(".json",".html",url) print "browseJsonHistoryAsUrl:", url print "same, decoded:", urllib.unquote(url) # does this open in same window? log(url, comment="From browseJsonHistoryAsUrl") webbrowser.open(url, new=0) time.sleep(retryDelaySecs) tabCount += 1 if tabCount==50: break;
def browseJsonHistoryAsUrl(retryDelaySecs=0.25): if not h2o_args.browse_disable: # stop if you get to -50, don't want more than 50 tabs on browser tabCount = 0 for url in h2o_nodes.json_url_history: # ignore the Cloud "alive" views # FIX! we probably want to expand ignoring to more than Cloud? if not re.search('Cloud', url): # url = re.sub("GLMGridProgress","GLMGridProgress.html",url) # url = re.sub("Progress","Progress.html",url) url = re.sub("ProgressPage", "ProgressPage.html", url) url = re.sub("Progress?!Page", "Progress.html", url) url = re.sub("Progress\?", "Progress.html?", url) url = re.sub(".json", ".html", url) print "browseJsonHistoryAsUrl:", url print "same, decoded:", urllib.unquote(url) # does this open in same window? log(url, comment="From browseJsonHistoryAsUrl") webbrowser.open(url, new=0) time.sleep(retryDelaySecs) tabCount += 1 if tabCount == 50: break
def build_cloud_with_json(h2o_nodes_json='h2o-nodes.json'):
    """Rebuild the python-side node list for an already-running h2o cloud, from
    the state a prior build_cloud() dumped to h2o_nodes_json.

    Validates the 'cloud_start' metadata in the file, wraps each saved node
    state in an ExternalH2O, saves the list to the global h2o_nodes.nodes, and
    returns it.
    """
    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()")
    log("#*********************************************************************")

    print "This only makes sense if h2o is running as defined by", h2o_nodes_json
    print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here"
    print "No output means no h2o here! Some other info about stuff on the system is printed first though."
    import h2o_os_util

    if not os.path.exists(h2o_nodes_json):
        raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file")

    # h2o_os_util.show_h2o_processes()

    with open(h2o_nodes_json, 'rb') as f:
        cloneJson = json.load(f)

    # These are supposed to be in the file.
    # Just check the first one. if not there, the file must be wrong
    if not 'cloud_start' in cloneJson:
        raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json)
    else:
        cs = cloneJson['cloud_start']
        print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json
        # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff
        valList = ['time', 'cwd', 'python_test_name', 'python_cmd_line', 'config_json', 'username', 'ip']
        for v in valList:
            if v not in cs:
                raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json))
            print "cloud_start['%s']: %s" % (v, cs[v])

    # this is the internal node state for python..nodes rebuild
    nodeStateList = cloneJson['h2o_nodes']
    nodeList = []
    if not nodeStateList:
        raise Exception("nodeStateList is empty. %s file must be empty/corrupt" % h2o_nodes_json)

    for nodeState in nodeStateList:
        print "Cloning state for node", nodeState['node_id'], 'from', h2o_nodes_json
        newNode = ExternalH2O(nodeState)
        nodeList.append(newNode)

    print ""
    h2p.red_print("Ingested from json:", nodeList[0].java_heap_GB, "GB java heap(s) with",
        len(nodeList), "total nodes")
    print ""

    # put the test start message in the h2o log, to create a marker
    nodeList[0].h2o_log_msg()
    # save it to a global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
def __init__(self, host, *args, **kwargs):
    """Launch an h2o jvm on a remote host over ssh.

    host: the remote host object; used to upload the jar/flatfile/credentials,
    open the ssh channel, and name the local capture logs.
    Remaining args/kwargs are passed to the base H2O __init__.
    """
    super(RemoteH2O, self).__init__(*args, **kwargs)
    # it gets set True if an address is specified for LocalH2o init. Override.
    if 'force_ip' in kwargs:
        self.force_ip = kwargs['force_ip']

    self.remoteH2O = True # so we can tell if we're remote or local
    self.jar = host.upload_file('build/h2o.jar')
    # need to copy the flatfile. We don't always use it (depends on h2o args)
    self.flatfile = host.upload_file(flatfile_pathname())
    # distribute AWS credentials
    if self.aws_credentials:
        self.aws_credentials = host.upload_file(self.aws_credentials)
    if self.hdfs_config:
        self.hdfs_config = host.upload_file(self.hdfs_config)

    if self.use_home_for_ice:
        # this will be the username used to ssh to the host
        self.ice = "/home/" + host.username + '/ice.%d.%s' % (self.port, time.time())
    else:
        self.ice = '/tmp/ice.%d.%s' % (self.port, time.time())

    self.channel = host.open_channel()
    ### FIX! TODO...we don't check on remote hosts yet

    # this fires up h2o over there
    cmd = ' '.join(self.get_args())
    # UPDATE: somehow java -jar on cygwin target (xp) can't handle /tmp/h2o*jar
    # because it's a windows executable and expects windows style path names.
    # but if we cd into /tmp, it can do java -jar h2o*jar.
    # So just split out the /tmp (pretend we don't know) and the h2o jar file name
    # Newer windows may not have this problem? Do the ls (this goes into the local stdout
    # files) so we can see the file is really where we expect.
    # This hack only works when the dest is /tmp/h2o*jar. It's okay to execute
    # with pwd = /tmp. If /tmp/ isn't in the jar path, I guess things will be the same as
    # normal.
    # dead branch by construction; flip to 1 == 1 to enable the cygwin workaround
    if 1 == 0: # enable if you want windows remote machines
        cmdList = ["cd /tmp"] # separate by ;<space> when we join
        cmdList += ["ls -ltr " + self.jar]
        cmdList += [re.sub("/tmp/", "", cmd)]
        self.channel.exec_command("; ".join(cmdList))
    else:
        self.channel.exec_command(cmd)

    if self.capture_output:
        if self.node_id is not None:
            logPrefix = 'remote-h2o-' + str(self.node_id)
        else:
            logPrefix = 'remote-h2o'
        logPrefix += '-' + host.h2o_addr

        outfd, outpath = tmp_file(logPrefix + '.stdout.', '.log')
        errfd, errpath = tmp_file(logPrefix + '.stderr.', '.log')

        # background-copy the remote stdout/stderr into the capture files
        drain(self.channel.makefile(), outfd)
        drain(self.channel.makefile_stderr(), errfd)
        comment = 'Remote on %s, stdout %s, stderr %s' % (
            self.h2o_addr, os.path.basename(outpath), os.path.basename(errpath))
    else:
        drain(self.channel.makefile(), sys.stdout)
        drain(self.channel.makefile_stderr(), sys.stderr)
        comment = 'Remote on %s' % self.h2o_addr

    log(cmd, comment=comment)
class H2O(object):
    """Base wrapper for one h2o node: holds launch/config state and provides
    url building plus the json-over-HTTP request helper."""

    def __init__(self,
        use_this_ip_addr=None, port=54321, capture_output=True,
        force_ip=False, network=None,
        use_debugger=None, classpath=None,
        use_hdfs=False, use_maprfs=False,
        hdfs_version=None, hdfs_name_node=None, hdfs_config=None,
        aws_credentials=None,
        use_flatfile=False, java_heap_GB=None, java_heap_MB=None, java_extra_args=None,
        use_home_for_ice=False, node_id=None, username=None,
        random_udp_drop=False, force_tcp=False,
        redirect_import_folder_to_s3_path=None,
        redirect_import_folder_to_s3n_path=None,
        disable_h2o_log=False,
        enable_benchmark_log=False,
        h2o_remote_buckets_root=None,
        delete_keys_at_teardown=False,
        cloud_name=None,
        disable_assertions=None,
        sandbox_ignore_errors=False,
        ):
        if use_hdfs:
            # see if we can touch a 0xdata machine
            try:
                # long timeout in ec2...bad
                a = requests.get('http://172.16.2.176:80', timeout=1)
                hdfs_0xdata_visible = True
            except:
                hdfs_0xdata_visible = False

            # different defaults, depending on where we're running
            if hdfs_name_node is None:
                if hdfs_0xdata_visible:
                    hdfs_name_node = "172.16.2.176"
                else: # ec2
                    hdfs_name_node = "10.78.14.235:9000"

            if hdfs_version is None:
                if hdfs_0xdata_visible:
                    hdfs_version = "cdh4"
                else: # ec2
                    hdfs_version = "0.20.2"

        self.redirect_import_folder_to_s3_path = redirect_import_folder_to_s3_path
        self.redirect_import_folder_to_s3n_path = redirect_import_folder_to_s3n_path

        self.aws_credentials = aws_credentials
        self.port = port
        # None is legal for self.h2o_addr.
        # means we won't give an ip to the jar when we start.
        # Or we can say use use_this_ip_addr=127.0.0.1, or the known address
        # if use_this_addr is None, use 127.0.0.1 for urls and json
        # Command line arg 'ip_from_cmd_line' dominates:
        # ip_from_cmd_line and use_this_ip_addr shouldn't be used for mutli-node
        if h2o_args.ip_from_cmd_line:
            self.h2o_addr = h2o_args.ip_from_cmd_line
        else:
            self.h2o_addr = use_this_ip_addr

        self.force_ip = force_ip or (self.h2o_addr != None)

        if self.h2o_addr:
            self.http_addr = self.h2o_addr
        else:
            self.http_addr = h2o_args.python_cmd_ip

        if h2o_args.network_from_cmd_line:
            self.network = h2o_args.network_from_cmd_line
        else:
            self.network = network

        # command line should always dominate for enabling
        if h2o_args.debugger:
            use_debugger = True
        self.use_debugger = use_debugger

        self.classpath = classpath
        self.capture_output = capture_output

        self.use_hdfs = use_hdfs
        self.use_maprfs = use_maprfs
        self.hdfs_name_node = hdfs_name_node
        self.hdfs_version = hdfs_version
        self.hdfs_config = hdfs_config

        self.use_flatfile = use_flatfile

        self.java_heap_GB = java_heap_GB
        self.java_heap_MB = java_heap_MB
        self.java_extra_args = java_extra_args

        self.use_home_for_ice = use_home_for_ice
        self.node_id = node_id

        if username:
            self.username = username
        else:
            self.username = getpass.getuser()

        # don't want multiple reports from tearDown and tearDownClass
        # have nodes[0] remember (0 always exists)
        self.sandbox_error_was_reported = False
        self.sandbox_ignore_errors = sandbox_ignore_errors

        self.random_udp_drop = random_udp_drop
        self.force_tcp = force_tcp
        self.disable_h2o_log = disable_h2o_log

        # this dumps stats from tests, and perf stats while polling to benchmark.log
        self.enable_benchmark_log = enable_benchmark_log
        self.h2o_remote_buckets_root = h2o_remote_buckets_root
        self.delete_keys_at_teardown = delete_keys_at_teardown
        self.disable_assertions = disable_assertions

        if cloud_name:
            self.cloud_name = cloud_name
        else:
            self.cloud_name = 'pytest-%s-%s' % (getpass.getuser(), os.getpid())

    def __str__(self):
        return '%s - http://%s:%d/' % (type(self), self.http_addr, self.port)

    def url(self, loc, port=None):
        """Build an http url to this node for path `loc` (optionally overriding
        the port); a leading '/' in loc is honored."""
        # always use the new api port
        if port is None:
            port = self.port
        if loc.startswith('/'):
            delim = ''
        else:
            delim = '/'
        u = 'http://%s:%d%s%s' % (self.http_addr, port, delim, loc)
        return u

    def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, returnFast=False,
        cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs):
        """Issue an HTTP 'get' or 'post' to this node, mirroring the request and
        the response to the REST log via log_rest().

        params: dict of query params; keys whose value is None are dropped
        (params is mutated in place). On a requests exception, optionally scans
        the sandbox, then re-raises with the original traceback.
        """
        # if url param is used, use it as full url. otherwise create from the jsonRequest
        if fullUrl:
            url = fullUrl
        else:
            url = self.url(jsonRequest)

        # remove any params that are 'None'
        # need to copy dictionary, since can't delete while iterating
        if params is not None:
            params2 = params.copy()
            for k in params2:
                if params2[k] is None:
                    del params[k]
            # paramsStr is only for logging; requests gets the params dict itself
            paramsStr = '?' + '&'.join(
                ['%s=%s' % (k, v) for (k, v) in params.items()])
        else:
            paramsStr = ''

        if extraComment:
            log('Start ' + url + paramsStr, comment=extraComment)
        else:
            log('Start ' + url + paramsStr)

        # record a human-readable transcript of the request in the REST log
        log_rest("")
        log_rest(
            "----------------------------------------------------------------------\n"
        )
        if extraComment:
            log_rest("# Extra comment info about this request: " + extraComment)
        if cmd == 'get':
            log_rest("GET")
        else:
            log_rest("POST")
        log_rest(url + paramsStr)

        # file get passed thru kwargs here
        try:
            if cmd == 'post':
                r = requests.post(url, timeout=timeout, params=params, **kwargs)
            else:
                r = requests.get(url, timeout=timeout, params=params, **kwargs)
        except Exception, e:
            # rethrow the exception after we've checked for stack trace from h2o
            # out of memory errors maybe don't show up right away? so we should wait for h2o
            # to get it out to h2o stdout. We don't want to rely on cloud teardown to check
            # because there's no delay, and we don't want to delay all cloud teardowns by waiting.
            exc_info = sys.exc_info()
            # use this to ignore the initial connection errors during build cloud when h2o is coming up
            if not noExtraErrorCheck:
                h2p.red_print(
                    "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr))
                time.sleep(2)
                check_sandbox_for_errors(
                    python_test_name=h2o_args.python_test_name)
            log_rest("")
            log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message))
            # Py2 three-expression raise: re-raise with the original traceback
            raise exc_info[1], None, exc_info[2]

        log_rest("")
        # best-effort: log the response status and body
        try:
            if r is None:
                log_rest("r is None")
            else:
                log_rest("HTTP status code: " + str(r.status_code))
                if hasattr(r, 'text'):
                    if r.text is None:
                        log_rest("r.text is None")
                    else:
                        log_rest(r.text)
                else:
                    log_rest("r does not have attr text")
        except Exception, e:
            # Paranoid exception catch.
            log('WARNING: ignoring unexpected exception on %s' + url + paramsStr)
            # Ignore logging exceptions in the case that the above error checking isn't sufficient.
            pass
def build_cloud_with_json(h2o_nodes_json="h2o-nodes.json"):
    """Rebuild the python-side node list for an already-running h2o cloud, from
    the state a prior build_cloud() dumped to h2o_nodes_json.

    Validates the 'cloud_start' metadata, wraps each saved node state in an
    ExternalH2O, verifies the cloud size, copies the json file to the sandbox,
    saves the list to the global h2o_nodes.nodes, and returns it.
    """
    # local sandbox may not exist. Don't clean if it does, just append
    if not os.path.exists(LOG_DIR):
        os.mkdir(LOG_DIR)

    log("#*********************************************************************")
    log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()")
    log("#*********************************************************************")

    print "This only makes sense if h2o is running as defined by", h2o_nodes_json
    print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here"
    print "No output means no h2o here! Some other info about stuff on the system is printed first though."
    import h2o_os_util

    if not os.path.exists(h2o_nodes_json):
        raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file")

    ## h2o_os_util.show_h2o_processes()

    with open(h2o_nodes_json, "rb") as f:
        cloneJson = json.load(f)

    # These are supposed to be in the file.
    # Just check the first one. if not there, the file must be wrong
    if not "cloud_start" in cloneJson:
        raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json)
    else:
        cs = cloneJson["cloud_start"]
        print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json
        # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff
        valList = ["time", "cwd", "python_test_name", "python_cmd_line", "config_json", "username", "ip"]
        for v in valList:
            if v not in cs:
                raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json))
            print "cloud_start['%s']: %s" % (v, cs[v])

    # this is the internal node state for python..nodes rebuild
    nodeStateList = cloneJson["h2o_nodes"]
    nodeList = []
    if not nodeStateList:
        raise Exception("nodeStateList is empty. %s file must be empty/corrupt" % h2o_nodes_json)

    try:
        for nodeState in nodeStateList:
            print "Cloning state for node", nodeState["node_id"], "from", h2o_nodes_json
            newNode = ExternalH2O(nodeState)
            nodeList.append(newNode)

        # If it's an existing cloud, it may already be locked. so never check.
        # we don't have the cloud name in the -ccj since it may change (and the file be static?)
        # so don't check expectedCloudName
        verify_cloud_size(nodeList, expectedCloudName=None, expectedLocked=None)

        # best to check for any errors right away?
        # (we won't report errors from prior tests due to marker stuff?
        ## check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)

        # put the test start message in the h2o log, to create a marker
        nodeList[0].h2o_log_msg()

    except:
        # nodeList might be empty in some exception cases?
        # no shutdown issued first, though
        ## if cleanup and nodeList:
        ##     for n in nodeList: n.terminate()
        check_sandbox_for_errors(python_test_name=h2o_args.python_test_name)
        raise

    # like cp -p. Save the config file, to sandbox
    print "Saving the ", h2o_nodes_json, "we used to", LOG_DIR
    shutil.copy(h2o_nodes_json, LOG_DIR + "/" + os.path.basename(h2o_nodes_json))

    print ""
    h2p.red_print("Ingested from json:", nodeList[0].java_heap_GB, "GB java heap(s) with",
        len(nodeList), "total nodes")
    print ""

    # save it to a global copy, in case it's needed for tearDown
    h2o_nodes.nodes[:] = nodeList
    return nodeList
def build_cloud(node_count=1, base_port=None, hosts=None, timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True, conservative=False, create_json=False, clone_cloud=None, init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs): # expectedSize is only used if usecloud # usecloud can be passed thru build_cloud param, or command line # not in config json though so no build_cloud_with_hosts path. # redirect to build_cloud_with_json if a command line arg # wants to force a test to ignore it's build_cloud/build_cloud_with_hosts # (both come thru here) # clone_cloud is just another way to get the effect (maybe ec2 config file thru # build_cloud_with_hosts? global stdout_wrapped if not h2o_args.disable_time_stamp and not stdout_wrapped: sys.stdout = OutWrapper(sys.stdout) stdout_wrapped = True if h2o_args.usecloud or usecloud: # for now, just have fixed name in local file. (think of this as a temp or debug file) # eventually we'll pass the json object instead for speed? nodesJsonPathname = "h2o_fc-nodes.json" elif h2o_args.clone_cloud_json: nodesJsonPathname = h2o_args.clone_cloud_json elif clone_cloud: nodesJsonPathname = clone_cloud else: # normal build_cloud() doesn't use nodesJsonPathname = None # usecloud dominates over all if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud): # then build_cloud_with_json with json object # we don't need to specify these defaults, but leave here to show that we can pass # I suppose kwargs will have it if h2o_args.usecloud: ip_port = h2o_args.usecloud elif usecloud: ip_port = usecloud else: ip_port = None # h2o_args dominates if h2o_args.usecloud_size: # only used for expected size useCloudExpectedSize = h2o_args.usecloud_size else: useCloudExpectedSize = usecloud_size if (h2o_args.usecloud or usecloud): nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port, expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs) # potentially passed in kwargs # 
hdfs_version='cdh4', hdfs_config=None, hdfs_name_node='172.16.1.176', else: if h2o_args.clone_cloud_json: nodesJsonPathname = h2o_args.clone_cloud_json else: nodesJsonPathname = clone_cloud nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname) return nodeList # else # moved to here from unit_main. so will run with nosetests too! # Normally do this. # Don't if build_cloud_with_hosts() did and put a flatfile in there already! if init_sandbox: clean_sandbox() log("#*********************************************************************") log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ") log("#*********************************************************************") # start up h2o to report the java version (once). output to python stdout # only do this for regression testing # temporarily disable this, to go a little faster # if getpass.getuser() == 'jenkins': # check_h2o_version() ports_per_node = 2 nodeList = [] # shift the port used to run groups of tests on the same machine at the same time? base_port = get_base_port(base_port) try: # if no hosts list, use psutil method on local host. totalNodes = 0 # doing this list outside the loops so we can shuffle for better test variation # this jvm startup shuffle is independent from the flatfile shuffle portList = [base_port + ports_per_node * i for i in range(node_count)] if hosts is None: # if use_flatfile, we should create it # because tests will just call build_cloud with use_flatfile=True # best to just create it all the time..may or may not be used write_flatfile(node_count=node_count, base_port=base_port) hostCount = 1 if rand_shuffle: random.shuffle(portList) for p in portList: verboseprint("psutil starting node", i) newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 else: # if hosts, the flatfile was created and uploaded to hosts already # I guess don't recreate it, don't overwrite the one that was copied beforehand. 
# we don't always use the flatfile (use_flatfile=False) # Suppose we could dispatch from the flatfile to match it's contents # but sometimes we want to test with a bad/different flatfile then we invoke h2o? hostCount = len(hosts) hostPortList = [] for h in hosts: for port in portList: hostPortList.append((h, port)) if rand_shuffle: random.shuffle(hostPortList) for (h, p) in hostPortList: verboseprint('ssh starting node', totalNodes, 'via', h) newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts") start = time.time() # UPDATE: best to stabilize on the last node! # FIX! for now, always check sandbox, because h2oddev has TIME_WAIT port problems stabilize_cloud(nodeList[0], nodeList, timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noExtraErrorCheck=False) stabilizeTime = time.time() - start verboseprint(len(nodeList), "Last added node stabilized in ", stabilizeTime, " secs") # assume all the heap sizes are the same as zero if nodeList[0].java_heap_GB: heapSize = str(nodeList[0].java_heap_GB) + " GB" elif nodeList[0].java_heap_GB: heapSize = str(nodeList[0].java_heap_MB) + " MB" else: heapSize = "(unknown)" h2p.red_print("Built cloud: %s java heap(s) with %d nodes on %d hosts, stabilizing in %d secs" % \ (heapSize, len(nodeList), hostCount, stabilizeTime)) # FIX! using "consensus" in node[-1] should mean this is unnecessary? # maybe there's a bug. For now do this. long term: don't want? # UPDATE: do it for all cases now 2/14/13 if conservative: # still needed? for n in nodeList: # FIX! for now, always check sandbox, because h2oddev has TIME_WAIT port problems stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noExtraErrorCheck=False) # this does some extra checking now # verifies cloud name too if param is not None verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name, expectedLocked=0) # FIX! 
should probably check that the cloud's lock=0. It will go to 1 later. # but if it's an existing cloud, it may already be locked. # That will be in build_cloud_with_json, though # best to check for any errors due to cloud building right away? check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) # put the test start message in the h2o log, to create a marker nodeList[0].h2o_log_msg() except: # nodeList might be empty in some exception cases? # no shutdown issued first, though if cleanup and nodeList: for n in nodeList: n.terminate() check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) raise print len(nodeList), "total jvms in H2O cloud" if h2o_args.config_json: # like cp -p. Save the config file, to sandbox print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json)) if create_json: # Figure out some stuff about how this test was run cs_time = str(datetime.datetime.now()) cs_cwd = os.getcwd() cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args) cs_python_test_name = h2o_args.python_test_name if h2o_args.config_json: cs_config_json = os.path.abspath(h2o_args.config_json) else: cs_config_json = None cs_username = h2o_args.python_username cs_ip = h2o_args.python_cmd_ip # dump the nodes state to a json file # include enough extra info to have someone # rebuild the cloud if a test fails that was using that cloud. q = { 'cloud_start': { 'time': cs_time, 'cwd': cs_cwd, 'python_test_name': cs_python_test_name, 'python_cmd_line': cs_python_cmd_line, 'config_json': cs_config_json, 'username': cs_username, 'ip': cs_ip, }, 'h2o_nodes': h2o_util.json_repr(nodeList), } with open('h2o-nodes.json', 'w+') as f: f.write(json.dumps(q, indent=4)) # save it to a local global copy, in case it's needed for tearDown h2o_nodes.nodes[:] = nodeList return nodeList
def build_cloud_with_json(h2o_nodes_json='h2o-nodes.json'): # local sandbox may not exist. Don't clean if it does, just append if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) log("#*********************************************************************") log("Starting new test: " + h2o_args.python_test_name + " at build_cloud_with_json()") log("#*********************************************************************") print "This only makes sense if h2o is running as defined by", h2o_nodes_json print "For now, assuming it's a cloud on this machine, and here's info on h2o processes running here" print "No output means no h2o here! Some other info about stuff on the system is printed first though." import h2o_os_util if not os.path.exists(h2o_nodes_json): raise Exception("build_cloud_with_json: Can't find " + h2o_nodes_json + " file") ## h2o_os_util.show_h2o_processes() with open(h2o_nodes_json, 'rb') as f: cloneJson = json.load(f) # These are supposed to be in the file. # Just check the first one. if not there, the file must be wrong if not 'cloud_start' in cloneJson: raise Exception("Can't find 'cloud_start' in %s, wrong file? h2o-nodes.json?" % h2o_nodes_json) else: cs = cloneJson['cloud_start'] print "Info on the how the cloud we're cloning was started (info from %s)" % h2o_nodes_json # required/legal values in 'cloud_start'. A robust check is good for easy debug when we add stuff valList = ['time', 'cwd', 'python_test_name', 'python_cmd_line', 'config_json', 'username', 'ip'] for v in valList: if v not in cs: raise Exception("Can't find %s in %s, wrong file or version change?" % (v, h2o_nodes_json)) print "cloud_start['%s']: %s" % (v, cs[v]) # this is the internal node state for python..nodes rebuild nodeStateList = cloneJson['h2o_nodes'] nodeList = [] if not nodeStateList: raise Exception("nodeStateList is empty. 
%s file must be empty/corrupt" % h2o_nodes_json) try: for nodeState in nodeStateList: print "Cloning state for node", nodeState['node_id'], 'from', h2o_nodes_json newNode = ExternalH2O(nodeState) nodeList.append(newNode) # If it's an existing cloud, it may already be locked. so never check. # we don't have the cloud name in the -ccj since it may change (and the file be static?) # so don't check expectedCloudName verify_cloud_size(nodeList, expectedCloudName=None, expectedLocked=None) # best to check for any errors right away? # (we won't report errors from prior tests due to marker stuff? ## check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) # put the test start message in the h2o log, to create a marker nodeList[0].h2o_log_msg() except: # nodeList might be empty in some exception cases? # no shutdown issued first, though ## if cleanup and nodeList: ## for n in nodeList: n.terminate() check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) raise # like cp -p. Save the config file, to sandbox print "Saving the ", h2o_nodes_json, "we used to", LOG_DIR shutil.copy(h2o_nodes_json, LOG_DIR + "/" + os.path.basename(h2o_nodes_json)) print "" h2p.red_print("Ingested from json:", nodeList[0].java_heap_GB, "GB java heap(s) with", len(nodeList), "total nodes") print "" # save it to a global copy, in case it's needed for tearDown h2o_nodes.nodes[:] = nodeList return nodeList
def build_cloud(node_count=1, base_port=None, hosts=None, timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True, conservative=False, create_json=False, clone_cloud=None, init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs): # expectedSize is only used if usecloud # usecloud can be passed thru build_cloud param, or command line # not in config json though so no build_cloud_with_hosts path. # redirect to build_cloud_with_json if a command line arg # wants to force a test to ignore it's build_cloud/build_cloud_with_hosts # (both come thru here) # clone_cloud is just another way to get the effect (maybe ec2 config file thru # build_cloud_with_hosts? global stdout_wrapped if not h2o_args.disable_time_stamp and not stdout_wrapped: sys.stdout = OutWrapper(sys.stdout) stdout_wrapped = True if h2o_args.usecloud or usecloud: # for now, just have fixed name in local file. (think of this as a temp or debug file) # eventually we'll pass the json object instead for speed? nodesJsonPathname = "h2o_fc-nodes.json" elif h2o_args.clone_cloud_json: nodesJsonPathname = h2o_args.clone_cloud_json elif clone_cloud: nodesJsonPathname = clone_cloud else: # normal build_cloud() doesn't use nodesJsonPathname = None # usecloud dominates over all if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud): # then build_cloud_with_json with json object # we don't need to specify these defaults, but leave here to show that we can pass # I suppose kwargs will have it if h2o_args.usecloud: ip_port = h2o_args.usecloud elif usecloud: ip_port = usecloud else: ip_port = None # h2o_args dominates if h2o_args.usecloud_size: # only used for expected size useCloudExpectedSize = h2o_args.usecloud_size else: useCloudExpectedSize = usecloud_size nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port, expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs) # potentially passed in kwargs # hdfs_version='cdh4', hdfs_config=None, 
hdfs_name_node='172.16.1.176', nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname) return nodeList # else # moved to here from unit_main. so will run with nosetests too! # Normally do this. # Don't if build_cloud_with_hosts() did and put a flatfile in there already! if init_sandbox: clean_sandbox() log("#*********************************************************************") log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ") log("#*********************************************************************") # start up h2o to report the java version (once). output to python stdout # only do this for regression testing # temporarily disable this, to go a little faster # if getpass.getuser() == 'jenkins': # check_h2o_version() ports_per_node = 2 nodeList = [] # shift the port used to run groups of tests on the same machine at the same time? base_port = get_base_port(base_port) try: # if no hosts list, use psutil method on local host. totalNodes = 0 # doing this list outside the loops so we can shuffle for better test variation # this jvm startup shuffle is independent from the flatfile shuffle portList = [base_port + ports_per_node * i for i in range(node_count)] if hosts is None: # if use_flatfile, we should create it # because tests will just call build_cloud with use_flatfile=True # best to just create it all the time..may or may not be used write_flatfile(node_count=node_count, base_port=base_port) hostCount = 1 if rand_shuffle: random.shuffle(portList) for p in portList: verboseprint("psutil starting node", i) newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 else: # if hosts, the flatfile was created and uploaded to hosts already # I guess don't recreate it, don't overwrite the one that was copied beforehand. 
# we don't always use the flatfile (use_flatfile=False) # Suppose we could dispatch from the flatfile to match it's contents # but sometimes we want to test with a bad/different flatfile then we invoke h2o? hostCount = len(hosts) hostPortList = [] for h in hosts: for port in portList: hostPortList.append((h, port)) if rand_shuffle: random.shuffle(hostPortList) for (h, p) in hostPortList: verboseprint('ssh starting node', totalNodes, 'via', h) newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts") start = time.time() # UPDATE: best to stabilize on the last node! stabilize_cloud(nodeList[0], nodeList, timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noSandboxErrorCheck=True) verboseprint(len(nodeList), "Last added node stabilized in ", time.time() - start, " secs") verboseprint("Built cloud: %d nodes on %d hosts, in %d s" % \ (len(nodeList), hostCount, (time.time() - start))) h2p.red_print("Built cloud:", nodeList[0].java_heap_GB, "GB java heap(s) with", len(nodeList), "total nodes") # FIX! using "consensus" in node[-1] should mean this is unnecessary? # maybe there's a bug. For now do this. long term: don't want? # UPDATE: do it for all cases now 2/14/13 if conservative: # still needed? for n in nodeList: stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noSandboxErrorCheck=True) # this does some extra checking now # verifies cloud name too if param is not None verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name) # best to check for any errors due to cloud building right away? check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) except: # nodeList might be empty in some exception cases? 
# no shutdown issued first, though if cleanup and nodeList: for n in nodeList: n.terminate() check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) raise print len(nodeList), "total jvms in H2O cloud" # put the test start message in the h2o log, to create a marker nodeList[0].h2o_log_msg() if h2o_args.config_json: LOG_DIR = get_sandbox_name() # like cp -p. Save the config file, to sandbox print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json)) # Figure out some stuff about how this test was run cs_time = str(datetime.datetime.now()) cs_cwd = os.getcwd() cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args) cs_python_test_name = h2o_args.python_test_name if h2o_args.config_json: cs_config_json = os.path.abspath(h2o_args.config_json) else: cs_config_json = None cs_username = h2o_args.python_username cs_ip = h2o_args.python_cmd_ip # dump the nodes state to a json file # include enough extra info to have someone # rebuild the cloud if a test fails that was using that cloud. if create_json: q = { 'cloud_start': { 'time': cs_time, 'cwd': cs_cwd, 'python_test_name': cs_python_test_name, 'python_cmd_line': cs_python_cmd_line, 'config_json': cs_config_json, 'username': cs_username, 'ip': cs_ip, }, 'h2o_nodes': h2o_util.json_repr(nodeList), } with open('h2o-nodes.json', 'w+') as f: f.write(json.dumps(q, indent=4)) # save it to a local global copy, in case it's needed for tearDown h2o_nodes.nodes[:] = nodeList return nodeList
def do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs): # if url param is used, use it as full url. otherwise crate from the jsonRequest if fullUrl: url = fullUrl else: url = self.url(jsonRequest) # remove any params that are 'None' # need to copy dictionary, since can't delete while iterating if params is not None: params2 = params.copy() for k in params2: if params2[k] is None: del params[k] paramsStr = '?' + '&'.join( ['%s=%s' % (k, v) for (k, v) in params.items()]) else: paramsStr = '' if extraComment: log('Start ' + url + paramsStr, comment=extraComment) else: log('Start ' + url + paramsStr) log_rest("") log_rest( "----------------------------------------------------------------------\n" ) if extraComment: log_rest("# Extra comment info about this request: " + extraComment) if cmd == 'get': log_rest("GET") else: log_rest("POST") log_rest(url + paramsStr) # file get passed thru kwargs here try: if cmd == 'post': r = requests.post(url, timeout=timeout, params=params, **kwargs) else: r = requests.get(url, timeout=timeout, params=params, **kwargs) except Exception, e: # rethrow the exception after we've checked for stack trace from h2o # out of memory errors maybe don't show up right away? so we should wait for h2o # to get it out to h2o stdout. We don't want to rely on cloud teardown to check # because there's no delay, and we don't want to delay all cloud teardowns by waiting. exc_info = sys.exc_info() # use this to ignore the initial connection errors during build cloud when h2o is coming up if not noExtraErrorCheck: h2p.red_print( "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr)) time.sleep(2) check_sandbox_for_errors( python_test_name=h2o_args.python_test_name) log_rest("") log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message)) raise exc_info[1], None, exc_info[2]