def checkit(suffix, expectedMinLines): logNameList = ["h2o_" + str(n.http_addr) + "_" + str(n.port) + suffix + ".log" for n in h2o_nodes.nodes] lineCountList = [] for logName in logNameList: lineCount = h2o_util.file_line_count(get_sandbox_name() + "/" + logName) print logName, "lineCount:", lineCount lineCountList.append(lineCount) print logNameList if len(h2o_nodes.nodes) != len(logNameList): raise Exception("Should be %d logs, are %d" % len(h2o_nodes.nodes), len(logNameList)) # line counts seem to vary..check for "too small" # variance in polling (cloud building and status)? for i, l in enumerate(lineCountList): if l < expectedMinLines: raise Exception("node %d %s log is too small" % (i, logNameList[i])) return (logNameList, lineCountList)
def log_download(self, logDir=None, timeoutSecs=30, **kwargs): if logDir == None: logDir = get_sandbox_name() url = self.url('LogDownload.json') log('Start ' + url); print "\nDownloading h2o log(s) using:", url r = requests.get(url, timeout=timeoutSecs, **kwargs) if not r or not r.ok: raise Exception("Maybe bad url? no r in log_download %s in %s:" % inspect.stack()[1][3]) z = zipfile.ZipFile(StringIO.StringIO(r.content)) print "z.namelist:", z.namelist() print "z.printdir:", z.printdir() nameList = z.namelist() # the first is the h2ologs dir name. h2oLogDir = logDir + "/" + nameList.pop(0) print "h2oLogDir:", h2oLogDir print "logDir:", logDir # it's a zip of zipped files # first unzip it z = zipfile.ZipFile(StringIO.StringIO(r.content)) z.extractall(logDir) # unzipped file should be in LOG_DIR now # now unzip the files in that directory for zname in nameList: resultList = h2o_util.flat_unzip(logDir + "/" + zname, logDir) print "\nlogDir:", logDir for logfile in resultList: numLines = sum(1 for line in open(logfile)) print logfile, "Lines:", numLines print return resultList
def build_cloud(node_count=1, base_port=None, hosts=None, timeoutSecs=30, retryDelaySecs=1, cleanup=True, rand_shuffle=True, conservative=False, create_json=False, clone_cloud=None, init_sandbox=True, usecloud=False, usecloud_size=None, **kwargs): # expectedSize is only used if usecloud # usecloud can be passed thru build_cloud param, or command line # not in config json though so no build_cloud_with_hosts path. # redirect to build_cloud_with_json if a command line arg # wants to force a test to ignore it's build_cloud/build_cloud_with_hosts # (both come thru here) # clone_cloud is just another way to get the effect (maybe ec2 config file thru # build_cloud_with_hosts? global stdout_wrapped if not h2o_args.disable_time_stamp and not stdout_wrapped: sys.stdout = OutWrapper(sys.stdout) stdout_wrapped = True if h2o_args.usecloud or usecloud: # for now, just have fixed name in local file. (think of this as a temp or debug file) # eventually we'll pass the json object instead for speed? nodesJsonPathname = "h2o_fc-nodes.json" elif h2o_args.clone_cloud_json: nodesJsonPathname = h2o_args.clone_cloud_json elif clone_cloud: nodesJsonPathname = clone_cloud else: # normal build_cloud() doesn't use nodesJsonPathname = None # usecloud dominates over all if (h2o_args.clone_cloud_json or clone_cloud) or (h2o_args.usecloud or usecloud): # then build_cloud_with_json with json object # we don't need to specify these defaults, but leave here to show that we can pass # I suppose kwargs will have it if h2o_args.usecloud: ip_port = h2o_args.usecloud elif usecloud: ip_port = usecloud else: ip_port = None # h2o_args dominates if h2o_args.usecloud_size: # only used for expected size useCloudExpectedSize = h2o_args.usecloud_size else: useCloudExpectedSize = usecloud_size nodesJsonObject = h2o_fc.find_cloud(ip_port=ip_port, expectedSize=useCloudExpectedSize, nodesJsonPathname=nodesJsonPathname, **kwargs) # potentially passed in kwargs # hdfs_version='cdh4', hdfs_config=None, 
hdfs_name_node='172.16.1.176', nodeList = build_cloud_with_json(h2o_nodes_json=nodesJsonPathname) return nodeList # else # moved to here from unit_main. so will run with nosetests too! # Normally do this. # Don't if build_cloud_with_hosts() did and put a flatfile in there already! if init_sandbox: clean_sandbox() log("#*********************************************************************") log("Starting new test: " + h2o_args.python_test_name + " at build_cloud() ") log("#*********************************************************************") # start up h2o to report the java version (once). output to python stdout # only do this for regression testing # temporarily disable this, to go a little faster # if getpass.getuser() == 'jenkins': # check_h2o_version() ports_per_node = 2 nodeList = [] # shift the port used to run groups of tests on the same machine at the same time? base_port = get_base_port(base_port) try: # if no hosts list, use psutil method on local host. totalNodes = 0 # doing this list outside the loops so we can shuffle for better test variation # this jvm startup shuffle is independent from the flatfile shuffle portList = [base_port + ports_per_node * i for i in range(node_count)] if hosts is None: # if use_flatfile, we should create it # because tests will just call build_cloud with use_flatfile=True # best to just create it all the time..may or may not be used write_flatfile(node_count=node_count, base_port=base_port) hostCount = 1 if rand_shuffle: random.shuffle(portList) for p in portList: verboseprint("psutil starting node", i) newNode = LocalH2O(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 else: # if hosts, the flatfile was created and uploaded to hosts already # I guess don't recreate it, don't overwrite the one that was copied beforehand. 
# we don't always use the flatfile (use_flatfile=False) # Suppose we could dispatch from the flatfile to match it's contents # but sometimes we want to test with a bad/different flatfile then we invoke h2o? hostCount = len(hosts) hostPortList = [] for h in hosts: for port in portList: hostPortList.append((h, port)) if rand_shuffle: random.shuffle(hostPortList) for (h, p) in hostPortList: verboseprint('ssh starting node', totalNodes, 'via', h) newNode = h.remote_h2o(port=p, node_id=totalNodes, **kwargs) nodeList.append(newNode) totalNodes += 1 verboseprint("Attempting Cloud stabilize of", totalNodes, "nodes on", hostCount, "hosts") start = time.time() # UPDATE: best to stabilize on the last node! stabilize_cloud(nodeList[0], nodeList, timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, noSandboxErrorCheck=True) verboseprint(len(nodeList), "Last added node stabilized in ", time.time() - start, " secs") verboseprint("Built cloud: %d nodes on %d hosts, in %d s" % \ (len(nodeList), hostCount, (time.time() - start))) h2p.red_print("Built cloud:", nodeList[0].java_heap_GB, "GB java heap(s) with", len(nodeList), "total nodes") # FIX! using "consensus" in node[-1] should mean this is unnecessary? # maybe there's a bug. For now do this. long term: don't want? # UPDATE: do it for all cases now 2/14/13 if conservative: # still needed? for n in nodeList: stabilize_cloud(n, nodeList, timeoutSecs=timeoutSecs, noSandboxErrorCheck=True) # this does some extra checking now # verifies cloud name too if param is not None verify_cloud_size(nodeList, expectedCloudName=nodeList[0].cloud_name) # best to check for any errors due to cloud building right away? check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) except: # nodeList might be empty in some exception cases? 
# no shutdown issued first, though if cleanup and nodeList: for n in nodeList: n.terminate() check_sandbox_for_errors(python_test_name=h2o_args.python_test_name) raise print len(nodeList), "total jvms in H2O cloud" # put the test start message in the h2o log, to create a marker nodeList[0].h2o_log_msg() if h2o_args.config_json: LOG_DIR = get_sandbox_name() # like cp -p. Save the config file, to sandbox print "Saving the ", h2o_args.config_json, "we used to", LOG_DIR shutil.copy(h2o_args.config_json, LOG_DIR + "/" + os.path.basename(h2o_args.config_json)) # Figure out some stuff about how this test was run cs_time = str(datetime.datetime.now()) cs_cwd = os.getcwd() cs_python_cmd_line = "python %s %s" % (h2o_args.python_test_name, h2o_args.python_cmd_args) cs_python_test_name = h2o_args.python_test_name if h2o_args.config_json: cs_config_json = os.path.abspath(h2o_args.config_json) else: cs_config_json = None cs_username = h2o_args.python_username cs_ip = h2o_args.python_cmd_ip # dump the nodes state to a json file # include enough extra info to have someone # rebuild the cloud if a test fails that was using that cloud. if create_json: q = { 'cloud_start': { 'time': cs_time, 'cwd': cs_cwd, 'python_test_name': cs_python_test_name, 'python_cmd_line': cs_python_cmd_line, 'config_json': cs_config_json, 'username': cs_username, 'ip': cs_ip, }, 'h2o_nodes': h2o_util.json_repr(nodeList), } with open('h2o-nodes.json', 'w+') as f: f.write(json.dumps(q, indent=4)) # save it to a local global copy, in case it's needed for tearDown h2o_nodes.nodes[:] = nodeList return nodeList
clean_sandbox_doneToLine, verboseprint, OutWrapper, log, flatfile_pathname, dump_json, find_file, check_h2o_version, )
# NOTE(review): the line above is the tail of a "from h2o_test import (..." that
# starts before this view; this whole region duplicates the header further below.
from h2o_objects import LocalH2O, RemoteH2O, ExternalH2O
import h2o_fc
import h2o_hosts

# print "h2o_bc"
# module-level sandbox directory, captured once at import time
LOG_DIR = get_sandbox_name()

# ************************************************************
def default_hosts_file():
    """Return the hosts-config filename: $H2O_HOSTS_FILE if set, else the
    per-user default pytest_config-<username>.json."""
    if os.environ.has_key("H2O_HOSTS_FILE"):
        return os.environ["H2O_HOSTS_FILE"]
    return "pytest_config-{0}.json".format(getpass.getuser())

# ************************************************************
# node_count is number of H2O instances per host if hosts is specified.
# hack: this returns true for the --usecloud/-uc cases, to force it thru
# build_cloud/build_cloud_with_json/find_cloud. also for the -ccj cases
def decide_if_localhost():
    # NOTE(review): definition is truncated at this point in the visible source;
    # the remaining branches continue beyond this view.
    if h2o_args.usecloud:
        print "* Will ask h2o node about cloud using -uc argument:", h2o_args.usecloud
import h2o_args
import h2o_nodes
import h2o_print as h2p, h2o_util
import h2o_import as h2i
from h2o_test import \
    get_sandbox_name, clean_sandbox, check_sandbox_for_errors, clean_sandbox_doneToLine,\
    verboseprint, OutWrapper, log, flatfile_pathname, dump_json, find_file, check_h2o_version
from h2o_objects import LocalH2O, RemoteH2O, ExternalH2O
import h2o_fc
import h2o_hosts

# print "h2o_bc"
# module-level sandbox directory, captured once at import time
LOG_DIR = get_sandbox_name()

#************************************************************
def default_hosts_file():
    """Return the hosts-config filename: $H2O_HOSTS_FILE if set, else the
    per-user default pytest_config-<username>.json."""
    if os.environ.has_key("H2O_HOSTS_FILE"):
        return os.environ["H2O_HOSTS_FILE"]
    return 'pytest_config-{0}.json'.format(getpass.getuser())

#************************************************************
# node_count is number of H2O instances per host if hosts is specified.
# hack: this returns true for the --usecloud/-uc cases, to force it thru
# build_cloud/build_cloud_with_json/find_cloud. also for the -ccj cases
def decide_if_localhost():
    # NOTE(review): the visible source ends at the return below; this function
    # may have further branches past this view.
    if h2o_args.usecloud:
        print "* Will ask h2o node about cloud using -uc argument:", h2o_args.usecloud
        return True