def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None, returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, raiseIfNon200=True, **kwargs): H2O.verboseprint("__do_json_request, timeout: " + str(timeout)) # if url param is used, use it as full url. otherwise crate from the jsonRequest if fullUrl: url = fullUrl else: url = self.__url(jsonRequest) # remove any params that are 'None' # need to copy dictionary, since can't delete while iterating if params is not None: params_serialized = params.copy() for k in params_serialized: if params_serialized[k] is None: del params[k] paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()]) else: paramsStr = '' # The requests package takes array parameters and explodes them: ['f00', 'b4r'] becomes "f00,b4r". # NOTE: this handles 1D arrays only; if we need ND this needs to be recursive. # NOTE: we currently don't need to do this for GET, so that's not implemented. if postData is not None: munged_postData = {} for k, v in postData.iteritems(): if type(v) is list: if len(v) == 0: munged_postData[k] = '[]' else: first = True array_str = '[' for val in v: if not first: array_str += ', ' if val is None: array_str += 'null' elif isinstance(val, basestring): array_str += "\"" + str(val) + "\"" else: array_str += str(val) first = False array_str += ']' munged_postData[k] = array_str else: # not list: munged_postData[k] = v else: # None munged_postData = postData if extraComment: log('Start ' + url + paramsStr, comment=extraComment) else: log('Start ' + url + paramsStr) log_rest("") log_rest("----------------------------------------------------------------------\n") if extraComment: log_rest("# Extra comment info about this request: " + extraComment) if cmd == 'get': log_rest("GET") else: log_rest("POST") log_rest(url + paramsStr) # file get passed thru kwargs here try: if 'post' == cmd: # NOTE == cmd: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST. # This is temporary. # # This following does application/json (aka, posting JSON in the body): # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(munged_postData), **kwargs) # # This does form-encoded, which doesn't allow POST of nested structures r = requests.post(url, timeout=timeout, params=params, data=munged_postData, **kwargs) elif 'delete' == cmd: r = requests.delete(url, timeout=timeout, params=params, **kwargs) elif 'get' == cmd: r = requests.get(url, timeout=timeout, params=params, **kwargs) else: raise ValueError("Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd) except Exception, e: # rethrow the exception after we've checked for stack trace from h2o # out of memory errors maybe don't show up right away? so we should wait for h2o # to get it out to h2o stdout. We don't want to rely on cloud teardown to check # because there's no delay, and we don't want to delay all cloud teardowns by waiting. # (this is new/experimental) exc_info = sys.exc_info() # use this to ignore the initial connection errors during build cloud when h2o is coming up if not noExtraErrorCheck: h2p.red_print( "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr)) time.sleep(2) H2O.check_sandbox_for_errors(python_test_name=H2O.python_test_name); log_rest("") log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message)) raise exc_info[1], None, exc_info[2] H2O.verboseprint("r: " + repr(r))
class H2O(object): # static (class) variables ipaddr_from_cmd_line = None debugger = False json_url_history = [] python_test_name = inspect.stack()[1][1] verbose = False ## TODO: support api_version parameter for all api calls! # Also a global in the H2O object set at creation time. # TODO: ensure that all of this is really necessary: def __init__( self, use_this_ip_addr=None, port=54321, capture_output=True, use_debugger=None, classpath=None, use_hdfs=False, use_maprfs=False, # hdfs_version="cdh4", hdfs_name_node="192.168.1.151", # hdfs_version="cdh3", hdfs_name_node="192.168.1.176", hdfs_version=None, hdfs_name_node=None, hdfs_config=None, aws_credentials=None, use_flatfile=False, java_heap_GB=None, java_heap_MB=None, java_extra_args=None, use_home_for_ice=False, node_id=None, username=None, random_udp_drop=False, redirect_import_folder_to_s3_path=None, redirect_import_folder_to_s3n_path=None, disable_h2o_log=False, enable_benchmark_log=False, h2o_remote_buckets_root=None, delete_keys_at_teardown=False, cloud_name=None, ): if use_hdfs: # see if we can touch a 0xdata machine try: # long timeout in ec2...bad a = requests.get('http://192.168.1.176:80', timeout=1) hdfs_0xdata_visible = True except: hdfs_0xdata_visible = False # different defaults, depending on where we're running if hdfs_name_node is None: if hdfs_0xdata_visible: hdfs_name_node = "192.168.1.176" else: # ec2 hdfs_name_node = "10.78.14.235:9000" if hdfs_version is None: if hdfs_0xdata_visible: hdfs_version = "cdh3" else: # ec2 hdfs_version = "0.20.2" self.redirect_import_folder_to_s3_path = redirect_import_folder_to_s3_path self.redirect_import_folder_to_s3n_path = redirect_import_folder_to_s3n_path self.aws_credentials = aws_credentials self.port = port # None is legal for self.addr. # means we won't give an ip to the jar when we start. # Or we can say use use_this_ip_addr=127.0.0.1, or the known address # if use_this_addr is None, use 127.0.0.1 for urls and json # Command line arg 'ipaddr_from_cmd_line' dominates: if H2O.ipaddr_from_cmd_line: self.addr = H2O.ipaddr_from_cmd_line else: self.addr = use_this_ip_addr if self.addr is not None: self.http_addr = self.addr else: self.http_addr = get_ip_address() # command line should always dominate for enabling if H2O.debugger: use_debugger = True self.use_debugger = use_debugger self.classpath = classpath self.capture_output = capture_output self.use_hdfs = use_hdfs self.use_maprfs = use_maprfs self.hdfs_name_node = hdfs_name_node self.hdfs_version = hdfs_version self.hdfs_config = hdfs_config self.use_flatfile = use_flatfile self.java_heap_GB = java_heap_GB self.java_heap_MB = java_heap_MB self.java_extra_args = java_extra_args self.use_home_for_ice = use_home_for_ice self.node_id = node_id if username: self.username = username else: self.username = getpass.getuser() # don't want multiple reports from tearDown and tearDownClass # have nodes[0] remember (0 always exists) self.sandbox_error_was_reported = False self.sandbox_ignore_errors = False self.random_udp_drop = random_udp_drop self.disable_h2o_log = disable_h2o_log # this dumps stats from tests, and perf stats while polling to benchmark.log self.enable_benchmark_log = enable_benchmark_log self.h2o_remote_buckets_root = h2o_remote_buckets_root self.delete_keys_at_teardown = delete_keys_at_teardown if cloud_name: self.cloud_name = cloud_name else: self.cloud_name = 'pytest-%s-%s' % (getpass.getuser(), os.getpid()) ''' Printable string representation of an H2O node object. ''' def __str__(self): return '%s - http://%s:%d/' % (type(self), self.http_addr, self.port) # TODO: UGH, move this. @staticmethod def verboseprint(*args, **kwargs): if H2O.verbose: for x in args: # so you don't have to create a single string print x, for x in kwargs: # so you don't have to create a single string print x, print sys.stdout.flush() def __url(self, loc, port=None): # always use the new api port if port is None: port = self.port if loc.startswith('/'): delim = '' else: delim = '/' u = 'http://%s:%d%s%s' % (self.http_addr, port, delim, loc) return u ''' Make a REST request to the h2o server and if succesful return a dict containing the JSON result. ''' # @profile def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None, returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, raiseIfNon200=True, **kwargs): H2O.verboseprint("__do_json_request, timeout: " + str(timeout)) # if url param is used, use it as full url. otherwise crate from the jsonRequest if fullUrl: url = fullUrl else: url = self.__url(jsonRequest) # remove any params that are 'None' # need to copy dictionary, since can't delete while iterating if params is not None: params_serialized = params.copy() for k in params_serialized: if params_serialized[k] is None: del params[k] paramsStr = '?' + '&'.join( ['%s=%s' % (k, v) for (k, v) in params.items()]) else: paramsStr = '' # The requests package takes array parameters and explodes them: ['f00', 'b4r'] becomes "f00,b4r". # NOTE: this handles 1D arrays only; if we need ND this needs to be recursive. # NOTE: we currently don't need to do this for GET, so that's not implemented. if postData is not None: munged_postData = {} for k, v in postData.iteritems(): if type(v) is list: if len(v) == 0: munged_postData[k] = '[]' else: first = True array_str = '[' for val in v: if not first: array_str += ', ' if val is None: array_str += 'null' elif isinstance(val, basestring): array_str += "\"" + str(val) + "\"" else: array_str += str(val) first = False array_str += ']' munged_postData[k] = array_str else: # not list: munged_postData[k] = v else: # None munged_postData = postData if extraComment: log('Start ' + url + paramsStr, comment=extraComment) else: log('Start ' + url + paramsStr) log_rest("") log_rest( "----------------------------------------------------------------------\n" ) if extraComment: log_rest("# Extra comment info about this request: " + extraComment) if cmd == 'get': log_rest("GET") else: log_rest("POST") log_rest(url + paramsStr) # file get passed thru kwargs here try: if 'post' == cmd: # NOTE == cmd: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST. # This is temporary. # # This following does application/json (aka, posting JSON in the body): # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(munged_postData), **kwargs) # # This does form-encoded, which doesn't allow POST of nested structures r = requests.post(url, timeout=timeout, params=params, data=munged_postData, **kwargs) elif 'delete' == cmd: r = requests.delete(url, timeout=timeout, params=params, **kwargs) elif 'get' == cmd: r = requests.get(url, timeout=timeout, params=params, **kwargs) else: raise ValueError( "Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd) except Exception, e: # rethrow the exception after we've checked for stack trace from h2o # out of memory errors maybe don't show up right away? so we should wait for h2o # to get it out to h2o stdout. We don't want to rely on cloud teardown to check # because there's no delay, and we don't want to delay all cloud teardowns by waiting. # (this is new/experimental) exc_info = sys.exc_info() # use this to ignore the initial connection errors during build cloud when h2o is coming up if not noExtraErrorCheck: h2p.red_print( "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr)) time.sleep(2) H2O.check_sandbox_for_errors( python_test_name=H2O.python_test_name) log_rest("") log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message)) raise exc_info[1], None, exc_info[2] H2O.verboseprint("r: " + repr(r)) if raiseIfNon200 and 200 != r.status_code: print "JSON call returned non-200 status: ", url print "r.status_code: " + str(r.status_code) print "r.headers: " + repr(r.headers) print "r.text: " + r.text log_rest("") try: if r is None: log_rest("r is None") else: log_rest("HTTP status code: " + str(r.status_code)) # The following accesses to r.text were taking most of the runtime: log_text = False if log_text: if hasattr(r, 'text'): if r.text is None: log_rest("r.text is None") else: log_rest(r.text) else: log_rest("r does not have attr text") except Exception, e: # Paranoid exception catch. # Ignore logging exceptions in the case that the above error checking isn't sufficient. print "Caught exception from result logging: ", e, "; result: ", repr( r)
def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None, returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, **kwargs): H2O.verboseprint("__do_json_request, timeout: " + str(timeout)) # if url param is used, use it as full url. otherwise crate from the jsonRequest if fullUrl: url = fullUrl else: url = self.__url(jsonRequest) # remove any params that are 'None' # need to copy dictionary, since can't delete while iterating if params is not None: params2 = params.copy() for k in params2: if params2[k] is None: del params[k] paramsStr = '?' + '&'.join(['%s=%s' % (k, v) for (k, v) in params.items()]) else: paramsStr = '' if extraComment: log('Start ' + url + paramsStr, comment=extraComment) else: log('Start ' + url + paramsStr) log_rest("") log_rest("----------------------------------------------------------------------\n") if extraComment: log_rest("# Extra comment info about this request: " + extraComment) if cmd == 'get': log_rest("GET") else: log_rest("POST") log_rest(url + paramsStr) # file get passed thru kwargs here try: if 'post' == cmd: # NOTE == cmd: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST. # This is temporary. # # This following does application/json (aka, posting JSON in the body): # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(postData), **kwargs) # # This does form-encoded, which doesn't allow POST of nested structures r = requests.post(url, timeout=timeout, params=params, data=postData, **kwargs) elif 'delete' == cmd: r = requests.delete(url, timeout=timeout, params=params, **kwargs) elif 'get' == cmd: r = requests.get(url, timeout=timeout, params=params, **kwargs) else: raise ValueError("Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd) except Exception, e: # rethrow the exception after we've checked for stack trace from h2o # out of memory errors maybe don't show up right away? so we should wait for h2o # to get it out to h2o stdout. We don't want to rely on cloud teardown to check # because there's no delay, and we don't want to delay all cloud teardowns by waiting. # (this is new/experimental) exc_info = sys.exc_info() # use this to ignore the initial connection errors during build cloud when h2o is coming up if not noExtraErrorCheck: h2p.red_print( "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr)) time.sleep(2) H2O.check_sandbox_for_errors(python_test_name=H2O.python_test_name); log_rest("") log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message)) raise exc_info[1], None, exc_info[2] H2O.verboseprint("r: " + repr(r))
def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None, postData=None, returnFast=False, cmd='get', extraComment=None, ignoreH2oError=False, noExtraErrorCheck=False, raiseIfNon200=True, **kwargs): H2O.verboseprint("__do_json_request, timeout: " + str(timeout)) # if url param is used, use it as full url. otherwise crate from the jsonRequest if fullUrl: url = fullUrl else: url = self.__url(jsonRequest) # remove any params that are 'None' # need to copy dictionary, since can't delete while iterating if params is not None: params_serialized = params.copy() for k in params_serialized: if params_serialized[k] is None: del params[k] paramsStr = '?' + '&'.join( ['%s=%s' % (k, v) for (k, v) in params.items()]) else: paramsStr = '' # The requests package takes array parameters and explodes them: ['f00', 'b4r'] becomes "f00,b4r". # NOTE: this handles 1D arrays only; if we need ND this needs to be recursive. # NOTE: we currently don't need to do this for GET, so that's not implemented. if postData is not None: munged_postData = {} for k, v in postData.iteritems(): if type(v) is list: if len(v) == 0: munged_postData[k] = '[]' else: first = True array_str = '[' for val in v: if not first: array_str += ', ' if val is None: array_str += 'null' elif isinstance(val, basestring): array_str += "\"" + str(val) + "\"" else: array_str += str(val) first = False array_str += ']' munged_postData[k] = array_str else: # not list: munged_postData[k] = v else: # None munged_postData = postData if extraComment: log('Start ' + url + paramsStr, comment=extraComment) else: log('Start ' + url + paramsStr) log_rest("") log_rest( "----------------------------------------------------------------------\n" ) if extraComment: log_rest("# Extra comment info about this request: " + extraComment) if cmd == 'get': log_rest("GET") else: log_rest("POST") log_rest(url + paramsStr) # file get passed thru kwargs here try: if 'post' == cmd: # NOTE == cmd: for now, since we don't have deserialization from JSON in h2o-dev, we use form-encoded POST. # This is temporary. # # This following does application/json (aka, posting JSON in the body): # r = requests.post(url, timeout=timeout, params=params, data=json.dumps(munged_postData), **kwargs) # # This does form-encoded, which doesn't allow POST of nested structures r = requests.post(url, timeout=timeout, params=params, data=munged_postData, **kwargs) elif 'delete' == cmd: r = requests.delete(url, timeout=timeout, params=params, **kwargs) elif 'get' == cmd: r = requests.get(url, timeout=timeout, params=params, **kwargs) else: raise ValueError( "Unknown HTTP command (expected 'get', 'post' or 'delete'): " + cmd) except Exception, e: # rethrow the exception after we've checked for stack trace from h2o # out of memory errors maybe don't show up right away? so we should wait for h2o # to get it out to h2o stdout. We don't want to rely on cloud teardown to check # because there's no delay, and we don't want to delay all cloud teardowns by waiting. # (this is new/experimental) exc_info = sys.exc_info() # use this to ignore the initial connection errors during build cloud when h2o is coming up if not noExtraErrorCheck: h2p.red_print( "ERROR: got exception on %s to h2o. \nGoing to check sandbox, then rethrow.." % (url + paramsStr)) time.sleep(2) H2O.check_sandbox_for_errors( python_test_name=H2O.python_test_name) log_rest("") log_rest("EXCEPTION CAUGHT DOING REQUEST: " + str(e.message)) raise exc_info[1], None, exc_info[2] H2O.verboseprint("r: " + repr(r))