Ejemplo n.º 1
0
 def _import_parse(self,file_path):
   """
   Import file_path via h2o.import_file, parse it, and record the parse result on this frame.

   Sets _id, _computed, _nrows, _ncols and _col_names on self, then reports the
   parsed dimensions.

   :param file_path: Either a str (reported with a one-line message) or an iterable of
     file names (reported as a table with one row per file).
   :return: None
   """
   rawkey = h2o.import_file(file_path)
   setup = h2o.parse_setup(rawkey)
   parse = h2o.parse(setup, _py_tmp_key())  # create a new key
   self._id = parse["job"]["dest"]["name"]
   self._computed=True
   # _id and _computed are assigned first because the "nrow" expression is built from self
   self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
   self._ncols = parse["number_columns"]
   if parse["column_names"]:
     self._col_names = parse["column_names"]
   else:
     # No names reported: fall back to C1..Cn. range() excludes its upper bound, so
     # the +1 is required to produce exactly _ncols names.
     self._col_names = ["C" + str(x) for x in range(1,self._ncols+1)]
   thousands_sep = h2o.H2ODisplay.THOUSANDS
   # A single parenthesised argument prints identically as a statement or a function call
   if isinstance(file_path, str): print("Imported {}. Parsed {} rows and {} cols".format(file_path,thousands_sep.format(self._nrows), thousands_sep.format(self._ncols)))
   else:                          h2o.H2ODisplay([["File"+str(i+1),f] for i,f in enumerate(file_path)],None, "Parsed {} rows and {} cols".format(thousands_sep.format(self._nrows), thousands_sep.format(self._ncols)))
Ejemplo n.º 2
0
 def _import_parse(self,file_path):
   """
   Run h2o.lazy_import on file_path, parse the result, and store the outcome on this frame.

   Populates _id, _computed, _nrows, _ncols, _col_names, _types and _keep, then
   reports the parsed dimensions.

   :param file_path: Either a str (reported with a one-line message) or an iterable of
     file names (reported as a table with one row per file).
   :return: None
   """
   raw_key = h2o.lazy_import(file_path)
   parse_config = h2o.parse_setup(raw_key)
   result = h2o.parse(parse_config, _py_tmp_key())  # parsed into a freshly generated key

   self._id = result["job"]["dest"]["name"]
   self._computed = True
   # the row count is obtained by evaluating an "nrow" expression built from self
   row_count_frame = H2OFrame(expr=ExprNode("nrow", self))
   self._nrows = int(row_count_frame._scalar())
   self._ncols = result["number_columns"]

   names = result["column_names"]
   if not names:
     # nothing reported by the parse: use C1..Cn as placeholder names
     names = ["C" + str(idx) for idx in range(1, self._ncols + 1)]
   self._col_names = names
   self._types = {name: col_type for name, col_type in zip(self._col_names, result["column_types"])}
   self._keep = True

   fmt = h2o.H2ODisplay.THOUSANDS
   rows_txt = fmt.format(self._nrows)
   cols_txt = fmt.format(self._ncols)
   if not isinstance(file_path, str):
     file_rows = [["File" + str(pos + 1), fname] for pos, fname in enumerate(file_path)]
     h2o.H2ODisplay(file_rows, None, "Parsed {} rows and {} cols".format(rows_txt, cols_txt))
   else:
     print("Imported {}. Parsed {} rows and {} cols".format(file_path, rows_txt, cols_txt))
Ejemplo n.º 3
0
 def _handle_text_key(self, text_key):
   """
   Handle result of upload_file: parse the raw text and record the result on this frame.

   Sets _computed, _id, _ncols, _col_names and _nrows on self, then prints a summary.

   :param text_key: A key pointing to raw text to be parsed
   :return: None. Part of the H2OFrame constructor.
   """
   # perform the parse setup
   setup = h2o.parse_setup(text_key)
   parse = h2o.parse(setup, _py_tmp_key())
   self._computed=True
   self._id = parse["destination_frame"]["name"]
   self._ncols = parse["number_columns"]
   if parse["column_names"]:
     cols = parse["column_names"]
   else:
     # No names reported: fall back to C1..Cn. range() excludes its upper bound, so
     # the +1 is required to produce exactly _ncols names.
     cols = ["C" + str(x) for x in range(1,self._ncols+1)]
   self._col_names = cols
   self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
   thousands_sep = h2o.H2ODisplay.THOUSANDS
   # A single parenthesised argument prints identically as a statement or a function call
   print("Uploaded {} into cluster with {} rows and {} cols".format(text_key, thousands_sep.format(self._nrows), thousands_sep.format(len(cols))))
Ejemplo n.º 4
0
 def _handle_text_key(self, text_key):
   """
   Handle result of upload_file: parse the raw text and record the result on this frame.

   Sets _computed, _id, _ncols, _col_names and _nrows on self, then prints a summary.

   :param text_key: A key pointing to raw text to be parsed
   :return: None. Part of the H2OFrame constructor.
   """
   # perform the parse setup
   setup = h2o.parse_setup(text_key)
   # blocking parse, first line is always a header (since "we" wrote the data out)
   parse = h2o.parse(setup, _py_tmp_key(), first_line_is_header=1)
   self._computed=True
   self._id = parse["destination_frame"]["name"]
   self._ncols = parse["number_columns"]
   if parse["column_names"]:
     cols = parse["column_names"]
   else:
     # The parse result carried no column names: fall back to C1..Cn. range() excludes
     # its upper bound, so the +1 is required to produce exactly _ncols names.
     cols = ["C" + str(x) for x in range(1,self._ncols+1)]
   self._col_names = cols
   self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
   thousands_sep = h2o.H2ODisplay.THOUSANDS
   # A single parenthesised argument prints identically as a statement or a function call
   print("Uploaded {} into cluster with {} rows and {} cols".format(text_key, thousands_sep.format(self._nrows), thousands_sep.format(len(cols))))
Ejemplo n.º 5
0
 def _handle_text_key(self, text_key, check_header=None):
   """
   Handle result of upload_file: parse the raw text and record the result on this frame.

   Populates _computed, _id, _ncols, _col_names, _nrows and _keep, then prints a summary.

   :param text_key: A key pointing to raw text to be parsed
   :param check_header: When not None, stored under "check_header" in the parse setup
     before parsing; when None the setup is used as returned by h2o.parse_setup.
   :return: None. Part of the H2OFrame constructor.
   """
   parse_config = h2o.parse_setup(text_key)
   if check_header is not None:
     parse_config["check_header"] = check_header
   result = h2o.parse(parse_config, _py_tmp_key())

   self._computed = True
   self._id = result["destination_frame"]["name"]
   self._ncols = result["number_columns"]

   names = result["column_names"]
   if not names:
     # nothing reported by the parse: use C1..Cn as placeholder names
     names = ["C" + str(idx) for idx in range(1, self._ncols + 1)]
   self._col_names = names

   # the row count is obtained by evaluating an "nrow" expression built from self
   row_count_frame = H2OFrame(expr=ExprNode("nrow", self))
   self._nrows = int(row_count_frame._scalar())
   self._keep = True

   fmt = h2o.H2ODisplay.THOUSANDS
   rows_txt = fmt.format(self._nrows)
   cols_txt = fmt.format(len(names))
   print("Uploaded {} into cluster with {} rows and {} cols".format(text_key, rows_txt, cols_txt))