def _import_parse(self, file_path):
    """
    Import and parse ``file_path`` and record the parse results on this frame.

    Sets ``_id``, ``_computed``, ``_nrows``, ``_ncols`` and ``_col_names``,
    then prints a short summary of what was parsed.

    :param file_path: A single path (str) or an iterable of paths; handed to h2o.import_file.
    :return: None
    """
    rawkey = h2o.import_file(file_path)
    setup = h2o.parse_setup(rawkey)
    parse = h2o.parse(setup, _py_tmp_key())  # create a new key
    self._id = parse["job"]["dest"]["name"]
    self._computed = True
    # The row count is obtained by evaluating an "nrow" expression built on self;
    # kept after the _id/_computed assignments, as in the original ordering.
    self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
    self._ncols = parse["number_columns"]
    if parse["column_names"]:
        self._col_names = parse["column_names"]
    else:
        # Default names C1..C<ncols>. range() excludes its upper bound, so the
        # +1 is required to generate one name per column.
        self._col_names = ["C" + str(x) for x in range(1, self._ncols + 1)]
    thousands_sep = h2o.H2ODisplay.THOUSANDS
    if isinstance(file_path, str):
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("Imported {}. Parsed {} rows and {} cols".format(
            file_path,
            thousands_sep.format(self._nrows),
            thousands_sep.format(self._ncols)))
    else:
        # Multiple inputs: show a "File<i>" table with the parse summary as caption.
        h2o.H2ODisplay(
            [["File" + str(i + 1), f] for i, f in enumerate(file_path)],
            None,
            "Parsed {} rows and {} cols".format(
                thousands_sep.format(self._nrows),
                thousands_sep.format(self._ncols)))
def _import_parse(self, file_path):
    """
    Lazily import ``file_path``, parse it, and store the parse results on this frame.

    Populates ``_id``, ``_computed``, ``_nrows``, ``_ncols``, ``_col_names``,
    ``_types`` and ``_keep``, then prints a summary of the parse.

    :param file_path: A single path (str) or an iterable of paths; handed to h2o.lazy_import.
    :return: None
    """
    raw_key = h2o.lazy_import(file_path)
    parse_config = h2o.parse_setup(raw_key)
    # A fresh temporary key names the parse destination.
    result = h2o.parse(parse_config, _py_tmp_key())

    self._id = result["job"]["dest"]["name"]
    self._computed = True
    # Row count comes from evaluating an "nrow" expression built on self.
    self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
    self._ncols = result["number_columns"]

    # Use the parsed column names when present, otherwise default to C1..C<ncols>.
    names = result["column_names"]
    if not names:
        names = ["C" + str(idx) for idx in range(1, self._ncols + 1)]
    self._col_names = names

    # Column name -> column type, paired positionally.
    self._types = dict(zip(self._col_names, result["column_types"]))
    self._keep = True

    number_fmt = h2o.H2ODisplay.THOUSANDS
    if isinstance(file_path, str):
        rows_txt = number_fmt.format(self._nrows)
        cols_txt = number_fmt.format(self._ncols)
        print("Imported {}. Parsed {} rows and {} cols".format(file_path, rows_txt, cols_txt))
        return

    # Several inputs: list them as File1, File2, ... with the summary as caption.
    file_table = [["File" + str(pos), path] for pos, path in enumerate(file_path, 1)]
    rows_txt = number_fmt.format(self._nrows)
    cols_txt = number_fmt.format(self._ncols)
    h2o.H2ODisplay(file_table, None, "Parsed {} rows and {} cols".format(rows_txt, cols_txt))
def _handle_text_key(self, text_key):
    """
    Handle result of upload_file

    Parses the raw text behind ``text_key`` and records ``_computed``, ``_id``,
    ``_ncols``, ``_col_names`` and ``_nrows`` on this frame, then prints a summary.

    :param text_key: A key pointing to raw text to be parsed
    :return: Part of the H2OFrame constructor.
    """
    # perform the parse setup
    setup = h2o.parse_setup(text_key)
    parse = h2o.parse(setup, _py_tmp_key())
    self._computed = True
    self._id = parse["destination_frame"]["name"]
    self._ncols = parse["number_columns"]
    if parse["column_names"]:
        cols = parse["column_names"]
    else:
        # Default names C1..C<ncols>. range() excludes its upper bound, so the
        # +1 is required; without it the last column would get no name and the
        # column count printed below would be one short.
        cols = ["C" + str(x) for x in range(1, self._ncols + 1)]
    self._col_names = cols
    # Row count comes from evaluating an "nrow" expression built on self.
    self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
    thousands_sep = h2o.H2ODisplay.THOUSANDS
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Uploaded {} into cluster with {} rows and {} cols".format(
        text_key,
        thousands_sep.format(self._nrows),
        thousands_sep.format(len(cols))))
def _handle_text_key(self, text_key):
    """
    Handle result of upload_file

    Parses the raw text behind ``text_key`` (treating the first line as a header)
    and records ``_computed``, ``_id``, ``_ncols``, ``_col_names`` and ``_nrows``
    on this frame, then prints a summary.

    :param text_key: A key pointing to raw text to be parsed
    :return: Part of the H2OFrame constructor.
    """
    # perform the parse setup
    setup = h2o.parse_setup(text_key)
    # blocking parse, first line is always a header (since "we" wrote the data out)
    parse = h2o.parse(setup, _py_tmp_key(), first_line_is_header=1)
    self._computed = True
    self._id = parse["destination_frame"]["name"]
    self._ncols = parse["number_columns"]
    if parse["column_names"]:
        cols = parse["column_names"]
    else:
        # Fallback when the parse result carries no column names: C1..C<ncols>.
        # range() excludes its upper bound, so the +1 is required; without it
        # the last column would get no name and the printed count would be short.
        cols = ["C" + str(x) for x in range(1, self._ncols + 1)]
    self._col_names = cols
    # Row count comes from evaluating an "nrow" expression built on self.
    self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
    thousands_sep = h2o.H2ODisplay.THOUSANDS
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Uploaded {} into cluster with {} rows and {} cols".format(
        text_key,
        thousands_sep.format(self._nrows),
        thousands_sep.format(len(cols))))
def _handle_text_key(self, text_key, check_header=None):
    """
    Handle result of upload_file

    Runs parse setup and parse on ``text_key`` and stores the outcome on this
    frame (``_computed``, ``_id``, ``_ncols``, ``_col_names``, ``_nrows``,
    ``_keep``), then prints a summary line.

    :param text_key: A key pointing to raw text to be parsed
    :param check_header: When not None, written into the parse setup under
        "check_header" before parsing; when None the setup is left untouched.
    :return: Part of the H2OFrame constructor.
    """
    parse_config = h2o.parse_setup(text_key)
    if check_header is not None:
        # Caller-supplied value overrides whatever parse_setup produced.
        parse_config["check_header"] = check_header
    result = h2o.parse(parse_config, _py_tmp_key())

    self._computed = True
    self._id = result["destination_frame"]["name"]
    self._ncols = result["number_columns"]

    # Use the parsed column names when present, otherwise default to C1..C<ncols>.
    names = result["column_names"]
    if not names:
        names = ["C" + str(idx) for idx in range(1, self._ncols + 1)]
    self._col_names = names

    # Row count comes from evaluating an "nrow" expression built on self.
    self._nrows = int(H2OFrame(expr=ExprNode("nrow", self))._scalar())
    self._keep = True

    number_fmt = h2o.H2ODisplay.THOUSANDS
    rows_txt = number_fmt.format(self._nrows)
    cols_txt = number_fmt.format(len(names))
    print("Uploaded {} into cluster with {} rows and {} cols".format(text_key, rows_txt, cols_txt))