def _python(self): """gets frame backend as _PythonFrame, causes conversion if it is current not""" if self._is_scala: # convert Scala Frame to a PythonFrame""" scala_schema = self._frame.schema() java_rdd = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd.scalaToPython(self._frame.rdd()) python_schema = schema_to_python(self._tc.sc, scala_schema) python_rdd = RDD(java_rdd, self._tc.sc) self._frame = PythonFrame(python_rdd, python_schema) return self._frame
def _python(self): """gets frame backend as _PythonFrame, causes conversion if it is current not""" if self._is_scala: # convert Scala Frame to a PythonFrame""" scala_schema = self._frame.schema() java_rdd = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd.scalaToPython(self._frame.rdd()) python_schema = schema_to_python(self._tc.sc, scala_schema) python_rdd = RDD(java_rdd, self._tc.sc) # If schema contains matrix datatype, then apply type_coercer to convert list[list] to numpy ndarray map_python_rdd = MatrixCoercion.schema_is_coercible(python_rdd, list(python_schema)) self._frame = PythonFrame(map_python_rdd, python_schema) return self._frame
def _python(self): """gets frame backend as _PythonFrame, causes conversion if it is current not""" if self._is_scala: # convert Scala Frame to a PythonFrame""" scala_schema = self._frame.schema() java_rdd = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd.scalaToPython( self._frame.rdd()) python_schema = schema_to_python(self._tc.sc, scala_schema) python_rdd = RDD(java_rdd, self._tc.sc) # If schema contains matrix datatype, then apply type_coercer to convert list[list] to numpy ndarray map_python_rdd = MatrixCoercion.schema_is_coercible( python_rdd, list(python_schema)) self._frame = PythonFrame(map_python_rdd, python_schema) return self._frame
def rename_columns(self, names):
    """
    Give one or more columns of this frame a new name.

    Parameters
    ----------

    :param names: (dict) Mapping of existing column names to their new names.

    Raises ValueError when names is not a dict, and RuntimeError when the frame
    has no schema.

    Examples
    --------
    Start with a frame with columns *Black* and *White*.

        <hide>
        >>> s = [('Black', unicode), ('White', unicode)]
        >>> rows = [["glass", "clear"],["paper","unclear"]]
        >>> my_frame = tc.frame.create(rows, s)
        -etc-

        </hide>

        >>> print my_frame.schema
        [('Black', <type 'unicode'>), ('White', <type 'unicode'>)]

    Rename the columns to *Mercury* and *Venus*:

        >>> my_frame.rename_columns({"Black": "Mercury", "White": "Venus"})

        >>> print my_frame.schema
        [(u'Mercury', <type 'unicode'>), (u'Venus', <type 'unicode'>)]

    """
    if not isinstance(names, dict):
        raise ValueError("Unsupported 'names' parameter type. Expected dictionary, but found %s." % type(names))
    if self.schema is None:
        raise RuntimeError("Unable rename column(s), because the frame's schema has not been defined.")

    if not self._is_python:
        # Scala backend: the Scala frame does the rename itself.
        scala_frame = self._scala
        scala_frame.renameColumns(self._tc.jutils.convert.to_scala_map(names))
        return

    # Python backend: round-trip the schema through its Scala form so that the
    # Scala-side renameColumns does the work, then store the converted result.
    rename_map = self._tc.jutils.convert.to_scala_map(names)
    schema_before = schema_to_scala(self._tc.sc, self._python.schema)
    schema_after = schema_before.renameColumns(rename_map)
    self._python.schema = schema_to_python(self._tc.sc, schema_after)
def rename_columns(self, names):
    """
    Rename columns of this frame according to a dictionary.

    Parameters
    ----------

    :param names: (dict) Old column names as keys, new column names as values.

    A ValueError is raised if names is not a dict; a RuntimeError is raised if
    the frame's schema is None.

    Examples
    --------
    Start with a frame with columns *Black* and *White*.

        <hide>
        >>> s = [('Black', unicode), ('White', unicode)]
        >>> rows = [["glass", "clear"],["paper","unclear"]]
        >>> my_frame = tc.frame.create(rows, s)
        -etc-

        </hide>

        >>> print my_frame.schema
        [('Black', <type 'unicode'>), ('White', <type 'unicode'>)]

    Rename the columns to *Mercury* and *Venus*:

        >>> my_frame.rename_columns({"Black": "Mercury", "White": "Venus"})

        >>> print my_frame.schema
        [(u'Mercury', <type 'unicode'>), (u'Venus', <type 'unicode'>)]

    """
    names_is_dict = isinstance(names, dict)
    if not names_is_dict:
        message = "Unsupported 'names' parameter type. Expected dictionary, but found %s." % type(names)
        raise ValueError(message)

    if self.schema is None:
        message = "Unable rename column(s), because the frame's schema has not been defined."
        raise RuntimeError(message)

    if self._is_python:
        # Convert the Python schema to Scala, rename there, and convert the result back.
        as_scala_map = self._tc.jutils.convert.to_scala_map(names)
        original_schema = schema_to_scala(self._tc.sc, self._python.schema)
        renamed_schema = original_schema.renameColumns(as_scala_map)
        self._python.schema = schema_to_python(self._tc.sc, renamed_schema)
        return

    # Scala backend: delegate the rename to the Scala frame.
    target = self._scala
    target.renameColumns(self._tc.jutils.convert.to_scala_map(names))
def schema(self):
    """Return this frame's schema; a Scala-backed schema is converted with schema_to_python."""
    if not self._is_scala:
        # Python backend exposes the schema as a plain attribute.
        return self._frame.schema

    spark_context = self._tc.sc
    # schema is a def on the Scala side, so it has to be called with ().
    scala_schema = self._frame.schema()
    return schema_to_python(spark_context, scala_schema)
def schema(self):
    """List of (name, type) tuples giving the column names and data types of this Frame."""
    if self._is_scala:
        sc = self._tc.sc
        # The Scala frame defines schema as a def, hence the call with ().
        jvm_schema = self._frame.schema()
        result = schema_to_python(sc, jvm_schema)
    else:
        # The Python frame keeps its schema as an attribute.
        result = self._frame.schema
    return result