コード例 #1
0
 def _python(self):
     """Return the frame backend as a PythonFrame, converting it from the Scala backend first when needed."""
     if not self._is_scala:
         # not Scala-backed, so no conversion is performed
         return self._frame

     # Scala-backed: translate the schema and the RDD, then swap the backend for a PythonFrame
     jvm_schema = self._frame.schema()
     rdd_converter = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd
     jvm_rdd = rdd_converter.scalaToPython(self._frame.rdd())
     py_schema = schema_to_python(self._tc.sc, jvm_schema)
     py_rdd = RDD(jvm_rdd, self._tc.sc)
     self._frame = PythonFrame(py_rdd, py_schema)
     return self._frame
コード例 #2
0
ファイル: frame.py プロジェクト: ashaarunkumar/spark-tk
 def _python(self):
     """Return the frame backend as a PythonFrame, converting it from the Scala backend first when needed."""
     if not self._is_scala:
         # not Scala-backed, so no conversion is performed
         return self._frame

     # Scala-backed: translate the schema and the RDD, then swap the backend for a PythonFrame
     jvm_schema = self._frame.schema()
     rdd_converter = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd
     jvm_rdd = rdd_converter.scalaToPython(self._frame.rdd())
     py_schema = schema_to_python(self._tc.sc, jvm_schema)
     py_rdd = RDD(jvm_rdd, self._tc.sc)
     # When the schema contains a matrix datatype, the type coercer converts list[list] values to numpy ndarray
     coerced_rdd = MatrixCoercion.schema_is_coercible(py_rdd, list(py_schema))
     self._frame = PythonFrame(coerced_rdd, py_schema)
     return self._frame
コード例 #3
0
 def _python(self):
     """Return the frame backend as a PythonFrame, converting it from the Scala backend first when needed."""
     if not self._is_scala:
         # not Scala-backed, so no conversion is performed
         return self._frame

     # Scala-backed: translate the schema and the RDD, then swap the backend for a PythonFrame
     jvm_schema = self._frame.schema()
     rdd_converter = self._tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.rdd.PythonJavaRdd
     jvm_rdd = rdd_converter.scalaToPython(self._frame.rdd())
     py_schema = schema_to_python(self._tc.sc, jvm_schema)
     py_rdd = RDD(jvm_rdd, self._tc.sc)
     # When the schema contains a matrix datatype, the type coercer converts list[list] values to numpy ndarray
     coerced_rdd = MatrixCoercion.schema_is_coercible(py_rdd, list(py_schema))
     self._frame = PythonFrame(coerced_rdd, py_schema)
     return self._frame
コード例 #4
0
ファイル: rename_columns.py プロジェクト: mapleNvg/spark-tk
def rename_columns(self, names):
    """
    Rename one or more columns of the frame.

    A ValueError is raised when 'names' is not a dict, and a RuntimeError is raised when the
    frame's schema is None.

    Parameters
    ----------

    :param names: (dict) Dictionary of old names to new names.

    Examples
    --------
    Start with a frame with columns *Black* and *White*.

        <hide>

        >>> s = [('Black', unicode), ('White', unicode)]
        >>> rows = [["glass", "clear"],["paper","unclear"]]
        >>> my_frame = tc.frame.create(rows, s)
        -etc-

        </hide>

        >>> print my_frame.schema
        [('Black', <type 'unicode'>), ('White', <type 'unicode'>)]

    Rename the columns to *Mercury* and *Venus*:

        >>> my_frame.rename_columns({"Black": "Mercury", "White": "Venus"})

        >>> print my_frame.schema
        [(u'Mercury', <type 'unicode'>), (u'Venus', <type 'unicode'>)]

    """
    # validate the argument and the frame state before touching either backend
    if not isinstance(names, dict):
        type_message = "Unsupported 'names' parameter type.  Expected dictionary, but found %s." % type(names)
        raise ValueError(type_message)
    if self.schema is None:
        raise RuntimeError("Unable rename column(s), because the frame's schema has not been defined.")

    if not self._is_python:
        # Scala backend: hand the rename map straight to the Scala frame
        self._scala.renameColumns(self._tc.jutils.convert.to_scala_map(names))
        return

    # Python backend: round-trip the schema through its Scala form, which performs the rename
    renames = self._tc.jutils.convert.to_scala_map(names)
    current_schema = schema_to_scala(self._tc.sc, self._python.schema)
    renamed_schema = current_schema.renameColumns(renames)
    self._python.schema = schema_to_python(self._tc.sc, renamed_schema)
コード例 #5
0
ファイル: rename_columns.py プロジェクト: Haleyo/spark-tk
def rename_columns(self, names):
    """
    Rename one or more columns of the frame.

    A ValueError is raised when 'names' is not a dict, and a RuntimeError is raised when the
    frame's schema is None.

    Parameters
    ----------

    :param names: (dict) Dictionary of old names to new names.

    Examples
    --------
    Start with a frame with columns *Black* and *White*.

        <hide>

        >>> s = [('Black', unicode), ('White', unicode)]
        >>> rows = [["glass", "clear"],["paper","unclear"]]
        >>> my_frame = tc.frame.create(rows, s)
        -etc-

        </hide>

        >>> print my_frame.schema
        [('Black', <type 'unicode'>), ('White', <type 'unicode'>)]

    Rename the columns to *Mercury* and *Venus*:

        >>> my_frame.rename_columns({"Black": "Mercury", "White": "Venus"})

        >>> print my_frame.schema
        [(u'Mercury', <type 'unicode'>), (u'Venus', <type 'unicode'>)]

    """
    # validate the argument and the frame state before touching either backend
    if not isinstance(names, dict):
        type_message = "Unsupported 'names' parameter type.  Expected dictionary, but found %s." % type(names)
        raise ValueError(type_message)
    if self.schema is None:
        raise RuntimeError("Unable rename column(s), because the frame's schema has not been defined.")

    if not self._is_python:
        # Scala backend: hand the rename map straight to the Scala frame
        self._scala.renameColumns(self._tc.jutils.convert.to_scala_map(names))
        return

    # Python backend: round-trip the schema through its Scala form, which performs the rename
    renames = self._tc.jutils.convert.to_scala_map(names)
    current_schema = schema_to_scala(self._tc.sc, self._python.schema)
    renamed_schema = current_schema.renameColumns(renames)
    self._python.schema = schema_to_python(self._tc.sc, renamed_schema)
コード例 #6
0
ファイル: frame.py プロジェクト: ashaarunkumar/spark-tk
 def schema(self):
     """Return the frame's schema, converted to its Python form when the backend is Scala."""
     if not self._is_scala:
         return self._frame.schema
     # on the Scala side schema is a def, so it has to be called with ()
     spark_context = self._tc.sc
     return schema_to_python(spark_context, self._frame.schema())
コード例 #7
0
 def schema(self):
     """Return the frame's schema, converted to its Python form when the backend is Scala."""
     if not self._is_scala:
         return self._frame.schema
     # on the Scala side schema is a def, so it has to be called with ()
     spark_context = self._tc.sc
     return schema_to_python(spark_context, self._frame.schema())
コード例 #8
0
ファイル: frame.py プロジェクト: lewisc/spark-tk-1
 def schema(self):
     """The column names and data types of this Frame, as a list of (name, type) tuples"""
     if not self._is_scala:
         return self._frame.schema
     # on the Scala side schema is a def, so it has to be called with ()
     spark_context = self._tc.sc
     return schema_to_python(spark_context, self._frame.schema())