Code example #1
0
def consume_records():
    """Stream rating CSV files and print ratings for users with userId < 481.

    Reads CSV files from ``$SPARK_DATA/structured`` as a structured stream
    with an explicit schema, filters the rows to ``userId < 481``, and
    writes the resulting (userId, rating) pairs to the console in append
    mode. Blocks until the streaming query terminates.

    Raises:
        KeyError: if the ``SPARK_DATA`` environment variable is not set.
    """
    spark_context = SparkContext(appName='RatingConsumer')
    sql_context = SQLContext(spark_context)
    stream_reader = DataStreamReader(sql_context)

    fpath = os.path.join(os.environ['SPARK_DATA'], 'structured')

    # Streaming file sources cannot infer schemas, so declare one explicitly.
    schema = StructType([
        StructField('userId', IntegerType(), True),
        StructField('movieId', IntegerType(), True),
        StructField('rating', FloatType(), True),
        StructField('timestamp', StringType(), True),
    ])
    ratings = stream_reader.load(fpath, schema=schema, format='csv')

    # DataFrame-API equivalent of:
    #   SELECT userId, rating FROM ratings WHERE userId < 481
    # (The previous version also registered a temp view 'ratingsView' for a
    # SQL variant of this query; the view was never used, so it is removed.)
    user_481 = ratings.where("userId < 481").select("userId", "rating")

    query = (user_481
             .writeStream
             .outputMode('append')
             .format('console')
             .start())
    query.awaitTermination()
Code example #2
0
def consume_records():
    """Continuously count ratings per user and dump running totals to the console.

    Loads CSV rating files from ``$SPARK_DATA/structured`` as a streaming
    DataFrame, aggregates a count of rows per ``userId``, and writes the
    full aggregation table to the console after each trigger (complete
    output mode). Blocks until the streaming query terminates.
    """
    sc = SparkContext(appName='RatingConsumer')
    ctx = SQLContext(sc)
    reader = DataStreamReader(ctx)

    data_dir = os.path.join(os.environ['SPARK_DATA'], 'structured')

    # Explicit column schema for the incoming CSV files.
    rating_schema = StructType([
        StructField('userId', IntegerType(), True),
        StructField('movieId', IntegerType(), True),
        StructField('rating', FloatType(), True),
        StructField('timestamp', StringType(), True),
    ])

    ratings = reader.load(data_dir, schema=rating_schema, format='csv')
    per_user = ratings.groupBy('userId').count()

    # 'complete' mode re-emits the whole aggregate table on every update.
    stream = per_user.writeStream.outputMode('complete').format('console').start()
    stream.awaitTermination()
Code example #3
0
    def readStream(self) -> DataStreamReader:
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. versionadded:: 2.0.0

        Notes
        -----
        This API is evolving.

        Returns
        -------
        :class:`DataStreamReader`

        Examples
        --------
        >>> spark.readStream
        <pyspark.sql.streaming.readwriter.DataStreamReader object ...>

        The example below uses the Rate source, which generates rows continuously.
        Each value is then reduced modulo 3 and the stream is written to the
        console; the query is stopped after 3 seconds.

        >>> import time
        >>> df = spark.readStream.format("rate").load()
        >>> df = df.selectExpr("value % 3 as v")
        >>> q = df.writeStream.format("console").start()
        >>> time.sleep(3)
        >>> q.stop()
        """
        # A fresh reader bound to this session; configure and .load() it
        # to obtain a streaming DataFrame.
        reader = DataStreamReader(self)
        return reader
Code example #4
0
File: session.py — Project: FUHENG0571/S
    def readStream(self):
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. note:: Evolving.

        :return: :class:`DataStreamReader`
        """
        # Build the reader against the wrapped SQLContext backing this session.
        wrapped_ctx = self._wrapped
        return DataStreamReader(wrapped_ctx)
Code example #5
0
    def readStream(self):
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. note:: Evolving.

        :return: :class:`DataStreamReader`

        >>> text_sdf = sqlContext.readStream.text(tempfile.mkdtemp())
        >>> text_sdf.isStreaming
        True
        """
        # Each access hands back a new reader bound to this context.
        reader = DataStreamReader(self)
        return reader
Code example #6
0
File: context.py — Project: iceberg12/spark-1
    def readStream(self):
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. note:: Experimental.

        :return: :class:`DataStreamReader`

        >>> text_sdf = sqlContext.readStream.text(os.path.join(tempfile.mkdtemp(), 'data'))
        >>> text_sdf.isStreaming
        True
        """
        # A new reader is constructed per call; callers configure it
        # (format, schema, options) before loading a stream.
        stream_reader = DataStreamReader(self)
        return stream_reader
Code example #7
0
File: session.py — Project: mingpeng2live/spark
    def readStream(self) -> DataStreamReader:
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. versionadded:: 2.0.0

        Notes
        -----
        This API is evolving.

        Returns
        -------
        :class:`DataStreamReader`
        """
        # The reader is created over the wrapped context of this session.
        underlying = self._wrapped
        return DataStreamReader(underlying)
Code example #8
0
File: context.py — Project: zhengruifeng/spark
    def readStream(self) -> DataStreamReader:
        """
        Returns a :class:`DataStreamReader` that can be used to read data streams
        as a streaming :class:`DataFrame`.

        .. versionadded:: 2.0.0

        Notes
        -----
        This API is evolving.

        Returns
        -------
        :class:`DataStreamReader`

        >>> text_sdf = sqlContext.readStream.text(tempfile.mkdtemp())
        >>> text_sdf.isStreaming
        True
        """
        # Delegate to the underlying SparkSession held by this context.
        session = self.sparkSession
        return DataStreamReader(session)