Exemple #1
0
def queryAndServeWebsockets(args: dict) -> None:
    """Query SparkSession tables and send the resulting rows via WebSocket.

    The query result is materialised with ``df.rdd.collect()``; after
    joining the "alerts:lobby" topic, each collected row is sent as one
    message whose payload is ``{'value': row}``.

    Args:
        args (dict): Args used to query. 'spark' and 'query' are required;
            'event' is optional and falls back to "new_report".

    Returns:
        None

    Raises:
        Exception: If ``spark.sql(query)`` fails; the original error is
            chained as the cause.
    """
    spark = getRequiredParam(args, 'spark')
    query = getRequiredParam(args, 'query')
    event = args.get("event") or "new_report"

    try:
        df = spark.sql(query)
    except Exception as err:
        # Raise a real exception with a formatted message; raising a bare
        # tuple would itself fail with a TypeError and hide the query text.
        raise Exception('Invalid sql query %s' % query) from err
    rows = df.rdd.collect()

    @asyncio.coroutine
    def sendPayload():
        print('sending data...')
        websocket = yield from websockets.connect('ws://172.17.0.1:4545/socket/websocket')
        try:
            # The topic must be joined before events can be pushed to it.
            join = dict(topic="alerts:lobby", event="phx_join", payload={}, ref=None)
            yield from websocket.send(json.dumps(join))
            for entry in rows:
                payload = {
                    'value': entry
                }
                msg = dict(topic="alerts:lobby", event=event, payload=payload, ref=None)
                yield from websocket.send(json.dumps(msg))
        finally:
            # Always release the connection, even when a send fails.
            yield from websocket.close()
    asyncio.get_event_loop().run_until_complete(sendPayload())
Exemple #2
0
 def __handleSetup(self, args):
     """Attach a setup action to an already registered stream (deprecated).

     Emits a DeprecationWarning on every call.

     Args:
         args (dict): Must contain 'stream' (name looked up in
             self.sources) and 'shock_action' (name resolved through
             getAction('setup', ...)).

     Returns:
         no return.

     Raises:
         Exception: If no stream is registered under the given name.
     """
     warnings.warn('deprecated', DeprecationWarning)
     name = getRequiredParam(args, 'stream')
     actionName = getRequiredParam(args, 'shock_action')
     target = self.sources.get(name)
     if not target:
         raise Exception('Stream not found!')
     # The whole args dict is stored alongside the resolved action.
     target.setupAction = getAction('setup', actionName)
     target.setupArgs = args
Exemple #3
0
def parquetValueIngestion(args: dict) -> SparkDataFrame:
    """Return a parquet ingestion stream read from the given path.

    The stream is loaded with a fixed schema of four string columns:
    value, uuid, timestamp and capability.

    Args:
        args (dict): dict with options used to mount the stream; 'spark'
            and 'path' are required.

    Returns:
        SparkDataFrame: result of ``spark.readStream...load()``.
    """
    spark = getRequiredParam(args, 'spark')
    path = getRequiredParam(args, 'path')

    schema = StructType()
    for column in ("value", "uuid", "timestamp", "capability"):
        schema = schema.add(column, "string")

    reader = spark.readStream.format('parquet').schema(schema)
    return reader.option('path', path).load()
Exemple #4
0
def socketIngestion(args: dict) -> SparkDataFrame:
    """Build a socket-backed ingestion stream.

    Args:
        args (dict): options used to mount the stream; 'spark', 'host'
            and 'port' are required.

    Returns:
        SparkDataFrame: the loaded socket ingestion dataframe.
    """
    spark = getRequiredParam(args, 'spark')
    host = getRequiredParam(args, 'host')
    port = getRequiredParam(args, 'port')

    reader = spark.readStream.format("socket")
    reader = reader.option("host", host)
    reader = reader.option("port", port)
    return reader.load()
Exemple #5
0
def kafkaIngestion(args: dict) -> SparkDataFrame:
    """Build a kafka-backed ingestion stream.

    The loaded stream is projected to its key and value columns, both cast
    to STRING.

    Args:
        args (dict): options used to mount the stream; 'spark', 'topic'
            and 'brokers' are required.

    Returns:
        SparkDataFrame: the kafka ingestion dataframe with the string-cast
            key and value columns.
    """
    spark = getRequiredParam(args, 'spark')
    topic = getRequiredParam(args, 'topic')
    brokers = getRequiredParam(args, 'brokers')

    reader = spark.readStream.format("kafka")
    reader = reader.option("kafka.bootstrap.servers", brokers)
    reader = reader.option("subscribe", topic)
    loaded = reader.load()
    return loaded.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
Exemple #6
0
    def __handlePublish(self, args):
        """Attach a publish (sink) action to a registered stream.

        Args:
            args (dict): Arguments used for the publish. Must contain
                'stream' (name looked up in self.sources) and
                'shock_action' (name resolved through
                getAction('sinks', ...)).

        Returns:
            no return.

        Raises:
            Exception: If no stream is registered under the given name.
        """
        name = getRequiredParam(args, 'stream')
        target = self.sources.get(name)
        actionName = getRequiredParam(args, 'shock_action')
        if not target:
            raise Exception('Stream not found!')
        # The whole args dict is stored alongside the resolved action.
        target.publishAction = getAction('sinks', actionName)
        target.publishArgs = args
Exemple #7
0
    def __handleIngestion(self, args):
        """Attach an ingestion action to a registered stream.

        When the stream exists, ``args`` is mutated in place: its 'spark'
        entry is set to self.spark before being stored on the stream.

        Args:
            args (dict): Arguments used for the ingestion. Must contain
                'stream' (name looked up in self.sources) and
                'shock_action' (name resolved through
                getAction('ingestion', ...)).

        Returns:
            no return.

        Raises:
            Exception: If no stream is registered under the given name.
        """
        name = getRequiredParam(args, 'stream')
        target = self.sources.get(name)
        actionName = getRequiredParam(args, 'shock_action')
        if not target:
            raise Exception('Stream not found!')
        # Inject the session so the stored args carry it for the action.
        args["spark"] = self.spark
        target.ingestAction = getAction('ingestion', actionName)
        target.ingestArgs = args
Exemple #8
0
def streamFilter(stream: SparkDataFrame, args: dict) -> SparkDataFrame:
    """Apply a ``where`` filter to the stream.

    Args:
        stream (SparkDataFrame): stream to be filtered.
        args (dict): filter options; 'query' is required and is passed
            unchanged to ``stream.where``.

    Returns:
        SparkDataFrame: the result of ``stream.where(query)``.
    """
    condition = getRequiredParam(args, 'query')
    filtered = stream.where(condition)
    return filtered
Exemple #9
0
    def __newStream(self, args):
        """Create a new Shock stream and register it as a source.

        A Stream is built from the required 'stream' name and handed to
        self.registerSource under that same name.

        Args:
            args (dict): Arguments used for the registration; 'stream' is
                required.

        Returns:
            no return.
        """
        streamName = getRequiredParam(args, 'stream')
        created = Stream(streamName)
        self.registerSource(streamName, created)
Exemple #10
0
    def __startStream(self, args):
        """Start a registered stream.

        Args:
            args (dict): Arguments used to start the stream; 'stream' is
                required and is the name looked up in self.sources.

        Returns:
            no return.

        Raises:
            Exception: If no stream is registered under the given name.
        """
        name = getRequiredParam(args, 'stream')
        target = self.sources.get(name)
        if not target:
            raise Exception('Stream not found!')
        target.start()
Exemple #11
0
    def __handleAnalyze(self, args):
        """Attach an analyze (processing) action to a registered stream.

        Args:
            args (dict): Arguments used for the processing. 'stream' is
                read by plain indexing and 'shock_action' through
                getRequiredParam; the action is resolved with
                getAction('analyze', ...).

        Returns:
            no return.

        Raises:
            KeyError: If args is a dict without a 'stream' key.
            Exception: If no stream is registered under the given name.
        """
        target = self.sources.get(args["stream"])
        actionName = getRequiredParam(args, 'shock_action')
        if not target:
            raise Exception('Stream not found!')
        # The whole args dict is stored alongside the resolved action.
        target.analyzeAction = getAction('analyze', actionName)
        target.analyzeArgs = args
Exemple #12
0
    def __flush(self, args):
        """Flush pending actions. Used for sending websockets (deprecated).

        Emits a DeprecationWarning on every call. ``args`` is mutated in
        place: its 'spark' entry is set to self.spark before the resolved
        flush action is invoked with it.

        Args:
            args (dict): Arguments for the flush; 'strategy' is required
                and is resolved through getAction('flushes', ...).

        Returns:
            no return.

        Raises:
            Exception: If the flush strategy cannot be resolved; the
                original error is chained as the cause.
        """
        warnings.warn('deprecated', DeprecationWarning)
        args["spark"] = self.spark
        strategy = getRequiredParam(args, 'strategy')
        try:
            fn = getAction('flushes', strategy)
        except Exception as err:
            # Raising a plain string is itself a TypeError; raise a real
            # exception so the intended message reaches the caller.
            raise Exception('Invalid flush strategy!') from err

        fn(args)
0
def readAndServeWebsockets(args: dict) -> None:
    """Publish parquet results via websocket.

    Reads the parquet data at the configured path, collects its rows with
    ``df.rdd.collect()`` and, after joining the "alerts:lobby" topic, sends
    one message per row carrying its uuid, capability, timestamp and value.
    If the parquet data cannot be read, nothing is sent.

    Args:
        args (dict): 'spark' is required. 'path' is optional and falls
            back to "/analysis"; 'event' is optional and falls back to
            "new_report".

    Returns:
        None
    """
    spark = getRequiredParam(args, 'spark')

    path = args.get("path") or "/analysis"
    event = args.get("event") or "new_report"

    try:
        df = spark.read.parquet(path)
    except Exception:
        # Best effort: with no readable results there is nothing to publish.
        return
    rows = df.rdd.collect()

    @asyncio.coroutine
    def sendPayload():
        websocket = yield from websockets.connect('ws://172.17.0.1:4545/socket/websocket')
        try:
            # The topic must be joined before events can be pushed to it.
            join = dict(topic="alerts:lobby", event="phx_join", payload={}, ref=None)
            yield from websocket.send(json.dumps(join))
            for entry in rows:
                payload = {
                    'uuid': entry.uuid,
                    'capability': entry.capability,
                    'timestamp': entry.timestamp,
                    'value': entry.value
                }
                msg = dict(topic="alerts:lobby", event=event, payload=payload, ref=None)
                yield from websocket.send(json.dumps(msg))
        finally:
            # Always release the connection, even when a send fails.
            yield from websocket.close()
    asyncio.get_event_loop().run_until_complete(sendPayload())